# Docker Setting

### hub์—์„œ ์ด๋ฏธ์ง€ ๊ฒ€์ƒ‰
docker search centos
### hub์—์„œ ์ด๋ฏธ์ง€ ๋‹ค์šด๋กœ๋“œ
docker pull centos
### ๋กœ์ปฌ ์ด๋ฏธ์ง€ ๋ชฉ๋ก
docker images

docker run -it --name hello centos /bin/bash

### ์‹คํ–‰ ์ค‘์ธ ์ปจํ…Œ์ด๋„ˆ ๋ชฉ๋ก + ์ค‘์ง€๋ชฉ๋ก ์ถ”๊ฐ€
docker ps -a

### ์ •์ง€๋œ ์ปจํ…Œ์ด๋„ˆ ์‹คํ–‰
docker start hadoop-base

### ์ปจํ…Œ์ด๋„ˆ ์ ‘์†
docker attach hadoop-base

### ์ปจํ…Œ์ด๋„ˆ ์ •์ง€
docker stop hadoop-base

### ์ปจํ…Œ์ด๋„ˆ ์‚ญ์ œ
docker rm hadoop-base

### docker ์ด๋ฏธ์ง€ ์‚ญ์ œ
docker rmi centos
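Taken together, the lifecycle commands above can be exercised end to end with a throwaway container (the name tmp-centos below is arbitrary, not from the original notes):

docker pull centos
docker run -it --name tmp-centos centos /bin/bash
### exit the shell, then:
docker ps -a
docker start tmp-centos
docker attach tmp-centos
### exit again, then clean up:
docker rm tmp-centos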

# OS Setting

### Create the base container that Hadoop will be installed in
docker run -it --name hadoop-base centos /bin/bash
yum update
yum install wget -y
yum install vim -y
yum install java-1.8.0-openjdk-devel.x86_64 -y

java -version

which java
readlink -f "/usr/bin/java"
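The OpenJDK directory name changes with every patch release, so the JAVA_HOME path used below may not match a freshly installed image. One way to derive it (a quick check assuming the usual CentOS OpenJDK layout, not part of the original steps):

dirname $(dirname $(readlink -f /usr/bin/javac))
### prints something like /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.312.b07-2.el8_5.x86_64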

vi ~/.bashrc


### ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.312.b07-2.el8_5.x86_64
export PATH=$PATH:$JAVA_HOME/bin
export JAVA_OPTS="-Dfile.encoding=UTF-8"
export CLASSPATH="."

source ~/.bashrc
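A quick sanity check that the new environment is actually in effect after sourcing:

echo $JAVA_HOME
$JAVA_HOME/bin/javac -version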

### Troubleshooting: "Cannot prepare internal mirrorlist: No URLs in mirrorlist"
### CentOS 8 is end-of-life, so point the repos at the vault archive (sudo is not needed inside the container, which runs as root):
sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-*

# Hadoop Setting

### Create a directory to use as the Hadoop home directory
mkdir /hadoop_home
cd /hadoop_home
### Download Hadoop and extract the archive
wget https://mirrors.sonic.net/apache/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
tar xvzf hadoop-3.3.1.tar.gz
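If the sonic.net mirror no longer carries 3.3.1 (mirrors only keep recent releases), the Apache archive hosts every release at a stable path; this alternative URL is not from the original notes:

wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz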

vim ~/.bashrc

### ~/.bashrc
export HADOOP_HOME=/hadoop_home/hadoop-3.3.1
export HADOOP_CONFIG_HOME=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

source ~/.bashrc

hadoop version

hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount $HADOOP_HOME/LICENSE.txt wordcount_output
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /test /test_out
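The first example runs in local mode against a file on the local filesystem; the second reads from HDFS, so it only works once the cluster configured below is up and a /test input directory exists. Staging input and inspecting the output might look like this (paths reused from the example above):

hdfs dfs -mkdir /test
hdfs dfs -put $HADOOP_HOME/LICENSE.txt /test/
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /test /test_out
hdfs dfs -cat /test_out/part-r-00000 | head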


# SSH Setting

### In a new terminal (inside the hadoop-base container)

yum install openssh-server openssh-clients openssh-askpass -y
ssh-keygen -f /etc/ssh/ssh_host_rsa_key -t rsa -N ""
ssh-keygen -f /etc/ssh/ssh_host_ecdsa_key -t ecdsa -N ""
ssh-keygen -f /etc/ssh/ssh_host_ed25519_key -t ed25519 -N ""

vim ~/.bashrc

### ~/.bashrc (start sshd automatically whenever a shell starts)
/usr/sbin/sshd

source ~/.bashrc

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh localhost
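If ssh localhost still prompts for a password, file permissions on the key material are the usual cause; a common fix plus a non-interactive check (BatchMode makes ssh fail instead of prompting):

chmod 700 ~/.ssh && chmod 600 ~/.ssh/authorized_keys
ssh -o BatchMode=yes localhost 'echo passwordless ssh OK'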


### Move to the directory containing the Hadoop configuration files
cd $HADOOP_CONFIG_HOME
### Open hadoop-env.sh
vim hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.312.b07-2.el8_5.x86_64
export HDFS_NAMENODE_USER="root"
export HDFS_DATANODE_USER="root"
export HDFS_SECONDARYNAMENODE_USER="root"
export YARN_RESOURCEMANAGER_USER="root"
export YARN_NODEMANAGER_USER="root"


### Create the directories referenced by core-site.xml and hdfs-site.xml below
mkdir -p /opt/hadoop_home/temp
mkdir -p /opt/hadoop_home/namenode_home
mkdir -p /opt/hadoop_home/datanode_home

### 1. core-site.xml: settings shared by HDFS and MapReduce
### 2. hdfs-site.xml: settings used by HDFS
### 3. mapred-site.xml: settings used by MapReduce
cd $HADOOP_CONFIG_HOME

vim core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop_home/temp</value>
    </property>

    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:9000</value>
        <final>true</final>
    </property>
</configuration>
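Hadoop reads these files from $HADOOP_HOME/etc/hadoop by default, so the effective filesystem URI can be confirmed before starting anything (hdfs getconf just prints the resolved configuration):

hdfs getconf -confKey fs.defaultFS
### should print hdfs://localhost:9000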


vim hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
        <final>true</final>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop_home/namenode_home</value>
        <final>true</final>
    </property>

    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop_home/datanode_home</value>
        <final>true</final>
    </property>
</configuration>


vim mapred-site.xml
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>

### Format the NameNode
hdfs namenode -format

### Save the configured container as a reusable image
docker commit hadoop-base centos:hadoop

### Run a container from the new image with the NameNode web UI port (9870) published
### (the name hadoop-base is already taken by the original container; remove it first, or pick another --name)
docker run -it --name hadoop-base -p 9870:9870 centos:hadoop

### Inside the container, start the daemons and check the running Java processes
start-all.sh
jps
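If the daemons came up, jps inside the container should list processes such as NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager, and the NameNode web UI should answer on the published port from the host (a quick status-code check, not from the original notes):

curl -s -o /dev/null -w "%{http_code}\n" http://localhost:9870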



# HUE Setting

yum -y install python2-devel sqlite-devel libxml2-devel libxslt-devel libffi-devel openssl-devel openldap-devel gmp-devel gcc gcc-c++
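The original notes stop after installing the build dependencies. A rough sketch of a source build, following Hue's documented quick start (repository URL and commands assumed from the upstream README; extra tools such as git or maven may also be needed):

yum install git -y
git clone https://github.com/cloudera/hue.git
cd hue
make apps
### start the development server (listens on localhost:8000 by default)
build/env/bin/hue runserver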

# HUE Usage

1. Query Editor: run SQL
2. Table Browser: browse table listings
3. Workflows: manage batch components
    - Workflows > Dashboard: view registered batches
    - Workflows > Editor: edit registered batches
    - Workflows > Editor > Workflow: manage batch components, re-run batches
    - Workflows > Editor > Coordinator: manage batch schedules (period/time)
4. File Browser: view and edit the SQL run by batches
5. Job Browser: check batch execution logs

# References

- https://taaewoo.tistory.com/22?category=862614
- https://mungiyo.tistory.com/16
- https://blog.acronym.co.kr/329?category=398102
