# Docker Setting
### hub์์ ์ด๋ฏธ์ง ๊ฒ์
docker search centos
### hub์์ ์ด๋ฏธ์ง ๋ค์ด๋ก๋
docker pull centos
### 로컬 이미지 목록
docker images
docker run -it --name hello centos /bin/bash
### 실행 중인 컨테이너 목록 + 중지목록 추가
docker ps -a
### 정지된 컨테이너 실행
docker start hadoop-base
### 컨테이너 접속
docker attach hadoop-base
### 컨테이너 정지
docker stop hadoop-base
### 컨테이너 삭제
docker rm hadoop-base
### docker 이미지 삭제
docker rmi centos
# OS Setting
docker run -it --name hadoop-base centos
yum update
yum install wget -y
yum install vim -y
yum install java-1.8.0-openjdk-devel.x86_64 -y
java -version
which java
readlink -f "/usr/bin/java"
vi ~/.bashrc
### ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.312.b07-2.el8_5.x86_64
export PATH=$PATH:$JAVA_HOME/bin
export JAVA_OPTS="-Dfile.encoding=UTF-8"
export CLASSPATH="."
source ~/.bashrc
* If yum fails with "Cannot prepare internal mirrorlist: No URLs in mirrorlist", switch the repos to vault.centos.org:
sudo sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
sudo sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-*
# Hadoop Setting
### hadoop 홈 디렉토리로 쓸 디렉토리 생성
mkdir /hadoop_home
cd /hadoop_home
# hadoop 다운로드 후 압축풀기
wget https://mirrors.sonic.net/apache/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
tar xvzf hadoop-3.3.1.tar.gz
vim ~/.bashrc
### ~/.bashrc
export HADOOP_HOME=/hadoop_home/hadoop-3.3.1
export HADOOP_CONFIG_HOME=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source ~/.bashrc
hadoop version
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount $HADOOP_HOME/LICENSE.txt wordcount_output
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /test /test_out
# Docker Setting
# 새로운 터미널에서
yum install openssh-server openssh-clients openssh-askpass -y
ssh-keygen -f /etc/ssh/ssh_host_rsa_key -t rsa -N ""
ssh-keygen -f /etc/ssh/ssh_host_ecdsa_key -t ecdsa -N ""
ssh-keygen -f /etc/ssh/ssh_host_ed25519_key -t ed25519 -N ""
vim ~/.bashrc
### ~/.bashrc
/usr/sbin/sshd
source ~/.bashrc
ssh-keygen -t rsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
ssh localhost
### 하둡 설정 파일들이 있는 디렉토리로 이동
cd $HADOOP_CONFIG_HOME
### hadoop-env.sh 열기
vim hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.312.b07-2.el8_5.x86_64
export HDFS_NAMENODE_USER="root"
export HDFS_DATANODE_USER="root"
export HDFS_SECONDARYNAMENODE_USER="root"
export YARN_RESOURCEMANAGER_USER="root"
export YARN_NODEMANAGER_USER="root"
mkdir -p /opt/hadoop_home/temp
mkdir -p /opt/hadoop_home/namenode_home
mkdir -p /opt/hadoop_home/datanode_home
### 1. core-site.xml: HDFS์ MapReduce์์ ๊ณตํต์ ์ผ๋ก ์ฌ์ฉํ ํ๊ฒฝ์ ๋ณด
### 2. hdfs-site.xml: HDFS์์ ์ฌ์ฉํ ํ๊ฒฝ์ ๋ณด
### 3. mapred-site.xml: MapReduce์์ ์ฌ์ฉํ ํ๊ฒฝ์ ๋ณด
cd $HADOOP_CONFIG_HOME
vim core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop_home/temp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
<final>true</final>
</property>
</configuration>
vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/hadoop_home/namenode_home</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hadoop_home/datanode_home</value>
<final>true</final>
</property>
</configuration>
vim mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
</configuration>
hadoop namenode -format
docker commit hadoop-base centos:hadoop
start-all.sh
jps
docker run -it --name hadoop-base -p 9870:9870 centos:hadoop
# HUE Setting
yum -y install python2-devel sqlite-devel libxml2-devel libxslt-devel libffi-devel openssl-devel openldap-devel gmp-devel gcc gcc-c++
# HUE 사용
1. Query Editor : Sql 실행가능
2. Table Browser : Table 내역 조회
3. Workflows : Batch 구성요소 관리
- Workflows > 대시보드 : Batch 등록 내역 조회
- Workflows > 편집기 : Batch 등록 내역 편집
- Workflows > 편집기 > Workflow : Batch 구성 요소 관리, 배치 재실행
- Workflows > 편집기 > Coordinator : Batch 수행 주기/시간 관리
4. 파일브라우저 : Batch 에서 실행된 sql 확인 및 편집
5. Job Browser : Batch 실행 log 확인
# References
https://taaewoo.tistory.com/22?category=862614 https://mungiyo.tistory.com/16 https://blog.acronym.co.kr/329?category=398102
โ - zebra - dynamodb โ