docker-machine
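
The example below assumes a docker-machine named dev already exists. If it does not, one way to create it first (the VirtualBox driver here is just an assumption; use whichever driver fits your setup) is:

$ docker-machine create --driver virtualbox dev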

$ mkdir foo
$ docker-machine ssh dev mkdir foo
$ docker-machine mount dev:/home/docker/foo foo
$ touch foo/bar
$ docker-machine ssh dev ls foo
bar

$ eval $(docker-machine env dev)
$ docker run -v /home/docker/foo:/tmp/foo busybox ls /tmp/foo
bar
$ touch foo/baz
$ docker run -v /home/docker/foo:/tmp/foo busybox ls /tmp/foo
bar
baz

$ docker-machine mount -u dev:/home/docker/foo foo
$ rmdir foo

docker mysql-server

docker pull mysql/mysql-server
docker run -d --name mysql-server docker.io/mysql/mysql-server
docker logs mysql-server 2>&1 | grep GENERATED
docker exec -it mysql-server mysql -uroot -p
ALTER USER 'root'@'localhost' IDENTIFIED BY 'password';
use mysql;
update user set host = '%' where user = 'root';
FLUSH PRIVILEGES;

GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'password' WITH GRANT OPTION;

FLUSH PRIVILEGES;
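
With root now allowed to connect from any host, a quick connectivity check is to run a second, throwaway container and connect over the network (the db alias below is an arbitrary choice):

docker run -it --rm --link mysql-server:db mysql/mysql-server mysql -hdb -uroot -ppassword -e "SELECT VERSION();"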

References:
https://hub.docker.com/r/mysql/mysql-server/

https://blog.csdn.net/cxin917/article/details/76686227

docker hdfs+spark

vi dockerfile-centos-ssh

# dockerfile-centos-ssh contents
# Build on top of the centos base image
FROM centos
MAINTAINER dys
# Install the ssh server
RUN yum install -y openssh-server sudo
RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config
RUN yum install -y openssh-clients

# Set the root password
RUN echo "root:123456" | chpasswd
RUN echo "root   ALL=(ALL)       ALL" >> /etc/sudoers
# Generate the ssh host keys
RUN ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key

# Configure the sshd service
RUN mkdir /var/run/sshd
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]

docker build -f dockerfile-centos-ssh -t centos-ssh:latest .
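
A quick way to verify the image is to start a container from it and log in over ssh with the root password set in the Dockerfile (the host port 2222 and the container name ssh-test are arbitrary choices):

docker run -d -p 2222:22 --name ssh-test centos-ssh
ssh -p 2222 root@localhost     # password: 123456
docker rm -f ssh-test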

Download the required packages

http://jdk.java.net/8/

wget https://download.java.net/java/jdk8u192/archive/b04/binaries/jdk-8u192-ea-bin-b04-linux-x64-01_aug_2018.tar.gz

https://hadoop.apache.org/releases.html

wget https://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-3.0.3/hadoop-3.0.3.tar.gz

https://www.scala-lang.org/download/all.html

wget https://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.tgz

http://spark.apache.org/downloads.html

wget https://archive.apache.org/dist/spark/spark-2.0.2/spark-2.0.2-bin-hadoop2.7.tgz

# dockerfile-hp-spark contents
# Build on top of the centos-ssh image
FROM centos-ssh
# Install Java
ADD jdk-8u192-ea-bin-b04-linux-x64-01_aug_2018.tar.gz /usr/local/
RUN mv /usr/local/jdk1.8.0_192 /usr/local/jdk1.8
# Set the Java environment variables
ENV JAVA_HOME /usr/local/jdk1.8
ENV PATH $JAVA_HOME/bin:$PATH
# Install Hadoop
ADD hadoop-3.0.3.tar.gz /usr/local
RUN mv /usr/local/hadoop-3.0.3 /usr/local/hadoop
# Set the Hadoop environment variables
ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $HADOOP_HOME/bin:$PATH

# Install Scala; note that Spark 2.0.x requires Scala 2.11.x
ADD scala-2.11.8.tgz /usr/local
RUN mv /usr/local/scala-2.11.8 /usr/local/scala2.11.8

# Set the Scala environment variables
ENV SCALA_HOME /usr/local/scala2.11.8
ENV PATH $SCALA_HOME/bin:$PATH

# Install Spark
ADD spark-2.0.2-bin-hadoop2.7.tgz /usr/local
RUN mv /usr/local/spark-2.0.2-bin-hadoop2.7 /usr/local/spark2.0.2

# Set the Spark environment variables
ENV SPARK_HOME /usr/local/spark2.0.2
ENV PATH $SPARK_HOME/bin:$PATH

# Create the hdfs user
RUN useradd hdfs
RUN echo "hdfs:12345678" | chpasswd

RUN yum install -y which sudo

# Give the hdfs user ownership of the Hadoop and Spark installs
RUN chown -R hdfs:hdfs /usr/local/hadoop
RUN chown -R hdfs:hdfs /usr/local/spark2.0.2

# ssh without key
USER hdfs
RUN ssh-keygen -t rsa -f ~/.ssh/id_rsa -P '' && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

USER root

docker build -f dockerfile-hp-spark -t centos-hp-spark:latest .
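
Before wiring up the cluster, it is worth a quick sanity check that the JDK, Hadoop and Spark ended up where the ENV variables point (a throwaway container is enough; the exact paths follow the Dockerfile above):

docker run --rm centos-hp-spark java -version
docker run --rm centos-hp-spark hadoop version
docker run --rm centos-hp-spark /usr/local/spark2.0.2/bin/spark-submit --version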

docker network create --driver=bridge hadoop

Start the containers

# the default node number is 3
N=${1:-3}

# start hadoop master container
sudo docker rm -f hadoop-master &> /dev/null
echo "start hadoop-master container..."
sudo docker run -itd \
    --net=hadoop \
    -p 50070:50070 \
    -p 8088:8088 \
    --name hadoop-master \
    --hostname hadoop-master \
    -v /home/anyuan/myProjects/bigdata:/opt \
    centos-hp-spark &> /dev/null

# start hadoop slave container
i=1
while [ $i -lt $N ]
do
    sudo docker rm -f hadoop-slave$i &> /dev/null
    echo "start hadoop-slave$i container..."
    sudo docker run -itd \
        --net=hadoop \
        --name hadoop-slave$i \
        --hostname hadoop-slave$i \
        -v /home/anyuan/myProjects/bigdata:/opt \
        centos-hp-spark &> /dev/null
    i=$(( $i + 1 ))
done

# get into hadoop master container
sudo docker exec -it hadoop-master bash
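
If the commands above are saved as a script, say start-containers.sh (the file name is only an assumption), the whole cluster can be brought up with an explicit node count:

sudo bash start-containers.sh 3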


Run the following inside the hadoop-master container:

export HADOOP_USER_NAME=hdfs
for HDFILE in core-site.xml hadoop-env.sh hdfs-site.xml mapred-site.xml  spark-env.sh yarn-site.xml;do
scp  /opt/config/$HDFILE  hadoop-master:/usr/local/hadoop/etc/hadoop/
scp  /opt/config/$HDFILE  hadoop-slave1:/usr/local/hadoop/etc/hadoop/
scp  /opt/config/$HDFILE  hadoop-slave2:/usr/local/hadoop/etc/hadoop/
done
\cp /opt/config/slaves /usr/local/hadoop/etc/hadoop/workers
\cp /opt/config/start-dfs.sh /usr/local/hadoop/sbin/start-dfs.sh
\cp /opt/config/stop-dfs.sh /usr/local/hadoop/sbin/stop-dfs.sh
hdfs namenode -format
/usr/local/hadoop/sbin/start-dfs.sh
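
Once start-dfs.sh returns, a few sanity checks on hadoop-master can confirm that the NameNode and both DataNodes came up (jps is part of the JDK, hdfs is on the PATH set in the Dockerfile):

jps                       # should list NameNode and SecondaryNameNode
hdfs dfsadmin -report     # should report 2 live datanodes
hdfs dfs -mkdir -p /tmp && hdfs dfs -ls /

The NameNode web UI should also be reachable from the Docker host on port 50070, thanks to the -p 50070:50070 mapping in the start script.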

Contents of the config folder:

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9000/</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
</property>
</configuration>

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-master:9001</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.http.address</name>
<value>hadoop-master:50070</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>

yarn-site.xml

<?xml version="1.0"?>
<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hadoop-master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop-master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop-master:8035</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hadoop-master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hadoop-master:8088</value>
</property>
</configuration>
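
Note that start-dfs.sh above only brings up HDFS; to actually run jobs through the ResourceManager configured here, the YARN daemons have to be started as well, for example:

/usr/local/hadoop/sbin/start-yarn.sh

(On Hadoop 3, running this as root may additionally require YARN_RESOURCEMANAGER_USER and YARN_NODEMANAGER_USER to be set, analogous to the HDFS user settings.)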

hadoop-env.sh (modify):

export JAVA_HOME=/usr/local/jdk1.8

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

spark-env.sh

export SCALA_HOME=/usr/local/scala2.11.8
export JAVA_HOME=/usr/local/jdk1.8
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
SPARK_MASTER_IP=hadoop-master
SPARK_LOCAL_DIRS=/usr/local/spark2.0.2
SPARK_DRIVER_MEMORY=1G
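
With spark-env.sh in place (and YARN running), a minimal smoke test is to submit the bundled SparkPi example from hadoop-master; the jar path below assumes the standard Spark 2.0.2 binary layout:

/usr/local/spark2.0.2/bin/spark-submit \
    --class org.apache.spark.examples.SparkPi \
    --master yarn \
    --deploy-mode client \
    /usr/local/spark2.0.2/examples/jars/spark-examples_2.11-2.0.2.jar 100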


References:

https://blog.csdn.net/iigeoxiaoyang/article/details/53020066  docker hdfs spark