Files

23 lines
692 B
Docker

FROM cluster-base
# -- Layer: Apache Spark
ARG spark_version=3.3.1
ARG hadoop_version=3
RUN apt-get update -y && \
apt-get install -y curl && \
curl https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz && \
tar -xf spark.tgz && \
mv spark-${spark_version}-bin-hadoop${hadoop_version} /usr/bin/ && \
mkdir /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}/logs && \
rm spark.tgz
ENV SPARK_HOME /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}
ENV SPARK_MASTER_HOST spark-master
ENV SPARK_MASTER_PORT 7077
ENV PYSPARK_PYTHON python3
# -- Runtime
WORKDIR ${SPARK_HOME}