Skip to content

Commit

Permalink
Merge pull request #5 from jaytmiller/handle-run-cache-mounts
Browse files Browse the repository at this point in the history
Handle run cache mounts
  • Loading branch information
jaytmiller authored Aug 9, 2023
2 parents 610eada + ec7250b commit 5a70edd
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 50 deletions.
28 changes: 10 additions & 18 deletions deployments/common/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,43 +19,35 @@ ENV SHELL=/bin/bash
ENV PATH=${CONDA_DIR}/bin:${PATH}
ENV CFLAGS="-fcommon -fpic"

# ------------------------------------------------------------------------
USER root

ARG TZ='America/New_York'
ARG DEBIAN_FRONTEND=noninteractive

ARG USE_FROZEN=1
ENV USE_FROZEN=$USE_FROZEN

ARG PIP_SWITCHES="--no-cache-dir"
ARG PIP_SWITCHES=""
ENV PIP_SWITCHES=$PIP_SWITCHES

# Enable easy swap of conda with e.g. mamba
ARG CONDA_VER=conda
ENV CONDA_VER=${CONDA_VER}

# Directories used for Docker RUN caching
ARG CACHE_DIRS=""
ENV CACHE_DIRS=${CACHE_DIRS}

# ----- # Clearing caches at the end of each RUN saves image space
# ----- # but seems to mess up dependency debug tools like pipdeptree.
# ----- # NOT setting this is also a requirement of effective Docker
# ----- # buildkit cache use. ATM jupyter/docker-stacks will still wipe
# ----- # out caches created prior to the common base image regardless.
# ----- ARG CLEAR_PKG_CACHES="1"
# ----- ENV CLEAR_PKG_CACHES=${CLEAR_PKG_CACHES}
ARG USER_CACHE_DIRS=""
ENV USER_CACHE_DIRS=${USER_CACHE_DIRS}

# These are duplicated in jupyter/docker-stacks so need to be changed
# in both locations. Better than hard coding in scripts though.
ENV NB_UID=1000
ENV NB_GID=100

RUN mkdir -p /home/jovyan/.cache && chown -R jovyan:users /home/jovyan/.cache
RUN mkdir -p /home/jovyan/.conda/pkgs && chown -R jovyan:users /home/jovyan/.conda/pkgs
RUN mkdir -p /opt/conda/pkgs && chown -R jovyan:users /opt/conda/pkgs
# ------------------------------------------------------------------------
# Make sure user cache directories exist before RUN caching uses them as mount points

USER root

RUN for DIR in ${USER_CACHE_DIRS}; do mkdir -p ${DIR}; chown -R $NB_UID:$NB_GID ${DIR}; done

# ------------------------------------------------------------------------
# Breaking into separate runs will build slower but also defines storage
# consumption in docker history.
#
Expand Down
12 changes: 12 additions & 0 deletions deployments/common/Dockerfile.cache
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Final touch-up stage layered on top of an image that was built with
# Docker RUN cache mounts.
#
# Build args:
#   BASE_CONTAINER - image to layer this stage on top of.
#
# NOTE(review): USER_CACHE_DIRS, NB_UID, NB_GID and NB_USER are not declared
# in this file; they are presumably inherited as ENV values from
# BASE_CONTAINER -- confirm against the base image.
ARG BASE_CONTAINER
FROM $BASE_CONTAINER

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

USER root

# Now that RUN caching is not being used, these cache dirs are just dirs, not
# mount points owned by root. Give them back to the user.
#
# USER_CACHE_DIRS is deliberately left unquoted: it is a space-separated list
# that must word-split into individual directories. `set -e` makes the build
# fail on the first mkdir/chown error instead of reporting only the status of
# the last command of the last iteration.
RUN set -e; \
    for DIR in ${USER_CACHE_DIRS}; do \
        mkdir -p "${DIR}"; \
        chown -R "${NB_UID}:${NB_GID}" "${DIR}"; \
    done

USER $NB_USER
5 changes: 0 additions & 5 deletions deployments/common/common-scripts/copy-default-home
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,3 @@ EOF
else
echo "Skipping default config init, $HOME already initialized."
fi

# Delete caches owned by root, immutable residue of Docker RUN cache
# which will block actual user caching. Fortunately $HOME is writable
# so root owned caches can be removed to make way for user caching.
find ${CACHE_DIRS} -user 0 | grep -v /var/cache | xargs rm -rvf
11 changes: 4 additions & 7 deletions infrequent-env
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ export DOCKER_BUILDKIT=1
export BUILDKIT_STEP_LOG_MAX_SIZE=10000000 # bytes
export BUILDKIT_STEP_LOG_MAX_SPEED=10000000

export CACHE_DIRS="/var/cache/apt /home/jovyan/.cache /opt/conda/pkgs /home/jovyan/.conda/pkgs"

export USER_CACHE_DIRS="/home/jovyan/.cache /opt/conda/pkgs /home/jovyan/.conda/pkgs"
export SYSTEM_CACHE_DIRS="/var/cache/apt"
export CACHE_DIRS="${USER_CACHE_DIRS} ${SYSTEM_CACHE_DIRS}"

# ----------------------------------------------------------------------------
# PIP_SWITCHES are passed through to pip by Docker and the pip install scripts
Expand All @@ -49,18 +52,12 @@ export PIP_SWITCHES='--no-color --default-timeout 100'
# ----------------- vvvvvv derived inputs, nominally don't change vvvvvv --------------
# automatically sourced into setup-env

export IMAGE_REPO=spacetelescope/science-platform-${DEPLOYMENT_NAME}
export COMMON_REPO=${IMAGE_REPO}

export JUPYTERHUB_DIR=`pwd`

export IMAGE_DIR=`pwd`/deployments/${DEPLOYMENT_NAME}

export COMMON_IMAGE_DIR=`pwd`/deployments/common

export COMMON_ID=${COMMON_REPO}:${COMMON_TAG}
export IMAGE_ID=${IMAGE_REPO}:${IMAGE_TAG}

# --------------- vvvvv misc env settings vvvvv ---------------------------------

function where () {
Expand Down
31 changes: 24 additions & 7 deletions scripts/image-build
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,46 @@ echo "========================= Building Common Image ========================"
add-caching Dockerfile >Dockerfile.cached
time docker build \
--progress plain \
--tag ${COMMON_ID} \
--tag notebook-common \
--build-arg BASE_IMAGE=${BASE_IMAGE} \
--build-arg USE_FROZEN=${USE_FROZEN} \
--build-arg PIP_SWITCHES="${PIP_SWITCHES}" \
--build-arg CONDA_VER=${CONDA_VER} \
--build-arg CACHE_DIRS="${CACHE_DIRS}" \
--build-arg USER_CACHE_DIRS="${USER_CACHE_DIRS}" \
--file Dockerfile.cached .
rm Dockerfile.cached

# ..................................................................................
# Build custom layers for this deployment
# Build custom layers for this deployment. RUN caching leaves behind cache
# mount points owned by root which must be corrected later so users can still
# use their caches, hence the tag has a -dirty suffix.

cd ${IMAGE_DIR}
echo "========================= Building $IMAGE_ID USE_FROZEN=${USE_FROZEN} =========================="
echo "========================= Building $NOTEBOOK_ID USE_FROZEN=${USE_FROZEN} =========================="
add-caching Dockerfile >Dockerfile.cached
time docker build \
--progress plain \
--tag ${IMAGE_ID} \
--tag "notebook-${DEPLOYMENT_NAME}" \
--build-arg BASE_IMAGE=${COMMON_ID} \
--tag "notebook-${DEPLOYMENT_NAME}-dirty" \
--build-arg BASE_IMAGE=notebook-common \
--file Dockerfile.cached .
rm Dockerfile.cached

# ..................................................................................
# In a Dockerfile which is not using RUN cache, touch up all the cache
# directory mount points so they are appropriately owned and writable vs. being
# universally owned by root and unwritable by jovyan / the user.

cd $COMMON_IMAGE_DIR
docker build \
--progress plain \
--tag $NOTEBOOK_ID \
--build-arg BASE_CONTAINER="notebook-${DEPLOYMENT_NAME}-dirty" \
--build-arg USER_CACHE_DIRS="${USER_CACHE_DIRS}" \
--file Dockerfile.cache .

# Once touched up we can discard this to reduce confusion
docker rmi "notebook-${DEPLOYMENT_NAME}-dirty"

# ..................................................................................
# Optionally update requirements used by frozen and chilly builds.
# In principle this may shift entirely to CI/CD
Expand Down
3 changes: 1 addition & 2 deletions scripts/image-build-all
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ for USE_FROZEN in $FREEZE_MODES; do
image-configure ${DEPLOYMENT_NAME} ${USE_FROZEN}
source setup-env
# tags for tracking here only, not intended for push, not valid for e.g. running on hub
export IMAGE_ID=${IMAGE_ID}-${USE_FROZEN}
export COMMON_ID=${COMMON_ID}-${USE_FROZEN}
export NOTEBOOK_ID=${NOTEBOOK_ID}-${USE_FROZEN}
# Redirect stdout to the log first, then duplicate stderr onto it, so both
# streams are captured. With "2>&1" written first, stderr stays attached to
# the original stdout and only stdout reaches the log file.
image-build >"${id}.log" 2>&1
done
done
2 changes: 1 addition & 1 deletion scripts/image-cp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ set -eu

PATH_TO_EXPORT=$1
WHERE_TO_PUT_IT=${2:-`pwd`}
ACTUAL_IMAGE=${3:-${IMAGE_ID}}
ACTUAL_IMAGE=${3:-${NOTEBOOK_ID}}

CONTAINER=$(docker run -d ${ACTUAL_IMAGE} /bin/bash)

Expand Down
8 changes: 5 additions & 3 deletions scripts/image-exec
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#! /bin/bash -eu

# Run an arbitrary command using the image specified by IMAGE_ID,
# Run an arbitrary command using the image specified by NOTEBOOK_ID,
# which normally defaults to the notebook image but can be overridden.
#
# Customize Docker parameters specified using env var IMAGE_RUN_PARS.
#
cd $IMAGE_DIR

export EXEC_ID=${EXEC_ID:-${NOTEBOOK_ID}}

SP_HOME=${SP_HOME:-""} # host dir to mount as rw /home/jovyan
SP_HUB_INIT=${SP_HUB_INIT:-""} # run hub's post-start-hook to init account
SP_USER=${SP_USER:-""} # run as the specified user id, or root or jovyan
Expand All @@ -19,7 +21,7 @@ fi

if [ ${SP_HUB_INIT} ]; then
echo "Running post-start-hook to create/update user environment."
docker run $IMAGE_RUN_PARS $IMAGE_ID /opt/environments/post-start-hook off-hub
docker run $IMAGE_RUN_PARS $EXEC_ID /opt/environments/post-start-hook off-hub
fi

if [[ ${SP_USER} ]]; then
Expand All @@ -28,4 +30,4 @@ if [[ ${SP_USER} ]]; then
IMAGE_RUN_PARS="$IMAGE_RUN_PARS --user ${SP_USER} --group-add users"
fi

docker run $IMAGE_RUN_PARS $IMAGE_ID $*
docker run $IMAGE_RUN_PARS $EXEC_ID $*
2 changes: 1 addition & 1 deletion scripts/image-sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fi
source image-dev-pars

# If no image is specified, run the last tagged mission container.
export IMAGE_ID=${1:-`docker image ls -q | head -1`}
export EXEC_ID=${1:-`docker image ls -q | head -1`}

# Both normal and --dev mode are interactive
export IMAGE_RUN_PARS="$IMAGE_RUN_PARS -it"
Expand Down
9 changes: 3 additions & 6 deletions setup-env.template
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@ export OWNER=spacetelescope # jupyter or spacetelescope base image repo
# jupyter will be pulled from DockerHub
# spacetelescope will be built from source code

export BASE_IMAGE=${OWNER}/scipy-notebook
export BASE_IMAGE=${OWNER}/scipy-notebook # from jupyter/docker-stacks

# Note that unscanned- tags are pushed, pulled, and scanned but
# the unscanned- is dropped from images permitted on the hub by
# virtue of passing ECR scanning.
export IMAGE_TAG=unscanned-latest-${ENVIRONMENT}
export COMMON_TAG=unscanned-common-latest-${ENVIRONMENT}
export NOTEBOOK_TAG=latest
export NOTEBOOK_ID=notebook-${DEPLOYMENT_NAME}:${NOTEBOOK_TAG} # abstract name of mission-specific notebook image

# ----------------- vvvv less frequently changed vvvv -------------------------------

Expand Down

0 comments on commit 5a70edd

Please sign in to comment.