From 8e0da711feca63b59b6a297ad28ceb22b7bc4e5a Mon Sep 17 00:00:00 2001 From: "Tarek M. Ahmed" Date: Fri, 10 May 2024 13:09:48 -0400 Subject: [PATCH 1/6] Try adding a device and sys admin capability to be able to mount a gcs bucket --- .../resources/init-resources/jupyter-docker-compose-gce.yaml | 4 ++++ .../main/resources/init-resources/jupyter-docker-compose.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml index fd782503b29..b0aee875432 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml @@ -19,6 +19,10 @@ services: volumes: # shared with welder - "/mnt/disks/work:${NOTEBOOKS_DIR}" + devices: + - /dev/fuse:/dev/fuse + cap_add: + - SYS_ADMIN restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml index c933c1e575c..ef8d7f3b8ad 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml @@ -36,6 +36,10 @@ services: - /hadoop:/hadoop - /hadoop_gcs_connector_metadata_cache:/hadoop_gcs_connector_metadata_cache - /usr/local/share/google/dataproc:/usr/local/share/google/dataproc + devices: + - /dev/fuse:/dev/fuse + cap_add: + - SYS_ADMIN restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" From 57810b5ffb7fcee5240d60521ee93c652caff40e Mon Sep 17 00:00:00 2001 From: "Tarek M. Ahmed" Date: Wed, 15 May 2024 13:52:27 -0400 Subject: [PATCH 2/6] add security-opt in docker-compose --- .../resources/init-resources/jupyter-docker-compose-gce.yaml | 2 ++ .../main/resources/init-resources/jupyter-docker-compose.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml index b0aee875432..ddcd12f1069 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml @@ -23,6 +23,8 @@ services: - /dev/fuse:/dev/fuse cap_add: - SYS_ADMIN + security_opt: + - apparmor:unconfined restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml index ef8d7f3b8ad..b3b3dd74da2 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml @@ -40,6 +40,8 @@ services: - /dev/fuse:/dev/fuse cap_add: - SYS_ADMIN + security_opt: + - apparmor:unconfined restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" From 8d50bccf3b84df33186adec4f8c041a6501bba97 Mon Sep 17 00:00:00 2001 From: "Tarek M. Ahmed" Date: Thu, 6 Jun 2024 13:31:23 -0400 Subject: [PATCH 3/6] Try GCSFuse mounting as a side car --- .../init-resources/jupyter-docker-compose-gce.yaml | 7 +------ .../resources/init-resources/jupyter-docker-compose.yaml | 7 +------ http/src/main/resources/init-resources/startup.sh | 6 ++++++ 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml index ddcd12f1069..97146cdb4ce 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml @@ -19,12 +19,7 @@ services: volumes: # shared with welder - "/mnt/disks/work:${NOTEBOOKS_DIR}" - devices: - - /dev/fuse:/dev/fuse - cap_add: - - SYS_ADMIN - security_opt: - - apparmor:unconfined + - /mnt/disks/bucket:/usr/local/genomics restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml index b3b3dd74da2..80d36504cb7 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml @@ -36,12 +36,7 @@ services: - /hadoop:/hadoop - /hadoop_gcs_connector_metadata_cache:/hadoop_gcs_connector_metadata_cache - /usr/local/share/google/dataproc:/usr/local/share/google/dataproc - devices: - - /dev/fuse:/dev/fuse - cap_add: - - SYS_ADMIN - security_opt: - - apparmor:unconfined + - /mnt/disks/bucket:/usr/local/genomics restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh index 743abaa4c01..07d10e3767e 100644 --- a/http/src/main/resources/init-resources/startup.sh +++ b/http/src/main/resources/init-resources/startup.sh @@ -123,6 +123,12 @@ then mount -t ext4 -O discard,defaults /dev/${DISK_DEVICE_ID} ${WORK_DIRECTORY} chmod a+rwx /mnt/disks/work + # Start gcsfuse as a sidecar + mkdir -p /mnt/disks/bucket + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mkdir /usr/local/genomics" + docker run -d --name gcsfuse-container --platform linux/amd64 --privileged --env BUCKET_NAME=genomics-public-data --device /dev/fuse --security-opt apparmor=unconfined -v /mnt/disks/bucket:/mnt/gcs-bucket:shared -v /mnt/disks/bucket:/mnt/gcs-data tarekmahmed/gcsfuse-container:latest + + # (1/6/22) Restart Jupyter Container to reset `NOTEBOOKS_DIR` for existing runtimes. This code can probably be removed after a year if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then echo "Restarting Jupyter Container $GOOGLE_PROJECT / $CLUSTER_NAME..." From 6c9d0a0f4cda40c64717c825d1d0302f5e296268 Mon Sep 17 00:00:00 2001 From: "Tarek M. Ahmed" Date: Fri, 7 Jun 2024 10:45:09 -0400 Subject: [PATCH 4/6] try shared mounting --- http/src/main/resources/init-resources/gce-init.sh | 11 +++++++++++ .../init-resources/jupyter-docker-compose-gce.yaml | 2 +- .../init-resources/jupyter-docker-compose.yaml | 2 +- http/src/main/resources/init-resources/startup.sh | 6 ------ 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh index d784e8a444c..9ae21d9dc3a 100644 --- a/http/src/main/resources/init-resources/gce-init.sh +++ b/http/src/main/resources/init-resources/gce-init.sh @@ -276,6 +276,14 @@ STEP_TIMINGS+=($(date +%s)) log 'Starting up the Jupyter...' + +# Start gcsfuse as a sidecar +mkdir -p /mnt/disks/bucket +docker run -d --name gcsfuse-container --privileged -u root -e PIP_USER=false --env BUCKET_NAME=genomics-public-data --device /dev/fuse:/dev/fuse --security-opt apparmor=unconfined -v /mnt/disks/bucket:/mnt/gcs-bucket:shared tarekmahmed/gcsfuse-container:latest +# Use docker compose here +# $(DOCKER_COMPOSE) -f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename gcsfuse-docker-compose-gce.yaml` config + + # Run docker-compose for each specified compose file. # Note the `docker-compose pull` is retried to avoid intermittent network errors, but # `docker-compose up` is not retried since if that fails, something is probably broken @@ -348,6 +356,9 @@ chmod a+rwx ${WORK_DIRECTORY} ${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" up -d +# Create genomics folder in the Jupyter container +docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mkdir /usr/local/genomics" + # Start up crypto detector, if enabled. # This should be started after other containers. # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml index 97146cdb4ce..a15355a7a2d 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose-gce.yaml @@ -19,7 +19,7 @@ services: volumes: # shared with welder - "/mnt/disks/work:${NOTEBOOKS_DIR}" - - /mnt/disks/bucket:/usr/local/genomics + - /mnt/disks/bucket:/usr/local/genomics:shared restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" diff --git a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml index 80d36504cb7..161440f1209 100644 --- a/http/src/main/resources/init-resources/jupyter-docker-compose.yaml +++ b/http/src/main/resources/init-resources/jupyter-docker-compose.yaml @@ -36,7 +36,7 @@ services: - /hadoop:/hadoop - /hadoop_gcs_connector_metadata_cache:/hadoop_gcs_connector_metadata_cache - /usr/local/share/google/dataproc:/usr/local/share/google/dataproc - - /mnt/disks/bucket:/usr/local/genomics + - /mnt/disks/bucket:/usr/local/genomics:shared restart: always environment: GOOGLE_PROJECT: "${GOOGLE_PROJECT}" diff --git a/http/src/main/resources/init-resources/startup.sh b/http/src/main/resources/init-resources/startup.sh index 07d10e3767e..743abaa4c01 100644 --- a/http/src/main/resources/init-resources/startup.sh +++ b/http/src/main/resources/init-resources/startup.sh @@ -123,12 +123,6 @@ then mount -t ext4 -O discard,defaults /dev/${DISK_DEVICE_ID} ${WORK_DIRECTORY} chmod a+rwx /mnt/disks/work - # Start gcsfuse as a sidecar - mkdir -p /mnt/disks/bucket - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mkdir /usr/local/genomics" - docker run -d --name gcsfuse-container --platform linux/amd64 --privileged --env BUCKET_NAME=genomics-public-data --device /dev/fuse --security-opt apparmor=unconfined -v /mnt/disks/bucket:/mnt/gcs-bucket:shared -v /mnt/disks/bucket:/mnt/gcs-data tarekmahmed/gcsfuse-container:latest - - # (1/6/22) Restart Jupyter Container to reset `NOTEBOOKS_DIR` for existing runtimes. This code can probably be removed after a year if [ ! -z "$JUPYTER_DOCKER_IMAGE" ] ; then echo "Restarting Jupyter Container $GOOGLE_PROJECT / $CLUSTER_NAME..." From 9cd815397a82743bb8bfe8b41ad2a88980c9853f Mon Sep 17 00:00:00 2001 From: "Tarek M. Ahmed" Date: Tue, 11 Jun 2024 10:56:23 -0400 Subject: [PATCH 5/6] No need to create the folder, it's already created --- http/src/main/resources/init-resources/gce-init.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/http/src/main/resources/init-resources/gce-init.sh b/http/src/main/resources/init-resources/gce-init.sh index 9ae21d9dc3a..124075a0030 100644 --- a/http/src/main/resources/init-resources/gce-init.sh +++ b/http/src/main/resources/init-resources/gce-init.sh @@ -356,9 +356,6 @@ chmod a+rwx ${WORK_DIRECTORY} ${DOCKER_COMPOSE} --env-file=/var/variables.env "${COMPOSE_FILES[@]}" up -d -# Create genomics folder in the Jupyter container -docker exec $JUPYTER_SERVER_NAME /bin/bash -c "mkdir /usr/local/genomics" - # Start up crypto detector, if enabled. # This should be started after other containers. # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. From 84fc382fd9c5760b4faa95fdc08ed67be715d18b Mon Sep 17 00:00:00 2001 From: "Tarek M. Ahmed" Date: Tue, 11 Jun 2024 13:09:29 -0400 Subject: [PATCH 6/6] Add gcsfuse in dataproc to experiment with --- http/src/main/resources/init-resources/init-actions.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 21bbcb9189d..8f5d8237e5b 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -304,6 +304,13 @@ END log 'Starting up the Jupydocker...' + # Start gcsfuse as a sidecar + mkdir -p /mnt/disks/bucket + docker run -d --name gcsfuse-container --privileged -u root -e PIP_USER=false --env BUCKET_NAME=genomics-public-data --device /dev/fuse:/dev/fuse --security-opt apparmor=unconfined -v /mnt/disks/bucket:/mnt/gcs-bucket:shared tarekmahmed/gcsfuse-container:latest + # Use docker compose here + # $(DOCKER_COMPOSE) -f ${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename gcsfuse-docker-compose-gce.yaml` config + + # Run docker-compose for each specified compose file. # Note the `docker-compose pull` is retried to avoid intermittent network errors, but # `docker-compose up` is not retried.