diff --git a/Docker/jenkins/Jenkins/Dockerfile b/Docker/jenkins/Jenkins/Dockerfile
index 7cce68b58b..04ebe5864a 100644
--- a/Docker/jenkins/Jenkins/Dockerfile
+++ b/Docker/jenkins/Jenkins/Dockerfile
@@ -1,4 +1,4 @@
-FROM jenkins/jenkins:2.439-jdk21
+FROM jenkins/jenkins:2.426.3-lts-jdk21
 
 USER root
 
diff --git a/Docker/jenkins/Jenkins2/Dockerfile b/Docker/jenkins/Jenkins2/Dockerfile
index 9976a07c20..e6b73bc76d 100644
--- a/Docker/jenkins/Jenkins2/Dockerfile
+++ b/Docker/jenkins/Jenkins2/Dockerfile
@@ -1,4 +1,4 @@
-FROM jenkins/jenkins:2.415-jdk11
+FROM jenkins/jenkins:2.426.3-lts-jdk21
 
 USER root
 
diff --git a/files/scripts/healdata/heal-cedar-data-ingest.py b/files/scripts/healdata/heal-cedar-data-ingest.py
index 4a7d88c3cf..71575e3c56 100644
--- a/files/scripts/healdata/heal-cedar-data-ingest.py
+++ b/files/scripts/healdata/heal-cedar-data-ingest.py
@@ -24,11 +24,14 @@
     "Questionnaire/Survey/Assessment - unvalidated instrument": "Questionnaire/Survey/Assessment",
     "Cis Male": "Male",
     "Cis Female": "Female",
-    "Trans Male": "Female-to-male transsexual",
-    "Trans Female": "Male-to-female transsexual",
-    "Agender, Non-binary, gender non-conforming": "Other",
-    "Gender Queer": "Other",
-    "Intersex": "Intersexed",
+    "Trans Male": "Transgender man/trans man/female-to-male (FTM)",
+    "Female-to-male transsexual": "Transgender man/trans man/female-to-male (FTM)",
+    "Trans Female": "Transgender woman/trans woman/male-to-female (MTF)",
+    "Male-to-female transsexual": "Transgender woman/trans woman/male-to-female (MTF)",
+    "Agender, Non-binary, gender non-conforming": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+    "Gender Queer": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+    "Intersex": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+    "Intersexed": "Genderqueer/gender nonconforming/neither exclusively male nor female",
     "Buisness Development": "Business Development"
 }
 
diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist
index 83070d3352..625c20b299 100644
--- a/files/squid_whitelist/web_whitelist
+++ b/files/squid_whitelist/web_whitelist
@@ -34,6 +34,7 @@ cernvm.cern.ch
 charts.bitnami.com
 charts.helm.sh
 cloud.r-project.org
+coredns.github.io
 coreos.com
 covidstoplight.org
 cpan.mirrors.tds.net
diff --git a/files/squid_whitelist/web_wildcard_whitelist b/files/squid_whitelist/web_wildcard_whitelist
index 44f4680971..3dca3946ad 100644
--- a/files/squid_whitelist/web_wildcard_whitelist
+++ b/files/squid_whitelist/web_wildcard_whitelist
@@ -100,6 +100,7 @@
 .sourceforge.net
 .southsideweekly.com
 .theanvil.io
+.tigera.io
 .twistlock.com
 .ubuntu.com
 .ucsc.edu
diff --git a/flavors/squid_auto/squid_running_on_docker.sh b/flavors/squid_auto/squid_running_on_docker.sh
index 05607f3044..2d7cf8e688 100644
--- a/flavors/squid_auto/squid_running_on_docker.sh
+++ b/flavors/squid_auto/squid_running_on_docker.sh
@@ -8,6 +8,9 @@ DISTRO=$(awk -F '[="]*' '/^NAME/ { print $2 }' < /etc/os-release)
 WORK_USER="ubuntu"
 if [[ $DISTRO == "Amazon Linux" ]]; then
   WORK_USER="ec2-user"
+  if [[ $(awk -F '[="]*' '/^VERSION_ID/ { print $2 }' < /etc/os-release) == "2023" ]]; then
+    DISTRO="al2023"
+  fi
 fi
 HOME_FOLDER="/home/${WORK_USER}"
 SUB_FOLDER="${HOME_FOLDER}/cloud-automation"
@@ -60,6 +63,8 @@ fi
 
 function install_basics(){
   if [[ $DISTRO == "Ubuntu" ]]; then
     apt -y install atop
+  elif [[ $DISTRO == "al2023" ]]; then
+    sudo dnf install cronie nc -y
   fi
 }
@@ -201,8 +206,10 @@ function install_awslogs {
   if [[ $DISTRO == "Ubuntu" ]]; then
     wget ${AWSLOGS_DOWNLOAD_URL} -O amazon-cloudwatch-agent.deb
     dpkg -i -E ./amazon-cloudwatch-agent.deb
-  else
+  elif [[ $DISTRO == "Amazon Linux" ]]; then
     sudo yum install amazon-cloudwatch-agent nc -y
+  elif [[ $DISTRO == "al2023" ]]; then
+    sudo dnf install amazon-cloudwatch-agent -y
   fi
 
   # Configure the AWS logs
@@ -292,6 +299,19 @@ function main(){
     --volume ${SQUID_CACHE_DIR}:${SQUID_CACHE_DIR} \
     --volume ${SQUID_CONFIG_DIR}:${SQUID_CONFIG_DIR}:ro \
     quay.io/cdis/squid:${SQUID_IMAGE_TAG}
+
+  max_attempts=10
+  attempt_counter=0
+  while [ $attempt_counter -lt $max_attempts ]; do
+    #((attempt_counter++))
+    sleep 10
+    if [[ -z "$(sudo lsof -i:3128)" ]]; then
+      echo "Squid not healthy, restarting."
+      docker restart squid
+    else
+      echo "Squid healthy"
+    fi
+  done
 }
 
 main
diff --git a/gen3/bin/kube-roll-all.sh b/gen3/bin/kube-roll-all.sh
index c9cec5a25d..6a67f2bdd2 100644
--- a/gen3/bin/kube-roll-all.sh
+++ b/gen3/bin/kube-roll-all.sh
@@ -274,7 +274,7 @@ if [[ "$GEN3_ROLL_FAST" != "true" ]]; then
   else
     gen3 kube-setup-autoscaler &
   fi
-  gen3 kube-setup-kube-dns-autoscaler &
+  #gen3 kube-setup-kube-dns-autoscaler &
   gen3 kube-setup-metrics deploy || true
   gen3 kube-setup-tiller || true
   #
diff --git a/gen3/bin/kube-setup-ingress.sh b/gen3/bin/kube-setup-ingress.sh
index d0bcff9a4b..b75470f733 100644
--- a/gen3/bin/kube-setup-ingress.sh
+++ b/gen3/bin/kube-setup-ingress.sh
@@ -232,6 +232,28 @@ gen3_ingress_setup_role() {
                 }
             }
         },
+        {
+            "Effect": "Allow",
+            "Action": [
+                "elasticloadbalancing:AddTags"
+            ],
+            "Resource": [
+                "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*",
+                "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*",
+                "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*"
+            ],
+            "Condition": {
+                "StringEquals": {
+                    "elasticloadbalancing:CreateAction": [
+                        "CreateTargetGroup",
+                        "CreateLoadBalancer"
+                    ]
+                },
+                "Null": {
+                    "aws:RequestTag/elbv2.k8s.aws/cluster": "false"
+                }
+            }
+        },
         {
             "Effect": "Allow",
             "Action": [
@@ -329,4 +351,4 @@ g3kubectl apply -f "${GEN3_HOME}/kube/services/revproxy/revproxy-service.yaml"
 envsubst <$scriptDir/ingress.yaml | g3kubectl apply -f -
 if [ "$deployWaf" = true ]; then
   gen3_ingress_setup_waf
-fi
\ No newline at end of file
+fi
diff --git a/gen3/bin/kube-setup-karpenter.sh b/gen3/bin/kube-setup-karpenter.sh
index 8ba8ed9d97..c8762c2e58 100644
--- a/gen3/bin/kube-setup-karpenter.sh
+++ b/gen3/bin/kube-setup-karpenter.sh
@@ -23,8 +23,10 @@ gen3_deploy_karpenter() {
   if g3k_config_lookup .global.karpenter_version; then
     karpenter=$(g3k_config_lookup .global.karpenter_version)
   fi
-  export clusterversion=`kubectl version --short -o json | jq -r .serverVersion.minor`
-  if [ "${clusterversion}" = "24+" ]; then
+  export clusterversion=`kubectl version -o json | jq -r .serverVersion.minor`
+  if [ "${clusterversion}" = "25+" ]; then
+    karpenter=${karpenter:-v0.27.0}
+  elif [ "${clusterversion}" = "24+" ]; then
     karpenter=${karpenter:-v0.24.0}
   else
     karpenter=${karpenter:-v0.22.0}
diff --git a/gen3/bin/kube-setup-system-services.sh b/gen3/bin/kube-setup-system-services.sh
index 0afa7d5860..c26a04cb5d 100644
--- a/gen3/bin/kube-setup-system-services.sh
+++ b/gen3/bin/kube-setup-system-services.sh
@@ -39,7 +39,7 @@ calico_yaml="https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v${calico}
 
 g3kubectl set image daemonset.apps/kube-proxy -n kube-system kube-proxy=${kube_proxy_image}
 g3kubectl set image --namespace kube-system deployment.apps/coredns coredns=${coredns_image}
-g3k_kv_filter "${GEN3_HOME}/kube/services/kube-dns-autoscaler/dns-horizontal-autoscaler.yaml" SERVICE "coredns" IMAGE "$kubednsautoscaler_image" | g3kubectl apply -f -
+#g3k_kv_filter "${GEN3_HOME}/kube/services/kube-dns-autoscaler/dns-horizontal-autoscaler.yaml" SERVICE "coredns" IMAGE "$kubednsautoscaler_image" | g3kubectl apply -f -
 g3kubectl apply -f ${cni_image}
 g3kubectl apply -f ${calico_yaml}
 
diff --git a/gen3/bin/migrate-to-vpc-cni.sh b/gen3/bin/migrate-to-vpc-cni.sh
new file mode 100644
index 0000000000..510d9ebeff
--- /dev/null
+++ b/gen3/bin/migrate-to-vpc-cni.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/gen3setup"
+
+#Get the K8s NS
+ctx="$(g3kubectl config current-context)"
+ctxNamespace="$(g3kubectl config view -ojson | jq -r ".contexts | map(select(.name==\"$ctx\")) | .[0] | .context.namespace")"
+
+# Set the cluster name variable
+CLUSTER_NAME=`gen3 api environment`
+
+# Check if in default ns
+if [[ ("$ctxNamespace" != "default" && "$ctxNamespace" != "null") ]]; then
+  gen3_log_err "Namespace must be default"
+  exit 1
+fi
+
+# Cd into Cloud-automation repo and pull the latest from master
+gen3_log_info "Pulling the latest from Cloud-Auto"
+cd /home/$CLUSTER_NAME/cloud-automation || { gen3_log_err "Cloud-automation repo not found"; exit 1; }
+#### Change to master
+git checkout master || { gen3_log_err "Failed to checkout master branch"; exit 1; }
+git pull || { gen3_log_err "Failed to pull from the repository"; exit 1; }
+
+# Update the Karpenter Node Template
+gen3_log_info "Apply new Karpenter Node Template"
+if [[ -d $(g3k_manifest_init)/$(g3k_hostname)/manifests/karpenter ]]; then
+  gen3_log_info "Karpenter setup in manifest. Open a cdismanifest PR and add this line to aws node templates: https://github.com/uc-cdis/cloud-automation/blob/master/kube/services/karpenter/nodeTemplateDefault.yaml#L40"
+  while true; do
+    read -p "Have you updated your manifest? (yes/no): " yn
+    case $yn in
+      [Yy]* )
+        gen3_log_info "Proceeding with Karpenter deployment..."
+        gen3 kube-setup-karpenter deploy --force || { gen3_log_err "kube-setup-karpenter failed"; exit 1; }
+        break
+        ;;
+      [Nn]* )
+        gen3_log_info "Please update the cdismanifest before proceeding."
+        exit 1
+        ;;
+      * )
+        gen3_log_info "Please answer yes or no."
+        ;;
+    esac
+  done
+else
+  gen3 kube-setup-karpenter deploy --force || { gen3_log_err "kube-setup-karpenter failed"; exit 1; }
+fi
+
+# Cordon all the nodes before running "gen3 roll all"
+gen3_log_info "Cordoning all nodes"
+kubectl get nodes --no-headers -o custom-columns=":metadata.name" | grep -v '^fargate' | xargs -I{} kubectl cordon {}
+
+# Run a "gen3 roll all" so all nodes use the new mounted BPF File System
+gen3_log_info "Cycling all the nodes by running gen3 roll all"
+gen3 roll all --fast || exit 1
+
+# Confirm that all nodes have been rotated
+while true; do
+  read -p "Roll all complete. Have all cordoned nodes been rotated? (yes/no): " yn
+  case $yn in
+    [Yy]* )
+      gen3_log_info "Continuing with script..."
+      break
+      ;;
+    [Nn]* )
+      gen3_log_info "Please drain any remaining nodes with 'kubectl drain --ignore-daemonsets --delete-emptydir-data'"
+      ;;
+    * )
+      gen3_log_info "Please answer yes or no."
+      ;;
+  esac
+done
+
+
+# Delete all existing network policies
+gen3_log_info "Deleting networkpolicies"
+kubectl delete networkpolicies --all
+
+# Delete all Calico related resources from the "kube-system" namespace
+gen3_log_info "Deleting all Calico related resources"
+kubectl get deployments -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete deployment -n kube-system
+kubectl get daemonsets -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete daemonset -n kube-system
+kubectl get services -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete service -n kube-system
+kubectl get replicasets -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete replicaset -n kube-system
+
+# Backup the current VPC CNI configuration in case of rollback
+gen3_log_info "Backing up current VPC CNI Configuration..."
+kubectl get daemonset aws-node -n kube-system -o yaml > aws-k8s-cni-old.yaml || { gen3_log_err "Error backing up VPC CNI configuration"; exit 1; }
+
+# Check to ensure we are not using an AWS plugin to manage the VPC CNI Plugin
+if aws eks describe-addon --cluster-name "$CLUSTER_NAME" --addon-name vpc-cni --query addon.addonVersion --output text 2>/dev/null; then
+  gen3_log_err "Error: VPC CNI Plugin is managed by AWS. Please log into the AWS UI and delete the VPC CNI Plugin in Amazon EKS, then re-run this script."
+  exit 1
+else
+  gen3_log_info "No managed VPC CNI Plugin found, proceeding with the script."
+fi
+
+# Apply the new VPC CNI Version
+gen3_log_info "Applying new version of VPC CNI"
+g3kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v1.14.1/config/master/aws-k8s-cni.yaml || { gen3_log_err "Failed to apply new VPC CNI version"; exit 1; }
+
+# Check the version to make sure it updated
+NEW_VERSION=$(kubectl describe daemonset aws-node --namespace kube-system | grep amazon-k8s-cni: | cut -d : -f 3)
+gen3_log_info "Current version of aws-k8s-cni is: $NEW_VERSION"
+if [ "$NEW_VERSION" != "v1.14.1" ]; then
+  gen3_log_info "The version of aws-k8s-cni has not been updated correctly."
+  exit 1
+fi
+
+# Edit the amazon-vpc-cni configmap to enable network policy controller
+gen3_log_info "Enabling NetworkPolicies in VPC CNI Configmap"
+kubectl patch configmap -n kube-system amazon-vpc-cni --type merge -p '{"data":{"enable-network-policy-controller":"true"}}' || { gen3_log_err "Configmap patch failed"; exit 1; }
+
+# Edit the aws-node daemonset
+gen3_log_info "Enabling NetworkPolicies in aws-node Daemonset"
+kubectl patch daemonset aws-node -n kube-system --type=json -p='[{"op": "add", "path": "/spec/template/spec/containers/1/args", "value": ["--enable-network-policy=true", "--enable-ipv6=false", "--enable-cloudwatch-logs=false", "--metrics-bind-addr=:8162", "--health-probe-bind-addr=:8163"]}]' || { gen3_log_err "Daemonset edit failed"; exit 1; }
+
+# Ensure all the aws-nodes are running
+kubectl get pods -n kube-system | grep aws
+while true; do
+  read -p "Do all the aws-node pods in the kube-system ns have 2/2 containers running? (yes/no): " yn
+  case $yn in
+    [Yy]* )
+      gen3_log_info "Running kube-setup-networkpolicy..."
+      gen3 kube-setup-networkpolicy || exit 1
+      break
+      ;;
+    [Nn]* )
+      gen3_log_err "Look at aws-node logs to figure out what went wrong. View this document for more details: https://docs.google.com/document/d/1fcBTciQSSwjvHktEnO_7EObY-xR_EvJ2NtgUa70wvL8"
+      gen3_log_info "Rollback instructions are also available in the above document"
+      ;;
+    * )
+      gen3_log_info "Please answer yes or no."
+      ;;
+  esac
+done
\ No newline at end of file
diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml
index 67fa05a09d..473f7041ea 100644
--- a/kube/services/argo/values.yaml
+++ b/kube/services/argo/values.yaml
@@ -1,5 +1,5 @@
 controller:
-  parallelism: 5
+  parallelism: 3
   metricsConfig:
     # -- Enables prometheus metrics server
     enabled: true
@@ -28,11 +28,11 @@ controller:
         }
       ]
     }
-  }
+  }
 
   resourceRateLimit:
     limit: 40
-    burst: 4
+    burst: 4
 
   # -- enable persistence using postgres
   persistence:
@@ -49,7 +49,7 @@ controller:
      port: 5432
      database: GEN3_ARGO_DB_NAME
      tableName: argo_workflows
-     # # the database secrets must be in the same namespace of the controller
+     # # the database secrets must be in the same namespace of the controller
      userNameSecret:
        name: argo-db-creds
        key: db_username
@@ -58,7 +58,7 @@ controller:
       key: db_password
    nodeStatusOffLoad: true
 
-  workflowDefaults:
+  workflowDefaults:
    spec:
      archiveLogs: true
 
@@ -77,11 +77,11 @@ server:
   baseHref: "/argo/"
   # -- Extra arguments to provide to the Argo server binary, such as for disabling authentication.
   extraArgs:
-    - --auth-mode=server
-    - --auth-mode=client
+    - --auth-mode=server
+    - --auth-mode=client
   extraEnv:
-    - name: ARGO_HTTP1
-      value: "true"
+    - name: ARGO_HTTP1
+      value: "true"
   resources:
     requests:
       memory: 8Gi
diff --git a/kube/services/ingress/ingress.yaml b/kube/services/ingress/ingress.yaml
index 65916679a7..3f1f312592 100644
--- a/kube/services/ingress/ingress.yaml
+++ b/kube/services/ingress/ingress.yaml
@@ -11,7 +11,7 @@ metadata:
     alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]'
     alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=600
     alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}'
-    alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-2021-06
+    alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-Res-FIPS-2023-04
 spec:
   ingressClassName: alb
   rules:
diff --git a/kube/services/karpenter/nodeTemplateDefault.yaml b/kube/services/karpenter/nodeTemplateDefault.yaml
index 107c5e6cca..6ba8b3a0f7 100644
--- a/kube/services/karpenter/nodeTemplateDefault.yaml
+++ b/kube/services/karpenter/nodeTemplateDefault.yaml
@@ -37,9 +37,15 @@ spec:
    sudo dracut -f
    # configure grub
    sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
-   sudo mount -t bpf bpffs /sys/fs/bpf
+
+   # --BOUNDARY
+   # Content-Type: text/cloud-config; charset="us-ascii"
+
+   # mounts:
+   #   - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
+
 
    --BOUNDARY
    Content-Type: text/cloud-config; charset="us-ascii"
 
    power_state:
diff --git a/kube/services/karpenter/nodeTemplateGPU.yaml b/kube/services/karpenter/nodeTemplateGPU.yaml
index c4fd535d74..925e7a9a08 100644
--- a/kube/services/karpenter/nodeTemplateGPU.yaml
+++ b/kube/services/karpenter/nodeTemplateGPU.yaml
@@ -37,7 +37,12 @@ spec:
    sudo dracut -f
    # configure grub
    sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
-   sudo mount -t bpf bpffs /sys/fs/bpf
+
+   # --BOUNDARY
+   # Content-Type: text/cloud-config; charset="us-ascii"
+
+   # mounts:
+   #   - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
 
    --BOUNDARY
    Content-Type: text/cloud-config; charset="us-ascii"
diff --git a/kube/services/karpenter/nodeTemplateJupyter.yaml b/kube/services/karpenter/nodeTemplateJupyter.yaml
index bca4436d1f..1c8970ad64 100644
--- a/kube/services/karpenter/nodeTemplateJupyter.yaml
+++ b/kube/services/karpenter/nodeTemplateJupyter.yaml
@@ -37,7 +37,12 @@ spec:
    sudo dracut -f
    # configure grub
    sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
-   sudo mount -t bpf bpffs /sys/fs/bpf
+
+   # --BOUNDARY
+   # Content-Type: text/cloud-config; charset="us-ascii"
+
+   # mounts:
+   #   - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
 
    --BOUNDARY
    Content-Type: text/cloud-config; charset="us-ascii"
diff --git a/kube/services/karpenter/nodeTemplateWorkflow.yaml b/kube/services/karpenter/nodeTemplateWorkflow.yaml
index 22c95aba11..6e47b22f97 100644
--- a/kube/services/karpenter/nodeTemplateWorkflow.yaml
+++ b/kube/services/karpenter/nodeTemplateWorkflow.yaml
@@ -37,7 +37,12 @@ spec:
    sudo dracut -f
    # configure grub
    sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
-   sudo mount -t bpf bpffs /sys/fs/bpf
+
+   # --BOUNDARY
+   # Content-Type: text/cloud-config; charset="us-ascii"
+
+   # mounts:
+   #   - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
 
    --BOUNDARY
    Content-Type: text/cloud-config; charset="us-ascii"