Skip to content

Commit

Permalink
Merge branch 'master' into feat/argo-va-testing-revproxy-modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
AidanHilt authored Feb 12, 2024
2 parents fca6070 + 0f98195 commit 13fb054
Show file tree
Hide file tree
Showing 17 changed files with 235 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM jenkins/jenkins:2.439-jdk21
FROM jenkins/jenkins:2.426.3-lts-jdk21

USER root

Expand Down
2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins2/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM jenkins/jenkins:2.415-jdk11
FROM jenkins/jenkins:2.426.3-lts-jdk21

USER root

Expand Down
13 changes: 8 additions & 5 deletions files/scripts/healdata/heal-cedar-data-ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@
"Questionnaire/Survey/Assessment - unvalidated instrument": "Questionnaire/Survey/Assessment",
"Cis Male": "Male",
"Cis Female": "Female",
"Trans Male": "Female-to-male transsexual",
"Trans Female": "Male-to-female transsexual",
"Agender, Non-binary, gender non-conforming": "Other",
"Gender Queer": "Other",
"Intersex": "Intersexed",
"Trans Male": "Transgender man/trans man/female-to-male (FTM)",
"Female-to-male transsexual": "Transgender man/trans man/female-to-male (FTM)",
"Trans Female": "Transgender woman/trans woman/male-to-female (MTF)",
"Male-to-female transsexual": "Transgender woman/trans woman/male-to-female (MTF)",
"Agender, Non-binary, gender non-conforming": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Gender Queer": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Intersex": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Intersexed": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Buisness Development": "Business Development"
}

Expand Down
1 change: 1 addition & 0 deletions files/squid_whitelist/web_whitelist
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ cernvm.cern.ch
charts.bitnami.com
charts.helm.sh
cloud.r-project.org
coredns.github.io
coreos.com
covidstoplight.org
cpan.mirrors.tds.net
Expand Down
1 change: 1 addition & 0 deletions files/squid_whitelist/web_wildcard_whitelist
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
.sourceforge.net
.southsideweekly.com
.theanvil.io
.tigera.io
.twistlock.com
.ubuntu.com
.ucsc.edu
Expand Down
22 changes: 21 additions & 1 deletion flavors/squid_auto/squid_running_on_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ DISTRO=$(awk -F '[="]*' '/^NAME/ { print $2 }' < /etc/os-release)
WORK_USER="ubuntu"
if [[ $DISTRO == "Amazon Linux" ]]; then
WORK_USER="ec2-user"
if [[ $(awk -F '[="]*' '/^VERSION_ID/ { print $2 }' < /etc/os-release) == "2023" ]]; then
DISTRO="al2023"
fi
fi
HOME_FOLDER="/home/${WORK_USER}"
SUB_FOLDER="${HOME_FOLDER}/cloud-automation"
Expand Down Expand Up @@ -60,6 +63,8 @@ fi
# Install baseline host utilities needed by the rest of this bootstrap.
# Ubuntu gets atop; Amazon Linux 2023 (DISTRO rewritten to "al2023"
# earlier in this script) gets cronie and nc, which its minimal base
# image does not ship — presumably required by the cron/health-check
# steps later in this script; TODO confirm.
function install_basics(){
if [[ $DISTRO == "Ubuntu" ]]; then
apt -y install atop
elif [[ $DISTRO == "al2023" ]]; then
sudo dnf install cronie nc -y
fi
}

Expand Down Expand Up @@ -201,8 +206,10 @@ function install_awslogs {
if [[ $DISTRO == "Ubuntu" ]]; then
wget ${AWSLOGS_DOWNLOAD_URL} -O amazon-cloudwatch-agent.deb
dpkg -i -E ./amazon-cloudwatch-agent.deb
else
elif [[ $DISTRO == "Amazon Linux" ]]; then
sudo yum install amazon-cloudwatch-agent nc -y
elif [[ $DISTRO == "al2023" ]]; then
sudo dnf install amazon-cloudwatch-agent -y
fi

# Configure the AWS logs
Expand Down Expand Up @@ -292,6 +299,19 @@ function main(){
--volume ${SQUID_CACHE_DIR}:${SQUID_CACHE_DIR} \
--volume ${SQUID_CONFIG_DIR}:${SQUID_CONFIG_DIR}:ro \
quay.io/cdis/squid:${SQUID_IMAGE_TAG}

# Watchdog: poll squid for up to max_attempts checks, 10s apart, and
# restart the container whenever nothing is listening on port 3128.
max_attempts=10
attempt_counter=0
while [ "$attempt_counter" -lt "$max_attempts" ]; do
  # Bug fix: this increment was commented out, which made the loop spin
  # forever and left max_attempts dead. Use plain arithmetic expansion
  # rather than ((attempt_counter++)) — the latter returns non-zero when
  # the pre-increment value is 0 and would abort a `set -e` script.
  attempt_counter=$((attempt_counter + 1))
  sleep 10
  if [[ -z "$(sudo lsof -i:3128)" ]]; then
    echo "Squid not healthy, restarting."
    docker restart squid
  else
    echo "Squid healthy"
  fi
done
}

main
2 changes: 1 addition & 1 deletion gen3/bin/kube-roll-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ if [[ "$GEN3_ROLL_FAST" != "true" ]]; then
else
gen3 kube-setup-autoscaler &
fi
gen3 kube-setup-kube-dns-autoscaler &
#gen3 kube-setup-kube-dns-autoscaler &
gen3 kube-setup-metrics deploy || true
gen3 kube-setup-tiller || true
#
Expand Down
24 changes: 23 additions & 1 deletion gen3/bin/kube-setup-ingress.sh
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,28 @@ gen3_ingress_setup_role() {
}
}
},
{
"Effect": "Allow",
"Action": [
"elasticloadbalancing:AddTags"
],
"Resource": [
"arn:aws:elasticloadbalancing:*:*:targetgroup/*/*",
"arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*",
"arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*"
],
"Condition": {
"StringEquals": {
"elasticloadbalancing:CreateAction": [
"CreateTargetGroup",
"CreateLoadBalancer"
]
},
"Null": {
"aws:RequestTag/elbv2.k8s.aws/cluster": "false"
}
}
},
{
"Effect": "Allow",
"Action": [
Expand Down Expand Up @@ -329,4 +351,4 @@ g3kubectl apply -f "${GEN3_HOME}/kube/services/revproxy/revproxy-service.yaml"
envsubst <$scriptDir/ingress.yaml | g3kubectl apply -f -
if [ "$deployWaf" = true ]; then
gen3_ingress_setup_waf
fi
fi
6 changes: 4 additions & 2 deletions gen3/bin/kube-setup-karpenter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ gen3_deploy_karpenter() {
if g3k_config_lookup .global.karpenter_version; then
karpenter=$(g3k_config_lookup .global.karpenter_version)
fi
export clusterversion=`kubectl version --short -o json | jq -r .serverVersion.minor`
if [ "${clusterversion}" = "24+" ]; then
export clusterversion=`kubectl version -o json | jq -r .serverVersion.minor`
if [ "${clusterversion}" = "25+" ]; then
karpenter=${karpenter:-v0.27.0}
elif [ "${clusterversion}" = "24+" ]; then
karpenter=${karpenter:-v0.24.0}
else
karpenter=${karpenter:-v0.22.0}
Expand Down
2 changes: 1 addition & 1 deletion gen3/bin/kube-setup-system-services.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ calico_yaml="https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v${calico}

g3kubectl set image daemonset.apps/kube-proxy -n kube-system kube-proxy=${kube_proxy_image}
g3kubectl set image --namespace kube-system deployment.apps/coredns coredns=${coredns_image}
g3k_kv_filter "${GEN3_HOME}/kube/services/kube-dns-autoscaler/dns-horizontal-autoscaler.yaml" SERVICE "coredns" IMAGE "$kubednsautoscaler_image" | g3kubectl apply -f -
#g3k_kv_filter "${GEN3_HOME}/kube/services/kube-dns-autoscaler/dns-horizontal-autoscaler.yaml" SERVICE "coredns" IMAGE "$kubednsautoscaler_image" | g3kubectl apply -f -
g3kubectl apply -f ${cni_image}
g3kubectl apply -f ${calico_yaml}

Expand Down
138 changes: 138 additions & 0 deletions gen3/bin/migrate-to-vpc-cni.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/bin/bash
#
# migrate-to-vpc-cni.sh — migrate a Gen3 commons from Calico to the AWS
# VPC CNI network-policy controller.
#
# Steps:
#   1. Verify the current context uses the "default" namespace.
#   2. Pull the latest cloud-automation (master) and redeploy Karpenter.
#   3. Cordon every node and cycle them via `gen3 roll all`.
#   4. Delete all NetworkPolicies and every Calico resource in kube-system.
#   5. Install VPC CNI v1.14.1 and enable its network-policy controller.
#   6. Re-run `gen3 kube-setup-networkpolicy`.
#
# Interactive: the operator is prompted at several checkpoints, and a
# rollback copy of the current aws-node daemonset is written to
# aws-k8s-cni-old.yaml in the working directory.

source "${GEN3_HOME}/gen3/lib/utils.sh"
gen3_load "gen3/gen3setup"

# Get the K8s namespace of the current kube context
ctx="$(g3kubectl config current-context)"
ctxNamespace="$(g3kubectl config view -ojson | jq -r ".contexts | map(select(.name==\"$ctx\")) | .[0] | .context.namespace")"

# Cluster name; also names the VM user owning the cloud-automation checkout
CLUSTER_NAME="$(gen3 api environment)"

# Refuse to run anywhere but the default namespace
if [[ ("$ctxNamespace" != "default" && "$ctxNamespace" != "null") ]]; then
  gen3_log_err "Namespace must be default"
  exit 1
fi

# Cd into the cloud-automation repo and pull the latest from master
gen3_log_info "Pulling the latest from Cloud-Auto"
cd "/home/${CLUSTER_NAME}/cloud-automation" || { gen3_log_err "Cloud-automation repo not found"; exit 1; }
git checkout master || { gen3_log_err "Failed to checkout master branch"; exit 1; }
git pull || { gen3_log_err "Failed to pull from the repository"; exit 1; }

# Update the Karpenter node template; commonses with Karpenter configured in
# their manifest must add the BPF mount line to their node templates first.
gen3_log_info "Apply new Karpenter Node Template"
if [[ -d $(g3k_manifest_init)/$(g3k_hostname)/manifests/karpenter ]]; then
  gen3_log_info "Karpenter setup in manifest. Open a cdismanifest PR and add this line to aws node templates: https://github.com/uc-cdis/cloud-automation/blob/master/kube/services/karpenter/nodeTemplateDefault.yaml#L40"
  while true; do
    read -p "Have you updated your manifest? (yes/no): " yn
    case $yn in
      [Yy]* )
        gen3_log_info "Proceeding with Karpenter deployment..."
        gen3 kube-setup-karpenter deploy --force || { gen3_log_err "kube-setup-karpenter failed"; exit 1; }
        break
        ;;
      [Nn]* )
        gen3_log_info "Please update the cdismanifest before proceeding."
        exit 1
        ;;
      * )
        gen3_log_info "Please answer yes or no."
        ;;
    esac
  done
else
  gen3 kube-setup-karpenter deploy --force || { gen3_log_err "kube-setup-karpenter failed"; exit 1; }
fi

# Cordon all non-Fargate nodes before running "gen3 roll all"
gen3_log_info "Cordoning all nodes"
kubectl get nodes --no-headers -o custom-columns=":metadata.name" | grep -v '^fargate' | xargs -I{} kubectl cordon {}

# Roll everything so replacement nodes come up with the BPF filesystem mounted
gen3_log_info "Cycling all the nodes by running gen3 roll all"
gen3 roll all --fast || exit 1

# Operator checkpoint: all cordoned nodes must be gone before continuing
while true; do
  read -p "Roll all complete. Have all cordoned nodes been rotated? (yes/no): " yn
  case $yn in
    [Yy]* )
      gen3_log_info "Continuing with script..."
      break
      ;;
    [Nn]* )
      gen3_log_info "Please drain any remaining nodes with 'kubectl drain <node_name> --ignore-daemonsets --delete-emptydir-data'"
      ;;
    * )
      gen3_log_info "Please answer yes or no."
      ;;
  esac
done


# Delete all existing network policies (they will be re-created at the end)
gen3_log_info "Deleting networkpolicies"
kubectl delete networkpolicies --all

# Delete all Calico related resources from the kube-system namespace.
# xargs -r skips the kubectl invocation entirely when grep matches nothing,
# instead of calling "kubectl delete <kind>" with no resource names.
gen3_log_info "Deleting all Calico related resources"
kubectl get deployments -n kube-system | grep calico | awk '{print $1}' | xargs -r kubectl delete deployment -n kube-system
kubectl get daemonsets -n kube-system | grep calico | awk '{print $1}' | xargs -r kubectl delete daemonset -n kube-system
kubectl get services -n kube-system | grep calico | awk '{print $1}' | xargs -r kubectl delete service -n kube-system
kubectl get replicasets -n kube-system | grep calico | awk '{print $1}' | xargs -r kubectl delete replicaset -n kube-system

# Backup the current VPC CNI configuration in case of rollback
gen3_log_info "Backing up current VPC CNI Configuration..."
kubectl get daemonset aws-node -n kube-system -o yaml > aws-k8s-cni-old.yaml || { gen3_log_err "Error backing up VPC CNI configuration"; exit 1; }

# Abort if the VPC CNI is managed as an EKS addon — applying raw manifests
# over an AWS-managed addon would conflict with it.
if aws eks describe-addon --cluster-name "$CLUSTER_NAME" --addon-name vpc-cni --query addon.addonVersion --output text 2>/dev/null; then
  gen3_log_err "Error: VPC CNI Plugin is managed by AWS. Please log into the AWS UI and delete the VPC CNI Plugin in Amazon EKS, then re-run this script."
  exit 1
else
  gen3_log_info "No managed VPC CNI Plugin found, proceeding with the script."
fi

# Apply the new VPC CNI version
gen3_log_info "Applying new version of VPC CNI"
g3kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v1.14.1/config/master/aws-k8s-cni.yaml || { gen3_log_err "Failed to apply new VPC CNI version"; exit 1; }

# Check the version to make sure it updated
NEW_VERSION=$(kubectl describe daemonset aws-node --namespace kube-system | grep amazon-k8s-cni: | cut -d : -f 3)
gen3_log_info "Current version of aws-k8s-cni is: $NEW_VERSION"
if [ "$NEW_VERSION" != "v1.14.1" ]; then
  gen3_log_info "The version of aws-k8s-cni has not been updated correctly."
  exit 1
fi

# Edit the amazon-vpc-cni configmap to enable the network policy controller
gen3_log_info "Enabling NetworkPolicies in VPC CNI Configmap"
kubectl patch configmap -n kube-system amazon-vpc-cni --type merge -p '{"data":{"enable-network-policy-controller":"true"}}' || { gen3_log_err "Configmap patch failed"; exit 1; }

# Edit the aws-node daemonset: pass network-policy flags to the agent
# container (index 1 — assumes the manifest applied above keeps the agent
# as the second container; TODO confirm on CNI upgrades).
gen3_log_info "Enabling NetworkPolicies in aws-node Daemonset"
kubectl patch daemonset aws-node -n kube-system --type=json -p='[{"op": "add", "path": "/spec/template/spec/containers/1/args", "value": ["--enable-network-policy=true", "--enable-ipv6=false", "--enable-cloudwatch-logs=false", "--metrics-bind-addr=:8162", "--health-probe-bind-addr=:8163"]}]' || { gen3_log_err "Daemonset edit failed"; exit 1; }

# Operator checkpoint: all aws-node pods must be healthy (2/2) before the
# Gen3 network policies are re-applied.
kubectl get pods -n kube-system | grep aws
while true; do
  read -p "Do all the aws-node pods in the kube-system ns have 2/2 containers running? (yes/no): " yn
  case $yn in
    [Yy]* )
      gen3_log_info "Running kube-setup-networkpolicy..."
      gen3 kube-setup-networkpolicy || exit 1
      break
      ;;
    [Nn]* )
      gen3_log_err "Look at aws-node logs to figure out what went wrong. View this document for more details: https://docs.google.com/document/d/1fcBTciQSSwjvHktEnO_7EObY-xR_EvJ2NtgUa70wvL8"
      gen3_log_info "Rollback instructions are also available in the above document"
      ;;
    * )
      gen3_log_info "Please answer yes or no."
      ;;
  esac
done
18 changes: 9 additions & 9 deletions kube/services/argo/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
controller:
parallelism: 5
parallelism: 3
metricsConfig:
# -- Enables prometheus metrics server
enabled: true
Expand Down Expand Up @@ -28,11 +28,11 @@ controller:
}
]
}
}
}
resourceRateLimit:
limit: 40
burst: 4
burst: 4

# -- enable persistence using postgres
persistence:
Expand All @@ -49,7 +49,7 @@ controller:
port: 5432
database: GEN3_ARGO_DB_NAME
tableName: argo_workflows
# # the database secrets must be in the same namespace of the controller
# # the database secrets must be in the same namespace of the controller
userNameSecret:
name: argo-db-creds
key: db_username
Expand All @@ -58,7 +58,7 @@ controller:
key: db_password
nodeStatusOffLoad: true

workflowDefaults:
workflowDefaults:
spec:
archiveLogs: true

Expand All @@ -77,11 +77,11 @@ server:
baseHref: "/argo/"
# -- Extra arguments to provide to the Argo server binary, such as for disabling authentication.
extraArgs:
- --auth-mode=server
- --auth-mode=client
- --auth-mode=server
- --auth-mode=client
extraEnv:
- name: ARGO_HTTP1
value: "true"
- name: ARGO_HTTP1
value: "true"
resources:
requests:
memory: 8Gi
Expand Down
2 changes: 1 addition & 1 deletion kube/services/ingress/ingress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ metadata:
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]'
alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=600
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}'
alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-2021-06
alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-Res-FIPS-2023-04
spec:
ingressClassName: alb
rules:
Expand Down
8 changes: 7 additions & 1 deletion kube/services/karpenter/nodeTemplateDefault.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,15 @@ spec:
sudo dracut -f
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
sudo mount -t bpf bpffs /sys/fs/bpf
# --BOUNDARY
# Content-Type: text/cloud-config; charset="us-ascii"
# mounts:
# - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
--BOUNDARY
Content-Type: text/cloud-config; charset="us-ascii"
power_state:
Expand Down
Loading

0 comments on commit 13fb054

Please sign in to comment.