Skip to content

Commit

Permalink
Merge branch 'master' into feat/gen3-discovery-ai
Browse files Browse the repository at this point in the history
  • Loading branch information
Avantol13 authored Mar 1, 2024
2 parents 5836539 + b9ab8af commit 6aa015d
Show file tree
Hide file tree
Showing 30 changed files with 582 additions and 345 deletions.
348 changes: 73 additions & 275 deletions .secrets.baseline

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM jenkins/jenkins:2.439-jdk21
FROM jenkins/jenkins:2.426.3-lts-jdk21

USER root

Expand Down
2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins2/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM jenkins/jenkins:2.415-jdk11
FROM jenkins/jenkins:2.426.3-lts-jdk21

USER root

Expand Down
1 change: 1 addition & 0 deletions files/scripts/ci-env-pool-reset.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ source "${GEN3_HOME}/gen3/gen3setup.sh"
cat - > jenkins-envs-services.txt <<EOF
jenkins-blood
jenkins-brain
jenkins-dcp
jenkins-genomel
jenkins-niaid
EOF
Expand Down
111 changes: 105 additions & 6 deletions files/scripts/healdata/heal-cedar-data-ingest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import argparse
import json
import copy
import sys
import requests
import pydash
Expand All @@ -24,14 +24,27 @@
"Questionnaire/Survey/Assessment - unvalidated instrument": "Questionnaire/Survey/Assessment",
"Cis Male": "Male",
"Cis Female": "Female",
"Trans Male": "Female-to-male transsexual",
"Trans Female": "Male-to-female transsexual",
"Agender, Non-binary, gender non-conforming": "Other",
"Gender Queer": "Other",
"Intersex": "Intersexed",
"Trans Male": "Transgender man/trans man/female-to-male (FTM)",
"Female-to-male transsexual": "Transgender man/trans man/female-to-male (FTM)",
"Trans Female": "Transgender woman/trans woman/male-to-female (MTF)",
"Male-to-female transsexual": "Transgender woman/trans woman/male-to-female (MTF)",
"Agender, Non-binary, gender non-conforming": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Gender Queer": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Intersex": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Intersexed": "Genderqueer/gender nonconforming/neither exclusively male nor female",
"Buisness Development": "Business Development"
}

# repository links
REPOSITORY_STUDY_ID_LINK_TEMPLATE = {
"NIDDK Central": "https://repository.niddk.nih.gov/studies/<STUDY_ID>/",
"NIDA Data Share": "https://datashare.nida.nih.gov/study/<STUDY_ID>",
"NICHD DASH": "https://dash.nichd.nih.gov/study/<STUDY_ID>",
"ICPSR": "https://www.icpsr.umich.edu/web/ICPSR/studies/<STUDY_ID>",
"BioSystics-AP": "https://biosystics-ap.com/assays/assaystudy/<STUDY_ID>/",
}


# Defines field that we don't want to include in the filters
OMITTED_VALUES_MAPPING = {
"study_metadata.human_subject_applicability.gender_applicability": "Not applicable"
Expand Down Expand Up @@ -111,6 +124,31 @@ def get_client_token(client_id: str, client_secret: str):
return token


def get_related_studies(serial_num, hostname):
    """Return discovery-page links for studies sharing an NIH serial number.

    Queries the internal metadata service (MDS) for every study whose
    ``nih_reporter.project_num_split.serial_num`` equals ``serial_num`` and
    builds a portal discovery link for each match.

    Args:
        serial_num: NIH serial number to match; a falsy value short-circuits
            to an empty result without contacting MDS.
        hostname: public hostname used to build the portal links.

    Returns:
        list[dict]: one ``{"title": ..., "link": ...}`` entry per related
        study; empty when ``serial_num`` is falsy or MDS does not return 200.
    """
    related_study_result = []

    if serial_num:
        mds = requests.get(
            "http://revproxy-service/mds/metadata",
            params={
                "nih_reporter.project_num_split.serial_num": serial_num,
                "data": "true",
                "limit": 2000,
            },
            # Without a timeout a stuck MDS hangs the whole ingest forever.
            timeout=60,
        )
        if mds.status_code == 200:
            related_study_metadata = mds.json()

            for (
                related_study_metadata_key,
                related_study_metadata_value,
            ) in related_study_metadata.items():
                title = (
                    related_study_metadata_value.get(
                        "gen3_discovery", {}
                    )
                    .get("study_metadata", {})
                    .get("minimal_info", {})
                    .get("study_name", "")
                )
                link = f"https://{hostname}/portal/discovery/{related_study_metadata_key}/"
                related_study_result.append({"title": title, "link": link})
    return related_study_result


parser = argparse.ArgumentParser()

parser.add_argument("--directory", help="CEDAR Directory ID for registering ")
Expand Down Expand Up @@ -211,6 +249,67 @@ def get_client_token(client_id: str, client_secret: str):
mds_res["gen3_discovery"]["study_metadata"].update(cedar_record)
mds_res["gen3_discovery"]["study_metadata"]["metadata_location"]["other_study_websites"] = cedar_record_other_study_websites

# setup citations
doi_citation = mds_res["gen3_discovery"]["study_metadata"].get("doi_citation", "")
mds_res["gen3_discovery"]["study_metadata"]["citation"]["heal_platform_citation"] = doi_citation


# setup repository_study_link
data_repositories = (
mds_res.get("study_metadata", {})
.get("metadata_location", {})
.get("data_repositories", [])
)
repository_citation = "Users must also include a citation to the data as specified by the local repository."
repository_citation_additional_text = ' The link to the study page at the local repository can be found in the "Data" tab.'
for repository in data_repositories:
if (
repository["repository_name"]
and repository["repository_name"]
in REPOSITORY_STUDY_ID_LINK_TEMPLATE
and repository["repository_study_ID"]
):
repository_study_link = REPOSITORY_STUDY_ID_LINK_TEMPLATE[
repository["repository_name"]
].replace("<STUDY_ID>", repository["repository_study_ID"])
repository.update({"repository_study_link": repository_study_link})
if repository_citation_additional_text not in repository_citation:
repository_citation += repository_citation_additional_text
if len(data_repositories):
data_repositories[0] = {
**data_repositories[0],
"repository_citation": repository_citation,
}
mds_res["gen3_discovery"]["study_metadata"][
"metadata_location"
]["data_repositories"] = data_repositories



# set up related studies
serial_num = None
try:
serial_num = (
mds_res
.get("nih_reporter", {})
.get("project_num_split", {})
.get("serial_num", None)
)
except Exception:
print(f"Unable to get serial number for study")

if serial_num == None:
print(f"Unable to get serial number for study")

related_study_result = get_related_studies(serial_num, hostname)
existing_related_study_result = mds_res.get("related_studies", [])
for related_study in related_study_result:
if related_study not in existing_related_study_result:
existing_related_study_result.append(copy.deepcopy(related_study))
mds_res["gen3_discovery"][
"related_studies"
] = copy.deepcopy(existing_related_study_result)

# merge data from cedar that is not study level metadata into a level higher
deleted_keys = []
for key, value in mds_res["gen3_discovery"]["study_metadata"].items():
Expand Down
2 changes: 1 addition & 1 deletion files/squid_whitelist/web_whitelist
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ ctds-planx.atlassian.net
data.cityofchicago.org
dataguids.org
api.login.yahoo.com
api.snapcraft.io
apt.kubernetes.io
argoproj.github.io
archive.cloudera.com
Expand All @@ -34,6 +33,7 @@ cernvm.cern.ch
charts.bitnami.com
charts.helm.sh
cloud.r-project.org
coredns.github.io
coreos.com
covidstoplight.org
cpan.mirrors.tds.net
Expand Down
3 changes: 3 additions & 0 deletions files/squid_whitelist/web_wildcard_whitelist
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,12 @@
.sks-keyservers.net
.slack.com
.slack-msgs.com
.snapcraft.io
.snapcraftcontent.com
.sourceforge.net
.southsideweekly.com
.theanvil.io
.tigera.io
.twistlock.com
.ubuntu.com
.ucsc.edu
Expand Down
38 changes: 33 additions & 5 deletions flavors/squid_auto/squid_running_on_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ DISTRO=$(awk -F '[="]*' '/^NAME/ { print $2 }' < /etc/os-release)
WORK_USER="ubuntu"
if [[ $DISTRO == "Amazon Linux" ]]; then
WORK_USER="ec2-user"
if [[ $(awk -F '[="]*' '/^VERSION_ID/ { print $2 }' < /etc/os-release) == "2023" ]]; then
DISTRO="al2023"
fi
fi
HOME_FOLDER="/home/${WORK_USER}"
SUB_FOLDER="${HOME_FOLDER}/cloud-automation"
Expand Down Expand Up @@ -60,6 +63,8 @@ fi
# Install baseline host utilities the rest of the bootstrap expects:
# Ubuntu gets atop; Amazon Linux 2023 needs cronie (cron daemon) and nc.
function install_basics(){
  case "$DISTRO" in
    Ubuntu)
      apt -y install atop
      ;;
    al2023)
      sudo dnf install cronie nc -y
      ;;
  esac
}

Expand All @@ -69,10 +74,18 @@ function install_docker(){
# Docker
###############################################################
# Install docker from sources
curl -fsSL ${DOCKER_DOWNLOAD_URL}/gpg | sudo apt-key add -
add-apt-repository "deb [arch=amd64] ${DOCKER_DOWNLOAD_URL} $(lsb_release -cs) stable"
apt update
apt install -y docker-ce
if [[ $DISTRO == "Ubuntu" ]]; then
curl -fsSL ${DOCKER_DOWNLOAD_URL}/gpg | sudo apt-key add -
add-apt-repository "deb [arch=amd64] ${DOCKER_DOWNLOAD_URL} $(lsb_release -cs) stable"
apt update
apt install -y docker-ce
else
sudo yum update -y
sudo yum install -y docker
# Start and enable Docker service
sudo systemctl start docker
sudo systemctl enable docker
fi
mkdir -p /etc/docker
cp ${SUB_FOLDER}/flavors/squid_auto/startup_configs/docker-daemon.json /etc/docker/daemon.json
chmod -R 0644 /etc/docker
Expand Down Expand Up @@ -201,8 +214,10 @@ function install_awslogs {
if [[ $DISTRO == "Ubuntu" ]]; then
wget ${AWSLOGS_DOWNLOAD_URL} -O amazon-cloudwatch-agent.deb
dpkg -i -E ./amazon-cloudwatch-agent.deb
else
elif [[ $DISTRO == "Amazon Linux" ]]; then
sudo yum install amazon-cloudwatch-agent nc -y
elif [[ $DISTRO == "al2023" ]]; then
sudo dnf install amazon-cloudwatch-agent -y
fi

# Configure the AWS logs
Expand Down Expand Up @@ -292,6 +307,19 @@ function main(){
--volume ${SQUID_CACHE_DIR}:${SQUID_CACHE_DIR} \
--volume ${SQUID_CONFIG_DIR}:${SQUID_CONFIG_DIR}:ro \
quay.io/cdis/squid:${SQUID_IMAGE_TAG}

max_attempts=10
attempt_counter=0
while [ $attempt_counter -lt $max_attempts ]; do
#((attempt_counter++))
sleep 10
if [[ -z "$(sudo lsof -i:3128)" ]]; then
echo "Squid not healthy, restarting."
docker restart squid
else
echo "Squid healthy"
fi
done
}

main
2 changes: 1 addition & 1 deletion gen3/bin/kube-roll-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ if [[ "$GEN3_ROLL_FAST" != "true" ]]; then
else
gen3 kube-setup-autoscaler &
fi
gen3 kube-setup-kube-dns-autoscaler &
#gen3 kube-setup-kube-dns-autoscaler &
gen3 kube-setup-metrics deploy || true
gen3 kube-setup-tiller || true
#
Expand Down
22 changes: 21 additions & 1 deletion gen3/bin/kube-setup-argo-wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@ if [[ -z "$GEN3_SOURCE_ONLY" ]]; then

gen3 roll argo-wrapper
g3kubectl apply -f "${GEN3_HOME}/kube/services/argo-wrapper/argo-wrapper-service.yaml"


if g3k_manifest_lookup .argo.argo_server_service_url 2> /dev/null; then
export ARGO_HOST=$(g3k_manifest_lookup .argo.argo_server_service_url)
else
export ARGO_HOST="http://argo-argo-workflows-server.argo.svc.cluster.local:2746"
fi

if g3k_config_lookup '.argo_namespace' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json 2> /dev/null; then
export ARGO_NAMESPACE=$(g3k_config_lookup '.argo_namespace' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json)
else
export ARGO_NAMESPACE="argo"
fi

envsubst <"${GEN3_HOME}/kube/services/argo-wrapper/config.ini" > /tmp/config.ini

g3kubectl delete configmap argo-wrapper-namespace-config
g3kubectl create configmap argo-wrapper-namespace-config --from-file /tmp/config.ini

rm /tmp/config.ini

gen3_log_info "the argo-wrapper service has been deployed onto the kubernetes cluster"
fi
fi
Loading

0 comments on commit 6aa015d

Please sign in to comment.