Skip to content

Commit

Permalink
Merge branch 'master' into feat/GPE-1038
Browse files Browse the repository at this point in the history
  • Loading branch information
EliseCastle23 authored Jun 25, 2024
2 parents 82fda69 + e5315f4 commit 0263ab1
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 39 deletions.
121 changes: 107 additions & 14 deletions files/scripts/healdata/heal-cedar-data-ingest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import argparse
import copy
import json
import sys
import requests
import pydash
Expand Down Expand Up @@ -50,6 +49,50 @@
"BioSystics-AP": "https://biosystics-ap.com/assays/assaystudy/<STUDY_ID>/",
}

# Field names requested from the clinicaltrials.gov v2 studies endpoint.
# get_clinical_trials_gov_metadata() joins this list with "|" and sends it as
# the `fields` query parameter.
CLINICAL_TRIALS_GOV_FIELDS = [
"NCTId",
"OfficialTitle",
"BriefTitle",
"Acronym",
"StudyType",
"OverallStatus",
"StartDate",
"StartDateType",
"CompletionDate",
"CompletionDateType",
"IsFDARegulatedDrug",
"IsFDARegulatedDevice",
"IsPPSD",
"BriefSummary",
"DetailedDescription",
"Condition",
"DesignAllocation",
"DesignPrimaryPurpose",
"Phase",
"DesignInterventionModel",
"EnrollmentCount",
"EnrollmentType",
"DesignObservationalModel",
"InterventionType",
"PrimaryOutcomeMeasure",
"SecondaryOutcomeMeasure",
"OtherOutcomeMeasure",
"Gender",
"GenderBased",
"MaximumAge",
"MinimumAge",
"IPDSharing",
"IPDSharingTimeFrame",
"IPDSharingAccessCriteria",
"IPDSharingURL",
"SeeAlsoLinkURL",
"AvailIPDURL",
"AvailIPDId",
"AvailIPDComment",
"PatientRegistry",
"DesignTimePerspective",
]


def is_valid_uuid(uuid_to_test, version=4):
"""
Expand All @@ -76,7 +119,11 @@ def is_valid_uuid(uuid_to_test, version=4):
def update_filter_metadata(metadata_to_update):
# Retain these from existing filters
save_filters = ["Common Data Elements"]
filter_metadata = [filter for filter in metadata_to_update["advSearchFilters"] if filter["key"] in save_filters]
filter_metadata = [
filter
for filter in metadata_to_update["advSearchFilters"]
if filter["key"] in save_filters
]
for metadata_field_key, filter_field_key in FILTER_FIELD_MAPPINGS.items():
filter_field_values = pydash.get(metadata_to_update, metadata_field_key)
if filter_field_values:
Expand All @@ -99,7 +146,12 @@ def update_filter_metadata(metadata_to_update):
filter_metadata = pydash.uniq(filter_metadata)
metadata_to_update["advSearchFilters"] = filter_metadata
# Retain these from existing tags
save_tags = ["Data Repository", "Common Data Elements", "RequiredIDP", "Additional Acknowledgement"]
save_tags = [
"Data Repository",
"Common Data Elements",
"RequiredIDP",
"Additional Acknowledgement",
]
tags = [tag for tag in metadata_to_update["tags"] if tag["category"] in save_tags]
# Add any new tags from advSearchFilters
for f in metadata_to_update["advSearchFilters"]:
Expand Down Expand Up @@ -166,6 +218,21 @@ def get_related_studies(serial_num, guid, hostname):
return related_study_result


def get_clinical_trials_gov_metadata(nct_id, timeout=30):
    """
    Fetch metadata for a single study from the clinicaltrials.gov v2 API.

    Only the fields listed in CLINICAL_TRIALS_GOV_FIELDS are requested.

    Args:
        nct_id: study identifier placed in the request path. A falsy value
            (None, "") short-circuits without any network call.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot hang the whole ingest run.

    Returns:
        The decoded JSON response body, or None when ``nct_id`` is falsy.

    Raises:
        Exception: if the request fails, the response status is not 200, or
            the body is not valid JSON.
    """
    if not nct_id:
        return None

    requested_fields = "|".join(CLINICAL_TRIALS_GOV_FIELDS)
    url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}?fields={requested_fields}"

    try:
        ct_metadata_result = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        raise Exception(f"Could not get clinicaltrials.gov metadata: {exc}") from exc

    # Raised outside the try block so the message is not wrapped a second time.
    if ct_metadata_result.status_code != 200:
        raise Exception(
            f"Could not get clinicaltrials.gov metadata, error code {ct_metadata_result.status_code}"
        )

    try:
        return ct_metadata_result.json()
    except ValueError as exc:
        # requests' JSON decode errors derive from ValueError.
        raise Exception(f"Could not get clinicaltrials.gov metadata: {exc}") from exc


parser = argparse.ArgumentParser()

parser.add_argument("--directory", help="CEDAR Directory ID for registering ")
Expand Down Expand Up @@ -231,7 +298,8 @@ def get_related_studies(serial_num, guid, hostname):
for cedar_record in metadata_return["metadata"]["records"]:
# get the CEDAR instance id from cedar for querying in our MDS
cedar_instance_id = pydash.get(
cedar_record, "metadata_location.cedar_study_level_metadata_template_instance_ID"
cedar_record,
"metadata_location.cedar_study_level_metadata_template_instance_ID",
)
if cedar_instance_id is None:
print("This record doesn't have CEDAR instance id, skipping...")
Expand All @@ -246,7 +314,9 @@ def get_related_studies(serial_num, guid, hostname):

# the query result key is the record of the metadata. If it doesn't return anything then our query failed.
if len(list(mds_res.keys())) == 0 or len(list(mds_res.keys())) > 1:
print(f"Query returned nothing for template_instance_ID={cedar_instance_id}&data=true")
print(
f"Query returned nothing for template_instance_ID={cedar_instance_id}&data=true"
)
continue

# get the key for our mds record
Expand All @@ -273,8 +343,10 @@ def get_related_studies(serial_num, guid, hostname):
).get("other_study_websites", [])
# this ensures the nih_application_id, cedar_study_level_metadata_template_instance_ID and study_name are not alterable from CEDAR side
del cedar_record["metadata_location"]
cedar_record["minimal_info"]["study_name"] = mds_res["gen3_discovery"]["study_metadata"].get("minimal_info", {}).get(
"study_name", ""
cedar_record["minimal_info"]["study_name"] = (
mds_res["gen3_discovery"]["study_metadata"]
.get("minimal_info", {})
.get("study_name", "")
)

mds_res["gen3_discovery"]["study_metadata"].update(cedar_record)
Expand All @@ -283,7 +355,7 @@ def get_related_studies(serial_num, guid, hostname):
] = cedar_record_other_study_websites

# setup citations
doi_citation = mds_res["gen3_discovery"]["study_metadata"].get(
doi_citation = mds_res["gen3_discovery"].get(
"doi_citation", ""
)
mds_res["gen3_discovery"]["study_metadata"]["citation"][
Expand Down Expand Up @@ -312,11 +384,9 @@ def get_related_studies(serial_num, guid, hostname):
repository.update(
{"repository_study_link": repository_study_link}
)
if (
repository_citation_additional_text
not in repository_citation
):
repository_citation += repository_citation_additional_text
if (repository.get("repository_study_link", None) and repository_citation_additional_text
not in repository_citation):
repository_citation += repository_citation_additional_text
if len(data_repositories):
data_repositories[0] = {
**data_repositories[0],
Expand Down Expand Up @@ -344,7 +414,9 @@ def get_related_studies(serial_num, guid, hostname):
related_study_result = get_related_studies(
serial_num, mds_record_guid, hostname
)
mds_res["gen3_discovery"]["related_studies"] = copy.deepcopy(related_study_result)
mds_res["gen3_discovery"]["related_studies"] = copy.deepcopy(
related_study_result
)

# merge data from cedar that is not study level metadata into a level higher
deleted_keys = []
Expand All @@ -359,6 +431,27 @@ def get_related_studies(serial_num, guid, hostname):
mds_res["gen3_discovery"]
)

clinical_trials_id = None
try:
clinical_trials_id = (
mds_res["gen3_discovery"]["study_metadata"]
.get("metadata_location", {})
.get("clinical_trials_study_ID", "")
)
except Exception:
print("Unable to get clinical_trials_study_ID for study")
if clinical_trials_id:
try:
ct_gov_metadata = get_clinical_trials_gov_metadata(clinical_trials_id)
if ct_gov_metadata:
print(f"Got clinicaltrials.gov metadata for {mds_record_guid} with NCT ID {clinical_trials_id}")
mds_cedar_register_data_body["clinicaltrials_gov"] = copy.deepcopy(ct_gov_metadata)
except Exception as ex:
print(f'{ex}')
# This means the old clinicaltrials_gov section is actually from CEDAR not clinicaltrials.gov, so remove it
elif "clinicaltrials_gov" in mds_cedar_register_data_body:
del mds_cedar_register_data_body["clinicaltrials_gov"]

mds_cedar_register_data_body["gen3_discovery"] = mds_discovery_data_body

mds_cedar_register_data_body["_guid_type"] = "discovery_metadata"
Expand Down
2 changes: 1 addition & 1 deletion files/squid_whitelist/web_whitelist
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ centos.mirrors.hoobly.com
centos.mirrors.tds.net
centos.mirrors.wvstateu.edu
cernvm.cern.ch
charts.bitnami.com
charts.helm.sh
cloud.r-project.org
coredns.github.io
Expand Down Expand Up @@ -137,6 +136,7 @@ registry.terraform.io
releases.rancher.com
rendersnake.googlecode.com
repec.org
repo.broadcom.com
repo-prod.prod.sagebase.org
repo-staging.prod.sagebase.org
repo.continuum.io
Expand Down
12 changes: 0 additions & 12 deletions kube/services/revproxy/gen3.nginx.conf/ohdsi-atlas-service.conf

This file was deleted.

12 changes: 0 additions & 12 deletions kube/services/revproxy/gen3.nginx.conf/ohdsi-webapi-service.conf

This file was deleted.

0 comments on commit 0263ab1

Please sign in to comment.