Skip to content

Commit

Permalink
Merge branch 'master' into chore/add-create-table-to-hatchery
Browse files Browse the repository at this point in the history
  • Loading branch information
george42-ctds authored Feb 29, 2024
2 parents 6bb29c3 + 6c27fc9 commit 1f9afc5
Showing 1 changed file with 96 additions and 0 deletions.
96 changes: 96 additions & 0 deletions files/scripts/healdata/heal-cedar-data-ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@
"Buisness Development": "Business Development"
}

# repository links
REPOSITORY_STUDY_ID_LINK_TEMPLATE = {
"NIDDK Central": "https://repository.niddk.nih.gov/studies/<STUDY_ID>/",
"NIDA Data Share": "https://datashare.nida.nih.gov/study/<STUDY_ID>",
"NICHD DASH": "https://dash.nichd.nih.gov/study/<STUDY_ID>",
"ICPSR": "https://www.icpsr.umich.edu/web/ICPSR/studies/<STUDY_ID>",
"BioSystics-AP": "https://biosystics-ap.com/assays/assaystudy/<STUDY_ID>/",
}


# Defines field that we don't want to include in the filters
OMITTED_VALUES_MAPPING = {
"study_metadata.human_subject_applicability.gender_applicability": "Not applicable"
Expand Down Expand Up @@ -114,6 +124,31 @@ def get_client_token(client_id: str, client_secret: str):
return token


def get_related_studies(serial_num, hostname):
related_study_result = []

if serial_num:
mds = requests.get(f"https://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000")
if mds.status_code == 200:
related_study_metadata = mds.json()

for (
related_study_metadata_key,
related_study_metadata_value,
) in related_study_metadata.items():
title = (
related_study_metadata_value.get(
"gen3_discovery", {}
)
.get("study_metadata", {})
.get("minimal_info", {})
.get("study_name", "")
)
link = f"https://{hostname}/portal/discovery/{related_study_metadata_key}/"
related_study_result.append({"title": title, "link": link})
return related_study_result


parser = argparse.ArgumentParser()

parser.add_argument("--directory", help="CEDAR Directory ID for registering ")
Expand Down Expand Up @@ -214,6 +249,67 @@ def get_client_token(client_id: str, client_secret: str):
mds_res["gen3_discovery"]["study_metadata"].update(cedar_record)
mds_res["gen3_discovery"]["study_metadata"]["metadata_location"]["other_study_websites"] = cedar_record_other_study_websites

# setup citations
doi_citation = mds_res["gen3_discovery"]["study_metadata"].get("doi_citation", "")
mds_res["gen3_discovery"]["study_metadata"]["citation"]["heal_platform_citation"] = doi_citation


# setup repository_study_link
data_repositories = (
mds_res.get("study_metadata", {})
.get("metadata_location", {})
.get("data_repositories", [])
)
repository_citation = "Users must also include a citation to the data as specified by the local repository."
repository_citation_additional_text = ' The link to the study page at the local repository can be found in the "Data" tab.'
for repository in data_repositories:
if (
repository["repository_name"]
and repository["repository_name"]
in REPOSITORY_STUDY_ID_LINK_TEMPLATE
and repository["repository_study_ID"]
):
repository_study_link = REPOSITORY_STUDY_ID_LINK_TEMPLATE[
repository["repository_name"]
].replace("<STUDY_ID>", repository["repository_study_ID"])
repository.update({"repository_study_link": repository_study_link})
if repository_citation_additional_text not in repository_citation:
repository_citation += repository_citation_additional_text
if len(data_repositories):
data_repositories[0] = {
**data_repositories[0],
"repository_citation": repository_citation,
}
mds_res["gen3_discovery"]["study_metadata"][
"metadata_location"
]["data_repositories"] = data_repositories



# set up related studies
serial_num = None
try:
serial_num = (
mds_res
.get("nih_reporter", {})
.get("project_num_split", {})
.get("serial_num", None)
)
except Exception:
print(f"Unable to get serial number for study")

if serial_num == None:
print(f"Unable to get serial number for study")

related_study_result = get_related_studies(serial_num, hostname)
existing_related_study_result = mds_res.get("related_studies", [])
for related_study in related_study_result:
if related_study not in existing_related_study_result:
existing_related_study_result.append(copy.deepcopy(related_study))
mds_res["gen3_discovery"][
"related_studies"
] = copy.deepcopy(existing_related_study_result)

# merge data from cedar that is not study level metadata into a level higher
deleted_keys = []
for key, value in mds_res["gen3_discovery"]["study_metadata"].items():
Expand Down

0 comments on commit 1f9afc5

Please sign in to comment.