diff --git a/.secrets.baseline b/.secrets.baseline index 2e574927ab..9da1ada7ed 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3724,5 +3724,5 @@ } ] }, - "generated_at": "2024-03-01T21:46:43Z" + "generated_at": "2024-03-01T21:49:27Z" } diff --git a/files/scripts/healdata/heal-cedar-data-ingest.py b/files/scripts/healdata/heal-cedar-data-ingest.py index 730a3b36e8..71575e3c56 100644 --- a/files/scripts/healdata/heal-cedar-data-ingest.py +++ b/files/scripts/healdata/heal-cedar-data-ingest.py @@ -1,5 +1,5 @@ import argparse -import copy +import json import sys import requests import pydash @@ -35,16 +35,6 @@ "Buisness Development": "Business Development" } -# repository links -REPOSITORY_STUDY_ID_LINK_TEMPLATE = { - "NIDDK Central": "https://repository.niddk.nih.gov/studies//", - "NIDA Data Share": "https://datashare.nida.nih.gov/study/", - "NICHD DASH": "https://dash.nichd.nih.gov/study/", - "ICPSR": "https://www.icpsr.umich.edu/web/ICPSR/studies/", - "BioSystics-AP": "https://biosystics-ap.com/assays/assaystudy//", -} - - # Defines field that we don't want to include in the filters OMITTED_VALUES_MAPPING = { "study_metadata.human_subject_applicability.gender_applicability": "Not applicable" @@ -124,31 +114,6 @@ def get_client_token(client_id: str, client_secret: str): return token -def get_related_studies(serial_num, hostname): - related_study_result = [] - - if serial_num: - mds = requests.get(f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000") - if mds.status_code == 200: - related_study_metadata = mds.json() - - for ( - related_study_metadata_key, - related_study_metadata_value, - ) in related_study_metadata.items(): - title = ( - related_study_metadata_value.get( - "gen3_discovery", {} - ) - .get("study_metadata", {}) - .get("minimal_info", {}) - .get("study_name", "") - ) - link = f"https://{hostname}/portal/discovery/{related_study_metadata_key}/" - related_study_result.append({"title": title, "link": link}) - return related_study_result - - parser = argparse.ArgumentParser() parser.add_argument("--directory", help="CEDAR Directory ID for registering ") @@ -249,67 +214,6 @@ def get_related_studies(serial_num, hostname): mds_res["gen3_discovery"]["study_metadata"].update(cedar_record) mds_res["gen3_discovery"]["study_metadata"]["metadata_location"]["other_study_websites"] = cedar_record_other_study_websites - # setup citations - doi_citation = mds_res["gen3_discovery"]["study_metadata"].get("doi_citation", "") - mds_res["gen3_discovery"]["study_metadata"]["citation"]["heal_platform_citation"] = doi_citation - - - # setup repository_study_link - data_repositories = ( - mds_res.get("study_metadata", {}) - .get("metadata_location", {}) - .get("data_repositories", []) - ) - repository_citation = "Users must also include a citation to the data as specified by the local repository." - repository_citation_additional_text = ' The link to the study page at the local repository can be found in the "Data" tab.' - for repository in data_repositories: - if ( - repository["repository_name"] - and repository["repository_name"] - in REPOSITORY_STUDY_ID_LINK_TEMPLATE - and repository["repository_study_ID"] - ): - repository_study_link = REPOSITORY_STUDY_ID_LINK_TEMPLATE[ - repository["repository_name"] - ].replace("", repository["repository_study_ID"]) - repository.update({"repository_study_link": repository_study_link}) - if repository_citation_additional_text not in repository_citation: - repository_citation += repository_citation_additional_text - if len(data_repositories): - data_repositories[0] = { - **data_repositories[0], - "repository_citation": repository_citation, - } - mds_res["gen3_discovery"]["study_metadata"][ - "metadata_location" - ]["data_repositories"] = data_repositories - - - - # set up related studies - serial_num = None - try: - serial_num = ( - mds_res - .get("nih_reporter", {}) - .get("project_num_split", {}) - .get("serial_num", None) - ) - except Exception: - print(f"Unable to get serial number for study") - - if serial_num == None: - print(f"Unable to get serial number for study") - - related_study_result = get_related_studies(serial_num, hostname) - existing_related_study_result = mds_res.get("related_studies", []) - for related_study in related_study_result: - if related_study not in existing_related_study_result: - existing_related_study_result.append(copy.deepcopy(related_study)) - mds_res["gen3_discovery"][ - "related_studies" - ] = copy.deepcopy(existing_related_study_result) - # merge data from cedar that is not study level metadata into a level higher deleted_keys = [] for key, value in mds_res["gen3_discovery"]["study_metadata"].items():