Skip to content

Commit

Permalink
Add opentargets
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 31, 2024
1 parent dc6c5ec commit 2e6def8
Show file tree
Hide file tree
Showing 17 changed files with 419 additions and 139 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ Suggests:
scales,
tidyr,
DiagrammeR,
forcats
forcats,
arrow
Remotes:
github::charlieccarey/monarchr,
github::phenoscape/rphenoscape,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export(get_ontology)
export(get_ontology_descendants)
export(get_ontology_dict)
export(get_ontology_levels)
export(get_opentargets)
export(get_pli)
export(get_prevalence)
export(get_ttd)
Expand Down Expand Up @@ -62,7 +63,10 @@ export(plot_upheno)
export(prune_ancestors)
export(query_monarch)
export(query_oard)
export(set_cores)
export(to_graph)
export(unlist_dt)
import(BiocParallel)
import(data.table)
import(orthogene)
import(pals)
Expand All @@ -73,6 +77,7 @@ import(tidygraph)
importFrom(Matrix,colSums)
importFrom(methods,show)
importFrom(orthogene,map_genes)
importFrom(parallel,detectCores)
importFrom(simona,LCA_depth)
importFrom(stats,as.dist)
importFrom(stats,cutree)
Expand Down
3 changes: 0 additions & 3 deletions R/0docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,6 @@ NULL
#' @param add_ancestors Add ancestors for each term.
#' @param add_n_edges Add the number of edges (connections) for each term.
#' @param add_ontology_levels Add the ontology level for each term.
#' @inheritParams main_
#' @inheritParams plot_
#' @inheritParams simona::dag_ancestors
#' @import simona
#' @family add_
#' @returns Added data.
Expand Down
5 changes: 3 additions & 2 deletions R/add_ancestors.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_ancestors <- function(ont,
include_self=TRUE,
prefix="ancestor",
fill_na=TRUE,
i=1,
force_new=FALSE){
term <- NULL;

Expand All @@ -47,14 +48,14 @@ add_ancestors <- function(ont,
)
}) |> data.table::rbindlist(idcol = prefix, fill = TRUE)
#### Ensure one row per term ####
ancestors_groups <- ancestors_groups[, .SD[1], keyby = "term"]
ancestors_groups <- ancestors_groups[, .SD[i], keyby = "term"]
if(isTRUE(fill_na)){
ancestors_groups <- ancestors_groups[ont@terms][is.na(get(prefix)),
(prefix):=term]
}
ont@elementMetadata[[prefix]] <- ancestors_groups[[prefix]]
#### Add ancestor_name col
ont@elementMetadata[[prefix_name]] <- map_ontology_terms(
ont@elementMetadata[[prefix_name]] <- map_ontology_terms(
ont = ont,
terms = ont@elementMetadata[[prefix]],
to = "name")
Expand Down
6 changes: 4 additions & 2 deletions R/filter_ontology.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#' Filter ontology
#'
#' Filter ontology by terms.
#' @inheritDotParams simona::dag_filter
#' @export
#' @examples
#' ont <- get_ontology("hp")
Expand All @@ -12,6 +13,7 @@ filter_ontology <- function(ont,
remove_terms=NULL,
keep_descendants=NULL,
remove_descendants=NULL,
include_self = TRUE,
use_simona=FALSE,
...){
#### Check remove_terms ####
Expand All @@ -29,7 +31,7 @@ filter_ontology <- function(ont,
if(length(keep_descendants)>0){
messager("Keeping descendants of",length(keep_descendants),"term(s).")
keep_descendants <- simona::dag_offspring(dag = ont,
include_self = TRUE,
include_self = include_self,
term = keep_descendants)
ont <- simona::dag_filter(ont,
terms=keep_descendants,
Expand All @@ -48,7 +50,7 @@ filter_ontology <- function(ont,
if(length(remove_descendants)>0){
messager("Removing descendants of",length(remove_descendants),"term(s).")
remove_descendants <- simona::dag_offspring(dag = ont,
include_self = TRUE,
include_self = include_self,
term = remove_descendants)
keep_terms <- ont@terms[!ont@terms %in% remove_descendants]
ont <- simona::dag_filter(ont,
Expand Down
79 changes: 79 additions & 0 deletions R/get_opentargets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#' Get OpenTargets
#'
#' Get OpenTargets disease-gene associations data.
#' The directory listing at \code{ftp} is scraped for \code{.parquet} and
#' \code{.json} files, each file is read in parallel, and the results are
#' row-bound into a single table that is then cached on disk.
#' @source \href{https://community.opentargets.org/t/r-script-for-graphql-query-query-targetdiseaseevidence/662/5}{OpenTargets GraphQL queries in R}
#' @param release Release directory name used to build \code{ftp}
#' (default: \code{"latest"}).
#' @param data_type Name of the dataset directory to download.
#' Only the first element is used.
#' @param server Base URL of the OpenTargets file server.
#' @param subdir Top-level subdirectory on the server
#' (\code{"platform/"} or \code{"genetics/"}).
#' @param subdir2 Path component placed between \code{release} and
#' \code{data_type} when building \code{ftp}.
#' @param ftp Full URL of the directory listing to scrape.
#' By default it is assembled from \code{server}, \code{subdir},
#' \code{release}, \code{subdir2} and \code{data_type}.
#' @param save_dir Directory in which the cached result is stored.
#' @param force_new If \code{TRUE}, ignore any cached file and download again.
#' @returns A \link[data.table]{data.table} with one \code{file} column
#' identifying the source file of each row, and a \code{"version"} attribute
#' holding the "Last modified" date of the first listed file.
#' @import rvest
#' @export
#' @examples
#' d <- get_opentargets()
get_opentargets <- function(release = "latest",
                            data_type = c("associationByDatasourceDirect",
                                          "associationByDatasourceIndirect",

                                          "associationByDatatypeDirect",
                                          "associationByDatatypeIndirect",

                                          "associationByOverallDirect",
                                          "associationByOverallIndirect"
                            )[1],
                            server = "https://ftp.ebi.ac.uk/pub/databases/opentargets/",
                            subdir = c("platform/", "genetics/")[1],
                            subdir2 = c("/output/etl/parquet/", "/")[1],
                            ftp = paste0(server,
                                         subdir,
                                         release,
                                         subdir2,
                                         data_type[1], "/"),
                            save_dir = cache_dir(),
                            force_new = FALSE) {

  ## Variant and gene level data merged for all genome-wide summary statistics:
  # ftp="https://ftp.ebi.ac.uk/pub/databases/opentargets/genetics/latest/d2v2g_scored/"
  ## Only one dataset is handled per call; using the first element keeps
  ## save_path a single string even when a vector is supplied.
  save_path <- file.path(save_dir,
                         paste0("opentargets_", data_type[1], ".rds"))
  #### Import cached data ####
  ## NOTE(review): assumes cache_save() writes an RDS file readable by
  ## readRDS() -- confirm against its definition.
  if (file.exists(save_path) &&
      isFALSE(force_new)) {
    messager("Loading cached file -->", save_path)
    return(readRDS(save_path))
  }
  #### Get new data ####
  ## requireNamespace() returns FALSE rather than erroring, so check it
  ## explicitly instead of failing later inside the parallel workers.
  if (!requireNamespace("arrow", quietly = TRUE)) {
    stopper("Package 'arrow' is required to read OpenTargets parquet files.",
            "Please install it first.")
  }
  #### Scrape FTP to get file names ####
  tables <- rvest::html_table(rvest::read_html(ftp))
  if (length(tables) == 0) stopper("No data files found at", ftp)
  tbl <- tables[[1]]
  ## which() drops NA matches, mirroring the behaviour of subset().
  is_data_file <- which(
    endsWith(tbl$Name, ".parquet") | endsWith(tbl$Name, ".json")
  )
  tbl <- tbl[is_data_file, ]
  if (nrow(tbl) == 0) stopper("No data files found at", ftp)
  tbl$Date <- stringr::str_split(tbl$`Last modified`, " ",
                                 simplify = TRUE)[, 1]
  ## Strip trailing slashes so that file URLs do not contain "//".
  base_url <- sub("/+$", "", ftp)
  BPPARAM <- set_cores()
  d <- BiocParallel::bplapply(
    stats::setNames(tbl$Name, tbl$Name),
    BPPARAM = BPPARAM,
    function(f) {
      file_url <- file.path(base_url, f)
      if (endsWith(f, ".json")) {
        ## JSON files are read for their embedded schema only.
        j <- jsonlite::fromJSON(file_url)
        jsonlite::fromJSON(j$serialisedSchema)
      } else if (endsWith(f, ".parquet")) {
        arrow::read_parquet(file_url) |>
          data.table::data.table()
      }
    }) |> data.table::rbindlist(idcol = "file")
  ## setattr() modifies by reference and avoids copying the data.table.
  data.table::setattr(d, "version", tbl$Date[[1]])
  #### Report ####
  messager("OpenTargets data loaded with:",
           "\n-", formatC(nrow(d), big.mark = ","), "rows",
           if ("targetId" %in% names(d)) {
             paste("\n-", formatC(length(unique(d$targetId)), big.mark = ","),
                   "unique targets across")
           },
           if ("diseaseId" %in% names(d)) {
             paste("\n-", formatC(length(unique(d$diseaseId)), big.mark = ","),
                   "unique diseases across")
           },
           v = TRUE)
  cache_save(d, save_path)
  return(d)
}
4 changes: 3 additions & 1 deletion R/plot_graph_visnetwork.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#' @describeIn plot_ plot_
#' Plot graph using visNetwork.
#'
#' Plot graph using visNetwork.
#' @inheritParams plot_
#' @param add_visExport Add PDF download button.
#' @inheritParams map_colors
#' @inheritParams visNetwork::visIgraph
Expand Down
5 changes: 1 addition & 4 deletions R/query_monarch.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
#' @describeIn query_ query_
#' @inheritDotParams monarchr::biolink_search
#' @export
#' @examples
#' cells <- monarchr::biolink_search(phrase_or_id = "T-cell")
query_monarch <- function(...){
requireNamespace("monarchr")
monarchr::biolink_search(...)
# monarchr::biolink_search(...)

}
51 changes: 51 additions & 0 deletions R/set_cores.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#' Set cores
#'
#' Assign cores automatically for parallel processing, while reserving some.
#'
#' When more than one worker is assigned, \pkg{data.table} multithreading is
#' switched off via \code{data.table::setDTthreads(threads = 1)}.
#' This setting is not restored by the function.
#'
#' @param workers Number (>=1) or proportion (<1) of worker cores to use.
#' \code{NULL} is treated as the default (\code{0.90}).
#' At least one worker is always assigned.
#' @param verbose Print messages.
#' @param progressbar logical(1) Enable progress bar
#' (based on \code{plyr:::progress_text}).
#' Enabling the progress bar changes the default value of tasks to
#' \code{.Machine$integer.max}, so that progress is reported for
#' each element of X.
#' @returns A \pkg{BiocParallel} parameter object:
#' \code{SnowParam} on Windows, \code{MulticoreParam} otherwise.
#'
#' @export
#' @import data.table
#' @import BiocParallel
#' @importFrom parallel detectCores
set_cores <- function(workers = .90,
                      progressbar = TRUE,
                      verbose = TRUE) {

  ## Allocate ~10% of your available cores to non-parallelized processes
  if (is.null(workers)) workers <- .90
  total_cores <- parallel::detectCores()
  ## detectCores() returns NA when the count is unknown; assume one core.
  if (is.na(total_cores)) total_cores <- 1L
  if (workers < 1) {
    reserved_cores <- ceiling(total_cores * (1 - workers))
    ## On small machines the reservation can consume every core;
    ## always leave at least one worker so the backend is valid.
    workers <- max(total_cores - reserved_cores, 1L)
    reserved_cores <- total_cores - workers
  } else {
    ## Never report a negative reservation when more workers than
    ## cores are requested.
    reserved_cores <- max(total_cores - workers, 0L)
  }
  messager(workers, "core(s) assigned as workers",
           paste0("(", reserved_cores, " reserved)."),
           v = verbose
  )
  ### Ensure data.table doesn't interfere with parallelization ####
  if (workers > 1) data.table::setDTthreads(threads = 1)
  #### Handle Windows ####
  if (.Platform$OS.type == "windows") {
    params <- BiocParallel::SnowParam(workers = workers,
                                      progressbar = progressbar)
  } else {
    params <- BiocParallel::MulticoreParam(workers = workers,
                                           progressbar = progressbar)
  }
  # DelayedArray::setAutoBPPARAM(params)
  #### Not allowed to use internal functions ####
  # DelayedArray:::set_verbose_block_processing(verbose)
  return(params)
}
29 changes: 29 additions & 0 deletions R/unlist_dt.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' Unlist a \link{data.table}
#'
#' \link{data.table}s can sometimes have columns that are nested lists.
#' This function will either drop these columns or convert them into
#' character strings.
#' The input is not modified; a new \link{data.table} is returned.
#' @param dat \link{data.table}
#' @param drop Drop columns that are lists.
#' @param collapse Character to collapse lists with.
#' Used only when drop is \code{FALSE}.
#' Within each cell, only the unique values are kept before collapsing.
#' @returns \link{data.table}
#' @export
#' @examples
#' dat <- data.table::data.table(a=1:3,b=list(1:2,3:4,5:6))
#' unlist_dt(dat)
unlist_dt <- function(dat,
                      drop = FALSE,
                      collapse = ";") {
  ## Logical mask over columns; vapply() guarantees a logical vector
  ## even when `dat` has zero columns.
  is_list_col <- vapply(dat, is.list, logical(1))
  n_list_cols <- sum(is_list_col)
  if (isTRUE(drop)) {
    messager("Dropping", n_list_cols, "data.table columns.")
    ## Select by name: negating a logical mask with `-` would yield
    ## 0/-1 indices and drop the wrong column.
    return(dat[, names(dat)[!is_list_col], with = FALSE])
  }
  messager("Unlisting", n_list_cols, "data.table columns.")
  ## Work on a copy so the caller's table is left untouched, and only
  ## rewrite the list columns so all other columns keep their type.
  out <- data.table::copy(dat)
  for (col in names(dat)[is_list_col]) {
    collapsed <- vapply(
      out[[col]],
      function(x) paste(unique(unlist(x)), collapse = collapse),
      character(1),
      USE.NAMES = FALSE
    )
    data.table::set(out, j = col, value = collapsed)
  }
  out
}
17 changes: 16 additions & 1 deletion man/filter_.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions man/get_opentargets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 2e6def8

Please sign in to comment.