Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add benchmark scripts and template #82

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions inst/benchmark_scripts/.batchtools.slurm.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash

## Job Resource Interface Definition
##
## ntasks [integer(1)]: Number of required tasks,
## Set larger than 1 if you want to further parallelize
## with MPI within your job.
## ncpus [integer(1)]: Number of required cpus per task,
## Set larger than 1 if you want to further parallelize
## with multicore/parallel within each task.
## walltime [integer(1)]: Walltime for this job, in seconds.
## Must be at least 60 seconds for Slurm to work properly.
## memory [integer(1)]: Memory in megabytes for each cpu.
## Must be at least 100 (when I tried lower values my
## jobs did not start at all).
##
## Default resources can be set in your .batchtools.conf.R by defining the variable
## 'default.resources' as a named list.

<%
# relative paths are not handled well by Slurm
log.file = fs::path_expand(log.file)
-%>


#SBATCH --job-name=<%= resources$job.name %>
#SBATCH --output=<%= resources$output %>
#SBATCH --error=<%= log.file %>
#SBATCH --time=<%= resources$walltime %>
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=<%= resources$ncpus %>
#SBATCH --mem-per-cpu=<%= resources$memory %>
#SBATCH --mail-user=<%= resources$email %>
#SBATCH --mail-type=ALL
<%= if (!is.null(resources$partition)) sprintf(paste0("#SBATCH --partition='", resources$partition, "'")) %>
<%= if (array.jobs) sprintf("#SBATCH --array=1-%i", nrow(jobs)) else "" %>

## Initialize work environment like
module load foss/2020b R/4.0.4-2
cd fundiversity

## Export value of DEBUGME environemnt var to slave
export DEBUGME=<%= Sys.getenv("DEBUGME") %>

<%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%>
<%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%>
<%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%>

## Run R:
## we merge R output with stdout from SLURM, which gets then logged via --output option
Rscript -e 'batchtools::doJobCollection("<%= uri %>")'
126 changes: 126 additions & 0 deletions inst/benchmark_scripts/benchmark.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Script to make benchmark across different functions of similar packages

# Options ----------------------------------------------------------------------
options(fundiversity.memoise = FALSE) # Deactivate memoisation

# Package & Functions ----------------------------------------------------------
pkgload::load_all() # Need last version of fundiversity
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_multicore.R")
)
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_alternatives.R")
)
source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))


# FD indices expressions -------------------------------------------------------

dbfd_indices <- rlang::exprs(
FD_dbFD_fdiv = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = TRUE,
calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE
),
FD_dbFD_feve = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = FALSE,
calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE
),
FD_dbFD_fric = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FRic = TRUE,
calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE
),
FD_dbFD_raoq = FD::dbFD(
given_data[["traits"]], given_data[["site_sp"]], calc.FRic = FALSE,
calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE
)
)

fd_indices <- list(
"fdis" = rlang::exprs(
fundiversity_fd_fdis_unparallel =
{
fd_fdis(given_data[["traits"]], given_data[["site_sp"]])
},
BAT_dispersion = BAT::dispersion(
given_data[["site_sp"]], given_data[["traits"]]
),
FD_fdisp = FD::fdisp(
dist(given_data[["traits"]]), given_data[["site_sp"]]
),
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdis",
scaling = FALSE, verbose = FALSE
)
),
"fdiv" = rlang::exprs(
fundiversity_fd_fdiv_unparallel =
{
fd_fdiv(given_data[["traits"]], given_data[["site_sp"]])
},
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdiv",
scaling = FALSE, verbose = FALSE
)
),
"feve" = rlang::exprs(
fundiversity_fd_feve_unparallel =
{
fd_feve(given_data[["traits"]], given_data[["site_sp"]])
},
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "feve",
scaling = FALSE, verbose = FALSE
)
),
"raoq" = rlang::exprs(
fundiversity_fd_raoq_unparallel =
{
fd_raoq(given_data[["traits"]], given_data[["site_sp"]])
},
adiv_qe = adiv::QE(
given_data[["site_sp"]], dist(given_data[["traits"]])
),
BAT_rao = BAT::rao(
given_data[["site_sp"]], distance = given_data[["traits"]]
),
hillR_hill_func = hillR::hill_func(
given_data[["site_sp"]], given_data[["traits"]], fdis = FALSE
),
mFD_alpha_fd_hill = mFD::alpha.fd.hill(
given_data[["site_sp"]], dist(given_data[["traits"]]), q = 2,
tau = "max"
)
)
)

# Benchmarks -------------------------------------------------------------------
bench_sites = c(5e1 , 1e2, 5e2)
bench_traits = c(2, 4, 10)
bench_species = c(2e2, 5e2, 1e3)

n_iterations = 10

## Regular indices benchmark
# Parallelize across indices
future::plan(
future.batchtools::batchtools_slurm,
template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"),
label = "funbench",
resources = list(
job.name = "funbench",
walltime = 5760,
memory = "150G",
ncpus = 1,
output = "/work/%u/%j-%x.log",
email = Sys.getenv("USEREMAIL")
),
finalize = FALSE
)

fd_indices %>%
furrr::future_imap(
~ fd_benchmark_alternatives(.x) %>%
saveRDS(glue::glue("bench_{index}_alternatives.rds", index = .y))
)
19 changes: 19 additions & 0 deletions inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
fd_benchmark_alternatives <- function(list_alternatives, seed = 20210906) {

bench::press(
n_sites = bench_sites,
n_traits = bench_traits,
n_species = bench_species,
{
set.seed(seed)
given_data = make_data(n_species, n_traits, n_sites)
bench::mark(
exprs = list_alternatives,
iterations = n_iterations,
check = FALSE,
memory = FALSE
)
}
)

}
24 changes: 24 additions & 0 deletions inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
fd_benchmark_multicore <- function(fd_index) {

fd_fct <- rlang::as_function(fd_index)

bench::press(
n_sites = bench_sites,
n_traits = bench_traits,
n_species = bench_species,
{
set.seed(20210906)
given_data = make_data(n_species, n_traits, n_sites)
bench::mark(
iterations = n_iterations,
fundiversity_multicore =
{
fd_fct(given_data[["traits"]], given_data[["site_sp"]])
},
check = FALSE,
memory = FALSE
)
}
)

}
19 changes: 19 additions & 0 deletions inst/benchmark_scripts/benchmark_fcts/make_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
make_data = function(n_species = 10, n_traits = 5, n_sites = 20) {
traits = matrix(runif(n_species * n_traits),
ncol = n_traits, nrow = n_species,
dimnames = list(species = paste0("sp", seq(n_species)),
traits = paste0("t", seq(n_traits)))
)

site_sp = matrix(
as.numeric(runif(n_species * n_sites) >= 0.5),
nrow = n_sites,
ncol = n_species,
dimnames = list(
sites = paste0("s", seq(n_sites)),
species = paste0("sp", seq(n_species))
)
)

list(traits = traits, site_sp = site_sp)
}
57 changes: 57 additions & 0 deletions inst/benchmark_scripts/benchmark_fd_multicore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Script to make benchmark across different functions of similar packages

# Options ----------------------------------------------------------------------
options(fundiversity.memoise = FALSE) # Deactivate memoisation

# Package & Functions ----------------------------------------------------------
pkgload::load_all() # Need last version of fundiversity
library("magrittr")
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_multicore.R")
)
source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))

# Benchmarks Parameters --------------------------------------------------------

# Data Parameters
bench_sites = c(5e1 , 1e2, 5e2)
bench_traits = c(2, 4)
bench_species = c(2e2, 5e2, 1e3)

# Number of Repeats per function
n_iterations = 20

# Number of Cores over which to parallelize
n_cores = c(1, seq(2, 6, by = 2))


# Running the benchmark --------------------------------------------------------
all_funs = alist(
fdis = fd_fdis, fdiv = fd_fdiv, feve = fd_feve, fric = fd_fric
) %>%
purrr::cross2(n_cores) %>%
purrr::map(setNames, c("fundiv_function", "n_core")) %>%
purrr::map(
function(x) {

fundiv_function = x$fundiv_function
n_core = x$n_core

future::plan(
list(
future::tweak(
future::multisession, workers = n_core
)
)
)

fd_benchmark_multicore(get(fundiv_function)) %>%
tidyr::unnest(c(time, gc)) %>%
select(-expression) %>%
mutate(fd_fct = as.character(fundiv_function), n_core = n_core)

}
)

saveRDS(all_funs, "all_multicore_bench.Rds")
76 changes: 76 additions & 0 deletions inst/benchmark_scripts/benchmark_fric_comparison.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Script to make benchmark across different functions of similar packages

# Options ----------------------------------------------------------------------
options(fundiversity.memoise = FALSE) # Deactivate memoisation

# Package & Functions ----------------------------------------------------------
pkgload::load_all() # Need last version of fundiversity
source(
here::here("inst", "benchmark_scripts", "benchmark_fcts",
"fd_benchmark_alternatives.R")
)
source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R"))


# FD indices expressions -------------------------------------------------------

fric_exprs <- list(
"fric" = rlang::exprs(
fundiversity_fd_fric_unparallel =
{
fd_fric(given_data[["traits"]], given_data[["site_sp"]])
},
BAT_alpha_tree = BAT::alpha(
given_data[["site_sp"]], given_data[["traits"]]
),
BAT_alpha_hull = BAT::hull.alpha(
BAT::hull.build(given_data[["site_sp"]], given_data[["traits"]])
),
mFD_alpha_fd = mFD::alpha.fd.multidim(
given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fric",
scaling = FALSE, verbose = FALSE
)
),
"fric_intersect" = rlang::exprs(
fundiversity_fd_fric_intersect_unparallel =
{
fd_fric_intersect(given_data[["traits"]], given_data[["site_sp"]])
},
betapart_functional_beta = betapart::functional.beta.pair(
given_data[["site_sp"]], given_data[["traits"]]
),
hillR_funct_pairwise = hillR::hill_func_parti_pairwise(
given_data[["site_sp"]], given_data[["traits"]]
)
)
)

# Functional Richness Benchmark ------------------------------------------------

bench_sites = c(5e1 , 1e2)
bench_traits = c(2, 4)
bench_species = c(2e2, 5e2)

n_iterations = 30

## Regular indices benchmark
# Parallelize across indices
future::plan(
future.batchtools::batchtools_slurm,
template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"),
label = "funbench_fric",
resources = list(
job.name = "funbench_fric",
walltime = 7200,
memory = "150G",
ncpus = 1,
output = "/work/%u/%x-%j.log",
email = Sys.getenv("USEREMAIL")
)
)

fric_exprs["fric_intersect"] %>%
furrr::future_imap(
~ fd_benchmark_alternatives(.x, seed=20210915) %>%
saveRDS(glue::glue("fric_bench_{index}_alternatives.rds", index = .y))
)
Loading