Skip to content

Commit

Permalink
update bugfix in paired analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
snaketron committed Mar 26, 2024
1 parent 936fed9 commit d43cb09
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 97 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ Suggests:
testthat (>= 2.1.0),
ggplot2,
ggforce,
gridExtra,
ggrepel,
patchwork
LinkingTo:
Expand Down
27 changes: 18 additions & 9 deletions R/utils_usage.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ get_usage <- function(u) {
u$replicate_id <- u$replicate
u$sample_id <- paste0(u$individual_id, '|', u$condition, '|', u$replicate)
u$sample_id <- as.numeric(as.factor(u$sample_id))
u$individual_id <- paste0(u$individual_id, '|', u$condition)
# u$individual_id <- paste0(u$individual_id, '|', u$condition)

m <- u[duplicated(u$sample_id)==FALSE, c("sample_id",
"individual_id",
Expand All @@ -34,7 +34,7 @@ get_usage <- function(u) {

u$sample_id <- paste0(u$individual_id, '|', u$condition)
u$sample_id <- as.numeric(as.factor(u$sample_id))
u$individual_id <- paste0(u$individual_id, '|', u$condition)
# u$individual_id <- paste0(u$individual_id, '|', u$condition)

m <- u[duplicated(u$sample_id)==FALSE, c("sample_id",
"individual_id",
Expand Down Expand Up @@ -90,12 +90,22 @@ get_usage <- function(u) {
tr <- table(replicate_ids)
has_balanced_replicates <- ifelse(test = all(tr==tr[1]), yes=TRUE, no=FALSE)

condition_names <- character(length = max(individual_ids))
for(i in 1:max(individual_ids)) {
q <- individual_names[individual_ids==i][1]
condition_names[i] <- m$condition[m$individual_id == q][1]
if(has_replicates) {
# condition at individual
condition_names <- character(length = max(individual_ids))
for(i in 1:max(individual_ids)) {
q <- individual_names[individual_ids==i][1]
condition_names[i] <- m$condition[m$individual_id == q][1]
}
condition_ids <- as.numeric(as.factor(condition_names))
} else {
# condition at sample
condition_names <- character(length = length(sample_ids))
for(i in 1:length(sample_ids)) {
condition_names[i] <- m$condition[m$sample_id == sample_ids[i]]
}
condition_ids <- as.numeric(as.factor(condition_names))
}
condition_ids <- as.numeric(as.factor(condition_names))

return(list(Y = Y,
N = as.numeric(N),
Expand Down Expand Up @@ -168,8 +178,7 @@ get_model <- function(has_replicates,
"log_lik", "theta")
model_name <- "GU"
}
}
else {
} else {
if(has_replicates) {
model <- stanmodels$dgu_rep
pars <- c("phi", "kappa", "alpha",
Expand Down
31 changes: 15 additions & 16 deletions inst/stan/dgu.stan
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,20 @@ functions {
}

data {
// in this model: N_sample = N_individual (no replicates)
int<lower=0> N_sample; // number of repertoires
int<lower=0> N_gene; // gene
int<lower=0> N_individual; // number of individuals
int<lower=0> N_condition; // number of conditions
array [N_individual] int N; // number of tries (repertoire size)
array [N_gene, N_individual] int Y; // number of heads for each coin
array [N_individual] int condition_id; // id of conditions
//array [N_individual] int individual_id; // id of replicate
array [N_sample] int N; // number of tries (repertoire size)
array [N_gene, N_sample] int Y; // number of heads for each coin
array [N_sample] int condition_id; // id of conditions
array [N_sample] int individual_id; // id of individual
}

transformed data {
// convert int N -> real N for convenient division
// in generated quantities block
array [N_individual] real Nr;
array [N_sample] real Nr;
Nr = N;
}

Expand All @@ -55,20 +54,20 @@ parameters {
vector <lower=0> [N_condition] sigma_condition;
vector <lower=0> [N_condition] sigma_individual;

array [N_individual] vector [N_gene] z_beta_individual;
array [N_sample] vector [N_gene] z_beta_individual;
array [N_condition] vector [N_gene] z_beta_condition;
}

transformed parameters {
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
array [N_individual] vector [N_gene] beta_individual;
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
array [N_sample] vector [N_gene] beta_individual;
array [N_condition] vector [N_gene] beta_condition;

for(i in 1:N_condition) {
beta_condition[i] = 0 + sigma_condition[i] * z_beta_condition[i];
}

for(i in 1:N_individual) {
for(i in 1:N_sample) {
beta_individual[i] = beta_condition[condition_id[i]] + sigma_individual[condition_id[i]] * z_beta_individual[i];
theta[i] = inv_logit(alpha + beta_individual[i]);
}
Expand All @@ -82,14 +81,14 @@ model {
for(i in 1:N_condition) {
target += std_normal_lpdf(z_beta_condition[i]);
}
for(i in 1:N_individual) {
for(i in 1:N_sample) {
target += std_normal_lpdf(z_beta_individual[i]);
}

target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
target += cauchy_lpdf(sigma_condition | 0.0, 1.0);

for(i in 1:N_individual) {
for(i in 1:N_sample) {
for(j in 1:N_gene) {
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
}
Expand All @@ -98,16 +97,16 @@ model {

generated quantities {
// PPC: count usage (repertoire-level)
array [N_gene, N_individual] int Yhat_rep;
array [N_gene, N_sample] int Yhat_rep;

// PPC: proportion usage (repertoire-level)
array [N_gene, N_individual] real Yhat_rep_prop;
array [N_gene, N_sample] real Yhat_rep_prop;

// PPC: proportion usage at a gene level in condition
array [N_condition] vector [N_gene] Yhat_condition_prop;

// LOG-LIK
array [N_individual] vector [N_gene] log_lik;
array [N_sample] vector [N_gene] log_lik;

// DGU matrix
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
Expand All @@ -116,7 +115,7 @@ generated quantities {

//TODO: speedup, run in C++ not big factor on performance
for(j in 1:N_gene) {
for(i in 1:N_individual) {
for(i in 1:N_sample) {
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);

Expand Down
43 changes: 24 additions & 19 deletions inst/stan/dgu_paired.stan
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ data {
int<lower=0> N_gene; // gene
int<lower=0> N_individual; // number of individuals
int<lower=0> N_condition; // number of conditions
array [N_individual] int N; // number of tries
array [N_gene, N_individual] int Y; // number of heads for each coin
array [N_individual] int condition_id; // id of conditions
array [N_sample] int N; // number of tries
array [N_gene, N_sample] int Y; // number of heads for each coin
array [N_sample] int condition_id; // id of conditions
array [N_sample] int individual_id; // id of individual
}

transformed data {
// convert int to real N for easier division in generated quantities block
array [N_individual] real Nr;
array [N_sample] real Nr;
Nr = N;
}

Expand All @@ -55,27 +55,29 @@ parameters {
real <lower=0> sigma_alpha;

array [N_individual] vector [N_gene] z_alpha_individual;
array [N_individual] vector [N_gene] z_beta_individual;
array [N_individual, N_condition] vector [N_gene] z_beta_individual;
array [N_condition] vector [N_gene] z_beta_condition;
}

transformed parameters {
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
array [N_individual] vector [N_gene] alpha_individual;
array [N_individual] vector [N_gene] beta_individual;
array [N_individual, N_condition] vector [N_gene] beta_individual;
array [N_condition] vector [N_gene] beta_condition;

for(i in 1:N_condition) {
beta_condition[i] = 0 + sigma_condition[i] * z_beta_condition[i];
for(j in 1:N_condition) {
beta_condition[j] = 0 + sigma_condition[j] * z_beta_condition[j];
}

for(i in 1:N_individual) {
alpha_individual[i] = alpha + sigma_alpha * z_alpha_individual[i];
beta_individual[i] = beta_condition[condition_id[i]] + sigma_individual[condition_id[i]] * z_beta_individual[i];
for(j in 1:N_condition) {
beta_individual[i,j] = beta_condition[j] + sigma_individual[j] * z_beta_individual[i,j];
}
}

for(i in 1:N_sample) {
theta[i] = inv_logit(alpha_individual[individual_id[i]] + beta_individual[individual_id[i]]);
theta[i] = inv_logit(alpha_individual[individual_id[i]] + beta_individual[individual_id[i], condition_id[i]]);
}
}

Expand All @@ -84,18 +86,21 @@ model {
target += exponential_lpdf(phi | 0.01);
target += normal_lpdf(alpha | -3.0, 3.0);

for(i in 1:N_condition) {
target += std_normal_lpdf(z_beta_condition[i]);
for(j in 1:N_condition) {
target += std_normal_lpdf(z_beta_condition[j]);
for(i in 1:N_individual) {
target += std_normal_lpdf(z_beta_individual[i,j]);
}
}
for(i in 1:N_individual) {
target += std_normal_lpdf(z_beta_individual[i]);
target += std_normal_lpdf(z_alpha_individual[i]);
}

target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
target += cauchy_lpdf(sigma_condition | 0.0, 1.0);
target += cauchy_lpdf(sigma_alpha | 0.0, 1.0);

for(i in 1:N_individual) {
for(i in 1:N_sample) {
for(j in 1:N_gene) {
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
}
Expand All @@ -104,16 +109,16 @@ model {

generated quantities {
// PPC: count usage (repertoire-level)
array [N_gene, N_individual] int Yhat_rep;
array [N_gene, N_sample] int Yhat_rep;

// PPC: proportion usage (repertoire-level)
array [N_gene, N_individual] real Yhat_rep_prop;
array [N_gene, N_sample] real Yhat_rep_prop;

// PPC: proportion usage at a gene level in condition
array [N_condition] vector [N_gene] Yhat_condition_prop;

// LOG-LIK
array [N_individual] vector [N_gene] log_lik;
array [N_sample] vector [N_gene] log_lik;

// DGU matrix
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
Expand All @@ -122,7 +127,7 @@ generated quantities {

//TODO: speedup, run in C++ not big factor on performance
for(j in 1:N_gene) {
for(i in 1:N_individual) {
for(i in 1:N_sample) {
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);

Expand Down
18 changes: 9 additions & 9 deletions inst/stan/dgu_paired_rep.stan
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ data {
int<lower=0> N_individual; // number of individuals
int<lower=0> N_condition; // number of conditions
int<lower=0> N_replicate; // number of replicates
array [N_individual] int N; // number of tries
array [N_gene, N_individual] int Y; // number of heads for each coin
array [N_sample] int N; // number of tries
array [N_gene, N_sample] int Y; // number of heads for each coin
array [N_individual] int condition_id; // id of conditions
array [N_sample] int individual_id; // id of individual
array [N_sample] int replicate_id; // id of replicate
}

transformed data {
// convert int to real N for easier division in generated quantities block
array [N_individual] real Nr;
array [N_sample] real Nr;
Nr = N;
}

Expand Down Expand Up @@ -71,7 +71,7 @@ transformed parameters {
array [N_individual] vector [N_gene] beta_individual;
array [N_individual, N_replicate] vector [N_gene] alpha_sample;
array [N_individual, N_replicate] vector [N_gene] beta_sample;
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;

for(i in 1:N_condition) {
beta_condition[i] = 0 + sigma_condition[i] * z_beta_condition[i];
Expand Down Expand Up @@ -111,7 +111,7 @@ model {
target += cauchy_lpdf(sigma_alpha_rep | 0.0, 1.0);
target += cauchy_lpdf(sigma_beta_rep | 0.0, 1.0);

for(i in 1:N_individual) {
for(i in 1:N_sample) {
for(j in 1:N_gene) {
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
}
Expand All @@ -120,16 +120,16 @@ model {

generated quantities {
// PPC: count usage (repertoire-level)
array [N_gene, N_individual] int Yhat_rep;
array [N_gene, N_sample] int Yhat_rep;

// PPC: proportion usage (repertoire-level)
array [N_gene, N_individual] real Yhat_rep_prop;
array [N_gene, N_sample] real Yhat_rep_prop;

// PPC: proportion usage at a gene level in condition
array [N_condition] vector [N_gene] Yhat_condition_prop;

// LOG-LIK
array [N_individual] vector [N_gene] log_lik;
array [N_sample] vector [N_gene] log_lik;

// DGU matrix
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
Expand All @@ -138,7 +138,7 @@ generated quantities {

//TODO: speedup, run in C++ not big factor on performance
for(j in 1:N_gene) {
for(i in 1:N_individual) {
for(i in 1:N_sample) {
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);

Expand Down
Loading

0 comments on commit d43cb09

Please sign in to comment.