diff --git a/DESCRIPTION b/DESCRIPTION index 4932a7e..2d59bce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,8 @@ Depends: Imports: BiocGenerics, Rcpp (>= 0.11.3), - MASS + MASS, + irlba Suggests: matrixStats, lattice, diff --git a/NAMESPACE b/NAMESPACE index f1627bf..ce6a44a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,4 +60,5 @@ import(Biobase) import(BiocGenerics) import(methods) importFrom(Rcpp,evalCpp) +importFrom(irlba,prcomp_irlba) useDynLib(pcaMethods) diff --git a/R/pca.R b/R/pca.R index 3ccda3d..372d1f3 100644 --- a/R/pca.R +++ b/R/pca.R @@ -9,12 +9,12 @@ listPcaMethods <- function(which=c("all", "linear", "nonlinear")) { switch(match.arg(which), all={ - return(c("svd", "nipals", "rnipals", "bpca", "ppca", + return(c("svd", "irlba", "nipals", "rnipals", "bpca", "ppca", "svdImpute", "robustPca", "nlpca", "llsImpute", "llsImputeAll")) }, linear={ - return(c("svd", "nipals", "rnipals", "bpca", "ppca", + return(c("svd", "irlba", "nipals", "rnipals", "bpca", "ppca", "svdImpute", "robustPca")) }, nonlinear={ @@ -123,11 +123,13 @@ pca <- function(object, method, nPcs=2, } else if(inherits(object, "ExpressionSet")) { Matrix <- t(exprs(object)) - } else - Matrix <- as.matrix(object, rownames.force=TRUE) + } else if (inherits(object, "sparseMatrix")) + Matrix <- object + else + Matrix <- as.matrix(object, rownames.force=TRUE) if(!is.null(subset)) - Matrix <- Matrix[,subset] + Matrix <- Matrix[, subset, drop = FALSE] cv <- match.arg(cv) scale <- match.arg(scale) @@ -150,6 +152,8 @@ pca <- function(object, method, nPcs=2, if(missing(method)) { if(any(missing)) method <- 'nipals' + else if(inherits(Matrix, 'sparseMatrix')) + method <- 'irlba' else method <- 'svd' } @@ -165,6 +169,15 @@ pca <- function(object, method, nPcs=2, svd={ res <- svdPca(prepres$data, nPcs=nPcs,...) }, + irlba=if(inherits(prepres$data, "sparseMatrix")) { + # The matrix was not actually preprocessed, we just pass the arguments on + res <- irlbaPca(prepres$data, nPcs=nPcs, + scale=prepres$scale == "uv", + center=prepres$center, + ...) + } else { + res <- irlbaPca(prepres$data, nPcs=nPcs, ...) + }, nipals={ res <- nipalsPca(prepres$data, nPcs=nPcs, ...) }, @@ -350,7 +363,8 @@ plotPcs <- function(object, ##' @param verbose Verbose complaints to matrix structure ##' @param ... Only used for passing through arguments. ##' @return A \code{pcaRes} object. -##' @seealso \code{prcomp}, \code{princomp}, \code{pca} +##' @seealso \code{\link[stats]{prcomp}}, \code{\link[stats]{princomp}}, +##' \code{\link{pca}} ##' @examples ##' data(metaboliteDataComplete) ##' mat <- prep(t(metaboliteDataComplete)) @@ -378,6 +392,32 @@ svdPca <- function(Matrix, nPcs=2, return(res) } + +##' @importFrom irlba prcomp_irlba +irlbaPca <- function( + Matrix, + nPcs = 2, + varLimit = 1, + verbose = interactive(), + center = TRUE, + scale. = FALSE, + ... +) { + pcs <- prcomp_irlba(Matrix, center = center, scale. = scale) + imp <- summary(pcs)$importance + if (varLimit < 1) + nPcs <- sum(imp[3, ] < varLimit) + 1 + + res <- new("pcaRes") + res@scores <- cbind(pcs$x[, seq_len(nPcs)]) + res@loadings <- cbind(pcs$rotation[, seq_len(nPcs)]) + res@R2cum <- imp[3, seq_len(nPcs)] + res@varLimit <- varLimit + res@method <- "irlba" + res +} + + ##' Get a confidence ellipse for uncorrelated bivariate data ##' ##' As described in 'Introduction to multi and megavariate data analysis diff --git a/R/prep.R b/R/prep.R index 8ff87d5..13864dd 100644 --- a/R/prep.R +++ b/R/prep.R @@ -44,8 +44,13 @@ prep <- function(object, scale=c("none", "pareto", "vector", "uv"), center=TRUE, eps=1e-12, simple=TRUE, reverse=FALSE, ...) { if(inherits(object, "ExpressionSet")) obj <- t(exprs(object)) - else + else if(!inherits(object, 'sparseMatrix')) + obj <- as.matrix(object) + else if (!is.null(scale) && !(scale %in% c("none", "uv"))) { + warning("Sparse matrices only support the 'uv' scaling. Converting to a dense matrix") obj <- as.matrix(object) + } else # let irlba handle sparse matrices for now + return(list(data = object, center = center, scale = scale)) if(is.null(center)) center <- FALSE diff --git a/man/svdPca.Rd b/man/svdPca.Rd index 4af86e5..a4a4c77 100644 --- a/man/svdPca.Rd +++ b/man/svdPca.Rd @@ -40,7 +40,8 @@ pc <- pca(t(metaboliteDataComplete), method="svd", nPcs=2) \dontshow{stopifnot(sum((fitted(pc) - t(metaboliteDataComplete))^2, na.rm=TRUE) < 200)} } \seealso{ -\code{prcomp}, \code{princomp}, \code{pca} +\code{\link[stats]{prcomp}}, \code{\link[stats]{princomp}}, +\code{\link{pca}} } \author{ Henning Redestig