Skip to content

Commit

Permalink
merge median intensities and write complete-cases table, t-test tech …
Browse files Browse the repository at this point in the history
…reps
  • Loading branch information
animesh committed Nov 16, 2023
1 parent e95d6f6 commit eeaa135
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 50 deletions.
46 changes: 35 additions & 11 deletions diffExprTestRank.r
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#F:\R-4.3.1\bin\Rscript.exe diffExprTestRank.r "L:\promec\TIMSTOF\LARS\2023\231027_plasma_beads\combined\txt\proteinGroups.txt" L:\promec\TIMSTOF\LARS\2023\231027_plasma_beads\combined\txt\Groups.txt "Intensity." "Bead" "Rem"
#F:\R-4.3.1\bin\Rscript.exe diffExprTestRank.r "L:\promec\TIMSTOF\LARS\2023\231025_Kamila_zub\combined\txt\proteinGroups.txt" "L:\promec\TIMSTOF\LARS\2023\231025_Kamila_zub\combined\txt\Groups.txt" "Group" "Rem"
#setup####
#install.packages(c("readxl","writexl","svglite","ggplot2","BiocManager"),repos="http://cran.us.r-project.org",lib=.libPaths())
#BiocManager::install(c("limma","pheatmap"),repos="http://cran.us.r-project.org",lib=.libPaths())
Expand All @@ -8,21 +8,20 @@ print("USAGE:<path to>Rscript diffExprTestRank.r <complete path to directory con
args = commandArgs(trailingOnly=TRUE)
print(paste("supplied argument(s):", length(args)))
print(args)
if (length(args) != 5) {stop("\n\nNeeds 5 arguments, the full path of the directory containing BOTH proteinGroups.txt AND Groups.txt files followed by the name column to use for LFQ, GROUP-to-compare, column to correct for, and data-to-REMOVE columns in Groups.txt file, for example: c:/Users/animeshs/R-4.2.1-win/bin/Rscript.exe diffExprTestRank.r L:/promec/TIMSTOF/LARS/2023/231027_plasma_beads/combined/txtMC3MS3010/proteinGroups.txt L:/promec/TIMSTOF/LARS/2023/231027_plasma_beads/combined/txtMC0/Groups.txt Intensity. Bead Rem", call.=FALSE)}
if (length(args) != 4) {stop("\n\nNeeds 4 arguments, the full path of the directory containing BOTH proteinGroups.txt AND Groups.txt files followed by the name column to use for Intensity, GROUP-to-compare, column to correct for, and data-to-REMOVE columns in Groups.txt file, for example: c:/Users/animeshs/R-4.2.1-win/bin/Rscript.exe diffExprTestRank.r L:/promec/TIMSTOF/LARS/2023/Data/combined/txtMC3MS3010/proteinGroups.txt L:/promec/TIMSTOF/LARS/2023/Data/combined/txtMC0/Groups.txt Intensity. Group Rem", call.=FALSE)}
#args####
inpF <- args[1]
#inpF <-"L:/promec/TIMSTOF/LARS/2023/231027_plasma_beads/combined/txtMC3MS3010/proteinGroups.txt"
#inpF <-"L:/promec/TIMSTOF/LARS/2023/231025_Kamila_zub/combined/txt/proteinGroups.txt"
inpL <- args[2]
#inpL <-"L:/promec/TIMSTOF/LARS/2023/231027_plasma_beads/combined/txtMC0/Groups.txt"
selection<-args[3]
#selection<-"Intensity."
lGroup <- args[4]
#lGroup<-"Bead"
rGroup <- args[5]
#inpL <-"L:/promec/TIMSTOF/LARS/2023/231025_Kamila_zub/combined/txt/Groups.txt"
lGroup <- args[3]
#lGroup<-"Group"
rGroup <- args[4]
#rGroup<-"Rem"
inpD<-dirname(inpF)
fName<-basename(inpF)
lName<-basename(inpL)
selection<-"Intensity."
thr=0.0#count
selThr=0.1#pValue-WilcoxTest
selThrFC=0.01#log2-MedianMinMaxDifference
Expand Down Expand Up @@ -166,6 +165,7 @@ testWilcox <- function(log2LFQmm,sel1,sel2,fName){
cat(paste(sel1,paste(wilcoxTest.results[!is.na(wilcoxTest.results$logFCmedianGrp1),"Uniprot"],collapse=","),sum(!is.na(wilcoxTest.results$logFCmedianGrp1)),sep = "\t"),file=paste0(inpF,fName,"combine.txt"),sep="\n",append=TRUE)
#write.csv(cbind(sel1,sum(!is.na(wilcoxTest.results$logFCmedianGrp1)),paste(wilcoxTest.results[!is.na(wilcoxTest.results$logFCmedianGrp1),"Uniprot"],collapse=" ")),paste0(inpF,"combine.csv"))
#volcano
wilcoxTest.results.ret<-wilcoxTest.results
wilcoxTest.results$RowGeneUniProtScorePeps<-data$geneName
wilcoxTest.results[is.na(wilcoxTest.results)]=selThr
Significance=(wilcoxTest.results$CorrectedPValueBH<selThr)&(wilcoxTest.results$Log2MedianChange>selThrFC)
Expand All @@ -176,7 +176,7 @@ testWilcox <- function(log2LFQmm,sel1,sel2,fName){
#install.packages("svglite")
ggplot2::ggsave(paste0(inpF,fName,selection,scale,lGroup,sCol,sel1,mCol,paste(sel2,collapse=""),eCol,selThr,selThrFC,cvThr,rGroup,lName,"WilcoxTestBH.svg"), p)
print(p)
return(sum(Significance))
return(wilcoxTest.results.ret)
}
}
#log2selection####
Expand Down Expand Up @@ -227,8 +227,32 @@ rownames(log2LFQselScaleimpCorr)<-colnames(log2LFQselMM)
svgPHC<-pheatmap::pheatmap(log2LFQselScaleimpCorr,annotation_row = annoR,annotation_col = annoR,clustering_distance_rows = "euclidean",clustering_distance_cols = "euclidean",fontsize_row=4,cluster_cols=T,cluster_rows=T,fontsize_col=4)
ggplot2::ggsave(paste0(inpF,lName,lGroup,selection,scale,"heatmap.minMax.pearson.svg"), svgPHC)
#compare####
colnames(log2LFQ)
log2LFQsel=log2LFQ[,gsub("-",".",rownames(label[is.na(label$removed)|label$removed==" "|label$removed=='',]))]
colnames(log2LFQsel)
dim(log2LFQsel)
label=label[is.na(label$removed)|label$removed==" "|label$removed=='',]
table(label$pair2test)
for(i in (names(table(label[,lGroup])))){
print(i)
print(label[label[,lGroup]==i,])
print(testWilcox(log2LFQselMM,i,setdiff(names(table(label[,lGroup])),i),"minMaxLog2LFQ"))
assign(i,testWilcox(log2LFQsel,i,setdiff(names(table(label[,lGroup])),i),"log2Int"))
}
#merge####
dataFC<-data.frame(rowSums(log2LFQsel,na.rm=T))
colnames(dataFC)<-"sum"
dataFC["RowGeneUniProtScorePeps"]<-row.names(data)
dim(dataFC)
summary(dataFC)
for (obj in (names(table(label[,lGroup])))) {
print(obj)
dataT<-get(obj)
dataC<-data.frame(cbind(dataT$RowGeneUniProtScorePeps,dataT$logFCmedianGrp1))
colnames(dataC)<-c("RowGeneUniProtScorePeps",paste0(obj,"log2MedInt"))
dataFC<-merge(dataFC,dataC,by='RowGeneUniProtScorePeps',all=T)
}
writexl::write_xlsx(dataFC,paste0(inpF,fName,selection,selThr,selThrFC,cvThr,rGroup,lName,"merge.xlsx"))
#completeCases####
dataFCnar<-dataFC[complete.cases(dataFC), ]
dataFCnar<-dataFCnar[order(dataFCnar$sum,decreasing=F),]
writexl::write_xlsx(dataFCnar,paste0(inpF,fName,selection,selThr,selThrFC,cvThr,rGroup,lName,"completeCase.xlsx"))
106 changes: 67 additions & 39 deletions diffExprTestT.r
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#F:\R-4.3.1\bin\Rscript.exe diffExprTestT.r "F:\OneDrive - NTNU\IRD\proteinGroups.txt" "f:\OneDrive - NTNU\IRD\Groups.txt" Group Rem
#F:\R-4.3.1\bin\Rscript.exe diffExprTestT.r "L:\promec\TIMSTOF\LARS\2023\231025_Kamila_zub\combined\txt\proteinGroups.txt" "L:\promec\TIMSTOF\LARS\2023\231025_Kamila_zub\combined\txt\Groups.txt" Group Rem
#setup
#install.packages(c("readxl","writexl","svglite","ggplot2","BiocManager"),repos="http://cran.us.r-project.org",lib=.libPaths())
#BiocManager::install(c("limma","pheatmap"),repos="http://cran.us.r-project.org",lib=.libPaths())
Expand All @@ -13,17 +13,17 @@ if (length(args) != 4) {stop("\n\nNeeds FOUR arguments, the full path of the dir
c:/Users/animeshs/R-4.2.1-win/bin/Rscript.exe diffExprTestT.r \"L:/promec/TIMSTOF/Data/combined/txt/proteinGroups.txt\" \"L:/promec/TIMSTOF/Data/combined/txt/Groups.txt\" Groups Remove
", call.=FALSE)}
inpF <- args[1]
#inpF <-"L:/promec/HF/Lars/2023/231006 IRD DDA Kristine Solveig/txt/matchedFeatures.txt"
#inpF <-"L:/promec/TIMSTOF/LARS/2023/231025_Kamila_zub/combined/txt/proteinGroups.txt"
inpL <- args[2]
#inpL <-"L:/promec/HF/Lars/2023/231006 IRD DDA Kristine Solveig/txt/Groups.txt"
#inpL <-"L:/promec/TIMSTOF/LARS/2023/231025_Kamila_zub/combined/txt/Groups.txt"
lGroup <- args[3]
#lGroup<-"Group"
rGroup <- args[4]
#rGroup<-"Remove"
#rGroup<-"Rem"
inpD<-dirname(inpF)
fName<-basename(inpF)
lName<-basename(inpL)
selection<-"Intensity."
selection<-"LFQ.intensity."
thr=0.0#count
selThr=0.05#pValue-tTest
selThrFC=0.5#log2-MedianDifference
Expand All @@ -37,23 +37,48 @@ data <- read.table(inpF,stringsAsFactors = FALSE, header = TRUE, quote = "", com
#data = data[!data$Reverse=="+",]
#data = data[!data$Potential.contaminant=="+",]
#data = data[!data$Only.identified.by.site=="+",]
#row.names(data)<-paste(row.names(data),data[,c(1:(grep(selection,colnames(data))[1]-1))],sep=";;")
row.names(data)<-paste(row.names(data),data$Fasta.headers,data$Protein.IDs,data$Protein.names,data$Gene.names,data$Score,data$Peptide.counts..unique.,sep=";;")
summary(data)
dim(data)
log2Int<-as.matrix(log2(data[,grep("Intensity",colnames(data))]))
log2Int[log2Int==-Inf]=NA
hist(log2Int,main=paste("Mean:",mean(log2Int,na.rm=T),"SD:",sd(log2Int,na.rm=T)),breaks=round(max(log2Int,na.rm=T)),xlim=range(min(log2Int,na.rm=T),max(log2Int,na.rm=T)))
summary(log2(data[,grep("Intensity",colnames(data))]))
par(mar=c(12,3,1,1))
boxplot(log2Int,las=2)
#corHCint####
scale=3
log2Intimp<-matrix(rnorm(dim(log2Int)[1]*dim(log2Int)[2],mean=mean(log2Int,na.rm = T)-scale,sd=sd(log2Int,na.rm = T)/(scale)), dim(log2Int)[1],dim(log2Int)[2])
log2Intimp[log2Intimp<0]<-0
boxplot(log2Intimp,las=2)
bk1 <- c(seq(-3,-0.01,by=0.01))
bk2 <- c(seq(0.01,3,by=0.01))
bk <- c(bk1,bk2) #combine the break limits for purpose of graphing
my_palette <- c(colorRampPalette(colors = c("darkblue", "white"))(n = length(bk1)-1),"gray", "gray",c(colorRampPalette(colors = c("white","darkred"))(n = length(bk2)-1)))
colnames(log2Intimp)<-colnames(log2Int)
log2IntimpCorr<-cor(log2Int,use="pairwise.complete.obs",method="spearman")
colnames(log2IntimpCorr)<-colnames(log2Int)
rownames(log2IntimpCorr)<-colnames(log2Int)
svgPHC<-pheatmap::pheatmap(log2IntimpCorr,clustering_distance_rows = "euclidean",clustering_distance_cols = "euclidean",fontsize_row=8,cluster_cols=T,cluster_rows=T,fontsize_col = 8)
#maxLFQ####
LFQ<-as.matrix(data[,grep(selection,colnames(data))])
#protNum<-1:ncol(LFQ)
#protNum<-"LFQ intensity"#1:ncol(LFQ)
#colnames(LFQ)=paste(protNum,sub(selection,"",colnames(LFQ)),sep=";")
colnames(LFQ)=sub(selection,"",colnames(LFQ))
dim(LFQ)
log2LFQ<-log2(LFQ)
log2LFQ[log2LFQ==-Inf]=NA
log2LFQ[log2LFQ==0]=NA
summary(log2LFQ)
hist(log2LFQ,main=paste("Mean:",mean(log2LFQ,na.rm=T),"SD:",sd(log2LFQ,na.rm=T)),breaks=round(max(log2LFQ,na.rm=T)),xlim=range(min(log2LFQ,na.rm=T),max(log2LFQ,na.rm=T)))
par(mar=c(12,3,1,1))
hist(log2Int,main=paste("Mean:",mean(log2Int,na.rm=T),"SD:",sd(log2Int,na.rm=T)),breaks=round(max(log2Int,na.rm=T)),xlim=range(min(log2Int,na.rm=T),max(log2Int,na.rm=T)))
hist(log2LFQ,main=paste("Mean:",mean(log2LFQ,na.rm=T),"SD:",sd(log2LFQ,na.rm=T)),breaks=round(max(log2Int,na.rm=T)),xlim=range(min(log2Int,na.rm=T),max(log2Int,na.rm=T)))
boxplot(log2Int,las=2)
boxplot(log2LFQ,las=2)
rowName<-paste(data$Mass,data$Calibrated.retention.time,data$m.z,data$Charge,data$Intensity,data$Sequence,sep=";;")
#writexl::write_xlsx(as.data.frame(cbind(rowName,log2LFQ,rownames(data))),paste0(inpD,"log2LFQ.xlsx"))
data$geneName<-paste(sapply(strsplit(paste(sapply(strsplit(data$Raw.files, ";",fixed=T), "[", 1)), " "), "[", 1))
data$uniprotID<-paste(sapply(strsplit(paste(sapply(strsplit(data$Multiplet.ids, ";",fixed=T), "[", 1)), "-"), "[", 1))
rowName<-paste(sapply(strsplit(paste(sapply(strsplit(data$Fasta.headers, "|",fixed=T), "[", 2)), "-"), "[", 1))
writexl::write_xlsx(as.data.frame(cbind(rowName,log2LFQ,rownames(data))),paste0(inpD,"log2LFQ.xlsx"))
data$geneName<-paste(sapply(strsplit(paste(sapply(strsplit(data$Gene.names, ";",fixed=T), "[", 1)), " "), "[", 1))
data$uniprotID<-paste(sapply(strsplit(paste(sapply(strsplit(data$Protein.IDs, ";",fixed=T), "[", 1)), "-"), "[", 1))
data[data$geneName=="NA","geneName"]=data[data$geneName=="NA","uniprotID"]
#label####
label<-read.table(inpL,header=T,sep="\t",row.names=1)#, colClasses=c(rep("factor",3)))
Expand All @@ -73,30 +98,36 @@ row.names(annoR)<-gsub("\\-","\\.",rownames(label[is.na(label$removed)|label$rem
names(annoR)<-lGroup
summary(annoR)
#corHClfq####
log2LFQimp<-matrix(rnorm(dim(log2LFQ)[1]*dim(log2LFQ)[2],mean=mean(log2LFQ,na.rm = T)-scale,sd=sd(log2LFQ,na.rm = T)/(scale)), dim(log2LFQ)[1],dim(log2LFQ)[2])
log2LFQimp[log2LFQimp<0]<-0
par(mar=c(12,3,1,1))
boxplot(log2LFQimp,las=2)
colnames(log2LFQimp)<-colnames(log2LFQ)
log2LFQimpCorr<-cor(log2LFQ,use="pairwise.complete.obs",method="pearson")
colnames(log2LFQimpCorr)<-colnames(log2LFQ)
rownames(log2LFQimpCorr)<-colnames(log2LFQ)
svgPHC<-pheatmap::pheatmap(log2LFQimpCorr,clustering_distance_rows = "euclidean",clustering_distance_cols = "euclidean",fontsize_row=8,cluster_cols=T,cluster_rows=T,fontsize_col = 8,annotation_row = annoR,annotation_col = annoR)
#test####
testT <- function(log2LFQ,sel1,sel2,cvThr){
#sel1<-"D11N"
#sel2<-"D20N"
#log2LFQ<-log2LFQsel#[,gsub("-",".",rownames(label[label$Remove!="Y",]))]
#log2LFQ<-sapply(log2LFQ, as.numeric)
#colnames(log2LFQ)
d1<-as.matrix(log2LFQ[,gsub("-",".",rownames(label[label$pair2test==sel1,]))])
testT <- function(log2LFQselT,sel1,sel2,cvThr){
#sel1<-"S2"
#sel2<-"S4"
#log2LFQselT<-log2LFQ#[,gsub("-",".",rownames(label[label$Remove!="Y",]))]
#colnames(log2LFQselT)
d1<-as.matrix(log2LFQselT[,gsub("-",".",rownames(label[label$pair2test==sel1,]))])
colnames(d1)<-gsub("-",".",rownames(label[label$pair2test==sel1,]))
d2<-as.matrix(log2LFQ[,gsub("-",".",rownames(label[label$pair2test==sel2,]))])
d2<-as.matrix(log2LFQselT[,gsub("-",".",rownames(label[label$pair2test==sel2,]))])
colnames(d2)<-gsub("-",".",rownames(label[label$pair2test==sel2,]))
dataSellog2grpTtest<-as.matrix(cbind(d1,d2))
dataSellog2grpTtest[dataSellog2grpTtest==0]=NA
#summary(dataSellog2grpTtest)
hist(dataSellog2grpTtest,breaks=round(max(dataSellog2grpTtest,na.rm=T)))
#dataSellog2grpTtest<-as.numeric(dataSellog2grpTtest)
row.names(dataSellog2grpTtest)<-row.names(data)
if(sum(!is.na(d1))>1&sum(!is.na(d2))>1){
hist(d1,breaks=round(max(dataSellog2grpTtest,na.rm=T)))
hist(d2,breaks=round(max(dataSellog2grpTtest,na.rm=T)))
#assign(paste0("hda",sel1,sel2),dataSellog2grpTtest)
#get(paste0("hda",sel1,sel2))
dataSellog2grpTtest[dataSellog2grpTtest==0]=NA
hist(dataSellog2grpTtest,breaks=round(max(dataSellog2grpTtest,na.rm=T)))
row.names(dataSellog2grpTtest)<-row.names(data)
comp<-paste0(sel1,sel2)
sCol<-1
eCol<-ncol(dataSellog2grpTtest)
Expand All @@ -106,17 +137,14 @@ testT <- function(log2LFQ,sel1,sel2,cvThr){
pValNA = apply(
dataSellog2grpTtest, 1, function(x)
if(sum(!is.na(x[c(sCol:mCol)]))<2&sum(!is.na(x[c((mCol+1):eCol)]))<2){NA}
else if((sum(x[c(sCol:mCol)],na.rm=T)-sum(x[c((mCol+1):eCol)],na.rm=T))==0){NA}
else if(!is.na(sd(x[c(sCol:mCol)],na.rm=T))&sd(x[c(sCol:mCol)],na.rm=T)==0){NA}
else if(!is.na(sd(x[c((mCol+1):eCol)],na.rm=T))&sd(x[c((mCol+1):eCol)],na.rm=T)==0){NA}
else if(sum(is.na(x[c(sCol:mCol)]))==0&sum(is.na(x[c((mCol+1):eCol)]))==0){
t.test(as.numeric(x[c(sCol:mCol)]),as.numeric(x[c((mCol+1):eCol)]),var.equal=T)$p.value}
else if(sum(!is.na(x[c(sCol:mCol)]))>=1&sum(!is.na(x[c((mCol+1):eCol)]))==0){0}
else if(sum(!is.na(x[c(sCol:mCol)]))==0&sum(!is.na(x[c((mCol+1):eCol)]))>=1){0}
else if(sum(!is.na(x[c(sCol:mCol)]))==1&sum(!is.na(x[c((mCol+1):eCol)]))==1){1}
else if(sum(is.na(x[c(sCol:mCol)]))==0&sum(is.na(x[c((mCol+1):eCol)]))==0){t.test(as.numeric(x[c(sCol:mCol)]),as.numeric(x[c((mCol+1):eCol)]),var.equal=T)$p.value}
else if(sum(!is.na(x[c(sCol:mCol)]))>1&sum(!is.na(x[c((mCol+1):eCol)]))<1&(sd(x[c(sCol:mCol)],na.rm=T)/mean(x[c(sCol:mCol)],na.rm=T))<cvThr){0}
else if(sum(!is.na(x[c(sCol:mCol)]))<1&sum(!is.na(x[c((mCol+1):eCol)]))>1&(sd(x[c((mCol+1):eCol)],na.rm=T)/mean(x[c((mCol+1):eCol)],na.rm=T))<cvThr){0}
else if(sum(!is.na(x[c(sCol:mCol)]))>=2&sum(!is.na(x[c((mCol+1):eCol)]))>=1){
t.test(as.numeric(x[c(sCol:mCol)]),as.numeric(x[c((mCol+1):eCol)]),na.rm=T,var.equal=T)$p.value}
else if(sum(!is.na(x[c(sCol:mCol)]))>=1&sum(!is.na(x[c((mCol+1):eCol)]))>=2){
t.test(as.numeric(x[c(sCol:mCol)]),as.numeric(x[c((mCol+1):eCol)]),na.rm=T,var.equal=T)$p.value}
#else if(sum(!is.na(x[c(sCol:mCol)]))>=2&sum(!is.na(x[c((mCol+1):eCol)]))>=1){t.test(as.numeric(x[c(sCol:mCol)]),as.numeric(x[c((mCol+1):eCol)]),na.rm=T,var.equal=T)$p.value}
#else if(sum(!is.na(x[c(sCol:mCol)]))>=1&sum(!is.na(x[c((mCol+1):eCol)]))>=2){t.test(as.numeric(x[c(sCol:mCol)]),as.numeric(x[c((mCol+1):eCol)]),na.rm=T,var.equal=T)$p.value}
else{NA}
)
summary(warnings())
Expand All @@ -143,24 +171,24 @@ testT <- function(log2LFQ,sel1,sel2,cvThr){
logFCmedianGrp1[is.na(logFCmedianGrp1)]=0
logFCmedianGrp2[is.na(logFCmedianGrp2)]=0
hda<-cbind(logFCmedianGrp1,logFCmedianGrp2)
#plot(hda)
plot(hda)
limma::vennDiagram(hda>0)
logFCmedian = logFCmedianGrp1-logFCmedianGrp2
logFCmedianFC = 2^(logFCmedian+.Machine$double.xmin)
logFCmedianFC=squish(logFCmedianFC,c(0.01,100))
hist(logFCmedianFC)
log2FCmedianFC=log2(logFCmedianFC)
hist(log2FCmedianFC)
ttest.results = data.frame(Uniprot=rowName,Gene=data$geneName,Protein=data$Sequence,logFCmedianGrp1,logFCmedianGrp2,PValueMinusLog10=pValNAminusLog10,FoldChanglog2median=logFCmedianFC,CorrectedPValueBH=pValBHna,TtestPval=pValNA,dataSellog2grpTtest,Log2MedianChange=logFCmedian,grp1CV,grp2CV,RowGeneUniProtScorePeps=rownames(dataSellog2grpTtest))
ttest.results = data.frame(Uniprot=rowName,Gene=data$Gene.names,Protein=data$Protein.names,logFCmedianGrp1,logFCmedianGrp2,PValueMinusLog10=pValNAminusLog10,FoldChanglog2median=logFCmedianFC,CorrectedPValueBH=pValBHna,TtestPval=pValNA,dataSellog2grpTtest,Log2MedianChange=logFCmedian,grp1CV,grp2CV,RowGeneUniProtScorePeps=rownames(dataSellog2grpTtest))
writexl::write_xlsx(ttest.results,paste0(inpF,selection,sCol,eCol,comp,selThr,selThrFC,cvThr,lGroup,rGroup,lName,"tTestBH.xlsx"))
write.csv(ttest.results,paste0(inpF,selection,sCol,eCol,comp,selThr,selThrFC,cvThr,lGroup,rGroup,lName,"tTestBH.csv"),row.names = F)
ttest.results.return<-ttest.results
#volcano
ttest.results$RowGeneUniProtScorePeps<-data$geneName
ttest.results[is.na(ttest.results)]=selThr
Significance=ttest.results$CorrectedPValueBH<selThr&ttest.results$CorrectedPValueBH>0&abs(ttest.results$Log2MedianChange)>selThrFC
sum(Significance)
dsub <- subset(ttest.results,Significance)
writexl::write_xlsx(dsub,paste0(inpF,selection,sCol,eCol,comp,selThr,selThrFC,cvThr,lGroup,rGroup,lName,"tTestBH.xlsx"))
write.csv(dsub,paste0(inpF,selection,sCol,eCol,comp,selThr,selThrFC,cvThr,lGroup,rGroup,lName,"tTestBH.csv"),row.names = F)
ttest.results.return<-ttest.results
#volcano
ttest.results$RowGeneUniProtScorePeps<-data$geneName
p <- ggplot2::ggplot(ttest.results,ggplot2::aes(Log2MedianChange,PValueMinusLog10))+ ggplot2::geom_point(ggplot2::aes(color=Significance))
p<-p + ggplot2::theme_bw(base_size=8) + ggplot2::geom_text(data=dsub,ggplot2::aes(label=RowGeneUniProtScorePeps),hjust=0, vjust=0,size=1,position=ggplot2::position_jitter(width=0.5,height=0.1)) + ggplot2::scale_fill_gradient(low="white", high="darkblue") + ggplot2::xlab("Log2 Median Change") + ggplot2::ylab("-Log10 P-value")
#f=paste(file,proc.time()[3],".jpg")
Expand Down

0 comments on commit eeaa135

Please sign in to comment.