diff --git a/SEACR_1.0.R b/SEACR_1.0.R index 00df9d9..7def3a7 100644 --- a/SEACR_1.0.R +++ b/SEACR_1.0.R @@ -47,35 +47,52 @@ if(is.null(argsL$exp) | is.null(argsL$ctrl) | is.null(argsL$output) | is.null(ar exp<-read.table(argsL$exp) expvec<-exp$V1 expmax<-exp$V2 -numtest<-as.numeric(argsL$ctrl) +suppressWarnings(numtest<-as.numeric(argsL$ctrl)) if(is.na(numtest)){ ## If 2nd field is a bedgraph, calculate empirical threshold # print("Ctrl is a file") ctrl<-read.table(argsL$ctrl) ctrlvec<-ctrl$V1 - ctrlmax<-ctrl$V2 if(argsL$norm=="yes"){ ## Calculate peaks of density plots to generate normalization factor y<-seq(0,max(ctrlvec),length.out=max(ctrlvec)-1) ctrlvalue<-max(which(ecdf(ctrlvec)(y)<=0.9)) expvalue<-max(which(ecdf(expvec)(y)<=0.9)) - value<-max(c(expvalue,ctrlvalue)) - ctrltest<-density(ctrlvec[ctrlvec <= value]) - exptest<-density(expvec[expvec <= value]) + ctrltest<-density(ctrlvec[ctrlvec <= ctrlvalue]) ## New for SEACR_1.1 + exptest<-density(expvec[expvec <= expvalue]) ## New for SEACR_1.1 constant<-(exptest$x[exptest$y==max(exptest$y)])/(ctrltest$x[ctrltest$y==max(ctrltest$y)]) ctrlvec<-ctrlvec*constant - ctrlmax<-ctrlmax*constant } ## Calculate total signal and max signal thresholds both<-c(expvec,ctrlvec) - both2<-c(expmax,ctrlmax) pctremain<-function(x) (length(expvec)-(ecdf(expvec)(x)*length(expvec)))/(length(both)-(ecdf(both)(x)*length(both))) - x<-seq(0,floor(max(ctrlvec))-1,length.out=floor(max(ctrlvec))-1) - x0<-x[which(na.omit(pctremain(x)) == max(na.omit(pctremain(x))))] - pctremain2<-function(x) (length(expmax)-(ecdf(expmax)(x)*length(expmax)))/(length(both2)-(ecdf(both2)(x)*length(both2))) - if(max(ctrlmax) < 1){ - z<-seq(0,max(ctrlmax),length.out=1000) - }else{ - z<-seq(0,floor(max(ctrlmax))-1,length.out=floor(max(ctrlmax))-1) + x<-sort(unique(both)) ## New for SEACR_1.1 + x0<-x[which(na.omit(pctremain(x[pctremain(x) < 1])) == max(na.omit(pctremain(x[pctremain(x) < 1]))))] ## New for SEACR_1.1 + z<-x[x <= x0[1]] ## New for SEACR_1.1 + z2<-z[abs(((pctremain(x0)+min(pctremain(z)))/2)-pctremain(z))==min(abs(((pctremain(x0)+min(pctremain(z)))/2)-pctremain(z)))] ## New for SEACR_1.1 + z<-z[z > z2[1]] ## New for SEACR_1.1 + z0<-z[abs(z-(max(z)-((1/2)*(max(z)-min(z)))))==min(abs(z-(max(z)-((1/2)*(max(z)-min(z))))))] ## New for SEACR_1.1 + + ## The following code segment was added to avoid spurious high thresholding when the peak of a lower threshold is within 95% of the peak of the maximum threshold + + frame<-data.frame(thresh=x[1:(length(x)-1)], pct=pctremain(x[1:(length(x)-1)]), diff=abs(diff(pctremain(x)))) + frame<-na.omit(frame) + i<-2 + output<-0 + while(output==0){ + test<-as.numeric(paste(c(0,".",rep(9,i)),sep="",collapse="")) + output<-as.numeric(quantile(frame$diff, test)) +# print(output) + i<-i+1 } - z0<-z[which(na.omit(pctremain2(z)) == max(na.omit(pctremain2(z))))] + a<-frame$thresh[frame$diff != 0 & frame$diff < quantile(frame$diff, test)] + a0<-a[which(na.omit(pctremain(a[pctremain(a) < 1])) == max(na.omit(pctremain(a[pctremain(a) < 1]))))] + b<-a[a <= a0[1]] + b2<-b[abs(((pctremain(a0)+min(pctremain(b)))/2)-pctremain(b))==min(abs(((pctremain(a0)+min(pctremain(b)))/2)-pctremain(b)))] + b<-b[b > b2[1]] + b0<-b[abs(b-(max(b)-((1/2)*(max(b)-min(b)))))==min(abs(b-(max(b)-((1/2)*(max(b)-min(b))))))] + if(max(na.omit(pctremain(a[pctremain(a) < 1])))/max(na.omit(pctremain(x[pctremain(x) < 1]))) > 0.95){ + x0<-a0 + z0<-b0 + } + fdr<-c(1-pctremain(x0[1]), 1-pctremain(z0[1])) ## New for SEACR_1.1 }else{ ## If 2nd field is numeric, calculate percentile threshold # print("Ctrl is numeric") test<-ecdf(exp$V1)(exp$V1) @@ -85,8 +102,10 @@ if(is.na(numtest)){ ## If 2nd field is a bedgraph, calculate empirical threshold ctrl<-as.vector(argsL$ctrl) x0<-min(frame$values[frame$percentile <= ctrl[1]]) z0<-min(frame2$values[frame2$percentile <= ctrl[1]]) + fdr<-ctrl[1] ## New for SEACR_1.1 } write.table(c(x0[1],z0[1]), file=paste(argsL$output, ".threshold.txt", sep=""), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) if(argsL$norm=="yes"){ write.table(constant, file=paste(argsL$output, ".norm.txt", sep=""), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) #Added 7/19/18 to ensure norm value is multiplied by ctrl } +write.table(fdr, file=paste(argsL$output, ".fdr.txt", sep=""), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) #Added 5/15/19 to report empirical FDR for threshold detection