FredHutch · mpmeers · Jun 20, 2019
diff --git a/SEACR_1.0.R b/SEACR_1.0.R
@@ -47,35 +47,52 @@ if(is.null(argsL$exp) | is.null(argsL$ctrl) | is.null(argsL$output) | is.null(ar
 exp<-read.table(argsL$exp)
 expvec<-exp$V1
 expmax<-exp$V2
-numtest<-as.numeric(argsL$ctrl)
+suppressWarnings(numtest<-as.numeric(argsL$ctrl))
 if(is.na(numtest)){ ## If 2nd field is a bedgraph, calculate empirical threshold
 #	print("Ctrl is a file")
 	ctrl<-read.table(argsL$ctrl)
 	ctrlvec<-ctrl$V1
-	ctrlmax<-ctrl$V2
 	if(argsL$norm=="yes"){  ## Calculate peaks of density plots to generate normalization factor
 		y<-seq(0,max(ctrlvec),length.out=max(ctrlvec)-1)
 		ctrlvalue<-max(which(ecdf(ctrlvec)(y)<=0.9))
 		expvalue<-max(which(ecdf(expvec)(y)<=0.9))
-		value<-max(c(expvalue,ctrlvalue))
-		ctrltest<-density(ctrlvec[ctrlvec <= value])
-		exptest<-density(expvec[expvec <= value])
+		ctrltest<-density(ctrlvec[ctrlvec <= ctrlvalue]) ## New for SEACR_1.1
+		exptest<-density(expvec[expvec <= expvalue]) ## New for SEACR_1.1
 		constant<-(exptest$x[exptest$y==max(exptest$y)])/(ctrltest$x[ctrltest$y==max(ctrltest$y)])
 		ctrlvec<-ctrlvec*constant
-		ctrlmax<-ctrlmax*constant
 	} ## Calculate total signal and max signal thresholds
 	both<-c(expvec,ctrlvec)
-	both2<-c(expmax,ctrlmax)
 	pctremain<-function(x) (length(expvec)-(ecdf(expvec)(x)*length(expvec)))/(length(both)-(ecdf(both)(x)*length(both)))
-	x<-seq(0,floor(max(ctrlvec))-1,length.out=floor(max(ctrlvec))-1)
-	x0<-x[which(na.omit(pctremain(x)) == max(na.omit(pctremain(x))))]
-	pctremain2<-function(x) (length(expmax)-(ecdf(expmax)(x)*length(expmax)))/(length(both2)-(ecdf(both2)(x)*length(both2)))
-	if(max(ctrlmax) < 1){
-		z<-seq(0,max(ctrlmax),length.out=1000)
-	}else{
-		z<-seq(0,floor(max(ctrlmax))-1,length.out=floor(max(ctrlmax))-1)
+	x<-sort(unique(both)) ## New for SEACR_1.1
+	x0<-x[which(na.omit(pctremain(x[pctremain(x) < 1])) == max(na.omit(pctremain(x[pctremain(x) < 1]))))]  ## New for SEACR_1.1
+	z<-x[x <= x0[1]]	## New for SEACR_1.1
+	z2<-z[abs(((pctremain(x0)+min(pctremain(z)))/2)-pctremain(z))==min(abs(((pctremain(x0)+min(pctremain(z)))/2)-pctremain(z)))] ## New for SEACR_1.1
+	z<-z[z > z2[1]] ## New for SEACR_1.1
+	z0<-z[abs(z-(max(z)-((1/2)*(max(z)-min(z)))))==min(abs(z-(max(z)-((1/2)*(max(z)-min(z))))))] ## New for SEACR_1.1
+
+	## The following code segment was added to avoid spurious high thresholding when the peak of a lower threshold is within 95% of the peak of the maximum threshold
+
+	frame<-data.frame(thresh=x[1:(length(x)-1)], pct=pctremain(x[1:(length(x)-1)]), diff=abs(diff(pctremain(x))))
+	frame<-na.omit(frame)
+	i<-2
+	output<-0
+	while(output==0){
+		test<-as.numeric(paste(c(0,".",rep(9,i)),sep="",collapse=""))
+		output<-as.numeric(quantile(frame$diff, test))
+#		print(output)
+		i<-i+1
 	}
-	z0<-z[which(na.omit(pctremain2(z)) == max(na.omit(pctremain2(z))))]
+	a<-frame$thresh[frame$diff != 0 & frame$diff < quantile(frame$diff, test)]
+	a0<-a[which(na.omit(pctremain(a[pctremain(a) < 1])) == max(na.omit(pctremain(a[pctremain(a) <  1]))))]
+	b<-a[a <= a0[1]]
+	b2<-b[abs(((pctremain(a0)+min(pctremain(b)))/2)-pctremain(b))==min(abs(((pctremain(a0)+min(pctremain(b)))/2)-pctremain(b)))]
+	b<-b[b > b2[1]]
+	b0<-b[abs(b-(max(b)-((1/2)*(max(b)-min(b)))))==min(abs(b-(max(b)-((1/2)*(max(b)-min(b))))))]
+	if(max(na.omit(pctremain(a[pctremain(a) < 1])))/max(na.omit(pctremain(x[pctremain(x) < 1]))) > 0.95){
+		x0<-a0
+		z0<-b0
+	}
+	fdr<-c(1-pctremain(x0[1]), 1-pctremain(z0[1])) ## New for SEACR_1.1
 }else{ ## If 2nd field is numeric, calculate percentile threshold
 #	print("Ctrl is numeric")
 	test<-ecdf(exp$V1)(exp$V1)
@@ -85,8 +102,10 @@ if(is.na(numtest)){ ## If 2nd field is a bedgraph, calculate empirical threshold
 	ctrl<-as.vector(argsL$ctrl)
 	x0<-min(frame$values[frame$percentile <= ctrl[1]])
 	z0<-min(frame2$values[frame2$percentile <= ctrl[1]])
+	fdr<-ctrl[1] ## New for SEACR_1.1
 }
 write.table(c(x0[1],z0[1]), file=paste(argsL$output, ".threshold.txt", sep=""), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
 if(argsL$norm=="yes"){
 	write.table(constant, file=paste(argsL$output, ".norm.txt", sep=""), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) #Added 7/19/18 to ensure norm value is multiplied by ctrl
 }
+write.table(fdr, file=paste(argsL$output, ".fdr.txt", sep=""), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) #Added 5/15/19 to report empirical FDR for threshold detection