2.6.1a

alexdobin · Aug 14, 2018 · 45f7bd7 · 45f7bd7
1 parent e8b215f
commit 45f7bd7
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 5 deletions.
diff --git a/bin/MacOSX_x86_64/STAR b/bin/MacOSX_x86_64/STAR
diff --git a/bin/MacOSX_x86_64/STARlong b/bin/MacOSX_x86_64/STARlong
diff --git a/doc/STARmanual.pdf b/doc/STARmanual.pdf
diff --git a/extras/doc-latex/STARmanual.tex b/extras/doc-latex/STARmanual.tex
@@ -34,7 +34,7 @@
 
 \newcommand{\sechyperref}[1]{\hyperref[#1]{Section \ref{#1}. \nameref{#1}}}
 
-\title{STAR manual 2.6.0a}
+\title{STAR manual 2.6.1a}
 \author{Alexander Dobin\\
 [email protected]}
 \maketitle
@@ -273,7 +273,7 @@ \subsubsection{SAM attributes.}
 \item[]
 \optv{NH HI NM MD} have standard meaning as defined in the SAM format specifications.
 \item[]
-\optv{AS} id the local alignment score (paired for paired-edn reads). 
+\optv{AS} is the local alignment score (paired for paired-end reads). 
 \item[]
 \optv{nM} is the number of mismatches per (paired) alignment, not to be confused with \optv{NM}, which is the number of mismatches in each mate.
 \item[]
@@ -335,11 +335,11 @@ \subsection{Chimeric alignments in the main BAM files.}
 Chimeric alignments can be included together with normal alignments in the main (sorted or unsorted) BAM file(s) using \opt{chimOutType} \optv{WithinBAM}. In these files, formatting of chimeric alignments follows the latest SAM/BAM specifications.
 
 \subsection{Chimeric alignments in \ofilen{Chimeric.out.sam} .}
-When chimeric detection is switched on, STAR will output normal alignments into \ofilen{Aligned.*.sam/bam}, and will output chimeric alignments into a separate file \ofilen{Chimeric.out.sam}. 
+With \opt{chimOutType} \optv{SeparateSAMold}, STAR will output normal alignments into \ofilen{Aligned.*.sam/bam}, and will output chimeric alignments into a separate file \ofilen{Chimeric.out.sam}. Note that this option will be deprecated in the future, and \opt{chimOutType} \optv{WithinBAM} is strongly recommended instead.
 Some reads may be output to both normal SAM/BAM files, and \ofilen{Chimeric.out.sam} for the following reason. STAR will output a non-chimeric alignment into \ofilen{Aligned.out.sam} with soft-clipping a portion of the read. If this portion is long enough, and it maps well and uniquely  somewhere else in the genome, there will also be a chimeric alignment output into \ofilen{Chimeric.out.sam}. For instance, if you have a paired-end read where the second mate can be split chimerically into 70 and 30 bases. The 100b of the first mate + 70b of the 2nd mate map non-chimerically,and the mapping length/score are big enough, so they will be output into \ofilen{Aligned.out.sam} file. At the same time, the chimeric segments 100-mate1 + 70-mate2 and 30-mate2 will be output into \ofilen{Chimeric.out.sam}.
 
 \subsection{Chimeric alignments in \ofilen{Chimeric.out.junction}}
-In addition to \ofilen{Chimeric.out.sam}, STAR will generate \ofilen{Chimeric.out.junction} file which maybe more convenient for downstream analysis.
+By default, or with \opt{chimOutType} \optv{Junctions}, STAR will generate the \ofilen{Chimeric.out.junction} file, which may be more convenient for downstream analysis.
 The format of this file is as follows. Every line contains one chimerically aligned read, e.g.:
 \begin{verbatim}
 chr22   23632601        +       chr9    133729450       +       1       0       0      

diff --git a/extras/doc-latex/parametersDefault.tex b/extras/doc-latex/parametersDefault.tex
@@ -637,7 +637,7 @@
   \optValue{0}
   \optLine{int{\textgreater}=0:             minimum number of overlap bases to trigger mates merging and realignment} 
 \optName{peOverlapMMp}
-  \optValue{0.1}
+  \optValue{0.01}
   \optLine{real, {\textgreater}=0 {\&} {\textless}1:     maximum proportion of mismatched bases in the overlap area} 
 \end{optTable}
 \optSection{Windows, Anchors, Binning}\label{Windows,_Anchors,_Binning}
@@ -716,6 +716,13 @@
 \optName{chimNonchimScoreDropMin}
   \optValue{20}
   \optLine{int{\textgreater}=0: to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read lenght has to be smaller than this value} 
+\optName{chimOutJunctionFormat}
+  \optValue{0}
+  \optLine{int: formatting type for the Chimeric.out.junction file} 
+\begin{optOptTable}
+  \optOpt{0}   \optOptLine{no comment lines/headers}
+  \optOpt{1}   \optOptLine{comment lines at the end of the file: command line and Nreads: total, unique, multi}
+\end{optOptTable}
 \end{optTable}
 \optSection{Quantification of Annotations}\label{Quantification_of_Annotations}
 \begin{optTable}

diff --git a/extras/scripts/mergeGeneCounts.awk b/extras/scripts/mergeGeneCounts.awk
@@ -0,0 +1,54 @@
+#
+# merges ReadsPerGene.out.tab files from multiple runs into one ';'-separated table
+# requires gawk: ARGIND (index of the current input file) is a gawk extension
+# usage:
+# awk -f mergeGeneCounts.awk -v Col=2 /path/to/1st/ReadsPerGene.out.tab /path/to/2nd/ReadsPerGene.out.tab ...
+# e.g.
+# awk -f mergeGeneCounts.awk -v Col=2 */ReadsPerGene.out.tab
+#
+# -v Col=<column to add to the table>: depends on the strandedness of the table
+# advanced parameters
+# -v Skip=<number of lines to skip at the top of each file>
+# -v Name=<common file name substring to remove from column names>
+
+BEGIN {
+    FS="\t";
+
+    if (Name=="") Name="/ReadsPerGene.out.tab";
+    if (Skip=="") Skip=0;
+    if (Col=="") {
+        print "Specify the column with -v Col=..." > "/dev/stderr";
+        exit 1;
+    };
+
+    # print header line with file names; input files are ARGV[1]..ARGV[ARGC-1]
+    for (jj=1;jj<ARGC;jj++)
+    {
+        a=ARGV[jj];
+        gsub(Name,"",a); # Name is applied as a regular expression
+        printf ";%s", a; # explicit format: file names may contain %
+    };
+    printf "\n";
+}
+FNR>Skip {
+    row=FNR-Skip; # row index after the skipped lines
+    if (ARGIND==1) {
+        L[row]=$1; # record gene names (1st column)
+        nRows=row; # number of rows is defined by the first file
+    } else {
+        if ($1!=L[row]) {
+            print "File #" ARGIND ": " FILENAME " is not sorted properly, sort all files by the first column" > "/dev/stderr";
+        };
+    };
+    V[row,ARGIND]=$Col; # store the requested column (-v Col=...)
+}
+END {
+    for (ii=1;ii<=nRows;ii++)
+    {
+        printf "%s",L[ii];
+        if (V[ii,1]!="")
+            for (jj=1;jj<ARGC;jj++)
+                printf ";%s", V[ii,jj];
+        printf "\n"
+     }
+}