Skip to content

Commit

Permalink
initial commit, files from rnaseq.vsh
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Aug 25, 2024
1 parent 1679c59 commit 01db1c2
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
61 changes: 61 additions & 0 deletions src/rsem/rsem_merge_counts/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Viash component definition for merging per-sample RSEM quantification
# results into sample-by-feature tables.
name: "rsem_merge_counts"
namespace: "rsem"
description: Merge the transcript quantification results obtained from rsem calculate-expression across all samples.
keywords: [rsem, transcript, expression, counts]
links:
  homepage: https://deweylab.github.io/RSEM/
  documentation: https://deweylab.github.io/RSEM/rsem-calculate-expression.html
  repository: https://github.com/deweylab/RSEM
references:
  # Bare DOI identifier (not the https://doi.org/ resolver URL).
  doi: 10.1186/1471-2105-12-323
license: GPL-3.0

argument_groups:
  - name: "Input"
    arguments:
      - name: "--counts_gene"
        type: file
        description: Expression counts on gene level (genes)
      - name: "--counts_transcripts"
        type: file
        description: Expression counts on transcript level (isoforms)

  - name: "Output"
    arguments:
      - name: "--merged_gene_counts"
        type: file
        description: File containing gene counts across all samples.
        example: rsem.merged.gene_counts.tsv
        direction: output
      - name: "--merged_gene_tpm"
        type: file
        description: File containing gene TPM across all samples.
        example: rsem.merged.gene_tpm.tsv
        direction: output
      - name: "--merged_transcript_counts"
        type: file
        description: File containing transcript counts across all samples.
        example: rsem.merged.transcript_counts.tsv
        direction: output
      - name: "--merged_transcript_tpm"
        type: file
        description: File containing transcript TPM across all samples.
        example: rsem.merged.transcript_tpm.tsv
        direction: output

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: test_data

engines:
  # The script only needs coreutils (cut, tail, paste, basename).
  - type: docker
    image: ubuntu:22.04

runners:
  - type: executable
  - type: nextflow
28 changes: 28 additions & 0 deletions src/rsem/rsem_merge_counts/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

# Merge per-sample RSEM result files into four tab-separated tables:
# gene counts, gene TPM, transcript counts and transcript TPM.
#
# Inputs (environment variables):
#   par_counts_gene          gene-level result file(s) (*.genes.results)
#   par_counts_transcripts   transcript-level result file(s) (*.isoforms.results)
#   Each value may hold several whitespace-separated paths; a path that is a
#   directory stands for every file inside it.
# Outputs (environment variables, paths that are written):
#   par_merged_gene_counts, par_merged_gene_tpm,
#   par_merged_transcript_counts, par_merged_transcript_tpm
#
# Intermediate files are written to ./tmp, ./gene_ids.txt and
# ./transcript_ids.txt in the current working directory.

# -e: stop at the first failing command; -o pipefail: a pipeline fails when any
# of its stages fails.
set -eo pipefail

# Expand an input value into the global array `result_files`.
#   $1  whitespace-separated list of files and/or directories
collect_result_files() {
  local entry
  result_files=()
  # Unquoted on purpose: the value may contain several paths.
  for entry in $1; do
    if [ -d "$entry" ]; then
      result_files+=("$entry"/*)
    else
      result_files+=("$entry")
    fi
  done
}

# Build the merged counts and TPM tables for one feature level.
#   $1  input value (see collect_result_files)
#   $2  file-name suffix stripped to obtain the sample name
#   $3  directory for the per-sample intermediate columns
#   $4  file receiving the identifier columns
#   $5  output path of the merged counts table
#   $6  output path of the merged TPM table
merge_results() {
  local inputs=$1 suffix=$2 work_dir=$3 ids_file=$4 counts_out=$5 tpm_out=$6
  local result_file samplename

  collect_result_files "$inputs"
  if [ "${#result_files[@]}" -eq 0 ]; then
    echo "No input files given for '*${suffix}'" >&2
    return 1
  fi

  mkdir -p "$work_dir"
  # Drop columns left over from an earlier run so they are not merged again.
  rm -f "$work_dir"/*.counts.txt "$work_dir"/*.tpm.txt

  # The first two columns (identifiers, header included) are taken from the
  # first file and become the leading columns of every merged table.
  cut -f 1,2 "${result_files[0]}" > "$ids_file"

  for result_file in "${result_files[@]}"; do
    samplename=$(basename "$result_file")
    samplename=${samplename%"$suffix"}
    # Column 5 feeds the counts table, column 6 the TPM table; the original
    # header line is replaced by the sample name.
    {
      echo "$samplename"
      cut -f 5 "$result_file" | tail -n +2
    } > "$work_dir/$samplename.counts.txt"
    {
      echo "$samplename"
      cut -f 6 "$result_file" | tail -n +2
    } > "$work_dir/$samplename.tpm.txt"
  done

  paste "$ids_file" "$work_dir"/*.counts.txt > "$counts_out"
  paste "$ids_file" "$work_dir"/*.tpm.txt > "$tpm_out"
}

merge_results "$par_counts_gene" ".genes.results" tmp/genes gene_ids.txt \
  "$par_merged_gene_counts" "$par_merged_gene_tpm"

merge_results "$par_counts_transcripts" ".isoforms.results" tmp/isoforms transcript_ids.txt \
  "$par_merged_transcript_counts" "$par_merged_transcript_tpm"

0 comments on commit 01db1c2

Please sign in to comment.