Skip to content

Commit

Permalink
Bug fixes for integer sample IDs etc
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-munro committed Jun 7, 2024
1 parent ce8b102 commit e7dc7b8
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Pheast/scripts/fusion_compute_weights.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ zcat < $BED | awk -vs=$B_START -ve=$B_END 'NR >= s + 1 && NR <= e + 1' | while r

# Get the gene positions +/- 500kb
CHR=`echo $PARAM | awk '{ print $1 }'`
P0=`echo $PARAM | awk '{ print $3 - 0.5e6 }'`
P0=`echo $PARAM | awk '{ p = $3 - 0.5e6; if (p < 1) p = 1; print p }'`
P1=`echo $PARAM | awk '{ print $3 + 0.5e6 }'`
GNAME=`echo $PARAM | awk '{ print $4 }'`

Expand Down
13 changes: 8 additions & 5 deletions Pheast/steps/covariates.smk
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ rule prune_for_covar:
pruned_dir = interm_dir / 'covar',
pruned_prefix = interm_dir / 'covar' / 'geno_pruned',
shell:
# --geno 0.05 filters variants with >5% missing values (the rest will be imputed)
# --geno 0.05 would filter variants with >5% missing values (the rest would be imputed).
# We use 0 instead, so that samples with many more genotyped variants than others don't
# result in other samples having mostly missing values after pruning.
"""
mkdir -p {params.pruned_dir}
plink2 \
--bfile {params.geno_prefix} \
--geno 0.05 \
--geno 0.00 \
--maf 0.05 \
--indep-pairwise 200 100 0.1 \
--out {params.pruned_prefix}
Expand Down Expand Up @@ -60,15 +62,16 @@ rule plink_covariates:
output:
plink = interm_dir / 'covar' / '{modality}.covar.plink.tsv',
run:
covar = pd.read_csv(input.covar, sep='\t', index_col=0)
covar = pd.read_csv(input.covar, sep='\t', index_col=0, dtype=str)
covar.index.name = None
covar = covar.T
covar.index.name = 'IID'
covar = covar.reset_index()
# covar.insert(0, 'FID', 0) # Family must match that in the genotype files.
## Get FIDs from genotypes:
fam = pd.read_csv(input.fam, sep='\t', header=None, index_col=1)
fam = fam.to_dict()[0]
fam = pd.read_csv(input.fam, sep='\t', header=None, dtype=str)
# In some pandas versions, dtype isn't applied to the index column, so set the index after reading:
fam = fam.set_index(1).to_dict()[0]
## Insert FIDs as first column of covar, joining by IID:
covar.insert(0, 'FID', covar['IID'].map(fam))
covar.to_csv(output.plink, sep='\t', index=False, header=False)
2 changes: 1 addition & 1 deletion Project/scripts/assemble_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def assemble_stability(sample_ids: list, stab_dir: Path, ref_anno: Path, bed: Pa
args = parser.parse_args()

if args.samples is not None:
samples = pd.read_csv(args.samples, sep='\t', header=None)[0].tolist()
samples = pd.read_csv(args.samples, sep='\t', header=None, dtype=str)[0].tolist()

if args.type == 'alt_TSS_polyA':
assemble_alt_TSS_polyA(samples, args.input_dir, args.input_dir2, 'tpm', args.ref_anno, args.output, min_frac=0.05, max_frac=0.95)
Expand Down
10 changes: 5 additions & 5 deletions Project/steps/align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ rule star_index:
ref_anno = ref_anno,
output:
# Among other generated files:
ref_dir / 'star_index' / 'SAindex',
ref_dir / 'star_index' / 'chrNameLength.txt',
ref_dir / f'star_index_{read_length}' / 'SAindex',
ref_dir / f'star_index_{read_length}' / 'chrNameLength.txt',
params:
index_dir = ref_dir / 'star_index',
index_dir = ref_dir / f'star_index_{read_length}',
overhang = read_length - 1,
genomeSAindexNbases = int(np.log2(genome_size) / 2 - 1),
threads: 16
Expand Down Expand Up @@ -44,12 +44,12 @@ rule star_align:
"""Align RNA-Seq reads for a sample using STAR."""
input:
fastq = fastq_inputs,
index = ref_dir / 'star_index' / 'SAindex',
index = ref_dir / f'star_index_{read_length}' / 'SAindex',
output:
interm_dir / 'bam' / '{sample_id}.Aligned.sortedByCoord.out.bam',
params:
fastq_list = fastq_star_param,
index_dir = ref_dir / 'star_index',
index_dir = ref_dir / f'star_index_{read_length}',
bam_dir = interm_dir / 'bam',
prefix = str(interm_dir / 'bam' / '{sample_id}.'),
threads: 16
Expand Down

0 comments on commit e7dc7b8

Please sign in to comment.