-
Notifications
You must be signed in to change notification settings - Fork 1
/
prokka_ps.sh
executable file
·73 lines (55 loc) · 2.53 KB
/
prokka_ps.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
# Configuration and argument handling for a single-assembly Prokka run.
# Usage: <this script> <assembly.fa>
#
# Reference sequence path containing gbk files
#REFSEQPATH="/home/bsalehe/canker_cherry/scripts/refseq1/"
# Directory holding the merged reference GenBank file (ps.gbk) given to prokka.
REFSEQJOINEDGBK="/home/bsalehe/canker_cherry/scripts/combined_gbk/"
# Only referenced by the commented-out BLAST database commands near the end of the script.
PROKKA_DB="/data/scratch/bsalehe/prokka_db/"
# Parent directory for the per-strain Prokka output; alternatives are kept commented out.
#PROKKA_OUT="/data/scratch/bsalehe/prokka_out/Tracy/refgenomes/January"
PROKKA_OUT="/data/scratch/bsalehe/Michelle_data/ps_genomic_strains/canker_genomes/epiphyte_genomes/prokka_out"
#PROKKA_OUT="/data/scratch/bsalehe/Michelle_data/ps_genomic_strains/canker_genomes/epiphyte_genomes/test_pipeline/prokka_out"
#spades_out="$1"

#######################################
# Print the strain name for an assembly path: the file name with any
# leading directories and a trailing ".fa" removed.
# Arguments: $1 - path to the assembly FASTA file
# Outputs:   strain name on stdout
#######################################
strain_name_from_path() {
  # Quoted so that paths containing whitespace stay a single operand;
  # "--" stops a name starting with "-" from being parsed as an option.
  basename -- "$1" .fa
}

# First positional argument: the assembly FASTA to annotate (empty if omitted).
Assembly="${1:-}"
#psname=$(dirname $Assembly)
psname=$(strain_name_from_path "${Assembly}")
#### Prokka genome annotation ####
# 12.03.18 Downloaded all Pseudomonas syringae group complete genome NCBI accession numbers to build database for Prokka annotation
# This included complete chromosomes and plasmids (102 total)
# Accession list was used to find all records on genbank and download as one file
# Install ncbi-genome-download
## conda create -n ncbi_genome_download
## conda install -c bioconda ncbi-genome-download
# Activate the ncbi_genome_download environment (name must match the one created above)
## conda activate ncbi_genome_download
# Download ref genomes from ncbi
### ncbi-genome-download -F genbank --genera "Pseudomonas syringae" bacteria -v --flat-output
# Move all gbff.gz to single folder
# mv *.gbff.gz refseq/
# Decompress files from .gbff.gz to .gbk
## for file in refseq/*.gbff.gz; do
## zcat $file > refseq/$(basename $file .gbff.gz).gbk
## done
# Copy a few of the latest genomes to a new folder
## mkdir refseq1
## cp refseq/*.1_C*.gbk refseq1/
# Merge all .gbk files into single gbk file using adapted merge_gbk.py script
## mkdir refseq_merged
#conda create -n biopython
#conda activate biopython
#conda install -c anaconda biopython
# Root of the Miniconda installation that provides the conda environments used below.
export MYCONDAPATH=/home/bsalehe/miniconda3

# The biopython environment is only needed by the merge step, which is
# currently disabled; activating and immediately deactivating it did no work,
# so the whole step is kept here commented out. Re-enable all three lines together.
#source "${MYCONDAPATH}/bin/activate" biopython
#
#python merge_gbk.py ${REFSEQPATH}*.gbk > ${REFSEQJOINEDGBK}ps.gbk
#conda deactivate
#

# Fail fast, before touching conda, when the assembly argument is missing or
# does not name an existing file.
if [[ -z "${Assembly:-}" || ! -f "${Assembly}" ]]; then
  printf 'Usage: %s <assembly.fa>\n' "${0##*/}" >&2
  printf 'error: assembly file not found: %s\n' "${Assembly:-<none given>}" >&2
  exit 1
fi

#export MYCONDAPATH=/home/bsalehe/miniconda3
# conda activate prokka
source "${MYCONDAPATH}/bin/activate" prokka

# If prokka is still not on PATH, stop with a clear message instead of a bare
# "command not found".
if ! command -v prokka >/dev/null; then
  printf 'error: prokka not found on PATH after activating the prokka environment\n' >&2
  exit 1
fi

# REFSEQJOINEDGBK may or may not end in "/"; strip one trailing slash so the
# joined path never contains "//".
proteins_gbk="${REFSEQJOINEDGBK%/}/ps.gbk"

# Run Prokka
#prokka --outdir ${PROKKA_OUT}/${psname} $Assembly
# Every expansion is quoted so that names containing whitespace or glob
# characters reach prokka as single arguments.
prokka \
  --genus Pseudomonas \
  --species syringae \
  --strain "${psname}" \
  --proteins "${proteins_gbk}" \
  --outdir "${PROKKA_OUT}/${psname}" \
  --prefix "${psname}" \
  "${Assembly}"
# Remember prokka's status; otherwise the script would report the status of
# the trailing "conda deactivate" and hide a failed annotation.
prokka_status=$?

#convert all ref genbank files into fasta format
#prokka-genbank_to_fasta_db ${REFSEQPATH}*.gbk > ${REFSEQPATH}ps.faa
#mv ${REFSEQPATH}ps.faa ${PROKKA_DB}ps.faa
#makeblastdb -in ${PROKKA_DB}ps.faa -dbtype prot
#blastp -query ps_annotation/*.faa -db ${PROKKA_DB}ps.faa > ps_proteins
conda deactivate

# Propagate prokka's exit status to the caller (e.g. a job scheduler or wrapper loop).
exit "${prokka_status}"