-
Notifications
You must be signed in to change notification settings - Fork 8
/
crisprme_auto_test_download_essentials.sh
executable file
·54 lines (47 loc) · 2.12 KB
/
crisprme_auto_test_download_essentials.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#crisprme download and test
#
# Unpacks the annotation archives found in Annotations/ and downloads the
# hg38 chromosome FASTA files into Genomes/hg38.
# Run from the directory that contains both Annotations/ and Genomes/: every
# path below is relative to the current working directory.
# Any failed step aborts with a non-zero status, so "download complete" is
# printed only when all steps succeeded.

# Print an error message on stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Extract the gzip-compressed tarball $1 into the current directory and delete
# it only after the extraction succeeded, so a failed extraction never costs
# the archive. A missing archive is reported and skipped, which lets the
# script be re-run after an earlier run that already consumed it.
extract_and_remove() {
  if [ ! -f "$1" ]; then
    printf 'warning: %s not found, skipping\n' "$1" >&2
    return 0
  fi
  tar -xzf "$1" || die "failed to extract $1"
  rm -- "$1" || die "failed to remove $1"
}

echo "starting download and unzip of data"
echo "unzip gencode+encode annotations"
#unzip annotations
# Abort if the directory is missing: otherwise tar/rm would act on whatever
# directory the script happens to be in, and "cd ../" would leave the tree.
cd Annotations/ || die "cannot enter Annotations/ (run this script from the directory that contains it)"
extract_and_remove encode+gencode.hg38.tar.gz
extract_and_remove gencode.protein_coding.tar.gz
cd ../ || die "cannot return from Annotations/"
#unzip gencode
# echo "unzip gencode protein-coding proximity file"
# cd Gencode/
# tar -xzf gencode.protein_coding.tar.gz
# rm gencode.protein_coding.tar.gz
# cd ../
# echo "start download VCF data and genome (this may take a long time due to connection speed)"
# #download VCFs data
# cd VCFs/
# #download 1000G
# cd hg38_1000G/
# echo "download 1000G VCFs"
# for i in {1..22}; do
# wget -c -q ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr$i.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz
# done
# wget -c -q ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz
#download HGDP
#uncomment these lines if you want to download also HGDP VCFs
# cd ../hg38_HGDP
# echo "download HGDP VCFs"
# for i in {1..22}
# do
# wget -c -q ftp://ngs.sanger.ac.uk:21/production/hgdp/hgdp_wgs.20190516/hgdp_wgs.20190516.full.chr$i.vcf.gz
# done
# wget -c -q ftp://ngs.sanger.ac.uk:21/production/hgdp/hgdp_wgs.20190516/hgdp_wgs.20190516.full.chrX.vcf.gz
# wget -c -q ftp://ngs.sanger.ac.uk:21/production/hgdp/hgdp_wgs.20190516/hgdp_wgs.20190516.full.chrY.vcf.gz
# cd ../../
#download hg38
cd Genomes/ || die "cannot enter Genomes/ (run this script from the directory that contains it)"
if [ -e hg38 ]; then
  # With hg38 already present, "mv chroms hg38" would nest the new
  # extraction as hg38/chroms instead of renaming it, so skip the step.
  echo "Genomes/hg38 already exists, skipping hg38 download" >&2
else
  echo "download hg38"
  # -c resumes a partially downloaded archive left by an interrupted run.
  wget -c -q https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chromFa.tar.gz \
    || die "failed to download hg38.chromFa.tar.gz"
  tar -xzf hg38.chromFa.tar.gz || die "failed to extract hg38.chromFa.tar.gz"
  # The rename below relies on the archive unpacking into a directory
  # named "chroms".
  mv chroms hg38 || die "failed to rename chroms to hg38"
fi
cd ../ || die "cannot return from Genomes/"
echo "download complete"
# echo "start testing"
# crisprme.py complete-search --genome Genomes/hg38/ --vcf list_vcf.txt/ --guide sg1617.txt --pam PAMs/20bp-NGG-spCas9.txt --annotation Annotations/gencode_encode.hg38.bed --samplesID list_samplesID.txt --gene_annotation Gencode/gencode.protein_coding.bed --bMax 2 --mm 6 --bDNA 2 --bRNA 2 --merge 3 --output sg1617.6.2.2 --thread 4