-
Notifications
You must be signed in to change notification settings - Fork 2
/
change_vcf_POS.py
61 lines (50 loc) · 1.85 KB
/
change_vcf_POS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 10 17:23:04 2019
@author: YudongCai
@Email: [email protected]
"""
import gzip
import click
def repalce(line, pos):
    """Return a VCF record line with POS replaced and INFO blanked.

    The function name keeps its original spelling because callers in this
    file refer to it by that name.

    Args:
        line: One tab-separated VCF data line, with or without its line
            terminator.
        pos: New value for the second column (POS); surrounding whitespace
            is stripped before it is written.

    Returns:
        The line with column 2 set to ``pos`` and column 8 (INFO) set to
        ``'.'``, ending with the same line terminator the input had.

    Raises:
        IndexError: If the line has fewer than 8 tab-separated columns.
    """
    # Split off the line terminator first: when the record has exactly
    # 8 columns the terminator is part of the INFO field, and overwriting
    # that field would otherwise glue this record to the next one.
    body = line.rstrip('\r\n')
    terminator = line[len(body):]

    fields = body.split('\t')
    fields[1] = pos.strip()
    fields[7] = '.'
    return '\t'.join(fields) + terminator
def _sequential_positions():
    """Yield '1', '2', '3', ... as replacement POS strings, without end."""
    pos = 1
    while True:
        yield str(pos)
        pos += 1


@click.command()
@click.option('--invcf', help='输入的vcf.gz文件')
@click.option('--poslist', help='将输出的vcf中的pos按顺序替换为这个文件中的pos, 不提供这参数的话则从1开始顺序编码', default=None)
@click.option('--outvcf', help='输出的vcf.gz文件')
def main(invcf, poslist, outvcf):
    """Rewrite the POS column of a gzipped VCF and blank its INFO column.

    Leading header lines (those starting with '#') are copied unchanged.
    Every following line is treated as a record: its POS is replaced by the
    next line of the --poslist file, or by 1, 2, 3, ... when --poslist is
    not given, and its INFO column is set to '.'.

    The command fails if --poslist holds fewer positions than the VCF has
    records; extra positions are ignored.
    """
    # Open the position list before the output so a bad --poslist path
    # fails before the output file is created.
    pos_file = open(poslist) if poslist else None
    positions = pos_file if pos_file is not None else _sequential_positions()
    try:
        with gzip.open(invcf, 'rb') as vcf_in, gzip.open(outvcf, 'wb') as vcf_out:
            in_header = True
            n_records = 0
            # Plain iteration ends cleanly at EOF, so an empty or
            # header-only input produces a header-only output.
            for raw in vcf_in:
                # Only the leading run of '#' lines is the header; once the
                # first record is seen every later line is a record.
                if in_header and raw.startswith(b'#'):
                    vcf_out.write(raw)
                    continue
                in_header = False

                pos = next(positions, None)
                if pos is None:
                    # Stopping silently here would drop the remaining records.
                    raise click.ClickException(
                        '--poslist has only {} positions but the VCF has '
                        'more records'.format(n_records)
                    )
                n_records += 1
                vcf_out.write(repalce(raw.decode(), pos).encode())
    finally:
        if pos_file is not None:
            pos_file.close()
# Run the click command only when executed as a script, not on import.
if "__main__" == __name__:
    main()