forked from Nealelab/UK_Biobank_GWAS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
09.load_mfi_vds.py
26 lines (22 loc) · 1.07 KB
/
09.load_mfi_vds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from hail import *
# Attach the per-variant MAF and INFO values from the UK Biobank "mfi" text
# files to the imputed-v3 sites table, then write the result out as a VDS.

MFI_TEXT_GLOB = 'gs://fc-7d5088b4-7673-45b5-95c2-17ae00a04183/imputed/ukb_mfi_chr*_v3.txt'
SITES_VDS_PATH = 'gs://ukb31063-mega-gwas/hail-0.1/qc/ukb31063.imputed_v3.sites.vds'
MFI_VDS_PATH = 'gs://ukb31063-mega-gwas/hail-0.1/qc/ukb31063.imputed_v3.mfi.vds'

# The MFI files are imported with no_header=True, so their columns arrive
# under the positional names f0..f7; pair each one with the name used below.
MFI_COLUMN_NAMES = dict([
    ('f0', 'varid'),
    ('f1', 'rsid'),
    ('f2', 'position'),
    ('f3', 'allele1_ref'),
    ('f4', 'allele2_alt'),
    ('f5', 'maf'),
    ('f6', 'minor_allele'),
    ('f7', 'info'),
])

hc = HailContext()

# MFI side of the join: keyed by varid, with maf and info converted via
# toFloat() and packed into a single 'mfi' struct. Only varid and mfi are kept.
mfi_table = (
    hc.import_table(MFI_TEXT_GLOB, no_header=True)
    .rename(MFI_COLUMN_NAMES)
    .key_by('varid')
    .annotate('mfi = {maf: maf.toFloat(), info: info.toFloat()}')
    .select(['varid', 'mfi'])
)

# Sites side of the join: flatten the variants table so the nested
# annotations become top-level columns ('va.varid', 'va.rsid'), rename them,
# and key by varid so the two tables can be matched.
sites_table = (
    hc.read(SITES_VDS_PATH)
    .variants_table()
    .flatten()
    .rename({'va.varid': 'varid', 'va.rsid': 'rsid'})
    .select(['varid', 'rsid', 'v'])
    .key_by('varid')
)

# Keep only variants present in both tables, then re-key by the variant
# column 'v' before converting the table back into a VariantDataset.
joined = mfi_table.join(sites_table, how='inner').key_by('v')

mfi_vds = VariantDataset.from_table(joined)
mfi_vds.write(MFI_VDS_PATH, overwrite=True)