forked from ComparativeGenomicsToolkit/hal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepareLodFiles.py
112 lines (102 loc) · 6.49 KB
/
prepareLodFiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
#Copyright (C) 2013 by Ngan Nguyen
#
#Released under the MIT license, see LICENSE.txt
"""Take care of Level-of-detail files
"""
import os
from sonLib.bioio import system
from optparse import OptionGroup
def fixLodFilePath(lodtxtfile, localHalfile, outdir):
    """Repoint the level-0 entry of a lod.txt file at the local hal file.

    Every line of lodtxtfile that consists of exactly two whitespace-separated
    tokens, the first being '0', is replaced by '0 <path>' where <path> is
    localHalfile expressed relative to outdir. All other lines are kept
    unchanged. The file is rewritten in place.
    """
    #fix the path of the original hal file to point to the created
    #link relative to the output directory
    relPath = os.path.relpath(localHalfile, start=outdir)
    fixedLines = []
    # Read everything (and close the handle) before reopening the same
    # path for writing.
    with open(lodtxtfile) as lodFile:
        for line in lodFile:
            tokens = line.split()
            if len(tokens) == 2 and tokens[0] == '0':
                fixedLines.append('0 %s\n' % relPath)
            else:
                fixedLines.append(line)
    with open(lodtxtfile, 'w') as lodFile:
        lodFile.write(''.join(fixedLines))
def getLodFiles(localHalfile, options, outdir):
    """Make the level-of-detail (LOD) files available inside outdir.

    If both options.lodtxtfile and options.loddir are set, soft links to them
    are created in outdir (replacing whatever is already there when it is not
    the very same path). Otherwise halLodInterpolate.py is run with
    options.lodOpts to generate outdir/lod.txt and outdir/lod, and the
    level-0 entry of the generated lod.txt is repointed at localHalfile.

    Returns the tuple (lodtxtfile, loddir) of the paths inside outdir.

    NOTE: paths are interpolated into the shell commands unquoted, so they
    must not contain whitespace or shell metacharacters.
    """
    lodtxtfile = os.path.join(outdir, "lod.txt") #outdir/lod.txt
    loddir = os.path.join(outdir, "lod") #outdir/lod
    if options.lodtxtfile and options.loddir: #if lod files were given, then just make soft links to them
        givenTxt = os.path.abspath(options.lodtxtfile)
        givenDir = os.path.abspath(options.loddir)

        # lexists() rather than exists(): a dangling link must be detected
        # and replaced, otherwise "ln -s" fails because the name is taken.
        if not os.path.lexists(lodtxtfile):
            system("ln -s %s %s" % (givenTxt, lodtxtfile))
        elif os.path.abspath(lodtxtfile) != givenTxt:
            system("rm %s" % lodtxtfile)
            system("ln -s %s %s" % (givenTxt, lodtxtfile))

        if not os.path.lexists(loddir):
            system("ln -s %s %s" % (givenDir, loddir))
        elif os.path.abspath(loddir) != givenDir:
            if os.path.islink(loddir):
                system("rm %s" % loddir)
            else:
                system("rm -Rf %s" % loddir)
            # After clearing outdir/lod, the new link is named after the
            # supplied directory, and that name is what gets returned.
            loddir = os.path.join(outdir, os.path.basename(options.loddir))
            system("ln -s %s %s" % (givenDir, loddir))
    else: #if lod files were not given, create them using halLodInterpolate.py
        system("halLodInterpolate.py %s %s --outHalDir %s %s" % (localHalfile, lodtxtfile, loddir, options.lodOpts))
        fixLodFilePath(lodtxtfile, localHalfile, outdir)
    return lodtxtfile, loddir
def getLod(options, localHalfile, outdir):
    """Assemble the halLodInterpolate.py options and create LOD files if wanted.

    The --lod* values found on options are turned into a command-line
    fragment stored in options.lodOpts. If that fragment is non-empty,
    options.lod is forced to True. When options.lod is set, the LOD files are
    obtained through getLodFiles.

    Returns (lodtxtfile, loddir); both are empty strings when no LOD files
    are requested.
    """
    optParts = []
    if options.lodMaxBlock is not None:
        optParts.append('--maxBlock %d ' % options.lodMaxBlock)
    if options.lodScale is not None:
        optParts.append('--scale %f ' % options.lodScale)
    if options.lodMaxDNA is not None:
        optParts.append('--maxDNA %d ' % options.lodMaxDNA)
    if options.lodInMemory is True:
        optParts.append('--inMemory ')
    if options.lodMinSeqFrac is not None:
        optParts.append('--minSeqFrac %f ' % options.lodMinSeqFrac)
    if options.lodMinCovFrac is not None:
        optParts.append('--minCovFrac %f ' % options.lodMinCovFrac)
    if options.lodChunk is not None:
        optParts.append('--chunk %d ' % options.lodChunk)

    # --numProc is only passed along when LOD generation is going to happen;
    # on its own it does not switch LOD generation on.
    numThreads = int(options.maxThreads)
    if numThreads > 1 and (options.lod or optParts):
        optParts.append('--numProc %d ' % numThreads)

    options.lodOpts = ''.join(optParts)
    if options.lodOpts:
        # Parenthesised form: valid on both Python 2 and Python 3.
        print(options.lodOpts)
        # Any LOD tuning option implies --lod.
        options.lod = True

    if not options.lod:
        return '', ''
    return getLodFiles(localHalfile, options, outdir)
def getLodLowestLevel(lodtxtfile):
    """Gets the lowest level at which an LOD hal is used instead of the
    base-level hal file.

    The level of the first line is read; while it is 0, following lines are
    scanned for the first two-token entry whose second token is not "max",
    and that entry's first token becomes the level. Returns 0 if no such
    entry is found. The first line must start with an integer token.
    """
    # "with" guarantees the handle is closed even if parsing a level raises.
    with open(lodtxtfile, 'r') as f:
        line = f.readline()
        level = int(line.split()[0])
        # readline() returns '' at end of file, which ends the loop.
        while line and level == 0:
            line = f.readline()
            fields = line.split()
            # Blank lines yield no fields and are skipped by this test too.
            if len(fields) == 2 and fields[1] != "max":
                level = int(fields[0])
    return level
def addLodOptions(parser):
    """Register the level-of-detail (LOD) command-line options on parser.

    The options are added as one optparse OptionGroup. --lod and
    --lodInMemory are flags defaulting to False; every other option
    defaults to None.
    """
    group = OptionGroup(parser, "LEVEL OF DETAILS", "Level-of-detail (LOD) options.")
    group.add_option(
        '--lod', dest='lod', action="store_true", default=False,
        help='If specified, create "level of detail" (lod) hal files and will put the lod.txt at the bigUrl instead of the original hal file. Default=%default')
    group.add_option(
        '--lodTxtFile', dest='lodtxtfile',
        help='"hal Level of detail" lod text file. If specified, will put this at the bigUrl instead of the hal file. Default=%default')
    group.add_option(
        '--lodDir', dest='loddir',
        help='"hal Level of detail" lod dir. If specified, will put this at the bigUrl instead of the hal file. Default=%default')
    group.add_option(
        '--lodMaxBlock', dest='lodMaxBlock', type='int', default=None,
        help='Maximum number of blocks to display in a hal level of detail (see halLodInterpolate.py --help for the default value).')
    group.add_option(
        '--lodScale', dest='lodScale', type='float', default=None,
        help='Scaling factor between two successive levels of detail (see halLodInterpolate.py --help for the default value).')
    group.add_option(
        '--lodMaxDNA', dest='lodMaxDNA', type='int', default=None,
        help='Maximum query length such that its hal level of detail will contain nucleotide information. Default=%default (see halLodInterpolate.py --help for the default value).')
    group.add_option(
        '--lodInMemory', dest='lodInMemory', action='store_true', default=False,
        help='Load entire hal file into memory when generating levels of detail instead of using hdf5 cache. Could result in drastic speedup. Default=%default.')
    group.add_option(
        '--lodMinSeqFrac', dest='lodMinSeqFrac', type='float', default=None,
        help='Minimum sequence length to sample as fraction of step size for level of detail generation: ie sequences with length <= floor(minSeqFrac * step) are ignored (see halLodExtract --help for default value).')
    group.add_option(
        '--lodMinCovFrac', dest='lodMinCovFrac', type='float', default=None,
        help='Minimum fraction of a genome that must be covered by sequences that exceed --minSeqFrac * step. LODs that would violate this threshold will not be generated (or displayed in the browser). This is seen as better than the alternative, which is to produce unreasonably sparse LODs because half the sequences were not sampled (see halLodInterpolate.py --help for default value).')
    group.add_option(
        '--lodChunk', dest='lodChunk', type='int', default=None,
        help='HDF5 chunk size for generated levels of detail (see halLodExtract --help for default value).')
    #group.add_option('--snpwidth', dest='snpwidth', type='int', default=5000, help='Maximum window size to display SNPs. Default=%default')
    parser.add_option_group(group)