-
Notifications
You must be signed in to change notification settings - Fork 1
/
20080918a.py
149 lines (143 loc) · 5.91 KB
/
20080918a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""Characterize the efficiency of a tree reconstruction sampler.
Each distance matrix is sampled in three steps.
First, a nucleotide alignment is sampled from the distribution
implied by the tree using a Jukes-Cantor model.
Second, the maximum likelihood distance
between each sequence pair is calculated.
Third, the sampled matrix may be rejected
if it has elements that are zero or infinity.
"""
from StringIO import StringIO
import time
from SnippetUtil import HandlingError
import SnippetUtil
import NewickIO
import FelTree
import DMSampler
import NeighborhoodJoining
import Clustering
from Form import RadioItem
import Form
import FormOut
def get_form():
"""
@return: the body of a form
"""
# define the tree string
tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
tree = NewickIO.parse(tree_string, FelTree.NewickTree)
formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
# define the object list
form_objects = [
Form.MultiLine('tree', 'tree',
formatted_tree_string),
Form.Integer('sequence_length', 'use sequences that are this long',
100, low=1),
Form.RadioGroup('assumption', 'distance matrix sampling model', [
RadioItem('infinite_alleles', 'infinite alleles', True),
RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)')]),
Form.RadioGroup('infinity', 'matrices with infinite distances', [
RadioItem('reject_infinity', 'reject these matrices', True),
RadioItem('replace_infinity', 'use 20 instead')]),
Form.RadioGroup('zero', 'matrices with zero distances', [
RadioItem('reject_zero', 'reject these matrices'),
RadioItem('replace_zero', 'use .00001 instead'),
RadioItem('remain_zero', 'use 0 unmodified', True)]),
Form.RadioGroup('criterion', 'tree reconstruction criterion', [
RadioItem('sign', 'spectral sign approximation', True),
RadioItem('nj', 'neighbor joining'),
RadioItem('random', 'random bipartition')])]
# return the object list
return form_objects
def get_form_out():
return FormOut.Report()
def get_response_content(fs):
"""
@param fs: a FieldStorage object containing the cgi arguments
@return: a (response_headers, response_text) pair
"""
# read the tree
tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
# read the sequence length
sequence_length = fs.sequence_length
# get arbitrarily ordered leaf names
ordered_names = list(node.name for node in tree.gen_tips())
# read the criterion string, creating the splitter object
if fs.sign:
splitter = Clustering.StoneSpectralSignDMS()
elif fs.nj:
splitter = Clustering.NeighborJoiningDMS()
elif fs.random:
splitter = Clustering.RandomDMS()
# define the distance matrix sampler
if fs.infinite_alleles:
sampler = DMSampler.InfiniteAllelesSampler(
tree, ordered_names, sequence_length)
elif fs.jukes_cantor:
sampler = DMSampler.DMSampler(
tree, ordered_names, sequence_length)
if fs.reject_infinity:
sampler.set_inf_replacement(None)
elif fs.replace_infinity:
sampler.set_inf_replacement(20)
if fs.reject_zero:
sampler.set_zero_replacement(None)
elif fs.replace_zero:
sampler.set_zero_replacement(0.00001)
elif fs.remain_zero:
sampler.set_zero_replacement(0.0)
# define the amount of time allotted to the sampler
allocated_seconds = 1
# get distance matrices until we run out of time
distance_matrices = []
start_time = time.clock()
sampling_seconds = 0
for result in sampler.gen_samples_or_none():
# if the result was accepted then add the distance matrix
if result is not None:
sequence_list, D = result
distance_matrices.append(D)
# see if we need to stop sampling
sampling_seconds = time.clock() - start_time
if sampling_seconds >= allocated_seconds:
break
# reconstruct trees until we run out of time
start_time = time.clock()
reconstructing_seconds = 0
reconstructed_tree_count = 0
for D in distance_matrices:
# reconstruct a tree using the method of choice
tree_builder = NeighborhoodJoining.TreeBuilder(
D, ordered_names, splitter)
tree_builder.set_fallback_name('nj')
try:
query_tree = tree_builder.build()
except NeighborhoodJoining.NeighborhoodJoiningError as e:
raise HandlingError(e)
reconstructed_tree_count += 1
# see if we need to stop reconstructing the trees
reconstructing_seconds = time.clock() - start_time
if reconstructing_seconds >= allocated_seconds:
break
# define the response
out = StringIO()
if distance_matrices:
print >> out, 'seconds to sample', len(distance_matrices),
print >> out, 'distance matrices:', sampling_seconds
if reconstructed_tree_count:
print >> out, 'seconds to reconstruct', reconstructed_tree_count,
print >> out, 'trees:', reconstructing_seconds
else:
print >> out, 'no trees could be reconstructed',
print >> out, 'in a reasonable amount of time'
else:
print >> out, 'no distance matrices could be sampled'
print >> out, 'in a reasonable amount of time'
print >> out, sampler.proposed,
print >> out, 'distance matrices were proposed but were rejected'
print >> out, sampler.proposals_with_zero,
print >> out, 'proposed distance matrices had estimates of zero'
print >> out, sampler.proposals_with_inf,
print >> out, 'proposed distance matrices had estimates of infinity'
# return the response
return out.getvalue()