forked from cgmartini/martinize.py
-
Notifications
You must be signed in to change notification settings - Fork 1
/
MAP.py
174 lines (151 loc) · 8.94 KB
/
MAP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
##########################
## 4 # FG -> CG MAPPING ## -> @MAP <-
##########################
import FUNC
dnares3 = " DA DC DG DT"
dnares1 = " dA dC dG dT"
rnares3 = " A C G U"
rnares1 = " rA rC rG rU"
# Amino acid nucleic acid codes:
# The naming (AA and '3') is not strictly correct when adding DNA/RNA, but we keep it like this for consistincy.
AA3 = FUNC.spl("TRP TYR PHE HIS HIH ARG LYS CYS ASP GLU ILE LEU MET ASN PRO HYP GLN SER THR VAL ALA GLY"+dnares3+rnares3) #@#
AA1 = FUNC.spl(" W Y F H H R K C D E I L M N P O Q S T V A G"+dnares1+rnares1) #@#
# Dictionaries for conversion from one letter code to three letter code v.v.
AA123, AA321 = FUNC.hash(AA1, AA3), FUNC.hash(AA3, AA1)
# Residue classes:
protein = AA3[:-8] # remove eight to get rid of DNA/RNA here.
water = FUNC.spl("HOH SOL TIP")
lipids = FUNC.spl("DPP DHP DLP DMP DSP POP DOP DAP DUP DPP DHP DLP DMP DSP PPC DSM DSD DSS")
nucleic = FUNC.spl("DAD DCY DGU DTH ADE CYT GUA THY URA DA DC DG DT")
residueTypes = dict(
[(i, "Protein") for i in protein ] +
[(i, "Water") for i in water ] +
[(i, "Lipid") for i in lipids ] +
[(i, "Nucleic") for i in nucleic ]
)
class CoarseGrained:
# Class for mapping an atomistic residue list to a coarsegrained one
# Should get an __init__ function taking a residuelist, atomlist, Pymol selection or ChemPy model
# The result should be stored in a list-type attribute
# The class should have pdbstr and grostr methods
# Standard mapping groups
# Protein backbone
bb = "N CA C O H H1 H2 H3 O1 O2" #@#
# Lipid tails
palmitoyl1 = FUNC.nsplit("C1B C1C C1D C1E", "C1F C1G C1H C1I", "C1J C1K C1L C1M", "C1N C1O C1P") #@#
palmitoyl2 = FUNC.nsplit("C2B C2C C2D C2E", "C2F C2G C2H C2I", "C2J C2K C2L C2M", "C2N C2O C2P") #@#
oleyl1 = FUNC.nsplit("C1B C1C C1D C1E", "C1F C1G C1H", "C1I C1J", "C1K C1L C1M C1N", "C1O C1P C1Q C1R") #@#
oleyl2 = FUNC.nsplit("C2B C2C C2D C2E", "C2F C2G C2H", "C2I C2J", "C2K C2L C2M C2N", "C2O C2P C2Q C2R") #@#
#lauroyl1 = []
#stearoyl1 = []
#arachidonoyl1 = []
#linoleyl1 = []
#hexanoyl1 = []
# Lipid head groups
#phoshpatidylcholine =
phosphatydilethanolamine = FUNC.nsplit("N H1 H2 H3 CA", "CB P OA OB OC OD", "CC CD OG C2A OH", "CE OE C1A OF") #@#
phosphatidylglycerol = FUNC.nsplit("H1 O1 CA H2 O2 CB", "CC P OA OB OC OD", "CD CE OG C2A OH", "CF OE C1A OF") #@#
#phosphatidylserine =
dna_bb = "P OP1 OP2 O5' O3'", "C5' O4' C4'", "C3' O3' C2' C1'"
# This is the mapping dictionary
# For each residue it returns a list, each element of which
# lists the atom names to be mapped to the corresponding bead.
# The order should be the standard order of the coarse grained
# beads for the residue. Only atom names matching with those
# present in the list of atoms for the residue will be used
# to determine the bead position. This adds flexibility to the
# approach, as a single definition can be used for different
# states of a residue (e.g., GLU/GLUH).
# For convenience, the list can be specified as a set of strings,
# converted into a list of lists by 'FUNC.nsplit' defined above.
mapping = {
"ALA": FUNC.nsplit(bb + " CB"),
"CYS": FUNC.nsplit(bb, "CB SG"),
"ASP": FUNC.nsplit(bb, "CB CG OD1 OD2"),
"GLU": FUNC.nsplit(bb, "CB CG CD OE1 OE2"),
"PHE": FUNC.nsplit(bb, "CB CG CD1 HD1", "CD2 HD2 CE2 HE2", "CE1 HE1 CZ HZ"),
"GLY": FUNC.nsplit(bb),
"HIS": FUNC.nsplit(bb, "CB CG", "CD2 HD2 NE2 HE2", "ND1 HD1 CE1 HE1"),
"HIH": FUNC.nsplit(bb, "CB CG", "CD2 HD2 NE2 HE2", "ND1 HD1 CE1 HE1"), # Charged Histidine.
"ILE": FUNC.nsplit(bb, "CB CG1 CG2 CD CD1"),
"LYS": FUNC.nsplit(bb, "CB CG CD", "CE NZ HZ1 HZ2 HZ3"),
"LEU": FUNC.nsplit(bb, "CB CG CD1 CD2"),
"MET": FUNC.nsplit(bb, "CB CG SD CE"),
"ASN": FUNC.nsplit(bb, "CB CG ND1 ND2 OD1 OD2 HD11 HD12 HD21 HD22"),
"PRO": FUNC.nsplit(bb, "CB CG CD"),
"HYP": FUNC.nsplit(bb, "CB CG CD OD"),
"GLN": FUNC.nsplit(bb, "CB CG CD OE1 OE2 NE1 NE2 HE11 HE12 HE21 HE22"),
"ARG": FUNC.nsplit(bb, "CB CG CD", "NE HE CZ NH1 NH2 HH11 HH12 HH21 HH22"),
"SER": FUNC.nsplit(bb, "CB OG HG"),
"THR": FUNC.nsplit(bb, "CB OG1 HG1 CG2"),
"VAL": FUNC.nsplit(bb, "CB CG1 CG2"),
"TRP": FUNC.nsplit(bb, "CB CG CD2", "CD1 HD1 NE1 HE1 CE2", "CE3 HE3 CZ3 HZ3", "CZ2 HZ2 CH2 HH2"),
"TYR": FUNC.nsplit(bb, "CB CG CD1 HD1", "CD2 HD2 CE2 HE2", "CE1 HE1 CZ OH HH"),
"POPE": phosphatydilethanolamine + palmitoyl1 + oleyl2,
"DOPE": phosphatydilethanolamine + oleyl1 + oleyl2,
"DPPE": phosphatydilethanolamine + palmitoyl1 + palmitoyl2,
"POPG": phosphatidylglycerol + palmitoyl1 + oleyl2,
"DOPG": phosphatidylglycerol + oleyl1 + oleyl2,
"DPPG": phosphatidylglycerol + palmitoyl1 + palmitoyl2,
"DA": FUNC.nsplit("P OP1 OP2 O5' O3' O1P O2P", "C5' O4' C4'", "C3' C2' C1'", "N9 C4", "C8 N7 C5", "C6 N6 N1", "C2 N3"),
"DG": FUNC.nsplit("P OP1 OP2 O5' O3' O1P O2P", "C5' O4' C4'", "C3' C2' C1'", "N9 C4", "C8 N7 C5", "C6 O6 N1", "C2 N2 N3"),
"DC": FUNC.nsplit("P OP1 OP2 O5' O3' O1P O2P", "C5' O4' C4'", "C3' C2' C1'", "N1 C6", "C5 C4 N4", "N3 C2 O2"),
"DT": FUNC.nsplit("P OP1 OP2 O5' O3' O1P O2P", "C5' O4' C4'", "C3' C2' C1'", "N1 C6", "C5 C4 O4 C7 C5M", "N3 C2 O2"),
}
# Generic names for side chain beads
residue_bead_names = FUNC.spl("BB SC1 SC2 SC3 SC4")
# Generic names for DNA beads
residue_bead_names_dna = FUNC.spl("BB1 BB2 BB3 SC1 SC2 SC3 SC4")
# This dictionary contains the bead names for all residues,
# following the order in 'mapping'
names = {
"POPE": "NH3 PO4 GL1 GL2 C1A C2A C3A C4A C1B C2B D3B C4B C5B".split(),
"POPG": "GLC PO4 GL1 GL2 C1A C2A C3A C4A C1B C2B D3B C4B C5B".split()
}
# Add default bead names for all amino acids
names.update([(i, ("BB", "SC1", "SC2", "SC3", "SC4")) for i in AA3])
# Add the default bead names for all DNA nucleic acids
names.update([(i, ("BB1", "BB2", "BB3", "SC1", "SC2", "SC3", "SC4")) for i in nucleic])
# This dictionary allows determining four letter residue names
# for ones specified with three letters, e.g., resulting from
# truncation to adhere to the PDB format.
# Each entry returns a prototypical test, given as a string,
# and the residue name to be applied if eval(test) is True.
# This is particularly handy to determine lipid types.
# The test assumes there is a local or global array 'atoms'
# containing the atom names of the residue in correct order.
restest = {
"POP": [('atoms[0] == "CA"', "POPG"),
('atoms[0] == "N"', "POPE")]
}
# Crude mass for weighted average. No consideration of united atoms.
# This will probably give only minor deviations, while also giving less headache
mass = {'H': 1, 'C': 12, 'N': 14, 'O': 16, 'S': 32, 'P': 31, 'M': 0}
# Determine average position for a set of weights and coordinates
# This is a rather specific function that requires a list of items
# [(m,(x,y,z),id),..] and returns the weighted average of the
# coordinates and the list of ids mapped to this bead
def aver(b):
mwx, ids = zip(*[((m*x, m*y, m*z), i) for m, (x, y, z), i in b]) # Weighted coordinates
tm = sum(zip(*b)[0]) # Sum of weights
return [sum(i)/tm for i in zip(*mwx)], ids # Centre of mass
# Return the CG beads for an atomistic residue, using the mapping specified above
# The residue 'r' is simply a list of atoms, and each atom is a list:
# [ name, resname, resid, chain, x, y, z ]
def map(r, ca2bb = False):
p = CoarseGrained.mapping[r[0][1]] # Mapping for this residue
if ca2bb: p[0] = ["CA"] # Elnedyn maps BB to CA, ca2bb is False or True
# Get the name, mass and coordinates for all atoms in the residue
a = [(i[0], CoarseGrained.mass.get(i[0][0], 0), i[4:]) for i in r]
# Store weight, coordinate and index for atoms that match a bead
q = [[(m, coord, a.index((atom, m, coord))) for atom, m, coord in a if atom in i] for i in p]
# Bead positions
return zip(*[aver(i) for i in q])
# Mapping for index file
def mapIndex(r, ca2bb = False):
p = CoarseGrained.mapping[r[0][1]] # Mapping for this residue
if ca2bb: p[0] = ["CA"] # Elnedyn maps BB to CA, ca2bb is False or True
# Get the name, mass and coordinates for all atoms in the residue
a = [(i[0], CoarseGrained.mass.get(i[0][0], 0), i[4:]) for i in r]
# Store weight, coordinate and index for atoms that match a bead
return [[(m, coord, a.index((atom, m, coord))) for atom, m, coord in a if atom in i] for i in p]