def cmdCombine(mols, name="combination", newChainIDs=True, log=True,
               close=False, modelId=None, refMol=None):
    """Command-level front end for combine().

    mols        -- list of molecules to combine; sorted in place by
                   model OSL identifier before use
    newChainIDs -- passed through to combine()
    log         -- passed through to combine()
    modelId     -- None, an int, or a value whose [1:] slice converts
                   with int() (presumably a "#N"-style string -- confirm
                   against callers)
    refMol      -- None (use the first sorted molecule) or a sequence
                   holding exactly one reference molecule

    'name' and 'close' are accepted but not referenced by the code in
    this block.

    Raises MidasError when no molecules are given, when refMol does not
    hold exactly one molecule, when modelId cannot be converted to an
    integer, or when combine() raises CombineError.
    """
    from chimera.misc import oslModelCmp
    mols.sort(lambda m1, m2: oslModelCmp(m1.oslIdent(), m2.oslIdent()))
    from Midas import MidasError
    if not mols:
        raise MidasError("No molecules specified")

    if refMol is None:
        refMol = mols[:1]
    if len(refMol) == 0:
        raise MidasError("No reference molecule specified")
    elif len(refMol) > 1:
        raise MidasError("Multiple reference molecules specified")
    refMol = refMol[0]

    if modelId is not None and not isinstance(modelId, int):
        try:
            # drop the leading character before converting
            modelId = int(modelId[1:])
        except (TypeError, ValueError, AttributeError):
            # only conversion failures are translated; anything else
            # (e.g. KeyboardInterrupt) now propagates unchanged
            raise MidasError("modelId value must be integer")

    from chimera import suppressNewMoleculeProcessing, \
        restoreNewMoleculeProcessing
    suppressNewMoleculeProcessing()
    # NOTE(review): processing is restored here only on CombineError;
    # no restore for the success path is visible in this block.
    try:
        m = combine(mols, refMol, newChainIDs=newChainIDs, log=log)
    except CombineError as v:
        restoreNewMoleculeProcessing()
        raise MidasError(v)
def _newModelsCB(self, trigName=None, myData=None, models=None):
    """Rebuild the per-molecule association rows.

    Every open chimera.Molecule gets grid row (index + 1), in model
    OSL-identifier order.  Molecules already tracked in self.assocInfo
    have their widgets re-gridded (or destroyed and forgotten when the
    molecule no longer has sequences); untracked molecules that have
    sequences get a fresh row of widgets.  The three parameters are not
    used by the body.
    """
    mols = [m for m in chimera.openModels.list()
            if isinstance(m, chimera.Molecule)]
    mols.sort(lambda a, b: oslModelCmp(a.oslIdent(), b.oslIdent()))

    for index, mol in enumerate(mols):
        row = index + 1
        chains = mol.sequences()
        noChains = len(chains) == 0

        if mol in self.assocInfo:
            # already known: move (or discard) the existing widgets
            for col, widget in enumerate(self.assocInfo[mol]['widgets']):
                if not widget:
                    continue
                widget.grid_forget()
                if noChains:
                    widget.destroy()
                else:
                    widget.grid(row=row, column=col, sticky='w')
            if noChains:
                del self.assocInfo[mol]
            continue

        if noChains:
            continue

        for chain in chains:
            chain.triggers.addHandler(chain.TRIG_DELETE,
                                      self._chainDeletionCB, None)

        widgets = []
        info = {'widgets': widgets}
        self.assocInfo[mol] = info

        # column 0: molecule label
        label = Tkinter.Label(self.parent,
                              text="%s (%s)" % (mol.name, mol.oslIdent()))
        widgets.append(label)
        label.grid(row=row, column=0, sticky='w')

        # column 1: chain chooser, only when there is a choice to make
        if len(chains) > 1:
            chainMenu = Pmw.OptionMenu(self.parent,
                                       items=[s.name for s in chains])
            chainMenu.grid(row=row, column=1, sticky='w')
        else:
            chainMenu = None
        widgets.append(chainMenu)

        # column 2: alignment-sequence chooser
        seqMenu = Pmw.OptionMenu(
            self.parent,
            items=['none'] + [s.name for s in self.mav.seqs],
            command=lambda v, w=widgets: self._assocMenuCB(v, w))
        widgets.append(seqMenu)
        seqMenu.grid(row=row, column=2, sticky='w')

        # column 3: "best match" checkbox held in its own frame
        frame = Tkinter.Frame(self.parent)
        widgets.append(frame)
        frame.grid(row=row, column=3, sticky='w')
        frame.variable = Tkinter.IntVar(frame)
        frame.variable.set(False)
        frame.button = Tkinter.Checkbutton(frame, variable=frame.variable,
                                           text="associate with best match")
        frame.button.grid()
def sortFunc(m1, m2):
    """cmp-style ordering for a mix of strings and models.

    Strings sort before non-strings, two strings compare as strings,
    and two non-strings are ordered by oslModelCmp() on their OSL
    identifiers.  Returns a negative, zero or positive integer.

    The original computed each "is a string" flag from the *other*
    argument, which inverted the string/non-string ordering.
    """
    try:
        stringType = basestring
    except NameError:
        # interpreter without basestring
        stringType = str
    m1IsStr = isinstance(m1, stringType)
    m2IsStr = isinstance(m2, stringType)
    if m1IsStr and m2IsStr:
        # same result as cmp(m1, m2) for strings
        return (m1 > m2) - (m1 < m2)
    if m1IsStr:
        return -1
    if m2IsStr:
        return 1
    return oslModelCmp(m1.oslIdent(), m2.oslIdent())
def __init__(self, listFunc=chimera.openModels.list, sortFunc=None,
             filtFunc=lambda m: True, **kw):
    """Record the list, sort and filter callables.

    listFunc -- stored as self.listFunc
    sortFunc -- cmp-style comparison stored as self.sortFunc; when None,
                items are compared with oslModelCmp() on their
                oslIdent() values
    filtFunc -- stored as self.filtFunc; the default accepts everything
    **kw     -- remaining keywords, kept in self._remKw
    """
    if sortFunc is None:
        from chimera.misc import oslModelCmp

        def sortFunc(m1, m2):
            return oslModelCmp(m1.oslIdent(), m2.oslIdent())

    self._remKw = kw
    self.sortFunc = sortFunc
    self.filtFunc = filtFunc
    self.listFunc = listFunc
def _cmpName(self, g1, g2):
    """cmp-style comparison of two groups.

    Two chimera.ChainTrace groups are ordered by oslModelCmp() on the
    molecule-level OSL identifier of the first atom of each group's
    first pseudobond; any other pairing is ordered by cmp() on the
    groups' _mapName() values.
    """
    name1 = _mapName(g1)
    name2 = _mapName(g2)
    bothTraces = isinstance(g1, chimera.ChainTrace) \
        and isinstance(g2, chimera.ChainTrace)
    if not bothTraces:
        return cmp(name1, name2)

    from chimera.misc import oslModelCmp
    osl1 = g1.pseudoBonds[0].atoms[0].oslIdent(end=chimera.SelMolecule)
    osl2 = g2.pseudoBonds[0].atoms[0].oslIdent(end=chimera.SelMolecule)
    return oslModelCmp(osl1, osl2)
def _cmpName(self, g1, g2):
    """Return a cmp-style result ordering group g1 against group g2.

    When both groups are chimera.ChainTrace instances the result comes
    from oslModelCmp() applied to the molecule-level OSL identifiers of
    the first atom of each first pseudobond; otherwise the _mapName()
    values of the groups are compared with cmp().
    """
    names = (_mapName(g1), _mapName(g2))
    if isinstance(g1, chimera.ChainTrace):
        if isinstance(g2, chimera.ChainTrace):
            from chimera.misc import oslModelCmp

            def molOsl(group):
                # OSL identifier, truncated at the molecule level
                firstAtom = group.pseudoBonds[0].atoms[0]
                return firstAtom.oslIdent(end=chimera.SelMolecule)

            return oslModelCmp(molOsl(g1), molOsl(g2))
    return cmp(names[0], names[1])
def _itemNames(self):
    """Return display names for the sequences of all open molecules.

    Resets self.itemMap (name -> sequence) and self.valueMap
    (sequence -> name), then walks the open chimera.Molecule models in
    oslModelCmp() order and records every sequence accepted by
    self.filtFunc under its fullName().  The names are returned in the
    order they were visited.
    """
    self.itemMap = {}
    self.valueMap = {}
    mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
    from chimera.misc import oslModelCmp
    mols.sort(lambda a, b: oslModelCmp(a.oslIdent(), b.oslIdent()))

    names = []
    for mol in mols:
        for seq in mol.sequences():
            if self.filtFunc(seq):
                label = seq.fullName()
                self.itemMap[label] = seq
                self.valueMap[seq] = label
                names.append(label)
    return names
def _itemNames(self):
    """Rebuild the name/sequence maps and return the list of names.

    self.itemMap maps each fullName() to its sequence and self.valueMap
    maps each sequence back to that name.  Only sequences for which
    self.filtFunc returns a true value are included; molecules are
    visited in oslModelCmp() order of their OSL identifiers.
    """
    nameToSeq = self.itemMap = {}
    seqToName = self.valueMap = {}
    openMols = chimera.openModels.list(modelTypes=[chimera.Molecule])
    from chimera.misc import oslModelCmp

    def byOsl(m1, m2):
        return oslModelCmp(m1.oslIdent(), m2.oslIdent())

    openMols.sort(byOsl)

    result = []
    for molecule in openMols:
        for sequence in molecule.sequences():
            if not self.filtFunc(sequence):
                continue
            fullName = sequence.fullName()
            nameToSeq[fullName] = sequence
            seqToName[sequence] = fullName
            result.append(fullName)
    return result
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    """Build a multiple sequence alignment from superimposed chains.

    chains       -- sequences to align; each is used via .residues,
                    .molecule, .name, len() and indexing/slicing
    cutoff       -- distance limit; a link's value is
                    (cutoff - distance between principal atoms)
    matchType    -- "all" combines pairwise values in a column with
                    min(); anything else uses max()
    gapChar      -- character appended where a sequence has no residue
    circular     -- when true, look for circular permutations; this also
                    sets a 'circular' attribute on every chain
    statusPrefix -- text prepended to every status message

    Returns the cloned, gapped sequences sorted by molecule OSL
    identifier, or None (after reporting an error through replyobj)
    when no column satisfies the distance constraint.  Raises
    chimera.UserError when a chain ends up with no columns during
    collation.
    """
    # create list of pairings between sequences
    # and prune to be monotonic
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        """One end of a link: a single (sequence, position) pair."""

        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # expose the same 'positions' mapping that Column offers
            if attr == "positions":
                return {self.seq: self.pos}
            raise AttributeError(
                "No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored connection between two EndPoint/Column objects."""

        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            # FIX: the second call was written "contains(seq. pos)",
            # i.e. an attribute access instead of two arguments
            return self.info[0].contains(seq, pos) \
                or self.info[1].contains(seq, pos)

        def evaluate(self):
            """Recompute self.val from the current members of both ends."""
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                            and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                # second slot for the doubled (wrapped) position
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                                 "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    # '//' keeps the integer result the '%d' in the tag expects
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) // 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i + 1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = [[] for _unused in range(len1)]
            links2 = [[] for _unused in range(len2)]
            linkList = []
            replyobj.status("%sBuilding search tree %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for idx, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((idx, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
                # FIX: the cache was consulted but never filled, so the
                # tree was rebuilt for every pair involving seq2
                trees[seq2] = tree
            replyobj.status("%sSearching tree, building links %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1), EndPoint(seq2, i2),
                                val, doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n"
                                % (statusPrefix, tag), blankAfter=0)
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                                  " residues for %s and %s\n"
                                  % (seq1.molecule.name,
                                     seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1) + len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                            or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for k in range(start, stop):
                            crossings[k] += 1
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for k in range(start, stop):
                            c2[k] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5 * numSamples // len(seq1)
                permute1 = [0]
                for shift, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [shift]
                    elif crossed == least:
                        permute1.append(shift)
                least = c2[0] - 5 * numSamples // len(seq2)
                permute2 = [0]
                for shift, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [shift]
                    elif crossed == least:
                        permute2.append(shift)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (
                        permute1[0], permute2[0])
                    replyobj.info(
                        "%s %s / %s %s: permute %s by %d or %s by %d\n"
                        % (seq1.molecule.name, seq1.name,
                           seq2.molecule.name, seq2.name,
                           seq1.molecule.name, permute1[0],
                           seq2.molecule.name, permute2[0]))
            else:
                findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                                    linkList, links1, links2, tag=tag,
                                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(lambda c1, c2: cmp(circularVotes[c2],
                                               circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                            and (seq, cseq) not in circularPairs:
                        # safe: both loops are left right after the
                        # deletion, so iteration never resumes
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n"
                        % statusPrefix, blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    # FIX: was "permute2, permute1 in ...", a membership
                    # test that assigned nothing
                    permute2, permute1 = circularPairs[(seq2, seq1)]
                else:
                    continue
                use1 = len(seq1) - permute1 \
                    < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                else:
                    myEnd = link.info[1]
                if myEnd.pos >= fixup:
                    continue
                # move the link to the doubled half of the sequence
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks, pairings, seq1, seq2, linkList,
                                links1, links2, tag=tagTmpl % (i + 1),
                                statusPrefix=statusPrefix)

    class Column:
        """An alignment column: a mapping of sequence -> position."""

        def __init__(self, positions):
            if isinstance(positions, Column):
                self.positions = positions.positions.copy()
            else:
                self.positions = positions

        def contains(self, seq, pos):
            return seq in self.positions \
                and self.positions[seq] == pos

        def participation(self):
            """Sum of (cutoff - distance) over all member pairs."""
            p = 0
            members = self.positions.items()
            for i, sp in enumerate(members):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in members[i + 1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    p += val
            return p

        def value(self):
            """valFunc-combined (cutoff - distance) over member pairs;
            None when the column has fewer than two members."""
            value = None
            info = self.positions.items()
            for i, sp in enumerate(info):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in info[i + 1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    if value is None:
                        value = val
                        continue
                    value = valFunc(value, val)
                    if valFunc == min and value < 0:
                        break
                if valFunc == min and value < 0:
                    break
            return value

        def __str__(self):
            from chimera import SelResidue

            def circComp(seq, pos):
                if circular and seq.circular and pos >= len(seq):
                    return pos - len(seq)
                return pos
            return "Column[" + ",".join(map(
                lambda i: "(%s %s, %s)" % (
                    i[0].molecule.name, i[0].name,
                    i[0].residues[circComp(i[0], i[1])].oslIdent(
                        SelResidue)),
                self.positions.items())) + "]"

    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n"
                        % (statusPrefix, len(allLinks)))
        # re-sort only when the best link is no longer at the end
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                        and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        # a column may hold each sequence only once
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i + 1:]:
                cols[ncol] += 1
        for info in link.info:
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    # re-target the link at the merged column
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                # the old column has been absorbed into 'col'
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                        + po[opos + 1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix,
                    blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                                " %s %s because it is not superimposed"
                                " on the other structures"
                                % (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap] + 1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    #   Find pairs of columns where the left-hand one could accept
    #   one or more residues from the right-hand one
    #
    #   Keep looking right (if necessary) until each row has at
    #   least one gap, but no more than one
    #
    #   Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n"
                        % (statusPrefix, colIndex,
                           len(orderedColumns) - 1), blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex + 2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        # per sequence: (currently in a gap, last position, gap count)
        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex + 1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex + rcols,
                                          Column({seq: leftPos + 1}))
                    redo = True
                    break
                indeterminates = True
            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)
            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze
        replaceCols = [Column(c)
                       for c in orderedColumns[colIndex:colIndex + rcols + 1]]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i + 1]
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert(not replaceCols[-1].positions)
            # keep the squeeze only if it raises total participation
            ov = 0
            for col in orderedColumns[colIndex:colIndex + rcols + 1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex + rcols + 1] = \
                replaceCols[:-1]
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix,
                    blankAfter=0)
    for col in orderedColumns:
        # first emit residues skipped since the previous column
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos - len(seq) + 1:
                               offset - len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos + 1:]
                    frag += seq[:offset - len(seq)]
                else:
                    frag = seq[curPos + 1:offset]
            else:
                frag = seq[curPos + 1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        # then the column itself
        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                        and offset >= len(seq):
                    char = seq[offset - len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    # trailing residues after the last column
    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset + 1:] + seq[:]
                else:
                    frag = seq[offset - len(seq) + 1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset + 1:]
        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(lambda a, b: oslModelCmp(a.molecule.oslIdent(),
                                         b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""): # create list of pairings between sequences # and prune to be monotonic trees = {} if matchType == "all": valFunc = min else: valFunc = max # for each pair, go through the second chain residue by residue # and compile crosslinks to other chain. As links are compiled, # figure out what previous links are crossed and keep a running # "penalty" function for links based on what they cross. # Sort links by penalty and keep pruning worst link until no links # cross. from chimera.misc import principalAtom from CGLutil.AdaptiveTree import AdaptiveTree class EndPoint: def __init__(self, seq, pos): self.seq = seq self.pos = pos def contains(self, seq, pos): return seq == self.seq and pos == self.pos def __getattr__(self, attr): if attr == "positions": return {self.seq: self.pos} raise AttributeError, \ "No such EndPoint attribute: %s" % attr def __str__(self): from chimera import SelResidue if circular and self.pos >= len(self.seq): insert = " (circular 2nd half)" pos = self.pos - len(self.seq) else: pos = self.pos insert = "" return "EndPoint[(%s %s, %s%s)]" % ( self.seq.molecule.name, self.seq.name, self.seq.residues[pos].oslIdent(SelResidue), insert) class Link: def __init__(self, info1, info2, val, doPenalty=False): self.info = [info1, info2] self.val = val if doPenalty: self.penalty = 0 self.crosslinks = [] def contains(self, seq, pos): return self.info[0].contains(seq, pos) \ or self.info[1].contains(seq. 
pos) def evaluate(self): self.val = None for s1, p1 in self.info[0].positions.items(): if circular and s1.circular and p1 >= len(s1): p1 -= len(s1) pa1 = pas[s1][p1] for s2, p2 in self.info[1].positions.items(): if circular and s2.circular \ and p2 >= len(s2): p2 -= len(s2) pa2 = pas[s2][p2] val = cutoff - pa1.xformCoord().distance(pa2.xformCoord()) if self.val is None: self.val = val continue self.val = valFunc(self.val, val) if valFunc == min and self.val < 0: break if valFunc == min and self.val < 0: break def __str__(self): return "Link(%s, %s)" % tuple(map(str, self.info)) allLinks = [] pas = {} pairings = {} replyobj.status("%sFinding residue principal atoms\n" % statusPrefix, blankAfter=0) for seq in chains: seqpas = [] pairing = [] for res in seq.residues: pa = principalAtom(res) pairing.append([]) if circular: pairing.append([]) if not pa: replyobj.warning("Cannot determine principal " "atom for residue %s\n" % res.oslIdent()) seqpas.append(None) continue seqpas.append(pa) pas[seq] = seqpas pairings[seq] = pairing if circular: circularPairs = {} holdData = {} tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) / 2) num = 0 for i, seq1 in enumerate(chains): len1 = len(pairings[seq1]) for seq2 in chains[i + 1:]: num += 1 tag = tagTmpl % num len2 = len(pairings[seq2]) links1 = [] for i in range(len1): links1.append([]) links2 = [] for i in range(len2): links2.append([]) linkList = [] replyobj.status("%sBuilding search tree %s\n" % (statusPrefix, tag), blankAfter=0) try: tree = trees[seq2] except KeyError: xyzs = [] data = [] for i, pa in enumerate(pas[seq2]): if pa is None: continue xyzs.append(pa.xformCoord().data()) data.append((i, pa)) tree = AdaptiveTree(xyzs, data, cutoff) replyobj.status("%sSearching tree, building links %s\n" % (statusPrefix, tag), blankAfter=0) for i1, pa1 in enumerate(pas[seq1]): if pa1 is None: continue crd1 = pa1.xformCoord() matches = tree.searchTree(crd1.data(), cutoff) for i2, pa2 in matches: dist = 
crd1.distance(pa2.xformCoord()) val = cutoff - dist if val <= 0: continue link = Link(EndPoint(seq1, i1), EndPoint(seq2, i2), val, doPenalty=True) links1[i1].append(link) links2[i2].append(link) linkList.append(link) if circular: replyobj.status("%sDetermining circularity %s\n" % (statusPrefix, tag), blankAfter=0) holdData[(seq1, seq2)] = (links1, links2, linkList) if len(linkList) < 2: replyobj.info("Less than 2 close" " residues for %s and %s\n" % (seq1.molecule.name, seq2.molecule.name)) continue # determine optimal permutation of 1st seq; # # for each pair of links, find the permutation # where they begin to cross/uncross. Use an # array to tabulate number of crossings for # each permutation. crossings = [0] * len(seq1) c2 = [0] * len(seq2) from random import sample numSamples = 5 * (len(seq1) + len(seq2)) for ignore in range(numSamples): link1, link2 = sample(linkList, 2) l1p1 = link1.info[0].pos l1p2 = link1.info[1].pos l2p1 = link2.info[0].pos l2p2 = link2.info[1].pos if l1p1 == l2p1 \ or l1p2 == l2p2: # can never cross continue first = len(seq1) - max(l1p1, l2p1) second = len(seq1) - min(l1p1, l2p1) if (l1p1 < l2p1) == (l1p2 < l2p2): # not crossed initially; # will cross when first # one permutes off end # and uncross when 2nd # one permutes off ranges = [(first, second)] else: # crossed initially ranges = [(0, first)] if second < len(seq1): ranges.append((second, len(seq1))) for start, stop in ranges: for i in range(start, stop): crossings[i] += 1 first = len(seq2) - max(l1p2, l2p2) second = len(seq2) - min(l1p2, l2p2) if (l1p1 < l2p1) == (l1p2 < l2p2): # not crossed initially; # will cross when first # one permutes off end # and uncross when 2nd # one permutes off ranges = [(first, second)] else: # crossed initially ranges = [(0, first)] if second < len(seq2): ranges.append((second, len(seq2))) for start, stop in ranges: for i in range(start, stop): c2[i] += 1 # to avoid dangling ends causing bogus # "circularities", the zero permutation has # to be 
beaten significantly for a # circularity to be declared least = crossings[0] - 5 * numSamples / len(seq1) permute1 = [0] for i, crossed in enumerate(crossings): if crossed < least: least = crossed permute1 = [i] elif crossed == least: permute1.append(i) least = c2[0] - 5 * numSamples / len(seq2) permute2 = [0] for i, crossed in enumerate(c2): if crossed < least: least = crossed permute2 = [i] elif crossed == least: permute2.append(i) if permute1[0] != 0 and permute2[0] != 0: circularPairs[(seq1, seq2)] = (permute1[0], permute2[0]) replyobj.info( "%s %s / %s %s: permute %s by %d or %s by %d\n" % (seq1.molecule.name, seq1.name, seq2.molecule.name, seq2.name, seq1.molecule.name, permute1[0], seq2.molecule.name, permute2[0])) else: findPruneCrosslinks(allLinks, pairings, seq1, seq2, linkList, links1, links2, tag=tag, statusPrefix=statusPrefix) if circular: replyobj.status("%sMinimizing circularities\n" % statusPrefix, blankAfter=0) circulars = {} while 1: circularVotes = {} for seq1, seq2 in circularPairs.keys(): if seq1 in circulars or seq2 in circulars: continue circularVotes[seq1] = circularVotes.get(seq1, 0) + 1 circularVotes[seq2] = circularVotes.get(seq2, 0) + 1 if not circularVotes: break candidates = circularVotes.keys() candidates.sort( lambda c1, c2: cmp(circularVotes[c2], circularVotes[c1])) circulars[candidates[0]] = True # has to be circular against every non-circular sequence # (avoid spurious circularities) ejected = True while ejected: ejected = False for cseq in circulars: for seq in chains: if seq in circulars: continue if (cseq, seq) not in circularPairs \ and (seq, cseq) not in circularPairs: del circulars[cseq] ejected = True break if ejected: break for seq in chains: seq.circular = seq in circulars if seq.circular: replyobj.info("circular: %s\n" % seq.molecule.name) replyobj.status("%sAdjusting links for circular sequences\n" % statusPrefix, blankAfter=0) for seq1, seq2 in holdData.keys(): if not seq1.circular and not seq2.circular: continue 
links1, links2, linkList = holdData[(seq1, seq2)] use1 = seq1.circular if seq1.circular and seq2.circular: if (seq1, seq2) in circularPairs: permute1, permute2 = circularPairs[(seq1, seq2)] elif (seq2, seq1) in circularPairs: permute2, permute1 in circularPairs[(seq2, seq1)] else: continue use1 = len(seq1) - permute1 \ < len(seq2) - permute2 if use1: adjust, other = seq1, seq2 links = links1 else: adjust, other = seq2, seq1 links = links2 if (adjust, other) in circularPairs: permute = circularPairs[(adjust, other)][0] elif (other, adjust) in circularPairs: permute = circularPairs[(other, adjust)][1] else: continue fixup = len(adjust) - permute for link in linkList[:]: # append happens in loop if link.info[0].seq == adjust: myEnd = link.info[0] otherEnd = link.info[1] else: myEnd = link.info[1] otherEnd = link.info[0] if myEnd.pos >= fixup: continue links[myEnd.pos].remove(link) myEnd.pos += len(adjust) links[myEnd.pos].append(link) for i, seqs in enumerate(holdData.keys()): seq1, seq2 = seqs links1, links2, linkList = holdData[seqs] findPruneCrosslinks(allLinks, pairings, seq1, seq2, linkList, links1, links2, tag=tagTmpl % (i + 1), statusPrefix=statusPrefix) class Column: def __init__(self, positions): if isinstance(positions, Column): self.positions = positions.positions.copy() else: self.positions = positions def contains(self, seq, pos): return seq in self.positions \ and self.positions[seq] == pos def participation(self): p = 0 members = self.positions.items() for i, sp in enumerate(members): seq1, pos1 = sp if circular and seq1.circular \ and pos1 >= len(seq1): pos1 -= len(seq1) pa1 = pas[seq1][pos1] for seq2, pos2 in members[i + 1:]: if circular and seq2.circular \ and pos2 >= len(seq2): pos2 -= len(seq2) pa2 = pas[seq2][pos2] val = cutoff - pa1.xformCoord().distance(pa2.xformCoord()) p += val return p def value(self): value = None info = self.positions.items() for i, sp in enumerate(info): seq1, pos1 = sp if circular and seq1.circular \ and pos1 >= 
len(seq1): pos1 -= len(seq1) pa1 = pas[seq1][pos1] for seq2, pos2 in info[i + 1:]: if circular and seq2.circular \ and pos2 >= len(seq2): pos2 -= len(seq2) pa2 = pas[seq2][pos2] val = cutoff - pa1.xformCoord().distance(pa2.xformCoord()) if value is None: value = val continue value = valFunc(value, val) if valFunc == min and value < 0: break if valFunc == min and value < 0: break return value def __str__(self): from chimera import SelResidue def circComp(seq, pos): if circular and seq.circular and pos >= len(seq): return pos - len(seq) return pos return "Column[" + ",".join( map( lambda i: "(%s %s, %s)" % (i[0].molecule.name, i[0].name, i[0].residues[circComp( i[0], i[1])].oslIdent(SelResidue)), self.positions.items())) + "]" columns = {} partialOrder = {} for seq in chains: columns[seq] = {} partialOrder[seq] = [] seen = {} while allLinks: replyobj.status("%sForming columns (%d links to check)\n" % (statusPrefix, len(allLinks))) if allLinks[-1].val != max(map(lambda l: l.val, allLinks)): allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val)) if valFunc == min: while len(allLinks) > 1 \ and allLinks[0].val <= 0: allLinks.pop(0) link = allLinks.pop() if link.val < 0: break key = tuple(link.info) if key in seen: continue seen[key] = 1 for info in link.info: for seq, pos in info.positions.items(): pairings[seq][pos].remove(link) checkInfo = {} checkInfo.update(link.info[0].positions) checkInfo.update(link.info[1].positions) okay = True for seq in link.info[0].positions.keys(): if seq in link.info[1].positions: okay = False break if not okay or not _check(checkInfo, partialOrder, chains): continue col = Column(checkInfo) for seq, pos in checkInfo.items(): po = partialOrder[seq] for i, pcol in enumerate(po): if pcol.positions[seq] > pos: break else: i = len(po) po.insert(i, col) cols = columns[seq] cols[col] = i for ncol in po[i + 1:]: cols[ncol] += 1 for info in link.info: for seq, pos in info.positions.items(): for l in pairings[seq][pos]: if l.info[0].contains(seq, pos): 
base, connect = l.info else: connect, base = l.info l.info = [col, connect] l.evaluate() for cseq, cpos in col.positions.items(): if base.contains(cseq, cpos): continue pairings[cseq][cpos].append(l) if isinstance(info, Column): for seq in info.positions.keys(): seqCols = columns[seq] opos = seqCols[info] po = partialOrder[seq] partialOrder[seq] = po[:opos] \ + po[opos+1:] for pcol in partialOrder[seq][opos:]: seqCols[pcol] -= 1 del seqCols[info] replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0) orderedColumns = [] while 1: # find an initial sequence column that can lead for seq in partialOrder.keys(): try: col = partialOrder[seq][0] except IndexError: from chimera import UserError raise UserError("Cannot generate alignment with" " %s %s because it is not superimposed" " on the other structures" % (seq.molecule.name, seq.name)) for cseq in col.positions.keys(): if partialOrder[cseq][0] != col: break else: # is initial element for all sequences involved break else: break orderedColumns.append(col) for cseq in col.positions.keys(): partialOrder[cseq].pop(0) if not partialOrder[cseq]: del partialOrder[cseq] # try to continue using this sequence as long as possible while seq in partialOrder: col = partialOrder[seq][0] for cseq in col.positions.keys(): if partialOrder[cseq][0] != col: break else: orderedColumns.append(col) for cseq in col.positions.keys(): partialOrder[cseq].pop(0) if not partialOrder[cseq]: del partialOrder[cseq] continue break from NeedlemanWunsch import cloneSeq clone = {} current = {} for seq in chains: clone[seq] = cloneSeq(seq) current[seq] = -1 if circular: clone[seq].circular = seq.circular if seq.circular: clone[seq].name = "2 x " + clone[seq].name if not orderedColumns: replyobj.status("") replyobj.error("No residues satisfy distance constraint" " for column!\n") return # for maximum benefit from the "column squeezing" step that follows, # we need to add in the one-residue columns whose position is # well-determined 
newOrdered = [orderedColumns[0]] for col in orderedColumns[1:]: gap = None for seq, pos in newOrdered[-1].positions.items(): if seq not in col.positions: continue if col.positions[seq] == pos + 1: continue if gap is not None: # not well-determined gap = None break gap = seq if gap is not None: for pos in range(newOrdered[-1].positions[gap] + 1, col.positions[gap]): newOrdered.append(Column({gap: pos})) newOrdered.append(col) orderedColumns = newOrdered # Squeeze column where possible: # # Find pairs of columns where the left-hand one could accept # one or more residues from the right-hand one # # Keep looking right (if necessary) to until each row has at # least one gap, but no more than one # # Squeeze colIndex = 0 while colIndex < len(orderedColumns) - 1: replyobj.status("%sMerging columns (%d/%d)\n" % (statusPrefix, colIndex, len(orderedColumns) - 1), blankAfter=0) l, r = orderedColumns[colIndex:colIndex + 2] squeezable = False for seq in r.positions.keys(): if seq not in l.positions: squeezable = True break if not squeezable: colIndex += 1 continue gapInfo = {} for seq in chains: if seq in l.positions: gapInfo[seq] = (False, l.positions[seq], 0) else: gapInfo[seq] = (True, None, 1) squeezable = False redo = False rcols = 0 for r in orderedColumns[colIndex + 1:]: rcols += 1 # look for indeterminate residues first, so we can # potentially form a single-residue column to complete # the squeeze indeterminates = False for seq, rightPos in r.positions.items(): inGap, leftPos, numGaps = gapInfo[seq] if leftPos is None or rightPos == leftPos + 1: continue if numGaps == 0: indeterminates = True continue for oseq, info in gapInfo.items(): if oseq == seq: continue inGap, pos, numGaps = info if inGap: continue if numGaps != 0: break else: # squeezable orderedColumns.insert(colIndex + rcols, Column({seq: leftPos + 1})) redo = True break indeterminates = True if redo: break if indeterminates: break for seq, info in gapInfo.items(): inGap, leftPos, numGaps = info if seq in 
r.positions: rightPos = r.positions[seq] if inGap: # closing a gap gapInfo[seq] = (False, rightPos, 1) else: # non gap gapInfo[seq] = (False, rightPos, numGaps) else: if not inGap and numGaps > 0: # two gaps: no-no break gapInfo[seq] = (True, leftPos, 1) else: # check if squeeze criteria fulfilled for inGap, leftPos, numGaps in gapInfo.values(): if numGaps == 0: break else: squeezable = True break l = r continue break if redo: continue if not squeezable: colIndex += 1 continue # squeeze replaceCols = [ Column(c) for c in orderedColumns[colIndex:colIndex + rcols + 1] ] for i, col in enumerate(replaceCols[:-1]): rcol = replaceCols[i + 1] for seq, pos in rcol.positions.items(): if seq in col.positions: continue col.positions[seq] = pos del rcol.positions[seq] if col.value() < 0: break else: assert (not replaceCols[-1].positions) ov = 0 for col in orderedColumns[colIndex:colIndex + rcols + 1]: ov += col.participation() nv = 0 for col in replaceCols[:-1]: nv += col.participation() if ov >= nv: colIndex += 1 continue orderedColumns[colIndex:colIndex+rcols+1] = \ replaceCols[:-1] if colIndex > 0: colIndex -= 1 continue colIndex += 1 replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0) for col in orderedColumns: for seq, offset in col.positions.items(): curPos = current[seq] diff = offset - curPos if diff < 2: continue if circular and seq.circular: if curPos >= len(seq): frag = seq[curPos - len(seq) + 1:offset - len(seq)] elif offset >= len(seq): frag = seq[curPos + 1:] frag += seq[:offset - len(seq)] else: frag = seq[curPos + 1:offset] else: frag = seq[curPos + 1:offset] clone[seq].append(frag) gap = gapChar * (diff - 1) for cseq in clone.values(): if cseq == clone[seq]: continue cseq.append(gap) for seq in chains: try: offset = col.positions[seq] if circular and seq.circular \ and offset >= len(seq): char = seq[offset - len(seq)] else: char = seq[offset] except KeyError: clone[seq].append(gapChar) continue clone[seq].append(char) current[seq] = offset 
for seq, offset in current.items(): if circular and seq.circular: if offset < 2 * len(seq) - 1: if offset < len(seq) - 1: frag = seq[offset + 1:] + seq[:] else: frag = seq[offset - len(seq) + 1:] else: continue else: if offset == len(seq) - 1: continue frag = seq[offset + 1:] gap = gapChar * len(frag) for cseq in clone.values(): if cseq == clone[seq]: cseq.append(frag) else: cseq.append(gap) clones = clone.values() from chimera.misc import oslModelCmp clones.sort( lambda a, b: oslModelCmp(a.molecule.oslIdent(), b.molecule.oslIdent())) replyobj.status("%sDone\n" % statusPrefix) return clones