Пример #1
0
def cmdCombine(mols, name="combination", newChainIDs=True, log=True,
				close=False, modelId=None, refMol=None):
	from chimera.misc import oslModelCmp
	mols.sort(lambda m1, m2: oslModelCmp(m1.oslIdent(), m2.oslIdent()))

	from Midas import MidasError
	if not mols:
		raise MidasError("No molecules specified")
	if refMol == None:
		refMol = mols[:1]
	if len(refMol) == 0:
		raise MidasError("No reference molecule specified")
	elif len(refMol) > 1:
		raise MidasError("Multiple reference molecules specified")
	refMol = refMol[0]
	if modelId is not None and type(modelId) != int:
		try:
			modelId = int(modelId[1:])
		except:
			raise MidasError("modelId value must be integer")
	from chimera import suppressNewMoleculeProcessing, \
					restoreNewMoleculeProcessing
	suppressNewMoleculeProcessing()
	try:
		m = combine(mols, refMol, newChainIDs=newChainIDs, log=log)
	except CombineError, v:
		restoreNewMoleculeProcessing()
		raise MidasError(v)
def cmdCombine(mols,
               name="combination",
               newChainIDs=True,
               log=True,
               close=False,
               modelId=None,
               refMol=None):
    from chimera.misc import oslModelCmp
    mols.sort(lambda m1, m2: oslModelCmp(m1.oslIdent(), m2.oslIdent()))

    from Midas import MidasError
    if not mols:
        raise MidasError("No molecules specified")
    if refMol == None:
        refMol = mols[:1]
    if len(refMol) == 0:
        raise MidasError("No reference molecule specified")
    elif len(refMol) > 1:
        raise MidasError("Multiple reference molecules specified")
    refMol = refMol[0]
    if modelId is not None and type(modelId) != int:
        try:
            modelId = int(modelId[1:])
        except:
            raise MidasError("modelId value must be integer")
    from chimera import suppressNewMoleculeProcessing, \
        restoreNewMoleculeProcessing
    suppressNewMoleculeProcessing()
    try:
        m = combine(mols, refMol, newChainIDs=newChainIDs, log=log)
    except CombineError, v:
        restoreNewMoleculeProcessing()
        raise MidasError(v)
Пример #3
0
	def _newModelsCB(self, trigName=None, myData=None, models=None):
		"""Lay out one grid row of association widgets per open molecule.

		The three parameters are not used by the code in this method.
		Molecules already present in self.assocInfo have their existing
		widgets re-gridded on their current row, or destroyed (and the
		entry dropped) when the molecule no longer has sequences.
		Molecules seen for the first time get a name label, a chain
		menu (only when there is more than one chain), a menu of the
		alignment's sequences, and a frame holding a checkbutton.
		"""
		mols = [m for m in chimera.openModels.list()
					if isinstance(m, chimera.Molecule)]
		mols.sort(lambda a, b: oslModelCmp(a.oslIdent(), b.oslIdent()))
		for index, mol in enumerate(mols):
			# row 0 is left free; molecule rows start at 1
			gridRow = index + 1
			chains = mol.sequences()
			noChains = len(chains) == 0

			if mol in self.assocInfo:
				# known molecule: move or discard its widgets
				known = self.assocInfo[mol]['widgets']
				for col, widget in enumerate(known):
					if not widget:
						continue
					widget.grid_forget()
					if noChains:
						widget.destroy()
					else:
						widget.grid(row=gridRow,
							column=col, sticky='w')
				if noChains:
					del self.assocInfo[mol]
				continue

			if noChains:
				continue

			# new molecule: watch its chains and build its row
			for chain in chains:
				chain.triggers.addHandler(chain.TRIG_DELETE,
						self._chainDeletionCB, None)
			rowWidgets = []
			self.assocInfo[mol] = {'widgets': rowWidgets}

			label = Tkinter.Label(self.parent,
				text="%s (%s)" % (mol.name, mol.oslIdent()))
			rowWidgets.append(label)
			label.grid(row=gridRow, column=0, sticky='w')

			# the chain menu slot holds None for single-chain molecules
			chainMenu = None
			if len(chains) > 1:
				chainMenu = Pmw.OptionMenu(self.parent,
					items=[c.name for c in chains])
				chainMenu.grid(row=gridRow, column=1, sticky='w')
			rowWidgets.append(chainMenu)

			# the callback receives this row's widget list
			seqNames = ['none'] + [s.name for s in self.mav.seqs]
			seqMenu = Pmw.OptionMenu(self.parent, items=seqNames,
					command=lambda value, rw=rowWidgets:
					self._assocMenuCB(value, rw))
			rowWidgets.append(seqMenu)
			seqMenu.grid(row=gridRow, column=2, sticky='w')

			frame = Tkinter.Frame(self.parent)
			rowWidgets.append(frame)
			frame.grid(row=gridRow, column=3, sticky='w')
			frame.variable = Tkinter.IntVar(frame)
			frame.variable.set(False)
			frame.button = Tkinter.Checkbutton(frame,
				variable=frame.variable,
				text="associate with best match")
			frame.button.grid()
Пример #4
0
		def sortFunc(m1, m2):
			"""cmp-style ordering for a mix of strings and models.

			Two strings compare with cmp(); two non-strings compare
			by OSL identifier; in a mixed pair the non-string sorts
			first.
			NOTE(review): confirm that strings are really meant to
			sort after models rather than before them.
			"""
			firstIsStr = isinstance(m1, basestring)
			secondIsStr = isinstance(m2, basestring)
			if firstIsStr and secondIsStr:
				return cmp(m1, m2)
			if secondIsStr:
				# m1 is not a string: it goes first
				return -1
			if firstIsStr:
				# m2 is not a string: it goes first
				return 1
			return oslModelCmp(m1.oslIdent(), m2.oslIdent())
Пример #5
0
	def __init__(self, listFunc=chimera.openModels.list, sortFunc=None,
					filtFunc=lambda m: True, **kw):
		"""Store the list/sort/filter callables and leftover keywords.

		listFunc -- stored as self.listFunc
		sortFunc -- cmp-style comparison stored as self.sortFunc; when
			None, a comparison of the items' OSL identifiers via
			oslModelCmp is used
		filtFunc -- stored as self.filtFunc; the default accepts
			everything
		kw -- remaining keyword arguments, kept in self._remKw
		"""
		self._remKw = kw
		self.filtFunc = filtFunc
		self.listFunc = listFunc
		if sortFunc is None:
			from chimera.misc import oslModelCmp
			self.sortFunc = lambda a, b: oslModelCmp(a.oslIdent(),
								b.oslIdent())
		else:
			self.sortFunc = sortFunc
Пример #6
0
 def sortFunc(m1, m2):
     """cmp-style ordering for a mix of strings and models.

     Two strings compare with cmp(); two non-strings compare by OSL
     identifier; in a mixed pair the non-string sorts first.
     NOTE(review): confirm that strings are really meant to sort after
     models rather than before them.
     """
     firstIsStr = isinstance(m1, basestring)
     secondIsStr = isinstance(m2, basestring)
     if firstIsStr and secondIsStr:
         return cmp(m1, m2)
     if secondIsStr:
         # m1 is not a string: it goes first
         return -1
     if firstIsStr:
         # m2 is not a string: it goes first
         return 1
     return oslModelCmp(m1.oslIdent(), m2.oslIdent())
Пример #7
0
    def _cmpName(self, g1, g2):
        """cmp-style comparison of two groups.

        When both groups are chimera.ChainTrace instances they are ordered
        by the molecule-level OSL identifier of the first atom of their
        first pseudobond; otherwise by the result of _mapName().
        """
        # _mapName is evaluated for both groups up front, whichever
        # comparison ends up being used
        names = (_mapName(g1), _mapName(g2))
        bothTraces = isinstance(g1, chimera.ChainTrace) \
            and isinstance(g2, chimera.ChainTrace)
        if not bothTraces:
            return cmp(*names)

        from chimera.misc import oslModelCmp
        idents = [
            g.pseudoBonds[0].atoms[0].oslIdent(end=chimera.SelMolecule)
            for g in (g1, g2)
        ]
        return oslModelCmp(*idents)
Пример #8
0
 def __init__(self,
              listFunc=chimera.openModels.list,
              sortFunc=None,
              filtFunc=lambda m: True,
              **kw):
     """Remember the list/sort/filter callables and extra keywords.

     listFunc is kept as self.listFunc and filtFunc (default: accept
     everything) as self.filtFunc.  sortFunc is a cmp-style comparison
     kept as self.sortFunc; if it is None, items are compared by OSL
     identifier with oslModelCmp.  Any other keyword arguments are kept
     in self._remKw.
     """
     self.listFunc, self.filtFunc = listFunc, filtFunc
     if sortFunc is None:
         from chimera.misc import oslModelCmp
         chosen = lambda a, b: oslModelCmp(a.oslIdent(), b.oslIdent())
     else:
         chosen = sortFunc
     self.sortFunc = chosen
     self._remKw = kw
Пример #9
0
	def _cmpName(self, g1, g2):
		"""cmp-style comparison of two groups.

		When both groups are chimera.ChainTrace instances they are
		ordered by the molecule-level OSL identifier of the first atom
		of their first pseudobond; otherwise by the result of
		_mapName().
		"""
		# _mapName is evaluated for both groups up front, whichever
		# comparison ends up being used
		names = (_mapName(g1), _mapName(g2))
		bothTraces = isinstance(g1, chimera.ChainTrace) \
				and isinstance(g2, chimera.ChainTrace)
		if not bothTraces:
			return cmp(*names)

		from chimera.misc import oslModelCmp
		idents = [g.pseudoBonds[0].atoms[0].oslIdent(
				end=chimera.SelMolecule) for g in (g1, g2)]
		return oslModelCmp(*idents)
Пример #10
0
 def _itemNames(self):
     """Return the full names of all accepted sequences of open molecules.

     Rebuilds self.itemMap (name -> sequence) and self.valueMap
     (sequence -> name) from scratch.  Molecules are visited in OSL
     identifier order; a sequence is included only if self.filtFunc
     returns a true value for it.
     """
     nameToSeq = self.itemMap = {}
     seqToName = self.valueMap = {}
     mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
     from chimera.misc import oslModelCmp
     mols.sort(lambda a, b: oslModelCmp(a.oslIdent(), b.oslIdent()))
     names = []
     for mol in mols:
         for seq in mol.sequences():
             if self.filtFunc(seq):
                 label = seq.fullName()
                 nameToSeq[label] = seq
                 seqToName[seq] = label
                 names.append(label)
     return names
Пример #11
0
	def _itemNames(self):
		"""Return the full names of all accepted sequences of open molecules.

		Rebuilds self.itemMap (name -> sequence) and self.valueMap
		(sequence -> name) from scratch.  Molecules are visited in OSL
		identifier order; a sequence is included only if self.filtFunc
		returns a true value for it.
		"""
		nameToSeq = self.itemMap = {}
		seqToName = self.valueMap = {}
		mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
		from chimera.misc import oslModelCmp
		mols.sort(lambda a, b: oslModelCmp(a.oslIdent(), b.oslIdent()))
		names = []
		for mol in mols:
			for seq in mol.sequences():
				if self.filtFunc(seq):
					label = seq.fullName()
					nameToSeq[label] = seq
					seqToName[seq] = label
					names.append(label)
		return names
Пример #12
0
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
	"""Build a multiple sequence alignment from residue proximity.

	Residues are represented by their principal atoms (as returned by
	chimera.misc.principalAtom); two residues of different chains are
	linked when their principal atoms are closer than 'cutoff', and a
	link's value is 'cutoff - distance'.  Links are pruned by
	findPruneCrosslinks(), merged into columns, the columns are put in
	a consistent order, neighboring columns are squeezed together where
	that increases the summed pairwise value, and finally gapped
	copies of the sequences are composed.

	chains -- sequences to align; each is used via .residues,
		.molecule, .name, len() and slicing
	cutoff -- distance cutoff for linking two residues
	matchType -- "all" scores a column by its worst (min) pairwise
		value; any other value scores by the best (max)
	gapChar -- string appended for gap positions
	circular -- when true, look for circular permutations; this also
		sets a 'circular' attribute on every sequence in 'chains'
	statusPrefix -- text prepended to every status message

	Returns the list of cloned, gapped sequences sorted by molecule
	OSL identifier, or None (after reporting an error through
	replyobj) when no column could be formed.  Raises
	chimera.UserError when some sequence takes part in no column.
	"""
	# create list of pairings between sequences
	# and prune to be monotonic

	# search trees already built, keyed by sequence
	trees = {}

	if matchType == "all":
		valFunc = min
	else:
		valFunc = max

	# for each pair, go through the second chain residue by residue
	# and compile crosslinks to other chain.  As links are compiled,
	# figure out what previous links are crossed and keep a running
	# "penalty" function for links based on what they cross.
	# Sort links by penalty and keep pruning worst link until no links
	# cross.
	from chimera.misc import principalAtom
	from CGLutil.AdaptiveTree import AdaptiveTree

	class EndPoint:
		"""One end of a link: a single position in a single sequence."""
		def __init__(self, seq, pos):
			self.seq = seq
			self.pos = pos

		def contains(self, seq, pos):
			return seq == self.seq and pos == self.pos

		def __getattr__(self, attr):
			# offer the same 'positions' mapping that Column has
			if attr == "positions":
				return { self.seq: self.pos }
			raise AttributeError(
				"No such EndPoint attribute: %s" % attr)
		def __str__(self):
			from chimera import SelResidue
			if circular and self.pos >= len(self.seq):
				insert = " (circular 2nd half)"
				pos = self.pos - len(self.seq)
			else:
				pos = self.pos
				insert = ""
			return "EndPoint[(%s %s, %s%s)]" % (self.seq.molecule.name, self.seq.name, self.seq.residues[pos].oslIdent(SelResidue), insert)

	class Link:
		"""A scored connection between two EndPoint/Column objects."""
		def __init__(self, info1, info2, val, doPenalty=False):
			self.info = [info1, info2]
			self.val = val
			if doPenalty:
				self.penalty = 0
				self.crosslinks = []

		def contains(self, seq, pos):
			# fixed: the second call used to be given 'seq. pos'
			# (an attribute access) instead of the two arguments
			return self.info[0].contains(seq, pos) \
				or self.info[1].contains(seq, pos)

		def evaluate(self):
			"""Recompute self.val over all position pairs of both ends."""
			self.val = None
			for s1, p1 in self.info[0].positions.items():
				# positions past the end belong to the second
				# copy of a circular sequence
				if circular and s1.circular and p1 >= len(s1):
					p1 -= len(s1)
				pa1 = pas[s1][p1]
				for s2, p2 in self.info[1].positions.items():
					if circular and s2.circular \
					and p2 >= len(s2):
						p2 -= len(s2)
					pa2 = pas[s2][p2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					if self.val is None:
						self.val = val
						continue
					self.val = valFunc(self.val, val)
					# a min that is already negative
					# cannot recover; stop early
					if valFunc == min and self.val < 0:
						break
				if valFunc == min and self.val < 0:
					break

		def __str__(self):
			return "Link(%s, %s)" % tuple(map(str, self.info))

	allLinks = []

	# pas: sequence -> principal atom (or None) per residue
	# pairings: sequence -> one list of links per position
	pas = {}
	pairings = {}
	replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
							blankAfter=0)
	for seq in chains:
		seqpas = []
		pairing = []
		for res in seq.residues:
			pa = principalAtom(res)
			pairing.append([])
			if circular:
				# second slot for the doubled (circular) copy
				pairing.append([])
			if not pa:
				replyobj.warning("Cannot determine principal "
				  "atom for residue %s\n" % res.oslIdent())
				seqpas.append(None)
				continue
			seqpas.append(pa)
		pas[seq] = seqpas
		pairings[seq] = pairing


	if circular:
		circularPairs = {}
		holdData = {}
	# progress tag "(k/total)" where total is the number of pairs
	tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains)-1) / 2)
	num = 0
	for i, seq1 in enumerate(chains):
		len1 = len(pairings[seq1])
		for seq2 in chains[i+1:]:
			num += 1
			tag = tagTmpl % num
			len2 = len(pairings[seq2])
			links1 = []
			for i in range(len1):
				links1.append([])
			links2 = []
			for i in range(len2):
				links2.append([])
			linkList = []
			replyobj.status("%sBuilding search tree %s\n"
					% (statusPrefix, tag), blankAfter=0)
			try:
				tree = trees[seq2]
			except KeyError:
				xyzs = []
				data = []
				for i, pa in enumerate(pas[seq2]):
					if pa is None:
						continue
					xyzs.append(pa.xformCoord().data())
					data.append((i, pa))
				tree = AdaptiveTree(xyzs, data, cutoff)
				# fixed: remember the tree so it is not rebuilt
				# for every pair that has seq2 as second member
				trees[seq2] = tree
			replyobj.status("%sSearching tree, building links %s\n"
					% (statusPrefix, tag), blankAfter=0)
			for i1, pa1 in enumerate(pas[seq1]):
				if pa1 is None:
					continue
				crd1 = pa1.xformCoord()
				matches = tree.searchTree(crd1.data(), cutoff)
				for i2, pa2 in matches:
					dist = crd1.distance(pa2.xformCoord())
					val = cutoff - dist
					if val <= 0:
						continue
					link = Link(EndPoint(seq1, i1),
						EndPoint(seq2, i2), val,
						doPenalty=True)
					links1[i1].append(link)
					links2[i2].append(link)
					linkList.append(link)

			if circular:
				replyobj.status("%sDetermining circularity %s\n"
					% (statusPrefix, tag), blankAfter=0)
				# pruning is deferred until circularity is known
				holdData[(seq1, seq2)] = (links1, links2,
								linkList)
				if len(linkList) < 2:
					replyobj.info("Less than 2 close"
						" residues for %s and %s\n"
						% (seq1.molecule.name,
						seq2.molecule.name))
					continue
				# determine optimal permutation of 1st seq;
				#
				# for each pair of links, find the permutation
				# where they begin to cross/uncross.  Use an
				# array to tabulate number of crossings for
				# each permutation.
				crossings = [0] * len(seq1)
				c2 = [0] * len(seq2)
				from random import sample
				numSamples = 5 * (len(seq1)+len(seq2))
				for ignore in range(numSamples):
					link1, link2 = sample(linkList, 2)
					l1p1 = link1.info[0].pos
					l1p2 = link1.info[1].pos
					l2p1 = link2.info[0].pos
					l2p2 = link2.info[1].pos
					if l1p1 == l2p1 \
					or l1p2 == l2p2:
						# can never cross
						continue
					first = len(seq1) - max(l1p1,
								l2p1)
					second = len(seq1) - min(l1p1,
								l2p1)
					if (l1p1 < l2p1) == (
							l1p2 < l2p2):
						# not crossed initially;
						# will cross when first
						# one permutes off end
						# and uncross when 2nd
						# one permutes off
						ranges = [(first,
							second)]
					else:
						# crossed initially
						ranges = [(0, first)]
						if second < len(seq1):
							ranges.append(
							(second,
							len(seq1)))
					for start, stop in ranges:
						for i in range(start,
								stop):
							crossings[i] +=1
					# same tabulation for permuting seq2
					first = len(seq2) - max(l1p2,
								l2p2)
					second = len(seq2) - min(l1p2,
								l2p2)
					if (l1p1 < l2p1) == (
							l1p2 < l2p2):
						# not crossed initially;
						# will cross when first
						# one permutes off end
						# and uncross when 2nd
						# one permutes off
						ranges = [(first,
							second)]
					else:
						# crossed initially
						ranges = [(0, first)]
						if second < len(seq2):
							ranges.append(
							(second,
							len(seq2)))
					for start, stop in ranges:
						for i in range(start,
								stop):
							c2[i] +=1
				# to avoid dangling ends causing bogus
				# "circularities", the zero permutation has
				# to be beaten significantly for a
				# circularity to be declared
				least = crossings[0] - 5*numSamples / len(seq1)
				permute1 = [0]
				for i, crossed in enumerate(crossings):
					if crossed < least:
						least = crossed
						permute1 = [i]
					elif crossed == least:
						permute1.append(i)
				least = c2[0] - 5*numSamples / len(seq2)
				permute2 = [0]
				for i, crossed in enumerate(c2):
					if crossed < least:
						least = crossed
						permute2 = [i]
					elif crossed == least:
						permute2.append(i)
				if permute1[0] != 0 and permute2[0] != 0:
					circularPairs[(seq1, seq2)] = (
						permute1[0], permute2[0])
					replyobj.info("%s %s / %s %s: permute %s by %d or %s by %d\n" % (seq1.molecule.name, seq1.name, seq2.molecule.name, seq2.name, seq1.molecule.name, permute1[0], seq2.molecule.name, permute2[0]))

			else:
				findPruneCrosslinks(allLinks, pairings, seq1,
					seq2, linkList, links1, links2, tag=tag,
					statusPrefix=statusPrefix)

	if circular:
		replyobj.status("%sMinimizing circularities\n" % statusPrefix,
							blankAfter=0)
		# repeatedly declare circular the sequence that appears in
		# the most circular pairs not yet involving a circular one
		circulars = {}
		while 1:
			circularVotes = {}
			for seq1, seq2 in circularPairs.keys():
				if seq1 in circulars or seq2 in circulars:
					continue
				circularVotes[seq1] = circularVotes.get(seq1,
									0) + 1
				circularVotes[seq2] = circularVotes.get(seq2,
									0) + 1
			if not circularVotes:
				break
			candidates = circularVotes.keys()
			candidates.sort(lambda c1, c2: cmp(circularVotes[c2],
							circularVotes[c1]))
			circulars[candidates[0]] = True

		# has to be circular against every non-circular sequence
		# (avoid spurious circularities)
		ejected = True
		while ejected:
			ejected = False
			for cseq in circulars:
				for seq in chains:
					if seq in circulars:
						continue
					if (cseq, seq) not in circularPairs \
					and (seq, cseq) not in circularPairs:
						# the loops are left right after
						# this deletion, so iterating
						# 'circulars' stays safe
						del circulars[cseq]
						ejected = True
						break
				if ejected:
					break

		for seq in chains:
			seq.circular = seq in circulars
			if seq.circular:
				replyobj.info("circular: %s\n"
							% seq.molecule.name)
		replyobj.status("%sAdjusting links for circular sequences\n"
						% statusPrefix, blankAfter=0)
		for seq1, seq2 in holdData.keys():
			if not seq1.circular and not seq2.circular:
				continue
			links1, links2, linkList = holdData[(seq1, seq2)]
			use1 = seq1.circular
			if seq1.circular and seq2.circular:
				if (seq1, seq2) in circularPairs:
					permute1, permute2 = circularPairs[
								(seq1, seq2)]
				elif (seq2, seq1) in circularPairs:
					# fixed: this was 'in' instead of '=',
					# which bound nothing; the stored
					# tuple is ordered like its key
					permute2, permute1 = circularPairs[
								(seq2, seq1)]
				else:
					continue
				use1 =  len(seq1) - permute1 \
							< len(seq2) - permute2
			if use1:
				adjust, other = seq1, seq2
				links = links1
			else:
				adjust, other = seq2, seq1
				links = links2
			if (adjust, other) in circularPairs:
				permute = circularPairs[(adjust, other)][0]
			elif (other, adjust) in circularPairs:
				permute = circularPairs[(other, adjust)][1]
			else:
				continue
			# link ends before 'fixup' are moved into the second
			# copy of the adjusted sequence
			fixup = len(adjust) - permute
			for link in linkList[:]: # append happens in loop
				if link.info[0].seq == adjust:
					myEnd = link.info[0]
					otherEnd = link.info[1]
				else:
					myEnd = link.info[1]
					otherEnd = link.info[0]
				if myEnd.pos >= fixup:
					continue
				links[myEnd.pos].remove(link)
				myEnd.pos += len(adjust)
				links[myEnd.pos].append(link)

		for i, seqs in enumerate(holdData.keys()):
			seq1, seq2 = seqs
			links1, links2, linkList = holdData[seqs]
			findPruneCrosslinks(allLinks, pairings, seq1, seq2,
				linkList, links1, links2, tag=tagTmpl % (i+1),
				statusPrefix=statusPrefix)

	class Column:
		"""An alignment column: a mapping of sequence -> position."""
		def __init__(self, positions):
			if isinstance(positions, Column):
				# copy constructor
				self.positions = positions.positions.copy()
			else:
				self.positions = positions

		def contains(self, seq, pos):
			return seq in self.positions \
				and self.positions[seq] == pos

		def participation(self):
			"""Sum of (cutoff - distance) over all member pairs."""
			p = 0
			members = self.positions.items()
			for i, sp in enumerate(members):
				seq1, pos1 = sp
				if circular and seq1.circular \
				and pos1 >= len(seq1):
					pos1 -= len(seq1)
				pa1 = pas[seq1][pos1]
				for seq2, pos2 in members[i+1:]:
					if circular and seq2.circular \
					and pos2 >= len(seq2):
						pos2 -= len(seq2)
					pa2 = pas[seq2][pos2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					p += val
			return p

		def value(self):
			"""valFunc of (cutoff - distance) over member pairs.

			Returns None for a column with fewer than two members.
			"""
			value = None
			info = self.positions.items()
			for i, sp in enumerate(info):
				seq1, pos1 = sp
				if circular and seq1.circular \
				and pos1 >= len(seq1):
					pos1 -= len(seq1)
				pa1 = pas[seq1][pos1]
				for seq2, pos2 in info[i+1:]:
					if circular and seq2.circular \
					and pos2 >= len(seq2):
						pos2 -= len(seq2)
					pa2 = pas[seq2][pos2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					if value is None:
						value = val
						continue
					value = valFunc(value, val)
					if valFunc == min and value < 0:
						break
				if valFunc == min and value < 0:
					break
			return value

		def __str__(self):
			from chimera import SelResidue
			def circComp(seq, pos):
				if circular and seq.circular and pos>=len(seq):
					return pos - len(seq)
				return pos
			return "Column[" + ",".join(map(lambda i: "(%s %s, %s)" % (i[0].molecule.name, i[0].name, i[0].residues[circComp(i[0],i[1])].oslIdent(SelResidue)), self.positions.items())) + "]"

	# columns: sequence -> {column: index into partialOrder[sequence]}
	# partialOrder: sequence -> its columns ordered by position
	columns = {}
	partialOrder = {}
	for seq in chains:
		columns[seq] = {}
		partialOrder[seq] = []

	seen = {}
	while allLinks:
		replyobj.status("%sForming columns (%d links to check)\n"
						% (statusPrefix, len(allLinks)))
		# keep the best link at the end; re-sort only when needed
		if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
			allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
			if valFunc == min:
				while len(allLinks) > 1 \
				and allLinks[0].val <= 0:
					allLinks.pop(0)

		link = allLinks.pop()
		if link.val < 0:
			break
		key = tuple(link.info)
		if key in seen:
			continue
		seen[key] = 1
		for info in link.info:
			for seq, pos in info.positions.items():
				pairings[seq][pos].remove(link)

		checkInfo = {}
		checkInfo.update(link.info[0].positions)
		checkInfo.update(link.info[1].positions)
		# both ends must not contain the same sequence
		okay = True
		for seq in link.info[0].positions.keys():
			if seq in link.info[1].positions:
				okay = False
				break
		if not okay or not _check(checkInfo, partialOrder, chains):
			continue

		col = Column(checkInfo)
		for seq, pos in checkInfo.items():
			po = partialOrder[seq]
			# insertion index that keeps 'po' ordered by position
			for i, pcol in enumerate(po):
				if pcol.positions[seq] > pos:
					break
			else:
				i = len(po)
			po.insert(i, col)
			cols = columns[seq]
			cols[col] = i
			for ncol in po[i+1:]:
				cols[ncol] += 1
		for info in link.info:
			# re-point the remaining links of the merged ends at
			# the new column and rescore them
			for seq, pos in info.positions.items():
				for l in pairings[seq][pos]:
					if l.info[0].contains(seq, pos):
						base, connect = l.info
					else:
						connect, base = l.info
					l.info = [col, connect]
					l.evaluate()
					for cseq, cpos in col.positions.items():
						if base.contains(cseq, cpos):
							continue
						pairings[cseq][cpos].append(l)
			if isinstance(info, Column):
				# the old column was absorbed; drop it
				for seq in info.positions.keys():
					seqCols = columns[seq]
					opos = seqCols[info]
					po = partialOrder[seq]
					partialOrder[seq] = po[:opos] \
								+ po[opos+1:]
					for pcol in partialOrder[seq][opos:]:
						seqCols[pcol] -= 1
					del seqCols[info]

	replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

	orderedColumns = []
	while 1:
		# find an initial sequence column that can lead
		for seq in partialOrder.keys():
			try:
				col = partialOrder[seq][0]
			except IndexError:
				from chimera import UserError
				raise UserError("Cannot generate alignment with"
					" %s %s because it is not superimposed"
					" on the other structures" %
					(seq.molecule.name, seq.name))
			for cseq in col.positions.keys():
				if partialOrder[cseq][0] != col:
					break
			else:
				# is initial element for all sequences involved
				break
		else:
			break

		orderedColumns.append(col)
		for cseq in col.positions.keys():
			partialOrder[cseq].pop(0)
			if not partialOrder[cseq]:
				del partialOrder[cseq]
		# try to continue using this sequence as long as possible
		while seq in partialOrder:
			col = partialOrder[seq][0]
			for cseq in col.positions.keys():
				if partialOrder[cseq][0] != col:
					break
			else:
				orderedColumns.append(col)
				for cseq in col.positions.keys():
					partialOrder[cseq].pop(0)
					if not partialOrder[cseq]:
						del partialOrder[cseq]
				continue
			break

	from NeedlemanWunsch import cloneSeq
	# clone: sequence -> output copy; current: last position emitted
	clone = {}
	current = {}
	for seq in chains:
		clone[seq] = cloneSeq(seq)
		current[seq] = -1
		if circular:
			clone[seq].circular = seq.circular
			if seq.circular:
				clone[seq].name = "2 x " + clone[seq].name

	if not orderedColumns:
		replyobj.status("")
		replyobj.error("No residues satisfy distance constraint"
							" for column!\n")
		return

	# for maximum benefit from the "column squeezing" step that follows,
	# we need to add in the one-residue columns whose position is
	# well-determined
	newOrdered = [orderedColumns[0]]
	for col in orderedColumns[1:]:
		gap = None
		for seq, pos in newOrdered[-1].positions.items():
			if seq not in col.positions:
				continue
			if col.positions[seq] == pos + 1:
				continue
			if gap is not None:
				# not well-determined
				gap = None
				break
			gap = seq
		if gap is not None:
			for pos in range(newOrdered[-1].positions[gap]+1,
							col.positions[gap]):
				newOrdered.append(Column({gap: pos}))
		newOrdered.append(col)
	orderedColumns = newOrdered

	# Squeeze column where possible:
	#
	# 	Find pairs of columns where the left-hand one could accept
	#	one or more residues from the right-hand one
	#
	#	Keep looking right (if necessary) to until each row has at
	#	least one gap, but no more than one
	#
	#	Squeeze
	colIndex = 0
	while colIndex < len(orderedColumns) - 1:
		replyobj.status("%sMerging columns (%d/%d)\n" % (statusPrefix,
				colIndex, len(orderedColumns)-1), blankAfter=0)
		l, r = orderedColumns[colIndex:colIndex+2]
		squeezable = False
		for seq in r.positions.keys():
			if seq not in l.positions:
				squeezable = True
				break
		if not squeezable:
			colIndex += 1
			continue

		# per sequence: (currently in a gap, last position, gap count)
		gapInfo = {}
		for seq in chains:
			if seq in l.positions:
				gapInfo[seq] = (False, l.positions[seq], 0)
			else:
				gapInfo[seq] = (True, None, 1)

		squeezable = False
		redo = False
		rcols = 0
		for r in orderedColumns[colIndex+1:]:
			rcols += 1
			# look for indeterminate residues first, so we can
			# potentially form a single-residue column to complete
			# the squeeze
			indeterminates = False
			for seq, rightPos in r.positions.items():
				inGap, leftPos, numGaps = gapInfo[seq]
				if leftPos is None or rightPos == leftPos + 1:
					continue
				if numGaps == 0:
					indeterminates = True
					continue
				for oseq, info in gapInfo.items():
					if oseq == seq:
						continue
					inGap, pos, numGaps = info
					if inGap:
						continue
					if numGaps != 0:
						break
				else:
					# squeezable
					orderedColumns.insert(colIndex+rcols,
						Column({seq: leftPos+1}))
					redo = True
					break
				indeterminates = True

			if redo:
				break

			if indeterminates:
				break

			for seq, info in gapInfo.items():
				inGap, leftPos, numGaps = info
				if seq in r.positions:
					rightPos = r.positions[seq]
					if inGap:
						# closing a gap
						gapInfo[seq] = (False,
							rightPos, 1)
					else:
						# non gap
						gapInfo[seq] = (False,
							rightPos, numGaps)
				else:
					if not inGap and numGaps > 0:
						# two gaps: no-no
						break
					gapInfo[seq] = (True, leftPos, 1)

			else:
				# check if squeeze criteria fulfilled
				for inGap, leftPos, numGaps in gapInfo.values():
					if numGaps == 0:
						break
				else:
					squeezable = True
					break
				l = r
				continue
			break

		if redo:
			# a column was inserted; re-examine from the same index
			continue

		if not squeezable:
			colIndex += 1
			continue

		# squeeze
		# work on copies so the squeeze can still be rejected
		replaceCols = [Column(c)
			for c in orderedColumns[colIndex:colIndex+rcols+1]]
		for i, col in enumerate(replaceCols[:-1]):
			rcol = replaceCols[i+1]
			for seq, pos in rcol.positions.items():
				if seq in col.positions:
					continue
				col.positions[seq] = pos
				del rcol.positions[seq]
			if col.value() < 0:
				break
		else:
			assert(not replaceCols[-1].positions)
			# accept only if total participation improves
			ov = 0
			for col in orderedColumns[colIndex:colIndex+rcols+1]:
				ov += col.participation()
			nv = 0
			for col in replaceCols[:-1]:
				nv += col.participation()
			if ov >= nv:
				colIndex += 1
				continue
			orderedColumns[colIndex:colIndex+rcols+1] = \
							replaceCols[:-1]
			# step back: the previous column may now be squeezable
			if colIndex > 0:
				colIndex -= 1
			continue
		colIndex += 1

	replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
	for col in orderedColumns:
		# first emit residues skipped since the previous column,
		# padding every other sequence with gaps
		for seq, offset in col.positions.items():
			curPos = current[seq]
			diff = offset - curPos
			if diff < 2:
				continue
			if circular and seq.circular:
				if curPos >= len(seq):
					frag = seq[curPos-len(seq)+1:
							offset-len(seq)]
				elif offset >= len(seq):
					frag = seq[curPos+1:]
					frag += seq[:offset-len(seq)]
				else:
					frag = seq[curPos+1:offset]
			else:
				frag = seq[curPos+1:offset]
			clone[seq].append(frag)

			gap = gapChar * (diff - 1)
			for cseq in clone.values():
				if cseq == clone[seq]:
					continue
				cseq.append(gap)

		# then emit the column itself
		for seq in chains:
			try:
				offset = col.positions[seq]
				if circular and seq.circular \
				and offset >= len(seq):
					char = seq[offset-len(seq)]
				else:
					char = seq[offset]
			except KeyError:
				clone[seq].append(gapChar)
				continue
			clone[seq].append(char)
			current[seq] = offset

	# append whatever remains of each sequence after its last column
	for seq, offset in current.items():
		if circular and seq.circular:
			if offset < 2 * len(seq) - 1:
				if offset < len(seq) - 1:
					frag = seq[offset+1:] + seq[:]
				else:
					frag = seq[offset-len(seq)+1:]
			else:
				continue
		else:
			if offset == len(seq) - 1:
				continue
			frag = seq[offset+1:]
		gap = gapChar * len(frag)
		for cseq in clone.values():
			if cseq == clone[seq]:
				cseq.append(frag)
			else:
				cseq.append(gap)

	clones = clone.values()
	from chimera.misc import oslModelCmp
	clones.sort(lambda a, b: oslModelCmp(a.molecule.oslIdent(),
						b.molecule.oslIdent()))
	replyobj.status("%sDone\n" % statusPrefix)
	return clones
Пример #13
0
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    """Build a multiple sequence alignment from superimposed chains.

    Residues are represented by their principal atom.  For every pair of
    chains, links are created between residues whose principal atoms lie
    closer than ``cutoff``; crossing links are pruned by
    ``findPruneCrosslinks``; surviving links are merged greedily into
    columns (consistency checked by ``_check``); the columns are put into
    a single order, adjacent columns are squeezed together where that
    raises the total participation score, and finally gapped copies of
    the sequences are composed.

    Parameters:
      chains       -- list of sequence objects; the code uses ``len(seq)``,
                      indexing/slicing, ``seq.residues``, ``seq.molecule``
                      and ``seq.name``
      cutoff       -- distance cutoff; a pair scores ``cutoff - distance``
      matchType    -- "all" scores a column by its worst pair (``min``);
                      any other value scores it by its best pair (``max``)
      gapChar      -- string appended to a sequence wherever it has a gap
      circular     -- if true, look for circular permutations; this sets
                      ``seq.circular`` on every chain as a side effect and
                      circular sequences are laid out doubled ("2 x name")
      statusPrefix -- text prepended to every status message

    Returns the list of cloned, gapped sequences sorted by the OSL
    identifier of their molecule, or None (after reporting through
    ``replyobj.error``) when no column could be formed.

    Raises chimera.UserError when a chain takes part in no column.

    NOTE: this is Python 2 code; it relies on ``dict.items()`` /
    ``dict.keys()`` returning lists, on cmp-style ``list.sort`` and on
    integer ``/`` division.
    """
    # create list of pairings between sequences
    # and prune to be monotonic

    # cache of search trees, keyed by sequence
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        """One end of a link: a single (sequence, position) pair."""

        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # present the same 'positions' mapping that Column has, so
            # that link ends can be handled uniformly
            if attr == "positions":
                return {self.seq: self.pos}
            raise AttributeError("No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored connection between two ends (EndPoint or Column)."""

        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            # fixed: the second call used to be contains(seq. pos), i.e.
            # attribute access on seq and a single argument
            return self.info[0].contains(seq, pos) \
                or self.info[1].contains(seq, pos)

        def evaluate(self):
            """Recompute self.val from every residue pair across the
            two ends, combined with valFunc."""
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                    and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord().distance(pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    # with min, a negative value cannot recover
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    # pas: sequence -> list of principal atoms (None where undetermined)
    # pairings: sequence -> per-position list of links touching that
    #   position (twice as many slots when circular)
    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                                 "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i + 1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            # per-position link lists for each sequence of the pair
            links1 = [[] for ignore in range(len1)]
            links2 = [[] for ignore in range(len2)]
            linkList = []
            replyobj.status("%sBuilding search tree %s\n" %
                            (statusPrefix, tag),
                            blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for resIndex, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((resIndex, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
                # fixed: remember the tree; the cache was never filled,
                # so the tree was rebuilt for every pair
                trees[seq2] = tree
            replyobj.status("%sSearching tree, building links %s\n" %
                            (statusPrefix, tag),
                            blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1),
                                EndPoint(seq2, i2),
                                val,
                                doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n" %
                                (statusPrefix, tag),
                                blankAfter=0)
                # pruning is deferred until circularity is decided
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                                  " residues for %s and %s\n" %
                                  (seq1.molecule.name, seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1) + len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                    or l1p2 == l2p2:
                        # can never cross
                        continue
                    # tabulate for permutations of the first sequence
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for perm in range(start, stop):
                            crossings[perm] += 1
                    # same tabulation for the second sequence
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for perm in range(start, stop):
                            c2[perm] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5 * numSamples / len(seq1)
                permute1 = [0]
                for perm, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [perm]
                    elif crossed == least:
                        permute1.append(perm)
                least = c2[0] - 5 * numSamples / len(seq2)
                permute2 = [0]
                for perm, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [perm]
                    elif crossed == least:
                        permute2.append(perm)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (permute1[0], permute2[0])
                    replyobj.info(
                        "%s %s / %s %s: permute %s by %d or %s by %d\n" %
                        (seq1.molecule.name, seq1.name, seq2.molecule.name,
                         seq2.name, seq1.molecule.name, permute1[0],
                         seq2.molecule.name, permute2[0]))

            else:
                findPruneCrosslinks(allLinks,
                                    pairings,
                                    seq1,
                                    seq2,
                                    linkList,
                                    links1,
                                    links2,
                                    tag=tag,
                                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        # repeatedly declare circular the sequence that occurs in the
        # most circular pairs not yet covered by a circular sequence
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(
                lambda c1, c2: cmp(circularVotes[c2], circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                    and (seq, cseq) not in circularPairs:
                        # safe despite iterating 'circulars': both
                        # loops are left immediately after the delete
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n" %
                        statusPrefix,
                        blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    # fixed: this was a no-op 'in' test instead of an
                    # assignment, leaving permute1/permute2 stale or
                    # undefined
                    permute2, permute1 = circularPairs[(seq2, seq1)]
                else:
                    continue
                # adjust whichever sequence needs the smaller shift
                use1 = len(seq1) - permute1 \
                    < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            # iterate over a copy, as the original code did
            for link in linkList[:]:
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                else:
                    myEnd = link.info[1]
                if myEnd.pos >= fixup:
                    continue
                # move the link end into the second copy of the
                # (doubled) circular sequence
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks,
                                pairings,
                                seq1,
                                seq2,
                                linkList,
                                links1,
                                links2,
                                tag=tagTmpl % (i + 1),
                                statusPrefix=statusPrefix)

    class Column:
        """An alignment column: maps sequence -> position."""

        def __init__(self, positions):
            # copy-construct from another Column, else adopt the dict
            if isinstance(positions, Column):
                self.positions = positions.positions.copy()
            else:
                self.positions = positions

        def contains(self, seq, pos):
            return seq in self.positions \
                and self.positions[seq] == pos

        def participation(self):
            """Sum of (cutoff - distance) over all member pairs."""
            p = 0
            members = self.positions.items()
            for i, sp in enumerate(members):
                seq1, pos1 = sp
                if circular and seq1.circular \
                and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in members[i + 1:]:
                    if circular and seq2.circular \
                    and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(pa2.xformCoord())
                    p += val
            return p

        def value(self):
            """valFunc of (cutoff - distance) over all member pairs;
            None when the column has fewer than two members."""
            value = None
            info = self.positions.items()
            for i, sp in enumerate(info):
                seq1, pos1 = sp
                if circular and seq1.circular \
                and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in info[i + 1:]:
                    if circular and seq2.circular \
                    and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(pa2.xformCoord())
                    if value is None:
                        value = val
                        continue
                    value = valFunc(value, val)
                    if valFunc == min and value < 0:
                        break
                if valFunc == min and value < 0:
                    break
            return value

        def __str__(self):
            from chimera import SelResidue

            def circComp(seq, pos):
                # map a position in the second copy of a circular
                # sequence back onto the real sequence
                if circular and seq.circular and pos >= len(seq):
                    return pos - len(seq)
                return pos

            return "Column[" + ",".join(
                map(
                    lambda i: "(%s %s, %s)" %
                    (i[0].molecule.name, i[0].name, i[0].residues[circComp(
                        i[0], i[1])].oslIdent(SelResidue)),
                    self.positions.items())) + "]"

    # columns: sequence -> {column: index of column in partialOrder[seq]}
    # partialOrder: sequence -> columns it takes part in, by position
    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n" %
                        (statusPrefix, len(allLinks)))
        # keep the best link at the end of the list; re-sort only when
        # re-evaluated links have disturbed that
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        # the two ends may not share a sequence
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            # insert the new column at its place in the sequence's order
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i + 1:]:
                cols[ncol] += 1
        for info in link.info:
            # re-target every other link that touched this end so that
            # it now connects to the merged column
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                # the old column has been absorbed; drop it from the
                # bookkeeping of each of its sequences
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                        + po[opos+1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                                " %s %s because it is not superimposed"
                                " on the other structures" %
                                (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    # clone: sequence -> output copy being filled in
    # current: sequence -> last position already emitted (-1 = none)
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap] + 1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    #   Find pairs of columns where the left-hand one could accept
    #   one or more residues from the right-hand one
    #
    #   Keep looking right (if necessary) until each row has at
    #   least one gap, but no more than one
    #
    #   Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n" %
                        (statusPrefix, colIndex, len(orderedColumns) - 1),
                        blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex + 2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        # gapInfo: sequence -> (currently in a gap?, last position seen,
        #                       number of gaps so far)
        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex + 1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex + rcols,
                                          Column({seq: leftPos + 1}))
                    redo = True
                    break
                indeterminates = True

            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)

            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            # a single-residue column was inserted; re-examine from the
            # same colIndex
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze: work on copies so the attempt can be abandoned
        replaceCols = [
            Column(c) for c in orderedColumns[colIndex:colIndex + rcols + 1]
        ]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i + 1]
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            # NOTE(review): value() is None for a column with fewer than
            # two members and Python 2 orders None below 0, so such a
            # column also abandons the squeeze -- confirm this is intended
            if col.value() < 0:
                break
        else:
            assert (not replaceCols[-1].positions)
            # accept only if total participation strictly improves
            ov = 0
            for col in orderedColumns[colIndex:colIndex + rcols + 1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex+rcols+1] = \
                replaceCols[:-1]
            # step back, since the previous column may now be squeezable
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
    for col in orderedColumns:
        # first emit any residues skipped since the previous column;
        # every other sequence gets gaps of the same length
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos - len(seq) + 1:offset - len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos + 1:]
                    frag += seq[:offset - len(seq)]
                else:
                    frag = seq[curPos + 1:offset]
            else:
                frag = seq[curPos + 1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        # then emit the column itself
        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                and offset >= len(seq):
                    char = seq[offset - len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    # emit whatever remains of each sequence after the last column
    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset + 1:] + seq[:]
                else:
                    frag = seq[offset - len(seq) + 1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset + 1:]
        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(
        lambda a, b: oslModelCmp(a.molecule.oslIdent(), b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones