def find_all_branch_splits(network, leaves):
    """Find how every edge of a tree-shaped graph partitions the leaves.

    The graph is walked depth-first from an arbitrary vertex.  Before a
    child is pushed on the stack its parent is pushed too, so the visit
    history also records the step back to the parent once the child's
    subtree is finished.  In a tree every edge is therefore crossed
    exactly twice, and the vertices visited between the two crossings are
    exactly the vertices on the far side of that edge.

    Args:
        network: adjacency mapping ``{vertex: {neighbor: ...}}``.  It is
            expected to be a connected, undirected tree; an edge that is
            not crossed exactly twice makes the unpacking below raise
            ``ValueError``.
        leaves: container of the vertices to partition (membership tests
            and iteration are used).

    Returns:
        A dict mapping each edge, stored as a sorted 2-tuple of vertices,
        to a list ``[side0, side1]``.  Each side is a dict ``{leaf: 1}``;
        ``side0`` holds the leaves on the same side as ``edge[0]`` and
        ``side1`` those on the side of ``edge[1]``.  An empty network
        yields an empty dict.
    """
    from collections import defaultdict

    if not network:
        return {}

    # find vertex and edge visit history
    start = next(iter(network))
    openset = [start]
    closedset = set()
    vhistory = []
    # edge -> positions (in crossing order) at which the edge was crossed;
    # crossing number k joins vhistory[k] and vhistory[k + 1]
    elookup = defaultdict(list)

    while openset:
        vertex = openset.pop()
        vhistory.append(vertex)

        if len(vhistory) > 1:
            edge = tuple(sorted(vhistory[-2:]))
            elookup[edge].append(len(vhistory) - 2)

        # skip closed vertices (this visit is only the step back up)
        if vertex in closedset:
            continue

        for neighbor in network[vertex].keys():
            if neighbor not in closedset:
                # push the current vertex first so the walk returns to it
                # after the neighbor's subtree has been exhausted
                openset.append(vertex)
                openset.append(neighbor)

        # close new vertex
        closedset.add(vertex)

    # use histories to define one half of each split
    splits = {}
    for edge, (first, last) in elookup.items():
        inside = {}
        for i in range(first + 1, last + 1):
            if vhistory[i] in leaves:
                inside[vhistory[i]] = 1

        # fill in the other half of the split using the complement
        outside = {}
        for leaf in leaves:
            if leaf not in inside:
                outside[leaf] = 1

        # vhistory[first] is the endpoint the walk came from, i.e. the
        # endpoint on the "outside" half
        if edge[0] == vhistory[first]:
            splits[edge] = [outside, inside]
        else:
            splits[edge] = [inside, outside]

    return splits
def sameParts(parts1, parts2):
    """
    Returns the partitions of 'parts2' that also occur in 'parts1'.

    Two partitions are considered the same when they contain the same
    items, regardless of order.  The items of each partition must be
    hashable and sortable.  The result keeps the order (and any
    duplicates) of 'parts2', and its elements are the original objects
    from 'parts2'.
    """
    # canonical (sorted) form of every partition in parts1
    known = set(tuple(sorted(part)) for part in parts1)
    return [part for part in parts2 if tuple(sorted(part)) in known]
def median(vals):
    """Computes the median of a list of numbers.

    For an even number of values the result is the mean of the two middle
    values (always a float); for an odd number it is the middle value
    itself.  Raises IndexError if 'vals' is empty.
    """
    ordered = sorted(vals)
    # floor division: a plain '/' yields a float under true division,
    # which cannot be used as a list index
    mid = len(ordered) // 2

    if len(ordered) % 2 == 0:
        return (ordered[mid] + ordered[mid - 1]) / 2.0
    return ordered[mid]
def writeParams(filename, params):
    """Write SPIDIR model parameters to a file.

    'params' maps each key to an iterable of values.  One line is written
    per key, in sorted key order: the key followed by its values, all
    separated by tabs.  The file is overwritten if it already exists.
    """
    # the context manager guarantees the handle is flushed and closed
    with open(filename, "w") as out:
        for key in sorted(params.keys()):
            values = params[key]
            out.write("%s\t%s\n" % (str(key), "\t".join(map(str, values))))
def display(self, out=sys.stdout):
    """Write the sparse matrix 'self.mat' to 'out' as text.

    'self.mat' maps row index -> {column index -> string}.  Rows are
    written in increasing order starting at the smallest row index; each
    missing row in between produces an empty line.  Within a row, columns
    are written in increasing order and every missing column from 0 up to
    the next stored column is filled with 'self.default'.  The output
    always ends with a newline; an empty matrix produces just that
    newline.
    """
    ykeys = sorted(self.mat.keys())

    # the first row needs no leading newline; guard against an empty matrix
    y = ykeys[0] if ykeys else 0
    for ykey in ykeys:
        # move down to the next stored row
        while y < ykey:
            y += 1
            out.write("\n")

        row = self.mat[ykey]
        x = 0
        for xkey in sorted(row.keys()):
            # pad the gap up to the next stored column
            while x < xkey:
                x += 1
                out.write(self.default)
            out.write(row[xkey])
            x += 1

    # terminate the last row
    out.write("\n")
def display(self, out=sys.stdout):
    """Render 'self.mat' (row index -> {column index -> string}) to 'out'.

    Stored rows are emitted top to bottom beginning with the smallest row
    index, with one blank line for every row index that is skipped.
    Inside a row the cursor starts at column 0 and gaps before each
    stored column are filled with 'self.default'.  A single newline
    closes the output; with no rows at all only that newline is written.
    """
    row_keys = sorted(self.mat.keys())
    if not row_keys:
        # nothing stored: min() of an empty sequence would raise
        out.write("\n")
        return

    current_row = row_keys[0]
    for row_key in row_keys:
        # advance to this row, one newline per line moved
        while current_row < row_key:
            current_row += 1
            out.write("\n")

        cells = self.mat[row_key]
        cursor = 0
        for col_key in sorted(cells.keys()):
            # fill unset columns with the default text
            while cursor < col_key:
                cursor += 1
                out.write(self.default)
            out.write(cells[col_key])
            cursor += 1

    # close the final row
    out.write("\n")
def draw_matches(self, sp, chrom, start, end, drawn=None):
    """Build the drawing elements that connect matching genes.

    For every gene yielded by iter_chrom() over the regions of 'sp' /
    'chrom' between 'start' and 'end', the gene's orthologs that have an
    entry in self.region_layout are grouped by their layout row (y) and
    each pair of genes in adjacent rows is connected.

    'drawn' is a set of (bottom gene, top gene) pairs that were already
    connected; pairs found in it are skipped and new pairs are added to
    it.  A fresh set is used when it is None.

    Returns group(...) of all created elements.
    """
    if drawn is None:
        drawn = set()

    shapes = []

    # build list of matches in order of drawing
    for gene in iter_chrom(self.db.get_regions(sp, chrom), start, end):
        # orthologs of this gene that are actually laid out
        orthologs = [
            other
            for other in self.orth_lookup.get(gene.data["ID"], [])
            if other in self.region_layout
        ]
        if not orthologs:
            continue

        # need to sort matches by genome order so that multi-genome
        # synteny is drawn top-down: rows are ordered by decreasing y
        by_row = util.groupby(lambda x: self.region_layout[x].y, orthologs)
        row_order = util.sort(by_row.keys(), reverse=True)
        tiers = util.mget(by_row, row_order)

        layout = self.region_layout

        for upper in range(len(tiers) - 1):
            top_tier = tiers[upper]
            bottom_tier = tiers[upper + 1]

            for bottom_id in bottom_tier:
                bottom_region = self.db.get_region(bottom_id)

                for top_id in top_tier:
                    pair = (bottom_id, top_id)
                    if pair in drawn:
                        continue
                    drawn.add(pair)

                    top_region = self.db.get_region(top_id)

                    # corners of the band between the two genes
                    y1 = layout[top_id].y
                    y2 = layout[bottom_id].y + 1
                    x1 = layout[top_id].x
                    x2 = layout[top_id].x + top_region.length()
                    x3 = layout[bottom_id].x + bottom_region.length()
                    x4 = layout[bottom_id].x

                    if self.fat_matches:
                        shapes.append(quads(self.colors["matches"],
                                            x1, y1,
                                            x2, y1,
                                            x3, y2,
                                            x4, y2))

                    # NOTE(review): the line is drawn for every match,
                    # the quad only when fat_matches is set -- confirm
                    shapes.append(lines(self.colors["matches"],
                                        x1, y1,
                                        x4, y2))

    return group(*shapes)
def qqnorm(data, plot=None):
    """Quantile-quantile plot of 'data' against a normal sample.

    The sorted data is plotted against an equally sized, sorted sample
    drawn from the standard normal distribution (mean 0, sd 1), so the
    reference quantiles are random and differ between calls.

    If 'plot' is None the result of util.plot() is returned; otherwise
    the points are added with plot.plot() and 'plot' itself is returned.
    """
    data2 = sorted(data)
    norm = sorted(random.normalvariate(0, 1) for _ in data2)

    # identity test: a plot object may overload '=='
    if plot is None:
        return util.plot(data2, norm)

    plot.plot(data2, norm)
    return plot
def draw_matches(self, sp, chrom, start, end, drawn=None):
    """Create the graphics linking orthologous genes in adjacent rows.

    Genes are taken from iter_chrom() applied to the regions of species
    'sp' on 'chrom', limited to 'start'..'end'.  Only orthologs present
    in self.region_layout take part.  They are bucketed by layout row
    (their y value) and every gene is linked to the genes of the
    neighbouring row.

    'drawn' collects the (bottom gene, top gene) pairs that have been
    linked so that no pair is produced twice; pass an existing set to
    share it between calls, or leave it None to start empty.

    Returns the elements wrapped in group(...).
    """
    vis = []
    if drawn is None:
        drawn = set()

    # build list of matches in order of drawing
    for gene in iter_chrom(self.db.get_regions(sp, chrom), start, end):
        # get orthologs that have a layout position
        candidates = self.orth_lookup.get(gene.data["ID"], [])
        placed = [name for name in candidates if name in self.region_layout]
        if len(placed) < 1:
            continue

        # need to sort matches by genome order so that multi-genome
        # synteny is drawn top-down (largest y first)
        grouped = util.groupby(lambda x: self.region_layout[x].y, placed)
        ordered_keys = util.sort(grouped.keys(), reverse=True)
        bands = util.mget(grouped, ordered_keys)

        positions = self.region_layout

        for idx in range(1, len(bands)):
            above = bands[idx - 1]
            below = bands[idx]

            for low_name in below:
                low_region = self.db.get_region(low_name)

                for high_name in above:
                    key = (low_name, high_name)
                    if key not in drawn:
                        drawn.add(key)
                        high_region = self.db.get_region(high_name)

                        # top edge spans the upper gene, bottom edge the
                        # lower gene (one row further down)
                        y1 = positions[high_name].y
                        y2 = positions[low_name].y + 1
                        x1 = positions[high_name].x
                        x2 = positions[high_name].x + high_region.length()
                        x3 = positions[low_name].x + low_region.length()
                        x4 = positions[low_name].x

                        if self.fat_matches:
                            filled = quads(self.colors["matches"],
                                           x1, y1, x2, y1, x3, y2, x4, y2)
                            vis.append(filled)

                        # NOTE(review): outline assumed unconditional,
                        # fill only with fat_matches -- confirm
                        outline = lines(self.colors["matches"],
                                        x1, y1, x4, y2)
                        vis.append(outline)

    return group(*vis)
def filterOne2ones(parts, gene2species):
    """Return the largest one-to-one partitions, each as a sorted list.

    A partition is one-to-one when 'gene2species' maps every one of its
    genes to a different species.  Among the one-to-one partitions only
    those with the maximum number of genes are kept.

    Args:
        parts: iterable of partitions (sized collections of genes).
        gene2species: callable mapping a gene to its (hashable) species.

    Returns:
        A list of sorted gene lists, in the order the partitions appear
        in 'parts'.  Empty when no partition is one-to-one; empty
        partitions are never considered one-to-one.
    """
    def is_one2one(part):
        species = [gene2species(gene) for gene in part]
        # every species at most once <=> no duplicates among the species
        return len(species) > 0 and len(set(species)) == len(species)

    # get one2ones
    ones = [part for part in parts if is_one2one(part)]
    if not ones:
        # max() of an empty sequence would raise
        return []

    # find maximum one2one
    maxsize = max(len(part) for part in ones)
    return [sorted(part) for part in ones if len(part) == maxsize]
def makeTopologyMatrix(tree, genes):
    """Build a matrix marking which tree edges separate which gene pairs.

    The tree is converted to a graph with treelib.tree2graph() and the
    leaf split of every edge is obtained from find_all_branch_splits().
    Each row of the matrix stands for one unordered pair of genes and
    each column for one edge; an entry is set to 1.0 when the two genes
    of the pair lie on opposite sides of the edge.

    Rows follow the condensed pair order (0,1), (0,2), ..., (0,n-1),
    (1,2), ... where the numbers are the indices util.list2lookup()
    assigns to 'genes' (presumably their positions in the list -- TODO
    confirm).

    Returns:
        (topmat, edges): the matrix created by util.makeMatrix() with
        n*(n-1)/2 rows and one column per edge, and the list of edges in
        column order.
    """
    # find how edges split vertices
    network = treelib.tree2graph(tree)
    splits = find_all_branch_splits(network, set(genes))
    # materialize: the edges are indexed by column and returned
    edges = list(splits.keys())

    # create topology matrix
    n = len(genes)
    # floor division keeps sizes and indices integral under true division
    ndists = n * (n - 1) // 2
    topmat = util.makeMatrix(ndists, len(edges))

    vlookup = util.list2lookup(genes)

    for e, edge in enumerate(edges):
        set1, set2 = splits[edge]
        for gene1 in set1:
            for gene2 in set2:
                i, j = util.sort([vlookup[gene1], vlookup[gene2]])
                # row of pair (i, j), i < j, in condensed order
                index = i * n - i * (i + 1) // 2 + j - i - 1
                topmat[index][e] = 1.0

    return topmat, edges
def keys(self):
    """Return the keys of self.db, passed through util.unique() and then util.sort()."""
    distinct = util.unique(self.db.keys())
    return util.sort(distinct)