Exemplo n.º 1
0
def find_all_branch_splits(network, leaves):
    """
    Compute, for every edge of 'network', how that edge splits 'leaves'.

    The graph is walked depth-first with an explicit stack.  Before each
    unvisited neighbor is pushed, the current vertex is pushed again, so the
    walk comes back to the current vertex after exploring that neighbor.
    The positions at which an edge is crossed are then used to read off the
    leaves that were visited between the crossings.

    network -- mapping of vertex -> mapping keyed by neighboring vertices
               (only network.keys() and network[vertex].keys() are used)
    leaves  -- container of vertices; must support 'in' and iteration

    Returns a dict mapping each edge (a tuple of its two sorted endpoints)
    to a two-item list [side0, side1].  Each side is a dict whose keys are
    leaves and whose values are always 1; side i holds the leaves found on
    the edge[i] side of the edge.

    NOTE(review): 'start, end = elookup[edge]' requires every edge to be
    crossed exactly twice -- presumably 'network' is a tree; confirm against
    callers.
    """
    # find vertex and edge visit history
    # the walk starts from the first key of the network
    start = network.keys()[0]

    # stack of vertices still to visit
    openset = [start]
    # vertices whose neighbors have already been pushed
    closedset = {}

    # every vertex popped from the stack, in visit order
    vhistory = []
    # edge between each pair of consecutive entries of vhistory
    ehistory = []
    # edge -> list of positions in ehistory where that edge was crossed
    # NOTE(review): presumably util.Dict(1, []) supplies a fresh list for a
    # missing key -- confirm against util
    elookup = util.Dict(1, [])


    while len(openset) > 0:
        vertex = openset.pop()

        vhistory.append(vertex)

        # record the edge just crossed (previous vertex -> this vertex)
        if len(vhistory) > 1:
            edge = tuple(util.sort(vhistory[-2:]))
            ehistory.append(edge)
            elookup[edge].append(len(ehistory) - 1)

        # skip closed vertices
        # (a closed vertex popped again is a return visit; it is recorded in
        # the histories above but not expanded a second time)
        if vertex in closedset:
            continue

        for v in network[vertex].keys():
            if v not in closedset:
                # push the current vertex first so it is revisited after v
                openset.append(vertex)
                openset.append(v)


        # close new vertex
        closedset[vertex] = 1


    # use histories to define each half of every split
    splits = {}
    for edge in elookup:
        set1 = {}

        # positions of the two crossings of this edge; ehistory[k] is the
        # edge between vhistory[k] and vhistory[k + 1]
        start, end = elookup[edge]
        # leaves visited after the first crossing and before the second
        for i in range(start+1, end+1):
            if vhistory[i] in leaves:
                set1[vhistory[i]] = 1

        # fill in other half of splits using complement
        set2 = {}
        for v in leaves:
            if v not in set1:
                set2[v] = 1

        # vhistory[start] is the vertex the first crossing left from, so it
        # lies on the set2 side; order the halves to line up with
        # edge[0], edge[1]
        if edge[0] == vhistory[start]:
            splits[edge] = [set2, set1]
        else:
            splits[edge] = [set1, set2]


    return splits
def sameParts(parts1, parts2):
    """
    Returns partitions that are exactly the same between 'parts1' and 'parts2'

    Two partitions are the same when they hold the same items, regardless of
    the order of the items.  Matching partitions are returned as they appear
    in 'parts2', in 'parts2' order.

    items of 'parts1' and 'parts2' should be hashable and sortable.
    """
    # canonical form (sorted tuple) of every partition in parts1
    seen = set(tuple(sorted(part)) for part in parts1)

    return [part for part in parts2 if tuple(sorted(part)) in seen]
Exemplo n.º 3
0
def sameParts(parts1, parts2):
    """
    Returns partitions that are exactly the same between 'parts1' and 'parts2'

    A partition of 'parts2' is kept when some partition of 'parts1' holds the
    same items (item order is ignored).  The result preserves the order and
    the original objects of 'parts2'.

    items of 'parts1' and 'parts2' should be hashable and sortable.
    """
    # sorted tuples make partitions comparable independent of item order
    known = set(tuple(sorted(part)) for part in parts1)

    return [part for part in parts2 if tuple(sorted(part)) in known]
Exemplo n.º 4
0
def median(vals):
    """
    Computes the median of a list of numbers

    For an even number of values the mean of the two middle values is
    returned as a float; for an odd number the middle value itself is
    returned.  Raises IndexError if 'vals' is empty.
    """
    sortvals = sorted(vals)
    lenvals = len(sortvals)

    # floor division keeps the index an integer under true division as well
    mid = lenvals // 2

    if lenvals % 2 == 0:
        return (sortvals[mid] + sortvals[mid - 1]) / 2.0
    return sortvals[mid]
Exemplo n.º 5
0
def median(vals):
    """
    Computes the median of a list of numbers

    With an odd count the middle value of the sorted values is returned
    unchanged; with an even count the two middle values are averaged and a
    float is returned.  Raises IndexError if 'vals' is empty.
    """
    sortvals = sorted(vals)
    count = len(sortvals)

    # '//' (not '/') so the result can be used as a list index
    half = count // 2

    if count % 2 == 0:
        return (sortvals[half] + sortvals[half - 1]) / 2.0
    return sortvals[half]
Exemplo n.º 6
0
def writeParams(filename, params):
    """
    Write SPIDIR model parameters to a file

    Each entry of 'params' becomes one tab-delimited line: the key followed
    by each of its values, all converted with str().  Lines are written in
    sorted key order.

    filename -- path of the file to write (opened in "w" mode)
    params   -- dict mapping each key to an iterable of values
    """
    # 'with' closes the file even if formatting a value raises
    with open(filename, "w") as out:
        for key in sorted(params):
            values = params[key]
            out.write("%s\t%s\n" % (str(key), "\t".join(map(str, values))))
    def display(self, out=sys.stdout):
        """
        Write the contents of self.mat to 'out' as lines of text.

        self.mat is read as a mapping of row key -> mapping of column key ->
        value.  Rows are visited in sorted key order; one newline is written
        for every unit step between consecutive row keys.  Within a row,
        self.default is written for every unit step up to a cell's column
        key, then the cell's value.  A final newline ends the output.

        NOTE(review): the column counter advances by one per cell, so each
        stored value is presumably one character wide -- confirm.
        """
        row_ids = util.sort(self.mat.keys())

        cur_row = min(row_ids)
        for row_id in row_ids:
            # advance line by line until the output reaches this row
            while cur_row < row_id:
                cur_row += 1
                out.write("\n")

            cells = self.mat[row_id]
            cur_col = 0
            for col_id in util.sort(cells.keys()):
                # pad skipped columns with the default filler
                while cur_col < col_id:
                    cur_col += 1
                    out.write(self.default)
                out.write(cells[col_id])
                cur_col += 1
        out.write("\n")
Exemplo n.º 8
0
 def display(self, out=sys.stdout):
     """
     Write the contents of self.mat to 'out' as lines of text.

     self.mat is read as a mapping of row key -> mapping of column key ->
     value.  Rows are visited in sorted key order; one newline is written
     for every unit step between consecutive row keys.  Within a row,
     self.default is written for every unit step up to a cell's column
     key, then the cell's value.  A final newline ends the output.

     NOTE(review): the column counter advances by one per cell, so each
     stored value is presumably one character wide -- confirm.
     """
     row_ids = util.sort(list(self.mat.keys()))

     cur_row = min(row_ids)
     for row_id in row_ids:
         # advance line by line until the output reaches this row
         while cur_row < row_id:
             cur_row += 1
             out.write("\n")

         cells = self.mat[row_id]
         cur_col = 0
         for col_id in util.sort(list(cells.keys())):
             # pad skipped columns with the default filler
             while cur_col < col_id:
                 cur_col += 1
                 out.write(self.default)
             out.write(cells[col_id])
             cur_col += 1
     out.write("\n")
    def draw_matches(self, sp, chrom, start, end, drawn=None):
        """
        Build the drawing elements linking orthologous genes that are laid
        out on neighboring rows.

        For each gene yielded by iter_chrom over the regions of 'sp'/'chrom'
        between 'start' and 'end', its orthologs (from self.orth_lookup) that
        are present in self.region_layout are grouped by layout row (y) and
        the rows are ordered by descending y.  Every pair of genes on
        consecutive rows gets a line, plus a quad when self.fat_matches is
        set.

        drawn -- optional set of (lower gene, upper gene) pairs that were
                 already drawn; such pairs are skipped and new pairs are
                 added to it.  A fresh set is used when None.

        Returns group(*elements) for all elements built.
        """
        elements = []

        if drawn is None:
            drawn = set()

        # build list of matches in order of drawing
        for gene in iter_chrom(self.db.get_regions(sp, chrom), start, end):
            # matches are ordered by row so that multi-genome synteny is
            # drawn top-down

            # orthologs of this gene that have a layout position
            orthologs = [x for x in self.orth_lookup.get(gene.data["ID"], [])
                         if x in self.region_layout]
            if not orthologs:
                continue

            by_row = util.groupby(lambda x: self.region_layout[x].y,
                                  orthologs)
            row_keys = util.sort(by_row.keys(), reverse=True)
            rows = util.mget(by_row, row_keys)

            layout = self.region_layout

            for row_index in range(1, len(rows)):
                for lower in rows[row_index]:
                    lower_region = self.db.get_region(lower)
                    for upper in rows[row_index - 1]:
                        pair = (lower, upper)
                        if pair in drawn:
                            continue
                        drawn.add(pair)

                        upper_region = self.db.get_region(upper)
                        upper_pos = layout[upper]
                        lower_pos = layout[lower]

                        # corners of the band joining the two genes
                        top_y = upper_pos.y
                        bottom_y = lower_pos.y + 1
                        top_left = upper_pos.x
                        top_right = top_left + upper_region.length()
                        bottom_right = lower_pos.x + lower_region.length()
                        bottom_left = lower_pos.x

                        if self.fat_matches:
                            elements.append(quads(
                                self.colors["matches"],
                                top_left, top_y,
                                top_right, top_y,
                                bottom_right, bottom_y,
                                bottom_left, bottom_y))

                        # the line is drawn whether or not a quad was added
                        elements.append(lines(
                            self.colors["matches"],
                            top_left, top_y,
                            bottom_left, bottom_y))
        return group(*elements)
Exemplo n.º 10
0
def qqnorm(data, plot=None):
    """
    Quantile-quantile plot

    Plots the sorted 'data' against an equally sized, sorted sample drawn
    with random.normalvariate(0, 1).

    data -- iterable of values to compare against the normal sample
    plot -- optional object with a plot(x, y) method to draw on; when None,
            util.plot(x, y) is called instead

    Returns 'plot' when one was given, otherwise the result of util.plot.
    """
    data2 = sorted(data)
    norm = sorted(random.normalvariate(0, 1) for _ in data2)

    # identity test: a plot object may define its own __eq__
    if plot is None:
        return util.plot(data2, norm)

    plot.plot(data2, norm)
    return plot
Exemplo n.º 11
0
def qqnorm(data, plot=None):
    """
    Quantile-quantile plot

    The sorted 'data' is plotted against a sorted sample of the same size
    drawn with random.normalvariate(0, 1).

    data -- iterable of values to compare against the normal sample
    plot -- optional object with a plot(x, y) method to draw on; when None,
            util.plot(x, y) is called instead

    Returns 'plot' when one was given, otherwise the result of util.plot.
    """
    data2 = sorted(data)
    norm = sorted(random.normalvariate(0, 1) for _ in data2)

    # 'is None' rather than '== None': the plot object may overload equality
    if plot is None:
        return util.plot(data2, norm)

    plot.plot(data2, norm)
    return plot
Exemplo n.º 12
0
    def draw_matches(self, sp, chrom, start, end, drawn=None):
        """
        Build the drawing elements linking orthologous genes that are laid
        out on neighboring rows.

        For each gene yielded by iter_chrom over the regions of 'sp'/'chrom'
        between 'start' and 'end', its orthologs (from self.orth_lookup) that
        are present in self.region_layout are grouped by layout row (y) and
        the rows are ordered by descending y.  Every pair of genes on
        consecutive rows gets a line, plus a quad when self.fat_matches is
        set.

        drawn -- optional set of (lower gene, upper gene) pairs that were
                 already drawn; such pairs are skipped and new pairs are
                 added to it.  A fresh set is used when None.

        Returns group(*elements) for all elements built.
        """
        elements = []

        if drawn is None:
            drawn = set()

        # build list of matches in order of drawing
        for gene in iter_chrom(self.db.get_regions(sp, chrom), start, end):
            # matches are ordered by row so that multi-genome synteny is
            # drawn top-down

            # orthologs of this gene that have a layout position
            orthologs = [
                x for x in self.orth_lookup.get(gene.data["ID"], [])
                if x in self.region_layout
            ]
            if not orthologs:
                continue

            by_row = util.groupby(lambda x: self.region_layout[x].y,
                                  orthologs)
            row_keys = util.sort(by_row.keys(), reverse=True)
            rows = util.mget(by_row, row_keys)

            layout = self.region_layout

            for row_index in range(1, len(rows)):
                for lower in rows[row_index]:
                    lower_region = self.db.get_region(lower)
                    for upper in rows[row_index - 1]:
                        pair = (lower, upper)
                        if pair in drawn:
                            continue
                        drawn.add(pair)

                        upper_region = self.db.get_region(upper)
                        upper_pos = layout[upper]
                        lower_pos = layout[lower]

                        # corners of the band joining the two genes
                        top_y = upper_pos.y
                        bottom_y = lower_pos.y + 1
                        top_left = upper_pos.x
                        top_right = top_left + upper_region.length()
                        bottom_right = lower_pos.x + lower_region.length()
                        bottom_left = lower_pos.x

                        if self.fat_matches:
                            elements.append(
                                quads(self.colors["matches"],
                                      top_left, top_y, top_right, top_y,
                                      bottom_right, bottom_y,
                                      bottom_left, bottom_y))

                        # the line is drawn whether or not a quad was added
                        elements.append(
                            lines(self.colors["matches"],
                                  top_left, top_y, bottom_left, bottom_y))
        return group(*elements)
Exemplo n.º 13
0
def filterOne2ones(parts, gene2species):
    """
    Returns the largest one-to-one partitions in 'parts'

    A partition is one-to-one when it is non-empty and no two of its genes
    map to the same species.  Only the one-to-one partitions of maximum size
    are returned, each as a sorted list.  Returns [] when no partition is
    one-to-one.

    parts        -- list of partitions (each a sized collection of genes)
    gene2species -- function mapping a gene to its (hashable) species
    """
    def isOne2one(part):
        species = [gene2species(gene) for gene in part]
        # non-empty, and every species occurs exactly once
        return bool(species) and len(set(species)) == len(species)

    # get one2ones
    ones = [part for part in parts if isOne2one(part)]
    if not ones:
        # max() over an empty sequence would raise ValueError
        return []

    # find maximum one2one
    maxsize = max(len(part) for part in ones)
    return [sorted(part) for part in ones if len(part) == maxsize]
def filterOne2ones(parts, gene2species):
    """
    Returns the largest one-to-one partitions in 'parts'

    A partition counts as one-to-one when it is non-empty and each of its
    genes maps to a different species.  Of those, only the partitions of
    maximum size are returned, each as a sorted list.  Returns [] when no
    partition is one-to-one.

    parts        -- list of partitions (each a sized collection of genes)
    gene2species -- function mapping a gene to its (hashable) species
    """
    def isOne2one(part):
        species = [gene2species(gene) for gene in part]
        # a repeated species makes the set smaller than the list
        return bool(species) and len(set(species)) == len(species)

    # get one2ones
    ones = [part for part in parts if isOne2one(part)]
    if not ones:
        # nothing qualifies; avoid calling max() on an empty sequence
        return []

    # find maximum one2one
    maxsize = max(len(part) for part in ones)
    return [sorted(part) for part in ones if len(part) == maxsize]
Exemplo n.º 15
0
def makeTopologyMatrix(tree, genes):
    """
    Build a matrix marking which tree edges separate each pair of genes.

    The tree is converted with treelib.tree2graph and split on every edge
    with find_all_branch_splits.  Rows of the matrix stand for unordered
    pairs of positions (i < j) in 'genes', in row-major upper-triangle
    order; columns stand for edges.  An entry is set to 1.0 when the two
    genes fall on opposite sides of the edge's split.

    tree  -- tree accepted by treelib.tree2graph
    genes -- sequence of genes

    Returns (topmat, edges) where 'edges' is the list of edges in column
    order.

    NOTE(review): entries that are never set keep whatever value
    util.makeMatrix initializes them with, and util.list2lookup presumably
    maps each gene to its position in 'genes' -- confirm against util.
    """
    # find how edges split vertices
    network = treelib.tree2graph(tree)
    splits = find_all_branch_splits(network, set(genes))
    # materialize the keys: edges are indexed by column and returned
    edges = list(splits.keys())

    # create topology matrix
    n = len(genes)
    # floor division keeps the size and the row index integers
    ndists = n * (n - 1) // 2
    topmat = util.makeMatrix(ndists, len(edges))

    vlookup = util.list2lookup(genes)
    for e, edge in enumerate(edges):
        set1, set2 = splits[edge]
        for gene1 in set1:
            for gene2 in set2:
                i, j = util.sort([vlookup[gene1], vlookup[gene2]])
                # row of the pair (i, j), i < j
                index = i * n - i * (i + 1) // 2 + j - i - 1
                topmat[index][e] = 1.0

    return topmat, edges
Exemplo n.º 16
0
 def keys(self):
     """Return the keys of self.db passed through util.unique, then util.sort."""
     db_keys = self.db.keys()
     distinct = util.unique(db_keys)
     return util.sort(distinct)
Exemplo n.º 17
0
 def keys(self):
     """Return the keys of self.db passed through util.unique, then util.sort."""
     raw_keys = self.db.keys()
     unique_keys = util.unique(raw_keys)
     return util.sort(unique_keys)