Exemplo n.º 1
0
    def _clean_clique(self, c):
        '''Try to complete an IBD graph component into a clique, then prune it.

        Ideally, each IBD graph component is a clique (reflecting IBD transitivity).
        In practice, we have approximate transitivity, so for every node that is
        not connected to all others, query self.segments.find(self.bp_left,
        self.bp_right + 1, node) for more IBD segments in the neighborhood of the
        SNP, and add each segment whose two samples both belong to the component
        as an edge. Nodes whose degree is still below
        min(self.min_degree, number of nodes - 1) are then removed, so in a
        component smaller than (min_degree + 1) every node that is not connected
        to all others is removed.

        c is the graph component; it is modified in place and also returned.
        '''
        # Snapshot the nodes once: a list keeps the iteration order stable while
        # the graph is edited, and the set gives O(1) membership tests below.
        nodes = list(c.nodes())
        node_set = set(nodes)
        orig_nodes = len(nodes)
        max_degree = c.number_of_nodes() - 1

        # Built eagerly (not with filter()) so that truth tests and len() work and
        # the selection cannot change while the graph is being modified.
        unconnected = [node for node in nodes if c.degree(node) < max_degree]
        if self.debug:
            # dict() accepts both a degree dict and an iterable of (node, degree) pairs
            self._writeln('\tComponent: nodes %d, edges %d, %s' %
                          (c.number_of_nodes(), c.number_of_edges(),
                           repr(util.occur_dict(dict(c.degree()).values()))))
        if not unconnected:
            # Already a clique: nothing to add or remove
            return c

        if self.debug:
            self._writeln('\t\tAll nodes %s' % (len(nodes), ))
            self._writeln('\t\tUnconnected nodes %s' % (len(unconnected), ))

        # Add extra edges: segments around the SNP that join two component members
        extra_edges = [
            tuple(s.samples)
            for node in unconnected
            for s in self.segments.find(self.bp_left, self.bp_right + 1, node)
            if s.samples[0] in node_set and s.samples[1] in node_set
        ]
        c.add_edges_from(extra_edges)

        # Delete nodes that are still insufficiently connected. The threshold is
        # capped at max_degree because no node can have more neighbors than that.
        c_min_degree = min(max_degree, self.min_degree)
        still_unconnected = [node for node in nodes
                             if c.degree(node) < c_min_degree]
        if still_unconnected:
            c.remove_nodes_from(still_unconnected)
        if self.debug:
            self._writeln('\t\tCleaned component: %d -> %d' %
                          (orig_nodes, c.number_of_nodes()))
        return c
Exemplo n.º 2
0
def plot_two_families():
    '''Test ancestor imputation and child POO alignment for two families on chromosome 22.

    Loads 'hutt.phased.npz', collects the genotyped children of each
    quasi-founder's family, and runs analyze_family() on three child sets:
      1. the quasi-founder family containing sample 1049, without 1049 itself;
      2. that same family including 1049;
      3. the children of the family returned by p.find_family(10, 1414).
    Figures are created, saved under $OBER/doc/poo/ and shown only when the local
    plot / save_plot flags are switched on (both are off by default). Summary
    statistics of the first analysis are printed at the end.
    '''
    # Parameters
    chrom = 22
    plot = False  # True
    save_plot = False  # True
    debug = False  # True

    # Read data
    p = im.hutt('hutt.phased.npz')
    q = p.pedigree.quasi_founders
    # One frozenset of genotyped children per quasi-founder family; the outer
    # frozenset collapses quasi-founders that map to the same child set.
    t = frozenset(
        frozenset(im.gt.genotyped_children(
            p, p.pedigree.find_family_by_child(i, genotyped=False)))
        for i in q)
    num_sibs = [len(sibs) for sibs in t]
    print('%s %s' % ('Distribution of QF family sizes', util.occur_dict(num_sibs)))

    # The quasi-founder family that contains sample 1049 (looked up once and
    # reused for figures 1 and 2)
    qf_family = [x for x in t if 1049 in x][0]

    # NOTE(review): figure 1 is titled '... All' but saved as hap_colors_poo.png,
    # while figure 2 is titled '... POOC' but saved as hap_colors_all.png. The
    # titles look swapped relative to the sample sets; strings are left unchanged
    # pending confirmation.
    if plot:
        P.figure(1)
    s = set(qf_family) - set([1049])
    pa, _ = analyze_family(p, s, max_colors=4,
                           title='Haplotype Coloring: Quasi-Founder Sibs, All, Chrom. %d' % (chrom,),
                           plot=plot, debug=debug)
    if save_plot:
        P.savefig(os.environ['OBER'] + '/doc/poo/qf_family/hap_colors_poo.png')

    if plot:
        P.figure(2)
    s2 = set(qf_family)
    analyze_family(p, s2, max_colors=4,
                   title='Haplotype Coloring: Quasi-Founder Sibs, POOC Chrom. %d' % (chrom,),
                   plot=plot, debug=debug)
    if save_plot:
        P.savefig(os.environ['OBER'] + '/doc/poo/qf_family/hap_colors_all.png')

    if plot:
        P.figure(3)
    f = p.find_family(10, 1414)  # 4 children, genotyped parents
    s3 = f.children
    analyze_family(p, s3, max_colors=4,
                   title='Haplotype Coloring: Non-Founder Sibs Chrom. %d' % (chrom,),
                   plot=plot, debug=debug)
    if save_plot:
        P.savefig(os.environ['OBER'] + '/doc/poo/nf_family/hap_colors.png')

    if plot:
        P.show()

    # Summary of the first analysis (family of 1049 without 1049). Single-argument
    # print(...) produces the same output under Python 2 and Python 3.
    print(im.color.hap_color.best_hap_alignment_to_colors(pa))
    print('%s %s' % ('Regions', pa.num_regions))
    print('%s %s' % ('Parental haplotype coverage %', parent_coverage_fraction(pa, p)))
    print('%s %s' % ('Children coverage by parental haplotypes',
                     pa.color_sequence_coverage(np.arange(4))))
Exemplo n.º 3
0
'''
import util, networkx as nx, sys, numpy as np

def sorted_pair(a, b):
    '''Return (a, b) if a <= b, otherwise (b, a).'''
    if a <= b:
        return (a, b)
    return (b, a)

'''Main program'''
if __name__ == '__main__':
    # Input source, field separator and output flavor ('cytoscape' or 'matrix')
    in_file = sys.stdin
    delimiter = ','
    output_format = 'cytoscape'  # 'matrix'

    # Read the disease-block list from the delimited text input (a CSV by default)
    # as pairs of strings of up to 50 characters each.
    records = np.loadtxt(in_file, delimiter=delimiter, dtype=[('i', 'S50'), ('j', 'S50')])

    # Hash table of block-to-diseases: the second column is the key and the first
    # column is the value.
    h = util.mdict.from_items((j, i) for (i, j) in records)

    # For every block and every ordered pair of its diseases (including a disease
    # paired with itself), tally the order-normalized pair; the tallies become the
    # edge weights of the disease graph.
    weighted_edgelist = util.occur_dict(
        sorted_pair(first, second)
        for members in h.itervalues()
        for first in members
        for second in members)
    g = nx.Graph()
    g.add_weighted_edges_from(
        (u, v, weight) for (u, v), weight in weighted_edgelist.iteritems())

    diseases = g.nodes()

    if output_format == 'matrix':
        # Adjacency matrix in delimited form: a header row of disease names, then
        # one row per disease starting with its name.
        print(delimiter.join(diseases))
        A = nx.to_numpy_matrix(g)
        for row, disease in enumerate(diseases):
            cells = A[row, :].tolist()[0]
            print(disease + delimiter + delimiter.join(str(x) for x in cells))
    elif output_format == 'cytoscape':
        # One line per edge: source, target, 'pp', 'TRUE', a running edge id, weight
        lines = [
            delimiter.join([u, v, 'pp', 'TRUE', 'edge%05d' % (k,), str(d['weight'])])
            for k, (u, v, d) in enumerate(g.edges_iter(data=True))
        ]
        print('\n'.join(lines))
Exemplo n.º 4
0
 def _clean_clique(self, c):
     '''Try to complete an IBD graph component into a clique, then prune it.

     Ideally, each IBD graph component is a clique (reflecting IBD transitivity).
     In practice, we have approximate transitivity, so for every node that is
     not connected to all others, query self.segments.find(self.bp_left,
     self.bp_right + 1, node) for more IBD segments in the neighborhood of the
     SNP, and add each segment whose two samples both belong to the component
     as an edge. Nodes whose degree is still below
     min(self.min_degree, number of nodes - 1) are then removed, so in a
     component smaller than (min_degree + 1) every node that is not connected
     to all others is removed.

     c is the graph component; it is modified in place and also returned.
     '''
     # Snapshot the nodes once: a list keeps the iteration order stable while
     # the graph is edited, and the set gives O(1) membership tests below.
     nodes = list(c.nodes())
     node_set = set(nodes)
     orig_nodes = len(nodes)
     max_degree = c.number_of_nodes() - 1

     # Built eagerly (not with filter()) so that truth tests and len() work and
     # the selection cannot change while the graph is being modified.
     unconnected = [node for node in nodes if c.degree(node) < max_degree]
     if self.debug:
         # dict() accepts both a degree dict and an iterable of (node, degree) pairs
         self._writeln('\tComponent: nodes %d, edges %d, %s' % (c.number_of_nodes(), c.number_of_edges(), repr(util.occur_dict(dict(c.degree()).values()))))
     if not unconnected:
         # Already a clique: nothing to add or remove
         return c

     if self.debug:
         self._writeln('\t\tAll nodes %s' % (len(nodes),))
         self._writeln('\t\tUnconnected nodes %s' % (len(unconnected),))

     # Add extra edges: segments around the SNP that join two component members
     extra_edges = [tuple(s.samples)
                    for node in unconnected
                    for s in self.segments.find(self.bp_left, self.bp_right + 1, node)
                    if s.samples[0] in node_set and s.samples[1] in node_set]
     c.add_edges_from(extra_edges)

     # Delete nodes that are still insufficiently connected. The threshold is
     # capped at max_degree because no node can have more neighbors than that.
     c_min_degree = min(max_degree, self.min_degree)
     still_unconnected = [node for node in nodes if c.degree(node) < c_min_degree]
     if still_unconnected:
         c.remove_nodes_from(still_unconnected)
     if self.debug:
         self._writeln('\t\tCleaned component: %d -> %d' % (orig_nodes, c.number_of_nodes()))
     return c