Пример #1
0
def form_intersections(segments, full_output=False, samples=None, snp_range=None):
    '''Split the line at every segment endpoint and bin the segments' sample sets into the
    resulting sub-segments.

    The union of all segment endpoints is sorted ascending; each pair of consecutive endpoints
    forms one sub-segment (bin). Every segment is then accumulated into all the bins it covers.

    Parameters:
    segments - sequence of ((start, stop), values) items: an endpoint pair followed by an
        iterable of the samples that are equal over that range.
    full_output - if True, also return the cross-reference structures listed below.
    samples - optional collection of all sample identifiers; only used when full_output is True.
        If None, the union of all values appearing in segments is used instead.
    snp_range - optional range; its START and STOP entries are added as extra endpoints.

    Returns (sub_segments, intersections), where
    sub_segments - list of (endpoints[i], endpoints[i+1]) pairs of the sorted unique endpoints.
    intersections - list whose i-th element holds the values of every segment covering
        sub_segments[i], in the order the segments were given.

    If full_output is True, returns (sub_segments, intersections, value_to_segments,
    segment_to_local_values, dangling_local_values), where
    value_to_segments - per bin, util.to_set_dict of that bin's (value, segment index) pairs.
    segment_to_local_values - util.to_set_dict of (segment index, (bin index, value)) pairs.
    dangling_local_values - set of (bin index, sample) pairs not covered by any segment.
    NOTE(review): util.to_set_dict presumably maps each key to the set of its values - confirm.'''
    non_unique_endpoints = [s[0] for s in segments]
    if snp_range is not None:
        non_unique_endpoints.append((snp_range[START], snp_range[STOP]))
    # set.union accepts arbitrary iterables, so the endpoint pairs can be merged directly
    endpoints = sorted(set().union(*non_unique_endpoints))

    # This index makes it easier to random-access bins
    endpoint_index = dict((x, index) for (index, x) in enumerate(endpoints))
    sub_segments = list(zip(endpoints, endpoints[1:]))
    num_bins = len(sub_segments)

    intersections = [[] for _ in sub_segments]
    if full_output:
        value_segment_tuples = [[] for _ in sub_segments]
        segment_local_value_tuples = []

    # Sweep the segments in order, accumulating each one into every bin it covers
    for k, segment in enumerate(segments):
        snp = segment[0]
        for i in range(endpoint_index[snp[0]], endpoint_index[snp[1]]):
            intersections[i].append(segment[1])
            if full_output:
                value_segment_tuples[i].extend((x, k) for x in segment[1])
                segment_local_value_tuples.extend((k, (i, x)) for x in segment[1])

    if not full_output:
        return (sub_segments, intersections)

    segment_to_local_values = util.to_set_dict(segment_local_value_tuples)
    all_samples = samples if samples is not None \
        else util.union_all(x for s in segments for x in s[1])
    # Every (bin, sample) combination that no segment accounts for is "dangling"
    all_local_values = set(itertools.product(range(num_bins), all_samples))
    dangling_local_values = all_local_values - util.union_all(*segment_to_local_values.values())
    value_to_segments = [util.to_set_dict(d) for d in value_segment_tuples]
    return (sub_segments, intersections,
            value_to_segments, segment_to_local_values, dangling_local_values)
Пример #2
0
def group_to_color(segments, samples=None, segment_gap=25, snp_range=None):
    '''Compute a haplotype coloring from a list of IBD segments.

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes
    a segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.
    samples is the set of haplotype identifiers. segment_gap is handed to __dilute_segment
    along with each segment's range; it is the # of SNPs to drop from each segment's margins
    for the purpose of defining colors. This allows a slight overlap between segments without
    making them the same color, which is usually undesirable. snp_range is forwarded to the
    intersection routines.

    Returns (sub_segments, groups): the sub-segments of the original segments, and a numpy
    array with one entry per color holding that color's local values, followed by one
    single-element entry per dangling local value.'''
    # Colors are decided on slightly shrunken copies of the original segments
    diluted_segments = [(__dilute_segment(seg[0], segment_gap), seg[1]) for seg in segments]
    segment_groups = __group_to_color(diluted_segments, samples, snp_range=snp_range)

    # Map the result back onto the original segments: sub-segments and dangling nodes come
    # from the undiluted input (the diluted segments may have extra, irrelevant dangling nodes)
    original = im.segment.form_intersections(segments, True, samples=samples,
                                             snp_range=snp_range)
    sub_segments, _, _, segment_to_local_values, dangling_local_values = original

    # Each connected component of segments becomes the list of local values (haplotype parts)
    # sharing one color
    colored = [list(util.union_all(*(segment_to_local_values[seg] for seg in component)))
               for component in segment_groups]
    # Every dangling local value gets a group of its own
    uncolored = [[value] for value in dangling_local_values]
    return sub_segments, np.array(colored + uncolored)
Пример #3
0
def plot_hap_coloring_from_colors(d,
                                  haps=None,
                                  title=None,
                                  xlabel=None,
                                  ylabel=None,
                                  y=None,
                                  pair_gap=0,
                                  linewidth=6,
                                  segment_gap=25,
                                  snp_range=None,
                                  colors=None):
    '''Generate a haplotype coloring plot from coloring scheme d.

    Parameters:
    d - (sub_segments, groups) pair. sub_segments is a sequence of (start, stop) pairs; groups
        is an iterable of groups, each an iterable of (k, hap) pairs where k indexes
        sub_segments. All members of a group are drawn in the same color.
    haps - haplotypes to place on the y-axis, in order. Each hap must be indexable with two
        integer entries (they are printed with %d). Defaults to the sorted set of haplotypes
        found in groups.
    title - optional plot title.
    xlabel - x-axis label; defaults to 'SNP #'.
    ylabel - y-axis label; defaults to 'Sample'.
    y - optional y-tick labels, one per hap; defaults to repr() of each hap.
    pair_gap - if positive, tick k is shifted up by k // 2 so that consecutive pairs of
        haplotypes are set apart. Only the sign of pair_gap is used.
    linewidth - width of the drawn lines.
    segment_gap, snp_range - unused here; accepted for interface compatibility.
    colors - iterable of colors with at least three components each; defaults to
        im.plot.colors.get_colors().

    Prints one diagnostic line per drawn part, shows the plot and returns
    (sub_segments, groups) unchanged.'''
    sub_segments, groups = d
    if haps is None:
        haps = sorted(util.union_all(*(map(operator.itemgetter(1), x) for x in groups)))

    # Prepare axes
    if colors is None:
        colors = im.plot.colors.get_colors()
    num_haps = len(haps)
    hap_index = dict((hap, k) for k, hap in enumerate(haps))
    custom_label = y is not None
    if y is None:
        y = [repr(hap) for hap in haps]
    xmin, xmax = sub_segments[0][0] - 1, sub_segments[-1][1] + 1

    def x_scaled(x):
        # axhline takes its x-extent as a fraction of the axis width
        return (1.0 * (x - xmin)) / (xmax - xmin)

    P.clf()
    P.hold(True)
    if title is not None:
        P.title(title)
    P.xlim([xmin, xmax])
    P.xlabel(xlabel if xlabel else 'SNP #')
    # Honor the ylabel argument the same way xlabel is honored
    P.ylabel(ylabel if ylabel else 'Sample')
    hap_ticks = np.arange(0, num_haps)
    if pair_gap > 0:
        # Offsets 0,0,1,1,2,2,...; also well-defined when the number of haplotypes is odd
        hap_ticks += np.arange(num_haps) // 2
    P.yticks(hap_ticks, y)
    P.ylim([hap_ticks[0] - 0.5, hap_ticks[-1] + 0.5])

    # izip stops at the shorter input, so groups beyond the available colors are not drawn
    for group, color in it.izip(groups, colors):
        # Draw lines for all members of the group using the same color
        for k, hap in group:
            start, stop = sub_segments[k][0], sub_segments[k][1]
            left, right = x_scaled(start), x_scaled(stop)
            label = ' ylabel %-10s' % (y[hap_index[hap]],) if custom_label else ''
            print('region %-2d [%-4d,%-4d]  x %.2f:%.2f  hap (%-4d, %d)%s  color (%.2f, %.2f, %.2f)' %
                  (k, start, stop, left, right,
                   hap[0], hap[1], label,
                   color[0], color[1], color[2]))
            P.axhline(y=hap_ticks[hap_index[hap]],
                      xmin=left,
                      xmax=right,
                      linewidth=linewidth,
                      color=color)
    P.show()
    return sub_segments, groups
Пример #4
0
def __group_to_color(segments, samples=None, snp_range=None):
    '''Group segments into colors (IBD sharing classes).

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes
    a segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.
    Returns the connected components (as produced by nx.connected_components) of the segment
    graph described below; each component contains the segments of one color.'''
    (sub_segments, intersections, value_to_segments, _, _) = \
    im.segment.form_intersections(segments, True, samples=samples, snp_range=snp_range)

    # Build a graph G where the nodes = segments and edges = (segments intersect AND their sample
    # sets intersect). G's connected components are groups, where group is a set of segments of the
    # same color.
    edges = []
    for i in xrange(len(sub_segments)):
        segments_of_value = value_to_segments[i]
        # Within one sub-segment, samples linked by equality form connected components
        for component in nx.connected_components(nx.Graph(intersections[i])):
            sharing_segments = util.union_all(*(segments_of_value[x] for x in component))
            # The full product includes (s, s) pairs, so a lone segment still becomes a node
            edges.extend(it.product(sharing_segments, sharing_segments))
    return nx.connected_components(nx.from_edgelist(edges))
Пример #5
0
def __group_to_color(segments, samples=None, snp_range=None):
    '''Group segments into colors (IBD sharing classes).

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes
    a segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.
    Returns the connected components (as produced by nx.connected_components) of the segment
    graph described below; each component contains the segments of one color.'''
    (sub_segments, intersections, value_to_segments, _, _) = \
    form_intersections(segments, True, samples=samples, snp_range=snp_range)

    # Build a graph G where the nodes = segments and edges = (segments intersect AND their sample
    # sets intersect). G's connected components are groups, where group is a set of segments of the
    # same color.
    edges = []
    for i in xrange(len(sub_segments)):
        segments_of_value = value_to_segments[i]
        # Within one sub-segment, samples linked by equality form connected components
        for component in nx.connected_components(nx.Graph(intersections[i])):
            sharing_segments = util.union_all(*(segments_of_value[x] for x in component))
            # The full product includes (s, s) pairs, so a lone segment still becomes a node
            edges.extend(itertools.product(sharing_segments, sharing_segments))
    return nx.connected_components(nx.from_edgelist(edges))
Пример #6
0
def plot_hap_coloring_from_colors(d, haps=None, title=None, xlabel=None, ylabel=None, y=None,
                      pair_gap=0, linewidth=6, segment_gap=25, snp_range=None, colors=None):
    '''Generate a haplotype coloring plot from coloring scheme d.

    Parameters:
    d - (sub_segments, groups) pair. sub_segments is a sequence of (start, stop) pairs; groups
        is an iterable of groups, each an iterable of (k, hap) pairs where k indexes
        sub_segments. All members of a group are drawn in the same color.
    haps - haplotypes to place on the y-axis, in order. Each hap must be indexable with two
        integer entries (they are printed with %d). Defaults to the sorted set of haplotypes
        found in groups.
    title - optional plot title.
    xlabel - x-axis label; defaults to 'SNP #'.
    ylabel - y-axis label; defaults to 'Sample'.
    y - optional y-tick labels, one per hap; defaults to repr() of each hap.
    pair_gap - if positive, tick k is shifted up by k // 2 so that consecutive pairs of
        haplotypes are set apart. Only the sign of pair_gap is used.
    linewidth - width of the drawn lines.
    segment_gap, snp_range - unused here; accepted for interface compatibility.
    colors - iterable of colors with at least three components each; defaults to
        im.plot.colors.get_colors().

    Prints one diagnostic line per drawn part, shows the plot and returns
    (sub_segments, groups) unchanged.'''
    sub_segments, groups = d
    if haps is None:
        haps = sorted(util.union_all(*(map(operator.itemgetter(1), x) for x in groups)))

    # Prepare axes
    if colors is None:
        colors = im.plot.colors.get_colors()
    num_haps = len(haps)
    hap_index = dict((hap, k) for k, hap in enumerate(haps))
    custom_label = y is not None
    if y is None:
        y = [repr(hap) for hap in haps]
    xmin, xmax = sub_segments[0][0] - 1, sub_segments[-1][1] + 1

    def x_scaled(x):
        # axhline takes its x-extent as a fraction of the axis width
        return (1.0 * (x - xmin)) / (xmax - xmin)

    P.clf()
    P.hold(True)
    if title is not None:
        P.title(title)
    P.xlim([xmin, xmax])
    P.xlabel(xlabel if xlabel else 'SNP #')
    # Honor the ylabel argument the same way xlabel is honored
    P.ylabel(ylabel if ylabel else 'Sample')
    hap_ticks = np.arange(0, num_haps)
    if pair_gap > 0:
        # Offsets 0,0,1,1,2,2,...; also well-defined when the number of haplotypes is odd
        hap_ticks += np.arange(num_haps) // 2
    P.yticks(hap_ticks, y)
    P.ylim([hap_ticks[0] - 0.5, hap_ticks[-1] + 0.5])

    # izip stops at the shorter input, so groups beyond the available colors are not drawn
    for group, color in it.izip(groups, colors):
        # Draw lines for all members of the group using the same color
        for k, hap in group:
            start, stop = sub_segments[k][0], sub_segments[k][1]
            left, right = x_scaled(start), x_scaled(stop)
            label = ' ylabel %-10s' % (y[hap_index[hap]],) if custom_label else ''
            print('region %-2d [%-4d,%-4d]  x %.2f:%.2f  hap (%-4d, %d)%s  color (%.2f, %.2f, %.2f)' %
                  (k, start, stop, left, right,
                   hap[0], hap[1], label,
                   color[0], color[1], color[2]))
            P.axhline(y=hap_ticks[hap_index[hap]],
                      xmin=left,
                      xmax=right,
                      linewidth=linewidth, color=color)
    P.show()
    return sub_segments, groups
Пример #7
0
def group_to_color(segments, samples=None, segment_gap=25, snp_range=None):
    '''Compute a haplotype coloring from a list of IBD segments.

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes
    a segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.
    samples is the set of haplotype identifiers. segment_gap is handed to __dilute_segment
    along with each segment's range; it is the # of SNPs to drop from each segment's margins
    for the purpose of defining colors. This allows a slight overlap between segments without
    making them the same color, which is usually undesirable. snp_range is forwarded to the
    intersection routines.

    Returns (sub_segments, groups): the sub-segments of the original segments, and a numpy
    array with one entry per color holding that color's local values, followed by one
    single-element entry per dangling local value.'''
    # Colors are decided on slightly shrunken copies of the original segments
    diluted_segments = [(__dilute_segment(seg[0], segment_gap), seg[1]) for seg in segments]
    segment_groups = __group_to_color(diluted_segments, samples, snp_range=snp_range)

    # Map the result back onto the original segments: sub-segments and dangling nodes come
    # from the undiluted input (the diluted segments may have extra, irrelevant dangling nodes)
    original = im.segment.form_intersections(segments, True, samples=samples,
                                             snp_range=snp_range)
    sub_segments, _, _, segment_to_local_values, dangling_local_values = original

    # Each connected component of segments becomes the list of local values (haplotype parts)
    # sharing one color
    colored = [list(util.union_all(*(segment_to_local_values[seg] for seg in component)))
               for component in segment_groups]
    # Every dangling local value gets a group of its own
    uncolored = [[value] for value in dangling_local_values]
    return sub_segments, np.array(colored + uncolored)