示例#1
0
 def test_fill_subset(self):
     """
     Fill with a region that covers only part of the walker's positions.
     """
     region_map = {'a': (3, 8)}
     source = sparse('a', [1, 3, 5, 6, 8, 10])
     # Positions 1 and 10 lie outside the region and are expected unchanged.
     middle = list(filled('a', 3, 8, [4, 7]))
     wanted = [('a', 1, 1)] + middle + [('a', 10, 10)]
     observed = list(wiggelen.fill(source, regions=region_map))
     assert_equal(observed, wanted)
示例#2
0
 def test_fill_open(self):
     """
     Fill without passing any regions to `wiggelen.fill`.
     """
     source = sparse('a', [3, 5, 6, 8])
     wanted = list(filled('a', 3, 8, [4, 7]))
     observed = list(wiggelen.fill(source))
     assert_equal(observed, wanted)
示例#3
0
 def test_fill_closed(self):
     """
     Fill with an explicit (start, stop) region wider than the data.
     """
     bounds = {'a': (1, 10)}
     source = sparse('a', [3, 5, 6, 8])
     wanted = list(filled('a', 1, 10, [1, 2, 4, 7, 9, 10]))
     observed = list(wiggelen.fill(source, regions=bounds))
     assert_equal(observed, wanted)
示例#4
0
def find_max(regions, wig):
    """Find the maximum value and recompute the score sum for each region.

    Walks ``fill(walk(wig))`` once and, for every region, accumulates the
    values whose position lies in ``[start, end]`` while tracking the largest
    value and the position where it occurs (the last such position on ties).

    The score sum is recomputed here rather than trusting the one carried in
    ``regions``: that one was produced while merging regions, so it could
    have been messed up.

    Args:
        regions: iterable of ``(start, end, score_sum)`` tuples. Assumed to
            be ordered and non-overlapping. It is not modified.
        wig: object passed to ``walk`` (the caller in this file passes an
            open wig file).

    Returns:
        A list of ``(start, end, new_score_sum, max_value, max_position)``
        tuples, one per region that was completed during the walk. An empty
        ``regions`` yields an empty list.
    """
    # Iterate instead of ``pop(0)``: the caller's list is left untouched and
    # fetching the next region is O(1) rather than O(n).
    pending = iter(regions)
    current = next(pending, None)
    if current is None:
        # Nothing to measure; avoid walking the wig data at all.
        return []

    regions_with_max = []
    start, end, _old_score_sum = current
    # -1 doubles as the "nothing recorded yet" sentinel for the region.
    max_value = -1
    max_position = None
    new_score_sum = 0
    for _chrom, position, value in fill(walk(wig)):
        # Positions without a value (None) cannot contribute to the sum or
        # the maximum, so they are skipped rather than crashing on None.
        if value is not None and start <= position <= end:
            new_score_sum += value
            if value >= max_value:
                max_value = value
                max_position = position
        if position >= end and max_value != -1:
            # position is past region and max_value has been recorded
            regions_with_max.append(
                (start, end, new_score_sum, max_value, max_position))
            # grab new region and reset max
            current = next(pending, None)
            if current is None:  # stop when there are no more regions
                break
            start, end, _old_score_sum = current
            max_value = -1
            max_position = None
            new_score_sum = 0

    return regions_with_max
示例#5
0
 def test_fill_open(self):
     """
     Fill a sparse walker using the default arguments of `wiggelen.fill`.
     """
     sparse_walker = sparse('a', [3, 5, 6, 8])
     result = list(wiggelen.fill(sparse_walker))
     assert_equal(result, list(filled('a', 3, 8, [4, 7])))
示例#6
0
 def test_fill_only_edges(self):
     """
     Fill with `only_edges=True`.
     """
     source = sparse('a', [3, 5, 6, 14])
     # (position, value) pairs expected back; None marks a filled position.
     pairs = [(3, 3), (4, None), (5, 5), (6, 6),
              (7, None), (13, None), (14, 14)]
     wanted = [('a', position, value) for position, value in pairs]
     observed = list(wiggelen.fill(source, only_edges=True))
     assert_equal(observed, wanted)
示例#7
0
 def test_fill_closed(self):
     """
     Fill between an explicit start and stop given through `regions`.
     """
     sparse_walker = sparse('a', [3, 5, 6, 8])
     wanted = list(filled('a', 1, 10, [1, 2, 4, 7, 9, 10]))
     result = wiggelen.fill(sparse_walker, regions={'a': (1, 10)})
     assert_equal(list(result), wanted)
def read_wig(filename, norm=False):
	"""Load a wig file into a ``{position: value}`` dictionary.

	Every tuple produced by ``fill(walk(...))`` is stored under its position;
	the first tuple element is ignored, so a later entry with the same
	position overwrites an earlier one.

	Args:
		filename: path of the wig file to open.
		norm: when true, divide every value by the median of all values
			and return the normalized dictionary instead.

	Returns:
		A dict mapping position to value (or to value / median if ``norm``).
	"""
	# Use a context manager so the file is closed once it has been consumed.
	with open(filename) as wig_file:
		wig_dict = {position: value for region, position, value in fill(walk(wig_file))}
	if not norm:
		return wig_dict
	# The call form prints the same text on Python 2 and Python 3.
	print("Normalizing...")
	# list() is required on Python 3, where values() is a view that
	# np.median cannot turn into a numeric array.
	# NOTE(review): positions that fill() reports without a value would
	# presumably be None here and break the median/division - confirm.
	wig_median = float(np.median(list(wig_dict.values())))
	return {x: wig_dict[x] / wig_median for x in wig_dict}
示例#9
0
 def test_fill_subset(self):
     """
     Fill only the (3, 8) part of a walker that spans positions 1 to 10.
     """
     sparse_walker = sparse('a', [1, 3, 5, 6, 8, 10])
     wanted = [('a', 1, 1)]
     wanted.extend(filled('a', 3, 8, [4, 7]))
     wanted.append(('a', 10, 10))
     result = wiggelen.fill(sparse_walker, regions={'a': (3, 8)})
     assert_equal(list(result), wanted)
示例#10
0
def main(infile, threshold, merge_dist, min_width, strand, outfile):
    """Call regions from a wig file and write them out via ``write_bed``.

    Steps:
      1. Walk the wig file and collect runs of consecutive positions whose
         value is at least ``threshold``.
      2. Drop runs narrower than ``min_width`` positions.
      3. Merge the remaining runs with ``merge_regions``.
      4. Re-read the wig file with ``find_max`` to get each region's maximum
         and a freshly computed score sum.
      5. Hand the result to ``write_bed``.

    Args:
        infile: path of the wig file; it is opened twice (steps 1 and 4).
        threshold: minimum value for a position to belong to a region.
        merge_dist: forwarded to ``merge_regions``.
        min_width: minimum inclusive width (``end - start + 1``) to keep.
        strand: forwarded to ``write_bed``.
        outfile: forwarded to ``write_bed``.
    """
    # list of tuples, (start, end, total_exp)
    called_regions = []

    start = None
    end = None
    total_exp = 0

    # fill function steps through every position, returns None if position not
    # in original wig file
    print("Calling preliminary regions...")
    # Context manager: the handle is closed even if the walk raises.
    with open(infile) as wig:
        for region, position, value in fill(walk(wig)):
            if start is None:
                # initialize start of new region to current position
                start = position
            if value is None or value < threshold:
                # Undefined or below-threshold position: both end the
                # current region the same way.
                if total_exp > 0:  # if a region already exists, end it
                    called_regions.append((start, end, total_exp))
                # reset start, end, and total_exp
                start = None
                end = None
                total_exp = 0
            elif value >= threshold:  # value reaches threshold, continue region
                total_exp += value
                end = position

    if total_exp != 0:  # finished iterating but one last region
        called_regions.append((start, end, total_exp))

    print("Filtering out regions smaller than minimum width...")
    # filter out regions that are below minimum width
    filtered_regions = [
        x for x in called_regions if x[1] - x[0] + 1 >= min_width
    ]

    print("Merging regions...")
    merged_regions = merge_regions(filtered_regions, merge_dist)

    # find max region and re-do score sum
    # open wig file again; previously this second handle was never closed
    print("Finding region max and calculating total score...")
    with open(infile) as wig:
        regions_with_max = find_max(merged_regions, wig)

    write_bed(regions_with_max, strand, outfile)
示例#11
0
 def test_fill_only_edges(self):
     """
     Fill with `only_edges=True` on a walker with a long gap (7 to 13).
     """
     sparse_walker = sparse('a', [3, 5, 6, 14])
     positions = [3, 4, 5, 6, 7, 13, 14]
     values = [3, None, 5, 6, None, None, 14]
     wanted = [('a', position, value)
               for position, value in zip(positions, values)]
     result = wiggelen.fill(sparse_walker, only_edges=True)
     assert_equal(list(result), wanted)
示例#12
0
 def test_fill_regions(self):
     """
     Fill a chained walker where only 'b' and 'c' have regions given.
     """
     positions = [3, 5, 6, 8]
     source = chain(sparse('a', positions),
                    sparse('b', positions),
                    sparse('c', [1, 3, 5, 6, 8, 10]))
     # 'a' has no region entry and is expected back as it went in.
     wanted_a = list(sparse('a', [3, 5, 6, 8]))
     wanted_b = list(filled('b', 1, 10, [1, 2, 4, 7, 9, 10]))
     wanted_c = [('c', 1, 1)] + list(filled('c', 3, 8, [4, 7])) + [('c', 10, 10)]
     wanted = wanted_a + wanted_b + wanted_c
     region_map = {'b': (1, 10), 'c': (3, 8)}
     observed = list(wiggelen.fill(source, regions=region_map))
     assert_equal(observed, wanted)
示例#13
0
 def test_fill_regions(self):
     """
     Fill several chained walkers, each with its own region (or none).
     """
     walker_a = sparse('a', [3, 5, 6, 8])
     walker_b = sparse('b', [3, 5, 6, 8])
     walker_c = sparse('c', [1, 3, 5, 6, 8, 10])
     combined = chain(walker_a, walker_b, walker_c)

     wanted = []
     wanted.extend(sparse('a', [3, 5, 6, 8]))
     wanted.extend(filled('b', 1, 10, [1, 2, 4, 7, 9, 10]))
     wanted.append(('c', 1, 1))
     wanted.extend(filled('c', 3, 8, [4, 7]))
     wanted.append(('c', 10, 10))

     result = wiggelen.fill(combined, regions={'b': (1, 10), 'c': (3, 8)})
     assert_equal(list(result), wanted)
示例#14
0
    return wig_sum


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('bed', help='bed file')
    parser.add_argument('plus_wig', help='wig file for plus strand')
    parser.add_argument('minus_wig', help='wig file for minus strand')
    parser.add_argument('output_name', help='name of output file')

    args = parser.parse_args()

    plus_wig_dict = {
        position: value
        for region, position, value in fill(walk(open(args.plus_wig)))
    }
    minus_wig_dict = {
        position: value
        for region, position, value in fill(walk(open(args.minus_wig)))
    }

    outfile = open(args.output_name, 'w')

    with open(args.bed) as infile:
        for line in infile:
            fields = line.strip().split()
            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            name = fields[3]