def test_Interval_sequence():
    genome = pyfasta.Fasta('test/example.fa')
    l1 = Interval.from_string('1:858-967:1', genome=genome)
    l2 = Interval.from_string('1:858-967:-1', genome=genome)
    print l1.sequence
    print l2.sequence
    assert l1.sequence != l2.sequence
def test_Interval_distance():
    l1 = Interval.from_string('chr1:10858-10967:1')
    l2 = Interval.from_string('chr1:10858-10967:-1')
    assert l1.distance(l2) == 0
    l3 = Interval.from_string('chr1:10968-10977:-')
    print l1.distance(l3)
    assert l1.distance(l3) == 1
    assert l3.distance(l1) == 1
def test_Interval_contains():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-10964:-1')
    l3 = Interval.from_string('chr1:20858-30001:-1')
    l4 = Interval.from_string('chr1:30000-30003:-1')
    assert l2 in l1
    assert not l1 in l2
    assert not l3 in l4
def test_Interval_overlaps():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-21000:-1')
    l3 = Interval.from_string('chr1:20858-30000:-1')
    l4 = Interval.from_string('chr1:30000-30001:-1')
    assert l1.overlaps(l2)
    assert l2.overlaps(l3)
    assert not l1.overlaps(l3)
    assert not l3.overlaps(l4)
def test_Interval_is_contiguous():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-21000:-1')
    l3 = Interval.from_string('chr1:20858-30000:-1')
    l4 = Interval.from_string('chr1:30000-30001:-1')
    print l1.distance(l2) == 0
    assert l1.is_contiguous(l2)
    assert l2.is_contiguous(l3)
    assert not l1.is_contiguous(l3)
    assert  l3.is_contiguous(l4)
def test_truncate():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = l1.truncate(100)
    assert l2.start == 10000
    assert l2.end == 10100

    l1 = Interval.from_string('chr1:10000-10967:-1')
    l2 = l1.truncate(100)
    assert l2.start == 10867
    assert l2.end == 10967
def test_Interval_merge():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-12964:-1')
    l3 = Interval.from_string('chr1:10858-10964:-1')

    merged = Interval.merge([l1,l2,l3])
    assert len(merged) == 1
    m = merged[0]
    assert m.start == 10000
    assert m.end == 12964
示例#8
0
 def intervals(self):
     if self.bed_file is None:
         return {}
     if not self.bed_file.exists:
         raise ValueError(f"can not find the bed file: {self.bed_file}")
     intervals = defaultdict(list)
     with open(self.bed_file) as fp:
         for line in fp:
             chrom, start, end, *_ = line.strip().split('\t')
             intervals[chrom].append(
                 Interval(int(start), int(end), chrom=chrom))
     return {chrom: Interval.merge(intervals[chrom]) for chrom in intervals}
def test_Interval_from_string():
    a = Interval.from_string('chr1:10858-10967:1')
    assert a.chrom == 'chr1'
    assert a.start == 10858
    assert a.end == 10967
    assert a.strand == 1

    a = Interval.from_string('chr1:10858-10967:-1')
    assert a.chrom == 'chr1'
    assert a.start == 10858
    assert a.end == 10967
    assert a.strand == -1
示例#10
0
def test_Interval_span():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:20858-30000:-1')
    l3 = Interval.from_string('chr2:20858-30000:-1')
    ex = l1.span(l2)
    print ex
    assert ex.start == 10000
    assert ex.end == 30000
    ex = l2.span(l1)
    assert ex.start == 10000
    assert ex.end == 30000
    try:
        l1.span(l3)
        assert False
    except:
        pass
示例#11
0
def test_Interval_sub():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-10964:-1')
    l3 = Interval.from_string('chr2:10858-10964:-1')
    i = l1 - l2
    print i
    assert len(i)==2
    l = i[0]
    r = i[1]

    assert l.start == 10000
    assert l.end == 10858
    assert r.start == 10964
    assert r.end == 10967

    assert len(l1 - l3) == 1
示例#12
0
def load_amplicons(design, stats, opts):
    amplicons = []
    for row in csv.DictReader(file(design, 'U'), delimiter=opts.delimiter):
        amp_loc = Interval.from_string(row[opts.amplicon_column])
        trim_loc = Interval.from_string(row[opts.trim_column])


        if not trim_loc in amp_loc:
            print('trim location not contained in amplicon location, impossible trim', file=sys.stderr)
            sys.exit(1)

        amplicon = Amplicon(
            chr=amp_loc.chrom, start=amp_loc.start, end=amp_loc.end, strand=amp_loc.strand,
            trim_start = trim_loc.start, trim_end=trim_loc.end,
            external_id = row[opts.id_column], stats = stats,
            offset_allowed=opts.offset_allowed,
        )
        amplicons.append(amplicon)

    # TODO: check that amplicons can be uniquely resolved

    return amplicons
示例#13
0
 def within_interval(self, segment):
     if not self.intervals:
         return True
     if segment.reference_name not in self.intervals:
         return False
     current_position = Interval(
         segment.reference_start or 0,
         segment.reference_end
         or segment.reference_start + segment.query_length,
         chrom=segment.reference_name)
     for interval in self.intervals[segment.reference_name]:
         if interval.distance(current_position) < self.flank_size:
             return True
     return False
示例#14
0
def load_amplicons_from_header(header, stats, samfile, clip=True, load_pileups=True):
    amplicons = []
    for row in header['CO']:
        try:
            row = json.loads(row)
        except:
            continue
        if row.get('type', None) != 'ea':
            continue

        amp_loc = Interval.from_string(row['ac'])
        trim_loc = Interval.from_string(row['tc'])
        strand = row.get('st')
        strand = int(strand) if strand != 'None' else 0

        amplicon = Amplicon(
            chr=amp_loc.chrom, start=amp_loc.start, end=amp_loc.end, strand=strand,
            trim_start = trim_loc.start, trim_end=trim_loc.end,
            external_id = row['id'], stats = stats,
            offset_allowed=10,
        )
        amplicons.append(amplicon)

    return amplicons
示例#15
0
def test_Interval_intersection():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-10964:-1')
    i = l1.intersection(l2)
    assert i.start == l2.start
    assert i.end == l2.end
    assert i.chrom == l2.chrom

    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr1:10858-11000:-1')
    i = l1.intersection(l2)
    assert i.start == l2.start
    assert i.end == l1.end
    assert i.chrom == l2.chrom

    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = Interval.from_string('chr2:10858-10964:-1')
    i = l1.intersection(l2)
    assert i is None
示例#16
0
def test_add_border():
    l1 = Interval.from_string('chr1:10000-10967:1')
    l2 = l1.add_border(upstream=50, downstream=100)
    assert l2.start == 10000 - 50
    assert l2.end == 10967 + 100
示例#17
0
def test_find_minimal_spanning_set():

    targets = [
        Interval.from_string('chr1:10000-11000:+'),
        Interval.from_string('chr1:12000-13000:+')
    ]

    candidates = [
        Interval.from_string('chr1:10000-10500:+'),
        Interval.from_string('chr1:10100-10600:+'),
        Interval.from_string('chr1:10300-11700:+'),
        Interval.from_string('chr1:10500-11000:+'),
        Interval.from_string('chr1:10700-11100:+'),
    ]

    reads = MinimalSpanningSet(targets, candidates)
    n_reads = len(reads.chosen)
    print 'chose', reads.chosen
    print 'n_reads', n_reads
    assert n_reads == 2

    targets = [
        Interval.from_string('chr1:12000-13000:+')
    ]

    candidates = [
        Interval.from_string('chr1:10000-10500:+'),
        Interval.from_string('chr1:10100-10600:+'),
        Interval.from_string('chr1:10300-11700:+'),
        Interval.from_string('chr1:10500-11000:+'),
        Interval.from_string('chr1:10700-11100:+'),
    ]


    # this example is constructed into fooling the greedy algorithm
    # to choose one too many
    reads = MinimalSpanningSet(targets, candidates)
    assert len(reads.chosen) == 0
    targets = [
        Interval.from_string('chr1:12000-13000:+')
    ]

    candidates = [
        Interval.from_string('chr1:12000-12500:+'),
        Interval.from_string('chr1:12500-13000:+'),
        Interval.from_string('chr1:12100-12900:+'),
    ]

    reads = MinimalSpanningSet(targets, candidates)
    assert len(reads.chosen) == 2