Exemplo n.º 1
def test_chop_datafunc():
    """Check that chop() labels every surviving piece through the data callback.

    The callback is handed the interval being cut plus an ``islower`` flag and
    the string it returns must end up as the data of the remaining piece.
    """
    def datafunc(iv, islower):
        # The flag doubles as a tuple index: False -> iv[0], True -> iv[1].
        return "oldlimit: {0}, islower: {1}".format(iv[islower], islower)

    lower_data = 'oldlimit: 10, islower: True'
    upper_data = 'oldlimit: 0, islower: False'

    def pieces_after_chop(begin, end):
        tree = IntervalTree([Interval(0, 10)])
        tree.chop(begin, end, datafunc)
        return sorted(tree)

    # Cut strictly inside: both flanks survive.
    pieces = pieces_after_chop(3, 7)
    assert len(pieces) == 2
    assert pieces[0] == Interval(0, 3, lower_data)
    assert pieces[1] == Interval(7, 10, upper_data)

    # Cut flush with the left edge: only the upper flank survives.
    pieces = pieces_after_chop(0, 7)
    assert len(pieces) == 1
    assert pieces[0] == Interval(7, 10, upper_data)

    # Cut flush with the right edge: only the lower flank survives.
    pieces = pieces_after_chop(5, 10)
    assert len(pieces) == 1
    assert pieces[0] == Interval(0, 5, lower_data)

    # Cut wider than, or exactly equal to, the interval: nothing survives.
    assert len(pieces_after_chop(-5, 15)) == 0
    assert len(pieces_after_chop(0, 10)) == 0
Exemplo n.º 2
def interval_fragments(start, end, intervals):
    """Return the parts of [start, end) that none of ``intervals`` claims.

    A single interval spanning [start, end) with data ``'fragment'`` is put
    into a tree, each claimed range is chopped out of it, and whatever is
    left in the tree is returned as a list.
    """
    unclaimed = IntervalTree()
    unclaimed.addi(start, end, 'fragment')
    for claimed in intervals:
        unclaimed.chop(claimed.begin, claimed.end)
    return list(unclaimed)
Exemplo n.º 3
def test_chop_datafunc():
    """Table-driven check of chop() combined with a data callback."""
    def datafunc(iv, islower):
        # islower is used as an index into the interval tuple (0 or 1).
        limit = iv[islower]
        return "oldlimit: {0}, islower: {1}".format(limit, islower)

    # (chop range, intervals expected to remain, in sorted order)
    cases = [
        ((3, 7), [Interval(0, 3, 'oldlimit: 10, islower: True'),
                  Interval(7, 10, 'oldlimit: 0, islower: False')]),
        ((0, 7), [Interval(7, 10, 'oldlimit: 0, islower: False')]),
        ((5, 10), [Interval(0, 5, 'oldlimit: 10, islower: True')]),
        ((-5, 15), []),
        ((0, 10), []),
    ]

    for (chop_begin, chop_end), expected in cases:
        tree = IntervalTree([Interval(0, 10)])
        tree.chop(chop_begin, chop_end, datafunc)
        assert len(tree) == len(expected)
        remaining = sorted(tree)
        for position, wanted in enumerate(expected):
            assert remaining[position] == wanted
Exemplo n.º 4
def split_at_nstretch(
    interval: Interval,
    left: Coords,
    right: Coords,
    nstretches: Sequence[Interval],
) -> List[Interval]:
    """Cut the best N stretch out of ``interval`` and label what remains.

    The stretch chosen by ``find_best_nstretch`` is chopped out of
    ``interval``. A remaining piece that still starts at ``interval.begin``
    is tagged with ``[left]``; one that still ends at ``interval.end`` is
    tagged with ``[right]``.

    Returns:
        A list of zero, one or two new ``Interval`` objects.
    """
    cut = find_best_nstretch(interval, left, right, nstretches)

    remainder = IntervalTree([interval])
    remainder.chop(cut.begin, cut.end)
    pieces = list(remainder)

    # Expect 0 pieces (the whole interval was N), 1 (the N stretch abuts one
    # of the ends) or 2 (the N stretch was strictly internal).
    assert len(pieces) <= 2, pieces

    labelled = []
    for piece in pieces:
        keeps_begin = piece.begin == interval.begin
        keeps_end = piece.end == interval.end
        # A piece identical to the input would mean nothing was removed.
        assert not (keeps_begin and keeps_end)

        if keeps_begin:
            side = left
        elif keeps_end:
            side = right
        else:
            side = None

        assert side is not None
        labelled.append(Interval(piece.begin, piece.end, [side]))
    return labelled
Exemplo n.º 5
    def find_intersection_with_interval(self, interval: Interval, sample: str):
        """
        Given an interval find all overlapping calls in the callset and truncate them appropriately.
        Note: we assume that the calls in the callset do not overlap for a single sample.

        Args:
            interval: a given interval
            sample: sample from the callset

        Returns:
            A position-sorted list of ``(Interval, event type)`` tuples clipped
            to ``interval``; ``[(interval, EventType.NO_CALL)]`` when no call
            of the sample intersects it.
        """
        assert sample in self.sample_names, "Sample %s is not in the callset" % sample

        calls = self.sample_to_calls_map.get(sample)
        intersecting_calls = calls.find_intersection(interval)

        if not intersecting_calls:
            return [(interval, EventType.NO_CALL)]

        result = IntervalTree([
            TreeInterval(call.interval.start, call.interval.end,
                         call.event_type) for call in intersecting_calls
        ])

        # Sort once; the first/last entries give the outer limits that may
        # stick out past the query interval.
        ordered = sorted(result)
        max_val = ordered[-1].end
        min_val = ordered[0].begin

        # Trim whatever extends beyond either side of the query interval.
        result.chop(interval.end, max(interval.end, max_val))
        result.chop(min(interval.start, min_val), interval.start)
        return [(Interval(interval.chrom, t.begin, t.end), t.data)
                for t in sorted(result)]
Exemplo n.º 6
    def pprint(self, depth=0):
        """Print this node: child nodes recursively, gaps as raw data.

        The node's own range is split around its children. Child ranges are
        printed by calling their ``pprint`` with ``depth + 1``; the gaps
        between them are read from ``self.fp`` and passed to ``oha``.

        NOTE(review): the two ``else:`` lines and the ``comment`` argument of
        the replacement interval were absent from the source and have been
        restored from the explanatory comment below - confirm against the
        upstream file.
        """
        tree = IntervalTree()
        tree.add(Interval(self.begin, self.end))

        # Carve each child's range out of the parent and re-insert it tagged
        # with the child node itself.
        for child in self.children:
            tree.chop(child.begin, child.end)
            tree.add(Interval(child.begin, child.end, child))

        intervals = sorted(tree.items())

        # if a child exists right where we start, emit a comment for this
        # enveloping structure, otherwise the first gap gets our comment
        comment = '  ' * depth + self.comment

        if type(intervals[0].data) == OhaNode:
            oha_comment(self.begin, comment)
        else:
            intervals[0] = Interval(intervals[0].begin, intervals[0].end,
                                    comment)

        for interval in intervals:
            if type(interval.data) == OhaNode:
                node = interval.data
                node.pprint(depth + 1)
            else:
                data = self.fp.read(interval.length())
                oha(data, interval.begin, interval.data)
Exemplo n.º 7
def interval_tree(start_data, stop_data, buffer_len):
    """Build consensus intervals from per-key start/stop positions.

    Args:
        start_data: mapping of key -> sequence of start positions.
        stop_data: mapping of key -> sequence of stop positions, indexed in
            parallel with ``start_data[key]`` (the code adds 1 to each stop,
            so stops appear to be inclusive - TODO confirm).
        buffer_len: total buffer; every interval is shrunk by half of it on
            each side before being merged.

    Returns:
        ``(starts, stops)``: parallel lists with the begin and end of every
        consensus interval, in sorted order.

    NOTE(review): the source had only the comment headers for the merge step
    and for collecting the start/stop points, so the function always returned
    two empty lists. ``t.merge_overlaps()`` and the two ``append`` calls are
    restored here on that basis - confirm against the upstream file.
    """
    starts = []
    stops = []
    t = IntervalTree()
    half_buffer = buffer_len / 2.0

    ## Shrink each interval by the buffer size
    # .items() instead of the Python-2-only .iteritems()
    for key, value in start_data.items():
        for i, raw_start in enumerate(value):
            shrunk_start = raw_start + half_buffer
            shrunk_stop = stop_data[key][i] + 1 - half_buffer
            if shrunk_start < shrunk_stop:
                t[shrunk_start:shrunk_stop] = (shrunk_start, shrunk_stop)

    ## Add chromosome endpoints without buffer
    chrom_start, chrom_stop = get_extremes(start_data, stop_data)
    first_edge = t.begin() + 1
    if chrom_start < first_edge:
        t[chrom_start:first_edge] = (chrom_start, first_edge)
    # Recomputed after the insertion above, as in the original statement order.
    last_edge = t.end() - 1
    if last_edge < chrom_stop:
        t[last_edge:chrom_stop] = (last_edge, chrom_stop)

    ## Merge intervals that overlap in tree to get consensus
    t.merge_overlaps()

    ## Check that original intervals only overlap with one consensus interval
    for key, value in start_data.items():
        for i, start in enumerate(value):
            stop = stop_data[key][i] + 1
            hits = t[start:stop]
            if len(hits) > 1:
                ## If they overlap with more than one
                ## Remove part of consensus interval
                ## This will never be more than the buffer size/2
                assert (len(hits) == 2)
                remove_start = 0
                remove_stop = 0
                min_length = float('inf')
                # Pick the smaller of the two overlaps as the part to cut.
                for interval in hits:
                    overlap_start, overlap_stop = get_overlap(
                        (start, stop), (interval[0], interval[1]))
                    if (overlap_stop - overlap_start) < min_length:
                        min_length = overlap_stop - overlap_start
                        remove_start = overlap_start
                        remove_stop = overlap_stop
                t.chop(remove_start, remove_stop)
                assert (min_length <= half_buffer)
                assert (len(t[start:stop]) < 2)

    ## Get consensus start and stop points
    chrom_len = chrom_stop - chrom_start
    covered = 0.0
    for interval in sorted(t):
        starts.append(interval[0])
        stops.append(interval[1])
        covered = covered + (interval[1] - interval[0])

    print("The percentage of the chromosome covered is: %s" % '{0:.2f}'.format(
        (covered / chrom_len) * 100.0))

    return (starts, stops)
Exemplo n.º 8
def filter_nstretches(
    itrees: Mapping[str, IntervalTree],
    nstretches: Mapping[str, IntervalTree],
    min_non_overlap: int,
) -> None:
    # NOTE(review): the next line is docstring text whose triple-quote
    # delimiters appear to have been lost; as written it is not valid Python.
    Remove contigs without much going on outside N stretches.

    # Walk every scaffold that has N stretches recorded for it.
    for scaffold, itree in nstretches.items():

        # Loop through all of the potential breaks
        for nstretch in itree:
            # Find "contigs" that overlap the potential break
            # We do this in sorted order, from smallest to largest alignment
            # that means shorter ones are removed first
            contigs = sorted(itrees[scaffold].overlap(nstretch),
                             key=lambda x: x.length())

            # Contigs selected for removal for this N stretch.
            to_drop = set()

            # Loop through the contigs to test.
            for contig in contigs:
                # Find if they overlap with any other n stretches.
                n_overlaps = nstretches[scaffold].overlap(contig)

                # Get an intervaltree of all contigs overlapping this one.
                # NOTE(review): the constructor argument and closing
                # parenthesis are missing here; presumably the contigs of
                # this scaffold that overlap `contig` - confirm.
                contig_overlaps = IntervalTree(

                # Remove all of the n-chunks from the intervals.
                # Note the "Coords" is still duplicated in the data attribute
                for n_overlap in n_overlaps:
                    contig_overlaps.chop(n_overlap.begin, n_overlap.end)

                # Get the intervals that aren't the overlap under
                # consideration.
                # (fragments are told apart by comparing their data with the
                # data of the contig being tested)
                contig_overlaps_itree = IntervalTree(o for o in contig_overlaps
                                                     if o.data != contig.data)

                # Get the fragments of the overlap under consideration
                contig_itree = IntervalTree(o for o in contig_overlaps
                                            if o.data == contig.data)

                # For each of the fragments, find how many new Non-N bases it
                # contributes to the contigging.
                # NOTE(review): the closing "])" of this sum([...]) call is
                # missing.
                len_non_overlap = sum([
                    find_len_non_overlap(f, contig_overlaps_itree)
                    for f in contig_itree

                # Remove the contig if it doesn't cut the muster
                # NOTE(review): the body of this `if` is missing; presumably
                # it records `contig` in `to_drop` - confirm.
                if len_non_overlap < min_non_overlap:

            # NOTE(review): the body of this loop is missing; presumably it
            # removes each dropped contig from itrees[scaffold] - confirm.
            for contig in to_drop:
Exemplo n.º 9
def _get_uncovered_intervals(domain: Interval,
                             covered_intervals: IntervalTree) -> IntervalTree:
    """
    Given an interval domain and a collection of intervals, return the parts
    of the domain that none of those intervals covers.

    Args:
        domain: the interval to start from.
        covered_intervals: intervals whose ranges are chopped out of ``domain``.

    Returns:
        An ``IntervalTree`` holding what is left of ``domain`` (empty when it
        is fully covered).
    """
    tree = IntervalTree([domain])
    for covered in covered_intervals:
        tree.chop(covered.begin, covered.end)
    return tree
Exemplo n.º 10
def sorted_complement(tree, start=None, end=None) -> list:
    """Return the gaps of ``tree`` inside ``[start, end)`` as a sorted list.

    Args:
        tree: iterable of intervals (indexable as ``iv[0]``/``iv[1]``). Its
            ``begin()``/``end()`` supply whichever bound is not given.
        start: lower bound of the range to complement; defaults to
            ``tree.begin()``.
        end: upper bound of the range to complement; defaults to
            ``tree.end()``.

    Returns:
        A sorted ``list`` of intervals. The return annotation previously
        said ``IntervalTree``, but ``sorted()`` always produced a list.
        A zero-width range yields ``[]`` instead of failing on the null
        interval.
    """
    if start is None:
        start = tree.begin()
    if end is None:
        end = tree.end()
    if start == end:
        # Nothing to complement (e.g. an empty tree with default bounds).
        return []

    result = IntervalTree()
    result.addi(start, end)  # using input tree bounds
    for iv in tree:
        result.chop(iv[0], iv[1])
    return sorted(result)
Exemplo n.º 11
def find_diff(list_a, list_b):
    """Subtract the ranges in ``list_b`` from the ranges in ``list_a``.

    Both arguments are iterables of ``(begin, end)``-indexable pairs. The
    result is a list of ``(begin, end)`` tuples for whatever is left of
    ``list_a`` after every range of ``list_b`` has been chopped out.
    """
    remaining = IntervalTree()

    for pair in list_a:
        remaining.add(Interval(pair[0], pair[1]))

    for pair in list_b:
        remaining.chop(pair[0], pair[1])

    return [(piece.begin, piece.end) for piece in remaining.items()]
Exemplo n.º 12
def _get_unparse_intervals_of_inds(
    dfs_inds_to_include: Sequence[int],
    ast: ObjectChoiceNode,
    unparse: UnparseResult
) -> IntervalTree:
    """Given some indicies we wish include, find the intervals of the total
    unparse string which are covered by those indicies

    The AST is walked depth first. On entering an included index while not
    currently including, the node's span is added to the tree; on reaching a
    non-included index while including, that node's span is chopped back out.

    NOTE(review): the two ``continue`` statements and the ``else`` branch were
    absent from the source (which left the add action immediately overwritten
    by the chop action); they are restored here - confirm against upstream.
    """
    include_set = set(dfs_inds_to_include)
    interval_tree = IntervalTree()
    currently_including = False
    for ind, pointer in enumerate(ast.depth_first_iter()):
        if ind % 2 != 0:
            # Only take into account the choice nodes. Skip the object nodes
            continue
        assert isinstance(pointer.cur_node, ObjectChoiceNode)
        func_need_to_do_here = None
        if ind in include_set:
            if not currently_including:
                # addi(start, end) is the shortcut for add(Interval(start, end))
                func_need_to_do_here = interval_tree.addi
                currently_including = True
        elif currently_including:
            func_need_to_do_here = interval_tree.chop
            currently_including = False
        if func_need_to_do_here:
            span = unparse.pointer_to_span(pointer)
            if span is None or span[1] - span[0] == 0:
                # No text (or zero-width text) for this node: nothing to do.
                continue
            start, end = span
            func_need_to_do_here(start, end)
    return interval_tree
Exemplo n.º 13
    def regionTable(self):
        """
        Get the "region table", a table indicating
        *base*-coordinate-delimited regions, using the same recarray
        dtype that is returned by BasH5Reader.

        *Note* that the regiontable from the StitchedZmwRead will
        not in general be equivalent to that from a bas.h5, if the BAM
        files were produced using bax2bam, because in our BAM
        encodings, a subread or adapter cannot extend beyond the HQ
        region.  Additionally there is no concept of a "region score"
        for the BAM.

        NOTE(review): the docstring delimiters, the ``append(`` call, the
        ``chop`` call, the ``else:`` and the closing ``}`` were absent from
        the source and are restored here - confirm against upstream.
        """
        zmwReadExtent = Interval(0, self.zmwReadLength)

        # Bucket every record's query range by its precise read type.
        intervalsByType = defaultdict(list)
        for r in self.bamRecords:
            intervalsByType[_preciseReadType(r)].append(
                Interval(r.qStart, r.qEnd))

        # Find an HQ region
        # (whatever is left of the full read once the "SCRAP:L" ranges are
        # chopped out)
        hqIntervalTree = IntervalTree([zmwReadExtent])
        for lqInterval in intervalsByType["SCRAP:L"]:
            hqIntervalTree.chop(lqInterval.begin, lqInterval.end)
        hqIntervals = list(hqIntervalTree)
        assert len(hqIntervals) in (0, 1)
        if len(hqIntervals) == 0:
            hqInterval = Interval(0, 0)
        else:
            hqInterval = hqIntervals[0]
        hqRegion = (self.holeNumber, Region.HQ_REGION, hqInterval.begin,
                    hqInterval.end, 0)

        # Adapters, barcodes, and inserts (and filtered inserts)
        regionTypeMap = {
            "SUBREAD": Region.INSERT_REGION,
            "SCRAP:A": Region.ADAPTER_REGION,
            "SCRAP:B": Region.BARCODE_REGION,
            "SCRAP:F": Region.INSERT_REGION
        }

        # The trailing 0 in each row fills the score column (no region score
        # exists for the BAM, per the note above).
        regions = [ hqRegion ] + \
                  [ (self.holeNumber, regionTypeMap[code], interval.begin, interval.end, 0)
                    for code in regionTypeMap
                    for interval in intervalsByType[code] ]

        return toRecArray(REGION_TABLE_DTYPE, regions)
Exemplo n.º 14
    def regionTable(self):
        """
        Get the "region table", a table indicating
        *base*-coordinate-delimited regions, using the same recarray
        dtype that is returned by BasH5Reader.

        *Note* that the regiontable from the StitchedZmwRead will
        not in general be equivalent to that from a bas.h5, if the BAM
        files were produced using bax2bam, because in our BAM
        encodings, a subread or adapter cannot extend beyond the HQ
        region.  Additionally there is no concept of a "region score"
        for the BAM.

        NOTE(review): the docstring delimiters, the ``chop`` call inside the
        "SCRAP:L" loop and the ``else:`` were absent from the source and are
        restored here - confirm against upstream.
        """
        zmwReadExtent = Interval(0, self.zmwReadLength)

        # Bucket every record's query range by its precise read type.
        intervalsByType = defaultdict(list)
        for r in self.bamRecords:
            intervalsByType[_preciseReadType(r)].append(Interval(r.qStart, r.qEnd))

        # Find an HQ region
        # (whatever is left of the full read once the "SCRAP:L" ranges are
        # chopped out)
        hqIntervalTree = IntervalTree([zmwReadExtent])
        for lqInterval in intervalsByType["SCRAP:L"]:
            hqIntervalTree.chop(lqInterval.begin, lqInterval.end)
        hqIntervals = list(hqIntervalTree)
        assert len(hqIntervals) in (0, 1)
        if len(hqIntervals) == 0:
            hqInterval = Interval(0, 0)
        else:
            hqInterval = hqIntervals[0]
        hqRegion = (self.holeNumber, Region.HQ_REGION, hqInterval.begin, hqInterval.end, 0)

        # Adapters, barcodes, and inserts (and filtered inserts)
        regionTypeMap = { "SUBREAD" : Region.INSERT_REGION,
                          "SCRAP:A" : Region.ADAPTER_REGION,
                          "SCRAP:B" : Region.BARCODE_REGION,
                          "SCRAP:F" : Region.INSERT_REGION }

        # The trailing 0 in each row fills the score column (no region score
        # exists for the BAM, per the note above).
        regions = [ hqRegion ] + \
                  [ (self.holeNumber, regionTypeMap[code], interval.begin, interval.end, 0)
                    for code in regionTypeMap
                    for interval in intervalsByType[code] ]

        return toRecArray(REGION_TABLE_DTYPE, regions)
Exemplo n.º 15
    def find(self, size: int, data: Optional[int] = None) -> IntervalTree:
        """Finds an interval tree of a given size in this resource pool.

        This is essentially an operation to find *which* resources to allocate
        considering that we manage individual resource units and guarantee
        exclusive usage by a resource unit.

        Parameters
        ----------
            size : int
                The size (amount) of resources to allocate
            data : Optional[int]
                The identifier of the "owner" of the found resources. It is
                stored as the data of the free-space interval the search
                starts from.

        Returns
        -------
            IntervalTree: An interval tree with the size requested if such
            a tree can be found. Otherwise, an empty tree is returned.
        """
        used = IntervalTree()
        # Bail out with the empty tree when the request does not fit.
        if not self.fits(size):
            return used
        # Start from the whole pool and chop out everything already in use.
        free = IntervalTree([Interval(0, self.size, data)])
        used_size: int = 0
        for interval in self.used_pool:
            free.chop(interval.begin, interval.end)
        # Accumulate free intervals until the requested size is reached.
        for interval in free:
            temp_size = ResourcePool.measure(interval) + used_size
            # NOTE(review): the body of this `if` is missing; presumably the
            # interval is added to `used` and the loop stops - confirm.
            if temp_size == size:
            # NOTE(review): statements are missing in and after this branch
            # (an add of `interval` to `used`, an `else:`, and the call that
            # wraps the truncated Interval(...) below) - confirm upstream.
            if temp_size < size:
                used_size = temp_size
                    Interval(interval.begin, interval.begin + size - used_size,
        return used
Exemplo n.º 16
class IpTree:
    """Wrapper around an ``IntervalTree`` stored in ``self.tree``."""

    def __init__(self):
        self.tree = IntervalTree()

    def add_interval(self, begin, end, data):
        """Chop ``[begin, end)`` out of the tree after inspecting overlaps.

        NOTE(review): the bodies of both branches below are missing, and the
        freshly built ``interval`` is never inserted into ``self.tree`` in the
        visible code - an insert after the chop was presumably lost; confirm.
        """
        interval = Interval(begin, end, data)
        # Everything currently stored that overlaps the new range.
        overlapped = self.tree[interval.begin:interval.end]
        for o in overlapped:
            # NOTE(review): body missing - existing interval encloses the new one.
            if o.contains_interval(interval):
            # NOTE(review): body missing - new interval encloses the existing one.
            elif interval.contains_interval(o):
        self.tree.chop(interval.begin, interval.end)

    def update_w(self):
        """Store each interval's width (``end - begin``) under ``data['w']``."""
        # Relies on every interval's data supporting item assignment.
        for i in self.tree.all_intervals:
            i.data['w'] = i.end - i.begin

    def get_all(self):
        """Return the tree's ``all_intervals`` collection."""
        return self.tree.all_intervals
Exemplo n.º 17
def test_chop():
    """chop() must remove the requested range and keep only what lies outside it."""
    def remaining_after(begin, end):
        tree = IntervalTree([Interval(0, 10)])
        tree.chop(begin, end)
        return sorted(tree)

    # Interior cut leaves both flanks.
    left_over = remaining_after(3, 7)
    assert len(left_over) == 2
    assert left_over[0] == Interval(0, 3)
    assert left_over[1] == Interval(7, 10)

    # Cut flush with the left edge.
    left_over = remaining_after(0, 7)
    assert len(left_over) == 1
    assert left_over[0] == Interval(7, 10)

    # Cut flush with the right edge.
    left_over = remaining_after(5, 10)
    assert len(left_over) == 1
    assert left_over[0] == Interval(0, 5)

    # Cut wider than, or exactly equal to, the interval removes it entirely.
    assert len(remaining_after(-5, 15)) == 0
    assert len(remaining_after(0, 10)) == 0
Exemplo n.º 18
class Day(object):
    """Free/booked time bookkeeping for one day, backed by interval trees.

    ``free`` starts as the single interval produced by ``get_iv(start, end)``
    and ``booked`` starts empty. ``dt`` is extra time removed from ``free``
    after the end of every booking.
    """

    def __init__(self, start, end, dt):
        self.dt = dt
        self.free = IntervalTree([get_iv(start, end)])
        self.booked = IntervalTree([])

    def is_free(self, interval):
        """Return True if ``interval`` overlaps free time and no booking."""
        return (self.free.overlaps(interval)
                and not self.booked.overlaps(interval))

    def schedule(self, interval):
        """Book ``interval``: remove it (plus ``dt``) from free time and record it.

        Raises:
            AssertionError: if ``is_free(interval)`` is false.
        """
        assert self.is_free(interval),\
            "Attempt to double-book: {} - {}".format(
                m2t(interval.begin), m2t(interval.end))
        self.free.chop(interval.begin, interval.end + self.dt)
        # Record the booking. Without this, `booked` stayed empty forever, so
        # dumps() printed nothing and the booked check in is_free() never fired.
        # NOTE(review): this line was absent from the source - confirm upstream.
        self.booked.add(interval)

    def dumps(self):
        """Return one tab-separated ``begin - end  data`` line per booking."""
        return ''.join(
            "\t{} - {}\t{}\n".format(m2t(iv.begin), m2t(iv.end), iv.data)
            for iv in sorted(self.booked))
Exemplo n.º 19
def test_chop():
    """Table-driven check of chop() on a single [0, 10) interval."""
    # (chop range, intervals expected to remain, in sorted order)
    cases = [
        ((3, 7), [Interval(0, 3), Interval(7, 10)]),
        ((0, 7), [Interval(7, 10)]),
        ((5, 10), [Interval(0, 5)]),
        ((-5, 15), []),
        ((0, 10), []),
    ]

    for (chop_begin, chop_end), expected in cases:
        tree = IntervalTree([Interval(0, 10)])
        tree.chop(chop_begin, chop_end)
        assert len(tree) == len(expected)
        remaining = sorted(tree)
        for position, wanted in enumerate(expected):
            assert remaining[position] == wanted
Exemplo n.º 20
class Allocator(Publisher):
    """Tracks address-range state in two interval trees and republishes events.

    ``_aa`` holds allocation state and ``_am`` holds mapping state; each is
    initialised with a single interval covering ``[0, 2**64)``. Every public
    event method forwards the event through ``self._publish``.

    NOTE(review): this snippet has lost a number of lines (closing brackets,
    ``else:``/``return`` statements, ``add_argument`` calls); the gaps are
    marked below and the class is not valid Python as it stands.
    """
    # Initialization ------------------------------------------------------ {{{

    # NOTE(review): '_ls' is assigned in __init__ but is not listed here; that
    # assignment only works if a base class provides __dict__ - confirm.
    __slots__ = ('_aa', '_am', '_arg', '_ts')

    # NOTE(review): `cliargs=[]` is a mutable default; in the visible code it
    # is only passed to parse_args. `kwargs` is unused and no call to the base
    # class initializer is visible - confirm.
    def __init__(self, tslam=None, cliargs=[], **kwargs):

        # Timestamp source; invoked as self._ts() when logging.
        self._ts = tslam

        # Allocation state: the whole address space starts out REVOKED.
        self._aa = IntervalTree()
        self._aa.add(AddrIval(0, 2**64, AState.REVOKED))

        # Mapping state: the whole address space starts out UNMAPD.
        self._am = IntervalTree()
        self._am.add(AddrIval(0, 2**64, AState.UNMAPD))

        self._ls = {}

        # Argument parsing ---------------------------------------------------- {{{

        argp = argparse.ArgumentParser()
        # NOTE(review): the opening lines of three add_argument(...) calls are
        # missing; only their help= continuation lines remain. The code below
        # reads self._arg.fix, self._arg.skip_map and self._arg.drop_safe.
                          help="Automatically insert fixups for reports")
                          help="Ignore map/unmap constraints")
                          help="Suppress warnings for safely dropped events")
        self._arg = argp.parse_args(cliargs)

# --------------------------------------------------------------------- }}}
# --------------------------------------------------------------------- }}}
# Allocation ---------------------------------------------------------- {{{

    def _allocd(self, begin, end):
        """Mark ``[begin, end)`` as ALLOCD in ``_aa``, logging any conflicts."""
        overlaps_a = self._aa[begin:end]
        overlaps_m = self._am[begin:end]

        if not self._arg.skip_map:
            # NOTE(review): the closing "]" of this list comprehension is missing.
            overlaps_unmapped = [
                o for o in overlaps_m if o.state == AState.UNMAPD
            if overlaps_unmapped:
                logging.warning("Allocation ts=%d b=%x e=%x overlaps unmap=%r",
                                self._ts(), begin, end, overlaps_unmapped)

            # XXX fix by mapping pages

        # NOTE(review): the closing "]" of this list comprehension is missing.
        overlaps_allocated = [
            o for o in overlaps_a if o.state == AState.ALLOCD
        if overlaps_allocated:
            logging.error("Allocation ts=%d b=%x e=%x overlaps alloc=%r",
                          self._ts(), begin, end, overlaps_allocated)
            if self._arg.fix:
                # NOTE(review): the event published here is named 'free',
                # whereas freed() publishes 'freed' - confirm which is intended.
                for oa in overlaps_allocated:
                    self._publish('free', '', oa.begin)

        # Replace whatever state covered the range with a single ALLOCD interval.
        self._aa.chop(begin, end)
        self._aa.add(AddrIval(begin, end, AState.ALLOCD))

    def allocd(self, stk, begin, end):
        """Record an allocation of ``[begin, end)`` and publish 'allocd'."""
        self._allocd(begin, end)
        self._publish('allocd', stk, begin, end)

# --------------------------------------------------------------------- }}}
# Freeing ------------------------------------------------------------- {{{

    def _freed(self, addr):
        """Mark the range starting at ``addr`` as FREED in ``_aa``.

        Returns False where the visible code drops the event, True after the
        state has been updated.
        """
        doalloc = False
        end = addr + 1  # Will be fixed up later
        overlaps_a = self._aa[addr:end]
        overlaps_m = self._am[addr:end]

        if not self._arg.skip_map:
            # NOTE(review): the closing "]" of this list comprehension is missing.
            overlaps_unmapped = [
                o for o in overlaps_m if o.state == AState.UNMAPD
            if overlaps_unmapped:
                logging.error("Free ts=%d a=%x overlaps unmap=%r", self._ts(),
                              addr, overlaps_unmapped)

        allocations = [o for o in overlaps_a if o.state == AState.ALLOCD]
        overlaps_free = [o for o in overlaps_a if o.state == AState.FREED]
        if overlaps_free != []:
            logging.warning("Free ts=%d a=%x overlaps free=%r", self._ts(),
                            addr, overlaps_free)
            if allocations == [] and len(
                    overlaps_free) == 1 and self._arg.drop_safe:
                return False
                # NOTE(review): an `else:` appears to be missing before this
                # loop; as written it sits unreachable after the return.
                for of in overlaps_free:
                    if of.begin <= addr:
                        end = max(end, of.end)
                if self._arg.fix:
                    doalloc = True

        if len(allocations) > 1 or (allocations != [] and overlaps_free != []):
            logging.error("Free ts=%d a=%x multiply-attested alloc=%r free=%r",
                          self._ts(), addr, allocations, overlaps_free)
        elif allocations == [] and overlaps_free == []:
            logging.warning("Free ts=%d a=%x no corresponding alloc",
                            self._ts(), addr)
            if self._arg.fix and not self._arg.drop_safe:
                doalloc = True
                # NOTE(review): `else:` lines appear to be missing around the
                # assert/return below and before the loop over `allocations`;
                # the original nesting cannot be recovered from here.
                assert doalloc == False
                return False
            for a in allocations:
                if a.begin != addr:
                    # Likely to leave cruft behind, indicative of serious errors
                    logging.error("Free ts=%d a=%x within alloc=%r",
                                  self._ts(), addr, a)
                    end = max(end, a.end)

        # Replace whatever state covered the range with a single FREED interval.
        self._aa.chop(addr, end)
        self._aa.add(AddrIval(addr, end, AState.FREED))

        # Publish a synthetic allocation when a fixup was requested above.
        if doalloc:
            self._publish('allocd', '', addr, end)

        return True

    def freed(self, stk, addr):
        """Record a free at ``addr``; publish 'freed' if ``_freed`` accepts it."""
        if addr == 0:
            # Just throw out free(NULL)
            # NOTE(review): the statement of this branch is missing
            # (presumably a bare `return`) - confirm.

        if self._freed(addr):
            self._publish('freed', stk, addr)

# --------------------------------------------------------------------- }}}
# Reallocation -------------------------------------------------------- {{{

    def reallocd(self, stk, begin_old, begin_new, end_new):
        """Record the new range as allocated and publish 'reallocd'.

        ``begin_old`` is only forwarded in the event; the visible code does
        not update the state of the old range.
        """
        self._allocd(begin_new, end_new)
        self._publish('reallocd', stk, begin_old, begin_new, end_new)

# --------------------------------------------------------------------- }}}
# Mapping ------------------------------------------------------------- {{{

    def mapd(self, stk, begin, end, prot):
        """Publish 'mapd' unchanged; ``_am`` is not updated here."""

        # XXX

        self._publish('mapd', stk, begin, end, prot)

# --------------------------------------------------------------------- }}}
# Unmapping ----------------------------------------------------------- {{{

    def unmapd(self, stk, begin, end):
        """Publish 'unmapd' unchanged; ``_am`` is not updated here."""

        # XXX

        self._publish('unmapd', stk, begin, end)

# --------------------------------------------------------------------- }}}
# Revoking ------------------------------------------------------------ {{{

    def revoked(self, stk, spans):
        """Warn about allocations overlapping each revoked span, then publish.

        ``spans`` is iterated as ``(begin, end)`` pairs. ``_aa`` is not
        modified in the visible code.
        """

        for (begin, end) in spans:
            overlaps = self._aa[begin:end]
            # NOTE(review): the closing "]" of this list comprehension is missing.
            overlaps_allocated = [
                o for o in overlaps if o.state == AState.ALLOCD
            if overlaps_allocated:
                logging.warning("Revocation ts=%d b=%x e=%x overlaps alloc=%r",
                                self._ts(), begin, end, overlaps_allocated)
                if self._arg.fix:
                    for oa in overlaps_allocated:
                        self._publish('free', '', oa.begin)

                # XXX fix by freeing

        self._publish('revoked', stk, spans)

# --------------------------------------------------------------------- }}}
# Size-measurement pass-thru ------------------------------------------ {{{

    def size_measured(self, sz):
        """Forward a 'size_measured' event unchanged."""
        self._publish('size_measured', sz)

    def sweep_size_measured(self, sz):
        """Forward a 'sweep_size_measured' event unchanged."""
        self._publish('sweep_size_measured', sz)
Exemplo n.º 21
class TemporalPathPyObject(PathPyObject):
    """Base class for a temporal object.

    Events are kept in an ``IntervalTree`` (``self._events``) whose interval
    data are attribute dicts.

    NOTE(review): this snippet has lost several lines (the parent-class
    initializer call, decorators, closing braces, ``else:``); the gaps are
    marked below and the class is not valid Python as it stands.
    """
    def __init__(self, uid: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize the temporal object."""

        # initialize the parent class
        # NOTE(review): the call announced by the comment above is missing.

        # default start and end time of the object
        self._start = float('-inf')
        self._end = float('inf')

        # initialize an intervaltree to save events
        self._events = IntervalTree()

        # add new events
        # NOTE(review): the statement announced by the comment above is missing.

        # variable to store changes in the events
        self._len_events = len(self._events)

    def __iter__(self):
        """Yield ``self`` once per event, in sorted order.

        Before each yield ``self._attributes`` is replaced by the event's
        data merged with its 'start' and 'end'; both keys are removed again
        once iteration completes.
        """

        # create generator
        for start, end, attributes in sorted(self._events):
            self._attributes = {**{'start': start, 'end': end}, **attributes}
            yield self
        self._attributes.pop('start', None)
        self._attributes.pop('end', None)

    # NOTE(review): the `.register(...)` decorators below imply this method is
    # wrapped by a single-dispatch decorator that is missing here - confirm.
    def __getitem__(self, key: Any) -> Any:
        """Return ``key`` from the data of the last event (None if absent)."""
        # get the last element
        _, _, last = self.last()
        return last.get(key, None)

    @__getitem__.register(tuple)  # type: ignore
    def _(self, key: tuple) -> Any:
        # key[0] selects the time range, the optional key[1] an attribute name.
        start, end, _ = _get_start_end(key[0])
        # Merge the data dicts of all events in the range, in sorted order.
        # NOTE(review): the closing "}" of this dict comprehension is missing.
        values = {
            k: v
            for _, _, o in sorted(self._events[start:end])
            for k, v in o.items()
        return values.get(key[1], None) if len(key) == 2 else values

    @__getitem__.register(slice)  # type: ignore
    @__getitem__.register(int)  # type: ignore
    @__getitem__.register(float)  # type: ignore
    def _(self, key: Union[int, float, slice]) -> Any:
        # Same generator behaviour as __iter__, restricted to the events
        # selected by `key`.
        start, end, _ = _get_start_end(key)

        # create generator
        for start, end, attributes in sorted(self._events[start:end]):
            self._attributes = {**{'start': start, 'end': end}, **attributes}
            yield self
        self._attributes.pop('start', None)
        self._attributes.pop('end', None)

    def __setitem__(self, key: Any, value: Any) -> None:
        # NOTE(review): the start of this statement is missing; only the tail
        # of a call receiving **{key: value} remains.
                   **{key: value})

    @__setitem__.register(tuple)  # type: ignore
    def _(self, key: tuple, value: Any) -> None:
        # key[0] selects the time range, key[1] the attribute name to set.
        start, end, _ = _get_start_end(key[0])
        self.event(start=start, end=end, **{key[1]: value})

    def start(self):
        """start of the object"""
        return self.attributes.get('start', self._start)

    def end(self):
        """end of the object"""
        return self.attributes.get('end', self._end)

    def _clean_events(self):
        """helper function to clean events"""

        # BUG: There is a bug in the intervaltree library
        # merge_equals switches old and new data randomly
        def reducer(old, new):
            # Entries from `new` win on key collisions.
            return {**old, **new}

        # Only do work when the number of events changed since the last check.
        if len(self._events) != self._len_events:
            # split overlapping intervals
            # NOTE(review): the statement announced by the comment above is missing.

            # combine the dict of the overlapping intervals
            # NOTE(review): the statement announced by the comment above is
            # missing; `reducer` is unused in the visible code.

            # update the length of the events
            self._len_events = len(self._events)

    def event(self, *args, **kwargs) -> None:
        """Add a temporal event."""

        # check if object is active or inactive
        active = kwargs.pop('active', True)

        # get start and end time of the event
        start, end, kwargs = _get_start_end(*args, **kwargs)

        # NOTE(review): an `else:` appears to be missing before the chop below;
        # as written the interval is added and then chopped out again.
        if active:
            self._events[start:end] = kwargs  # type: ignore
            self._attributes = kwargs.copy()
            self._events.chop(start, end)

        # update start and end times
        self._start = self._events.begin()
        self._end = self._events.end()

    def last(self):
        """return the last interval in sorted order as (begin, end, data)"""
        interval = sorted(self._events)[-1]
        return interval.begin, interval.end, interval.data
Exemplo n.º 22
class SegmentProducer(object):

    save_interval = SAVE_INTERVAL

    def __init__(self, download, n_procs):
        """Remember the download and the number of worker processes.

        ``download.size`` must already be known; the assertion below fails
        otherwise.
        """

        assert download.size is not None,\
            'Segment producer passed uninitizalied Download!'

        self.download = download
        self.n_procs = n_procs

        # Initialize producer
        # NOTE(review): no initialisation calls follow this comment in this
        # copy; presumably the _setup_* / load_state calls were lost -- confirm.

    def _setup_pbar(self):
        """Create the progress bar for this download and store it in ``self.pbar``."""
        # Reset first so ``self.pbar`` is None if ``get_pbar`` raises.
        self.pbar = None
        self.pbar = get_pbar(self.download.ID, self.download.size)

    def _setup_work(self):
        if self.is_complete():
            log.info('File already complete.')

        work_size = self.integrate(self.work_pool)
        self.block_size = work_size / self.n_procs

    def _setup_queues(self):
        """Create the work and completion queues.

        When ``WINDOWS`` is true plain ``Queue`` objects are used; otherwise
        both queues are obtained from a ``Manager``.
        """
        if WINDOWS:
            self.q_work = Queue()
            self.q_complete = Queue()
        else:
            # Without this ``else`` the Windows queues above were replaced
            # straight away by manager queues, making the branch pointless.
            manager = Manager()
            self.q_work = manager.Queue()
            self.q_complete = manager.Queue()

    def integrate(self, itree):
        """Return the summed length (``end - begin``) of all intervals in ``itree``."""
        return sum(iv.end - iv.begin for iv in itree.items())

    def validate_segment_md5sums(self):
        """Re-check the md5 checksum of every completed segment.

        Returns True immediately when ``download.check_segment_md5sums`` is
        false. Otherwise each completed interval is read from the memory
        mapped file, hashed with ``md5sum`` and compared with the ``'md5sum'``
        entry of the interval's data; mismatches are counted.

        NOTE(review): several statements of this method are missing in this
        copy (see the notes below), so it is not valid Python as shown.
        """
        if not self.download.check_segment_md5sums:
            return True
        corrupt_segments = 0
        intervals = sorted(self.completed.items())
        pbar = ProgressBar(widgets=[
            'Checksumming {}: '.format(self.download.ID), Percentage(), ' ',
            Bar(marker='#', left='[', right=']'), ' ', ETA()])
        with mmap_open(self.download.path) as data:
            for interval in pbar(intervals):
                log.debug('Checking segment md5: {}'.format(interval))
                if not interval.data or 'md5sum' not in interval.data:
                        # NOTE(review): the call that takes this message as
                        # its argument is missing.
                        """User opted to check segment md5sums on restart.
                        Previous download did not record segment
                        md5sums (--no-segment-md5sums)."""))
                chunk = data[interval.begin:interval.end]
                checksum = md5sum(chunk)
                if checksum != interval.data.get('md5sum'):
                    log.debug('Redownloading corrupt segment {}, {}.'.format(
                        interval, checksum))
                    corrupt_segments += 1
                    # NOTE(review): nothing visible here puts the corrupt
                    # segment back into the work pool -- confirm upstream.
        if corrupt_segments:
            # NOTE(review): the arguments and closing brackets of this call
            # are missing.
            log.warn('Redownloading {} currupt segments.'.format(

    # Resets the work pool to one interval covering the whole download, then
    # tries to load a pickled IntervalTree of completed segments from
    # ``download.state_path`` and chops every completed interval out of the
    # work pool.
    # NOTE(review): this method is truncated in this copy -- the statements
    # that wrap the multi-line messages, the body of the second ``if``, and
    # the ``try:``/``else:`` lines around the pickle load are missing.
    # NOTE(review): ``pickle.load`` executes arbitrary code from the state
    # file; only load state files from a trusted location.
    def load_state(self):
        # Establish default intervals
        self.work_pool = IntervalTree([Interval(0, self.download.size)])
        self.completed = IntervalTree()
        self.size_complete = 0
        if not os.path.isfile(self.download.state_path)\
           and os.path.isfile(self.download.path):
                """A file named '{} was found but no state file was found at at
                '{}'. Either this file was downloaded to a different
                location, the state file was moved, or the state file
                was deleted.  Parcel refuses to claim the file has
                been successfully downloaded and will restart the
                    self.download.path, self.download.state_path))

        if not os.path.isfile(self.download.state_path):

        # If there is a file at load_path, attempt to remove
        # downloaded sections from work_pool
        log.info('Found state file {}, attempting to resume download'.format(

        if not os.path.isfile(self.download.path):
                """State file found at '{}' but no file for {}.
                Restarting entire download.""".format(
                    self.download.state_path, self.download.ID)))
            with open(self.download.state_path, "rb") as f:
                self.completed = pickle.load(f)
            assert isinstance(self.completed, IntervalTree), \
                "Bad save state: {}".format(self.download.state_path)
        except Exception as e:
            self.completed = IntervalTree()
            log.error('Unable to resume file state: {}'.format(str(e)))
            self.size_complete = self.integrate(self.completed)
            for interval in self.completed:
                self.work_pool.chop(interval.begin, interval.end)

    # Pickles ``self.completed`` into a temporary file and renames that file
    # onto ``download.state_path``.
    # NOTE(review): this method is truncated in this copy -- the opening
    # ``try:`` lines, the arguments of ``NamedTemporaryFile``, the flush step,
    # the ``else:`` of the Windows check and parts of the error handling are
    # missing.
    def save_state(self):
            # Grab a temp file in the same directory (hopefully avoid
            # cross device links) in order to atomically write our save file
            temp = tempfile.NamedTemporaryFile(
            # Write completed state
            pickle.dump(self.completed, temp)
            # Make sure all data is written to disk

            # Rename temp file as our save file, this could fail if
            # the state file and the temp directory are on different devices
            if OS_WINDOWS and os.path.exists(self.download.state_path):
                # If we're on windows, there's not much we can do here
                # except stash the old state file, rename the new one,
                # and back up if there is a problem.
                old_path = os.path.join(tempfile.gettempdir(), ''.join(
                    random.choice(string.ascii_lowercase + string.digits)
                    for _ in range(10)))
                    # stash the old state file
                    os.rename(self.download.state_path, old_path)
                    # move the new state file into place
                    os.rename(temp.name, self.download.state_path)
                    # if no exception, then delete the old stash
                except Exception as msg:
                    log.error('Unable to write state file: {}'.format(msg))
                        os.rename(old_path, self.download.state_path)
                # If we're not on windows, then we'll just try to
                # atomically rename the file
                os.rename(temp.name, self.download.state_path)

        except KeyboardInterrupt:
            log.warn('Keyboard interrupt. removing temp save file'.format(
        except Exception as e:
            log.error('Unable to save state: {}'.format(str(e)))

    # Repeatedly takes the next interval from ``_get_next_interval`` and logs
    # it.
    # NOTE(review): the body of the final ``if`` and whatever hands the
    # interval to the caller are missing in this copy; presumably this is a
    # generator that stops once no interval is left -- confirm upstream.
    def schedule(self):
        while True:
            interval = self._get_next_interval()
            log.debug('Returning interval: {}'.format(interval))
            if not interval:

    def _get_next_interval(self):
        """Carve the next block of work off the front of the work pool.

        Returns None when the pool is empty. Otherwise the first interval in
        sorted order is used: at most ``self.block_size`` units from its
        beginning are chopped out of the pool and returned as a new Interval.
        """
        pending = sorted(self.work_pool.items())
        if len(pending) == 0:
            return None
        head = pending[0]
        begin = head.begin
        stop = min(head.end, begin + self.block_size)
        self.work_pool.chop(begin, stop)
        return Interval(begin, stop)

    # Updates the progress bar; a failure is only logged at debug level.
    # NOTE(review): the body of the ``if``, the ``try:`` line and the actual
    # progress-bar update are missing in this copy.
    def print_progress(self):
        if not self.pbar:
        except Exception as e:
            log.debug('Unable to update pbar: {}'.format(str(e)))

    def check_file_exists_and_size(self):
        """Report whether the target of the download is present.

        For a regular file the path must be an existing file whose size on
        disk equals ``download.size``. For anything else only the existence
        of the path is checked, and a debug message is logged.
        """
        if not self.download.is_regular_file:
            log.debug('File is not a regular file, refusing to check size.')
            return os.path.exists(self.download.path)
        if not os.path.isfile(self.download.path):
            return False
        return os.path.getsize(self.download.path) == self.download.size

    # Compares the total length of the completed intervals with the expected
    # download size.
    # NOTE(review): the second operand of the ``and`` and the closing bracket
    # are missing in this copy; presumably an on-disk check such as
    # ``check_file_exists_and_size`` -- confirm upstream.
    def is_complete(self):
        return (self.integrate(self.completed) == self.download.size and

    # Shuts the worker processes down and finishes the progress bar.
    # NOTE(review): the bodies of the ``for``, ``while`` and ``if`` statements
    # are missing in this copy; only the comments describe what they did.
    def finish_download(self):
        # Tell the children there is no more work, each child should
        # pull one NoneType from the queue and exit
        for i in range(self.n_procs):

        # Wait for all the children to exit by checking to make sure
        # that everyone has taken their NoneType from the queue.
        # Otherwise, the segment producer will exit before the
        # children return, causing them to read from a closed queue
        log.debug('Waiting for children to report')
        while not self.q_work.empty():

        # Finish the progressbar
        if self.pbar:

    # Pulls finished intervals from ``self.q_complete`` until the download is
    # complete, adding each interval's length to ``self.size_complete`` and to
    # a counter that is reset once it reaches ``self.save_interval``.
    # NOTE(review): the enclosing ``try:``, the body of the inner ``if`` and
    # the statements that record the interval / save the state are missing in
    # this copy.
    def wait_for_completion(self):
            since_save = 0
            while not self.is_complete():
                while since_save < self.save_interval:
                    interval = self.q_complete.get()
                    if self.is_complete():
                    this_size = interval.end - interval.begin
                    self.size_complete += this_size
                    since_save += this_size
                since_save = 0
                assert (False)

        if (geneId == None):
            print("Warning (coding)")
            continue  # Skip this CDS
        assert (geneId != None)

        if (cds.end - cds.start < 1):
            continue  # Skip this CDS

        codingRegions.addi(cds.start, cds.end, geneId)
        nonCodingRegions.chop(cds.start, cds.end)

    # -----------------------------------------------------
    # Collect transcribed regions
    # -----------------------------------------------------
    # Note: standard coding genes have the general structure: gene -> mRNA -> CDS
    #       non-coding genes have the general structure       XXXX_gene -> noncoding_exon   (XXXX can be tRNA, rRNA, snRNA, snoRNA, ncRNA)
    for mRNA in db.features_of_type(('mRNA', 'noncoding_exon'),
        geneId = None
        for gene in db.parents(mRNA.id):
            if gene.featuretype == 'gene':
                geneId = gene.id
            elif gene.featuretype == 'transposable_element_gene' or gene.featuretype == 'LTR_retrotransposon' or gene.featuretype == 'tRNA_gene' or gene.featuretype == 'ncRNA_gene' or gene.featuretype == 'snoRNA_gene' or gene.featuretype == 'rRNA_gene' or gene.featuretype == 'snRNA_gene' or gene.featuretype == 'telomerase_RNA_gene':
                # TODO - What to do about transposable element genes?!
Exemplo n.º 24
class IntervalGroup(BaseTree):
    _tree: IntervalTree

    def compatible_keys(keys):
        """Return True when every element of ``keys`` is a 2-tuple of ints.

        An empty ``keys`` is compatible.
        """
        return all(
            isinstance(key, tuple)
            and len(key) == 2
            and all(isinstance(part, int) for part in key)
            for key in keys
        )

    def from_dict(cls, d):
        """Build an instance from a mapping whose keys unpack into the interval bounds."""
        intervals = []
        for bounds, data in d.items():
            intervals.append(Interval(*bounds, data))
        return cls(IntervalTree(intervals))

    def from_label_dict(cls, d):
        """Build an instance from a mapping keyed by ``"begin-end"`` labels.

        Each label is split on ``"-"`` and its parts are converted with
        ``int`` to give the interval bounds.
        """
        intervals = []
        for label, data in d.items():
            bounds = map(int, label.split("-"))
            intervals.append(Interval(*bounds, data))
        return cls(IntervalTree(intervals))

    def add_group(self, name, group):
        """Store ``group`` under ``name`` via item assignment (``self[name] = group``)."""
        self[name] = group

    def key_to_label(self, key):
        """Format the first two elements of ``key`` as the label ``"begin-end"``."""
        return "{}-{}".format(key[0], key[1])

    def label_to_key(self, label):
        """Convert a ``"begin-end"`` label into a tuple of ints.

        The label is split on ``"-"`` and every part is converted with
        ``int``, mirroring the parsing done in ``from_label_dict``.

        Raises:
            ValueError: if a part is not an integer literal (this includes
                labels with negative bounds, since the sign is also a ``-``).
        """
        # ``apply`` is not a builtin in Python 3, and it would have called
        # ``int`` once with all parts as arguments; ``map`` converts each part.
        return tuple(map(int, label.split("-")))

    def to_label_dict(self):
        """Return a dict mapping ``"begin-end"`` labels to interval data, in sorted interval order."""
        labelled = {}
        for iv in sorted(self._tree):
            labelled["{}-{}".format(iv.begin, iv.end)] = iv.data
        return labelled

    def to_dict(self):
        """Return a dict mapping ``(begin, end)`` tuples to interval data, in sorted interval order."""
        mapping = {}
        for iv in sorted(self._tree):
            mapping[(iv.begin, iv.end)] = iv.data
        return mapping

    def __init__(self, tree=None, *args, **kwargs):
        """Wrap ``tree``, or a new empty IntervalTree when none is given.

        Extra positional and keyword arguments are accepted and ignored.

        Raises:
            TypeError: if ``tree`` is neither None nor an IntervalTree.
        """
        backing = IntervalTree() if tree is None else tree
        if isinstance(backing, IntervalTree):
            self._tree = backing
            return
        raise TypeError("tree must be an instance of IntervalTree.")

    def __getitem__(self, key):
        if isinstance(key, str):
            key = self.label_to_key(key)
        if isinstance(key, int):
            return self.value(key)
        elif isinstance(key, tuple) and len(key) == 2:
            return self.overlap_content(*key)
        elif isinstance(key, Iterable):
            return self.values_at(key)
        elif isinstance(key, slice):
            start = key.start or self.start
            stop = key.stop or self.end
            if key.step is None:
                return self.overlap(key.start, key.stop)
                return self.values_at(range(start, stop, key.step))

    def start(self):
        """Return ``self._tree.begin()``, the beginning reported by the underlying tree."""
        return self._tree.begin()

    def end(self):
        """Return ``self._tree.end()``, the end reported by the underlying tree."""
        return self._tree.end()

    def __setitem__(self, key, value):
        if isinstance(key, str):
            key = self.label_to_key(key)
        if isinstance(key, slice):
            start, stop, step = key.start, key.stop, key.step
        elif isinstance(key, tuple):
            if len(key) == 2:
                start, stop = key
                step = None
            elif len(key) == 3:
                start, stop, step = key
                raise ValueError("Setting intervals with tuple must be  \
                            of form (start, end) or (start, end, step)")
            raise TypeError(
                "Wrong type. Setting intervals can only be done using a \
                            slice or tuple of (start, end) or (start, end, step)"
        if start is None:
            start = self.start
        if stop is None:
            stop = self.end
        if step is None:
            self.set_interval(start, stop, value)
            indices = list(range(start, stop, step))
            for begin, end, val in zip(indices[:-1], indices[1:], value):
                self.set_interval(begin, end, val)

    def __delitem__(self, key):
        if isinstance(key, str):
            key = self.label_to_key(key)
        if isinstance(key, tuple) and len(key) == 2:

        if isinstance(key, slice):
            self._tree.chop(key.start, key.end)
        raise TypeError("Must pass a tuple of (begin,end) or slice.")

    def keys(self):
        """Yield ``(begin, end)`` for every stored interval, in sorted order."""
        ordered = sorted(self._tree)
        yield from ((iv.begin, iv.end) for iv in ordered)

    def labels(self):
        """Return a lazy iterator over the label of every key."""
        to_label = self.key_to_label
        return map(to_label, self.keys())

    def items(self):
        """Yield ``((begin, end), data)`` for every stored interval, in sorted order."""
        ordered = sorted(self._tree)
        yield from (((iv.begin, iv.end), iv.data) for iv in ordered)

    def values(self):
        """Yield the data of every stored interval, in sorted interval order."""
        ordered = sorted(self._tree)
        yield from (iv.data for iv in ordered)

    def __iter__(self):
        """Iterate over the keys, i.e. whatever ``self.keys()`` yields."""
        return self.keys()

    def __len__(self):
        """Return the number of intervals in the underlying tree."""
        return len(self._tree)

    def __bool__(self):
        return bool(len(self._tree))

    def __getstate__(self):
        return tuple(sorted([tuple(iv) for iv in self._tree]))

    def __setstate__(self, d):
        """Rebuild the underlying tree from an iterable of interval field tuples."""
        self._tree = IntervalTree([Interval(*fields) for fields in d])

    def overlap(self, begin, end):
        """Return the stored intervals overlapping ``begin``..``end``, clipped to that range.

        The hits are taken from the underlying tree in sorted order. Each one
        is returned as a new Interval whose bounds are limited to the queried
        range and whose data is unchanged.
        """
        hits = sorted(self._tree.overlap(begin, end))
        # The closing bracket of this list was missing in the source.
        return [
            Interval(max(iv.begin, begin), min(iv.end, end), iv.data)
            for iv in hits
        ]

    def overlap_content(self, begin, end):
        """Return the data of the intervals overlapping ``begin``..``end``.

        Exactly one hit yields its data directly; otherwise a list of the
        data of all hits (possibly empty) is returned, in sorted order.
        """
        matches = sorted(self._tree.overlap(begin, end))
        payloads = [match.data for match in matches]
        return payloads[0] if len(payloads) == 1 else payloads

    def value(self, index):
        """Return what is stored at ``index``.

        Exactly one interval at that point yields its data. Otherwise the
        sorted list of matching intervals themselves (possibly empty) is
        returned.
        """
        found = sorted(self._tree.at(index))
        return found[0].data if len(found) == 1 else found

    def values_at(self, indices):
        """Return a list with ``self.value(i)`` for every ``i`` in ``indices``."""
        return list(map(self.value, indices))

    def set_interval(self, begin, end, value):
        """Replace whatever is stored between ``begin`` and ``end`` with ``value``."""
        # Chop the range out of the tree first, then add the new interval.
        self._tree.chop(begin, end)
        self._tree.addi(begin, end, value)

    def to_df(self, title="tree"):
        """Return the intervals as a pandas DataFrame with one row per interval.

        Columns: ``label`` (``"begin-end"``), ``begin``, ``parameter`` (the
        given ``title``), ``mid`` (the midpoint), ``end`` and ``data``.
        Data that is itself a ``BaseTree`` is replaced by NaN.
        """
        import pandas as pd

        rows = []
        for (begin, end), data in self.items():
            if isinstance(data, BaseTree):
                # Nested trees cannot be shown in a flat table cell.
                data = float("nan")
            # The source neither closed this dict nor collected the rows.
            rows.append({
                "label": f"{begin}-{end}",
                "begin": begin,
                "parameter": title,
                "mid": (begin + end) / 2,
                "end": end,
                "data": data,
            })
        return pd.DataFrame(rows)

    def to_native(self):
        """Return a plain IntervalTree with the same intervals.

        Data that is itself a ``BaseTree`` is converted with its own
        ``to_native`` method; all other data is carried over unchanged.
        """
        ivs = []
        for (begin, end), data in self.items():
            if isinstance(data, BaseTree):
                iv = Interval(begin, end, data.to_native())
            else:
                # Without this ``else`` the converted interval above was
                # overwritten by the unconverted one.
                iv = Interval(begin, end, data)
            # The source never collected the intervals, so the returned tree
            # was always empty.
            ivs.append(iv)
        return IntervalTree(ivs)

    def explorer(self, title="tree"):
        """Return an ``IntervalTreeExplorer`` built for this object, labelled ``title``."""
        # Both imports happen inside the method rather than at module level.
        # NOTE(review): ``pn`` is not used in this method; presumably the
        # import only checks that panel is installed -- confirm.
        import panel as pn
        from ..visualizations import IntervalTreeExplorer
        return IntervalTreeExplorer(tree=self, label=title)
Exemplo n.º 25
# NOTE(review): this function is heavily truncated in this copy. The rest of
# the signature (the body uses power_up_before, power_down_after and
# power_down_gap, which are presumably parameters), the docstring quotes, the
# ``continue``/``break`` statements of the loops, the ``append`` calls that
# fill ``layouts``, ``items`` and ``power``, and the closing brace of the
# returned dict are all missing. Restore them from the original file.
def make_plan(store, base_url, uuid,
    Generate plan for selected program in the store.
    The plan goes at least 4 hours into the future.

    log.msg('Create plan for program {}...'.format(uuid))

    # Do not continue unless the program actually exists.
    if uuid not in store.program:
        log.msg('New plan has {} items.'.format(len(EMPTY_PLAN['items'])))
        return EMPTY_PLAN

    # We are going to plan for the next 4 hours.
    # Some of these hours will be today and some may be tomorrow.
    now = datetime.now()
    today = now.date()
    tomorrow = today + timedelta(days=1)

    # Do not generate items outside the 4h time window.
    not_before = mktime(now.timetuple())
    not_after = not_before + 4 * 3600

    # Use the interval tree to decide what events override what segments.
    ptree = IntervalTree()

    # Use another tree to track screen layouts.
    ltree = IntervalTree()

    # And another tree to track device power
    pwrtree = IntervalTree()

    # Start with an interval covering the whole 4h window.
    ltree[not_before:not_after] = DEFAULT_LAYOUT

    # Assume standby is the default power state.
    pwrtree[not_before:not_after] = 'standby'

    for segment in store.segment.filter(program=uuid, day=today.weekday()):
        insert_segment(ptree, today, segment)
        insert_segment(ltree, today, segment)

    for segment in store.segment.filter(program=uuid, day=tomorrow.weekday()):
        insert_segment(ptree, tomorrow, segment)
        insert_segment(ltree, tomorrow, segment)

    # Ordered screen layouts.
    layouts = []

    # Generate layouts.
    for interval in sorted(ltree):
        if interval.end < not_before:

        if interval.begin > not_after:

            'start': interval.begin,
            'end': interval.end,
            'mode': interval.data['mode'],
            'sidebar': interval.data['sidebar'],
            'panel': interval.data['panel'],

    for event in store.event.filter(program=uuid, date=today.isoformat()):
        insert_segment(ptree, today, event)

    for event in store.event.filter(program=uuid, date=tomorrow.isoformat()):
        insert_segment(ptree, tomorrow, event)

    # Ordered playlist items.
    items = []

    # Generate items for all intervals.
    for interval in sorted(ptree):
        begin = interval.begin

        if interval.end < not_before:
            # Skip this interval, it is already in the past.

        if begin > not_after:
            # End here, no need to go that far in the future.

        playlist = store.item.filter(playlist=interval.data['playlist'])
        playlist = sorted(playlist, key=lambda item: item['position'])

        for item in cycle(playlist):
            # Locate the file backing the item.
            file = store.file[item['file']]

            # NOTE: Do not allow items to have shorter than 1s duration
            #       or else we get stuck in this loop forever.
            duration = max(1.0, item['duration'])

            # Be careful not to exceed segment range.
            end = min(begin + duration, interval.end)

            if end >= not_before:
                # Insert the item only when it's in the future.
                    'start': begin,
                    'end': end,
                    'type': file['type'],
                    'url': base_url + '/' + file['path'] \
                           if file['stream_url'] is None \
                           else file['stream_url'],

            # Update our current position.
            begin = end

            if begin >= interval.end or begin > not_after:
                # Advance to the next segment.

    power = []

    # Set power intervals
    for interval in sorted(ptree):

        if interval.end < not_before:

        if interval.begin > not_after:

        # Power the device up a few seconds ahead to let it warm up.
        begin = interval.begin - power_up_before

        # Power the device down a few seconds after the segment ends.
        end = interval.end + power_down_after

        # Chop first so the 'on' interval replaces what was stored there.
        pwrtree.chop(begin, end)
        pwrtree[begin:end] = 'on'

    for interval in sorted(pwrtree):
        duration = interval.end - interval.begin

        state = interval.data

        # Do not turn the device off for gaps shorter than a certain
        # minimum to limit equipment wear and improve user experience.
        if state == 'standby' and duration < power_down_gap:
            state = 'on'

            'start': interval.begin,
            'end': interval.end,
            'power': state

        New plan has {} items and {} layouts.
    '''.strip().format(len(items), len(layouts)))

    return {
        'id': uuid4().hex,
        'items': items,
        'layouts': layouts,
        'power': power,
Exemplo n.º 26
def _calculate_work_and_wait_time_by_status(issue, lead_time_statuses,
    work_intervals = IntervalTree()
    wait_intervals = IntervalTree()
    work_time_by_status = Counter()
    wait_time_by_status = Counter()
    work_time_by_status_with_block_time = Counter()
    wait_time_by_status_with_block_time = Counter()

    last_status_change_date = issue[CREATED_DATE]
    for transition in issue[STATUS_TRANSITIONS]:
        if transition['from'] in lead_time_statuses:
            if transition['from'] in work_statuses:
                    Interval(last_status_change_date, transition['date'],
                    Interval(last_status_change_date, transition['date'],
        last_status_change_date = transition['date']

    wait_intervals_with_block_time = copy.deepcopy(wait_intervals)
    work_intervals_with_block_time = copy.deepcopy(work_intervals)

    for i in range(len(issue[FLAG_TRANSITIONS])):
        transition_block_start = issue[FLAG_TRANSITIONS][i]
        if transition_block_start['from'] is None:
            if i + 1 < len(issue[FLAG_TRANSITIONS]):
                transition_block_end = issue[FLAG_TRANSITIONS][i + 1]

    for interval in work_intervals:
        work_time_by_status[interval.data] += (
            interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY
    for interval in wait_intervals:
        wait_time_by_status[interval.data] += (
            interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY

    for interval in work_intervals_with_block_time:
        work_time_by_status_with_block_time[interval.data] += (
            interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY
    for interval in wait_intervals_with_block_time:
        wait_time_by_status_with_block_time[interval.data] += (
            interval.end - interval.begin).total_seconds() / SECONDS_IN_DAY

    return {
            '{}_work_time'.format(x): work_time_by_status[x]
            for x in lead_time_statuses
            '{}_wait_time'.format(x): wait_time_by_status[x]
            for x in lead_time_statuses
            for x in lead_time_statuses
            for x in lead_time_statuses