def test_valuesview():
    if hexversion < 0x02070000: return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = get_valuesview(temp)

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    that = dict(mapping)
    that_values = get_valuesview(that)

    values = get_valuesview(SortedDict(mapping[:2]))
    assert repr(values) == "SortedDict_values([0, 1])"
Example #2
def test_valuesview():
    if hexversion < 0x02070000: return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = get_valuesview(temp)

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    that = dict(mapping)
    that_values = get_valuesview(that)

    values = get_valuesview(SortedDict(mapping[:2]))
    assert repr(values) == "SortedDict_values([0, 1])"
Example #3
def search(path, base):
    cur = base + "/" + path
    ans = SortedDict()
    if os.path.isfile(cur):
        ans[path] = os.stat(base + "/" + path).st_size
        return ans
    li = os.listdir(cur)
    for f in li:
        ans.update(search(path + "/" + f, base))
    return ans
Example #4
def ls_video_files():
    """ This function returns the filenames of all video clips for the respective camera.

    returns: a dict where the key is the starting timestamp of the clip and the value is the filename """
    r = SortedDict()
    for i in range(17 if camera == 'Cam1' else 18,
                   28):  # the 17th is missing for Cam2, CamL and CamR
        p = os.path.join(data_path, '2018-05-%d' % i)
        r.update({
            int(f[:13]): f
            for f in os.listdir(p) if os.path.isfile(os.path.join(p, f))
            and f[0] != '.' and f[-3:] == 'mp4'
        })
    return r
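
# A hedged illustration (timestamps and filenames below are made up, not from this example):
# the point of returning a SortedDict keyed by starting timestamp is that clips added in any
# order come back in chronological order when the result is iterated.
from sortedcontainers import SortedDict

clips = SortedDict({1526601600000: 'clip_b.mp4'})
clips.update({1526515200000: 'clip_a.mp4', 1526688000000: 'clip_c.mp4'})
print(list(clips.values()))  # ['clip_a.mp4', 'clip_b.mp4', 'clip_c.mp4'] -- chronological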
Example #5
def Get_Thresholds(Threshold, Pops, Pop_counts):
    """Get your thresholds organized"""
    print('Storing thresholds...')
    Thresholds = SortedDict()  # initialize a dictionary
    for i in range(len(Pop_counts)):  # for each population
#        print i
        keys_thresh = '%s' % Pop_counts.iloc[i]  # name a key after the population number
        Thresholds.update({keys_thresh: []})  # add keys to empty dictionaries
        call = Pop_counts[keys_thresh]  # call this index from Populations to figure out how many alleles are available for the population
        t = float(Threshold) / 100  # convert threshold to proportion
        value = int(call * t)  # calculate number of alleles needed for the population
        Thresholds[keys_thresh].append(value)  # append value to proper dictionary key
#    print Thresholds
    return Thresholds
Example #6
def test_update():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict()
    temp.update()
    temp.update(mapping)
    temp.update(dict(mapping))
    temp.update(mapping[5:7])
    assert list(temp.items()) == mapping
Example #7
def test_update():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict()
    temp.update()
    temp.update(mapping)
    temp.update(dict(mapping))
    temp.update(mapping[5:7])
    assert list(temp.items()) == mapping
Example #8
def distribute_work(func_spice, xfunc, tais, **kwargs):
    # Each process will get 'chunksize' tais
    chunk = int(math.ceil(len(tais) / float(nprocs)))
    results = [apply_async(pool, func_spice, args=(xfunc, tais[chunk*i:chunk*(i + 1)],), kwds=kwargs) for i in range(nprocs)]

    # Collect all results into a single result dict
    res = SortedDict()
    for r in results:
        obj = r.get()
        if isinstance(obj, str):
            raise GeometrySpiceError(obj)

        res.update(obj)

    return [ {k:v} for k, v in res.items() ]
Example #9
    def events_from_workload(self, workload):
        # TODO support multiple events at same timestamp
        fake_events = SortedDict({
            0.0: [{
                'timestamp': 0.0,
                'type': 'SIMULATION_BEGINS',
                'data': {}
            }]
        })
        max_event_time = 0.0
        max_walltime = 0
        for job in workload['jobs']:
            assert 'subtime' in job
            timestamp = job['subtime']
            if timestamp == 0:  # just to not have 2 events at 0.0
                timestamp = 1e-06
            profile = workload['profiles'][job['profile']]
            assert profile['type'] == 'delay'
            job_event = {'timestamp': timestamp, 'type': 'JOB_SUBMITTED'}
            job_event['data'] = {
                'job_id': str(job['id']),
                'job': {
                    'id': str(job['id']),
                    'subtime': timestamp,
                    'res': job['res'],
                    'walltime': timestamp,
                    'profile': {
                        'type': 'delay',
                        'delay': profile['delay']
                    }
                }
            }
            self.nb_jobs_toExecute += 1

            assert timestamp not in fake_events

            fake_events.update({timestamp: [job_event]})

            if timestamp > max_event_time:
                max_event_time = timestamp
            if job['walltime'] > max_walltime:
                max_walltime = job['walltime']

        # Simulation finished when all jobs are executed
        #simulation_end_time = max_event_time + max_walltime + 50
        #fake_events.update({simulation_end_time: [{'timestamp': simulation_end_time,
        #                                           'type': 'SIMULATION_ENDS', 'data': {}}]})
        return fake_events
Example #10
    def load_scheduled_tasks(self):
        """Returns an automatically sorted dict of timestamp: List[ScheduledTask]"""
        all_scheduled_tasks = list(ScheduledTask.objects.order_by('when').all())
        now = datetime.datetime.utcnow()

        for task in all_scheduled_tasks:
            if task.time_tolerance is None or now - task.when <= datetime.timedelta(seconds=task.time_tolerance):
                self.recently_expired_tasks.append(task)
            else:
                task.delete()

        unsorted_scheduled_tasks = {task.when.timestamp(): task for task in all_scheduled_tasks}
        scheduled_tasks = SortedDict()
        scheduled_tasks.update(unsorted_scheduled_tasks)
        self.cached_scheduled_tasks = scheduled_tasks
        return scheduled_tasks
Example #11
    def generate(self):
        """Generates the wishlist.json data"""

        repo = self.github.get_repo("conan-io/wishlist")
        issues = repo.get_issues(state="open",
                                 sort="updated",
                                 direction="desc")
        issuesSorted = SortedDict()
        jsonIssues = dict()

        for issue in issues:
            upvotes = 0
            for reaction in issue.get_reactions():
                if reaction.content == "+1":
                    upvotes += 1

            issuesSorted.update(dict({(upvotes, issue.number): issue.title}))

        for upvotes, issuetitle in reversed(issuesSorted.items()):
            print("{} : #{} {}".format(upvotes[0], upvotes[1], issuetitle))
            #jsonIssues.update(dict({upvotes[1]: (dict({'upvotes': upvotes[0]}), dict({'issue': upvotes[1]}), dict({'issuetitle': issuetitle}))}))
            jsonIssues.update(
                dict({
                    upvotes[1]:
                    dict({
                        'upvotes': upvotes[0],
                        'issue': upvotes[1],
                        'issuetitle': issuetitle
                    })
                }))

        if not os.path.exists("build/"):
            os.makedirs("build/")

        data = json.dumps(jsonIssues, indent=4)
        with open('build/wishlist.json', 'w') as file:
            file.write(data)

        with open('build/wishlist.js', 'w') as file:
            file.write('var wishlist_data = \n')
            file.write(data)
            file.write(';')
Example #12
    def update_tracks(self, observations, directions, delta_t):
        # Propagate tracks
        for v in self.active_tracks.values():
            propagate_track(v, delta_t)
        assignments = {}
        taken_tracks = set()

        # Associate tracks
        print(observations)
        sorted_distances = SortedDict()
        for obs in range(len(observations)):
            for track, v in self.active_tracks.items():
                distance = abs(observations[obs] - v.position_pre())
                if distance < ASSOCIATION_RADIUS:
                    sorted_distances.update({distance: (obs, track)})

        for k, v in sorted_distances.items():
            if v[0] not in assignments and v[1] not in taken_tracks:
                assignments[v[0]] = v[1]
                taken_tracks.add(v[1])

        print(assignments)
        # Update tracks that are associated, and create new ones.
        for i in range(len(observations)):
            if i in assignments:
                update_track(self.active_tracks[assignments[i]], observations[i])
            else:
                self.start_track(observations[i], directions[i])

        # Retire old tracks and delete other ones.
        to_delete = []
        to_retire = []
        for k, track in self.active_tracks.items():
            if track.n == N and track.m < M:
                to_delete.append(k)
            elif track.n > N and track.m == 0:
                to_retire.append(k)

        for t in to_retire:
            self.retired_tracks[t] = self.active_tracks[t]

        for t in to_retire + to_delete:
            del self.active_tracks[t]
Example #13
def test_keysview():
    if hexversion < 0x02070000: return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    keys = get_keysview(temp)

    assert len(keys) == 13
    assert 'a' in keys
    assert list(keys) == [val for val, pos in mapping[:13]]
    assert keys[0] == 'a'
    assert list(reversed(keys)) == list(reversed(string.ascii_lowercase[:13]))
    assert keys.index('f') == 5
    assert keys.count('m') == 1
    assert keys.count('0') == 0
    assert keys.isdisjoint(['1', '2', '3'])

    temp.update(mapping[13:])

    assert len(keys) == 26
    assert 'z' in keys
    assert list(keys) == [val for val, pos in mapping]

    that = dict(mapping)

    that_keys = get_keysview(that)

    assert keys == that_keys
    assert not (keys != that_keys)
    assert not (keys < that_keys)
    assert not (keys > that_keys)
    assert keys <= that_keys
    assert keys >= that_keys

    assert list(keys & that_keys) == [val for val, pos in mapping]
    assert list(keys | that_keys) == [val for val, pos in mapping]
    assert list(keys - that_keys) == []
    assert list(keys ^ that_keys) == []

    keys = get_keysview(SortedDict(mapping[:2]))
    assert repr(keys) == "SortedDict_keys(['a', 'b'])"
Example #14
def test_itemsview():
    if hexversion < 0x02070000: return
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    items = get_itemsview(temp)

    assert len(items) == 13
    assert ('a', 0) in items
    assert list(items) == mapping[:13]
    assert items[0] == ('a', 0)
    assert items[-3:] == [('k', 10), ('l', 11), ('m', 12)]
    assert list(reversed(items)) == list(reversed(mapping[:13]))
    assert items.index(('f', 5)) == 5
    assert items.count(('m', 12)) == 1
    assert items.isdisjoint([('0', 26), ('1', 27)])
    assert not items.isdisjoint([('a', 0), ('b', 1)])

    temp.update(mapping[13:])

    assert len(items) == 26
    assert ('z', 25) in items
    assert list(items) == mapping

    that = dict(mapping)

    that_items = get_itemsview(that)

    assert items == that_items
    assert not (items != that_items)
    assert not (items < that_items)
    assert not (items > that_items)
    assert items <= that_items
    assert items >= that_items

    assert list(items & that_items) == mapping
    assert list(items | that_items) == mapping
    assert list(items - that_items) == []
    assert list(items ^ that_items) == []

    items = SortedDict(mapping[:2]).viewitems()
    assert repr(items) == "SortedDict_items([('a', 0), ('b', 1)])"
Example #15
    def shortest_path(self, source_id, target_id):
        self.vertices[source_id].dist = 0
        found = False

        # create a sorted dictionary to store unvisited nodes
        unvisited = SortedDict(self.vertices.copy())

        target = self.vertices[target_id]
        source = self.vertices[source_id]

        #iterate through unvisited nodes until there are none left or target
        #is found
        while (len(unvisited) > 0):
            min_dist = unvisited.popitem(0)[1]
            min_dist.visited = True

            if (min_dist.node == target.node):
                found = True
                break

            # update the new shortest distance of min_dist's neighbors
            for v in min_dist.adjacent:
                new_dist = min_dist.dist + self.edges[min_dist.node][v]
                if (new_dist < self.vertices[v].dist
                        and self.vertices[v].visited == False):
                    self.vertices[v].dist = new_dist
                    self.vertices[v].previous = min_dist.node
                    unvisited.update({v: self.vertices[v]})

        #backtrace to determine shortest path
        path = []
        if (found == True):
            current = target
            while (current != None):
                path.append(current)
                if current.previous != None:
                    current = self.vertices[current.previous]
                else:
                    current = None

        return path
Example #16
def test_keysview():
    if hexversion < 0x02070000: return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    keys = get_keysview(temp)

    assert len(keys) == 13
    assert 'a' in keys
    assert list(keys) == [val for val, pos in mapping[:13]]
    assert keys[0] == 'a'
    assert list(reversed(keys)) == list(reversed(string.ascii_lowercase[:13]))
    assert keys.index('f') == 5
    assert keys.count('m') == 1
    assert keys.count('0') == 0
    assert keys.isdisjoint(['1', '2', '3'])

    temp.update(mapping[13:])

    assert len(keys) == 26
    assert 'z' in keys
    assert list(keys) == [val for val, pos in mapping]

    that = dict(mapping)

    that_keys = get_keysview(that)

    assert keys == that_keys
    assert not (keys != that_keys)
    assert not (keys < that_keys)
    assert not (keys > that_keys)
    assert keys <= that_keys
    assert keys >= that_keys

    assert list(keys & that_keys) == [val for val, pos in mapping]
    assert list(keys | that_keys) == [val for val, pos in mapping]
    assert list(keys - that_keys) == []
    assert list(keys ^ that_keys) == []

    keys = get_keysview(SortedDict(mapping[:2]))
    assert repr(keys) == "SortedDict_keys(['a', 'b'])"
Example #17
def test_itemsview():
    if hexversion < 0x02070000: return
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    items = get_itemsview(temp)

    assert len(items) == 13
    assert ('a', 0) in items
    assert list(items) == mapping[:13]
    assert items[0] == ('a', 0)
    assert items[-3:] == [('k', 10), ('l', 11), ('m', 12)]
    assert list(reversed(items)) == list(reversed(mapping[:13]))
    assert items.index(('f', 5)) == 5
    assert items.count(('m', 12)) == 1
    assert items.isdisjoint([('0', 26), ('1', 27)])
    assert not items.isdisjoint([('a', 0), ('b', 1)])

    temp.update(mapping[13:])

    assert len(items) == 26
    assert ('z', 25) in items
    assert list(items) == mapping

    that = dict(mapping)

    that_items = get_itemsview(that)

    assert items == that_items
    assert not (items != that_items)
    assert not (items < that_items)
    assert not (items > that_items)
    assert items <= that_items
    assert items >= that_items

    assert list(items & that_items) == mapping
    assert list(items | that_items) == mapping
    assert list(items - that_items) == []
    assert list(items ^ that_items) == []

    items = SortedDict(mapping[:2]).viewitems()
    assert repr(items) == "SortedDict_items([('a', 0), ('b', 1)])"
Example #18
def compare_faces(list_of_face_encodings, unknown_face_encodings):
    """
    Find the known faces whose encodings are closest to the unknown face encodings.
    """

    result_list_of_faces = SortedDict()

    for element in list_of_face_encodings:
        # calculate distance from current encodings to unknown encodings
        current_distance = face_recognition.api.face_distance(
            element["face_encoding"], unknown_face_encodings)
        min_dist = current_distance.min()
        # add to our list of top matches faces
        if len(result_list_of_faces) < 3:
            result_list_of_faces.update({min_dist: element})
        else:
            result_list_of_faces.update({min_dist: element})
            result_list_of_faces.popitem()
    if result_list_of_faces.keys()[0] > 0.6:
        result_list_of_faces = None
    return result_list_of_faces
Example #19
def test_valuesview():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    values = SortedDict(mapping[:2]).values()
    assert repr(values) == "SortedValuesView(SortedDict({'a': 0, 'b': 1}))"
Example #20
def test_valuesview():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    values = SortedDict(mapping[:2]).values()
    assert repr(values) == "SortedValuesView(SortedDict({'a': 0, 'b': 1}))"
Example #21
class StockPrice:
    def __init__(self):
        self.A = SortedDict()
        self.mx = SortedList()

    def update(self, timestamp: int, price: int) -> None:
        if timestamp not in self.A:
            self.mx.add(price)
        else:
            idx = self.mx.bisect_left(self.A[timestamp])
            # self.mx[idx:idx + 1] = []
            del self.mx[idx]
            self.mx.add(price)
        self.A.update({timestamp: price})

    def current(self) -> int:
        return self.A.values()[-1]

    def maximum(self) -> int:
        return self.mx[-1]

    def minimum(self) -> int:
        return self.mx[0]
Example #22
class LeafSet(object):
    __slots__ = ('peers', 'capacity')
    __passthru = {'get', 'clear', 'pop', 'popitem', 'peekitem', 'key'}
    __iters = {'keys', 'values', 'items'}

    def __init__(self, my_key, iterable=(), capacity=8):
        try:
            iterable = iterable.items()  # view object
        except AttributeError:
            pass
        tuple_itemgetter = Peer.distance(my_key, itemgetter(0))
        key_itemgetter = Peer.distance(my_key)
        self.capacity = capacity
        self.peers = SortedDict(key_itemgetter)
        if iterable:
            l = sorted(iterable, key=tuple_itemgetter)
            self.peers.update(islice(l, capacity))

    def clear(self):
        self.peers.clear()

    def prune(self):
        extra = len(self) - self.capacity
        for i in range(extra):
            self.peers.popitem(last=True)

    def update(self, iterable):
        try:
            iterable = iterable.items()  # view object
        except AttributeError:
            pass
        iterable = iter(iterable)
        items = tuple(islice(iterable, 500))
        while items:
            self.peers.update(items)
            items = tuple(islice(iterable, 500))


    def setdefault(self, *args, **kwargs):
        self.peers.setdefault(*args, **kwargs)
        self.prune()

    def __setitem__(self, *args, **kwargs):
        self.peers.__setitem__(*args, **kwargs)
        self.prune()

    def __getitem__(self, *args, **kwargs):
        return self.peers.__getitem__(*args, **kwargs)

    def __delitem__(self, *args, **kwargs):
        return self.peers.__delitem__(*args, **kwargs)

    def __iter__(self, *args, **kwargs):
        return self.peers.__iter__(*args, **kwargs)

    def __reversed__(self, *args, **kwargs):
        return self.peers.__reversed__(*args, **kwargs)

    def __contains__(self, *args, **kwargs):
        return self.peers.__contains__(*args, **kwargs)

    def __len__(self, *args, **kwargs):
        return self.peers.__len__(*args, **kwargs)

    def __getattr__(self, key):
        if key in self.__class__.__passthru:
            return getattr(self.peers, key)
        elif key in self.__class__.__iters:
            return getattr(self.peers, 'iter' + key)
        else:
            return super().__getattr__(key)

    def __repr__(self):
        return '<%s keys=%r capacity=%d/%d>' % (
            self.__class__.__name__, list(self), len(self), self.capacity)
Example #23
class TreePage(BasePage):
    """
    Page object, implemented with a sorted dict. Who knows what's underneath!
    """

    def __init__(self, *args, **kwargs):
        storage = kwargs.pop("storage", None)
        super(TreePage, self).__init__(*args, **kwargs)
        self._storage = SortedDict() if storage is None else storage

    def keys(self):
        if len(self._storage) == 0:
            return set()
        else:
            return set.union(*(set(range(*self._resolve_range(mo))) for mo in self._storage.itervalues()))

    def replace_mo(self, state, old_mo, new_mo):
        start, end = self._resolve_range(old_mo)
        for key in self._storage.irange(start, end-1):
            val = self._storage[key]
            if val is old_mo:
                #assert new_mo.includes(a)
                self._storage[key] = new_mo

    def store_overwrite(self, state, new_mo, start, end):
        # iterate over each item we might overwrite
        # track our mutations separately since we're in the process of iterating
        deletes = []
        updates = { start: new_mo }

        for key in self._storage.irange(maximum=end-1, reverse=True):
            old_mo = self._storage[key]

            # make sure we aren't overwriting all of an item that overlaps the end boundary
            if end < self._page_addr + self._page_size and end not in updates and old_mo.includes(end):
                updates[end] = old_mo

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if key < start:
                break

            # delete any key that falls within the range
            deletes.append(key)

        #assert all(m.includes(i) for i,m in updates.items())

        # perform mutations
        for key in deletes:
            del self._storage[key]

        self._storage.update(updates)

    def store_underwrite(self, state, new_mo, start, end):
        # track the point that we need to write up to
        last_missing = end - 1
        # track also updates since we can't update while iterating
        updates = {}

        for key in self._storage.irange(maximum=end-1, reverse=True):
            mo = self._storage[key]

            # if the mo stops
            if mo.base <= last_missing and not mo.includes(last_missing):
                updates[max(mo.last_addr+1, start)] = new_mo
            last_missing = mo.base - 1

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if last_missing < start:
                break

        # if there are no memory objects <= start, we won't have filled start yet
        if last_missing >= start:
            updates[start] = new_mo

        #assert all(m.includes(i) for i,m in updates.items())

        self._storage.update(updates)

    def load_mo(self, state, page_idx):
        """
        Loads a memory object from memory.

        :param page_idx: the index into the page
        :returns: a tuple of the object
        """

        try:
            key = next(self._storage.irange(maximum=page_idx, reverse=True))
        except StopIteration:
            return None
        else:
            return self._storage[key]

    def load_slice(self, state, start, end):
        """
        Return the memory objects overlapping with the provided slice.

        :param start: the start address
        :param end: the end address (non-inclusive)
        :returns: tuples of (starting_addr, memory_object)
        """
        keys = list(self._storage.irange(start, end-1))
        if not keys or keys[0] != start:
            try:
                key = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                pass
            else:
                if self._storage[key].includes(start):
                    keys.insert(0, key)
        return [(key, self._storage[key]) for key in keys]

    def _copy_args(self):
        return { 'storage': self._storage.copy() }
Example #24
class CacheStore(object):
    class CacheItem(object):
        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        with self.lock:
            item = self.store[key] if key in self.store else self.CacheItem()
            item.data = data
            item.valid.set()

            if key not in self.store:
                self.store[key] = item
                return True

            return False

    def update(self, **kwargs):
        with self.lock:
            items = {}
            created = []
            updated = []
            for k, v in kwargs.items():
                items[k] = self.CacheItem()
                items[k].data = v
                items[k].valid.set()
                if k in self.store:
                    updated.append(k)
                else:
                    created.append(k)

            self.store.update(**items)
            return created, updated

    def update_one(self, key, **kwargs):
        with self.lock:
            item = self.get(key)
            if not item:
                return False

            for k, v in kwargs.items():
                setattr(item, k, v)

            self.put(key, item)
            return True

    def update_many(self, key, predicate, **kwargs):
        with self.lock:
            updated = []
            for k, v in self.itervalid():
                if predicate(v):
                    if self.update_one(k, **kwargs):
                        updated.append(k)

            return updated

    def get(self, key, default=None, timeout=None):
        item = self.store.get(key)
        if item:
            item.valid.wait(timeout)
            return item.data

        return default

    def remove(self, key):
        with self.lock:
            if key in self.store:
                del self.store[key]
                return True

            return False

    def remove_many(self, keys):
        with self.lock:
            removed = []
            for key in keys:
                if key in self.store:
                    del self.store[key]
                    removed.append(key)

            return removed

    def clear(self):
        with self.lock:
            items = list(self.store.keys())
            self.store.clear()
            return items

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        item = self.store.get(key)
        if item:
            return item.valid.is_set()

        return False

    def invalidate(self, key):
        with self.lock:
            item = self.store.get(key)
            if item:
                item.valid.clear()

    def itervalid(self):
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)

        return result

    def query(self, *filter, **params):
        return query(list(self.validvalues()), *filter, **params)
Example #25
#The bintrees project now recommends using Sorted Containers instead and has stopped development.
#The API differs significantly but the supported functionality is the same.
#The Tree object in bintrees is most similar to SortedDict.
#All of the mapping methods and set methods are available using either SortedDict or SortedKeysView.
from sortedcontainers import SortedDict

sd = SortedDict()

sd.update({'jack': 'apple', 'jill': 'pear', 'giant': 'sheep'})

print('sorteddict', sd)

#Return an iterator over the keys of the sorted dict.
myiter = iter(sd)
for k in myiter:
    print('key', k, 'value', sd.get(k))
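
# A small follow-on sketch (not part of the original example): the keys view of a SortedDict
# (SortedKeysView) supports the set methods mentioned above, and the results come back in
# key order. The second dict here is made up for illustration.
other = {'jack': 'orange', 'jim': 'plum'}
print('shared keys', list(sd.keys() & other.keys()))  # ['jack']
print('all keys   ', list(sd.keys() | other.keys()))  # ['giant', 'jack', 'jill', 'jim']
print('only in sd ', list(sd.keys() - other.keys()))  # ['giant', 'jill']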
Example #26
class KeyedRegion(object):
    """
    KeyedRegion keeps a mapping between stack offsets and all variables covering that offset. It assumes no variable in
    this region overlap with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """
    def __init__(self, tree=None):
        self._storage = SortedDict() if tree is None else tree

    def _get_container(self, offset):
        try:
            base_offset = next(
                self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return self._storage.itervalues()

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.iteritems():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion()

        kr = KeyedRegion()
        for key, ro in self._storage.iteritems():
            kr._storage[key] = ro.copy()
        return kr

    def merge(self, other, make_phi_func=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.iteritems():  # type: RegionObject
            for loc_and_var in item.objects:
                self.__store(loc_and_var,
                             overwrite=False,
                             make_phi_func=make_phi_func)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = {}

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [obj.variable for obj in ro.objects]
            offset_to_vars[key] = variables

        s = []
        for offset, variables in offset_to_vars.iteritems():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        self._store(start, variable, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        self._store(start, variable, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index variables covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype:  int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype:  set
        """

        base_addr, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.variables

    #
    # Private methods
    #

    def _store(self, start, variable, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param variable: The variable to store.
        :param bool overwrite: Whether existing variables should be overwritten or not.
        :return: None
        """

        loc_and_var = LocationAndVariable(start, variable)
        self.__store(loc_and_var, overwrite=overwrite)

    def __store(self, loc_and_var, overwrite=False, make_phi_func=None):
        """
        Store a variable into the storage.

        :param LocationAndVariable loc_and_var: The descriptor describing start address and the variable.
        :param bool overwrite: Whether existing variables should be overwritten or not.
        :return: None
        """

        start = loc_and_var.start
        variable = loc_and_var.variable
        variable_size = variable.size if variable.size is not None else 1
        end = start + variable_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end - 1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, variable_size, {loc_and_var})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(loc_and_var)
                else:
                    self._add_object_or_make_phi(b,
                                                 loc_and_var,
                                                 make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end,
                                        {loc_and_var})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(loc_and_var)
                else:
                    self._add_object_or_make_phi(a,
                                                 loc_and_var,
                                                 make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(loc_and_var)
                else:
                    self._add_object_or_make_phi(item,
                                                 loc_and_var,
                                                 make_phi_func=make_phi_func)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(
                    self._storage.irange(maximum=end - 1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(
                    self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_or_make_phi(self, item, loc_and_var, make_phi_func=None):  #pylint:disable=no-self-use
        if not make_phi_func or len({loc_and_var.variable}
                                    | item.variables) == 1:
            item.add_object(loc_and_var)
        else:
            # make a phi node
            item.set_object(
                LocationAndVariable(
                    loc_and_var.start,
                    make_phi_func(loc_and_var.variable, *item.variables)))
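
# A self-contained sketch (not from the class above) of the floor-lookup pattern that
# KeyedRegion._get_container() relies on: SortedDict.irange(maximum=offset, reverse=True)
# yields stored keys <= offset from largest to smallest, so next() returns the closest
# region start at or below the queried offset. The keys and names here are made up.
from sortedcontainers import SortedDict

region_starts = SortedDict({0: 'region_a', 8: 'region_b', 16: 'region_c'})

def floor_key(offset):
    try:
        return next(region_starts.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None  # no region starts at or before this offset

print(floor_key(11))  # 8 -- offset 11 may be covered by the region starting at 8
print(floor_key(-1))  # None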
Example #27
class KeyedRegion(object):
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in
    this region overlap with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """
    def __init__(self, tree=None):
        self._storage = SortedDict() if tree is None else tree

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion()

        kr = KeyedRegion()
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        return kr

    def merge(self, other, make_phi_func=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for loc_and_var in item.stored_objects:
                self.__store(loc_and_var, overwrite=False, make_phi_func=make_phi_func)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = { }

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [ obj.obj for obj in ro.stored_objects ]
            offset_to_vars[key] = variables

        s = [ ]
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """

        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other objects that are fully covered by this
        object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """

        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype:  int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype:  set
        """

        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """

        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        stored_object = StoredObject(start, obj, size)
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False, make_phi_func=None):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end-1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(b, stored_object, make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end, {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(a, stored_object, make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(item, stored_object, make_phi_func=make_phi_func)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(self._storage.irange(maximum=end-1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_or_make_phi(self, item, stored_object, make_phi_func=None):  #pylint:disable=no-self-use
        if not make_phi_func or len({stored_object.obj} | item.internal_objects) == 1:
            item.add_object(stored_object)
        else:
            # make a phi node
            item.set_object(StoredObject(stored_object.start,
                                         make_phi_func(stored_object.obj, *item.internal_objects),
                                         stored_object.size,
                                         )
                            )
Example #28
class CacheStore(object):
    class CacheItem(object):
        __slots__ = ('valid', 'data')

        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        with self.lock:
            try:
                item = self.store[key]
                item.data = data
                item.valid.set()
                return False
            except KeyError:
                item = self.CacheItem()
                item.data = data
                item.valid.set()
                self.store[key] = item
                return True

    def update(self, **kwargs):
        with self.lock:
            items = {}
            created = []
            updated = []
            for k, v in kwargs.items():
                items[k] = self.CacheItem()
                items[k].data = v
                items[k].valid.set()
                if k in self.store:
                    updated.append(k)
                else:
                    created.append(k)

            self.store.update(**items)
            return created, updated

    def update_one(self, key, **kwargs):
        with self.lock:
            item = self.get(key)
            if not item:
                return False

            for k, v in kwargs.items():
                setattr(item, k, v)

            self.put(key, item)
            return True

    def update_many(self, key, predicate, **kwargs):
        with self.lock:
            updated = []
            for k, v in self.itervalid():
                if predicate(v):
                    if self.update_one(k, **kwargs):
                        updated.append(k)

            return updated

    def get(self, key, default=None, timeout=None):
        item = self.store.get(key)
        if item:
            item.valid.wait(timeout)
            return item.data

        return default

    def remove(self, key):
        with self.lock:
            try:
                del self.store[key]
                return True
            except KeyError:
                return False

    def remove_many(self, keys):
        with self.lock:
            removed = []
            for key in keys:
                try:
                    del self.store[key]
                    removed.append(key)
                except KeyError:
                    pass

            return removed

    def clear(self):
        with self.lock:
            items = list(self.store.keys())
            self.store.clear()
            return items

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        item = self.store.get(key)
        if item:
            return item.valid.is_set()

        return False

    def invalidate(self, key):
        with self.lock:
            item = self.store.get(key)
            if item:
                item.valid.clear()

    def itervalid(self):
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)

        return result

    def query(self, *filter, **params):
        return query(list(self.validvalues()), *filter, **params)
Example #29
# For writing output video
fullPathtoOutputVideo = os.path.join(os.path.dirname(ConfigurationForVideoSegmentation.fullyQualifiedPathToOutputMovie), "out_" + os.path.basename(ConfigurationForVideoSegmentation.fullyQualifiedPathToInputMovie))
writer = skvideo.io.FFmpegWriter(fullPathtoOutputVideo, outputdict={'-vcodec': 'libx264', '-b': '750100000'})

while(not allFramesAreDone):

    # wait for the output channel to be full
    ConfigurationForVideoSegmentation.barrier.wait()
    # Crossing this barrier means that all workers have put their results into the output queue

    # Extract all annotated frames in increasing order. Use SortedDict for the purpose.
    s = SortedDict()

    while(not ConfigurationForVideoSegmentation.outputChannel.empty()):
        s.update(ConfigurationForVideoSegmentation.outputChannel.get())

    # The sorted container orders entries by key, and keys are frame numbers, so we can simply iterate over the keys in ascending order.
    for key in list(s.keys()):

        # Count the number of sentinel objects encountered
        if(key == -1):

            nSentinelObjectsEncountered = nSentinelObjectsEncountered + 1

            # Not all workers may have seen the sentinel object yet.

            if(nSentinelObjectsEncountered == ConfigurationForVideoSegmentation.nProcesses -1):
                allFramesAreDone = True
                break
        else:
Example #30
def save_polygon(polygon, all_metadata):
    d = SortedDict([(m,'') for m in all_metadata])
    d.update(polygon['properties'])
    return d.values()
Example #31
class Visibility_polygon_class(object):
    def __init__(self):
        self.origin = (0, 0)
        self.refvec = (0, 1)
        self.segments = []
        self.event_queue = []
        self.status = SortedDict()
        self.visibility_polygon = []

    def order_segments(self, arg):
        return arg

    # Starts here!
    def get_visibility_polygon(self, segments, origin):
        self.origin = origin
        self.segments = segments
        self.add_bounding_box()
        self.create_event_queue_from_segments()
        self.sort_event_queue()
        self.initialize_status()
        #return self.status
        self.perform_sweep()
        return self.visibility_polygon

    def add_bounding_box(self):
        # Find extreme points
        margin = 40
        top_y = 400 + margin  #uppermost_point_index(self.event_queue)
        bottom_y = 130 - margin  #lowermost_point_index(self.event_queue)
        right_x = 600 + margin  #rightmost_point_index(self.event_queue)
        left_x = 200 - margin  #leftmost_point_index(self.event_queue)
        # Create the bounding box and add it to event queue
        s1 = Segment(Point(right_x, top_y), Point(right_x, bottom_y))
        s2 = Segment(Point(right_x - 5, bottom_y), Point(left_x, bottom_y))
        s3 = Segment(Point(left_x, bottom_y + 5), Point(left_x, top_y))
        s4 = Segment(Point(left_x + 5, top_y), Point(right_x, top_y + 5))

        p = [s1, s2, s3, s4]
        self.segments.extend(p)

    # Create an event queue with all points and their connections (not sorted yet!)
    def create_event_queue_from_segments(self):
        for s in self.segments:
            p1 = EventPoint(s.p1)
            p2 = EventPoint(s.p2)
            p1.twin = p2
            p2.twin = p1
            self.event_queue.append(p1)
            self.event_queue.append(p2)

    # Create event queue
    def sort_event_queue(self):
        # Sort the points in clockwise order
        self.event_queue = sorted(self.event_queue, key=self.get_key)

    def initialize_status(self):
        sweep_ray = Ray(self.origin, self.event_queue[0].p)
        i = 0
        for ep in self.event_queue:
            if ep.type == DEFAULT_VERTEX:
                segment = Segment(ep.p, ep.twin.p)
                intersection_point = sweep_ray.intersection(segment)

                if len(intersection_point) > 0:
                    # If the segment's first point is the current event-point
                    if intersection_point[0] == ep.p:  # the point lies on the initial ray
                        next_ray = Ray(self.origin, self.event_queue[i + 1].p)
                        if len(next_ray.intersection(segment)) > 0:  # the point is a start point
                            self.initialize_segment(ep, intersection_point)
                        else:
                            self.initialize_segment(ep.twin, intersection_point)
                    else:
                        self.initialize_segment(ep.twin, intersection_point)
                else:
                    # Event-points not hit by the ray gets a type
                    ep.type = START_VERTEX
                    ep.twin.type = END_VERTEX
            i += 1

    def initialize_segment(self, ep, intersection_point):
        status_segment = StatusSegment(ep, ep.twin, self.origin)
        status_segment.current_distance = distance(intersection_point[0],
                                                   self.origin)
        ep.status_segment = status_segment
        ep.twin.status_segment = status_segment
        ep.type = START_VERTEX
        ep.twin.type = END_VERTEX
        if ep != self.event_queue[0]:
            self.status.update(
                {status_segment.current_distance: status_segment})

    def perform_sweep(self):
        print("\nStatus at start: " + str(len(self.status)))

        for ep in self.event_queue:
            print("\nStatus: " + str(len(self.status)))

            if ep.type == START_VERTEX:
                print("START_VERTEX")
                status_segment = StatusSegment(ep, ep.twin, self.origin)

                print("current segment: " + str(status_segment.segment) +
                      str(status_segment.current_distance))

                ep.status_segment = status_segment
                ep.twin.status_segment = status_segment
                if len(self.status) == 0:
                    self.status.update(
                        {status_segment.current_distance: status_segment})
                    self.visibility_polygon.append(ep.p)
                    print("empty status. Append")
                else:
                    first_in_status = self.status.peekitem(index=0)

                    print("first in status and distance: " +
                          str(first_in_status[1].segment) + ": " +
                          str(first_in_status[1].current_distance))

                    current_ray = Ray(self.origin, ep.p)
                    intersection_point = current_ray.intersection(
                        first_in_status[1].segment)
                    first_in_status[1].current_distance = distance(
                        intersection_point[0], self.origin)
                    print("First in status new distance: " +
                          str(first_in_status[1].current_distance))
                    self.status.update(
                        {status_segment.current_distance:
                         status_segment})  # insert the new segment to status
                    del self.status[first_in_status[0]]
                    self.status.update({
                        first_in_status[1].current_distance:
                        first_in_status[1]
                    })  #update the key distance to the origin
                    new_first_in_status = self.status.peekitem(index=0)
                    if new_first_in_status[1] != first_in_status[1]:
                        self.visibility_polygon.append(intersection_point[0])
                        self.visibility_polygon.append(ep.p)
                        print("normal status. Append")

            elif ep.type == END_VERTEX:
                print("END_VERTEX")
                first_in_status = self.status.peekitem(index=0)
                print("first in status and distance: " +
                      str(first_in_status[1].segment) + ": " +
                      str(first_in_status[1].current_distance))
                del self.status[ep.status_segment.current_distance]
                print("ep status segment and distance: " +
                      str(ep.status_segment.segment) + ": " +
                      str(ep.status_segment.current_distance))
                if len(self.status) == 0:
                    self.visibility_polygon.append(ep.p)
                    print("empty status. Append")
                else:
                    new_first_in_status = self.status.peekitem(index=0)
                    if new_first_in_status[1] != first_in_status[1]:
                        current_ray = Ray(self.origin, ep.p)
                        intersection_point = current_ray.intersection(
                            new_first_in_status[1].segment)
                        self.visibility_polygon.append(ep.p)
                        self.visibility_polygon.append(intersection_point[0])
                        print("normal status. Append")

    # Gets key for sorting
    def get_key(self, point):
        return self.clockwiseangle_and_distance(point.p)

    # returns the angle and length vector from the origin to the point
    def clockwiseangle_and_distance(self, point):
        # Vector between point and the origin: v = p - o
        vector = [point[0] - self.origin[0], point[1] - self.origin[1]]
        # Length of vector: ||v||
        lenvector = math.hypot(vector[0], vector[1])
        # If length is zero there is no angle
        if lenvector == 0:
            return -math.pi, 0
        # Normalize vector: v/||v||
        normalized = [vector[0] / lenvector, vector[1] / lenvector]
        dotprod = normalized[0] * self.refvec[0] + normalized[1] * self.refvec[1]  # x1*x2 + y1*y2
        diffprod = self.refvec[1] * normalized[0] - self.refvec[0] * normalized[1]  # x1*y2 - y1*x2
        angle = math.atan2(diffprod, dotprod)
        # Negative angles represent counter-clockwise angles so we need to subtract them
        # from 2*pi (360 degrees)
        if angle < 0:
            return 2 * math.pi + angle, lenvector
        # The angle is returned first because it is the primary sorting criterion;
        # if two vectors have the same angle, the shorter distance should come first.
        return angle, lenvector
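
The same clockwise-angle key, isolated as a standalone sketch: atan2 of the cross and dot products against a reference vector gives the angle, negative angles are wrapped into [0, 2*pi), and the distance breaks ties (origin, reference vector and test points here are arbitrary):

import math

origin = (0, 0)
refvec = (0, 1)


def clockwise_key(point):
    vx, vy = point[0] - origin[0], point[1] - origin[1]
    length = math.hypot(vx, vy)
    if length == 0:
        return -math.pi, 0
    nx, ny = vx / length, vy / length
    dot = nx * refvec[0] + ny * refvec[1]
    cross = refvec[1] * nx - refvec[0] * ny
    angle = math.atan2(cross, dot)
    if angle < 0:
        angle += 2 * math.pi
    return angle, length


points = [(1, 0), (0, 2), (-1, -1), (0, 1)]
print(sorted(points, key=clockwise_key))  # [(0, 1), (0, 2), (1, 0), (-1, -1)]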
Example #32
class BaseNode(object):
    def __init__(self, tree, bucket=None, new=False):
        self.tree = tree
        if bucket is not None:
            self.bucket = SortedDict(bucket)
        else:
            self.bucket = SortedDict()

        self.lazy = None
        self.changed = new

    def _split(self):
        """
        Creates a new node of the same type and splits the contents of the
        bucket into two parts of an equal size. The lower keys are being stored
        in the bucket of the new node. The higher keys are being stored in
        the bucket of the old node. Afterwards, the new node is being returned.
        """
        new_bucket = self.bucket.items()[:len(self.bucket) // 2]
        self.bucket = SortedDict(self.bucket.items()[len(self.bucket) // 2:])

        new_node = LazyNode(node=self.__class__(tree=self.tree,
                                                bucket=new_bucket, new=True),
                            tree=self.tree)

        if hasattr(new_node, 'rest'):
            new_node.rest = new_node.bucket.popitem()[1]
        if hasattr(self, 'next'):
            self.next = new_node

        if self is self.tree.root.node:
            self.tree._create_root(new_node, self.lazy)

        return new_node

    def _insert(self, key, value):
        """
        Inserts the key and value into the bucket. If the bucket has become too
        large, the node will be split into two nodes.
        """
        self.changed = True
        if isinstance(self, Leaf):
            self.bucket[key] = LazyNode(node=value, tree=self.tree)
        else:
            self.bucket[key] = value

        if len(self.bucket) > self.tree.max_size:
            return self, self._split()
        return self, None

    def _take_first(self):
        key = self.bucket.keys()[0]
        return key, self.bucket.pop(key)

    def _set_first(self, key, value):
        self.bucket[key] = value

    def _merge_right(self, right, parent):
        """Merge the buckets of the two children and place them at of the right
        node in the parent"""
        self.bucket.update(right.bucket)
        self.changed = True
        right.changed = True
        parent.changed = True

        left_index = parent.bucket.values().index(self.lazy)
        left_key = parent.bucket.keys()[left_index]
        try:
            right_index = parent.bucket.values().index(right.lazy)
            right_key = parent.bucket.keys()[right_index]
            parent.bucket[right_key] = self.lazy

        except ValueError:
            parent.rest = self.lazy
        del parent.bucket[left_key]

    def _commit(self, db):
        for n in self.bucket.values():
            n._commit(db)

        pos = db.tell()
        db.write(encode(self))
        return pos
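
A minimal sketch of the bucket-splitting step used in _split above: sortedcontainers views support slicing, so a SortedDict can be cut into a lower and an upper half with plain index arithmetic (the sample keys are arbitrary):

from sortedcontainers import SortedDict

bucket = SortedDict({'a': 1, 'b': 2, 'c': 3, 'd': 4})
half = len(bucket) // 2

lower = SortedDict(bucket.items()[:half])   # lower keys go to the new node
upper = SortedDict(bucket.items()[half:])   # higher keys stay in the old node
print(list(lower), list(upper))  # ['a', 'b'] ['c', 'd']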
Example #33
class SLIM(BaseMiner, MDLOptimizer):
    """SLIM: Directly Mining Descriptive Patterns

    SLIM looks for a compressed representation of transactional data.
    This compressed representation is a set of descriptive patterns,
    and can be used to:

    - provide a natively interpretable modeling of this data
    - make predictions on new data, using this condensed representation as an encoding scheme

    Idea of early stopping is inspired from
    http://eda.mmci.uni-saarland.de/pres/ida14-slimmer-poster.pdf


    Parameters
    ----------
    n_iter_no_change: int, default=100
        Number of candidate evaluations with no improvement to allow before stopping optimization.
    tol: float, default=None
        Tolerance for the early stopping, in bits.
        When the compression size is not improving by at least `tol` for `n_iter_no_change`
        iterations, the training stops.
        Defaults to None; it will be computed automatically from the size of the input data.
    pruning: bool, default=True
        Whether to activate pruning or not. Pruned itemsets may be useful at
        prediction time, so it is usually recommended to set it to False
        to build a classifier. The model will be less concise, but will lead
        to more accurate predictions on average.


    Examples
    --------
    >>> from skmine.itemsets import SLIM
    >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']]
    >>> SLIM().fit(D).codetable  # doctest: +SKIP
    (butter, tea)         [2]
    (milk, bananas)    [0, 1]
    (cookies)          [1, 2]
    dtype: object

    References
    ----------
    .. [1]
        Smets, K & Vreeken, J
        "Slim: Directly Mining Descriptive Patterns", 2012

    .. [2] Gandhi, M & Vreeken, J
        "Slimmer, outsmarting Slim", 2014
    """
    def __init__(self, *, n_iter_no_change=100, tol=None, pruning=True):
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.standard_codetable_ = None
        self.codetable_ = SortedDict()
        self.model_size_ = None  # L(CT|D)
        self.data_size_ = None  # L(D|CT)
        self.pruning = pruning

    def fit(self, D, y=None):  # pylint:disable = too-many-locals
        """fit SLIM on a transactional dataset

        This generates new candidate patterns and adds those which improve compression,
        iteratively refining ``self.codetable_``.

        Parameters
        ----------
        D: pd.DataFrame
            Transactional dataset, either as an iterable of iterables or encoded as tabular binary data
        """
        self._prefit(D, y=y)
        n_iter_no_change = 0
        seen_cands = set()

        tol = self.tol or self.standard_codetable_.map(len).median()

        while n_iter_no_change < self.n_iter_no_change:
            candidates = self.generate_candidates(stack=seen_cands)
            for cand, _ in candidates:
                data_size, model_size, update_d, prune_set = self.evaluate(
                    cand)
                diff = (self.model_size_ + self.data_size_) - (data_size +
                                                               model_size)

                if diff > 0.01:  # underflow
                    self.codetable_.update(update_d)
                    if self.pruning:
                        self.codetable_, data_size, model_size = self._prune(
                            self.codetable_, prune_set, model_size, data_size)

                    self.data_size_ = data_size
                    self.model_size_ = model_size

                if diff < tol:
                    n_iter_no_change += 1
                    if n_iter_no_change > self.n_iter_no_change:
                        break  # inner break

            if not candidates:  # if empty candidate generation
                n_iter_no_change += self.n_iter_no_change  # force while loop to break

        return self

    def decision_function(self, D):
        """Compute covers on new data, and return code length

        This function is named ``decision_function`` because code lengths
        represent the distance between a point and the current codetable.

        Setting ``pruning`` to False when creating the model
        is recommended to cover unseen data, and especially when building a classifier.

        Parameters
        ----------
        D: pd.DataFrame or np.ndarray
            new data to make predictions on, in tabular format

        Example
        -------
        >>> from skmine.itemsets import SLIM; import pandas as pd
        >>> def to_tabular(D): return pd.Series(D).str.join('|').str.get_dummies(sep="|")
        >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']]
        >>> new_D = to_tabular([['cookies', 'butter']])
        >>> slim = SLIM().fit(to_tabular(D))
        >>> slim.decision_function(new_D)
        0   -1.321928
        dtype: float32
        """
        D = _check_D(D)
        codetable = pd.Series(self.codetable_)
        D_sct = {
            k: Bitmap(np.where(D[k])[0])
            for k in D.columns if k in self.standard_codetable_
        }
        covers = cover(D_sct, codetable.index)

        mat = np.zeros(shape=(len(D), len(covers)))
        for idx, tids in enumerate(covers.values()):
            mat[tids, idx] = 1
        mat = pd.DataFrame(mat, columns=covers.keys())

        code_lengths = codetable.map(len)
        ct_codes = code_lengths / code_lengths.sum()
        codes = (mat * ct_codes).sum(axis=1).astype(np.float32)
        # positive sign on log2 to return a negative distance (sklearn convention)
        r = _log2(codes)
        r[r == 0] = -np.inf  # zeros would fool a `shortest code wins` strategy
        return r

    def generate_candidates(self, stack=None, thresh=1e3):
        """
        Generate candidates from the current codetable (SLIM is any-time)

        Note that `stack` is updated during the execution of this method.

        Parameters
        ----------
        stack: set[frozenset], default=None
            a stack of already-seen candidates to be excluded
        thresh: int, default=1_000
            if the size of the current codetable is higher than `thresh`,
            candidates are generated on the fly and remain unsorted. Otherwise,
            they are returned in a list, sorted by decreasing order of estimated gain.

        Returns
        -------
        iterator[tuple(frozenset, Bitmap)]
        """
        ct = SortedDict(self._standard_candidate_order, self.codetable.items())
        # if big number of elements in codetable, just take a generator, do not sort output
        gen = generate_candidates if len(
            ct) < thresh else generate_candidates_big
        return gen(ct, stack=stack)

    def evaluate(self, candidate):
        """
        Evaluate ``candidate``, considering the current codetable and a dataset ``D``

        Parameters
        ----------
        candidate: frozenset
            a new candidate to be evaluated

        Returns
        -------
        (float, float, dict, set)
            updated (data size, model size, codetable)
            and finally the set of itemsets for which usage decreased
        """
        idx = self.codetable_.bisect(candidate)
        ct = list(self.codetable_)
        ct.insert(idx, candidate)
        D = {k: v.copy() for k, v in self.standard_codetable_.items()}
        CTc = cover(D, ct)

        updated, decreased = {candidate: CTc[candidate]}, set()
        for iset, usage in self.codetable_.items():  # TODO: useless if size is too big
            if usage != CTc[iset]:
                updated[iset] = CTc[iset]
                if len(CTc[iset]) < len(usage):
                    decreased.add(iset)

        data_size, model_size = self._compute_sizes(
            CTc)  # TODO pruning in evaluate

        return data_size, model_size, updated, decreased

    def reconstruct(self):
        """reconstruct the original data from the current `self.codetable_`"""
        return reconstruct(self.codetable_)

    @lru_cache(maxsize=1024)
    def get_support(self, itemset):
        """Get support from an itemset"""
        U = reduce(Bitmap.intersection, self.standard_codetable_.loc[itemset])
        return len(U)

    def _standard_cover_order(self, itemset):
        """
        Returns a tuple associated with an itemset,
        so that many itemsets can be sorted in Standard Cover Order
        """
        return (-len(itemset), -self.get_support(itemset), tuple(itemset))

    def _standard_candidate_order(self, itemset):
        return (-self.get_support(itemset), -len(itemset), tuple(itemset))

    def _prefit(self, D, y=None):
        if hasattr(D, 'ndim') and D.ndim == 2:
            D = _check_D(D)
            if y is not None:
                D = supervised_to_unsupervised(D, y)  # SKLEARN_COMPAT
            item_to_tids = {k: Bitmap(np.where(D[k])[0]) for k in D.columns}
        else:
            item_to_tids = _to_vertical(D)
        self.standard_codetable_ = pd.Series(item_to_tids)
        usage = self.standard_codetable_.map(len).astype(np.uint32)

        ct_it = ((frozenset([e]), tids) for e, tids in item_to_tids.items())
        self.codetable_ = SortedDict(self._standard_cover_order, ct_it)

        codes = -_log2(usage / usage.sum())

        # L(code_ST(X)) = L(code_CT(X)), because CT=ST
        self.model_size_ = 2 * codes.sum()

        self.data_size_ = (codes * usage).sum()

        return self

    def _get_standard_codes(self, index):
        """compute the size of a codetable index given the standard codetable"""
        flat_items = list(chain(*index))
        items, counts = np.unique(flat_items, return_counts=True)

        usages = self.standard_codetable_.loc[items].map(len).astype(np.uint32)
        usages /= usages.sum()
        codes = -_log2(usages)
        return codes * counts

    def _compute_sizes(self, codetable):
        """
        Compute sizes for both the data and the model

        .. math:: L(D|CT)
        .. math:: L(CT|D)

        Parameters
        ----------
        codetable : Mapping
            A series mapping itemsets to their usage tids

        Returns
        -------
        tuple(float, float)
            (data_size, model_size)
        """
        isets, usages = zip(*((_[0], len(_[1])) for _ in codetable.items()
                              if len(_[1]) > 0))
        usages = np.array(usages, dtype=np.uint32)
        codes = -_log2(usages / usages.sum())

        stand_codes = self._get_standard_codes(isets)

        model_size = stand_codes.sum() + codes.sum(
        )  # L(CTc|D) = L(X|ST) + L(X|CTc)
        data_size = (codes * usages).sum()
        return data_size, model_size

    def _prune(self, codetable, prune_set, model_size, data_size):
        """post prune a codetable considering itemsets for which usage has decreased

        Parameters
        ----------
        codetable: SortedDict
        prune_set: set
            itemsets in ``codetable`` for which usage has decreased
        model_size: float
            current model_size for ``codetable``
        data_size: float
            current data size when encoding ``D`` with ``codetable``

        Returns
        -------
        new_codetable, new_data_size, new_model_size: SortedDict, float, float
            a tuple containing the pruned codetable, and new model size and data size
            w.r.t this new codetable
        """
        prune_set = {k for k in prune_set if len(k) > 1}  # remove singletons
        while prune_set:
            cand = min(prune_set, key=lambda e: len(codetable[e]))
            prune_set.discard(cand)

            ct = list(codetable)
            ct.remove(cand)

            D = {k: v.copy()
                 for k, v in self.standard_codetable_.items()
                 }  # TODO avoid data copies
            CTp = cover(D, ct)
            decreased = {
                k
                for k, v in CTp.items()
                if len(k) > 1 and len(v) < len(codetable[k])
            }

            d_size, m_size = self._compute_sizes(CTp)

            if d_size + m_size < model_size + data_size:
                codetable.update(CTp)
                del codetable[cand]
                prune_set.update(decreased)
                data_size, model_size = d_size, m_size

        return codetable, data_size, model_size
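
For context, a small sketch of the SortedDict feature the codetable relies on: passing a key function as the first positional argument keeps entries ordered by that function rather than by the raw keys. The ordering below is a toy stand-in, not SLIM's actual Standard Cover Order:

from sortedcontainers import SortedDict


def by_length_then_items(itemset):
    # Hypothetical ordering: larger itemsets first, ties broken lexically.
    return (-len(itemset), tuple(sorted(itemset)))


ct = SortedDict(by_length_then_items)
ct[frozenset({'milk'})] = [0, 1]
ct[frozenset({'milk', 'bananas'})] = [0, 1]
ct[frozenset({'cookies'})] = [1, 2]

print([sorted(k) for k in ct])  # [['bananas', 'milk'], ['cookies'], ['milk']]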
Example #34
class KeyedRegion:
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in
    this region overlaps with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """

    __slots__ = ('_storage', '_object_mapping', '_phi_node_contains')

    def __init__(self, tree=None, phi_node_contains=None):
        self._storage = SortedDict() if tree is None else tree
        self._object_mapping = weakref.WeakValueDictionary()
        self._phi_node_contains = phi_node_contains

    def __getstate__(self):
        return self._storage, dict(self._object_mapping), self._phi_node_contains

    def __setstate__(self, s):
        self._storage, om, self._phi_node_contains = s
        self._object_mapping = weakref.WeakValueDictionary(om)

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        if type(offset) is not int:
            raise TypeError("KeyedRegion only accepts concrete offsets.")

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion(phi_node_contains=self._phi_node_contains)

        kr = KeyedRegion(phi_node_contains=self._phi_node_contains)
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        kr._object_mapping = self._object_mapping.copy()
        return kr

    def merge(self, other, replacements=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for so in item.stored_objects:  # type: StoredObject
                if replacements and so.obj in replacements:
                    so = StoredObject(so.start, replacements[so.obj], so.size)
                self._object_mapping[so.obj_id] = so
                self.__store(so, overwrite=False)

        return self

    def replace(self, replacements):
        """
        Replace variables with other variables.

        :param dict replacements:   A dict of variable replacements.
        :return:                    self
        """

        for old_var, new_var in replacements.items():
            old_var_id = id(old_var)
            if old_var_id in self._object_mapping:
                # FIXME: we need to check if old_var still exists in the storage
                old_so = self._object_mapping[old_var_id]  # type: StoredObject
                self._store(old_so.start, new_var, old_so.size, overwrite=True)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = { }

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [ obj.obj for obj in ro.stored_objects ]
            offset_to_vars[key] = variables

        s = [ ]
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """

        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other objects that are fully covered by this
        object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """

        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype:  int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype:  set
        """

        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """

        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        stored_object = StoredObject(start, obj, size)
        self._object_mapping[stored_object.obj_id] = stored_object
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite:  Whether existing objects should be overwritten or not. True to make a strong update,
                                False to make a weak update.
        :return: None
        """

        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end-1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_with_check(b, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end, {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_with_check(a, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_with_check(item, stored_object)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(self._storage.irange(maximum=end-1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_with_check(self, item, stored_object):
        if len({stored_object.obj} | item.internal_objects) > 1:
            if self._phi_node_contains is not None:
                # check if `item` is a phi node that contains stored_object.obj
                for so in item.internal_objects:
                    if self._phi_node_contains(so, stored_object.obj):
                        # yes! so we want to skip this object
                        return
                # check if `stored_object.obj` is a phi node that contains item.internal_objects
                if all(self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects):
                    # yes!
                    item.set_object(stored_object)
                    return

            l.warning("Overlapping objects %s.", str({stored_object.obj} | item.internal_objects))
            # import ipdb; ipdb.set_trace()
        item.add_object(stored_object)
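
A minimal sketch of the floor-key lookup performed by _get_container above: SortedDict.irange(maximum=offset, reverse=True) yields candidate base offsets at or below the query offset, and the stored size decides whether the query actually falls inside that region (offsets and sizes below are invented):

from sortedcontainers import SortedDict

# Map base offset -> region size, standing in for RegionObject.
storage = SortedDict({0: 8, 16: 4, 32: 8})


def find_region(offset):
    try:
        base = next(storage.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None
    return base if base + storage[base] > offset else None


print(find_region(18))  # 16   (inside the region starting at 16)
print(find_region(25))  # None (gap between regions)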
Example #35
class FakeBatsim(object):
    def __init__(self, batsky_scheduler, workload_file):
        self.batsky_sched = batsky_scheduler
        self.batsky_sched.bs = self

        self.jobs = dict()

        self._current_time = 0
        self.nb_jobs_submitted = 0
        self.nb_jobs_completed = 0
        self.nb_jobs_toExecute = 0
        # Counters below are read in _read_bat_msg when jobs complete.
        self.nb_jobs_timeout = 0
        self.nb_jobs_failed = 0
        self.nb_jobs_successful = 0
        self.nb_jobs_killed = 0

        self.running_simulation = False

        self.workload = None
        if workload_file:
            with open(workload_file) as json_file:
                self.workload = json.load(json_file)
            self._fake_events = self.events_from_workload(self.workload)
        else:
            self._fake_events = SortedDict({
                0.0: [{
                    'timestamp': 0.0,
                    'type': 'SIMULATION_BEGINS',
                    'data': {}
                }],
                5.0: [{
                    'timestamp': 5.0,
                    'type': 'JOB_SUBMITTED',
                    'data': {
                        'job_id': 'w0!1',
                        'job': {
                            'id': 'w0!1',
                            'subtime': 5.0,
                            'res': 1,
                            'walltime': 12,
                            'profile': {
                                'type': 'delay',
                                'delay': 10
                            }
                        }
                    }
                }],
                20.0: [{
                    'timestamp': 20.0,
                    'type': 'SIMULATION_ENDS',
                    'data': {}
                }]
            })

        #0.0: [{'type': '', 'data': {}}],

        self._read_bat_msg()

        self.batsky_sched.onAfterBatsimInit()

    def events_from_workload(self, workload):
        # TODO: support multiple events at the same timestamp
        fake_events = SortedDict({
            0.0: [{
                'timestamp': 0.0,
                'type': 'SIMULATION_BEGINS',
                'data': {}
            }]
        })
        max_event_time = 0.0
        max_walltime = 0
        for job in workload['jobs']:
            assert 'subtime' in job
            timestamp = job['subtime']
            if timestamp == 0:  # just to not have 2 events at 0.0
                timestamp = 1e-06
            profile = workload['profiles'][job['profile']]
            assert profile['type'] == 'delay'
            job_event = {'timestamp': timestamp, 'type': 'JOB_SUBMITTED'}
            job_event['data'] = {
                'job_id': str(job['id']),
                'job': {
                    'id': str(job['id']),
                    'subtime': timestamp,
                    'res': job['res'],
                    'walltime': job['walltime'],  # the job's walltime from the workload, not its submission time
                    'profile': {
                        'type': 'delay',
                        'delay': profile['delay']
                    }
                }
            }
            self.nb_jobs_toExecute += 1

            assert timestamp not in fake_events

            fake_events.update({timestamp: [job_event]})

            if timestamp > max_event_time:
                max_event_time = timestamp
            if job['walltime'] > max_walltime:
                max_walltime = job['walltime']

        # Simulation finished when all jobs are executed
        #simulation_end_time = max_event_time + max_walltime + 50
        #fake_events.update({simulation_end_time: [{'timestamp': simulation_end_time,
        #                                           'type': 'SIMULATION_ENDS', 'data': {}}]})
        return fake_events

    def time(self):
        return self._current_time

    def consume_time(self, t):
        self._current_time += float(t)
        return self._current_time

    def wake_me_up_at(self, at_time):
        events = []
        if at_time in self._fake_events:
            events = self._fake_events.get(at_time)
        events.append({
            'timestamp': at_time,
            'type': 'REQUESTED_CALL',
            'data': {}
        })
        self._fake_events.update({at_time: events})

    def execute_jobs(self, jobs):
        # Generate the events of completion

        for job in jobs:
            events = []
            completion_time = self.time() + job.profile['delay']
            if completion_time in self._fake_events:
                events = self._fake_events.get(completion_time)

            assert job.profile['type'] == 'delay'
            events.append({
                'timestamp': completion_time,
                'type': 'JOB_COMPLETED',
                'data': {
                    'job_id': job.id
                }
            })
            logger.debug(
                'Execute_job: insert completion events for job: {} completion_time: {}'
                .format(job.id, completion_time))

            self._fake_events.update({completion_time: events})

    def start(self):
        cont = True
        while cont:
            cont = self.do_next_event()

    def do_next_event(self):
        return self._read_bat_msg()

    def _read_bat_msg(self):
        (batsim_time, events) = self._fake_events.popitem(index=0)
        logger.debug('Batsim time {}  Events: {}'.format(batsim_time, events))
        self._current_time = batsim_time

        for event in events:
            event_type = event['type']
            event_data = event.get('data', {})

            if event_type == 'SIMULATION_BEGINS':
                assert not self.running_simulation, "A simulation is already running (is more than one instance of Batsim active?!)"
                self.running_simulation = True
                self.batsky_sched.onSimulationBegins()

            elif event_type == 'SIMULATION_ENDS':
                self.batsky_sched.onSimulationEnds()

            elif event_type == 'JOB_SUBMITTED':
                job_id = event_data['job_id']
                job, profile = self.get_job_and_profile(event)
                job.job_state = Job.State.SUBMITTED
                self.nb_jobs_submitted += 1
                self.jobs[job_id] = job
                self.batsky_sched.onJobSubmission(job)

            elif event_type == 'JOB_COMPLETED':
                job_id = event_data['job_id']
                j = self.jobs[job_id]
                j.finish_time = event['timestamp']
                self.batsky_sched.onJobCompletion(j)
                if j.job_state == Job.State.COMPLETED_WALLTIME_REACHED:
                    self.nb_jobs_timeout += 1
                elif j.job_state == Job.State.COMPLETED_FAILED:
                    self.nb_jobs_failed += 1
                elif j.job_state == Job.State.COMPLETED_SUCCESSFULLY:
                    self.nb_jobs_successful += 1
                elif j.job_state == Job.State.COMPLETED_KILLED:
                    self.nb_jobs_killed += 1
                self.nb_jobs_completed += 1

                if self.nb_jobs_completed == self.nb_jobs_toExecute:
                    self.batsky_sched.onSimulationEnds()

            elif event_type == 'REQUESTED_CALL':
                self.batsky_sched.onRequestedCall()

        return True

    def get_job_and_profile(self, event):
        json_dict = event["data"]["job"]
        job = Job.from_json_dict(json_dict)

        if "profile" in event["data"]:
            profile = event["data"]["profile"]
        else:
            profile = {}

        return job, profile
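
A bare-bones sketch of the event-calendar pattern this class uses throughout: events are grouped by timestamp in a SortedDict, new events are merged into any list already stored at that timestamp, and popitem(index=0) always returns the earliest pending timestamp (the event payloads are made up):

from sortedcontainers import SortedDict

calendar = SortedDict()


def schedule(timestamp, event):
    events = calendar.get(timestamp, [])
    events.append(event)
    calendar[timestamp] = events


schedule(5.0, {'type': 'JOB_SUBMITTED'})
schedule(0.0, {'type': 'SIMULATION_BEGINS'})
schedule(5.0, {'type': 'REQUESTED_CALL'})

while calendar:
    timestamp, events = calendar.popitem(index=0)
    print(timestamp, [e['type'] for e in events])
# 0.0 ['SIMULATION_BEGINS']
# 5.0 ['JOB_SUBMITTED', 'REQUESTED_CALL']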
Example #36
def dzsz(open,
         high,
         low,
         close,
         min_legin=1,
         min_legout=2,
         min_base=1,
         max_legin=2,
         max_legout=6,
         max_base=6,
         demand_dict=None,
         supply_dict=None,
         use_proximal=False):
    ind_arr = open.index.array

    def body_range(ind):
        cdl_range = high.iloc[ind] - low.iloc[ind]
        cdl_body = abs(close.iloc[ind] - open.iloc[ind])
        # cdl_body = 0.05 if cdl_body == 0 else cdl_body
        cdl_body, cdl_range = (0.0, 1) if cdl_range == 0 else (cdl_body,
                                                               cdl_range)
        #Debugging cdl_body,cdl_range runtime warning
        # with warnings.catch_warnings(record=True) as w:
        #     # Cause all warnings to always be triggered.
        #     warnings.simplefilter("always")
        #     # Trigger a warning.
        #     cdl_body_range = cdl_body/cdl_range
        #     if len(w) != 0:
        #         print(cdl_range,cdl_body_range,cdl_body)
        # print(f"{ind_arr[ind]}\t CDLRANGE\t {cdl_body_range}",end='\t')
        cdl_body_range = cdl_body / cdl_range
        return cdl_body_range

    def green(ind):
        return (close.iloc[ind] > open.iloc[ind]) and (body_range(ind) > 0.55)

    def red(ind):
        return (close.iloc[ind] < open.iloc[ind]) and (body_range(ind) > 0.55)

    def rally(ind, leg_size):
        leg = True
        for i in range(ind, ind - leg_size + 1, -1):
            if green(i):
                pass
            else:
                leg = False
        if leg:
            if green(ind - leg_size + 1):
                return leg
            else:
                if body_range(ind - leg_size + 1) < 0.5 and low[
                        ind - leg_size + 1] > high[ind - leg_size]:
                    return leg
                else:
                    return False
        return leg

    def drop(ind, leg_size):
        leg = True
        for i in range(ind, ind - leg_size + 1, -1):
            if red(i):
                pass
            else:
                leg = False
        if leg:
            if red(ind - leg_size + 1):
                return leg
            else:
                if body_range(ind - leg_size + 1) < 0.5 and high[
                        ind - leg_size + 1] < low[ind - leg_size]:
                    return leg
                else:
                    return False
        return leg

    def legout(ind, leg_length=min_legout):
        is_rally = rally(ind, leg_length)
        is_drop = drop(ind, leg_length)
        if is_rally:
            return {'distal': low.iloc[ind], 'is_rally': True}
        if is_drop:
            return {'distal': high.iloc[ind], 'is_rally': False}
        return {'distal': False, 'is_rally': None}

    def base(base_start, base_length=min_base, is_rally=True):
        is_base = True
        proximal = 0 if is_rally else 99999999
        distal = 99999999 if is_rally else 0
        for i in range(base_start, base_start - base_length, -1):
            if body_range(i) <= 0.5:
                proximal = max(proximal, close.iloc[i],
                               open.iloc[i]) if is_rally else min(
                                   proximal, close.iloc[i], open.iloc[i])
                distal = min(distal, low.iloc[i]) if is_rally else max(
                    distal, high.iloc[i])
            else:
                is_base = False
                break
        if is_base:
            if is_rally:
                is_base = proximal < close.iloc[base_start + 1]
            else:
                is_base = proximal > close.iloc[base_start + 1]
        return {'is_base': is_base, 'proximal': proximal, 'distal': distal}

    def legin(ind, legin_length=min_legin):
        is_rally = rally(ind, legin_length)
        is_drop = drop(ind, legin_length)
        if is_rally:
            return {'distal': high.iloc[ind], 'is_rally': True}
        if is_drop:
            return {'distal': low.iloc[ind], 'is_rally': False}
        return {'distal': False, 'is_rally': None}

    demand_dict = SortedDict() if demand_dict is None else demand_dict
    supply_dict = SortedDict() if supply_dict is None else supply_dict
    ind_list = []
    if use_proximal:
        for ind in range(-len(open) + 15, -1):
            # check for legout length
            for i in range(max_legout, min_legout - 1, -1):
                is_legout = legout(ind, i)
                if is_legout['distal']:
                    for j in range(max_base, min_base - 1, -1):
                        is_base = base(ind - i, j, is_legout['is_rally'])
                        if is_base['is_base']:
                            for k in range(max_legin, min_legin - 1, -1):
                                is_legin = legin(ind - i - j, k)
                                if is_legin['distal']:
                                    ind_list.append(ind)
                                    if is_legout['is_rally']:  #_brally
                                        proximal = is_base['proximal']
                                        distal = min(
                                            is_base['distal'],
                                            is_legout['distal']
                                        ) if is_legin['is_rally'] else min(
                                            is_base['distal'],
                                            is_legin['distal'],
                                            is_legout['distal'])
                                        if proximal not in demand_dict.keys():
                                            demand_dict.update({
                                                proximal: {
                                                    "timestamp": ind_arr[ind],
                                                    "legout_length": i,
                                                    "base_length": j,
                                                    "legin_length": k,
                                                    "proximal": proximal,
                                                    "distal": distal
                                                }
                                            })
                                    else:  #_bdrop
                                        proximal = is_base['proximal']
                                        distal = max(
                                            is_base['distal'],
                                            is_legout['distal']
                                        ) if not is_legin['is_rally'] else max(
                                            is_base['distal'],
                                            is_legin['distal'],
                                            is_legout['distal'])
                                        if proximal not in supply_dict.keys():
                                            supply_dict.update({
                                                proximal: {
                                                    "timestamp": ind_arr[ind],
                                                    "legout_length": i,
                                                    "base_length": j,
                                                    "legin_length": k,
                                                    "proximal": proximal,
                                                    "distal": distal
                                                }
                                            })
            # Elimination method 1
            if len(demand_dict.keys()) != 0:
                # print(list(demand_dict.keys()), low[ind])
                while low[ind] < demand_dict.keys()[-1]:
                    demand_dict.popitem(index=-1)
                    if len(demand_dict.keys()) == 0:
                        break
            if len(supply_dict.keys()) != 0:
                # print(list(demand_dict.keys()), low[ind])
                while high[ind] > supply_dict.keys()[0]:
                    supply_dict.popitem(index=0)
                    if len(supply_dict.keys()) == 0:
                        break
    else:
        for ind in range(-len(open) + 15, -1):
            # check for legout length
            for i in range(max_legout, min_legout - 1, -1):
                is_legout = legout(ind, i)
                if is_legout['distal']:
                    for j in range(max_base, min_base - 1, -1):
                        is_base = base(ind - i, j, is_legout['is_rally'])
                        if is_base['is_base']:
                            for k in range(max_legin, min_legin - 1, -1):
                                is_legin = legin(ind - i - j, k)
                                if is_legin['distal']:
                                    ind_list.append(ind)
                                    if is_legout['is_rally']:  #_brally
                                        proximal = is_base['proximal']
                                        distal = min(
                                            is_base['distal'],
                                            is_legout['distal']
                                        ) if is_legin['is_rally'] else min(
                                            is_base['distal'],
                                            is_legin['distal'],
                                            is_legout['distal'])
                                        if distal not in demand_dict.keys():
                                            demand_dict.update({
                                                distal: {
                                                    "timestamp": ind_arr[ind],
                                                    "legout_length": i,
                                                    "base_length": j,
                                                    "legin_length": k,
                                                    "proximal": proximal,
                                                    "distal": distal
                                                }
                                            })
                                    else:  #_bdrop
                                        proximal = is_base['proximal']
                                        distal = max(
                                            is_base['distal'],
                                            is_legout['distal']
                                        ) if not is_legin['is_rally'] else max(
                                            is_base['distal'],
                                            is_legin['distal'],
                                            is_legout['distal'])
                                        if distal not in supply_dict.keys():
                                            supply_dict.update({
                                                distal: {
                                                    "timestamp": ind_arr[ind],
                                                    "legout_length": i,
                                                    "base_length": j,
                                                    "legin_length": k,
                                                    "proximal": proximal,
                                                    "distal": distal
                                                }
                                            })
            # Elimination method 1
            if len(demand_dict.keys()) != 0:
                while close[ind] < demand_dict.keys()[-1]:
                    demand_dict.popitem(index=-1)
                    if len(demand_dict.keys()) == 0:
                        break
            if len(supply_dict.keys()) != 0:
                while close[ind] > supply_dict.keys()[0]:
                    supply_dict.popitem(index=0)
                    if len(supply_dict.keys()) == 0:
                        break

    df_demand = pd.DataFrame(demand_dict.values(), index=demand_dict.keys())
    df_supply = pd.DataFrame(supply_dict.values(), index=supply_dict.keys())

    return (df_demand, df_supply, demand_dict, supply_dict)
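
A reduced sketch of the zone-elimination step at the end of each iteration above: demand zones live in a SortedDict keyed by price level, and while the current low trades below the highest remaining zone, popitem(index=-1) discards zones from the top (the price levels are invented):

from sortedcontainers import SortedDict

demand_zones = SortedDict({98.0: 'zone-a', 101.5: 'zone-b', 103.0: 'zone-c'})
current_low = 100.0

# Drop every demand zone whose level sits above the current low.
while demand_zones and current_low < demand_zones.keys()[-1]:
    demand_zones.popitem(index=-1)

print(list(demand_zones))  # [98.0]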
Example #37
class SnapshotGraph(object):
    def __init__(self, **attr):
        self.graph = {}
        self.graph.update(attr)
        self.snapshots = SortedDict()

    @property
    def name(self):
        """String identifier of the snapshot graph.

        This snapshot graph attribute appears in the attribute dict SnapshotGraph.graph
        keyed by the string `"name"`. as well as an attribute (technically
        a property) `SnapshotGraph.name`. This is entirely user controlled.
        """
        return self.graph.get('name', '')

    @name.setter
    def name(self, s):
        self.graph['name'] = s

    def __str__(self):
        """Return the snapshot graph name.

        Returns
        -------
        name : string
            The name of the snapshot graph.

        Examples
        --------
        >>> G = dnx.SnapshotGraph(name='foo')
        >>> str(G)
        'foo'
        """
        return self.name

    def __len__(self):
        """Return the number of snapshots. Use: 'len(G)'.

        Returns
        -------
        num_snapshots : int
            The number of snapshots in the graph.

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG2 = nx.Graph()
        >>>
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> nxG2.add_edges_from([(1, 4), (1, 3)])
        >>>
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot(graph=nxG1)
        >>> G.add_snapshot(graph=nxG2)
        >>> len(G)
        2

        """
        return len(self.snapshots)

    def __contains__(self, graph):
        """Return True if graph in the snapshot graph, False otherwise. Use: 'graph in G'.

        Parameters
        ----------
        graph: networkx graph object
            networkx graph to look for in the snapshot graph.

        Returns
        -------
        bool
            True if the graph is one of the snapshots, False otherwise.

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG2 = nx.Graph()
        >>>
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> nxG2.add_edges_from([(1, 4), (1, 3)])
        >>>
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot(graph=nxG1)
        >>> G.add_snapshot(graph=nxG2)
        >>> nxG1 in G
        True
        """

        try:
            return graph in self.snapshots.values()
        except TypeError:
            return False

    def __iter__(self):
        """Iterates through snapshots in snapshot graph.


        Returns
        -------
        Iterable of snapshots

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG2 = nx.Graph()
        >>>
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> nxG2.add_edges_from([(1, 4), (1, 3)])
        >>>
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot(graph=nxG1)
        >>> G.add_snapshot(graph=nxG2)
        >>> for snapshot in G:
        ...     print(True)
        True
        True
        """

        return iter(self.snapshots.values())

    def insert(self, graph, start=None, end=None, time=None):
        """Insert a graph into the snapshot graph, with specified intervals.

        Parameters
        ----------
        graph: networkx graph object
            A networkx graph to be inserted into snapshot graph.
        start: start of the interval, inclusive
        end: end of the interval, exclusive
        time: timestamp for impulses, cannot be used together with (start, end)

        Returns
        -------
        None

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> G = dnx.SnapshotGraph()
        >>> G.insert(nxG1, start=0, end=3)

        """
        if time is not None and (start is not None or end is not None):
            raise ValueError('Time and (start or end) cannot both be specified.')
        elif time is not None:
            self.snapshots.update({(time, time): graph})
        elif start is None or end is None:
            raise ValueError('Either time or both start and end must be specified.')
        elif start > end:
            raise ValueError('Start of the interval must be less than or equal to end.')
        else:
            self.snapshots.update({(start, end): graph})

    def add_snapshot(self, ebunch=None, graph=None, start=None, end=None, time=None):
        """Add a snapshot with a bunch of edge values.

        Parameters
        ----------
        ebunch : container of edges, optional (default= None)
            Each edge in the ebunch list will be included in the added snapshot.
        graph : networkx graph object, optional (default= None)
            networkx graph to be inserted into snapshot graph.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        time: timestamp for impulses, cannot be used together with (start, end)

        Returns
        -------
        None

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 4), (1, 3)], start=0, end=3)
        """
        if graph is None:
            g = Graph()
            if ebunch is not None:
                g.add_edges_from(ebunch)
        else:
            g = graph

        if time is not None and (start is not None or end is not None):
            raise ValueError('Time and (start or end) cannot both be specified.')
        elif time is not None:
            self.insert(g, time=time)
        elif start is None and end is None:
            raise ValueError('Either time or both start and end must be specified.')
        else:
            self.insert(g, start=start, end=end)

    def subgraph(self, nbunch, sbunch=None, start=None, end=None):
        """Return a snapshot graph containing only the nodes in bunch, and snapshot indexes in sbunch.

        Parameters
        ----------
        nbunch : container of nodes
            Each node in the nbunch list will be included in all subgraphs indexed in sbunch.
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of subgraphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            snap_graph : SnapshotGraph object
                Contains only the nodes in nbunch, and the snapshot indexes in sbunch.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=0, end=3)
        >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=3, end=10)
        >>> H = G.subgraph([4, 6])
        >>> type(H)
        <class 'snapshotgraph.SnapshotGraph'>
        >>> list(H.get([0])[0].edges(data=True))
        [(4, 6, {})]
        """

        subgraph = SnapshotGraph()
        subgraph.graph = self.graph

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            for key, snapshot in self._get(sbunch=sbunch, include_interval=True):
                subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1])
        else:
            for key, snapshot in self._get(start=start, end=end, include_interval=True):
                subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1])

        return subgraph

    def degree(self, sbunch=None, nbunch=None, start=None, end=None, weight=None):
        """Return a list of tuples containing the degrees of each node in each snapshot

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of node degrees. It is highly recommended that this list is sequential,
            however it can be out of order.
        nbunch : container of nodes, optional (default= None)
            Each node in the nbunch list will be included in the returned list of
            node degrees.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        weight : string, optional (default= None)
            The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1.
            The degree is the sum of the edge weights adjacent to the node.

        Returns
        -------
            degree_list : list
                List of DegreeView objects containing the degree of each node, indexed by requested snapshot.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.degree(sbunch=[1])
        [DegreeView({1: 2, 4: 1, 3: 1})]
        >>> G.degree(nbunch=[1, 2])
        [DegreeView({1: 2, 2: 1}), DegreeView({1: 2})]
        """
        # returns a list of degrees for each graph snapshot in snapshots
        # use generator to create list of degrees

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            if nbunch:
                return [graph.degree(nbunch, weight=weight) for graph in self._get(sbunch=sbunch)]
            else:
                return [graph.degree(weight=weight) for graph in self._get(sbunch=sbunch)]
        else:
            if nbunch:
                return [graph.degree(nbunch, weight=weight) for graph in self._get(start=start, end=end)]
            else:
                return [graph.degree(weight=weight) for graph in self._get(start=start, end=end)]

    def number_of_nodes(self, sbunch=None, start=None, end=None):
        """Gets number of nodes in each snapshot requested in 'sbunch'.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of number of nodes in the snapshot. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            num_nodes : list
                A list of the number of nodes in each requested snapshot.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.number_of_nodes(sbunch=[1])
        [3]
        >>> G.number_of_nodes(sbunch=[0, 1])
        [3, 3]
        """
        # returns a list of the number of nodes in each graph in the range

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.number_of_nodes() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.number_of_nodes() for graph in self._get(start=start, end=end)]

    def order(self, sbunch=None, start=None, end=None):
        """Returns order of each graph requested in 'sbunch'.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of node orders. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            snapshot_orders : list
                A list of the orders of each snapshot.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.order(sbunch=[1])
        [3]
        >>> G.order(sbunch=[0, 1])
        [3, 3]
        """
        # returns a list of the order of the graph in the range

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.order() for graph in self._get(sbunch=sbunch)]
        else:
            return [g.order() for g in self._get(start=start, end=end)]

    def has_node(self, n, sbunch=None, start=None, end=None):
        """Gets boolean list of if a snapshot in 'sbunch' contains node 'n'.

        Parameters
        ----------
        n : node
            Node to be checked for in requested snapshots.
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of booleans. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            List of booleans, one per requested snapshot, indicating whether it contains n.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.has_node(1, sbunch=[1])
        [True]
        >>> G.has_node(1)
        [True, True]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.has_node(n) for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.has_node(n) for graph in self._get(start=start, end=end)]

    def is_multigraph(self, sbunch=None, start=None, end=None):
        """Returns a list of boolean values for if the graph at the index is a multigraph.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of booleans. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            multi_list : list
                List of booleans indicating whether each requested snapshot is a multigraph.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.is_multigraph(sbunch=[0, 1])
        [False, False]
        >>> G.is_multigraph()
        [False, False]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.is_multigraph() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.is_multigraph() for graph in self._get(start=start, end=end)]

    def is_directed(self, sbunch=None, start=None, end=None):
        """Returns a list of boolean values for if the graph at the index is a directed graph.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of booleans. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            is_directed_list : list
                List of booleans indicating whether each requested snapshot is directed.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.is_directed(sbunch=[0, 1])
        [False, False]
        >>> G.is_directed()
        [False, False]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.is_directed() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.is_directed() for graph in self._get(start=start, end=end)]

    def to_directed(self, sbunch=None, start=None, end=None):
        """Returns a list of networkx directed graph objects.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of directed graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            direct_list : list
                List of networkx directed graph objects.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.to_directed(sbunch=[0, 1])
        [<networkx.classes.digraph.DiGraph object at 0x7f1a6de49dd8>, <networkx.classes.digraph.DiGraph object at 0x7f1a6de49e10>]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.to_directed() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.to_directed() for graph in self._get(start=start, end=end)]

    def to_undirected(self, sbunch=None, start=None, end=None):
        """Returns a list of networkx graph objects.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of undirected graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            undirect_list : list
                List of networkx graph objects.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.to_undirected(sbunch=[0, 1])
        [<networkx.classes.graph.Graph object at 0x7ff532219e10>, <networkx.classes.graph.Graph object at 0x7ff532219e48>]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.to_undirected() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.to_undirected() for graph in self._get(start=start, end=end)]

    def size(self, sbunch=None, start=None, end=None, weight=None):
        """Returns the size of each graph index as specified in sbunch as a list.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of sizes. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        weight : string, optional (default=None)
            The edge attribute that holds the numerical value used as a weight.
            If None, then each edge has weight 1.

        Returns
        -------
            size_list: list
                List of sizes of each graph indexed in sbunch.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.size(sbunch=[0, 1])
        [2, 2]
        >>> G.size()
        [2, 2]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.size(weight=weight) for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.size(weight=weight) for graph in self._get(start=start, end=end)]

    def _get(self, sbunch=None, start=None, end=None, include_interval=False, split_overlaps=False):
        """Returns a list of graphs specified in sbunch. Hidden utility tool for other functions.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        include_interval: if True, return snapshots with its corresponding intervals
        split_overlaps: if True, when querying by time interval, split any snapshot whose interval only partially
            overlaps the query interval. For example: graph G contains snapshots with time intervals [(0,4),(4,6),(6,10)].
            If the query interval is [2,10], the snapshot with interval (0,4) will be split into two snapshots, (0,2) and
            (2,4), each holding its own copy of the original snapshot. This parameter is used for updating graphs by
            interval. For instance, with the example above, if you want to update the interval (2,10), then the snapshot
            at (0,2) won't be updated.

        Returns
        -------
        If include_interval is True: generator of (interval, networkx graph object) tuples.
        Otherwise: generator of networkx graph objects.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> list(G._get(sbunch=[0]))
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>]
        >>> list(G._get())
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        >>> list(G._get(start=2, end=6))
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        """

        if include_interval:
            graphs = self.snapshots.items()
        else:
            graphs = self.snapshots.values()

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:  # if retrieve by indexes
            for index in sbunch:
                yield graphs[index]
        else:  # if retrieve by interval
            if start is None:
                min_idx = 0
            else:
                min_idx = self.snapshots.bisect_left((start,))

                # Decrease 1 index if start is in the middle of an interval
                # Eg: if Keys = [(2,5)(5,6)], start=3 won't retrieve (2,5) as we want,
                # therefore, decrease 1 index to include (2,5). If start=5, then we won't need to change
                if min_idx > 0 and start < self.snapshots.keys()[min_idx][0]:
                    if split_overlaps:
                        # Eg: if Keys = [(2,5)(5,6)] and start=3, split (2,5) into (2,3) and (3,5)
                        key, g = self.snapshots.popitem(min_idx - 1)
                        self.insert(g, key[0], start)
                        self.insert(copy.deepcopy(g), start, key[1])
                    else:
                        min_idx -= 1

            if end is None:
                max_idx = len(self.snapshots)
            else:
                max_idx = self.snapshots.bisect_left((end,))
                # Split the snapshot if 'end' is in the middle of an interval
                # Eg: if Keys = [(2,5)(5,9)] and end=7, split (5,9) into (5,7) and (7,9)
                if split_overlaps and max_idx < len(self.snapshots) and end < self.snapshots.keys()[max_idx][1]:
                    key, g = self.snapshots.popitem(max_idx)
                    self.insert(g, key[0], end)
                    self.insert(copy.deepcopy(g), end, key[1])

            for graph in graphs[min_idx: max_idx]:
                yield graph

    def get(self, sbunch=None, start=None, end=None):
        """Returns a list of graphs specified in sbunch. Interface function for users.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
        List of networkx graph objects.


        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.get(sbunch=[0])
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>]
        >>> G.get()
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        >>> G.get(start=2, end=6)
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        """

        return [snapshot for snapshot in self._get(sbunch, start, end)]

    def add_nodes_from(self, nbunch, sbunch=None, start=None, end=None, **attrs):
        """Adds nodes to snapshots in sbunch.
        Note: This function may lead to increase in number of snapshots if changes occur within a snapshot.

        Parameters
        ----------
        nbunch : container of nodes
            Each node in the nbunch list will be added to all graphs indexed in sbunch.
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will have the nodes added to it. It is
            highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
        None

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)

        >>> G.add_nodes_from([5, 6, 7], [0])
        >>> G.add_nodes_from([8, 9, 10, 11], [1])
        >>> nx.adjacency_matrix(G.get()[0]).todense()
        [[0 1 1 0 0 0]
         [1 0 0 0 0 0]
         [1 0 0 0 0 0]
         [0 0 0 0 0 0]
         [0 0 0 0 0 0]
         [0 0 0 0 0 0]]
        >>> nx.adjacency_matrix(G.get()[1]).todense()
        [[0 1 1 0 0 0 0]
         [1 0 0 0 0 0 0]
         [1 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]]

        """
        
        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            for graph in self._get(sbunch=sbunch):
                graph.add_nodes_from(nbunch, **attrs)
        else:
            for graph in self._get(start=start, end=end, split_overlaps=True):
                graph.add_nodes_from(nbunch, **attrs)

    def add_edges_from(self, ebunch, sbunch=None, start=None, end=None, **attrs):
        """Adds edges to snapshots in sbunch.
        Note: This function may lead to increase in number of snapshots if changes occur within a snapshot.

        Parameters
        ----------
        ebunch : container of edges
            Each edge in the ebunch list will be added to all graphs indexed in sbunch.
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will have the edges added to it. It is
            highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
        None

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)

        >>> G.add_edges_from([(5, 6), (7, 6)], [0])
        >>> G.add_edges_from([(8, 9), (10, 11)], [0, 1])
        >>> nx.adjacency_matrix(G.get()[0]).todense()
        [[0 1 1 0 0 0 0 0 0 0]
         [1 0 0 0 0 0 0 0 0 0]
         [1 0 0 0 0 0 0 0 0 0]
         [0 0 0 0 1 0 0 0 0 0]
         [0 0 0 1 0 1 0 0 0 0]
         [0 0 0 0 1 0 0 0 0 0]
         [0 0 0 0 0 0 0 1 0 0]
         [0 0 0 0 0 0 1 0 0 0]
         [0 0 0 0 0 0 0 0 0 1]
         [0 0 0 0 0 0 0 0 1 0]]
        >>> nx.adjacency_matrix(G.get()[1]).todense()
        [[0 1 1 0 0 0 0]
         [1 0 0 0 0 0 0]
         [1 0 0 0 0 0 0]
         [0 0 0 0 1 0 0]
         [0 0 0 1 0 0 0]
         [0 0 0 0 0 0 1]
         [0 0 0 0 0 1 0]]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            for graph in self._get(sbunch=sbunch):
                graph.add_edges_from(ebunch, **attrs)
        else:
            for graph in self._get(start=start, end=end, split_overlaps=True):
                graph.add_edges_from(ebunch, **attrs)

    @staticmethod
    def load_from_txt(path, delimiter=";", comments="#", start='start', end='end'):
        """Read snapshot graph in from path.
           Every line in the file must be an adjacency matrix, with rows separated by delimiter.

        Parameters
        ----------
        path : string or file
           Filename to read.

        comments : string, optional
           Marker for comment lines

        start: string, optional
            Marker for start timestamps

        end: string, optional
            Marker for end timestamps

        delimiter : string, optional
           Separator for rows in matrix.  The default is ;. Cannot be whitespace or \n.

        Returns
        -------
        G: SnapshotGraph
            The graph corresponding to the list of adjacency matrices.

        Examples
        --------
        >>> G = dnx.SnapshotGraph.load_from_txt("my_dygraph.txt")
        """

        if delimiter == ' ' or delimiter == '\n':
            raise ValueError("Delimiter cannot be " + delimiter + ".")

        sg = SnapshotGraph()

        with open(path, 'r') as file:
            for line in file:
                p = line.find(comments)
                if p >= 0:
                    line = line[:p]
                if not len(line):
                    continue

                p = min(line.find(start), line.find(end))
                interval = [None, None]

                for item in line[p:].split():
                    key, value = item.split('=')

                    try:
                        value = float(value)
                    except ValueError:
                        raise ValueError('Value of "{}" must be float.'.format(key))

                    if key == start:
                        interval[0] = value
                    else:
                        interval[1] = value

                if interval[0] is None or interval[1] is None:
                    raise ValueError('A snapshot does not include its interval')

                line = line[:p].strip()
                matrix = []
                for row in line.split(delimiter):
                    matrix.append(row.split(' '))

                g = from_numpy_array(np.array(matrix, dtype=float))  # parse entries as numbers, not strings
                sg.insert(g, start=interval[0], end=interval[1])

        return sg

    def save_to_txt(self, path, delimiter=";", start='start', end='end'):
        """Write snapshot graph to path.
           Every line in the file will be an adjacency matrix.

        Parameters
        ----------
        path : string or file
           Filename to write.

        start: string, optional
            Marker for start timestamps

        end: string, optional
            Marker for end timestamps

        delimiter : string, optional
           Separator for rows in matrix.  The default is ;. Cannot be whitespace or \n.

        Examples
        --------
        >>> G.save_to_txt("my_dygraph.txt")
        """

        if len(self) == 0:
            raise ValueError("Given graph is empty.")

        if delimiter == ' ' or delimiter == '\n':
            raise ValueError("Delimiter cannot be " + delimiter + ".")

        with open(path, 'w') as file:
            for interval, graph in self._get(include_interval=True):
                m = adjacency_matrix(graph).todense()
                line = delimiter.join(' '.join(x for x in y) for y in np.asarray(m, dtype=str)) + ' ' + start + '=' +\
                    str(interval[0]) + ' ' + end + '=' + str(interval[1]) + '\n'

                file.write(line)

    def compute_network_statistic(self, nx_statistic_function, sbunch=None, start=None, end=None, **kwargs):
        """Compute networkx statistics on each snapshot.

        Parameters
        ----------
        nx_statistic_function : function from networkx.algorithms
           Statistic function to calculate.
        sbunch: snapshots indices to compute statistic
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        kwargs : optional
           inputs for nx_statistic_function

        Examples
        --------
        >>> G.compute_network_statistic(nx.algorithms.centrality.degree_centrality)
        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [nx_statistic_function(graph, **kwargs) for graph in self._get(sbunch=sbunch)]
        else:
            return [nx_statistic_function(graph, **kwargs) for graph in self._get(start=start, end=end)]
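# SnapshotGraph keeps its snapshots in a SortedDict keyed by (start, end) tuples, so the
# interval queries in _get reduce to bisect_left over the sorted keys. A minimal standalone
# sketch of that lookup (assumed intervals, placeholder strings instead of graphs, and without
# the overlap-splitting logic):
from sortedcontainers import SortedDict

snapshots = SortedDict({(0, 4): "g1", (4, 6): "g2", (6, 10): "g3"})

def query_interval(snapshots, start, end):
    min_idx = snapshots.bisect_left((start,))
    # An interval that begins before `start` may still overlap the query, so step back one slot.
    if 0 < min_idx < len(snapshots) and start < snapshots.keys()[min_idx][0]:
        min_idx -= 1
    max_idx = snapshots.bisect_left((end,))
    return list(snapshots.values()[min_idx:max_idx])

print(query_interval(snapshots, 2, 6))   # ['g1', 'g2']
print(query_interval(snapshots, 4, 10))  # ['g2', 'g3']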