def test_valuesview():
    if hexversion < 0x02070000:
        return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = get_valuesview(temp)

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    that = dict(mapping)
    that_values = get_valuesview(that)

    values = get_valuesview(SortedDict(mapping[:2]))
    assert repr(values) == "SortedDict_values([0, 1])"
def search(path, base):
    cur = base + "/" + path
    ans = SortedDict()
    if os.path.isfile(cur):
        ans[path] = os.stat(base + "/" + path).st_size
        return ans
    li = os.listdir(cur)
    for f in li:
        ans.update(search(path + "/" + f, base))
    return ans
def ls_video_files():
    """
    This function returns the filenames of all video clips for the respective camera.

    returns: a dict where the key is the starting timestamp of the clip and the value is the filename
    """
    r = SortedDict()
    for i in range(17 if camera == 'Cam1' else 18, 28):  # the 17th is missing for Cam2, CamL and CamR
        p = os.path.join(data_path, '2018-05-%d' % i)
        r.update({
            int(f[:13]): f
            for f in os.listdir(p)
            if os.path.isfile(os.path.join(p, f)) and f[0] != '.' and f[-3:] == 'mp4'
        })
    return r
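# Usage sketch (not from the original source): because ls_video_files() returns a
# SortedDict keyed by starting timestamp, the clip covering an arbitrary time can be
# found with a bisect-style lookup. The names `clips` and `ts` are hypothetical.
def find_clip(clips, ts):
    idx = clips.bisect_right(ts) - 1  # last clip starting at or before ts
    if idx < 0:
        return None  # ts is earlier than the first clip
    start, filename = clips.peekitem(idx)
    return filename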
def Get_Thresholds(Threshold, Pops, Pop_counts):
    """Get your thresholds organized"""
    print('Storing thresholds...')
    Thresholds = SortedDict()  # initialize a dictionary
    for i in range(0, len(Pop_counts)):  # for each population
        # print i
        keys_thresh = '%s' % Pop_counts.iloc[i]  # name a key after the population number
        Thresholds.update({keys_thresh: []})  # add keys to empty dictionaries
        call = Pop_counts[keys_thresh]  # call this index from Populations to figure out how many alleles are available for the population
        t = float(Threshold) / 100  # convert threshold to proportion
        value = int(call * t)  # calculate number of alleles needed for the population
        Thresholds[keys_thresh].append(value)  # append value to proper dictionary key
    # print Thresholds
    return Thresholds
def test_update():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict()

    temp.update()
    temp.update(mapping)
    temp.update(dict(mapping))
    temp.update(mapping[5:7])

    assert list(temp.items()) == mapping
def distribute_work(func_spice, xfunc, tais, **kwargs):
    # Each process will get 'chunksize' tais
    chunk = int(math.ceil(len(tais) / float(nprocs)))
    results = [apply_async(pool, func_spice,
                           args=(xfunc, tais[chunk * i:chunk * (i + 1)],),
                           kwds=kwargs)
               for i in range(nprocs)]

    # Collect all results into a single result dict
    res = SortedDict()
    for r in results:
        obj = r.get()
        if isinstance(obj, str):
            raise GeometrySpiceError(obj)
        res.update(obj)
    return [{k: v} for k, v in res.items()]
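# The snippet above relies on module-level `pool`, `nprocs`, and an `apply_async`
# helper that are not shown. A minimal sketch of what those assumed definitions
# might look like with the standard multiprocessing module (hypothetical names):
import math
import multiprocessing

nprocs = multiprocessing.cpu_count()
pool = multiprocessing.Pool(nprocs)

def apply_async(pool, fun, args=(), kwds=None):
    # thin wrapper around Pool.apply_async, assumed for illustration only
    return pool.apply_async(fun, args, kwds or {})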
def events_from_workload(self, workload):
    # TODO support multiple events at same timestamp
    fake_events = SortedDict({
        0.0: [{
            'timestamp': 0.0,
            'type': 'SIMULATION_BEGINS',
            'data': {}
        }]
    })
    max_event_time = 0.0
    max_walltime = 0
    for job in workload['jobs']:
        assert 'subtime' in job
        timestamp = job['subtime']
        if timestamp == 0:
            # just to not have 2 events at 0.0
            timestamp = 1e-06

        profile = workload['profiles'][job['profile']]
        assert profile['type'] == 'delay'

        job_event = {'timestamp': timestamp, 'type': 'JOB_SUBMITTED'}
        job_event['data'] = {
            'job_id': str(job['id']),
            'job': {
                'id': str(job['id']),
                'subtime': timestamp,
                'res': job['res'],
                'walltime': timestamp,
                'profile': {
                    'type': 'delay',
                    'delay': profile['delay']
                }
            }
        }
        self.nb_jobs_toExecute += 1

        assert timestamp not in fake_events
        fake_events.update({timestamp: [job_event]})

        if timestamp > max_event_time:
            max_event_time = timestamp
        if job['walltime'] > max_walltime:
            max_walltime = job['walltime']

    # Simulation finished when all jobs are executed
    #simulation_end_time = max_event_time + max_walltime + 50
    #fake_events.update({simulation_end_time: [{'timestamp': simulation_end_time,
    #                                           'type': 'SIMULATION_ENDS', 'data': {}}]})
    return fake_events
def load_scheduled_tasks(self):
    """Returns an automatically sorted dict of timestamp: List[ScheduledTask]"""
    all_scheduled_tasks = list(ScheduledTask.objects.order_by('when').all())
    now = datetime.datetime.utcnow()
    for task in all_scheduled_tasks:
        if task.time_tolerance is None or now - task.when <= datetime.timedelta(seconds=task.time_tolerance):
            self.recently_expired_tasks.append(task)
        else:
            task.delete()
    unsorted_scheduled_tasks = {task.when.timestamp(): task for task in all_scheduled_tasks}
    scheduled_tasks = SortedDict()
    scheduled_tasks.update(unsorted_scheduled_tasks)
    self.cached_scheduled_tasks = scheduled_tasks
    return scheduled_tasks
def generate(self):
    """Generates the wishlist.json data"""
    repo = self.github.get_repo("conan-io/wishlist")
    issues = repo.get_issues(state="open", sort="updated", direction="desc")
    issuesSorted = SortedDict()
    jsonIssues = dict()
    for issue in issues:
        upvotes = 0
        for reaction in issue.get_reactions():
            if reaction.content == "+1":
                upvotes += 1
        issuesSorted.update(dict({(upvotes, issue.number): issue.title}))

    for upvotes, issuetitle in reversed(issuesSorted.items()):
        print("{} : #{} {}".format(upvotes[0], upvotes[1], issuetitle))
        #jsonIssues.update(dict({upvotes[1]: (dict({'upvotes': upvotes[0]}), dict({'issue': upvotes[1]}), dict({'issuetitle': issuetitle}))}))
        jsonIssues.update(
            dict({
                upvotes[1]: dict({
                    'upvotes': upvotes[0],
                    'issue': upvotes[1],
                    'issuetitle': issuetitle
                })
            }))

    if not os.path.exists("build/"):
        os.makedirs("build/")

    data = json.dumps(jsonIssues, indent=4)

    with open('build/wishlist.json', 'w') as file:
        file.write(data)

    with open('build/wishlist.js', 'w') as file:
        file.write('var wishlist_data = \n')
        file.write(data)
        file.write(';')
def update_tracks(self, observations, directions, delta_t):
    # Propagate tracks
    [propagate_track(v, delta_t) for v in self.active_tracks.values()]
    assignments = {}
    taken_tracks = set()

    # Associate tracks
    print(observations)
    sorted_distances = SortedDict()
    for obs in range(len(observations)):
        for track, v in self.active_tracks.items():
            distance = abs(observations[obs] - v.position_pre())
            if distance < ASSOCIATION_RADIUS:
                sorted_distances.update({distance: (obs, track)})

    for k, v in sorted_distances.items():
        if v[0] not in assignments and v[1] not in taken_tracks:
            assignments[v[0]] = v[1]
            taken_tracks.add(v[1])
    print(assignments)

    # Update tracks that are associated, and create new ones.
    for i in range(len(observations)):
        if i in assignments:
            update_track(self.active_tracks[assignments[i]], observations[i])
        else:
            self.start_track(observations[i], directions[i])

    # Retire old tracks and delete other ones.
    to_delete = []
    to_retire = []
    for k, track in self.active_tracks.items():
        if track.n == N and track.m < M:
            to_delete.append(k)
        elif track.n > N and track.m == 0:
            to_retire.append(k)
    for t in to_retire:
        self.retired_tracks[t] = self.active_tracks[t]
    for t in to_retire + to_delete:
        del self.active_tracks[t]
def test_keysview():
    if hexversion < 0x02070000:
        return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    keys = get_keysview(temp)

    assert len(keys) == 13
    assert 'a' in keys
    assert list(keys) == [val for val, pos in mapping[:13]]
    assert keys[0] == 'a'
    assert list(reversed(keys)) == list(reversed(string.ascii_lowercase[:13]))
    assert keys.index('f') == 5
    assert keys.count('m') == 1
    assert keys.count('0') == 0
    assert keys.isdisjoint(['1', '2', '3'])

    temp.update(mapping[13:])

    assert len(keys) == 26
    assert 'z' in keys
    assert list(keys) == [val for val, pos in mapping]

    that = dict(mapping)
    that_keys = get_keysview(that)

    assert keys == that_keys
    assert not (keys != that_keys)
    assert not (keys < that_keys)
    assert not (keys > that_keys)
    assert keys <= that_keys
    assert keys >= that_keys

    assert list(keys & that_keys) == [val for val, pos in mapping]
    assert list(keys | that_keys) == [val for val, pos in mapping]
    assert list(keys - that_keys) == []
    assert list(keys ^ that_keys) == []

    keys = get_keysview(SortedDict(mapping[:2]))
    assert repr(keys) == "SortedDict_keys(['a', 'b'])"
def test_itemsview():
    if hexversion < 0x02070000:
        return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    items = get_itemsview(temp)

    assert len(items) == 13
    assert ('a', 0) in items
    assert list(items) == mapping[:13]
    assert items[0] == ('a', 0)
    assert items[-3:] == [('k', 10), ('l', 11), ('m', 12)]
    assert list(reversed(items)) == list(reversed(mapping[:13]))
    assert items.index(('f', 5)) == 5
    assert items.count(('m', 12)) == 1
    assert items.isdisjoint([('0', 26), ('1', 27)])
    assert not items.isdisjoint([('a', 0), ('b', 1)])

    temp.update(mapping[13:])

    assert len(items) == 26
    assert ('z', 25) in items
    assert list(items) == mapping

    that = dict(mapping)
    that_items = get_itemsview(that)

    assert items == that_items
    assert not (items != that_items)
    assert not (items < that_items)
    assert not (items > that_items)
    assert items <= that_items
    assert items >= that_items

    assert list(items & that_items) == mapping
    assert list(items | that_items) == mapping
    assert list(items - that_items) == []
    assert list(items ^ that_items) == []

    items = SortedDict(mapping[:2]).viewitems()
    assert repr(items) == "SortedDict_items([('a', 0), ('b', 1)])"
def shortest_path(self, source_id, target_id):
    self.vertices[source_id].dist = 0
    found = False
    # create a sorted dictionary to store unvisited nodes
    unvisited = SortedDict(self.vertices.copy())
    target = self.vertices[target_id]
    source = self.vertices[source_id]

    # iterate through unvisited nodes until there are none left or target is found
    while (len(unvisited) > 0):
        min_dist = unvisited.popitem(0)[1]
        min_dist.visited = True
        if (min_dist.node == target.node):
            found = True
            break
        # update the new shortest distance of min_dist's neighbors
        for v in min_dist.adjacent:
            new_dist = min_dist.dist + self.edges[min_dist.node][v]
            if (new_dist < self.vertices[v].dist and self.vertices[v].visited == False):
                self.vertices[v].dist = new_dist
                self.vertices[v].previous = min_dist.node
                unvisited.update({v: self.vertices[v]})

    # backtrace to determine shortest path
    path = []
    if (found == True):
        current = target
        while (current != None):
            path.append(current)
            if current.previous != None:
                current = self.vertices[current.previous]
            else:
                current = None
    return path
def compare_faces(list_of_face_encodings, unknown_face_encodings):
    """Find the known faces whose encodings are closest to the unknown encoding."""
    result_list_of_faces = SortedDict()
    for element in list_of_face_encodings:
        # calculate distance from current encodings to unknown encodings
        current_distance = face_recognition.api.face_distance(
            element["face_encoding"], unknown_face_encodings)
        min_dist = current_distance.min()

        # add to our list of top matching faces
        if len(result_list_of_faces) < 3:
            result_list_of_faces.update({min_dist: element})
        else:
            result_list_of_faces.update({min_dist: element})
            result_list_of_faces.popitem()  # drop the farthest match, keeping the closest three

    if result_list_of_faces.keys()[0] > 0.6:
        result_list_of_faces = None
    return result_list_of_faces
def test_valuesview():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    values = SortedDict(mapping[:2]).values()
    assert repr(values) == "SortedValuesView(SortedDict({'a': 0, 'b': 1}))"
class StockPrice:
    def __init__(self):
        self.A = SortedDict()   # timestamp -> latest price for that timestamp
        self.mx = SortedList()  # multiset of all current prices

    def update(self, timestamp: int, price: int) -> None:
        if timestamp not in self.A:
            self.mx.add(price)
        else:
            # replace the stale price recorded for this timestamp
            idx = self.mx.bisect_left(self.A[timestamp])
            # self.mx[idx:idx + 1] = []
            del self.mx[idx]
            self.mx.add(price)
        self.A.update({timestamp: price})

    def current(self) -> int:
        return self.A.values()[-1]

    def maximum(self) -> int:
        return self.mx[-1]

    def minimum(self) -> int:
        return self.mx[0]
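# Usage sketch for the StockPrice class above; the timestamps and prices are made up.
sp = StockPrice()
sp.update(1, 10)
sp.update(2, 5)
print(sp.current(), sp.maximum(), sp.minimum())  # 5 10 5
sp.update(1, 3)  # correct the price recorded at timestamp 1
print(sp.maximum())  # 5, because the stale price 10 was removed from the SortedList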
class LeafSet(object): __slots__ = ('peers', 'capacity') __passthru = {'get', 'clear', 'pop', 'popitem', 'peekitem', 'key'} __iters = {'keys', 'values', 'items'} def __init__(self, my_key, iterable=(), capacity=8): try: iterable = iterable.items() # view object except AttributeError: pass tuple_itemgetter = Peer.distance(my_key, itemgetter(0)) key_itemgetter = Peer.distance(my_key) self.capacity = capacity self.peers = SortedDict(key_itemgetter) if iterable: l = sorted(iterable, key=tuple_itemgetter) self.peers.update(islice(l, capacity)) def clear(self): self.peers.clear() def prune(self): extra = len(self) - self.capacity for i in range(extra): self.peers.popitem(last=True) def update(self, iterable): try: iterable = iterable.items() # view object except AttributeError: pass iterable = iter(iterable) items = tuple(islice(iterable, 500)) while items: self.peers.update(items) items = tuple(islice(iterable, 500)) def setdefault(self, *args, **kwargs): self.peers.setdefault(*args, **kwargs) self.prune() def __setitem__(self, *args, **kwargs): self.peers.__setitem__(*args, **kwargs) self.prune() def __getitem__(self, *args, **kwargs): return self.peers.__getitem__(*args, **kwargs) def __delitem__(self, *args, **kwargs): return self.peers.__delitem__(*args, **kwargs) def __iter__(self, *args, **kwargs): return self.peers.__iter__(*args, **kwargs) def __reversed__(self, *args, **kwargs): return self.peers.__reversed__(*args, **kwargs) def __contains__(self, *args, **kwargs): return self.peers.__contains__(*args, **kwargs) def __len__(self, *args, **kwargs): return self.peers.__len__(*args, **kwargs) def __getattr__(self, key): if key in self.__class__.__passthru: return getattr(self.peers, key) elif key in self.__class__.__iters: return getattr(self.peers, 'iter' + key) else: return super().__getattr__(key) def __repr__(self): return '<%s keys=%r capacity=%d/%d>' % ( self.__class__.__name__, list(self), len(self), self.capacity)
class TreePage(BasePage):
    """
    Page object, implemented with a sorted dict. Who knows what's underneath!
    """

    def __init__(self, *args, **kwargs):
        storage = kwargs.pop("storage", None)
        super(TreePage, self).__init__(*args, **kwargs)
        self._storage = SortedDict() if storage is None else storage

    def keys(self):
        if len(self._storage) == 0:
            return set()
        else:
            return set.union(*(set(range(*self._resolve_range(mo))) for mo in self._storage.itervalues()))

    def replace_mo(self, state, old_mo, new_mo):
        start, end = self._resolve_range(old_mo)
        for key in self._storage.irange(start, end - 1):
            val = self._storage[key]
            if val is old_mo:
                #assert new_mo.includes(a)
                self._storage[key] = new_mo

    def store_overwrite(self, state, new_mo, start, end):
        # iterate over each item we might overwrite
        # track our mutations separately since we're in the process of iterating
        deletes = []
        updates = {start: new_mo}

        for key in self._storage.irange(maximum=end - 1, reverse=True):
            old_mo = self._storage[key]

            # make sure we aren't overwriting all of an item that overlaps the end boundary
            if end < self._page_addr + self._page_size and end not in updates and old_mo.includes(end):
                updates[end] = old_mo

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if key < start:
                break

            # delete any key that falls within the range
            deletes.append(key)

        #assert all(m.includes(i) for i,m in updates.items())

        # perform mutations
        for key in deletes:
            del self._storage[key]

        self._storage.update(updates)

    def store_underwrite(self, state, new_mo, start, end):
        # track the point that we need to write up to
        last_missing = end - 1
        # track also updates since we can't update while iterating
        updates = {}

        for key in self._storage.irange(maximum=end - 1, reverse=True):
            mo = self._storage[key]

            # if the mo stops
            if mo.base <= last_missing and not mo.includes(last_missing):
                updates[max(mo.last_addr + 1, start)] = new_mo
            last_missing = mo.base - 1

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if last_missing < start:
                break

        # if there are no memory objects <= start, we won't have filled start yet
        if last_missing >= start:
            updates[start] = new_mo

        #assert all(m.includes(i) for i,m in updates.items())

        self._storage.update(updates)

    def load_mo(self, state, page_idx):
        """
        Loads a memory object from memory.

        :param page_idx: the index into the page
        :returns: a tuple of the object
        """

        try:
            key = next(self._storage.irange(maximum=page_idx, reverse=True))
        except StopIteration:
            return None
        else:
            return self._storage[key]

    def load_slice(self, state, start, end):
        """
        Return the memory objects overlapping with the provided slice.

        :param start: the start address
        :param end: the end address (non-inclusive)
        :returns: tuples of (starting_addr, memory_object)
        """
        keys = list(self._storage.irange(start, end - 1))
        if not keys or keys[0] != start:
            try:
                key = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                pass
            else:
                if self._storage[key].includes(start):
                    keys.insert(0, key)  # was `items.insert(0, key)`, which referenced an undefined name
        return [(key, self._storage[key]) for key in keys]

    def _copy_args(self):
        return {'storage': self._storage.copy()}
class CacheStore(object): class CacheItem(object): def __init__(self): self.valid = Event() self.data = None def __init__(self, key=None): self.lock = RLock() self.store = SortedDict(key) def __getitem__(self, item): return self.get(item) def put(self, key, data): with self.lock: item = self.store[key] if key in self.store else self.CacheItem() item.data = data item.valid.set() if key not in self.store: self.store[key] = item return True return False def update(self, **kwargs): with self.lock: items = {} created = [] updated = [] for k, v in kwargs.items(): items[k] = self.CacheItem() items[k].data = v items[k].valid.set() if k in self.store: updated.append(k) else: created.append(k) self.store.update(**items) return created, updated def update_one(self, key, **kwargs): with self.lock: item = self.get(key) if not item: return False for k, v in kwargs.items(): set(item, k, v) self.put(key, item) return True def update_many(self, key, predicate, **kwargs): with self.lock: updated = [] for k, v in self.itervalid(): if predicate(v): if self.update_one(k, **kwargs): updated.append(key) return updated def get(self, key, default=None, timeout=None): item = self.store.get(key) if item: item.valid.wait(timeout) return item.data return default def remove(self, key): with self.lock: if key in self.store: del self.store[key] return True return False def remove_many(self, keys): with self.lock: removed = [] for key in keys: if key in self.store: del self.store[key] removed.append(key) return removed def clear(self): with self.lock: items = list(self.store.keys()) self.store.clear() return items def exists(self, key): return key in self.store def rename(self, oldkey, newkey): with self.lock: obj = self.get(oldkey) obj['id'] = newkey self.put(newkey, obj) self.remove(oldkey) def is_valid(self, key): item = self.store.get(key) if item: return item.valid.is_set() return False def invalidate(self, key): with self.lock: item = self.store.get(key) if item: item.valid.clear() def itervalid(self): for key, value in list(self.store.items()): if value.valid.is_set(): yield (key, value.data) def validvalues(self): for value in list(self.store.values()): if value.valid.is_set(): yield value.data def remove_predicate(self, predicate): result = [] for k, v in self.itervalid(): if predicate(v): self.remove(k) result.append(k) return result def query(self, *filter, **params): return query(list(self.validvalues()), *filter, **params)
# The bintrees project now recommends using Sorted Containers instead and has stopped development.
# The API differs significantly but the supported functionality is the same.
# The Tree object in bintrees is most similar to SortedDict.
# All of the mapping methods and set methods are available using either SortedDict or SortedKeysView.
from sortedcontainers import SortedDict

sd = SortedDict()
sd.update({'jack': 'apple', 'jill': 'pear', 'giant': 'sheep'})
print('sorteddict', sd)

# Return an iterator over the keys of the sorted dict.
myiter = iter(sd)
for k in myiter:
    print('key', k, 'value', sd.get(k))
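# A small sketch of bintrees-style ordered lookups done with SortedDict, reusing the
# `sd` example above; the `other` dict below is made up purely for illustration.
for key in sd.irange('giant', 'jack'):  # ordered iteration over a key range (inclusive by default)
    print(key, sd[key])

other = SortedDict({'jack': 1, 'jill': 2})
print(sd.keys() & other.keys())         # set operations are supported on the keys view
print(sd.peekitem(0), sd.peekitem(-1))  # smallest and largest items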
class KeyedRegion(object): """ KeyedRegion keeps a mapping between stack offsets and all variables covering that offset. It assumes no variable in this region overlap with another variable in this region. Registers and function frames can all be viewed as a keyed region. """ def __init__(self, tree=None): self._storage = SortedDict() if tree is None else tree def _get_container(self, offset): try: base_offset = next( self._storage.irange(maximum=offset, reverse=True)) except StopIteration: return offset, None else: container = self._storage[base_offset] if container.includes(offset): return base_offset, container return offset, None def __contains__(self, offset): """ Test if there is at least one varaible covering the given offset. :param offset: :return: """ return self._get_container(offset)[1] is not None def __len__(self): return len(self._storage) def __iter__(self): return self._storage.itervalues() def __eq__(self, other): if set(self._storage.keys()) != set(other._storage.keys()): return False for k, v in self._storage.iteritems(): if v != other._storage[k]: return False return True def copy(self): if not self._storage: return KeyedRegion() kr = KeyedRegion() for key, ro in self._storage.iteritems(): kr._storage[key] = ro.copy() return kr def merge(self, other, make_phi_func=None): """ Merge another KeyedRegion into this KeyedRegion. :param KeyedRegion other: The other instance to merge with. :return: None """ # TODO: is the current solution not optimal enough? for _, item in other._storage.iteritems(): # type: RegionObject for loc_and_var in item.objects: self.__store(loc_and_var, overwrite=False, make_phi_func=make_phi_func) return self def dbg_repr(self): """ Get a debugging representation of this keyed region. :return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = {} for key in sorted(keys): ro = self._storage[key] variables = [obj.variable for obj in ro.objects] offset_to_vars[key] = variables s = [] for offset, variables in offset_to_vars.iteritems(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. :param int start: :param SimVariable variable: :return: None """ self._store(start, variable, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ self._store(start, variable, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index variables covering the given offset) of a specific offset. :param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A list of stack variables. :rtype: set """ base_addr, container = self._get_container(start) if container is None: return [] else: return container.variables # # Private methods # def _store(self, start, variable, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param variable: The variable to store. :param bool overwrite: Whether existing variables should be overwritten or not. 
:return: None """ loc_and_var = LocationAndVariable(start, variable) self.__store(loc_and_var, overwrite=overwrite) def __store(self, loc_and_var, overwrite=False, make_phi_func=None): """ Store a variable into the storage. :param LocationAndVariable loc_and_var: The descriptor describing start address and the variable. :param bool overwrite: Whether existing variables should be overwritten or not. :return: None """ start = loc_and_var.start variable = loc_and_var.variable variable_size = variable.size if variable.size is not None else 1 end = start + variable_size # region items in the middle overlapping_items = list(self._storage.irange(start, end - 1)) # is there a region item that begins before the start and overlaps with this variable? floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginningq overlapping_items.insert(0, (floor_key, self._storage[floor_key])) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, variable_size, {loc_and_var})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(loc_and_var) else: self._add_object_or_make_phi(b, loc_and_var, make_phi_func=make_phi_func) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {loc_and_var}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(loc_and_var) else: self._add_object_or_make_phi(a, loc_and_var, make_phi_func=make_phi_func) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(loc_and_var) else: self._add_object_or_make_phi(item, loc_and_var, make_phi_func=make_phi_func) to_update[loc_and_var.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next( self._storage.irange(maximum=end - 1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next( self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_or_make_phi(self, item, loc_and_var, make_phi_func=None): #pylint:disable=no-self-use if not make_phi_func or len({loc_and_var.variable} | item.variables) == 1: item.add_object(loc_and_var) else: # make a phi node item.set_object( LocationAndVariable( loc_and_var.start, make_phi_func(loc_and_var.variable, *item.variables)))
class KeyedRegion(object): """ KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in this region overlap with another variable in this region. Registers and function frames can all be viewed as a keyed region. """ def __init__(self, tree=None): self._storage = SortedDict() if tree is None else tree def _get_container(self, offset): try: base_offset = next(self._storage.irange(maximum=offset, reverse=True)) except StopIteration: return offset, None else: container = self._storage[base_offset] if container.includes(offset): return base_offset, container return offset, None def __contains__(self, offset): """ Test if there is at least one varaible covering the given offset. :param offset: :return: """ return self._get_container(offset)[1] is not None def __len__(self): return len(self._storage) def __iter__(self): return iter(self._storage.values()) def __eq__(self, other): if set(self._storage.keys()) != set(other._storage.keys()): return False for k, v in self._storage.items(): if v != other._storage[k]: return False return True def copy(self): if not self._storage: return KeyedRegion() kr = KeyedRegion() for key, ro in self._storage.items(): kr._storage[key] = ro.copy() return kr def merge(self, other, make_phi_func=None): """ Merge another KeyedRegion into this KeyedRegion. :param KeyedRegion other: The other instance to merge with. :return: None """ # TODO: is the current solution not optimal enough? for _, item in other._storage.items(): # type: RegionObject for loc_and_var in item.stored_objects: self.__store(loc_and_var, overwrite=False, make_phi_func=make_phi_func) return self def dbg_repr(self): """ Get a debugging representation of this keyed region. :return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = { } for key in sorted(keys): ro = self._storage[key] variables = [ obj.obj for obj in ro.stored_objects ] offset_to_vars[key] = variables s = [ ] for offset, variables in offset_to_vars.items(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.add_object(start, variable, size) def add_object(self, start, obj, object_size): """ Add/Store an object to this region at the given offset. :param start: :param obj: :param int object_size: Size of the object :return: """ self._store(start, obj, object_size, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.set_object(start, variable, size) def set_object(self, start, obj, object_size): """ Add an object to this region at the given offset, and remove all other objects that are fully covered by this object. :param start: :param obj: :param object_size: :return: """ self._store(start, obj, object_size, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index objects covering the given offset) of a specific offset. 
:param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A list of stack variables. :rtype: set """ _, container = self._get_container(start) if container is None: return [] else: return container.internal_objects def get_objects_by_offset(self, start): """ Find objects covering the given region offset. :param start: :return: """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects # # Private methods # def _store(self, start, obj, size, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param obj: The object to store. :param int size: Size of the object to store. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ stored_object = StoredObject(start, obj, size) self.__store(stored_object, overwrite=overwrite) def __store(self, stored_object, overwrite=False, make_phi_func=None): """ Store a variable into the storage. :param StoredObject stored_object: The descriptor describing start address and the variable. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ start = stored_object.start object_size = stored_object.size end = start + object_size # region items in the middle overlapping_items = list(self._storage.irange(start, end-1)) # is there a region item that begins before the start and overlaps with this variable? floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginning overlapping_items.insert(0, floor_key) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, object_size, {stored_object})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(stored_object) else: self._add_object_or_make_phi(b, stored_object, make_phi_func=make_phi_func) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {stored_object}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(stored_object) else: self._add_object_or_make_phi(a, stored_object, make_phi_func=make_phi_func) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(stored_object) else: self._add_object_or_make_phi(item, stored_object, make_phi_func=make_phi_func) to_update[item.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next(self._storage.irange(maximum=end-1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is 
not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next(self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_or_make_phi(self, item, stored_object, make_phi_func=None): #pylint:disable=no-self-use if not make_phi_func or len({stored_object.obj} | item.internal_objects) == 1: item.add_object(stored_object) else: # make a phi node item.set_object(StoredObject(stored_object.start, make_phi_func(stored_object.obj, *item.internal_objects), stored_object.size, ) )
class CacheStore(object): class CacheItem(object): __slots__ = ('valid', 'data') def __init__(self): self.valid = Event() self.data = None def __init__(self, key=None): self.lock = RLock() self.store = SortedDict(key) def __getitem__(self, item): return self.get(item) def put(self, key, data): with self.lock: try: item = self.store[key] item.data = data item.valid.set() return False except KeyError: item = self.CacheItem() item.data = data item.valid.set() self.store[key] = item return True def update(self, **kwargs): with self.lock: items = {} created = [] updated = [] for k, v in kwargs.items(): items[k] = self.CacheItem() items[k].data = v items[k].valid.set() if k in self.store: updated.append(k) else: created.append(k) self.store.update(**items) return created, updated def update_one(self, key, **kwargs): with self.lock: item = self.get(key) if not item: return False for k, v in kwargs.items(): set(item, k, v) self.put(key, item) return True def update_many(self, key, predicate, **kwargs): with self.lock: updated = [] for k, v in self.itervalid(): if predicate(v): if self.update_one(k, **kwargs): updated.append(key) return updated def get(self, key, default=None, timeout=None): item = self.store.get(key) if item: item.valid.wait(timeout) return item.data return default def remove(self, key): with self.lock: try: del self.store[key] return True except KeyError: return False def remove_many(self, keys): with self.lock: removed = [] for key in keys: try: del self.store[key] removed.append(key) except KeyError: pass return removed def clear(self): with self.lock: items = list(self.store.keys()) self.store.clear() return items def exists(self, key): return key in self.store def rename(self, oldkey, newkey): with self.lock: obj = self.get(oldkey) obj['id'] = newkey self.put(newkey, obj) self.remove(oldkey) def is_valid(self, key): item = self.store.get(key) if item: return item.valid.is_set() return False def invalidate(self, key): with self.lock: item = self.store.get(key) if item: item.valid.clear() def itervalid(self): for key, value in list(self.store.items()): if value.valid.is_set(): yield (key, value.data) def validvalues(self): for value in list(self.store.values()): if value.valid.is_set(): yield value.data def remove_predicate(self, predicate): result = [] for k, v in self.itervalid(): if predicate(v): self.remove(k) result.append(k) return result def query(self, *filter, **params): return query(list(self.validvalues()), *filter, **params)
# For writing output video
fullPathtoOutputVideo = os.path.join(
    os.path.dirname(ConfigurationForVideoSegmentation.fullyQualifiedPathToOutputMovie),
    "out_" + os.path.basename(ConfigurationForVideoSegmentation.fullyQualifiedPathToInputMovie))
writer = skvideo.io.FFmpegWriter(fullPathtoOutputVideo,
                                 outputdict={'-vcodec': 'libx264', '-b': '750100000'})

while (not allFramesAreDone):
    # wait for the output channel to be full
    ConfigurationForVideoSegmentation.barrier.wait()
    # Crossing this barrier means that all workers have put their results into the output queue

    # Extract all annotated frames in increasing order. Use SortedDict for the purpose.
    s = SortedDict()
    while (not ConfigurationForVideoSegmentation.outputChannel.empty()):
        s.update(ConfigurationForVideoSegmentation.outputChannel.get())

    # Sorted container sorts by keys, and keys are frame numbers. So, we can just reverse and pop.
    for key in list(s.keys()):
        # Count the number of sentinel objects encountered
        if (key == -1):
            nSentinelObjectsEncountered = nSentinelObjectsEncountered + 1
            # Not all workers may have seen the sentinel object yet.
            if (nSentinelObjectsEncountered == ConfigurationForVideoSegmentation.nProcesses - 1):
                allFramesAreDone = True
                break
        else:
def save_polygon(polygon, all_metadata):
    d = SortedDict([(m, '') for m in all_metadata])
    d.update(polygon['properties'])
    return d.values()
class Visibility_polygon_class(object): def __init__(self): self.origin = (0, 0) self.refvec = (0, 1) self.segments = [] self.event_queue = [] self.status = 0 self.status = SortedDict() self.visibility_polygon = [] def order_segments(self, arg): return arg # Starts here! def get_visibility_polygon(self, segments, origin): self.origin = origin self.segments = segments self.add_bounding_box() self.create_event_queue_from_segments() self.sort_event_queue() self.initialize_status() #return self.status self.perform_sweep() return self.visibility_polygon def add_bounding_box(self): # Find extreme points margin = 40 top_y = 400 + margin #uppermost_point_index(self.event_queue) bottom_y = 130 - margin #lowermost_point_index(self.event_queue) right_x = 600 + margin #rightmost_point_index(self.event_queue) left_x = 200 - margin #leftmost_point_index(self.event_queue) # Create the bounding box and add it to event queue s1 = Segment(Point(right_x, top_y), Point(right_x, bottom_y)) s2 = Segment(Point(right_x - 5, bottom_y), Point(left_x, bottom_y)) s3 = Segment(Point(left_x, bottom_y + 5), Point(left_x, top_y)) s4 = Segment(Point(left_x + 5, top_y), Point(right_x, top_y + 5)) p = [s1, s2, s3, s4] self.segments.extend(p) # Create an event queue with all points and their connections (not sorted yet!) def create_event_queue_from_segments(self): for s in self.segments: p1 = EventPoint(s.p1) p2 = EventPoint(s.p2) p1.twin = p2 p2.twin = p1 self.event_queue.append(p1) self.event_queue.append(p2) # Create event queue def sort_event_queue(self): # Sort the points in clockwise order self.event_queue = sorted(self.event_queue, key=self.get_key) def initialize_status(self): sweep_ray = Ray(self.origin, self.event_queue[0].p) i = 0 for ep in self.event_queue: if ep.type == DEFAULT_VERTEX: segment = Segment(ep.p, ep.twin.p) intersection_point = sweep_ray.intersection(segment) if len(intersection_point) > 0: # If the segments first point is the current event-point if intersection_point[ 0] == ep.p: # if the point is on the initial ray if len( Ray(self.origin, self.event_queue[ i + 1].p).intersection(segment) ) > 0: #if the point was a start point self.initialize_segment(ep, intersection_point) else: self.initialize_segment(ep.twin, intersection_point) else: self.initialize_segment(ep.twin, intersection_point) else: # Event-points not hit by the ray gets a type ep.type = START_VERTEX ep.twin.type = END_VERTEX i += 1 def initialize_segment(self, ep, intersection_point): status_segment = StatusSegment(ep, ep.twin, self.origin) status_segment.current_distance = distance(intersection_point[0], self.origin) ep.status_segment = status_segment ep.twin.status_segment = status_segment ep.type = START_VERTEX ep.twin.type = END_VERTEX if ep != self.event_queue[0]: self.status.update( {status_segment.current_distance: status_segment}) def perform_sweep(self): print("\nStatus at start: " + str(len(self.status))) for ep in self.event_queue: print("\nStatus: " + str(len(self.status))) if ep.type == START_VERTEX: print("START_VERTEX") status_segment = StatusSegment(ep, ep.twin, self.origin) print("current segment: " + str(status_segment.segment) + str(status_segment.current_distance)) ep.status_segment = status_segment ep.twin.status_segment = status_segment if self.status.__len__() == 0: self.status.update( {status_segment.current_distance: status_segment}) self.visibility_polygon.append(ep.p) print("empty status. 
Append") else: first_in_status = self.status.peekitem(index=0) print("first in status and distance: " + str(first_in_status[1].segment) + ": " + str(first_in_status[1].current_distance)) current_ray = Ray(self.origin, ep.p) intersection_point = current_ray.intersection( first_in_status[1].segment) first_in_status[1].current_distance = distance( intersection_point[0], self.origin) print("First in status new distance: " + str(first_in_status[1].current_distance)) self.status.update( {status_segment.current_distance: status_segment}) # insert the new segment to status self.status.__delitem__(first_in_status[0]) self.status.update({ first_in_status[1].current_distance: first_in_status[1] }) #update the key distance to the origin new_first_in_status = self.status.peekitem(index=0) if new_first_in_status[1] != first_in_status[1]: self.visibility_polygon.append(intersection_point[0]) self.visibility_polygon.append(ep.p) print("normal status. Append") elif ep.type == END_VERTEX: print("END_VERTEX") first_in_status = self.status.peekitem(index=0) print("first in status and distance: " + str(first_in_status[1].segment) + ": " + str(first_in_status[1].current_distance)) self.status.__delitem__(ep.status_segment.current_distance) print("ep status segment and distance: " + str(ep.status_segment.segment) + ": " + str(ep.status_segment.current_distance)) if self.status.__len__() == 0: self.visibility_polygon.append(ep.p) print("empty status. Append") else: new_first_in_status = self.status.peekitem(index=0) if new_first_in_status[1] != first_in_status[1]: current_ray = Ray(self.origin, ep.p) intersection_point = current_ray.intersection( new_first_in_status[1].segment) self.visibility_polygon.append(ep.p) self.visibility_polygon.append(intersection_point[0]) print("normal status. Append") # Gets key for sorting def get_key(self, point): return self.clockwiseangle_and_distance(point.p) # returns the angle and length vector from the origin to the point def clockwiseangle_and_distance(self, point): # Vector between point and the origin: v = p - o vector = [point[0] - self.origin[0], point[1] - self.origin[1]] # Length of vector: ||v|| lenvector = math.hypot(vector[0], vector[1]) # If length is zero there is no angle if lenvector == 0: return -math.pi, 0 # Normalize vector: v/||v|| normalized = [vector[0] / lenvector, vector[1] / lenvector] dotprod = normalized[0] * self.refvec[0] + normalized[1] * self.refvec[ 1] # x1*x2 + y1*y2 diffprod = self.refvec[1] * normalized[0] - self.refvec[ 0] * normalized[1] # x1*y2 - y1*x2 angle = math.atan2(diffprod, dotprod) # Negative angles represent counter-clockwise angles so we need to subtract them # from 2*pi (360 degrees) if angle < 0: return 2 * math.pi + angle, lenvector # I return first the angle because that's the primary sorting criterium # but if two vectors have the same angle then the shorter distance should come first. return angle, lenvector
class BaseNode(object): def __init__(self, tree, bucket=None, new=False): self.tree = tree if bucket is not None: self.bucket = SortedDict(bucket) else: self.bucket = SortedDict() self.lazy = None self.changed = new def _split(self): """ Creates a new node of the same type and splits the contents of the bucket into two parts of an equal size. The lower keys are being stored in the bucket of the new node. The higher keys are being stored in the bucket of the old node. Afterwards, the new node is being returned. """ new_bucket = self.bucket.items()[:len(self.bucket) // 2] self.bucket = SortedDict(self.bucket.items()[len(self.bucket) // 2:]) new_node = LazyNode(node=self.__class__(tree=self.tree, bucket=new_bucket, new=True), tree=self.tree) if hasattr(new_node, 'rest'): new_node.rest = new_node.bucket.popitem()[1] if hasattr(self, 'next'): self.next = new_node if self is self.tree.root.node: self.tree._create_root(new_node, self.lazy) return new_node def _insert(self, key, value): """ Inserts the key and value into the bucket. If the bucket has become too large, the node will be split into two nodes. """ self.changed = True if isinstance(self, Leaf): self.bucket[key] = LazyNode(node=value, tree=self.tree) else: self.bucket[key] = value if len(self.bucket) > self.tree.max_size: return self, self._split() return self, None def _take_first(self): key = self.bucket.keys()[0] return key, self.bucket.pop(key) def _set_first(self, key, value): self.bucket[key] = value def _merge_right(self, right, parent): """Merge the buckets of the two children and place them at of the right node in the parent""" self.bucket.update(right.bucket) self.changed = True right.changed = True parent.changed = True left_index = parent.bucket.values().index(self.lazy) left_key = parent.bucket.keys()[left_index] try: right_index = parent.bucket.values().index(right.lazy) right_key = parent.bucket.keys()[right_index] parent.bucket[right_key] = self.lazy except ValueError: parent.rest = self.lazy del parent.bucket[left_key] def _commit(self, db): for n in self.bucket.values(): n._commit(db) pos = db.tell() db.write(encode(self)) return pos
class SLIM(BaseMiner, MDLOptimizer): """SLIM: Directly Mining Descriptive Patterns SLIM looks for a compressed representation of transactional data. This compressed representation if a set of descriptive patterns, and can be used to: - provide a natively interpretable modeling of this data - make predictions on new data, using this condensed representation as an encoding scheme Idea of early stopping is inspired from http://eda.mmci.uni-saarland.de/pres/ida14-slimmer-poster.pdf Parameters ---------- n_iter_no_change: int, default=100 Number of candidate evaluation with no improvement to count before stopping optimization. tol: float, default=None Tolerance for the early stopping, in bits. When the compression size is not improving by at least `tol` for `n_iter_no_change` iterations, the training stops. Default to None, will be automatically computed considering the size of input data. pruning: bool, default=True Either to activate pruning or not. Pruned itemsets may be useful at prediction time, so it is usually recommended to set it to False to build a classifier. The model will be less concise, but will lead to more accurate predictions on average. Examples -------- >>> from skmine.itemsets import SLIM >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']] >>> SLIM().fit(D).codetable # doctest: +SKIP (butter, tea) [2] (milk, bananas) [0, 1] (cookies) [1, 2] dtype: object References ---------- .. [1] Smets, K & Vreeken, J "Slim: Directly Mining Descriptive Patterns", 2012 .. [2] Gandhi, M & Vreeken, J "Slimmer, outsmarting Slim", 2014 """ def __init__(self, *, n_iter_no_change=100, tol=None, pruning=True): self.n_iter_no_change = n_iter_no_change self.tol = tol self.standard_codetable_ = None self.codetable_ = SortedDict() self.model_size_ = None # L(CT|D) self.data_size_ = None # L(D|CT) self.pruning = pruning def fit(self, D, y=None): # pylint:disable = too-many-locals """fit SLIM on a transactional dataset This generate new candidate patterns and add those which improve compression, iteratibely refining ``self.codetable_`` Parameters ------- D: pd.DataFrame Transactional dataset, either as an iterable of iterables or encoded as tabular binary data """ self._prefit(D, y=y) n_iter_no_change = 0 seen_cands = set() tol = self.tol or self.standard_codetable_.map(len).median() while n_iter_no_change < self.n_iter_no_change: candidates = self.generate_candidates(stack=seen_cands) for cand, _ in candidates: data_size, model_size, update_d, prune_set = self.evaluate( cand) diff = (self.model_size_ + self.data_size_) - (data_size + model_size) if diff > 0.01: # underflow self.codetable_.update(update_d) if self.pruning: self.codetable_, data_size, model_size = self._prune( self.codetable_, prune_set, model_size, data_size) self.data_size_ = data_size self.model_size_ = model_size if diff < tol: n_iter_no_change += 1 if n_iter_no_change > self.n_iter_no_change: break # inner break if not candidates: # if empty candidate generation n_iter_no_change += self.n_iter_no_change # force while loop to break return self def decision_function(self, D): """Compute covers on new data, and return code length This function function is named ``decision_function`` because code lengths represent the distance between a point and the current codetable. Setting ``pruning`` to False when creating the model is recommended to cover unseen data, and especially when building a classifier. 
Parameters ---------- D: pd.DataFrame or np.ndarray new data to make predictions on, in tabular format Example ------- >>> from skmine.itemsets import SLIM; import pandas as pd >>> def to_tabular(D): return pd.Series(D).str.join('|').str.get_dummies(sep="|") >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']] >>> new_D = to_tabular([['cookies', 'butter']]) >>> slim = SLIM().fit(to_tabular(D)) >>> slim.decision_function(new_D) 0 -1.321928 dtype: float32 """ D = _check_D(D) codetable = pd.Series(self.codetable_) D_sct = { k: Bitmap(np.where(D[k])[0]) for k in D.columns if k in self.standard_codetable_ } covers = cover(D_sct, codetable.index) mat = np.zeros(shape=(len(D), len(covers))) for idx, tids in enumerate(covers.values()): mat[tids, idx] = 1 mat = pd.DataFrame(mat, columns=covers.keys()) code_lengths = codetable.map(len) ct_codes = code_lengths / code_lengths.sum() codes = (mat * ct_codes).sum(axis=1).astype(np.float32) # positive sign on log2 to return negative distance : sklearn] r = _log2(codes) r[r == 0] = -np.inf # zeros would fool a `shortest code wins` strategy return r def generate_candidates(self, stack=None, thresh=1e3): """ Generate candidates from the current codetable (SLIM is any-time) Note that `stack` is updated during the execution of this method. Parameters ---------- stack: set[frozenset], default=None a stack of already-seen candidates to be excluded thresh: int, default=1_000 if the size of the current codetable is higher than `thresh`, candidate are generated on-the-fly, and remain unsorted. If not, they are returned in a list, sorted by decreasing order of estimated gain Returns ------- iterator[tuple(frozenset, Bitmap)] """ ct = SortedDict(self._standard_candidate_order, self.codetable.items()) # if big number of elements in codetable, just take a generator, do not sort output gen = generate_candidates if len( ct) < thresh else generate_candidates_big return gen(ct, stack=stack) def evaluate(self, candidate): """ Evaluate ``candidate``, considering the current codetable and a dataset ``D`` Parameters ---------- candidate: frozenset a new candidate to be evaluated Returns ------- (float, float, dict, set) updated (data size, model size, codetable) and finally the set of itemsets for which usage decreased """ idx = self.codetable_.bisect(candidate) ct = list(self.codetable_) ct.insert(idx, candidate) D = {k: v.copy() for k, v in self.standard_codetable_.items()} CTc = cover(D, ct) updated, decreased = {candidate: CTc[candidate]}, set() for iset, usage in self.codetable_.items( ): # TODO useless is size is too big if usage != CTc[iset]: updated[iset] = CTc[iset] if len(CTc[iset]) < len(usage): decreased.add(iset) data_size, model_size = self._compute_sizes( CTc) # TODO pruning in evaluate return data_size, model_size, updated, decreased def reconstruct(self): """reconstruct the original data from the current `self.codetable_`""" return reconstruct(self.codetable_) @lru_cache(maxsize=1024) def get_support(self, itemset): """Get support from an itemset""" U = reduce(Bitmap.intersection, self.standard_codetable_.loc[itemset]) return len(U) def _standard_cover_order(self, itemset): """ Returns a tuple associated with an itemset, so that many itemsets can be sorted in Standard Cover Order """ return (-len(itemset), -self.get_support(itemset), tuple(itemset)) def _standard_candidate_order(self, itemset): return (-self.get_support(itemset), -len(itemset), tuple(itemset)) def _prefit(self, D, y=None): if hasattr(D, 'ndim') and D.ndim 
== 2: D = _check_D(D) if y is not None: D = supervised_to_unsupervised(D, y) # SKLEARN_COMPAT item_to_tids = {k: Bitmap(np.where(D[k])[0]) for k in D.columns} else: item_to_tids = _to_vertical(D) self.standard_codetable_ = pd.Series(item_to_tids) usage = self.standard_codetable_.map(len).astype(np.uint32) ct_it = ((frozenset([e]), tids) for e, tids in item_to_tids.items()) self.codetable_ = SortedDict(self._standard_cover_order, ct_it) codes = -_log2(usage / usage.sum()) # L(code_ST(X)) = L(code_CT(X)), because CT=ST self.model_size_ = 2 * codes.sum() self.data_size_ = (codes * usage).sum() return self def _get_standard_codes(self, index): """compute the size of a codetable index given the standard codetable""" flat_items = list(chain(*index)) items, counts = np.unique(flat_items, return_counts=True) usages = self.standard_codetable_.loc[items].map(len).astype(np.uint32) usages /= usages.sum() codes = -_log2(usages) return codes * counts def _compute_sizes(self, codetable): """ Compute sizes for both the data and the model .. math:: L(D|CT) .. math:: L(CT|D) Parameters ---------- codetable : Mapping A series mapping itemsets to their usage tids Returns ------- tuple(float, float) (data_size, model_size) """ isets, usages = zip(*((_[0], len(_[1])) for _ in codetable.items() if len(_[1]) > 0)) usages = np.array(usages, dtype=np.uint32) codes = -_log2(usages / usages.sum()) stand_codes = self._get_standard_codes(isets) model_size = stand_codes.sum() + codes.sum( ) # L(CTc|D) = L(X|ST) + L(X|CTc) data_size = (codes * usages).sum() return data_size, model_size def _prune(self, codetable, prune_set, model_size, data_size): """post prune a codetable considering itemsets for which usage has decreased Parameters ---------- codetable: SortedDict prune_set: set itemsets in ``codetable`` for which usage has decreased model_size: float current model_size for ``codetable`` data_size: float current data size when encoding ``D`` with ``codetable`` Returns ------- new_codetable, new_data_size, new_model_size: SortedDict, float, float a tuple containing the pruned codetable, and new model size and data size w.r.t this new codetable """ prune_set = {k for k in prune_set if len(k) > 1} # remove singletons while prune_set: cand = min(prune_set, key=lambda e: len(codetable[e])) prune_set.discard(cand) ct = list(codetable) ct.remove(cand) D = {k: v.copy() for k, v in self.standard_codetable_.items() } # TODO avoid data copies CTp = cover(D, ct) decreased = { k for k, v in CTp.items() if len(k) > 1 and len(v) < len(codetable[k]) } d_size, m_size = self._compute_sizes(CTp) if d_size + m_size < model_size + data_size: codetable.update(CTp) del codetable[cand] prune_set.update(decreased) data_size, model_size = d_size, m_size return codetable, data_size, model_size
class KeyedRegion: """ KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in this region overlap with another variable in this region. Registers and function frames can all be viewed as a keyed region. """ __slots__ = ('_storage', '_object_mapping', '_phi_node_contains' ) def __init__(self, tree=None, phi_node_contains=None): self._storage = SortedDict() if tree is None else tree self._object_mapping = weakref.WeakValueDictionary() self._phi_node_contains = phi_node_contains def __getstate__(self): return self._storage, dict(self._object_mapping), self._phi_node_contains def __setstate__(self, s): self._storage, om, self._phi_node_contains = s self._object_mapping = weakref.WeakValueDictionary(om) def _get_container(self, offset): try: base_offset = next(self._storage.irange(maximum=offset, reverse=True)) except StopIteration: return offset, None else: container = self._storage[base_offset] if container.includes(offset): return base_offset, container return offset, None def __contains__(self, offset): """ Test if there is at least one variable covering the given offset. :param offset: :return: """ if type(offset) is not int: raise TypeError("KeyedRegion only accepts concrete offsets.") return self._get_container(offset)[1] is not None def __len__(self): return len(self._storage) def __iter__(self): return iter(self._storage.values()) def __eq__(self, other): if set(self._storage.keys()) != set(other._storage.keys()): return False for k, v in self._storage.items(): if v != other._storage[k]: return False return True def copy(self): if not self._storage: return KeyedRegion(phi_node_contains=self._phi_node_contains) kr = KeyedRegion(phi_node_contains=self._phi_node_contains) for key, ro in self._storage.items(): kr._storage[key] = ro.copy() kr._object_mapping = self._object_mapping.copy() return kr def merge(self, other, replacements=None): """ Merge another KeyedRegion into this KeyedRegion. :param KeyedRegion other: The other instance to merge with. :return: None """ # TODO: is the current solution not optimal enough? for _, item in other._storage.items(): # type: RegionObject for so in item.stored_objects: # type: StoredObject if replacements and so.obj in replacements: so = StoredObject(so.start, replacements[so.obj], so.size) self._object_mapping[so.obj_id] = so self.__store(so, overwrite=False) return self def replace(self, replacements): """ Replace variables with other variables. :param dict replacements: A dict of variable replacements. :return: self """ for old_var, new_var in replacements.items(): old_var_id = id(old_var) if old_var_id in self._object_mapping: # FIXME: we need to check if old_var still exists in the storage old_so = self._object_mapping[old_var_id] # type: StoredObject self._store(old_so.start, new_var, old_so.size, overwrite=True) return self def dbg_repr(self): """ Get a debugging representation of this keyed region. :return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = { } for key in sorted(keys): ro = self._storage[key] variables = [ obj.obj for obj in ro.stored_objects ] offset_to_vars[key] = variables s = [ ] for offset, variables in offset_to_vars.items(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. 
:param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.add_object(start, variable, size) def add_object(self, start, obj, object_size): """ Add/Store an object to this region at the given offset. :param start: :param obj: :param int object_size: Size of the object :return: """ self._store(start, obj, object_size, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.set_object(start, variable, size) def set_object(self, start, obj, object_size): """ Add an object to this region at the given offset, and remove all other objects that are fully covered by this object. :param start: :param obj: :param object_size: :return: """ self._store(start, obj, object_size, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index objects covering the given offset) of a specific offset. :param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A list of stack variables. :rtype: set """ _, container = self._get_container(start) if container is None: return [] else: return container.internal_objects def get_objects_by_offset(self, start): """ Find objects covering the given region offset. :param start: :return: """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects # # Private methods # def _store(self, start, obj, size, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param obj: The object to store. :param int size: Size of the object to store. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ stored_object = StoredObject(start, obj, size) self._object_mapping[stored_object.obj_id] = stored_object self.__store(stored_object, overwrite=overwrite) def __store(self, stored_object, overwrite=False): """ Store a variable into the storage. :param StoredObject stored_object: The descriptor describing start address and the variable. :param bool overwrite: Whether existing objects should be overwritten or not. True to make a strong update, False to make a weak update. :return: None """ start = stored_object.start object_size = stored_object.size end = start + object_size # region items in the middle overlapping_items = list(self._storage.irange(start, end-1)) # is there a region item that begins before the start and overlaps with this variable? 
floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginning overlapping_items.insert(0, floor_key) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, object_size, {stored_object})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(stored_object) else: self._add_object_with_check(b, stored_object) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {stored_object}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(stored_object) else: self._add_object_with_check(a, stored_object) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(stored_object) else: self._add_object_with_check(item, stored_object) to_update[item.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next(self._storage.irange(maximum=end-1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next(self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_with_check(self, item, stored_object): if len({stored_object.obj} | item.internal_objects) > 1: if self._phi_node_contains is not None: # check if `item` is a phi node that contains stored_object.obj for so in item.internal_objects: if self._phi_node_contains(so, stored_object.obj): # yes! so we want to skip this object return # check if `stored_object.obj` is a phi node that contains item.internal_objects if all(self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects): # yes! item.set_object(stored_object) return l.warning("Overlapping objects %s.", str({stored_object.obj} | item.internal_objects)) # import ipdb; ipdb.set_trace() item.add_object(stored_object)
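# A minimal sketch (independent of angr) of the lookup idiom used by
# _get_container() above: irange(maximum=offset, reverse=True) yields the
# greatest key at or below the queried offset. The region map is illustrative.
from sortedcontainers import SortedDict

regions = SortedDict({0: 8, 16: 4, 32: 8})  # start offset -> region size

def find_container(offset):
    try:
        base = next(regions.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None                 # nothing starts at or before this offset
    size = regions[base]
    return base if base <= offset < base + size else None

print(find_container(18))  # 16   (inside the region starting at 16)
print(find_container(25))  # None (falls in a gap between regions)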
class FakeBatsim(object): def __init__(self, batsky_scheduler, workload_file): self.batsky_sched = batsky_scheduler self.batsky_sched.bs = self self.jobs = dict() self._current_time = 0 self.nb_jobs_submitted = 0 self.nb_jobs_completed = 0 self.nb_jobs_toExecute = 0 self.running_simulation = False self.workload = None if workload_file: with open(workload_file) as json_file: self.workload = json.load(json_file) self._fake_events = self.events_from_workload(self.workload) else: self._fake_events = SortedDict({ 0.0: [{ 'timestamp': 0.0, 'type': 'SIMULATION_BEGINS', 'data': {} }], 5.0: [{ 'timestamp': 5.0, 'type': 'JOB_SUBMITTED', 'data': { 'job_id': 'w0!1', 'job': { 'id': 'w0!1', 'subtime': 5.0, 'res': 1, 'walltime': 12, 'profile': { 'type': 'delay', 'delay': 10 } } } }], 20.0: [{ 'timestamp': 20.0, 'type': 'SIMULATION_ENDS', 'data': {} }] }) #0.0: [{'type': '', 'data': {}}], self._read_bat_msg() self.batsky_sched.onAfterBatsimInit() def events_from_workload(self, workload): # TODO support mutiple events at same timestamp fake_events = SortedDict({ 0.0: [{ 'timestamp': 0.0, 'type': 'SIMULATION_BEGINS', 'data': {} }] }) max_event_time = 0.0 max_walltime = 0 for job in workload['jobs']: assert 'subtime' in job timestamp = job['subtime'] if timestamp == 0: # just to not have 2 events at 0.0 timestamp = 1e-06 profile = workload['profiles'][job['profile']] assert profile['type'] == 'delay' job_event = {'timestamp': timestamp, 'type': 'JOB_SUBMITTED'} job_event['data'] = { 'job_id': str(job['id']), 'job': { 'id': str(job['id']), 'subtime': timestamp, 'res': job['res'], 'walltime': timestamp, 'profile': { 'type': 'delay', 'delay': profile['delay'] } } } self.nb_jobs_toExecute += 1 assert timestamp not in fake_events fake_events.update({timestamp: [job_event]}) if timestamp > max_event_time: max_event_time = timestamp if job['walltime'] > max_walltime: max_walltime = job['walltime'] # Simulation finished when all jobs are executed #simulation_end_time = max_event_time + max_walltime + 50 #fake_events.update({simulation_end_time: [{'timestamp': simulation_end_time, # 'type': 'SIMULATION_ENDS', 'data': {}}]}) return fake_events def time(self): return self._current_time def consume_time(self, t): self._current_time += float(t) return self._current_time return True def wake_me_up_at(self, at_time): events = [] if at_time in self._fake_events: events = self._fake_events.get(at_time) events.append({ 'timestamp': at_time, 'type': 'REQUESTED_CALL', 'data': {} }) self._fake_events.update({at_time: events}) def execute_jobs(self, jobs): # Generate the events of completion for job in jobs: events = [] completion_time = self.time() + job.profile['delay'] if completion_time in self._fake_events: events = self._fake_events.get(completion_time) assert job.profile['type'] == 'delay' events.append({ 'timestamp': completion_time, 'type': 'JOB_COMPLETED', 'data': { 'job_id': job.id } }) logger.debug( 'Execute_job: insert completion events for job: {} completion_time: {}' .format(job.id, completion_time)) self._fake_events.update({completion_time: events}) def start(self): cont = True while cont: cont = self.do_next_event() def do_next_event(self): return self._read_bat_msg() def _read_bat_msg(self): (batsim_time, events) = self._fake_events.popitem(index=0) logger.debug('Batsim time {} Events: {}'.format(batsim_time, events)) self._current_time = batsim_time for event in events: event_type = event['type'] event_data = event.get('data', {}) if event_type == 'SIMULATION_BEGINS': assert not self.running_simulation, "A 
simulation is already running (is more than one instance of Batsim active?!)" self.running_simulation = True self.batsky_sched.onSimulationBegins() elif event_type == 'SIMULATION_ENDS': self.batsky_sched.onSimulationEnds() elif event_type == 'JOB_SUBMITTED': job_id = event_data['job_id'] job, profile = self.get_job_and_profile(event) job.job_state = Job.State.SUBMITTED self.nb_jobs_submitted += 1 self.jobs[job_id] = job self.batsky_sched.onJobSubmission(job) elif event_type == 'JOB_COMPLETED': job_id = event_data['job_id'] j = self.jobs[job_id] j.finish_time = event['timestamp'] self.batsky_sched.onJobCompletion(j) if j.job_state == Job.State.COMPLETED_WALLTIME_REACHED: self.nb_jobs_timeout += 1 elif j.job_state == Job.State.COMPLETED_FAILED: self.nb_jobs_failed += 1 elif j.job_state == Job.State.COMPLETED_SUCCESSFULLY: self.nb_jobs_successful += 1 elif j.job_state == Job.State.COMPLETED_KILLED: self.nb_jobs_killed += 1 self.nb_jobs_completed += 1 if self.nb_jobs_completed == self.nb_jobs_toExecute: self.batsky_sched.onSimulationEnds() elif event_type == 'REQUESTED_CALL': self.batsky_sched.onRequestedCall() return True def get_job_and_profile(self, event): json_dict = event["data"]["job"] job = Job.from_json_dict(json_dict) if "profile" in event["data"]: profile = event["data"]["profile"] else: profile = {} return job, profile
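# A minimal sketch (not from batsky) of the event-queue pattern FakeBatsim uses:
# timestamps as SortedDict keys, lists of events as values, and popitem(index=0)
# to always consume the earliest timestamp first. Event contents are illustrative.
from sortedcontainers import SortedDict

events = SortedDict()

def push(ts, event):
    events.setdefault(ts, []).append(event)  # several events may share a timestamp

push(5.0, {'type': 'JOB_SUBMITTED'})
push(0.0, {'type': 'SIMULATION_BEGINS'})
push(5.0, {'type': 'REQUESTED_CALL'})

while events:
    ts, batch = events.popitem(index=0)      # pop the earliest pending timestamp
    print(ts, [e['type'] for e in batch])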
def dzsz(open, high, low, close, min_legin=1, min_legout=2, min_base=1, max_legin=2, max_legout=6, max_base=6, demand_dict=None, supply_dict=None, use_proximal=False): ind_arr = open.index.array def body_range(ind): cdl_range = high.iloc[ind] - low.iloc[ind] cdl_body = abs(close.iloc[ind] - open.iloc[ind]) # cdl_body = 0.05 if cdl_body == 0 else cdl_body cdl_body, cdl_range = (0.0, 1) if cdl_range == 0 else (cdl_body, cdl_range) #Debugging cdl_body,cdl_range runtime warning # with warnings.catch_warnings(record=True) as w: # # Cause all warnings to always be triggered. # warnings.simplefilter("always") # # Trigger a warning. # cdl_body_range = cdl_body/cdl_range # if len(w) != 0: # print(cdl_range,cdl_body_range,cdl_body) # print(f"{ind_arr[ind]}\t CDLRANGE\t {cdl_body_range}",end='\t') cdl_body_range = cdl_body / cdl_range return cdl_body_range def green(ind): return (close.iloc[ind] > open.iloc[ind]) and (body_range(ind) > 0.55) def red(ind): return (close.iloc[ind] < open.iloc[ind]) and (body_range(ind) > 0.55) def rally(ind, leg_size): leg = True for i in range(ind, ind - leg_size + 1, -1): if green(i): pass else: leg = False if leg: if green(ind - leg_size + 1): return leg else: if body_range(ind - leg_size + 1) < 0.5 and low[ ind - leg_size + 1] > high[ind - leg_size]: return leg else: return False return leg def drop(ind, leg_size): leg = True for i in range(ind, ind - leg_size + 1, -1): if red(i): pass else: leg = False if leg: if red(ind - leg_size + 1): return leg else: if body_range(ind - leg_size + 1) < 0.5 and high[ ind - leg_size + 1] < low[ind - leg_size]: return leg else: return False return leg def legout(ind, leg_length=min_legout): is_rally = rally(ind, leg_length) is_drop = drop(ind, leg_length) if is_rally: return {'distal': low.iloc[ind], 'is_rally': True} if is_drop: return {'distal': high.iloc[ind], 'is_rally': False} return {'distal': False, 'is_rally': None} def base(base_start, base_length=min_base, is_rally=True): is_base = True proximal = 0 if is_rally else 99999999 distal = 99999999 if is_rally else 0 for i in range(base_start, base_start - base_length, -1): if body_range(i) <= 0.5: proximal = max(proximal, close.iloc[i], open.iloc[i]) if is_rally else min( proximal, close.iloc[i], open.iloc[i]) distal = min(distal, low.iloc[i]) if is_rally else max( distal, high.iloc[i]) else: is_base = False break if is_base: if is_rally: is_base = proximal < close.iloc[base_start + 1] else: is_base = proximal > close.iloc[base_start + 1] return {'is_base': is_base, 'proximal': proximal, 'distal': distal} def legin(ind, legin_length=min_legin): is_rally = rally(ind, legin_length) is_drop = drop(ind, legin_length) if is_rally: return {'distal': high.iloc[ind], 'is_rally': True} if is_drop: return {'distal': low.iloc[ind], 'is_rally': False} return {'distal': False, 'is_rally': None} demand_dict = SortedDict() if demand_dict is None else demand_dict supply_dict = SortedDict() if supply_dict is None else supply_dict ind_list = [] if use_proximal: for ind in range(-len(open) + 15, -1): # check for legout length for i in range(max_legout, min_legout - 1, -1): is_legout = legout(ind, i) if is_legout['distal']: for j in range(max_base, min_base - 1, -1): is_base = base(ind - i, j, is_legout['is_rally']) if is_base['is_base']: for k in range(max_legin, min_legin - 1, -1): is_legin = legin(ind - i - j, k) if is_legin['distal']: ind_list.append(ind) if is_legout['is_rally']: #_brally proximal = is_base['proximal'] distal = min( is_base['distal'], is_legout['distal'] ) if 
is_legin['is_rally'] else min( is_base['distal'], is_legin['distal'], is_legout['distal']) if proximal not in demand_dict.keys(): demand_dict.update({ proximal: { "timestamp": ind_arr[ind], "legout_length": i, "base_length": j, "legin_length": k, "proximal": proximal, "distal": distal } }) else: #_bdrop proximal = is_base['proximal'] distal = max( is_base['distal'], is_legout['distal'] ) if not is_legin['is_rally'] else max( is_base['distal'], is_legin['distal'], is_legout['distal']) if proximal not in supply_dict.keys(): supply_dict.update({ proximal: { "timestamp": ind_arr[ind], "legout_length": i, "base_length": j, "legin_length": k, "proximal": proximal, "distal": distal } }) # Elimination method 1 if len(demand_dict.keys()) != 0: # print(list(demand_dict.keys()), low[ind]) while low[ind] < demand_dict.keys()[-1]: demand_dict.popitem(index=-1) if len(demand_dict.keys()) == 0: break if len(supply_dict.keys()) != 0: # print(list(demand_dict.keys()), low[ind]) while high[ind] > supply_dict.keys()[0]: supply_dict.popitem(index=0) if len(supply_dict.keys()) == 0: break else: for ind in range(-len(open) + 15, -1): # check for legout length for i in range(max_legout, min_legout - 1, -1): is_legout = legout(ind, i) if is_legout['distal']: for j in range(max_base, min_base - 1, -1): is_base = base(ind - i, j, is_legout['is_rally']) if is_base['is_base']: for k in range(max_legin, min_legin - 1, -1): is_legin = legin(ind - i - j, k) if is_legin['distal']: ind_list.append(ind) if is_legout['is_rally']: #_brally proximal = is_base['proximal'] distal = min( is_base['distal'], is_legout['distal'] ) if is_legin['is_rally'] else min( is_base['distal'], is_legin['distal'], is_legout['distal']) if distal not in demand_dict.keys(): demand_dict.update({ distal: { "timestamp": ind_arr[ind], "legout_length": i, "base_length": j, "legin_length": k, "proximal": proximal, "distal": distal } }) else: #_bdrop proximal = is_base['proximal'] distal = max( is_base['distal'], is_legout['distal'] ) if not is_legin['is_rally'] else max( is_base['distal'], is_legin['distal'], is_legout['distal']) if distal not in supply_dict.keys(): supply_dict.update({ distal: { "timestamp": ind_arr[ind], "legout_length": i, "base_length": j, "legin_length": k, "proximal": proximal, "distal": distal } }) # Elimination method 1 if len(demand_dict.keys()) != 0: while close[ind] < demand_dict.keys()[-1]: demand_dict.popitem(index=-1) if len(demand_dict.keys()) == 0: break if len(supply_dict.keys()) != 0: while close[ind] > supply_dict.keys()[0]: supply_dict.popitem(index=0) if len(supply_dict.keys()) == 0: break df_demand = pd.DataFrame(demand_dict.values(), index=demand_dict.keys()) df_supply = pd.DataFrame(supply_dict.values(), index=supply_dict.keys()) return (df_demand, df_supply, demand_dict, supply_dict)
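# A minimal sketch (not part of the indicator above) of its elimination idiom:
# zones keyed by price in a SortedDict can be trimmed from either end with
# keys()[...] and popitem(index=...). Prices and labels are illustrative.
from sortedcontainers import SortedDict

demand = SortedDict({95.0: 'zone-a', 100.0: 'zone-b', 104.0: 'zone-c'})
bar_low = 98.5

# a bar trading below a demand zone invalidates it: drop zones from the top down
while demand and bar_low < demand.keys()[-1]:
    demand.popitem(index=-1)

print(dict(demand))  # {95.0: 'zone-a'}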
class SnapshotGraph(object): def __init__(self, **attr): self.graph = {} self.graph.update(attr) self.snapshots = SortedDict() @property def name(self): """String identifier of the snapshot graph. This snapshot graph attribute appears in the attribute dict SnapshotGraph.graph keyed by the string `"name"`. as well as an attribute (technically a property) `SnapshotGraph.name`. This is entirely user controlled. """ return self.graph.get('name', '') @name.setter def name(self, s): self.graph['name'] = s def __str__(self): """Return the snapshot graph name. Returns ------- name : string The name of the snapshot graph. Examples -------- >>> G = dnx.SnapshotGraph(name='foo') >>> str(G) 'foo' """ return self.name def __len__(self): """Return the number of snapshots. Use: 'len(G)'. Returns ------- num_snapshots : int The number of snapshots in the graph. Examples -------- >>> nxG1 = nx.Graph() >>> nxG2 = nx.Graph() >>> >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> nxG2.add_edges_from([(1, 4), (1, 3)]) >>> >>> G = dnx.SnapshotGraph() >>> G.add_snapshot(graph=nxG1) >>> G.add_snapshot(graph=nxG2) >>> len(G) 2 """ return len(self.snapshots) def __contains__(self, graph): """Return True if graph in the snapshot graph, False otherwise. Use: 'graph in G'. Parameters ---------- graph: networkx graph object networkx graph to be looked for into snapshot graph. Returns ------- None Examples -------- >>> nxG1 = nx.Graph() >>> nxG2 = nx.Graph() >>> >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> nxG2.add_edges_from([(1, 4), (1, 3)]) >>> >>> G = dnx.SnapshotGraph() >>> G.add_snapshot(graph=nxG1) >>> G.add_snapshot(graph=nxG2) >>> nxG1 in G True """ try: return graph in self.snapshots.values() except TypeError: return False def __iter__(self): """Iterates through snapshots in snapshot graph. Returns ------- Iterable of snapshots Examples -------- >>> nxG1 = nx.Graph() >>> nxG2 = nx.Graph() >>> >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> nxG2.add_edges_from([(1, 4), (1, 3)]) >>> >>> G = dnx.SnapshotGraph() >>> G.add_snapshot(graph=nxG1) >>> G.add_snapshot(graph=nxG2) >>> for snapshot in G: print(True) True True """ return iter(self.snapshots.values()) def insert(self, graph, start=None, end=None, time=None): """Insert a graph into the snapshot graph, with specified intervals. Parameters ---------- graph: networkx graph object A networkx graph to be inserted into snapshot graph. start: start of the interval, inclusive end: end of the interval, exclusive time: timestamp for impulses, cannot be used together with (start, end) Returns ------- None Examples -------- >>> nxG1 = nx.Graph() >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> G = dnx.SnapshotGraph() >>> G.insert(nxG1, start=0, end=3) """ if time is not None and (start or end): raise ValueError('Time and (start or end) cannot both be specified.') elif time is not None: self.snapshots.update({(time, time): graph}) elif start is None or end is None: raise ValueError('Either time or both start and end must be specified.') elif start > end: raise ValueError('Start of the interval must be lower or equal to end') else: self.snapshots.update({(start, end): graph}) def add_snapshot(self, ebunch=None, graph=None, start=None, end=None, time=None): """Add a snapshot with a bunch of edge values. Parameters ---------- ebunch : container of edges, optional (default= None) Each edge in the ebunch list will be included to all added graphs. graph : networkx graph object, optional (default= None) networkx graph to be inserted into snapshot graph. 
start: start timestamp, inclusive end: end timestamp, exclusive time: timestamp for impulses, cannot be used together with (start, end) Returns ------- None Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 4), (1, 3)], start=0, end=3) """ if not graph: g = Graph() g.add_edges_from(ebunch) else: g = graph if time is not None and (start or end): raise ValueError('Time and (start or end) cannot both be specified.') elif time is not None: self.insert(g, time=time) elif start is None and end is None: raise ValueError('Either time or both start and end must be specified.') else: self.insert(g, start=start, end=end)
def subgraph(self, nbunch, sbunch=None, start=None, end=None): """Return a snapshot graph containing only the nodes in nbunch, and snapshot indexes in sbunch. Parameters ---------- nbunch : container of nodes Each node in the nbunch list will be included in all subgraphs indexed in sbunch. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of subgraphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- snap_graph : SnapshotGraph object Contains only the nodes in nbunch, and snapshot indexes in sbunch. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=0, end=3) >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=3, end=10) >>> H = G.subgraph([4, 6]) >>> type(H) <class 'snapshotgraph.SnapshotGraph'> >>> list(H.get([0])[0].edges(data=True)) [(4, 6, {})] """ subgraph = SnapshotGraph() subgraph.graph = self.graph if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: for key, snapshot in self._get(sbunch=sbunch, include_interval=True): subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1]) else: for key, snapshot in self._get(start=start, end=end, include_interval=True): subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1]) return subgraph
def degree(self, sbunch=None, nbunch=None, start=None, end=None, weight=None): """Return a list of DegreeView objects containing the degrees of each node in each snapshot Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node degrees. It is highly recommended that this list is sequential, however it can be out of order. nbunch : container of nodes, optional (default= None) Each node in the nbunch list will be included in the returned list of node degrees. start: start timestamp, inclusive end: end timestamp, exclusive weight : string, optional (default= None) The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. The degree is the sum of the edge weights adjacent to the node. Returns ------- degree_list : list List of DegreeView objects containing the degree of each node, indexed by requested snapshot. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.degree(sbunch=[1]) [DegreeView({1: 2, 4: 1, 3: 1})] >>> G.degree(nbunch=[1, 2]) [DegreeView({1: 2, 2: 1}), DegreeView({1: 2})] """ # returns a list of degrees for each graph snapshot in snapshots # use generator to create list of degrees if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: if nbunch: return [graph.degree(nbunch, weight=weight) for graph in self._get(sbunch=sbunch)] else: return [graph.degree(weight=weight) for graph in self._get(sbunch=sbunch)] else: if nbunch: return [graph.degree(nbunch, weight=weight) for graph in self._get(start=start, end=end)] else: return [graph.degree(weight=weight) for graph in self._get(start=start, end=end)]
def number_of_nodes(self, sbunch=None, start=None, end=None): """Gets number of nodes in each snapshot requested in 'sbunch'. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of number of nodes in the snapshot. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- num_nodes : list A list of the number of nodes in each requested snapshot. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.number_of_nodes(sbunch=[1]) [3] >>> G.number_of_nodes(sbunch=[0, 1]) [3, 3] """ # returns a list of the number of nodes in each graph in the range if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.number_of_nodes() for graph in self._get(sbunch=sbunch)] else: return [graph.number_of_nodes() for graph in self._get(start=start, end=end)]
def order(self, sbunch=None, start=None, end=None): """Returns order of each graph requested in 'sbunch'. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node orders. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- snapshot_orders : list A list of the orders of each snapshot. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.order(sbunch=[1]) [3] >>> G.order(sbunch=[0, 1]) [3, 3] """ # returns a list of the order of the graph in the range if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.order() for graph in self._get(sbunch=sbunch)] else: return [g.order() for g in self._get(start=start, end=end)]
def has_node(self, n, sbunch=None, start=None, end=None): """Gets a boolean list indicating whether each requested snapshot contains node 'n'. Parameters ---------- n : node Node to be checked for in requested snapshots. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list indicating whether the snapshot includes the node. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- List of boolean values indicating whether each requested snapshot contains n.
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.has_node(1, sbunch=[1]) [True] >>> G.has_node(1) [True, True] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.has_node(n) for graph in self._get(sbunch=sbunch)] else: return [graph.has_node(n) for graph in self._get(start=start, end=end)] def is_multigraph(self, sbunch=None, start=None, end=None): """Returns a list of boolean values for if the graph at the index is a multigraph. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of booleans. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- mutli_list : list List of boolean values if index in sbunch is a multigraph. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.is_multigraph(sbunch=[0, 1]) [False, False] >>> G.is_multigraph() [False, False] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.is_multigraph() for graph in self._get(sbunch=sbunch)] else: return [graph.is_multigraph() for graph in self._get(start=start, end=end)] def is_directed(self, sbunch=None, start=None, end=None): """Returns a list of boolean values for if the graph at the index is a directed graph. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of booleans. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- is_direct_list : list List of boolean values if index in sbunch is a directed graph. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.is_directed(sbunch=[0, 1]) [False, False] >>> G.is_directed() [False, False] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.is_directed() for graph in self._get(sbunch=sbunch)] else: return [graph.is_directed() for graph in self._get(start=start, end=end)] def to_directed(self, sbunch=None, start=None, end=None): """Returns a list of networkx directed graph objects. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of directed graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- direct_list : list List of networkx directed graph objects. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.to_directed(sbunch=[0, 1]) [<networkx.classes.digraph.DiGraph object at 0x7f1a6de49dd8>, <networkx.classes.digraph.DiGraph object at 0x7f1a6de49e10>] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.to_directed() for graph in self._get(sbunch=sbunch)] else: return [graph.to_directed() for graph in self._get(start=start, end=end)] def to_undirected(self, sbunch=None, start=None, end=None, ): """Returns a list of networkx graph objects. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of undirected graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- undirect_list : list List of networkx graph objects. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.to_directed(sbunch=[0, 1]) [<networkx.classes.graph.Graph object at 0x7ff532219e10>, <networkx.classes.graph.Graph object at 0x7ff532219e48>] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.to_undirected() for graph in self._get(sbunch=sbunch)] else: return [graph.to_undirected() for graph in self._get(start=start, end=end)] def size(self, sbunch=None, start=None, end=None, weight=None): """Returns the size of each graph index as specified in sbunch as a list. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of sizes. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive weight : string, optional (default=None) The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. Returns ------- size_list: list List of sizes of each graph indexed in sbunch. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.size(sbunch=[0, 1]) [2, 2] >>> G.size() [2, 2] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.size(weight=weight) for graph in self._get(sbunch=sbunch)] else: return [graph.size(weight=weight) for graph in self._get(start=start, end=end)] def _get(self, sbunch=None, start=None, end=None, include_interval=False, split_overlaps=False): """Returns a list of graphs specified in sbunch. Hidden utility tool for other functions. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive include_interval: if True, return snapshots with its corresponding intervals split_overlaps: if True, when query by time interval, split snapshots if query interval overlaps with any snapshots' intervals. 
For ex: graph G contains snapshots with time intervals [(0,4),(4,6),(6,10)]. If query interval is [2,10], the snapshot with interval (0,4) will be split into two snapshots (0,2) and (2,4), both of which have the same copy of the original snapshot. This parameter is used for updating graphs by interval. For intance, with the example above, if you want to update interval (2,10), then the snapshot at (0,2) won't be updated. Returns ------- If include_interval: List of tuples of (interval, networkx graph object). else: List of networkx graph objects. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G._get(sbunch=[0]) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>] >>> G._get() [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] >>> G._get(start=2, end=6) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] """ if include_interval: graphs = self.snapshots.items() else: graphs = self.snapshots.values() if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: # if retrieve by indexes for index in sbunch: yield graphs[index] else: # if retrieve by interval if start is None: min_idx = 0 else: min_idx = self.snapshots.bisect_left((start,)) # Decrease 1 index if start is in the middle of an interval # Eg: if Keys = [(2,5)(5,6)], start=3 won't retrieve (2,5) as we want, # therefore, decrease 1 index to include (2,5). If start=5, then we won't need to change if min_idx > 0 and start < self.snapshots.keys()[min_idx][0]: if split_overlaps: # Eg: if Keys = [(2,5)(5,6)] and start=3, split (2,5) into (2,3) and (3,5) key, g = self.snapshots.popitem(min_idx - 1) self.insert(g, key[0], start) self.insert(copy.deepcopy(g), start, key[1]) else: min_idx -= 1 if end is None: max_idx = len(self.snapshots) else: max_idx = self.snapshots.bisect_left((end,)) # Split the snapshot if 'end' is in the middle of an interval # Eg: if Keys = [(2,5)(5,9)] and end=7, split (5,9) into (5,7) and (7,9) if split_overlaps and max_idx < len(self.snapshots) and end < self.snapshots.keys()[max_idx][1]: key, g = self.snapshots.popitem(max_idx) self.insert(g, key[0], end) self.insert(copy.deepcopy(g), end, key[1]) for graph in graphs[min_idx: max_idx]: yield graph def get(self, sbunch=None, start=None, end=None): """Returns a list of graphs specified in sbunch. Interface function for users. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- List of networkx graph objects. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G._get(sbunch=[0]) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>] >>> G._get() [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] >>> G._get(start=2, end=6) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] """ return [snapshot for snapshot in self._get(sbunch, start, end)] def add_nodes_from(self, nbunch, sbunch=None, start=None, end=None, **attrs): """Adds nodes to snapshots in sbunch. Note: This function may lead to increase in number of snapshots if changes occur within a snapshot. Parameters ---------- nbunch : container of nodes Each node in the nbunch list will be added to all graphs indexed in sbunch. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node degrees. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- None Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.add_nodes_from([5, 6, 7], [0]) >>> G.add_nodes_from([8, 9, 10, 11], [1]) >>> nx.adjacency_matrix(G.get()[0]).todense() [[0 1 1 0 0 0] [1 0 0 0 0 0] [1 0 0 0 0 0] [0 0 0 0 0 0] [0 0 0 0 0 0] [0 0 0 0 0 0]] >>> nx.adjacency_matrix(G.get()[1]).todense() [[0 1 1 0 0 0 0] [1 0 0 0 0 0 0] [1 0 0 0 0 0 0] [0 0 0 0 0 0 0] [0 0 0 0 0 0 0] [0 0 0 0 0 0 0] [0 0 0 0 0 0 0]] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: for graph in self._get(sbunch=sbunch): graph.add_nodes_from(nbunch, **attrs) else: for graph in self._get(start=start, end=end, split_overlaps=True): graph.add_nodes_from(nbunch, **attrs) def add_edges_from(self, ebunch, sbunch=None, start=None, end=None, **attrs): """Adds edges to snapshots in sbunch. Note: This function may lead to increase in number of snapshots if changes occur within a snapshot. Parameters ---------- ebunch : container of edges Each edge in the ebunch list will be added to all graphs indexed in sbunch. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node degrees. It is highly recommended that this list is sequential, however it can be out of order. 
start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- None Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.add_edges_from([(5, 6), (7, 6)], [0]) >>> G.add_edges_from([(8, 9), (10, 11)], [0, 1]) >>> nx.adjacency_matrix(G.get()[0]).todense() [[0 1 1 0 0 0 0 0 0 0] [1 0 0 0 0 0 0 0 0 0] [1 0 0 0 0 0 0 0 0 0] [0 0 0 0 1 0 0 0 0 0] [0 0 0 1 0 1 0 0 0 0] [0 0 0 0 1 0 0 0 0 0] [0 0 0 0 0 0 0 1 0 0] [0 0 0 0 0 0 1 0 0 0] [0 0 0 0 0 0 0 0 0 1] [0 0 0 0 0 0 0 0 1 0]] >>> nx.adjacency_matrix(G.get()[1]).todense() [[0 1 1 0 0 0 0] [1 0 0 0 0 0 0] [1 0 0 0 0 0 0] [0 0 0 0 1 0 0] [0 0 0 1 0 0 0] [0 0 0 0 0 0 1] [0 0 0 0 0 1 0]] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: for graph in self._get(sbunch=sbunch): graph.add_edges_from(ebunch, **attrs) else: for graph in self._get(start=start, end=end, split_overlaps=True): graph.add_edges_from(ebunch, **attrs) @staticmethod def load_from_txt(path, delimiter=";", comments="#", start='start', end='end'): """Read snapshot graph in from path. Every line in the file must be an adjacency matrix, with rows separated by delimiter. Parameters ---------- path : string or file Filename to read. comments : string, optional Marker for comment lines start: string, optional Marker for start timestamps end: string, optional Marker for end timestamps delimiter : string, optional Separator for rows in matrix. The default is ;. Cannot be whitespace or \n. Returns ------- G: SnapshotGraph The graph corresponding to the list of adjacency matrices. Examples -------- >>> G=dnx.Snapshotgraph.load_from_txt("my_dygraph.txt") """ if delimiter == ' ' or delimiter == '\n': raise ValueError("Delimiter cannot be " + delimiter + ".") sg = SnapshotGraph() with open(path, 'r') as file: for line in file: p = line.find(comments) if p >= 0: line = line[:p] if not len(line): continue p = min(line.find(start), line.find(end)) interval = [None, None] for item in line[p:].split(): key, value = item.split('=') try: value = float(value) except: raise ValueError('Value of "{}" must be float.'.format(key)) if key == start: interval[0] = value else: interval[1] = value if interval[0] is None or interval[1] is None: raise ValueError('A snapshot does not include its interval') line = line[:p].strip() matrix = [] for row in line.split(delimiter): matrix.append(row.split(' ')) g = from_numpy_array(np.array(matrix)) sg.insert(g, start=interval[0], end=interval[1]) return sg def save_to_txt(self, path, delimiter=";", start='start', end='end'): """Write snapshot graph to path. Every line in the file will be an adjacency matrix. Parameters ---------- path : string or file Filename to write. start: string, optional Marker for start timestamps end: string, optional Marker for end timestamps delimiter : string, optional Separator for rows in matrix. The default is ;. Cannot be whitespace or \n. 
Examples -------- >>> G.save_to_txt("my_dygraph.txt") """ if len(self) == 0: raise ValueError("Given graph is empty.") if delimiter == ' ' or delimiter == '\n': raise ValueError("Delimiter cannot be " + delimiter + ".") with open(path, 'w') as file: for interval, graph in self._get(include_interval=True): m = adjacency_matrix(graph).todense() line = delimiter.join(' '.join(x for x in y) for y in np.asarray(m, dtype=str)) + ' ' + start + '=' +\ str(interval[0]) + ' ' + end + '=' + str(interval[1]) + '\n' file.write(line) def compute_network_statistic(self, nx_statistic_function, sbunch=None, start=None, end=None, **kwargs): """Compute networkx statistics on each snapshot. Parameters ---------- nx_statistic_function : function from networkx.algorithms Statistic function to calculate. sbunch: snapshots indices to compute statistic start: start timestamp, inclusive end: end timestamp, exclusive kwargs : optional inputs for nx_statistic_function Examples -------- >>> G.compute_network_statistic(nx.algorithms.centrality.degree_centrality) """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [nx_statistic_function(graph, **kwargs) for graph in self._get(sbunch=sbunch)] else: return [nx_statistic_function(graph, **kwargs) for graph in self._get(start=start, end=end)]
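# A minimal sketch (independent of dynetworkx) of the interval-keyed SortedDict
# lookup that _get() above performs: (start, end) tuples as keys and
# bisect_left((t,)) to locate the first interval at or after t. The snapshot
# labels are illustrative.
from sortedcontainers import SortedDict

snapshots = SortedDict({(0, 4): 'G0', (4, 6): 'G1', (6, 10): 'G2'})

def snapshots_between(start, end):
    lo = snapshots.bisect_left((start,))
    # (start,) sorts before any (start, end), so lo points at the first interval
    # beginning at or after `start`; step back if the previous one still covers it
    if lo > 0 and snapshots.keys()[lo - 1][1] > start:
        lo -= 1
    hi = snapshots.bisect_left((end,))       # `end` is exclusive
    return [snapshots[k] for k in snapshots.keys()[lo:hi]]

print(snapshots_between(2, 6))  # ['G0', 'G1']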