def oddEvenJumps(self, nums) -> int:
    """Count the start indices from which the last index can be reached.

    Jumps alternate between "odd" and "even" kinds, starting with odd.  An
    odd jump from ``i`` lands on the later element with the smallest value
    ``>= nums[i]``; an even jump lands on the later element with the largest
    value ``<= nums[i]``.  The array is scanned right-to-left while a
    ``SortedDict`` maps every value seen so far to its left-most index.

    Returns 0 for an empty list and 1 for a single-element list.
    """
    size = len(nums)
    if size < 2:
        # No start index at all, or the only index is already the last one.
        return size

    last = size - 1
    reach_odd = [False] * size    # reach_odd[i]: end reachable if next jump is odd
    reach_even = [False] * size   # reach_even[i]: end reachable if next jump is even
    reach_odd[last] = reach_even[last] = True

    seen = SortedDict()           # value -> smallest index (so far) holding it
    seen[nums[last]] = last

    for pos in reversed(range(last)):
        current = nums[pos]
        if current in seen:
            # An equal value is both the ceiling and the floor.
            target = seen[current]
            reach_even[pos] = reach_odd[target]
            reach_odd[pos] = reach_even[target]
        else:
            ceil_rank = seen.bisect_left(current)   # first key > current
            floor_rank = ceil_rank - 1              # last key < current
            reach_odd[pos] = (ceil_rank != len(seen)
                              and reach_even[seen.peekitem(ceil_rank)[1]])
            reach_even[pos] = (floor_rank != -1
                               and reach_odd[seen.peekitem(floor_rank)[1]])
        seen[current] = pos

    # Every walk starts with an odd jump.
    return reach_odd.count(True)
def oddEvenJumps(self, arr: List[int]) -> int:
    """Count the start indices from which the last index can be reached.

    Odd-numbered jumps go to the later element with the smallest value
    ``>= arr[i]``; even-numbered jumps go to the later element with the
    largest value ``<= arr[i]``.  Ties resolve to the smallest index because
    the map is filled right-to-left and later writes overwrite earlier ones.

    Args:
        arr: the values to jump over.

    Returns:
        Number of good starting indices; 0 for an empty list.
    """
    N = len(arr)
    if N == 0:
        # Guard: ``odd[-1]`` below would raise IndexError on an empty list.
        return 0

    odd = [False] * N    # odd[i]: end reachable from i when the next jump is odd
    even = [False] * N   # even[i]: end reachable from i when the next jump is even
    odd[-1] = even[-1] = True

    sd = SortedDict()    # value -> smallest index seen so far with that value
    sd[arr[-1]] = N - 1

    for i in range(N - 2, -1, -1):
        value = arr[i]
        if value in sd:
            # An equal value is the target of both jump kinds.
            same = sd[value]
            odd[i] = even[same]
            even[i] = odd[same]
        else:
            # One bisect gives both neighbours because ``value`` is absent.
            ceiling_idx = sd.bisect_left(value)   # smallest greater
            floor_idx = ceiling_idx - 1           # greatest smaller
            if floor_idx != -1:
                even[i] = odd[sd.peekitem(floor_idx)[1]]
            if ceiling_idx != len(sd):
                odd[i] = even[sd.peekitem(ceiling_idx)[1]]
        sd[value] = i

    return odd.count(True)
class LogSystem:
    """Store log ids by timestamp string and query them by time range.

    Timestamps are compared as strings after truncation to a prefix whose
    length depends on the requested granularity (see ``self.gra``).
    NOTE(review): the prefix lengths suggest a zero-padded
    ``YYYY:MM:DD:HH:MM:SS`` layout -- confirm against the caller.
    """

    def __init__(self):
        # timestamp -> list of ids, kept ordered by timestamp.
        # A plain SortedDict() is used on purpose: the first positional
        # argument of SortedDict is a *key function*, not a default factory,
        # so ``SortedDict(list)`` would sort by ``list(timestamp)``.
        self.map = SortedDict()
        # granularity name -> number of leading characters to compare
        self.gra = {
            'Year': 5,
            'Month': 8,
            'Day': 11,
            'Hour': 14,
            'Minute': 17,
            'Second': 20,
        }

    def put(self, id: int, timestamp: str) -> None:
        """Record ``id`` under ``timestamp`` (several ids may share one)."""
        self.map.setdefault(timestamp, []).append(id)

    def retrieve(self, start: str, end: str, granularity: str) -> List[int]:
        """Return ids whose timestamp lies in [start, end] at ``granularity``.

        Raises:
            KeyError: if ``granularity`` is not a key of ``self.gra``.
        """
        idx = self.gra[granularity]
        lower = start[:idx]
        upper = end[:idx]
        result = []
        # Walk keys >= the truncated start.  The upper bound cannot be
        # bisected: a full key such as "2017:12:31..." sorts after the
        # truncated end "2017" yet still belongs to the range, so every key
        # is truncated before it is compared.
        for key in self.map.irange(lower):
            if key[:idx] > upper:
                break
            result.extend(self.map[key])
        return result
class UnboundedInterpolator:
    """Lazily interpolate an expensive function without preset bounds.

    Exact evaluations are cached in a ``SortedDict``.  A query that falls
    between two cached points no further than ``resolution`` away from
    either of them is answered by linear interpolation; any other query is
    computed with the wrapped function and cached.
    """

    def __init__(self, func, resolution, debug=False):
        self._func = func
        self._resolution = resolution
        self._debug = debug
        self._data = SortedDict()        # x -> func(x), exact values only
        self._keys = self._data.keys()   # live sorted view of the cached x's
        # vectorized evaluator so ndarrays and lists can be passed in
        self._vf = np.vectorize(self._eval)

    def min_val(self):
        """Smallest cached x (IndexError when nothing is cached)."""
        return self._keys[0]

    def max_val(self):
        """Largest cached x (IndexError when nothing is cached)."""
        return self._keys[-1]

    def __call__(self, x):
        """Evaluate at a scalar, or element-wise for an ndarray / list."""
        if type(x) in (np.ndarray, list):
            return self._vf(x)
        return self._eval(x)

    def _store(self, x, reason):
        """Compute ``func(x)`` exactly, cache it and return it."""
        if self._debug:
            print(reason)
        self._data[x] = self._func(x)
        return self._data[x]

    def _eval(self, x):
        """Return the cached, interpolated or freshly computed value at x."""
        cache = self._data
        if x in cache:
            return cache[x]

        # With fewer than two points, or outside the cached span, there is
        # nothing to interpolate between.
        if len(cache) <= 1 or x < self.min_val() or x > self.max_val():
            return self._store(
                x,
                "Computing value of function because not enough data or"
                " bigger or smaller than all other keys")

        # x is strictly inside the span and not a key, so both neighbours exist.
        hi = cache.bisect_left(x)   # index of smallest key greater than x
        lo = hi - 1                 # index of largest key less than x
        x_lo = self._keys[lo]
        x_hi = self._keys[hi]
        gap_lo = x - x_lo
        gap_hi = x_hi - x

        if max(gap_lo, gap_hi) > self._resolution:
            return self._store(
                x,
                "Computing value of function because x value not close"
                " enough to other keys")

        if self._debug:
            print("Interpolating")
        y_lo = cache[x_lo]
        y_hi = cache[x_hi]
        return (y_lo * gap_hi + y_hi * gap_lo) / (gap_lo + gap_hi)
def canAttendMeetings(self, intervals: List[List[int]]) -> bool:
    """Return True when no two of the given [start, end] intervals overlap.

    Interval boundaries are kept in a ``SortedDict``: a start is stored as
    ``+1`` and an end as ``-1``.  When one interval's end coincides with
    another's start, the two marks cancel and the key is removed.
    """
    boundaries = SortedDict()
    for begin, finish in intervals:
        lo = boundaries.bisect_right(begin)
        hi = boundaries.bisect_left(finish)
        if lo != hi:
            # Some recorded boundary lies strictly inside (begin, finish).
            return False
        if lo > 0 and boundaries.peekitem(lo - 1)[1] == 1:
            # The closest boundary at or before ``begin`` opens an interval
            # that has not been closed yet.
            return False

        if boundaries.get(begin) == -1:
            del boundaries[begin]       # touches the end of an earlier interval
        else:
            boundaries[begin] = 1

        if boundaries.get(finish) == 1:
            del boundaries[finish]      # touches the start of an earlier interval
        else:
            boundaries[finish] = -1
    return True
def trapezoid_decomposition_linear(polygons):
    """Build one vertical line per vertex x and clip it against the edges.

    Every vertex contributes an entry ``x -> [y, top, bottom]`` with the
    initial bounds ``top = 1000000`` and ``bottom = 0``.  Each edge between
    consecutive vertices of a polygon then tightens ``top`` / ``bottom`` of
    the lines whose x lies strictly between the edge's end points.

    Returns:
        SortedDict mapping x to the mutable list ``[y, top, bottom]``.
    """
    every_vertex = np.concatenate(polygons, axis=0)
    vertical_lines = SortedDict(
        {point[0]: [point[1], 1000000, 0] for point in every_vertex})

    for polygon in polygons:
        previous = polygon[0]
        for current in polygon[1:]:
            x_lo = min(previous[0], current[0])
            x_hi = max(previous[0], current[0])
            # Ranks of the lines strictly between the two end points.
            first = vertical_lines.bisect_right(x_lo)
            stop = vertical_lines.bisect_left(x_hi)
            xs = vertical_lines.keys()
            for rank in range(first, stop):
                x = xs[rank]
                if x < x_lo or x > x_hi:
                    continue
                y, top, bottom = vertical_lines[x]
                y_val = linear_interpolation(previous, current, x)
                if y_val > y and y_val < top:
                    # Edge passes above the vertex: lower the ceiling.
                    vertical_lines[x][1] = y_val
                elif y_val < y and y_val > bottom:
                    # Edge passes below the vertex: raise the floor.
                    vertical_lines[x][2] = y_val
            previous = current
    return vertical_lines
def test_bisect_key():
    """Bisect on a SortedDict ordered by the ``modulo`` key function."""
    pairs = ((number, number) for number in range(100))
    temp = SortedDict(modulo, pairs)
    temp._reset(7)  # small load factor so several sub-lists are exercised

    for number in range(100):
        assert temp.bisect(number) == ((number % 10) + 1) * 10
    for number in range(100):
        assert temp.bisect_right(number) == ((number % 10) + 1) * 10
    for number in range(100):
        assert temp.bisect_left(number) == (number % 10) * 10
def maxDepthBST(self, order: List[int]) -> int:
    """Return the depth of the BST built by inserting ``order`` in sequence.

    A new key becomes a child of its in-order predecessor or successor,
    whichever is deeper, so its depth is one more than the larger of the
    two neighbouring depths.

    Args:
        order: keys in insertion order.

    Returns:
        The maximum node depth (root has depth 1); 0 for an empty ``order``.
    """
    if not order:
        # ``max()`` of an empty values view would raise ValueError.
        return 0

    depth_of = SortedDict()   # key -> depth of that key in the tree
    for x in order:
        k = depth_of.bisect_left(x)
        depths = depth_of.values()
        val = 1
        if k:
            val = 1 + depths[k - 1]            # predecessor
        if k < len(depth_of):
            val = max(val, 1 + depths[k])      # successor
        depth_of[x] = val
    return max(depth_of.values())
def maxDepthBST(self, order: List[int]) -> int:
    """Return the depth of the BST built by inserting ``order`` in sequence.

    Two sentinel keys (``-inf`` and ``+inf``, both depth 0) guarantee that
    every inserted key has a predecessor and a successor, so no bounds
    checks are needed.  The lower sentinel is ``-inf`` rather than ``0`` so
    that zero and negative keys are handled correctly as well.

    Args:
        order: keys in insertion order.

    Returns:
        The maximum node depth (root has depth 1); 0 for an empty ``order``.
    """
    if not order:
        return 0

    sd = SortedDict({float('-inf'): 0, float('inf'): 0})
    ans = 0
    for v in order:
        higher = sd.bisect_left(v)   # in-order successor (or v itself)
        lower = higher - 1           # in-order predecessor
        depths = sd.values()
        depth = 1 + max(depths[lower], depths[higher])
        ans = max(ans, depth)
        sd[v] = depth
    return ans
class MyCalendar:
    """Calendar that accepts a booking only if it overlaps no earlier one."""

    def __init__(self):
        # start -> end of every accepted booking, ordered by start
        self.booked = SortedDict()

    def book(self, start: int, end: int) -> bool:
        """Try to book ``[start, end)``; return whether it was accepted."""
        rank = self.booked.bisect_left(end)   # bookings starting before ``end``
        if rank > 0:
            # Only the latest of those bookings can reach into the new one.
            _, previous_end = self.booked.peekitem(rank - 1)
            if previous_end > start:
                return False
        self.booked[start] = end
        return True
def find_index_before(sorted_dict: SortedDict, key: Any) -> int:
    '''
    Return the position of the largest key in ``sorted_dict`` that is less
    than or equal to ``key``.

    Returns -1 when the dict is empty or when ``key`` is smaller than every
    stored key.
    '''
    count = len(sorted_dict)
    if count == 0:
        return -1

    pos = sorted_dict.bisect_left(key)
    if pos == count:
        # ``key`` is greater than everything stored: the last key wins.
        return count - 1
    # ``pos`` holds either ``key`` itself or the first larger key.
    return pos - 1 if sorted_dict.keys()[pos] != key else pos
class QtDictListModel(QAbstractListModel):
    """List model whose rows are the values of a ``SortedDict``.

    Row ``n`` is the value stored under the n-th smallest key.  Subclasses
    mutate the model through ``_add`` / ``_remove`` / ``_clear`` /
    ``_update`` so that the Qt row signals stay consistent with the dict.
    """

    def __init__(self):
        QAbstractListModel.__init__(self)
        self._items = SortedDict()   # key -> item, rows follow key order

    def role(self, item, role):
        """Map ``item`` to the data for ``role``; the base class returns the item."""
        return item

    def rowCount(self, parent):
        # A list model has no children below a valid parent.
        if parent.isValid():
            return 0
        return len(self._items)

    def from_index(self, index):
        """Return the item at ``index`` or None if the index is unusable."""
        if not index.isValid() or index.row() >= len(self._items):
            return None
        return self._items.peekitem(index.row())[1]

    def data(self, index, role):
        item = self.from_index(index)
        if item is None:
            return None
        return self.role(item, role)

    def _add(self, key, item):
        """Insert ``item`` under a new ``key`` and notify views."""
        assert key not in self._items
        # The key is absent, so bisect_left is exactly its future row.
        next_index = self._items.bisect_left(key)
        self.beginInsertRows(QModelIndex(), next_index, next_index)
        self._items[key] = item
        self.endInsertRows()

    # TODO - removal is O(n).
    def _remove(self, key):
        """Remove the row stored under ``key`` and notify views."""
        assert key in self._items
        item_index = self._items.index(key)
        self.beginRemoveRows(QModelIndex(), item_index, item_index)
        del self._items[key]
        self.endRemoveRows()

    def _clear(self):
        """Remove every row; a no-op when the model is already empty."""
        if not self._items:
            # beginRemoveRows(…, 0, -1) would announce an invalid row range.
            return
        self.beginRemoveRows(QModelIndex(), 0, len(self._items) - 1)
        self._items.clear()
        self.endRemoveRows()

    # O(n). Rework if it's too slow.
    def _update(self, key, roles=None):
        """Emit ``dataChanged`` for the row stored under ``key``."""
        item_index = self._items.index(key)
        index = self.index(item_index, 0)
        if roles is None:
            self.dataChanged.emit(index, index)
        else:
            self.dataChanged.emit(index, index, roles)
def maxDepthBST(self, order: List[int]) -> int:
    """Return the depth of the BST built by inserting ``order`` in sequence.

    A ``SortedDict`` plays the role of a tree map from key to depth.  Two
    infinite sentinel keys of depth 0 make sure every real key has a
    neighbour on each side.
    """
    tree_depths = SortedDict({-math.inf: 0, math.inf: 0})

    for value in order:
        pos = tree_depths.bisect_left(value)
        # Predecessor and successor sit at pos - 1 and pos; the new node
        # hangs below the deeper of the two.
        neighbours = tree_depths.values()[pos - 1:pos + 1]
        tree_depths[value] = max(neighbours) + 1

    # Sentinels have depth 0, so they never win unless ``order`` is empty.
    return max(tree_depths.values())
def oddEvenJumps(self, A: List[int]) -> int:
    """Count the start indices from which the last index can be reached.

    The jump targets are precomputed for both jump kinds, then reachability
    is filled in right-to-left.  Jumps only go forward, so an iterative pass
    replaces the recursive search and cannot hit the recursion limit on
    long jump chains.

    Args:
        A: the values to jump over.

    Returns:
        Number of good starting indices (0 for an empty list).
    """
    n = len(A)
    if n <= 1:
        # Nothing to jump over: zero starts, or the single index is the end.
        return n

    def jump_targets(rank_of):
        """Return, per index, the index a jump lands on (-1 if none)."""
        seen = SortedDict()   # value -> smallest later index with that value
        targets = [-1] * n
        for i in range(n - 1, 0, -1):
            seen[A[i]] = i
            rank = rank_of(seen, A[i - 1])
            if 0 <= rank < len(seen):
                # peekitem replaces the deprecated ``iloc`` lookup.
                targets[i - 1] = seen.peekitem(rank)[1]
        return targets

    # Odd jump: smallest later value >= current.
    odd_next = jump_targets(lambda sd, v: sd.bisect_left(v))
    # Even jump: largest later value <= current.
    even_next = jump_targets(lambda sd, v: sd.bisect_right(v) - 1)

    good_odd = [False] * n    # end reachable when the next jump is odd
    good_even = [False] * n   # end reachable when the next jump is even
    good_odd[-1] = good_even[-1] = True
    for i in range(n - 2, -1, -1):
        if odd_next[i] != -1:
            good_odd[i] = good_even[odd_next[i]]
        if even_next[i] != -1:
            good_even[i] = good_odd[even_next[i]]

    # Every walk starts with an odd jump.
    return sum(good_odd)
def jobScheduling(self, startTime: List[int], endTime: List[int],
                  profit: List[int]) -> int:
    """Return the best total profit of a set of non-overlapping jobs.

    Jobs are visited from the latest start to the earliest.  ``best[s]``
    holds the best profit obtainable using only jobs that start at or
    after ``s``.
    """
    jobs = SortedList(zip(startTime, endTime, profit),
                      key=lambda job: job[0])
    best = SortedDict()

    for idx in range(len(jobs) - 1, -1, -1):
        begin, finish, gain = jobs[idx]

        # Take this job on its own (or keep a better entry for this start).
        if begin in best:
            best[begin] = max(best[begin], gain)
        else:
            best[begin] = gain

        # Take this job followed by the best schedule starting at/after its end.
        follower = best.bisect_left(finish)
        if follower < len(best):
            best[begin] = max(best[begin], gain + best.peekitem(follower)[1])

        # Or skip it and inherit the result of the next later start.
        successor = best.index(begin) + 1
        if successor < len(best):
            best[begin] = max(best[begin], best.peekitem(successor)[1])

    return max(best.values())
def populate_component_matrix(paths: List[Path], schematic: PangenomeSchematic):
    """Append one matrix row per path to every component of ``schematic``.

    For each path, the bins whose id lies in a component's
    ``[first_bin, last_bin]`` range are copied into a row indexed by
    ``bin_id - first_bin``; positions without a bin stay ``[]``.  A path
    with no bin in the component contributes an empty row, so every
    component ends up with exactly one entry per path.
    """
    # Outer loop: paths (same order as schematic.path_names);
    # inner loop: schematic.components.
    for path in paths:
        bins_by_id = SortedDict((item.bin_id, item) for item in path.bins)
        ordered_bins = list(bins_by_id.values())
        for component in schematic.components:
            lo = bins_by_id.bisect_left(component.first_bin)
            hi = bins_by_id.bisect_right(component.last_bin)
            covered = ordered_bins[lo:hi]
            row = []
            if covered:
                row = [[]] * (component.last_bin - component.first_bin + 1)
                for item in covered:
                    row[item.bin_id - component.first_bin] = Bin(
                        item.coverage, item.inversion_rate,
                        item.first_nucleotide, item.last_nucleotide)
            # ensure there's always 1 entry for each path
            component.matrix.append(row)
    print("Populated Matrix per component per path.")
    populate_component_occupancy(schematic)
def containsNearbyAlmostDuplicateOrderMap(self, nums, k, t):
    """Tell whether two nearby elements have nearly equal values.

    Looks for indices ``i != j`` with ``abs(i - j) <= k`` and
    ``abs(nums[i] - nums[j]) <= t`` using a sliding window of the last
    ``k`` values held in a ``SortedDict``.

    :type nums: List[int]
    :type k: int
    :type t: int
    :rtype: bool
    """
    if k < 0 or t < 0:
        # No pair can satisfy a negative distance bound.  Without this guard
        # the window bookkeeping below pops keys that were never inserted.
        return False

    window = SortedDict()   # value -> index, for indices in [i - k, i - 1]
    oldest = 0
    for i, value in enumerate(nums):
        # only need to consider index range [i-k, i]
        if i - oldest > k:
            window.pop(nums[oldest])
            oldest += 1
        # abs(value - other) <= t  <=>  value - t <= other <= value + t.
        # The smallest windowed value >= value - t is the only candidate
        # worth checking: anything larger is even further from ``value``
        # on the upper side.
        rank = window.bisect_left(value - t)
        if rank < len(window) and abs(window.peekitem(rank)[0] - value) <= t:
            return True
        window[value] = i
    return False
def fallingSquares(self, positions):
    """Return the tallest stack height after each square is dropped.

    The skyline is a ``SortedDict`` mapping ``(left, right)`` segments to
    their height.  Each new square replaces the segments it overlaps; the
    parts of those segments sticking out on either side are re-inserted
    with their old height.

    :type positions: List[List[int]]
    :rtype: List[int]
    """
    heights = []
    skyline = SortedDict()
    tallest = 0
    for pos in positions:
        left = pos[0]
        side = pos[1]
        right = left + side
        base = 0          # highest surface under the new square
        leftovers = []    # uncovered remainders of overlapped segments

        spans = skyline.keys()
        cursor = skyline.bisect_left((left, left))
        if cursor > 0:
            # The previous segment may still reach past ``left``.
            cursor -= 1
            if spans[cursor][1] <= left:
                cursor += 1

        overlapped = []
        while cursor < len(spans) and spans[cursor][0] < right:
            span = spans[cursor]
            span_height = skyline[span]
            if left > span[0]:
                leftovers.append((span[0], left, span_height))
            if right < span[1]:
                leftovers.append((right, span[1], span_height))
            base = max(base, span_height)
            overlapped.append(span)
            cursor += 1

        # Mutate only after the scan so the key view stays stable above.
        for span in overlapped:
            skyline.pop(span)
        skyline[(left, right)] = base + side
        for lo, hi, level in leftovers:
            skyline[(lo, hi)] = level

        tallest = max(tallest, base + side)
        heights.append(tallest)
    return heights
def oddEvenJumps(self, A: List[int]) -> int:
    """Count the start indices from which the last index can be reached.

    ``dp[i][0]`` is 1 when the end is reachable from ``i`` with an odd jump
    next, ``dp[i][1]`` when it is reachable with an even jump next.

    Args:
        A: the values to jump over.

    Returns:
        Number of good starting indices; 0 for an empty list.
    """
    n = len(A)
    if n == 0:
        # Guard: ``dp[n - 1]`` below would raise IndexError.
        return 0

    m = SortedDict()   # value -> smallest index seen so far with that value
    dp = [[0] * 2 for _ in range(n)]
    dp[n - 1][0] = dp[n - 1][1] = 1
    m[A[n - 1]] = n - 1
    res = 1            # the last index is always a good start

    for i in range(n - 2, -1, -1):
        # Odd jump: bisect_left is the rank of the smallest key >= A[i].
        o = m.bisect_left(A[i])
        if o != len(m):
            dp[i][0] = dp[m.peekitem(o)[1]][1]
        # Even jump: bisect_right is the rank of the first key > A[i],
        # so e - 1 is the largest key <= A[i].
        e = m.bisect_right(A[i])
        if e != 0:
            dp[i][1] = dp[m.peekitem(e - 1)[1]][0]
        if dp[i][0]:
            res += 1
        m[A[i]] = i
    return res
class TimeSeries(TictsMagicMixin, TictsOperationMixin, PandasMixin,
                 TictsIOMixin, TictsPlot):
    """ TimeSeries object.

    Values are stored in a ``SortedDict`` keyed by timestamp (``self.data``).

    Args:
        default: The default value of timeseries.
        permissive (bool): Whether to allow accessing non-existing values or not.
            If is True, getting non existing item returns None.
            If is False, getting non existing item raises.
    """
    _default_interpolate = "previous"
    _meta_keys = ('default', 'name', 'permissive')

    @property
    def index(self):
        """Sorted, live view of the time keys."""
        return self.data.keys()

    @property
    def lower_bound(self):
        """Return the lower bound time index."""
        if self.empty:
            return MINTS
        return self.index[0]

    @property
    def upper_bound(self):
        """Return the upper bound time index."""
        if self.empty:
            return MAXTS
        return self.index[-1]

    @property
    def _has_default(self):
        return self.default != NO_DEFAULT

    @property
    def _kwargs_special_keys(self):
        """Current values of the meta attributes, usable as constructor kwargs."""
        return {attr_name: getattr(self, attr_name)
                for attr_name in self._meta_keys}

    @property
    def empty(self):
        """Return whether the TimeSeries is empty or not."""
        return len(self) == 0

    def __init__(self, data=None, default=NO_DEFAULT, name=DEFAULT_NAME,
                 permissive=True, tz='UTC'):
        """Build a timeseries from ``data``.

        Args:
            data: another TimeSeries (its ``data`` and meta attributes are
                reused as-is), a pandas DataFrame with a single column, a
                pandas Series, or anything ``_process_args`` accepts.
            default: value returned where nothing is defined.
            name: name of the series; overwritten by the column / series
                name when ``data`` is a pandas object.
            permissive (bool): see the class docstring.
            tz (str): timezone name handed to ``pytz.timezone``.

        Raises:
            ValueError: for a DataFrame with several columns or an unknown
                timezone name.
        """
        if isinstance(data, self.__class__):
            for attr in ('data', *self._meta_keys):
                setattr(self, attr, getattr(data, attr))
            # Only set 'default' and 'name' if is different from default
            if default != NO_DEFAULT:
                setattr(self, 'default', default)
            if name != DEFAULT_NAME:
                setattr(self, 'name', name)
            return

        if hasattr(default, 'lower') and default.lower() == 'no_default':
            # 'no_default' as string is used at JSON serialization time
            self.default = NO_DEFAULT
        else:
            self.default = default

        self.name = name
        self.permissive = permissive

        # Overwrite the name if data is an instance of pd.DataFrame or pd.Series
        if isinstance(data, pd.DataFrame):
            if len(data.columns) != 1:
                msg = ("Can't convert a DataFrame with several columns into "
                       "one timeseries: {}.")
                raise ValueError(msg.format(data.columns))
            self.name = data.columns[0]
        elif isinstance(data, pd.Series):
            self.name = data.name

        try:
            tz = pytz.timezone(tz)
        except pytz.UnknownTimeZoneError:
            raise ValueError('{} is not a valid timezone'.format(tz))

        # SortedDict.__init__ does not use the __setitem__
        # Hence we got to parse datetime keys ourselves.
        # SortedDict use the first arg given and check if is a callable
        # in case you want to give your custom sorting function.
        self.data = SortedDict(None, _process_args(data, tz))

    def __setitem__(self, key, value):
        """Set one value, or a whole interval when ``key`` is a slice."""
        if isinstance(key, slice):
            return self.set_interval(key.start, key.stop, value)
        if key in self._meta_keys:
            super().__setitem__(key, value)
        else:
            key = timestamp_converter(key, self.tz)
            self.data[key] = value

    def __getitem__(self, key):
        """Get the value of the time series, even in-between measured values by interpolation.

        Args:
            key (datetime): datetime index; may also be a slice, or a
                ``(key, interpolate)`` tuple.
            interpolate (str): interpolate operator among ["previous", "linear"]

        Raises:
            KeyError: for a tuple longer than two items, or when the value
                cannot be deduced and the series is not permissive.
            ValueError: for an unknown interpolation name.
        """
        interpolate = self._default_interpolate

        if isinstance(key, tuple):
            if len(key) == 2:
                key, interpolate = key
            elif len(key) > 2:
                raise KeyError

        if isinstance(key, slice):
            return self.slice(key.start, key.stop)

        key = timestamp_converter(key, self.tz)

        basemsg = "Getting {} but default attribute is not set".format(key)
        if self.empty:
            if self._has_default:
                return self.default
            else:
                if self.permissive:
                    return
                else:
                    raise KeyError(
                        "{} and timeseries is empty".format(basemsg))

        if key < self.lower_bound:
            if self._has_default:
                return self.default
            else:
                if self.permissive:
                    return
                else:
                    msg = "{}, can't deduce value before the oldest measurement"
                    raise KeyError(msg.format(basemsg))

        # If the key is already defined:
        if key in self.index:
            return self.data[key]

        if interpolate.lower() == "previous":
            fn = self._get_previous
        elif interpolate.lower() == "linear":
            fn = self._get_linear_interpolate
        else:
            raise ValueError("'{}' interpolation unknown.".format(interpolate))

        return fn(key)

    def _get_previous(self, time):
        # In this case, bisect_left == bisect_right == bisect
        # And idx > 0 as we already handled other cases
        previous_idx = self.data.bisect(time) - 1
        time_idx = self.index[previous_idx]
        return self.data[time_idx]

    def _get_linear_interpolate(self, time):
        # TODO: put it into a 'get_previous_index' method
        idx = self.data.bisect_left(time)
        previous_time_idx = self.index[idx - 1]

        # TODO: check on left bound case

        # out of right bound case:
        if idx == len(self):
            return self.data[previous_time_idx]

        next_time_idx = self.index[idx]

        previous_value = self.data[previous_time_idx]
        next_value = self.data[next_time_idx]

        coeff = (time - previous_time_idx) / (
            next_time_idx - previous_time_idx)

        value = previous_value + coeff * (next_value - previous_value)
        return value

    def slice(self, start, end):  # noqa A003
        """Slice your timeseries for give interval.

        Args:
            start (datetime or str): lower bound
            end (datetime or str): upper bound

        Returns:
            TimeSeries sliced
        """
        start = timestamp_converter(start, self.tz)
        end = timestamp_converter(end, self.tz)

        newts = TimeSeries(**self._kwargs_special_keys)

        for key in self.data.irange(start, end, inclusive=(True, False)):
            newts[key] = self[key]

        should_add_left_closure = (start not in newts.index
                                   and start >= self.lower_bound)
        if should_add_left_closure:
            newts[start] = self[start]  # is applying get_previous on self

        return newts

    def set_interval(self, start, end, value):
        """Set a value for an interval of time.

        Args:
            start (datetime or str): lower bound
            end (datetime or str): upper bound
            value: the value to be set

        Returns:
            self

        Raises:
            NotImplementedError: when no default is set.
        """
        if not self._has_default:
            msg = "At the moment, you have to set a default for set_interval"
            raise NotImplementedError(msg)

        start = timestamp_converter(start, self.tz)
        end = timestamp_converter(end, self.tz)

        keys = self.data.irange(start, end, inclusive=(True, False))

        # Read the value at ``end`` before any deletion so it can be restored.
        last_value = self[end]

        # Materialise the keys first: deleting while iterating irange is unsafe.
        for key in list(keys):
            del self.data[key]

        self[start] = value
        self[end] = last_value

    def compact(self):
        """Convert this instance to a compact version: consecutive measurement
        of the same value are discarded.

        Returns:
            TimeSeries
        """
        ts = TimeSeries(**self._kwargs_special_keys)
        for time, value in self.items():
            should_set_it = ts.empty or (ts[time] != value)
            if should_set_it:
                ts[time] = value
        return ts

    def iterintervals(self, end=None):
        """Iterator that contain start, end of intervals.

        Args:
            end (datetime): right bound of last interval.
        """
        lst_keys = SortedList(self.index)
        if not end:
            end = self.upper_bound
        else:
            end = timestamp_converter(end, self.tz)
            if end not in lst_keys:
                lst_keys.add(end)

        for i, key in enumerate(lst_keys[:-1]):
            next_key = lst_keys[i + 1]
            if next_key > end:  # stop there
                # A plain ``return`` ends the generator; raising
                # StopIteration inside a generator becomes RuntimeError
                # (PEP 479).
                return
            yield key, next_key

    def equals(self, other, check_default=True, check_name=True):
        """Compare data, and optionally default and name, with ``other``.

        Raises:
            TypeError: when ``other`` is not an instance of this class.
        """
        if not isinstance(other, self.__class__):
            raise TypeError("Can't compare {} with {}".format(
                self.__class__.__name__, other.__class__.__name__))

        is_equal = self.data == other.data

        if check_default:
            is_equal = is_equal and self.default == other.default

        if check_name:
            is_equal = is_equal and self.name == other.name

        return is_equal

    @property
    def tz(self):
        """Timezone of the first key, or ``pytz.UTC`` when empty."""
        if self.empty:
            return pytz.UTC
        return str(self.index[0].tz)

    def tz_convert(self, tz):
        """Return a deep copy whose keys are converted to timezone ``tz``.

        Raises:
            ValueError: when ``tz`` is not a valid timezone name.
        """
        try:
            tz = pytz.timezone(tz)
        except pytz.UnknownTimeZoneError:
            raise ValueError('{} is not a valid timezone'.format(tz))

        ts = deepcopy(self)

        # Snapshot the keys: the loop pops and re-inserts entries, which
        # must not happen while iterating the live key view.
        for key in list(ts.index):
            ts[key.tz_convert(tz)] = ts.data.pop(key)

        return ts
class FederationRemoteSendQueue(object):
    """A drop in replacement for FederationSender

    Instead of sending anything, every call records a row under a
    monotonically increasing stream position; ``get_replication_rows``
    hands those rows out and old rows are pruned periodically or on ack.
    """

    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> list[user_id]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = SortedDict()

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()  # time in msec -> stream position

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge("synapse_federation_send_queue_%s_size" % (name,),
                       "", [], lambda: len(queue))

        for queue_name in [
            "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed",
            "edus", "device_messages", "pos_time", "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        """Allocate the next stream position and remember when it was issued."""
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        times = self.pos_time.keys()
        cutoff = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        old_times = times[:cutoff]  # slicing the view yields a list snapshot
        if not old_times:
            return

        # pos_time maps time -> position.  The value handed on must be a
        # stream *position*; passing the timestamp key itself would compare
        # a huge msec value against positions and wipe every queue.
        position_to_delete = max(self.pos_time[t] for t in old_times)
        for key in old_times:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    @staticmethod
    def _drop_before(queue, position):
        """Delete every entry of ``queue`` whose key is below ``position``."""
        cut = queue.bisect_left(position)
        for key in queue.keys()[:cut]:
            del queue[key]

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            self._drop_before(self.presence_changed, position_to_delete)

            user_ids = set(
                user_id
                for uids in self.presence_changed.values()
                for user_id in uids
            )

            self._drop_before(self.presence_destinations, position_to_delete)

            user_ids.update(
                user_id for user_id, _ in self.presence_destinations.values()
            )

            # Forget presence states no remaining row refers to.
            to_del = [
                user_id for user_id in self.presence_map if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            self._drop_before(self.keyed_edu_changed, position_to_delete)

            live_keys = set(self.keyed_edu_changed.values())

            to_del = [edu_key for edu_key in self.keyed_edu
                      if edu_key not in live_keys]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            self._drop_before(self.edus, position_to_delete)

            # Delete things out of device map
            self._drop_before(self.device_messages, position_to_delete)

    def notify_new_events(self, current_id):
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def build_and_send_edu(self, destination, edu_type, content, key=None):
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_read_receipt(self, receipt):
        """As per FederationSender

        Args:
            receipt (synapse.types.ReadReceipt):
        """
        # nothing to do here: the replication listener will handle it.
        pass

    def send_presence(self, states):
        """As per FederationSender

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so lets always just
        # filter here just in case.
        local_states = [s for s in states if self.is_mine_id(s.user_id)]

        self.presence_map.update({state.user_id: state for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_presence_to_destinations(self, states, destinations):
        """As per FederationSender

        Args:
            states (list[UserPresenceState])
            destinations (list[str])
        """
        for state in states:
            pos = self._next_pos()
            self.presence_map[state.user_id] = state
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per FederationSender"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        """Return the last stream position that has been handed out."""
        return self.pos - 1

    def federation_ack(self, token):
        """Drop everything queued before the acknowledged ``token``."""
        self._clear_queue_before_pos(token)

    def get_replication_rows(self, from_token, to_token, limit, federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position where the worker is
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point

        Returns:
            list of ``(position, row type id, row data)`` sorted by position.
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so lets delete everything its
        # acknowledged its seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(
                state=self.presence_map[user_id],
            )))

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append((pos, PresenceDestinationsRow(
                state=self.presence_map[user_id],
                destinations=list(dests),
            )))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in iteritems(keyed_edus):
            rows.append((pos, KeyedEduRow(
                key=edu_key,
                edu=self.keyed_edu[(destination, edu_key)],
            )))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in iteritems(device_messages):
            rows.append((pos, DeviceRow(
                destination=destination,
            )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
def test_bisect():
    """Bisect positions on a SortedDict keyed by the lowercase alphabet."""
    letters = string.ascii_lowercase
    temp = SortedDict(zip(letters, range(len(letters))))

    # 'a' is the first key; 'f' is the sixth, so inserting to its right gives 6.
    assert temp.bisect_left('a') == 0
    assert temp.bisect_right('f') == 6
    assert temp.bisect('f') == 6
class FileTable(object):
    """Consistent-hash ring mapping node hashes to the files they store.

    ``self.ring`` is a ``SortedDict`` of ``hash -> {'ip': ..., 'files': [...]}``.
    A file lives on the first node whose hash is >= the file's hash
    (wrapping to the first node) and on the two nodes after it.
    """

    def __init__(self, myip, server):
        super(FileTable, self).__init__()
        self.ring = SortedDict()
        self.hasher = hashlib.sha224
        self.myhash = self.hash(myip)
        self.add_node(myip)
        self.server = server

    def hash(self, key):
        """Return the truncated hex digest used as ring position of ``key``."""
        return self.hasher(key).hexdigest()[:-10]

    def hash_at(self, idx):
        """Return the node hash at ring position ``idx`` (wraps around)."""
        idx %= len(self.ring)
        # peekitem replaces the deprecated ``iloc`` positional lookup.
        return self.ring.peekitem(idx)[0]

    def add_node(self, ip):
        """Add the node ``ip`` to the ring with an empty file list."""
        node_hash = self.hash(ip)
        self.ring[node_hash] = {'ip': ip, 'files': []}
        SDFS_LOGGER.info('After adding %s - %s' % (ip, repr(self.ring)))

    def remove_node(self, failed_list):
        """Remove failed nodes and, if this node succeeds them, re-replicate.

        Args:
            failed_list: ips of the failed nodes; copied because the caller
                resets it soon afterwards.
        """
        start_time = time.time()
        # this is for debug
        flag = False
        # copy failed list because it will be reset soon
        ip_list = list(failed_list)

        # change the order of failed node
        # make sure the smaller id node be handled first
        if len(ip_list) == 2:
            # Compare ring *positions*.  Comparing the hash strings with
            # integers (as was done before) can never be true, so the
            # wrap-around case was silently skipped.
            first_idx = self.ring.index(self.hash(ip_list[0]))
            second_idx = self.ring.index(self.hash(ip_list[1]))
            wraps_around = first_idx == 0 and second_idx == len(self.ring) - 1
            if wraps_around or first_idx == second_idx + 1:
                ip_list[0], ip_list[1] = ip_list[1], ip_list[0]

        for ip in ip_list:
            node_hash = self.hash(ip)
            idx = self.ring.index(node_hash)
            ring_size = len(self.ring)
            two_failed = len(ip_list) == 2

            # if the node is not the direct successor of the failed node, do nothing
            if (two_failed and ip == ip_list[1]
                    and self.hash_at((idx + 2) % ring_size) == self.myhash):
                continue

            i_am_successor = self.hash_at((idx + 1) % ring_size) == self.myhash
            i_am_second = (self.hash_at((idx + 2) % ring_size) == self.myhash
                           and two_failed)
            if i_am_successor or i_am_second:
                # this is for debug
                flag = True
                heritage = set(self.ring[node_hash]['files'])
                my_files = set(self.ring[self.myhash]['files'])
                next_files = set(self.ring[self.hash_at(idx + 2)]['files'])
                # determine which files each of the three targets is missing
                to_me = heritage - my_files
                to_next = (heritage & my_files) - next_files
                to_next_next = heritage & my_files & next_files
                replica_list = [list(to_me), list(to_next), list(to_next_next)]
                self.ring[self.myhash]['files'].extend(to_me)
                # handle replica
                my_idx = self.ring.index(self.myhash)
                dest_ip_to_me = self.ring[self.hash_at(idx - 1)]['ip']
                dest_ip_to_next = self.ring[self.hash_at(my_idx + 1)]['ip']
                dest_ip_to_next_next = self.ring[self.hash_at(my_idx + 2)]['ip']
                dest_ip_list = [dest_ip_to_me, dest_ip_to_next, dest_ip_to_next_next]
                del self.ring[node_hash]
                self.server.handle_replica(replica_list, dest_ip_list, ip_list)
            else:
                del self.ring[node_hash]

        elapsed_time = time.time() - start_time
        if flag:
            print("It takes %s to handle replica" % elapsed_time)

    def _first_node_index(self, sdfs_filename):
        """Ring position of the first node responsible for ``sdfs_filename``."""
        pos = self.ring.bisect_left(self.hash(sdfs_filename))
        return pos if pos < len(self.ring) else 0

    def lookup(self, sdfs_filename):
        """Return the ips of the three nodes holding ``sdfs_filename``."""
        idx = self._first_node_index(sdfs_filename)
        return [self.ring[self.hash_at(idx + i)]['ip'] for i in range(3)]

    def insert(self, sdfs_filename):
        """Record ``sdfs_filename`` on its three responsible nodes."""
        idx = self._first_node_index(sdfs_filename)
        for i in range(3):
            node_hash = self.hash_at(idx + i)
            self.ring[node_hash]['files'].append(sdfs_filename)
            SDFS_LOGGER.info('Inserted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip']))

    def delete(self, sdfs_filename):
        """Remove ``sdfs_filename`` from its three responsible nodes.

        Raises:
            ValueError: if one of those nodes does not list the file.
        """
        idx = self._first_node_index(sdfs_filename)
        for i in range(3):
            node_hash = self.hash_at(idx + i)
            self.ring[node_hash]['files'].remove(sdfs_filename)
            SDFS_LOGGER.info('Deleted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip']))

    def update_replica(self, replica_list, dest_ip_list):
        """Merge ``replica_list[i]`` into the files of node ``dest_ip_list[i]``."""
        for i in range(3):
            entry = self.ring[self.hash(dest_ip_list[i])]
            entry['files'] = list(set(entry['files'] + replica_list[i]))

    def list_my_store(self):
        """Print the files stored on this node on one line."""
        print('-' * 5 + 'my files are:')
        print(' '.join(str(f) for f in self.ring[self.myhash]['files']))
        print('-' * 5 + 'that is all')

    def list_file_location(self):
        """Print, for every known file, the ips of the nodes storing it."""
        all_files = set()
        for value in self.ring.values():
            all_files.update(set(value['files']))
        for f in all_files:
            parts = [f + ' is stored at ']
            parts.extend(str(value['ip']) for value in self.ring.values()
                         if f in value['files'])
            print(' '.join(parts))
def test_bisect():
    """Check bisect, bisect_left and bisect_right on a letter-keyed dict."""
    # Keys are 'a'..'z'; each value is the letter's position in the alphabet.
    letters = string.ascii_lowercase
    temp = SortedDict((letter, index) for index, letter in enumerate(letters))

    # 'a' is the smallest key, so nothing sorts before it.
    assert temp.bisect_left('a') == 0
    # 'f' sits at index 5; the right-hand insertion point is one past it.
    assert temp.bisect_right('f') == 6
    assert temp.bisect('f') == 6
def test_bisect_key():
    """Check the bisect family on a SortedDict ordered by the modulo key."""
    temp = SortedDict(modulo, 7, ((val, val) for val in range(100)))

    for val in range(100):
        # Expected positions assume keys are grouped in runs of ten that
        # share the same ``val % 10``.
        group_start = (val % 10) * 10
        group_end = group_start + 10
        assert temp.bisect(val) == group_end
        assert temp.bisect_right(val) == group_end
        assert temp.bisect_left(val) == group_start
class AddressSpaceCollapseTransform(transforms.Transform):
    """
    Transform that shrinks selected segments of the address-space

    Given a list of address ranges in which we are not interested, the
    transform applies a linear scale to the address-space regions marked as
    Range.T_KEEP, a different scale is applied to Range.T_OMIT regions
    so that these occupy 5% of the total size of the Range.T_KEEP regions.
    """

    def __init__(self, *args, **kwargs):
        super(AddressSpaceCollapseTransform, self).__init__(*args, **kwargs)
        self.target_ranges = RangeSet()
        """List of ranges to keep and omit"""

        self.omit_scale = 1
        """Scale factor of the omitted address ranges"""

        # Until update_range() is called the whole axis is a single KEEP
        # range, which makes the transform the identity.
        self.target_ranges.append(Range(0, np.inf, Range.T_KEEP))

        self._precomputed_offsets = None
        """
        SortedDict mapping the start of every target range to the tuple
        (collapsed X of that start, scale applied inside the range)
        """

        self._inverse = False
        """Is this transform performing the direct or inverse operation"""

        self.has_inverse = False  # pyplot seems not to care
        self.is_separable = True
        self.input_dims = 2
        self.output_dims = 2
        self._precompute_offsets()

    def update_range(self, range_list):
        """
        Update parameters depending on the omit ranges.

        The range list must be complete, in the sense that it should
        mark every part of the address-range without holes as either
        omit or keep.

        :param range_list: iterable of ranges exposing ``start``, ``size``
            and ``rtype``; it is iterated more than once.
        """
        self.target_ranges = range_list
        # Total finite size of each kind of range; infinite ranges are left
        # out so that they do not swamp the ratio below.
        keep_size = sum(r.size for r in self.target_ranges
                        if r.rtype == Range.T_KEEP and r.size < np.inf)
        omit_size = sum(r.size for r in self.target_ranges
                        if r.rtype == Range.T_OMIT and r.size < np.inf)
        if omit_size != 0:
            # we want the omitted ranges to take up 5% of the keep ranges
            # in size
            # scale = <percent_of_keep_size_to_take> * sum(keep) / sum(omit)
            self.omit_scale = 0.05 * keep_size / omit_size
        self._precompute_offsets()

    def _precompute_offsets(self):
        """
        Rebuild the range-start -> (collapsed offset, scale) lookup from
        ``target_ranges`` and ``omit_scale``.
        """
        # reset previous offsets
        self._precomputed_offsets = SortedDict()
        x_collapsed = 0
        for r in self.target_ranges:
            r_scale = 1 if r.rtype == Range.T_KEEP else self.omit_scale
            self._precomputed_offsets[r.start] = (x_collapsed, r_scale)
            x_collapsed += r.size * r_scale

    def get_x(self, x_dataspace):
        """
        Scale the x from data-space coordinates to the collapsed
        address-space coordinates.

        The conversion uses a fast lookup of precomputed offsets based
        on the omit/keep range intervals.  Negative coordinates are
        returned unchanged.
        """
        if x_dataspace < 0:
            return x_dataspace
        base_idx = self._precomputed_offsets.bisect_left(x_dataspace)
        if (len(self._precomputed_offsets) == base_idx or
                self._precomputed_offsets.iloc[base_idx] > x_dataspace):
            # x lies strictly inside a range: use the range start before it
            key = self._precomputed_offsets.iloc[base_idx - 1]
        else:
            # x is exactly the start of a range
            key = x_dataspace
        x_collapsed, x_scale = self._precomputed_offsets[key]
        return x_collapsed + (x_dataspace - key) * x_scale

    def get_x_inv(self, x):
        """
        Inverse of get_x

        Find the address range corresponding to the plot range given
        by scanning all the target ranges

        :raises ValueError: when a range is neither T_KEEP nor T_OMIT
        """
        x_inverse = 0
        x_current = 0
        for r in self.target_ranges:
            if r.rtype == Range.T_KEEP:
                if x > x_current + r.size:
                    x_current += r.size
                    x_inverse += r.size
                else:
                    x_inverse += x - x_current
                    break
            elif r.rtype == Range.T_OMIT:
                scaled_size = r.size * self.omit_scale
                if x > x_current + scaled_size:
                    x_current += scaled_size
                    x_inverse += r.size
                else:
                    x_inverse += (x - x_current) / self.omit_scale
                    break
            else:
                logger.error("The range %s must have a valid type", r)
                # format the message explicitly: exceptions do not apply
                # %-style arguments the way logging calls do
                raise ValueError("Unexpected range in transform %s" % (r,))
        return x_inverse

    def transform_x(self, x):
        """
        Handle the X axis transformation
        """
        if self._inverse:
            return self.get_x_inv(x)
        else:
            return self.get_x(x)

    def transform_non_affine(self, datain):
        """
        The transform modifies only the X-axis, Y-axis is identity

        datain is a numpy array of size Nx2
        return a numpy array of size Nx2
        """
        # work on a copy so the caller's array is left untouched
        dataout = np.array(datain)
        for point in dataout:
            point[0] = self.transform_x(point[0])
        return dataout

    def inverted(self):
        """
        Return a new transform over the same ranges that performs the
        opposite operation of this one.
        """
        trans = AddressSpaceCollapseTransform()
        trans.target_ranges = self.target_ranges
        trans.omit_scale = self.omit_scale
        # The constructor precomputed offsets for its default identity
        # range; rebuild them for the ranges and scale copied above,
        # otherwise the forward direction of the copy would be the identity.
        trans._precompute_offsets()
        trans._inverse = not self._inverse
        return trans
class AddressSpaceCollapseTransform(transforms.Transform):
    """
    Transform that shrinks selected segments of the address-space

    Given a list of address ranges in which we are not interested, the
    transform applies a linear scale to the address-space regions marked as
    Range.T_KEEP, a different scale is applied to Range.T_OMIT regions
    so that these occupy 5% of the total size of the Range.T_KEEP regions.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._target_ranges = []
        """
        Unsorted list of target ranges, possibly with
        duplicates or overlapping ranges.
        """

        self._intervals = None
        """
        Numpy array that holds intervals [start,end,type].
        The type is 0 for omit ranges and 1 for keep ranges.
        """

        self._precomputed_offsets = None
        """
        SortedDict that caches the transformed X corresponding
        to the start of each interval
        """

        self.omit_scale = 1
        """Scale factor of the omitted address ranges"""

        self._inverse = False
        """Is this transform performing the direct or inverse operation"""

        self.has_inverse = False  # pyplot seems not to care
        self.is_separable = True
        self.input_dims = 2
        self.output_dims = 2

    def set_ranges(self, ranges):
        """
        The ranges here represent the parts of the address space
        we want to show.

        Cached intervals and offsets are dropped and rebuilt lazily by the
        next call to :meth:`get_x` or :meth:`get_x_inv`.

        :param ranges: list of intervals in the form [(start, end), ...]
        :type ranges: list of 2-tuples
        """
        logger.debug("Set collapse ranges (%d)", len(ranges))
        self._target_ranges = ranges
        self._precomputed_offsets = None
        self._intervals = None

    def get_ranges(self):
        """See :meth:`set_ranges`."""
        return self._target_ranges

    def _merge(self, intervals):
        """
        Given a set of intervals [(start, end), ...]
        merge the overlapping intervals.

        Intervals that overlap or touch are fused into one.
        This is O(n*log(n)) but if all goes well is only
        done once for every plot.

        :return: list of (start, end) tuples sorted by start
        """
        out = []
        for item in sorted(intervals, key=lambda k: (k[0], k[1])):
            start, end = item[0], item[1]
            if out and start <= out[-1][1]:
                # Overlaps (or touches) the interval being built.  Only ever
                # extend the end: an interval nested inside the current one
                # must not shrink it.
                if end > out[-1][1]:
                    out[-1] = (out[-1][0], end)
            else:
                out.append((start, end))
        logger.debug("Merge collapse ranges (remaining %d)", len(out))
        return out

    def _gen_omit_scale(self, intervals):
        """
        Generate the scale used to collapse omit ranges.
        The scale is computed so that the omitted ranges take
        up 5% of the total size of the keep ranges.

        :param intervals: array of [start, end, type] rows; the caller
            passes it without the trailing omit interval that extends to
            infinity.  ``omit_scale`` is left untouched when the omit
            ranges have zero total size.
        """
        keep = intervals[intervals[:, 2] == 1]
        omit = intervals[intervals[:, 2] == 0]
        keep_size = np.sum(keep[:, 1] - keep[:, 0])
        omit_size = np.sum(omit[:, 1] - omit[:, 0])
        if omit_size != 0:
            # we want the omitted ranges to take up 5% of the keep ranges
            # in size
            # scale = <percent_of_keep_size_to_take> * sum(keep) / sum(omit)
            self.omit_scale = 0.05 * keep_size / omit_size
        logger.debug("Omit scale 5%%: total-keep:%d total-omit:%d scale:%s",
                     keep_size, omit_size, self.omit_scale)

    def _range_len(self, start, end, step):
        """Return the number of steps of size *step* from start up to end."""
        return (end - start - 1) // step + 1

    def _gen_intervals(self):
        """
        Generate the non-overlapping intervals to display in the axis.
        The intervals generated cover the whole axis without holes.
        """
        logger.debug("Generate collapse intervals")
        # merge ranges O(n*log(n)) and sort them
        merged_intervals = self._merge(self._target_ranges)
        if len(merged_intervals) == 0:
            self._intervals = np.zeros((0, 3))
            return

        # try not using fancy vectorization
        intervals = []
        prev_end = 0
        for r in merged_intervals:
            if prev_end < r[0]:
                # omit the gap before this keep interval
                intervals.append((prev_end, r[0], 0))
            # keep
            intervals.append((r[0], r[1], 1))
            prev_end = r[1]
        # last always omitted to infinity
        intervals.append((prev_end, np.inf, 0))
        self._intervals = np.array(intervals)
        # the infinite tail is excluded from the scale computation
        self._gen_omit_scale(self._intervals[:-1])

    def _precompute_offsets(self):
        """
        Precompute the transformed X base values for the start of each
        interval on the axis. The base addresses are used to look up the
        closest interval start when transforming.
        """
        self._gen_intervals()
        logger.debug("Precompute collapse range offsets")
        # reset previous offsets
        self._precomputed_offsets = SortedDict()
        x_collapsed = 0
        for r in self._intervals:
            r_scale = 1 if r[2] else self.omit_scale
            self._precomputed_offsets[r[0]] = (x_collapsed, r_scale)
            x_collapsed += (r[1] - r[0]) * r_scale

    def get_x(self, x_dataspace):
        """
        Get the transformed X coordinate.

        This is just a lookup in the precomputed offsets and
        some calculations, should be O(log(n)) in the number of
        intervals (which is expected to be at most in the order
        of 10**3~10**4)

        Negative coordinates, and every coordinate when no range has been
        set, are returned unchanged.
        """
        if self._precomputed_offsets is None:
            self._precompute_offsets()

        if x_dataspace < 0 or len(self._precomputed_offsets) == 0:
            return x_dataspace
        base_idx = self._precomputed_offsets.bisect_left(x_dataspace)
        if (len(self._precomputed_offsets) == base_idx or
                self._precomputed_offsets.iloc[base_idx] > x_dataspace):
            # x lies strictly inside an interval: use the start before it
            key = self._precomputed_offsets.iloc[base_idx - 1]
        else:
            # x is exactly the start of an interval
            key = x_dataspace
        x_collapsed, x_scale = self._precomputed_offsets[key]
        return x_collapsed + (x_dataspace - key) * x_scale

    def get_x_inv(self, x):
        """
        Inverse of get_x

        Find the address range corresponding to the plot range given
        by scanning all the target ranges

        XXX: this may be made faster by using a reverse form of the
        precomputed offsets but there is no need for such an effort because
        the inverse transform is not invoked as much.

        :raises ValueError: when an interval type is neither 0 nor 1
        """
        if self._precomputed_offsets is None:
            self._precompute_offsets()

        # get_x() is the identity for negative coordinates and when no
        # interval exists, so its inverse must be the identity there too
        # (the scan below would otherwise return 0 for an empty interval
        # list and rescale negative values).
        if x < 0 or len(self._intervals) == 0:
            return x

        x_inverse = 0
        x_current = 0
        for r in self._intervals:
            r_size = r[1] - r[0]
            if r[2] == 1:
                # range is type KEEP
                if x > x_current + r_size:
                    x_current += r_size
                    x_inverse += r_size
                else:
                    x_inverse += x - x_current
                    break
            elif r[2] == 0:
                # range is type OMIT
                scaled_size = r_size * self.omit_scale
                if x > x_current + scaled_size:
                    x_current += scaled_size
                    x_inverse += r_size
                else:
                    x_inverse += (x - x_current) / self.omit_scale
                    break
            else:
                logger.error("The range %s must have a valid type", r)
                # format the message explicitly: exceptions do not apply
                # %-style arguments the way logging calls do
                raise ValueError("Unexpected range in transform %s" % (r,))
        return x_inverse

    def transform_x(self, x):
        """
        Handle the X axis transformation
        """
        if self._inverse:
            return self.get_x_inv(x)
        else:
            return self.get_x(x)

    def transform_non_affine(self, datain):
        """
        The transform modifies only the X-axis, Y-axis is identity

        datain is a numpy array of size Nx2
        return a numpy array of size Nx2
        """
        # work on a copy so the caller's array is left untouched
        dataout = np.array(datain)
        for point in dataout:
            point[0] = self.transform_x(point[0])
        return dataout

    def inverted(self):
        """
        Return a new transform sharing this one's ranges, intervals,
        offsets and scale that performs the opposite operation.
        """
        trans = AddressSpaceCollapseTransform()
        trans._target_ranges = self._target_ranges
        trans._intervals = self._intervals
        trans._precomputed_offsets = self._precomputed_offsets
        trans.omit_scale = self.omit_scale
        trans._inverse = not self._inverse
        return trans
class SnapshotGraph(object): def __init__(self, **attr): self.graph = {} self.graph.update(attr) self.snapshots = SortedDict() @property def name(self): """String identifier of the snapshot graph. This snapshot graph attribute appears in the attribute dict SnapshotGraph.graph keyed by the string `"name"`. as well as an attribute (technically a property) `SnapshotGraph.name`. This is entirely user controlled. """ return self.graph.get('name', '') @name.setter def name(self, s): self.graph['name'] = s def __str__(self): """Return the snapshot graph name. Returns ------- name : string The name of the snapshot graph. Examples -------- >>> G = dnx.SnapshotGraph(name='foo') >>> str(G) 'foo' """ return self.name def __len__(self): """Return the number of snapshots. Use: 'len(G)'. Returns ------- num_snapshots : int The number of snapshots in the graph. Examples -------- >>> nxG1 = nx.Graph() >>> nxG2 = nx.Graph() >>> >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> nxG2.add_edges_from([(1, 4), (1, 3)]) >>> >>> G = dnx.SnapshotGraph() >>> G.add_snapshot(graph=nxG1) >>> G.add_snapshot(graph=nxG2) >>> len(G) 2 """ return len(self.snapshots) def __contains__(self, graph): """Return True if graph in the snapshot graph, False otherwise. Use: 'graph in G'. Parameters ---------- graph: networkx graph object networkx graph to be looked for into snapshot graph. Returns ------- None Examples -------- >>> nxG1 = nx.Graph() >>> nxG2 = nx.Graph() >>> >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> nxG2.add_edges_from([(1, 4), (1, 3)]) >>> >>> G = dnx.SnapshotGraph() >>> G.add_snapshot(graph=nxG1) >>> G.add_snapshot(graph=nxG2) >>> nxG1 in G True """ try: return graph in self.snapshots.values() except TypeError: return False def __iter__(self): """Iterates through snapshots in snapshot graph. 
Returns ------- Iterable of snapshots Examples -------- >>> nxG1 = nx.Graph() >>> nxG2 = nx.Graph() >>> >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> nxG2.add_edges_from([(1, 4), (1, 3)]) >>> >>> G = dnx.SnapshotGraph() >>> G.add_snapshot(graph=nxG1) >>> G.add_snapshot(graph=nxG2) >>> for snapshot in G: print(True) True True """ return iter(self.snapshots.values()) def insert(self, graph, start=None, end=None, time=None): """Insert a graph into the snapshot graph, with specified intervals. Parameters ---------- graph: networkx graph object A networkx graph to be inserted into snapshot graph. start: start of the interval, inclusive end: end of the interval, exclusive time: timestamp for impulses, cannot be used together with (start, end) Returns ------- None Examples -------- >>> nxG1 = nx.Graph() >>> nxG1.add_edges_from([(1, 2), (1, 3)]) >>> G = dnx.SnapshotGraph() >>> G.insert(nxG1, start=0, end=3) """ if time is not None and (start or end): raise ValueError('Time and (start or end) cannot both be specified.') elif time is not None: self.snapshots.update({(time, time): graph}) elif start is None or end is None: raise ValueError('Either time or both start and end must be specified.') elif start > end: raise ValueError('Start of the interval must be lower or equal to end') else: self.snapshots.update({(start, end): graph}) def add_snapshot(self, ebunch=None, graph=None, start=None, end=None, time=None): """Add a snapshot with a bunch of edge values. Parameters ---------- ebunch : container of edges, optional (default= None) Each edge in the ebunch list will be included to all added graphs. graph : networkx graph object, optional (default= None) networkx graph to be inserted into snapshot graph. 
start: start timestamp, inclusive end: end timestamp, exclusive time: timestamp for impulses, cannot be used together with (start, end) Returns ------- None Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 4), (1, 3)], start=0, end=3) """ if not graph: g = Graph() g.add_edges_from(ebunch) else: g = graph if time is not None and (start or end): raise ValueError('Time and (start or end) cannot both be specified.') elif time is not None: self.insert(g, time=time) elif start is None and end is None: raise ValueError('Either time or both start and end must be specified.') else: self.insert(g, start=start, end=end) def subgraph(self, nbunch, sbunch=None, start=None, end=None): """Return a snapshot graph containing only the nodes in bunch, and snapshot indexes in sbunch. Parameters ---------- nbunch : container of nodes Each node in the nbunch list will be included in all subgraphs indexed in sbunch. sbunch : container of edges, optional (default= None) Each snapshot index in this list will be included in the returned list of subgraphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- snap_graph : SnapshotGraph object Contains only the nodes in bunch, and snapshot indexes in sbunch. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=0, end=3) >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=3, end=10) >>> H = G.subgraph([4, 6]) >>> type(H) <class 'snapshotgraph.SnapshotGraph'> >>> list(H.get([0])[0].edges(data=True)) [(4, 6, {})] """ subgraph = SnapshotGraph() subgraph.graph = self.graph if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: for key, snapshot in self._get(sbunch=sbunch): subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1]) else: for key, snapshot in self._get(start=start, end=end, include_interval=True): subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1]) return subgraph def degree(self, sbunch=None, nbunch=None, start=None, end=None, weight=None): """Return a list of tuples containing the degrees of each node in each snapshot Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node degrees. It is highly recommended that this list is sequential, however it can be out of order. nbunch : container of nodes, optional (default= None) Each node in the nbunch list will be included in the returned list of node degrees. start: start timestamp, inclusive end: end timestamp, exclusive weight : string, optional (default= None) The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. The degree is the sum of the edge weights adjacent to the node. Returns ------- degree_list : list List of DegreeView objects containing the degree of each node, indexed by requested snapshot. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3. 
end=10) >>> G.degree(sbunch=[1]) [DegreeView({1: 2, 4: 1, 3: 1})] >>> G.degree(nbunch=[1, 2]) [DegreeView({1: 2, 2: 1}), DegreeView({1: 2})] """ # returns a list of degrees for each graph snapshot in snapshots # use generator to create list of degrees if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: if nbunch: return [graph.degree(nbunch, weight=weight) for graph in self._get(sbunch=sbunch)] else: return [graph.degree(graph, weight=weight) for graph in self._get(sbunch=sbunch)] else: if nbunch: return [graph.degree(nbunch, weight=weight) for graph in self._get(start=start, end=end)] else: return [graph.degree(graph, weight=weight) for graph in self._get(start=start, end=end)] def number_of_nodes(self, sbunch=None, start=None, end=None): """Gets number of nodes in each snapshot requested in 'sbunch'. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of number of nodes in the snapshot. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- num_nodes : list A list of of the number of nodes in each requested snapshot. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.number_of_nodes(sbunch=[1]) [3] >>> G.number_of_nodes(sbunch=[0, 1]) [3, 3] """ # returns a list of the number of nodes in each graph in the range if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.number_of_nodes() for graph in self._get(sbunch=sbunch)] else: return [graph.number_of_nodes() for graph in self._get(start=start, end=end)] def order(self, sbunch=None, start=None, end=None): """Returns order of each graph requested in 'sbunch'. 
Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node orders. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- snapshot_orders : list A list of the orders of each snapshot. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.order(sbunch=[1]) [3] >>> G.order(sbunch=[0, 1]) [3, 3] """ # returns a list of the order of the graph in the range if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.order() for graph in self._get(sbunch=sbunch)] else: return [g.order() for g in self._get(start=start, end=end)] def has_node(self, n, sbunch=None, start=None, end=None): """Gets boolean list of if a snapshot in 'sbunch' contains node 'n'. Parameters ---------- n : node Node to be checked for in requested snapshots. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of if the snapshot graph includes the node. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- List of boolean values if index in sbunch contains n. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.has_node(1, sbunch=[1]) [True] >>> G.has_node(1) [True, True] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.has_node(n) for graph in self._get(sbunch=sbunch)] else: return [graph.has_node(n) for graph in self._get(start=start, end=end)] def is_multigraph(self, sbunch=None, start=None, end=None): """Returns a list of boolean values for if the graph at the index is a multigraph. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of booleans. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- mutli_list : list List of boolean values if index in sbunch is a multigraph. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.is_multigraph(sbunch=[0, 1]) [False, False] >>> G.is_multigraph() [False, False] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.is_multigraph() for graph in self._get(sbunch=sbunch)] else: return [graph.is_multigraph() for graph in self._get(start=start, end=end)] def is_directed(self, sbunch=None, start=None, end=None): """Returns a list of boolean values for if the graph at the index is a directed graph. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of booleans. It is highly recommended that this list is sequential, however it can be out of order. 
start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- is_direct_list : list List of boolean values if index in sbunch is a directed graph. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.is_directed(sbunch=[0, 1]) [False, False] >>> G.is_directed() [False, False] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.is_directed() for graph in self._get(sbunch=sbunch)] else: return [graph.is_directed() for graph in self._get(start=start, end=end)] def to_directed(self, sbunch=None, start=None, end=None): """Returns a list of networkx directed graph objects. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of directed graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- direct_list : list List of networkx directed graph objects. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.to_directed(sbunch=[0, 1]) [<networkx.classes.digraph.DiGraph object at 0x7f1a6de49dd8>, <networkx.classes.digraph.DiGraph object at 0x7f1a6de49e10>] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.to_directed() for graph in self._get(sbunch=sbunch)] else: return [graph.to_directed() for graph in self._get(start=start, end=end)] def to_undirected(self, sbunch=None, start=None, end=None, ): """Returns a list of networkx graph objects. 
Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of undirected graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- undirect_list : list List of networkx graph objects. Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.to_directed(sbunch=[0, 1]) [<networkx.classes.graph.Graph object at 0x7ff532219e10>, <networkx.classes.graph.Graph object at 0x7ff532219e48>] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.to_undirected() for graph in self._get(sbunch=sbunch)] else: return [graph.to_undirected() for graph in self._get(start=start, end=end)] def size(self, sbunch=None, start=None, end=None, weight=None): """Returns the size of each graph index as specified in sbunch as a list. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of sizes. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive weight : string, optional (default=None) The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. Returns ------- size_list: list List of sizes of each graph indexed in sbunch. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.size(sbunch=[0, 1]) [2, 2] >>> G.size() [2, 2] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [graph.size(weight=weight) for graph in self._get(sbunch=sbunch)] else: return [graph.size(weight=weight) for graph in self._get(start=start, end=end)] def _get(self, sbunch=None, start=None, end=None, include_interval=False, split_overlaps=False): """Returns a list of graphs specified in sbunch. Hidden utility tool for other functions. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive include_interval: if True, return snapshots with its corresponding intervals split_overlaps: if True, when query by time interval, split snapshots if query interval overlaps with any snapshots' intervals. For ex: graph G contains snapshots with time intervals [(0,4),(4,6),(6,10)]. If query interval is [2,10], the snapshot with interval (0,4) will be split into two snapshots (0,2) and (2,4), both of which have the same copy of the original snapshot. This parameter is used for updating graphs by interval. For intance, with the example above, if you want to update interval (2,10), then the snapshot at (0,2) won't be updated. Returns ------- If include_interval: List of tuples of (interval, networkx graph object). else: List of networkx graph objects. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G._get(sbunch=[0]) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>] >>> G._get() [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] >>> G._get(start=2, end=6) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] """ if include_interval: graphs = self.snapshots.items() else: graphs = self.snapshots.values() if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: # if retrieve by indexes for index in sbunch: yield graphs[index] else: # if retrieve by interval if start is None: min_idx = 0 else: min_idx = self.snapshots.bisect_left((start,)) # Decrease 1 index if start is in the middle of an interval # Eg: if Keys = [(2,5)(5,6)], start=3 won't retrieve (2,5) as we want, # therefore, decrease 1 index to include (2,5). 
If start=5, then we won't need to change if min_idx > 0 and start < self.snapshots.keys()[min_idx][0]: if split_overlaps: # Eg: if Keys = [(2,5)(5,6)] and start=3, split (2,5) into (2,3) and (3,5) key, g = self.snapshots.popitem(min_idx - 1) self.insert(g, key[0], start) self.insert(copy.deepcopy(g), start, key[1]) else: min_idx -= 1 if end is None: max_idx = len(self.snapshots) else: max_idx = self.snapshots.bisect_left((end,)) # Split the snapshot if 'end' is in the middle of an interval # Eg: if Keys = [(2,5)(5,9)] and end=7, split (5,9) into (5,7) and (7,9) if split_overlaps and max_idx < len(self.snapshots) and end < self.snapshots.keys()[max_idx][1]: key, g = self.snapshots.popitem(max_idx) self.insert(g, key[0], end) self.insert(copy.deepcopy(g), end, key[1]) for graph in graphs[min_idx: max_idx]: yield graph def get(self, sbunch=None, start=None, end=None): """Returns a list of graphs specified in sbunch. Interface function for users. Parameters ---------- sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of graphs. It is highly recommended that this list is sequential, however it can be out of order. start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- List of networkx graph objects. 
Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G._get(sbunch=[0]) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>] >>> G._get() [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] >>> G._get(start=2, end=6) [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>] """ return [snapshot for snapshot in self._get(sbunch, start, end)] def add_nodes_from(self, nbunch, sbunch=None, start=None, end=None, **attrs): """Adds nodes to snapshots in sbunch. Note: This function may lead to increase in number of snapshots if changes occur within a snapshot. Parameters ---------- nbunch : container of nodes Each node in the nbunch list will be added to all graphs indexed in sbunch. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node degrees. It is highly recommended that this list is sequential, however it can be out of order. 
start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- None Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.add_nodes_from([5, 6, 7], [0]) >>> G.add_nodes_from([8, 9, 10, 11], [1]) >>> nx.adjacency_matrix(G.get()[0]).todense() [[0 1 1 0 0 0] [1 0 0 0 0 0] [1 0 0 0 0 0] [0 0 0 0 0 0] [0 0 0 0 0 0] [0 0 0 0 0 0]] >>> nx.adjacency_matrix(G.get()[1]).todense() [[0 1 1 0 0 0 0] [1 0 0 0 0 0 0] [1 0 0 0 0 0 0] [0 0 0 0 0 0 0] [0 0 0 0 0 0 0] [0 0 0 0 0 0 0] [0 0 0 0 0 0 0]] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: for graph in self._get(sbunch=sbunch): graph.add_nodes_from(nbunch, **attrs) else: for graph in self._get(start=start, end=end, split_overlaps=True): graph.add_nodes_from(nbunch, **attrs) def add_edges_from(self, ebunch, sbunch=None, start=None, end=None, **attrs): """Adds edges to snapshots in sbunch. Note: This function may lead to increase in number of snapshots if changes occur within a snapshot. Parameters ---------- ebunch : container of edges Each edge in the ebunch list will be added to all graphs indexed in sbunch. sbunch : container of snapshot indexes, optional (default= None) Each snapshot index in this list will be included in the returned list of node degrees. It is highly recommended that this list is sequential, however it can be out of order. 
start: start timestamp, inclusive end: end timestamp, exclusive Returns ------- None Examples -------- >>> G = dnx.SnapshotGraph() >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3) >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10) >>> G.add_edges_from([(5, 6), (7, 6)], [0]) >>> G.add_edges_from([(8, 9), (10, 11)], [0, 1]) >>> nx.adjacency_matrix(G.get()[0]).todense() [[0 1 1 0 0 0 0 0 0 0] [1 0 0 0 0 0 0 0 0 0] [1 0 0 0 0 0 0 0 0 0] [0 0 0 0 1 0 0 0 0 0] [0 0 0 1 0 1 0 0 0 0] [0 0 0 0 1 0 0 0 0 0] [0 0 0 0 0 0 0 1 0 0] [0 0 0 0 0 0 1 0 0 0] [0 0 0 0 0 0 0 0 0 1] [0 0 0 0 0 0 0 0 1 0]] >>> nx.adjacency_matrix(G.get()[1]).todense() [[0 1 1 0 0 0 0] [1 0 0 0 0 0 0] [1 0 0 0 0 0 0] [0 0 0 0 1 0 0] [0 0 0 1 0 0 0] [0 0 0 0 0 0 1] [0 0 0 0 0 1 0]] """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: for graph in self._get(sbunch=sbunch): graph.add_edges_from(ebunch, **attrs) else: for graph in self._get(start=start, end=end, split_overlaps=True): graph.add_edges_from(ebunch, **attrs) @staticmethod def load_from_txt(path, delimiter=";", comments="#", start='start', end='end'): """Read snapshot graph in from path. Every line in the file must be an adjacency matrix, with rows separated by delimiter. Parameters ---------- path : string or file Filename to read. comments : string, optional Marker for comment lines start: string, optional Marker for start timestamps end: string, optional Marker for end timestamps delimiter : string, optional Separator for rows in matrix. The default is ;. Cannot be whitespace or \n. Returns ------- G: SnapshotGraph The graph corresponding to the list of adjacency matrices. 
Examples -------- >>> G=dnx.Snapshotgraph.load_from_txt("my_dygraph.txt") """ if delimiter == ' ' or delimiter == '\n': raise ValueError("Delimiter cannot be " + delimiter + ".") sg = SnapshotGraph() with open(path, 'r') as file: for line in file: p = line.find(comments) if p >= 0: line = line[:p] if not len(line): continue p = min(line.find(start), line.find(end)) interval = [None, None] for item in line[p:].split(): key, value = item.split('=') try: value = float(value) except: raise ValueError('Value of "{}" must be float.'.format(key)) if key == start: interval[0] = value else: interval[1] = value if interval[0] is None or interval[1] is None: raise ValueError('A snapshot does not include its interval') line = line[:p].strip() matrix = [] for row in line.split(delimiter): matrix.append(row.split(' ')) g = from_numpy_array(np.array(matrix)) sg.insert(g, start=interval[0], end=interval[1]) return sg def save_to_txt(self, path, delimiter=";", start='start', end='end'): """Write snapshot graph to path. Every line in the file will be an adjacency matrix. Parameters ---------- path : string or file Filename to write. start: string, optional Marker for start timestamps end: string, optional Marker for end timestamps delimiter : string, optional Separator for rows in matrix. The default is ;. Cannot be whitespace or \n. 
Examples -------- >>> G.save_to_txt("my_dygraph.txt") """ if len(self) == 0: raise ValueError("Given graph is empty.") if delimiter == ' ' or delimiter == '\n': raise ValueError("Delimiter cannot be " + delimiter + ".") with open(path, 'w') as file: for interval, graph in self._get(include_interval=True): m = adjacency_matrix(graph).todense() line = delimiter.join(' '.join(x for x in y) for y in np.asarray(m, dtype=str)) + ' ' + start + '=' +\ str(interval[0]) + ' ' + end + '=' + str(interval[1]) + '\n' file.write(line) def compute_network_statistic(self, nx_statistic_function, sbunch=None, start=None, end=None, **kwargs): """Compute networkx statistics on each snapshot. Parameters ---------- nx_statistic_function : function from networkx.algorithms Statistic function to calculate. sbunch: snapshots indices to compute statistic start: start timestamp, inclusive end: end timestamp, exclusive kwargs : optional inputs for nx_statistic_function Examples -------- >>> G.compute_network_statistic(nx.algorithms.centrality.degree_centrality) """ if sbunch and (start or end): raise ValueError('Either sbunch or (start and end) can be specified.') elif sbunch: return [nx_statistic_function(graph, **kwargs) for graph in self._get(sbunch=sbunch)] else: return [nx_statistic_function(graph, **kwargs) for graph in self._get(start=start, end=end)]
class LevelTrace(object):
    """Piecewise-constant level of some entity across a time span.

    The trace is stored in a ``SortedDict`` mapping a time to the level
    that holds from that time until the next entry. The level before the
    first entry is 0, and the last entry must be 0 (the trace is
    "terminated").
    """

    def __init__(self, trace=None):
        """Create a new level trace, possibly copying from an existing object.

        :param trace: ``None`` for an empty trace, another ``LevelTrace``
            to copy, or anything ``SortedDict`` accepts (for example a
            ``dict`` of ``time -> level``)
        :raises ValueError: if the last entry of the trace is not 0
        """
        if trace is None:
            self._trace = SortedDict()
        elif isinstance(trace, LevelTrace):
            self._trace = SortedDict(trace._trace)
        else:
            self._trace = SortedDict(trace)

        # Make sure trace is terminated (returns to 0)
        if len(self._trace) > 0:
            last_time, last_lvl = self._trace.peekitem(-1)
            if last_lvl != 0:
                raise ValueError(
                    "Trace not terminated - ends with {}:{}!".format(
                        last_time, last_lvl)
                )

    def __repr__(self):
        items = ', '.join("{!r}: {!r}".format(k, v)
                          for k, v in self._trace.items())
        return "LevelTrace({{{}}})".format(items)

    def __eq__(self, other):
        # Let Python try the reflected comparison (and finally fall back
        # to identity) instead of failing with AttributeError on foreign
        # operands.
        if not isinstance(other, LevelTrace):
            return NotImplemented
        return self._trace == other._trace

    def __neg__(self):
        return self.map(operator.neg)

    def __sub__(self, other):
        return self.zip_with(other, operator.sub)

    def __add__(self, other):
        return self.zip_with(other, operator.add)

    def start(self):
        """Return the time of the first entry in the trace (0 if empty)."""
        if len(self._trace) == 0:
            return 0
        return self._trace.keys()[0]

    def end(self):
        """Return the time of the last entry in the trace (0 if empty).

        Because a trace must be terminated, this is the point from which
        the level is 0 onwards.
        """
        if len(self._trace) == 0:
            return 0
        return self._trace.keys()[-1]

    def length(self):
        """Return ``end() - start()`` (0 for an empty trace)."""
        if len(self._trace) == 0:
            return 0
        return self.end() - self.start()

    def get(self, time):
        """Return the level at the given time (0 before the first entry)."""
        ix = self._trace.bisect_right(time) - 1
        if ix < 0:
            return 0
        (_, lvl) = self._trace.peekitem(ix)
        return lvl

    def map(self, fn):
        """Return a new trace with ``fn`` applied to every stored level."""
        return LevelTrace({t: fn(v) for t, v in self._trace.items()})

    def map_key(self, fn):
        """Return a new trace built from ``fn(time, level)`` pairs.

        :param fn: Called for every entry; must return a
            ``(new_time, new_level)`` pair
        """
        return LevelTrace(dict(fn(t, v) for t, v in self._trace.items()))

    def shift(self, time):
        """Return a new trace with every entry moved by ``time``."""
        return self.map_key(lambda t, v: (t + time, v))

    def __getitem__(self, where):
        """Look up a level (non-slice) or cut out a sub-trace (slice).

        For a slice, the result is a copy that is set to 0 outside of the
        slice and shifted so that ``where.start`` (if given) becomes 0.

        :raises ValueError: if the slice has a step
        """
        # For non-slices defaults to get
        if not isinstance(where, slice):
            return self.get(where)
        if where.step is not None:
            raise ValueError("Stepping meaningless for LevelTrace!")

        # Limit
        res = LevelTrace(self)
        if where.start is not None and where.start > res.start():
            res.set(res.start(), where.start, 0)
        if where.stop is not None and where.stop < res.end():
            res.set(where.stop, res.end(), 0)

        # Shift, if necessary
        if where.start is not None:
            res = res.shift(-where.start)
        return res

    def set(self, start, end, level):
        """Set the level for some time range.

        Does nothing if ``start >= end``.

        :param start: Start of range
        :param end: End of range
        :param level: Level to set
        """
        # Check errors, no-ops
        if start >= end:
            return

        # Determine levels at start (and before start)
        start_ix = self._trace.bisect_right(start) - 1
        prev_lvl = lvl = 0
        if start_ix >= 0:
            (t, lvl) = self._trace.peekitem(start_ix)
            # If we have no entry exactly at our start point, the
            # level was constant at this point before
            if start > t:
                prev_lvl = lvl
            # Otherwise look up previous level. Default 0 (see above)
            elif start_ix > 0:
                (_, prev_lvl) = self._trace.peekitem(start_ix - 1)

        # Prepare start: an entry is only needed where the level changes
        if prev_lvl == level:
            if start in self._trace:
                del self._trace[start]
        else:
            self._trace[start] = level

        # Remove all in-between states, remembering the last level seen so
        # the end marker can restore it. The key list is materialised
        # first because entries are deleted while walking it.
        for time in list(self._trace.irange(start, end,
                                            inclusive=(False, False))):
            lvl = self._trace[time]
            del self._trace[time]

        # Add or remove end, if necessary
        if end not in self._trace:
            if lvl != level:
                self._trace[end] = lvl
        elif level == self._trace[end]:
            del self._trace[end]

    def add(self, start, end, amount):
        """Increase the level for some time range.

        Does nothing if ``start == end`` or ``amount == 0``.

        :param start: Start of range
        :param end: End of range
        :param amount: Amount to add to level
        :raises ValueError: if ``start > end``
        """
        # Check errors, no-ops
        if start > end:
            raise ValueError("End needs to be after start!")
        if start == end or amount == 0:
            return

        # Determine levels at start (and before start)
        start_ix = self._trace.bisect_right(start) - 1
        prev_lvl = lvl = 0
        if start_ix >= 0:
            (t, lvl) = self._trace.peekitem(start_ix)
            # If we have no entry exactly at our start point, the
            # level was constant at this point before
            if start > t:
                prev_lvl = lvl
            # Otherwise look up previous level. Default 0 (see above)
            elif start_ix > 0:
                (_, prev_lvl) = self._trace.peekitem(start_ix - 1)

        # Prepare start. The levels can only match when an entry exists
        # exactly at start (amount is non-zero), so the delete is safe.
        if prev_lvl == lvl + amount:
            del self._trace[start]
        else:
            self._trace[start] = lvl + amount

        # Update all in-between states. Only values of existing keys are
        # replaced, so the key set is not changed during iteration.
        for time in self._trace.irange(start, end, inclusive=(False, False)):
            lvl = self._trace[time]
            self._trace[time] = lvl + amount

        # Add or remove end, if necessary
        if end not in self._trace:
            self._trace[end] = lvl
        elif lvl + amount == self._trace[end]:
            del self._trace[end]

    def __delitem__(self, where):
        """Set the level to 0 over the given slice.

        :raises ValueError: if ``where`` is not a slice or has a step
        """
        # Cannot set single values
        if not isinstance(where, slice):
            raise ValueError("Cannot set level for single point, pass an interval!")
        if where.step is not None:
            raise ValueError("Stepping meaningless for LevelTrace!")

        # Set range to zero
        start = (where.start if where.start is not None else self.start())
        end = (where.stop if where.stop is not None else self.end())
        self.set(start, end, 0)

    def __setitem__(self, where, value):
        """Set a constant level, or paste another trace, over a slice.

        When ``value`` is a ``LevelTrace`` it is interpreted relative to
        ``where.start`` (if given) and must fit before ``where.stop`` (if
        given).

        :raises ValueError: if ``where`` is not a slice or has a step, or
            if a trace value starts before 0 or does not fit the slice
        """
        # Cannot set single values
        if not isinstance(where, slice):
            raise ValueError("Cannot set level for single point, pass an interval!")
        if where.step is not None:
            raise ValueError("Stepping meaningless for LevelTrace!")

        # Setting a level trace?
        if isinstance(value, LevelTrace):

            # Validate (and shift) before touching our own data, so a
            # rejected assignment leaves this trace unchanged.
            if where.start is not None:
                if value.start() < 0:
                    raise ValueError("Level trace starts before 0!")
                value = value.shift(where.start)
            if where.stop is not None:
                if value.end() > where.stop:
                    raise ValueError("Level trace to set is larger than slice!")

            # Remove existing data, then overlay the new trace
            del self[where]
            self._trace = (self + value)._trace

        else:

            # Otherwise set constant value
            start = (where.start if where.start is not None else self.start())
            end = (where.stop if where.stop is not None else self.end())
            self.set(start, end, value)

    def foldl1(self, start, end, fn):
        """Left-fold over the levels present in the given range.

        Seeds with the level at ``start``.

        :raises ValueError: if ``start > end``
        """
        if start > end:
            raise ValueError("End needs to be after start!")
        val = self.get(start)
        start_ix = self._trace.bisect_right(start)
        end_ix = self._trace.bisect_left(end)
        for lvl in self._trace.values()[start_ix:end_ix]:
            val = fn(val, lvl)
        return val

    def minimum(self, start, end):
        """Return the lowest level in the given range."""
        return self.foldl1(start, end, min)

    def maximum(self, start, end):
        """Return the highest level in the given range."""
        return self.foldl1(start, end, max)

    def foldl_time(self, start, end, val, fn):
        """Left-fold over the levels in the given range, with durations.

        ``fn(val, duration, level)`` is called once per constant stretch.

        :param val: Seed value
        :raises ValueError: if ``start > end``
        """
        if start > end:
            raise ValueError("End needs to be after start!")
        last_time = start
        last_lvl = self.get(start)
        start_ix = self._trace.bisect_right(start)
        end_ix = self._trace.bisect_left(end)
        for time, lvl in self._trace.items()[start_ix:end_ix]:
            val = fn(val, time - last_time, last_lvl)
            last_time = time
            last_lvl = lvl
        return fn(val, end - last_time, last_lvl)

    def integrate(self, start, end):
        """Return the integral over a range (sum below level curve)."""
        return self.foldl_time(start, end, 0,
                               lambda v, time, lvl: v + time * lvl)

    def average(self, start, end):
        """Return the average level over a given range.

        For an empty range (``start == end``) the level at that point is
        returned instead of dividing by zero.
        """
        if start == end:
            return self.get(start)
        return self.integrate(start, end) / (end - start)

    def find_above(self, time, level):
        """Return the first time larger or equal to the given start time
        where the level is at least the specified value, or ``None``.
        """
        if self.get(time) >= level:
            return time
        ix = self._trace.bisect_right(time)
        for t, lvl in self._trace.items()[ix:]:
            if lvl >= level:
                return t
        return None

    def find_below(self, time, level):
        """Return the first time larger or equal to the given start time
        where the level is less or equal the specified value, or ``None``.
        """
        if self.get(time) <= level:
            return time
        ix = self._trace.bisect_right(time)
        for t, lvl in self._trace.items()[ix:]:
            if lvl <= level:
                return t
        return None

    def find_below_backward(self, time, level):
        """Return the last time smaller or equal to the given time where
        there exists a region to the left where the level is below (or
        equal to) the given value, or ``None``.
        """
        last = time
        ix = self._trace.bisect_right(time) - 1
        if ix >= 0:
            for t, lvl in self._trace.items()[ix::-1]:
                if lvl <= level and time > t:
                    return last
                last = t
        # The level before the first entry is 0
        if level >= 0:
            return last
        return None

    def find_above_backward(self, time, level):
        """Return the last time smaller or equal to the given time where
        there exists a region to the left where the level is above (or
        equal to) the given value, or ``None``.
        """
        last = time
        ix = self._trace.bisect_right(time) - 1
        if ix >= 0:
            for t, lvl in self._trace.items()[ix::-1]:
                if lvl >= level and time > t:
                    return last
                last = t
        # The level before the first entry is 0
        if level <= 0:
            return last
        return None

    def find_period_below(self, start, end, target, length):
        """Return the start of a period where the level is at or below the
        target for a certain length of time, within a given start and end
        time. Returns ``None`` if there is no such period.

        :raises ValueError: if ``start > end`` or ``length < 0``
        """
        if start > end:
            raise ValueError("End needs to be after start!")
        if length < 0:
            raise ValueError("Period length must be larger than zero!")

        period_start = (start if self.get(start) <= target else None)
        start_ix = self._trace.bisect_right(start)
        end_ix = self._trace.bisect_left(end)
        for time, lvl in self._trace.items()[start_ix:end_ix]:
            # Period long enough?
            if period_start is not None:
                if time >= period_start + length:
                    return period_start
            # Not enough space until end?
            elif time + length > end:
                return None
            # Above target? Reset period
            if lvl > target:
                period_start = None
            elif period_start is None:
                period_start = time

        # Possible at end?
        if period_start is not None and period_start + length <= end:
            return period_start

        # Nothing found
        return None

    def zip_with(self, other, fn):
        """Combine two traces point-wise into a new trace.

        ``fn(left_level, right_level)`` is evaluated wherever either trace
        changes; an entry is only written where the result changes.
        """
        # Simple cases
        if len(self._trace) == 0:
            return other.map(lambda x: fn(0, x))
        if len(other._trace) == 0:
            return self.map(lambda x: fn(x, 0))

        # Read first item from both sides
        left = self._trace.items()
        right = other._trace.items()
        left_ix = 0
        right_ix = 0
        left_val = 0
        right_val = 0
        last_val = 0
        trace = SortedDict()

        # Go through pairs
        while left_ix < len(left) and right_ix < len(right):

            # Next items
            lt, lv = left[left_ix]
            rt, rv = right[right_ix]

            # Determine what to do
            if lt < rt:
                v = fn(lv, right_val)
                if v != last_val:
                    last_val = trace[lt] = v
                left_val = lv
                left_ix += 1
            elif lt > rt:
                v = fn(left_val, rv)
                if v != last_val:
                    last_val = trace[rt] = v
                right_val = rv
                right_ix += 1
            else:
                v = fn(lv, rv)
                if v != last_val:
                    last_val = trace[lt] = v
                left_val = lv
                left_ix += 1
                right_val = rv
                right_ix += 1

        # Handle left-overs
        while left_ix < len(left):
            lt, lv = left[left_ix]
            v = fn(lv, right_val)
            if v != last_val:
                last_val = trace[lt] = v
            left_ix += 1
        while right_ix < len(right):
            rt, rv = right[right_ix]
            v = fn(left_val, rv)
            if v != last_val:
                last_val = trace[rt] = v
            right_ix += 1

        return LevelTrace(trace)
posStrand = [False] # CAN THEORETICALLY EXTRACT FROM BOTH STRANDS else: posStrand = [True,False] for strand in posStrand: kmer = "" if strand: kmer = context[posInWindow:posInWindow+forwardLength] else: kmer = context[posInWindow-forwardLength+1:posInWindow+1] for i in xrange(targetCov//len(posStrand)): # Pick a threshold rval = random.random() if rval <= forward[strand][kmer]*correctForward: # Pick a length lval = random.random() key = lengthDist.bisect_left(lval) selLength = lengthDist[lengthDist.iloc[key]] # Check the kmer at the other end rkmer = "" if strand: rkmer = context[posInWindow+selLength-reverseLength:posInWindow+selLength] else: rkmer = context[posInWindow-selLength+1:posInWindow-selLength+reverseLength+1] rval = random.random() if rval <= reverse[not strand][rkmer]*correctReverse: # Extract sequence and write BAM record selSeq = "" if strand: selSeq = context[posInWindow:posInWindow+selLength] else: selSeq = context[posInWindow-selLength+1:posInWindow+1] writeBAMentry(chrom,pos,selSeq,selLength,strand) #print strand,kmer,rkmer,selSeq,len(selSeq),selLength
class FederationRemoteSendQueue(object):
    """A drop in replacement for TransactionQueue"""

    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> list of user_ids

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        # Stream position that the next queued entry will get.
        self.pos = 1
        # Timestamp in ms (as returned by clock.time_msec) -> stream
        # position handed out at that time.
        self.pos_time = SortedDict()

        # Python closures bind names, not values, so the gauge callback is
        # created inside a helper function: that way each lambda captures
        # its own `queue` rather than whatever the loop variable ends up
        # pointing at.
        def register(name, queue):
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (name, ),
                "", [], lambda: len(queue))

        for queue_name in [
            "presence_map", "presence_changed", "keyed_edu",
            "keyed_edu_changed", "edus", "device_messages", "pos_time",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        """Allocate the next stream position and record when it was issued."""
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes

        Entries queued before the newest stream position that was handed
        out more than five minutes ago are dropped.
        """

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        cutoff_idx = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        # Slicing the keys view yields a list, so deleting below is safe.
        expired_times = self.pos_time.keys()[:cutoff_idx]
        if not expired_times:
            return

        # pos_time is keyed by timestamp; the stream positions are its
        # values. The other queues are keyed by stream position, so it is
        # a position (not a timestamp) that has to be passed on.
        position_to_delete = max(self.pos_time[t] for t in expired_times)
        for t in expired_times:
            del self.pos_time[t]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            # Only keep presence state that is still referenced
            user_ids = set(
                user_id
                for uids in self.presence_changed.values()
                for user_id in uids
            )

            to_del = [
                user_id for user_id in self.presence_map
                if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            # Only keep keyed EDUs that are still referenced
            live_keys = set(self.keyed_edu_changed.values())

            to_del = [
                edu_key for edu_key in self.keyed_edu
                if edu_key not in live_keys
            ]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

            # Delete things out of device map
            keys = self.device_messages.keys()
            i = self.device_messages.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.device_messages[key]

    def notify_new_events(self, current_id):
        """As per TransactionQueue"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def send_edu(self, destination, edu_type, content, key=None):
        """As per TransactionQueue"""
        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            # Keyed EDUs replace any earlier EDU with the same key
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_presence(self, states):
        """As per TransactionQueue

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so lets always just
        # filter here just in case.
        local_states = [s for s in states if self.is_mine_id(s.user_id)]

        self.presence_map.update(
            {state.user_id: state for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per TransactionQueue"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        """Return the most recently allocated stream position."""
        return self.pos - 1

    def federation_ack(self, token):
        """Drop everything queued before the acknowledged position."""
        self._clear_queue_before_pos(token)

    def get_replication_rows(self, from_token, to_token, limit,
                             federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position where the worker is
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point

        Returns:
            list of (position, row type id, row data) tuples, sorted by
            position
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so lets delete everything its
        # acknowledged its seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # NOTE(review): each upper bound below is bisect_right(to_token) + 1,
        # which also picks up the first entry *after* to_token when one
        # exists. Looks like an off-by-one, but it is kept as-is here -
        # confirm against the replication stream consumer before changing.

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(
                state=self.presence_map[user_id],
            )))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in keyed_edus.items():
            rows.append((pos, KeyedEduRow(
                key=edu_key,
                edu=self.keyed_edu[(destination, edu_key)],
            )))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        # As above, only the last position per destination is kept
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in device_messages.items():
            rows.append((pos, DeviceRow(
                destination=destination,
            )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
class StepVector():
    """Piecewise-constant vector over integer positions.

    Values are stored as steps in a sorted mapping ``position -> value``:
    each entry holds from its position up to (not including) the next
    entry. Positions before the first entry have the default value
    ``datatype()``.

    Slicing returns a bounded view that shares the underlying mapping
    with the object it was sliced from.
    """

    @classmethod
    def sliced(cls, other, start, end):
        """Return a view of *other* restricted to ``[start, end)``.

        The view shares *other*'s underlying mapping.
        """
        return cls(other.datatype, _tree=other._t, _bounds=(start, end))

    def __init__(self, datatype, _tree=None, _bounds=None):
        """Create an empty step vector holding values of type *datatype*.

        ``_tree`` and ``_bounds`` are used internally when slicing.
        """
        self.datatype = datatype

        if _tree is not None:
            self._t = _tree
        else:
            self._t = SortedDict()

        if _bounds is not None:
            self._bounds = _bounds
        else:
            self._bounds = (None, None)  # set upon slicing/subsetting

    def __getitem__(self, key):
        """Return a bounded view (slice key) or a single value (int key).

        :raises ValueError: if the slice has a step other than 1, or if
            the key / slice lies outside this view's bounds
        """
        lower, upper = self._bounds

        if isinstance(key, slice):
            if (key.step is not None) and (key.step != 1):
                raise ValueError("Invalid step value")

            start = key.start
            end = key.stop

            # Missing slice ends default to this view's own bounds
            if lower is not None:
                if start is None:
                    start = lower
                elif start < lower:
                    raise ValueError("Start out of bounds")

            if upper is not None:
                if end is None:
                    end = upper
                elif end > upper:
                    raise ValueError("End out of bounds")

            return self.sliced(self, start, end)

        assert isinstance(key, int)

        # Bounds are half-open: lower <= key < upper
        if lower is not None and key < lower:
            raise ValueError("Key out of bounds")
        if upper is not None and key >= upper:
            raise ValueError("Key out of bounds")

        if not self._t:
            # empty tree
            return self.datatype()

        try:
            # A step starting exactly at `key` applies to `key` itself, so
            # entries equal to the key must be included in the search.
            prevkey = self._floor_key(key, bisect="right")
        except KeyError:
            # no item smaller than or equal to key
            return self.datatype()
        return self._t[prevkey]

    def __setitem__(self, key, value):
        """Set ``[start, end)`` (slice key) or one position (int key).

        Both slice ends must be given. Setting an empty range is a no-op.
        """
        if isinstance(key, slice):
            start = key.start
            end = key.stop
        else:
            assert isinstance(key, int)
            start = key
            end = key + 1

        assert start is not None
        assert end is not None
        assert type(value) == self.datatype
        assert end >= start

        if start == end:
            return

        # check next val: the value currently in effect at `end`, which
        # has to be restored there once the range is overwritten
        if self._t:
            try:
                nkey = self._floor_key(end, bisect="right")
                nvalue = self._t[nkey]
            except KeyError:
                nkey = None
                nvalue = None
        else:
            # empty tree
            nkey = None
            nvalue = None

        # check prev val: the value in effect just before `start`
        if self._t:
            try:
                pkey = self._floor_key(start)
                pvalue = self._t[pkey]
            except KeyError:
                pkey = None
                pvalue = None
        else:
            pkey = None
            pvalue = None

        # remove intermediate steps if any
        if self._t:
            a = self._t.bisect_left(start)
            b = self._t.bisect(end)
            assert a <= b
            del self._t.iloc[a:b]

        # set an end marker if necessary
        if nkey is None:
            self._t[end] = self.datatype()
        elif nvalue != value:
            self._t[end] = nvalue

        # set a start marker if necessary
        if pkey is None or pvalue != value:
            self._t[start] = value

    def __iter__(self):
        """Yield ``(start, end, value)`` steps within this view's bounds.

        Stretches inside the bounds that come before the first stored step
        are reported with the default value ``datatype()``.
        """
        start, end = self._bounds

        # Generators must finish with `return`: since PEP 479 an explicit
        # `raise StopIteration` inside one is turned into RuntimeError.
        if not self._t:
            # empty tree
            if start is None or end is None:
                # FIXME: can't figure out a better thing to do if only one is None
                return
            if start < end:
                yield (start, end, self.datatype())
            return

        if start is None:
            a = 0
        else:
            a = max(0, self._bisect_right(start) - 1)

        if end is None:
            b = len(self._t)
        else:
            b = self._bisect_right(end)

        assert b >= a

        if a == b:
            # No stored step at or before `end`: the range, if it is
            # fully specified, only covers default values. An open start
            # yields nothing here, like the empty-tree case above.
            if start is not None and end is not None and start < end:
                yield (start, end, self.datatype())
            return

        it = self._t.islice(a, b)

        currkey = next(it)
        currvalue = self._t[currkey]
        if start is not None:
            currkey = max(start, currkey)
            if start < currkey:
                # gap between the lower bound and the first stored step
                yield (start, currkey, self.datatype())

        prevkey, prevvalue = currkey, currvalue
        for currkey in it:
            currvalue = self._t[currkey]
            yield (prevkey, currkey, prevvalue)
            prevkey = currkey
            prevvalue = currvalue

        if end is not None:
            if currkey < end:
                yield (currkey, end, prevvalue)

    def add_value(self, start, end, value):
        """Add *value* to every position in ``[start, end)``.

        For ``set`` vectors the value is merged in with ``update``;
        otherwise ``+`` is used.
        """
        assert type(value) == self.datatype

        # can't modify self while iterating over values; that changes the
        # tree and thus breaks the iteration
        items = list(self[start:end])

        for a, b, x in items:
            if self.datatype == set:
                y = x.copy()
                y.update(value)
            else:
                y = x + value

            self[a:b] = y

    def _bisect_left(self, key):
        return self._t.bisect_left(key)

    def _bisect_right(self, key):
        return self._t.bisect_right(key)

    def _floor_key(self, key, bisect="left"):
        """
        Returns the greatest stored key below `key`.

        With ``bisect="right"`` a stored key equal to `key` qualifies
        (greatest key <= `key`); with the default ``"left"`` only strictly
        smaller keys do. Raises KeyError if there is no such key.
        """
        if bisect == "right":
            p = self._bisect_right(key)
        else:
            p = self._bisect_left(key)

        if p == 0:
            raise KeyError
        return self._t.iloc[p - 1]
class BaseColorCodePatchBuilder(ASAxesPatchBuilder, PickablePatchBuilder):
    """
    Patch builder that renders every capability node as a line whose color
    depends on the permission bits of the capability.

    One LineCollection is produced per combination of permission bits;
    the lines of the nodes are grouped accordingly.
    """

    def __init__(self, figure, pgm):
        """
        Constructor

        :param figure: the figure to attach the click callback to
        :param pgm: the provenance graph model
        """
        super().__init__(figure=figure)

        # The provenance graph model
        self._pgm = pgm

        # Map capability permission to the list where the line should go.
        # Any combination of capability permissions is used as key for a
        # list of (start, end) values that are used to build
        # LineCollections. The key "call" is used for system call nodes,
        # the int(0) key is used for no permission.
        self._collection_map = defaultdict(lambda: [])

        # Map capability permission to line colors.
        # XXX: keep this for now, move to a colormap
        self._colors = {}

        # Bounding box of the patches as (xmin, ymin, xmax, ymax).
        self._bbox = [np.inf, np.inf, 0, 0]

        # Maps the Y axis coordinate to the graph node at that position
        self._node_map = SortedDict()

    def _clickable_element(self, vertex, y):
        """Remember the node at the given Y for faster indexing."""
        self._node_map[y] = self._pgm.data[vertex]

    def _add_bbox(self, xmin, xmax, y):
        """Grow the view bbox so that it contains the given line."""
        bbox = self._bbox
        bbox[0] = min(bbox[0], xmin)
        bbox[1] = min(bbox[1], y)
        bbox[2] = max(bbox[2], xmax)
        bbox[3] = max(bbox[3], y)

    def _get_patch_collections(self, axes):
        """Return a generator of collections of patches to add to the axes."""
        pass

    def get_patches(self, axes):
        """
        Add the collections from _get_patch_collections to the axes.
        """
        super().get_patches(axes)
        for collection in self._get_patch_collections(axes):
            axes.add_collection(collection)

    def get_bbox(self):
        """Return the bounding box of the patches as a Bbox."""
        return Bbox.from_extents(*self._bbox)

    def on_click(self, event):
        """
        Show the node closest to the click position in the status message.

        The lookup tries to retrieve the data in less than O(n) for
        better interactivity, at the expense of holding a dictionary of
        references to the nodes keyed by their Y coordinate. Note that
        t_alloc is unique for each capability node as it is the cycle
        count, so it can be used as the key.
        """
        axes = event.inaxes
        if axes is None:
            return

        # back to data coords without scaling
        click_y = int(event.ydata)
        # tolerance for y distance, 0.1 * 10^6 cycles
        tolerance = 0.1 * 10**6

        # Restrict the search to the nodes within the tolerance window.
        # For now fall-back to a reduced linear search but would be
        # useful to be able to index lines with an R-tree?
        lowest = max(0, click_y - tolerance)
        highest = min(self._bbox[3], click_y + tolerance)
        first = self._node_map.bisect_left(lowest)
        last = self._node_map.bisect_right(highest)

        # find the closest node to the click position
        closest = None
        for node_y in self._node_map.islice(first, last):
            candidate = self._node_map[node_y]
            if not (candidate.cap.base <= event.xdata and
                    candidate.cap.bound >= event.xdata):
                # the click is outside of the node bounds
                continue
            # keep the candidate if it is the first match or if its Y
            # is closer to the click than the current best match
            if closest is None:
                closest = candidate
            elif abs(click_y - node_y) < abs(click_y - closest.cap.t_alloc):
                closest = candidate

        axes.set_status_message(closest if closest is not None else "")
class FederationRemoteSendQueue(AbstractFederationSender):
    """A drop in replacement for FederationSender

    Presence updates and EDUs handed to this class are stored in in-memory
    queues keyed by an increasing stream position, and the notifier is poked
    via `on_new_replication_data`. The queued entries are read back out with
    `get_replication_rows` and are dropped either when they are acked
    (`federation_ack`) or by a periodic clean-up (`_clear_queue`).
    """

    def __init__(self, hs: "HomeServer"):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        # We may have multiple federation sender instances, so we need to track
        # their positions separately.
        self._sender_instances = hs.config.worker.federation_shard_config.instances
        self._sender_positions = {}  # type: Dict[str, int]

        # Pending presence map user_id -> UserPresenceState
        self.presence_map = {}  # type: Dict[str, UserPresenceState]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = (
            SortedDict()
        )  # type: SortedDict[int, Tuple[str, Iterable[str]]]

        # (destination, key) -> EDU
        self.keyed_edu = {}  # type: Dict[Tuple[str, tuple], Edu]

        # stream position -> (destination, key)
        self.keyed_edu_changed = (
            SortedDict()
        )  # type: SortedDict[int, Tuple[str, tuple]]

        self.edus = SortedDict()  # type: SortedDict[int, Edu]

        # stream ID for the next entry into keyed_edu_changed/edus.
        self.pos = 1

        # map from the time (in ms) at which a stream entry was generated to the
        # stream ID of that entry, so that we can clear out entries after a while
        self.pos_time = SortedDict()  # type: SortedDict[int, int]

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name: str, queue: Sized) -> None:
            # Use the `name` parameter rather than the enclosing loop variable,
            # so the metric name cannot drift from the queue it measures.
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (name,),
                "",
                [],
                lambda: len(queue),
            )

        for queue_name in [
            "presence_map",
            "keyed_edu",
            "keyed_edu_changed",
            "edus",
            "pos_time",
            "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self) -> int:
        """Allocate and return the next stream position.

        Also records the allocation time in `pos_time` (time in ms -> position).
        Positions allocated within the same millisecond share one `pos_time`
        entry, which ends up holding the latest of them.
        """
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self) -> None:
        """Clear the queues for anything older than N minutes"""
        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        # Timestamps strictly older than the cut-off. Slicing the keys view
        # returns a list, so deleting from `pos_time` while looping is safe.
        expired_count = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        expired_times = self.pos_time.keys()[:expired_count]
        if not expired_times:
            return

        # `pos_time` is keyed by timestamp, so the stream position to clear up
        # to is the largest *value* among the expired entries. Using the
        # largest key (a millisecond timestamp) as a position would wipe every
        # queue, including entries generated a moment ago.
        position_to_delete = max(self.pos_time[ts] for ts in expired_times)
        for ts in expired_times:
            del self.pos_time[ts]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete: int) -> None:
        """Clear all the queues from before a given position

        Entries whose position is strictly less than `position_to_delete` are
        removed; an entry at exactly that position is kept.
        """
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_destinations.keys()
            cutoff = self.presence_destinations.bisect_left(position_to_delete)
            for key in keys[:cutoff]:
                del self.presence_destinations[key]

            # Only keep presence state for users that are still referenced by
            # a queued presence_destinations entry.
            user_ids = {
                user_id for user_id, _ in self.presence_destinations.values()
            }
            to_del = [
                user_id for user_id in self.presence_map if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            cutoff = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:cutoff]:
                del self.keyed_edu_changed[key]

            # Drop keyed EDUs that no remaining stream entry points at.
            live_keys = set(self.keyed_edu_changed.values())
            keys_to_del = [
                edu_key for edu_key in self.keyed_edu if edu_key not in live_keys
            ]
            for edu_key in keys_to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            cutoff = self.edus.bisect_left(position_to_delete)
            for key in keys[:cutoff]:
                del self.edus[key]

    def notify_new_events(self, max_token: RoomStreamToken) -> None:
        """As per FederationSender"""
        # This should never get called.
        raise NotImplementedError()

    def build_and_send_edu(
        self,
        destination: str,
        edu_type: str,
        content: JsonDict,
        key: Optional[Hashable] = None,
    ) -> None:
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            # Keyed EDUs overwrite any earlier EDU queued for the same
            # (destination, key); only the stream position is appended.
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    async def send_read_receipt(self, receipt: ReadReceipt) -> None:
        """As per FederationSender

        Args:
            receipt:
        """
        # nothing to do here: the replication listener will handle it.

    def send_presence_to_destinations(
        self, states: Iterable[UserPresenceState], destinations: Iterable[str]
    ) -> None:
        """As per FederationSender

        Each state is given its own stream position.

        Args:
            states
            destinations
        """
        for state in states:
            pos = self._next_pos()
            # Record just this state. Rebuilding the whole mapping from
            # `states` on every iteration is quadratic and would exhaust a
            # one-shot iterable after the first state.
            self.presence_map[state.user_id] = state
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination: str) -> None:
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.

    def wake_destination(self, server: str) -> None:
        pass

    def get_current_token(self) -> int:
        """Return the most recently allocated stream position."""
        return self.pos - 1

    def federation_ack(self, instance_name: str, token: int) -> None:
        """Record that `instance_name` has acked up to `token` and clear the
        queues before the acked position.
        """
        if self._sender_instances:
            # If we have configured multiple federation sender instances we need
            # to track their positions separately, and only clear the queue up
            # to the token all instances have acked.
            self._sender_positions[instance_name] = token
            token = min(self._sender_positions.values())

        self._clear_queue_before_pos(token)

    async def get_replication_rows(
        self, instance_name: str, from_token: int, to_token: int, target_row_count: int
    ) -> Tuple[List[Tuple[int, Tuple]], int, bool]:
        """Get rows to be sent over federation between the two tokens

        Args:
            instance_name: the name of the current process
            from_token: the previous stream token: the starting point for
                fetching the updates
            to_token: the new stream token: the point to get updates up to
            target_row_count: a target for the number of rows to be returned.

        Returns:
            a triplet `(updates, new_last_token, limited)`, where:
               * `updates` is a list of `(token, row)` entries.
               * `new_last_token` is the new position in stream.
               * `limited` is whether there are more updates to fetch.
        """
        # TODO: Handle target_row_count.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []  # type: List[Tuple[int, BaseFederationRow]]

        # NOTE(review): in each of the three slices below, the "+ 1" on the
        # upper bound lets the slice include one entry past `to_token` when
        # such an entry exists -- confirm this is intended.

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append(
                (
                    pos,
                    PresenceDestinationsRow(
                        state=self.presence_map[user_id], destinations=list(dests)
                    ),
                )
            )

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in keyed_edus.items():
            rows.append(
                (
                    pos,
                    KeyedEduRow(
                        key=edu_key, edu=self.keyed_edu[(destination, edu_key)]
                    ),
                )
            )

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Sort rows based on pos
        rows.sort()

        return (
            [(pos, (row.TypeId, row.to_data())) for pos, row in rows],
            to_token,
            False,
        )
def test6():
    """Demonstrate common SortedDict operations, then the plain-dict equivalents.

    SortedDict documentation:
    http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html
    """
    from sortedcontainers import SortedDict

    divider = "---------------------------------------"

    # ---- Ordered map: SortedDict ----
    ages = SortedDict()

    # Insert entries; iteration and printing follow sorted key order.
    for name, age in (("wxx", 21), ("hh", 18), ("other", 20)):
        ages[name] = age
    print(ages)  # SortedDict({'hh': 18, 'other': 20, 'wxx': 21})
    print(ages["wxx"])  # subscripting a missing key would raise KeyError
    print(ages.get("c"))  # .get() on a missing key returns None

    # Convert a SortedDict to a plain dict
    print(dict(ages))  # {'hh': 18, 'other': 20, 'wxx': 21}

    # First and last items, each returned as a (key, value) tuple
    print(ages.peekitem(0))  # first item: ('hh', 18)
    print(ages.peekitem())  # last item: ('wxx', 21)

    # Iterate over (key, value) pairs
    for name, age in ages.items():
        print(f"{name}:{age}", end=", ")
    print()
    # Iterate over keys (same as iterating ages.keys())
    for name in ages:
        print(f"{name}:{ages[name]}", end=", ")
    print()
    # Iterate over values
    for age in ages.values():
        print(age, end=", ")
    print()

    # Key and value of the last item
    last_name, last_age = ages.peekitem()
    print(last_name)
    print(last_age)

    # Membership test on keys
    print("wxx" in ages)  # True

    # bisect_left() / bisect_right()
    for name, age in (("a", 1), ("c1", 2), ("c2", 4)):
        ages[name] = age
    # SortedDict({'a': 1, 'c1': 2, 'c2': 4, 'hh': 18, 'other': 20, 'wxx': 21})
    print(ages)
    print(ages.bisect_left("c1"))  # index of the first key >= "c1": 1
    print(ages.bisect_right("c1"))  # index of the first key > "c1": 2

    # Remove everything
    ages.clear()
    print(len(ages))  # 0
    print(not ages)  # True

    # ---- Unordered map: dict ----
    print(divider)
    plain = {"c1": 2, "c2": 4, "hh": 18, "wxx": 21, 13: 14, 1: 0}
    print(plain["wxx"])  # 21
    print(plain[13])  # 14
    plain[13] = plain[13] + 1
    print(plain[13])  # 15
    plain["future"] = "wonderful"  # add a key/value pair
    plain.pop(1)  # remove key 1 together with its value
    print("wxx" in plain)  # True when the key is present, otherwise False
    # dict_keys(['c1', 'c2', 'hh', 'wxx', 13, 'future'])
    print(plain.keys())
    # dict_values([2, 4, 18, 21, 15, 'wonderful'])
    print(plain.values())
    # dict_items of the same six (key, value) pairs
    print(plain.items())
    # Iterate over (key, value) pairs
    for key, value in plain.items():
        print(f"{key} : {value}")
    # Iterate over keys (same as iterating plain.keys())
    for key in plain:
        print(f"{key}:{plain[key]}", end=", ")
    print()
    # Iterate over values
    for value in plain.values():
        print(value, end=", ")
    print()

    # ---- dict helper functions and methods ----
    print(divider)
    capitals = {"中国": "北京", "美国": "华盛顿", "法国": "巴黎"}
    print(len(capitals))  # number of entries: 3
    # .get(key, default) returns the value if the key exists, else the default
    print(capitals.get("中国", "不存在"))  # 北京
    print(capitals.get("中", "不存在"))  # 不存在
    print(capitals.get("中"))  # None
    capitals["美国"] = "Washington"  # overwrite the value of an existing key
    # .pop(key) returns the value and removes the key from the dict
    print(capitals.pop("美国"))
    # .popitem() removes and returns the most recently inserted pair as a tuple
    print(capitals.popitem())
    capitals.clear()  # remove all remaining pairs