Example #1
    def oddEvenJumps(self, nums: List[int]) -> int:
        if len(nums) == 0:
            return 0
        if len(nums) == 1:
            return 1

        n = len(nums)
        isOkEven = [False] * len(nums)
        isOkOdd = [False] * len(nums)
        isOkEven[n - 1] = True
        isOkOdd[n - 1] = True
        tree = SortedDict()
        tree[nums[n - 1]] = n - 1
        for i in range(n - 2, -1, -1):
            val = nums[i]
            if val in tree:
                isOkEven[i] = isOkOdd[tree[val]]
                isOkOdd[i] = isOkEven[tree[val]]
            else:
                smallestP = tree.bisect_left(val)  # first key greater than val
                largestP = smallestP - 1           # last key smaller than val
                isOkOdd[i] = smallestP != len(tree) and isOkEven[
                    tree.peekitem(smallestP)[1]]
                isOkEven[i] = largestP != -1 and isOkOdd[
                    tree.peekitem(largestP)[1]]
            tree[val] = i
        return sum(isOkOdd)
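A minimal harness for the snippet above (my own, assuming the method lives on a LeetCode-style Solution class and SortedDict comes from sortedcontainers):

from sortedcontainers import SortedDict

# [10, 13, 12, 14, 15] is the classic LC 975 case: starting at index 1 or 4 reaches the end
print(Solution().oddEvenJumps([10, 13, 12, 14, 15]))  # -> 2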
Example #2
    def oddEvenJumps(self, arr: List[int]) -> int:
        N = len(arr)
        odd = [False] * N
        even = [False] * N
        odd[-1] = True
        even[-1] = True
        sd = SortedDict()
        sd[arr[N - 1]] = N - 1

        for i in range(N - 2, -1, -1):
            if arr[i] in sd:
                odd[i] = even[sd[arr[i]]]
                even[i] = odd[sd[arr[i]]]
            else:
                # greatest smaller
                floor_idx = sd.bisect_left(arr[i]) - 1

                if floor_idx != -1:
                    even[i] = odd[sd.peekitem(floor_idx)[1]]

                # smallest greater
                ceiling_idx = sd.bisect_left(arr[i])

                if ceiling_idx != len(sd):
                    odd[i] = even[sd.peekitem(ceiling_idx)[1]]

            sd[arr[i]] = i

        return odd.count(True)
Example #3
class LogSystem:

    def __init__(self):
        # a plain SortedDict: passing `list` here would be taken as a key
        # function, not a default factory; put() uses setdefault() instead
        self.map = SortedDict()
        self.gra = {
            'Year': 5,
            'Month': 8,
            'Day': 11,
            'Hour': 14,
            'Minute': 17,
            'Second': 20,
        }

    def put(self, id: int, timestamp: str) -> None:
        self.map.setdefault(timestamp, []).append(id)

    def retrieve(self, start: str, end: str, granularity: str) -> List[int]:
        idx = self.gra[granularity]
        left = self.map.bisect_left(start[:idx])

        result = []
        # don't bisect for the right bound: keys must be compared only up to
        # the granularity prefix, so scan forward and break past the end
        for i in range(left, len(self.map)):
            key = self.map.keys()[i]
            if key[:idx] > end[:idx]:
                break
            result.extend(self.map[key])
        return result
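A usage sketch for LogSystem (assuming LeetCode 635's "Year:Month:Day:Hour:Minute:Second" timestamp format, which the granularity offsets above are built around):

logs = LogSystem()
logs.put(1, "2017:01:01:23:59:59")
logs.put(2, "2017:01:01:22:59:59")
logs.put(3, "2016:01:01:00:00:00")
# truncating both bounds to the "Year" prefix matches all three ids
print(logs.retrieve("2016:01:01:01:01:01", "2017:01:01:23:00:00", "Year"))  # -> [3, 2, 1]
# at "Hour" granularity the 2016 entry falls outside the window
print(logs.retrieve("2016:01:01:01:01:01", "2017:01:01:23:00:00", "Hour"))  # -> [2, 1]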
Example #4
class UnboundedInterpolator:
    """Class that can linearly interpolate through a function that is costly
    to compute, on the go, with no need to specify bounds or pre-compute
    It is costly to do the binary search though so I would recommend using
    BoundedInterpolator instead."""
    def __init__(self, func, resolution, debug=False):
        self._func = func
        self._resolution = resolution
        self._data = SortedDict()
        self._keys = self._data.keys()
        self._debug = debug
        # vectorized function so it can take ndarrays
        self._vf = np.vectorize(self._eval)

    def min_val(self):
        return self._keys[0]

    def max_val(self):
        return self._keys[-1]

    def __call__(self, x):
        if isinstance(x, (np.ndarray, list)):
            return self._vf(x)
        return self._eval(x)

    def _eval(self, x):
        if x in self._data:
            return self._data[x]

        # if there are <= 1 data points, or if x is below the smallest or
        # above the largest existing key, always compute the value
        if len(self._data) <= 1 or x < self.min_val() or x > self.max_val():
            if self._debug:
                print("Computing value of function because not enough data or"
                      " bigger or smaller than all other keys")
            self._data[x] = self._func(x)
            return self._data[x]

        # index of smallest key greater than x
        right_index = self._data.bisect_left(x)
        # index of largest key less than x
        left_index = right_index - 1

        ldiff = x - self._keys[left_index]
        rdiff = self._keys[right_index] - x

        if max(ldiff, rdiff) > self._resolution:
            # if the biggest distance to a neighbor is too big, compute the value
            if self._debug:
                print("Computing value of function because x value not close"
                      " enough to other keys")
            self._data[x] = self._func(x)
            return self._data[x]
        else:
            # otherwise, can interpolate
            if self._debug:
                print("Interpolating")
            lval = self._data[self._keys[left_index]]
            rval = self._data[self._keys[right_index]]
            return (lval * rdiff + rval * ldiff) / (ldiff + rdiff)
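A small usage sketch (my own) showing when the interpolator computes versus interpolates:

import math

interp = UnboundedInterpolator(math.sin, resolution=0.01)
interp(0.0)    # no data yet: computes sin(0.0) and caches it
interp(0.01)   # still <= 1 cached point: computes sin(0.01)
interp(0.005)  # both neighbors within resolution: linearly interpolated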
Example #5
    def canAttendMeetings(self, intervals: List[List[int]]) -> bool:

        points = SortedDict()
        for start, end in intervals:
            i_start = points.bisect_right(start)
            i_end = points.bisect_left(end)
            if i_end != i_start:
                return False
            if i_start > 0 and points.peekitem(i_start-1)[1] == 1:
                return False

            if points.get(start) == -1:
                del points[start]
            else:
                points[start] = 1

            if points.get(end) == 1:
                del points[end]
            else:
                points[end] = -1
        return True
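A quick check of the boundary-count idea above (hypothetical Solution harness; +1 marks a meeting start, -1 an end, and any boundary strictly inside an open interval rejects the set):

print(Solution().canAttendMeetings([[0, 30], [5, 10], [15, 20]]))  # -> False, [5,10] overlaps [0,30]
print(Solution().canAttendMeetings([[7, 10], [2, 4]]))             # -> True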
Example #6
def trapezoid_decomposition_linear(polygons):
    """
    Keep track of which lines to add to GUI, keep track of the point_vertices.
    """
    # Enumerate all the edges and iteratively build up the set of trapezoids
    # Add a vertical line for each point in the polygon
    all_polygons = np.concatenate(polygons, axis=0)
    vertical_lines = SortedDict(
        {x[0]: [x[1], 1000000, 0]
         for x in all_polygons})

    # Loop over Polygons to determine end-points
    for polygon in polygons:
        start_vertex = polygon[0]
        for vertex in polygon[1:]:
            # find the lines in front of the smaller
            x_start = start_vertex[0]
            x_curr = vertex[0]
            start_idx = vertical_lines.bisect_right(min(x_start, x_curr))
            end_idx = vertical_lines.bisect_left(max(x_start, x_curr))
            x_vals = vertical_lines.keys()
            for i in range(start_idx, end_idx):
                x = x_vals[i]
                if x < min(x_start, x_curr) or x > max(x_start, x_curr):
                    continue
                y, top, bottom = vertical_lines[x]
                y_val = linear_interpolation(start_vertex, vertex, x)
                if y < y_val < top:
                    vertical_lines[x][1] = y_val
                elif bottom < y_val < y:
                    vertical_lines[x][2] = y_val
            start_vertex = vertex
    return vertical_lines
Example #7
def test_bisect_key():
    temp = SortedDict(modulo, ((val, val) for val in range(100)))
    temp._reset(7)
    assert all(temp.bisect(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(
        temp.bisect_right(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(temp.bisect_left(val) == (val % 10) * 10 for val in range(100))
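For reference, this test comes from the sortedcontainers suite, where modulo is (to the best of my reading) a key function along these lines:

def modulo(val):
    # buckets the 100 keys by their last digit, so each of the 10 buckets holds 10 keys
    return val % 10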
Example #8
    def maxDepthBST(self, order: List[int]) -> int:
        sd = SortedDict()
        for x in order:
            k = sd.bisect_left(x)
            val = 1
            if k: val = 1 + sd.values()[k-1]
            if k < len(sd): val = max(val, 1 + sd.values()[k])
            sd[x] = val
        return max(sd.values())
Example #9
    def maxDepthBST(self, order: List[int]) -> int:
        sd = SortedDict({0: 0, float('inf'): 0, order[0]: 1})
        ans = 1
        for v in order[1:]:
            lower = sd.bisect_left(v) - 1
            higher = lower + 1
            depth = 1 + max(sd.values()[lower], sd.values()[higher])
            ans = max(ans, depth)
            sd[v] = depth
        return ans
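A usage sketch (hypothetical Solution harness) on the LC 1902 example order [2, 1, 4, 3]:

# 2 is the root; 1 and 4 attach at depth 2; 3 attaches under 4 at depth 3
print(Solution().maxDepthBST([2, 1, 4, 3]))  # -> 3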
Example #10
class MyCalendar:
    def __init__(self):
        self.booked = SortedDict()

    def book(self, start: int, end: int) -> bool:
        i = self.booked.bisect_left(end)
        if i == 0 or self.booked.items()[i - 1][1] <= start:
            self.booked[start] = end
            return True
        return False
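The classic LC 729 sequence, as a hedged usage sketch:

cal = MyCalendar()
print(cal.book(10, 20))  # -> True
print(cal.book(15, 25))  # -> False, overlaps [10, 20)
print(cal.book(20, 30))  # -> True, bookings are half-open intervals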
Example #11
from typing import Any

from sortedcontainers import SortedDict


def find_index_before(sorted_dict: SortedDict, key: Any) -> int:
    '''
    Find the index of the last key in a sorted dict that is less than or equal to the given key.
    If the key is less than the first key in the dict, return -1
    '''
    size = len(sorted_dict)
    if not size: return -1
    i = sorted_dict.bisect_left(key)
    if i == size: return size - 1
    if sorted_dict.keys()[i] != key:
        return i - 1
    return i
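A few illustrative calls (my own) against a small dict:

sd = SortedDict({10: 'a', 20: 'b', 30: 'c'})
find_index_before(sd, 5)   # -> -1, below the first key
find_index_before(sd, 20)  # -> 1, exact match
find_index_before(sd, 25)  # -> 1, 20 is the last key <= 25
find_index_before(sd, 99)  # -> 2, past the last key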
Example #12
class QtDictListModel(QAbstractListModel):
    def __init__(self):
        QAbstractListModel.__init__(self)
        self._items = SortedDict()

    def role(self, item, role):
        return item

    def rowCount(self, parent):
        if parent.isValid():
            return 0
        return len(self._items)

    def from_index(self, index):
        if not index.isValid() or index.row() >= len(self._items):
            return None
        return self._items.peekitem(index.row())[1]

    def data(self, index, role):
        item = self.from_index(index)
        if item is None:
            return None
        return self.role(item, role)

    def _add(self, key, item):
        assert key not in self._items
        next_index = self._items.bisect_left(key)
        self.beginInsertRows(QModelIndex(), next_index, next_index)
        self._items[key] = item
        self.endInsertRows()

    # TODO - removal is O(n).
    def _remove(self, key):
        assert key in self._items
        item_index = self._items.index(key)
        self.beginRemoveRows(QModelIndex(), item_index, item_index)
        del self._items[key]
        self.endRemoveRows()

    def _clear(self):
        self.beginRemoveRows(QModelIndex(), 0, len(self._items) - 1)
        self._items.clear()
        self.endRemoveRows()

    # O(n). Rework if it's too slow.
    def _update(self, key, roles=None):
        item_index = self._items.index(key)
        index = self.index(item_index, 0)
        if roles is None:
            self.dataChanged.emit(index, index)
        else:
            self.dataChanged.emit(index, index, roles)
Example #13
    def maxDepthBST(self, order: List[int]) -> int:
        # Python stand-in for a binary tree map (TreeMap)
        depths = SortedDict()
        # add dummy bounds to avoid extra ifs
        depths[-math.inf] = 0
        depths[math.inf] = 0

        # for every value, look up its two neighbors and take the larger
        # depth + 1, then put the value's depth back into the map
        for x in order:
            i = depths.bisect_left(x)
            depths[x] = 1 + max(depths.values()[i - 1:i + 1])
        # return the maximum depth seen
        return max(depths.values())
Example #14
    def oddEvenJumps(self, A: List[int]) -> int:

        a = A
        n = len(a)

        mm = SortedDict()
        oj = [-1 for i in range(n)]
        for i in range(n - 1, 0, -1):
            mm[a[i]] = i
            j = mm.bisect_left(a[i - 1])
            if j == len(mm):
                continue
            j = mm.iloc[j]
            oj[i - 1] = mm[j]

        mm = SortedDict()
        ej = [-1 for i in range(n)]
        for i in range(n - 1, 0, -1):
            mm[a[i]] = i
            j = mm.bisect_right(a[i - 1]) - 1
            if j == -1:
                continue
            j = mm.iloc[j]
            ej[i - 1] = mm[j]

        dp = {}

        def dfs(idx, odd):
            nonlocal dp

            if idx == n - 1:
                return True
            if (idx, odd) in dp:
                return dp[(idx, odd)]
            idx1 = oj[idx] if odd else ej[idx]
            if idx1 == -1:
                dp[(idx, odd)] = False
            else:
                dp[(idx, odd)] = dfs(idx1, not odd)
            return dp[(idx, odd)]

        res = 0
        for i in range(n):
            if dfs(i, True):
                res += 1
        return res
Example #15
    def jobScheduling(self, startTime: List[int], endTime: List[int],
                      profit: List[int]) -> int:
        tasks = SortedList(zip(startTime, endTime, profit), key=lambda t: t[0])
        n = len(tasks)
        dp = SortedDict()
        for i in reversed(range(n)):
            s1, e, p = tasks[i]
            if s1 in dp:
                dp[s1] = max(dp[s1], p)
            else:
                dp[s1] = p
            j = dp.bisect_left(e)
            if j < len(dp):
                s2 = dp.keys()[j]
                dp[s1] = max(dp[s1], p + dp[s2])
            k = dp.index(s1)
            if k < len(dp) - 1:
                s2 = dp.keys()[k + 1]
                dp[s1] = max(dp[s1], dp[s2])
        return max(dp.values())
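A usage sketch on the standard LC 1235 case (assuming SortedList and SortedDict are imported from sortedcontainers and the method sits on a Solution class):

# picking jobs [1,3] for 50 and [3,6] for 70 yields the optimum
print(Solution().jobScheduling([1, 2, 3, 3], [3, 4, 5, 6], [50, 10, 40, 70]))  # -> 120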
Example #16
def populate_component_matrix(paths: List[Path],
                              schematic: PangenomeSchematic):
    # the loops are 1) paths, and then 2) schematic.components
    # paths are in the same order as schematic.path_names
    for i, path in enumerate(paths):
        sorted_bins = SortedDict((bin.bin_id, bin) for bin in path.bins)
        values = list(sorted_bins.values())
        for component in schematic.components:
            from_id = sorted_bins.bisect_left(component.first_bin)
            to_id = sorted_bins.bisect_right(component.last_bin)
            relevant = values[from_id:to_id]
            padded = []
            if relevant:
                padded = [[]] * (component.last_bin - component.first_bin + 1)
                for bin in relevant:
                    padded[bin.bin_id - component.first_bin] =  \
                        Bin(bin.coverage, bin.inversion_rate, bin.first_nucleotide, bin.last_nucleotide)
            component.matrix.append(
                padded)  # ensure there's always 1 entry for each path
    print("Populated Matrix per component per path.")
    populate_component_occupancy(schematic)
Example #17
    def containsNearbyAlmostDuplicateOrderMap(self, nums, k, t):
        """
        :type nums: List[int]
        :type k: int
        :type t: int
        :rtype: bool
        """
        m = SortedDict()
        j = 0
        for i in range(len(nums)):
            # only need to consider index range [i-k, i]
            if i - j > k:
                m.pop(nums[j])
                j += 1
            # abs(nums[i]-nums[j]) <= t --> nums[i] - t <= nums[j]
            # if nums[j] < nums[i]-t, abs(nums[i]-nums[j]) <= t won't hold
            # search for the first index a where keys[a] >= nums[i]-t
            a = m.bisect_left(nums[i] - t)
            keys = m.keys()
            if a < len(m) and abs(keys[a] - nums[i]) <= t:
                return True
            m[nums[i]] = i
        return False
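Two quick checks (hypothetical Solution harness) matching the LC 220 examples:

print(Solution().containsNearbyAlmostDuplicateOrderMap([1, 2, 3, 1], 3, 0))        # -> True
print(Solution().containsNearbyAlmostDuplicateOrderMap([1, 5, 9, 1, 5, 9], 2, 3))  # -> False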
Example #18
    def fallingSquares(self, positions):
        """
        :type positions: List[List[int]]
        :rtype: List[int]
        """
        res = []
        sd = SortedDict()
        curMax = 0
        for pos in positions:
            start, end, h = pos[0], pos[0] + pos[1], 0
            t = []
            keys = sd.keys()
            index = sd.bisect_left((start, start))
            if index > 0:
                index -= 1
                if keys[index][1] <= start:
                    index += 1
            delete = []
            while index < len(keys) and keys[index][0] < end:
                if (start > keys[index][0]):
                    t.append((keys[index][0], start, sd[keys[index]]))
                if (end < keys[index][1]):
                    t.append((end, keys[index][1], sd[keys[index]]))
                h = max(h, sd[keys[index]])
                delete.append(keys[index])
                index += 1

            for d in delete:
                sd.pop(d)
            sd[(start, end)] = h + pos[1]
            for a in t:
                sd[(a[0], a[1])] = a[2]
            curMax = max(curMax, h + pos[1])

            res.append(curMax)
        return res
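A usage sketch on the LC 699 example (assuming a Solution wrapper):

# the second square lands on the first (height 5); the third stands alone
print(Solution().fallingSquares([[1, 2], [2, 3], [6, 1]]))  # -> [2, 5, 5]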
Example #19
    def oddEvenJumps(self, A: List[int]) -> int:
        n = len(A)
        m = SortedDict()
        dp = [[0] * 2 for _ in range(n)]
        dp[n - 1][0] = dp[n - 1][1] = 1
        m[A[n - 1]] = n - 1
        res = 1

        for i in range(n - 2, -1, -1):
            # bisect_left returns the index of the first key >= A[i]
            o = m.bisect_left(A[i])
            if o != len(m):
                dp[i][0] = dp[m.items()[o][1]][1]
            # bisect_right returns the index of the first key > A[i],
            # so e - 1 is the index of the last key <= A[i]
            e = m.bisect_right(A[i])
            if e != 0:
                dp[i][1] = dp[m.items()[e - 1][1]][0]
            if dp[i][0]:
                res += 1
            m[A[i]] = i
        return res
Example #20
class TimeSeries(TictsMagicMixin, TictsOperationMixin, PandasMixin,
                 TictsIOMixin, TictsPlot):
    """ TimeSeries object.

    Args:
        default: The default value of timeseries.
        permissive (bool): Whether to allow accessing non-existing values or not.
            If True, getting a non-existing item returns None.
            If False, getting a non-existing item raises a KeyError.
    """
    _default_interpolate = "previous"

    _meta_keys = ('default', 'name', 'permissive')

    @property
    def index(self):
        return self.data.keys()

    @property
    def lower_bound(self):
        """Return the lower bound time index."""
        if self.empty:
            return MINTS
        return self.index[0]

    @property
    def upper_bound(self):
        """Return the upper bound time index."""
        if self.empty:
            return MAXTS
        return self.index[-1]

    @property
    def _has_default(self):
        return self.default != NO_DEFAULT

    @property
    def _kwargs_special_keys(self):
        kwargs = {}
        for attr_name in self._meta_keys:
            kwargs[attr_name] = getattr(self, attr_name)
        return kwargs

    @property
    def empty(self):
        """Return whether the TimeSeries is empty or not."""
        return len(self) == 0

    def __init__(self,
                 data=None,
                 default=NO_DEFAULT,
                 name=DEFAULT_NAME,
                 permissive=True,
                 tz='UTC'):
        """"""
        if isinstance(data, self.__class__):
            for attr in ('data', *self._meta_keys):
                setattr(self, attr, getattr(data, attr))

            # Only set 'default' and 'name' if is different from default
            if default != NO_DEFAULT:
                setattr(self, 'default', default)
            if name != DEFAULT_NAME:
                setattr(self, 'name', name)
            return

        if hasattr(default, 'lower') and default.lower() == 'no_default':
            # 'no_default' as a string is used at JSON serialization time
            self.default = NO_DEFAULT
        else:
            self.default = default

        self.name = name
        self.permissive = permissive

        # Overwrite the name if data is an instance of pd.DataFrame or pd.Series
        if isinstance(data, pd.DataFrame):
            if len(data.columns) != 1:
                msg = ("Can't convert a DataFrame with several columns into "
                       "one timeseries: {}.")
                raise ValueError(msg.format(data.columns))
            self.name = data.columns[0]

        elif isinstance(data, pd.Series):
            self.name = data.name

        try:
            tz = pytz.timezone(tz)
        except pytz.UnknownTimeZoneError:
            raise ValueError('{} is not a valid timezone'.format(tz))

        # SortedDict.__init__ does not use __setitem__,
        # hence we have to parse datetime keys ourselves.
        # SortedDict uses the first arg given and checks whether it is a
        # callable, in case you want to give your own sorting function.
        self.data = SortedDict(None, _process_args(data, tz))

    def __setitem__(self, key, value):
        if isinstance(key, slice):
            return self.set_interval(key.start, key.stop, value)
        if key in self._meta_keys:
            super().__setitem__(key, value)
        else:
            key = timestamp_converter(key, self.tz)
            self.data[key] = value

    def __getitem__(self, key):
        """Get the value of the time series, even in-between measured values by interpolation.
        Args:
            key (datetime): datetime index
            interpolate (str): interpolate operator among ["previous", "linear"]
        """

        interpolate = self._default_interpolate

        if isinstance(key, tuple):
            if len(key) == 2:
                key, interpolate = key
            elif len(key) > 2:
                raise KeyError

        if isinstance(key, slice):
            return self.slice(key.start, key.stop)

        key = timestamp_converter(key, self.tz)

        basemsg = "Getting {} but default attribute is not set".format(key)
        if self.empty:
            if self._has_default:
                return self.default
            else:
                if self.permissive:
                    return
                else:
                    raise KeyError(
                        "{} and timeseries is empty".format(basemsg))

        if key < self.lower_bound:
            if self._has_default:
                return self.default
            else:
                if self.permissive:
                    return
                else:
                    msg = "{}, can't deduce value before the oldest measurement"
                    raise KeyError(msg.format(basemsg))

        # If the key is already defined:
        if key in self.index:
            return self.data[key]

        if interpolate.lower() == "previous":
            fn = self._get_previous
        elif interpolate.lower() == "linear":
            fn = self._get_linear_interpolate
        else:
            raise ValueError("'{}' interpolation unknown.".format(interpolate))

        return fn(key)

    def _get_previous(self, time):
        # In this case, bisect_left == bisect_right == bisect
        # And idx > 0 as we already handled other cases
        previous_idx = self.data.bisect(time) - 1
        time_idx = self.index[previous_idx]
        return self.data[time_idx]

    def _get_linear_interpolate(self, time):
        # TODO: put it into a 'get_previous_index' method
        idx = self.data.bisect_left(time)
        previous_time_idx = self.index[idx - 1]

        # TODO: check on left bound case

        # out of right bound case:
        if idx == len(self):
            return self.data[previous_time_idx]

        next_time_idx = self.index[idx]

        previous_value = self.data[previous_time_idx]
        next_value = self.data[next_time_idx]

        coeff = (time - previous_time_idx) / (
            next_time_idx - previous_time_idx)

        value = previous_value + coeff * (next_value - previous_value)
        return value

    def slice(self, start, end):  # noqa A003
        """Slice your timeseries for the given interval.

        Args:
            start (datetime or str): lower bound
            end (datetime or str): upper bound

        Returns:
            TimeSeries sliced
        """
        start = timestamp_converter(start, self.tz)
        end = timestamp_converter(end, self.tz)

        newts = TimeSeries(**self._kwargs_special_keys)

        for key in self.data.irange(start, end, inclusive=(True, False)):
            newts[key] = self[key]

        should_add_left_closure = (start not in newts.index
                                   and start >= self.lower_bound)
        if should_add_left_closure:
            newts[start] = self[start]  # is applying get_previous on self

        return newts

    def set_interval(self, start, end, value):
        """Set a value for an interval of time.

        Args:
            start (datetime or str): lower bound
            end (datetime or str): upper bound
            value: the value to be set

        Returns:
            self

        Raises:
            NotImplementedError: when no default is set.
        """
        if not self._has_default:
            msg = "At the moment, you have to set a default for set_interval"
            raise NotImplementedError(msg)

        start = timestamp_converter(start, self.tz)
        end = timestamp_converter(end, self.tz)

        keys = self.data.irange(start, end, inclusive=(True, False))

        last_value = self[end]

        for key in list(keys):
            del self.data[key]

        self[start] = value
        self[end] = last_value

    def compact(self):
        """Convert this instance to a compact version: consecutive measurement of the
        same value are discarded.

        Returns:
            TimeSeries
        """
        ts = TimeSeries(**self._kwargs_special_keys)
        for time, value in self.items():
            should_set_it = ts.empty or (ts[time] != value)
            if should_set_it:
                ts[time] = value
        return ts

    def iterintervals(self, end=None):
        """Iterator that contain start, end of intervals.

        Args:
            end (datetime): right bound of last interval.
        """
        lst_keys = SortedList(self.index)
        if not end:
            end = self.upper_bound
        else:
            end = timestamp_converter(end, self.tz)
            if end not in lst_keys:
                lst_keys.add(end)

        for i, key in enumerate(lst_keys[:-1]):
            next_key = lst_keys[i + 1]
            if next_key > end:  # stop there
                return  # PEP 479: raising StopIteration in a generator is a RuntimeError
            yield key, next_key

    def equals(self, other, check_default=True, check_name=True):
        if not isinstance(other, self.__class__):
            raise TypeError("Can't compare {} with {}".format(
                self.__class__.__name__, other.__class__.__name__))

        is_equal = self.data == other.data

        if check_default:
            is_equal = is_equal and self.default == other.default

        if check_name:
            is_equal = is_equal and self.name == other.name

        return is_equal

    @property
    def tz(self):
        if self.empty:
            return pytz.UTC
        return str(self.index[0].tz)

    def tz_convert(self, tz):
        try:
            tz = pytz.timezone(tz)
        except pytz.UnknownTimeZoneError:
            raise ValueError('{} is not a valid timezone'.format(tz))

        ts = deepcopy(self)

        for key in ts.index:
            ts[key.tz_convert(tz)] = ts.data.pop(key)

        return ts
Example #21
class FederationRemoteSendQueue(object):
    """A drop in replacement for FederationSender"""

    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> list[user_id]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = SortedDict()

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge("synapse_federation_send_queue_%s_size" % (name,),
                       "", [], lambda: len(queue))

        for queue_name in [
            "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed",
            "edus", "device_messages", "pos_time", "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            user_ids = set(
                user_id
                for uids in self.presence_changed.values()
                for user_id in uids
            )

            keys = self.presence_destinations.keys()
            i = self.presence_destinations.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_destinations[key]

            user_ids.update(
                user_id for user_id, _ in self.presence_destinations.values()
            )

            to_del = [
                user_id for user_id in self.presence_map if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            to_del = [edu_key for edu_key in self.keyed_edu if edu_key not in live_keys]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

            # Delete things out of device map
            keys = self.device_messages.keys()
            i = self.device_messages.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.device_messages[key]

    def notify_new_events(self, current_id):
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def build_and_send_edu(self, destination, edu_type, content, key=None):
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_read_receipt(self, receipt):
        """As per FederationSender

        Args:
            receipt (synapse.types.ReadReceipt):
        """
        # nothing to do here: the replication listener will handle it.
        pass

    def send_presence(self, states):
        """As per FederationSender

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so let's always
        # filter here, just in case.
        local_states = list(filter(lambda s: self.is_mine_id(s.user_id), states))

        self.presence_map.update({state.user_id: state for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_presence_to_destinations(self, states, destinations):
        """As per FederationSender

        Args:
            states (list[UserPresenceState])
            destinations (list[str])
        """
        for state in states:
            pos = self._next_pos()
            self.presence_map.update({state.user_id: state for state in states})
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per FederationSender"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        return self.pos - 1

    def federation_ack(self, token):
        self._clear_queue_before_pos(token)

    def get_replication_rows(self, from_token, to_token, limit, federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position where the worker is
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so let's delete everything it has
        # acknowledged it has seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(
                state=self.presence_map[user_id],
            )))

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append((pos, PresenceDestinationsRow(
                state=self.presence_map[user_id],
                destinations=list(dests),
            )))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in iteritems(keyed_edus):
            rows.append((pos, KeyedEduRow(
                key=edu_key,
                edu=self.keyed_edu[(destination, edu_key)],
            )))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in iteritems(device_messages):
            rows.append((pos, DeviceRow(
                destination=destination,
            )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
Example #22
import string

from sortedcontainers import SortedDict


def test_bisect():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert temp.bisect_left('a') == 0
    assert temp.bisect_right('f') == 6
    assert temp.bisect('f') == 6
Example #23
class FileTable(object):
    """Consistent-hashing ring that maps node hashes to their files."""
    def __init__(self, myip, server):
        super(FileTable, self).__init__()
        self.ring = SortedDict()
        self.hasher = hashlib.sha224
        self.myhash = self.hash(myip)
        self.add_node(myip)

        self.server = server

    def hash(self, key):
        # hashlib needs bytes; assume keys (ips/filenames) are str
        return self.hasher(key.encode()).hexdigest()[:-10]

    def hash_at(self, idx):
        idx %= len(self.ring)
        hash = self.ring.iloc[idx]
        return hash

    def add_node(self, ip):
        hash = self.hash(ip)
        self.ring[hash] = {'ip': ip, 'files': []}

        SDFS_LOGGER.info('After adding %s - %s' % (ip, repr(self.ring)))

    def remove_node(self, failed_list):
        start_time = time.time()
        # this is for debug
        flag = False

        # deep copy failed list because it will be reset soon
        ip_list = list(failed_list)

        # change the order of the failed nodes:
        # make sure the node with the smaller ring index is handled first
        if len(ip_list) == 2:
            if self.ring.index(self.hash(ip_list[0])) == 0 and self.ring.index(self.hash(ip_list[1])) == len(self.ring) - 1:
                ip_list[0], ip_list[1] = ip_list[1], ip_list[0]
            elif self.ring.index(self.hash(ip_list[0])) == self.ring.index(self.hash(ip_list[1])) + 1:
                ip_list[0], ip_list[1] = ip_list[1], ip_list[0]

        for ip in ip_list:
            hash = self.hash(ip)
            idx = self.ring.index(hash)

            # if the node is not the direct successor of the failed node, do nothing
            if len(ip_list) == 2 and ip == ip_list[1] and self.hash_at((idx + 2) % len(self.ring)) == self.myhash:
                continue

            if self.hash_at((idx + 1) % len(self.ring)) == self.myhash or (self.hash_at((idx + 2) % len(self.ring)) == self.myhash and len(ip_list) == 2):
                # this is for debug
                flag = True

                heritage = set(self.ring[hash]['files'])
                my_files = set(self.ring[self.myhash]['files'])
                next_files = set(self.ring[self.hash_at(idx + 2)]['files'])

                # determine which files go to me, to my successor, and to the one after
                to_me = heritage - my_files
                to_next = (heritage & my_files) - next_files
                to_next_next = heritage & my_files & next_files
                replica_list = [list(to_me), list(to_next), list(to_next_next)]
                
                self.ring[self.myhash]['files'].extend(to_me)

                # handle replica
                dest_ip_to_me = self.ring[self.hash_at(self.ring.index(hash) - 1)]['ip']
                dest_ip_to_next = self.ring[self.hash_at(self.ring.index(self.myhash) + 1)]['ip']
                dest_ip_to_next_next = self.ring[self.hash_at(self.ring.index(self.myhash) + 2)]['ip']
                dest_ip_list = [dest_ip_to_me, dest_ip_to_next, dest_ip_to_next_next]
                
                del self.ring[hash]

                self.server.handle_replica(replica_list, dest_ip_list, ip_list)
            
            else:
                del self.ring[hash]
            
            elapsed_time = time.time() - start_time
            if flag:
                print("It takes", elapsed_time, "to handle replica")

    def lookup(self, sdfs_filename):
        hash = self.hash(sdfs_filename)
        idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0
        ip_list = [self.ring[self.hash_at(idx + i)]['ip'] for i in range(3)]
        return ip_list

    def insert(self, sdfs_filename):
        hash = self.hash(sdfs_filename)
        idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0
        for i in range(3):
            node_hash = self.hash_at(idx + i)
            self.ring[node_hash]['files'].append(sdfs_filename)
            
            SDFS_LOGGER.info('Inserted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip']))

    def delete(self, sdfs_filename):
        hash = self.hash(sdfs_filename)
        idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0
        for i in range(3):
            node_hash = self.hash_at(idx + i)
            self.ring[node_hash]['files'].remove(sdfs_filename)
            
            SDFS_LOGGER.info('Deleted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip']))

    def update_replica(self, replica_list, dest_ip_list):
        for i in range(3):
            self.ring[self.hash(dest_ip_list[i])]['files'] = list(set(self.ring[self.hash(dest_ip_list[i])]['files'] + replica_list[i]))


    def list_my_store(self):
        print('-' * 5 + 'my files are:')
        for f in self.ring[self.myhash]['files']:
            print(f, end=' ')
        print()
        print('-' * 5 + 'that is all')

    def list_file_location(self):
        all_files = set()
        for value in self.ring.values():
            all_files.update(set(value['files']))

        for f in all_files:
            print(f + ' is stored at ', end='')
            for value in self.ring.values():
                if f in value['files']:
                    print(value['ip'], end=' ')
            print()
Example #25
def test_bisect_key():
    temp = SortedDict(modulo, 7, ((val, val) for val in range(100)))
    assert all(temp.bisect(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(temp.bisect_right(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(temp.bisect_left(val) == (val % 10) * 10 for val in range(100))
Example #26
class AddressSpaceCollapseTransform(transforms.Transform):
    """
    Transform that shrinks selected segments of the address-space

    Given a list of address ranges in which we are not interested,
    the transform applies a linear scale to the address-space regions
    marked as Range.T_KEEP; a different scale is applied to Range.T_OMIT
    regions so that these occupy 5% of the total size of the
    Range.T_KEEP regions.
    """
    def __init__(self, *args, **kwargs):
        super(AddressSpaceCollapseTransform, self).__init__(*args, **kwargs)
        self.target_ranges = RangeSet()
        """List of ranges to keep and omit"""

        self.omit_scale = 1
        """Scale factor of the omitted address ranges"""

        self.target_ranges.append(Range(0, np.inf, Range.T_KEEP))

        self._precomputed_offsets = None
        """SortedDict ... """

        self._inverse = False
        """Is this transform performing the direct or inverse operation"""

        self.has_inverse = False  # pyplot seems not to care
        self.is_separable = True
        self.input_dims = 2
        self.output_dims = 2
        self._precompute_offsets()

    def update_range(self, range_list):
        """
        Update parameters depending on the omit ranges.
        The range list must be complete, in the sense that it should
        mark every part of the address-range without holes as either
        omit or keep.
        """
        self.target_ranges = range_list

        keep = [r for r in self.target_ranges if r.rtype == Range.T_KEEP]
        omit = [r for r in self.target_ranges if r.rtype == Range.T_OMIT]
        # total size of the KEEP ranges
        keep_size = reduce(
            lambda acc, r: acc + r.size if r.size < np.inf else acc, keep, 0)
        omit_size = reduce(
            lambda acc, r: acc + r.size if r.size < np.inf else acc, omit, 0)
        if omit_size != 0:
            # we want the omitted ranges to take up 5% of the keep ranges
            # in size
            # scale = <percent_of_keep_size_to_take> * sum(keep) / sum(omit)
            self.omit_scale = 0.05 * keep_size / omit_size
        self._precompute_offsets()

    def _precompute_offsets(self):
        # reset previous offsets
        self._precomputed_offsets = SortedDict()
        x_collapsed = 0
        for r in self.target_ranges:
            r_scale = 1 if r.rtype == Range.T_KEEP else self.omit_scale
            self._precomputed_offsets[r.start] = (x_collapsed, r_scale)
            x_collapsed += r.size * r_scale

    def get_x(self, x_dataspace):
        """
        Scale the x from data-space coordinates to the collapsed
        address-space coordinates.
        The conversion uses a fast lookup of precomputed offsets
        based on the omit/keep range intervals.
        """
        if x_dataspace < 0:
            return x_dataspace
        base_idx = self._precomputed_offsets.bisect_left(x_dataspace)
        if (len(self._precomputed_offsets) == base_idx
                or self._precomputed_offsets.iloc[base_idx] > x_dataspace):
            key = self._precomputed_offsets.iloc[base_idx - 1]
        else:
            key = x_dataspace
        x_collapsed, x_scale = self._precomputed_offsets[key]
        return x_collapsed + (x_dataspace - key) * x_scale

    def get_x_inv(self, x):
        """
        Inverse of get_x

        Find the address range corresponding to the plot range
        given by scanning all the target ranges
        """
        x_inverse = 0
        x_current = 0
        for r in self.target_ranges:
            if r.rtype == Range.T_KEEP:
                if x > x_current + r.size:
                    x_current += r.size
                    x_inverse += r.size
                else:
                    x_inverse += x - x_current
                    break
            elif r.rtype == Range.T_OMIT:
                scaled_size = r.size * self.omit_scale
                if x > x_current + scaled_size:
                    x_current += scaled_size
                    x_inverse += r.size
                else:
                    x_inverse += (x - x_current) / self.omit_scale
                    break
            else:
                logger.error("The range %s must have a valid type", r)
                raise ValueError("Unexpected range in transform %s" % r)
        return x_inverse

    def transform_x(self, x):
        """
        Handle the X axis transformation
        """
        if self._inverse:
            return self.get_x_inv(x)
        else:
            return self.get_x(x)

    def transform_non_affine(self, datain):
        """
        The transform modifies only the X-axis, Y-axis is identity

        datain is a numpy array of size Nx2
        return a numpy array of size Nx2
        """
        _prev = np.array(datain)
        dataout = np.array(datain)
        for point in dataout:
            point[0] = self.transform_x(point[0])
        return dataout

    def inverted(self):
        trans = AddressSpaceCollapseTransform()
        trans.target_ranges = self.target_ranges
        trans.omit_scale = self.omit_scale
        trans._inverse = not self._inverse
        return trans
Example #27
class AddressSpaceCollapseTransform(transforms.Transform):
    """
    Transform that shrinks selected segments of the address-space

    Given a list of address ranges in which we are not interested,
    the transform applies a linear scale to the address-space regions
    marked as Range.T_KEEP; a different scale is applied to Range.T_OMIT
    regions so that these occupy 5% of the total size of the
    Range.T_KEEP regions.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self._target_ranges = []
        """
        Unsorted list of target ranges, possibly with duplicates or
        overlapping ranges.
        """

        self._intervals = None
        """
        Numpy array that holds intervals [start,end,type].
        The type is 0 for omit ranges and 1 for keep ranges.
        """

        self._precomputed_offsets = None
        """
        SortedDict that caches the transformed X corresponding to the
        start of each interval
        """

        self.omit_scale = 1
        """Scale factor of the omitted address ranges"""

        self._inverse = False
        """Is this transform performing the direct or inverse operation"""

        self.has_inverse = False  # pyplot seems not to care
        self.is_separable = True
        self.input_dims = 2
        self.output_dims = 2

    def set_ranges(self, ranges):
        """
        The ranges here represent the parts of the address space we
        want to show.

        :param ranges: list of intervals in the form [(start, end), ...]
        :type ranges: list of 2-tuples
        """
        logger.debug("Set collapse ranges (%d)", len(ranges))
        self._target_ranges = ranges
        self._precomputed_offsets = None
        self._intervals = None

    def get_ranges(self):
        """See :meth:`set_ranges`."""
        return self._target_ranges

    def _merge(self, intervals):
        """
        Given a set of intervals [(start, end), ...] merge the overlapping
        intervals.
        This is O(n*log(n)) but if all goes well is only done once for every
        plot.
        """
        merged = SortedListWithKey(intervals, key=lambda k: (k[0], k[1]))
        out = []
        if len(merged) == 0:
            return out
        curr = merged[0]
        idx = 1
        while idx < len(merged):
            to_merge = merged[idx]
            if to_merge[0] > curr[1]:
                # we are done with to_merge
                out.append(curr)
                curr = to_merge
            else:
                curr = (curr[0], to_merge[1])
            end_idx = merged.bisect((curr[1], np.inf))
            idx = end_idx
            if end_idx == len(merged):
                end_idx -= 1
            if merged[end_idx][0] <= curr[1]:
                end = max(curr[1], merged[end_idx][1])
            else:
                end = max(curr[1], merged[end_idx - 1][1])
            curr = (curr[0], end)
        out.append(curr)
        logger.debug("Merge collapse ranges (remaining %d)", len(out))
        return out

    def _gen_omit_scale(self, intervals):
        """
        Generate the scale used to collapse omit ranges.
        The scale is computed so that the omitted ranges take up 5% of the
        total size of the keep ranges.
        """
        keep = intervals[intervals[:, 2] == 1]
        omit = intervals[intervals[:, 2] == 0]
        keep_size = np.sum(keep[:, 1] - keep[:, 0])
        # the last omit interval always goes to Inf
        omit_size = np.sum(omit[:, 1] - omit[:, 0])
        if omit_size != 0:
            # we want the omitted ranges to take up 5% of the keep ranges
            # in size
            # scale = <percent_of_keep_size_to_take> * sum(keep) / sum(omit)
            self.omit_scale = 0.05 * keep_size / omit_size
        logger.debug("Omit scale 5%%: total-keep:%d total-omit:%d scale:%s",
                     keep_size, omit_size, self.omit_scale)

    def _range_len(self, start, end, step):
        return (end - start - 1) // step + 1

    def _gen_intervals(self):
        """
        Generate the non-overlapping intervals to display in the
        axis.
        The intervals generated cover the whole axis without holes.
        """
        logger.debug("Generate collapse intervals")
        # merge ranges O(n*log(n)) and sort them
        merged_intervals = self._merge(self._target_ranges)
        if len(merged_intervals) == 0:
            self._intervals = np.zeros((0, 3))
            return
        # try not using fancy vectorization
        intervals = []
        prev_end = 0
        for r in merged_intervals:
            if prev_end < r[0]:
                # omit
                intervals.append((prev_end, r[0], 0))
            # keep
            intervals.append((r[0], r[1], 1))
            prev_end = r[1]
        # last always omitted to infinity
        intervals.append((prev_end, np.inf, 0))
        self._intervals = np.array(intervals)
        self._gen_omit_scale(self._intervals[:-1])

    def _precompute_offsets(self):
        """
        Precompute the transformed X base values for the start of each
        interval on the axis. The base addresses are used to look up
        the closest interval start when transforming.
        """
        self._gen_intervals()
        logger.debug("Precompute collapse range offsets")
        # reset previous offsets
        self._precomputed_offsets = SortedDict()
        x_collapsed = 0
        for r in self._intervals:
            r_scale = 1 if r[2] else self.omit_scale
            self._precomputed_offsets[r[0]] = (x_collapsed, r_scale)
            x_collapsed += (r[1] - r[0]) * r_scale

    def get_x(self, x_dataspace):
        """
        Get the transformed X coordinate.
        This is just a lookup in the precomputed offsets and some calculations,
        should be O(log(n)) in the number of intervals (which is expected to be
        at most in the order of 10**3~10**4)
        """
        if self._precomputed_offsets is None:
            self._precompute_offsets()

        if x_dataspace < 0 or len(self._precomputed_offsets) == 0:
            return x_dataspace
        base_idx = self._precomputed_offsets.bisect_left(x_dataspace)
        if (len(self._precomputed_offsets) == base_idx
                or self._precomputed_offsets.iloc[base_idx] > x_dataspace):
            key = self._precomputed_offsets.iloc[base_idx - 1]
        else:
            key = x_dataspace
        x_collapsed, x_scale = self._precomputed_offsets[key]
        return x_collapsed + (x_dataspace - key) * x_scale

    def get_x_inv(self, x):
        """
        Inverse of get_x

        Find the address range corresponding to the plot range
        given by scanning all the target ranges
        XXX: this may be made faster by using a reverse form of
        the precomputed offsets but there is no need for
        such an effort because the inverse transform is not
        invoked as much.
        """
        if self._precomputed_offsets is None:
            self._precompute_offsets()
        x_inverse = 0
        x_current = 0
        for r in self._intervals:
            r_size = r[1] - r[0]
            if r[2] == 1:
                # range is type KEEP
                if x > x_current + r_size:
                    x_current += r_size
                    x_inverse += r_size
                else:
                    x_inverse += x - x_current
                    break
            elif r[2] == 0:
                scaled_size = r_size * self.omit_scale
                if x > x_current + scaled_size:
                    x_current += scaled_size
                    x_inverse += r_size
                else:
                    x_inverse += (x - x_current) / self.omit_scale
                    break
            else:
                logger.error("The range %s must have a valid type", r)
                raise ValueError("Unexpected range in transform %s" % r)
        return x_inverse

    def transform_x(self, x):
        """
        Handle the X axis transformation
        """
        if self._inverse:
            return self.get_x_inv(x)
        else:
            return self.get_x(x)

    def transform_non_affine(self, datain):
        """
        The transform modifies only the X-axis, Y-axis is identity

        datain is a numpy array of size Nx2
        return a numpy array of size Nx2
        """
        # copy the input so the transform does not modify it in place
        dataout = np.array(datain)
        for point in dataout:
            point[0] = self.transform_x(point[0])
        return dataout

    def inverted(self):
        trans = AddressSpaceCollapseTransform()
        trans._target_ranges = self._target_ranges
        trans._intervals = self._intervals
        trans._precomputed_offsets = self._precomputed_offsets
        trans.omit_scale = self.omit_scale
        trans._inverse = not self._inverse
        return trans
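
The lookup in get_x above is the core SortedDict trick in this example: keep the interval start points as keys and bisect to find the interval containing a coordinate. Below is a minimal, self-contained sketch of that pattern; the interval table and omit_scale value are illustrative assumptions, not values taken from the class.

from sortedcontainers import SortedDict

omit_scale = 0.1
# interval start -> (collapsed start of the interval, scale applied inside it)
offsets = SortedDict({0: (0.0, omit_scale), 100: (10.0, 1.0), 200: (110.0, omit_scale)})

def collapse(x):
    # closest key <= x, as in get_x (assumes x >= the first key, as get_x guarantees)
    idx = offsets.bisect_right(x) - 1
    base = offsets.keys()[idx]
    x_collapsed, scale = offsets[base]
    return x_collapsed + (x - base) * scale

print(collapse(150))  # 60.0: 10 units for the collapsed [0, 100) plus 50 kept 1:1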
Пример #28
0
class SnapshotGraph(object):
    def __init__(self, **attr):
        self.graph = {}
        self.graph.update(attr)
        self.snapshots = SortedDict()

    @property
    def name(self):
        """String identifier of the snapshot graph.

        This snapshot graph attribute appears in the attribute dict SnapshotGraph.graph
        keyed by the string `"name"`, as well as in an attribute (technically
        a property) `SnapshotGraph.name`. This is entirely user controlled.
        """
        return self.graph.get('name', '')

    @name.setter
    def name(self, s):
        self.graph['name'] = s

    def __str__(self):
        """Return the snapshot graph name.

        Returns
        -------
        name : string
            The name of the snapshot graph.

        Examples
        --------
        >>> G = dnx.SnapshotGraph(name='foo')
        >>> str(G)
        'foo'
        """
        return self.name

    def __len__(self):
        """Return the number of snapshots. Use: 'len(G)'.

        Returns
        -------
        num_snapshots : int
            The number of snapshots in the graph.

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG2 = nx.Graph()
        >>>
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> nxG2.add_edges_from([(1, 4), (1, 3)])
        >>>
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot(graph=nxG1)
        >>> G.add_snapshot(graph=nxG2)
        >>> len(G)
        2

        """
        return len(self.snapshots)

    def __contains__(self, graph):
        """Return True if graph in the snapshot graph, False otherwise. Use: 'graph in G'.

        Parameters
        ----------
        graph: networkx graph object
            networkx graph to be looked for into snapshot graph.

        Returns
        -------
        bool
            True if graph is in the snapshot graph, False otherwise.

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG2 = nx.Graph()
        >>>
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> nxG2.add_edges_from([(1, 4), (1, 3)])
        >>>
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot(graph=nxG1)
        >>> G.add_snapshot(graph=nxG2)
        >>> nxG1 in G
        True
        """

        try:
            return graph in self.snapshots.values()
        except TypeError:
            return False

    def __iter__(self):
        """Iterates through snapshots in snapshot graph.


        Returns
        -------
        Iterable of snapshots

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG2 = nx.Graph()
        >>>
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> nxG2.add_edges_from([(1, 4), (1, 3)])
        >>>
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot(graph=nxG1)
        >>> G.add_snapshot(graph=nxG2)
        >>> for snapshot in G:
        ...     print(True)
        True
        True
        """

        return iter(self.snapshots.values())

    def insert(self, graph, start=None, end=None, time=None):
        """Insert a graph into the snapshot graph, with specified intervals.

        Parameters
        ----------
        graph: networkx graph object
            A networkx graph to be inserted into snapshot graph.
        start: start of the interval, inclusive
        end: end of the interval, exclusive
        time: timestamp for impulses, cannot be used together with (start, end)

        Returns
        -------
        None

        Examples
        --------
        >>> nxG1 = nx.Graph()
        >>> nxG1.add_edges_from([(1, 2), (1, 3)])
        >>> G = dnx.SnapshotGraph()
        >>> G.insert(nxG1, start=0, end=3)

        """
        if time is not None and (start or end):
            raise ValueError('Time and (start or end) cannot both be specified.')
        elif time is not None:
            self.snapshots.update({(time, time): graph})
        elif start is None or end is None:
            raise ValueError('Either time or both start and end must be specified.')
        elif start > end:
            raise ValueError('Start of the interval must be less than or equal to the end.')
        else:
            self.snapshots.update({(start, end): graph})

    def add_snapshot(self, ebunch=None, graph=None, start=None, end=None, time=None):
        """Add a snapshot with a bunch of edge values.

        Parameters
        ----------
        ebunch : container of edges, optional (default= None)
            Each edge in the ebunch list will be included to all added graphs.
        graph : networkx graph object, optional (default= None)
            networkx graph to be inserted into snapshot graph.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        time: timestamp for impulses, cannot be used together with (start, end)

        Returns
        -------
        None

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 4), (1, 3)], start=0, end=3)
        """
        if not graph:
            g = Graph()
            g.add_edges_from(ebunch)
        else:
            g = graph

        if time is not None and (start or end):
            raise ValueError('Time and (start or end) cannot both be specified.')
        elif time is not None:
            self.insert(g, time=time)
        elif start is None or end is None:
            raise ValueError('Either time or both start and end must be specified.')
        else:
            self.insert(g, start=start, end=end)

    def subgraph(self, nbunch, sbunch=None, start=None, end=None):
        """Return a snapshot graph containing only the nodes in bunch, and snapshot indexes in sbunch.

        Parameters
        ----------
        nbunch : container of nodes
            Each node in the nbunch list will be included in all subgraphs indexed in sbunch.
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of subgraphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            snap_graph : SnapshotGraph object
                Contains only the nodes in bunch, and snapshot indexes in sbunch.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=0, end=3)
        >>> G.add_snapshot([(1, 2), (2, 3), (4, 6), (2, 4)], start=3, end=10)
        >>> H = G.subgraph([4, 6])
        >>> type(H)
        <class 'snapshotgraph.SnapshotGraph'>
        >>> list(H.get([0])[0].edges(data=True))
        [(4, 6, {})]
        """

        subgraph = SnapshotGraph()
        subgraph.graph = self.graph

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            for key, snapshot in self._get(sbunch=sbunch, include_interval=True):
                subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1])
        else:
            for key, snapshot in self._get(start=start, end=end, include_interval=True):
                subgraph.add_snapshot(graph=snapshot.subgraph(nbunch), start=key[0], end=key[1])

        return subgraph

    def degree(self, sbunch=None, nbunch=None, start=None, end=None, weight=None):
        """Return a list of tuples containing the degrees of each node in each snapshot

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of node degrees. It is highly recommended that this list is sequential,
            however it can be out of order.
        nbunch : container of nodes, optional (default= None)
            Each node in the nbunch list will be included in the returned list of
            node degrees.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        weight : string, optional (default= None)
            The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1.
            The degree is the sum of the edge weights adjacent to the node.

        Returns
        -------
            degree_list : list
                List of DegreeView objects containing the degree of each node, indexed by requested snapshot.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.degree(sbunch=[1])
        [DegreeView({1: 2, 4: 1, 3: 1})]
        >>> G.degree(nbunch=[1, 2])
        [DegreeView({1: 2, 2: 1}), DegreeView({1: 2})]
        """
        # returns a list of degrees for each graph snapshot in snapshots
        # use generator to create list of degrees

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            if nbunch:
                return [graph.degree(nbunch, weight=weight) for graph in self._get(sbunch=sbunch)]
            else:
                return [graph.degree(weight=weight) for graph in self._get(sbunch=sbunch)]
        else:
            if nbunch:
                return [graph.degree(nbunch, weight=weight) for graph in self._get(start=start, end=end)]
            else:
                return [graph.degree(weight=weight) for graph in self._get(start=start, end=end)]

    def number_of_nodes(self, sbunch=None, start=None, end=None):
        """Gets number of nodes in each snapshot requested in 'sbunch'.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of number of nodes in the snapshot. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            num_nodes : list
                A list of the number of nodes in each requested snapshot.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.number_of_nodes(sbunch=[1])
        [3]
        >>> G.number_of_nodes(sbunch=[0, 1])
        [3, 3]
        """
        # returns a list of the number of nodes in each graph in the range

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.number_of_nodes() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.number_of_nodes() for graph in self._get(start=start, end=end)]

    def order(self, sbunch=None, start=None, end=None):
        """Returns order of each graph requested in 'sbunch'.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of node orders. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            snapshot_orders : list
                A list of the orders of each snapshot.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.order(sbunch=[1])
        [3]
        >>> G.order(sbunch=[0, 1])
        [3, 3]
        """
        # returns a list of the order of the graph in the range

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.order() for graph in self._get(sbunch=sbunch)]
        else:
            return [g.order() for g in self._get(start=start, end=end)]

    def has_node(self, n, sbunch=None, start=None, end=None):
        """Gets boolean list of if a snapshot in 'sbunch' contains node 'n'.

        Parameters
        ----------
        n : node
            Node to be checked for in requested snapshots.
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of if the snapshot graph includes the node. It is highly recommended
            that this list is sequential, however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            List of boolean values indicating whether each requested snapshot contains n.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.has_node(1, sbunch=[1])
        [True]
        >>> G.has_node(1)
        [True, True]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.has_node(n) for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.has_node(n) for graph in self._get(start=start, end=end)]

    def is_multigraph(self, sbunch=None, start=None, end=None):
        """Returns a list of boolean values for if the graph at the index is a multigraph.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of booleans. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            multi_list : list
                List of boolean values, True where the requested snapshot is a multigraph.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.is_multigraph(sbunch=[0, 1])
        [False, False]
        >>> G.is_multigraph()
        [False, False]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.is_multigraph() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.is_multigraph() for graph in self._get(start=start, end=end)]

    def is_directed(self, sbunch=None, start=None, end=None):
        """Returns a list of boolean values for if the graph at the index is a directed graph.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of booleans. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            is_direct_list : list
                List of boolean values, True where the requested snapshot is a directed graph.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.is_directed(sbunch=[0, 1])
        [False, False]
        >>> G.is_directed()
        [False, False]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.is_directed() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.is_directed() for graph in self._get(start=start, end=end)]

    def to_directed(self, sbunch=None, start=None, end=None):
        """Returns a list of networkx directed graph objects.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of directed graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            direct_list : list
                List of networkx directed graph objects.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.to_directed(sbunch=[0, 1])
        [<networkx.classes.digraph.DiGraph object at 0x7f1a6de49dd8>, <networkx.classes.digraph.DiGraph object at 0x7f1a6de49e10>]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.to_directed() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.to_directed() for graph in self._get(start=start, end=end)]

    def to_undirected(self, sbunch=None, start=None, end=None):
        """Returns a list of networkx graph objects.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of undirected graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
            undirect_list : list
                List of networkx graph objects.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.to_undirected(sbunch=[0, 1])
        [<networkx.classes.graph.Graph object at 0x7ff532219e10>, <networkx.classes.graph.Graph object at 0x7ff532219e48>]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.to_undirected() for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.to_undirected() for graph in self._get(start=start, end=end)]

    def size(self, sbunch=None, start=None, end=None, weight=None):
        """Returns the size of each graph index as specified in sbunch as a list.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of sizes. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        weight : string, optional (default=None)
            The edge attribute that holds the numerical value used as a weight.
            If None, then each edge has weight 1.

        Returns
        -------
            size_list: list
                List of sizes of each graph indexed in sbunch.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.size(sbunch=[0, 1])
        [2, 2]
        >>> G.size()
        [2, 2]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [graph.size(weight=weight) for graph in self._get(sbunch=sbunch)]
        else:
            return [graph.size(weight=weight) for graph in self._get(start=start, end=end)]

    def _get(self, sbunch=None, start=None, end=None, include_interval=False, split_overlaps=False):
        """Returns a list of graphs specified in sbunch. Hidden utility tool for other functions.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        include_interval: if True, return snapshots with their corresponding intervals
        split_overlaps: if True, when querying by time interval, split snapshots whose intervals overlap the
            query interval. For example: graph G contains snapshots with time intervals [(0,4),(4,6),(6,10)]. If the
            query interval is [2,10], the snapshot with interval (0,4) will be split into two snapshots (0,2) and
            (2,4), both of which hold the same copy of the original snapshot. This parameter is used for updating
            graphs by interval. For instance, with the example above, if you want to update interval (2,10), the
            snapshot at (0,2) won't be updated.

        Returns
        -------
        If include_interval: List of tuples of (interval, networkx graph object).
        else: List of networkx graph objects.

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G._get(sbunch=[0])
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>]
        >>> G._get()
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        >>> G._get(start=2, end=6)
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        """

        if include_interval:
            graphs = self.snapshots.items()
        else:
            graphs = self.snapshots.values()

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:  # if retrieve by indexes
            for index in sbunch:
                yield graphs[index]
        else:  # if retrieve by interval
            if start is None:
                min_idx = 0
            else:
                min_idx = self.snapshots.bisect_left((start,))

                # Decrease 1 index if start is in the middle of an interval
                # Eg: if Keys = [(2,5)(5,6)], start=3 won't retrieve (2,5) as we want,
                # therefore, decrease 1 index to include (2,5). If start=5, then we won't need to change
                if min_idx > 0 and start < self.snapshots.keys()[min_idx][0]:
                    if split_overlaps:
                        # Eg: if Keys = [(2,5)(5,6)] and start=3, split (2,5) into (2,3) and (3,5)
                        key, g = self.snapshots.popitem(min_idx - 1)
                        self.insert(g, key[0], start)
                        self.insert(copy.deepcopy(g), start, key[1])
                    else:
                        min_idx -= 1

            if end is None:
                max_idx = len(self.snapshots)
            else:
                max_idx = self.snapshots.bisect_left((end,))
                # Split the snapshot if 'end' is in the middle of an interval
                # Eg: if Keys = [(2,5)(5,9)] and end=7, split (5,9) into (5,7) and (7,9)
                if split_overlaps and max_idx < len(self.snapshots) and end < self.snapshots.keys()[max_idx][1]:
                    key, g = self.snapshots.popitem(max_idx)
                    self.insert(g, key[0], end)
                    self.insert(copy.deepcopy(g), end, key[1])

            for graph in graphs[min_idx: max_idx]:
                yield graph

    def get(self, sbunch=None, start=None, end=None):
        """Returns a list of graphs specified in sbunch. Interface function for users.

        Parameters
        ----------
        sbunch : container of snapshot indexes, optional (default= None)
            Each snapshot index in this list will be included in the returned list
            of graphs. It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
        List of networkx graph objects.


        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)
        >>> G.get(sbunch=[0])
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>]
        >>> G.get()
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        >>> G.get(start=2, end=6)
        [<networkx.classes.graph.Graph object at 0x7f27f5bd39b0>, <networkx.classes.graph.Graph object at 0x7f27f5bd3d30>]
        """

        return list(self._get(sbunch, start, end))

    def add_nodes_from(self, nbunch, sbunch=None, start=None, end=None, **attrs):
        """Adds nodes to snapshots in sbunch.
        Note: This function may lead to increase in number of snapshots if changes occur within a snapshot.

        Parameters
        ----------
        nbunch : container of nodes
            Each node in the nbunch list will be added to all graphs indexed in sbunch.
        sbunch : container of snapshot indexes, optional (default= None)
            The nodes will be added to each snapshot indexed in this list.
            It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
        None

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)

        >>> G.add_nodes_from([5, 6, 7], [0])
        >>> G.add_nodes_from([8, 9, 10, 11], [1])
        >>> nx.adjacency_matrix(G.get()[0]).todense()
        [[0 1 1 0 0 0]
         [1 0 0 0 0 0]
         [1 0 0 0 0 0]
         [0 0 0 0 0 0]
         [0 0 0 0 0 0]
         [0 0 0 0 0 0]]
        >>> nx.adjacency_matrix(G.get()[1]).todense()
        [[0 1 1 0 0 0 0]
         [1 0 0 0 0 0 0]
         [1 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]
         [0 0 0 0 0 0 0]]

        """
        
        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            for graph in self._get(sbunch=sbunch):
                graph.add_nodes_from(nbunch, **attrs)
        else:
            for graph in self._get(start=start, end=end, split_overlaps=True):
                graph.add_nodes_from(nbunch, **attrs)

    def add_edges_from(self, ebunch, sbunch=None, start=None, end=None, **attrs):
        """Adds edges to snapshots in sbunch.
        Note: This function may lead to increase in number of snapshots if changes occur within a snapshot.

        Parameters
        ----------
        ebunch : container of edges
            Each edge in the ebunch list will be added to all graphs indexed in sbunch.
        sbunch : container of snapshot indexes, optional (default= None)
            The edges will be added to each snapshot indexed in this list.
            It is highly recommended that this list is sequential,
            however it can be out of order.
        start: start timestamp, inclusive
        end: end timestamp, exclusive

        Returns
        -------
        None

        Examples
        --------
        >>> G = dnx.SnapshotGraph()
        >>> G.add_snapshot([(1, 2), (1, 3)], start=0, end=3)
        >>> G.add_snapshot([(1, 4), (1, 3)], start=3, end=10)

        >>> G.add_edges_from([(5, 6), (7, 6)], [0])
        >>> G.add_edges_from([(8, 9), (10, 11)], [0, 1])
        >>> nx.adjacency_matrix(G.get()[0]).todense()
        [[0 1 1 0 0 0 0 0 0 0]
         [1 0 0 0 0 0 0 0 0 0]
         [1 0 0 0 0 0 0 0 0 0]
         [0 0 0 0 1 0 0 0 0 0]
         [0 0 0 1 0 1 0 0 0 0]
         [0 0 0 0 1 0 0 0 0 0]
         [0 0 0 0 0 0 0 1 0 0]
         [0 0 0 0 0 0 1 0 0 0]
         [0 0 0 0 0 0 0 0 0 1]
         [0 0 0 0 0 0 0 0 1 0]]
        >>> nx.adjacency_matrix(G.get()[1]).todense()
        [[0 1 1 0 0 0 0]
         [1 0 0 0 0 0 0]
         [1 0 0 0 0 0 0]
         [0 0 0 0 1 0 0]
         [0 0 0 1 0 0 0]
         [0 0 0 0 0 0 1]
         [0 0 0 0 0 1 0]]

        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            for graph in self._get(sbunch=sbunch):
                graph.add_edges_from(ebunch, **attrs)
        else:
            for graph in self._get(start=start, end=end, split_overlaps=True):
                graph.add_edges_from(ebunch, **attrs)

    @staticmethod
    def load_from_txt(path, delimiter=";", comments="#", start='start', end='end'):
        """Read snapshot graph in from path.
           Every line in the file must be an adjacency matrix, with rows separated by delimiter.

        Parameters
        ----------
        path : string or file
           Filename to read.

        comments : string, optional
           Marker for comment lines

        start: string, optional
            Marker for start timestamps

        end: string, optional
            Marker for end timestamps

        delimiter : string, optional
           Separator for rows in matrix.  The default is ;. Cannot be whitespace or \n.

        Returns
        -------
        G: SnapshotGraph
            The graph corresponding to the list of adjacency matrices.

        Examples
        --------
        >>> G = dnx.SnapshotGraph.load_from_txt("my_dygraph.txt")
        """

        if delimiter == ' ' or delimiter == '\n':
            raise ValueError("Delimiter cannot be " + delimiter + ".")

        sg = SnapshotGraph()

        with open(path, 'r') as file:
            for line in file:
                p = line.find(comments)
                if p >= 0:
                    line = line[:p]
                if not len(line):
                    continue

                p = min(line.find(start), line.find(end))
                interval = [None, None]

                for item in line[p:].split():
                    key, value = item.split('=')

                    try:
                        value = float(value)
                    except ValueError:
                        raise ValueError('Value of "{}" must be float.'.format(key))

                    if key == start:
                        interval[0] = value
                    else:
                        interval[1] = value

                if interval[0] is None or interval[1] is None:
                    raise ValueError('A snapshot does not include its interval')

                line = line[:p].strip()
                matrix = []
                for row in line.split(delimiter):
                    matrix.append(row.split(' '))

                g = from_numpy_array(np.array(matrix, dtype=float))  # rows were parsed as strings
                sg.insert(g, start=interval[0], end=interval[1])

        return sg

    def save_to_txt(self, path, delimiter=";", start='start', end='end'):
        """Write snapshot graph to path.
           Every line in the file will be an adjacency matrix.

        Parameters
        ----------
        path : string or file
           Filename to write.

        start: string, optional
            Marker for start timestamps

        end: string, optional
            Marker for end timestamps

        delimiter : string, optional
           Separator for rows in matrix.  The default is ;. Cannot be whitespace or \n.

        Examples
        --------
        >>> G.save_to_txt("my_dygraph.txt")
        """

        if len(self) == 0:
            raise ValueError("Given graph is empty.")

        if delimiter == ' ' or delimiter == '\n':
            raise ValueError("Delimiter cannot be " + delimiter + ".")

        with open(path, 'w') as file:
            for interval, graph in self._get(include_interval=True):
                m = adjacency_matrix(graph).todense()
                line = delimiter.join(' '.join(y) for y in np.asarray(m, dtype=str)) + ' ' + start + '=' +\
                    str(interval[0]) + ' ' + end + '=' + str(interval[1]) + '\n'

                file.write(line)

    def compute_network_statistic(self, nx_statistic_function, sbunch=None, start=None, end=None, **kwargs):
        """Compute networkx statistics on each snapshot.

        Parameters
        ----------
        nx_statistic_function : function from networkx.algorithms
           Statistic function to calculate.
        sbunch: snapshots indices to compute statistic
        start: start timestamp, inclusive
        end: end timestamp, exclusive
        kwargs : optional
           inputs for nx_statistic_function

        Examples
        --------
        >>> G.compute_network_statistic(nx.algorithms.centrality.degree_centrality)
        """

        if sbunch and (start or end):
            raise ValueError('Either sbunch or (start and end) can be specified.')
        elif sbunch:
            return [nx_statistic_function(graph, **kwargs) for graph in self._get(sbunch=sbunch)]
        else:
            return [nx_statistic_function(graph, **kwargs) for graph in self._get(start=start, end=end)]
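
The retrieval in _get above hinges on the snapshots being keyed by (start, end) tuples, which sort lexicographically, so bisecting on the one-element tuple (t,) finds the first interval starting at or after t. Below is a minimal, self-contained sketch of that pattern with illustrative intervals; the len() guard is an addition for the case where start lies beyond every interval.

from sortedcontainers import SortedDict

snapshots = SortedDict({(0, 3): 'A', (3, 10): 'B', (10, 12): 'C'})

def snapshots_touching(start, end):
    # first key whose interval starts at or after `start`
    min_idx = snapshots.bisect_left((start,))
    # step back one slot if `start` falls inside the previous interval,
    # mirroring the adjustment done in _get
    if min_idx > 0 and (min_idx == len(snapshots)
                        or start < snapshots.keys()[min_idx][0]):
        min_idx -= 1
    max_idx = snapshots.bisect_left((end,))
    return list(snapshots.values()[min_idx:max_idx])

print(snapshots_touching(2, 6))  # ['A', 'B']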
Пример #29
0
class LevelTrace(object):
    """ Traces the level of some entity across a time span """

    def __init__(self, trace=None):
        """ Creates a new level trace, possibly copying from an existing object. """

        if trace is None:
            self._trace = SortedDict()
        elif isinstance(trace, LevelTrace):
            self._trace = SortedDict(trace._trace)
        else:
            self._trace = SortedDict(trace)

        # Make sure trace is terminated (returns to 0)
        if len(self._trace) > 0 and self._trace[self._trace.keys()[-1]] != 0:
            raise ValueError(
                "Trace not terminated - ends with {}:{}!".format(
                    self._trace.keys()[-1], self._trace[self._trace.keys()[-1]])
                )

    def __repr__(self):
        items = ', '.join(["{!r}: {!r}".format(k, v)
                           for k, v in self._trace.items()])
        return "LevelTrace({{{}}})".format(items)

    def __eq__(self, other):
        return self._trace == other._trace

    def __neg__(self):
        return self.map(operator.neg)
    def __sub__(self, other):
        return self.zip_with(other, operator.sub)
    def __add__(self, other):
        return self.zip_with(other, operator.add)

    def start(self):
        """ Returns first non-null point in trace """
        if len(self._trace) == 0:
            return 0
        return self._trace.keys()[0]

    def end(self):
        """ Returns first point in trace that is null and only followed by nulls """
        if len(self._trace) == 0:
            return 0
        return self._trace.keys()[-1]

    def length(self):
        if len(self._trace) == 0:
            return 0
        return self.end() - self.start()

    def get(self, time):
        ix = self._trace.bisect_right(time) - 1
        if ix < 0:
            return 0
        else:
            (_, lvl) = self._trace.peekitem(ix)
            return lvl

    def map(self, fn):
        return LevelTrace({t: fn(v) for t, v in self._trace.items()})
    def map_key(self, fn):
        return LevelTrace(dict(fn(t, v) for t, v in self._trace.items()))

    def shift(self, time):
        return self.map_key(lambda t, v: (t + time, v))

    def __getitem__(self, where):

        # For non-slices defaults to get
        if not isinstance(where, slice):
            return self.get(where)
        if where.step is not None:
            raise ValueError("Stepping meaningless for LevelTrace!")

        # Limit
        res = LevelTrace(self)
        if where.start is not None and where.start > res.start():
            res.set(res.start(), where.start, 0)
        if where.stop is not None and where.stop < res.end():
            res.set(where.stop, res.end(), 0)

        # Shift, if necessary
        if where.start is not None:
            res = res.shift(-where.start)
        return res

    def set(self, start, end, level):
        """ Sets the level for some time range
        :param start: Start of range
        :param end: End of range
        :param level: Level to set
        """

        # Check errors, no-ops
        if start >= end:
            return

        # Determine levels at start (and before start)
        start_ix = self._trace.bisect_right(start) - 1
        prev_lvl = lvl = 0
        if start_ix >= 0:
            (t, lvl) = self._trace.peekitem(start_ix)
            # If we have no entry exactly at our start point, the
            # level was constant at this point before
            if start > t:
                prev_lvl = lvl
            # Otherwise look up previous level. Default 0 (see above)
            elif start_ix > 0:
                (_, prev_lvl) = self._trace.peekitem(start_ix-1)

        # Prepare start
        if prev_lvl == level:
            if start in self._trace:
                del self._trace[start]
        else:
            self._trace[start] = level

        # Remove all in-between states
        for time in list(self._trace.irange(start, end, inclusive=(False, False))):
            lvl = self._trace[time]
            del self._trace[time]

        # Add or remove end, if necessary
        if end not in self._trace:
            if lvl != level:
                self._trace[end] = lvl
        elif level == self._trace[end]:
            del self._trace[end]


    def add(self, start, end, amount):
        """ Increases the level for some time range
        :param start: Start of range
        :param end: End of range
        :param amount: Amount to add to level
        """

        # Check errors, no-ops
        if start > end:
            raise ValueError("End needs to be after start!")
        if start == end or amount == 0:
            return

        # Determine levels at start (and before start)
        start_ix = self._trace.bisect_right(start) - 1
        prev_lvl = lvl = 0
        if start_ix >= 0:
            (t, lvl) = self._trace.peekitem(start_ix)
            # If we have no entry exactly at our start point, the
            # level was constant at this point before
            if start > t:
                prev_lvl = lvl
            # Otherwise look up previous level. Default 0 (see above)
            elif start_ix > 0:
                (_, prev_lvl) = self._trace.peekitem(start_ix-1)

        # Prepare start
        if prev_lvl == lvl + amount:
            del self._trace[start]
        else:
            self._trace[start] = lvl + amount

        # Update all in-between states
        for time in self._trace.irange(start, end, inclusive=(False, False)):
            lvl = self._trace[time]
            self._trace[time] = lvl + amount

        # Add or remove end, if necessary
        if end not in self._trace:
            self._trace[end] = lvl
        elif lvl + amount == self._trace[end]:
            del self._trace[end]

    def __delitem__(self, where):

        # Cannot set single values
        if not isinstance(where, slice):
            raise ValueError("Cannot set level for single point, pass an interval!")
        if where.step is not None:
            raise ValueError("Stepping meaningless for LevelTrace!")

        # Set range to zero
        start = (where.start if where.start is not None else self.start())
        end = (where.stop if where.stop is not None else self.end())
        self.set(start, end, 0)

    def __setitem__(self, where, value):

        # Cannot set single values
        if not isinstance(where, slice):
            raise ValueError("Cannot set level for single point, pass an interval!")
        if where.step is not None:
            raise ValueError("Stepping meaningless for LevelTrace!")

        # Setting a level trace?
        if isinstance(value, LevelTrace):

            # Remove existing data
            del self[where]
            if where.start is not None:
                if value.start() < 0:
                    raise ValueError("Level trace starts before 0!")
                value = value.shift(where.start)
            if where.stop is not None:
                if value.end() > where.stop:
                    raise ValueError("Level trace to set is larger than slice!")
            self._trace = (self + value)._trace

        else:

            # Otherwise set constant value
            start = (where.start if where.start is not None else self.start())
            end = (where.stop if where.stop is not None else self.end())
            self.set(start, end, value)

    def foldl1(self, start, end, fn):
        """
        Does a left-fold over the levels present in the given range. Seeds
        with level at start.
        """

        if start > end:
            raise ValueError("End needs to be after start!")
        val = self.get(start)
        start_ix = self._trace.bisect_right(start)
        end_ix = self._trace.bisect_left(end)
        for lvl in self._trace.values()[start_ix:end_ix]:
            val = fn(val, lvl)
        return val

    def minimum(self, start, end):
        """ Returns the lowest level in the given range """
        return self.foldl1(start, end, min)
    def maximum(self, start, end):
        """ Returns the highest level in the given range """
        return self.foldl1(start, end, max)

    def foldl_time(self, start, end, val, fn):
        """
        Does a left-fold over the levels present in the given range,
        also passing how long the level was held. Seed passed.
        """

        if start > end:
            raise ValueError("End needs to be after start!")

        last_time = start
        last_lvl = self.get(start)

        start_ix = self._trace.bisect_right(start)
        end_ix = self._trace.bisect_left(end)
        for time, lvl in self._trace.items()[start_ix:end_ix]:
            val = fn(val, time-last_time, last_lvl)
            last_time = time
            last_lvl = lvl

        return fn(val, end-last_time, last_lvl)

    def integrate(self, start, end):
        """ Returns the integral over a range (sum below level curve) """
        return self.foldl_time(start, end, 0,
                               lambda v, time, lvl: v + time * lvl)
    def average(self, start, end):
        """ Returns the average level over a given range """
        return self.integrate(start, end) / (end - start)

    def find_above(self, time, level):
        """Returns the first time larger or equal to the given start time
        where the level is at least the specified value.
        """

        if self.get(time) >= level:
            return time
        ix = self._trace.bisect_right(time)
        for t, lvl in self._trace.items()[ix:]:
            if lvl >= level:
                return t
        return None

    def find_below(self, time, level):
        """Returns the first time larger or equal to the given start time
        where the level is less or equal the specified value.
        """

        if self.get(time) <= level:
            return time
        ix = self._trace.bisect_right(time)
        for t, lvl in self._trace.items()[ix:]:
            if lvl <= level:
                return t
        return None

    def find_below_backward(self, time, level):
        """Returns the last time smaller or equal to the given time where
        there exists a region to the left where the level is below the
        given value.
        """

        last = time
        ix = self._trace.bisect_right(time)-1
        if ix >= 0:
            for t, lvl in self._trace.items()[ix::-1]:
                if lvl <= level and time > t:
                    return last
                last = t
        if level >= 0:
            return last
        return None

    def find_above_backward(self, time, level):
        """Returns the last time smaller or equal to the given time where
        there exists a region to the left where the level is below the
        given value.
        """

        last = time
        ix = self._trace.bisect_right(time)-1
        if ix >= 0:
            for t, lvl in self._trace.items()[ix::-1]:
                if lvl >= level and time > t:
                    return last
                last = t
        if level <= 0:
            return last
        return None

    def find_period_below(self, start, end, target, length):
        """Returns a period where the level is below the target for a certain
        length of time, within a given start and end time"""

        if start > end:
            raise ValueError("End needs to be after start!")
        if length < 0:
            raise ValueError("Period length must not be negative!")

        period_start = (start if self.get(start) <= target else None)

        start_ix = self._trace.bisect_right(start)
        end_ix = self._trace.bisect_left(end)
        for time, lvl in self._trace.items()[start_ix:end_ix]:
            # Period long enough?
            if period_start is not None:
                if time >= period_start + length:
                    return period_start
            # Not enough space until end?
            elif time + length > end:
                return None
            # Above target? Reset period
            if lvl > target:
                period_start = None
            else:
                if period_start is None:
                    period_start = time

        # Possible at end?
        if period_start is not None and period_start+length <= end:
            return period_start

        # Nothing found
        return None

    def zip_with(self, other, fn):

        # Simple cases
        if len(self._trace) == 0:
            return other.map(lambda x: fn(0, x))
        if len(other._trace) == 0:
            return self.map(lambda x: fn(x, 0))

        # Read first item from both sides
        left = self._trace.items()
        right = other._trace.items()
        left_ix = 0
        right_ix = 0
        left_val = 0
        right_val = 0
        last_val = 0

        trace = SortedDict()

        # Go through pairs
        while left_ix < len(left) and right_ix < len(right):

            # Next items
            lt,lv = left[left_ix]
            rt,rv = right[right_ix]

            # Determine what to do
            if lt < rt:
                v = fn(lv, right_val)
                if v != last_val:
                    last_val = trace[lt] = v
                left_val = lv
                left_ix += 1
            elif lt > rt:
                v = fn(left_val, rv)
                if v != last_val:
                    last_val = trace[rt] = v
                right_val = rv
                right_ix += 1
            else:
                v = fn(lv, rv)
                if v != last_val:
                    last_val = trace[lt] = v
                left_val = lv
                left_ix += 1
                right_val = rv
                right_ix += 1

        # Handle left-overs
        while left_ix < len(left):
            lt,lv = left[left_ix]
            v = fn(lv, right_val)
            if v != last_val:
                last_val = trace[lt] = v
            left_ix += 1
        while right_ix < len(right):
            rt,rv = right[right_ix]
            v = fn(left_val, rv)
            if v != last_val:
                last_val = trace[rt] = v
            right_ix += 1

        return LevelTrace(trace)
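
The whole LevelTrace class rests on one representation: a step function stored as a SortedDict of change points, where the level at any time is the value of the last change point at or before it. A minimal sketch of that lookup, with illustrative values:

from sortedcontainers import SortedDict

# change point -> new level; the trace is 0 before the first key
trace = SortedDict({2: 1, 5: 3, 9: 0})

def level_at(time):
    ix = trace.bisect_right(time) - 1
    return trace.peekitem(ix)[1] if ix >= 0 else 0

print([level_at(t) for t in (0, 2, 4, 7, 9)])  # [0, 1, 1, 3, 0]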
Пример #30
0
      posStrand = [False]
    # CAN THEORETICALLY EXTRACT FROM BOTH STRANDS
    else: posStrand = [True,False]
    
    for strand in posStrand:
      kmer = ""
      if strand: kmer = context[posInWindow:posInWindow+forwardLength]
      else: kmer = context[posInWindow-forwardLength+1:posInWindow+1]
      
      for i in range(targetCov//len(posStrand)):
        # Pick a threshold
        rval = random.random()
        if rval <= forward[strand][kmer]*correctForward:
          # Pick a length
          lval = random.random()
          key = lengthDist.bisect_left(lval)
          selLength = lengthDist[lengthDist.iloc[key]]
        
          # Check the kmer at the other end
          rkmer = ""
          if strand: rkmer = context[posInWindow+selLength-reverseLength:posInWindow+selLength]
          else: rkmer = context[posInWindow-selLength+1:posInWindow-selLength+reverseLength+1]

          rval = random.random()
          if rval <= reverse[not strand][rkmer]*correctReverse:
            # Extract sequence and write BAM record
            selSeq = ""
            if strand: selSeq = context[posInWindow:posInWindow+selLength]
            else: selSeq = context[posInWindow-selLength+1:posInWindow+1]
            writeBAMentry(chrom,pos,selSeq,selLength,strand)
            #print strand,kmer,rkmer,selSeq,len(selSeq),selLength
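
The length selection in the fragment above appears to treat lengthDist as a SortedDict keyed by cumulative probability, mapping each key to a fragment length; drawing a uniform random number and bisecting into the keys then amounts to inverse-CDF sampling. Below is a self-contained sketch of that technique; the distribution is an illustrative assumption, not the original data.

import random
from sortedcontainers import SortedDict

# cumulative probability -> sampled length (a stand-in for lengthDist)
cdf = SortedDict({0.2: 100, 0.7: 150, 1.0: 200})

def sample_length():
    r = random.random()
    # first key >= r identifies the CDF bucket the draw falls into
    bucket = cdf.bisect_left(r)
    return cdf[cdf.keys()[bucket]]

print(sample_length())  # 100 with p=0.2, 150 with p=0.5, 200 with p=0.3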
Пример #31
0
class FederationRemoteSendQueue(object):
    """A drop in replacement for TransactionQueue"""
    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> user_id

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (queue_name, ), "",
                [], lambda: len(queue))

        for queue_name in [
                "presence_map",
                "presence_changed",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "device_messages",
                "pos_time",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            user_ids = set(user_id
                           for uids in itervalues(self.presence_changed)
                           for user_id in uids)

            to_del = [
                user_id for user_id in self.presence_map
                if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            to_del = [
                edu_key for edu_key in self.keyed_edu
                if edu_key not in live_keys
            ]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

            # Delete things out of device map
            keys = self.device_messages.keys()
            i = self.device_messages.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.device_messages[key]

    def notify_new_events(self, current_id):
        """As per TransactionQueue"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def send_edu(self, destination, edu_type, content, key=None):
        """As per TransactionQueue"""
        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_presence(self, states):
        """As per TransactionQueue

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so lets always just
        # filter here just in case.
        local_states = list(
            filter(lambda s: self.is_mine_id(s.user_id), states))

        self.presence_map.update(
            {state.user_id: state
             for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per TransactionQueue"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        return self.pos - 1

    def federation_ack(self, token):
        self._clear_queue_before_pos(token)

    def get_replication_rows(self,
                             from_token,
                             to_token,
                             limit,
                             federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position the worker has
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point.
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so let's delete everything it has
        # acknowledged it has seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(state=self.presence_map[user_id], )))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in iteritems(keyed_edus):
            rows.append((pos,
                         KeyedEduRow(
                             key=edu_key,
                             edu=self.keyed_edu[(destination, edu_key)],
                         )))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in iteritems(device_messages):
            rows.append((pos, DeviceRow(destination=destination, )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
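
The `keyed_edus = {v: k for k, v in ...}` inversion in get_replication_rows leans on the fact that a dict comprehension keeps the last value written for a duplicate key. A minimal standalone sketch of that behaviour, with made-up stream positions:

from sortedcontainers import SortedDict

# stream position -> (destination, key), as in keyed_edu_changed
changed = SortedDict({1: ("srv", "typing"), 3: ("srv", "typing"), 4: ("srv", "receipt")})

# items() iterates in ascending position order, so later duplicates of the
# same (destination, key) overwrite earlier ones: we keep the last position.
latest = {v: k for k, v in changed.items()}
print(latest)  # {('srv', 'typing'): 3, ('srv', 'receipt'): 4}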
Example #32
class StepVector():
    @classmethod
    def sliced(cls, other, start, end):
        newobj = cls(other.datatype, _tree=other._t, _bounds=(start, end))
        return newobj

    def __init__(self, datatype, _tree=None, _bounds=None):
        self.datatype = datatype

        if _tree is not None:
            self._t = _tree
        else:
            self._t = SortedDict()

        if _bounds is not None:
            self._bounds = _bounds
        else:
            self._bounds = (None, None)  # set upon slicing/subsetting

    def __getitem__(self, key):
        if type(key) == slice:
            if (key.step is not None) and (key.step != 1):
                raise ValueError("Invalid step value")

            start = key.start
            end = key.stop

            if self._bounds[0] is not None:
                if start is None:
                    start = self._bounds[0]
                else:
                    if start < self._bounds[0]:
                        raise ValueError("Start out of bounds")
            if self._bounds[1] is not None:
                if end is None:
                    end = self._bounds[1]
                else:
                    if end > self._bounds[1]:
                        raise ValueError("End out of bounds")

            return self.sliced(self, start, end)
        else:
            assert type(key) == int

            if self._bounds[0] is not None:
                if key < self._bounds[0]:
                    raise ValueError("Key out of bounds")
            if self._bounds[1] is not None:
                if key >= self._bounds[1]:
                    raise ValueError("Key out of bounds")

            if self._t:
                try:
                    prevkey = self._floor_key(key)
                    return self._t[prevkey]
                except KeyError:
                    # no item smaller than or equal to key
                    return self.datatype()
            else:
                # empty tree
                return self.datatype()

    def __setitem__(self, key, value):
        if type(key) == slice:
            start = key.start
            end = key.stop
        else:
            assert type(key) == int
            start = key
            end = key + 1

        assert start is not None
        assert end is not None

        assert type(value) == self.datatype
        assert end >= start

        if start == end:
            return

        # check next val
        if self._t:
            try:
                nkey = self._floor_key(end, bisect="right")
                nvalue = self._t[nkey]
            except KeyError:
                nkey = None
                nvalue = None
        else:
            # empty tree
            nkey = None
            nvalue = None

        # check prev val
        if self._t:
            try:
                pkey = self._floor_key(start)
                pvalue = self._t[pkey]
            except KeyError:
                pkey = None
                pvalue = None
        else:
            pkey = None
            pvalue = None

        # remove intermediate steps if any
        if self._t:
            a = self._t.bisect_left(start)
            b = self._t.bisect(end)
            assert a <= b
            del self._t.iloc[a:b]

        # set an end marker if necessary
        if nkey is None:
            self._t[end] = self.datatype()
        elif nvalue != value:
            self._t[end] = nvalue

        # set a start marker if necessary
        if pkey is None or pvalue != value:
            self._t[start] = value

    def __iter__(self):
        start, end = self._bounds

        if not self._t:
            # empty tree; end the generator with `return` rather than
            # `raise StopIteration`, which PEP 479 turned into a RuntimeError
            if start is None or end is None:
                return  # FIXME: can't figure out a better thing to do if only one is None
            if start < end:
                yield (start, end, self.datatype())
            return

        if start is None:
            a = 0
        else:
            a = max(0, self._bisect_right(start) - 1)

        if end is None:
            b = len(self._t)
        else:
            b = self._bisect_right(end)

        assert b >= a
        if a == b:
            # no step boundaries fall inside the range, so the whole slice is
            # one homogeneous segment holding the default value
            if start < end:
                yield (start, end, self.datatype())

            return

        it = self._t.islice(a, b)

        currkey = next(it)
        currvalue = self._t[currkey]
        if start is not None:
            currkey = max(start, currkey)
            if start < currkey:
                yield (start, currkey, self.datatype())

        prevkey, prevvalue = currkey, currvalue
        for currkey in it:
            currvalue = self._t[currkey]
            yield (prevkey, currkey, prevvalue)
            prevkey = currkey
            prevvalue = currvalue

        if end is not None:
            if currkey < end:
                yield (currkey, end, prevvalue)

    def add_value(self, start, end, value):
        assert type(value) == self.datatype

        # can't modify self while iterating over values; mutating the tree
        # would invalidate the iteration
        items = list(self[start:end])

        for a, b, x in items:
            if self.datatype == set:
                y = x.copy()
                y.update(value)
            else:
                y = x + value

            self[a:b] = y

    def _bisect_left(self, key):
        return self._t.bisect_left(key)

    def _bisect_right(self, key):
        return self._t.bisect_right(key)

    def _floor_key(self, key, bisect="left"):
        """
        Returns the greatest key less than or equal to key
        """

        if bisect == "right":
            p = self._bisect_right(key)
        else:
            p = self._bisect_left(key)

        if p == 0:
            raise KeyError
        else:
            return self._t.iloc[p - 1]
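
A quick usage sketch for the StepVector above (it assumes the sortedcontainers import from the surrounding examples; note the class still relies on the deprecated `iloc` indexer internally):

sv = StepVector(int)
sv[0:10] = 1                  # one step covering [0, 10)
sv[4:6] = 5                   # splits it into [0,4)=1, [4,6)=5, [6,10)=1
print(sv[2], sv[5])           # 1 5
for start, end, value in sv[0:10]:
    print(start, end, value)  # 0 4 1 / 4 6 5 / 6 10 1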
Example #33
class BaseColorCodePatchBuilder(ASAxesPatchBuilder, PickablePatchBuilder):
    """
    The patch generator builds the matplotlib patches for each
    capability node.

    The nodes are rendered as lines with a different color depending
    on the permission bits of the capability. The builder produces
    a LineCollection for each combination of permission bits and
    creates the lines for the nodes.
    """
    def __init__(self, figure, pgm):
        """
        Constructor

        :param figure: the figure to attach the click callback to
        :param pgm: the provenance graph model
        """
        super().__init__(figure=figure)

        self._pgm = pgm
        """The provenance graph model"""

        self._collection_map = defaultdict(lambda: [])
        """
        Map capability permission to the set where the line should go.
        Any combination of capability permissions is used as key for
        a list of (start, end) values that are used to build LineCollections.
        The key "call" is used for system call nodes, the int(0) key is used
        for no permission.
        """

        self._colors = {}
        """
        Map capability permission to line colors.
        XXX: keep this for now, move to a colormap
        """

        self._bbox = [np.inf, np.inf, 0, 0]
        """Bounding box of the patches as (xmin, ymin, xmax, ymax)."""

        self._node_map = SortedDict()
        """Maps the Y axis coordinate to the graph node at that position"""

    def _clickable_element(self, vertex, y):
        """remember the node at the given Y for faster indexing."""
        data = self._pgm.data[vertex]
        self._node_map[y] = data

    def _add_bbox(self, xmin, xmax, y):
        """Update the view bbox."""
        if self._bbox[0] > xmin:
            self._bbox[0] = xmin
        if self._bbox[1] > y:
            self._bbox[1] = y
        if self._bbox[2] < xmax:
            self._bbox[2] = xmax
        if self._bbox[3] < y:
            self._bbox[3] = y

    def _get_patch_collections(self, axes):
        """Return a generator of collections of patches to add to the axes."""
        pass

    def get_patches(self, axes):
        """
        Return a collection of lines from the collection_map.
        """
        super().get_patches(axes)
        for coll in self._get_patch_collections(axes):
            axes.add_collection(coll)

    def get_bbox(self):
        return Bbox.from_extents(*self._bbox)

    def on_click(self, event):
        """
        Attempt to retrieve the data in less than O(n) for better
        interactivity at the expense of having to hold a dictionary of
        references to nodes for each t_alloc.
        Note that t_alloc is unique for each capability node as it
        is the cycle count, so it can be used as the key.
        """
        ax = event.inaxes
        if ax is None:
            return

        # back to data coords without scaling
        y_coord = int(event.ydata)
        y_max = self._bbox[3]
        # tolerance for y distance, 0.1 * 10^6 cycles
        epsilon = 0.1 * 10**6

        # try to find the node closest to y_coord quickly;
        # for now we fall back to a reduced linear search, though it may be
        # worth indexing the lines with an R-tree
        idx_min = self._node_map.bisect_left(max(0, y_coord - epsilon))
        idx_max = self._node_map.bisect_right(min(y_max, y_coord + epsilon))
        iter_keys = self._node_map.islice(idx_min, idx_max)
        # find the closest node to the click position
        pick_target = None
        for key in iter_keys:
            node = self._node_map[key]
            if (node.cap.base <= event.xdata
                    and node.cap.bound >= event.xdata):
                # the click event is within the node bounds and
                # the node Y is closer to the click event than
                # the previous pick_target
                if (pick_target is None or abs(y_coord - key) <
                        abs(y_coord - pick_target.cap.t_alloc)):
                    pick_target = node
        if pick_target is not None:
            ax.set_status_message(pick_target)
        else:
            ax.set_status_message("")
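
The on_click handler narrows what would be an O(n) scan to a small window: bisect on both sides of the clicked Y coordinate, then linearly search only the keys inside the tolerance. The same pattern in isolation (plain values standing in for graph nodes; the names here are illustrative only):

from sortedcontainers import SortedDict

nodes = SortedDict({100: "a", 250: "b", 260: "c", 900: "d"})
y_coord, epsilon = 255, 20

lo = nodes.bisect_left(y_coord - epsilon)    # first key >= 235
hi = nodes.bisect_right(y_coord + epsilon)   # first key > 275
# closest key to the click, searched only within the +/- epsilon window
closest = min(nodes.islice(lo, hi), key=lambda k: abs(k - y_coord), default=None)
print(closest, nodes[closest] if closest is not None else None)  # 250 b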
Example #34
class FederationRemoteSendQueue(AbstractFederationSender):
    """A drop in replacement for FederationSender"""
    def __init__(self, hs: "HomeServer"):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        # We may have multiple federation sender instances, so we need to track
        # their positions separately.
        self._sender_instances = hs.config.worker.federation_shard_config.instances
        self._sender_positions = {}  # type: Dict[str, int]

        # Pending presence map user_id -> UserPresenceState
        self.presence_map = {}  # type: Dict[str, UserPresenceState]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = (
            SortedDict())  # type: SortedDict[int, Tuple[str, Iterable[str]]]

        # (destination, key) -> EDU
        self.keyed_edu = {}  # type: Dict[Tuple[str, tuple], Edu]

        # stream position -> (destination, key)
        self.keyed_edu_changed = (SortedDict()
                                  )  # type: SortedDict[int, Tuple[str, tuple]]

        self.edus = SortedDict()  # type: SortedDict[int, Edu]

        # stream ID for the next entry into keyed_edu_changed/edus.
        self.pos = 1

        # map from stream ID to the time that stream entry was generated, so that we
        # can clear out entries after a while
        self.pos_time = SortedDict()  # type: SortedDict[int, int]

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name: str, queue: Sized) -> None:
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (queue_name, ),
                "",
                [],
                lambda: len(queue),
            )

        for queue_name in [
                "presence_map",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "pos_time",
                "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self) -> int:
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self) -> None:
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete: int) -> None:
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_destinations.keys()
            i = self.presence_destinations.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_destinations[key]

            user_ids = {
                user_id
                for user_id, _ in self.presence_destinations.values()
            }

            to_del = [
                user_id for user_id in self.presence_map
                if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            keys_to_del = [
                edu_key for edu_key in self.keyed_edu
                if edu_key not in live_keys
            ]
            for edu_key in keys_to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

    def notify_new_events(self, max_token: RoomStreamToken) -> None:
        """As per FederationSender"""
        # This should never get called.
        raise NotImplementedError()

    def build_and_send_edu(
        self,
        destination: str,
        edu_type: str,
        content: JsonDict,
        key: Optional[Hashable] = None,
    ) -> None:
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    async def send_read_receipt(self, receipt: ReadReceipt) -> None:
        """As per FederationSender

        Args:
            receipt:
        """
        # nothing to do here: the replication listener will handle it.

    def send_presence_to_destinations(self,
                                      states: Iterable[UserPresenceState],
                                      destinations: Iterable[str]) -> None:
        """As per FederationSender

        Args:
            states
            destinations
        """
        for state in states:
            pos = self._next_pos()
            self.presence_map.update(
                {state.user_id: state
                 for state in states})
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination: str) -> None:
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.

    def wake_destination(self, server: str) -> None:
        pass

    def get_current_token(self) -> int:
        return self.pos - 1

    def federation_ack(self, instance_name: str, token: int) -> None:
        if self._sender_instances:
            # If we have configured multiple federation sender instances we need
            # to track their positions separately, and only clear the queue up
            # to the token all instances have acked.
            self._sender_positions[instance_name] = token
            token = min(self._sender_positions.values())

        self._clear_queue_before_pos(token)

    async def get_replication_rows(
            self, instance_name: str, from_token: int, to_token: int,
            target_row_count: int
    ) -> Tuple[List[Tuple[int, Tuple]], int, bool]:
        """Get rows to be sent over federation between the two tokens

        Args:
            instance_name: the name of the current process
            from_token: the previous stream token: the starting point for fetching the
                updates
            to_token: the new stream token: the point to get updates up to
            target_row_count: a target for the number of rows to be returned.

        Returns: a triplet `(updates, new_last_token, limited)`, where:
           * `updates` is a list of `(token, row)` entries.
           * `new_last_token` is the new position in the stream.
           * `limited` is whether there are more updates to fetch.
        """
        # TODO: Handle target_row_count.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []  # type: List[Tuple[int, BaseFederationRow]]

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append((
                pos,
                PresenceDestinationsRow(state=self.presence_map[user_id],
                                        destinations=list(dests)),
            ))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in keyed_edus.items():
            rows.append((
                pos,
                KeyedEduRow(key=edu_key,
                            edu=self.keyed_edu[(destination, edu_key)]),
            ))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Sort rows based on pos
        rows.sort()

        return (
            [(pos, (row.TypeId, row.to_data())) for pos, row in rows],
            to_token,
            False,
        )
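
Both versions of `_clear_queue_before_pos` trim their SortedDicts with the same idiom: bisect to count how many leading keys fall before the cut-off, then delete that prefix. A minimal standalone sketch:

from sortedcontainers import SortedDict

edus = SortedDict({1: "edu-1", 2: "edu-2", 5: "edu-5", 9: "edu-9"})
position_to_delete = 5

keys = edus.keys()
i = edus.bisect_left(position_to_delete)
# slicing the key view yields a plain list, so deleting while iterating is safe
for key in keys[:i]:
    del edus[key]
print(edus)  # SortedDict({5: 'edu-5', 9: 'edu-9'})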
Example #35
def test6():
    """
    Ordered map: SortedDict
    Docs: http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html
    """
    from sortedcontainers import SortedDict
    sd = SortedDict()
    # insert elements
    sd["wxx"] = 21
    sd["hh"] = 18
    sd["other"] = 20
    print(sd)  # SortedDict({'hh': 18, 'other': 20, 'wxx': 21})
    print(sd["wxx"])  # 21; indexing a missing key raises KeyError
    print(sd.get("c"))  # get() returns None for a missing key    None
    # convert SortedDict to dict
    print(dict(sd))  # {'hh': 18, 'other': 20, 'wxx': 21}
    # return the first and the last element
    print(sd.peekitem(0))  # tuple, the first element    ('hh', 18)
    print(sd.peekitem())  # tuple, the last element    ('wxx', 21)
    # iteration
    for k, v in sd.items():
        print(k, ':', v, sep="", end=", ")  # sep="" drops the spaces between fields
    print()
    for k in sd:  # iterate over keys, equivalent to: for k in sd.keys():
        print(str(k) + ":" + str(sd[k]), end=", ")
    print()
    for v in sd.values():  # iterate over values
        print(v, end=", ")
    print()
    # return a key from the map
    print(sd.peekitem()[0])
    # return a value from the map
    print(sd.peekitem()[1])
    # membership test
    print("wxx" in sd)  # True
    # bisect_left() / bisect_right()
    sd["a"] = 1
    sd["c1"] = 2
    sd["c2"] = 4
    print(sd)  # SortedDict({'a': 1, 'c1': 2, 'c2': 4, 'hh': 18, 'other': 20, 'wxx': 21})
    print(sd.bisect_left("c1"))  # index of the first key >= "c1"    1
    print(sd.bisect_right("c1"))  # index of the first key > "c1"    2
    # clear
    sd.clear()
    print(len(sd))  # 0
    print(len(sd) == 0)  # True
    """
    Unordered map: dict
    """
    print("---------------------------------------")
    d = {"c1": 2, "c2": 4, "hh": 18, "wxx": 21, 13: 14, 1: 0}
    print(d["wxx"])  # 21
    print(d[13])  # 14
    d[13] += 1
    print(d[13])  # 15
    d["future"] = "wonderful"  # add a key/value pair
    del d[1]  # remove key 1 and its value from d
    print("wxx" in d)  # True if the key "wxx" is in d, else False
    print(d.keys())  # all keys of d    dict_keys(['c1', 'c2', 'hh', 'wxx', 13, 'future'])
    print(d.values())  # all values of d    dict_values([2, 4, 18, 21, 15, 'wonderful'])
    print(d.items(
    ))  # dict_items([('c1', 2), ('c2', 4), ('hh', 18), ('wxx', 21), (13, 15), ('future', 'wonderful')])
    for k, v in d.items():  # iterate over (k, v) pairs
        print(k, ':', v)
    for k in d:  # iterate over keys, equivalent to: for k in d.keys():
        print(str(k) + ":" + str(d[k]), end=", ")
    print()
    for v in d.values():  # iterate over values
        print(v, end=", ")
    print()
    # dict functions and methods
    print("---------------------------------------")
    d = {"中国": "北京", "美国": "华盛顿", "法国": "巴黎"}
    print(len(d))  # number of items in d    3
    print(d.get("中国", "不存在"))  # value for the key if present, else the default    北京
    print(d.get("中", "不存在"))  # 不存在
    print(d.get("中"))  # None
    d["美国"] = "Washington"  # change the value for an existing key
    print(d.pop("美国"))  # value for the key if present, removing the pair from d
    print(d.popitem())  # remove and return the last-inserted (key, value) pair (LIFO order in Python 3.7+)
    d.clear()  # remove all key/value pairs
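
As a companion to the bisect_left()/bisect_right() calls above: the indices they return are typically used to slice the key view, and SortedDict.irange() expresses the same range query directly:

from sortedcontainers import SortedDict

sd = SortedDict({"a": 1, "c1": 2, "c2": 4, "hh": 18, "wxx": 21})

# keys from "c1" (inclusive) up to "hh" (exclusive), via bisect indices
lo, hi = sd.bisect_left("c1"), sd.bisect_left("hh")
print(sd.keys()[lo:hi])  # ['c1', 'c2']

# the same query expressed directly with irange()
print(list(sd.irange("c1", "hh", inclusive=(True, False))))  # ['c1', 'c2']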