class TwoDimensionTable:

    def __init__(self):
        self.__rows = SortedDict()

    @classmethod
    def from_json(cls, input_file):
        with open(input_file, 'r') as json_f:
            json_data = json.load(json_f)
        table = cls()
        for d in json_data:
            speedup = d['speedup_mine_over_sol']
            input_size = d['input_size']
            proc_count = d['proc_count']
            table.add(row=proc_count, col=input_size, val=speedup)
        return table

    def add(self, row, col, val):
        if row not in self.__rows:
            self.__add_new_row(row)
        for r, c in self.__rows.items():
            if r == row:
                c[col] = val
            elif col not in c:
                c[col] = None

    def row_items(self):
        return self.__rows.items()

    def __add_new_row(self, row_name):
        self.__rows[row_name] = SortedDict()
        for r, c in self.__rows.items():
            if r != row_name:
                for c_title, _ in c.items():
                    self.__rows[row_name][c_title] = None
                break

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        table = ['rows ' + u'\u2193' + ' columns ' + u'\u2192']
        for r_title, col in self.row_items():
            row = [str(r_title), '->']
            for c_title, val in col.items():
                row.append('({0}, {1})'.format(c_title, val))
            table.append(' '.join(row))
        return '\n'.join(table)
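# Usage sketch (added for illustration; not part of the original source).
# Assumes `SortedDict` comes from the third-party sortedcontainers package and
# `json` is imported at module level, as the class above requires. Rows are
# process counts, columns are input sizes; add() backfills missing cells with
# None so every row carries the same set of columns.
def _demo_two_dimension_table():
    table = TwoDimensionTable()
    table.add(row=2, col=1024, val=1.8)
    table.add(row=4, col=1024, val=3.1)
    table.add(row=4, col=2048, val=3.4)  # row 2 gains a (2048, None) cell
    print(table)
    # rows ↓ columns →
    # 2 -> (1024, 1.8) (2048, None)
    # 4 -> (1024, 3.1) (2048, 3.4)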
def test_update():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict()
    temp.update()
    temp.update(mapping)
    temp.update(dict(mapping))
    assert list(temp.items()) == mapping
def test_clear():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert len(temp) == 26
    assert list(temp.items()) == mapping
    temp.clear()
    assert len(temp) == 0
class PositionMapping:

    __slots__ = ('_pos', '_posmap')

    DUPLICATION_CHECK = True

    def __init__(self):
        self._pos = 0
        self._posmap = SortedDict()

    def items(self):
        return self._posmap.items()

    #
    # Properties
    #

    @property
    def pos(self):
        return self._pos

    @pos.setter
    def pos(self, v):
        self._pos = v

    #
    # Public methods
    #

    def add_mapping(self, start_pos, length, obj):
        # duplication check
        if self.DUPLICATION_CHECK:
            try:
                pre = next(self._posmap.irange(maximum=start_pos, reverse=True))
                if start_pos in self._posmap[pre]:
                    raise ValueError("New mapping is overlapping with an existing element.")
            except StopIteration:
                pass
        self._posmap[start_pos] = PositionMappingElement(start_pos, length, obj)

    def tick_pos(self, delta):
        self._pos += delta

    def get_node(self, pos):
        element = self.get_element(pos)
        if element is None:
            return None
        return element.obj

    def get_element(self, pos):
        try:
            pre = next(self._posmap.irange(maximum=pos, reverse=True))
        except StopIteration:
            return None
        element = self._posmap[pre]
        if pos in element:
            return element
        return None
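# Supporting sketch (added; not in the original source). PositionMapping relies
# on a PositionMappingElement type that is not shown in this snippet. A minimal
# stand-in that satisfies the `pos in element` checks above, treating each
# element as the half-open range [start, start + length), might look like this;
# the real class may carry more state.
class PositionMappingElement:
    __slots__ = ('start', 'length', 'obj')

    def __init__(self, start, length, obj):
        self.start = start
        self.length = length
        self.obj = obj

    def __contains__(self, pos):
        # True iff pos falls in [start, start + length)
        return self.start <= pos < self.start + self.length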
class InMemoryStorage(object):

    def __init__(self):
        self.kvstore = SortedDict()  # sorted key-value store

    def get(self, k):
        try:
            return self.kvstore[k]
        except KeyError:
            return 1

    def put(self, k, v):
        self.kvstore[k] = v
        return 0

    def delete(self, k):
        try:
            del self.kvstore[k]
            return 0
        except KeyError:
            return 1

    def split(self, section, keyspace_mid):
        """Delete one half of the keystore for a group split operation."""
        midKey = None
        for key in self.kvstore.keys():  # TODO make more efficient for better performance
            if key > str(keyspace_mid):
                # use the positional index to estimate the midpoint
                midKey = self.kvstore.index(key)
                break
        if section:  # section is either 0 or 1
            self.kvstore = SortedDict(self.kvstore.items()[midKey:])
        else:
            self.kvstore = SortedDict(self.kvstore.items()[:midKey])
        print(self.kvstore)
        return 0

    def save(self):  # need metadata here
        save_state("data/backup/db_copy.pkl", self.kvstore)

    def load(self):
        self.kvstore = load_state("data/backup/db_copy.pkl")
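# Usage sketch (added; not in the original source). Demonstrates the return
# conventions above: 0 on success, 1 on failure, with misses signalled by a
# return value rather than an exception. save()/load() depend on external
# save_state()/load_state() helpers that are not defined in this snippet.
def _demo_in_memory_storage():
    store = InMemoryStorage()
    assert store.put('alpha', 'a') == 0
    assert store.get('alpha') == 'a'
    assert store.get('missing') == 1   # a miss returns 1, not an exception
    assert store.delete('missing') == 1
    store.put('beta', 'b')
    store.split(section=1, keyspace_mid=0)  # keep the upper half of the keyspace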
def test_init():
    sdict = SortedDict()
    sdict._check()

    sdict = SortedDict()
    sdict._reset(17)
    sdict._check()

    sdict = SortedDict((val, -val) for val in range(10000))
    sdict._check()
    assert all(key == -val for key, val in sdict.items())

    sdict.clear()
    sdict._check()
    assert len(sdict) == 0

    sdict = SortedDict.fromkeys(range(1000), None)
    assert all(sdict[key] is None for key in range(1000))
def test_itemsview():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    items = temp.items()

    assert len(items) == 13
    assert ('a', 0) in items
    assert list(items) == mapping[:13]
    assert items[0] == ('a', 0)
    assert items[-3:] == [('k', 10), ('l', 11), ('m', 12)]
    assert list(reversed(items)) == list(reversed(mapping[:13]))
    assert items.index(('f', 5)) == 5
    assert items.count(('m', 12)) == 1
    assert items.isdisjoint([('0', 26), ('1', 27)])
    assert not items.isdisjoint([('a', 0), ('b', 1)])

    temp.update(mapping[13:])

    assert len(items) == 26
    assert ('z', 25) in items
    assert list(items) == mapping

    that = dict(mapping)
    that_items = get_itemsview(that)

    assert items == that_items
    assert not (items != that_items)
    assert not (items < that_items)
    assert not (items > that_items)
    assert items <= that_items
    assert items >= that_items

    assert list(items & that_items) == mapping
    assert list(items | that_items) == mapping
    assert list(items - that_items) == []
    assert list(items ^ that_items) == []

    items = SortedDict(mapping[:2]).items()
    assert repr(items) == "SortedItemsView(SortedDict({'a': 0, 'b': 1}))"
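# Supporting sketch (added; not in the original source). test_itemsview()
# compares against a plain dict through a get_itemsview() helper that is not
# defined in this snippet; in the sortedcontainers test suite it is a
# Python 2/3 compatibility shim. On Python 3 it reduces to dict.items().
def get_itemsview(dictionary):
    return dictionary.items()  # would be dictionary.viewitems() on Python 2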
pixx, pixy = worldpix[0][0], worldpix[0][1]
#print(pixx, pixy)
print('before loop')
#print(RA_test, DEC_test)
#print(pixx_im, pixy_im)
#print(corner_1[0], corner_4[0])
#print(corner_1[0], RA_test, corner_2[0])
#print(corner_1[1], DEC_test, corner_3[1])
if corner_1[0] <= RA_test <= corner_2[0] and corner_1[1] >= DEC_test >= corner_3[1]:
    mydic[date] = [path, pixx, pixy]

for i, (key, (path, pixx, pixy)) in enumerate(mydic.items()):
    path_cand = ('/fred/oz100/CANVIS/cand_images/' + run + '/cand_'
                 + format(objid, '05') + '_' + field + '_' + run + '/')
    path_cutout = ('/fred/oz100/CANVIS/cand_images/' + run + '/cand_'
                   + format(objid, '05') + '_' + field + '_' + run
                   + '/cand_' + format(objid, '05') + '_' + run
                   + '_cutout_' + format(i, '03') + '.fits')
    if not os.path.exists(path_cand):
        os.makedirs(path_cand, 0o755)
    size = 200
    with fits.open(path) as hdu:
        nom_data = (hdu[0].data - np.min(hdu[0].data)) / (np.max(hdu[0].data) - np.min(hdu[0].data))
        print(' FOUND ON THIS CCD ' + fitsim)
class IntervalTree(MutableSet):
    """
    A binary lookup tree of intervals.
    The intervals contained in the tree are represented using
    ``Interval(a, b, data)`` objects. Each such object represents a half-open
    interval ``[a, b)`` with optional data.

    Examples:
    ---------

    Initialize a blank tree::

        >>> tree = IntervalTree()
        >>> tree
        IntervalTree()

    Initialize a tree from an iterable set of Intervals in O(n * log n)::

        >>> tree = IntervalTree([Interval(-10, 10), Interval(-20.0, -10.0)])
        >>> tree
        IntervalTree([Interval(-20.0, -10.0), Interval(-10, 10)])
        >>> len(tree)
        2

    Note that this is a set, i.e. repeated intervals are ignored. However,
    Intervals with different data fields are regarded as different::

        >>> tree = IntervalTree([Interval(-10, 10), Interval(-10, 10), Interval(-10, 10, "x")])
        >>> tree
        IntervalTree([Interval(-10, 10), Interval(-10, 10, 'x')])
        >>> len(tree)
        2

    Insertions::

        >>> tree = IntervalTree()
        >>> tree[0:1] = "data"
        >>> tree.add(Interval(10, 20))
        >>> tree.addi(19.9, 20)
        >>> tree
        IntervalTree([Interval(0, 1, 'data'), Interval(10, 20), Interval(19.9, 20)])
        >>> tree.update([Interval(19.9, 20.1), Interval(20.1, 30)])
        >>> len(tree)
        5

    Inserting the same Interval twice does nothing::

        >>> tree = IntervalTree()
        >>> tree[-10:20] = "arbitrary data"
        >>> tree[-10:20] = None  # Note that this is also an insertion
        >>> tree
        IntervalTree([Interval(-10, 20), Interval(-10, 20, 'arbitrary data')])
        >>> tree[-10:20] = None  # This won't change anything
        >>> tree[-10:20] = "arbitrary data"  # Neither will this
        >>> len(tree)
        2

    Deletions::

        >>> tree = IntervalTree(Interval(b, e) for b, e in [(-10, 10), (-20, -10), (10, 20)])
        >>> tree
        IntervalTree([Interval(-20, -10), Interval(-10, 10), Interval(10, 20)])
        >>> tree.remove(Interval(-10, 10))
        >>> tree
        IntervalTree([Interval(-20, -10), Interval(10, 20)])
        >>> tree.remove(Interval(-10, 10))
        Traceback (most recent call last):
        ...
        ValueError
        >>> tree.discard(Interval(-10, 10))  # Same as remove, but no exception on failure
        >>> tree
        IntervalTree([Interval(-20, -10), Interval(10, 20)])

    Delete intervals, overlapping a given point::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> tree.remove_overlap(1.1)
        >>> tree
        IntervalTree([Interval(-1.1, 1.1)])

    Delete intervals, overlapping an interval::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> tree.remove_overlap(0, 0.5)
        >>> tree
        IntervalTree([Interval(0.5, 1.7)])
        >>> tree.remove_overlap(1.7, 1.8)
        >>> tree
        IntervalTree([Interval(0.5, 1.7)])
        >>> tree.remove_overlap(1.6, 1.6)  # Null interval does nothing
        >>> tree
        IntervalTree([Interval(0.5, 1.7)])
        >>> tree.remove_overlap(1.6, 1.5)  # Ditto
        >>> tree
        IntervalTree([Interval(0.5, 1.7)])

    Delete intervals, enveloped in the range::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> tree.remove_envelop(-1.0, 1.5)
        >>> tree
        IntervalTree([Interval(-1.1, 1.1), Interval(0.5, 1.7)])
        >>> tree.remove_envelop(-1.1, 1.5)
        >>> tree
        IntervalTree([Interval(0.5, 1.7)])
        >>> tree.remove_envelop(0.5, 1.5)
        >>> tree
        IntervalTree([Interval(0.5, 1.7)])
        >>> tree.remove_envelop(0.5, 1.7)
        >>> tree
        IntervalTree()

    Point queries::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> assert tree[-1.1] == set([Interval(-1.1, 1.1)])
        >>> assert tree.at(1.1) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)])  # Same as tree[1.1]
        >>> assert tree.at(1.5) == set([Interval(0.5, 1.7)])  # Same as tree[1.5]

    Interval overlap queries::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> assert tree.overlap(1.7, 1.8) == set()
        >>> assert tree.overlap(1.5, 1.8) == set([Interval(0.5, 1.7)])
        >>> assert tree[1.5:1.8] == set([Interval(0.5, 1.7)])  # same as previous
        >>> assert tree.overlap(1.1, 1.8) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> assert tree[1.1:1.8] == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)])  # same as previous

    Interval envelop queries::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> assert tree.envelop(-0.5, 0.5) == set()
        >>> assert tree.envelop(-0.5, 1.5) == set([Interval(-0.5, 1.5)])

    Membership queries::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> Interval(-0.5, 0.5) in tree
        False
        >>> Interval(-1.1, 1.1) in tree
        True
        >>> Interval(-1.1, 1.1, "x") in tree
        False
        >>> tree.overlaps(-1.1)
        True
        >>> tree.overlaps(1.7)
        False
        >>> tree.overlaps(1.7, 1.8)
        False
        >>> tree.overlaps(-1.2, -1.1)
        False
        >>> tree.overlaps(-1.2, -1.0)
        True

    Sizing::

        >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
        >>> len(tree)
        3
        >>> tree.is_empty()
        False
        >>> IntervalTree().is_empty()
        True
        >>> not tree
        False
        >>> not IntervalTree()
        True
        >>> print(tree.begin())  # using print() because of floats in Python 2.6
        -1.1
        >>> print(tree.end())  # ditto
        1.7

    Iteration::

        >>> tree = IntervalTree([Interval(-11, 11), Interval(-5, 15), Interval(5, 17)])
        >>> [iv.begin for iv in sorted(tree)]
        [-11, -5, 5]
        >>> assert tree.items() == set([Interval(-5, 15), Interval(-11, 11), Interval(5, 17)])

    Copy- and typecasting, pickling::

        >>> tree0 = IntervalTree([Interval(0, 1, "x"), Interval(1, 2, ["x"])])
        >>> tree1 = IntervalTree(tree0)  # Shares Interval objects
        >>> tree2 = tree0.copy()         # Shallow copy (same as above, as Intervals are singletons)
        >>> import pickle
        >>> tree3 = pickle.loads(pickle.dumps(tree0))  # Deep copy
        >>> list(tree0[1])[0].data[0] = "y"  # affects shallow copies, but not deep copies
        >>> tree0
        IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])])
        >>> tree1
        IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])])
        >>> tree2
        IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])])
        >>> tree3
        IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['x'])])

    Equality testing::

        >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1)])
        True
        >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1, "x")])
        False
    """

    @classmethod
    def from_tuples(cls, tups):
        """
        Create a new IntervalTree from an iterable of 2- or 3-tuples,
        where the tuple lists begin, end, and optionally data.
        """
        ivs = [Interval(*t) for t in tups]
        return IntervalTree(ivs)

    def __init__(self, intervals=None):
        """
        Set up a tree. If intervals is provided, add all the intervals
        to the tree.

        Completes in O(n*log n) time.
        """
        intervals = set(intervals) if intervals is not None else set()
        for iv in intervals:
            if iv.is_null():
                raise ValueError(
                    "IntervalTree: Null Interval objects not allowed in IntervalTree:"
                    " {0}".format(iv))
        self.all_intervals = intervals
        self.top_node = Node.from_intervals(self.all_intervals)
        self.boundary_table = SortedDict()
        for iv in self.all_intervals:
            self._add_boundaries(iv)

    def copy(self):
        """
        Construct a new IntervalTree using shallow copies of the
        intervals in the source tree.

        Completes in O(n*log n) time.
        :rtype: IntervalTree
        """
        return IntervalTree(iv.copy() for iv in self)

    def _add_boundaries(self, interval):
        """
        Records the boundaries of the interval in the boundary table.
        """
        begin = interval.begin
        end = interval.end
        if begin in self.boundary_table:
            self.boundary_table[begin] += 1
        else:
            self.boundary_table[begin] = 1
        if end in self.boundary_table:
            self.boundary_table[end] += 1
        else:
            self.boundary_table[end] = 1

    def _remove_boundaries(self, interval):
        """
        Removes the boundaries of the interval from the boundary table.
        """
        begin = interval.begin
        end = interval.end
        if self.boundary_table[begin] == 1:
            del self.boundary_table[begin]
        else:
            self.boundary_table[begin] -= 1
        if self.boundary_table[end] == 1:
            del self.boundary_table[end]
        else:
            self.boundary_table[end] -= 1

    def add(self, interval):
        """
        Adds an interval to the tree, if not already present.

        Completes in O(log n) time.
        """
        if interval in self:
            return
        if interval.is_null():
            raise ValueError(
                "IntervalTree: Null Interval objects not allowed in IntervalTree:"
                " {0}".format(interval))
        if not self.top_node:
            self.top_node = Node.from_interval(interval)
        else:
            self.top_node = self.top_node.add(interval)
        self.all_intervals.add(interval)
        self._add_boundaries(interval)
    append = add

    def addi(self, begin, end, data=None):
        """
        Shortcut for add(Interval(begin, end, data)).

        Completes in O(log n) time.
        """
        return self.add(Interval(begin, end, data))
    appendi = addi

    def update(self, intervals):
        """
        Given an iterable of intervals, add them to the tree.

        Completes in O(m*log(n+m)) time, where m = number of intervals to add.
        """
        for iv in intervals:
            self.add(iv)

    def remove(self, interval):
        """
        Removes an interval from the tree, if present. If not, raises
        ValueError.

        Completes in O(log n) time.
        """
        #self.verify()
        if interval not in self:
            #print(self.all_intervals)
            raise ValueError
        self.top_node = self.top_node.remove(interval)
        self.all_intervals.remove(interval)
        self._remove_boundaries(interval)
        #self.verify()

    def removei(self, begin, end, data=None):
        """
        Shortcut for remove(Interval(begin, end, data)).

        Completes in O(log n) time.
        """
        return self.remove(Interval(begin, end, data))

    def discard(self, interval):
        """
        Removes an interval from the tree, if present. If not, does
        nothing.

        Completes in O(log n) time.
        """
        if interval not in self:
            return
        self.all_intervals.discard(interval)
        self.top_node = self.top_node.discard(interval)
        self._remove_boundaries(interval)

    def discardi(self, begin, end, data=None):
        """
        Shortcut for discard(Interval(begin, end, data)).

        Completes in O(log n) time.
        """
        return self.discard(Interval(begin, end, data))

    def difference(self, other):
        """
        Returns a new tree, comprising all intervals in self but not
        in other.
        """
        ivs = set()
        for iv in self:
            if iv not in other:
                ivs.add(iv)
        return IntervalTree(ivs)

    def difference_update(self, other):
        """
        Removes all intervals in other from self.
        """
        for iv in other:
            self.discard(iv)

    def union(self, other):
        """
        Returns a new tree, comprising all intervals from self
        and other.
        """
        return IntervalTree(set(self).union(other))

    def intersection(self, other):
        """
        Returns a new tree of all intervals common to both self and
        other.
        """
        ivs = set()
        shorter, longer = sorted([self, other], key=len)
        for iv in shorter:
            if iv in longer:
                ivs.add(iv)
        return IntervalTree(ivs)

    def intersection_update(self, other):
        """
        Removes intervals from self unless they also exist in other.
        """
        ivs = list(self)
        for iv in ivs:
            if iv not in other:
                self.remove(iv)

    def symmetric_difference(self, other):
        """
        Return a tree with elements only in self or other but not
        both.
        """
        if not isinstance(other, set):
            other = set(other)
        me = set(self)
        ivs = me.difference(other).union(other.difference(me))
        return IntervalTree(ivs)

    def symmetric_difference_update(self, other):
        """
        Throws out all intervals except those only in self or other,
        not both.
        """
        other = set(other)
        ivs = list(self)
        for iv in ivs:
            if iv in other:
                self.remove(iv)
                other.remove(iv)
        self.update(other)

    def remove_overlap(self, begin, end=None):
        """
        Removes all intervals overlapping the given point or range.

        Completes in O((r+m)*log n) time, where:
          * n = size of the tree
          * m = number of matches
          * r = size of the search range (this is 1 for a point)
        """
        hitlist = self.at(begin) if end is None else self.overlap(begin, end)
        for iv in hitlist:
            self.remove(iv)

    def remove_envelop(self, begin, end):
        """
        Removes all intervals completely enveloped in the given range.

        Completes in O((r+m)*log n) time, where:
          * n = size of the tree
          * m = number of matches
          * r = size of the search range
        """
        hitlist = self.envelop(begin, end)
        for iv in hitlist:
            self.remove(iv)

    def chop(self, begin, end, datafunc=None):
        """
        Like remove_envelop(), but trims back Intervals hanging into
        the chopped area so that nothing overlaps.
        """
        insertions = set()
        begin_hits = [iv for iv in self.at(begin) if iv.begin < begin]
        end_hits = [iv for iv in self.at(end) if iv.end > end]

        if datafunc:
            for iv in begin_hits:
                insertions.add(Interval(iv.begin, begin, datafunc(iv, True)))
            for iv in end_hits:
                insertions.add(Interval(end, iv.end, datafunc(iv, False)))
        else:
            for iv in begin_hits:
                insertions.add(Interval(iv.begin, begin, iv.data))
            for iv in end_hits:
                insertions.add(Interval(end, iv.end, iv.data))

        self.remove_envelop(begin, end)
        self.difference_update(begin_hits)
        self.difference_update(end_hits)
        self.update(insertions)

    def slice(self, point, datafunc=None):
        """
        Split Intervals that overlap point into two new Intervals. If
        specified, uses datafunc(interval, islower=True/False) to set the
        data field of the new Intervals.
        :param point: where to slice
        :param datafunc(interval, isupper): callable returning a new
        value for the interval's data field
        """
        hitlist = set(iv for iv in self.at(point) if iv.begin < point)
        insertions = set()
        if datafunc:
            for iv in hitlist:
                insertions.add(Interval(iv.begin, point, datafunc(iv, True)))
                insertions.add(Interval(point, iv.end, datafunc(iv, False)))
        else:
            for iv in hitlist:
                insertions.add(Interval(iv.begin, point, iv.data))
                insertions.add(Interval(point, iv.end, iv.data))
        self.difference_update(hitlist)
        self.update(insertions)

    def clear(self):
        """
        Empties the tree.

        Completes in O(1) time.
        """
        self.__init__()

    def find_nested(self):
        """
        Returns a dictionary mapping parent intervals to sets of
        intervals overlapped by and contained in the parent.

        Completes in O(n^2) time.
        :rtype: dict of [Interval, set of Interval]
        """
        result = {}

        def add_if_nested():
            if parent.contains_interval(child):
                if parent not in result:
                    result[parent] = set()
                result[parent].add(child)

        long_ivs = sorted(self.all_intervals, key=Interval.length, reverse=True)
        for i, parent in enumerate(long_ivs):
            for child in long_ivs[i + 1:]:
                add_if_nested()
        return result

    def overlaps(self, begin, end=None):
        """
        Returns whether some interval in the tree overlaps the given
        point or range.

        Completes in O(r*log n) time, where r is the size of the
        search range.
        :rtype: bool
        """
        if end is not None:
            return self.overlaps_range(begin, end)
        elif isinstance(begin, Number):
            return self.overlaps_point(begin)
        else:
            return self.overlaps_range(begin.begin, begin.end)

    def overlaps_point(self, p):
        """
        Returns whether some interval in the tree overlaps p.

        Completes in O(log n) time.
        :rtype: bool
        """
        if self.is_empty():
            return False
        return bool(self.top_node.contains_point(p))

    def overlaps_range(self, begin, end):
        """
        Returns whether some interval in the tree overlaps the given
        range. Returns False if given a null interval over which to
        test.

        Completes in O(r*log n) time, where r is the range length and n
        is the table size.
        :rtype: bool
        """
        if self.is_empty():
            return False
        elif begin >= end:
            return False
        elif self.overlaps_point(begin):
            return True
        return any(
            self.overlaps_point(bound)
            for bound in self.boundary_table
            if begin < bound < end)

    def split_overlaps(self):
        """
        Finds all intervals with overlapping ranges and splits them
        along the range boundaries.

        Completes in worst-case O(n^2*log n) time (many interval
        boundaries are inside many intervals), best-case O(n*log n)
        time (small number of overlaps << n per interval).
        """
        if not self:
            return
        if len(self.boundary_table) == 2:
            return

        bounds = sorted(self.boundary_table)  # get bound locations

        new_ivs = set()
        for lbound, ubound in zip(bounds[:-1], bounds[1:]):
            for iv in self[lbound]:
                new_ivs.add(Interval(lbound, ubound, iv.data))

        self.__init__(new_ivs)

    def merge_overlaps(self, data_reducer=None, data_initializer=None, strict=True):
        """
        Finds all intervals with overlapping ranges and merges them
        into a single interval. If provided, uses data_reducer and
        data_initializer with similar semantics to Python's built-in
        reduce(reducer_func[, initializer]), as follows:

        If data_reducer is set to a function, combines the data
        fields of the Intervals with
            current_reduced_data = data_reducer(current_reduced_data, new_data)
        If data_reducer is None, the merged Interval's data
        field will be set to None, ignoring all the data fields
        of the merged Intervals.

        On encountering the first Interval to merge, if
        data_initializer is None (default), uses the first
        Interval's data field as the first value for
        current_reduced_data. If data_initializer is not None,
        current_reduced_data is set to a shallow copy of
        data_initializer created with copy.copy(data_initializer).

        If strict is True (default), intervals are only merged if
        their ranges actually overlap; adjacent, touching intervals
        will not be merged. If strict is False, intervals are merged
        even if they are only end-to-end adjacent.

        Completes in O(n*log n) time.
        """
        if not self:
            return

        sorted_intervals = sorted(self.all_intervals)  # get sorted intervals
        merged = []
        # use mutable object to allow new_series() to modify it
        current_reduced = [None]
        higher = None  # iterating variable, which new_series() needs access to

        def new_series():
            if data_initializer is None:
                current_reduced[0] = higher.data
                merged.append(higher)
                return
            else:  # data_initializer is not None
                current_reduced[0] = copy(data_initializer)
                current_reduced[0] = data_reducer(current_reduced[0], higher.data)
                merged.append(Interval(higher.begin, higher.end, current_reduced[0]))

        for higher in sorted_intervals:
            if merged:  # series already begun
                lower = merged[-1]
                if (higher.begin < lower.end or
                        not strict and higher.begin == lower.end):  # should merge
                    upper_bound = max(lower.end, higher.end)
                    if data_reducer is not None:
                        current_reduced[0] = data_reducer(current_reduced[0], higher.data)
                    else:  # annihilate the data, since we don't know how to merge it
                        current_reduced[0] = None
                    merged[-1] = Interval(lower.begin, upper_bound, current_reduced[0])
                else:
                    new_series()
            else:  # not merged; is first of Intervals to merge
                new_series()

        self.__init__(merged)

    def merge_equals(self, data_reducer=None, data_initializer=None):
        """
        Finds all intervals with equal ranges and merges them
        into a single interval. If provided, uses data_reducer and
        data_initializer with similar semantics to Python's built-in
        reduce(reducer_func[, initializer]), as follows:

        If data_reducer is set to a function, combines the data
        fields of the Intervals with
            current_reduced_data = data_reducer(current_reduced_data, new_data)
        If data_reducer is None, the merged Interval's data
        field will be set to None, ignoring all the data fields
        of the merged Intervals.

        On encountering the first Interval to merge, if
        data_initializer is None (default), uses the first
        Interval's data field as the first value for
        current_reduced_data. If data_initializer is not None,
        current_reduced_data is set to a shallow copy of
        data_initializer created with copy.copy(data_initializer).

        Completes in O(n*log n) time.
        """
        if not self:
            return

        sorted_intervals = sorted(self.all_intervals)  # get sorted intervals
        merged = []
        # use mutable object to allow new_series() to modify it
        current_reduced = [None]
        higher = None  # iterating variable, which new_series() needs access to

        def new_series():
            if data_initializer is None:
                current_reduced[0] = higher.data
                merged.append(higher)
                return
            else:  # data_initializer is not None
                current_reduced[0] = copy(data_initializer)
                current_reduced[0] = data_reducer(current_reduced[0], higher.data)
                merged.append(Interval(higher.begin, higher.end, current_reduced[0]))

        for higher in sorted_intervals:
            if merged:  # series already begun
                lower = merged[-1]
                if higher.range_matches(lower):  # should merge
                    upper_bound = max(lower.end, higher.end)
                    if data_reducer is not None:
                        current_reduced[0] = data_reducer(current_reduced[0], higher.data)
                    else:  # annihilate the data, since we don't know how to merge it
                        current_reduced[0] = None
                    merged[-1] = Interval(lower.begin, upper_bound, current_reduced[0])
                else:
                    new_series()
            else:  # not merged; is first of Intervals to merge
                new_series()

        self.__init__(merged)

    def items(self):
        """
        Constructs and returns a set of all intervals in the tree.

        Completes in O(n) time.
        :rtype: set of Interval
        """
        return set(self.all_intervals)

    def is_empty(self):
        """
        Returns whether the tree is empty.

        Completes in O(1) time.
        :rtype: bool
        """
        return 0 == len(self)

    def at(self, p):
        """
        Returns the set of all intervals that contain p.

        Completes in O(m + log n) time, where:
          * n = size of the tree
          * m = number of matches
        :rtype: set of Interval
        """
        root = self.top_node
        if not root:
            return set()
        return root.search_point(p, set())

    def envelop(self, begin, end=None):
        """
        Returns the set of all intervals fully contained in the range
        [begin, end).

        Completes in O(m + k*log n) time, where:
          * n = size of the tree
          * m = number of matches
          * k = size of the search range
        :rtype: set of Interval
        """
        root = self.top_node
        if not root:
            return set()
        if end is None:
            iv = begin
            return self.envelop(iv.begin, iv.end)
        elif begin >= end:
            return set()
        result = root.search_point(begin, set())  # bound_begin might be greater
        boundary_table = self.boundary_table
        bound_begin = boundary_table.bisect_left(begin)
        bound_end = boundary_table.bisect_left(end)  # up to, but not including end
        result.update(root.search_overlap(
            # slice notation is slightly slower
            boundary_table.keys()[index] for index in xrange(bound_begin, bound_end)))

        # TODO: improve envelop() to use node info instead of less-efficient filtering
        result = set(iv for iv in result if iv.begin >= begin and iv.end <= end)
        return result

    def overlap(self, begin, end=None):
        """
        Returns a set of all intervals overlapping the given range.

        Completes in O(m + k*log n) time, where:
          * n = size of the tree
          * m = number of matches
          * k = size of the search range
        :rtype: set of Interval
        """
        root = self.top_node
        if not root:
            return set()
        if end is None:
            iv = begin
            return self.overlap(iv.begin, iv.end)
        elif begin >= end:
            return set()
        result = root.search_point(begin, set())  # bound_begin might be greater
        boundary_table = self.boundary_table
        bound_begin = boundary_table.bisect_left(begin)
        bound_end = boundary_table.bisect_left(end)  # up to, but not including end
        result.update(root.search_overlap(
            # slice notation is slightly slower
            boundary_table.keys()[index] for index in xrange(bound_begin, bound_end)))
        return result

    def begin(self):
        """
        Returns the lower bound of the first interval in the tree.

        Completes in O(1) time.
        """
        if not self.boundary_table:
            return 0
        return self.boundary_table.keys()[0]

    def end(self):
        """
        Returns the upper bound of the last interval in the tree.

        Completes in O(1) time.
        """
        if not self.boundary_table:
            return 0
        return self.boundary_table.keys()[-1]

    def range(self):
        """
        Returns a minimum-spanning Interval that encloses all the
        members of this IntervalTree. If the tree is empty, returns
        null Interval.
        :rtype: Interval
        """
        return Interval(self.begin(), self.end())

    def span(self):
        """
        Returns the length of the minimum-spanning Interval that
        encloses all the members of this IntervalTree. If the tree
        is empty, return 0.
        """
        if not self:
            return 0
        return self.end() - self.begin()

    def print_structure(self, tostring=False):
        """
        ## FOR DEBUGGING ONLY ##
        Pretty-prints the structure of the tree.
        If tostring is true, prints nothing and returns a string.
        :rtype: None or str
        """
        if self.top_node:
            return self.top_node.print_structure(tostring=tostring)
        else:
            result = "<empty IntervalTree>"
            if not tostring:
                print(result)
            else:
                return result

    def verify(self):
        """
        ## FOR DEBUGGING ONLY ##
        Checks the table to ensure that the invariants are held.
        """
        if self.all_intervals:
            ## top_node.all_children() == self.all_intervals
            try:
                assert self.top_node.all_children() == self.all_intervals
            except AssertionError as e:
                print('Error: the tree and the membership set are out of sync!')
                tivs = set(self.top_node.all_children())
                print('top_node.all_children() - all_intervals:')
                try:
                    pprint
                except NameError:
                    from pprint import pprint
                pprint(tivs - self.all_intervals)
                print('all_intervals - top_node.all_children():')
                pprint(self.all_intervals - tivs)
                raise e

            ## All members are Intervals
            for iv in self:
                assert isinstance(iv, Interval), (
                    "Error: Only Interval objects allowed in IntervalTree:"
                    " {0}".format(iv))

            ## No null intervals
            for iv in self:
                assert not iv.is_null(), (
                    "Error: Null Interval objects not allowed in IntervalTree:"
                    " {0}".format(iv))

            ## Reconstruct boundary_table
            bound_check = {}
            for iv in self:
                if iv.begin in bound_check:
                    bound_check[iv.begin] += 1
                else:
                    bound_check[iv.begin] = 1
                if iv.end in bound_check:
                    bound_check[iv.end] += 1
                else:
                    bound_check[iv.end] = 1

            ## Reconstructed boundary table (bound_check) ==? boundary_table
            assert set(self.boundary_table.keys()) == set(bound_check.keys()), \
                'Error: boundary_table is out of sync with ' \
                'the intervals in the tree!'

            # For efficiency reasons this should be iteritems in Py2, but we
            # don't care much for efficiency in debug methods anyway.
            for key, val in self.boundary_table.items():
                assert bound_check[key] == val, \
                    'Error: boundary_table[{0}] should be {1},' \
                    ' but is {2}!'.format(key, bound_check[key], val)

            ## Internal tree structure
            self.top_node.verify(set())
        else:
            ## Verify empty tree
            assert not self.boundary_table, \
                "Error: boundary table should be empty!"
            assert self.top_node is None, \
                "Error: top_node isn't None!"

    def score(self, full_report=False):
        """
        Returns a number between 0 and 1, indicating how suboptimal the tree
        is. The lower, the better. Roughly, this number represents the
        fraction of flawed Intervals in the tree.
        :rtype: float
        """
        if len(self) <= 2:
            return 0.0

        n = len(self)
        m = self.top_node.count_nodes()

        def s_center_score():
            """
            Returns a normalized score, indicating roughly how many times
            intervals share s_center with other intervals. Output is full-scale
            from 0 to 1.
            :rtype: float
            """
            raw = n - m
            maximum = n - 1
            return raw / float(maximum)

        report = {
            "depth": self.top_node.depth_score(n, m),
            "s_center": s_center_score(),
        }
        cumulative = max(report.values())
        report["_cumulative"] = cumulative
        if full_report:
            return report
        return cumulative

    def __getitem__(self, index):
        """
        Returns a set of all intervals overlapping the given index or
        slice.

        Completes in O(k * log(n) + m) time, where:
          * n = size of the tree
          * m = number of matches
          * k = size of the search range (this is 1 for a point)
        :rtype: set of Interval
        """
        try:
            start, stop = index.start, index.stop
            if start is None:
                start = self.begin()
                if stop is None:
                    return set(self)
            if stop is None:
                stop = self.end()
            return self.overlap(start, stop)
        except AttributeError:
            return self.at(index)

    def __setitem__(self, index, value):
        """
        Adds a new interval to the tree. A shortcut for
        add(Interval(index.start, index.stop, value)).

        If an identical Interval object with equal range and data
        already exists, does nothing.

        Completes in O(log n) time.
        """
        self.addi(index.start, index.stop, value)

    def __delitem__(self, point):
        """
        Delete all items overlapping point.
        """
        self.remove_overlap(point)

    def __contains__(self, item):
        """
        Returns whether item exists as an Interval in the tree.
        This method only returns True for exact matches; for
        overlaps, see the overlaps() method.

        Completes in O(1) time.
        :rtype: bool
        """
        # Removed point-checking code; it might trick the user into
        # thinking that this is O(1), which point-checking isn't.
        #if isinstance(item, Interval):
        return item in self.all_intervals
        #else:
        #    return self.contains_point(item)

    def containsi(self, begin, end, data=None):
        """
        Shortcut for (Interval(begin, end, data) in tree).

        Completes in O(1) time.
        :rtype: bool
        """
        return Interval(begin, end, data) in self

    def __iter__(self):
        """
        Returns an iterator over all the intervals in the tree.

        Completes in O(1) time.
        :rtype: collections.Iterable[Interval]
        """
        return self.all_intervals.__iter__()
    iter = __iter__

    def __len__(self):
        """
        Returns how many intervals are in the tree.

        Completes in O(1) time.
        :rtype: int
        """
        return len(self.all_intervals)

    def __eq__(self, other):
        """
        Whether two IntervalTrees are equal.

        Completes in O(n) time if sizes are equal; O(1) time otherwise.
        :rtype: bool
        """
        return (isinstance(other, IntervalTree) and
                self.all_intervals == other.all_intervals)

    def __repr__(self):
        """
        :rtype: str
        """
        ivs = sorted(self)
        if not ivs:
            return "IntervalTree()"
        else:
            return "IntervalTree({0})".format(ivs)
    __str__ = __repr__

    def __reduce__(self):
        """
        For pickle-ing.
        :rtype: tuple
        """
        return IntervalTree, (sorted(self.all_intervals),)

    def iterOverlap(self, begin=None, end=None, endOrder=False):
        """
        Returns an iterator over a search range. TODO: complexity
        :rtype: collections.Iterable[Interval]
        """
        if begin is None:
            begin = self.begin()
        if end is None:
            end = self.end()
        root = self.top_node
        if not root or begin >= end:
            return
        yield from root.iterOverlap(begin, end, endOrder)

    def computeCountHistogram(self, bins=100, begin=None, end=None):
        """
        Returns a list of evenly-spaced Intervals of length bins, where the
        data payload in each interval is the total number of Intervals that
        intersect with the bin.

        Completes in O(b * log(n)) time, where b is the number of bins.
        :rtype: list of Interval
        """
        counts = [0] * bins
        if self.top_node is None:
            globalBegin = begin or 0
            globalEnd = end or 1
        else:
            globalBegin = begin or self.top_node.begin
            globalEnd = end or self.top_node.end
        binSize = (globalEnd - globalBegin) / bins

        def getBin(value):
            b = (value - globalBegin) / binSize
            # Consistent with this class's interpretation, all Intervals,
            # including bins, are not inclusive of their upper bound. However,
            # for complete histograms, we make an exception for the highest bin
            # (which we interpret as inclusive)
            exclusive = b == floor(b) and not value == globalEnd
            b = floor(b)
            return (b, exclusive)

        def clampBin(b):
            b = max(b, 0)     # clamp to 0
            b = min(b, bins)  # clamp to num bins
            return b

        def recurse(node):
            beginBin = getBin(node.begin)[0]
            endBin, exclusiveEnd = getBin(node.end)
            if beginBin >= bins or endBin < 0 or (exclusiveEnd and endBin == 0):
                # this node is outside the bins that we're even counting; we
                # can ignore it and its descendants
                return
            if beginBin == endBin or (exclusiveEnd and beginBin == endBin - 1):
                # If the node's range fits within a single bin, just add its
                # count to that bin and return early
                counts[beginBin] += node.totalCount
            else:
                # Otherwise, count this node's intervals individually, and recurse
                for interval in node.s_center:
                    beginBin = clampBin(getBin(interval.begin)[0])
                    endBin, exclusiveEnd = getBin(interval.end)
                    if not exclusiveEnd:
                        endBin += 1
                    endBin = clampBin(endBin)
                    for binNo in range(beginBin, endBin):
                        counts[binNo] += 1
                if node.left_node:
                    recurse(node.left_node)
                if node.right_node:
                    recurse(node.right_node)

        if self.top_node:
            recurse(self.top_node)

        results = []
        for i, count in enumerate(counts):
            begin = globalBegin + i * binSize
            end = globalBegin + (i + 1) * binSize
            results.append(Interval(begin, end, count))
        return results

    def computeUtilizationHistogram(self, bins=100, begin=None, end=None):
        """
        Returns a list of evenly-spaced Intervals of length bins, where the
        data payload in each interval is the percentage of the bin containing
        an interval. Note that this will be higher than 1.0 if multiple
        intervals within the bin overlap each other (i.e. two intervals
        covering the entire bin would result in 2.0, or a 200% utilization
        rate).

        Completes in O(b * log(n)) time, where b is the number of bins.
        :rtype: list of Interval
        """
        scores = [0] * bins
        if self.top_node is None:
            globalBegin = begin or 0
            globalEnd = end or 1
        else:
            globalBegin = begin or self.top_node.begin
            globalEnd = end or self.top_node.end
        binSize = (globalEnd - globalBegin) / bins

        def getBin(value):
            b = (value - globalBegin) / binSize
            # Consistent with this class's interpretation, all Intervals,
            # including bins, are not inclusive of their upper bound. However,
            # for complete histograms, we make an exception for the highest bin
            # (which we interpret as inclusive)
            exclusive = b == floor(b) and not value == globalEnd
            b = floor(b)
            return (b, exclusive)

        def clampBin(b):
            b = max(b, 0)     # clamp to 0
            b = min(b, bins)  # clamp to num bins
            return b

        def recurse(node):
            beginBin = getBin(node.begin)[0]
            endBin, exclusiveEnd = getBin(node.end)
            if beginBin >= bins or endBin < 0 or (exclusiveEnd and endBin == 0):
                # this node is outside the bins that we're including; we
                # can ignore it and its descendants
                return
            if beginBin == endBin or (exclusiveEnd and beginBin == endBin - 1):
                # If the node's range fits within a single bin, just add its
                # normalized score to that bin and return early
                scores[beginBin] += node.utilization * (node.end - node.begin) / binSize
            else:
                # Otherwise, score this node's intervals individually, and recurse
                for interval in node.s_center:
                    beginBin = clampBin(getBin(interval.begin)[0])
                    endBin, exclusiveEnd = getBin(interval.end)
                    if not exclusiveEnd:
                        endBin += 1
                    endBin = clampBin(endBin)
                    for binNo in range(beginBin, endBin):
                        beginBound = globalBegin + binSize * binNo
                        endBound = beginBound + binSize
                        overlapBegin = max(beginBound, interval.begin)
                        overlapEnd = min(endBound, interval.end)
                        scores[binNo] += (overlapEnd - overlapBegin) / binSize
                if node.left_node:
                    recurse(node.left_node)
                if node.right_node:
                    recurse(node.right_node)

        if self.top_node:
            recurse(self.top_node)

        results = []
        for i, score in enumerate(scores):
            begin = globalBegin + i * binSize
            end = globalBegin + (i + 1) * binSize
            results.append(Interval(begin, end, score))
        return results
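# Usage sketch (added; not in the original source). The iterOverlap() and
# histogram methods at the end of IntervalTree are extensions to the published
# intervaltree API and carry no doctests; this demo assumes the extended Node
# class (not shown) exposes begin/end/s_center/left_node/right_node/totalCount.
# Note that the `begin or ...` defaulting means an explicit begin of 0 falls
# back to the tree's own lower bound, so nonzero bounds are used here.
def _demo_count_histogram():
    tree = IntervalTree.from_tuples([(1, 3), (2, 4)])
    counts = tree.computeCountHistogram(bins=4, begin=1, end=5)
    # bins [1,2), [2,3), [3,4), [4,5) intersect 1, 2, 1 and 0 intervals
    print([iv.data for iv in counts])  # -> [1, 2, 1, 0]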
def get_initial_mapping(
        logical_graph: nx.Graph,
        device_graph: nx.Graph,
        random_state: Optional[Union[np.random.RandomState, int]] = None
) -> Dict[ops.Qid, ops.Qid]:
    """Gets an initial mapping of logical to physical qubits for routing.

    Args:
        logical_graph: The graph whose edges correspond to pairs of qubits
            that should be mapped to nearby physical qubits.
        device_graph: The graph of the device.
        random_state: Random state or random state seed.

    The mapping starts by mapping the center of the logical graph to the
    center of the physical graph. Subsequent logical qubits are mapped to
    physical qubits greedily. At each iteration, the logical qubits with the
    largest number of already mapped neighbors and the physical qubits
    neighboring those already mapped to are considered. The pair of logical
    and physical qubits that minimizes the average distance to already mapped
    logical neighbors is selected.
    """
    if random_state is None:
        prng = np.random
    elif (isinstance(random_state, np.random.RandomState) or
          random_state is np.random):
        prng = random_state
    else:
        prng = np.random.RandomState(random_state)

    unplaced_vertices = set(logical_graph)

    logical_center = cast(ops.Qid, get_center(logical_graph))
    device_center = cast(ops.Qid, get_center(device_graph))
    mapping = {device_center: logical_center}
    unplaced_vertices.remove(logical_center)

    physical_distances = {
        (a, b): d
        for a, neighbor_distances in nx.shortest_path_length(device_graph)
        for b, d in neighbor_distances.items()
    }
    while unplaced_vertices:
        placed_vertices = set(mapping.values())
        placed_neighbors = {
            v: placed_vertices.intersection(logical_graph[v])
            for v in unplaced_vertices
        }
        nums_placed_neighbors = {v: len(N) for v, N in placed_neighbors.items()}
        max_num_placed_neighbors = max(nums_placed_neighbors.values())
        candidates = [
            v for v, n in nums_placed_neighbors.items()
            if n == max_num_placed_neighbors
        ]

        border = SortedSet().union(
            *(device_graph[v] for v in mapping)).difference(mapping)
        total_distances = SortedDict()
        for l, p in itertools.product(candidates, border):
            total_distance = 0
            for pp, ll in mapping.items():
                if logical_graph.has_edge(l, ll):
                    total_distance += physical_distances[p, pp]
            total_distances[l, p] = total_distance
        min_total_distance = min(total_distances.values())
        best_candidates = [
            lp for lp, d in total_distances.items()
            if d == min_total_distance
        ]
        choice = prng.choice(len(best_candidates))
        l, p = best_candidates[choice]
        assert p not in mapping
        assert l not in mapping.values()
        mapping[p] = l
        unplaced_vertices.remove(l)
    return mapping
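# Usage sketch (added; not in the original source). get_center() is referenced
# above but not defined in this snippet; a plausible stand-in is the first
# graph center reported by networkx, though the real helper may differ. The
# demo itself is hypothetical: it maps a 4-qubit line onto a 3x3 grid and
# assumes cirq and the module-level imports (nx, np, itertools, SortedDict,
# SortedSet, cast, ops) are available.
def get_center(graph):
    return nx.center(graph)[0]  # hypothetical stand-in

def _demo_initial_mapping():
    import cirq
    logical = nx.path_graph([cirq.LineQubit(i) for i in range(4)])
    device = nx.grid_2d_graph(3, 3)
    device = nx.relabel_nodes(device, {n: cirq.GridQubit(*n) for n in device})
    return get_initial_mapping(logical, device, random_state=0)  # physical -> logical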
class DictionaryBuilder(object):

    __BYTE_MAX_VALUE = 127
    __MAX_LENGTH = 255
    __COLS_NUM = 18
    __BUFFER_SIZE = 1024 * 1024
    __PATTERN_UNICODE_LITERAL = re.compile(r"\\u([0-9a-fA-F]{4}|{[0-9a-fA-F]+})")
    __ARRAY_MAX_LENGTH = __BYTE_MAX_VALUE  # max value of byte in Java
    __STRING_MAX_LENGTH = 32767  # max value of short in Java

    is_user_dictionary = False

    class WordEntry:
        headword = None
        parameters = None
        wordinfo = None
        aunit_split_string = None
        bunit_split_string = None
        cunit_split_string = None

    class PosTable(object):

        def __init__(self):
            self.table = []

        def get_id(self, str_):
            id_ = self.table.index(str_) if str_ in self.table else -1
            if id_ < 0:
                id_ = len(self.table)
                self.table.append(str_)
            return id_

        def get_list(self):
            return self.table

    @staticmethod
    def __default_logger():
        handler = StreamHandler()
        handler.terminator = ""
        handler.setLevel(DEBUG)
        logger = getLogger(__name__)
        logger.setLevel(DEBUG)
        logger.addHandler(handler)
        logger.propagate = False
        return logger

    def __init__(self, *, logger=None):
        self.byte_buffer = JTypedByteBuffer()
        self.trie_keys = SortedDict()
        self.entries = []
        self.is_dictionary = False
        self.pos_table = self.PosTable()
        self.logger = logger or self.__default_logger()

    def build(self, lexicon_paths, matrix_input_stream, out_stream):
        self.logger.info('reading the source file...')
        for path in lexicon_paths:
            with open(path, 'r', encoding='utf-8') as rf:
                self.build_lexicon(rf)
        self.logger.info('{} words\n'.format(len(self.entries)))

        self.write_grammar(matrix_input_stream, out_stream)
        self.write_lexicon(out_stream)

    def build_lexicon(self, lexicon_input_stream):
        line_no = -1
        try:
            for i, row in enumerate(csv.reader(lexicon_input_stream)):
                line_no = i
                entry = self.parse_line(row)
                if entry.headword:
                    self.add_to_trie(entry.headword, len(self.entries))
                self.entries.append(entry)
        except Exception as e:
            if line_no > 0:
                self.logger.error(
                    '{} at line {} in {}\n'.format(e.args[0], line_no, lexicon_input_stream.name))
            raise e

    def parse_line(self, cols):
        if len(cols) != self.__COLS_NUM:
            raise ValueError('invalid format')
        cols = [self.decode(col) for col in cols]
        if not self.__is_length_valid(cols):
            raise ValueError('string is too long')
        if not cols[0]:
            raise ValueError('headword is empty')
        entry = self.WordEntry()

        # headword for the trie
        if cols[1] != '-1':
            entry.headword = cols[0]
        # left-id, right-id, connect_cost
        entry.parameters = [int(cols[i]) for i in [1, 2, 3]]
        # part of speech
        pos_id = self.get_posid(cols[5:11])
        if pos_id < 0:
            raise ValueError('invalid part of speech')

        entry.aunit_split_string = cols[15]
        entry.bunit_split_string = cols[16]
        entry.cunit_split_string = cols[17]
        self.check_splitinfo_format(entry.aunit_split_string)
        self.check_splitinfo_format(entry.bunit_split_string)
        self.check_splitinfo_format(entry.cunit_split_string)

        if cols[14] == 'A' and \
                not (entry.aunit_split_string == '*' and entry.bunit_split_string == '*'):
            raise ValueError('invalid splitting')

        head_length = len(cols[0].encode('utf-8'))
        dict_from_wordid = -1 if cols[13] == '*' else int(cols[13])
        entry.wordinfo = WordInfo(
            cols[4], head_length, pos_id, cols[12], dict_from_wordid, '',
            cols[11], None, None, None)
        return entry

    def __is_length_valid(self, cols):
        head_length = len(cols[0].encode('utf-8'))
        return head_length <= self.__STRING_MAX_LENGTH \
            and len(cols[4]) <= self.__STRING_MAX_LENGTH \
            and len(cols[11]) <= self.__STRING_MAX_LENGTH \
            and len(cols[12]) <= self.__STRING_MAX_LENGTH

    def add_to_trie(self, headword, word_id):
        key = headword.encode('utf-8')
        if key not in self.trie_keys:
            self.trie_keys[key] = []
        self.trie_keys[key].append(word_id)

    def get_posid(self, strs):
        return self.pos_table.get_id(','.join(strs))

    def write_grammar(self, matrix_input_stream, output_stream):
        self.logger.info('writing the POS table...')
        self.convert_postable(self.pos_table.get_list())
        self.byte_buffer.seek(0)
        output_stream.write(self.byte_buffer.read())
        self.__logging_size(self.byte_buffer.tell())
        self.byte_buffer.clear()

        self.logger.info('writing the connection matrix...')
        if not matrix_input_stream:
            self.byte_buffer.write_int(0, 'short')
            self.byte_buffer.write_int(0, 'short')
            self.byte_buffer.seek(0)
            output_stream.write(self.byte_buffer.read())
            self.__logging_size(self.byte_buffer.tell())
            self.byte_buffer.clear()
            return
        matrix = self.convert_matrix(matrix_input_stream)
        self.byte_buffer.seek(0)
        output_stream.write(self.byte_buffer.read())
        self.byte_buffer.clear()
        output_stream.write(matrix.read())
        self.__logging_size(matrix.tell() + 4)

    def convert_postable(self, pos_list):
        self.byte_buffer.write_int(len(pos_list), 'short')
        for pos in pos_list:
            for text in pos.split(','):
                self.write_string(text)

    def convert_matrix(self, matrix_input):
        header = matrix_input.readline().strip()
        if re.fullmatch(r"\s*", header):
            raise ValueError('invalid format at line 0')
        lr = header.split()
        lsize, rsize = [int(x) for x in lr]
        self.byte_buffer.write_int(lsize, 'short')
        self.byte_buffer.write_int(rsize, 'short')
        matrix = JTypedByteBuffer()
        for i, line in enumerate(matrix_input.readlines()):
            line = line.strip()
            if re.fullmatch(r"\s*", line) or re.match("#", line):
                continue
            cols = line.split()
            if len(cols) < 3:
                self.logger.warning('invalid format at line {}'.format(i))
                continue
            l, r, cost = [int(col) for col in cols]
            pos = matrix.tell()
            matrix.seek(2 * (l + lsize * r))
            matrix.write_int(cost, 'short')
            matrix.seek(pos)
        return matrix

    def write_lexicon(self, io_out):
        trie = DoubleArray()
        wordid_table = JTypedByteBuffer()
        keys = []
        vals = []
        for key, word_ids in self.trie_keys.items():
            keys.append(key)
            vals.append(wordid_table.tell())
            wordid_table.write_int(len(word_ids), 'byte')
            for wid in word_ids:
                wordid_table.write_int(wid, 'int')

        self.logger.info('building the trie...')
        trie.build(keys, lengths=[len(k) for k in keys], values=vals)
        self.logger.info('done\n')

        self.logger.info('writing the trie...')
        self.byte_buffer.clear()
        self.byte_buffer.write_int(trie.size(), 'int')
        self.byte_buffer.seek(0)
        io_out.write(self.byte_buffer.read())
        self.byte_buffer.clear()
        io_out.write(trie.array())
        self.__logging_size(trie.size() * 4 + 4)
        trie.clear()
        del trie

        self.logger.info('writing the word-ID table...')
        self.byte_buffer.write_int(wordid_table.tell(), 'int')
        self.byte_buffer.seek(0)
        io_out.write(self.byte_buffer.read())
        self.byte_buffer.clear()
        wordid_table.seek(0)
        io_out.write(wordid_table.read())
        self.__logging_size(wordid_table.tell() + 4)
        del wordid_table

        self.logger.info('writing the word parameters...')
        self.byte_buffer.write_int(len(self.entries), 'int')
        for entry in self.entries:
            self.byte_buffer.write_int(entry.parameters[0], 'short')
            self.byte_buffer.write_int(entry.parameters[1], 'short')
            self.byte_buffer.write_int(entry.parameters[2], 'short')
        self.byte_buffer.seek(0)
        io_out.write(self.byte_buffer.read())
        self.byte_buffer.clear()
        self.__logging_size(len(self.entries) * 6 + 4)
        self.write_wordinfo(io_out)

    def write_wordinfo(self, io_out):
        mark = io_out.tell()
        io_out.seek(mark + 4 * len(self.entries))
        offsets = JTypedByteBuffer()
        self.logger.info('writing the word_infos...')
        base = io_out.tell()
        for entry in self.entries:
            wi = entry.wordinfo
            offsets.write_int(io_out.tell(), 'int')
            self.write_string(wi.surface)
            self.write_stringlength(wi.length())
            self.byte_buffer.write_int(wi.pos_id, 'short')
            if wi.normalized_form == wi.surface:
                self.write_string('')
            else:
                self.write_string(wi.normalized_form)
            self.byte_buffer.write_int(wi.dictionary_form_word_id, 'int')
            if wi.reading_form == wi.surface:
                self.write_string('')
            else:
                self.write_string(wi.reading_form)
            self.write_intarray(self.parse_splitinfo(entry.aunit_split_string))
            self.write_intarray(self.parse_splitinfo(entry.bunit_split_string))
            self.write_intarray(self.parse_splitinfo(entry.cunit_split_string))
            self.byte_buffer.seek(0)
            io_out.write(self.byte_buffer.read())
            self.byte_buffer.clear()
        self.__logging_size(io_out.tell() - base)

        self.logger.info('writing word_info offsets...')
        io_out.seek(mark)
        offsets.seek(0)
        io_out.write(offsets.read())
        self.__logging_size(offsets.tell())

    def decode(self, str_):
        def replace(match):
            uni_text = match.group()
            uni_text = uni_text.replace('{', '').replace('}', '')
            if len(uni_text) > 6:
                uni_text = ('\\U000{}'.format(uni_text[2:]))
            return uni_text.encode('ascii').decode('unicode-escape')
        return re.sub(self.__PATTERN_UNICODE_LITERAL, replace, str_)

    def check_splitinfo_format(self, str_):
        if str_.count('/') + 1 > self.__ARRAY_MAX_LENGTH:
            raise ValueError('too many units')

    def parse_splitinfo(self, info):
        if info == '*':
            return []
        words = info.split('/')
        if len(words) > self.__ARRAY_MAX_LENGTH:
            raise ValueError('too many units')
        ids = []
        for word in words:
            if self.__is_id(word):
                ids.append(self.parse_id(word))
            else:
                ids.append(self.word_to_id(word))
                if ids[-1] < 0:
                    raise ValueError('word not found')
        return ids

    @staticmethod
    def __is_id(text):
        return re.match(r'U?\d+', text)

    def parse_id(self, text):
        if text.startswith('U'):
            id_ = int(text[1:])
            if self.is_user_dictionary:
                id_ |= (1 << 28)
        else:
            id_ = int(text)
        self.check_wordid(id_)
        return id_

    def word_to_id(self, text):
        cols = text.split(',')
        if len(cols) < 8:
            raise ValueError('too few columns')
        headword = self.decode(cols[0])
        pos_id = self.get_posid([cols[i] for i in range(1, 7)])
        if pos_id < 0:
            raise ValueError('invalid part of speech')
        reading = self.decode(cols[7])
        return self.get_wordid(headword, pos_id, reading)

    def get_wordid(self, headword, pos_id, reading_form):
        for i in range(len(self.entries)):
            info = self.entries[i].wordinfo
            if info.surface == headword \
                    and info.pos_id == pos_id \
                    and info.reading_form == reading_form:
                return i
        return -1

    def check_wordid(self, wid):
        if wid < 0 or wid >= len(self.entries):
            raise ValueError('invalid word ID')

    def write_string(self, text):
        len_ = 0
        for c in text:
            if 0x10000 <= ord(c) <= 0x10FFFF:
                len_ += 2
            else:
                len_ += 1
        self.write_stringlength(len_)
        self.byte_buffer.write_str(text)

    def write_stringlength(self, len_):
        if len_ <= self.__BYTE_MAX_VALUE:
            self.byte_buffer.write_int(len_, 'byte')
        else:
            self.byte_buffer.write_int((len_ >> 8) | 0x80, 'byte')
            self.byte_buffer.write_int((len_ & 0xFF), 'byte')

    def write_intarray(self, array):
        self.byte_buffer.write_int(len(array), 'byte')
        for item in array:
            self.byte_buffer.write_int(item, 'int')

    def __logging_size(self, size):
        self.logger.info('{} bytes\n'.format(size))
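# Usage sketch (added; not in the original source). Shows how the builder is
# typically driven: one or more lexicon CSVs plus a connection-matrix stream,
# writing a binary dictionary. The file names are hypothetical; WordInfo,
# JTypedByteBuffer and DoubleArray come from the surrounding package.
def _demo_build_dictionary():
    builder = DictionaryBuilder()
    with open('matrix.def', 'r', encoding='utf-8') as matrix_in, \
            open('system.dic', 'wb') as dict_out:
        builder.build(['lexicon.csv'], matrix_in, dict_out)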
class SecondCandle: def __init__(self, logger, product, timescale, parameters): self.candleupdated = Event() self.product = product self.logger = logger self.timescale = timescale self._parameters = parameters self.lastcandle = datetime.now(timezone(timedelta(hours=9), 'JST')) self.lastexecution = [] self.lastexecutiontime = time.time() self.sectimer = time.time() self.mid_price = -1 self.spot_mid_price = -1 self.spot_price = -1 self.spot_price_last = -1 self.spot_price_exec = -1 # 約定履歴を貯めるバッファ。溢れないように余裕をもって多めにしておく # 現状の取引量だと秒あたり100~200約定なので、必要量の2.5~5倍程度を確保している self.executions = deque(maxlen=timescale*5000) self.RealtimeHandler() warnings.simplefilter(action="ignore", category=FutureWarning) # レイテンシの計測用バッファ (直近5配信分) self.latancy_buf = deque(maxlen=5) # 現在足の生成 self.current_open = 0 self.current_high = 0 self.current_low = 0 self.current_close = 0 self.current_volume = 0 self.current_buy_volume = 0 self.current_sell_volume = 0 self.current_count = 0 self.current_buy_count = 0 self.current_sell_count = 0 self.current_total_value = 0 # 板情報の更新中ロック self.board_lock = Lock() self.spot_board_lock = Lock() # 約定履歴更新中ロック self.execution_lock = Lock() # レイテンシの計測用バッファ更新中ロック self.latancy_buf_lock = Lock() # 板情報を格納 self.bids = SortedDict() self.asks = SortedDict() self.board_age = 0 self.spot_bids = SortedDict() self.spot_asks = SortedDict() self.previous_candles = deque(maxlen=3) # NaNでない秒足のリスト (3本あれば十分) @property def current_latency(self): with self.latancy_buf_lock: latency = int(mean(self.latancy_buf)*1000) return latency def format_date(self, date_line, time_diff): exec_date = date_line.replace('T', ' ')[:-1] try: if len(exec_date) == 19: exec_date = exec_date + '.0' d = datetime(int(exec_date[0:4]), int(exec_date[5:7]), int(exec_date[8:10]), int( exec_date[11:13]), int(exec_date[14:16]), int(exec_date[17:19]), int(exec_date[20:26]), tzinfo=timezone(time_diff, 'JST')) + time_diff except Exception as e: self._logger.exception("Error while parsing date str : exec_date:{} {}, {}".format( exec_date, e, traceback.print_exc())) d = parser.parse(exec_date) + time_diff return d def random4digit(self): return "+{:04}".format(random.randint(0, 9999)) # executionリストをもとに1秒足のローソクを生成 def updatecandle(self): try: start = time.time() with self.execution_lock: # dequeをリストに tmpExecutions = list(self.executions) self.raw = pandas.DataFrame([[ tick["exec_date"], tick["price"], tick["size"], tick["size"]if tick["side"] == 'BUY'else 0, tick["size"]if tick["side"] == 'SELL'else 0, 1 if tick["size"] != 0 else 0, 1 if tick["side"] == 'BUY' else 0, 1 if tick["side"] == 'SELL' else 0, tick["price"] * tick["size"] ] for tick in tmpExecutions], columns=["date", "price", "volume", "buy", "sell", "count", "count_buy", "count_sell", "total_value"]) self.candle = self.raw.set_index('date').resample(str(self.timescale)+"s").agg({ "price": "ohlc", "volume": "sum", "buy": "sum", "sell": "sum", "count": "sum", "count_buy": "sum", "count_sell": "sum", "total_value": "sum"}) self.candle.columns = self.candle.columns.droplevel() self.previous_candles.clear() # NaNでない秒足のリスト candle_index = self.candle.index.values for i in range(1, len(candle_index)): # NaNが自身との等号判定でfalseを返すという性質を利用してNanかどうかを判定 if self.candle.at[candle_index[i], "open"] != self.candle.at[candle_index[i], "open"]: # その期間に約定履歴が無い場合にはひとつ前の足からコピー self.candle.loc[candle_index[i], [ "open", "high", "low", "close"]] = self.candle.at[candle_index[i-1], "close"] else: self.previous_candles.append(self.candle.index[i]) self.lastcandle = self.candle[-1:].index[0] # self.logger.log(5, 
"Conversion elapsed_time:{:.1f}".format((time.time() - start)*1000) + "[msec]") # self.logger.log(5, "{}ticks -> {}candles x 1sec".format(len(tmpExecutions),len(self.candle))) except: pass # 負荷軽減のため、ローソク足に変換済みの約定履歴を破棄 def reduce_exeution_buffer(self): if len(self.executions) == 0: return if len(self.previous_candles) == 0: return with self.execution_lock: while True: i = self.executions.popleft() if self.previous_candles[0].timestamp() <= i['exec_date'].timestamp(): self.executions.appendleft(i) break # 板情報から実効Ask/Bid(=指値を入れる基準値)を計算する関数 startpriceから上下サイズ分をみて価格を決める def get_effective_tick(self, size_thru, startprice, limitprice): try: with self.board_lock: asks = self.asks.items() bids = self.bids.items() total = 0 asks_pos = self.mid_price for price, size in asks: if price > startprice or startprice == 0: if startprice == 0: startprice = price total += size asks_pos = price if total > size_thru or price > startprice+limitprice: break total = 0 bids_pos = self.mid_price for price, size in reversed(bids): if price < startprice or startprice == 0: if startprice == 0: startprice = price total += size bids_pos = price if total > size_thru or price < startprice-limitprice: break except: return {'bid': 0, 'ask': 0} return {'bid': bids_pos, 'ask': asks_pos} # 板情報を返す関数(bFからのレスポンスと同じ形に成型する) def get_board(self): with self.board_lock: asks = self.asks.items() bids = self.bids.items() bids_dict = [{'price': a[0], 'size':a[1]} for a in list(bids)] asks_dict = [{'price': a[0], 'size':a[1]} for a in list(asks)] bids_dict.reverse() return {'mid_price': self.mid_price, 'bids': bids_dict, 'asks': asks_dict} # 板情報を返す関数(bFからのレスポンスと同じ形に成型する) def get_spot_board(self): with self.spot_board_lock: asks = self.spot_asks.items() bids = self.spot_bids.items() bids_dict = [{'price': a[0], 'size':a[1]} for a in list(bids)] asks_dict = [{'price': a[0], 'size':a[1]} for a in list(asks)] bids_dict.reverse() return {'mid_price': self.spot_mid_price, 'bids': bids_dict, 'asks': asks_dict} def RealtimeHandler(self): # ハンドラ呼び出し def handler(func, *args): return func(*args) # board(SortedDict)にdの板情報を挿入(削除) def update_board(board, d): for i in d: p, s = i['price'], i['size'] if s != 0: board[p] = s elif p in board: del board[p] def check_rollback_exec(): # 過去データにヒットしていたら with self._parameters.executed_order_pending_rollback_lock: if self._parameters.executed_order_pending_rollback == True: self._parameters.executed_order_pending_rollback = False # 過去のヒットしなかった約定データを再精査 for i in range(len(self._parameters.executed_order_pending_detail)): r = self._parameters.executed_order_pending_detail.popleft() check_execution(r) def check_execution(r): # 自分が発行したオーダーのリストに当該のidがあれば自分の約定 if r['child_order_acceptance_id'] in list(self._parameters.childorder_id_list): with self._parameters.server_accepted_time_lock: accepted_order = [i for i in self._parameters.server_accepted_time_detail if i['id'] == r['child_order_acceptance_id'] and i['event'] == 'ORDER'][0] apitime = accepted_order['time'] if accepted_order == []: accepted_order = { 'sendorder': 0, 'accepted': 0, 'ordered': 0} if 'ordered' not in accepted_order: accepted_order['ordered'] = time.time() self.logger.info(" EXECUTION: ({:.0f}msec) {} {} price:{} size:{} [sfd:{}]".format((time.time()-apitime)*1000, r['child_order_acceptance_id'], r['side'], r['price'], r['size'], r['sfd'])) self._parameters.sfd_commission += r['sfd'] if r['sfd'] > 0: self._parameters.sfd_profit += r['sfd'] if r['sfd'] < 0: self._parameters.sfd_loss += r['sfd'] if not 
                if not self._parameters._config['execution_check_with_public_channel']:
                    # Adjust the estimated position (flash figure)
                    lotsize = round(r['size'] if r['side'] == "BUY" else -r['size'], 8)
                    self._parameters.estimated_position2 += lotsize
                    self._parameters.executed_size[-1] += r['size']    # traded volume
                    self._parameters.executed_size_today += r['size']  # traded volume
                    # For now just put it on the executed list; position management later computes the position change
                    with self._parameters.executed_order_lock:
                        self._parameters.executed_order.append(
                            {'id': r['child_order_acceptance_id']+self.random4digit(),
                             'price': r["price"], 'lot': lotsize, 'date': r["event_date"],
                             'timestamp': time.time(), 'sendorder': accepted_order['time'],
                             'accepted': accepted_order['accepted'], 'ordered': accepted_order['ordered']})
                    self._parameters.execution_event.set()
                    if self._parameters.drive_by_executions:
                        # If drive_by_executions is set, call realtime_logic() when a fill is detected
                        self._parameters.logic_execution_event.set()
                        self._parameters.order_signal_event.set()
            else:
                self._parameters.executed_order_pending.append(
                    r['child_order_acceptance_id'])  # Stash it in case the order and the execution notice arrive out of order
                self._parameters.executed_order_pending_detail.append(
                    r)  # Stash it in case the order and the execution notice arrive out of order

        # https://bf-lightning-api.readme.io/docs/realtime-executions
        def on_executions(recept_data):
            self._parameters.execution_timestamp = time.time()
            self._parameters.ltp = recept_data[-1]["price"]
            try:
                # Measure delivery latency from the difference between now and the
                # execution's timestamp. References:
                # https://gist.github.com/nagadomi/bbf4df93a4ac2fce10d89e4206e4cb7a
                # https://twitter.com/ultraistter/status/1046186504370966528
                with self.latency_buf_lock:
                    latency_sec = datetime.now().timestamp() - \
                        self.format_date(recept_data[-1]["exec_date"], timedelta(hours=0)).timestamp()
                    self.latency_buf.append(latency_sec)
                    self._parameters.all_latency_history.append(int(latency_sec*1000))
            except:
                pass
            try:
                if self._parameters._strategy_class != None:
                    self._parameters._strategy_class.hit_check(recept_data)
            except Exception as e:
                self.logger.exception(
                    "Error in executions routine : {}, {}".format(e, traceback.print_exc()))
            try:
                # Call the strategy's handler if one is registered
                if self._parameters.execution_handler != None:
                    handler(self._parameters.execution_handler, recept_data)
            except Exception as e:
                self.logger.exception(
                    "Error in executions routine : {}, {}".format(e, traceback.print_exc()))
            with self.execution_lock:
                for i in recept_data:
                    # Keep it for candle generation
                    self.executions.append({'exec_date': self.format_date(i["exec_date"], timedelta(hours=9)),
                                            'price': i["price"], 'size': i["size"], 'side': i["side"]})
            ask_top = int(recept_data[0]['price'])
            bid_bottom = int(recept_data[0]['price'])
            self._parameters.execution_counter[-1] += len(recept_data)
            for i in recept_data:
                # Update the current (in-progress) candle
                self.current_exec_date = self.format_date(i["exec_date"], timedelta(hours=9))
                current_price = int(i['price'])
                current_size = i['size']
                if self.current_open == 0:
                    self.current_open = current_price
                    self.current_high = current_price
                    self.current_low = current_price
                    self.current_close = current_price
                self.current_high = max(current_price, self.current_high)
                self.current_low = min(current_price, self.current_low)
                self.current_close = current_price
                self.current_volume += current_size
                self.current_count += 1
                self.current_total_value += current_price * current_size
                if i['side'] == 'BUY':
                    self.current_buy_volume += current_size
                    self.current_buy_count += 1
                    self._parameters.best_ask = current_price
                else:
                    self.current_sell_volume += current_size
                    self.current_sell_count += 1
                    self._parameters.best_bid = current_price
                # High/low of this batch of executions
                ask_top = max(current_price, ask_top)
                bid_bottom = min(current_price, bid_bottom)

                # If private-channel auth has not completed, detect our fills from the public channel
                if self._parameters._config['execution_check_with_public_channel'] or not self.realtimeapi.auth_check():
                    # ids used for lookups
                    buy_acceptance_id = i["buy_child_order_acceptance_id"]
                    buy_acceptance_id_4d = buy_acceptance_id+self.random4digit()
                    sell_acceptance_id = i["sell_child_order_acceptance_id"]
                    sell_acceptance_id_4d = sell_acceptance_id+self.random4digit()
                    # If the execution carries an order id that is in our order list, treat it as our fill
                    with self._parameters.order_id_list_lock:
                        checklist = list(self._parameters.childorder_id_list)
                    fHitBuy = True if buy_acceptance_id in checklist else False
                    fHitSell = True if sell_acceptance_id in checklist else False
                    if fHitBuy:
                        # Pull the matching id from the detailed order list and check it closely, including side
                        hitlist = [x for x in self._parameters.childorder_information
                                   if x['id'] == buy_acceptance_id and x['side'] == 'BUY'
                                   and x['remain'] >= i['size']
                                   and (x['child_order_type'] == 'MARKET' or x['price'] >= i['price'])]
                        if hitlist:
                            # Adjust the estimated position (flash figure)
                            self._parameters.estimated_position2 += i['size']
                            self._parameters.executed_size[-1] += i['size']    # traded volume
                            self._parameters.executed_size_today += i['size']  # traded volume
                            # For now just put it on the executed list; the position change is computed later
                            accepted_order = [a for a in self._parameters.server_accepted_time_detail
                                              if a['id'] == buy_acceptance_id and a['event'] == 'ORDER'][0]
                            if accepted_order == []:
                                accepted_order = {'sendorder': 0, 'accepted': 0, 'ordered': 0}
                            if 'ordered' not in accepted_order:
                                accepted_order['ordered'] = time.time()
                            with self._parameters.executed_order_lock:
                                self._parameters.executed_order.append(
                                    {'id': buy_acceptance_id_4d, 'price': i["price"], 'lot': i["size"],
                                     'date': i["exec_date"], 'timestamp': time.time(),
                                     'sendorder': accepted_order['time'], 'accepted': accepted_order['accepted'],
                                     'ordered': accepted_order['ordered']})
                            self.logger.debug(" HIT({})*****BUY!!! ({}) {} price:{:.0f} size:{:.8f}".format(
                                len(hitlist), self.format_date(i["exec_date"], timedelta(hours=9)),
                                buy_acceptance_id, i["price"], i["size"]))
                            self._parameters.execution_event.set()
                            if self._parameters.drive_by_executions:
                                # If drive_by_executions is set, call realtime_logic() when a fill is detected
                                self._parameters.logic_execution_event.set()
                                self._parameters.order_signal_event.set()
                        else:
                            self.logger.error(" Unexpected BUY!!! ({}) price:{:.0f} size:{:.8f}".format(
                                buy_acceptance_id, i["price"], i["size"]))
                    if fHitSell:
                        # Pull the matching id from the detailed order list and check it closely, including side
                        hitlist = [x for x in self._parameters.childorder_information
                                   if x['id'] == sell_acceptance_id and x['side'] == 'SELL'
                                   and x['remain'] >= i['size']
                                   and (x['child_order_type'] == 'MARKET' or x['price'] <= i['price'])]
                        if hitlist:
                            # Adjust the estimated position (flash figure)
                            self._parameters.estimated_position2 -= i['size']
                            self._parameters.executed_size[-1] += i['size']    # traded volume
                            self._parameters.executed_size_today += i['size']  # traded volume
                            # For now just put it on the executed list; the position change is computed later
                            accepted_order = [a for a in self._parameters.server_accepted_time_detail
                                              if a['id'] == sell_acceptance_id and a['event'] == 'ORDER'][0]
                            if accepted_order == []:
                                accepted_order = {'sendorder': 0, 'accepted': 0, 'ordered': 0}
                            if 'ordered' not in accepted_order:
                                accepted_order['ordered'] = time.time()
                            with self._parameters.executed_order_lock:
                                self._parameters.executed_order.append(
                                    {'id': sell_acceptance_id_4d, 'price': i["price"], 'lot': -i["size"],
                                     'date': i["exec_date"], 'timestamp': time.time(),
                                     'sendorder': accepted_order['time'], 'accepted': accepted_order['accepted'],
                                     'ordered': accepted_order['ordered']})
                            self.logger.debug(" HIT({})*****SELL!!! ({}) {} price:{:.0f} size:{:.8f}".format(
                                len(hitlist), self.format_date(i["exec_date"], timedelta(hours=9)),
                                sell_acceptance_id, i["price"], i["size"]))
                            self._parameters.execution_event.set()
                            if self._parameters.drive_by_executions:
                                # If drive_by_executions is set, call realtime_logic() when a fill is detected
                                self._parameters.logic_execution_event.set()
                                self._parameters.order_signal_event.set()
                        else:
                            self.logger.error(" Unexpected SELL!!! ({}) price:{:.0f} size:{:.8f}".format(
                                sell_acceptance_id, i["price"], i["size"]))

            # Trim the book with the execution history: levels that traded through are gone
            with self.board_lock:
                asks = self.asks.items()
                for price, size in asks:
                    if price < ask_top:
                        update_board(self.asks, [{'price': price, 'size': 0}])
                    else:
                        break
                bids = self.bids.items()
                for price, size in reversed(bids):
                    if price > bid_bottom:
                        update_board(self.bids, [{'price': price, 'size': 0}])
                    else:
                        break

            if time.time() - self.sectimer > 1:
                self.sectimer = time.time()
                self._parameters.latency_history.append(self.current_latency)  # latency of the execution feed

            # If the incoming execution's time is at least timescale seconds ahead of the last
            # candle from the previous update, run the candle update
            if len(recept_data) > 0 and self.format_date(recept_data[-1]["exec_date"], timedelta(hours=9)).timestamp() - self.lastcandle.timestamp() >= self.timescale:
                self.lastexecutiontime = self._parameters.execution_timestamp
                # Signal that the candles should be updated
                self.candleupdated.set()

        # https://bf-lightning-api.readme.io/docs/realtime-board-snapshot
        def on_board_snapshot(recept_data):
            self._parameters.board_timestamp = time.time()
            self.mid_price = int(recept_data['mid_price'])
            # Order book snapshot
            with self.board_lock:
                bids, asks = SortedDict(), SortedDict()   # build empty SortedDicts and
                update_board(bids, recept_data['bids'])   # pour every level from the
                update_board(asks, recept_data['asks'])   # message into them
                self.bids, self.asks = bids, asks
                self.board_age = 0
            try:
                # Call the strategy's handler if one is registered
                if self._parameters.board_updated_handler != None:
                    handler(self._parameters.board_updated_handler)
            except Exception as e:
                self.logger.exception(
                    "Error in board_updated routine : {}, {}".format(e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-board
        def on_board(recept_data):
            check_rollback_exec()
            self._parameters.board_timestamp = time.time()
            self.mid_price = int(recept_data['mid_price'])
            # Incremental book update: apply the received diffs to the snapshot
            with self.board_lock:
                update_board(self.bids, recept_data['bids'])
                update_board(self.asks, recept_data['asks'])
                self.board_age += 1
            try:
                # Call the strategy's handler if one is registered
                if self._parameters.board_updated_handler != None:
                    handler(self._parameters.board_updated_handler)
            except Exception as e:
                self.logger.exception(
                    "Error in board_updated routine : {}, {}".format(e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-child-order-events
        def on_child_order_events(recept_data):
            for r in recept_data:
                try:
                    if r['event_type'] == 'EXECUTION':
                        check_execution(r)
                    elif r['event_type'] == 'ORDER':
                        if r['child_order_acceptance_id'] in list(self._parameters.childorder_id_list):
                            with self._parameters.server_accepted_time_lock:
                                accepted_order = [i for i in self._parameters.server_accepted_time_detail
                                                  if i['id'] == r['child_order_acceptance_id'] and i['event'] == 'ORDER'][0]
                                apitime = accepted_order['time']
                                index = self._parameters.server_accepted_time_detail.index(accepted_order)
                                self._parameters.server_accepted_time_detail[index]['sendorder'] = accepted_order['time']
                                self._parameters.server_accepted_time_detail[index]['accepted'] = accepted_order['accepted']
                                self._parameters.server_accepted_time_detail[index]['ordered'] = time.time()
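                            # Record the server's order round-trip delay (API call to ORDER event, in ms,
                            # JST-stamped) for the delay statistics kept below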
                            self._parameters.server_order_delay_history.append(
                                [datetime.utcnow()+timedelta(hours=9), (time.time()-apitime)*1000])
                            self.logger.info(" ORDER: ({:.0f}msec) {} {} {} price:{} size:{} (latency {:.0f}msec)".format(
                                (time.time()-apitime)*1000, r['child_order_acceptance_id'], r['child_order_type'],
                                r['side'], r['price'], r['size'], self.current_latency))
                            self._parameters.ordered_speed_history[-1].append((time.time()-apitime)*1000)
                            # Pull the matching id from the detailed order list and record the
                            # acceptance and order-completion times
                            order_list = [x for x in self._parameters.childorder_information
                                          if x['id'] == r['child_order_acceptance_id']][0]
                            index = self._parameters.childorder_information.index(order_list)
                            self._parameters.childorder_information[index]['accepted'] = accepted_order['accepted']
                            self._parameters.childorder_information[index]['ordered'] = accepted_order['ordered']
                    elif r['event_type'] == 'ORDER_FAILED':
                        self.logger.debug(r)
                        if r['child_order_acceptance_id'] in list(self._parameters.childorder_id_list):
                            with self._parameters.server_accepted_time_lock:
                                apitime = [i['time'] for i in self._parameters.server_accepted_time_detail
                                           if i['id'] == r['child_order_acceptance_id'] and i['event'] == 'ORDER'][0]
                            self.logger.error(" ORDER_FAILED: ({:.0f}msec) {} {}".format(
                                (time.time()-apitime)*1000, r['child_order_acceptance_id'], r['reason']))
                            self._parameters.canceled_child_order.append(
                                r['child_order_acceptance_id'])  # Processed later by position management
                    elif r['event_type'] == 'EXPIRE':
                        # Pull the matching id from the detailed order list
                        order_list = [x for x in self._parameters.childorder_information
                                      if x['id'] == r['child_order_acceptance_id']]
                        if len(order_list) != 0 and order_list[0]['remain'] != order_list[0]['size']:
                            self._parameters.order_partial_filled_count[-1] += 1
                        if r['child_order_acceptance_id'] in list(self._parameters.childorder_id_list):
                            with self._parameters.server_accepted_time_lock:
                                apitime = [i['time'] for i in self._parameters.server_accepted_time_detail
                                           if i['id'] == r['child_order_acceptance_id'] and i['event'] == 'ORDER'][0]
                            self.logger.info(" EXPIRE: ({:.0f}msec) {}".format(
                                (time.time()-apitime)*1000, r['child_order_acceptance_id']))
                            self._parameters.canceled_child_order.append(
                                r['child_order_acceptance_id'])  # Processed later by position management
                    elif r['event_type'] == 'CANCEL':
                        # Pull the matching id from the detailed order list
                        order_list = [x for x in self._parameters.childorder_information
                                      if x['id'] == r['child_order_acceptance_id']]
                        if len(order_list) != 0 and order_list[0]['remain'] != order_list[0]['size']:
                            self._parameters.order_partial_filled_count[-1] += 1
                        apitime = 0
                        self._parameters.order_not_filled_count[-1] += 1
                        # Cancels we triggered by issuing a cancel command
                        if r['child_order_acceptance_id'] in list(self._parameters.cancel_child_id_list):
                            with self._parameters.server_accepted_time_lock:
                                apitime = [i['time'] for i in self._parameters.server_accepted_time_detail
                                           if i['id'] == r['child_order_acceptance_id'] and i['event'] == 'CANCEL'][0]
                            self._parameters.server_cancel_delay_history.append(
                                [datetime.utcnow()+timedelta(hours=9), (time.time()-apitime)*1000])
                            self.logger.info(" CANCEL: ({:.0f}msec) {} (latency {:.0f}msec)".format(
                                (time.time()-apitime)*1000, r['child_order_acceptance_id'], self.current_latency))
                            self._parameters.canceled_speed_history[-1].append((time.time()-apitime)*1000)
                        # Even without an explicit cancel command, if the id is in the list of
                        # orders we issued, this is a cancellation of one of our own orders
                        if r['child_order_acceptance_id'] in list(self._parameters.childorder_id_list):
                            if apitime == 0:
                                self.logger.info(" CANCEL: {} (latency {:.0f}msec)".format(
                                    r['child_order_acceptance_id'], self.current_latency))
                            self._parameters.canceled_child_order.append(
                                r['child_order_acceptance_id'])  # Processed later by position management
                    elif r['event_type'] == 'CANCEL_FAILED':
                        self.logger.debug(r)
                        if r['child_order_acceptance_id'] in list(self._parameters.cancel_child_id_list):
                            with self._parameters.server_accepted_time_lock:
                                apitime = [i['time'] for i in self._parameters.server_accepted_time_detail
                                           if i['id'] == r['child_order_acceptance_id'] and i['event'] == 'CANCEL'][0]
                            self.logger.error(" CANCEL_FAILED: ({:.0f}msec) {}".format(
                                # self.format_date(r['event_date'], timedelta(hours=9)),
                                (time.time()-apitime)*1000, r['child_order_acceptance_id']))
                            self._parameters.canceled_child_order.append(
                                r['child_order_acceptance_id'])  # Processed later by position management
                except Exception as e:
                    self.logger.info(r)
                    self.logger.exception(
                        "Error in handling child_order_event : {}, {}".format(e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-parent-order-events
        def on_parent_order_events(recept_data):
            for r in recept_data:
                if r['parent_order_acceptance_id'] in list(self._parameters.parentorder_id_list):
                    try:
                        if r['event_type'] == 'ORDER':
                            self.logger.info(" PARENT ORDER: {} {}".format(
                                r['parent_order_acceptance_id'], r['parent_order_type']))
                        elif r['event_type'] == 'ORDER_FAILED':
                            self.logger.debug(r)
                            self.logger.error(" PARENT ORDER_FAILED: {} {}".format(
                                r['parent_order_acceptance_id'], r['reason']))
                            with self._parameters.order_id_list_lock:
                                self._parameters.canceled_parent_order.append(
                                    r['parent_order_acceptance_id'])  # Processed later by position management
                            try:
                                del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                            except:
                                pass
                            try:
                                del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                            except:
                                pass
                        elif r['event_type'] == 'CANCEL':
                            self.logger.debug(r)
                            self.logger.info(" PARENT CANCEL: {}".format(r['parent_order_acceptance_id']))
                            with self._parameters.order_id_list_lock:
                                self._parameters.canceled_parent_order.append(
                                    r['parent_order_acceptance_id'])  # Processed later by position management
                            try:
                                del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                            except:
                                pass
                            try:
                                del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                            except:
                                pass
                        elif r['event_type'] == 'EXPIRE':
                            self.logger.debug(r)
                            self.logger.info(" PARENT EXPIRE: {}".format(r['parent_order_acceptance_id']))
                            with self._parameters.order_id_list_lock:
                                self._parameters.canceled_parent_order.append(
                                    r['parent_order_acceptance_id'])  # Processed later by position management
                            try:
                                del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                            except:
                                pass
                            try:
                                del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                            except:
                                pass
                        elif r['event_type'] == 'COMPLETE':
                            order_method = self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                            if (order_method in ['IFD', 'IFDOCO']) and r['parameter_index'] == 1:
                                self.logger.info(" PARENT COMPLETE (IF CONDITION): {} IDX:{} child_acceptance_id:{}".format(
                                    r['parent_order_acceptance_id'], r['parameter_index'], r['child_order_acceptance_id']))
                            else:
                                self.logger.info(" PARENT COMPLETE: {} IDX:{} child_acceptance_id:{}".format(
                                    r['parent_order_acceptance_id'], r['parameter_index'], r['child_order_acceptance_id']))
                            if order_method == 'SIMPLE':
                                if r['parameter_index'] == 1:
                                    with self._parameters.order_id_list_lock:
                                        self._parameters.canceled_parent_order.append(
                                            r['parent_order_acceptance_id'])  # Processed later by position management
                                    try:
                                        del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                                    try:
                                        del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                                    except:
                                        pass
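                            # IFD: the parent is finished once its second leg (parameter_index 2) completes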
                            elif order_method == 'IFD':
                                if r['parameter_index'] == 2:
                                    with self._parameters.order_id_list_lock:
                                        self._parameters.canceled_parent_order.append(
                                            r['parent_order_acceptance_id'])  # Processed later by position management
                                    try:
                                        del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                                    try:
                                        del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                            elif order_method == 'OCO':
                                if r['parameter_index'] == 1 or r['parameter_index'] == 2:
                                    with self._parameters.order_id_list_lock:
                                        self._parameters.canceled_parent_order.append(
                                            r['parent_order_acceptance_id'])  # Processed later by position management
                                    try:
                                        del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                                    try:
                                        del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                            elif order_method == 'IFDOCO':
                                if r['parameter_index'] == 2 or r['parameter_index'] == 3:
                                    with self._parameters.order_id_list_lock:
                                        self._parameters.canceled_parent_order.append(
                                            r['parent_order_acceptance_id'])  # Processed later by position management
                                    try:
                                        del self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                                    try:
                                        del self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']]
                                    except:
                                        pass
                        elif r['event_type'] == 'TRIGGER':
                            self.logger.info(" PARENT TRIGGER: {} IDX:{} child_acceptance_id:{}".format(
                                r['parent_order_acceptance_id'], r['parameter_index'], r['child_order_acceptance_id']))
                            # Get the trigger price that was hit from the detailed order parameters
                            try:
                                child_condition_type = self._parameters.parentorder_detail_param[
                                    r['parent_order_acceptance_id']][r['parameter_index']-1]['condition_type']
                                if child_condition_type == 'MARKET':
                                    order_price = self._parameters.ltp
                                elif child_condition_type == 'LIMIT':
                                    order_price = self._parameters.parentorder_detail_param[
                                        r['parent_order_acceptance_id']][r['parameter_index']-1]['price']
                                elif child_condition_type == 'STOP':
                                    order_price = self._parameters.parentorder_detail_param[
                                        r['parent_order_acceptance_id']][r['parameter_index']-1]['trigger_price']
                                elif child_condition_type == 'STOP_LIMIT':
                                    order_price = self._parameters.parentorder_detail_param[
                                        r['parent_order_acceptance_id']][r['parameter_index']-1]['price']
                                elif child_condition_type == 'TRAIL':
                                    order_price = self._parameters.ltp
                                else:
                                    order_method = self._parameters.parentorder_method_dict[r['parent_order_acceptance_id']]
                                    self.logger.error("Unknown type : child_order_type:{} child_condition_type:{} order_method:{}\n{}".format(
                                        r['child_order_type'], child_condition_type, order_method, r))
                                    order_price = self._parameters.ltp
                            except:
                                order_price = self._parameters.ltp
                                self.logger.error("Can't find parent order detail information : {}".format(r))
                                self.logger.error("{}".format(self._parameters.parentorder_detail_param))
                            self.logger.info("order_price:{} / {}".format(
                                order_price, self._parameters.parentorder_detail_param[r['parent_order_acceptance_id']][r['parameter_index']-1]))
                            # Add the child order to the list (matched against the execution history later)
                            with self._parameters.order_id_list_lock:
                                # To avoid duplicates, add only ids that are not already in the list
                                if r['child_order_acceptance_id'] not in list(self._parameters.childorder_id_list):
                                    self._parameters.childorder_id_list.append(
                                        r['child_order_acceptance_id'])  # List used for fast matching on the websocket side
                                    self._parameters.childorder_information.append(
                                        {'id': r['child_order_acceptance_id'], 'child_order_type': r['child_order_type'],
                                         'remain': r['size'], 'side': r['side'], 'TTE': int(time.time()),
                                         'price': order_price, 'size': r['size'],
                                         'parent_id': r['parent_order_acceptance_id'], 'sendorder': time.time()})
                                    self._parameters.executed_order_pending_rollback = True
                                    # For order-timing statistics
                                    with self._parameters.server_accepted_time_lock:
                                        self._parameters.server_accepted_time_detail.append(
                                            {'id': r['child_order_acceptance_id'], 'time': time.time(),
                                             'event': 'ORDER', 'accepted': time.time()})
                    except Exception as e:
                        self.logger.info(r)
                        self.logger.exception("Error in handling parent_order_event : {}, {}".format(
                            e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-ticker
        def on_spot_ticker(recept_data):
            try:
                self.spot_price = int(recept_data["ltp"])
                if self.spot_price_last != self.spot_price:
                    # If drive_by_spot_ticker is set, call realtime_logic() on ticker changes (only when the price moved)
                    if self._parameters.drive_by_spot_ticker:
                        self._parameters.spot_ticker_event.set()
                        self._parameters.order_signal_event.set()
                    self.spot_price_last = self.spot_price
            except Exception as e:
                self.logger.info(recept_data)
                self.logger.exception(
                    "Error in handling spot_ticker : {}, {}".format(e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-executions
        def on_spot_executions(recept_data):
            try:
                self.spot_price_exec = recept_data[-1]['price']
            except Exception as e:
                self.logger.info(recept_data)
                self.logger.exception(
                    "Error in handling spot_executions : {}, {}".format(e, traceback.print_exc()))
            try:
                # Call the strategy's handler if one is registered
                if self._parameters.spot_execution_handler != None:
                    handler(self._parameters.spot_execution_handler, recept_data)
            except Exception as e:
                self.logger.exception(
                    "Error in spot_executions routine : {}, {}".format(e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-board-snapshot
        def on_spot_board_snapshot(recept_data):
            self.spot_mid_price = int(recept_data['mid_price'])
            # Order book snapshot
            with self.spot_board_lock:
                bids, asks = SortedDict(), SortedDict()   # build empty SortedDicts and
                update_board(bids, recept_data['bids'])   # pour every level from the
                update_board(asks, recept_data['asks'])   # message into them
                self.spot_bids, self.spot_asks = bids, asks
            try:
                # Call the strategy's handler if one is registered
                if self._parameters.spot_board_updated_handler != None:
                    handler(self._parameters.spot_board_updated_handler)
            except Exception as e:
                self.logger.exception(
                    "Error in spot_board_updated routine : {}, {}".format(e, traceback.print_exc()))

        # https://bf-lightning-api.readme.io/docs/realtime-board
        def on_spot_board(recept_data):
            self.spot_mid_price = int(recept_data['mid_price'])
            # Incremental book update: apply the received diffs to the snapshot
            with self.spot_board_lock:
                update_board(self.spot_bids, recept_data['bids'])
                update_board(self.spot_asks, recept_data['asks'])
            try:
                # Call the strategy's handler if one is registered
                if self._parameters.spot_board_updated_handler != None:
                    handler(self._parameters.spot_board_updated_handler)
            except Exception as e:
                self.logger.exception(
                    "Error in spot_board_updated routine : {}, {}".format(e, traceback.print_exc()))

        # Channel registration
        public_handler_mapping = {}
        public_handler_mapping["lightning_executions_{}".format(self.product)] = on_executions
        public_handler_mapping["lightning_board_snapshot_{}".format(self.product)] = on_board_snapshot
        public_handler_mapping["lightning_board_{}".format(self.product)] = on_board
        public_handler_mapping["lightning_ticker_BTC_JPY"] = on_spot_ticker
        if self._parameters.handle_spot_realtime_api:
            public_handler_mapping["lightning_executions_BTC_JPY"] = on_spot_executions
            public_handler_mapping["lightning_board_snapshot_BTC_JPY"] = on_spot_board_snapshot
            public_handler_mapping["lightning_board_BTC_JPY"] = on_spot_board
        private_handler_mapping = {}
        private_handler_mapping["child_order_events"] = on_child_order_events
        private_handler_mapping["parent_order_events"] = on_parent_order_events

        # Create the websocket
        self.realtimeapi = realtimeapi.RealtimeAPIWebsocket(
            self.logger, self._parameters, public_handler_mapping, private_handler_mapping)
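# A minimal, self-contained sketch (not part of the bot above) of the order-book idiom
# SecondCandle relies on: a SortedDict keyed by price keeps levels sorted, so the best
# ask is the first key, the best bid the last, and a cumulative walk in the spirit of
# get_effective_tick() is a plain iteration. All names here are illustrative, not
# bitFlyer API.
from sortedcontainers import SortedDict

def apply_levels(book, levels):
    # size != 0 inserts/updates a price level, size == 0 deletes it
    for lvl in levels:
        p, s = lvl['price'], lvl['size']
        if s != 0:
            book[p] = s
        elif p in book:
            del book[p]

toy_asks = SortedDict()
apply_levels(toy_asks, [{'price': 102, 'size': 3.0},
                        {'price': 101, 'size': 0.5},
                        {'price': 103, 'size': 1.0}])
best_ask, _ = toy_asks.peekitem(0)   # lowest ask = first key in sorted order
assert best_ask == 101

def effective_ask(asks, size_thru):
    # walk up the asks until the cumulative size exceeds size_thru
    total = 0
    for price, size in asks.items():
        total += size
        if total > size_thru:
            return price
    return None

assert effective_ask(toy_asks, 3.0) == 102  # 0.5 @ 101 + 3.0 @ 102 > 3.0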
class FederationRemoteSendQueue(object): """A drop-in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.presence_map = {} # Pending presence map user_id -> UserPresenceState self.presence_changed = SortedDict() # Stream position -> list[user_id] # Stores the destinations we need to explicitly send presence to about a # given user. # Stream position -> (user_id, destinations) self.presence_destinations = SortedDict() self.keyed_edu = {} # (destination, key) -> EDU self.keyed_edu_changed = SortedDict() # stream position -> (destination, key) self.edus = SortedDict() # stream position -> Edu self.device_messages = SortedDict() # stream position -> destination self.pos = 1 self.pos_time = SortedDict() # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): LaterGauge("synapse_federation_send_queue_%s_size" % (name,), "", [], lambda: len(queue)) for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", "edus", "device_messages", "pos_time", "presence_destinations", ]: register(queue_name, getattr(self, queue_name)) self.clock.looping_call(self._clear_queue, 30 * 1000) def _next_pos(self): pos = self.pos self.pos += 1 self.pos_time[self.clock.time_msec()] = pos return pos def _clear_queue(self): """Clear the queues for anything older than N minutes""" FIVE_MINUTES_AGO = 5 * 60 * 1000 now = self.clock.time_msec() keys = self.pos_time.keys() time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO) if not keys[:time]: return position_to_delete = max(keys[:time]) for key in keys[:time]: del self.pos_time[key] self._clear_queue_before_pos(position_to_delete) def _clear_queue_before_pos(self, position_to_delete): """Clear all the queues from before a given position""" with Measure(self.clock, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_changed.keys() i = self.presence_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_changed[key] user_ids = set( user_id for uids in self.presence_changed.values() for user_id in uids ) keys = self.presence_destinations.keys() i = self.presence_destinations.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_destinations[key] user_ids.update( user_id for user_id, _ in self.presence_destinations.values() ) to_del = [ user_id for user_id in self.presence_map if user_id not in user_ids ] for user_id in to_del: del self.presence_map[user_id] # Delete things out of keyed edus keys = self.keyed_edu_changed.keys() i = self.keyed_edu_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.keyed_edu_changed[key] live_keys = set() for edu_key in self.keyed_edu_changed.values(): live_keys.add(edu_key) to_del = [edu_key for edu_key in self.keyed_edu if edu_key not in live_keys] for edu_key in to_del: del self.keyed_edu[edu_key] # Delete things out of edu map keys = self.edus.keys() i = self.edus.bisect_left(position_to_delete) for key in keys[:i]: del self.edus[key] # Delete things out of device map keys = self.device_messages.keys() i = self.device_messages.bisect_left(position_to_delete) for key in keys[:i]: del self.device_messages[key] def notify_new_events(self, current_id): """As per FederationSender""" # We
don't need to replicate this as it gets sent down a different # stream. pass def build_and_send_edu(self, destination, edu_type, content, key=None): """As per FederationSender""" if destination == self.server_name: logger.info("Not sending EDU to ourselves") return pos = self._next_pos() edu = Edu( origin=self.server_name, destination=destination, edu_type=edu_type, content=content, ) if key: assert isinstance(key, tuple) self.keyed_edu[(destination, key)] = edu self.keyed_edu_changed[pos] = (destination, key) else: self.edus[pos] = edu self.notifier.on_new_replication_data() def send_read_receipt(self, receipt): """As per FederationSender Args: receipt (synapse.types.ReadReceipt): """ # nothing to do here: the replication listener will handle it. pass def send_presence(self, states): """As per FederationSender Args: states (list(UserPresenceState)) """ pos = self._next_pos() # We only want to send presence for our own users, so let's always just # filter here just in case. local_states = list(filter(lambda s: self.is_mine_id(s.user_id), states)) self.presence_map.update({state.user_id: state for state in local_states}) self.presence_changed[pos] = [state.user_id for state in local_states] self.notifier.on_new_replication_data() def send_presence_to_destinations(self, states, destinations): """As per FederationSender Args: states (list[UserPresenceState]) destinations (list[str]) """ for state in states: pos = self._next_pos() self.presence_map.update({state.user_id: state for state in states}) self.presence_destinations[pos] = (state.user_id, destinations) self.notifier.on_new_replication_data() def send_device_messages(self, destination): """As per FederationSender""" pos = self._next_pos() self.device_messages[pos] = destination self.notifier.on_new_replication_data() def get_current_token(self): return self.pos - 1 def federation_ack(self, token): self._clear_queue_before_pos(token) def get_replication_rows(self, from_token, to_token, limit, federation_ack=None): """Get rows to be sent over federation between the two tokens Args: from_token (int) to_token (int) limit (int) federation_ack (int): Optional. The position where the worker has explicitly acknowledged it has handled. Allows us to drop data from before that point """ # TODO: Handle limit. # To handle restarts where we wrap around if from_token > self.pos: from_token = -1 # list of tuple(int, BaseFederationRow), where the first is the position # of the federation stream. rows = [] # There should be only one reader, so let's delete everything it has # acknowledged it has seen.
if federation_ack: self._clear_queue_before_pos(federation_ack) # Fetch changed presence i = self.presence_changed.bisect_right(from_token) j = self.presence_changed.bisect_right(to_token) + 1 dest_user_ids = [ (pos, user_id) for pos, user_id_list in self.presence_changed.items()[i:j] for user_id in user_id_list ] for (key, user_id) in dest_user_ids: rows.append((key, PresenceRow( state=self.presence_map[user_id], ))) # Fetch presence to send to destinations i = self.presence_destinations.bisect_right(from_token) j = self.presence_destinations.bisect_right(to_token) + 1 for pos, (user_id, dests) in self.presence_destinations.items()[i:j]: rows.append((pos, PresenceDestinationsRow( state=self.presence_map[user_id], destinations=list(dests), ))) # Fetch changed keyed edus i = self.keyed_edu_changed.bisect_right(from_token) j = self.keyed_edu_changed.bisect_right(to_token) + 1 # We purposefully clobber based on the key here, python dict comprehensions # always use the last value, so this will correctly point to the last # stream position. keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} for ((destination, edu_key), pos) in iteritems(keyed_edus): rows.append((pos, KeyedEduRow( key=edu_key, edu=self.keyed_edu[(destination, edu_key)], ))) # Fetch changed edus i = self.edus.bisect_right(from_token) j = self.edus.bisect_right(to_token) + 1 edus = self.edus.items()[i:j] for (pos, edu) in edus: rows.append((pos, EduRow(edu))) # Fetch changed device messages i = self.device_messages.bisect_right(from_token) j = self.device_messages.bisect_right(to_token) + 1 device_messages = {v: k for k, v in self.device_messages.items()[i:j]} for (destination, pos) in iteritems(device_messages): rows.append((pos, DeviceRow( destination=destination, ))) # Sort rows based on pos rows.sort() return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
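# A minimal sketch (not from Synapse) of the stream-position idiom used by
# FederationRemoteSendQueue: a SortedDict maps monotonically increasing positions to
# payloads; bisect_left()/bisect_right() find cutoff indices, and the indexable
# key/item views give the ranges to prune or to replay. Names are illustrative.
from sortedcontainers import SortedDict

stream = SortedDict({1: 'a', 2: 'b', 5: 'c', 9: 'd'})

def clear_before(pos):
    # drop every entry whose position is < pos, like _clear_queue_before_pos
    keys = stream.keys()
    i = stream.bisect_left(pos)
    for key in list(keys[:i]):
        del stream[key]

def rows_between(from_token, to_token):
    # entries with from_token < position <= to_token
    i = stream.bisect_right(from_token)
    j = stream.bisect_right(to_token)
    return list(stream.items()[i:j])

assert rows_between(1, 5) == [(2, 'b'), (5, 'c')]
clear_before(5)
assert list(stream) == [5, 9]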
class Replica(HasActionQueue, MessageProcessor): def __init__(self, node: 'plenum.server.node.Node', instId: int, isMaster: bool = False): """ Create a new replica. :param node: Node on which this replica is located :param instId: the id of the protocol instance the replica belongs to :param isMaster: is this a replica of the master protocol instance """ super().__init__() self.stats = Stats(TPCStat) self.config = getConfig() routerArgs = [(ReqDigest, self._preProcessReqDigest)] for r in [PrePrepare, Prepare, Commit]: routerArgs.append((r, self.processThreePhaseMsg)) routerArgs.append((Checkpoint, self.processCheckpoint)) routerArgs.append((ThreePCState, self.process3PhaseState)) self.inBoxRouter = Router(*routerArgs) self.threePhaseRouter = Router( (PrePrepare, self.processPrePrepare), (Prepare, self.processPrepare), (Commit, self.processCommit) ) self.node = node self.instId = instId self.name = self.generateName(node.name, self.instId) self.outBox = deque() """ This queue is used by the replica to send messages to its node. Replica puts messages that are consumed by its node """ self.inBox = deque() """ This queue is used by the replica to receive messages from its node. Node puts messages that are consumed by the replica """ self.inBoxStash = deque() """ If messages need to go back on the queue, they go here temporarily and are put back on the queue on a state change """ self.isMaster = isMaster # Indicates name of the primary replica of this protocol instance. # None in case the replica does not know who the primary of the # instance is self._primaryName = None # type: Optional[str] # Requests waiting to be processed once the replica is able to decide # whether it is primary or not self.postElectionMsgs = deque() # PRE-PREPAREs that are waiting to be processed but do not have the # corresponding request digest. Happens when replica has not been # forwarded the request by the node but is getting 3 phase messages. # The value is a list since a malicious entity might send PRE-PREPARE # with a different digest and since we don't have the request finalised, # we store all PRE-PREPAREs self.prePreparesPendingReqDigest = {} # type: Dict[Tuple[str, int], List] # PREPAREs that are stored by non primary replica for which it has not # got any PRE-PREPARE. Dictionary that stores a tuple of view no and # prepare sequence number as key and a deque of PREPAREs as value. # This deque is attempted to be flushed on receiving every # PRE-PREPARE request. self.preparesWaitingForPrePrepare = {} # type: Dict[Tuple[int, int], deque] # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE # received self.commitsWaitingForPrepare = {} # type: Dict[Tuple[int, int], deque] # Dictionary of sent PRE-PREPARE that are stored by primary replica # which it has broadcasted to all other non primary replicas # Key of dictionary is a 2 element tuple with elements viewNo, # pre-prepare seqNo and value is a tuple of Request Digest and time self.sentPrePrepares = {} # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2 # element tuple with elements viewNo, pre-prepare seqNo and value is # a tuple of Request Digest and time self.prePrepares = {} # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] # Dictionary of received Prepare requests.
Key of dictionary is a 2 # element tuple with elements viewNo, seqNo and value is a 2 element # tuple containing request digest and set of sender node names(sender # replica names in case of multiple protocol instances) # (viewNo, seqNo) -> ((identifier, reqId), {senders}) self.prepares = Prepares() # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]] self.commits = Commits() # type: Dict[Tuple[int, int], # Tuple[Tuple[str, int], Set[str]]] # Set of tuples to keep track of ordered requests. Each tuple is # (viewNo, ppSeqNo) self.ordered = OrderedSet() # type: OrderedSet[Tuple[int, int]] # Dictionary to keep track of which replica was primary during each # view. Key is the view no and value is the name of the primary # replica during that view self.primaryNames = {} # type: Dict[int, str] # Holds msgs that are for later views self.threePhaseMsgsForLaterView = deque() # type: deque[(ThreePhaseMsg, str)] # Holds tuple of view no and prepare seq no of 3-phase messages it # received while it was not participating self.stashingWhileCatchingUp = set() # type: Set[Tuple] # Commits which are not being ordered since commits with lower view # numbers and sequence numbers have not been ordered yet. Key is the # viewNo and value a map of pre-prepare sequence number to commit self.stashedCommitsForOrdering = {} # type: Dict[int, # Dict[int, Commit]] self.checkpoints = SortedDict(lambda k: k[0]) self.stashingWhileOutsideWaterMarks = deque() # Low water mark self._h = 0 # type: int # High water mark self.H = self._h + self.config.LOG_SIZE # type: int self.lastPrePrepareSeqNo = self.h # type: int @property def h(self) -> int: return self._h @h.setter def h(self, n): self._h = n self.H = self._h + self.config.LOG_SIZE @property def requests(self): return self.node.requests def shouldParticipate(self, viewNo: int, ppSeqNo: int): # The replica should participate only while its node is participating and # it did not stash any of this request's 3-phase messages return self.node.isParticipating and (viewNo, ppSeqNo) \ not in self.stashingWhileCatchingUp @staticmethod def generateName(nodeName: str, instId: int): """ Create and return the name for a replica using its nodeName and instanceId. Ex: Alpha:1 """ return "{}:{}".format(nodeName, instId) @staticmethod def getNodeName(replicaName: str): return replicaName.split(":")[0] @property def isPrimary(self): """ Is this replica primary? :return: True if this replica is primary, False if it is not, None if its primary status is not yet known """ return self._primaryName == self.name if self._primaryName is not None \ else None @property def primaryName(self): """ Name of the primary replica of this replica's instance :return: Returns name if primary is known, None otherwise """ return self._primaryName @primaryName.setter def primaryName(self, value: Optional[str]) -> None: """ Set the value of isPrimary. :param value: the value to set isPrimary to """ if not value == self._primaryName: self._primaryName = value self.primaryNames[self.viewNo] = value logger.debug("{} setting primaryName for view no {} to: {}". format(self, self.viewNo, value)) logger.debug("{}'s primaryNames for views are: {}". format(self, self.primaryNames)) self._stateChanged() def _stateChanged(self): """ A series of actions to be performed when the state of this replica changes.
- UnstashInBox (see _unstashInBox) """ self._unstashInBox() if self.isPrimary is not None: # TODO handle suspicion exceptions here self.process3PhaseReqsQueue() # TODO handle suspicion exceptions here try: self.processPostElectionMsgs() except SuspiciousNode as ex: self.outBox.append(ex) self.discard(ex.msg, ex.reason, logger.warning) def _stashInBox(self, msg): """ Stash the specified message into the inBoxStash of this replica. :param msg: the message to stash """ self.inBoxStash.append(msg) def _unstashInBox(self): """ Append the inBoxStash to the right of the inBox. """ self.inBox.extend(self.inBoxStash) self.inBoxStash.clear() def __repr__(self): return self.name @property def f(self) -> int: """ Return the number of Byzantine Failures that can be tolerated by this system. Equal to (N - 1)/3, where N is the number of nodes in the system. """ return self.node.f @property def viewNo(self): """ Return the current view number of this replica. """ return self.node.viewNo def isPrimaryInView(self, viewNo: int) -> Optional[bool]: """ Return whether this replica was primary in the given view number. """ return self.primaryNames[viewNo] == self.name def isMsgForLaterView(self, msg): """ Return whether this request's view number is greater than the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo > self.viewNo def isMsgForCurrentView(self, msg): """ Return whether this request's view number is equal to the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo == self.viewNo def isMsgForPrevView(self, msg): """ Return whether this request's view number is less than the current view number of this replica. """ viewNo = getattr(msg, "viewNo", None) return viewNo < self.viewNo def isPrimaryForMsg(self, msg) -> Optional[bool]: """ Return whether this replica is primary if the request's view number is equal to this replica's view number and primary has been selected for the current view. Return None otherwise. :param msg: message """ if self.isMsgForLaterView(msg): self.discard(msg, "Cannot get primary status for a request for a later " "view {}. Request is {}".format(self.viewNo, msg), logger.error) else: return self.isPrimary if self.isMsgForCurrentView(msg) \ else self.isPrimaryInView(msg.viewNo) def isMsgFromPrimary(self, msg, sender: str) -> bool: """ Return whether this message was from primary replica :param msg: :param sender: :return: """ if self.isMsgForLaterView(msg): logger.error("{} cannot get primary for a request for a later " "view. Request is {}".format(self, msg)) else: return self.primaryName == sender if self.isMsgForCurrentView( msg) else self.primaryNames[msg.viewNo] == sender def _preProcessReqDigest(self, rd: ReqDigest) -> None: """ Process the request digest if this replica's primary status is known, otherwise stash the message into the inBox. :param rd: the client Request Digest """ if self.isPrimary is not None: self.processReqDigest(rd) else: logger.debug("{} stashing request digest {} since it does not know " "its primary status". format(self, (rd.identifier, rd.reqId))) self._stashInBox(rd) def serviceQueues(self, limit=None): """ Process `limit` number of messages in the inBox. :param limit: the maximum number of messages to process :return: the number of messages successfully processed """ # TODO should handle SuspiciousNode here r = self.inBoxRouter.handleAllSync(self.inBox, limit) r += self._serviceActions() return r # Messages that cannot be processed right now need to be added back to the # queue.
They might be able to be processed later def processPostElectionMsgs(self): """ Process messages waiting for the election of a primary replica to complete. """ while self.postElectionMsgs: msg = self.postElectionMsgs.popleft() logger.debug("{} processing pended msg {}".format(self, msg)) self.dispatchThreePhaseMsg(*msg) def process3PhaseReqsQueue(self): """ Process the 3 phase requests from the queue whose view number is equal to the current view number of this replica. """ unprocessed = deque() while self.threePhaseMsgsForLaterView: request, sender = self.threePhaseMsgsForLaterView.popleft() logger.debug("{} processing pended 3 phase request: {}" .format(self, request)) # If the request is for a later view dont try to process it but add # it back to the queue. if self.isMsgForLaterView(request): unprocessed.append((request, sender)) else: self.processThreePhaseMsg(request, sender) self.threePhaseMsgsForLaterView = unprocessed @property def quorum(self) -> int: r""" Return the quorum of this RBFT system. Equal to :math:`2f + 1`. Return None if `f` is not yet determined. """ return self.node.quorum def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: """ Create a three phase request to be handled by the threePhaseRouter. :param msg: the ThreePhaseMsg to dispatch :param sender: the name of the node that sent this request """ senderRep = self.generateName(sender, self.instId) if self.isPpSeqNoAcceptable(msg.ppSeqNo): try: self.threePhaseRouter.handleSync((msg, senderRep)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) else: logger.debug("{} stashing 3 phase message {} since ppSeqNo {} is " "not between {} and {}". format(self, msg, msg.ppSeqNo, self.h, self.H)) self.stashingWhileOutsideWaterMarks.append((msg, sender)) def processReqDigest(self, rd: ReqDigest): """ Process a request digest. Works only if this replica has decided its primary status. :param rd: the client request digest to process """ self.stats.inc(TPCStat.ReqDigestRcvd) if self.isPrimary is False: self.dequeuePrePrepare(rd.identifier, rd.reqId) else: self.doPrePrepare(rd) def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): """ Process a 3-phase (pre-prepare, prepare and commit) request. Dispatch the request only if primary has already been decided, otherwise stash it. :param msg: the Three Phase message, one of PRE-PREPARE, PREPARE, COMMIT :param sender: name of the node that sent this message """ # Can only proceed further if it knows whether its primary or not if self.isMsgForLaterView(msg): self.threePhaseMsgsForLaterView.append((msg, sender)) logger.debug("{} pended received 3 phase request for a later view: " "{}".format(self, msg)) else: if self.isPrimary is None: self.postElectionMsgs.append((msg, sender)) logger.debug("Replica {} pended request {} from {}". format(self, msg, sender)) else: self.dispatchThreePhaseMsg(msg, sender) def processPrePrepare(self, pp: PrePrepare, sender: str): """ Validate and process the PRE-PREPARE specified. If validation is successful, create a PREPARE and broadcast it. :param pp: a prePrepareRequest :param sender: name of the node that sent this message """ key = (pp.viewNo, pp.ppSeqNo) logger.debug("{} Receiving PRE-PREPARE{} at {} from {}". format(self, key, time.perf_counter(), sender)) if self.canProcessPrePrepare(pp, sender): if not self.node.isParticipating: self.stashingWhileCatchingUp.add(key) self.addToPrePrepares(pp) logger.info("{} processed incoming PRE-PREPARE{}". 
format(self, key)) def tryPrepare(self, pp: PrePrepare): """ Try to send the Prepare message if the PrePrepare message is ready to be passed into the Prepare phase. """ if self.canSendPrepare(pp): self.doPrepare(pp) else: logger.debug("{} cannot send PREPARE".format(self)) def processPrepare(self, prepare: Prepare, sender: str) -> None: """ Validate and process the PREPARE specified. If validation is successful, create a COMMIT and broadcast it. :param prepare: a PREPARE msg :param sender: name of the node that sent the PREPARE """ # TODO move this try/except up higher logger.debug("{} received PREPARE{} from {}". format(self, (prepare.viewNo, prepare.ppSeqNo), sender)) try: if self.isValidPrepare(prepare, sender): self.addToPrepares(prepare, sender) self.stats.inc(TPCStat.PrepareRcvd) logger.debug("{} processed incoming PREPARE {}". format(self, (prepare.viewNo, prepare.ppSeqNo))) else: # TODO let's have isValidPrepare throw an exception that gets # handled and possibly logged higher logger.warning("{} cannot process incoming PREPARE". format(self)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) def processCommit(self, commit: Commit, sender: str) -> None: """ Validate and process the COMMIT specified. If validation is successful, return the message to the node. :param commit: an incoming COMMIT message :param sender: name of the node that sent the COMMIT """ logger.debug("{} received COMMIT {} from {}". format(self, commit, sender)) if self.isValidCommit(commit, sender): self.stats.inc(TPCStat.CommitRcvd) self.addToCommits(commit, sender) logger.debug("{} processed incoming COMMIT{}". format(self, (commit.viewNo, commit.ppSeqNo))) def tryCommit(self, prepare: Prepare): """ Try to commit if the Prepare message is ready to be passed into the commit phase. """ if self.canCommit(prepare): self.doCommit(prepare) else: logger.debug("{} not yet able to send COMMIT".format(self)) def tryOrder(self, commit: Commit): """ Try to order if the Commit message is ready to be ordered. """ canOrder, reason = self.canOrder(commit) if canOrder: logger.debug("{} returning request to node".format(self)) self.tryOrdering(commit) else: logger.trace("{} cannot return request to node: {}". format(self, reason)) def doPrePrepare(self, reqDigest: ReqDigest) -> None: """ Broadcast a PRE-PREPARE to all the replicas. :param reqDigest: a tuple with elements identifier, reqId, and digest """ if not self.node.isParticipating: logger.error("Non participating node is attempting PRE-PREPARE. " "This should not happen.") return if self.lastPrePrepareSeqNo == self.H: logger.debug("{} stashing PRE-PREPARE {} since outside greater " "than high water mark {}". format(self, (self.viewNo, self.lastPrePrepareSeqNo+1), self.H)) self.stashingWhileOutsideWaterMarks.append(reqDigest) return self.lastPrePrepareSeqNo += 1 tm = time.time()*1000 logger.debug("{} Sending PRE-PREPARE {} at {}". format(self, (self.viewNo, self.lastPrePrepareSeqNo), time.perf_counter())) prePrepareReq = PrePrepare(self.instId, self.viewNo, self.lastPrePrepareSeqNo, *reqDigest, tm) self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest.key, tm) self.send(prePrepareReq, TPCStat.PrePrepareSent) def doPrepare(self, pp: PrePrepare): logger.debug("{} Sending PREPARE {} at {}". 
format(self, (pp.viewNo, pp.ppSeqNo), time.perf_counter())) prepare = Prepare(self.instId, pp.viewNo, pp.ppSeqNo, pp.digest, pp.ppTime) self.send(prepare, TPCStat.PrepareSent) self.addToPrepares(prepare, self.name) def doCommit(self, p: Prepare): """ Create a commit message from the given Prepare message and trigger the commit phase :param p: the prepare message """ logger.debug("{} Sending COMMIT{} at {}". format(self, (p.viewNo, p.ppSeqNo), time.perf_counter())) commit = Commit(self.instId, p.viewNo, p.ppSeqNo, p.digest, p.ppTime) self.send(commit, TPCStat.CommitSent) self.addToCommits(commit, self.name) def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: """ Decide whether this replica is eligible to process a PRE-PREPARE, based on the following criteria: - this replica is a non-primary replica - the request isn't in its list of received PRE-PREPAREs - the request is waiting for a PRE-PREPARE and the digest value matches :param pp: a PRE-PREPARE msg to process :param sender: the name of the node that sent the PRE-PREPARE msg :return: True if processing is allowed, False otherwise """ # TODO: Check whether it is rejecting PRE-PREPARE from previous view # PRE-PREPARE should not be sent from non primary if not self.isMsgFromPrimary(pp, sender): raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) # A PRE-PREPARE is being sent to primary if self.isPrimaryForMsg(pp) is True: raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp) # A PRE-PREPARE is sent that has already been received if (pp.viewNo, pp.ppSeqNo) in self.prePrepares: raise SuspiciousNode(sender, Suspicions.DUPLICATE_PPR_SENT, pp) key = (pp.identifier, pp.reqId) if not self.requests.isFinalised(key): self.enqueuePrePrepare(pp, sender) return False # A PRE-PREPARE is sent that does not match request digest if self.requests.digest(key) != pp.digest: raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp) return True def addToPrePrepares(self, pp: PrePrepare) -> None: """ Add the specified PRE-PREPARE to this replica's list of received PRE-PREPAREs. :param pp: the PRE-PREPARE to add to the list """ key = (pp.viewNo, pp.ppSeqNo) self.prePrepares[key] = \ ((pp.identifier, pp.reqId), pp.ppTime) self.dequeuePrepares(*key) self.dequeueCommits(*key) self.stats.inc(TPCStat.PrePrepareRcvd) self.tryPrepare(pp) def hasPrepared(self, request) -> bool: return self.prepares.hasPrepareFrom(request, self.name) def canSendPrepare(self, request) -> bool: """ Return whether the request identified by (identifier, requestId) can proceed to the Prepare step. :param request: any object with identifier and requestId attributes """ return self.shouldParticipate(request.viewNo, request.ppSeqNo) \ and not self.hasPrepared(request) \ and self.requests.isFinalised((request.identifier, request.reqId)) def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: """ Return whether the PREPARE specified is valid.
:param prepare: the PREPARE to validate :param sender: the name of the node that sent the PREPARE :return: True if PREPARE is valid, False otherwise """ key = (prepare.viewNo, prepare.ppSeqNo) primaryStatus = self.isPrimaryForMsg(prepare) ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares # If a non primary replica and receiving a PREPARE request before a # PRE-PREPARE request, then proceed # PREPARE should not be sent from primary if self.isMsgFromPrimary(prepare, sender): raise SuspiciousNode(sender, Suspicions.PR_FRM_PRIMARY, prepare) # If non primary replica if primaryStatus is False: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE not received for the PREPARE, might be slow network if key not in ppReqs: self.enqueuePrepare(prepare, sender) return False elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) elif prepare.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare) else: return True # If primary replica else: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE was not sent for this PREPARE, certainly # malicious behavior elif key not in ppReqs: raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare) elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) elif prepare.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare) else: return True def addToPrepares(self, prepare: Prepare, sender: str): self.prepares.addVote(prepare, sender) self.tryCommit(prepare) def hasCommitted(self, request) -> bool: return self.commits.hasCommitFrom(ThreePhaseKey( request.viewNo, request.ppSeqNo), self.name) def canCommit(self, prepare: Prepare) -> bool: """ Return whether the specified PREPARE can proceed to the Commit step. Decision criteria: - If this replica has got just 2f PREPARE requests then commit request. - If less than 2f PREPARE requests then probably there's no consensus on the request; don't commit - If more than 2f then already sent COMMIT; don't commit :param prepare: the PREPARE """ return self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo) and \ self.prepares.hasQuorum(prepare, self.f) and \ not self.hasCommitted(prepare) def isValidCommit(self, commit: Commit, sender: str) -> bool: """ Return whether the COMMIT specified is valid. :param commit: the COMMIT to validate :return: True if `request` is valid, False otherwise """ primaryStatus = self.isPrimaryForMsg(commit) ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares key = (commit.viewNo, commit.ppSeqNo) if key not in ppReqs: self.enqueueCommit(commit, sender) return False if (key not in self.prepares and key not in self.preparesWaitingForPrePrepare): logger.debug("{} rejecting COMMIT{} due to lack of prepares". 
format(self, key)) # raise SuspiciousNode(sender, Suspicions.UNKNOWN_CM_SENT, commit) return False elif self.commits.hasCommitFrom(commit, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit) elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)): raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit) elif key in ppReqs and commit.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG, commit) else: return True def addToCommits(self, commit: Commit, sender: str): """ Add the specified COMMIT to this replica's list of received commit requests. :param commit: the COMMIT to add to the list :param sender: the name of the node that sent the COMMIT """ self.commits.addVote(commit, sender) self.tryOrder(commit) def hasOrdered(self, viewNo, ppSeqNo) -> bool: return (viewNo, ppSeqNo) in self.ordered def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]: """ Return whether the specified commitRequest can be returned to the node. Decision criteria: - If have got just 2f+1 Commit requests then return request to node - If less than 2f+1 of commit requests then probably don't have consensus on the request; don't return request to node - If more than 2f+1 then already returned to node; don't return request to node :param commit: the COMMIT """ if not self.commits.hasQuorum(commit, self.f): return False, "no quorum: {} commits where f is {}".\ format(commit, self.f) if self.hasOrdered(commit.viewNo, commit.ppSeqNo): return False, "already ordered" if not self.isNextInOrdering(commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if viewNo not in self.stashedCommitsForOrdering: self.stashedCommitsForOrdering[viewNo] = {} self.stashedCommitsForOrdering[viewNo][ppSeqNo] = commit # self._schedule(self.orderStashedCommits, 2) self.startRepeating(self.orderStashedCommits, 2) return False, "stashing {} since out of order".\ format(commit) return True, None def isNextInOrdering(self, commit: Commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo-1): return True for (v, p) in self.commits: if v < viewNo: # Have commits from previous view that are unordered. # TODO: Question: would commits be always ordered, what if # some are never ordered and its fine, go to PBFT. return False if v == viewNo and p < ppSeqNo and (v, p) not in self.ordered: # If unordered commits are found with lower ppSeqNo then this # cannot be ordered. return False # TODO: Revisit PBFT paper, how to make sure that last request of the # last view has been ordered? Need change in `VIEW CHANGE` mechanism. # Somehow view change needs to communicate what the last request was. # Also what if some COMMITs were completely missed in the same view return True def orderStashedCommits(self): # TODO: What if the first few commits were out of order and stashed? # `self.ordered` would be empty if self.ordered: lastOrdered = self.ordered[-1] vToRemove = set() for v in self.stashedCommitsForOrdering: if v < lastOrdered[0] and self.stashedCommitsForOrdering[v]: raise RuntimeError("{} found commits from previous view {}" " that were not ordered but last ordered" " is {}".format(self, v, lastOrdered)) pToRemove = set() for p, commit in self.stashedCommitsForOrdering[v].items(): if (v == lastOrdered[0] and lastOrdered == (v, p - 1)) or \ (v > lastOrdered[0] and self.isLowestCommitInView(commit)): logger.debug("{} ordering stashed commit {}". 
format(self, commit)) if self.tryOrdering(commit): lastOrdered = (v, p) pToRemove.add(p) for p in pToRemove: del self.stashedCommitsForOrdering[v][p] if not self.stashedCommitsForOrdering[v]: vToRemove.add(v) for v in vToRemove: del self.stashedCommitsForOrdering[v] # if self.stashedCommitsForOrdering: # self._schedule(self.orderStashedCommits, 2) if not self.stashedCommitsForOrdering: self.stopRepeating(self.orderStashedCommits) def isLowestCommitInView(self, commit): # TODO: Assumption: This assumes that at least one commit that was sent # for any request by any node has been received in the view of this # commit ppSeqNos = [] for v, p in self.commits: if v == commit.viewNo: ppSeqNos.append(p) return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True def tryOrdering(self, commit: Commit) -> None: """ Attempt to send an ORDERED request for the specified COMMIT to the node. :param commit: the COMMIT message """ key = (commit.viewNo, commit.ppSeqNo) logger.debug("{} trying to order COMMIT{}".format(self, key)) reqKey = self.getReqKeyFrom3PhaseKey(key) # type: Tuple digest = self.getDigestFor3PhaseKey(key) if not digest: logger.error("{} did not find digest for {}, request key {}". format(self, key, reqKey)) return self.doOrder(*key, *reqKey, digest, commit.ppTime) return True def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime): key = (viewNo, ppSeqNo) self.addToOrdered(*key) ordered = Ordered(self.instId, viewNo, identifier, reqId, ppTime) # TODO: Should not order or add to checkpoint while syncing # 3 phase state. self.send(ordered, TPCStat.OrderSent) if key in self.stashingWhileCatchingUp: self.stashingWhileCatchingUp.remove(key) logger.debug("{} ordered request {}".format(self, (viewNo, ppSeqNo))) self.addToCheckpoint(ppSeqNo, digest) def processCheckpoint(self, msg: Checkpoint, sender: str): if self.checkpoints: seqNo = msg.seqNo _, firstChk = self.firstCheckPoint if firstChk.isStable: if firstChk.seqNo == seqNo: self.discard(msg, reason="Checkpoint already stable", logMethod=logger.debug) return if firstChk.seqNo > seqNo: self.discard(msg, reason="Higher stable checkpoint present", logMethod=logger.debug) return for state in self.checkpoints.values(): if state.seqNo == seqNo: if state.digest == msg.digest: state.receivedDigests[sender] = msg.digest break else: logger.error("{} received an incorrect digest {} for " "checkpoint {} from {}".format(self, msg.digest, seqNo, sender)) return if len(state.receivedDigests) == 2*self.f: self.markCheckPointStable(msg.seqNo) else: self.discard(msg, reason="No checkpoints present to tally", logMethod=logger.warn) def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState: s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 logger.debug("{} adding new checkpoint state for {}". 
format(self, (s, e))) state = CheckpointState(ppSeqNo, [digest, ], None, {}, False) self.checkpoints[s, e] = state return state def addToCheckpoint(self, ppSeqNo, digest): for (s, e) in self.checkpoints.keys(): if s <= ppSeqNo <= e: state = self.checkpoints[s, e] # type: CheckpointState state.digests.append(digest) state = updateNamedTuple(state, seqNo=ppSeqNo) self.checkpoints[s, e] = state break else: state = self._newCheckpointState(ppSeqNo, digest) s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 if len(state.digests) == self.config.CHK_FREQ: state = updateNamedTuple(state, digest=serialize(state.digests), digests=[]) self.checkpoints[s, e] = state self.send(Checkpoint(self.instId, self.viewNo, ppSeqNo, state.digest)) def markCheckPointStable(self, seqNo): previousCheckpoints = [] for (s, e), state in self.checkpoints.items(): if e == seqNo: state = updateNamedTuple(state, isStable=True) self.checkpoints[s, e] = state break else: previousCheckpoints.append((s, e)) else: logger.error("{} could not find {} in checkpoints". format(self, seqNo)) return self.h = seqNo for k in previousCheckpoints: logger.debug("{} removing previous checkpoint {}".format(self, k)) self.checkpoints.pop(k) self.gc(seqNo) logger.debug("{} marked stable checkpoint {}".format(self, (s, e))) self.processStashedMsgsForNewWaterMarks() def gc(self, tillSeqNo): logger.debug("{} cleaning up till {}".format(self, tillSeqNo)) tpcKeys = set() reqKeys = set() for (v, p), (reqKey, _) in self.sentPrePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) reqKeys.add(reqKey) for (v, p), (reqKey, _) in self.prePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) reqKeys.add(reqKey) logger.debug("{} found {} 3 phase keys to clean". format(self, len(tpcKeys))) logger.debug("{} found {} request keys to clean". format(self, len(reqKeys))) for k in tpcKeys: self.sentPrePrepares.pop(k, None) self.prePrepares.pop(k, None) self.prepares.pop(k, None) self.commits.pop(k, None) if k in self.ordered: self.ordered.remove(k) for k in reqKeys: self.requests.pop(k, None) def processStashedMsgsForNewWaterMarks(self): while self.stashingWhileOutsideWaterMarks: item = self.stashingWhileOutsideWaterMarks.pop() logger.debug("{} processing stashed item {} after new stable " "checkpoint".format(self, item)) if isinstance(item, ReqDigest): self.doPrePrepare(item) elif isinstance(item, tuple) and len(item) == 2: self.dispatchThreePhaseMsg(*item) else: logger.error("{} cannot process {} " "from stashingWhileOutsideWaterMarks". format(self, item)) @property def firstCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: return self.checkpoints.peekitem(0) @property def lastCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: return self.checkpoints.peekitem(-1) def isPpSeqNoAcceptable(self, ppSeqNo: int): return self.h < ppSeqNo <= self.H def addToOrdered(self, viewNo: int, ppSeqNo: int): self.ordered.add((viewNo, ppSeqNo)) def enqueuePrePrepare(self, request: PrePrepare, sender: str): logger.debug("Queueing pre-prepares due to unavailability of finalised " "Request. 
Request {} from {}".format(request, sender)) key = (request.identifier, request.reqId) if key not in self.prePreparesPendingReqDigest: self.prePreparesPendingReqDigest[key] = [] self.prePreparesPendingReqDigest[key].append((request, sender)) def dequeuePrePrepare(self, identifier: int, reqId: int): key = (identifier, reqId) if key in self.prePreparesPendingReqDigest: pps = self.prePreparesPendingReqDigest[key] for (pp, sender) in pps: logger.debug("{} popping stashed PRE-PREPARE{}". format(self, key)) if pp.digest == self.requests.digest(key): self.prePreparesPendingReqDigest.pop(key) self.processPrePrepare(pp, sender) logger.debug( "{} processed {} PRE-PREPAREs waiting for finalised " "request for identifier {} and reqId {}". format(self, pp, identifier, reqId)) break def enqueuePrepare(self, request: Prepare, sender: str): logger.debug("Queueing prepares due to unavailability of PRE-PREPARE. " "Request {} from {}".format(request, sender)) key = (request.viewNo, request.ppSeqNo) if key not in self.preparesWaitingForPrePrepare: self.preparesWaitingForPrePrepare[key] = deque() self.preparesWaitingForPrePrepare[key].append((request, sender)) def dequeuePrepares(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.preparesWaitingForPrePrepare: i = 0 # Keys of pending prepares that will be processed below while self.preparesWaitingForPrePrepare[key]: prepare, sender = self.preparesWaitingForPrePrepare[ key].popleft() logger.debug("{} popping stashed PREPARE{}".format(self, key)) self.processPrepare(prepare, sender) i += 1 self.preparesWaitingForPrePrepare.pop(key) logger.debug("{} processed {} PREPAREs waiting for PRE-PREPARE for" " view no {} and seq no {}". format(self, i, viewNo, ppSeqNo)) def enqueueCommit(self, request: Commit, sender: str): logger.debug("Queueing commit due to unavailability of PREPARE. " "Request {} from {}".format(request, sender)) key = (request.viewNo, request.ppSeqNo) if key not in self.commitsWaitingForPrepare: self.commitsWaitingForPrepare[key] = deque() self.commitsWaitingForPrepare[key].append((request, sender)) def dequeueCommits(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.commitsWaitingForPrepare: i = 0 # Keys of pending prepares that will be processed below while self.commitsWaitingForPrepare[key]: commit, sender = self.commitsWaitingForPrepare[ key].popleft() logger.debug("{} popping stashed COMMIT{}".format(self, key)) self.processCommit(commit, sender) i += 1 self.commitsWaitingForPrepare.pop(key) logger.debug("{} processed {} COMMITs waiting for PREPARE for" " view no {} and seq no {}". format(self, i, viewNo, ppSeqNo)) def getDigestFor3PhaseKey(self, key: ThreePhaseKey) -> Optional[str]: reqKey = self.getReqKeyFrom3PhaseKey(key) digest = self.requests.digest(reqKey) if not digest: logger.debug("{} could not find digest in sent or received " "PRE-PREPAREs or PREPAREs for 3 phase key {} and req " "key {}".format(self, key, reqKey)) return None else: return digest def getReqKeyFrom3PhaseKey(self, key: ThreePhaseKey): reqKey = None if key in self.sentPrePrepares: reqKey = self.sentPrePrepares[key][0] elif key in self.prePrepares: reqKey = self.prePrepares[key][0] elif key in self.prepares: reqKey = self.prepares[key][0] else: logger.debug("Could not find request key for 3 phase key {}". 
format(key)) return reqKey @property def threePhaseState(self): # TODO: This method is incomplete # Gets the current stable and unstable checkpoints and creates digest # of unstable checkpoints state = [] # TODO: populate from self.checkpoints once implemented return ThreePCState(self.instId, state) def process3PhaseState(self, msg: ThreePCState, sender: str): # TODO: This is not complete pass def send(self, msg, stat=None) -> None: """ Send a message to the node on which this replica resides. :param msg: the message to send """ logger.display("{} sending {}".format(self, msg.__class__.__name__), extra={"cli": True}) logger.trace("{} sending {}".format(self, msg)) if stat: self.stats.inc(stat) self.outBox.append(msg)
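# The checkpoint bookkeeping above keys a SortedDict by (start, end)
# sequence-number windows, so firstCheckPoint/lastCheckPoint reduce to
# peekitem(0)/peekitem(-1). A minimal sketch of that pattern, assuming a
# simplified two-field CheckpointState and a hypothetical CHK_FREQ of 5
# (the real Replica stores richer state):
from collections import namedtuple
from sortedcontainers import SortedDict

CheckpointState = namedtuple('CheckpointState', ['seqNo', 'digests'])
CHK_FREQ = 5  # assumed checkpoint frequency

checkpoints = SortedDict()
for ppSeqNo in range(1, 12):
    s = ((ppSeqNo - 1) // CHK_FREQ) * CHK_FREQ + 1
    e = s + CHK_FREQ - 1
    state = checkpoints.get((s, e), CheckpointState(ppSeqNo, []))
    state.digests.append('digest-{}'.format(ppSeqNo))
    checkpoints[s, e] = state._replace(seqNo=ppSeqNo)

print(checkpoints.peekitem(0))   # oldest window: ((1, 5), ...)
print(checkpoints.peekitem(-1))  # newest window: ((11, 15), ...)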
def do_plot(values, intra, inter, y_desc, plot_type, plot_name, sim_size, t0): sinter = SortedDict(inter) colors = [ 'purple', 'indigo', 'darkblue', 'royalblue', 'skyblue', 'green', 'lime', 'gold', 'orange', 'darkorange', 'red', 'firebrick', 'black' ] keys = [] vals = [] for k, vv in intra.items(): v = all_values(vv) if k in ['crit_chance', 'hit_chance']: keys.append(k.split('_')[0]) vals.append((100.0 * np.array(v)).astype(np.int32)) elif k == 'spell_power': keys.append('SP') vals.append(np.array(v)) elif k == 'num_mages': keys.append('mages') vals.append(np.array([nv['num_mages'] for nv in v])) else: keys.append(k) vals.append(np.array(v)) plt.close('all') plt.figure(figsize=(10.0, 7.0), dpi=200) title = '' fn = '' for k, v in sinter.items(): if k == 'rotation': pass elif k in ['duration', 'delay']: title += ' {:s} = {:.1f}s'.format(k, v) fn += '_{:s}{:d}'.format(k[1], int(v)) elif k in ['crit_chance', 'hit_chance']: title += ' {:s} = {:d}%'.format(k.split('_')[0], int(v * 100)) fn += '_{:s}{:d}'.format(k[0], int(v * 100)) elif k == 'spell_power': title += ' SP = {:d}'.format(int(v)) fn += '_s{:d}'.format(int(v)) elif k == 'single': title += ' single mage crit = {:d}%'.format(int(v * 100)) fn += '_m{:d}'.format(int(v * 100)) elif k == 'num_mages': title += ' # mages = {:d}, {:d} w/MQG, {:d} w/PI'.format( v['num_mages'], v['num_mqg'], v['num_pi']) fn += '_n{:d}'.format(v['num_mages']) fn += '_ss{:d}'.format(sim_size) plt.title(title) print('{:8.1f}: {:s}'.format(time.time() - t0, title)) sys.stdout.flush() relabel = { 'SP': 'Spell Power', 'hit': 'Hit Chance (percent)', 'crit': 'Crit Chance (percent)', 'single': 'Single Mage Crit Chance (percent)', 'duration': 'Duration (seconds)', 'delay': 'Delay (seconds)', 'mages': 'Number of Mages' } plt.xlabel(relabel[keys[1]]) plt.ylabel(y_desc) for index, (lval, yval) in enumerate(zip(vals[0], values)): color = colors[index * len(colors) // values.shape[0]] plt.plot(vals[1], yval, label='{:} {:s}'.format(lval, keys[0]), color=color, marker='.') plt.legend() plt.grid() savefile = '../plots/{:s}/{:s}{:s}.png'.format(plot_type, plot_name, fn) os.makedirs('../plots/{:s}'.format(plot_type), exist_ok=True) plt.savefig(savefile) savefile = '../savestates/{:s}/{:s}{:s}.pck'.format( plot_type, plot_name, fn) os.makedirs('../savestates/{:s}'.format(plot_type), exist_ok=True) with open(savefile, 'wb') as fid: pickle.dump(values, fid) pickle.dump(intra, fid) pickle.dump(inter, fid) pickle.dump(sim_size, fid)
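# do_plot wraps `inter` in a SortedDict purely to get a deterministic key
# order, so plot titles and file names come out identical across runs no
# matter how the dict was built. A small illustration of that property
# (the parameter names and values here are invented):
from sortedcontainers import SortedDict

inter = {'spell_power': 700, 'duration': 120.0, 'hit_chance': 0.97}
fn = ''
for k, v in SortedDict(inter).items():  # duration, hit_chance, spell_power
    fn += '_{:s}{:d}'.format(k[0], int(v))
print(fn)  # '_d120_h0_s700'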
class Part(object): # SortedDict mapping {str: Part} # - listing of all Part() objects by rootNodeId rootNodeId_part = SortedDict() # dictionary mapping {int: SortedSet{str: int}} clustIdx_partRootNodeIds = {} # dictionary mapping {(int, int): set((str, str))} pairClustIdxs_pairPartRootNodeIds = {} # dictionary mapping {int: set((int, int))} # clustIdx_pairClustIdxs = {} def getClustPartRootNodeIds(): return Part.clustIdx_partRootNodeIds def getPairPartRootNodeIds(parClustIdx=None, chdClustIdx=None): if parClustIdx is None and chdClustIdx is None: return Part.pairClustIdxs_pairPartRootNodeIds elif parClustIdx is None: return { k: v for k, v in Part.pairClustIdxs_pairPartRootNodeIds.items() if k[1] == chdClustIdx } elif chdClustIdx is None: return { k: v for k, v in Part.pairClustIdxs_pairPartRootNodeIds.items() if k[0] == parClustIdx } else: if (parClustIdx, chdClustIdx) in Part.pairClustIdxs_pairPartRootNodeIds: return Part.pairClustIdxs_pairPartRootNodeIds[(parClustIdx, chdClustIdx)] else: return None def getPartByRootNodeId(rnId): if rnId in Part.rootNodeId_part: return Part.rootNodeId_part[rnId] else: return None def getPartRootNodeIds(clustIdx): if clustIdx in Part.clustIdx_partRootNodeIds: return Part.clustIdx_partRootNodeIds[clustIdx] else: return None def __init__(self, relTreeRoot): self._relTreeRoot = relTreeRoot # TreeNode self._relTypeIdx = RelType.getRelType(self._relTreeRoot) self._clustIdx = -1 self._nxtArgIdx = 0 # Remember next index because _args is a SortedDict self._parPart = None self._parArgIdx = -1 # Dictionary mapping {int: Argument} self._args = SortedDict() # Dictionary mapping {int: int} self._argIdx_argClustIdx = {} # Dictionary mapping {int: set(int)} self._argClustIdx_argIdxs = {} Part.rootNodeId_part[self._relTreeRoot.getId()] = self return None def addArgument(self, arg): argIdx = self._nxtArgIdx self._nxtArgIdx += 1 self._args[argIdx] = arg return argIdx def changeClust(self, newClustIdx, newRelTypeIdx, clust_only=False): oldClustIdx = self.getClustIdx() rootID = self.getRelTreeRoot().getId() Part.clustIdx_partRootNodeIds[oldClustIdx].discard(rootID) if clust_only: self._relTypeIdx = newRelTypeIdx else: ocl = Clust.getClust(oldClustIdx) ocl.onPartUnsetClust(self) self.setRelTypeIdx(newRelTypeIdx) self.setClust(newClustIdx, clust_only=clust_only) parent = self.getParPart() if parent is None: if newClustIdx in Clust.clustIdx_rootCnt: Clust.clustIdx_rootCnt[newClustIdx] += 1 else: Clust.clustIdx_rootCnt[newClustIdx] = 1 Clust.clustIdx_rootCnt[oldClustIdx] -= 1 else: parent_clust_id = parent.getClustIdx() paci = parent.getArgClust(self.getParArgIdx()) pcl = Clust.getClust(parent_clust_id) pac = pcl._argClusts[paci] pac._chdClustIdx_cnt[oldClustIdx] -= 1 if newClustIdx in pac._chdClustIdx_cnt: pac._chdClustIdx_cnt[newClustIdx] += 1 else: pac._chdClustIdx_cnt[newClustIdx] = 1 pa = (parent_clust_id, paci) Clust.clustIdx_parArgs[oldClustIdx][pa] -= 1 if newClustIdx not in Clust.clustIdx_parArgs: Clust.clustIdx_parArgs[newClustIdx] = {} if pa in Clust.clustIdx_parArgs[newClustIdx]: Clust.clustIdx_parArgs[newClustIdx][pa] += 1 else: Clust.clustIdx_parArgs[newClustIdx][pa] = 1 opci = (parent_clust_id, oldClustIdx) npci = (parent_clust_id, newClustIdx) ptnid = (parent.getRelTreeRoot().getId(), rootID) Part.pairClustIdxs_pairPartRootNodeIds[opci].discard(ptnid) if len(Part.pairClustIdxs_pairPartRootNodeIds[opci]) == 0: del Part.pairClustIdxs_pairPartRootNodeIds[opci] # Part.clustIdx_pairClustIdxs[oldClustIdx].discard(opci) # 
Part.clustIdx_pairClustIdxs[parent_clust_id].discard(opci) if npci not in Part.pairClustIdxs_pairPartRootNodeIds: Part.pairClustIdxs_pairPartRootNodeIds[npci] = set() Part.pairClustIdxs_pairPartRootNodeIds[npci].add(ptnid) # Part.clustIdx_pairClustIdxs[parent_clust_id].add(npci) # if newClustIdx not in Part.clustIdx_pairClustIdxs: # Part.clustIdx_pairClustIdxs[newClustIdx] = set() # Part.clustIdx_pairClustIdxs[newClustIdx].add(npci) return None def changeClustRemap(self, newClustIdx, argClustIdx_newArgClustIdx, clust_only=False): if not clust_only: oldClustIdx = self.getClustIdx() ocl = Clust.getClust(oldClustIdx) self.changeClust(newClustIdx, self.getRelTypeIdx(), clust_only=clust_only) argIdx_newArgClustIdx = {} for ai, arg in self._args.items(): oaci = self._argIdx_argClustIdx.pop(ai) self._argClustIdx_argIdxs[oaci].remove(ai) if len(self._argClustIdx_argIdxs[oaci]) == 0: del self._argClustIdx_argIdxs[oaci] argIdx_newArgClustIdx[ai] = argClustIdx_newArgClustIdx[oaci] if not clust_only: ocl.onPartUnsetArg(self, arg, oaci) for ai in self._args: aci = argIdx_newArgClustIdx[ai] self.setArgClust(ai, aci, clust_only=clust_only) return None def destroy(self): tid = self.getRelTreeRoot().getId() Part.clustIdx_partRootNodeIds[self._clustIdx].discard(tid) if len(Part.clustIdx_partRootNodeIds[self._clustIdx]) == 0: del Part.clustIdx_partRootNodeIds[self._clustIdx] del Part.rootNodeId_part[tid] return None def getArgument(self, argIdx): return self._args[argIdx] def getArguments(self): return self._args def getArgClust(self, argIdx): if argIdx in self._argIdx_argClustIdx: return self._argIdx_argClustIdx[argIdx] else: return None def getParArgIdx(self): return self._parArgIdx def getClustIdx(self): return self._clustIdx def getParArgIdx(self): return self._parArgIdx def getParPart(self): return self._parPart def getRelTreeRoot(self): return self._relTreeRoot def getRelTypeIdx(self): return self._relTypeIdx def removeArgument(self, argIdx, clust_only=False): arg = self.getArgument(argIdx) oldArgClustIdx = self._argIdx_argClustIdx.pop(argIdx) self._argClustIdx_argIdxs[oldArgClustIdx].remove(argIdx) if len(self._argClustIdx_argIdxs[oldArgClustIdx]) == 0: del self._argClustIdx_argIdxs[oldArgClustIdx] if not clust_only: cl = Clust.getClust(self.getClustIdx()) cl.onPartUnsetArg(self, arg, oldArgClustIdx) del self._args[argIdx] return None def setArgClust(self, argIdx, argClustIdx, clust_only=False): oldArgClustIdx = -1 if argIdx in self._argIdx_argClustIdx: oldArgClustIdx = self.getArgClust(argIdx) if oldArgClustIdx != argClustIdx: self._argIdx_argClustIdx[argIdx] = argClustIdx if argClustIdx not in self._argClustIdx_argIdxs: self._argClustIdx_argIdxs[argClustIdx] = set() self._argClustIdx_argIdxs[argClustIdx].add(argIdx) arg = self.getArgument(argIdx) if not clust_only: cl = Clust.getClust(self._clustIdx) if oldArgClustIdx < 0: if not clust_only: cl.onPartSetArg(self, arg, argClustIdx) else: self._argClustIdx_argIdxs[oldArgClustIdx].remove(argIdx) if len(self._argClustIdx_argIdxs[oldArgClustIdx]) == 0: del self._argClustIdx_argIdxs[oldArgClustIdx] if not clust_only: cl.onPartSetArg(self, arg, argClustIdx, oldArgClustIdx) return None def setClust(self, clustIdx, clust_only=False): self._clustIdx = clustIdx rootID = self.getRelTreeRoot().getId() if clustIdx not in Part.clustIdx_partRootNodeIds: Part.clustIdx_partRootNodeIds[clustIdx] = SortedSet() Part.clustIdx_partRootNodeIds[clustIdx].add(rootID) if not clust_only: cl = Clust.getClust(clustIdx) cl.onPartSetClust(self) return None def setParent(self, 
parPart, parArgIdx): ''' Unset previous parent if it exists ''' if self.getParPart() is not None: self.unsetParent() self._parPart = parPart self._parArgIdx = parArgIdx clustIdx = self.getClustIdx() parClustID = parPart.getClustIdx() assert (parClustID >= 0) & (clustIdx >= 0) pcci = (parClustID, clustIdx) # if parClustID not in Part.clustIdx_pairClustIdxs: # Part.clustIdx_pairClustIdxs[parClustID] = set() # Part.clustIdx_pairClustIdxs[parClustID].add(pcci) # if clustIdx not in Part.clustIdx_pairClustIdxs: # Part.clustIdx_pairClustIdxs[clustIdx] = set() # Part.clustIdx_pairClustIdxs[clustIdx].add(pcci) pids = (parPart.getRelTreeRoot().getId(), self.getRelTreeRoot().getId()) if pcci not in Part.pairClustIdxs_pairPartRootNodeIds: Part.pairClustIdxs_pairPartRootNodeIds[pcci] = set() Part.pairClustIdxs_pairPartRootNodeIds[pcci].add(pids) if parPart is not None: arg = parPart.getArgument(parArgIdx) dep = arg._path.getDep() if (parClustID != clustIdx) & dep.startswith('conj_'): if parClustID < clustIdx: pci = pcci else: pci = (pcci[1], pcci[0]) if pci not in Clust.pairClustIdx_conjCnt: Clust.pairClustIdx_conjCnt[pci] = 1 else: Clust.pairClustIdx_conjCnt[pci] += 1 return None def setRelTypeIdx(self, newRelTypeIdx): self._relTypeIdx = newRelTypeIdx cl = Clust.getClust(self._clustIdx) cl.onPartSetRelTypeIdx(newRelTypeIdx) return None def unsetArgClust(self, argIdx, clust_only=False): oldArgClustIdx = self._argIdx_argClustIdx.pop(argIdx) arg = self.getArgument(argIdx) self._argClustIdx_argIdxs[oldArgClustIdx].remove(argIdx) if len(self._argClustIdx_argIdxs[oldArgClustIdx]) == 0: del self._argClustIdx_argIdxs[oldArgClustIdx] if not clust_only: cl = Clust.getClust(self.getClustIdx()) cl.onPartUnsetArg(self, arg, oldArgClustIdx) return None def unsetParent(self): ''' Remove parent-child cluster index information Remove parent-child relationship index information ''' parent = self.getParPart() clustIdx = self.getClustIdx() if parent is not None: parClustID = parent.getClustIdx() par_child_clust_pair = (parClustID, clustIdx) # Part.clustIdx_pairClustIdxs[parClustID].discard(par_child_clust_pair) # Part.clustIdx_pairClustIdxs[clustIdx].discard(par_child_clust_pair) part_pair = (parent.getRelTreeRoot().getId(), self.getRelTreeRoot().getId()) if par_child_clust_pair in Part.pairClustIdxs_pairPartRootNodeIds: Part.pairClustIdxs_pairPartRootNodeIds[ par_child_clust_pair].discard(part_pair) if len(Part.pairClustIdxs_pairPartRootNodeIds[ par_child_clust_pair]) == 0: del Part.pairClustIdxs_pairPartRootNodeIds[ par_child_clust_pair] arg = parent.getArgument(self.getParArgIdx()) dep = arg._path.getDep() if (parClustID != clustIdx) & dep.startswith('conj_'): if parClustID < clustIdx: conj_pair = par_child_clust_pair else: conj_pair = (par_child_clust_pair[1], par_child_clust_pair[0]) if conj_pair in Clust.pairClustIdx_conjCnt: Clust.pairClustIdx_conjCnt[conj_pair] -= 1 if Clust.pairClustIdx_conjCnt[conj_pair] == 0: del Clust.pairClustIdx_conjCnt[conj_pair] self._parPart = None self._parArgIdx = -1 return None # def check_parents(): # mistakes = {} # for nid, part in Part.rootNodeId_part.items(): # parent = part.getParPart() # clustIdx = part.getClustIdx() # if parent is not None: # parClustID = parent.getClustIdx() # pcci = (parClustID, clustIdx) # if pcci not in Part.clustIdx_pairClustIdxs[parClustID]: # if nid not in mistakes: # mistakes[nid] = 1 # else: # mistakes[nid] += 1 # if pcci not in Part.clustIdx_pairClustIdxs[clustIdx]: # if nid not in mistakes: # mistakes[nid] = 2 # else: # mistakes[nid] += 2 # return 
mistakes def unsetRelTypeIdx(self): old_type = self._relTypeIdx cl = Clust.getClust(self._clustIdx) cl.onPartUnsetRelTypeIdx(old_type) return None
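# Part maintains several inverted indexes (cluster index -> SortedSet of
# root-node ids, cluster-index pair -> set of node-id pairs) with one
# idiom: create the bucket on first use, discard members on unset, and
# delete the bucket once it is empty, as setClust()/destroy() do above.
# A self-contained sketch of that idiom with illustrative names:
from sortedcontainers import SortedDict, SortedSet

clustIdx_rootNodeIds = SortedDict()

def index_part(clust_idx, root_node_id):
    if clust_idx not in clustIdx_rootNodeIds:
        clustIdx_rootNodeIds[clust_idx] = SortedSet()
    clustIdx_rootNodeIds[clust_idx].add(root_node_id)

def unindex_part(clust_idx, root_node_id):
    clustIdx_rootNodeIds[clust_idx].discard(root_node_id)
    if len(clustIdx_rootNodeIds[clust_idx]) == 0:
        del clustIdx_rootNodeIds[clust_idx]  # drop empty buckets

index_part(3, 'n04')
index_part(3, 'n17')
unindex_part(3, 'n17')
unindex_part(3, 'n04')
assert 3 not in clustIdx_rootNodeIds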
class CacheStore(object): class CacheItem(object): def __init__(self): self.valid = Event() self.data = None def __init__(self, key=None): self.lock = RLock() self.store = SortedDict(key) def __getitem__(self, item): return self.get(item) def put(self, key, data): with self.lock: item = self.store[key] if key in self.store else self.CacheItem() item.data = data item.valid.set() if key not in self.store: self.store[key] = item return True return False def get(self, key, default=None, timeout=None): item = self.store.get(key) if item: item.valid.wait(timeout) return item.data return default def remove(self, key): with self.lock: if key in self.store: del self.store[key] return True return False def exists(self, key): return key in self.store def rename(self, oldkey, newkey): with self.lock: obj = self.get(oldkey) obj['id'] = newkey self.put(newkey, obj) self.remove(oldkey) def is_valid(self, key): item = self.store.get(key) if item: return item.valid.is_set() return False def invalidate(self, key): with self.lock: item = self.store.get(key) if item: item.valid.clear() def itervalid(self): for key, value in list(self.store.items()): if value.valid.is_set(): yield (key, value.data) def validvalues(self): for value in list(self.store.values()): if value.valid.is_set(): yield value.data def remove_predicate(self, predicate): result = [] for k, v in self.itervalid(): if predicate(v): self.remove(k) result.append(k) return result def query(self, *filter, **params): return wrap(list(self.validvalues())).query(*filter, **params)
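# CacheStore.get() blocks on the per-item Event, so a reader can wait for
# an invalidated entry to be refreshed by another thread; note it only
# blocks for keys that already exist, and returns `default` immediately
# for unknown keys. A usage sketch with an invented key and payload:
import threading

cache = CacheStore()
cache.put('job-1', {'id': 'job-1', 'status': 'queued'})
cache.invalidate('job-1')  # readers of 'job-1' now block on item.valid

def refresh():
    cache.put('job-1', {'id': 'job-1', 'status': 'done'})  # re-sets the Event

threading.Timer(0.1, refresh).start()
print(cache.get('job-1', timeout=1.0))  # waits for refresh(), then prints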
class Node(BaseNode): def __init__(self, *args, **kwargs): self.rest = None self.offset = None super().__init__(*args, **kwargs) def _select(self, key): """ Selects the bucket the key should belong to. """ # If the key is smaller than the min or larger than the max, immediately return. if key < min(self.bucket): return self.rest elif key >= max(self.bucket): return self.bucket.values()[-1] # Else find the correct node for k, v in reversed(list(self.bucket.items())): if k <= key: return v return self.rest def _insert(self, key, value): """ Recursively inserts the key and value by selecting the bucket the key should belong to, and inserting the key and value into that back. If the node has been split, it inserts the key of the newly created node into the bucket of this node. """ result = self._select(key)._insert(key, value) self.changed = True if result is None: return key, other = result return super()._insert(key, other) def _split(self): other = LazyNode(node=Node(tree=self.tree, changed=True), tree=self.tree) #other = Node(self.tree) values = self.bucket.items() self.bucket = SortedDict(values[:len(values) // 2]) other.bucket = SortedDict(values[len(values) // 2:]) key, value = other.bucket.popitem(last=False) other.rest = value return (key, other) def _commit(self): self.rest._commit() for child in self.bucket.values(): child._commit() data = packb({ 'rest': self.rest.offset, 'values': {k: v.offset for k, v in self.bucket.items()} }) return self.tree.store.write(data) def __getitem__(self, key): return self._select(key)[key] def __len__(self): print(len(self.rest)) print (self.bucket.values()) return sum([len(child) for child in self.bucket.values()]) + len(self.rest) def __iter__(self): for key in self.rest: yield key for child in self.bucket.values(): for key in child: yield key
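# _select() above scans the bucket in reverse, which is linear in the
# bucket size. SortedDict supports binary search directly, so the same
# lookup can be O(log n). A sketch of that alternative (not tested
# against the rest of this tree code):
def select_child(node, key):
    """O(log n) version of Node._select using SortedDict.bisect_right."""
    index = node.bucket.bisect_right(key)  # number of separator keys <= key
    if index == 0:
        return node.rest  # key sorts before every separator key
    return node.bucket.values()[index - 1]  # child under largest key <= key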
class DotMap(MutableMapping): def __init__(self, *args, **kwargs): self._map = SortedDict() if args: d = args[0] if type(d) is dict: for k, v in self.__call_items(d): if type(v) is dict: v = DotMap(v) self._map[k] = v if kwargs: for k, v in self.__call_items(kwargs): self._map[k] = v @staticmethod def __call_items(obj): if hasattr(obj, 'iteritems') and ismethod(getattr(obj, 'iteritems')): return obj.iteritems() else: return obj.items() def items(self): return self.iteritems() def iteritems(self): return self.__call_items(self._map) def __iter__(self): return self._map.__iter__() def __setitem__(self, k, v): self._map[k] = v def __getitem__(self, k): if k not in self._map: # automatically extend to new DotMap self[k] = DotMap() return self._map[k] def __setattr__(self, k, v): if k == '_map': super(DotMap, self).__setattr__(k, v) else: self[k] = v def __getattr__(self, k): if k == '_map': return self._map else: return self[k] def __delattr__(self, key): return self._map.__delitem__(key) def __contains__(self, k): return self._map.__contains__(k) def __str__(self): items = [] for k, v in self.__call_items(self._map): items.append('{0}={1}'.format(k, repr(v))) out = 'DotMap({0})'.format(', '.join(items)) return out def __repr__(self): return str(self) def to_dict(self): d = {} for k, v in self.items(): if type(v) is DotMap: v = v.to_dict() d[k] = v return d def pprint(self): pprint(self.to_dict()) # proper dict subclassing def values(self): return self._map.values() @staticmethod def parse_other(other): if type(other) is DotMap: return other._map else: return other def __cmp__(self, other): other = DotMap.parse_other(other) return self._map.__cmp__(other) def __eq__(self, other): other = DotMap.parse_other(other) if not isinstance(other, dict): return False return self._map.__eq__(other) def __ge__(self, other): other = DotMap.parse_other(other) return self._map.__ge__(other) def __gt__(self, other): other = DotMap.parse_other(other) return self._map.__gt__(other) def __le__(self, other): other = DotMap.parse_other(other) return self._map.__le__(other) def __lt__(self, other): other = DotMap.parse_other(other) return self._map.__lt__(other) def __ne__(self, other): other = DotMap.parse_other(other) return self._map.__ne__(other) def __delitem__(self, key): return self._map.__delitem__(key) def __len__(self): return self._map.__len__() def copy(self): return self def get(self, key, default=None): return self._map.get(key, default) def has_key(self, key): return key in self._map def iterkeys(self): return self._map.iterkeys() def itervalues(self): return self._map.itervalues() def keys(self): return self._map.keys() def pop(self, key, default=None): return self._map.pop(key, default) def setdefault(self, key, default=None): return self._map.setdefault(key, default) def viewitems(self): if version_info.major == 2 and version_info.minor >= 7: return self._map.viewitems() else: return self._map.items() def viewkeys(self): if version_info.major == 2 and version_info.minor >= 7: return self._map.viewkeys() else: return self._map.keys() def viewvalues(self): if version_info.major == 2 and version_info.minor >= 7: return self._map.viewvalues() else: return self._map.values() @classmethod def fromkeys(cls, seq, value=None): d = DotMap() d._map = SortedDict.fromkeys(seq, value) return d
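# A brief usage sketch of DotMap's attribute access: __getattr__ falls
# through to __getitem__, which auto-creates nested DotMaps on missing
# keys (the key names here are arbitrary):
m = DotMap()
m.server.host = 'localhost'  # 'server' is auto-created as a nested DotMap
m.server.port = 8080
print(m.to_dict())    # {'server': {'host': 'localhost', 'port': 8080}}
print('server' in m)  # True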
class AutonomousNavigationSystem: def __init__(self, id, fis, ap): self.id = id self.fuzzy_inference_system = fis self.corrections = SortedDict() self.auto_pilot = ap self.min_col_time = math.inf self.prev_min_col_time = self.min_col_time self.prev_recc = dict() self.min_dist = dict(main=None, target=None, time=None, dist=9999) def get_visible_vessels(self, shipstate): tmp = [] for vessel in vesselService.vessels: if vessel.id != self.id: if helpers.distance(vessel.shipstate.position, shipstate.position) < config.visibility: tmp.append(vessel) return tmp def next_position(self, shipstate, i): if self.auto_pilot: self.calculate_corrections(shipstate, i) return shipstate.update_position() def back_to_course(self, shipstate): diff = shipstate.target_heading - shipstate.heading heading_correction = (diff + 180) % 360 - 180 if heading_correction < -shipstate.rate_of_turn: shipstate.standard_rate_turn('left') elif heading_correction > shipstate.rate_of_turn: shipstate.standard_rate_turn('right') elif heading_correction != 0: shipstate.heading = (shipstate.heading + heading_correction) % 360 elif abs(self.prev_min_col_time) < abs( self.min_col_time) and not self.is_fis_corrections(): diff = shipstate.orig_heading - shipstate.heading heading_correction = (diff + 180) % 360 - 180 if heading_correction < 0: shipstate.target_heading = (shipstate.target_heading - shipstate.rate_of_turn / 2) % 360 elif heading_correction > 0: shipstate.target_heading = (shipstate.target_heading + shipstate.rate_of_turn / 2) % 360 def back_to_speed(self, shipstate): speed_correction = shipstate.target_speed - shipstate.speed if speed_correction <= -1: shipstate.slow_down() elif speed_correction >= 1: shipstate.speed_up() # elif self.prev_min_col_time < self.min_col_time: if not self.is_fis_corrections(): speed_correction = shipstate.orig_speed - shipstate.speed if speed_correction < 0: shipstate.target_speed = shipstate.target_speed - .5 elif speed_correction > 0: shipstate.target_speed = shipstate.target_speed + .5 def change(self, course, speed, target): if abs(speed) >= 1 or abs(course) >= 1: ret = True else: ret = False if target is None and ret: if 'TOT' in self.prev_recc and abs( self.prev_recc['TOT']['course'] - course) <= 1 and abs( self.prev_recc['TOT']['speed'] - speed) <= 1: ret = False if 'TOT' in self.prev_recc: self.prev_recc['TOT']['course'] = course self.prev_recc['TOT']['speed'] = speed else: self.prev_recc['TOT'] = dict(course=course, speed=speed) elif ret: if target in self.prev_recc and abs( self.prev_recc[target]['course'] - course) <= 1 and abs( self.prev_recc[target]['speed'] - speed) <= 1: ret = False if target in self.prev_recc: self.prev_recc[target]['course'] = course self.prev_recc[target]['speed'] = speed else: self.prev_recc[target] = dict(course=course, speed=speed) return ret def is_fis_corrections(self): return 'course_change' in self.fuzzy_inference_system.output or 'speed_change' in self.fuzzy_inference_system.output def calculate_corrections(self, shipstate, i): debug_strings = [] self.corrections = SortedDict() self.prev_min_col_time = self.min_col_time self.min_col_time = math.inf course_change_proposed = False speed_change_proposed = False for observed_vessel in self.get_visible_vessels(shipstate): main_observed, observed_main = helpers.cartesian_coords_to_relative( shipstate, observed_vessel.shipstate) vm = shipstate.speed vt = observed_vessel.shipstate.speed cm = math.radians(shipstate.heading) ct = math.radians(observed_vessel.shipstate.heading) vr = -math.sqrt( pow(vm, 2) + 
pow(vt, 2) - 2 * vm * vt * math.cos(cm - ct)) distance = helpers.distance(shipstate.position, observed_vessel.shipstate.position) if (distance < self.min_dist['dist']): self.min_dist['dist'] = distance self.min_dist['main'] = self.id self.min_dist['target'] = observed_vessel.id self.min_dist['time'] = i time_until_collision = math.inf course_diff = min( (shipstate.heading - observed_vessel.shipstate.heading) % 360, (observed_vessel.shipstate.heading - shipstate.heading) % 360) if 270 < main_observed <= 360 or 0 <= main_observed < 90: if course_diff < 90: if vm > vt: vr = -vr else: vr = -vr elif 90 < main_observed < 270: if vm < vt and course_diff < 90: vr = -vr else: if vm == 0 or vt == 0: vr = -vr if vr != 0: time_until_collision = distance / abs(vr) else: time_until_collision = math.inf if not self.min_col_time: self.min_col_time = time_until_collision else: self.min_col_time = min(self.min_col_time, time_until_collision) if vr > 0: relative_course = observed_vessel.shipstate.heading - shipstate.heading if relative_course < 0: relative_course = 360 + relative_course if observed_vessel.shipstate.speed == 0: speed_ratio = 0 elif shipstate.speed == 0: speed_ratio = 10 else: speed_ratio = observed_vessel.shipstate.speed / shipstate.speed self.fuzzy_inference_system.input['bearing'] = main_observed self.fuzzy_inference_system.input[ 'relative_course'] = relative_course self.fuzzy_inference_system.input['range'] = distance self.fuzzy_inference_system.input['speed_ratio'] = speed_ratio self.fuzzy_inference_system.output = collections.OrderedDict() try: self.fuzzy_inference_system.compute() except ValueError as e: pass # print(e) if self.is_fis_corrections(): if self.change( self.fuzzy_inference_system. output['course_change'], self.fuzzy_inference_system.output['speed_change'], observed_vessel.id): self.corrections[time_until_collision] = {} self.corrections[time_until_collision][ 'target'] = observed_vessel.id if 'course_change' in self.fuzzy_inference_system.output: course_change = self.fuzzy_inference_system.output[ 'course_change'] self.corrections[time_until_collision][ 'course_change'] = course_change course_change_proposed = True if 'speed_change' in self.fuzzy_inference_system.output: speed_change = self.fuzzy_inference_system.output[ 'speed_change'] self.corrections[time_until_collision][ 'speed_change'] = speed_change speed_change_proposed = True if course_change_proposed or speed_change_proposed: debug_strings.append("Target: " + str(observed_vessel.id)) debug_strings.append("Relative speed: " + str(vr)) debug_strings.append( self.fuzzy_inference_system.input.__repr__()) debug_strings.append( json.dumps(self.fuzzy_inference_system.output, indent=4)) course_tot_weight = 0 speed_tot_weight = 0 course_change = 0 speed_change = 0 for idx, correction in self.corrections.items(): if 'course_change' in correction: course_weight = 1 / idx course_tot_weight = course_tot_weight + course_weight course_change = course_change + correction[ 'course_change'] * course_weight if 'speed_change' in correction: speed_weight = 1 / idx speed_tot_weight = speed_tot_weight + speed_weight speed_change = speed_change + correction[ 'speed_change'] * speed_weight if course_change_proposed: course_change = course_change / course_tot_weight shipstate.target_heading = shipstate.heading + course_change if speed_change_proposed: speed_change = speed_change / speed_tot_weight shipstate.target_speed = shipstate.speed + speed_change course_change = round(course_change) speed_change = round(speed_change) if 
self.change(course_change, speed_change, None): print("i: " + str(i), file=open("Logs/" + config.name, "a")) print("Main: " + str(self.id), file=open("Logs/" + config.name, "a")) print("TOT:", file=open("Logs/" + config.name, "a")) print("Course change: " + str(course_change), file=open("Logs/" + config.name, "a")) print("Speed change: " + str(speed_change), file=open("Logs/" + config.name, "a")) for string__ in debug_strings: print(string__, file=open("Logs/" + config.name, "a")) print( "__________________________________________________________________________________________________", file=open("Logs/" + config.name, "a")) if course_change >= shipstate.rate_of_turn: shipstate.standard_rate_turn('right') elif course_change <= -shipstate.rate_of_turn: shipstate.standard_rate_turn('left') elif abs(course_change) >= 1: if (shipstate.heading + course_change) % 360 >= 0: shipstate.heading = (shipstate.heading + course_change) % 360 else: shipstate.heading = (shipstate.heading + 360 + course_change) % 360 elif not course_change_proposed: self.back_to_course(shipstate) if speed_change > 1 * config.playback['rate']: shipstate.speed_up() elif speed_change < -1 * config.playback['rate']: shipstate.slow_down() elif abs(speed_change) != 0: if shipstate.speed + speed_change >= 0: shipstate.speed = shipstate.speed + speed_change else: shipstate.speed = 0 elif not speed_change_proposed: self.back_to_speed(shipstate)
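# The aggregation step in calculate_corrections() weights each proposed
# correction by the inverse of its time-until-collision, so the most
# imminent target dominates the blended manoeuvre. A condensed sketch of
# that computation with invented numbers:
from sortedcontainers import SortedDict

# time-until-collision (seconds) -> proposed course change (degrees)
corrections = SortedDict({12.0: {'course_change': 20.0},
                          48.0: {'course_change': 5.0}})

course_tot_weight = 0.0
course_change = 0.0
for t, correction in corrections.items():
    weight = 1.0 / t  # nearer collisions get larger weights
    course_tot_weight += weight
    course_change += correction['course_change'] * weight

print(round(course_change / course_tot_weight))  # 17 -- pulled toward 20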
class IntervalTree(collections.MutableSet): """ A binary lookup tree of intervals. The intervals contained in the tree are represented using ``Interval(a, b, data)`` objects. Each such object represents a half-open interval ``[a, b)`` with optional data. Examples: --------- Initialize a blank tree:: >>> tree = IntervalTree() >>> tree IntervalTree() Initialize a tree from an iterable set of Intervals in O(n * log n):: >>> tree = IntervalTree([Interval(-10, 10), Interval(-20.0, -10.0)]) >>> tree IntervalTree([Interval(-20.0, -10.0), Interval(-10, 10)]) >>> len(tree) 2 Note that this is a set, i.e. repeated intervals are ignored. However, Intervals with different data fields are regarded as different:: >>> tree = IntervalTree([Interval(-10, 10), Interval(-10, 10), Interval(-10, 10, "x")]) >>> tree IntervalTree([Interval(-10, 10), Interval(-10, 10, 'x')]) >>> len(tree) 2 Insertions:: >>> tree = IntervalTree() >>> tree[0:1] = "data" >>> tree.add(Interval(10, 20)) >>> tree.addi(19.9, 20) >>> tree IntervalTree([Interval(0, 1, 'data'), Interval(10, 20), Interval(19.9, 20)]) >>> tree.update([Interval(19.9, 20.1), Interval(20.1, 30)]) >>> len(tree) 5 Inserting the same Interval twice does nothing:: >>> tree = IntervalTree() >>> tree[-10:20] = "arbitrary data" >>> tree[-10:20] = None # Note that this is also an insertion >>> tree IntervalTree([Interval(-10, 20), Interval(-10, 20, 'arbitrary data')]) >>> tree[-10:20] = None # This won't change anything >>> tree[-10:20] = "arbitrary data" # Neither will this >>> len(tree) 2 Deletions:: >>> tree = IntervalTree(Interval(b, e) for b, e in [(-10, 10), (-20, -10), (10, 20)]) >>> tree IntervalTree([Interval(-20, -10), Interval(-10, 10), Interval(10, 20)]) >>> tree.remove(Interval(-10, 10)) >>> tree IntervalTree([Interval(-20, -10), Interval(10, 20)]) >>> tree.remove(Interval(-10, 10)) Traceback (most recent call last): ... 
ValueError >>> tree.discard(Interval(-10, 10)) # Same as remove, but no exception on failure >>> tree IntervalTree([Interval(-20, -10), Interval(10, 20)]) Delete intervals, overlapping a given point:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> tree.remove_overlap(1.1) >>> tree IntervalTree([Interval(-1.1, 1.1)]) Delete intervals, overlapping an interval:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> tree.remove_overlap(0, 0.5) >>> tree IntervalTree([Interval(0.5, 1.7)]) >>> tree.remove_overlap(1.7, 1.8) >>> tree IntervalTree([Interval(0.5, 1.7)]) >>> tree.remove_overlap(1.6, 1.6) # Null interval does nothing >>> tree IntervalTree([Interval(0.5, 1.7)]) >>> tree.remove_overlap(1.6, 1.5) # Ditto >>> tree IntervalTree([Interval(0.5, 1.7)]) Delete intervals, enveloped in the range:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> tree.remove_envelop(-1.0, 1.5) >>> tree IntervalTree([Interval(-1.1, 1.1), Interval(0.5, 1.7)]) >>> tree.remove_envelop(-1.1, 1.5) >>> tree IntervalTree([Interval(0.5, 1.7)]) >>> tree.remove_envelop(0.5, 1.5) >>> tree IntervalTree([Interval(0.5, 1.7)]) >>> tree.remove_envelop(0.5, 1.7) >>> tree IntervalTree() Point/interval overlap queries:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> assert tree[-1.1] == set([Interval(-1.1, 1.1)]) >>> assert tree.search(1.1) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)]) # Same as tree[1.1] >>> assert tree[-0.5:0.5] == set([Interval(-0.5, 1.5), Interval(-1.1, 1.1)]) # Interval overlap query >>> assert tree.search(1.5, 1.5) == set() # Same as tree[1.5:1.5] >>> assert tree.search(1.5) == set([Interval(0.5, 1.7)]) # Same as tree[1.5] >>> assert tree.search(1.7, 1.8) == set() Envelop queries:: >>> assert tree.search(-0.5, 0.5, strict=True) == set() >>> assert tree.search(-0.4, 1.7, strict=True) == set([Interval(0.5, 1.7)]) Membership queries:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> Interval(-0.5, 0.5) in tree False >>> Interval(-1.1, 1.1) in tree True >>> Interval(-1.1, 1.1, "x") in tree False >>> tree.overlaps(-1.1) True >>> tree.overlaps(1.7) False >>> tree.overlaps(1.7, 1.8) False >>> tree.overlaps(-1.2, -1.1) False >>> tree.overlaps(-1.2, -1.0) True Sizing:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> len(tree) 3 >>> tree.is_empty() False >>> IntervalTree().is_empty() True >>> not tree False >>> not IntervalTree() True >>> print(tree.begin()) # using print() because of floats in Python 2.6 -1.1 >>> print(tree.end()) # ditto 1.7 Iteration:: >>> tree = IntervalTree([Interval(-11, 11), Interval(-5, 15), Interval(5, 17)]) >>> [iv.begin for iv in sorted(tree)] [-11, -5, 5] >>> assert tree.items() == set([Interval(-5, 15), Interval(-11, 11), Interval(5, 17)]) Copy- and typecasting, pickling:: >>> tree0 = IntervalTree([Interval(0, 1, "x"), Interval(1, 2, ["x"])]) >>> tree1 = IntervalTree(tree0) # Shares Interval objects >>> tree2 = tree0.copy() # Shallow copy (same as above, as Intervals are singletons) >>> import pickle >>> tree3 = pickle.loads(pickle.dumps(tree0)) # Deep copy >>> list(tree0[1])[0].data[0] = "y" # affects shallow copies, but not deep copies >>> tree0 IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])]) >>> tree1 IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])]) >>> tree2 IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, 
['y'])]) >>> tree3 IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['x'])]) Equality testing:: >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1)]) True >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1, "x")]) False """ @classmethod def from_tuples(cls, tups): """ Create a new IntervalTree from an iterable of 2- or 3-tuples, where the tuple lists begin, end, and optionally data. """ ivs = [Interval(*t) for t in tups] return IntervalTree(ivs) def __init__(self, intervals=None): """ Set up a tree. If intervals is provided, add all the intervals to the tree. Completes in O(n*log n) time. """ intervals = set(intervals) if intervals is not None else set() for iv in intervals: if iv.is_null(): raise ValueError( "IntervalTree: Null Interval objects not allowed in IntervalTree:" " {0}".format(iv) ) self.all_intervals = intervals self.top_node = Node.from_intervals(self.all_intervals) self.boundary_table = SortedDict() for iv in self.all_intervals: self._add_boundaries(iv) def copy(self): """ Construct a new IntervalTree using shallow copies of the intervals in the source tree. Completes in O(n*log n) time. :rtype: IntervalTree """ return IntervalTree(iv.copy() for iv in self) def _add_boundaries(self, interval): """ Records the boundaries of the interval in the boundary table. """ begin = interval.begin end = interval.end if begin in self.boundary_table: self.boundary_table[begin] += 1 else: self.boundary_table[begin] = 1 if end in self.boundary_table: self.boundary_table[end] += 1 else: self.boundary_table[end] = 1 def _remove_boundaries(self, interval): """ Removes the boundaries of the interval from the boundary table. """ begin = interval.begin end = interval.end if self.boundary_table[begin] == 1: del self.boundary_table[begin] else: self.boundary_table[begin] -= 1 if self.boundary_table[end] == 1: del self.boundary_table[end] else: self.boundary_table[end] -= 1 def add(self, interval): """ Adds an interval to the tree, if not already present. Completes in O(log n) time. """ if interval in self: return if interval.is_null(): raise ValueError( "IntervalTree: Null Interval objects not allowed in IntervalTree:" " {0}".format(interval) ) if not self.top_node: self.top_node = Node.from_interval(interval) else: self.top_node = self.top_node.add(interval) self.all_intervals.add(interval) self._add_boundaries(interval) append = add def addi(self, begin, end, data=None): """ Shortcut for add(Interval(begin, end, data)). Completes in O(log n) time. """ return self.add(Interval(begin, end, data)) appendi = addi def update(self, intervals): """ Given an iterable of intervals, add them to the tree. Completes in O(m*log(n+m), where m = number of intervals to add. """ for iv in intervals: self.add(iv) def extend(self, intervals): """ Deprecated: Replaced by update(). """ warn("IntervalTree.extend() has been deprecated. Consider using update() instead", DeprecationWarning) self.update(intervals) def remove(self, interval): """ Removes an interval from the tree, if present. If not, raises ValueError. Completes in O(log n) time. """ #self.verify() if interval not in self: #print(self.all_intervals) raise ValueError self.top_node = self.top_node.remove(interval) self.all_intervals.remove(interval) self._remove_boundaries(interval) #self.verify() def removei(self, begin, end, data=None): """ Shortcut for remove(Interval(begin, end, data)). Completes in O(log n) time. 
""" return self.remove(Interval(begin, end, data)) def discard(self, interval): """ Removes an interval from the tree, if present. If not, does nothing. Completes in O(log n) time. """ if interval not in self: return self.all_intervals.discard(interval) self.top_node = self.top_node.discard(interval) self._remove_boundaries(interval) def discardi(self, begin, end, data=None): """ Shortcut for discard(Interval(begin, end, data)). Completes in O(log n) time. """ return self.discard(Interval(begin, end, data)) def difference(self, other): """ Returns a new tree, comprising all intervals in self but not in other. """ ivs = set() for iv in self: if iv not in other: ivs.add(iv) return IntervalTree(ivs) def difference_update(self, other): """ Removes all intervals in other from self. """ for iv in other: self.discard(iv) def union(self, other): """ Returns a new tree, comprising all intervals from self and other. """ return IntervalTree(set(self).union(other)) def intersection(self, other): """ Returns a new tree of all intervals common to both self and other. """ ivs = set() shorter, longer = sorted([self, other], key=len) for iv in shorter: if iv in longer: ivs.add(iv) return IntervalTree(ivs) def intersection_update(self, other): """ Removes intervals from self unless they also exist in other. """ for iv in self: if iv not in other: self.remove(iv) def symmetric_difference(self, other): """ Return a tree with elements only in self or other but not both. """ if not isinstance(other, set): other = set(other) me = set(self) ivs = me - other + (other - me) return IntervalTree(ivs) def symmetric_difference_update(self, other): """ Throws out all intervals except those only in self or other, not both. """ other = set(other) for iv in self: if iv in other: self.remove(iv) other.remove(iv) self.update(other) def remove_overlap(self, begin, end=None): """ Removes all intervals overlapping the given point or range. Completes in O((r+m)*log n) time, where: * n = size of the tree * m = number of matches * r = size of the search range (this is 1 for a point) """ hitlist = self.search(begin, end) for iv in hitlist: self.remove(iv) def remove_envelop(self, begin, end): """ Removes all intervals completely enveloped in the given range. Completes in O((r+m)*log n) time, where: * n = size of the tree * m = number of matches * r = size of the search range (this is 1 for a point) """ hitlist = self.search(begin, end, strict=True) for iv in hitlist: self.remove(iv) def chop(self, begin, end, datafunc=None): """ Like remove_envelop(), but trims back Intervals hanging into the chopped area so that nothing overlaps. """ insertions = set() begin_hits = [iv for iv in self[begin] if iv.begin < begin] end_hits = [iv for iv in self[end] if iv.end > end] if datafunc: for iv in begin_hits: insertions.add(Interval(iv.begin, begin, datafunc(iv, True))) for iv in end_hits: insertions.add(Interval(end, iv.end, datafunc(iv, False))) else: for iv in begin_hits: insertions.add(Interval(iv.begin, begin, iv.data)) for iv in end_hits: insertions.add(Interval(end, iv.end, iv.data)) self.remove_envelop(begin, end) self.difference_update(begin_hits) self.difference_update(end_hits) self.update(insertions) def slice(self, point, datafunc=None): """ Split Intervals that overlap point into two new Intervals. if specified, uses datafunc(interval, islower=True/False) to set the data field of the new Intervals. 
:param point: where to slice :param datafunc(interval, islower): callable returning a new value for the interval's data field """ hitlist = set(iv for iv in self[point] if iv.begin < point) insertions = set() if datafunc: for iv in hitlist: insertions.add(Interval(iv.begin, point, datafunc(iv, True))) insertions.add(Interval(point, iv.end, datafunc(iv, False))) else: for iv in hitlist: insertions.add(Interval(iv.begin, point, iv.data)) insertions.add(Interval(point, iv.end, iv.data)) self.difference_update(hitlist) self.update(insertions) def clear(self): """ Empties the tree. Completes in O(1) time. """ self.__init__() def find_nested(self): """ Returns a dictionary mapping parent intervals to sets of intervals overlapped by and contained in the parent. Completes in O(n^2) time. :rtype: dict of [Interval, set of Interval] """ result = {} def add_if_nested(): if parent.contains_interval(child): if parent not in result: result[parent] = set() result[parent].add(child) long_ivs = sorted(self.all_intervals, key=Interval.length, reverse=True) for i, parent in enumerate(long_ivs): for child in long_ivs[i + 1:]: add_if_nested() return result def overlaps(self, begin, end=None): """ Returns whether some interval in the tree overlaps the given point or range. Completes in O(r*log n) time, where r is the size of the search range. :rtype: bool """ if end is not None: return self.overlaps_range(begin, end) elif isinstance(begin, Number): return self.overlaps_point(begin) else: return self.overlaps_range(begin.begin, begin.end) def overlaps_point(self, p): """ Returns whether some interval in the tree overlaps p. Completes in O(log n) time. :rtype: bool """ if self.is_empty(): return False return bool(self.top_node.contains_point(p)) def overlaps_range(self, begin, end): """ Returns whether some interval in the tree overlaps the given range. Returns False if given a null interval over which to test. Completes in O(r*log n) time, where r is the range length and n is the table size. :rtype: bool """ if self.is_empty(): return False elif begin >= end: return False elif self.overlaps_point(begin): return True return any( self.overlaps_point(bound) for bound in self.boundary_table if begin < bound < end ) def split_overlaps(self): """ Finds all intervals with overlapping ranges and splits them along the range boundaries. Completes in worst-case O(n^2*log n) time (many interval boundaries are inside many intervals), best-case O(n*log n) time (small number of overlaps << n per interval). """ if not self: return if len(self.boundary_table) == 2: return bounds = sorted(self.boundary_table) # get bound locations new_ivs = set() for lbound, ubound in zip(bounds[:-1], bounds[1:]): for iv in self[lbound]: new_ivs.add(Interval(lbound, ubound, iv.data)) self.__init__(new_ivs) def merge_overlaps(self, data_reducer=None, data_initializer=None): """ Finds all intervals with overlapping ranges and merges them into a single interval. If provided, uses data_reducer and data_initializer with similar semantics to Python's built-in reduce(reducer_func[, initializer]), as follows: If data_reducer is set to a function, combines the data fields of the Intervals with current_reduced_data = data_reducer(current_reduced_data, new_data) If data_reducer is None, the merged Interval's data field will be set to None, ignoring all the data fields of the merged Intervals. On encountering the first Interval to merge, if data_initializer is None (default), uses the first Interval's data field as the first value for current_reduced_data. 
If data_initializer is not None, current_reduced_data is set to a shallow copy of data_initiazer created with copy.copy(data_initializer). Completes in O(n*logn). """ if not self: return sorted_intervals = sorted(self.all_intervals) # get sorted intervals merged = [] # use mutable object to allow new_series() to modify it current_reduced = [None] higher = None # iterating variable, which new_series() needs access to def new_series(): if data_initializer is None: current_reduced[0] = higher.data merged.append(higher) return else: # data_initializer is not None current_reduced[0] = copy(data_initializer) current_reduced[0] = data_reducer(current_reduced[0], higher.data) merged.append(Interval(higher.begin, higher.end, current_reduced[0])) for higher in sorted_intervals: if merged: # series already begun lower = merged[-1] if higher.begin <= lower.end: # should merge upper_bound = max(lower.end, higher.end) if data_reducer is not None: current_reduced[0] = data_reducer(current_reduced[0], higher.data) else: # annihilate the data, since we don't know how to merge it current_reduced[0] = None merged[-1] = Interval(lower.begin, upper_bound, current_reduced[0]) else: new_series() else: # not merged; is first of Intervals to merge new_series() self.__init__(merged) def merge_equals(self, data_reducer=None, data_initializer=None): """ Finds all intervals with equal ranges and merges them into a single interval. If provided, uses data_reducer and data_initializer with similar semantics to Python's built-in reduce(reducer_func[, initializer]), as follows: If data_reducer is set to a function, combines the data fields of the Intervals with current_reduced_data = data_reducer(current_reduced_data, new_data) If data_reducer is None, the merged Interval's data field will be set to None, ignoring all the data fields of the merged Intervals. On encountering the first Interval to merge, if data_initializer is None (default), uses the first Interval's data field as the first value for current_reduced_data. If data_initializer is not None, current_reduced_data is set to a shallow copy of data_initiazer created with copy.copy(data_initializer). Completes in O(n*logn). """ if not self: return sorted_intervals = sorted(self.all_intervals) # get sorted intervals merged = [] # use mutable object to allow new_series() to modify it current_reduced = [None] higher = None # iterating variable, which new_series() needs access to def new_series(): if data_initializer is None: current_reduced[0] = higher.data merged.append(higher) return else: # data_initializer is not None current_reduced[0] = copy(data_initializer) current_reduced[0] = data_reducer(current_reduced[0], higher.data) merged.append(Interval(higher.begin, higher.end, current_reduced[0])) for higher in sorted_intervals: if merged: # series already begun lower = merged[-1] if higher.range_matches(lower): # should merge upper_bound = max(lower.end, higher.end) if data_reducer is not None: current_reduced[0] = data_reducer(current_reduced[0], higher.data) else: # annihilate the data, since we don't know how to merge it current_reduced[0] = None merged[-1] = Interval(lower.begin, upper_bound, current_reduced[0]) else: new_series() else: # not merged; is first of Intervals to merge new_series() self.__init__(merged) def items(self): """ Constructs and returns a set of all intervals in the tree. Completes in O(n) time. :rtype: set of Interval """ return set(self.all_intervals) def is_empty(self): """ Returns whether the tree is empty. Completes in O(1) time. 
:rtype: bool """ return 0 == len(self) def search(self, begin, end=None, strict=False): """ Returns a set of all intervals overlapping the given range. Or, if strict is True, returns the set of all intervals fully contained in the range [begin, end]. Completes in O(m + k*log n) time, where: * n = size of the tree * m = number of matches * k = size of the search range (this is 1 for a point) :rtype: set of Interval """ root = self.top_node if not root: return set() if end is None: try: iv = begin return self.search(iv.begin, iv.end, strict=strict) except: return root.search_point(begin, set()) elif begin >= end: return set() else: result = root.search_point(begin, set()) boundary_table = self.boundary_table bound_begin = boundary_table.bisect_left(begin) bound_end = boundary_table.bisect_left(end) # exclude final end bound result.update(root.search_overlap( # slice notation is slightly slower boundary_table.iloc[index] for index in xrange(bound_begin, bound_end) )) # TODO: improve strict search to use node info instead of less-efficient filtering if strict: result = set( iv for iv in result if iv.begin >= begin and iv.end <= end ) return result def begin(self): """ Returns the lower bound of the first interval in the tree. Completes in O(n) time. """ if not self.boundary_table: return 0 return self.boundary_table.iloc[0] def end(self): """ Returns the upper bound of the last interval in the tree. Completes in O(n) time. """ if not self.boundary_table: return 0 return self.boundary_table.iloc[-1] def range(self): """ Returns a minimum-spanning Interval that encloses all the members of this IntervalTree. If the tree is empty, returns null Interval. :rtype: Interval """ return Interval(self.begin(), self.end()) def span(self): """ Returns the length of the minimum-spanning Interval that encloses all the members of this IntervalTree. If the tree is empty, return 0. """ if not self: return 0 return self.end() - self.begin() def print_structure(self, tostring=False): """ ## FOR DEBUGGING ONLY ## Pretty-prints the structure of the tree. If tostring is true, prints nothing and returns a string. :rtype: None or str """ if self.top_node: return self.top_node.print_structure(tostring=tostring) else: result = "<empty IntervalTree>" if not tostring: print(result) else: return result def verify(self): """ ## FOR DEBUGGING ONLY ## Checks the table to ensure that the invariants are held. """ if self.all_intervals: ## top_node.all_children() == self.all_intervals try: assert self.top_node.all_children() == self.all_intervals except AssertionError as e: print( 'Error: the tree and the membership set are out of sync!' ) tivs = set(self.top_node.all_children()) print('top_node.all_children() - all_intervals:') try: pprint except NameError: from pprint import pprint pprint(tivs - self.all_intervals) print('all_intervals - top_node.all_children():') pprint(self.all_intervals - tivs) raise e ## All members are Intervals for iv in self: assert isinstance(iv, Interval), ( "Error: Only Interval objects allowed in IntervalTree:" " {0}".format(iv) ) ## No null intervals for iv in self: assert not iv.is_null(), ( "Error: Null Interval objects not allowed in IntervalTree:" " {0}".format(iv) ) ## Reconstruct boundary_table bound_check = {} for iv in self: if iv.begin in bound_check: bound_check[iv.begin] += 1 else: bound_check[iv.begin] = 1 if iv.end in bound_check: bound_check[iv.end] += 1 else: bound_check[iv.end] = 1 ## Reconstructed boundary table (bound_check) ==? 
boundary_table assert set(self.boundary_table.keys()) == set(bound_check.keys()),\ 'Error: boundary_table is out of sync with ' \ 'the intervals in the tree!' # For efficiency reasons this should be iteritems in Py2, but we # don't care much for efficiency in debug methods anyway. for key, val in self.boundary_table.items(): assert bound_check[key] == val, \ 'Error: boundary_table[{0}] should be {1},' \ ' but is {2}!'.format( key, bound_check[key], val) ## Internal tree structure self.top_node.verify(set()) else: ## Verify empty tree assert not self.boundary_table, \ "Error: boundary table should be empty!" assert self.top_node is None, \ "Error: top_node isn't None!" def score(self, full_report=False): """ Returns a number between 0 and 1, indicating how suboptimal the tree is. The lower, the better. Roughly, this number represents the fraction of flawed Intervals in the tree. :rtype: float """ if len(self) <= 2: return 0.0 n = len(self) m = self.top_node.count_nodes() def s_center_score(): """ Returns a normalized score, indicating roughly how many times intervals share s_center with other intervals. Output is full-scale from 0 to 1. :rtype: float """ raw = n - m maximum = n - 1 return raw / float(maximum) report = { "depth": self.top_node.depth_score(n, m), "s_center": s_center_score(), } cumulative = max(report.values()) report["_cumulative"] = cumulative if full_report: return report return cumulative def __getitem__(self, index): """ Returns a set of all intervals overlapping the given index or slice. Completes in O(k * log(n) + m) time, where: * n = size of the tree * m = number of matches * k = size of the search range (this is 1 for a point) :rtype: set of Interval """ try: start, stop = index.start, index.stop if start is None: start = self.begin() if stop is None: return set(self) if stop is None: stop = self.end() return self.search(start, stop) except AttributeError: return self.search(index) def __setitem__(self, index, value): """ Adds a new interval to the tree. A shortcut for add(Interval(index.start, index.stop, value)). If an identical Interval object with equal range and data already exists, does nothing. Completes in O(log n) time. """ self.addi(index.start, index.stop, value) def __delitem__(self, point): """ Delete all items overlapping point. """ self.remove_overlap(point) def __contains__(self, item): """ Returns whether item exists as an Interval in the tree. This method only returns True for exact matches; for overlaps, see the overlaps() method. Completes in O(1) time. :rtype: bool """ # Removed point-checking code; it might trick the user into # thinking that this is O(1), which point-checking isn't. #if isinstance(item, Interval): return item in self.all_intervals #else: # return self.contains_point(item) def containsi(self, begin, end, data=None): """ Shortcut for (Interval(begin, end, data) in tree). Completes in O(1) time. :rtype: bool """ return Interval(begin, end, data) in self def __iter__(self): """ Returns an iterator over all the intervals in the tree. Completes in O(1) time. :rtype: collections.Iterable[Interval] """ return self.all_intervals.__iter__() iter = __iter__ def __len__(self): """ Returns how many intervals are in the tree. Completes in O(1) time. :rtype: int """ return len(self.all_intervals) def __eq__(self, other): """ Whether two IntervalTrees are equal. Completes in O(n) time if sizes are equal; O(1) time otherwise. 
:rtype: bool """ return ( isinstance(other, IntervalTree) and self.all_intervals == other.all_intervals ) def __repr__(self): """ :rtype: str """ ivs = sorted(self) if not ivs: return "IntervalTree()" else: return "IntervalTree({0})".format(ivs) __str__ = __repr__ def __reduce__(self): """ For pickle-ing. :rtype: tuple """ return IntervalTree, (sorted(self.all_intervals),)
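# A minimal usage sketch of the IntervalTree methods above, assuming they
# behave like the `intervaltree` package this code appears to come from (the
# import and the constructor taking an iterable of Intervals are assumptions):
from intervaltree import Interval, IntervalTree

tree = IntervalTree([Interval(0, 10, 'a'), Interval(5, 15, 'b')])
assert tree.overlaps(7)            # point query
assert tree.overlaps(12, 20)       # range query
hits = tree[5:12]                  # __getitem__ with a slice -> set of Intervals
tree.merge_overlaps(data_reducer=lambda cur, new: cur + new)
assert sorted(tree) == [Interval(0, 15, 'ab')]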
class Scanner:
    def __init__(self, fileName):  # .cpp filename
        self.fileName = fileName
        # file to store the program internal form
        self.pifFileName = "output/pif.txt"
        # clear the file if it already exists
        open(self.pifFileName, 'w').close()
        # file to store the identifiers table
        self.outputIdentifiersTable = "output/id_table.txt"
        # file to store the constants table
        self.outputConstantsTable = "output/const_table.txt"
        # dictionary for all the program symbols (if, for, while, else, int, float, etc.)
        self.codificationTable = {}
        # dictionary for storing the identifiers, as a pair identifier -> integer id
        self.identifiersTable = SortedDict()
        # dictionary for storing the constants, as a pair constant -> integer id
        self.constantsTable = SortedDict()
        # load all the toy language symbols
        self.populateCodificationTable()
        self.inputText = self.getInputText()
        self.currentIndex = 0

    # loads the codification (symbols) table into memory from disk
    def populateCodificationTable(self):
        try:
            # open the file
            f = open("files/codifications.dat")
            # iterate through its lines
            for line in f.readlines():
                # get the symbol and the symbol id
                (symbol, sid) = line.split()
                # add to the symbols table
                self.codificationTable[symbol] = sid
        except IOError:
            # in case there is no such file, fail fast!
            print("ERROR: Symbols file not found!")
            sys.exit()

    # get all the text from the source file
    def getInputText(self):
        try:
            # open the file for reading
            f = open(self.fileName, "r")
            return f.read() + "\n"
        # if the file was not found, print an error and fail fast
        except IOError:
            print("ERROR: Source file not found!")
            sys.exit()  # actually fail fast, as the comment above promises

    # get the next character from the input. Raises EOFError if no more characters
    def getNextCharacter(self):
        if self.currentIndex >= len(self.inputText):
            raise EOFError
        c = self.inputText[self.currentIndex]
        self.currentIndex += 1
        return c

    # peek at the next character without consuming it
    def pickNextCharacter(self):
        if self.currentIndex >= len(self.inputText):
            return ''
        return self.inputText[self.currentIndex]

    # appends buff to the file self.pifFileName
    def appendToPifFile(self, buff, token=''):
        # for debugging
        # buff = buff.rstrip('\n') + " " + token + "\n"
        # open file
        with open(self.pifFileName, "a") as f:
            # write the string buff as a new line
            f.write(buff)

    # writes the identifier and constant tables
    def writeTables(self):
        # open file for identifiers table
        with open(self.outputIdentifiersTable, "w") as f:
            # iterate through the identifiers table
            for (key, val) in self.identifiersTable.items():
                # write the pair on a new line
                f.write("%s %s\n" % (key, val))
        # open file for constants table
        with open(self.outputConstantsTable, "w") as f:
            # iterate through the constants table
            for (key, val) in self.constantsTable.items():
                # write the pair on a new line
                f.write("%s %s\n" % (key, val))

    # decides whether _token is a symbol or an identifier
    def addToken(self, _token):
        # if the token is in the codification table, then it's a symbol
        if _token in self.codificationTable:
            self.appendToPifFile(str(self.codificationTable[_token]) + " -1\n", _token)
        # else, it must be an identifier
        else:
            self.addIdentifier(_token)

    def addSymbol(self, _symbol):
        # if the symbol is in the symbol table
        if _symbol in self.codificationTable:
            # print it
            self.appendToPifFile(str(self.codificationTable[_symbol]) + " -1\n", _symbol)
            return True
        else:
            # return False because _symbol is not a valid symbol
            return False

    # prints an identifier and its id to the output file
    def addIdentifier(self, _id):
        if _id not in self.identifiersTable:
            self.identifiersTable[_id] = len(self.identifiersTable) + 1
        # print to the program internal form output file
        self.appendToPifFile(
            self.codificationTable["identifier"] + " " +
            str(self.identifiersTable[_id]) + "\n", _id)

    # adds a constant to the table and prints it to the output file
    def addConstant(self, _val):
        # assign a new, unused integer id for the current constant
        if _val not in self.constantsTable:
            self.constantsTable[_val] = len(self.constantsTable) + 1
        # print to the program internal form output file
        self.appendToPifFile(
            self.codificationTable["constant"] + " " +
            str(self.constantsTable[_val]) + "\n", _val)

    # tokenizes the source file
    def tokenize(self):
        try:
            ch = self.getNextCharacter()
            # iterate character by character
            while True:
                # in case we have an alphabet character (a, b, .. z, A, B, .. Z)
                if ch.isalpha():
                    # variable to store the current identifier
                    _id = ""
                    # we iterate while we have valid identifier characters
                    while ch.isalpha() or ch == '_':
                        # append the current character to _id
                        _id += ch
                        # get the next character
                        ch = self.getNextCharacter()
                    # at the end, if the length of the identifier is more than
                    # the maximum allowed length, throw an error and fail fast
                    if len(_id) > 250:
                        print("ERROR: Identifier has too many characters.")
                        sys.exit()
                    # add the token
                    self.addToken(_id)
                # in case we have a digit (0-9)
                elif ch.isdigit() or (ch in "+-" and self.pickNextCharacter().isdigit()):
                    # variable stores the current constant
                    _val = ch
                    ch = self.getNextCharacter()
                    # while there is a digit or if the current character is .
                    while ch.isdigit() or ch == '.':
                        # append the character to the constant
                        _val += ch
                        # get next character
                        ch = self.getNextCharacter()
                    # add the constant to the program internal form and to the internal hashmaps
                    self.addConstant(_val)
                # ignore whitespace characters
                elif ch.isspace():
                    # get the next character
                    ch = self.getNextCharacter()
                # else, we may have a symbol or an invalid identifier
                else:
                    # peek at the next character (renamed from `next`, which
                    # shadowed the builtin)
                    next_ch = self.pickNextCharacter()
                    # for the two-character symbols <=, >=, != and ==, consume
                    # the peeked character and treat the pair as one symbol
                    # (the original appended it to an unrelated variable, so
                    # compound symbols were never matched)
                    if ch + next_ch in ["<=", ">=", "!=", "=="]:
                        ch = ch + next_ch
                        self.getNextCharacter()
                    # if we couldn't add the symbol, we throw an error because
                    # it is an unexpected symbol/identifier
                    if not self.addSymbol(ch):
                        print("ERROR: Unexpected token '%s'" % ch)
                        sys.exit()
                    ch = self.getNextCharacter()
        # in case we reached the end of the iteration
        except EOFError:
            self.writeTables()
            print("> finish")
            return
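# Hypothetical driver for the Scanner above; the source file name is a
# placeholder, and files/codifications.dat plus the output/ directory are
# expected to exist, as hard-coded in the class.
if __name__ == '__main__':
    scanner = Scanner("input/program.cpp")
    scanner.tokenize()  # writes output/pif.txt, output/id_table.txt and output/const_table.txt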
class CacheStore(object):
    class CacheItem(object):
        __slots__ = ('valid', 'data')

        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        with self.lock:
            try:
                item = self.store[key]
                item.data = data
                item.valid.set()
                return False
            except KeyError:
                item = self.CacheItem()
                item.data = data
                item.valid.set()
                self.store[key] = item
                return True

    def update(self, **kwargs):
        with self.lock:
            items = {}
            created = []
            updated = []
            for k, v in kwargs.items():
                items[k] = self.CacheItem()
                items[k].data = v
                items[k].valid.set()
                if k in self.store:
                    updated.append(k)
                else:
                    created.append(k)

            self.store.update(**items)
            return created, updated

    def update_one(self, key, **kwargs):
        with self.lock:
            item = self.get(key)
            if not item:
                return False

            for k, v in kwargs.items():
                # `set` here is presumably the dotted-path setter from the
                # same external query helper module used by query() below,
                # not the builtin set()
                set(item, k, v)

            self.put(key, item)
            return True

    def update_many(self, key, predicate, **kwargs):
        with self.lock:
            updated = []
            for k, v in self.itervalid():
                if predicate(v):
                    if self.update_one(k, **kwargs):
                        # was `updated.append(key)`, which recorded the
                        # unrelated `key` argument instead of the matched key
                        updated.append(k)

            return updated

    def get(self, key, default=None, timeout=None):
        item = self.store.get(key)
        if item:
            item.valid.wait(timeout)
            return item.data

        return default

    def remove(self, key):
        with self.lock:
            try:
                del self.store[key]
                return True
            except KeyError:
                return False

    def remove_many(self, keys):
        with self.lock:
            removed = []
            for key in keys:
                try:
                    del self.store[key]
                    removed.append(key)
                except KeyError:
                    pass

            return removed

    def clear(self):
        with self.lock:
            items = list(self.store.keys())
            self.store.clear()
            return items

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        item = self.store.get(key)
        if item:
            return item.valid.is_set()

        return False

    def invalidate(self, key):
        with self.lock:
            item = self.store.get(key)
            if item:
                item.valid.clear()

    def itervalid(self):
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)

        return result

    def query(self, *filter, **params):
        return query(list(self.validvalues()), *filter, **params)
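# A single-threaded sketch of the CacheStore above: put() reports whether the
# key was created or updated, and get() blocks on the item's Event until the
# entry is valid (or the timeout expires), then returns the stored data.
cache = CacheStore()
assert cache.put('a', {'id': 'a', 'v': 1}) is True    # created
assert cache.put('a', {'id': 'a', 'v': 2}) is False   # updated in place
assert cache.get('a')['v'] == 2
cache.invalidate('a')
assert cache.is_valid('a') is False
# once the wait times out, get() still returns the (now stale) data rather
# than the default
assert cache.get('a', timeout=0.01)['v'] == 2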
def test_items_view_index(): mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)] temp = SortedDict(mapping[:13]) items = temp.items() with pytest.raises(ValueError): items.index(('f', 100))
def general(table, zoom_levels, db_pair, cli_args, id_col='id', batch_rows=Constants.batch_rows, condition=None, printer=Prindenter()): # prepare for recursion if not already in it if type(table) == str: printer("[Examining table: {}]".format(table)) with Indent(printer): try: table = pre_general(table, db_pair, cli_args, id_col, batch_rows, condition=condition, printer=printer) except sh.ErrorReturnCode_1 as err: # handle schema mismatches with a sledgehammer # TODO: allow user to provide path to migration scripts, # run outstanding ones if they show up in migration_tracker if "Column count doesn't match" in str(err): printer("Upstream schema differs, pulling it down") with Indent(printer): # get upstream schema filename = 'newschema_{}.sql'.format(table) mysqldump_schema_nofk(cli_args.upstream, filename, restrict_to_table=table, printer=printer) # drop downstream table drop = 'drop table {};'.format(table) with Connection(db_pair.downstream.args ) as downstream_connection: with downstream_connection.cursor( ) as downstream_cursor: show_do_query(downstream_cursor, drop, printer=printer) # recreate downstream table mysqlload(cli_args.downstream, filename, printer=printer) # try again printer("[New schema loaded, downstream table is empty]") table = pre_general(table, db_pair, cli_args, id_col, condition=condition, printer=printer) else: raise if type(zoom_levels) == list: # set up for recursion if table.needs_work: printer( "Sync: 'general' received magnification list instead of zoom_level map, building zoom_level map...", end='') with Indent(printer): # prepare the zoom-level map zoom_levels = SortedDict({x: None for x in zoom_levels}) # append the outermost zoom level (completed in general) zoom_levels[table.upstream.max_id] = [ Ids.Interval(0, table.upstream.max_id) ] else: printer("Sync: 'general' finished early: presync was sufficient") return printer("done\n") # begin recursion printer("[Sync: 'general' top-level recursion]") with Indent(printer): return general(table, zoom_levels, db_pair, cli_args, condition=condition, printer=printer) # if control gets this far, recursion has begun granularity = None scopes = None # examine the scope map by decreasing magnification # find the transition from unknowns to knowns for ((smaller_granularity, smaller_scope), (larger_granularity, larger_scope)) \ in reversed(list(zip(zoom_levels.items(), zoom_levels.items()[1:]))): if not smaller_scope: scopes = larger_scope # we'll be filling these out granularity = smaller_granularity # by breaking them into pieces this big break if not scopes: printer( "Zoom-level map fully populated, no more 'general' recursions will follow" ) conditions = [] final_size = zoom_levels.keys()[0] final_scopes = list(zoom_levels.values()[0]) final_scopes.sort() if final_size <= 1 and type(final_scopes[0]) == int: printer("Scanned down to individual rows") row_lists = Ids.partition(Constants.batch_fingerprints, final_scopes) for rows in row_lists: conditions.append("{} in ({})".format( table.id_col, ",".join([str(x) for x in rows]))) elif final_size > 1 and isinstance(final_scopes[0], Ids.Interval): printer("Scanned down to row-ranges of size {}".format(final_size)) interval_lists = Ids.partition(Constants.batch_fingerprints, final_scopes) conditions = [] for intervals in interval_lists: conditions.append(" OR ".join([ "{} BETWEEN {} AND {}".format(table.id_col, i.start, i.end) for i in intervals ])) else: raise ValueError( "Can't decide whether to transfer rows, or row-ranges") printer("[Transfer proceeding in {} 
batches]".format(len(conditions))) with Indent(printer): for condition in conditions: # dump upstream data mysqldump_data(cli_args.upstream, table.name, condition, printer=printer) # clear old rows from downstream delete = 'delete from {} where {};'.format( table.name, condition) with Connection( db_pair.downstream.args) as downstream_connection: with downstream_connection.cursor() as cursor: show_do_query(cursor, delete, printer=printer) # load new rows into downstream mysqlload(cli_args.downstream, table.name, printer=printer) with Connection(db_pair.downstream.args) as downstream_connection: with downstream_connection.cursor() as downstream_cursor: with db_pair.upstream.connection.cursor() as upstream_cursor: table.is_synced_warn(upstream_cursor, downstream_cursor, message='(after general sync)', printer=printer) table.try_sync_schema(upstream_cursor, downstream_cursor, throw=True, printer=printer) # if we found a row with unpopulated scopes, then we have more scanning to do else: printer( "[Given {} larger-granules, making smaller granules of size {} and fingerprinting them]" .format(len(scopes), granularity)) next_scopes = [] with Indent(printer): with Connection(db_pair.downstream.args) as downstream_connection: with downstream_connection.cursor() as downstream_cursor: with db_pair.upstream.connection.cursor( ) as upstream_cursor: # new sessions, reset group_concat (default is oddly low) db_pair.reup_maxes(downstream_cursor, upstream_cursor, printer=printer) #for scope in scopes: # next_scopes += list(Db.find_diffs(upstream_cursor, downstream_cursor, table, scope, granularity, # printer=printer)) # rather than making a round trip for each one, lets do them all at once next_scopes += list( Db.find_diffs(upstream_cursor, downstream_cursor, table, scopes, granularity, condition=condition, printer=printer)) printer( '' ) # Db.find_diffs ends without a newline... add one # if no ranges were found to contain diffs if len( next_scopes ) == 0: # note that any([0]) is False, but len([0]) == 0 is True # we want the latter, else we ignore row 0 message = textwrap.dedent(""" Found no ranges with diffs. Nothing to do. If the tables were truly identical, TABLE CHECKSUM would have prevented sync from gettin this far. Perhaps some columns were ignored during the scan? (e.g. timestamps, as an ugly hack to avoid thinking about time zones) """) printer(message) printer.append_summary( "{} : IDENTICAL? (TABLE CHECKSUM failed but a custom MD5 scan found no diffs)" .format(table.name)) # if no ranges were found to contain diffs else: zoom_levels[granularity] = next_scopes printer("[Another 'general' recursion]") with Indent(printer): return general(table, zoom_levels, db_pair, cli_args, condition=condition, printer=printer)
def generate_report(year=None, quarter=None):
    dir = os.path.dirname(__file__)
    config = ConfigParser()
    config.read(os.path.join(dir, 'config.cfg'))
    report = SortedDict()
    gbif = gbif_downloads()
    for date_str, downloads in gbif.items():
        m, y = map(int, date_str.split('-'))
        if year and y != year:
            continue
        if quarter and m not in quarter_months[quarter]:
            continue
        report_add_entry(report, y, m, 'gbif_records', downloads['records'])
        report_add_entry(report, y, m, 'gbif_download_events',
                         downloads['download_events'])

    last_timestamp = 0
    # Load the legacy data
    # This has been derived from the redis/celery task queue, which is how we
    # did things before storing the download count in the ckanpackager stats.db
    with open(os.path.join(dir, 'src', 'legacy.json')) as data_file:
        data = json.load(data_file)
    for row in data:
        ts = timestring.Date(row['date'])
        # We want to know what the last timestamp is
        if ts.to_unixtime() > last_timestamp:
            last_timestamp = ts.to_unixtime()
        process_row(report, year, quarter, ts, row.get('resource_id'),
                    row.get('count', None))

    db = config.get('sqlite', 'db')
    if not os.path.isfile(db):
        raise IOError('Stats.db does not exist')
    conn = sqlite3.connect(db)
    # Retrieve all requests received after the last entry in the legacy data
    # (parameterized rather than string-interpolated, so the timestamp is
    # compared as a number, not as a quoted string)
    requests = conn.execute("SELECT * FROM requests WHERE timestamp > ?",
                            (last_timestamp,))
    # Loop through requests, adding them to the stats
    for request in requests:
        resource_id = request[2]
        ts = datetime.datetime.fromtimestamp(request[3])
        count = int(request[4]) if request[4] else None
        process_row(report, year, quarter, ts, resource_id, count)

    header = OrderedDict([
        ('collection_records', 'Collection records'),
        ('other_records', 'Other records'),
        ('gbif_records', 'GBIF records'),
        ('collection_download_events', 'Collection download events'),
        ('other_download_events', 'Other download events'),
        ('gbif_download_events', 'GBIF download events'),
    ])

    table = Texttable()
    table.set_deco(Texttable.HEADER)
    rows = []
    totals = OrderedDict([(k, 0) for k in header.keys()])
    for year, months in report.items():
        if len(rows) == 0:
            # list(...) is needed on Python 3, where dict views cannot be
            # concatenated to a list directly
            rows.append(['Month'] + list(header.values()))
        for month, items in months.items():
            row = [get_color_string(
                bcolors.GREEN,
                '%s %s' % (calendar.month_abbr[month], str(year)[2:4]))]
            for key in header.keys():
                row.append(str(items.get(key, '')))
                # Update totals
                totals[key] += items.get(key, 0)
            rows.append(row)
    rows.append([get_color_string(bcolors.YELLOW, str(t))
                 for t in ['Totals'] + list(totals.values())])
    table.add_rows(rows)
    print(table.draw())
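# report_add_entry is called above but defined elsewhere; a plausible minimal
# implementation (an assumption, not the project's actual helper), given that
# the rendering loop reads report[year][month][key]:
def report_add_entry(report, year, month, key, value):
    if value is None:
        return
    months = report.setdefault(year, SortedDict())
    items = months.setdefault(month, {})
    items[key] = items.get(key, 0) + value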
class KeyedRegion:
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering
    that offset. It assumes that no variable in this region overlaps with
    another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """

    __slots__ = ('_storage', '_object_mapping', '_phi_node_contains')

    def __init__(self, tree=None, phi_node_contains=None):
        self._storage = SortedDict() if tree is None else tree
        self._object_mapping = weakref.WeakValueDictionary()
        self._phi_node_contains = phi_node_contains

    def __getstate__(self):
        return self._storage, dict(self._object_mapping), self._phi_node_contains

    def __setstate__(self, s):
        self._storage, om, self._phi_node_contains = s
        self._object_mapping = weakref.WeakValueDictionary(om)

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """
        if type(offset) is not int:
            raise TypeError("KeyedRegion only accepts concrete offsets.")
        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion(phi_node_contains=self._phi_node_contains)

        kr = KeyedRegion(phi_node_contains=self._phi_node_contains)
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        kr._object_mapping = self._object_mapping.copy()
        return kr

    def merge(self, other, replacements=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: self
        """
        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for so in item.stored_objects:  # type: StoredObject
                if replacements and so.obj in replacements:
                    so = StoredObject(so.start, replacements[so.obj], so.size)
                self._object_mapping[so.obj_id] = so
                self.__store(so, overwrite=False)

        return self

    def merge_to_top(self, other, replacements=None, top=None):
        """
        Merge another KeyedRegion into this KeyedRegion, but mark all
        variables with different values as TOP.

        :param other: The other instance to merge with.
        :param replacements:
        :return: self
        """
        for _, item in other._storage.items():  # type: RegionObject
            for so in item.stored_objects:  # type: StoredObject
                if replacements and so.obj in replacements:
                    so = StoredObject(so.start, replacements[so.obj], so.size)
                self._object_mapping[so.obj_id] = so
                self.__store(so, overwrite=False, merge_to_top=True, top=top)

        return self

    def replace(self, replacements):
        """
        Replace variables with other variables.

        :param dict replacements: A dict of variable replacements.
        :return: self
        """
        for old_var, new_var in replacements.items():
            old_var_id = id(old_var)
            if old_var_id in self._object_mapping:
                # FIXME: we need to check if old_var still exists in the storage
                old_so = self._object_mapping[old_var_id]  # type: StoredObject
                self._store(old_so.start, new_var, old_so.size, overwrite=True)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
:return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = {} for key in sorted(keys): ro = self._storage[key] variables = [obj.obj for obj in ro.stored_objects] offset_to_vars[key] = variables s = [] for offset, variables in offset_to_vars.items(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.add_object(start, variable, size) def add_object(self, start, obj, object_size): """ Add/Store an object to this region at the given offset. :param start: :param obj: :param int object_size: Size of the object :return: """ self._store(start, obj, object_size, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.set_object(start, variable, size) def set_object(self, start, obj, object_size): """ Add an object to this region at the given offset, and remove all other objects that are fully covered by this object. :param start: :param obj: :param object_size: :return: """ self._store(start, obj, object_size, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index objects covering the given offset) of a specific offset. :param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A set of variables. :rtype: set """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects def get_objects_by_offset(self, start): """ Find objects covering the given region offset. :param start: :return: """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects def get_all_variables(self): """ Get all variables covering the current region. :return: A set of all variables. """ variables = set() for ro in self._storage.values(): ro: RegionObject variables |= ro.internal_objects return variables # # Private methods # def _store(self, start, obj, size, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param obj: The object to store. :param int size: Size of the object to store. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ stored_object = StoredObject(start, obj, size) self._object_mapping[stored_object.obj_id] = stored_object self.__store(stored_object, overwrite=overwrite) def __store(self, stored_object, overwrite=False, merge_to_top=False, top=None): """ Store a variable into the storage. :param StoredObject stored_object: The descriptor describing start address and the variable. :param bool overwrite: Whether existing objects should be overwritten or not. True to make a strong update, False to make a weak update. 
:return: None """ start = stored_object.start object_size = stored_object.size end = start + object_size # region items in the middle overlapping_items = list(self._storage.irange(start, end - 1)) # is there a region item that begins before the start and overlaps with this variable? floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginning overlapping_items.insert(0, floor_key) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, object_size, {stored_object})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(stored_object) else: self._add_object_with_check(b, stored_object, merge_to_top=merge_to_top, top=top) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {stored_object}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(stored_object) else: self._add_object_with_check(a, stored_object, merge_to_top=merge_to_top, top=top) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(stored_object) else: self._add_object_with_check(item, stored_object, merge_to_top=merge_to_top, top=top) to_update[item.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next( self._storage.irange(maximum=end - 1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next( self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_with_check(self, item, stored_object, merge_to_top=False, top=None): if len({stored_object.obj} | item.internal_objects) > 1: if merge_to_top: item.set_object( StoredObject(stored_object.start, top, stored_object.size)) return if self._phi_node_contains is not None: # check if `item` is a phi node that contains stored_object.obj for so in item.internal_objects: if self._phi_node_contains(so, stored_object.obj): # yes! so we want to skip this object return # check if `stored_object.obj` is a phi node that contains item.internal_objects if all( self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects): # yes! item.set_object(stored_object) return item.add_object(stored_object)
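# The core lookup in KeyedRegion._get_container is a "floor entry" query:
# find the greatest start offset <= the requested offset via
# SortedDict.irange(maximum=..., reverse=True). The pattern in isolation:
from sortedcontainers import SortedDict

regions = SortedDict({0: 'object at 0, size 8', 16: 'object at 16, size 4'})

def floor_key(sd, offset):
    try:
        return next(sd.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None  # no region starts at or before this offset

assert floor_key(regions, 5) == 0
assert floor_key(regions, 17) == 16
assert floor_key(regions, -1) is None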
('召回期高热总投稿数(火山+抖→火)', 0), ('抖→火召回期高热当日总vv', 0), ('抖→火召回期高热总投稿数', 0), ('抖→火召回期投稿当日火山端展现uv', 0), ('抖→火召回期投稿当日火山端播放uv', 0), ('火→抖日新增投稿数', 4), ('火→抖召回期投稿数', 1), ('火→抖日新增投稿当日抖音端vv', 6), ('火→抖日新增投稿当日火山端vv', 5), ('火→抖召回期投稿当日抖音端vv', 3), ('火→抖召回期投稿当日火山端vv', 2), ('日期', 0)] COMPUTE_COLS = SortedDict({ "抖→火召回期投稿vv占当日火山端vv占比": ("抖→火召回期投稿当日火山端vv", "火山端当日总vv", 4), "抖→火召回期高热vv占当日火山端高热vv占比": ("抖→火召回期高热当日总vv", "召回期高热火山端当日总vv", 5) }) AW2HS = [(each[0], each[1]) for each in COLS_TUPLE if each[0].startswith('抖→火') and each[1] > 0] HS2AW = [(each[0], each[1]) for each in COLS_TUPLE if each[0].startswith('火→抖') and each[1] > 0] for k, v in COMPUTE_COLS.items(): AW2HS.append((k, v[2])) AW2HS.sort(key=lambda x: x[1]) HS2AW.sort(key=lambda x: x[1]) # SQL SQL = """ select * from ies_hotsoon_bgpm.rpt_vcd_items_stats_operation_day where `date` >= '${date-14}' """ def post_to_lark(title, text, hook): url = f'https://open.feishu.cn/open-apis/bot/hook/{hook}' s = requests.post(url, data=json.dumps({"title": title, "text": text})) return s.json()
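# Hypothetical call of the post_to_lark helper above; the hook id is a
# placeholder, not a real webhook.
resp = post_to_lark(title="daily vv report", text="see attached stats", hook="xxxx-hook-id")
print(resp)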
class Leaderboard: def __init__(self): self.scores = {} self.sortedScores = SortedDict() ''' O(logN) for addScore. This is because each addition to the BST takes a logarithmic time for search. The addition itself once the location of the parent is known, takes constant time. ''' def addScore(self, playerId: int, score: int) -> None: # The scores dictionary simply contains the mapping from the # playerId to their score. The sortedScores contain a BST with # key as the score and value as the number of players that have # that score. if playerId not in self.scores: self.scores[playerId] = score self.sortedScores[-score] = self.sortedScores.get(-score, 0) + 1 else: preScore = self.scores[playerId] val = self.sortedScores.get(-preScore) if val == 1: del self.sortedScores[-preScore] else: self.sortedScores[-preScore] = val - 1 newScore = preScore + score self.scores[playerId] = newScore self.sortedScores[-newScore] = self.sortedScores.get(-newScore, 0) + 1 ''' It takes O(K) for our top function since we simply iterate over the keys of the TreeMap and stop once we're done considering K scores. ''' def top(self, K: int) -> int: count, total = 0, 0 for key, value in self.sortedScores.items(): # already sorted by keys times = self.sortedScores.get(key) # times == value for _ in range(times): total += -key count += 1 # Found top-K scores, break. if count == K: break # Found top-K scores, break. if count == K: break return total ''' O(logN) for reset since we need to search for the score in the BST and then update/remove it. Note that this complexity is in the case when every player always maintains a unique score. ''' def reset(self, playerId: int) -> None: preScore = self.scores[playerId] if self.sortedScores[-preScore] == 1: del self.sortedScores[-preScore] else: self.sortedScores[-preScore] -= 1 del self.scores[playerId]
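# A quick check of the Leaderboard above. Scores are stored negated in the
# SortedDict so that iterating items() visits the highest scores first.
board = Leaderboard()
board.addScore(1, 50)
board.addScore(2, 30)
board.addScore(3, 40)
assert board.top(2) == 90      # 50 + 40
board.addScore(2, 25)          # player 2 now has 30 + 25 = 55
assert board.top(1) == 55
board.reset(2)
assert board.top(2) == 90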
class Board(): def __init__(self, exchange_id, symbol): self.__exchange_id = exchange_id self.__symbol = symbol self.__queue = Queue() self.__wsclient = WebsocketClient(self.__queue, exchange_id, symbol) self.__ccxtclient = CcxtClient(exchange_id, symbol) self.__logger = get_ccxt_logger() self.bids = SortedDict() self.asks = SortedDict() self.__build_board() producer_worker = threading.Thread(target=self.__wsclient.fetch_ticks, daemon=True) consumer_worker = threading.Thread(target=self.__update_board, daemon=True) producer_worker.start() consumer_worker.start() def __append_to_board(self, board, order_list): if len(order_list) == 0: return for order in order_list: rate = int(float(order[0])) amount = float(order[1]) if amount != 0: board[rate] = amount elif rate in board: del board[rate] def __build_board(self): res = self.__ccxtclient.fetch_order_book() bids = res["bids"] asks = res["asks"] self.__append_to_board(self.bids, bids) self.__append_to_board(self.asks, asks) def __update_board(self): while True: if not self.__queue.empty(): data = self.__queue.get() if data.type == WsDataType.TRADES: self.__remove_order(data) else: self.__append_order(data) self.__queue.task_done() def __append_order(self, data): self.__append_to_board(self.bids, data.bids) self.__append_to_board(self.asks, data.asks) def __remove_order(self, data): rate = int(data.rate) amount = data.amount side = data.side if data.side == "sell": board = self.bids else: board = self.asks if amount != 0: if rate in board: board[rate] = float( Decimal(str(board[rate])) - Decimal(str(amount))) if board[rate] <= 0: del board[rate] elif rate in board: del board[rate] rates = [] if side == 'sell': for k in board.keys()[::-1]: if k > rate: rates.append(k) else: break else: for k in board.keys(): if k < rate: rates.append(k) else: break for rate in rates: del board[rate] def __logging_tick(self, bid, ask): self.__logger.info('tick bid=%s ask=%s (%s:%s)', bid, ask, self.__exchange_id.value, self.__symbol) def get_eff_tick(self, amount=1.0): bids = self.bids.items()[::-1] asks = self.asks.items() if len(bids) == 0 or len(asks) == 0: return None bid_total_amount = 0 bid_rate = bids[0][0] for bid in bids: bid_total_amount += bid[1] if bid_total_amount >= amount: bid_rate = bid[0] break ask_total_amount = 0 ask_rate = asks[0][0] for ask in asks: ask_total_amount += ask[1] if ask_total_amount >= amount: ask_rate = ask[0] break self.__logging_tick(bid_rate, ask_rate) timestamp = dt.now_timestamp_ms() tick = Tick(self.__exchange_id, timestamp, bid_rate, ask_rate) return tick def display(self): print("=== bids(買い注文) ===") print(list(self.bids.items())) print("=== asks(売り注文) ===") print(list(self.asks.items())) print()
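# get_eff_tick above walks the book from the best price until the cumulative
# size reaches the requested amount. The same computation on a bare SortedDict
# order book, independent of the websocket plumbing:
from sortedcontainers import SortedDict

asks = SortedDict({100: 0.4, 101: 0.4, 102: 5.0})  # rate -> amount

def effective_ask(book, amount=1.0):
    total = 0.0
    rate = book.keys()[0]        # best (lowest) ask as the fallback
    for r, a in book.items():    # ascending rate order
        total += a
        if total >= amount:
            rate = r
            break
    return rate

assert effective_ask(asks, 0.3) == 100
assert effective_ask(asks, 1.0) == 102   # 0.4 + 0.4 + 5.0 first crosses 1.0 at 102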
# setup counter to store menu choice menu_choice = 0 # display your menu print_menu() # as long as the menu choice isn't "quit" get user options while menu_choice != 5: # get menu choice from user menu_choice = int(input("Type in a number (1-5): ")) # view current entries if menu_choice == 1: print("Current Users:") for x, y in usernames.items(): print("Name: {} \tUser Name: {} \n".format(x, y)) # add an entry elif menu_choice == 2: print("Add User") name = input("Name: ") username = input("User Name: ") usernames[name] = username # remove an entry elif menu_choice == 3: print("Remove User") name = input("Name: ") if name in usernames: del usernames[name]
def get_weights(n_nt, data_path):
    """ Create a pandas dataframe of the specified data that will be used for
    creating the heatmap

    n_nt: integer, indicates how many neighbouring positions both upstream
        and downstream of the SNP are considered
    data_path: string, directory where the data can be found
    """

    # Create a dictionary
    weight_samples = {}
    # Get the directory where the numpy arrays of the weights are stored
    working_dir = os.path.dirname(os.path.abspath(__file__))
    path_name = working_dir + data_path
    # Get the data for every file with the correct name in the specified directory
    for file in glob.glob(path_name):
        # Read the h5py file back to a numpy array
        h5f = h5py.File(file, 'r')
        # Get the correct data for every dataset, varying in data size
        for size in [2000, 20000, 200000, 2000000]:
            size_name = size
            try:
                # Load the data back to a numpy array
                weights = h5f['dataset_{}'.format(size_name)][:]
                # Convert the data into a list
                weights = weights.ravel().tolist()
                # Add the data to the correct dictionary key
                if size_name in weight_samples:
                    weight_samples[size_name] += [weights]
                else:
                    weight_samples.update({size_name: [weights]})
            except KeyError:
                # the file has no dataset of this size; skip it
                # (was a bare `except`, which would also hide real errors)
                pass

    # Sort the dictionary by data size
    my_dic = SortedDict(weight_samples)
    # Convert the dictionary into a list of tuples (data size, [[samples], [samples]])
    complete_samples = []
    for sample_name, sample_weights in my_dic.items():
        complete_samples.append((sample_name, sample_weights))

    # Get the y-labels for the heatmap, and the data values
    row_names = []
    weights = []
    for samples in complete_samples:
        # position 1 contains the sample weights and position 0 the sample size
        n_samples = len(samples[1])
        for i in range(n_samples):
            row_names.append(samples[0])
        # Get the sample weights
        for sample in samples[1]:
            weights.append(sample)

    # Get the feature labels; x-labels of the heatmap
    feature_names = feature_labels(n_nt)

    # Specify the rows and columns of the data frame
    columns = pd.Index(feature_names, name="features")
    rows = pd.Index(row_names, name="sample size")

    # Create the data frame
    df_weights_samples = pd.DataFrame(data=weights, index=rows, columns=columns)

    return df_weights_samples
class FederationRemoteSendQueue(object): """A drop in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id # Pending presence map user_id -> UserPresenceState self.presence_map = {} # type: Dict[str, UserPresenceState] # Stream position -> list[user_id] self.presence_changed = SortedDict( ) # type: SortedDict[int, List[str]] # Stores the destinations we need to explicitly send presence to about a # given user. # Stream position -> (user_id, destinations) self.presence_destinations = ( SortedDict()) # type: SortedDict[int, Tuple[str, List[str]]] # (destination, key) -> EDU self.keyed_edu = {} # type: Dict[Tuple[str, tuple], Edu] # stream position -> (destination, key) self.keyed_edu_changed = (SortedDict() ) # type: SortedDict[int, Tuple[str, tuple]] self.edus = SortedDict() # type: SortedDict[int, Edu] # stream ID for the next entry into presence_changed/keyed_edu_changed/edus. self.pos = 1 # map from stream ID to the time that stream entry was generated, so that we # can clear out entries after a while self.pos_time = SortedDict() # type: SortedDict[int, int] # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): LaterGauge( "synapse_federation_send_queue_%s_size" % (queue_name, ), "", [], lambda: len(queue), ) for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", "edus", "pos_time", "presence_destinations", ]: register(queue_name, getattr(self, queue_name)) self.clock.looping_call(self._clear_queue, 30 * 1000) def _next_pos(self): pos = self.pos self.pos += 1 self.pos_time[self.clock.time_msec()] = pos return pos def _clear_queue(self): """Clear the queues for anything older than N minutes""" FIVE_MINUTES_AGO = 5 * 60 * 1000 now = self.clock.time_msec() keys = self.pos_time.keys() time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO) if not keys[:time]: return position_to_delete = max(keys[:time]) for key in keys[:time]: del self.pos_time[key] self._clear_queue_before_pos(position_to_delete) def _clear_queue_before_pos(self, position_to_delete): """Clear all the queues from before a given position""" with Measure(self.clock, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_changed.keys() i = self.presence_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_changed[key] user_ids = { user_id for uids in self.presence_changed.values() for user_id in uids } keys = self.presence_destinations.keys() i = self.presence_destinations.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_destinations[key] user_ids.update( user_id for user_id, _ in self.presence_destinations.values()) to_del = [ user_id for user_id in self.presence_map if user_id not in user_ids ] for user_id in to_del: del self.presence_map[user_id] # Delete things out of keyed edus keys = self.keyed_edu_changed.keys() i = self.keyed_edu_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.keyed_edu_changed[key] live_keys = set() for edu_key in self.keyed_edu_changed.values(): live_keys.add(edu_key) keys_to_del = [ edu_key for edu_key in self.keyed_edu if edu_key not in live_keys ] for edu_key in keys_to_del: del self.keyed_edu[edu_key] # Delete things out of edu map keys = 
self.edus.keys() i = self.edus.bisect_left(position_to_delete) for key in keys[:i]: del self.edus[key] def notify_new_events(self, current_id): """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. pass def build_and_send_edu(self, destination, edu_type, content, key=None): """As per FederationSender""" if destination == self.server_name: logger.info("Not sending EDU to ourselves") return pos = self._next_pos() edu = Edu( origin=self.server_name, destination=destination, edu_type=edu_type, content=content, ) if key: assert isinstance(key, tuple) self.keyed_edu[(destination, key)] = edu self.keyed_edu_changed[pos] = (destination, key) else: self.edus[pos] = edu self.notifier.on_new_replication_data() def send_read_receipt(self, receipt): """As per FederationSender Args: receipt (synapse.types.ReadReceipt): """ # nothing to do here: the replication listener will handle it. return defer.succeed(None) def send_presence(self, states): """As per FederationSender Args: states (list(UserPresenceState)) """ pos = self._next_pos() # We only want to send presence for our own users, so lets always just # filter here just in case. local_states = list( filter(lambda s: self.is_mine_id(s.user_id), states)) self.presence_map.update( {state.user_id: state for state in local_states}) self.presence_changed[pos] = [state.user_id for state in local_states] self.notifier.on_new_replication_data() def send_presence_to_destinations(self, states, destinations): """As per FederationSender Args: states (list[UserPresenceState]) destinations (list[str]) """ for state in states: pos = self._next_pos() self.presence_map.update( {state.user_id: state for state in states}) self.presence_destinations[pos] = (state.user_id, destinations) self.notifier.on_new_replication_data() def send_device_messages(self, destination): """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. def get_current_token(self): return self.pos - 1 def federation_ack(self, token): self._clear_queue_before_pos(token) async def get_replication_rows( self, instance_name: str, from_token: int, to_token: int, target_row_count: int ) -> Tuple[List[Tuple[int, Tuple]], int, bool]: """Get rows to be sent over federation between the two tokens Args: instance_name: the name of the current process from_token: the previous stream token: the starting point for fetching the updates to_token: the new stream token: the point to get updates up to target_row_count: a target for the number of rows to be returned. Returns: a triplet `(updates, new_last_token, limited)`, where: * `updates` is a list of `(token, row)` entries. * `new_last_token` is the new position in stream. * `limited` is whether there are more updates to fetch. """ # TODO: Handle target_row_count. # To handle restarts where we wrap around if from_token > self.pos: from_token = -1 # list of tuple(int, BaseFederationRow), where the first is the position # of the federation stream. 
rows = [] # type: List[Tuple[int, BaseFederationRow]] # Fetch changed presence i = self.presence_changed.bisect_right(from_token) j = self.presence_changed.bisect_right(to_token) + 1 dest_user_ids = [ (pos, user_id) for pos, user_id_list in self.presence_changed.items()[i:j] for user_id in user_id_list ] for (key, user_id) in dest_user_ids: rows.append((key, PresenceRow(state=self.presence_map[user_id]))) # Fetch presence to send to destinations i = self.presence_destinations.bisect_right(from_token) j = self.presence_destinations.bisect_right(to_token) + 1 for pos, (user_id, dests) in self.presence_destinations.items()[i:j]: rows.append(( pos, PresenceDestinationsRow(state=self.presence_map[user_id], destinations=list(dests)), )) # Fetch changes keyed edus i = self.keyed_edu_changed.bisect_right(from_token) j = self.keyed_edu_changed.bisect_right(to_token) + 1 # We purposefully clobber based on the key here, python dict comprehensions # always use the last value, so this will correctly point to the last # stream position. keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} for ((destination, edu_key), pos) in iteritems(keyed_edus): rows.append(( pos, KeyedEduRow(key=edu_key, edu=self.keyed_edu[(destination, edu_key)]), )) # Fetch changed edus i = self.edus.bisect_right(from_token) j = self.edus.bisect_right(to_token) + 1 edus = self.edus.items()[i:j] for (pos, edu) in edus: rows.append((pos, EduRow(edu))) # Sort rows based on pos rows.sort() return ( [(pos, (row.TypeId, row.to_data())) for pos, row in rows], to_token, False, )
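# The send queue above keys everything by a monotonically increasing stream
# position and uses bisect on a SortedDict both to prune old entries and to
# replay a token range. A stripped-down illustration of that pattern:
from sortedcontainers import SortedDict

edus = SortedDict()  # stream position -> payload
pos = 1
for payload in ('a', 'b', 'c', 'd'):
    edus[pos] = payload
    pos += 1

from_token, to_token = 1, 3
i = edus.bisect_right(from_token)  # first position strictly after from_token
j = edus.bisect_right(to_token)    # first position strictly after to_token
assert edus.items()[i:j] == [(2, 'b'), (3, 'c')]  # the (from_token, to_token] window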
def main():
    # test_file = "/run/user/1000/gvfs/smb-share:server=cossartlab.local,share=picardoteam/Behavior Camera/p5_20_02_17/cam 1"
    # print(f"is dir {os.path.isdir(test_file)}")
    # return

    open_avi_for_test = False
    if open_avi_for_test:
        test_avi()
        return

    subject_id = "p8_20_02_27"  # P12_20_01_20 p8_20_01_16
    cam_folder_id_1 = "cam2"  # "cam2"
    cam_folder_id_2 = "a001"  # a000 a001
    if cam_folder_id_2 is None:
        cam_folder_id = "20190430_a002"  # ex cam1_a002, movie1, etc...
    else:
        cam_folder_id = f"{cam_folder_id_1}_{cam_folder_id_2}"

    tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/'
    tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/to_convert/'
    # tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/basler_recordings/'
    # tiffs_path_dir = '/media/julien/dream team/camera/'
    tiffs_path_dir = '/media/julien/Not_today/hne_not_today/data/behavior_movies/to_convert/'
    # On NAS
    # tiffs_path_dir = '/run/user/1000/gvfs/smb-share:server=cossartlab.local,share=picardoteam/Behavior Camera/'

    if cam_folder_id_2 is not None:
        tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id_1, cam_folder_id_2)
        # tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id_2, cam_folder_id_1)
    else:
        tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id)
    # print(f"is dir {os.path.isdir(tiffs_path_dir)}")

    if cam_folder_id_1 is None:
        cam_id = "22983298"
    elif cam_folder_id_1 == "cam1":
        cam_id = "22983298"
    else:
        cam_id = "23109588"
    # cam1: 22983298   cam2: 23109588

    # results_path = '/media/julien/My Book/robin_tmp/cameras/'
    # results_path = os.path.join(results_path, subject_id)
    results_path = "/media/julien/Not_today/hne_not_today/data/behavior_movies/converted_so_far/"

    files_in_dir = [
        item for item in os.listdir(tiffs_path_dir)
        if os.path.isfile(os.path.join(tiffs_path_dir, item)) and
        (item.endswith("tiff") or item.endswith("tif")) and
        (not item.startswith("."))
    ]
    # files_in_dir = sorted_tiff_ls(tiffs_path_dir)
    # print(f"len(files_in_dir) {len(files_in_dir)}")
    # for file_name in files_in_dir[-1000:]:
    #     print(f"{file_name}")

    files_in_dir_dict = SortedDict()
    for file_name in files_in_dir:
        # the frame number sits between the last "_" and the extension;
        # splitext handles both ".tif" and ".tiff" (the original sliced off a
        # fixed 5 characters, which breaks on ".tif" files)
        stem = os.path.splitext(file_name)[0]
        frame_number = int(stem[stem.rfind("_") + 1:])
        files_in_dir_dict[frame_number] = file_name

    # looking for a gap between frames
    last_tiff_frame = 0
    error_detected = False
    for tiff_frame, tiff_file in files_in_dir_dict.items():
        if tiff_frame - 1 != last_tiff_frame:
            print(f"Gap between frame n° {last_tiff_frame} and {tiff_frame}. File {tiff_file}")
            error_detected = True
        last_tiff_frame = tiff_frame
    if error_detected:
        raise Exception("ERROR: gap between 2 frames")

    # keep the names of the tiff files
    yaml_file_name = os.path.join(
        results_path, f"behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}.yaml")
    with open(yaml_file_name, 'w') as outfile:
        yaml.dump(list(files_in_dir_dict.values()), outfile, default_flow_style=False)

    # raise Exception("TEST YAML")

    # # leave only regular files, insert creation date
    # entries = ((stat[ST_CTIME], path)
    #            for stat, path in entries if S_ISREG(stat[ST_MODE]))
    # # NOTE: on Windows `ST_CTIME` is a creation date
    # # but on Unix it could be something else
    # # NOTE: use `ST_MTIME` to sort by a modification date
    #
    # for cdate, path in sorted(entries):
    #     print(time.ctime(cdate), os.path.basename(path))

    # sort by alphabetical order
    size_avi = None
    vid_avi = None
    fps_avi = 20
    avi_file_name = os.path.join(
        results_path,
        f"behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}_fps_{fps_avi}.avi")
    print(f"creating behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}_fps_{fps_avi}.avi "
          f"from {len(files_in_dir_dict)} tiff files")
    is_color = True
    # put fourcc to 0 for no compression
    # fourcc = 0
    fourcc = VideoWriter_fourcc(*"XVID")
    # fourcc = VideoWriter_fourcc(*"MPEG")
    # https://stackoverflow.com/questions/44947505/how-to-make-a-movie-out-of-images-in-python
    start_time = time()
    for tiff_frame, tiff_file in files_in_dir_dict.items():
        if (tiff_frame > 0) and (tiff_frame % 5000 == 0):
            print(f"{tiff_frame} frames done")
        # img = PIL.Image.open(os.path.join(tiffs_path_dir, tiff_file))
        # img = np.array(img)
        if vid_avi is None:
            if size_avi is None:
                img = PIL.Image.open(os.path.join(tiffs_path_dir, tiff_file))
                img = np.array(img)
                print(f"img.shape {img.shape}")
                size_avi = img.shape[1], img.shape[0]
            # vid_avi = VideoWriter(avi_file_name, fourcc, float(fps_avi), size_avi, is_color)
            vid_avi = VideoWriter(avi_file_name, fourcc, fps_avi, size_avi, is_color)
        # vid_avi.write(img)
        vid_avi.write(imread(os.path.join(tiffs_path_dir, tiff_file)))
    cv2.destroyAllWindows()
    vid_avi.release()
    time_to_convert = time() - start_time
    print(f"time_to_convert: {time_to_convert} sec")
# setup counter to store menu choice
menu_choice = 0
# display your menu
menu()
# as long as the menu choice isn't 6 ("quit"), keep getting user options
while menu_choice != 6:
    try:
        # Have the user type one of the options
        menu_choice = int(input("Type in a number (1-6): "))
    # If the user types something other than an integer, stop and go back to the menu
    except:
        print("Sorry, we're gonna need an Integer.\n\n\n")
    # Show the dogs and their ratings
    if menu_choice == 1:
        print("Dog Ratings:\n")
        for x, y in dogs.items():
            print("Dog: {} \tRating: {} \n".format(x, y))
    # Look up a specific dog rating
    elif menu_choice == 2:
        print("Lookup Dog\n")
        dog_type = input("Which Dog: ")
        if dog_type in dogs:
            print(dogs[dog_type])
        else:
            print("Uh oh, looks like we haven't rated that dog! You should with option 3!\n\n\n")
    # Take user input to add a dog rating
    elif menu_choice == 3:
class PageWidget(QWidget):
    move_drop_event = pyqtSignal(object, int, int)
    copy_drop_event = pyqtSignal(object, int, int)

    DRAG_MAGIC = 'LiSP_Drag&Drop'

    def __init__(self, rows, columns, *args):
        super().__init__(*args)
        self.setAcceptDrops(True)
        self.__rows = rows
        self.__columns = columns
        self.__widgets = SortedDict()
        self.setLayout(QGridLayout())
        self.layout().setContentsMargins(4, 4, 4, 4)
        self.init_layout()

    def init_layout(self):
        for row in range(0, self.__rows):
            self.layout().setRowStretch(row, 1)
            # item = QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding)
            # self.layout().addItem(item, row, 0)
        for column in range(0, self.__columns):
            self.layout().setColumnStretch(column, 1)
            # item = QSpacerItem(0, 0, QSizePolicy.Expanding, QSizePolicy.Minimum)
            # self.layout().addItem(item, 0, column)

    def add_widget(self, widget, row, column):
        self._check_index(row, column)
        if (row, column) not in self.__widgets:
            widget.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Ignored)
            self.__widgets[(row, column)] = widget
            self.layout().addWidget(widget, row, column)
            widget.show()
        else:
            raise IndexError('cell {} already used'.format((row, column)))

    def take_widget(self, row, column):
        self._check_index(row, column)
        if (row, column) in self.__widgets:
            widget = self.__widgets.pop((row, column))
            widget.hide()
            self.layout().removeWidget(widget)
            return widget
        else:
            raise IndexError('cell {} is empty'.format((row, column)))

    def move_widget(self, o_row, o_column, n_row, n_column):
        widget = self.take_widget(o_row, o_column)
        self.add_widget(widget, n_row, n_column)

    def widget(self, row, column):
        self._check_index(row, column)
        return self.__widgets.get((row, column))

    def index(self, widget):
        for index, f_widget in self.__widgets.items():
            if widget is f_widget:
                return index
        return -1, -1

    def widgets(self):
        return iter(self.__widgets.values())

    def reset(self):
        self.__widgets.clear()

    def dragEnterEvent(self, event):
        if event.mimeData().hasText():
            if event.mimeData().text() == PageWidget.DRAG_MAGIC:
                event.accept()
            else:
                event.ignore()
        else:
            event.ignore()

    def dragLeaveEvent(self, event):
        event.ignore()

    def dropEvent(self, event):
        row, column = self._event_index(event)
        if self.layout().itemAtPosition(row, column) is None:
            if qApp.keyboardModifiers() == Qt.ControlModifier:
                event.setDropAction(Qt.MoveAction)
                event.accept()
                self.move_drop_event.emit(event.source(), row, column)
            elif qApp.keyboardModifiers() == Qt.ShiftModifier:
                event.setDropAction(Qt.CopyAction)
                self.copy_drop_event.emit(event.source(), row, column)
                event.accept()
        event.ignore()

    def dragMoveEvent(self, event):
        row, column = self._event_index(event)
        if self.layout().itemAtPosition(row, column) is None:
            event.accept()
        else:
            event.ignore()

    def _check_index(self, row, column):
        if not isinstance(row, int):
            raise TypeError('rows index must be integers, not {}'.format(
                row.__class__.__name__))
        if not isinstance(column, int):
            raise TypeError('columns index must be integers, not {}'.format(
                column.__class__.__name__))
        if not 0 <= row < self.__rows or not 0 <= column < self.__columns:
            raise IndexError('index out of bound {}'.format((row, column)))

    def _event_index(self, event):
        # margins and spacings are equal
        space = self.layout().horizontalSpacing()
        margin = self.layout().contentsMargins().right()
        r_size = (self.height() + margin * 2) // self.__rows + space
        c_size = (self.width() + margin * 2) // self.__columns + space
        row = math.ceil(event.pos().y() / r_size) - 1
        column = math.ceil(event.pos().x() / c_size) - 1
        return row, column
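The keying scheme above is worth isolating: (row, column) tuples compare lexicographically, so a SortedDict keyed by them iterates the grid in row-major order. A standalone sketch with placeholder strings instead of Qt widgets:

from sortedcontainers import SortedDict

cells = SortedDict()
cells[(1, 0)] = "b"
cells[(0, 1)] = "a-right"
cells[(0, 0)] = "a-left"

print(list(cells))  # [(0, 0), (0, 1), (1, 0)] -- row by row, then column by column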
def location_canvis(RA, DEC, field, date, run, verbose=False, debugmode=False):
    '''Given an RA and DEC, CANVIS goes through all the data on this field and
    extracts postage stamps around that location.'''
    print('\n#########################')
    print('#  CANVIS HAS STARTED   #')
    print('#########################\n')
    print(f'CANVIS will extract postage stamps around RA {RA} DEC {DEC} for field {field}.')
    path = '/fred/oz100/pipes/DWF_PIPE/MARY_WORK/' + field + '_' + date + '_*_*/*/images_resampled/sci_*.resamp.fits'
    fitsfileslist = glob.glob(path)
    # map observation datetime -> [file path, x pixel, y pixel];
    # SortedDict keeps the cutouts in chronological order
    mydic = SortedDict()
    for i in fitsfileslist:
        with fits.open(i) as hdu:
            size = 200
            w = WCS(hdu[0].header)
            head = hdu[0].header
            # note: shadows the date argument, which was already used to build the glob
            date = dt.datetime.strptime(head['DATE'], '%Y-%m-%dT%H:%M:%S')
            xlim = head['NAXIS1']
            ylim = head['NAXIS2']
            pixcrd_im = np.array([[xlim, ylim]], float)
            world_im = w.wcs_pix2world(pixcrd_im, 1)
            pixx_im, pixy_im = world_im[0][0], world_im[0][1]
            corners = w.calc_footprint()
            corner_1 = corners[0]
            corner_2 = corners[1]
            corner_3 = corners[2]
            corner_4 = corners[3]
            difference = corner_1 - corner_2
            pixcrd = np.array([[RA, DEC]], float)
            worldpix = w.wcs_world2pix(pixcrd, 1)
            pixx, pixy = worldpix[0][0], worldpix[0][1]
            if float(corner_4[0]) <= float(RA) <= float(corner_1[0]) and \
                    float(corner_2[1]) >= float(DEC) >= float(corner_1[1]):
                mydic[date] = [i, pixx, pixy]
    if debugmode:
        print(mydic)
    images_found = False
    video_loc = None  # so the final return is defined even when nothing is found
    for i, (key, (path, pixx, pixy)) in enumerate(mydic.items()):
        images_found = True
        if debugmode:
            print('variable run is: {} and the type is: {}'.format(run, type(run)))
            print('variable RA is: {} and the type is: {}'.format(RA, type(RA)))
            print('variable DEC is: {} and the type is: {}'.format(DEC, type(DEC)))
            print('variable field is: {} and the type is: {}'.format(field, type(field)))
        path_cand = '/fred/oz100/canvis/cand_images/' + run + '/cand_' + RA + DEC + '_' + field + '_' + run + '/'
        path_cutout = '/fred/oz100/CANVIS/cand_images/' + run + '/cand_' + RA + DEC + '_' + field + '_' + run + '/' + RA + DEC + '_' + run + '_cutout_' + format(i, '03')
        if not os.path.exists(path_cand):
            os.makedirs(path_cand, 0o755)
        if not os.path.exists(path_cutout):
            os.makedirs(path_cutout, 0o755)
        size = 200
        with fits.open(path) as hdu:
            # recompute the WCS for *this* file; the w left over from the search
            # loop above belongs to whichever file was inspected last
            w = WCS(hdu[0].header)
            nom_data = (hdu[0].data - np.min(hdu[0].data)) / (np.max(hdu[0].data) - np.min(hdu[0].data))
            cutout = Cutout2D(hdu[0].data, (pixx, pixy), size, wcs=w)
            hdu[0].data = cutout.data
            hdu[0].header['CRPIX1'] = cutout.wcs.wcs.crpix[0]
            hdu[0].header['CRPIX2'] = cutout.wcs.wcs.crpix[1]
            hdu.writeto(path_cutout + '.fits', overwrite=True)
            plt.axis('off')
            plt.imshow(hdu[0].data, cmap='gray')
            plt.colorbar()
            plt.savefig(path_cutout + '.png')  # savefig overwrites by default
            plt.close()
    if images_found:
        files = []
        path_cutout = '/fred/oz100/CANVIS/cand_images/' + run + '/cand_' + RA + DEC + '_' + field + '_' + run + '/'
        for cutouts in os.listdir(path_cutout):
            if cutouts.endswith('.png'):
                files.append(path_cutout + cutouts)
        writer = imageio.get_writer(str(path_cutout) + '_VIDEO.gif', fps=3)
        video_loc = str(path_cutout) + '_VIDEO.gif'
        for i in files:
            writer.append_data(imageio.imread(i))
        writer.close()
        print('\nDone! Look for your outputs here: /fred/oz100/CANVIS/cand_images/%s' % run)
    else:
        print('\nCANVIS did not find any images to create a gif with, sorry!\n')
    print('\n###########################')
    print('#   CANVIS HAS Finished   #')
    print('#   Enjoy and discover!   #')
    print('###########################\n')
    return video_loc
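A standalone sketch of why mydic is a SortedDict: keying cutouts by observation datetime means plain iteration already yields frames in chronological order for the GIF. The dates and file names below are invented:

import datetime as dt
from sortedcontainers import SortedDict

frames = SortedDict()
frames[dt.datetime(2020, 2, 2, 1, 0)] = "cutout_001.png"
frames[dt.datetime(2020, 2, 1, 23, 0)] = "cutout_000.png"

for when, png in frames.items():  # earliest observation first
    print(when.isoformat(), png)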
class TreeNode(object):
    # map {str: TreeNode}
    id_treeNodes = {}

    def __init__(self, tree_node_id, token):
        self._id = tree_node_id
        self._tkn = token
        # map {str: set(TreeNodes)}
        self._children = SortedDict()
        TreeNode.id_treeNodes[tree_node_id] = self

    def __hash__(self):
        return hash(self.toString())

    def __eq__(self, other):
        return self.compareTo(other) == 0

    def __lt__(self, other):
        return self.compareTo(other) < 0

    def __str__(self):
        return self.toString()

    def __repr__(self):
        return self.toString()

    def addChild(self, dep, child):
        if dep not in self._children:
            self._children[dep] = SortedSet()
        self._children[dep].add(child)
        return None

    def getId(self):
        return self._id

    def getToken(self):
        return self._tkn

    def getChildren(self):
        return self._children

    def compareTo(self, z):
        if not isinstance(z, TreeNode):
            raise ValueError
        return self._tkn.compareTo(z._tkn)

    def toString(self):
        return self._tkn.toString()

    @staticmethod  # takes no self: a lookup on the class-level id map
    def getTreeNode(tree_node_id):
        return TreeNode.id_treeNodes[tree_node_id]

    def getTreeStr(self):
        id_str = SortedDict()
        if len(self._children) > 0:
            for dep, nodes in self._children.items():
                s = ''
                for node in nodes:
                    if dep.startswith('prep_') or dep.startswith('conj_'):
                        s = dep[5:] + ' '
                    s = s + node.getTreeStr()
                    id_str[node.getId()] = s
        id_str[self._id] = self._tkn.getLemma()
        result = ' '.join(id_str.values())
        return result
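A minimal sketch of the children structure above: a SortedDict keyed by dependency label whose values are SortedSets, so both the labels and the children under each label stay deterministically ordered. The labels and children here are invented strings rather than TreeNode objects:

from sortedcontainers import SortedDict, SortedSet

children = SortedDict()
for dep, child in [("nsubj", "dog"), ("conj_and", "cat"), ("conj_and", "ant")]:
    if dep not in children:
        children[dep] = SortedSet()
    children[dep].add(child)

print(children)
# SortedDict({'conj_and': SortedSet(['ant', 'cat']), 'nsubj': SortedSet(['dog'])})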
    node_clust = node_clust + str(round(nx.clustering(G, i), 5)) + "\t"
file.write("Node_clustering\t" + node_clust + "\n")

### Node closeness ###
node_close = ""
for i in node_names_sort:
    node_close = node_close + str(round(nx.closeness_centrality(G, i), 5)) + "\t"
file.write("Node_closeness\t" + node_close + "\n")

### Eigenvector centrality ###
# Note: un-commented this on 2/19/2020. Had issues with it before. Trying again.
ecen_dict = nx.eigenvector_centrality(G)
ecen_dict_sorted = SortedDict(ecen_dict)  # emit values in node-name order
ecen = ""
for key, value in ecen_dict_sorted.items():
    ecen = ecen + str(round(value, 5)) + "\t"
file.write("Eigenvalue_centrality\t" + ecen + "\n")

### Betweenness centrality ###
'''The number of times a single node appears on the shortest path
between all other pairs of nodes in a network.'''
bc_dict = nx.betweenness_centrality(G)
bc_dict_sorted = SortedDict(bc_dict)
bc = ""
for key, value in bc_dict_sorted.items():
    bc = bc + str(round(value, 5)) + "\t"
file.write("Betweenness_centrality\t" + bc + "\n")
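A small sketch of the pattern used throughout this block: wrapping a metrics dict in SortedDict(...) so every row written to the file lists nodes in the same sorted order, regardless of the order networkx returned them. The metric values are made up:

from sortedcontainers import SortedDict

bc_dict = {"nodeC": 0.125, "nodeA": 0.5, "nodeB": 0.0}
row = "\t".join(str(round(v, 5)) for v in SortedDict(bc_dict).values())
print("Betweenness_centrality\t" + row)  # columns: nodeA, nodeB, nodeC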
def run(env_name, input_code, avg, best, input_file, output_file, debug, force_fluid_discretization, fluid_discretization_history): env = make_gym(env_name) if input_code: lines = [input_code] elif input_file: with open(input_file, 'r') as f: lines = list(f.readlines()) else: raise ValueError( "No program found. Specify it as a command line argument or input file" ) render = debug or ((avg == 1) and (best == 1) and len(lines) < 5) start_time = time.monotonic() if output_file: results = SortedDict() for line in lines: print(line) code = line.split(' ')[-1].strip() observation_discretizer = bf_io.ObservationDiscretizer( env.observation_space, debug=debug, force_fluid=force_fluid_discretization, history_length=fluid_discretization_history) action_sampler = bf_io.ActionSampler(env.action_space, debug=debug) random_agent = bf.Executable('@!', observation_discretizer, action_sampler, cycle=True, debug=False) episode_count = bf_io.burn_in(env, random_agent, observation_discretizer, action_sampler) print(f'{episode_count} episodes of burn in done') if debug: print( f'Discretization thresholds: {observation_discretizer.get_thresholds()}' ) try: average_best_reward = average([ max( run_episode(env, code, observation_discretizer, action_sampler, render, debug).total_reward for best_idx in range(best)) for avg_idx in range(avg) ]) print(f'Average best reward {average_best_reward}') if output_file: results[average_best_reward] = line except ExecutionError: print('Error') print(f'{time.monotonic() - start_time} seconds elapsed') if output_file: with open(output_file, 'w') as f: f.writelines([ f'{average_reward} {line}' for average_reward, line in results.items() ])
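One detail of the results bookkeeping above deserves a note: keying a SortedDict by reward writes programs out in ascending-score order, but two programs with exactly the same reward collide on the key, and the later one silently replaces the earlier. A hedged sketch of a list-valued variant that avoids the collision (scores and programs invented):

from sortedcontainers import SortedDict

results = SortedDict()
for reward, program in [(1.5, "+[->+<]"), (0.5, "@!"), (1.5, "-[,.]")]:
    results.setdefault(reward, []).append(program)  # keep ties instead of overwriting

for reward, programs in results.items():  # lowest reward first
    for program in programs:
        print(reward, program)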
class AlphaMiner:
    def __init__(self, Traces):
        # traces within an event log
        self.traces = Traces
        # set of transitions, a.k.a. activities, or T
        self.transitions = SortedSet()
        # set of initial transitions Ti
        self.initial_transitions = SortedSet()
        # set of final transitions To
        self.final_transitions = SortedSet()
        # set of pairs (A,B) Xl
        self.pairs = []
        # set of maximal pairs (A,B) Yl
        self.maxi_pairs = []
        # set of p(A,B) between maxi_pairs Pl
        self.places = []
        # footprint: relations between activities
        self.relations = SortedDict()
        # Petri net
        self.PetriNet = None

    def getTransitions(self):
        # Lemma 1
        for trace in self.traces.values():
            for activity in trace:
                self.transitions.add(activity)
        return self.transitions

    def getInitialTransitions(self):
        # Lemma 2
        # for each trace, get the first activity
        # and add it to the set of initial transitions
        for trace in self.traces.values():
            print(trace[0])
            self.initial_transitions.add(trace[0])
        return self.initial_transitions

    def getFinalTransitions(self):
        # for each trace, get the last activity
        # and add it to the set of final transitions
        for trace in self.traces.values():
            print(trace[len(trace) - 1])
            self.final_transitions.add(trace[len(trace) - 1])
        return self.final_transitions

    def extractRelations(self):
        # extract non-repetitive traces; alpha doesn't care about frequencies!
        nnrep_traces = SortedSet()
        for trace in self.traces.values():
            nnrep_traces.add("".join(trace))
        print(nnrep_traces)
        # extract the relation between each pair of transitions (the footprint)
        for transition1 in self.transitions:
            self.relations[transition1] = SortedDict()
            for transition2 in self.transitions:
                concat = transition1 + transition2
                print(concat)
                relation = None
                for trace in nnrep_traces:
                    if trace.find(concat) >= 0:
                        # causality
                        print(concat)
                        if relation == Relations.LEFT_CAUSALITY:
                            relation = Relations.PARALLEL
                        else:
                            relation = Relations.RIGHT_CAUSALITY
                    if trace.find(concat[::-1]) >= 0:
                        print(concat[::-1])
                        if relation == Relations.RIGHT_CAUSALITY:
                            relation = Relations.PARALLEL
                        else:
                            relation = Relations.LEFT_CAUSALITY
                if relation is None:
                    relation = Relations.CHOICES
                self.relations[transition1][transition2] = relation
        return self.relations

    def computePairs(self):
        # Lemma 4
        # extract pairs of sets; each set contains activities that have no
        # relation among themselves, and every activity in the first set must
        # be directly followed by every activity in the second set
        pairs_causality = []
        pairs_choices = []
        pairs = []
        # extract all possible pairs of activities with a causality relation
        for activity1, relations1 in self.relations.items():
            for activity2, relation in relations1.items():
                if relation == Relations.RIGHT_CAUSALITY:
                    pairs_causality.append((activity1, activity2))
                if relation == Relations.CHOICES:
                    if activity1 == activity2:
                        pairs_choices.append((activity1, ))
                    else:
                        pairs_choices.append((activity1, activity2))
        print(pairs_causality)
        pairs = pairs_causality
        print(pairs_choices)
        # find all possible sets of activities with a choices relation
        i = 0
        j = len(pairs_choices)
        while i < j:
            seti = pairs_choices[i]
            for pair in pairs_choices:
                union = True
                if len(SortedSet(seti).intersection(SortedSet(pair))) != 0:
                    for e1 in pair:
                        if union == False:
                            break
                        for e2 in seti:
                            if self.relations[e1][e2] != Relations.CHOICES:
                                union = False
                                break
                    if union:
                        new_pair = SortedSet(seti) | SortedSet(pair)
                        if tuple(new_pair) not in pairs_choices:
                            pairs_choices.append(tuple(new_pair))
                            j = j + 1  # re-evaluate the length
            i = i + 1
        print(pairs_choices)
        # union
        for pair_choices1 in pairs_choices:
            for pair_choices2 in pairs_choices:
                relation_between_pair = None
                makePair = True
                print("pair 1", pair_choices1)
                print("pair 2", pair_choices2)
                intersection = SortedSet(pair_choices1).intersection(pair_choices2)
                pair_choices2 = SortedSet(pair_choices2)
                if len(intersection) != 0:
                    # remove the intersecting terms from the second pair
                    for term in intersection:
                        pair_choices2.discard(term)
                    if len(pair_choices2) == 0:
                        continue
                pair_choices2 = tuple(pair_choices2)
                print("pair_choices2 with discarded term :", pair_choices2)
                for activity1 in pair_choices1:
                    print(activity1)
                    if makePair == False:
                        break
                    for activity2 in pair_choices2:
                        print(activity2)
                        relation = self.relations[activity1][activity2]
                        if relation_between_pair != None and relation_between_pair != relation:
                            makePair = False
                            break
                        else:
                            relation_between_pair = relation
                        if relation != Relations.RIGHT_CAUSALITY:
                            makePair = False
                            break
                if makePair == True:
                    print("makepair true")
                    print(pair_choices1)
                    print(pair_choices2)
                    if relation_between_pair == Relations.RIGHT_CAUSALITY:
                        new_pair = (pair_choices1, pair_choices2)
                    else:
                        new_pair = (pair_choices2, pair_choices1)
                    pairs.append(new_pair)
        print("\n")
        print("\n")
        print(pairs)
        self.pairs = pairs
        '''
        combinations = list(itertools.combinations(list(self.transitions), len(self.transitions)))
        possible_successions = SortedSet()
        for combination in combinations:
            combination = "".join(combination)
            possible_successions.add(combination)
        print(possible_successions)'''

    def extract_maximal_pairs(self):
        # Lemma 5
        pos1 = 0
        pair_appended = []
        maxi_pairs = []
        for pair1 in self.pairs:
            append = True
            # flatten pair 1
            flat_pair1 = []
            for s in pair1:
                for e in s:
                    flat_pair1.append(e)
            print("pair1 :", pair1)
            print("flat_pair1 :", flat_pair1)
            pos2 = 0
            for pair2 in self.pairs:
                if pos1 != pos2:
                    # flatten pair 2
                    flat_pair2 = []
                    for s in pair2:
                        for e in s:
                            flat_pair2.append(e)
                    print("pair2 :", pair2)
                    print("flat_pair2 :", flat_pair2)
                    # check whether pair1 is a proper subset of pair2
                    if SortedSet(flat_pair1).issubset(flat_pair2) and \
                            SortedSet(flat_pair1) != SortedSet(flat_pair2):
                        print("issubset")
                        append = False
                pos2 = pos2 + 1
            if append == True:
                print("append")
                if SortedSet(flat_pair1) not in pair_appended:
                    maxi_pairs.append(pair1)
                    pair_appended.append(SortedSet(flat_pair1))
            pos1 = pos1 + 1
        print(maxi_pairs)
        self.maxi_pairs = maxi_pairs

    def add_places(self):
        # Lemma 6
        # connect the initial transitions to the first place
        cpt = 0
        self.places.append(("P" + str(cpt), self.initial_transitions))
        cpt = 1
        for pair in self.maxi_pairs:
            self.places.append((pair[0], "P" + str(cpt), pair[1]))
            cpt += 1
        self.places.append((self.final_transitions, "P" + str(cpt)))
        print(self.places)

    def extract_PetriNet(self):
        n = PetriNet('N')
        n.add_place(Place('p' + str(0)))
        cpt_p = 1
        for pair in self.maxi_pairs:
            n.add_place(Place('p' + str(cpt_p)))
            cpt_p += 1
        n.add_place(Place('p' + str(cpt_p)))
        for transition in self.transitions:
            n.add_transition(Transition(transition))
        print(self.initial_transitions)
        for transition in self.initial_transitions:
            n.add_input('p' + str(0), transition, Value(dot))
        cpt_p = 1
        for pair in self.maxi_pairs:
            # pair[0] produces, pair[1] consumes
            for transition in pair[0]:
                n.add_output('p' + str(cpt_p), transition, Value(dot))
            for transition in pair[1]:
                n.add_input('p' + str(cpt_p), transition, Value(dot))
            cpt_p += 1
        for transition in self.final_transitions:
            n.add_output('p' + str(cpt_p), transition, Value(dot))
        self.PetriNet = n

    def show(self, model=None):
        def draw_place(place, attr):
            attr['label'] = place.name.upper()
            attr['color'] = '#FF0000'

        def draw_transition(trans, attr):
            if str(trans.guard) == 'True':
                attr['label'] = trans.name
            else:
                attr['label'] = '%s\n%s' % (trans.name, trans.guard)

        self.PetriNet.draw(',net-with-colors.png', place_attr=draw_place,
                           trans_attr=draw_transition)
        import pygame
        pygame.init()
        size = width, height = 1200, 682
        WHITE = (255, 255, 255)
        screen = pygame.display.set_mode(size)
        screen.fill(WHITE)
        pygame.display.set_caption("petri net alphaminer")
        petri_net = pygame.image.load(",net-with-colors.png").convert()
        surf = pygame.transform.rotate(petri_net, 90)
        screen.blit(surf, (50, 0))
        pygame.display.flip()
        done = False
        while not done:  # exit cleanly on quit or Escape
            for e in pygame.event.get():
                if e.type == pygame.QUIT or (e.type == pygame.KEYDOWN
                                             and e.key == pygame.K_ESCAPE):
                    done = True
                    break
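A hedged sketch of the footprint structure extractRelations builds: a SortedDict of SortedDicts gives a deterministic, alphabetically ordered relation matrix. Plain strings stand in for the Relations enum, and the tiny log is invented:

from sortedcontainers import SortedDict

activities = ["a", "b", "c"]
footprint = SortedDict()
for t1 in activities:
    footprint[t1] = SortedDict()
    for t2 in activities:
        footprint[t1][t2] = "#"  # CHOICES placeholder

footprint["a"]["b"] = "->"  # a directly precedes b in some trace
for t1, row in footprint.items():
    print(t1, dict(row))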
class KeyedRegion(object):
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in
    this region overlaps with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """
    def __init__(self, tree=None):
        self._storage = SortedDict() if tree is None else tree

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """
        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False
        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False
        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion()
        kr = KeyedRegion()
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        return kr

    def merge(self, other, make_phi_func=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """
        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for loc_and_var in item.stored_objects:
                self.__store(loc_and_var, overwrite=False, make_phi_func=make_phi_func)
        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.

        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = { }
        for key in sorted(keys):
            ro = self._storage[key]
            variables = [ obj.obj for obj in ro.stored_objects ]
            offset_to_vars[key] = variables

        s = [ ]
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """
        size = variable.size if variable.size is not None else 1
        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """
        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """
        size = variable.size if variable.size is not None else 1
        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other objects that are fully covered by
        this object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """
        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype: int or None
        """
        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype: set
        """
        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """
        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """
        stored_object = StoredObject(start, obj, size)
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False, make_phi_func=None):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """
        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end - 1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(b, stored_object, make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end, {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(a, stored_object, make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(item, stored_object, make_phi_func=make_phi_func)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):
        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(self._storage.irange(maximum=end - 1, reverse=True))
            except StopIteration:
                prev_offset = None
            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None
            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True
        return False

    def _add_object_or_make_phi(self, item, stored_object, make_phi_func=None):  #pylint:disable=no-self-use
        if not make_phi_func or len({stored_object.obj} | item.internal_objects) == 1:
            item.add_object(stored_object)
        else:
            # make a phi node
            item.set_object(StoredObject(stored_object.start,
                                         make_phi_func(stored_object.obj, *item.internal_objects),
                                         stored_object.size,
                                         ))
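The core SortedDict trick in this class, isolated: irange(maximum=offset, reverse=True) yields keys <= offset from the largest down, so next(...) is a floor-key lookup; that is how _get_container finds the region that might cover a given offset. The offsets below are invented:

from sortedcontainers import SortedDict

regions = SortedDict({0: "reg_0_8", 8: "reg_8_16", 24: "reg_24_32"})

def floor_key(sd, offset):
    try:
        return next(sd.irange(maximum=offset, reverse=True))
    except StopIteration:
        return None  # offset lies before the first region

print(floor_key(regions, 10))  # 8  -> candidate region reg_8_16
print(floor_key(regions, 23))  # 8  (an includes() check would then reject it)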
def test6():
    """
    Ordered map: SortedDict
    Docs: http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html
    """
    from sortedcontainers import SortedDict
    sd = SortedDict()
    # insert elements
    sd["wxx"] = 21
    sd["hh"] = 18
    sd["other"] = 20
    print(sd)  # SortedDict({'hh': 18, 'other': 20, 'wxx': 21})
    print(sd["wxx"])  # indexing a missing key raises KeyError
    print(sd.get("c"))  # get() on a missing key returns None -> None
    # convert a SortedDict to a dict
    print(dict(sd))  # {'hh': 18, 'other': 20, 'wxx': 21}
    # peek at the first and the last element
    print(sd.peekitem(0))  # a tuple, the first element: ('hh', 18)
    print(sd.peekitem())  # a tuple, the last element: ('wxx', 21)
    # iteration
    for k, v in sd.items():
        print(k, ':', v, sep="", end=", ")  # sep="" removes the spaces between the printed parts
    print()
    for k in sd:  # iterate over keys; equivalent to: for k in sd.keys()
        print(str(k) + ":" + str(sd[k]), end=", ")
    print()
    for v in sd.values():  # iterate over values
        print(v, end=", ")
    print()
    # take a key from the map
    print(sd.peekitem()[0])
    # take a value from the map
    print(sd.peekitem()[1])
    # membership test
    print("wxx" in sd)  # True
    # bisect_left() / bisect_right()
    sd["a"] = 1
    sd["c1"] = 2
    sd["c2"] = 4
    print(sd)  # SortedDict({'a': 1, 'c1': 2, 'c2': 4, 'hh': 18, 'other': 20, 'wxx': 21})
    print(sd.bisect_left("c1"))  # index of the smallest key >= "c1" -> 1
    print(sd.bisect_right("c1"))  # index of the smallest key > "c1" -> 2
    # clear
    sd.clear()
    print(len(sd))  # 0
    print(len(sd) == 0)  # True

    """
    Unordered map: dict
    """
    print("---------------------------------------")
    d = {"c1": 2, "c2": 4, "hh": 18, "wxx": 21, 13: 14, 1: 0}
    print(d["wxx"])  # 21
    print(d[13])  # 14
    d[13] += 1
    print(d[13])  # 15
    d["future"] = "wonderful"  # add a key/value pair to the dict
    del d[1]  # delete the entry for key 1 from d
    print("wxx" in d)  # True if the key "wxx" is in d, else False
    print(d.keys())  # all keys of d: dict_keys(['c1', 'c2', 'hh', 'wxx', 13])
    print(d.values())  # all values of d: dict_values([2, 4, 18, 21, 14])
    print(d.items())  # dict_items([('c1', 2), ('c2', 4), ('hh', 18), ('wxx', 21), (13, 14)])
    for k, v in d.items():  # iterate over k, v pairs
        print(k, ':', v)
    for k in d:  # iterate over keys; equivalent to: for k in d.keys()
        print(str(k) + ":" + str(d[k]), end=", ")
    print()
    for v in d.values():  # iterate over values
        print(v, end=", ")
    print()
    # dict operations and methods
    print("---------------------------------------")
    d = {"中国": "北京", "美国": "华盛顿", "法国": "巴黎"}
    print(len(d))  # number of entries in d: 3
    print(d.get("中国", "不存在"))  # the value if the key exists, else the <default> -> 北京
    print(d.get("中", "不存在"))  # -> 不存在
    print(d.get("中"))  # None
    d["美国"] = "Washington"  # modify the value for an existing key
    print(d.pop("美国"))  # returns the value for the key and removes the entry from the dict
    print(d.popitem())  # removes and returns an arbitrary key/value pair as a tuple
    d.clear()  # delete all key/value pairs
class Node(BaseNode, Mapping):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.rest = None

    def _select(self, key):
        for k, v in reversed(list(self.values.items())):
            if k <= key:
                return v
        return self.rest

    def _insert(self, key, value):
        result = self._select(key)._insert(key, value)
        self.changed = True
        if result is None:
            return
        key, other = result
        return super()._insert(key, other)

    def _split(self):
        other = LazyNode(node=Node(tree=self.tree, changed=True), tree=self.tree)
        values = self.values.items()
        self.values = SortedDict(values[:len(values) // 2])
        other.values = SortedDict(values[len(values) // 2:])
        # pop the smallest key of the right half: it becomes the separator,
        # and its subtree becomes the right node's leftmost ("rest") child
        key, value = other.values.popitem(0)
        other.rest = value
        return (key, other)

    def _commit(self):
        self.rest._commit()
        for child in self.values.values():
            child._commit()
        data = packb({
            'rest': self.rest.offset,
            'values': {k: v.offset for k, v in self.values.items()},
        })
        self.tree.chunk.write(ChunkId.Node, data)
        return self.tree.chunk.tell()

    def __getitem__(self, key):
        return self._select(key)[key]

    def __len__(self):
        return sum(len(child) for child in self.values.values()) + len(self.rest)

    def __iter__(self):
        for key in self.rest:
            yield key
        for child in self.values.values():
            for key in child:
                yield key
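A standalone sketch of the node-split step: SortedDict's items() view supports slicing, so a full node can be cut into two ordered halves, after which the smallest key of the right half moves up as the separator. The keys and child placeholders below are invented:

from sortedcontainers import SortedDict

node = SortedDict({10: "c10", 20: "c20", 30: "c30", 40: "c40"})
items = node.items()
left = SortedDict(items[:len(items) // 2])
right = SortedDict(items[len(items) // 2:])

separator, rest = right.popitem(0)  # smallest key of the right half moves up
print(left, separator, rest, right)
# SortedDict({10: 'c10', 20: 'c20'}) 30 c30 SortedDict({40: 'c40'})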
class Geofence(App):
    """A Geofence defines the space a vehicle is allowed to operate within.

    A geofence is constructed by layering additive and subtractive geometry
    to construct a 3-dimensional space of operations that a drone is allowed
    to fly in. Within a layer, a point is determined to be inside as if all
    the volumes in that layer were taken as a union.
    """
    # TODO Use a small memory database (like TinyDB) to handle layer mapping.
    #  Added benefit of allowing both name and order mapping to layer at once.
    req_telem = {
        'latitude': '/Airliner/CNTL/VehicleGlobalPosition/Lat',
        'longitude': '/Airliner/CNTL/VehicleGlobalPosition/Lon',
        'altitude': '/Airliner/CNTL/VehicleGlobalPosition/Alt'
    }

    def __init__(self):
        super(Geofence, self).__init__()
        self._check_thread = None
        self.enabled = False
        self.fence_violation = False
        self.layers = SortedDict()
        """:type: dict[Any, Layer]"""

    def __contains__(self, other):
        """True if the given other is contained within the Geofence."""
        contained = False
        for layer in self.layers.values():
            if other in layer:
                contained = layer.kind is LayerKind.ADDITIVE
        return contained

    def __str__(self):
        return 'Geofence{\n' + '\n'.join(
            '    {}{}: {}'.format(
                '+' if layer.kind is LayerKind.ADDITIVE else '-',
                order, layer)
            for order, layer in self.layers.items()) + '\n}'

    def attach(self, vehicle):
        super(Geofence, self).attach(vehicle)
        self._check_thread = PeriodicExecutor(
            self._check_fence,
            every=FENCE_SLEEP,
            logger=self.vehicle.logger,
            name='FenceCheck',
            exception=lambda e: self.vehicle.exception('Geofence Exception'))
        self._check_thread.start()

    def detach(self):
        self._check_thread.stop()
        super(Geofence, self).detach()

    @classmethod
    def required_telemetry_paths(cls):
        return cls.req_telem.values()

    def add_layer(self, layer_position, layer_name, layer_kind):
        if layer_position in self.layers:
            raise KeyError('This layer already exists.')
        if not isinstance(layer_kind, LayerKind):
            raise TypeError('layer_kind must be of type LayerKind.')
        layer = Layer(name=layer_name, kind=layer_kind)
        self.layers[layer_position] = layer
        return layer

    def _check_fence(self):
        old = self.fence_violation
        self.fence_violation = self.fence_violation or \
            (self.enabled and self.position not in self)
        if not old and self.fence_violation:
            self.vehicle.error('Encountered Fence Violation at %s', self.position)
            self.vehicle.broadcast(Intent(action=ACTION_RTL))
            print('Encountered fence violation. Press Ctrl-C to exit.')

    def layer_by_name(self, name):
        for layer in self.layers.values():
            if layer.name == name:
                return layer

    @property
    def position(self):
        return Position(
            App._telem(self.req_telem['latitude'])(self),
            App._telem(self.req_telem['longitude'])(self),
            App._telem(self.req_telem['altitude'])(self))

    def remove_layer(self, position):
        del self.layers[position]
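A sketch of why Geofence keeps its layers in a SortedDict: iterating layers.values() visits layers in ascending position order, so a later (higher-numbered) layer gets the last word on containment. LayerKind and the containment test are stubbed out here with plain strings and lambdas:

from sortedcontainers import SortedDict

layers = SortedDict()
layers[0] = ("additive", lambda p: True)       # base flight area
layers[5] = ("subtractive", lambda p: p == 3)  # keep-out zone added later

def contains(point):
    inside = False
    for kind, covers in layers.values():  # position 0 first, then 5
        if covers(point):
            inside = (kind == "additive")
    return inside

print(contains(1), contains(3))  # True False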
class KeyedRegion: """ KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in this region overlap with another variable in this region. Registers and function frames can all be viewed as a keyed region. """ __slots__ = ('_storage', '_object_mapping', '_phi_node_contains' ) def __init__(self, tree=None, phi_node_contains=None): self._storage = SortedDict() if tree is None else tree self._object_mapping = weakref.WeakValueDictionary() self._phi_node_contains = phi_node_contains def __getstate__(self): return self._storage, dict(self._object_mapping), self._phi_node_contains def __setstate__(self, s): self._storage, om, self._phi_node_contains = s self._object_mapping = weakref.WeakValueDictionary(om) def _get_container(self, offset): try: base_offset = next(self._storage.irange(maximum=offset, reverse=True)) except StopIteration: return offset, None else: container = self._storage[base_offset] if container.includes(offset): return base_offset, container return offset, None def __contains__(self, offset): """ Test if there is at least one variable covering the given offset. :param offset: :return: """ if type(offset) is not int: raise TypeError("KeyedRegion only accepts concrete offsets.") return self._get_container(offset)[1] is not None def __len__(self): return len(self._storage) def __iter__(self): return iter(self._storage.values()) def __eq__(self, other): if set(self._storage.keys()) != set(other._storage.keys()): return False for k, v in self._storage.items(): if v != other._storage[k]: return False return True def copy(self): if not self._storage: return KeyedRegion(phi_node_contains=self._phi_node_contains) kr = KeyedRegion(phi_node_contains=self._phi_node_contains) for key, ro in self._storage.items(): kr._storage[key] = ro.copy() kr._object_mapping = self._object_mapping.copy() return kr def merge(self, other, replacements=None): """ Merge another KeyedRegion into this KeyedRegion. :param KeyedRegion other: The other instance to merge with. :return: None """ # TODO: is the current solution not optimal enough? for _, item in other._storage.items(): # type: RegionObject for so in item.stored_objects: # type: StoredObject if replacements and so.obj in replacements: so = StoredObject(so.start, replacements[so.obj], so.size) self._object_mapping[so.obj_id] = so self.__store(so, overwrite=False) return self def replace(self, replacements): """ Replace variables with other variables. :param dict replacements: A dict of variable replacements. :return: self """ for old_var, new_var in replacements.items(): old_var_id = id(old_var) if old_var_id in self._object_mapping: # FIXME: we need to check if old_var still exists in the storage old_so = self._object_mapping[old_var_id] # type: StoredObject self._store(old_so.start, new_var, old_so.size, overwrite=True) return self def dbg_repr(self): """ Get a debugging representation of this keyed region. :return: A string of debugging output. """ keys = self._storage.keys() offset_to_vars = { } for key in sorted(keys): ro = self._storage[key] variables = [ obj.obj for obj in ro.stored_objects ] offset_to_vars[key] = variables s = [ ] for offset, variables in offset_to_vars.items(): s.append("Offset %#x: %s" % (offset, variables)) return "\n".join(s) def add_variable(self, start, variable): """ Add a variable to this region at the given offset. 
:param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.add_object(start, variable, size) def add_object(self, start, obj, object_size): """ Add/Store an object to this region at the given offset. :param start: :param obj: :param int object_size: Size of the object :return: """ self._store(start, obj, object_size, overwrite=False) def set_variable(self, start, variable): """ Add a variable to this region at the given offset, and remove all other variables that are fully covered by this variable. :param int start: :param SimVariable variable: :return: None """ size = variable.size if variable.size is not None else 1 self.set_object(start, variable, size) def set_object(self, start, obj, object_size): """ Add an object to this region at the given offset, and remove all other objects that are fully covered by this object. :param start: :param obj: :param object_size: :return: """ self._store(start, obj, object_size, overwrite=True) def get_base_addr(self, addr): """ Get the base offset (the key we are using to index objects covering the given offset) of a specific offset. :param int addr: :return: :rtype: int or None """ base_addr, container = self._get_container(addr) if container is None: return None else: return base_addr def get_variables_by_offset(self, start): """ Find variables covering the given region offset. :param int start: :return: A list of stack variables. :rtype: set """ _, container = self._get_container(start) if container is None: return [] else: return container.internal_objects def get_objects_by_offset(self, start): """ Find objects covering the given region offset. :param start: :return: """ _, container = self._get_container(start) if container is None: return set() else: return container.internal_objects # # Private methods # def _store(self, start, obj, size, overwrite=False): """ Store a variable into the storage. :param int start: The beginning address of the variable. :param obj: The object to store. :param int size: Size of the object to store. :param bool overwrite: Whether existing objects should be overwritten or not. :return: None """ stored_object = StoredObject(start, obj, size) self._object_mapping[stored_object.obj_id] = stored_object self.__store(stored_object, overwrite=overwrite) def __store(self, stored_object, overwrite=False): """ Store a variable into the storage. :param StoredObject stored_object: The descriptor describing start address and the variable. :param bool overwrite: Whether existing objects should be overwritten or not. True to make a strong update, False to make a weak update. :return: None """ start = stored_object.start object_size = stored_object.size end = start + object_size # region items in the middle overlapping_items = list(self._storage.irange(start, end-1)) # is there a region item that begins before the start and overlaps with this variable? 
floor_key, floor_item = self._get_container(start) if floor_item is not None and floor_key not in overlapping_items: # insert it into the beginning overlapping_items.insert(0, floor_key) # scan through the entire list of region items, split existing regions and insert new regions as needed to_update = {start: RegionObject(start, object_size, {stored_object})} last_end = start for floor_key in overlapping_items: item = self._storage[floor_key] if item.start < start: # we need to break this item into two a, b = item.split(start) if overwrite: b.set_object(stored_object) else: self._add_object_with_check(b, stored_object) to_update[a.start] = a to_update[b.start] = b last_end = b.end elif item.start > last_end: # there is a gap between the last item and the current item # fill in the gap new_item = RegionObject(last_end, item.start - last_end, {stored_object}) to_update[new_item.start] = new_item last_end = new_item.end elif item.end > end: # we need to split this item into two a, b = item.split(end) if overwrite: a.set_object(stored_object) else: self._add_object_with_check(a, stored_object) to_update[a.start] = a to_update[b.start] = b last_end = b.end else: if overwrite: item.set_object(stored_object) else: self._add_object_with_check(item, stored_object) to_update[item.start] = item self._storage.update(to_update) def _is_overlapping(self, start, variable): if variable.size is not None: # make sure this variable does not overlap with any other variable end = start + variable.size try: prev_offset = next(self._storage.irange(maximum=end-1, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: if start <= prev_offset < end: return True prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if start < prev_offset + prev_item_size < end: return True else: try: prev_offset = next(self._storage.irange(maximum=start, reverse=True)) except StopIteration: prev_offset = None if prev_offset is not None: prev_item = self._storage[prev_offset][0] prev_item_size = prev_item.size if prev_item.size is not None else 1 if prev_offset <= start < prev_offset + prev_item_size: return True return False def _add_object_with_check(self, item, stored_object): if len({stored_object.obj} | item.internal_objects) > 1: if self._phi_node_contains is not None: # check if `item` is a phi node that contains stored_object.obj for so in item.internal_objects: if self._phi_node_contains(so, stored_object.obj): # yes! so we want to skip this object return # check if `stored_object.obj` is a phi node that contains item.internal_objects if all(self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects): # yes! item.set_object(stored_object) return l.warning("Overlapping objects %s.", str({stored_object.obj} | item.internal_objects)) # import ipdb; ipdb.set_trace() item.add_object(stored_object)
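The other SortedDict primitive this class leans on, shown in isolation: irange(start, end - 1) lists every existing region key inside the half-open interval [start, end), which __store then splits or fills. The keys are invented byte offsets:

from sortedcontainers import SortedDict

storage = SortedDict({0: "A", 8: "B", 16: "C", 32: "D"})
start, end = 8, 32
print(list(storage.irange(start, end - 1)))  # [8, 16] -- regions overlapping the write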
class Scanner:
    def __init__(self):
        self.__description = "program1.txt"
        self.__table = "handMadeInternalTable.txt"
        self.__pifTable = "programInternalForm.txt"
        self.__symTableFile = "symbolTable.txt"
        self.__inputTable = {}
        self.__parsedProgram = []
        self.__symTable = SortedDict()
        self.readDescription()
        self.readHandMadeTable()

    def getDescription(self):
        return self.__description

    def getFileTable(self):
        return self.__table

    def getParsedProgram(self):
        return self.__parsedProgram

    def getInputTable(self):
        return self.__inputTable

    def readHandMadeTable(self):
        index = 1
        with open(self.__table, "r") as f:
            for line in f:
                self.__inputTable[line.strip()] = index
                index += 1

    def readDescription(self):
        with open(self.__description, "r") as f:
            for line in f:
                wordslist = re.compile(
                    "('[^']*')|\t|\n| |(\\+)|(-)|(%)|(\\*)|(\\/)|(==)|(<=)|(>=)|(!=)|(=)|(\\!)|(<<)|(>>)|(>\\?)|(<)|(>)|(\\?)|(\\[)|(])|(\\()|(\\))|(:)|(;)|(,)|(\\.)"
                ).split(line)
                # print(wordslist)
                for word in wordslist:
                    if word is not None and word != "":
                        # print("-----" + word + "------")
                        self.__parsedProgram.append(word)

    def createPif(self):
        f = open(self.__pifTable, "w")
        g = open(self.__symTableFile, "w")
        index = 100
        for word in self.__parsedProgram:
            if word not in self.__inputTable:
                if word not in self.__symTable:
                    try:
                        if self.validate(word):
                            self.__symTable[word] = index
                            index += 1
                    except Exception as e:
                        print(e)
                        return
        # self.__symTable = collections.OrderedDict(sorted(self.__symTable.items()))
        for k, v in self.__symTable.items():
            g.write(str(k) + " | " + str(v) + "\n")
        for word in self.__parsedProgram:
            if word in self.__inputTable:
                f.write(str(self.__inputTable[word]) + " | -\n")
            elif word in self.__symTable:
                if word.isdigit():
                    f.write("300" + " | " + str(self.__symTable[word]) + "\n")
                else:
                    f.write("500" + " | " + str(self.__symTable[word]) + "\n")
        f.close()
        g.close()

    def validate(self, word):
        if len(word) > 8:
            raise Exception("Error! Identifier '" + word + "' is too long!!")
        if word.isalpha():
            return True
        if word.isdigit():
            return True
        if not word[0].isalpha():
            raise Exception("Error!" + word)
        for letter in word:
            if not letter.isalpha():
                if not letter.isdigit():
                    raise Exception("Error!" + word)
        return True
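A hedged sketch of the symbol-table idea above: identifiers receive numeric codes in first-seen order, while the SortedDict keeps the table itself alphabetized for the symbolTable.txt dump. The tokens below are invented:

from sortedcontainers import SortedDict

sym_table = SortedDict()
next_id = 100
for token in ["beta", "alpha", "beta", "x1"]:
    if token not in sym_table:  # assign a code only on first sight
        sym_table[token] = next_id
        next_id += 1

for name, code in sym_table.items():  # alpha, beta, x1 -- sorted for the dump
    print(name, "|", code)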