def combine(n, q, x_iter, y_iter, z_iter):
    """Build rank intervals from ``n`` pairs and score ``q`` queries.

    Each pair pulled from ``x_iter`` / ``y_iter`` is ordered as
    ``low <= high`` and recorded as two boundary updates: ``+1`` at
    ``high + 1`` and ``-1`` at ``low``. Boundaries whose net change is
    zero are discarded, the remaining changes are accumulated from the
    largest boundary downwards, and one ``Interval`` is created per
    boundary in that same descending order.

    ``Interval`` and ``pick_score`` are defined elsewhere; their exact
    semantics are not visible from this function.

    :param n: number of pairs to read from ``x_iter`` and ``y_iter``
    :param q: number of values to read from ``z_iter``
    :return: sum over ``i = 1..q`` of ``i * pick_score(z_i + 1, ranks)``
    """
    deltas = SortedDict()
    for _ in range(n):
        first = next(x_iter)
        second = next(y_iter)
        upper = max(first, second) + 1
        lower = min(first, second)
        deltas[upper] = deltas.get(upper, 0) + 1
        deltas[lower] = deltas.get(lower, 0) - 1

    # Boundaries that cancel out carry no information.
    for boundary in [key for key, change in deltas.items() if change == 0]:
        del deltas[boundary]

    # Turn per-boundary changes into running totals, largest key first.
    descending = list(reversed(deltas))
    running = 0
    for boundary in descending:
        running += deltas[boundary]
        deltas[boundary] = running

    ranks = []
    position = 1
    last_index = len(descending) - 1
    for index, start in enumerate(descending):
        dups = deltas[start]
        ranks.append(Interval(position, start, dups))
        if index < last_index:
            # Advance by (gap to the next lower boundary) * multiplicity.
            position += (start - descending[index + 1]) * dups

    return sum(
        i * pick_score(next(z_iter) + 1, ranks) for i in range(1, q + 1)
    )
class MyCalendarTwo:
    """Calendar that accepts a booking unless it would cause a triple booking.

    Bookings are half-open ``[start, end)`` intervals stored as boundary
    counts: ``+1`` at each start and ``-1`` at each end, kept in key order
    by a ``SortedDict``. A running sum over the sorted boundaries gives
    the number of simultaneously active bookings.

    The class previously also carried an earlier "double list"
    ``__init__``/``book`` pair; those definitions were silently replaced
    by the later ones at class-creation time and have been removed.
    """

    def __init__(self):
        # boundary time -> net change in the number of active bookings
        self.calendar = SortedDict()

    def book(self, start: int, end: int) -> bool:
        """Try to add ``[start, end)``.

        :return: ``True`` if the booking was stored, ``False`` if it would
            make three bookings overlap (the calendar is left unchanged).
        """
        timeline = self.calendar
        timeline[start] = timeline.get(start, 0) + 1
        timeline[end] = timeline.get(end, 0) - 1

        active = 0
        for change in timeline.values():
            active += change
            if active >= 3:
                break
        else:
            return True

        # Undo the tentative booking; drop boundaries that fall back to
        # zero so rejected bookings do not accumulate dead keys.
        for boundary, undo in ((start, -1), (end, 1)):
            restored = timeline[boundary] + undo
            if restored:
                timeline[boundary] = restored
            else:
                del timeline[boundary]
        return False
def longestSubarray(self, nums: List[int], limit: int) -> int:
    """Return the length of the longest window accepted by the helpers.

    A sliding window over ``nums`` is tracked as a multiset
    (value -> count) in a ``SortedDict``. ``self.can_add``,
    ``self.is_valid_window`` and ``self.remove_multiset`` decide whether
    a value fits, whether the window is acceptable, and how a value is
    removed; their definitions are outside this block.

    Original author's note: O(N log N), assuming ``SortedDict`` behaves
    like a balanced BST.
    """
    window = SortedDict()
    size = len(nums)
    left = right = 0
    best = 0
    while left < size and right < size:
        value = nums[right]
        fits = self.can_add(window, value, limit)
        if not fits:
            # The window is about to be broken: record its length first.
            best = max(right - left, best)
        window[value] = window.get(value, 0) + 1
        if not fits:
            # Shrink from the left until the helpers accept the window.
            while not self.is_valid_window(window, limit):
                self.remove_multiset(window, nums[left])
                left += 1
        right += 1
    return max(right - left, best)
def canAttendMeetings(self, intervals: List[List[int]]) -> bool:
    """Return ``False`` as soon as an interval collides with an earlier one.

    Boundaries seen so far live in a ``SortedDict`` mapping a time to
    ``1`` (an interval starts there) or ``-1`` (an interval ends there).
    When a new interval touches an existing boundary of the opposite
    kind, that boundary is deleted, which merges the two intervals.
    """
    opens, closes = 1, -1
    boundaries = SortedDict()
    for begin, finish in intervals:
        after_begin = boundaries.bisect_right(begin)
        before_finish = boundaries.bisect_left(finish)
        # Any stored boundary strictly between begin and finish is a clash.
        if before_finish != after_begin:
            return False
        # The nearest boundary at or before ``begin`` must not be a start.
        if after_begin > 0:
            _, marker = boundaries.peekitem(after_begin - 1)
            if marker == opens:
                return False

        if boundaries.get(begin) == closes:
            del boundaries[begin]
        else:
            boundaries[begin] = opens

        if boundaries.get(finish) == opens:
            del boundaries[finish]
        else:
            boundaries[finish] = closes
    return True
class Leaderboard:
    """Track player scores and answer "sum of the top K scores" queries.

    ``hashmap`` maps a player id to that player's current score.
    ``sortedMap`` maps the *negated* score to the number of players
    holding it, so iterating it in key order visits scores from highest
    to lowest.
    """

    def __init__(self):
        # player id -> current score
        self.hashmap = {}
        # -score -> number of players with that score
        self.sortedMap = SortedDict()

    def _forget(self, score: int) -> None:
        """Remove one player from the bucket of ``score``."""
        key = -score
        if self.sortedMap[key] == 1:
            del self.sortedMap[key]
        else:
            self.sortedMap[key] -= 1

    def addScore(self, playerId: int, score: int) -> None:
        """Add ``score`` to the player's total, creating the player if new."""
        previous = self.hashmap.get(playerId)
        if previous is not None:
            # Move the player out of the old bucket before re-inserting.
            self._forget(previous)
            score += previous
        self.hashmap[playerId] = score
        self.sortedMap[-score] = self.sortedMap.get(-score, 0) + 1

    def top(self, K: int) -> int:
        """Return the sum of the ``K`` highest scores.

        Fewer than ``K`` players yields the sum of all scores; ``K <= 0``
        yields ``0``. Keys are negated back (rather than passed through
        ``abs``) so negative scores keep their sign.
        """
        total = 0
        remaining = K
        for negated, freq in self.sortedMap.items():
            if remaining <= 0:
                break
            taken = min(freq, remaining)
            total += -negated * taken
            remaining -= taken
        return total

    def reset(self, playerId: int) -> None:
        """Remove the player entirely.

        :raises KeyError: if ``playerId`` is unknown.
        """
        self._forget(self.hashmap.pop(playerId))
class Leaderboard:
    """Leaderboard backed by a per-player dict and a sorted score histogram.

    ``scores`` maps player id -> score. ``sortedScores`` maps the negated
    score -> number of players with that score, so key order runs from
    the highest score to the lowest.
    """

    def __init__(self):
        self.scores = {}
        self.sortedScores = SortedDict()

    def addScore(self, playerId: int, score: int) -> None:
        """Add ``score`` to the player's total (new players start at 0)."""
        histogram = self.sortedScores
        if playerId in self.scores:
            old_total = self.scores[playerId]
            holders = histogram[-old_total]
            if holders == 1:
                del histogram[-old_total]
            else:
                histogram[-old_total] = holders - 1
            new_total = old_total + score
        else:
            new_total = score
        self.scores[playerId] = new_total
        histogram[-new_total] = histogram.get(-new_total, 0) + 1

    def top(self, K: int) -> int:
        """Return the sum of the ``K`` highest scores."""
        total = 0
        remaining = K
        for negated, holders in self.sortedScores.items():
            if holders > remaining:
                # Only part of this bucket is needed to reach K.
                total += (-negated) * remaining
                break
            total += (-negated) * holders
            remaining -= holders
            if remaining == 0:
                break
        return total

    def reset(self, playerId: int) -> None:
        """Drop the player and release their slot in the histogram."""
        bucket = -self.scores[playerId]
        if self.sortedScores[bucket] == 1:
            del self.sortedScores[bucket]
        else:
            self.sortedScores[bucket] -= 1
        del self.scores[playerId]


# Your Leaderboard object will be instantiated and called as such:
# obj = Leaderboard()
# obj.addScore(playerId,score)
# param_2 = obj.top(K)
# obj.reset(playerId)
class LFUCache:
    """Least-frequently-used cache with least-recently-used tie breaking.

    ``keyValue`` maps key -> value, ``keyCnt`` maps key -> use count, and
    ``cntKey`` is a ``SortedDict`` mapping use count -> ``OrderedDict`` of
    the keys having that count, in the order they reached it.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.capUsage = 0
        self.keyValue = {}
        self.keyCnt = {}
        self.cntKey = SortedDict()

    def get(self, key: int) -> int:
        """Return the value for ``key`` (or ``-1``) and bump its use count."""
        if self.capacity == 0 or key not in self.keyValue:
            return -1

        uses = self.keyCnt[key]
        promoted = self.cntKey.get(uses + 1)
        if promoted is None:
            promoted = self.cntKey[uses + 1] = OrderedDict()
        promoted[key] = 1
        self.keyCnt[key] = uses + 1

        # Leave the old frequency bucket, removing it once it is empty.
        bucket = self.cntKey[uses]
        del bucket[key]
        if not bucket:
            del self.cntKey[uses]
        return self.keyValue[key]

    def put(self, key: int, value: int) -> None:
        """Insert or update ``key``, evicting one entry when full."""
        if self.capacity == 0:
            return None

        if key in self.keyValue:
            # An update counts as a use.
            self.get(key)
            self.keyValue[key] = value
            return None

        if self.capUsage == self.capacity:
            # Lowest use count first, oldest key within that count first.
            uses, bucket = self.cntKey.popitem(0)
            self.capUsage -= 1
            victim, _ = bucket.popitem(last=False)
            del self.keyCnt[victim]
            del self.keyValue[victim]
            if bucket:
                self.cntKey[uses] = bucket

        self.capUsage += 1
        self.keyValue[key] = value
        self.keyCnt[key] = 1
        newcomers = self.cntKey.get(1)
        if newcomers is None:
            newcomers = self.cntKey[1] = OrderedDict()
        newcomers[key] = 1
class Leaderboard:
    """Leaderboard answering "sum of the K best scores" queries.

    ``scores`` maps a player id to the player's score. ``sortedScores``
    is a sorted map from the negated score to the number of players with
    that score, so key order runs from the best score downwards.
    """

    def __init__(self):
        self.scores = {}
        self.sortedScores = SortedDict()

    def addScore(self, playerId: int, score: int) -> None:
        """Add ``score`` to the player's total, creating the player if new."""
        if playerId in self.scores:
            preScore = self.scores[playerId]
            # Take the player out of the bucket of the old total.
            holders = self.sortedScores[-preScore]
            if holders == 1:
                del self.sortedScores[-preScore]
            else:
                self.sortedScores[-preScore] = holders - 1
            score += preScore
        self.scores[playerId] = score
        self.sortedScores[-score] = self.sortedScores.get(-score, 0) + 1

    def top(self, K: int) -> int:
        """Return the sum of the ``K`` highest scores.

        With fewer than ``K`` players every score is summed; ``K <= 0``
        returns ``0``.
        """
        total = 0
        remaining = K
        for negated, holders in self.sortedScores.items():
            if remaining <= 0:
                break
            # Consume a whole bucket at once instead of one player a time.
            taken = min(holders, remaining)
            total += -negated * taken
            remaining -= taken
        return total

    def reset(self, playerId: int) -> None:
        """Remove the player.

        :raises KeyError: if ``playerId`` is unknown.
        """
        preScore = self.scores[playerId]
        if self.sortedScores[-preScore] == 1:
            del self.sortedScores[-preScore]
        else:
            self.sortedScores[-preScore] -= 1
        del self.scores[playerId]
class MyCalendarThree2:
    """Report the maximum number of overlapping bookings after each call.

    ``timeline`` maps a boundary time to the net change in active
    bookings at that time (``+1`` per start, ``-1`` per end).
    """

    def __init__(self):
        self.timeline = SortedDict()

    def book(self, start: int, end: int) -> int:
        """Record ``[start, end)`` and return the peak overlap so far."""
        deltas = self.timeline
        deltas[start] = deltas.get(start, 0) + 1
        deltas[end] = deltas.get(end, 0) - 1

        peak = running = 0
        for step in deltas.values():
            running += step
            if running > peak:
                peak = running
        return peak
def minMeetingRooms(self, intervals: List[List[int]]) -> int:
    """Return the largest number of intervals active at the same time.

    Every start contributes ``+1`` and every end ``-1`` at its time; the
    answer is the highest running total when the times are visited in
    ascending order.
    """
    deltas = SortedDict()
    for begin, finish in intervals:
        deltas[begin] = deltas.get(begin, 0) + 1
        deltas[finish] = deltas.get(finish, 0) - 1

    peak = in_use = 0
    for change in deltas.values():
        in_use += change
        if in_use > peak:
            peak = in_use
    return peak
def isNStraightHand(self, hand: List[int], groupSize: int) -> bool:
    """Return whether ``hand`` splits into runs of ``groupSize`` consecutive cards.

    Distinct card values are visited in ascending order while tracking
    how many runs are currently open. A run opened ``groupSize`` values
    ago closes after the current value; the number opened at each recent
    value is kept in a ``deque`` so closing is O(1) (the previous
    ``list.pop(0)`` shifted the whole list each time).

    :param hand: card values, in any order
    :param groupSize: required length of every run
    """
    from collections import Counter, deque

    counts = Counter(hand)
    opened_at = deque()  # runs started at each of the most recent values
    open_runs = 0
    previous = None
    for card in sorted(counts):
        available = counts[card]
        gap = previous is not None and card > previous + 1
        # Open runs need exactly this card next, and enough copies of it.
        if (open_runs > 0 and gap) or open_runs > available:
            return False
        opened_at.append(available - open_runs)
        previous = card
        open_runs = available
        if len(opened_at) == groupSize:
            open_runs -= opened_at.popleft()
    return open_runs == 0
def solve():
    """Read ``n k`` from standard input and return the final split as text.

    Starting from a single segment of size ``n``, the largest remaining
    size ``sz`` (with multiplicity ``cnt``) is repeatedly replaced by its
    two parts ``(sz - 1) // 2`` and ``sz // 2`` until at least ``k``
    segments have been processed.

    Works on both Python 2 and Python 3: floor division is explicit and
    the line reader is picked at run time. ``k`` is expected to be at
    least 1; otherwise no split is ever computed.

    :return: ``"<rs> <ls>"`` for the last size processed, larger part first
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    n, k = map(int, read_line().split())
    cnts = SortedDict([(n, 1)])
    processed = 0
    while processed < k:
        # peekitem() with no argument returns the last (largest) entry.
        sz, cnt = cnts.peekitem()
        ls = (sz - 1) // 2
        rs = sz // 2
        if ls:
            cnts[ls] = cnts.get(ls, 0) + cnt
        if rs:
            cnts[rs] = cnts.get(rs, 0) + cnt
        del cnts[sz]
        processed += cnt
    return "%d %d" % (rs, ls)
class MyCalendarThree:
    """Calendar that reports the deepest overlap among all bookings.

    ``timeline`` is a sorted map from a boundary time to the net change
    in the number of active bookings at that time.
    """

    def __init__(self):
        self.timeline = SortedDict()

    def book(self, start: int, end: int) -> int:
        """Add ``[start, end)`` and return the maximum overlap seen."""
        for boundary, change in ((start, 1), (end, -1)):
            self.timeline[boundary] = self.timeline.get(boundary, 0) + change

        # Prefix sums over the sorted boundaries; the leading 0 is the
        # "no bookings active" baseline.
        active_counts = [0]
        for change in self.timeline.values():
            active_counts.append(active_counts[-1] + change)
        return max(active_counts)
class OrderBook:
    """One side of an order book: price -> ``Level``, kept in match order.

    The buy side sorts by negated price, so iteration runs from the
    highest price down; the sell side iterates from the lowest price up.
    ``Side``, ``Order``, ``Level`` and ``MatchResult`` are defined
    elsewhere.
    """

    def __init__(self, side: Side):
        if side == Side.buy:
            self.book = SortedDict(lambda price: -price)
        elif side == Side.sell:
            self.book = SortedDict()
        self.side = side

    def add(self, order: Order):
        """Queue an unfilled ``order`` at its price level."""
        assert order.filled() == False
        level = self.book.get(order.price)
        if level is None:
            level = self.book[order.price] = Level(order.price, self.side)
        level.add(order)

    def match(self, order: Order):
        """Match ``order`` against levels in book order until it stops.

        Levels emptied along the way are removed only after the scan, so
        the book is not mutated while it is being iterated.
        """
        emptied = []
        for level in self.book.values():
            if not level.can_match(order):
                break
            outcome = level.match(order)
            if level.empty():
                emptied.append(level.price)
            if outcome == MatchResult.complete:
                break
        for price in emptied:
            del self.book[price]

    def cancel(self, order: Order):
        """Cancel ``order`` at its price level."""
        # NOTE(review): a price with no level yields None here and the
        # call below raises AttributeError - confirm callers never do that.
        level = self.book.get(order.price)
        level.cancel(order)
def diagonalSort(self, mat: List[List[int]]) -> List[List[int]]:
    """Return a copy of ``mat`` with every top-left to bottom-right diagonal sorted ascending.

    Each of the ``rows + cols - 1`` diagonals is read, sorted, and written
    back in place. ``mat`` itself is not modified. An empty matrix (or a
    matrix of empty rows) is returned as an equally shaped copy.
    """
    if not mat or not mat[0]:
        return [list(row) for row in mat]

    rows, cols = len(mat), len(mat[0])
    result = [[0] * cols for _ in range(rows)]

    # Diagonals start along the left column (bottom to top), then along
    # the top row (left to right, skipping the shared corner).
    starts = [(r, 0) for r in range(rows - 1, -1, -1)]
    starts += [(0, c) for c in range(1, cols)]

    for r0, c0 in starts:
        length = min(rows - r0, cols - c0)
        ordered = sorted(mat[r0 + k][c0 + k] for k in range(length))
        for k, value in enumerate(ordered):
            result[r0 + k][c0 + k] = value
    return result
class FamilyTreatment(Treatment):
    """A family-level treatment holding its species treatments by taxon."""

    def __init__(self, **kwargs):
        # Created before the base initialiser runs, as in the original.
        self.species_treatments = SortedDict()
        super(FamilyTreatment, self).__init__(**kwargs)

    def add_species(self, species):
        """Register ``species`` under its ``taxon``, replacing any previous entry."""
        taxon = species.taxon
        self.species_treatments[taxon] = species

    def get_species(self, taxon):
        """Return the species stored for ``taxon``, or ``None``."""
        return self.species_treatments.get(taxon)

    def list_species(self):
        """Return the stored species, ordered by taxon key."""
        return self.species_treatments.values()

    @property
    def taxon_authors(self):
        """
        Include the full scientific name in the taxon authors field
        so that it is completely unitalicized
        """
        description = self.description
        return description.scientific_name

    @property
    def notes(self):
        """Return the paragraphs of this treatment's description."""
        description = self.description
        return description.paragraphs
class Leaderboard:
    """Leaderboard with a per-player score map and a sorted score histogram.

    ``scores`` maps player id -> score. ``sortedScores`` maps the negated
    score -> number of players with that score, so key order runs from
    the best score downwards.
    """

    def __init__(self):
        self.scores = collections.defaultdict(int)
        self.sortedScores = SortedDict()

    def _release(self, score: int) -> None:
        """Remove one player from the histogram bucket of ``score``."""
        if self.sortedScores[-score] == 1:
            del self.sortedScores[-score]
        else:
            self.sortedScores[-score] -= 1

    def addScore(self, playerId: int, score: int) -> None:
        """Add ``score`` to the player's total, creating the player if new.

        Whether the player already exists is decided by membership in
        ``scores``. The earlier check looked for the player's previous
        score among the histogram keys, so a brand-new player could be
        mistaken for an existing one whenever some other player held a
        score of 0, and that other player's bucket was decremented.
        """
        if playerId in self.scores:
            preScore = self.scores[playerId]
            self._release(preScore)
        else:
            preScore = 0
        newScore = preScore + score
        self.scores[playerId] = newScore
        self.sortedScores[-newScore] = self.sortedScores.get(-newScore, 0) + 1

    def top(self, K: int) -> int:
        """Return the sum of the ``K`` highest scores (``0`` for ``K <= 0``)."""
        res = 0
        remaining = K
        for key, val in self.sortedScores.items():
            if remaining <= 0:
                break
            taken = min(val, remaining)
            res += -key * taken
            remaining -= taken
        return res

    def reset(self, playerId: int) -> None:
        """Remove the player.

        ``pop`` is used so an unknown id raises ``KeyError`` without the
        ``defaultdict`` first creating a phantom entry for it.

        :raises KeyError: if ``playerId`` is unknown.
        """
        self._release(self.scores.pop(playerId))


# Your Leaderboard object will be instantiated and called as such:
# obj = Leaderboard()
# obj.addScore(playerId,score)
# param_2 = obj.top(K)
# obj.reset(playerId)
class Leaderboard:
    """Leaderboard with a per-player score map and a sorted score histogram."""

    def __init__(self):
        # player id -> score
        self.scores = {}
        # -score -> number of players holding that score
        self.sortedScores = SortedDict()

    def addScore(self, playerId: int, score: int) -> None:
        """Add ``score`` to the player's total, creating the player if new."""
        if playerId not in self.scores:
            newScore = score
        else:
            preScore = self.scores[playerId]
            val = self.sortedScores.get(-preScore)
            if val == 1:
                del self.sortedScores[-preScore]
            else:
                # The player leaves this bucket, so the count goes DOWN
                # (it was previously incremented, inflating the bucket).
                self.sortedScores[-preScore] = val - 1
            newScore = preScore + score
        self.scores[playerId] = newScore
        self.sortedScores[-newScore] = self.sortedScores.get(-newScore, 0) + 1

    def top(self, K: int) -> int:
        """Return the sum of the ``K`` highest scores (``0`` for ``K <= 0``)."""
        total = 0
        remaining = K
        for key, times in self.sortedScores.items():
            if remaining <= 0:
                break
            taken = min(times, remaining)
            total += -key * taken
            remaining -= taken
        return total

    def reset(self, playerId: int) -> None:
        """Remove the player.

        :raises KeyError: if ``playerId`` is unknown.
        """
        preScore = self.scores[playerId]
        if self.sortedScores[-preScore] == 1:
            del self.sortedScores[-preScore]
        else:
            self.sortedScores[-preScore] -= 1
        del self.scores[playerId]
def brightestPosition(self, lights: List[List[int]]) -> int:
    """Return the smallest position with the highest brightness.

    Each ``[position, radius]`` light covers ``position - radius`` through
    ``position + radius`` inclusive. Coverage is recorded as ``+1`` at the
    first covered position and ``-1`` just past the last one; sweeping
    the boundaries in ascending order yields the brightness at each one.

    With no lights the initial sentinel (positive infinity) is returned,
    as before.
    """
    deltas = {}
    for position, radius in lights:
        first_lit = position - radius
        first_dark = position + radius + 1
        deltas[first_lit] = deltas.get(first_lit, 0) + 1
        deltas[first_dark] = deltas.get(first_dark, 0) - 1

    best_position = float("inf")
    best_brightness = -1
    brightness = 0
    for pos in sorted(deltas):
        brightness += deltas[pos]
        # Strict comparison keeps the leftmost position on ties.
        if brightness > best_brightness:
            best_brightness = brightness
            best_position = pos
    return best_position
def order(participants, iterations=100000):
    """Search random circular seatings of ``participants`` for the best score.

    A seating is scored by walking around the circle (the last entry is
    followed by the first): each adjacent pair with two different,
    truthy ``'sex'`` values adds ``len(seating)``; each adjacent pair
    with equal, truthy ``'group'`` values subtracts
    ``len(seating) ** 2 + 1``.

    The distribution of scores over all tries is printed.

    :param participants: list of dicts with ``'sex'`` and ``'group'`` keys
    :param iterations: number of random shuffles to try
    :return: the best seating found, or ``participants`` itself when no
        shuffle scored above 0
    """

    def count_score(seating):
        size = len(seating)
        score = 0
        for i, participant in enumerate(seating):
            neighbour = seating[(i + 1) % size]
            if (participant['sex'] and neighbour['sex']
                    and participant['sex'] != neighbour['sex']):
                score += size
            if (participant['group'] and neighbour['group']
                    and participant['group'] == neighbour['group']):
                score -= size ** 2 + 1
        return score

    scores_map = {}
    best_order = participants
    best_score = 0
    for _ in range(iterations):
        candidate = participants.copy()
        random.shuffle(candidate)
        score = count_score(candidate)
        scores_map[score] = scores_map.get(score, 0) + 1
        if score > best_score:
            best_order = candidate
            best_score = score

    print('Scores distribution')
    for k in sorted(scores_map):
        v = scores_map[k]
        print(f'{k} -> {v}')
    return best_order
def occurences(iterable):
    """Count how many times each item occurs in a finite iterable.

    :param iterable: finite iterable
    :return: SortedDict mapping each item to its int count
    """
    from sortedcontainers import SortedDict

    tally = SortedDict()
    for item in iterable:
        if item in tally:
            tally[item] += 1
        else:
            tally[item] = 1
    return tally
def occurences(iterable):
    """Tally the items of a finite iterable.

    :param iterable: finite iterable
    :return: SortedDict of int counts keyed by item
    """
    from sortedcontainers import SortedDict

    counts = SortedDict()
    for element in iterable:
        seen_so_far = counts.get(element, 0)
        counts[element] = seen_so_far + 1
    return counts
class Leaderboard:
    """Leaderboard with a per-player score map and a sorted score histogram.

    ``scores`` maps player id -> score; ``sortedScores`` maps the negated
    score -> number of players holding it.
    """

    def __init__(self):
        self.scores = {}
        self.sortedScores = SortedDict()

    def _release(self, score: int) -> None:
        """Remove one player from the histogram bucket of ``score``."""
        if self.sortedScores[-score] == 1:
            del self.sortedScores[-score]
        else:
            self.sortedScores[-score] -= 1

    def addScore(self, playerId: int, score: int) -> None:
        """Add ``score`` to the player's total, creating the player if new."""
        if playerId in self.scores:
            preScore = self.scores[playerId]
            self._release(preScore)
            score += preScore
        self.scores[playerId] = score
        self.sortedScores[-score] = self.sortedScores.get(-score, 0) + 1

    def top(self, K: int) -> int:
        """Return the sum of the ``K`` highest scores.

        Each key is negated back to the real score. Taking ``abs`` of the
        summed keys, as before, reported a negative total as positive.
        ``K <= 0`` returns ``0``.
        """
        ans = 0
        remaining = K
        for key, count in self.sortedScores.items():
            if remaining <= 0:
                break
            taken = min(count, remaining)
            ans += -key * taken
            remaining -= taken
        return ans

    def reset(self, playerId: int) -> None:
        """Remove the player.

        :raises KeyError: if ``playerId`` is unknown.
        """
        self._release(self.scores.pop(playerId))
class MenuBar(QMenuBar):
    """Menu bar that keeps its menus in a fixed, configurable order.

    ``menu_order`` lists menu classes in display order. Menus may be
    added in any sequence; each one is inserted in front of the
    already-added menu that follows it in ``menu_order``.
    """

    def __init__(self, menu_order: Tuple[MainMenu] = (FileMenu, ViewMenu, ToolsMenu, AlgorithmsMenu, WindowsMenu, HelpMenu)):
        super().__init__()

        self._menu_order = menu_order
        self._menus_order_indexes = {}
        for position, menu_type in enumerate(self._menu_order):
            self._menus_order_indexes[menu_type] = position

        # {order_index: menu instance created by add_menu}
        self._ordered_added_menus = SortedDict()

    def add_menu(self, menu_type: Type[MainMenu]) -> MainMenu:
        """Instantiate ``menu_type`` and place it at its ordered position."""
        menu = menu_type()
        order_index = self._menu_order_index(menu_type)
        self._ordered_added_menus[order_index] = menu

        position = self._ordered_added_menus.index(order_index)
        is_last = position == len(self._ordered_added_menus) - 1
        if is_last:
            self.addMenu(menu)
        else:
            # Insert in front of the next menu that is already present.
            _, following_menu = self._ordered_added_menus.peekitem(position + 1)
            self.insertMenu(following_menu.menuAction(), menu)
        return menu

    def menu(self, menu_type: Type[MainMenu], add_nonexistent: bool = True) -> Optional[MainMenu]:
        """Return the added menu of ``menu_type``.

        A missing menu is created when ``add_nonexistent`` is true;
        otherwise ``None`` is returned for it.
        """
        found = self._ordered_added_menus.get(self._menu_order_index(menu_type))
        if found is None and add_nonexistent:
            return self.add_menu(menu_type)
        return found

    def add_menu_action(self, menu_type: Type[MainMenu], action_name, method, shortcut=None) -> QAction:
        """Add an action to the menu of ``menu_type``, creating the menu if needed."""
        target = self.menu(menu_type)
        return target.addAction(action_name, method, shortcut)

    def _menu_order_index(self, menu_type: Type[MainMenu]) -> int:
        """Return the configured position of ``menu_type``."""
        return self._menus_order_indexes[menu_type]
class LObject():
    """An object with per-region icons and a set of descriptive attributes."""

    def __init__(self):
        self.region = SortedDict()
        self.id = ''
        self.name = ''
        self.kind = None
        self.note = None
        self.height = 0
        self.width = 0
        self.zorder = 0
        self.door = False
        self.trap = False
        self.mini_icon = False
        self.tags = {}

    def put_icon(self, icon, region_key):
        '''
        stores an icon in the region dict under the given region key,
        replacing any icon already stored there.

        :icon: An instance of Icon
        :region_key: a string defining which region style to use in the icon
        '''
        # TODO: One icon per region key only. Revisit later whether each
        # square should hold a list of icons instead.
        self.region[region_key] = icon

    def get_icon(self, region_key):
        '''
        returns the icon stored for the region key, or None if there is none.
        '''
        return self.region.get(region_key)

    def compareTo(self, other_l_object):
        '''
        compares the lower-cased names of two LObjects for alphabetical
        sorting; returns -1, 0 or 1.
        '''
        # TODO: Enable custom sorting ?
        if not isinstance(other_l_object, LObject):
            raise NameError(
                'comparing a LObject is possible only with another LObject')
        mine = self.name.lower()
        theirs = other_l_object.name.lower()
        return (mine > theirs) - (mine < theirs)

    def __str__(self):
        return self.name
def __init__(self, arch, results, args, root_url, progress_browser, bugzilla_browser, test_browser):
    """Construct an archreport object with options."""
    self.arch = arch
    self.args = args
    self.root_url = root_url
    self.progress_browser = progress_browser
    self.bugzilla_browser = bugzilla_browser

    states = [entry['state'] for entry in results.values()]
    self.status_badge = set_status_badge(states)

    results_by_bugref = SortedDict(get_results_by_bugref(results, self.args))
    self.issues = defaultdict(lambda: defaultdict(list))

    for bugref, result_list in iteritems(results_by_bugref):
        # Results that share a known ticket are grouped: if the ticket
        # covers a STILL_FAILING scenario and any NEW_ISSUE, it is treated
        # as STILL_FAILING that merely shows up in more scenarios.
        # Results without a linked ticket are not grouped here, because
        # it is unknown whether they are the same issue; they are
        # handled below.
        if not re.match('(poo|bsc|boo)#', bugref):
            continue
        # If any result was still failing the issue is regarded as existing.
        bug = result_list[0]
        issue = Issue(bug['bugref'], bug['bugref_href'], self.args.query_issue_status, self.progress_browser, self.bugzilla_browser)
        bucket = self.issues[issue_state(result_list)][issue_type(bugref)]
        bucket.append(IssueEntry(self.args, self.root_url, result_list, bug=issue))

    # Left to handle are the issues marked with 'TODO'.
    new_issues = (r for r in results_by_bugref.get('TODO', []) if r['state'] == 'NEW_ISSUE')
    self.issues['new']['todo'].extend(IssueEntry.for_each(self.args, self.root_url, new_issues, test_browser))
    existing_issues = (r for r in results_by_bugref.get('TODO', []) if r['state'] == 'STILL_FAILING')
    self.issues['existing']['todo'].extend(IssueEntry.for_each(self.args, self.root_url, existing_issues, test_browser))

    if not self.args.include_softfails:
        return

    # Soft failures are each reported as one combined product entry.
    new_soft_fails = [r for r in results.values() if r['state'] == 'NEW_SOFT_ISSUE']
    existing_soft_fails = [r for r in results.values() if r['state'] == 'STILL_SOFT_FAILING']
    if new_soft_fails:
        self.issues['new']['product'].append(IssueEntry(self.args, self.root_url, new_soft_fails, soft=True))
    if existing_soft_fails:
        self.issues['existing']['product'].append(IssueEntry(self.args, self.root_url, existing_soft_fails, soft=True))
class LBoard():
    """A game board of ``width`` x ``height`` squares with per-region icons."""

    def __init__(self, width, height):
        '''
        defines a new board.

        :width: Board width in squares
        :height: Board height in squares
        '''
        self.region = SortedDict()

        # Values that are not already ints are converted with int().
        self.width = width if isinstance(width, int) else int(width)
        self.height = height if isinstance(height, int) else int(height)

        self.borderDoorsOffset = 0
        self.adjacentBoardsOffset = 0

        # Corridor matrix, initialised to False: one inner list per
        # column (``width`` of them), each ``height`` entries long.
        # Indexing is zero-based, so the top-left square is [0][0] and
        # the bottom-right square is [width - 1][height - 1].
        self.corridors = [[False] * self.height for _ in range(self.width)]

    def put_icon(self, icon, region_key):
        '''
        stores an icon in the board dict under the given region key.

        :icon: An instance of Icon containing a game board
        :region_key: a string defining where to put the icon
        '''
        self.region[region_key] = icon

    def get_icon(self, region_key):
        '''
        returns the icon stored for the region key, or None if there is none.
        '''
        return self.region.get(region_key)
class Events:
    """Time-ordered event queue: each time maps to the list of its events."""

    def __init__(self):
        # time -> list of events scheduled at that time, in insertion order
        self.list_of_events = SortedDict()

    def add_event(self, event):
        """Append ``event`` to the list stored for ``event.time``."""
        bucket = self.list_of_events.get(event.time)
        if bucket:
            bucket.append(event)
        else:
            self.list_of_events[event.time] = [event]

    def count_arrivals_at(self, time):
        """Return how many events at ``time`` have ``type == 'arrival'``.

        Returns ``0`` when nothing is scheduled at ``time``. The type is
        read from each event; previously it was read from the list
        itself, and the resulting error was swallowed by a bare
        ``except``, so the method always returned 0.
        """
        events = self.list_of_events.get(time, ())
        return sum(1 for e in events if e.type == 'arrival')

    def pop_next_event(self):
        """Remove and return the oldest event at the earliest time.

        :raises IndexError: if no events are stored.
        """
        time, events = self.list_of_events.peekitem(0)
        if len(events) > 1:
            return events.pop(0)
        # Last event at this time: drop the whole entry.
        del self.list_of_events[time]
        return events[0]

    def count(self):
        """Return the number of distinct times that have events."""
        return len(self.list_of_events)

    def __str__(self):
        return ''.join(
            str(time) + ": " + ''.join(str(e) + ', ' for e in events)
            for time, events in self.list_of_events.items()
        )
class TransactionRepository:
    """Running balance per account number, kept in account order."""

    def __init__(self):
        # int account number -> float balance
        self.__accounts = SortedDict()

    def add_amount(self, account, amount):
        """Add ``amount`` to the balance of ``account``.

        Both arguments are converted (``int`` / ``float``), so numeric
        strings are accepted.
        """
        account = int(account)
        amount = float(amount)
        self.__accounts[account] = self.__accounts.get(account, 0) + amount

    def get_account_amount(self, account):
        """Return the balance of ``account``.

        :raises KeyError: if nothing was ever added for the account.
        """
        return self.__accounts[int(account)]

    def get_formatted_transactions(self):
        """Return an iterator of ``(account, balance)`` pairs in account order.

        ``items()`` replaces ``iteritems()``, which mappings do not
        provide on Python 3; wrapping it in ``iter`` keeps the
        one-shot-iterator return value.
        """
        return iter(self.__accounts.items())

    def clear(self):
        """Forget every account."""
        self.__accounts.clear()
class TransactionRepository:
    """Accumulates amounts per account in a map sorted by account number."""

    def __init__(self):
        # int account number -> float total
        self.__accounts = SortedDict()

    def add_amount(self, account, amount):
        """Add ``amount`` (converted to float) to ``account`` (converted to int)."""
        account_id = int(account)
        current = self.__accounts.get(account_id, 0)
        self.__accounts[account_id] = current + float(amount)

    def get_account_amount(self, account):
        """Return the accumulated total for ``account``.

        :raises KeyError: if the account has no recorded amount.
        """
        return self.__accounts[int(account)]

    def get_formatted_transactions(self):
        """Return an iterator over ``(account, total)`` pairs in account order.

        Uses ``items()`` because ``iteritems()`` is not available on
        Python 3 mappings; ``iter`` preserves the iterator return type.
        """
        return iter(self.__accounts.items())

    def clear(self):
        """Remove all accounts."""
        self.__accounts.clear()
def mk_bpnts(br_adj):
    """Group the keys of ``br_adj`` by how many entries their row has.

    Keys whose row has exactly two entries are skipped. Counts 0
    through 4 are always present in the result, possibly with an empty
    list.

    :return: SortedDict mapping entry count -> list of keys
    """
    bpnts = SortedDict((count, []) for count in range(5))
    for vertex in br_adj:
        degree = len(br_adj[vertex])
        if degree == 2:
            # degree-2 vertices are ignored
            continue
        if bpnts.get(degree):
            bpnts[degree].append(vertex)
        else:
            bpnts[degree] = [vertex]
    return bpnts
class NameMap:
    """
    Sorted map of name statistics loaded from a file.

    Every line of the file is parsed with ``nameEntry`` and stored under
    the entry's ``name``. ``filePath`` and ``nameEntry`` are defined
    elsewhere in this module.
    """

    def __init__(self, nameFile: str):
        """
        Load the entries from ``filePath + nameFile``.

        If the file cannot be opened, an error is printed and the map is
        left empty. Previously the constructor went on to use the
        unopened file and crashed with an unrelated error.
        """
        self.namemap = SortedDict()
        path = filePath + nameFile
        try:
            names = open(path)
        except IOError:
            print("Error opening file:" + path)
            return
        # The context manager closes the file; it was never closed before.
        with names:
            for line in names:
                nameData = nameEntry(line)
                self.namemap[nameData.name] = nameData

    def lookup(self, name):
        """
        Return the entry stored for ``name``, or None if there is none.
        """
        return self.namemap.get(name)

    def lookup10(self, name):
        """
        Return up to ten ``(name, entry)`` pairs surrounding ``name``.

        The window spans five positions either side of where ``name``
        sorts in the map, clipped to the map's bounds.
        """
        i = self.namemap.bisect_right(name)
        low = max(0, i - 5)
        high = min(len(self.namemap), i + 5)
        return [self.namemap.peekitem(j) for j in range(low, high)]
def mk_bpnts(self, br_adj):
    """Bucket branch-graph vertices by their number of neighbours.

    Vertices with exactly two neighbours are left out, so populated
    buckets have a neighbour count of 1 or of 3 and more (the original
    author's note). Buckets 0 through 4 always exist, possibly empty.

    :return: SortedDict mapping neighbour count -> list of vertex keys
    """
    bpnts = SortedDict()
    # Pre-create the first few buckets.
    for preset in range(5):
        bpnts[preset] = []

    for vertex in br_adj:
        neighbours = br_adj[vertex]
        nedge = len(neighbours)
        if nedge == 2:
            continue
        bucket = bpnts.get(nedge)
        if bucket:
            bucket.append(vertex)
        else:
            bpnts[nedge] = [vertex]
    return bpnts
class CacheStore(object):
    """Keyed cache whose entries carry a "valid" flag.

    Entries are ``CacheItem`` objects holding the data and an ``Event``
    that is set while the entry is valid. Mutating methods take
    ``self.lock``; ``get`` waits on the entry's event before returning
    the data.
    """

    class CacheItem(object):
        __slots__ = ('valid', 'data')

        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        """Store ``data`` under ``key`` and mark it valid.

        :return: ``True`` if a new entry was created, ``False`` if an
            existing entry was overwritten.
        """
        with self.lock:
            try:
                entry = self.store[key]
            except KeyError:
                entry = self.CacheItem()
                created = True
            else:
                created = False
            entry.data = data
            entry.valid.set()
            if created:
                self.store[key] = entry
            return created

    def update(self, **kwargs):
        """Store every keyword as a valid entry.

        :return: ``(created_keys, updated_keys)``
        """
        with self.lock:
            fresh = {}
            created, updated = [], []
            for name, payload in kwargs.items():
                entry = self.CacheItem()
                entry.data = payload
                entry.valid.set()
                fresh[name] = entry
                (updated if name in self.store else created).append(name)
            self.store.update(**fresh)
            return created, updated

    def update_one(self, key, **kwargs):
        """Apply ``kwargs`` to the data stored under ``key`` and re-store it.

        :return: ``False`` if there is no (truthy) data for ``key``.
        """
        with self.lock:
            item = self.get(key)
            if not item:
                return False
            for k, v in kwargs.items():
                # NOTE(review): unless ``set`` is rebound to a three-argument
                # helper elsewhere in this module, this calls the builtin
                # and raises TypeError - confirm.
                set(item, k, v)
            self.put(key, item)
            return True

    def update_many(self, key, predicate, **kwargs):
        """Run ``update_one`` on every valid entry whose data matches ``predicate``."""
        with self.lock:
            updated = []
            for k, v in self.itervalid():
                if predicate(v) and self.update_one(k, **kwargs):
                    # NOTE(review): appends the ``key`` argument rather
                    # than the matched ``k`` - confirm this is intended.
                    updated.append(key)
            return updated

    def get(self, key, default=None, timeout=None):
        """Return the data for ``key`` after waiting for it to become valid.

        Waits at most ``timeout`` seconds (forever when ``None``);
        returns ``default`` when ``key`` has no entry.
        """
        entry = self.store.get(key)
        if not entry:
            return default
        entry.valid.wait(timeout)
        return entry.data

    def remove(self, key):
        """Delete ``key``; return whether it existed."""
        with self.lock:
            try:
                del self.store[key]
            except KeyError:
                return False
            return True

    def remove_many(self, keys):
        """Delete every key in ``keys`` that exists; return those removed."""
        with self.lock:
            removed = []
            for key in keys:
                try:
                    del self.store[key]
                except KeyError:
                    continue
                removed.append(key)
            return removed

    def clear(self):
        """Empty the store and return the keys it held."""
        with self.lock:
            former_keys = list(self.store.keys())
            self.store.clear()
            return former_keys

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        """Re-store the data of ``oldkey`` under ``newkey``, updating its ``'id'``."""
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        """Return whether ``key`` has an entry that is currently valid."""
        entry = self.store.get(key)
        return entry.valid.is_set() if entry else False

    def invalidate(self, key):
        """Clear the valid flag of ``key`` (no-op when absent)."""
        with self.lock:
            entry = self.store.get(key)
            if entry:
                entry.valid.clear()

    def itervalid(self):
        """Yield ``(key, data)`` for valid entries, from a snapshot of the store."""
        snapshot = list(self.store.items())
        for key, entry in snapshot:
            if entry.valid.is_set():
                yield (key, entry.data)

    def validvalues(self):
        """Yield the data of valid entries, from a snapshot of the store."""
        snapshot = list(self.store.values())
        for entry in snapshot:
            if entry.valid.is_set():
                yield entry.data

    def remove_predicate(self, predicate):
        """Remove valid entries whose data matches ``predicate``; return their keys."""
        removed = []
        for key, data in self.itervalid():
            if not predicate(data):
                continue
            self.remove(key)
            removed.append(key)
        return removed

    def query(self, *filter, **params):
        """Run the module-level ``query`` helper over the valid data values."""
        return query(list(self.validvalues()), *filter, **params)
class WordData(QObject):
    """Vocabulary database of one book.

    Holds the sorted vocabulary (word -> [count, property-set]), the
    good-words, bad-words and scannos sets, and the alt-dictionary tags.
    Registers reader/writer callbacks with the book's metadata manager and
    emits ``WordsUpdated`` when loaded data may change what is displayed.
    """

    # Define the signal we emit when we have loaded new data
    WordsUpdated = pyqtSignal()

    def __init__(self, my_book):
        super().__init__(None)
        # Save reference to the book
        self.my_book = my_book
        # Save reference to the metamanager
        self.metamgr = my_book.get_meta_manager()
        # Save reference to the edited document
        self.document = my_book.get_edit_model()
        # Save reference to a speller, which will be the default
        # at this point.
        self.speller = my_book.get_speller()
        # The vocabulary list as a sorted dict.
        self.vocab = SortedDict()
        # Key and Values views on the vocab list for indexing by table row.
        self.vocab_kview = self.vocab.keys()
        self.vocab_vview = self.vocab.values()
        # The count of available words based on the latest sort
        self.active_word_count = 0
        # The good- and bad-words sets and the scannos set.
        self.good_words = set()
        self.bad_words = set()
        self.scannos = set()
        # A dict of words that use an alt-dict tag. The key is a word and the
        # value is the alt-dict tag string.
        self.alt_tags = SortedDict()
        # Cached sort vectors, see get_sort_vector()
        self.sort_up_vectors = [None, None, None]
        self.sort_down_vectors = [None, None, None]
        self.sort_key_funcs = [None, None, None]
        # Register metadata readers and writers.
        self.metamgr.register(C.MD_GW, self.good_read, self.good_save)
        self.metamgr.register(C.MD_BW, self.bad_read, self.bad_save)
        self.metamgr.register(C.MD_SC, self.scanno_read, self.scanno_save)
        self.metamgr.register(C.MD_VL, self.word_read, self.word_save)
    # End of __init__

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # Methods used when saving metadata. The items in the good_words,
    # bad_words, and scanno sets are simply returned as a list of strings.
    #
    def good_save(self, section):
        """Return the good-words set as a list of strings."""
        return list(self.good_words)

    def bad_save(self, section):
        """Return the bad-words set as a list of strings."""
        return list(self.bad_words)

    def scanno_save(self, section):
        """Return the scannos set as a list of strings."""
        return list(self.scannos)

    #
    # To save the vocabulary, write a list for each word:
    #     [ "token", count, "tag", [prop-code...] ]
    # where "token" is the word as a string, count is an integer, "tag" is
    # its alt-dict tag or a null string, and [prop-code...] is the integer
    # values from the word's property set as a list. Note that the tag needs
    # to be a string because json doesn't handle None. This is the same item
    # order that word_read() expects.
    #
    def word_save(self, section):
        """Return the vocabulary as a list of [word, count, tag, props]."""
        vlist = []
        for word in self.vocab:
            [count, prop_set] = self.vocab[word]
            tag = ""
            if AD in prop_set:
                if word in self.alt_tags:
                    tag = self.alt_tags[word]
                else:
                    # should never occur, could be assertion error
                    worddata_logger.error('erroneous alt tag on ' + word)
            plist = list(prop_set)
            vlist.append([word, count, tag, plist])
        return vlist

    #
    # Methods used to load metadata. Called by the metadata manager with
    # a single Python object, presumably the object that was prepared by
    # the matching _save method above. Because the user might edit the
    # metadata file, do a little quality control.
    #
    def good_read(self, section, value, version):
        """Load the good-words list, ignoring non-strings and bad-words."""
        if isinstance(value, list):
            for token in value:
                if isinstance(token, str):
                    if token in self.bad_words:
                        worddata_logger.warning(
                            '"{}" is in both good and bad words - use in good ignored'.format(token)
                        )
                    else:
                        self.good_words.add(token)
                        if token in self.vocab:
                            # vocab already loaded, it seems
                            props = self.vocab[token][1]
                            props.add(GW)
                            props &= prop_nox
                else:
                    worddata_logger.error(
                        '{} in GOODWORDS list ignored'.format(token)
                    )
            if len(self.good_words):
                # We loaded some, the display might need to change
                self.WordsUpdated.emit()
        else:
            worddata_logger.error(
                'GOODWORDS metadata is not a list of strings, ignoring it'
            )

    def bad_read(self, section, value, version):
        """Load the bad-words list, ignoring non-strings and good-words."""
        if isinstance(value, list):
            for token in value:
                if isinstance(token, str):
                    if token in self.good_words:
                        worddata_logger.warning(
                            '"{}" is in both good and bad words - use in bad ignored'.format(token)
                        )
                    else:
                        self.bad_words.add(token)
                        if token in self.vocab:
                            # vocab already loaded, it seems
                            props = self.vocab[token][1]
                            props.add(BW)
                            props.add(XX)
                else:
                    worddata_logger.error(
                        '{} in BADWORDS list ignored'.format(token)
                    )
            if len(self.bad_words):
                # We loaded some, the display might need to change
                self.WordsUpdated.emit()
        else:
            worddata_logger.error(
                'BADWORDS metadata is not a list of strings, ignoring it'
            )

    def scanno_read(self, section, value, version):
        """Load the scannos list, ignoring items that are not strings."""
        if isinstance(value, list):
            for token in value:
                if isinstance(token, str):
                    self.scannos.add(token)
                else:
                    worddata_logger.error(
                        '{} in SCANNOLIST ignored'.format(token)
                    )
        else:
            worddata_logger.error(
                'SCANNOLIST metadata is not a list of strings, ignoring it'
            )

    # Load the vocabulary section of a metadata file, allowing for
    # user-edited malformed items. Be very generous about user errors in a
    # modified meta file. The expected value for each word is as written by
    # word_save() above, ["token", count, tag, [props]] but allow a single
    # item ["token"] or just "token" so the user can put in a single word
    # with no count or properties. Convert null-string alt-tag to None.
    #
    # Before adding a word make sure to unicode-flatten it.
    #
    def word_read(self, section, value, version):
        """Load the vocabulary from metadata, skipping malformed items."""
        # get a new speller in case the Book read a different dict already
        self.speller = self.my_book.get_speller()
        # if value isn't a list, bail out now
        if not isinstance(value, list):
            worddata_logger.error(
                'WORDCENSUS metadata is not a list, ignoring it'
            )
            return
        # inspect each item of the list.
        for wlist in value:
            try:
                if isinstance(wlist, str):
                    # expand "token" to ["token"]
                    wlist = [wlist]
                if not isinstance(wlist, list):
                    raise ValueError
                if len(wlist) != 4:
                    if len(wlist) > 4:
                        raise ValueError
                    if len(wlist) == 1:
                        wlist.append(0)  # add default count of 0
                    if len(wlist) == 2:
                        wlist.append('')  # add default alt-tag
                    if len(wlist) == 3:
                        wlist.append([])  # add default props
                word = wlist[0]
                if not isinstance(word, str):
                    raise ValueError
                word = unicodedata.normalize('NFKC', word)
                count = int(wlist[1])  # exception if not numeric
                alt_tag = wlist[2]
                if not isinstance(alt_tag, str):
                    raise ValueError
                if alt_tag == '':
                    alt_tag = None
                prop_set = set(wlist[3])  # exception if not iterable
                if len(prop_set - PROP_ALL):
                    raise ValueError  # bogus props
            except Exception:
                # Any validation failure skips the item. Catch Exception
                # rather than everything, so KeyboardInterrupt and
                # SystemExit are not swallowed here.
                worddata_logger.error(
                    'WORDCENSUS item {} is invalid, ignoring it'.format(wlist)
                )
                continue
            # checking done, store the word.
            if (0 == len(prop_set)) or (0 == count):
                # word with no properties or count is a user addition, enter
                # it as if we found it in the file, including deducing the
                # properties, spell-check, hyphenation split.
                self._add_token(word, alt_tag)
                continue  # that's that, on to next line
            # Assume we have a word saved by word_save(), but possibly the
            # good_words and bad_words have been edited and read-in first.
            # Note we are not checking for duplicates
            if word in self.bad_words:
                prop_set.add(BW)
                prop_set.add(XX)
            if word in self.good_words:
                prop_set.add(GW)
                prop_set &= prop_nox
            if alt_tag:
                prop_set.add(AD)
                self.alt_tags[word] = alt_tag
            self.vocab[word] = [count, prop_set]
        # end of "for wlist in value"
        # note the current word count
        self.active_word_count = len(self.vocab)
        # Tell wordview that the display might need to change
        self.WordsUpdated.emit()
    # end of word_read()

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # Methods used when opening a new file, one with no metadata.
    #
    # The Book will call these methods passing a text stream when it finds a
    # good-words file or bad-words file. Each of these is expected to have
    # one token per line. We don't presume to know in what order the files
    # are presented, but we DO assume that the vocabulary census has not yet
    # been taken. That requires the user clicking Refresh and that cannot
    # have happened while first opening the file.

    def good_file(self, stream):
        """Read good-words, one per line, from ``stream``."""
        while not stream.atEnd():
            token = stream.readLine().strip()
            if token in self.bad_words:
                worddata_logger.warning(
                    '"{}" is in both good and bad words - use in good ignored'.format(token)
                )
            else:
                self.good_words.add(token)

    def bad_file(self, stream):
        """Read bad-words, one per line, from ``stream``."""
        while not stream.atEnd():
            token = stream.readLine().strip()
            if token in self.good_words:
                worddata_logger.warning(
                    '"{}" is in both good and bad words - use in bad ignored'.format(token)
                )
            else:
                self.bad_words.add(token)

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    #
    # The user can choose a new scannos file any time while editing. So there
    # might be existing data, so we clear the set before reading.
    #
    def scanno_file(self, stream):
        """Replace the scannos set with the tokens read from ``stream``."""
        self.scannos = set()  # clear any prior values
        while not stream.atEnd():
            token = stream.readLine().strip()
            self.scannos.add(token)

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # The following is called by the Book when the user chooses a different
    # spelling dictionary. Store a new spellcheck object. Recheck the
    # spelling of all words except those with properties HY, GW, or BW.
    #
    # NOTE IF THIS IS A PERFORMANCE BURDEN, KILL IT AND REQUIRE REFRESH
    #
    def recheck_spelling(self, speller):
        """Adopt ``speller`` and recompute XX for spell-checkable words."""
        self.speller = speller
        for i in range(len(self.vocab)):
            (c, p) = self.vocab_vview[i]
            if not (PROP_BGH & p):  # then p lacks BW, GW and HY
                p = p & prop_nox  # and now it also lacks XX
                w = self.vocab_kview[i]
                t = self.alt_tags.get(w, None)
                if not self.speller.check(w, t):
                    p.add(XX)
                self.vocab_vview[i][1] = p

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # Method to perform a census. This is called from wordview when the
    # user clicks the Refresh button asking for a new scan over all words in
    # the book. Formerly this took a progress bar, but the actual operation
    # is so fast no progress need be shown.
    #
    def refresh(self):
        """Recount every token of the document and rebuild properties."""
        # get a reference to the dictionary to use
        self.speller = self.my_book.get_speller()
        # clear the alt-dict list.
        self.alt_tags = SortedDict()
        # clear the sort vectors
        self.sort_up_vectors = [None, None, None]
        self.sort_down_vectors = [None, None, None]
        self.sort_key_funcs = [None, None, None]
        # Zero out all counts and property sets that we have so far. We will
        # develop new properties when each word is first seen. Properties
        # such as HY will not have changed, but both AD and XX might have
        # changed while the word text remains the same.
        for info in self.vocab.values():
            info[0] = 0
            info[1] = set()
        # iterate over all lines extracting tokens and processing them.
        alt_dict = None
        alt_tag = None
        for line in self.document.all_lines():
            m = RE_TOKEN.search(line, 0)
            while m:  # while match is not None
                if m.group(6):  # start-tag; has it lang= ?
                    d = RE_LANG_ATTR.search(m.group(8))
                    if d:
                        alt_dict = d.group(1)
                        alt_tag = m.group(7)
                elif m.group(9):
                    if m.group(10) == alt_tag:
                        # end tag of a lang= start tag
                        alt_dict = None
                        alt_tag = None
                else:
                    self._add_token(m.group(0), alt_dict)
                m = RE_TOKEN.search(line, m.end())
        # Look for zero counts and delete those items. It is forbidden to
        # alter the dict contents while iterating over values or keys views,
        # so make a list of the word tokens to be deleted, then use del.
        togo = [word for word, info in self.vocab.items() if info[0] == 0]
        for key in togo:
            del self.vocab[key]
        # Update possibly modified word count
        self.active_word_count = len(self.vocab)

    # Internal method for adding a possibly-hyphenated token to the vocabulary,
    # incrementing its count. This is used during the census/refresh scan, and
    # can be called from word_read to process a user-added word.
    # Arguments:
    #    tok_str: a normalized word-like token; may be hyphenated a/o apostrophized
    #    dic_tag: an alternate dictionary tag or None
    #
    # If the token has no hyphens, this is just a cover on _count. When the
    # token is hyphenated, we enter each part of it alone, then add the
    # phrase with the union of the prop_sets of its parts, plus HY. Thus
    # "mother-in-law's" will be added as "mother", "in" and "law's", and as
    # itself with HY, LC, AP. "1989-1995" puts 1989 and 1995 in the list and
    # will have HY and ND. Yes, this means that a hyphenation could have all
    # of UC, MC and LC.
    #
    # If a part of a phrase fails spellcheck, it will have XX but we do not
    # propagate that to the phrase itself.
    #
    # If a part of the phrase has AD (because it was previously entered as
    # part of a lang= string) that also is not propagated to the phrase
    # itself. Since hyphenated phrases are never spell-checked, they should
    # never have AD.
    #
    # Note: en-dash \u2013 is not supported here, only the ascii hyphen.
    # Support for it could be added if required.
    #
    # Defensive programming: '-'.split('-') --> ['','']; '-9'.split('-') --> ['','9']

    def _add_token(self, tok_str, dic_tag):
        """Count ``tok_str`` and, if newly hyphenated, its parts as well."""
        # Count the entire token regardless of hyphens
        self._count(tok_str, dic_tag)  # this definitely puts it in the dict
        [count, prop_set] = self.vocab[tok_str]
        if (count == 1) and (HY in prop_set):
            # We just added a hyphenated token: add its parts also.
            parts = tok_str.split('-')
            prop_set = {HY}
            for member in parts:
                if len(member):  # if not null split from leading -
                    self._count(member, dic_tag)
                    [x, part_props] = self.vocab[member]
                    prop_set |= part_props
            self.vocab[tok_str] = [count, prop_set - {XX, AD}]

    # Internal method to count a token, adding it to the list if necessary.
    # An /alt-tag must already be removed. The word must be already
    # normalized. Because of the way we tokenize, we know the token contains
    # only letter forms, numeric forms, and possibly hyphens and/or
    # apostrophes.
    #
    # If it is in the list, increment its count. Otherwise, compute its
    # properties, including spellcheck for non-hyphenated tokens, and
    # add it to the vocabulary with a count of 1. Returns nothing.

    def _count(self, word, dic_tag):
        """Increment the count of ``word``, creating its entry if needed."""
        [count, prop_set] = self.vocab.get(word, [0, set()])
        if count:
            # it was in the list: a new word would have count=0
            self.vocab[word][0] += 1  # increment its count
            return  # and done.
        # Word was not in the list (but is now): count is 0, prop_set is empty.
        # The following is only done once per unique word.
        self.my_book.metadata_modified(True, C.MD_MOD_FLAG)
        work = word  # rebound below as punctuation is stripped
        # If word has apostrophes, note that and delete for following tests.
        if -1 < work.find("'"):  # look for ascii apostrophe
            prop_set.add(AP)
            work = work.replace("'", "")
        if -1 < work.find('\u02bc'):  # look for MODIFIER LETTER APOSTROPHE
            prop_set.add(AP)
            work = work.replace('\u02bc', '')
        # If word has hyphens, note that and remove them.
        if -1 < work.find('-'):
            prop_set.add(HY)
            work = work.replace('-', '')
        # With the hyphens and apostrophes out, check letter case
        if ANY_DIGIT.search(work):
            # word has at least one numeric
            prop_set.add(ND)
        if not work.isnumeric():
            # word is not all-numeric, determine case of letters
            if work.lower() == work:
                prop_set.add(LC)  # most common case
            elif work.upper() != work:
                prop_set.add(MC)  # next most common case
            else:  # work.upper() == work
                prop_set.add(UC)
        if HY not in prop_set:
            # word is not hyphenated, so check its spelling.
            if word not in self.good_words:
                if word not in self.bad_words:
                    # Word in neither good- nor bad-words
                    if dic_tag:  # uses an alt dictionary
                        self.alt_tags[word] = dic_tag
                        prop_set.add(AD)
                    if not self.speller.check(word, dic_tag):
                        prop_set.add(XX)
                else:  # in bad-words
                    prop_set.add(XX)
            # else in good-words
        # else hyphenated, spellcheck only its parts
        self.vocab[word] = [1, prop_set]

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    #
    # The following methods are called from the Words panel.
    #
    # Get the count of words in the vocabulary, as selected by the
    # latest sort vector.
    #
    def word_count(self):
        """Return the number of words selected by the latest sort vector."""
        return self.active_word_count

    #
    # Get the actual size of the vocabulary, for searching it all.
    def vocab_count(self):
        """Return the total number of words in the vocabulary."""
        return len(self.vocab)

    #
    # Get the word at position n in the vocabulary, using the SortedDict
    # KeysView for indexed lookup. Guard against invalid indices.
    #
    def word_at(self, n):
        """Return the word at index ``n``, or '?' for a bad index."""
        try:
            return self.vocab_kview[n]
        except Exception:
            worddata_logger.error('bad call to word_at({0})'.format(n))
            return '?'

    #
    # Get the count and/or property-set of the word at position n in the
    # vocabulary, using the SortedDict ValuesView for indexed lookup.
    #
    def word_info_at(self, n):
        """Return [count, props] at index ``n``, or [0, set()] if invalid."""
        try:
            return self.vocab_vview[n]
        except Exception:
            # Message names this method (it used to say word_count_at).
            worddata_logger.error('bad call to word_info_at({0})'.format(n))
            return [0, set()]

    def word_count_at(self, n):
        """Return the count at index ``n``, or 0 for a bad index."""
        try:
            return self.vocab_vview[n][0]
        except Exception:
            worddata_logger.error('bad call to word_count_at({0})'.format(n))
            return 0

    def word_props_at(self, n):
        """Return the property set at index ``n``, or an empty set."""
        try:
            return self.vocab_vview[n][1]
        except Exception:
            worddata_logger.error('bad call to word_props_at({0})'.format(n))
            return set()

    #
    # Return a sort vector to implement column-sorting and/or filtering. The
    # returned value is a list of index numbers to self.vocab_vview and
    # vocab_kview such that iterating over the list selects vocabulary items
    # in some order. The parameters are:
    #
    # col is the number of the table column, 0:word, 1:count, 2:properties.
    # The sort key is formed based on the column:
    #   0: key is the word-token
    #   1: key is nnnnn:word-token so that words with the same count are
    #      in sequence.
    #   2: fffffffword-token so that words with the same props are in sequence.
    #
    # order is Qt.AscendingOrder or Qt.DescendingOrder
    #
    # key_func is a callable used to extract or condition the key value when
    # a new key is added to a SortedDict, usually created by natsort.keygen()
    # and used to implement locale-aware and case-independent sorting.
    #
    # filter_func is a callable that examines a vocab entry and returns
    # True or False, meaning include or omit this entry from the vector.
    # Used to implement property filters or harmonic-sets.
    #
    # To implement Descending order we return a reversed() version of the
    # matching Ascending order vector.
    #
    # Because vectors are expensive to make, we cache them, so that to
    # return to a previous sort order takes near zero time. However we can't
    # cache every variation of a filtered vector, so when a filter_func is
    # passed we make the vector every time.
    #
    def _make_key_getter(self, col):
        """Return a function mapping a vocab index to its sort key."""
        if col == 0:
            return lambda j: self.vocab_kview[j]
        elif col == 1:
            return lambda j: '{:05}:{}'.format(
                self.vocab_vview[j][0], self.vocab_kview[j]
            )
        else:  # col == 2
            return lambda j: prop_string(self.vocab_vview[j][1]) + self.vocab_kview[j]

    def get_sort_vector(self, col, order, key_func=None, filter_func=None):
        """Return vocab indices ordered by column ``col`` (see above).

        Also records the length of the returned vector as the active word
        count.
        """
        if filter_func:  # is not None,
            # create a sort vector from scratch, filtered
            getter_func = self._make_key_getter(col)
            sorted_dict = SortedDict(key_func)
            for j in range(len(self.vocab)):
                if filter_func(self.vocab_kview[j], self.vocab_vview[j][1]):
                    sorted_dict[getter_func(j)] = j
            vector = sorted_dict.values()
            if order != Qt.AscendingOrder:
                vector = list(reversed(vector))
        else:  # no filter_func, try to reuse a cached vector
            vector = self.sort_up_vectors[col]
            if not vector or key_func is not self.sort_key_funcs[col]:
                # there is no ascending vector for this column, or there
                # is one but it was made with a different key_func.
                getter_func = self._make_key_getter(col)
                sorted_dict = SortedDict(key_func)
                for j in range(len(self.vocab)):
                    sorted_dict[getter_func(j)] = j
                vector = self.sort_up_vectors[col] = sorted_dict.values()
                self.sort_key_funcs[col] = key_func
                # The cached descending vector was derived from the old
                # ascending one; drop it so it is rebuilt from the new one
                # instead of being returned stale.
                self.sort_down_vectors[col] = None
            if order != Qt.AscendingOrder:
                # what is wanted is a descending order vector, do we have one?
                if self.sort_down_vectors[col] is None:
                    # no, so create one from the asc. vector we now have
                    self.sort_down_vectors[col] = list(reversed(vector))
                # yes we do (now)
                vector = self.sort_down_vectors[col]
        # one way or another, vector is a sort vector
        # note the actual word count available through that vector
        self.active_word_count = len(vector)
        return vector

    # Return a reference to the good-words set
    def get_good_set(self):
        """Return the good-words set itself (not a copy)."""
        return self.good_words

    # Note the addition of a word to the good-words set. The word probably
    # (but does not have to) exist in the database; add GW and remove XX from
    # its properties.
    def add_to_good_set(self, word):
        """Add ``word`` to good-words; set GW and clear XX if it is known."""
        self.good_words.add(word)
        if word in self.vocab_kview:
            [count, pset] = self.vocab[word]
            pset.add(GW)
            pset.discard(XX)
            self.vocab[word] = [count, pset]

    # Note the removal of a word from the good-words set. The word exists in
    # the good-words set, because the wordview panel good-words list only
    # calls this for words it is displaying. The word may or may not exist in
    # the database. If it does, remove GW and set XX based on a spellcheck
    # test.
    def del_from_good_set(self, word):
        """Remove ``word`` from good-words and re-derive XX if it is known.

        Raises KeyError if ``word`` is not in the good-words set.
        """
        self.good_words.remove(word)
        if word in self.vocab_kview:
            [count, pset] = self.vocab[word]
            pset -= {GW, XX}
            dic_tag = self.alt_tags.get(word)
            if not self.speller.check(word, dic_tag):
                pset.add(XX)
            self.vocab[word] = [count, pset]

    # mostly used by unit test, get the index of a word by its key
    def word_index(self, w):
        """Return the vocab index of ``w``, or -1 if it cannot be found."""
        try:
            return self.vocab_kview.index(w)
        except Exception:
            worddata_logger.error('bad call to word_index({0})'.format(w))
            return -1

    # The following methods are used by the edit syntax highlighter to set flags.
    #
    # 1. Check a token for spelling. We expect the vast majority of words
    # will be in the list. And for performance, we want to respond in as little
    # code as possible! So if we know the word, reply at once.
    #
    # 2. If the word in the document isn't in the vocab, perhaps it is not
    # a normalized string, so try again, normalized.
    #
    # 3. If the token is still not in the list, return False, meaning it is
    # not misspelled. The opposite, returning True for misspelled, in a new
    # book before Refresh is done, would highlight everything. The unknown
    # token is not added to the vocabulary.
    #
    def spelling_test(self, tok_str):
        """Return True if ``tok_str`` is known and flagged as misspelled."""
        count, prop_set = self.vocab.get(tok_str, [0, set()])
        if count:  # it was in the list
            return XX in prop_set
        tok_nlz = unicodedata.normalize('NFKC', tok_str)
        [count, prop_set] = self.vocab.get(tok_nlz, [0, set()])
        return XX in prop_set

    #
    # Check a token for being in the scannos list. If no scannos
    # have been loaded, none will be hilited.
    #
    def scanno_test(self, tok_str):
        """Return True if ``tok_str`` is in the scannos set."""
        return tok_str in self.scannos
def test_get():
    """``SortedDict.get`` returns the stored value or the given default."""
    letters = string.ascii_lowercase
    # Map each lowercase letter to its position in the alphabet.
    by_letter = SortedDict(zip(letters, range(len(letters))))
    assert by_letter.get('a') == 0
    # Keys are case-sensitive, so the uppercase letter is missing.
    assert by_letter.get('A', -1) == -1
class DotMap(MutableMapping):
    """Mapping whose items can also be read and written as attributes.

    Items live in an internal ``SortedDict`` stored on ``_map``.  Reading a
    missing key (by subscript or attribute) creates a nested empty
    ``DotMap`` under that key, so chains like ``m.a.b = 1`` work.
    """

    def __init__(self, *args, **kwargs):
        """Populate from an optional plain ``dict`` and keyword arguments.

        Only a first positional argument whose type is exactly ``dict`` is
        used; nested plain dicts inside it are converted to ``DotMap``.
        """
        self._map = SortedDict()
        if args:
            d = args[0]
            if type(d) is dict:
                for k, v in self.__call_items(d):
                    if type(v) is dict:
                        v = DotMap(v)
                    self._map[k] = v
        if kwargs:
            for k, v in self.__call_items(kwargs):
                self._map[k] = v

    @staticmethod
    def __call_items(obj):
        """Return ``obj.iteritems()`` when it is a bound method, else ``obj.items()``."""
        if hasattr(obj, 'iteritems') and ismethod(getattr(obj, 'iteritems')):
            return obj.iteritems()
        else:
            return obj.items()

    def items(self):
        return self.iteritems()

    def iteritems(self):
        return self.__call_items(self._map)

    def __iter__(self):
        return self._map.__iter__()

    def __setitem__(self, k, v):
        self._map[k] = v

    def __getitem__(self, k):
        if k not in self._map:
            # automatically extend to new DotMap
            self[k] = DotMap()
        return self._map[k]

    def __setattr__(self, k, v):
        # ``_map`` is the only real instance attribute; everything else is
        # stored as a mapping item.
        if k == '_map':
            super(DotMap, self).__setattr__(k, v)
        else:
            self[k] = v

    def __getattr__(self, k):
        # __getattr__ only runs after normal lookup failed.  If that happens
        # for ``_map`` the attribute is genuinely unset (e.g. an instance
        # made without __init__); reading ``self._map`` here would re-enter
        # this method forever, so report the attribute as missing instead.
        if k == '_map':
            raise AttributeError(k)
        return self[k]

    def __delattr__(self, key):
        return self._map.__delitem__(key)

    def __contains__(self, k):
        return self._map.__contains__(k)

    def __str__(self):
        items = []
        for k, v in self.__call_items(self._map):
            items.append('{0}={1}'.format(k, repr(v)))
        out = 'DotMap({0})'.format(', '.join(items))
        return out

    def __repr__(self):
        return str(self)

    def to_dict(self):
        """Return a plain ``dict`` copy, converting nested ``DotMap`` values."""
        d = {}
        for k, v in self.items():
            if type(v) is DotMap:
                v = v.to_dict()
            d[k] = v
        return d

    def pprint(self):
        """Pretty-print the plain-dict form of this map."""
        pprint(self.to_dict())

    # proper dict subclassing
    def values(self):
        return self._map.values()

    @staticmethod
    def parse_other(other):
        """Return the underlying map of a ``DotMap``; pass anything else through."""
        if type(other) is DotMap:
            return other._map
        else:
            return other

    def __cmp__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__cmp__(other)

    def __eq__(self, other):
        other = DotMap.parse_other(other)
        if not isinstance(other, dict):
            return False
        return self._map.__eq__(other)

    def __ge__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__ge__(other)

    def __gt__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__gt__(other)

    def __le__(self, other):
        # Was ``DotMap.parseOther`` -- a name that does not exist -- so every
        # ``<=`` comparison raised AttributeError.
        other = DotMap.parse_other(other)
        return self._map.__le__(other)

    def __lt__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__lt__(other)

    def __ne__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__ne__(other)

    def __delitem__(self, key):
        return self._map.__delitem__(key)

    def __len__(self):
        return self._map.__len__()

    def copy(self):
        # NOTE(review): returns the same object, not a copy; kept as-is
        # because callers may rely on it -- confirm the intended semantics.
        return self

    def get(self, key, default=None):
        return self._map.get(key, default)

    def has_key(self, key):
        return key in self._map

    def iterkeys(self):
        return self._map.iterkeys()

    def itervalues(self):
        return self._map.itervalues()

    def keys(self):
        return self._map.keys()

    def pop(self, key, default=None):
        return self._map.pop(key, default)

    def setdefault(self, key, default=None):
        return self._map.setdefault(key, default)

    def viewitems(self):
        if version_info.major == 2 and version_info.minor >= 7:
            return self._map.viewitems()
        else:
            return self._map.items()

    def viewkeys(self):
        if version_info.major == 2 and version_info.minor >= 7:
            return self._map.viewkeys()
        else:
            return self._map.keys()

    def viewvalues(self):
        if version_info.major == 2 and version_info.minor >= 7:
            return self._map.viewvalues()
        else:
            return self._map.values()

    @classmethod
    def fromkeys(cls, seq, value=None):
        """Return a ``DotMap`` with each key of ``seq`` mapped to ``value``."""
        d = DotMap()
        d._map = SortedDict.fromkeys(seq, value)
        return d
class TxGraph(object):
    """represents a graph of all transactions within the current window

    Attributes:
        median(float)   : the current median of the degree of the nodes
        highMarker(int) : the latest timestamp seen so far
        lowMarker(int)  : the earliest timestamp of the window we are
                          interested in
        txMap(dict)     : this is a collection of EdgeList's with key being
                          the timestamp and the value an instance of EdgeList
        edgeMap(dict)   : this is collection of all Edges within a window
                          with key being the name of an Edge
        nodeMap(dict)   : this represents a collection of Nodes with a window
                          with key being the name of the Node
        degreeList(list): list of degrees of nodes (sorted)
    """

    WINDOW_SIZE = 60

    def __init__(self):
        self.median = 0
        self.highMarker = TxGraph.WINDOW_SIZE
        self.lowMarker = 1
        self.txMap = SortedDict()  # sorted by unix epoch (timestamp)
        self.edgeMap = SortedDict()  # sorted by edge name
        self.nodeMap = SortedDict()  # sorted by node name
        self.degreeList = SortedList()  # sorted by degree

    def __calculate_median(self, use_existing_list=False):
        """calculates median of the node degrees

        Args:
            use_existing_list(bool): when False the sorted degreeList is
                rebuilt from the positive degrees in nodeMap first

        Returns:
            float: the median degree

        Raises:
            Exception: when there are no degrees to take a median of
        """
        if not use_existing_list:
            # lets reconstruct the list
            self.degreeList = SortedList()
            for node in self.nodeMap.values():
                if node.degree > 0:
                    self.degreeList.add(node.degree)

        listLen = len(self.degreeList)
        if listLen == 0:
            raise Exception("No items in the degreeList")

        # Floor division keeps the index an int on Python 3 as well; plain
        # "/" produced a float there and indexing raised TypeError.
        mid = listLen // 2
        if (listLen % 2) == 0:
            return (self.degreeList[mid] + self.degreeList[mid - 1]) / 2.0

        return self.degreeList[mid] / 1.0

    def __get_edgelist(self, tstamp, create=True):
        """returns an instance of EdgeList with matching timestamp and
        creates one if needed
        """
        edgeList = self.txMap.get(tstamp, None)
        if edgeList is None and create is True:
            edgeList = EdgeList(tstamp)
            self.txMap[tstamp] = edgeList
        return edgeList

    def __getnode_with_name(self, name, create=True):
        """returns an instance of Node with matching name and creates one
        if necessary

        Args:
            name(str)   : name of the node
            create(bool): flag to indicate whether to create a missing node
        """
        node = self.nodeMap.get(name, None)
        if node is None and create is True:
            node = Node(name)
            self.nodeMap[name] = node
        return node

    def __incr_degree_of_edge_nodes(self, edge):
        """increments the degree of the two nodes of an edge and returns
        the new (source degree, target degree)
        """
        src = self.__getnode_with_name(edge.source)
        src.incr_degree()

        tar = self.__getnode_with_name(edge.target)
        tar.incr_degree()

        return (src.degree, tar.degree)

    def __decr_degree_of_edge_nodes(self, edge):
        """decrements the degree of the two nodes of an edge
        """
        self.__decr_degree_of_node(edge.source)
        self.__decr_degree_of_node(edge.target)

    def __decr_degree_of_node(self, name):
        """decrements the degree of a node and removes it from the nodeMap
        if degree is 0
        """
        node = self.__getnode_with_name(name, create=False)
        node.decr_degree()

        if node.degree == 0:
            del self.nodeMap[node.name]

    def __remove_edge(self, edge):
        """removes an edge from the graph and updates the degree of a node.
        If degree of a node goes to 0, then remove the node as well

        Args:
            edge(Edge) : An instance of Edge class
        """
        self.__decr_degree_of_edge_nodes(edge)
        del self.edgeMap[edge.name]

    def __update_tstamp_for_existing_edge(self, edgeName, tstamp):
        """updates the timestamp for an existing edge and moves the edge to
        an appropriate EdgeList

        Args:
            edgeName(str) : name of the edge to be updated
            tstamp(int)   : unix epoch of the timestamp
        """
        currEdge = self.edgeMap[edgeName]
        if not currEdge:
            return

        if tstamp <= currEdge.tstamp:
            return  # ignore older transactions within the window

        # remove the edge from the edgelist with old timestamp
        edgeList = self.__get_edgelist(currEdge.tstamp, create=False)
        del edgeList.edges[currEdge.name]

        # update the tstamp in the edge
        currEdge.tstamp = tstamp

        # move this edge to the correct edgelist
        edgeList = self.__get_edgelist(tstamp)
        edgeList.edges[currEdge.name] = currEdge

    def __update_tx_window(self):
        """updates the transaction window of the graph

        This method is called when a newer transaction out the window
        arrives. It does the following:
        1. Gets the edgeList's that are below the lowMarker
        2. Goes through the edges and deletes them from the edgeMap
        3. Update the degree of the nodes
        4. Moves the window by deleting the stale edgeLists
        """
        # Snapshot the stale timestamps first so txMap can be modified
        # afterwards without disturbing the range iterator.
        staleTStamps = list(
            self.txMap.irange(None, self.lowMarker, inclusive=(True, False))
        )

        for tstamp in staleTStamps:
            edgeList = self.txMap[tstamp]
            for edge in edgeList.edges.values():
                self.__remove_edge(edge)

        # lets delete the stale edgelists. Deleting by key removes exactly
        # the entries visited above, also when the last stale timestamp is
        # 0 (which the old truthiness check skipped), and does not need the
        # index-based "iloc" view.
        for tstamp in staleTStamps:
            del self.txMap[tstamp]

    def process_transaction(self, tstamp, source, target):
        """this is the starting point of transaction processing.

        We first check whether the tx is within the window. If it is, then
        we update the Edge (if it already exists) or create a new Edge if
        necessary and update the median. If the tx is not within the window
        and is newer, we then move the window and remove all stale(older)
        edges and create a new edge for the newer transaction and finally
        update the median

        Raises:
            Exception: when source or target is None or empty, or when they
                are equal
        """
        # basic sanity checks
        if source is None or target is None:
            raise Exception("Invalid node")

        if len(source) == 0 or len(target) == 0:
            raise Exception("Invalid node")

        if source == target:
            raise Exception("source and target cannot be the same")

        # timestamp of the transaction is old and can be ignored
        if tstamp < self.lowMarker:
            return

        # create a new edge representing this transaction
        newEdge = Edge(tstamp, source, target)

        if tstamp <= self.highMarker:
            if newEdge.name in self.edgeMap:
                self.__update_tstamp_for_existing_edge(newEdge.name, tstamp)
                # no need to recalculate the median here since degree does
                # not change
                return

            # handle new edge
            # 1. find the edgelist with the same timestamp (if not create it)
            # 2. add this edge to the edgelist and edgemap
            # 3. create new Nodes for the edges if needed or update their
            #    degrees
            # 4. update the degreeList with the new degrees
            # 5. recalculate the median but use the existing degreeList
            edgeList = self.__get_edgelist(tstamp)
            edgeList.edges[newEdge.name] = newEdge
            self.edgeMap[newEdge.name] = newEdge

            # this is optimization because most of the degrees of the nodes
            # hasn't changed and therefore we can reuse the existing list
            srcDegree, tarDegree = self.__incr_degree_of_edge_nodes(newEdge)
            for degree in (srcDegree, tarDegree):
                if degree == 1:
                    self.degreeList.add(1)
                else:
                    self.degreeList.remove(degree - 1)
                    self.degreeList.add(degree)

            self.median = self.__calculate_median(use_existing_list=True)
            return

        # this transaction is newer and we need to move the window
        # 1. update the low and high markers of the timestamp window
        # 2. create edgelist with this newer timestamp
        # 3. add the new edge to the edgelist
        # 4. add the new edge to the edgemap
        # 5. create new Nodes of the edges if needed or update their degrees
        # 6. calculate the median (but reconstruct the degreeList)
        self.highMarker = tstamp
        self.lowMarker = tstamp - TxGraph.WINDOW_SIZE + 1
        self.__update_tx_window()

        if newEdge.name in self.edgeMap:
            self.__update_tstamp_for_existing_edge(newEdge.name, tstamp)
        else:
            edgeList = self.__get_edgelist(tstamp)
            edgeList.edges[newEdge.name] = newEdge
            self.edgeMap[newEdge.name] = newEdge
            self.__incr_degree_of_edge_nodes(newEdge)

        self.median = self.__calculate_median()
class OrderBook(WebsocketClient):
    """Level-3 order book for one product, maintained from websocket messages.

    Each side is a ``SortedDict`` mapping a ``Decimal`` price to the list of
    resting orders at that price. Every order is a dict with the keys
    ``id``, ``side``, ``price`` and ``size``.
    """

    def __init__(self, product_id='BTC-USD', log_to=None):
        """Create an empty book.

        Args:
            product_id: product to subscribe to.
            log_to: optional writable file-like object; when given, every
                received message is pickled into it.
        """
        super(OrderBook, self).__init__(products=product_id)
        self._asks = SortedDict()
        self._bids = SortedDict()
        self._client = PublicClient()
        # -1 marks "no snapshot loaded yet"; see on_message
        self._sequence = -1
        self._log_to = log_to
        if self._log_to:
            assert hasattr(self._log_to, 'write')
        self._current_ticker = None

    @property
    def product_id(self):
        ''' Currently OrderBook only supports a single product even though it is stored as a list of products. '''
        return self.products[0]

    def on_open(self):
        """Mark the book as uninitialized when the socket opens."""
        self._sequence = -1
        print("-- Subscribed to OrderBook! --\n")

    def on_close(self):
        """Report that the socket was closed."""
        print("\n-- OrderBook Socket Closed! --")

    def reset_book(self):
        """Discard the book and rebuild it from a level-3 snapshot."""
        self._asks = SortedDict()
        self._bids = SortedDict()
        res = self._client.get_product_order_book(product_id=self.product_id, level=3)
        # snapshot rows are indexed as [price, size, id]
        for bid in res['bids']:
            self.add({
                'id': bid[2],
                'side': 'buy',
                'price': Decimal(bid[0]),
                'size': Decimal(bid[1])
            })
        for ask in res['asks']:
            self.add({
                'id': ask[2],
                'side': 'sell',
                'price': Decimal(ask[0]),
                'size': Decimal(ask[1])
            })
        self._sequence = res['sequence']

    def on_message(self, message):
        """Apply one feed message to the book.

        While no snapshot is loaded, the message only triggers
        ``reset_book`` and is dropped. Messages at or below the current
        sequence are ignored; a jump of more than one is reported through
        ``on_sequence_gap``.
        """
        if self._log_to:
            pickle.dump(message, self._log_to)

        sequence = message.get('sequence', -1)
        if self._sequence == -1:
            self.reset_book()
            return
        if sequence <= self._sequence:
            # ignore older messages (e.g. before order book initialization from getProductOrderBook)
            return
        elif sequence > self._sequence + 1:
            self.on_sequence_gap(self._sequence, sequence)
            return

        msg_type = message['type']
        if msg_type == 'open':
            self.add(message)
        elif msg_type == 'done' and 'price' in message:
            self.remove(message)
        elif msg_type == 'match':
            self.match(message)
            self._current_ticker = message
        elif msg_type == 'change':
            self.change(message)

        self._sequence = sequence

    def on_sequence_gap(self, gap_start, gap_end):
        """Rebuild the book after missed messages and report the gap."""
        self.reset_book()
        # reset_book has already updated self._sequence to the snapshot's
        print('Error: messages missing ({} - {}). Re-initializing book at sequence {}.'.format(
            gap_start, gap_end, self._sequence))

    def _level_accessors(self, side):
        """Return the (get, set, remove) price-level accessors for ``side``.

        ``'buy'`` selects the bid accessors; any other value selects asks.
        """
        if side == 'buy':
            return self.get_bids, self.set_bids, self.remove_bids
        return self.get_asks, self.set_asks, self.remove_asks

    def add(self, order):
        """Append an order to the end of its price level.

        Accepts both feed messages (``order_id`` / ``remaining_size``) and
        snapshot-style dicts (``id`` / ``size``).
        """
        order = {
            'id': order.get('order_id') or order['id'],
            'side': order['side'],
            'price': Decimal(order['price']),
            'size': Decimal(order.get('size') or order['remaining_size'])
        }
        get_level, set_level, _ = self._level_accessors(order['side'])
        level = get_level(order['price'])
        if level is None:
            level = [order]
        else:
            level.append(order)
        set_level(order['price'], level)

    def remove(self, order):
        """Remove the order named by ``order['order_id']`` from its price level.

        The price level itself is deleted once no orders remain on it.
        Unknown price levels are ignored.
        """
        price = Decimal(order['price'])
        get_level, set_level, remove_level = self._level_accessors(order['side'])
        level = get_level(price)
        if level is None:
            return
        remaining = [o for o in level if o['id'] != order['order_id']]
        if remaining:
            set_level(price, remaining)
        else:
            remove_level(price)

    def match(self, order):
        """Apply a trade against the first order resting at the match price.

        A full fill removes that order, and removes the whole price level
        when it was the last order there, so that ``get_bid``/``get_ask``
        never report a price with no resting orders. A partial fill reduces
        the order's size.

        Raises:
            AssertionError: if the first resting order is not the maker
                named in the message.
        """
        size = Decimal(order['size'])
        price = Decimal(order['price'])
        get_level, set_level, remove_level = self._level_accessors(order['side'])

        level = get_level(price)
        if not level:
            return

        maker = level[0]
        assert maker['id'] == order['maker_order_id']
        if maker['size'] == size:
            remaining = level[1:]
            if remaining:
                set_level(price, remaining)
            else:
                remove_level(price)
        else:
            maker['size'] -= size
            set_level(price, level)

    def change(self, order):
        """Set a resting order's size to ``order['new_size']``.

        Messages without ``new_size`` or ``price``, and messages for orders
        that are not in the book, are ignored.
        """
        try:
            new_size = Decimal(order['new_size'])
        except KeyError:
            return

        try:
            price = Decimal(order['price'])
        except KeyError:
            return

        get_level, set_level, _ = self._level_accessors(order['side'])
        level = get_level(price)
        if level is None:
            return

        # update the first order with a matching id, if there is one
        for resting in level:
            if resting['id'] == order['order_id']:
                resting['size'] = new_size
                set_level(price, level)
                return

    def get_current_ticker(self):
        """Return the most recent match message, or None if none was seen."""
        return self._current_ticker

    def get_current_book(self):
        """Return the book as plain lists.

        Returns:
            dict with ``sequence`` plus ``asks`` and ``bids``, each a list
            of ``[price, size, id]`` rows in ascending price order.
        """
        result = {
            'sequence': self._sequence,
            'asks': [],
            'bids': [],
        }
        for ask in self._asks:
            try:
                # There can be a race condition here, where a price point is removed
                # between these two ops
                this_ask = self._asks[ask]
            except KeyError:
                continue
            for order in this_ask:
                result['asks'].append([order['price'], order['size'], order['id']])
        for bid in self._bids:
            try:
                # There can be a race condition here, where a price point is removed
                # between these two ops
                this_bid = self._bids[bid]
            except KeyError:
                continue

            for order in this_bid:
                result['bids'].append([order['price'], order['size'], order['id']])
        return result

    def get_ask(self):
        """Return the lowest ask price; the ask side must not be empty."""
        return self._asks.peekitem(0)[0]

    def get_asks(self, price):
        """Return the list of asks at ``price``, or None if there is none."""
        return self._asks.get(price)

    def remove_asks(self, price):
        """Delete the ask level at ``price``; raises KeyError if absent."""
        del self._asks[price]

    def set_asks(self, price, asks):
        """Store ``asks`` as the ask level at ``price``."""
        self._asks[price] = asks

    def get_bid(self):
        """Return the highest bid price; the bid side must not be empty."""
        return self._bids.peekitem(-1)[0]

    def get_bids(self, price):
        """Return the list of bids at ``price``, or None if there is none."""
        return self._bids.get(price)

    def remove_bids(self, price):
        """Delete the bid level at ``price``; raises KeyError if absent."""
        del self._bids[price]

    def set_bids(self, price, bids):
        """Store ``bids`` as the bid level at ``price``."""
        self._bids[price] = bids
class CacheStore(object):
    """Keyed store whose entries carry a validity flag.

    Entries live in a ``SortedDict``; each one pairs its data with an
    ``Event`` that is set while the data is valid. Mutating operations take
    ``self.lock``; plain reads do not.
    """

    class CacheItem(object):
        """One cache slot: the stored data plus its validity event."""

        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        """Create an empty store; ``key`` is passed through to ``SortedDict``."""
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        """Same as ``get(item)``: missing keys yield None instead of raising."""
        return self.get(item)

    def put(self, key, data):
        """Store ``data`` under ``key`` and mark the entry valid.

        Returns:
            True if a new entry was created, False if an existing entry
            was overwritten.
        """
        with self.lock:
            item = self.store.get(key)
            created = item is None
            if created:
                item = self.CacheItem()

            item.data = data
            item.valid.set()

            # insert only after the item is populated and flagged valid
            if created:
                self.store[key] = item

            return created

    def get(self, key, default=None, timeout=None):
        """Return the data stored under ``key``.

        Waits up to ``timeout`` seconds (forever when None) for the entry
        to become valid, then returns its data whether or not the wait
        succeeded. Returns ``default`` when the key is absent.
        """
        item = self.store.get(key)
        if item is not None:
            item.valid.wait(timeout)
            return item.data

        return default

    def remove(self, key):
        """Delete ``key``; return True if it existed, False otherwise."""
        with self.lock:
            if key in self.store:
                del self.store[key]
                return True

            return False

    def exists(self, key):
        """Return whether ``key`` is present, valid or not."""
        return key in self.store

    def rename(self, oldkey, newkey):
        """Move the entry at ``oldkey`` to ``newkey`` and set its ``'id'``.

        The stored data must support item assignment. Renaming a key to
        itself keeps the entry in place. Note that the lookup waits for the
        entry to become valid while the lock is held.
        """
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            # when both keys are equal, removing oldkey would delete the
            # entry that was just stored
            if oldkey != newkey:
                self.remove(oldkey)

    def is_valid(self, key):
        """Return True if ``key`` exists and its entry is currently valid."""
        item = self.store.get(key)
        if item is not None:
            return item.valid.is_set()

        return False

    def invalidate(self, key):
        """Mark the entry at ``key`` invalid; absent keys are ignored."""
        with self.lock:
            item = self.store.get(key)
            if item is not None:
                item.valid.clear()

    def itervalid(self):
        """Yield ``(key, data)`` for every valid entry.

        Iterates over a copy of the items, so entries may be removed while
        the generator is being consumed.
        """
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        """Yield the data of every valid entry, iterating over a copy."""
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        """Remove every valid entry whose data satisfies ``predicate``.

        Returns:
            list of the removed keys.
        """
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)

        return result

    def query(self, *filter, **params):
        """Run ``query(*filter, **params)`` on the wrapped list of valid values."""
        return wrap(list(self.validvalues())).query(*filter, **params)