def busiestServers(self, k, arrival, load):
    """Return the indices of the servers that handle the most requests.

    Request i prefers server i % k; if busy, the next free server with a
    larger index (wrapping around) takes it; if none is free it is dropped.

    :type k: int
    :type arrival: List[int]  # strictly increasing arrival times (LC 1606)
    :type load: List[int]
    :rtype: List[int]
    """
    pq = []                 # min-heap of (finish_time, server_index) for busy servers
    n = len(arrival)
    serves = [0] * k        # requests handled per server
    finish = SortedSet()    # indices of currently free servers (sorted for bisect)
    # The first min(k, n) requests go straight to servers 0..k-1.
    for i in range(k):
        if i < n:
            heapq.heappush(pq, (arrival[i] + load[i], i))
            serves[i] += 1
    for i in range(k, n):
        arr = arrival[i]
        dur = load[i]
        # Free every server whose job has finished by this arrival.
        while pq and pq[0][0] <= arr:
            _, idx = heapq.heappop(pq)
            finish.add(idx)
        idx = (i + k) % k   # preferred server; equivalent to i % k for i >= 0
        if not finish:
            continue  # no servers available
        if idx not in finish:
            # `bisect` is SortedSet's bisect_right; with idx absent it equals
            # bisect_left, i.e. the first free server index > idx.
            j = finish.bisect(idx)
            if j >= len(finish):
                j = 0  # wrap around to the smallest free index
            idx = finish[j]
        heapq.heappush(pq, (arr + dur, idx))
        serves[idx] += 1
        finish.remove(idx)
    mx = max(serves)
    return [i for i, s in enumerate(serves) if s == mx]
class PriorityQueue:
    """Priority queue with updatable priorities; the smallest priority pops
    first.  Equal priorities are served in insertion order (FIFO), enforced
    by a monotonically increasing counter attached to every entry.
    """

    def __init__(self):
        # value -> (priority, counter) entry currently stored in the tree
        self._entry_by_value = dict()
        # ((priority, counter), value) pairs in ascending order
        self._ordered = SortedSet()
        self._tick = 0

    def add_or_update(self, value, priority):
        """Insert *value*, or move an existing *value* to a new priority."""
        old_entry = self._entry_by_value.pop(value, None)
        if old_entry is not None:
            self._ordered.remove((old_entry, value))
        entry = (priority, self._tick)
        self._entry_by_value[value] = entry
        self._ordered.add((entry, value))
        self._tick += 1

    def get_priority(self, value):
        """Return the priority of *value* (raises KeyError if absent)."""
        return self._entry_by_value[value][0]

    def remove_if_present(self, value):
        """Remove *value* if queued; silently do nothing otherwise."""
        old_entry = self._entry_by_value.pop(value, None)
        if old_entry is not None:
            self._ordered.remove((old_entry, value))

    def __len__(self):
        return len(self._entry_by_value)

    def pop_first(self):
        """:returns (value, priority) of element with the lowest priority"""
        assert len(self) > 0, "priority queue is empty!"
        (priority, _), value = self._ordered.pop(0)
        del self._entry_by_value[value]
        return value, priority
class StockPrice:
    """Tracks timestamped price corrections; answers latest/min/max queries."""

    def __init__(self):
        self.Prices = dict()       # timestamp -> most recent price for it
        self.MinMax = SortedSet()  # (price, timestamp) pairs ordered by price
        self.Ts = 0                # newest timestamp observed so far

    def update(self, timestamp: int, price: int) -> None:
        # A repeated timestamp is a correction: drop the stale pair first.
        stale = self.Prices.get(timestamp)
        if stale is not None:
            self.MinMax.remove((stale, timestamp))
        self.Prices[timestamp] = price
        self.MinMax.add((price, timestamp))
        if timestamp > self.Ts:
            self.Ts = timestamp

    def current(self) -> int:
        """Price at the latest timestamp."""
        return self.Prices[self.Ts]

    def maximum(self) -> int:
        """Highest price currently on record."""
        return self.MinMax[-1][0]

    def minimum(self) -> int:
        """Lowest price currently on record."""
        return self.MinMax[0][0]
def get_skyline_silhouette(buildings):
    """Sweep-line skyline: return the list of [x, height] key points.

    Active buildings are kept in a SortedSet ordered by descending height,
    so the current skyline height is always sorted_set[0].height.
    """
    Building = namedtuple('Building', ('start', 'end', 'height'))
    EndPoint = namedtuple('EndPoint', ('x', 'y', 'is_start', 'building'))
    if buildings:
        # Drop malformed triples before converting to named tuples.
        buildings = [
            Building(b[0], b[1], b[2]) for b in buildings if b and len(b) == 3
        ]
    # Two events per building: its left (start) and right (end) edge.
    points = [
        point for building in buildings for point in (
            EndPoint(building.start, building.height, True, building),
            EndPoint(building.end, building.height, False, building))
    ]
    # Order events left-to-right; at equal x process starts before ends,
    # and taller edges first so ties produce a single key point.
    points.sort(key=lambda p: (p.x, not p.is_start, -p.y))
    sorted_set = SortedSet(key=lambda b: -b.height)  # active buildings, tallest first
    result, current_height = list(), 0
    for point in points:
        if point.is_start:
            if point.y > current_height:
                # This building rises above the skyline: emit a key point.
                current_height = point.y
                result.append([point.x, point.y])
            sorted_set.add(point.building)
        else:
            if point.building in sorted_set:
                sorted_set.remove(point.building)
            # Tallest remaining building (or ground level if none).
            new_height = sorted_set[0].height if sorted_set else 0
            if current_height != new_height:
                current_height = new_height
                if result[-1][0] == point.x:
                    # Replace a key point emitted at the same x coordinate.
                    result.pop()
                result.append([point.x, current_height])
    return result
class MultiSelectCursor(FieldCursor):
    """Cursor allowing setting and unsetting values on a MultiSelectField

    Respects parent field's validation
    """

    def __init__(self, *args, **kwargs):
        super(MultiSelectCursor, self).__init__(*args, **kwargs)
        # Re-wrap the inherited elements in a SortedSet so iteration order
        # is deterministic.  (self._elements comes from FieldCursor.)
        self._elements = SortedSet(self._elements)

    def select(self, element):
        """Add an element to the set of selected elements

        Proxy to internal set.add and sync field

        Raises whatever the parent field's validate_value raises when the
        element is not a valid choice; the set is untouched in that case.
        """
        self._field.validate_value(element)
        self._elements.add(element)
        self._sync_field()

    def deselect(self, element):
        """Remove an element from the set of selected elements

        Proxy to internal set.remove and sync field

        NOTE(review): raises KeyError when the element is not currently
        selected — presumably intentional; confirm against callers.
        """
        self._elements.remove(element)
        self._sync_field()
class OrderBook(object):
    """Keeps every received order sorted by *sort_key* and exposes the open
    (unfilled) subset, the best open order, and its limit price.
    """

    def __init__(self, clock, sort_key):
        self.clock = clock
        self._received_orders = SortedSet(key=sort_key)

    def record_order(self, order):
        """Stamp *order* with the current clock tick and index it."""
        order.tick = self.clock.current_tick
        self._received_orders.add(order)

    def cancel_order(self, order):
        """Drop *order* from the book and clear its tick."""
        order.tick = None
        self._received_orders.remove(order)

    @property
    def open_orders(self):
        # BUG FIX: filter() returns a lazy iterator in Python 3, which has
        # no len() and no indexing — best_order crashed with TypeError and
        # __str__ printed the repr of a filter object.  Materialize to a
        # list; it preserves the sort_key ordering of the SortedSet.
        return [o for o in self._received_orders if not o.filled]

    @property
    def best_order(self):
        """First open order under sort_key ordering, or None if none open."""
        cache = self.open_orders
        return cache[0] if len(cache) > 0 else None

    @property
    def best_price(self):
        """Limit price of the best open order, or None if the book is empty."""
        cache = self.best_order
        return cache.limit_price if cache is not None else None

    def __str__(self):
        return str(self.open_orders)
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """True iff some pair i != j has |nums[i] - nums[j]| <= t and |i - j| <= k.

    A sorted window of the previous k values is maintained; for each new
    value only its floor and ceiling inside the window need checking.
    """
    if not nums:
        return False
    window = SortedSet()
    for i, value in enumerate(nums):
        hi = window.bisect_left(value)  # position of the ceiling, O(log k)
        lo = hi - 1                     # position of the floor
        if hi < len(window) and window[hi] - value <= t:
            return True
        if lo >= 0 and value - window[lo] <= t:
            return True
        window.add(value)               # O(log k)
        if i >= k:
            window.remove(nums[i - k])  # evict the value leaving the window
    return False
def avoidFlood(self, rains: List[int]) -> List[int]:
    """Greedy flood avoidance: when a lake is about to be rained on a second
    time, dry it on the earliest unused dry day after its previous rain.
    Returns [] when a flood is unavoidable.
    """
    n = len(rains)
    answer = [-1] * n
    last_rain = {}          # lake -> most recent day it was filled
    dry_days = SortedSet()  # indices of not-yet-assigned zero days
    for day, lake in enumerate(rains):
        if lake == 0:
            dry_days.add(day)
        elif lake not in last_rain:
            last_rain[lake] = day
        else:
            # First dry day strictly between the two rains on this lake.
            pos = dry_days.bisect_left(last_rain[lake])
            if pos == len(dry_days):
                return []   # no such day -> flood
            chosen = dry_days[pos]
            answer[chosen] = lake
            dry_days.remove(chosen)
            last_rain[lake] = day
    # Leftover dry days may dry any lake; 1 is an arbitrary valid choice.
    for day in dry_days:
        answer[day] = 1
    return answer
def busiestServers(self, k: int, arrival: List[int], load: List[int]) -> List[int]:
    """Return the ids of the servers that handle the most requests.

    Request i goes to server i % k if free, otherwise to the next free
    server id (wrapping around), or is dropped when every server is busy.

    Free server ids live in a SortedSet for O(log k) "first id >= start"
    lookups; busy servers live in a min-heap of (finish_time, server_id).

    Fixed: removed a dangling ''' that left the original snippet with an
    unterminated string literal, and the commented-out debug prints.
    """
    from sortedcontainers import SortedSet
    available = SortedSet(range(k))   # free server ids, kept sorted
    unavailable = []                  # heap of (finish_time, server_id)
    cnt = [0] * k                     # requests handled per server
    for i, (a, l) in enumerate(zip(arrival, load)):
        # Release every server whose job finished by this arrival.
        while unavailable and unavailable[0][0] <= a:
            _, serverID = heapq.heappop(unavailable)
            available.add(serverID)
        if not available:
            continue  # request dropped
        pos = bisect.bisect_left(available, i % k)
        if pos == len(available):
            pos = 0   # wrap around to the smallest free id
        serverID = available[pos]
        available.remove(serverID)
        heapq.heappush(unavailable, (a + l, serverID))
        cnt[serverID] += 1
    busiest = max(cnt)
    return [i for i in range(k) if cnt[i] == busiest]
def parallel_execution(reqs: Requirements):
    """Simulate five workers executing dependent steps; return the number of
    time ticks until every step in *reqs* is complete.

    reqs maps step name -> set of prerequisite step names.  Step semantics
    (busy flag, get_work, work) live on the Worker class — assumed: work()
    returns the finished step name or None while still busy; confirm there.
    """
    def not_working(ws):
        # Workers currently idle (lazy filter — consumed once per tick).
        return filter(lambda w: not w.busy, ws)

    def working(ws):
        # Workers currently busy.
        return filter(lambda w: w.busy, ws)

    def worked_on(step, ws):
        # True if any worker is already assigned this step.
        return len([w for w in ws if w.step == step]) > 0

    completed_steps: set[str] = set()
    available_steps = SortedSet()  # sorted so ties resolve alphabetically
    num_keys = len(reqs.keys())
    time = 0
    workers = [Worker(), Worker(), Worker(), Worker(), Worker()]
    while len(completed_steps) < num_keys:
        # A step becomes available once all prerequisites are done and
        # nobody has completed or claimed it yet.
        for (step, req_set) in reqs.items():
            if (req_set <= completed_steps and step not in completed_steps
                    and not worked_on(step, workers)):
                available_steps.add(step)
        # Hand the alphabetically-first available steps to idle workers.
        for w in not_working(workers):
            if len(available_steps) == 0:
                continue
            next_step = str(available_steps[0])
            available_steps.remove(next_step)
            w.get_work(next_step)
        pprint(available_steps)
        pprint(workers)
        # Advance every busy worker by one tick; collect finished steps.
        for w in working(workers):
            result = w.work()
            if result is not None:
                completed_steps.add(result)
        time += 1
    return time
def run(self, in_data):
    """Build a task schedule for *in_data* and return its scored result.

    NOTE(review): this body uses single-letter aliases defined elsewhere in
    the module; from usage they appear to be (confirm at their definitions):
    a ~ range, d ~ len, J ~ heapq.heappush, q ~ heapq.heappop, s ~ map,
    g ~ min, R ~ sum, I ~ max, SS ~ a result wrapper.  Do not rename or
    restructure without verifying those aliases.
    """
    n, m = in_data.no_tasks, in_data.no_machines
    l = in_data.tasks
    r = []   # chosen task ids in schedule order (0-based until the end)
    N = []   # heap of (-weight/total_duration, task_id) candidates
    # Tasks ordered by descending due date, so A[-1] is the earliest due.
    A = SortedSet(a(n), key=lambda idx: -l[idx].due_date)
    Y = [0 for _ in a(m)]  # per-machine completion times
    for _ in a(n):
        # Move every task whose due date is already passed (relative to
        # machine 2's clock — presumably the reference machine; verify)
        # into the weighted candidate heap.
        while d(A) > 0 and Y[2] >= l[A[-1]].due_date:
            S = A.pop()
            L = l[S]
            J(N, (-L.weight / R(L.duration), S))
        if d(N) == 0:
            # No overdue candidates: pick the not-yet-due task minimizing
            # the self.v cost estimate.
            O = g(s(lambda task_id: (self.v(Y, l[task_id]), task_id), A))
            H = O[1]
            A.remove(H)
        else:
            # Otherwise take the candidate with the best weight/duration.
            _, H = q(N)
        L = l[H]
        r.append(H)
        # Flow-shop update: each machine starts after both its own previous
        # job and the preceding machine's stage of this job.
        p = 0
        for n in a(m):  # NOTE(review): shadows the outer task count `n`
            Y[n] = I(Y[n], p) + L.duration[n]
            p = Y[n]
    r = [e + 1 for e in r]  # convert to 1-based task ids
    U = Schedule(in_data.no_tasks, r)
    y = Evaluator136775().score(in_data, U)
    return SS(score=y, schedule=U)
def allocate(self, spec):
    """
    For each ASID pool in the spec, assign it to an unused ASID table slot.
    This modifies the spec's ASID pool objects in-place.

    Slot 0 is always skipped, because it is used for the init thread's
    ASID pool. We assume that the C loader also skips slot 0.

    This allocator allows ASID pools that already have assigned asid_high
    numbers. However, seL4 only allows allocating table slots in sequential
    order. Therefore, we raise AllocatorException if the spec's asid_high
    numbers cannot be obtained by the C loader.
    """
    assert isinstance(spec, Spec)
    num_asid_high = get_object_size(ObjectType.seL4_ASID_Table)
    free_asid_highs = SortedSet(range(num_asid_high))
    free_asid_highs.remove(0)  # Init thread's

    # Get all ASIDPools; sort by name to make allocation deterministic.
    asid_pools = sorted((obj for obj in spec.objs if isinstance(obj, ASIDPool)),
                        key=lambda obj: obj.name)

    # Check availability of asid_highs; check existing claims.
    for asid_pool in asid_pools:
        if asid_pool.asid_high is not None:
            if asid_pool.asid_high < 0 or asid_pool.asid_high >= num_asid_high:
                raise AllocatorException(
                    "invalid asid_high of 0x%x, ASID pool %s" %
                    (asid_pool.asid_high, asid_pool.name))
            # BUG FIX: the original condition was inverted — it raised
            # "already in use" when the slot was still *free*, and then the
            # else branch tried to remove a slot that was NOT in the free
            # set, crashing with KeyError.  A pre-assigned slot is valid
            # exactly when it is still free.
            elif asid_pool.asid_high not in free_asid_highs:
                raise AllocatorException(
                    "asid_high 0x%x already in use, can't give to ASID pool %s" %
                    (asid_pool.asid_high, asid_pool.name))
            else:
                free_asid_highs.remove(asid_pool.asid_high)

    # Allocate free_asid_highs in ascending order (the loader requirement).
    for asid_pool in asid_pools:
        if asid_pool.asid_high is None:
            if not free_asid_highs:
                raise AllocatorException(
                    "ran out of asid_highs to allocate (next ASID pool: %s)" %
                    asid_pool.name)
            else:
                asid_pool.asid_high = free_asid_highs.pop(0)

    # Check that asid_highs are contiguous — a gap means the sequential
    # C loader could never produce this assignment.
    for asid_pool in asid_pools:
        if asid_pool.asid_high > 0 and asid_pool.asid_high - 1 in free_asid_highs:
            raise AllocatorException(
                "asid_high not contiguous: %s wants 0x%x but 0x%x not assigned" %
                (asid_pool.name, asid_pool.asid_high, asid_pool.asid_high - 1))
class ExamRoom2(object):
    """Exam-room seating (LC 855) using a SortedSet of free segments.

    Each free gap between occupied seats is a segment (x, y) with x, y the
    occupied (or virtual -1 / N) endpoints.  Segments are ordered by the
    distance a student seated inside would gain, ties broken toward the
    lower seat number, so spots[-1] is always the best segment.
    """

    def __init__(self, N):
        """
        :type N: int
        """
        self.n = N
        self.startMap = {}  # left endpoint -> segment, for O(1) merge on leave
        self.endMap = {}    # right endpoint -> segment

        def distance(s):
            # Priority of a segment: how far the seated student would be
            # from the nearest neighbor.
            x, y = s
            if x == -1:
                return y            # left wall: sit at 0, distance y
            if y == N:
                return N - 1 - x    # right wall: sit at N-1
            # distance between the midpoint and an endpoint
            return (y - x) // 2

        # -seg[0] breaks distance ties in favor of the leftmost segment.
        self.spots = SortedSet([(-1, N)],
                               key=lambda seg: (distance(seg), -seg[0]))

    def addSeg(self, seg):
        # Register a free segment in the set and both endpoint indexes.
        self.spots.add(seg)
        self.startMap[seg[0]] = seg
        self.endMap[seg[1]] = seg

    def removeSeg(self, seg):
        self.spots.remove(seg)
        self.startMap.pop(seg[0], None)
        self.endMap.pop(seg[1], None)

    def seat(self):
        """
        Seat a student in the best free segment and return the seat index.

        :rtype: int
        """
        x, y = self.spots[-1]  # highest-priority segment
        if x == -1:
            seat = 0
        elif y == self.n:
            seat = self.n - 1
        else:
            seat = x + (y - x) // 2
        # Seating splits the segment into two smaller free segments.
        left = (x, seat)
        right = (seat, y)
        self.removeSeg((x, y))
        self.addSeg(left)
        self.addSeg(right)
        return seat

    def leave(self, p):
        """
        Vacate seat *p*, merging its two adjacent free segments.

        :type p: int
        :rtype: None
        """
        right = self.startMap[p]  # segment beginning at p
        left = self.endMap[p]     # segment ending at p
        merged = (left[0], right[1])
        self.removeSeg(left)
        self.removeSeg(right)
        self.addSeg(merged)
class DictGraph(Graph):
    """Graph that supports nonconsecutive vertex ids."""

    def __init__(self, nodes: Set[int] = None, r: int = 1) -> None:
        """Make a new graph.

        :param nodes: initial vertex set (a new SortedSet when omitted).
        :param r: radius — number of per-weight in-arc buckets to keep.
        """
        if nodes is None:
            self.nodes = SortedSet()  # type: Set[int]
        else:
            self.nodes = nodes
        self.radius = r
        # inarcs_by_weight[w-1][v] is the set of in-neighbors of v at weight w.
        self.inarcs_by_weight = [
            defaultdict(SortedSet) for _ in range(self.radius)
        ]

    def __len__(self):
        """len() support."""
        return len(self.nodes)

    def __iter__(self):
        """Iteration support."""
        return iter(self.nodes)

    def __contains__(self, v):
        """Support for `if v in graph`."""
        return v in self.nodes

    def add_node(self, u: int):
        """Add a new node."""
        self.nodes.add(u)

    def arcs(self, weight: int = None):
        """
        Return all the arcs in the graph.

        restrict to a given weight when provided
        NOTE(review): truthiness test means weight=0 falls into the
        all-weights branch; weights are 1-based, so 0 is presumably
        never passed — confirm with callers.
        """
        if weight:
            return [(x, y)
                    for x in self.nodes
                    for y in self.inarcs_by_weight[weight - 1][x]]
        else:
            # Include the (1-based) weight in each returned triple.
            return [(x, y, w + 1)
                    for w, arc_list in enumerate(self.inarcs_by_weight)
                    for x in self.nodes for y in arc_list[x]]

    def remove_isolates(self) -> List[int]:
        """
        Remove all isolated vertices and return a list of vertices removed.

        Precondition:  the graph is bidirectional
        """
        isolates = [v for v in self if self.in_degree(v, 1) == 0]
        for v in isolates:
            self.nodes.remove(v)
            # Drop the vertex's (empty) in-arc buckets at every weight.
            for N in self.inarcs_by_weight:
                if v in N:
                    del N[v]
        return isolates
class Charset(Configurable):
    """Character set for label encoding with reserved blank/unknown slots.

    NOTE(review): `corups` is a typo for "corpus" kept throughout because it
    is part of the configurable/public interface.
    """

    corups = State(default=string.hexdigits)   # default character corpus
    blank = State(default=0)                   # index reserved for blank
    unknown = State(default=1)                 # index reserved for unknown
    blank_char = State('\t')
    unknown_char = State('\n')
    case_sensitive = State(default=False)

    def __init__(self, corups=None, cmd={}, **kwargs):
        self.load_all(**kwargs)
        # Deduplicate + sort the corpus, then strip the sentinel characters
        # so they only appear at their reserved indices.
        self._corpus = SortedSet(self._filter_corpus(corups))
        if self.blank_char in self._corpus:
            self._corpus.remove(self.blank_char)
        if self.unknown_char in self._corpus:
            self._corpus.remove(self.unknown_char)
        self._charset = list(self._corpus)
        # Insert order matters: blank first, then unknown, so both land at
        # their configured indices (defaults 0 and 1).
        self._charset.insert(self.blank, self.blank_char)
        self._charset.insert(self.unknown, self.unknown_char)
        # char -> index lookup table.
        self._charset_lut = {
            char: index for index, char in enumerate(self._charset)
        }

    def _filter_corpus(self, corups):
        # Hook for subclasses; identity by default.
        return corups

    def __getitem__(self, index):
        return self._charset[index]

    def index(self, x):
        """Index of character *x*; unknown index when not in the charset."""
        target = x
        if not self.case_sensitive:
            # NOTE(review): assumes the stored corpus is upper-cased too.
            target = target.upper()
        return self._charset_lut.get(target, self.unknown)

    def is_empty(self, index):
        return index == self.blank or index == self.unknown

    def is_empty_char(self, x):
        return x == self.blank_char or x == self.unknown_char

    def __len__(self):
        return len(self._charset)

    def string_to_label(self, string_input, max_size=32):
        """Encode a string into an int32 label array (zero padded)."""
        length = max(max_size, len(string_input))
        target = np.zeros((length, ), dtype=np.int32)
        for index, c in enumerate(string_input):
            value = self.index(c)
            target[index] = value
        return target

    def label_to_string(self, label):
        """Decode a label sequence, skipping blank/unknown indices."""
        ingnore = [self.unknown, self.blank]
        return "".join([self._charset[i] for i in label if i not in ingnore])
class DictGraph(Graph):
    """Graph that supports nonconsecutive vertex ids."""

    def __init__(self, nodes: Set[int]=None, r: int=1) -> None:
        """Make a new graph.

        nodes: starting vertex set (fresh SortedSet if None).
        r: radius; one in-arc bucket per weight 1..r is allocated.
        """
        if nodes is None:
            self.nodes = SortedSet()  # type: Set[int]
        else:
            self.nodes = nodes
        self.radius = r
        # Bucket w-1 maps each vertex to its in-neighbors at weight w.
        self.inarcs_by_weight = [defaultdict(SortedSet)
                                 for _ in range(self.radius)]

    def __len__(self):
        """len() support."""
        return len(self.nodes)

    def __iter__(self):
        """Iteration support."""
        return iter(self.nodes)

    def __contains__(self, v):
        """Support for `if v in graph`."""
        return v in self.nodes

    def add_node(self, u: int):
        """Add a new node."""
        self.nodes.add(u)

    def arcs(self, weight: int=None):
        """
        Return all the arcs in the graph.

        restrict to a given weight when provided
        (weights are 1-based; NOTE(review): `if weight:` treats 0 like
        None — presumably 0 is never a valid weight here.)
        """
        if weight:
            return [(x, y) for x in self.nodes
                    for y in self.inarcs_by_weight[weight-1][x]]
        else:
            # All-weights variant also reports the 1-based weight.
            return [(x, y, w+1)
                    for w, arc_list in enumerate(self.inarcs_by_weight)
                    for x in self.nodes
                    for y in arc_list[x]]

    def remove_isolates(self) -> List[int]:
        """
        Remove all isolated vertices and return a list of vertices removed.

        Precondition:  the graph is bidirectional
        """
        isolates = [v for v in self if self.in_degree(v, 1) == 0]
        for v in isolates:
            self.nodes.remove(v)
            # Also delete the vertex's per-weight in-arc entries.
            for N in self.inarcs_by_weight:
                if v in N:
                    del N[v]
        return isolates
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """True iff two indices at most k apart hold values at most t apart.

    Sorted window of the last k values: the smallest window element that is
    >= num - t is the only candidate worth checking against num + t.
    """
    window = SortedSet()
    for i, value in enumerate(nums):
        pos = window.bisect_left(value - t)
        if pos < len(window) and window[pos] <= value + t:
            return True
        window.add(value)
        if i >= k:
            # Keep only the k most recent values in the window.
            window.remove(nums[i - k])
    return False
def longestSubarray(self, nums, limit):
    """Length of the longest subarray whose max - min is <= limit.

    The current window's indices are kept in a set ordered by their value,
    so indices[0] / indices[-1] are the positions of the window min / max.
    """
    indices = SortedSet(key=lambda i: nums[i])
    indices.add(0)
    left, best = 0, 1
    for right in range(1, len(nums)):
        indices.add(right)
        # Shrink from the left until the spread fits within the limit.
        while nums[indices[-1]] - nums[indices[0]] > limit:
            indices.remove(left)
            left += 1
        best = max(best, right - left + 1)
    return best
class SortedSetKey:
    """SortedSet whose ordering key (a per-value rank) can be changed.

    The key function reads self.dict, so a value MUST be removed from the
    set *before* its rank in self.dict changes — otherwise the set's
    internal bisection lookups break.  add()/update() rely on this order.
    """

    def __init__(self):
        self.dict = dict()  # value -> rank used as the sort key
        self.sorted_set = SortedSet(key=self.get_key)

    def __getitem__(self, item):
        return self.sorted_set[item]

    def __len__(self):
        return len(self.sorted_set)

    def __str__(self):
        return str(self.sorted_set)

    def get_key(self, value):
        # Sort key callback handed to SortedSet.
        return self.dict[value]

    def get_reversed_list(self, index, count):
        # `count` values in descending rank order, starting `index` from the top.
        return self[-1 - index:-1 - index - count:-1]

    def values(self):
        # Ascending-rank iterator over the stored values.
        for value in self.sorted_set:
            yield value

    def clear(self):
        self.sorted_set.clear()
        self.dict.clear()

    def destroy(self):
        self.sorted_set = None

    def index(self, value):
        return self.sorted_set.index(value)

    def pop(self, index=-1):
        # NOTE(review): does not remove the value from self.dict.
        return self.sorted_set.pop(index)

    def add(self, value, rank):
        """Insert *value* with *rank*, replacing any previous rank."""
        # Remove under the OLD rank before mutating the key source.
        if value in self.sorted_set:
            self.sorted_set.remove(value)
        self.dict[value] = rank
        self.sorted_set.add(value)

    def remove(self, value):
        self.sorted_set.remove(value)
        del self.dict[value]

    def update(self, value_list, rank_list):
        """Bulk re-rank: drop all values, set new ranks, reinsert."""
        # Same remove-before-rekey discipline as add(), done in bulk.
        self.sorted_set.difference_update(value_list)
        for i, value in enumerate(value_list):
            self.dict[value] = rank_list[i]
        self.sorted_set.update(value_list)
def lengthOfLIS(self, nums: List[int]) -> int:
    """Length of the longest strictly increasing subsequence.

    Patience-sorting on a set: for each new value x, evict its successor
    (the smallest element > x).  The set then holds one smallest tail per
    achievable subsequence length, so its size is the answer.
    """
    tails = SortedSet()
    for x in nums:
        if x in tails:
            continue  # strict LIS: duplicates never help
        tails.add(x)
        nxt = next(tails.irange(x + 1), None)  # smallest element > x
        # BUG FIX: the original tested `if nxt:` — when the successor is 0
        # (falsy) the eviction was skipped and the set over-counted
        # (e.g. [0, -1] returned 2 instead of 1).  Test against None.
        if nxt is not None:
            tails.remove(nxt)
    return len(tails)
def deckRevealedIncreasing(self, deck: list[int]) -> list[int]:
    """Order the deck so reveal-one / move-one-under reveals ascending values.

    Simulation over slot indices: repeatedly place the next smallest card
    into the current free slot, then skip one surviving slot.
    """
    deck.sort()
    free_slots = SortedSet(range(len(deck)))
    result = [0] * len(deck)
    cursor = 0
    for card in deck:
        cursor %= len(free_slots)      # wrap within the remaining slots
        slot = free_slots[cursor]
        result[slot] = card
        free_slots.remove(slot)
        cursor += 1                    # "move one under": skip a slot
    return result
def pathfind(start, goal, collide_check_func):
    """Grid path search from *start* to *goal* ((x, y) tuples).

    Returns the path as a list of points including both endpoints, or None
    when either endpoint collides or no path exists.

    NOTE(review): the heuristic is squared Manhattan distance (inadmissible)
    and the popped priority — which already contains the parent's heuristic —
    is folded into child costs, so this behaves like a greedy/weighted A*
    rather than exact A*; paths are found fast but may be suboptimal.
    """
    if collide_check_func(*goal) or collide_check_func(*start):
        return None
    q = SortedSet()  # frontier of (priority, point), smallest popped first

    def _heuristic(x1, y1, x2, y2):
        # Squared Manhattan distance to the goal.
        return (abs(x1 - x2) + abs(y1 - y2))**2

    max_h = _heuristic(*start, *goal)
    q.add((max_h, start))
    visited = set()
    curr_cost = dict()             # point -> (g-ish cost, heuristic)
    curr_cost[start] = (0, max_h)
    backtrace = dict()             # child point -> parent point
    found = False
    while q:
        cost, point = q.pop(0)     # lowest-priority frontier entry
        visited.add(point)
        if point == goal:
            found = True
            break
        # Expand the four axis-aligned neighbors.
        for dir in ((0, 1), (0, -1), (1, 0), (-1, 0)):
            npoint = (point[0] + dir[0], point[1] + dir[1])
            if collide_check_func(npoint[0], npoint[1]):
                continue
            if npoint in visited:
                continue
            h = _heuristic(*npoint, *goal)
            ncost = h + cost + 1
            # Relax: replace the frontier entry when this route is cheaper.
            # sum(curr_cost[p]) reproduces the priority the point was
            # queued with, so the stale entry can be located and removed.
            if npoint not in curr_cost or ncost < sum(curr_cost[npoint]):
                if npoint in curr_cost:
                    q.remove((sum(curr_cost[npoint]), npoint))
                q.add((ncost, npoint))
                curr_cost[npoint] = (cost + 1, h)
                backtrace[npoint] = point
    if not found:
        return None
    # Rebuild the path by walking parents from goal back to start.
    l = [goal]
    while l[-1] != start:
        curr = backtrace[l[-1]]
        l.append(curr)
    l.reverse()
    return l
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """True iff |nums[i] - nums[j]| <= t for some i != j with |i - j| <= k.

    Keeps a sorted window of at most k + 1 values; after each change, any
    qualifying pair inside the window must be adjacent in sorted order.

    Fixed: stripped the leftover debug `print` statements that ran on every
    element (the algorithm itself is unchanged).
    """
    window = SortedSet()
    # Prime the window with the first min(k + 1, n) values.
    for s in range(min(k + 1, len(nums))):
        num = nums[s]
        if num in window:
            return True  # exact duplicate within range (t >= 0)
        window.add(num)
        # Scan adjacent sorted pairs for a value gap within t.
        for i in range(len(window) - 1):
            if abs(window[i] - window[i + 1]) <= t:
                return True
    if k >= len(nums):
        return False  # whole array already examined
    # Slide the window one element at a time across the rest of the array.
    window.remove(nums[0])
    for s in range(len(nums) - k - 1):
        idx = s + k + 1
        if nums[idx] in window:
            return True
        window.add(nums[idx])
        for i in range(len(window) - 1):
            if abs(window[i] - window[i + 1]) <= t:
                return True
        window.remove(nums[s + 1])  # evict the value leaving the window
    return False
def gen_tree_arcs(ξ, n, m, m_neg, α=1, β=1, quiet=False): assert m > n - 1, "Number of arcs must be able to form a tree" # Sample the minimum required loop arcs nb_loop_arcs = max(0, 2 * n - 1 - m) loop_arc_predecessors = set(ξ.sample(range(n - 1), nb_loop_arcs)) tree_nodes = SortedSet([0]) with tqdm(total=n - 1, disable=quiet, desc="Tree arcs") as bar: def dive(u): """Add loop arcs to tree where possible""" while u in loop_arc_predecessors: tree_nodes.add(u + 1) bar.update() yield (u, u + 1) u += 1 # Keep track of nodes without parents in the tree or in the loop arcs. We ignore # loop arcs as they will be added by diving when the predecessor is added. parentless = SortedSet( set(range(1, n)) - set([u + 1 for u in loop_arc_predecessors])) # Source node must have at least one child, choose from parentless nodes if 0 not in loop_arc_predecessors: p_min = min(parentless) x = 1 + ξ.betavariate(α, β) * (n - 1) v = take_closest(parentless, x) parentless.remove(v) tree_nodes.add(v) bar.update() yield (0, v) yield from dive(v) else: yield from dive(0) # Remaining nodes must have exactly one parent, choose from nodes in tree. # parentless = list(parentless) # ξ.shuffle(parentless) # parentless = OrderedSet(parentless) for _ in range(len(parentless)): # choose the predecessor x = ξ.betavariate(α, β) * (n) v = take_closest(parentless, x) parentless.remove(v) # choose the successor x = ξ.betavariate(α, β) * n y = (v - x) % n u = take_closest(tree_nodes, y) tree_nodes.add(v) bar.update() yield (u, v) yield from dive(v)
def containsNearbyAlmostDuplicate(nums: List[int], k: int, t: int) -> bool:
    """True iff two values at most k indices apart differ by at most t."""
    from sortedcontainers import SortedSet
    window = SortedSet()
    left = 0
    for right, value in enumerate(nums):
        # Keep the window to the k values preceding index `right`.
        if right - left > k:
            window.remove(nums[left])
            left += 1
        # Smallest window element >= value - t is the only candidate.
        pos = bisect.bisect_left(window, value - t)
        if pos < len(window) and abs(window[pos] - value) <= t:
            return True
        window.add(value)
    return False
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """True iff some pair within index distance k differs by at most t.

    The sorted window holds the last k values; only the floor and ceiling
    of the incoming value can satisfy the difference bound.
    """
    window = SortedSet()  # O(n) outer loop over nums
    for i, value in enumerate(nums):
        floor, ceiling = find_floor_ceiling(window, value)  # O(log k)
        if floor is not None and value - floor <= t:
            return True
        if ceiling is not None and ceiling - value <= t:
            return True
        window.add(value)
        if len(window) > k:
            window.remove(nums[i - k])  # evict the oldest value, O(log k)
    return False
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """Sliding-window check for a pair with index gap <= k and value gap <= t."""
    from sortedcontainers import SortedSet
    window = SortedSet()
    oldest = 0
    end = 0
    while end < len(nums):
        if end - oldest > k:
            # Window grew past k entries: drop the stalest value.
            window.remove(nums[oldest])
            oldest += 1
        # First position where nums[end] - t could be inserted.
        candidate = bisect.bisect_left(window, nums[end] - t)
        if candidate < len(window) and abs(window[candidate] - nums[end]) <= t:
            return True
        window.add(nums[end])
        end += 1
    return False
class FileReference:
    """A class that manages n-triple files.

    This class stores information about the location of a n-triple file and
    is able to add and delete triples to that file.
    """

    def __init__(self, path, content):
        """Initialize a new FileReference instance.

        Args:
            path: A string of the filepath.
            content: Either the file content as a single string (each line
                is whitespace-normalized) or an iterable of triple lines.
        """
        if isinstance(content, str):
            # Collapse internal whitespace runs on every line.
            content = [' '.join(line.split()) for line in content.splitlines()]
        self._path = path
        self._content = SortedSet(content)
        self._modified = False

    @property
    def path(self):
        return self._path

    @property
    def content(self):
        # Sorted lines joined with a trailing newline.
        return "\n".join(self._content) + "\n"

    def add(self, data):
        """Add a triple to the file content."""
        self._content.add(data)

    def extend(self, data):
        """Add triples to the file content.

        BUG FIX: SortedSet has no `extend` method (it is a set, not a
        list), so the original raised AttributeError.  Bulk insertion on
        SortedSet is `update`.
        """
        self._content.update(data)

    def remove(self, data):
        """Remove triple from the file content."""
        try:
            self._content.remove(data)
        except KeyError:
            # Removing an absent triple is deliberately a no-op.
            pass
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """Return True when |nums[i] - nums[j]| <= t and 0 < |i - j| <= k."""
    from sortedcontainers import SortedSet
    recent = SortedSet()  # the (at most k) values preceding the current one
    lo = 0
    for hi in range(len(nums)):
        if hi - lo > k:
            recent.remove(nums[lo])  # slide the window forward
            lo += 1
        spot = bisect.bisect_left(recent, nums[hi] - t)
        if spot < len(recent) and abs(recent[spot] - nums[hi]) <= t:
            return True
        recent.add(nums[hi])
    return False
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """True iff two indices at most k apart hold values at most t apart.

    Sorted window of the last k values; the ceiling of num - t is the only
    element that can land within [num - t, num + t].

    Fixed: removed a leftover debug `print(st)` that ran on every element.
    """
    st = SortedSet()
    for i, num in enumerate(nums):
        index = st.bisect_left(num - t)  # position of the ceiling of num - t
        if index < len(st) and st[index] <= num + t:
            return True
        st.add(num)
        if len(st) > k:
            st.remove(nums[i - k])  # keep the window at k values
    return False
def build_execution_order(reqs: Requirements) -> str:
    """Topologically order the steps in *reqs*, breaking ties alphabetically.

    Each round, steps whose prerequisites are all complete join the ready
    set; the lexicographically smallest ready step executes next.
    """
    done: set[str] = set()
    sequence: list[str] = []
    ready = SortedSet()
    total = len(reqs.keys())
    while len(sequence) < total:
        # Refresh the ready set with every newly-unblocked step.
        for step, prerequisites in reqs.items():
            if prerequisites <= done and step not in done:
                ready.add(step)
        chosen: str = str(ready[0])
        ready.remove(chosen)
        sequence.append(chosen)
        done.add(chosen)
    return ''.join(sequence)
class FileReference:
    """A class that manages n-triple files.

    Stores information about the location of a n-triple file and can add
    and delete triples in its in-memory, sorted representation.
    """

    def __init__(self, path, content):
        """Initialize a new FileReference instance.

        Args:
            path: A string of the filepath.
            content: The file content as one string (lines are
                whitespace-normalized) or as an iterable of lines.
        """
        if isinstance(content, str):
            # Normalize each line's internal whitespace to single spaces.
            content = [' '.join(line.split()) for line in content.splitlines()]
        self._path = path
        self._content = SortedSet(content)
        self._modified = False

    @property
    def path(self):
        return self._path

    @property
    def content(self):
        # Newline-joined sorted triples, with a trailing newline.
        return "\n".join(self._content) + "\n"

    def add(self, data):
        """Add a triple to the file content."""
        self._content.add(data)

    def extend(self, data):
        """Add triples to the file content.

        BUG FIX: `SortedSet.extend` does not exist (sets have no extend);
        the original raised AttributeError on first use.  `update` is the
        SortedSet bulk-insert operation.
        """
        self._content.update(data)

    def remove(self, data):
        """Remove triple from the file content."""
        try:
            self._content.remove(data)
        except KeyError:
            # Absent triples are ignored on purpose (best-effort delete).
            pass
def revealed2sortedset(revealed: List[Union[tuple, slice]]) -> SortedSet:
    """
    Converts a list of included pairs to a sorted set of integers in O(n),
    n = size of @slices. Every number from every slice is added to the
    sorted set, except 0.

    Examples (length, revealed -> result):
        10, []                  -> []
        10, [(0, 10)]           -> [10]
        10, [(0, 7)]            -> [7]
        10, [(7, 10)]           -> [7, 10]
        10, [(3, 7)]            -> [3, 7]
        10, [(0, 3), (7, 10)]   -> [3, 7, 10]
    """
    try:
        endpoints = SortedSet()
        for start, stop in revealed:
            endpoints.add(start)
            endpoints.add(stop)
    except TypeError:
        # Elements are slices (not unpackable): collect start/stop instead.
        endpoints = SortedSet(sl.start for sl in revealed).union(
            sl.stop for sl in revealed)
    endpoints.discard(0)  # 0 is never kept as an interval boundary
    return endpoints
class AbstractReadyDecorator(AbstractGraphDecorator):
    """
    Wraps a graph and keeps track of tasks that are `ready`.

    A task is ready if and only if:

    * For each input connection to certain ports: The connected output
      port has data set with `set_outut_data()`. The input ports can be
      selected with the `port_filter` passed to `__init__`.
    * The task is not a sync-point and all sync-point tasks with a lower
      tick have been executed.
    * The task is a sync-point task and all tasks with a lower tick have
      been executed.

    A task is considered to be `executed` once `set_output_data()` has
    been called.
    """

    def __init__(self, g, prefix, port_filter, property_filter,
                 syncpoint_run_last=True):
        """
        :param prefix: Prefix for the task properties used by the
           implementation to keep track of the state. If several
           decorators of this type are wrapped around the same graph,
           then different prefixes must be used.
        :param port_filter: Function to filter the relevant input ports.
           The function takes three arguments:

           * The graph (`self`)
           * The tick of the task.
           * The name of the port.

           If the function returns `False` Then the port is ignored.
        :param property_filter: Function that filters tasks which are
           collected. The function takes three

           * The graph (`self`)
           * The tick of the task.
           * The properties of the task (`graph.get_task_properties(tick)`).

           Only tasks for which this function returns `True` are returned.
           The function is reevaluated if the properties of a task change.
        :param syncpoint_run_last: If `True` then tasks with syncpoints
           only run once all tasks with a lower tick have completed.
        """
        AbstractGraphDecorator.__init__(self, g)
        self._prefix = prefix
        self._port_filter = port_filter
        self._property_filter = property_filter
        self._syncpoint_run_last = syncpoint_run_last
        #: ticks of all tasks that have data for all inputs.
        #: That is, if every output port connected to each input port
        #: had data set with `set_output_data`.
        self._queue = SortedSet()
        #: ticks of all tasks with
        #: * `properties["syncpoint"] == True`
        #: * `set_output_data` not yet called.
        self._pending_syncpoints = SortedSet()
        #: ticks of all tasks with
        #: * `set_output_data` not yet called.
        self._pending_ticks = SortedSet()
        # Per-decorator property names, namespaced by the prefix.
        self._collected_prop = self._prefix + "_collected"
        self._count_prop = self._prefix + "_count"
        self._ready_prop = self._prefix + "_ready"
        # The final tick participates in the bookkeeping like a task.
        self.g.set_task_property(graph.FINAL_TICK, self._count_prop, 0)
        self.g.set_task_property(graph.FINAL_TICK, self._ready_prop, 0)
        self.g.set_task_property(graph.FINAL_TICK, self._collected_prop, False)
        self._consider(graph.FINAL_TICK)
        self._pending_ticks.add(graph.FINAL_TICK)

    def past_all_syncpoints(self):
        """
        Returns `True` if all sync-point tasks have been executed.
        """
        return len(self._pending_syncpoints) == 0

    def collect_tasks(self):
        """
        Returns the ticks of all tasks which are ready.
        """
        ticks = set()
        while True:
            tick = self.consume_ready_task()
            if tick is not None:
                ticks.add(tick)
            else:
                break
        return ticks

    def consume_ready_task(self):
        """
        Returns the next task which is ready or `None`.
        """
        def check_tick_against_sync_point(tick):
            # Decide whether `tick` may run given the earliest pending
            # sync-point.
            if not self._pending_syncpoints:
                return True  # no more sync points
            next_sync_point = self._pending_syncpoints[0]
            if tick < next_sync_point:
                # `tick` is not a sync_point and must run
                # before the next sync_point.
                return True
            elif tick == next_sync_point:
                # `tick` is the sync_point
                if (self._syncpoint_run_last
                        and self._pending_ticks[0] < next_sync_point):
                    # There are still unfinished tasks that have to run
                    # before it.
                    return False
                else:
                    # It is time to run the sync_point.
                    return True
            else:
                # tick > next_sync_point
                # has to wait til the sync_point completed
                return False

        # Smallest queued tick, or None when the queue is empty.
        tick = next(iter(self._queue), None)
        if tick is not None:
            if not check_tick_against_sync_point(tick):
                return None
            props = self.g.get_task_properties(tick)
            if props.get(self._collected_prop, False):
                raise ValueError("Task %s became ready twice." % tick)
            self.g.set_task_property(tick, self._collected_prop, True)
            self._queue.remove(tick)
        return tick

    def was_collected(self, tick):
        """
        Returns if the given tick was collected.
        """
        props = self.g.get_task_properties(tick)
        return props.get(self._collected_prop, False)

    def add_task(self, tick, task, properties={}):
        # Copy before mutating: `properties` has a shared default.
        properties = dict(properties)
        properties[self._count_prop] = 0
        properties[self._ready_prop] = 0
        properties[self._collected_prop] = False
        self.g.add_task(tick, task, properties=properties)
        self._consider(tick)
        self._pending_ticks.add(tick)
        if properties.get("syncpoint", False):
            self._pending_syncpoints.add(tick)

    def remove_task(self, tick):
        self.g.remove_task(tick)
        # Scrub the tick from every bookkeeping structure it may be in.
        if tick in self._queue:
            self._queue.remove(tick)
        if tick in self._pending_syncpoints:
            self._pending_syncpoints.remove(tick)
        if tick in self._pending_ticks:
            self._pending_ticks.remove(tick)

    def connect(self, source, dest):
        self.g.connect(source, dest)
        if not self._port_filter(self, dest.tick, dest.port):
            return
        source_props = self.g.get_task_properties(source.tick)
        dest_props = self.g.get_task_properties(dest.tick)
        # One more input the destination must wait for.
        self.g.set_task_property(dest.tick, self._count_prop,
                                 dest_props[self._count_prop] + 1)
        if source.port in source_props["out_data"]:
            # The new input already has data available.
            self.g.set_task_property(dest.tick, self._ready_prop,
                                     dest_props[self._ready_prop] + 1)
        self._consider(dest.tick)

    def disconnect(self, source, dest):
        self.g.disconnect(source, dest)
        if not self._port_filter(self, dest.tick, dest.port):
            return
        source_props = self.g.get_task_properties(source.tick)
        dest_props = self.g.get_task_properties(dest.tick)
        # Mirror of connect(): undo the count and, when the removed input
        # had data, the ready increment as well.
        self.g.set_task_property(dest.tick, self._count_prop,
                                 dest_props[self._count_prop] - 1)
        if source.port in source_props["out_data"]:
            self.g.set_task_property(dest.tick, self._ready_prop,
                                     dest_props[self._ready_prop] - 1)
        self._consider(dest.tick)

    def set_task_property(self, tick, key, value):
        retval = AbstractGraphDecorator.set_task_property(self, tick, key,
                                                          value)
        if key == "syncpoint":
            # Keep the pending-syncpoint set in step with the property.
            if not value and tick in self._pending_syncpoints:
                self._pending_syncpoints.remove(tick)
            if value:
                self._pending_syncpoints.add(tick)
            self._consider(tick)
        return retval

    def set_output_data(self, tick, outputs):
        self.g.set_output_data(tick, outputs)
        # The task is now `executed`: it no longer blocks anything.
        if tick in self._pending_syncpoints:
            self._pending_syncpoints.remove(tick)
        if tick in self._pending_ticks:
            self._pending_ticks.remove(tick)
        # Propagate readiness to every downstream consumer of the new data.
        for source, dest in self.g.get_out_connections(tick):
            if source.port in outputs:
                if not self._port_filter(self, dest.tick, dest.port):
                    continue
                dest_props = self.get_task_properties(dest.tick)
                self.set_task_property(dest.tick, self._ready_prop,
                                       dest_props[self._ready_prop] + 1)
                self._consider(dest.tick)

    def _consider(self, tick):
        # Re-evaluate whether `tick` belongs in the ready queue.
        props = self.get_task_properties(tick)
        if props.get(self._collected_prop, False):
            # Already collected
            return False
        if props[self._count_prop] == props[self._ready_prop]:
            # All filtered inputs have data; the property filter decides.
            if self._property_filter(self, tick, props):
                should_be_in_queue = True
            else:
                should_be_in_queue = False
        else:
            should_be_in_queue = False
        if should_be_in_queue:
            self._queue.add(tick)
        elif tick in self._queue:
            self._queue.remove(tick)
def test_remove():
    """Removing a present element from a small-fanout SortedSet must succeed."""
    # sortedcontainers 2.x removed the ``load=`` constructor keyword; the
    # supported way to pin the internal fanout is ``_reset`` (as the other
    # test_remove in this file already does).
    temp = SortedSet(range(0, 100))
    temp._reset(7)
    temp.remove(50)
class Chunk(object):
    """
    Represents a chunk of code providing some useful functionality in the system.

    A chunk belongs to a feature, may depend on other chunks (in the same
    feature or elsewhere in the software system) and accumulates bugs as it
    is modified.  Dependency and bug sets are kept in deterministic order so
    simulation runs are reproducible.
    """

    def __init__(self, logical_name, feature, local_content=None):
        self.logical_name = logical_name
        self.feature = feature
        self.local_content = local_content
        # Sorted containers give deterministic iteration order.
        self.dependencies = SortedSet(key=lambda d: d.fully_qualified_name)
        self.bugs = SortedSet(key=lambda b: b.logical_name)
        self.bug_count = 0

    def __eq__(self, other):
        """Chunks are equal when their content, bug names and dependency names agree."""
        if self.local_content != other.local_content:
            return False
        elif self.bugs_logical_names != other.bugs_logical_names:
            return False
        elif self.dependency_logical_names != other.dependency_logical_names:
            return False
        else:
            return True

    def __ne__(self, other):
        return not(self.__eq__(other))

    @property
    def probability_gain_feature_dependency(self):
        return self.feature.software_system.probability_gain_feature_dependency

    @property
    def probability_lose_feature_dependency(self):
        return self.feature.software_system.probability_lose_feature_dependency

    @property
    def probability_gain_system_dependency(self):
        return self.feature.software_system.probability_gain_system_dependency

    @property
    def probability_lose_system_dependency(self):
        return self.feature.software_system.probability_lose_system_dependency

    @property
    def probability_new_bug(self):
        return self.feature.software_system.probability_new_bug

    @property
    def probability_debug_known(self):
        return self.feature.software_system.probability_debug_known

    @property
    def probability_debug_unknown(self):
        return self.feature.software_system.probability_debug_unknown

    @property
    def dependency_logical_names(self):
        # List comprehension (not a lazy map object) so __eq__ compares
        # values on Python 3 as well as Python 2.
        return [d.logical_name for d in self.dependencies]

    @property
    def bugs_logical_names(self):
        # See dependency_logical_names: must be a concrete list for __eq__.
        return [b.logical_name for b in self.bugs]

    @property
    def bugs_in_dependencies(self):
        """Union of the bug sets of every chunk this chunk depends on."""
        from functools import reduce  # Python 3 compatibility; available since 2.6
        chunk_bug_set = frozenset(map(lambda chunk: frozenset(chunk.bugs), self.dependencies))
        return reduce(lambda bugs_a, bugs_b: bugs_a.union(bugs_b), chunk_bug_set, set())

    @property
    def tests(self):
        # Concrete list: usable repeatedly and len()-able on Python 3.
        return [t for t in self.feature.tests if self in t.chunks]

    def modify(self, random):
        """Mutate this chunk: possibly gain dependencies, rewrite content, insert bugs."""
        feature_chunks = self.feature.chunks - {self}
        system_chunks = set(self.feature.software_system.chunks.difference(self.feature.chunks))
        self._add_dependencies(random, system_chunks, self.probability_gain_system_dependency)
        self._add_dependencies(random, feature_chunks, self.probability_gain_feature_dependency)
        self.local_content = random.create_local_content()
        self._insert_bugs(random)

    def merge(self, source_chunk, random):
        """Adopt @source_chunk's dependencies (resolved in this system), then modify."""
        for dependency in source_chunk.dependencies:
            working_copy_dependency = self.feature.software_system.get_chunk(dependency.fully_qualified_name)
            self.dependencies.add(working_copy_dependency)
        self.modify(random)

    def overwrite_with(self, source_chunk):
        """Replace content, bugs and dependencies with those of @source_chunk."""
        self.local_content = source_chunk.local_content
        self.bugs.clear()
        for old_bug in source_chunk.bugs:
            new_bug = self.get_bug(old_bug.logical_name)
            if new_bug is None:
                self.add_bug(old_bug.logical_name)
        self.dependencies.clear()
        for dependency in source_chunk.dependencies:
            new_dependency = self.feature.software_system.get_chunk(dependency.fully_qualified_name)
            self.dependencies.add(new_dependency)

    def _add_dependencies(self, random, candidate_chunks, threshold):
        # Iterate candidates in sorted order so the sequence of random draws
        # is reproducible.
        for candidate in SortedSet(candidate_chunks, key=lambda c: c.logical_name):
            if random.dependency_should_be_added(threshold):
                self.add_dependency(candidate)

    def add_dependency(self, candidate):
        self.dependencies.add(candidate)

    def _insert_bugs(self, random):
        while random.a_bug_should_be_inserted(self):
            self.add_bug(self.bug_count)
            self.bug_count += 1

    def add_bug(self, logical_name):
        self.bugs.add(Bug(logical_name, self))

    def get_bug(self, logical_name):
        """Return the bug with @logical_name, or None if this chunk has no such bug."""
        # Plain loop instead of the original len(filter(...)) which fails on
        # Python 3 (filter is lazy) and compared lengths with "is 0".
        for bug in self.bugs:
            if bug.logical_name == logical_name:
                return bug
        return None

    def refactor(self, random):
        """Randomly drop some of this chunk's dependencies."""
        to_remove = set()
        for dependency in self.dependencies:
            if random.dependency_should_be_removed(self, dependency):
                to_remove.add(dependency)
        self.dependencies.difference_update(to_remove)

    def debug(self, random, bug=None):
        """
        Attempt to remove a bug: a random unknown one when @bug is absent or
        not ours, otherwise the given known bug.  Returns False when there is
        nothing to debug.
        """
        if len(self.bugs) == 0:
            return False
        if bug is None or bug not in self.bugs:
            if random.unknown_bug_should_be_removed(self):
                bug = random.choose_bug(self)
                self.bugs.remove(bug)
        elif random.known_bug_should_be_removed(self):
            self.bugs.remove(bug)

    def operate(self, random):
        """Exercise the chunk: every reachable bug gets a chance to manifest."""
        for bug in self.bugs_in_dependencies.union(self.bugs):
            bug.manifest(random)

    def __str__(self):
        def string_repr_set(iterable):
            return ",".join(map(lambda e: repr(e), iterable))

        feature_dependencies = string_repr_set(filter(lambda c: c.feature == self.feature, self.dependencies))
        system_dependencies = string_repr_set(filter(lambda c: c.feature != self.feature, self.dependencies))
        bugs = ", ".join(map(lambda bug: str(bug), self.bugs))
        return "c_%s:[%s]:[%s]->(in[%s],ex[%s])" % \
            (str(self.logical_name), self.local_content, bugs, feature_dependencies, system_dependencies)

    @property
    def fully_qualified_name(self):
        return "%s.%s" % (str(self.feature.logical_name), str(self.logical_name))

    def __repr__(self):
        return "c%s" % str(self.fully_qualified_name)
def cluster_(self, fX):
    """Compute complete dendrogram

    Parameters
    ----------
    fX : (n_items, dimension) np.array
        Embeddings.

    Returns
    -------
    dendrogram : list of (i, j, distance) tuples
        Dendrogram.
    """
    N = len(fX)

    # clusters contain the identifier of each cluster
    clusters = SortedSet(np.arange(N))

    # labels[i] = c means ith item belongs to cluster c
    labels = np.array(np.arange(N))

    # Condensed pairwise distances, re-keyed as (smaller_id, larger_id) pairs
    # in a value-sorted dict so the closest pair is always at index 0 and
    # entries can be dropped as clusters merge.
    squared = squareform(pdist(fX, metric=self.metric))
    distances = ValueSortedDict()
    for i, j in itertools.combinations(range(N), 2):
        distances[i, j] = squared[i, j]

    dendrogram = []

    for _ in range(N - 1):

        # find most similar clusters (keys are stored with c_i < c_j)
        (c_i, c_j), d = distances.peekitem(index=0)

        # keep track of this iteration
        dendrogram.append((c_i, c_j, d))

        # index of clusters in 'clusters' and 'fX'
        # (c_i < c_j and 'clusters' is sorted, so i < j: deleting row j
        # below cannot shift row i)
        i = clusters.index(c_i)
        j = clusters.index(c_j)

        # merge items of cluster c_j into cluster c_i
        labels[labels == c_j] = c_i

        # update c_i representative
        # NOTE(review): representatives are summed, not averaged -- presumably
        # self.metric is scale-invariant (e.g. cosine); confirm for new metrics.
        fX[i] += fX[j]

        # remove c_j cluster: shift the tail rows up by one and truncate
        fX[j:-1, :] = fX[j + 1:, :]
        fX = fX[:-1]

        # remove distances to c_j cluster
        # (smaller id always comes first in a key, hence the two loops)
        for c in clusters[:j]:
            distances.pop((c, c_j))
        for c in clusters[j + 1:]:
            distances.pop((c_j, c))

        clusters.remove(c_j)

        if len(clusters) < 2:
            continue

        # compute distance to new c_i cluster
        new_d = cdist(fX[i, :].reshape((1, -1)), fX, metric=self.metric).squeeze()
        for c_k, d in zip(clusters, new_d):
            if c_k < c_i:
                distances[c_k, c_i] = d
            elif c_k > c_i:
                distances[c_i, c_k] = d

    return dendrogram
class Selection(IMutableGSlice):
    """
    Tracks which indices of a fixed-size universe [universe.start, universe.stop)
    are "revealed" (included) versus covered (excluded).

    The revealed region is stored as a SortedSet of integer interval
    boundaries (``_intervals``); 0 is never stored, so a set with an odd
    number of elements implicitly starts with a revealed run at 0.
    ``_revealed_count`` caches the total number of revealed indices.
    NOTE(review): the IMutableGSlice base class is defined elsewhere -- the
    contract described here is inferred from this class alone.
    """

    def __init__(
            self,
            universe: slice,
            revealed: Optional[list] = None,
            intervals: Optional[Iterator] = None,
            _length: Optional[int] = None  # For performance
    ):
        #assert isinstance(universe, slice)  # Should universe even be visible/exist?
        #assert universe.start == 0
        #assert isinstance(universe.stop, int)
        #assert universe.stop >= 1  # TODO Do we need this?
        self.universe = universe
        if intervals is None and revealed is None:
            # Default: everything revealed.
            self._intervals = self.revealed2sortedset([slice(0, universe.stop)])
        elif intervals is not None:
            self._intervals = SortedSet(intervals)
        else:
            self._intervals = self.revealed2sortedset(revealed)
        self._revealed_count = _length if isinstance(_length, int) else Selection._compute_len(self._intervals)

    @staticmethod
    def revealed2sortedset(revealed: List[Union[tuple, slice]]) -> SortedSet:
        """
        Converts a list of included pairs to a sorted set of integers in O(n),
        n = size of @slices. Every number from every slice is added to the
        sorted set, except 0.
        """
        # 10, [] -> 10, []
        # 10, [(0, 10)] -> 10, [10]
        # 10, [(0, 7)] -> 10, [7]
        # 10, [(7, 10)] -> 10, [7, 10]
        # 10, [(3, 7)] -> 10, [3, 7]
        # 10, [(0, 3), (7, 10)] -> 10, [3, 7, 10]
        # 10, [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)] -> 10, [1, 2, 3, 4, 5, 6, 7, 8, 9]
        try:
            #intervals = SortedSet(a for a, _ in revealed).union(b for _, b in revealed)
            intervals = SortedSet()
            for a, b in revealed:
                intervals.add(a)
                intervals.add(b)
        except TypeError:  # slice
            intervals = SortedSet(sl.start for sl in revealed).union(sl.stop for sl in revealed)
        if 0 in intervals:
            intervals.remove(0)
        return intervals

    @staticmethod
    def sortedset2slices(sortedset: SortedSet) -> List[slice]:
        """
        Converts a sorted set of integers to a list of included slices in O(n),
        n = size of @sortedset. If there is an even number of elements in
        @sortedset, the first slice is formed by the first and second numbers,
        the second slice is formed by the third and fourth numbers, and so on.
        If there is an odd number of elements in @sortedset, the pair
        consisting of the number 0 and the first element in @sortedset becomes
        the first slice in the output list. The remaining slices, if any, are
        formed by the second and third numbers, the fourth and fifth numbers,
        and so on.
        """
        slices = []
        if len(sortedset) % 2 == 0:
            for i in range(0, len(sortedset), 2):
                slices.append(slice(sortedset[i], sortedset[i + 1]))
        else:
            slices.append(slice(0, sortedset[0]))
            for i in range(1, len(sortedset), 2):
                slices.append(slice(sortedset[i], sortedset[i + 1]))
        return slices

    def slices(self) -> List[slice]:
        """The revealed region as a list of slices."""
        return self.sortedset2slices(self._intervals)

    def pairs(self) -> Iterator[Tuple[int, int]]:
        """The revealed region as (start, stop) pairs; odd sets imply a leading run from 0."""
        if len(self._intervals) % 2 == 0:
            return zip(self._intervals[::2], self._intervals[1::2])
        return itertools.chain([(0, self._intervals[0])], zip(self._intervals[1::2], self._intervals[2::2]))

    def gap_pairs(self) -> Iterator[Tuple[int, int]]:
        """The covered (excluded) region as (start, stop) pairs."""
        return self.complement().pairs()

    def intervals(self):
        return self._intervals

    def exclude(self, from_index: Optional[int], to_index: Optional[int]):
        """
        Cover (hide) the range [@from_index, @to_index); None means the
        corresponding end of the universe; negative indices wrap.
        Returns the number of indices that became covered.
        """
        original_length = self._revealed_count
        if isinstance(from_index, int) and -self.universe.stop <= from_index < 0:
            from_index = from_index % self.universe.stop
        if isinstance(to_index, int):
            if to_index > self.universe.stop:
                return self.exclude(from_index, None)
            if -self.universe.stop <= to_index < 0:
                to_index = to_index % self.universe.stop
        assert from_index is None or self.universe.start <= from_index <= self.universe.stop
        assert to_index is None or self.universe.start <= to_index <= self.universe.stop
        if from_index is None:
            from_index = self.universe.start
        if to_index is None:
            to_index = self.universe.stop
        if len(self._intervals) == 0:
            return 0
        if from_index >= to_index:
            return 0
        # m, n: how many stored boundaries lie at or before each endpoint;
        # their parity (together with the set's parity) tells whether the
        # endpoint falls in a revealed run.
        m = self._intervals.bisect_right(from_index)
        n = self._intervals.bisect_right(to_index)
        try:
            from_index_index = self._intervals.index(from_index)
        except ValueError:
            from_index_index = None
        try:
            to_index_index = self._intervals.index(to_index)
        except ValueError:
            to_index_index = None
        from_index_is_included = (
                len(self._intervals) % 2 == 0 and m % 2 == 1
                or len(self._intervals) % 2 == 1 and m % 2 == 0)
        to_index_is_included = (
                len(self._intervals) % 2 == 0 and n % 2 == 1
                or len(self._intervals) % 2 == 1 and n % 2 == 0)
        from_index_is_leftmost_included = from_index == 0 and from_index_is_included or from_index_index is not None and (
                len(self._intervals) % 2 == 0 and from_index_index % 2 == 0
                or len(self._intervals) % 2 == 1 and (from_index == 0 or from_index_index % 2 == 1))
        to_index_right_of_excluded = to_index_index is not None and (
                len(self._intervals) % 2 == 0 and to_index_index % 2 == 1
                or len(self._intervals) % 2 == 1 and (to_index == 0 or to_index_index % 2 == 0))
        if from_index_is_included:
            if from_index_is_leftmost_included:
                if to_index_is_included:
                    if m == 0:
                        to_remove = self._intervals[m:n]
                        endpoint = 0 if n == 0 else self._intervals[n - 1]
                        addendum = 0 if n == 0 else self._intervals[0]
                        self._revealed_count -= (to_index - endpoint) + addendum + sum(
                            b - a for a, b in zip(to_remove[1:-1:2], to_remove[2:-1:2]))
                        del self._intervals[m:n]
                        self._intervals.add(to_index)
                    else:
                        intermediates = self._intervals[m + 1:n - 1]
                        from_start, from_end = self._intervals[m - 1], self._intervals[m]
                        to_start, to_end = self._intervals[n - 1], self._intervals[n]
                        if m == n:
                            self._revealed_count -= to_index - from_start
                            self._intervals.remove(from_start)
                            self._intervals.add(to_index)
                        else:
                            self._revealed_count -= (from_end - from_start) + (to_index - self._intervals[n - 1]) + (
                                    from_index - from_start) + sum(
                                b - a for a, b in zip(intermediates[::2], intermediates[1::2]))
                            del self._intervals[m + 1:n - 1]  # intermediates
                            self._intervals.remove(from_start)
                            self._intervals.remove(from_end)
                            self._intervals.remove(to_start)
                            self._intervals.add(to_index)
                else:
                    from_start = 0 if m == 0 else self._intervals[m - 1]
                    from_end = self._intervals[m]
                    self._revealed_count -= from_end - from_start
                    # 0 is never stored, so only remove a positive boundary.
                    if from_start > 0:
                        self._intervals.remove(from_start)
                    self._intervals.remove(from_end)
            else:
                if to_index_is_included:
                    from_end = self._intervals[m]
                    to_start = self._intervals[n - 1]
                    if m == n:
                        self._revealed_count -= to_index - from_index
                        if from_index > 0:
                            self._intervals.add(from_index)
                        self._intervals.add(to_index)
                    else:
                        intermediates = self._intervals[m + 1:n - 1]
                        self._revealed_count -= (from_end - from_index) + (to_index - to_start) + sum(
                            b - a for a, b in zip(intermediates[::2], intermediates[1::2]))
                        del self._intervals[m + 1:n - 1]  # intermediates
                        if from_index > 0:
                            self._intervals.add(from_index)
                        self._intervals.add(to_index)
                        self._intervals.remove(from_end)
                        self._intervals.remove(to_start)
                else:
                    to_remove = self._intervals[m:n]
                    self._revealed_count -= self._intervals[m] - from_index + sum(
                        b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
                    del self._intervals[m:n]
                    if from_index != 0:
                        self._intervals.add(from_index)
        else:
            if to_index_is_included:
                if to_index_right_of_excluded:
                    to_remove = self._intervals[m:n - 1]
                    del self._intervals[m:n - 1]
                    self._revealed_count -= sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
                else:
                    to_remove = self._intervals[m:n]
                    del self._intervals[m:n]
                    self._intervals.add(to_index)
                    self._revealed_count -= (to_index - to_remove[0]) + sum(
                        b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
            else:
                to_remove = self._intervals[m:n]
                del self._intervals[m:n]
                self._revealed_count -= sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
        return original_length - self._revealed_count

    def exclude_virtual(self, from_index: Optional[int], to_index: Optional[int]):
        """Like exclude(), but the indices count only revealed elements."""
        if from_index is None or from_index < -len(self) or from_index >= len(self):
            p_from_index = None
        else:
            p_from_index = self.virtual2physical(from_index)
        if to_index is None or to_index < -len(self) or to_index >= len(self):
            p_to_index = None
        else:
            p_to_index = self.virtual2physical(to_index)
        return self.exclude(p_from_index, p_to_index)

    def include(self, from_index: Optional[int], to_index: Optional[int]):
        """
        Reveal the range [@from_index, @to_index); None means the
        corresponding end of the universe; negative indices wrap.
        Returns the number of indices that became revealed.
        """
        original_length = len(self)
        if isinstance(from_index, int) and -self.universe.stop <= from_index < 0:
            from_index = from_index % self.universe.stop
        if isinstance(to_index, int):
            if to_index > self.universe.stop:
                return self.include(from_index, None)
            if -self.universe.stop <= to_index < 0:
                to_index = to_index % self.universe.stop
        assert from_index is None or self.universe.start <= from_index <= self.universe.stop
        assert to_index is None or self.universe.start <= to_index <= self.universe.stop
        if from_index is None:
            from_index = self.universe.start
        if to_index is None:
            to_index = self.universe.stop
        if not self._intervals:
            # Nothing revealed yet: the new run is the only one.
            if from_index > 0:
                self._intervals.add(from_index)
            self._intervals.add(to_index)
            self._revealed_count += to_index - from_index
            return to_index - from_index
        if from_index == to_index:
            return 0
        m = self._intervals.bisect_right(from_index)
        n = self._intervals.bisect_right(to_index)
        try:
            from_index_index = self._intervals.index(from_index)
        except ValueError:
            from_index_index = None
        from_index_is_included = (
                len(self._intervals) % 2 == 0 and m % 2 == 1
                or len(self._intervals) % 2 == 1 and m % 2 == 0)
        to_index_is_included = (
                len(self._intervals) % 2 == 0 and n % 2 == 1
                or len(self._intervals) % 2 == 1 and n % 2 == 0)
        from_index_right_of_included = from_index_index is not None and (
                len(self._intervals) % 2 == 0 and from_index_index % 2 == 1
                or len(self._intervals) % 2 == 1 and from_index_index % 2 == 0)
        if from_index_is_included:
            if to_index_is_included:
                to_remove = self._intervals[m:n]
                del self._intervals[m:n]
                self._revealed_count += sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
            else:
                to_remove = self._intervals[m:n]
                del self._intervals[m:n]
                self._intervals.add(to_index)
                self._revealed_count += (to_index - to_remove[-1]) + sum(
                    b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
        else:
            if to_index_is_included:
                if from_index_right_of_included:
                    to_remove = self._intervals[m - 1:n]
                    del self._intervals[m - 1:n]
                    self._revealed_count += sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
                else:
                    to_remove = self._intervals[m:n]
                    del self._intervals[m:n]
                    self._intervals.add(from_index)
                    self._revealed_count += (to_remove[0] - from_index) + sum(
                        b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
            else:
                if from_index_right_of_included:
                    intermediates = self._intervals[m:n]
                    del self._intervals[m:n]  # intermediates
                    self._intervals.remove(from_index)
                    self._intervals.add(to_index)
                    self._revealed_count += (to_index - from_index) - sum(
                        b - a for a, b in zip(intermediates[::2], intermediates[1::2]))
                else:
                    to_remove = self._intervals[m:n]
                    del self._intervals[m:n]
                    if from_index > 0:
                        self._intervals.add(from_index)
                    self._intervals.add(to_index)
                    self._revealed_count += (to_index - from_index) - sum(
                        b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
        return len(self) - original_length

    def include_partially(self, from_index: Optional[int], to_index: Optional[int], count: Union[int, tuple]):
        """Reveal up to @count indices from each end of [@from_index, @to_index)."""
        if isinstance(count, int):
            return self.include_partially(from_index, to_index, (count, count))
        head_count, tail_count = count
        head_revealed_count = self._include_partially_from_left(from_index, to_index, head_count)
        tail_revealed_count = self._include_partially_from_right(from_index, to_index, tail_count)
        return head_revealed_count + tail_revealed_count

    def _include_partially_from_left(self, from_index: int, to_index: int, count: int):
        # Walk the covered runs left-to-right, revealing until @count is spent.
        if count == 0:
            return 0
        from_index, to_index = self._normalized_range(from_index, to_index)
        subsel = self._spanning_subslice(from_index, to_index).complement().subslice(from_index, to_index)
        revealed_count = 0
        for covered_start, covered_stop in subsel.pairs():
            coverage = covered_stop - covered_start
            if revealed_count + coverage < count:
                self.include(covered_start, covered_stop)
                revealed_count += coverage
            else:
                self.include(covered_start, covered_start + count - revealed_count)
                revealed_count = count
                break
        return revealed_count

    def _include_partially_from_right(self, from_index: int, to_index: int, count: int):
        # Mirror of _include_partially_from_left, walking right-to-left.
        if count == 0:
            return 0
        from_index, to_index = self._normalized_range(from_index, to_index)
        subsel = self._spanning_subslice(from_index, to_index).complement().subslice(from_index, to_index)
        revealed_count = 0
        for covered_start, covered_stop in reversed(list(subsel.pairs())):
            coverage = covered_stop - covered_start
            if revealed_count + coverage < count:
                self.include(covered_start, covered_stop)
                revealed_count += coverage
            else:
                self.include(covered_stop - (count - revealed_count), covered_stop)
                revealed_count = count
                break
        return revealed_count

    def include_expand(self, from_index: Optional[int], to_index: Optional[int], count: Union[int, Tuple[int, int]]):
        """Reveal up to @count indices around every gap inside [@from_index, @to_index)."""
        if isinstance(count, int):
            return self.include_expand(from_index, to_index, (count, count))
        if count == (0, 0):
            return 0
        head_count, tail_count = count
        revealed_counter = 0
        gaps = self.complement().subslice(from_index, to_index)
        for a, b in gaps.pairs():
            if b < self.universe.stop:
                revealed_counter += self._include_partially_from_right(a, b, head_count)
            if a > self.universe.start:
                revealed_counter += self._include_partially_from_left(a, b, tail_count)
        return revealed_counter

    def _previous_slice(self, sl: slice):
        """
        :return The revealed or covered slice immediately to the left of @sl.
        :raise ValueError if there is none.
        """
        # NOTE(review): looks stale -- _intervals holds ints (no .stop) and
        # SortedSet does not support "+"; this would raise if called. Confirm
        # whether it is dead code.
        if sl.start == self.universe.start:
            raise ValueError("There is no slice to the left of {}.".format(sl))
        # TODO O(n) -> O(1)
        zero_or_one = [s for s in self._intervals + self.complement()._intervals if s.stop == sl.start]
        if len(zero_or_one) == 1:
            return zero_or_one[0]
        else:
            raise ValueError("Slice not found: {}.".format(sl))

    def _next_slice(self, sl: slice):
        """
        :return The revealed or covered slice immediately to the right of @sl.
        :raise ValueError if there is none.
        """
        # NOTE(review): same staleness concern as _previous_slice.
        if sl.stop == self.universe.stop:
            raise ValueError("There is no slice to the right of {}.".format(sl))
        # TODO O(n)
        zero_or_one = [s for s in self._intervals + self.complement()._intervals if s.start == sl.stop]
        if len(zero_or_one) == 1:
            return zero_or_one[0]
        else:
            raise ValueError("Slice not found: {}.".format(sl))

    def include_virtual(self, from_index, to_index):
        """Like include(), but the indices count only revealed elements."""
        if from_index is None or from_index < -len(self) or from_index >= len(self):
            p_from_index = None
        else:
            p_from_index = self.virtual2physical(from_index)
        if to_index is None or to_index < -len(self) or to_index >= len(self):
            p_to_index = None
        else:
            p_to_index = self.virtual2physical(to_index)
        return self.include(p_from_index, p_to_index)

    def include_partially_virtual(self, from_index: Optional[int], to_index: Optional[int], count: Union[int, tuple]):
        """Like include_partially(), but the indices count only revealed elements."""
        if from_index is None or from_index < -len(self) or from_index >= len(self):
            p_from_index = None
        else:
            p_from_index = self.virtual2physical(from_index)
        if to_index is None or to_index < -len(self) or to_index >= len(self):
            p_to_index = None
        else:
            p_to_index = self.virtual2physical(to_index)
        return self.include_partially(p_from_index, p_to_index, count)

    # FIXME Inconsistent with reversed(selection). Should probably make this use the default implementation and instead
    # rewrite this one to iter_slices or something.
    def __iter__(self):
        for a, b in self.pairs():
            yield a, b  # FIXME should probably generate slices instead, or every index

    def complement(self):
        """A Selection over the same universe revealing exactly the covered indices."""
        if len(self._intervals) >= 1 and self._intervals[-1] == self.universe.stop:
            return Selection(universe=self.universe, intervals=self._intervals[:-1],
                             _length=self.universe.stop - len(self))
        return Selection(universe=self.universe, intervals=self._intervals.union([self.universe.stop]),
                         _length=self.universe.stop - len(self))

    def _normalized_range(self, from_index: Optional[int], to_index: Optional[int]) -> Tuple[int, int]:
        """
        For any range [@from_index, @to_index) where the indices are either
        None or any integer, returns the equivalent range [x, y) such that
        either 0 <= x < y <= upper_bound or x = y = 0. The ranges are
        equivalent in the sense that when using them to slice this selection,
        they produce the same sub-selection.
        """
        if from_index is None or from_index <= -self.universe.stop:
            from_index = self.universe.start
        elif from_index > self.universe.stop:
            from_index = self.universe.stop
        elif -self.universe.stop <= from_index < 0:
            # NOTE(review): "stop - from_index" with a negative from_index
            # yields a value > stop; "stop + from_index" looks intended. Confirm.
            from_index = self.universe.stop - from_index
        if to_index is None or to_index >= self.universe.stop:
            to_index = self.universe.stop
        elif -self.universe.stop <= to_index < 0:
            # NOTE(review): same sign concern as above.
            to_index = self.universe.stop - to_index
        elif to_index < -self.universe.stop:
            # NOTE(review): unreachable -- the first branch already caught
            # to_index < -stop values? Confirm intended branch order.
            to_index = self.universe.start
        if from_index >= to_index:
            from_index, to_index = (0, 0)
        return from_index, to_index

    def subslice(self, from_index: Optional[int], to_index: Optional[int]):
        """A Selection revealing only the revealed indices within [@from_index, @to_index)."""
        from_index, to_index = self._normalized_range(from_index, to_index)
        sel = self._spanning_subslice(from_index, to_index)
        # Trim the first/last runs so they do not extend past the requested range.
        if len(sel._intervals) % 2 == 0:
            if len(sel) > 0:
                if sel._intervals[0] < from_index < sel._intervals[1]:
                    sel._revealed_count -= from_index - sel._intervals[0]
                    del sel._intervals[0]
                    sel._intervals.add(from_index)
                if sel._intervals[-2] < to_index < sel._intervals[-1]:
                    sel._revealed_count -= sel._intervals[-1] - to_index
                    del sel._intervals[-1]
                    sel._intervals.add(to_index)
        else:
            if 0 < from_index < sel._intervals[0]:
                sel._revealed_count -= from_index
                sel._intervals.add(from_index)
            if (len(sel._intervals) == 1 and to_index < sel._intervals[-1]
                    or len(sel._intervals) >= 2 and sel._intervals[-2] < to_index < sel._intervals[-1]):
                sel._revealed_count -= sel._intervals[-1] - to_index
                del sel._intervals[-1]
                sel._intervals.add(to_index)
        return sel

    def _spanning_subslice(self, from_index: int, to_index: int):
        """
        :return A Selection whose set of revealed slices is a subset of that of
        this Selection such that every index in [from_index, to_index) is
        either on some slice in the subset, or on a gap.
        """
        if from_index >= to_index:
            return Selection(universe=deepcopy(self.universe), intervals=[])
        m = self._intervals.bisect_right(from_index)
        if len(self._intervals) % 2 == 0:
            n = self._intervals.bisect_left(to_index)
            intervals = self._intervals[m - (m % 2):n + (n % 2)]
        else:
            n = self._intervals.bisect_right(to_index)
            a = max(0, m - ((m + 1) % 2))
            b = n + ((n + 1) % 2)
            intervals = self._intervals[a:b]
        sel = Selection(universe=deepcopy(self.universe), intervals=intervals)
        return sel

    def _slow_subslice(self, from_index: Optional[int], to_index: Optional[int]):
        # Reference implementation of subslice() via two exclusions.
        sel = self.deepcopy()
        if isinstance(from_index, int):
            sel.exclude(None, from_index)
        if isinstance(to_index, int):
            sel.exclude(to_index, None)
        return sel

    def _interval_index(self, pindex):
        """
        :return n if the nth interval edge is the smallest number such that
        @pindex < n (zero-indexed).
        """
        # NOTE(review): looks stale -- _intervals holds ints, which have no
        # .start/.stop; this would raise AttributeError if called. Confirm
        # whether it is dead code.
        lower = 0
        upper = len(self._intervals) - 1
        while lower <= upper:
            middle = (lower + upper) // 2
            midsl = self._intervals[middle]
            if pindex < midsl.start:
                upper = middle - 1
            elif midsl.stop <= pindex:
                lower = middle + 1
            else:  # midsl.start <= pindex < midsl.stop:
                return middle
        raise IndexError("{} is not in any interval.".format(pindex))

    def select(self, listlike):
        """Concatenate the revealed portions of @listlike."""
        # TODO only works for stringlike objects
        lst = []
        for interval in self.slices():
            lst.append(listlike[interval])
        selection = listlike[0:0].join(lst)
        return selection

    def physical2virtual(self, pindex: int):
        """Map a universe index of a revealed element to its rank among revealed elements."""
        vindex = 0
        for a, b in self.pairs():
            if a <= pindex < b:
                vindex += pindex - a
                return vindex
            vindex += b - a
        raise IndexError("Physical index {} out of bounds for selection {}".format(pindex, self))

    # TODO: O(n) -> O(log(n)) (using another sorted set for cumulative lengths?)
    def virtual2physical(self, vindex: int):  # TODO -> virtualint2physical
        """
        :return the integer n such that where the @vindex'th revealed element
        is the nth element. If @vindex < 0, @vindex is interpreted as
        (number of revealed elements) + @vindex.
        """
        if vindex < -len(self):
            raise IndexError(
                "Got index {}, expected it to be within range [{},{})".format(vindex, -len(self), len(self)))
        elif vindex < 0:
            return self.virtual2physical(len(self) + vindex)
        cumlength = 0
        for a, b in self.pairs():
            cumlength += b - a
            if vindex < cumlength:
                pindex = b - (cumlength - vindex)
                if a <= pindex < b:
                    return pindex
                else:
                    break
        raise IndexError("Virtual index {} out of bounds for selection {}".format(vindex, self))

    def virtual2physicalselection(self, vslice: slice) -> 'Selection':  # TODO -> virtualslice2physical
        """
        :return the sub-Selection that is the intersection of this selection
        and @vslice.
        """
        if not self._intervals or vslice.stop == 0:
            return Selection(self.universe, revealed=[])
        if vslice.start is None:
            a = self.virtual2physical(0)
        elif -len(self) <= vslice.start < len(self):
            a = self.virtual2physical(vslice.start)
        elif vslice.start >= len(self):
            a = self._intervals[-1]
        else:
            raise ValueError("Unexpected slice start: {}".format(vslice))
        if vslice.stop is None or vslice.stop >= len(self):
            b = self._intervals[-1] - 1
        elif -len(self) <= vslice.stop < len(self):
            b = self.virtual2physical(vslice.stop - 1)
        else:
            raise ValueError("Unexpected slice stop: {}".format(vslice))
        # INV: a is the physical index of the first element, b is the physical index of the last element
        if b < a:
            return Selection(universe=self.universe, revealed=[])
        m = self._intervals.bisect_right(a)
        n = self._intervals.bisect_right(b)
        intervals = SortedSet([a] + self._intervals[m:n] + [b + 1])
        return Selection(universe=self.universe, intervals=intervals)

    def virtualselection2physical(self, vselection: 'Selection'):  # TODO -> virtualslice2physical
        """
        :return the sub-Selection that is the intersection of this selection
        and @vselection.
        """
        intervals = []
        for start, stop in vselection:
            for a, b in self.virtual2physicalselection(slice(start, stop)):
                intervals.append(slice(a, b))
        return Selection(universe=self.universe, revealed=intervals)

    def stretched(self, from_index: Optional[int], to_index: Optional[int]):  # TODO remove?
        """
        :return A potentially shrinked deep copy of this selection, delimited
        by the universe [@from_index, @to_index).
        """
        m = self._intervals.bisect_right(from_index)
        n = self._intervals.bisect_right(to_index)
        intervals = self._intervals[m:n]
        return Selection(universe=slice(from_index, to_index), intervals=intervals)

    def __getitem__(self, item):
        return self.virtual2physical(item)

    @staticmethod
    def _compute_len(sortedset: SortedSet):
        """
        :return The sum of the lengths of every slice in @slicelist.
        """
        if len(sortedset) == 0:
            return 0
        elif len(sortedset) % 2 == 0:
            return sum(sortedset[i + 1] - sortedset[i] for i in range(0, len(sortedset), 2))
        return sortedset[0] + sum(sortedset[i + 1] - sortedset[i] for i in range(1, len(sortedset), 2))

    def __len__(self):
        # Number of revealed indices (cached).
        return self._revealed_count

    def __eq__(self, other):
        # Structural equality via repr: same class name, universe and intervals.
        return repr(self) == repr(other)

    def __mul__(self, other: int):
        """Scale the universe and every interval boundary by @other."""
        if other == 0:
            return Selection(universe=slice(0, 0), revealed=[])
        scaled_universe = slice(self.universe.start * other, self.universe.stop * other)
        scaled_revealed = [other * x for x in self._intervals]
        return Selection(universe=scaled_universe, intervals=scaled_revealed)

    def __rmul__(self, other):
        return self.__mul__(other)

    def __repr__(self):
        return "{}(universe={}, intervals={})".format(self.__class__.__name__, self.universe, self._intervals)

    def __str__(self):
        return repr(self)

    def deepcopy(self):
        """
        :return A deep copy of this object.
        """
        return Selection(universe=deepcopy(self.universe), intervals=deepcopy(self._intervals))
def eliminationOrder(gm, orderMethod=None, nExtra=-1, cutoff=inf, priority=None, target=None):
    """Find an elimination order for a graphical model

    Args:
      gm (GraphModel): A graphical model object
      orderMethod (str): Heuristic method; one of {'minfill','wtminfill','minwidth','wtminwidth','random'}
      nExtra (int): Randomly select eliminated variable from among the best plus nExtra; this adds
          randomness to the order selection process.  0 => randomly from best; -1 => no randomness (default)
      cutoff (float): Quit early if ``score`` exceeds a user-supplied cutoff value
          (returning ``target, cutoff``)
      priority (list, optional): Optional list of variable priorities; lowest priority variables are
          eliminated first.  Useful for mixed elimination models, such as marginal MAP inference tasks.
      target (list): If the identified order is better than cutoff, write it directly into passed
          ``target`` list

    Returns:
      list: The identified elimination order
      float: The "score" of this ordering

    Using ``target`` and ``cutoff`` one can easily search for better orderings by repeated calls:

    >>> ord, score = eliminationOrder(model, 'minfill', nExtra=2, cutoff=score, target=ord)
    """
    orderMethod = 'minfill' if orderMethod is None else orderMethod.lower()
    priority = [1 for x in gm.X] if priority is None else priority

    # Each heuristic scores variable Xj from its current Markov blanket adj[Xj].
    if orderMethod == 'minfill':
        score = lambda adj, Xj: 0.5 * sum([len(adj[Xj] - adj[Xk]) for Xk in adj[Xj]])
    elif orderMethod == 'wtminfill':
        score = lambda adj, Xj: sum([(adj[Xj] - adj[Xk]).nrStatesDouble() for Xk in adj[Xj]])
    elif orderMethod == 'minwidth':
        score = lambda adj, Xj: len(adj[Xj])
    elif orderMethod == 'wtminwidth':
        score = lambda adj, Xj: adj[Xj].nrStatesDouble()
    elif orderMethod == 'random':
        score = lambda adj, Xj: np.random.rand()
    else:
        raise ValueError('Unknown ordering method: {}'.format(orderMethod))

    adj = [gm.markovBlanket(Xi) for Xi in gm.X]  # build MRF

    # initialize priority queue of scores using e.g. heapq or sort
    # reverse[Xi] is the (priority, score, var) tuple currently stored in
    # 'scores' for Xi, so stale entries can be located and replaced.
    reverse = [(priority[Xi], score(adj, Xi), Xi) for Xi in gm.X]
    scores = SortedSet(reverse);
    totalSize = 0.0
    #_order = np.zeros((len(gm.X),)) #np.array([0 for Xi in gm.X])
    _order = [0] * len(gm.X)
    for idx in range(gm.nvar):
        pick = 0
        Pi, Si, Xi = scores[pick]
        if nExtra >= 0:
            mx = bisect.bisect_right(scores, (Pi, Si, gm.X[-1]))  # get one past last equal-priority & score vars
            pick = min(mx + nExtra, len(scores))  # then pick a random "near-best" variable
            pick = np.random.randint(pick)
            Pi, Si, Xi = scores[pick]
        del scores[pick]
        _order[idx] = Xi.label  # write into order[idx] = Xi
        totalSize += adj[Xi].nrStatesDouble()  # add sizes of factors
        if totalSize > cutoff:
            return target, cutoff  # if worse than cutoff, quit with no changes to "target"
        fix = VarSet()
        for Xj in adj[Xi]:
            # Eliminating Xi connects all its neighbors to each other.
            adj[Xj] |= adj[Xi]
            adj[Xj] -= [Xi]  # TODO adj[Xj].remove(Xi) slightly faster but still unsupported by cython version
            fix |= adj[Xj]  # shouldn't need to fix as much for min-width?
        for Xj in fix:
            # Replace each affected variable's stale queue entry with its new score.
            Pj, Sj, Xj = reverse[Xj]
            scores.remove(reverse[Xj])
            reverse[Xj] = (Pj, score(adj, Xj), Xj)
            scores.add(reverse[Xj])  # add (Pj,score(adj,Xj),Xj) to heap & update reverse lookup
    if not (target is None):
        target.extend([None for i in range(len(target), len(_order))])  # make sure order is the right size
        for idx in range(gm.nvar):
            target[idx] = _order[idx]  # copy result if completed without quitting
    return _order, totalSize
def test_remove():
    """Removing a present element from a small-fanout SortedSet must succeed."""
    values = SortedSet(range(0, 100))
    values._reset(7)
    values.remove(50)