def _candidates(self, mz_list, query_mz, ms1_tol): pmz_list = SortedList([m.precursor_mz for m in mz_list]) lower = query_mz - ms1_tol upper = query_mz + ms1_tol start = pmz_list.bisect(lower) end = pmz_list.bisect(upper) return mz_list[start:end]
def test_bisect():
    """Check ``SortedList.bisect`` on an empty list and on a list of duplicates."""
    slt = SortedList()
    assert slt.bisect(10) == 0

    # The load factor is set through ``_reset`` rather than a ``load=``
    # constructor keyword, matching the other ``_reset``-based test here.
    slt = SortedList(range(100))
    slt._reset(17)
    slt.update(range(100))
    slt._check()
    # Every value 0..99 is now present twice; bisect counts values <= 10.
    assert slt.bisect(10) == 22
    assert slt.bisect(200) == 200
def test_bisect():
    """Check ``SortedList.bisect`` on an empty list and on a list of duplicates."""
    slt = SortedList()
    assert slt.bisect(0) == 0

    # The load factor is set through ``_reset`` rather than a ``load=``
    # constructor keyword, matching the other ``_reset``-based test here.
    slt = SortedList(range(100))
    slt._reset(17)
    slt.update(range(100))
    slt._check()
    # Every value 0..99 is present twice and ``bisect`` is a right bisect,
    # so the insertion point for 50 lies after both 50s: 51 * 2 == 102.
    assert slt.bisect(50) == 102
    assert slt.bisect(200) == 200
def _candidates(self,query_mz,ms1_tol): from sortedcontainers import SortedList pmz_list = SortedList([m.precursor_mz for m in self.sorted_record_list]) lower = query_mz - ms1_tol upper = query_mz + ms1_tol start = pmz_list.bisect(lower) end = pmz_list.bisect(upper) return self.sorted_record_list[start:end]
def test_bisect():
    """Exercise ``SortedList.bisect`` on an empty list and on doubled values."""
    empty = SortedList()
    assert empty.bisect(10) == 0

    doubled = SortedList(range(100))
    # Shrink the load factor before the second batch of values is merged in.
    doubled._reset(17)
    doubled.update(range(100))
    doubled._check()

    # Each of 0..99 now occurs twice.
    assert doubled.bisect(10) == 22
    assert doubled.bisect(200) == 200
def goodTriplets(self, nums1: List[int], nums2: List[int]) -> int:
    """Count triplets that appear in the same relative order in both lists.

    Each element of ``nums1`` is treated as the middle of a triplet: the
    number of triplets centred on it is (elements before it in both lists)
    times (elements after it in both lists).
    """
    # index_in_second[v] is the position of value v inside nums2.
    index_in_second = [0] * len(nums1)
    for where, value in enumerate(nums2):
        index_in_second[value] = where

    # Left-to-right sweep: how many already visited values also sit to the
    # left of the current value in nums2.
    visited = SortedList([index_in_second[nums1[0]]])
    before = [0]
    for value in nums1[1:]:
        target = index_in_second[value]
        visited.add(target)
        before.append(visited.bisect_left(target))

    # Right-to-left sweep: how many already visited values also sit to the
    # right of the current value in nums2.
    visited = SortedList([index_in_second[nums1[-1]]])
    after = [0]
    for value in reversed(nums1[:-1]):
        target = index_in_second[value]
        after.append(len(visited) - visited.bisect(target))
        visited.add(target)
    after.reverse()

    return sum(left * right for left, right in zip(before, after))
def findFarmland(self, land: List[List[int]]) -> List[List[int]]:
    """Collect the rectangles of 1-cells in ``land`` with a row-by-row sweep.

    Each farm is a mutable list ``[top_row, left_col, bottom_row, right_col]``.
    Farms whose bottom row is not known yet are kept in ``pending``, ordered
    by their left column.

    Args:
        land: Grid of 0/1 values; assumed non-empty (``land[0]`` is read).

    Returns:
        The farms, in the order in which their bottom row was determined.
    """
    M, N = len(land), len(land[0])
    # A sentinel farm of four infinities guarantees that every lookup into
    # ``pending`` below finds an element.
    pending = SortedList([[float("inf")] * 4], key=lambda farm: farm[1])
    res = []
    # One extra iteration (i == M) acts as a virtual empty row so that farms
    # touching the last real row are closed as well.
    for i in range(M + 1):
        j = 0
        next_farm = pending[0]
        while j < N:
            if j == next_farm[1]:
                if i != M and land[i][j] == 1:
                    # The open farm continues on this row: jump past its
                    # right edge and look up the next open farm at or after
                    # the new column.
                    j = next_farm[3] + 1
                    next_farm = pending[pending.bisect_left([None, j])]
                else:
                    # The open farm ended on the previous row. It is removed
                    # before being mutated, then recorded.
                    pending.remove(next_farm)
                    next_farm[2] = i - 1
                    res.append(next_farm)
                    next_farm = pending[pending.bisect(next_farm)]
            else:
                if i != M and land[i][j] == 1:
                    # A new farm starts here; scan right to find its last column.
                    new_farm = [i, j, None, None]
                    while j < N and land[i][j] == 1:
                        j += 1
                    j -= 1
                    new_farm[3] = j
                    pending.add(new_farm)
                else:
                    pass
                j += 1
    return res
def findValue(index1, index2):
    """Look up the value stored for ``index2`` among the entries of ``index1``.

    Returns ``0.0`` when both indices are equal. Otherwise the entries found
    under ``index1`` in the module-level mapping named ``dict`` are sorted and
    the second field of the entry at the bisect position of ``(index2, )`` is
    returned as a float.
    NOTE(review): presumably the entries are ``(index, value)`` tuples and one
    exists for ``index2`` -- confirm against the code that fills ``dict``.
    """
    if index1 == index2:
        return 0.0

    ordered = SortedList(dict[index1])
    hit = ordered[ordered.bisect((index2, ))]
    return float(hit[1])
def createSortedArray(self, instructions: List[int]) -> int:
    """Return the total insertion cost modulo 1e9 + 7.

    The cost of inserting a value is the smaller of the number of already
    inserted values strictly less than it and the number strictly greater.
    """
    inserted = SortedList()
    total_cost = 0
    for value in instructions:
        strictly_less = inserted.bisect_left(value)
        strictly_greater = len(inserted) - inserted.bisect(value)
        total_cost += min(strictly_less, strictly_greater)
        inserted.add(value)
    return total_cost % (10**9 + 7)
def createSortedArray(self, instructions: List[int]) -> int:
    """Return the total insertion cost modulo 1e9 + 7.

    Inserting a value costs the smaller of: how many values already placed
    are strictly below it, and how many are strictly above it.
    """
    from sortedcontainers import SortedList

    placed = SortedList()
    cost = 0
    for value in instructions:
        below = placed.bisect_left(value)
        above = len(placed) - placed.bisect(value)
        cost += min(below, above)
        placed.add(value)
    return cost % (10**9 + 7)
def createSortedArray(self, instructions: List[int]) -> int:
    """Return the total insertion cost modulo 1e9 + 7.

    Args:
        instructions: Values to insert, in order.

    Returns:
        The sum over all insertions of ``min(left, right)``, where ``left``
        is the number of already inserted values strictly less than the new
        value and ``right`` the number strictly greater, modulo 1e9 + 7.
    """
    answer = 0
    instructionsSorted = SortedList()
    for instruction in instructions:
        left = instructionsSorted.bisect_left(instruction)
        right = len(instructionsSorted) - instructionsSorted.bisect(
            instruction)
        answer += min(left, right)
        instructionsSorted.add(instruction)
    return answer % (10**9 + 7)
def countSmaller(self, nums: List[int]) -> List[int]:
    """For each element, count the smaller elements to its right.

    Args:
        nums: Input values; may be empty.

    Returns:
        A list ``res`` of the same length as ``nums`` where ``res[i]`` is the
        number of ``nums[j]`` with ``j > i`` and ``nums[j] < nums[i]``.
    """
    if not nums:
        return []

    res = [0] * len(nums)
    seen = SortedList()
    # Walk from the right so that ``seen`` always holds exactly the elements
    # located to the right of the current one.
    for i in range(len(nums) - 1, -1, -1):
        # bisect_left gives the number of stored values strictly smaller
        # than nums[i], without relying on the values being integers.
        res[i] = seen.bisect_left(nums[i])
        seen.add(nums[i])
    return res
class MyCalendar:
    """Calendar that accepts a booking only if it overlaps no earlier one."""

    def __init__(self):
        # Infinite sentinels at both ends mean every real interval always has
        # a left and a right neighbour, so no boundary special-casing is needed.
        low_guard = (float('-inf'), float('-inf'))
        high_guard = (float('inf'), float('inf'))
        self.cal = SortedList([low_guard, high_guard])

    def book(self, start: int, end: int) -> bool:
        """Try to reserve ``(start, end)``; return whether it was added."""
        candidate = (start, end)
        slot = self.cal.bisect(candidate)
        fits = self.cal[slot - 1][1] <= start and end <= self.cal[slot][0]
        if not fits:
            return False
        self.cal.add(candidate)
        return True
def minInteger(self, num: str, k: int) -> str:
    """Build the smallest digit string reachable with at most ``k`` adjacent swaps.

    For every output slot the candidate characters are tried in the order
    given by ``digits`` (a name taken from the enclosing module; presumably
    ``string.digits`` -- confirm). The first one whose leftmost remaining
    occurrence can be moved into the slot within the remaining budget is used.
    """
    length = len(num)
    positions = defaultdict(deque)
    for idx in range(length):
        positions[num[idx]].append(idx)

    out = list(num)
    moved = SortedList()  # original indices of characters already placed
    for slot in range(length):
        out[slot] = num[slot]
        for digit in digits:
            if not positions[digit]:
                continue
            source = positions[digit][0]
            # Current position: original index plus the number of already
            # moved characters whose original index is larger.
            current = source + len(moved) - moved.bisect(source)
            cost = current - slot
            if cost < 0 or cost > k:
                continue
            k -= cost
            moved.add(source)
            out[slot] = num[positions[digit].popleft()]
            break
    return ''.join(out)
class DataPack(BasePack[Entry, Link, Group]):
    # pylint: disable=too-many-public-methods
    """
    A :class:`DataPack` contains a piece of natural language text and a
    collection of NLP entries (annotations, links, and groups). The natural
    language text could be a document, paragraph or in any other granularity.

    Args:
        doc_id (str, optional): A universal id of this data pack.
    """

    def __init__(self, doc_id: Optional[str] = None):
        super().__init__()
        self._text = ""

        # Annotations are kept sorted; the other entry kinds are plain lists.
        self.annotations: SortedList[Annotation] = SortedList()
        self.links: List[Link] = []
        self.groups: List[Group] = []
        self.generics: List[Generic] = []

        # Book-keeping filled in by :meth:`set_text`.
        self.replace_back_operations: ReplaceOperationsType = []
        self.processed_original_spans: List[Tuple[Span, Span]] = []
        self.orig_text_len: int = 0

        self.index: DataIndex = DataIndex()
        self.meta: Meta = Meta(doc_id)

    def __getstate__(self):
        """
        In serialization,
        1) will serialize the annotation sorted list as a normal list;
        2) will not serialize the indexes
        """
        state = super().__getstate__()
        state['annotations'] = list(state['annotations'])
        state.pop('index')
        return state

    def __setstate__(self, state):
        """
        In deserialization, we
        1) transform the annotation list back to a sorted list;
        2) initialize the indexes;
        3) re-attach every annotation, link, group and generic entry to
           this pack.
        """
        super().__setstate__(state)
        self.annotations = SortedList(self.annotations)
        self.index = DataIndex()
        self.index.update_basic_index(list(self.annotations))
        self.index.update_basic_index(self.links)
        self.index.update_basic_index(self.groups)

        for a in self.annotations:
            a.set_pack(self)

        for a in self.links:
            a.set_pack(self)

        for a in self.groups:
            a.set_pack(self)

        for a in self.generics:
            a.set_pack(self)

    # pylint: disable=no-self-use
    def validate(self, entry: EntryType) -> bool:
        """Return whether ``entry`` is an instance of ``SinglePackEntries``."""
        return isinstance(entry, SinglePackEntries)

    @property
    def text(self):
        """The (possibly modified) text held by this pack."""
        return self._text

    def get_span_text(self, span: Span):
        """Return the slice of the pack text covered by ``span``."""
        return self._text[span.begin:span.end]

    def set_text(
            self, text: str,
            replace_func: Optional[
                Callable[[str], ReplaceOperationsType]] = None):
        """
        Set the text of this pack, optionally applying replacement
        operations to it.

        Args:
            text: The new text.
            replace_func: Optional callable that receives ``text`` and
                returns the replacement operations to apply. When ``None``,
                no replacement is applied.
        """
        if len(self._text) > 0:
            logger.warning("The new text is overwriting the original one, "
                           "which might cause unexpected behavior.")

        span_ops = [] if replace_func is None else replace_func(text)

        # The spans should be mutually exclusive
        (self._text, self.replace_back_operations,
         self.processed_original_spans,
         self.orig_text_len) = io_utils.modify_text_and_track_ops(
            text, span_ops)

    def get_original_text(self):
        """Get original unmodified text from the :class:`DataPack` object.

        :return: Original text after applying the `replace_back_operations` of
            :class:`DataPack` object to the modified text
        """
        original_text, _, _, _ = io_utils.modify_text_and_track_ops(
            self._text, self.replace_back_operations)
        return original_text

    def get_original_span(self, input_processed_span: Span,
                          align_mode: str = "relaxed"):
        """
        Function to obtain span of the original text that aligns with the
        given span of the processed text.

        Args:
            input_processed_span: Span of the processed text for which the
                corresponding span of the original text is desired
            align_mode: The strictness criteria for alignment in the
                ambiguous cases, that is, if a part of input_processed_span
                spans a part of the inserted span, then align_mode controls
                whether to use the span fully or ignore it completely
                according to the following possible values

                - "strict" - do not allow ambiguous input, give ValueError
                - "relaxed" - consider spans on both sides
                - "forward" - align looking forward, that is, ignore the
                  span towards the left, but consider the span towards the
                  right
                - "backward" - align looking backwards, that is, ignore the
                  span towards the right, but consider the span towards the
                  left

        Returns:
            Span of the original text that aligns with input_processed_span

        Raises:
            ValueError: If the input span does not adhere to ``align_mode``
                or lies outside the processed string.

        Example:
            * Let o-up1, o-up2, ... and m-up1, m-up2, ... denote the
              unprocessed spans of the original and modified string
              respectively. Note that each o-up would have a corresponding
              m-up of the same size.
            * Let o-pr1, o-pr2, ... and m-pr1, m-pr2, ... denote the
              processed spans of the original and modified string
              respectively. Note that each o-pr is modified to a
              corresponding m-pr that may be of a different size than o-pr.
            * Original string:
              <--o-up1--> <-o-pr1-> <----o-up2----> <----o-pr2----> <-o-up3->
            * Modified string:
              <--m-up1--> <----m-pr1----> <----m-up2----> <-m-pr2-> <-m-up3->
            * Note that `self.processed_original_spans`, which pairs each
              modified processed span with its corresponding original span,
              would look like - [(m-pr1, o-pr1), (m-pr2, o-pr2)]

            >> data_pack = DataPack()
            >> original_text = "He plays in the park"
            >> data_pack.set_text(original_text,\
            >>                    lambda _: [(Span(0, 2), "She")])
            >> data_pack.text
            "She plays in the park"
            >> input_processed_span = Span(0, len("She plays"))
            >> orig_span = data_pack.get_original_span(input_processed_span)
            >> data_pack.get_original_text()[orig_span.begin: orig_span.end]
            "He plays"
        """
        assert align_mode in ["relaxed", "strict", "backward", "forward"]

        req_begin = input_processed_span.begin
        req_end = input_processed_span.end

        def get_original_index(input_index: int, is_begin_index: bool,
                               mode: str) -> int:
            """
            Args:
                input_index: begin or end index of the input span
                is_begin_index: if the index is the begin index of the input
                    span or the end index of the input span
                mode: alignment mode

            Returns:
                Original index that aligns with input_index
            """
            if len(self.processed_original_spans) == 0:
                return input_index

            len_processed_text = len(self._text)
            orig_index = None
            prev_end = 0
            for (inverse_span, original_span) in self.processed_original_spans:
                # check if the input_index lies between one of the unprocessed
                # spans
                if prev_end <= input_index < inverse_span.begin:
                    increment = original_span.begin - inverse_span.begin
                    orig_index = input_index + increment
                # check if the input_index lies between one of the processed
                # spans
                elif inverse_span.begin <= input_index < inverse_span.end:
                    # look backward - backward shift of input_index
                    if is_begin_index and mode in ["backward", "relaxed"]:
                        orig_index = original_span.begin
                    if not is_begin_index and mode == "backward":
                        orig_index = original_span.begin - 1

                    # look forward - forward shift of input_index
                    if is_begin_index and mode == "forward":
                        orig_index = original_span.end
                    if not is_begin_index and mode in ["forward", "relaxed"]:
                        orig_index = original_span.end - 1

                # break if the original index is populated
                if orig_index is not None:
                    break
                prev_end = inverse_span.end

            if orig_index is None:
                # check if the input_index lies between the last unprocessed
                # span
                inverse_span, original_span = self.processed_original_spans[-1]
                if inverse_span.end <= input_index < len_processed_text:
                    increment = original_span.end - inverse_span.end
                    orig_index = input_index + increment
                else:
                    # check if there input_index is not valid given the
                    # alignment mode or lies outside the processed string
                    raise ValueError(f"The input span either does not adhere "
                                     f"to the {align_mode} alignment mode or "
                                     f"lies outside to the processed string.")

            return orig_index

        orig_begin = get_original_index(req_begin, True, align_mode)
        # The end index is exclusive, so align the last covered character
        # and shift back by one afterwards.
        orig_end = get_original_index(req_end - 1, False, align_mode) + 1

        return Span(orig_begin, orig_end)

    def add_entry(self, entry: EntryType) -> EntryType:
        """
        Force add an :class:`~forte.data.ontology.top.Entry` object to the
        :class:`DataPack` object. Allow duplicate entries in a pack.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.top.Entry`
                object to be added to the pack.

        Returns:
            The input entry itself
        """
        return self.__add_entry_with_check(entry, True)

    def add_or_get_entry(self, entry: EntryType) -> EntryType:
        """
        Try to add an :class:`~forte.data.ontology.top.Entry` object to the
        :class:`DataPack` object. If a same entry already exists, will
        return the existing entry instead of adding the new one. Note that
        we regard two entries as the same if their
        :meth:`~forte.data.ontology.top.Entry.eq` have the same return
        value, and users could override
        :meth:`~forte.data.ontology.top.Entry.eq` in their custom entry
        classes.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.top.Entry`
                object to be added to the pack.

        Returns:
            If a same entry already exists, returns the existing
            entry. Otherwise, return the (input) entry just added.
        """
        return self.__add_entry_with_check(entry, False)

    def __add_entry_with_check(self, entry: EntryType,
                               allow_duplicate: bool = True) -> EntryType:
        """
        Internal method to add an :class:`Entry` object to the
        :class:`DataPack` object.

        Args:
            entry (Entry): An :class:`Entry` object to be added to the
                datapack.
            allow_duplicate (bool): Whether we allow duplicate in the
                datapack.

        Returns:
            The input entry itself when it was added; otherwise the equal
            entry already stored in the pack.
        """
        if isinstance(entry, Annotation):
            target = self.annotations
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        else:
            # Any other entry type is stored with the generic entries.
            target = self.generics

        add_new = allow_duplicate or (entry not in target)

        if add_new:
            self.record_entry(entry)
            if isinstance(target, list):
                target.append(entry)
            else:
                # For the sorted list case.
                target.add(entry)

            # update the data pack index if needed
            self.index.update_basic_index([entry])
            if self.index.link_index_on and isinstance(entry, Link):
                self.index.update_link_index([entry])
            if self.index.group_index_on and isinstance(entry, Group):
                self.index.update_group_index([entry])
            self.index.deactivate_coverage_index()
            return entry
        else:
            return target[target.index(entry)]

    def delete_entry(self, entry: EntryType):
        """
        Delete an :class:`~forte.data.ontology.top.Entry` object from the
        :class:`DataPack`.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.top.Entry`
                object to be deleted from the pack.

        Raises:
            ValueError: If ``entry`` is not an Annotation, Link, or Group.
        """
        begin = 0

        if isinstance(entry, Annotation):
            target = self.annotations
            # Annotations are sorted, so the search can start at the first
            # position where an equal entry may be.
            begin = target.bisect_left(entry)
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        else:
            raise ValueError(
                f"Invalid entry type {type(entry)}. A valid entry "
                f"should be an instance of Annotation, Link, or Group.")

        for i, e in enumerate(target[begin:]):
            if e.tid == entry.tid:
                target.pop(i + begin)
                break

        # update basic index
        self.index.remove_entry(entry)

        # set other index invalid
        self.index.turn_link_index_switch(on=False)
        self.index.turn_group_index_switch(on=False)
        self.index.deactivate_coverage_index()

    @classmethod
    def validate_link(cls, entry: EntryType) -> bool:
        """Return whether ``entry`` is a :class:`Link`."""
        return isinstance(entry, Link)

    @classmethod
    def validate_group(cls, entry: EntryType) -> bool:
        """Return whether ``entry`` is a :class:`Group`."""
        return isinstance(entry, Group)

    def get_data(self, context_type: Type[Annotation],
                 request: Optional[DataRequest] = None,
                 skip_k: int = 0) -> Iterator[Dict[str, Any]]:
        """
        Example:

            .. code-block:: python

                requests = {
                    base_ontology.Sentence:
                        {
                            "component": ["dummy"],
                            "fields": ["speaker"],
                        },
                    base_ontology.Token: ["pos", "sense"],
                    base_ontology.EntityMention: {
                        "unit": "Token",
                    },
                }
                pack.get_data(base_ontology.Sentence, requests)

        Args:
            context_type (str): The granularity of the data context, which
                could be any ``Annotation`` type.
            request (dict): The entry types and fields required.
                The keys of the requests dict are the required entry types
                and the value should be either:

                - a list of field names or
                - a dict which accepts three keys: `"fields"`, `"component"`,
                  and `"unit"`.

                    - By setting `"fields"` (list), users
                      specify the requested fields of the entry. If "fields"
                      is not specified, only the default fields will be
                      returned.
                    - By setting `"component"` (list), users
                      can specify the components by which the entries are
                      generated. If `"component"` is not specified, will
                      return entries generated by all components.
                    - By setting `"unit"` (string), users can
                      specify a unit by which the annotations are indexed.

                Note that for all annotation types, `"text"` and `"span"`
                fields are returned by default; for all link types, `"child"`
                and `"parent"` fields are returned by default.
            skip_k (int): Will skip the first `skip_k` matching context
                instances and generate data from the `skip_k` + 1 th one.

        Returns:
            A data generator, which generates one piece of data (a dict
            containing the required entries, fields, and context).

        Raises:
            KeyError: If two requested entry types share a class name.
        """
        annotation_types: Dict[Type[Annotation], Union[Dict, List]] = dict()
        link_types: Dict[Type[Link], Union[Dict, List]] = dict()
        group_types: Dict[Type[Group], Union[Dict, List]] = dict()
        if request is not None:
            for key, value in request.items():
                if issubclass(key, Annotation):
                    annotation_types[key] = value
                elif issubclass(key, Link):
                    link_types[key] = value
                elif issubclass(key, Group):
                    group_types[key] = value

        context_args = annotation_types.get(context_type)

        context_components, _, context_fields = self._parse_request_args(
            context_type, context_args)

        valid_context_ids: Set[int] = self.get_ids_by_type(context_type)
        if context_components:
            valid_component_id: Set[int] = set()
            for component in context_components:
                valid_component_id |= self.get_ids_by_component(component)
            valid_context_ids &= valid_component_id

        skipped = 0
        # must iterate through a copy here because self.annotations is changing
        for context in list(self.annotations):
            if (context.tid not in valid_context_ids or
                    not isinstance(context, context_type)):
                continue
            if skipped < skip_k:
                skipped += 1
                continue

            data = dict()
            data["context"] = self.text[context.span.begin:context.span.end]
            data["offset"] = context.span.begin

            for field in context_fields:
                data[field] = getattr(context, field)

            if annotation_types:
                for a_type, a_args in annotation_types.items():
                    # The context type itself is already covered by the
                    # context fields above.
                    if issubclass(a_type, context_type):
                        continue
                    if a_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {a_type.__name__} at the "
                            f"same time is not allowed")
                    data[a_type.__name__] = \
                        self._generate_annotation_entry_data(
                            a_type, a_args, data, context
                        )

            if link_types:
                for l_type, l_args in link_types.items():
                    if l_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {l_type.__name__} at the "
                            f"same time is not allowed")
                    data[l_type.__name__] = self._generate_link_entry_data(
                        l_type, l_args, data, context)

            if group_types:
                # Group requests are accepted but produce no data here.
                # pylint: disable=unused-variable
                for g_type, g_args in group_types.items():
                    pass

            yield data

    def _parse_request_args(self, a_type, a_args):
        """
        Split the request arguments of one entry type into components,
        unit and fields.

        Args:
            a_type: The requested entry type (used in error messages only).
            a_args: ``None``, an iterable of field names, or a dict with the
                optional keys ``"component"``, ``"unit"`` and ``"fields"``.

        Returns:
            A tuple ``(components, unit, fields)`` where ``fields`` is a set
            that always contains ``"tid"``.

        Raises:
            TypeError: If any part of the request has an invalid type.
        """
        # request which fields generated by which component
        components = None
        unit = None
        fields = set()
        if isinstance(a_args, dict):
            components = a_args.get("component")
            if components is not None and not isinstance(components, Iterable):
                raise TypeError(
                    f"Invalid request format for 'components'. "
                    f"The value of 'components' should be of an iterable type."
                )
            unit = a_args.get("unit")
            if unit is not None and not isinstance(unit, str):
                raise TypeError(f"Invalid request format for 'unit'. "
                                f"The value of 'unit' should be a string.")
            a_args = a_args.get("fields", set())

        if isinstance(a_args, Iterable):
            fields = set(a_args)
        elif a_args is not None:
            raise TypeError(
                f"Invalid request format for '{a_type}'. "
                f"The request should be of an iterable type or a dict.")

        fields.add("tid")
        return components, unit, fields

    def _generate_annotation_entry_data(self, a_type: Type[Annotation],
                                        a_args: Union[Dict, Iterable],
                                        data: Dict,
                                        cont: Optional[Annotation]) -> Dict:
        """
        Collect the requested fields of all ``a_type`` annotations inside
        the context ``cont``.

        Args:
            a_type: The annotation type to collect.
            a_args: The request arguments for ``a_type``.
            data: The data gathered so far for this context; it must already
                hold the unit type when a ``"unit"`` is requested.
            cont: The context annotation, or ``None``.

        Returns:
            A dict mapping each field name (plus ``"span"``, ``"text"`` and,
            when a unit is requested, ``"unit_span"``) to a numpy array.

        Raises:
            KeyError: If the requested unit is not present in ``data``.
        """
        components, unit, fields = self._parse_request_args(a_type, a_args)

        a_dict: Dict[str, Any] = dict()

        a_dict["span"] = []
        a_dict["text"] = []
        for field in fields:
            a_dict[field] = []

        unit_begin = 0
        if unit is not None:
            if unit not in data.keys():
                raise KeyError(f"{unit} is missing in data. You need to "
                               f"request {unit} before {a_type}.")
            a_dict["unit_span"] = []

        cont_begin = cont.span.begin if cont else 0
        annotations: List[Annotation] = self.get_entries(  # type: ignore
            a_type, cont, components)

        for annotation in annotations:
            # we provide span, text (and also tid) by default
            a_dict["span"].append((annotation.span.begin, annotation.span.end))
            a_dict["text"].append(annotation.text)

            for field in fields:
                if field in ("span", "text"):
                    continue
                if field == "context_span":
                    a_dict[field].append((annotation.span.begin - cont_begin,
                                          annotation.span.end - cont_begin))
                    continue

                a_dict[field].append(getattr(annotation, field))

            if unit is not None:
                unit_tids = data[unit]["tid"]
                while not self.index.in_span(unit_tids[unit_begin],
                                             annotation.span):
                    unit_begin += 1

                unit_span_begin = unit_begin
                unit_span_end = unit_span_begin + 1

                # Stop at the end of the unit list: an annotation that
                # covers the last unit must not index past it.
                while (unit_span_end < len(unit_tids) and
                       self.index.in_span(unit_tids[unit_span_end],
                                          annotation.span)):
                    unit_span_end += 1

                a_dict["unit_span"].append((unit_span_begin, unit_span_end))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)

        return a_dict

    def _generate_link_entry_data(self, a_type: Type[Link],
                                  a_args: Union[Dict, Iterable],
                                  data: Dict,
                                  cont: Optional[Annotation]) -> Dict:
        """
        Collect the requested fields of all ``a_type`` links inside the
        context ``cont``.

        Args:
            a_type: The link type to collect.
            a_args: The request arguments for ``a_type``.
            data: The data gathered so far for this context; it must already
                hold the parent and child types of the links.
            cont: The context annotation, or ``None``.

        Returns:
            A dict mapping each field name (plus ``"parent"`` and
            ``"child"``) to a numpy array. Parent and child are given as
            positions inside the ``"tid"`` arrays of ``data``.

        Raises:
            ValueError: If a ``"unit"`` is requested for a link type.
            KeyError: If the parent or child type is missing from ``data``.
        """
        components, unit, fields = self._parse_request_args(a_type, a_args)

        if unit is not None:
            raise ValueError(f"Link entries cannot be indexed by {unit}.")

        a_dict: Dict[str, Any] = dict()
        for field in fields:
            a_dict[field] = []
        a_dict["parent"] = []
        a_dict["child"] = []

        links: List[Link] = self.get(a_type, cont, components)  # type: ignore

        for link in links:
            parent_type = link.ParentType.__name__
            child_type = link.ChildType.__name__

            if parent_type not in data.keys():
                raise KeyError(
                    f"The Parent entry of {a_type} is not requested."
                    f" You should also request {parent_type} with "
                    f"{a_type}")
            if child_type not in data.keys():
                raise KeyError(f"The child entry of {a_type} is not requested."
                               f" You should also request {child_type} with "
                               f"{a_type}")

            a_dict["parent"].append(
                np.where(data[parent_type]["tid"] == link.parent)[0][0])
            a_dict["child"].append(
                np.where(data[child_type]["tid"] == link.child)[0][0])

            for field in fields:
                if field in ("parent", "child"):
                    continue

                a_dict[field].append(getattr(link, field))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)
        return a_dict

    def get_entries(
            self, entry_type: Type[EntryType],
            range_annotation: Optional[Annotation] = None,
            components: Optional[Union[str, List[str]]] = None
    ) -> Iterable[EntryType]:
        """
        Get ``entry_type`` entries from the span of ``range_annotation`` in a
        DataPack.

        Args:
            entry_type (type): The type of entries requested.
            range_annotation (Annotation, optional): The range of entries
                requested. If `None`, will return valid entries in the range
                of whole data_pack.
            components (str or list, optional): The component generating the
                entries requested. If `None`, will return valid entries
                generated by any component.

        Returns:
            A generator over the matching entries. It yields nothing when
            the pack holds no matching entry.
        """
        # valid type
        valid_id = self.get_ids_by_type(entry_type)
        # valid component
        if components is not None:
            if isinstance(components, str):
                components = [components]
            valid_component_id: Set[int] = set()
            for component in components:
                valid_component_id |= self.get_ids_by_component(component)
            valid_id &= valid_component_id
        # valid span
        if range_annotation is not None:
            coverage_index = self.index.coverage_index(type(range_annotation),
                                                       entry_type)
            if coverage_index is not None:
                valid_id &= coverage_index[range_annotation.tid]

        if issubclass(entry_type, Annotation):
            # Without any stored annotation there is nothing to yield, and
            # the default range end below could not be computed.
            if len(self.annotations) == 0:
                return

            range_begin = (range_annotation.span.begin
                           if range_annotation else 0)
            range_end = (range_annotation.span.end
                         if range_annotation
                         else self.annotations[-1].span.end)

            begin_index = self.annotations.bisect(
                Annotation(self, range_begin, range_begin))
            end_index = self.annotations.bisect(
                Annotation(self, range_end, range_end))
            for annotation in self.annotations[begin_index:end_index]:
                if annotation.tid not in valid_id:
                    continue
                if (range_annotation is None or
                        self.index.in_span(annotation,
                                           range_annotation.span)):
                    yield annotation

        elif issubclass(entry_type, (Link, Group)):
            for entry_id in valid_id:
                entry: EntryType = self.get_entry(entry_id)  # type: ignore
                if (range_annotation is None or
                        self.index.in_span(entry, range_annotation.span)):
                    yield entry

    def get(self, entry_type: Type[EntryType],
            range_annotation: Optional[Annotation] = None,
            component: Optional[str] = None) -> Iterable[EntryType]:
        """Shorthand for :meth:`get_entries` with the same arguments."""
        return self.get_entries(entry_type, range_annotation, component)
class BarGridKernel(Kernel):
    '''
    Store a kernel of n dimensions as a list of bars in the space of dimension (n-1).

    A bar is given by its start and its end coordinates, and corresponds to the
    hull of the viable points in the last dimension for each coordinates in the
    space of dimension (n-1). Each bar is stored in ``self.bars`` (a
    ``SortedList``) as a list ``[c_0, ..., c_(n-2), inf, sup]`` of grid indices.

    The order of the dimensions may have been changed, and the last dimension of
    these data may not correspond to the last dimension of the viability problem.
    Therefore the attribute ``permutation`` gives a matrix describing the
    permutation of the dimensions.
    '''

    def __init__(self, originCoords, oppositeCoords, intervalNumberperaxis, permutation=None,
                 kernelMinPoint=None, kernelMaxPoint=None, data=None, metadata=None):
        '''
        Build a bar grid between ``originCoords`` and ``oppositeCoords``.

        ``intervalNumberperaxis`` is the number of intervals on each axis,
        ``permutation`` defaults to the identity matrix, ``kernelMinPoint`` and
        ``kernelMaxPoint`` default to an "empty" bounding box (min above max),
        ``data`` is an optional iterable of bars and ``metadata`` an optional
        dict handed to the ``Kernel`` constructor.
        '''
        # Fresh containers per instance: a shared default dict would be mutated
        # by the metadata update below and leak between kernels.
        if data is None:
            data = []
        if metadata is None:
            metadata = {}
        super(BarGridKernel, self).__init__(metadata)
        self.originCoords = np.array(originCoords, float)
        self.oppositeCoords = np.array(oppositeCoords, float)
        self.intervalNumberperaxis = np.array(intervalNumberperaxis, int)
        self.bars = SortedList(data)
        if permutation is None:
            self.permutation = np.eye(len(originCoords), dtype=int)
        else:
            self.permutation = permutation
        if kernelMinPoint is None:
            self.kernelMinPoint = [intervalNumberperaxis[i] for i in range(len(originCoords))]
        else:
            self.kernelMinPoint = kernelMinPoint
        if kernelMaxPoint is None:
            self.kernelMaxPoint = [-1] * len(originCoords)
        else:
            self.kernelMaxPoint = kernelMaxPoint
        self.metadata.update(self.getDataAttributes())

    @staticmethod
    def getFormatCode():
        '''Return the identifier of this storage format.'''
        return "bars"

    def getDataAttributes(self):
        '''Return the parent's data attributes completed with the grid description.'''
        da = super(BarGridKernel, self).getDataAttributes()
        da['origin'] = self.originCoords
        da['opposite'] = self.oppositeCoords
        da['intervals'] = self.intervalNumberperaxis
        da['permutation'] = self.permutation
        da['maxPoint'] = self.kernelMaxPoint
        da['minPoint'] = self.kernelMinPoint
        return da

    @classmethod
    def initFromHDF5(cls, metadata, attrs, data):
        '''
        Create an object of class BarGridKernel from attributes and data loaded from an HDF5 file.
        This method is intended to be used by the method hdf5common.readKernel
        '''
        return cls(
            originCoords=attrs['origin'],
            oppositeCoords=attrs['opposite'],
            intervalNumberperaxis=attrs['intervals'],
            permutation=attrs['permutation'],
            kernelMinPoint=attrs['minPoint'],
            kernelMaxPoint=attrs['maxPoint'],
            data=data.tolist(),
            metadata=metadata
        )

    def getData(self):
        '''Return the bars as an int64 numpy array, one bar per row.'''
        return np.array(list(self.bars), dtype='int64')

    def getIntervalSizes(self):
        '''
        Give the size of one interval (the step) of the grid on each axis
        '''
        return list((self.oppositeCoords-self.originCoords)/self.intervalNumberperaxis)

    def getMinFrameworkBounds(self):
        '''Return the origin of the grid shifted down by half a step on each axis.'''
        return list(self.originCoords-np.array(self.getIntervalSizes())/2)

    def getMaxFrameworkBounds(self):
        '''Return the opposite corner of the grid shifted up by half a step on each axis.'''
        return list(self.oppositeCoords+np.array(self.getIntervalSizes())/2)

    def _gridPointToCoords(self, gridPoint):
        '''Convert a grid point given in permuted indices to coordinates in the original axis order.'''
        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutOppositeCoords = np.dot(self.permutation, self.oppositeCoords)
        permutIntervalNumberperaxis = np.dot(self.permutation, self.intervalNumberperaxis)
        return list(np.dot(np.transpose(self.permutation),permutOriginCoords+(permutOppositeCoords-permutOriginCoords)*gridPoint/permutIntervalNumberperaxis))

    def getMinBounds(self):
        '''
        Give the coordinates of the point of the vino with minimal coordinates
        '''
        intervalSizes = np.array(self.getIntervalSizes())
        # list - ndarray yields an ndarray, as the original expression did
        return self._gridPointToCoords(self.kernelMinPoint) - intervalSizes/2

    def getMaxBounds(self):
        '''
        Give the coordinates of the point of the vino with maximal coordinates
        '''
        intervalSizes = np.array(self.getIntervalSizes())
        return self._gridPointToCoords(self.kernelMaxPoint) + intervalSizes/2

    def getDataToPlot(self):
        '''
        Return ``[header, data]`` where ``header`` concatenates the framework
        bounds, the interval sizes and the permuted axis order, and ``data``
        holds, for each bar, the real coordinates of its lower end and the
        real value of its upper bound.
        '''
        data = []
        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutOppositeCoords = np.dot(self.permutation, self.oppositeCoords)
        permutIntervalNumberperaxis = np.dot(self.permutation, self.intervalNumberperaxis)
        for bar in self.bars:
            data.append([
                permutOriginCoords+(permutOppositeCoords-permutOriginCoords)*np.array(bar[:-1])/permutIntervalNumberperaxis,
                permutOriginCoords[-1]+(permutOppositeCoords[-1]-permutOriginCoords[-1])*bar[-1]/permutIntervalNumberperaxis[-1]
            ])
        perm = np.dot(self.permutation,np.arange(len(self.originCoords)))
        data = [self.getMinFrameworkBounds()+self.getMaxFrameworkBounds()+self.getIntervalSizes()+[perm], data]
        return data

    def getTotalPointNumber(self):
        '''Return the number of grid points covered by all the bars.'''
        return sum(elt[-1] - elt[-2] + 1 for elt in self.bars)

    def toRegularGridKernel(self):
        '''
        Convert the kernel to the regular grid representation.
        Returns an instance of RegularGridKernel.

        The returned grid is trimmed to not include empty portions of the grid.
        '''
        minPoint = np.array(self.kernelMinPoint)
        maxPoint = np.array(self.kernelMaxPoint)
        dimensionsExtents = maxPoint - minPoint + 1
        grid = RegularGridKernel(self.originCoords, self.intervalNumberperaxis,
                                 dimensionsExtents, metadata=self.metadata)
        for bar in self.bars:
            barPosition = (bar[:-2]-minPoint[:-1]).tolist()
            grid.grid[tuple(barPosition)].put(list(range(bar[-2], bar[-1] + 1)), True)
        return grid

    def intersectionwithBarGridKernel(self,othergrid):
        '''
        Returns an instance of BarGridKernel which is the intersection of two BarGridKernels
        with the same underlying grid characteristics
        '''
        data = []
        grid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,self.permutation,None,None,data,self.metadata)
        barsindex = 0
        otherbarsindex = 0
        while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)):
            # skip the bars of the other grid located before the current position
            while (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] < self.bars[barsindex][:-2]):
                otherbarsindex = otherbarsindex + 1
            # both grids have a bar at the same position: intersect them
            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] == self.bars[barsindex][:-2]):
                bar = self.bars[barsindex]
                otherbar = othergrid.bars[otherbarsindex]
                if otherbar[-1] < bar[-2]:
                    # the other bar ends before this one starts
                    otherbarsindex = otherbarsindex + 1
                elif otherbar[-2] > bar[-1]:
                    # the other bar starts after this one ends
                    barsindex = barsindex + 1
                elif otherbar[-1] > bar[-1]:
                    grid.addBar(bar[:-2], max(otherbar[-2], bar[-2]), bar[-1])
                    barsindex = barsindex + 1
                elif otherbar[-1] < bar[-1]:
                    grid.addBar(bar[:-2], max(otherbar[-2], bar[-2]), otherbar[-1])
                    otherbarsindex = otherbarsindex + 1
                else:
                    grid.addBar(bar[:-2], max(otherbar[-2], bar[-2]), otherbar[-1])
                    otherbarsindex = otherbarsindex + 1
                    barsindex = barsindex + 1
            # skip the bars of this grid located before the other grid's position
            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] > self.bars[barsindex][:-2]):
                barsindex = barsindex + 1
        return grid

    def MinusBarGridKernel(self,othergrid):
        '''
        Returns an instance of BarGridKernel which contains the elements of this
        BarGridKernel which are not in the other one.
        The BarGridKernels have the same underlying grid characteristics
        '''
        data = []
        grid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,self.permutation,None,None,data,self.metadata)
        barsindex = 0
        otherbarsindex = 0
        # alreadycut: the current bar has lost its beginning; `remember` is the
        # first index of the part that is still to be processed.
        # Initialised here so that an empty `othergrid` is handled.
        alreadycut = False
        remember = None
        while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)):
            while (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] < self.bars[barsindex][:-2]):
                otherbarsindex = otherbarsindex + 1
            alreadycut = False
            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] == self.bars[barsindex][:-2]):
                bar = self.bars[barsindex]
                otherbar = othergrid.bars[otherbarsindex]
                start = remember if alreadycut else bar[-2]
                if otherbar[-1] < bar[-2]:
                    otherbarsindex = otherbarsindex + 1
                elif otherbar[-1] >= bar[-1]:
                    # the other bar reaches the end of this bar: keep what lies
                    # before it, clipped to this bar (the other bar may start
                    # after this bar ends), then move to the next bar
                    if otherbar[-2] > bar[-2]:
                        end = min(otherbar[-2]-1, bar[-1])
                        if start <= end:
                            grid.addBar(bar[:-2], start, end)
                    barsindex = barsindex + 1
                    alreadycut = False
                else:
                    # the other bar ends inside this bar: keep what lies before
                    # it and remember where the remainder starts
                    if otherbar[-2] > bar[-2]:
                        end = otherbar[-2]-1
                        if start <= end:
                            grid.addBar(bar[:-2], start, end)
                    alreadycut = True
                    remember = otherbar[-1]+1
                    otherbarsindex = otherbarsindex + 1
            # bars of this grid located before the other grid's position are kept
            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] > self.bars[barsindex][:-2]):
                bar = self.bars[barsindex]
                grid.addBar(bar[:-2], remember if alreadycut else bar[-2], bar[-1])
                barsindex = barsindex + 1
                alreadycut = False
        # the other grid is exhausted: everything left in this grid is kept
        while (barsindex < len(self.bars)):
            bar = self.bars[barsindex]
            grid.addBar(bar[:-2], remember if alreadycut else bar[-2], bar[-1])
            barsindex = barsindex + 1
            alreadycut = False
        return grid

    def toBarGridKernel(self, newOriginCoords, newOppositeCoords, newIntervalNumberperaxis):
        '''
        Convert a BarGridKernel to another BarGridKernel with another underlying grid.
        Returns an instance of BarGridKernel.
        '''
        dimension = len(self.originCoords)
        actualbarposition = np.zeros(dimension-1,int)
        barsindex = 0
        # converting to numpy arrays
        newOriginCoords = np.array(newOriginCoords,float)
        newIntervalNumberperaxis = np.array(newIntervalNumberperaxis,float)
        # permuting coordinates
        permutnewOriginCoords = np.dot(self.permutation, newOriginCoords)
        permutnewIntervalNumberperaxis = np.dot(self.permutation, newIntervalNumberperaxis)
        permutnewpas = np.dot(self.permutation,(np.array(newOppositeCoords,float) - newOriginCoords) / newIntervalNumberperaxis)
        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutinversepas = np.dot(self.permutation, self.intervalNumberperaxis / (self.oppositeCoords - self.originCoords))
        data = []
        grid = BarGridKernel(newOriginCoords,newOppositeCoords,newIntervalNumberperaxis,self.permutation,None,None,data,self.metadata)
        # walk over every position of the new grid (odometer-style, see the end of the loop)
        while(actualbarposition[0]<permutnewIntervalNumberperaxis[0]+1):
            # nearest position of the old grid for the current new position
            realpoint = permutnewOriginCoords[:-1] + actualbarposition * permutnewpas[:-1]
            intpoint = (realpoint-permutOriginCoords[:-1]) * permutinversepas[:-1]
            intpoint = [int(e+0.5) for e in intpoint]
            # compare the whole (n-1)-dimensional position: the original `[:2]`
            # slice is only equivalent for grids of at most 3 dimensions
            while (barsindex < len(self.bars)) and (self.bars[barsindex][:-2] < intpoint):
                barsindex = barsindex+1
            barinprocess = False
            while (barsindex < len(self.bars)) and (self.bars[barsindex][:-2] == intpoint):
                # convert both bounds of the old bar to indices of the new grid
                inf = self.bars[barsindex][-2]
                realinf = inf/permutinversepas[-1] +permutOriginCoords[-1]
                intinf = int((realinf-permutnewOriginCoords[-1])/permutnewpas[-1]+0.5)
                sup = self.bars[barsindex][-1]
                realsup = sup/permutinversepas[-1] +permutOriginCoords[-1]
                intsup = int((realsup-permutnewOriginCoords[-1])/permutnewpas[-1]+0.5)
                # NOTE(review): `or` keeps bars lying entirely outside the new
                # grid; `and` may have been intended - confirm before changing.
                if (intinf<permutnewIntervalNumberperaxis[-1]+1) or (intsup >=0):
                    if barinprocess:
                        if intinf == grid.bars[-1][-1]:
                            grid.bars[-1][-1] = min(intsup,permutnewIntervalNumberperaxis[-1]+1)
                        else:
                            grid.addBar(actualbarposition.tolist(), max(intinf,0), min(intsup,permutnewIntervalNumberperaxis[-1]+1))
                    else:
                        grid.addBar(actualbarposition.tolist(), max(intinf,0), min(intsup,permutnewIntervalNumberperaxis[-1]+1))
                        barinprocess = True
                barsindex = barsindex+1
            # increment the position, carrying over from the last axis to the first
            for i in range(dimension-1):
                if ((i == dimension - 2) or (actualbarposition[dimension-2-i]<permutnewIntervalNumberperaxis[dimension-2-i])):
                    actualbarposition[dimension-2-i] = actualbarposition[dimension-2-i]+1
                    break
                else:
                    actualbarposition[dimension-2-i] = 0
        return grid

    def addBar(self, coords, inf, sup):
        '''
        Add the bar ``[inf, sup]`` at the position ``coords`` (a list of n-1 indices).

        The bar is merged with the bars already present at this position when
        they cross or touch, and the bounding box of the kernel
        (``kernelMinPoint`` / ``kernelMaxPoint``) is updated.
        '''
        # First, we collect the bars already present at the position 'coords'
        # and we merge the bar to add with the existing ones
        # two bars will be merged if at least they touch themselves
        # "touch" means that a lower bound of one bar is equals to the (upper bound of the other one) + 1
        insertion_point = self.bars.bisect(coords)
        merged = False
        mergedBarsToRemove=[]
        rightExpanded = False
        while insertion_point<len(self.bars) and self.bars[insertion_point][:-2]==coords:
            current = self.bars[insertion_point]
            if inf > current[-1] + 1:
                # the new bar doesn't touch the right of the current one
                insertion_point += 1
                continue
            if rightExpanded:
                # a previous bar has been modified, we check that it doesn't cross or touch the current bar
                previous = self.bars[insertion_point-1]
                if current[-2] <= previous[-1] + 1:
                    # the previous bar now intersects the lower bound of the current one
                    # so let's merge the two bars
                    current[-2] = previous[-2]
                    if current[-1] < previous[-1]:
                        # the previous bar completely overlaps the current one:
                        # keep its upper bound (the original compared and
                        # assigned the lower bound twice, truncating the bar)
                        current[-1] = previous[-1]
                    mergedBarsToRemove.append(insertion_point-1)
                    rightExpanded = True
            elif inf >= current[-2] and inf <= current[-1] + 1:
                # the lower bound of the inserted bar is inside the current one
                merged = True
                if sup > current[-1]:
                    # the upper bound is outside the current bar, so we update the upper bound
                    current[-1] = sup
                    rightExpanded = True
            elif inf < current[-2]:
                # the lower bound of the inserted bar is before the current one
                if sup >= current[-2] - 1:
                    # the inserted bar crosses or touches the current bar, so we update the lower bound
                    current[-2] = inf
                    merged = True
                    if sup > current[-1]:
                        # the inserted bound is globally bigger than the current one, so we update also the upper bound
                        current[-1] = sup
                        rightExpanded = True
            insertion_point += 1
        # delete from the end so that the remaining indexes stay valid
        for index in reversed(mergedBarsToRemove):
            del self.bars[index]
        if not merged:
            self.bars.add(coords[:] + [inf,sup])
        self.kernelMinPoint[:-1] = [min(x) for x in zip(self.kernelMinPoint[:-1],coords)]
        self.kernelMinPoint[-1] = min(self.kernelMinPoint[-1], inf)
        self.kernelMaxPoint[:-1] = [max(x) for x in zip(self.kernelMaxPoint[:-1],coords)]
        self.kernelMaxPoint[-1] = max(self.kernelMaxPoint[-1], sup)

    def getBars(self):
        '''Return the sorted list of bars.'''
        return self.bars

    def isInSet(self, point):
        '''
        Returns if point belongs to the BarGridKernel.
        This method will find the cell where to lookup a bar containing the point.
        If not, the point is not considered in the set.

        Technical details:
        In a BarGrid, each cell of the (n-1) dimensional space is stored by its index inside a matrix between two opposite points
        and the number of cells in each dimension.
        First, this method will compute the index(es) of the cell where to lookup for bars,
        then it will look for the last dimension to check if the point is inside a cell covered
        by a bar in the selected cell of the (n-1) dimensional space.
        Note that if the point is exactly between several cells (on a vertex or on an edge),
        the method will check the bars of all the touching cells.

        Let's consider s(i) the size of a cell (the step size) on the dimension i,
        p(i) the coordinates of the point requested on the dimension i,
        and c(i) the coordinates of the center of a cell of the BarGrid in the dimension i,
        thus we consider that the point p is inside the cell c if:
         - p(i)>=c(i)-s(i)/2 (after the left side of the cell)
         - p(i)<=c(i)+s(i)/2 (before the right side of the cell)
         - for i belongs to [0;n-1]
        '''
        result = False
        point = np.array(point,float)
        # first we need to project the point into the cells coordinate system
        point_int = self.intervalNumberperaxis * (point - self.originCoords)/(self.oppositeCoords - self.originCoords)
        point_int = np.dot(self.permutation, np.transpose(point_int))
        points = [point_int]
        for i,coord in enumerate(point_int):
            new_points = []
            for p in points:
                if (coord%1)==0.5:
                    # the point is exactly between two cells on the current dimension
                    left = list(p)
                    left[i] = int(math.floor(left[i]))
                    right = list(p)
                    right[i] = int(math.ceil(right[i]))
                    new_points.extend([left, right])
                else:
                    # we just need to round to the nearest cell center
                    new_point = list(p)
                    new_point[i] = int(round(new_point[i]))
                    new_points.append(new_point)
            points=new_points
        l = len(point)
        # we will look at each bar if they are positioned in the coordinates
        # in (n-1) dimensions space than our point
        for point in points:
            candidateBar = False
            for bar in self.bars:
                if point[:-1] == bar[:-2]:
                    # we have reached the interesting zone
                    candidateBar = True
                    # is our point in the bar?
                    if (point[l-1] >= bar[l-1]-0.5) and (point[l-1] <= bar[l] + 0.5):
                        result = True
                        break
                elif candidateBar:
                    # we have passed the position in (n-1) dimensions space, so we can't find candidates anymore
                    break
        return result

    def _transposeBarsAt(self, permutation, barposition, oldindex, newindex, NmaxUsefullBars):
        '''
        Build the bars of the permuted grid for one ``barposition``.

        Collects the bars of this grid found while sweeping the axis ``oldindex``
        from 0 to ``NmaxUsefullBars`` and re-expresses them as bars running
        along the new last axis. Used by ``permute`` only.

        NOTE(review): the nesting of the merge loop was reconstructed from a
        whitespace-collapsed source - confirm against the upstream file.
        '''
        usefuloldbars = []
        oldbarposition = list(np.dot(permutation,np.array(barposition,int)))[:-1]
        for i in range(NmaxUsefullBars+1):
            oldbarposition[oldindex] = i
            # a position sorts just before every bar sharing it as a prefix
            insertion_point = self.bars.bisect(oldbarposition)
            while insertion_point<len(self.bars) and self.bars[insertion_point][:-2]==oldbarposition:
                usefuloldbars.append(self.bars[insertion_point])
                insertion_point = insertion_point+1
        newbars = []
        for oldbar in usefuloldbars:
            level = oldbar[oldindex]
            # bar of a single cell at `level`, moved along `newindex` below
            unitbar = barposition[:-1] + [level,level]
            mini = oldbar[-2]
            maxi = oldbar[-1]
            newbartoupdateindex = mini
            if newbars:
                k = 0
                while k <len(newbars):
                    newbar = newbars[k]
                    if (newbar[newindex] > maxi):
                        break
                    elif (newbar[newindex] >= mini):
                        if (newbar[newindex] > newbartoupdateindex):
                            # no new bar yet for these positions: insert unit bars
                            for l in range(newbartoupdateindex,newbar[newindex]):
                                unitbar[newindex] = l
                                newbars.insert(k,copy.copy(unitbar))
                                k=k+1
                            newbartoupdateindex = newbar[newindex]
                        if (newbar[-1] == level-1):
                            # the new bar stops just below this level: extend it
                            newbar[-1] = level
                            newbartoupdateindex = newbar[newindex]+1
                    k = k+1
                for l in range(newbartoupdateindex,maxi+1):
                    unitbar[newindex] = l
                    newbars.insert(k,copy.copy(unitbar))
                    k = k+1
            else:
                for l in range(mini,maxi+1):
                    unitbar[newindex] = l
                    newbars.append(copy.copy(unitbar))
        return newbars

    def permute(self,permutation):
        '''
        Create a BarGrid corresponding to the same data as the initial one but with a different permutation of the variables :
        np.dot(np.transpose(permutation),self.permutation) instead of self.permutation
        '''
        griddata = []
        dimension = len(self.originCoords)
        matid = np.identity(dimension,dtype = int)
        # is `permutation` different from the identity?
        b = any(permutation[i][j] != matid[i][j] for i in range(dimension) for j in range(dimension))
        if b:
            permutegrid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,np.dot(np.transpose(permutation),self.permutation),np.dot(np.transpose(permutation),self.kernelMinPoint),np.dot(np.transpose(permutation),self.kernelMaxPoint),griddata,self.metadata)
            if permutation[dimension-1][dimension-1] == 0:
                # the last dimension (the one along which bars run) changes:
                # the bars have to be rebuilt along the new last dimension
                barposition = [0]*(dimension-1)
                increment = [0]*len(barposition)
                increment.append(1)
                oldincrement = list(np.dot(permutation,np.array(increment,int)))[:-1]
                oldindex = oldincrement.index(1)
                newincrement = list(np.dot(np.transpose(permutation),np.array(increment,int)))[:-1]
                newindex = newincrement.index(1)
                NmaxUsefullBars = list(np.dot(self.permutation, self.intervalNumberperaxis))[oldindex]
                barposition.append(0)
                permutnewIntervalNumberperaxis = np.dot(permutegrid.permutation, permutegrid.intervalNumberperaxis)
                if (newindex == 0):
                    indexbar = 1
                else:
                    indexbar = 0
                if dimension == 2:
                    # a single sweep covers the whole grid
                    permutegrid.bars.update(self._transposeBarsAt(permutation, barposition, oldindex, newindex, NmaxUsefullBars))
                else:
                    while(barposition[indexbar]<permutnewIntervalNumberperaxis[indexbar]+1):
                        permutegrid.bars.update(self._transposeBarsAt(permutation, barposition, oldindex, newindex, NmaxUsefullBars))
                        # next position, skipping the axis filled by the sweep
                        for i in range(dimension-1):
                            if ((dimension-2-i) != newindex):
                                if ((i == dimension - 2- indexbar) or (barposition[dimension-2-i]<permutnewIntervalNumberperaxis[dimension-2-i])):
                                    barposition[dimension-2-i] = barposition[dimension-2-i]+1
                                    break
                                else:
                                    barposition[dimension-2-i] = 0
            else:
                # the last dimension is unchanged: only the positions are permuted
                tpermutation = np.transpose(permutation)
                for bar in self.bars:
                    permutegrid.bars.add(list(np.dot(tpermutation,bar[:-1]))+[bar[-1]])
        else:
            permutegrid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,np.dot(np.transpose(permutation),self.permutation),np.dot(np.transpose(permutation),self.kernelMinPoint),np.dot(np.transpose(permutation),self.kernelMaxPoint),list(self.bars),self.metadata)
        return permutegrid

    def buildNewBars(self,barposition,permutation,data):
        '''
        Re-express the bars ``data`` as bars along the new last axis at ``barposition``.

        NOTE(review): not called anywhere in this class (``permute`` uses
        ``_transposeBarsAt``) and the block nesting below was reconstructed from
        a whitespace-collapsed source - confirm before relying on it.
        '''
        newdata = []
        increment = [0]*len(barposition)
        increment.append(1)
        newincrement = list(np.dot(np.transpose(permutation),np.array(increment,int)))[:-1]
        newindex = newincrement.index(1)
        oldincrement = list(np.dot(permutation,np.array(increment,int)))[:-1]
        oldindex = oldincrement.index(1)
        for oldbar in data:
            level = oldbar[oldindex]
            unitbar = barposition + [level,level]
            mini = oldbar[-2]
            maxi = oldbar[-1]
            newbartoupdateindex = mini
            if newdata:
                # NOTE(review): the range is fixed before the insertions below
                # and rebinding k does not affect the iteration.
                for k in range(len(newdata)):
                    newbar = newdata[k]
                    if (newbar[newindex] >= mini) and (newbar[newindex] <= maxi):
                        if (newbar[newindex] > newbartoupdateindex):
                            for l in range(newbartoupdateindex,newbar[newindex]):
                                unitbar[newindex] = l
                                newdata.insert(k,copy.copy(unitbar))
                                k=k+1
                        if (newbar[-1] == level-1):
                            newbar[-1] = level
                        newbartoupdateindex = newbar[newindex]+1
                for l in range(newbartoupdateindex,maxi+1):
                    unitbar[newindex] = l
                    newdata.insert(len(newdata),copy.copy(unitbar))
            else:
                for l in range(mini,maxi+1):
                    unitbar[newindex] = l
                    newdata.append(copy.copy(unitbar))
        return newdata

    def findUsefullBars(self,barposition,permutation):
        '''
        Return the bars met while sweeping one axis of this grid from the
        position obtained by applying ``permutation`` to ``barposition``.

        Side effect: a trailing 0 is appended to the caller's ``barposition``.
        '''
        data = []
        increment = [0]*len(barposition)
        barposition.append(0)
        increment.append(1)
        oldincrement = list(np.dot(permutation,np.array(increment,int)))[:-1]
        index = oldincrement.index(1)
        oldbarposition = list(np.dot(permutation,np.array(barposition,int)))[:-1]
        NmaxUsefullBars = list(np.dot(self.permutation, self.intervalNumberperaxis))[index]
        for i in range(NmaxUsefullBars+1):
            oldbarposition[index] = i
            insertion_point = self.bars.bisect(oldbarposition)
            while insertion_point<len(self.bars) and self.bars[insertion_point][:-2]==oldbarposition:
                data.append(self.bars[insertion_point])
                insertion_point = insertion_point+1
        return data
# IPython session (uses the %time magic and the Python 2 builtin xrange):
# times three ways of locating one value among 20000 random floats.
from random import uniform
from sortedcontainers import SortedList

x = [uniform(0,1) for i in xrange(20000)]
val = x[4500]

# 1. linear scan of the list
%time x.index(val)

# 2. hash lookup in a value -> index dictionary
rev_dict = dict(zip(x, range(20000)))
%time rev_dict[val]

# 3. binary search in a SortedList of (value, index) pairs; the 1-tuple
#    (val,) compares lower than any (val, i), so bisect returns the rank of
#    the first pair holding val rather than its original index
rev_bis = SortedList((v,i) for (i,v) in enumerate(x))
%time rev_bis.bisect((val,))
# 315. Count of Smaller Numbers After Self
# https://leetcode.com/problems/count-of-smaller-numbers-after-self
from sortedcontainers import SortedList


class Solution:
    def countSmaller(self, nums: List[int]) -> List[int]:
        """Return, for each index i, how many elements right of i are smaller than nums[i].

        The list is scanned from right to left while the values already seen
        are kept in a SortedList; the rank of nums[i] among them (number of
        strictly smaller values) is the answer for i.

        Args:
            nums: the input values; may be empty.

        Returns:
            A list of the same length as ``nums`` (empty for empty input).
        """
        res = [0] * len(nums)
        seen = SortedList()
        for i in range(len(nums) - 1, -1, -1):
            # bisect_left counts the values strictly smaller than nums[i];
            # unlike bisect(nums[i] - 1) it does not assume integer input
            res[i] = seen.bisect_left(nums[i])
            seen.add(nums[i])
        return res
class PortfolioHistory(SortedDict):
    """Represents the historical holdings of a portfolio.

    Maps a timestamp to a dict ``{symbol: amount}`` describing the holdings
    from that timestamp on. The mapping is rebuilt from ``self._transactions``
    by ``InitFromTransactions``.

    Usually this class should only be instantiated by GetPortfolio.

    NOTE(review): every SQL statement below is built with ``%`` string
    formatting from values such as the symbol, which presumably originate from
    user commands. They should be turned into parameterized queries once the
    paramstyle of the driver behind ``sql.GetCursor`` is confirmed.
    """

    def __init__(self, user_id):
        """Load the transactions of ``user_id`` and build the holdings history."""
        super(PortfolioHistory, self).__init__()
        self._user_id = user_id
        with sql.GetCursor() as cursor:
            cursor.execute(
                'SELECT type, timestamp, in_symbol, in_amount, out_symbol, out_amount '
                'FROM transactions where user_id = %s' % user_id)
            self._transactions = SortedList([
                Transaction(type=t[0], timestamp=t[1], in_symbol=t[2],
                            in_amount=t[3], out_symbol=t[4], out_amount=t[5])
                for t in cursor.fetchall()
            ])
        self.InitFromTransactions()

    def _HoldingsAt(self, timestamp):
        """Return the holdings dict in effect at ``timestamp``, or None if there is none.

        May raise IndexError when the history is empty and ``timestamp`` is falsy.
        """
        if not timestamp:
            return self[self._list[-1]]
        bisect_point = self.bisect(timestamp)
        if bisect_point == 0:
            return None
        return self[self._list[bisect_point - 1]]

    def _Record(self, transaction):
        """Insert ``transaction`` in the in-memory transaction list, keeping it sorted."""
        # add() places the value after equal ones, exactly where
        # insert(bisect(value), value) did, and also works with
        # sortedcontainers 2.x where SortedList.insert is not supported.
        self._transactions.add(transaction)

    def InitFromTransactions(self):
        """Rebuild the timestamp -> holdings mapping from the transaction list.

        Raises:
            Exception: if a transaction removes a coin that is not owned.
        """
        # TODO(brandonsalmon): If it becomes necessary, we can greatly improve
        # the performance of !buy, !sell, !trade, by adding a transaction cursor
        # and not reinitializing all transactions every time.
        self.clear()
        for t in self._transactions:
            if t.type == "INIT":
                # an INIT transaction restarts from empty holdings
                self[t.timestamp] = {}
                continue
            if t.timestamp not in self:
                # start from a copy of the most recent earlier holdings
                bisect_point = self.bisect(t.timestamp)
                if bisect_point == 0:
                    holdings = {}
                else:
                    holdings = self[self._list[bisect_point - 1]].copy()
                self[t.timestamp] = holdings
            if t.in_symbol:
                if t.in_symbol not in self[t.timestamp]:
                    self[t.timestamp][t.in_symbol] = 0
                self[t.timestamp][t.in_symbol] += t.in_amount
            if t.out_symbol:
                if t.out_symbol not in self[t.timestamp]:
                    raise Exception(
                        '%s tried to remove coin %s they didn\'t own' %
                        (self._user_id, t.out_symbol))
                self[t.timestamp][t.out_symbol] -= t.out_amount
                # drop the coin once its amount is (numerically) zero
                if self[t.timestamp][t.out_symbol] < 1e-10:
                    del self[t.timestamp][t.out_symbol]

    def CreationDate(self):
        """Return the timestamp of the first transaction."""
        return self._transactions[0].timestamp

    def GetValueList(self, t_list):
        """Return the portfolio value at each timestamp of ``t_list``."""
        return [self.Value(t) for t in t_list]

    def GetChange(self, timestamp=None, timedelta='24h'):
        """Return the relative change of value over ``timedelta`` as a string."""
        dt = datetime.fromtimestamp(timestamp) if timestamp else datetime.now()
        old_timestamp = (dt - util.GetTimeDelta(timedelta)).timestamp()
        old_value = self.Value(old_timestamp)
        new_value = self.Value(timestamp)
        if old_value != 0:
            return '%.2f%s' % (100 * (new_value - old_value) / old_value, '%')
        elif new_value == 0:
            return "No change"
        elif new_value > 0:
            return "+Inf%"
        else:
            return "-Inf%"

    def ClearRemote(self):
        """Delete the user's transactions from the database and clear the history."""
        with sql.GetCursor() as cursor:
            cursor.execute('DELETE FROM transactions where user_id = %s' %
                           self._user_id)
        # NOTE(review): self._transactions is left untouched, so a later
        # InitFromTransactions() replays the deleted transactions - confirm
        # whether it should be cleared as well.
        self.clear()

    def Init(self, tuples, timestamp=None):
        """Takes a list of tuples of (symbol, amount)."""
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp) '
                'values (%s, "%s", %s)' % (self._user_id, "INIT", timestamp))
        self._Record(Transaction(type="INIT", timestamp=timestamp))
        for t in tuples:
            # the history is rebuilt once, after all the buys are recorded
            self.Buy(t[0], t[1], timestamp, init=False)
        self.InitFromTransactions()

    def Buy(self, symbol, amount, timestamp=None, init=True):
        """Record the purchase of ``amount`` of ``symbol``; rebuild the history if ``init``."""
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp, in_symbol, in_amount) '
                'values (%s, "%s", %s, "%s", %s)' %
                (self._user_id, "BUY", timestamp, symbol.upper(), amount))
        self._Record(Transaction(type="BUY", timestamp=timestamp,
                                 in_symbol=symbol.upper(), in_amount=amount))
        if init:
            self.InitFromTransactions()

    def Sell(self, symbol, amount, timestamp=None):
        """Record the sale of ``amount`` of ``symbol`` and rebuild the history."""
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp, out_symbol, out_amount) '
                'values (%s, "%s", %s, "%s", %s)' %
                (self._user_id, "SELL", timestamp, symbol.upper(), amount))
        self._Record(Transaction(type="SELL", timestamp=timestamp,
                                 out_symbol=symbol.upper(), out_amount=amount))
        self.InitFromTransactions()

    def Trade(self, in_symbol, in_amount, out_symbol, out_amount, timestamp=None):
        """Record the exchange of ``out_amount`` of ``out_symbol`` for ``in_amount`` of ``in_symbol``."""
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            # NOTE(review): the row is stored with type "SELL" while the
            # in-memory transaction below is a "TRADE" - confirm which is meant.
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp, in_symbol, in_amount, '
                'out_symbol, out_amount) values (%s, "%s", %s, "%s", %s, "%s", %s)' %
                (self._user_id, "SELL", timestamp, in_symbol.upper(), in_amount,
                 out_symbol.upper(), out_amount))
        self._Record(Transaction(type="TRADE", timestamp=timestamp,
                                 out_symbol=out_symbol.upper(), out_amount=out_amount,
                                 in_symbol=in_symbol.upper(), in_amount=in_amount))
        self.InitFromTransactions()

    def Value(self, timestamp=None):
        """Return the total value of the holdings at ``timestamp`` (latest if None).

        Returns 0.0 when there are no holdings at that time.
        """
        try:
            data = self._HoldingsAt(timestamp)
        except (IndexError, KeyError):
            return 0.0
        if data is None:
            return 0.0
        value = 0.0
        for symbol, amount in data.items():
            price = coin_data.GetHistory(symbol).GetValue(timestamp)
            value += amount * price
        return value

    def GetOwnedCurrency(self, timestamp=None):
        """Return the ``{symbol: amount}`` holdings at ``timestamp`` (latest if None)."""
        try:
            data = self._HoldingsAt(timestamp)
        except (IndexError, KeyError):
            return {}
        return {} if data is None else data

    def AsTable(self, timestamp=None):
        """Return a table of the holdings with their value, sorted by decreasing value."""
        tuples = []
        for symbol, amount in self.GetOwnedCurrency(timestamp).items():
            history = coin_data.GetHistory(symbol)
            price = history.GetValue(timestamp)
            curr_value = amount * price
            change_day = history.GetDayChange(timestamp)
            tuples.append([
                symbol, amount,
                '$%.2f (%.2f%s)' % (curr_value, change_day, "%"),
                curr_value
            ])
        tuples = sorted(tuples, key=lambda x: x[3], reverse=True)
        # the last column was only needed for sorting
        for t in tuples:
            t.pop()
        return tabulate(tuples, tablefmt='fancy_grid', floatfmt='.4f')

    def BreakTable(self, timestamp=None):
        """Return a table of the share of each coin in the portfolio value, largest first."""
        # computed once instead of twice per row
        total = self.Value(timestamp)
        tuples = []
        for symbol, amount in self.GetOwnedCurrency(timestamp).items():
            price = coin_data.GetHistory(symbol).GetValue(timestamp)
            value_at_t = amount * price
            # a portfolio worth nothing has no meaningful breakdown
            share = (value_at_t / total) * 100 if total else 0.0
            tuples.append([symbol, amount, '%.2f%s' % (share, "%"), share])
        tuples = sorted(tuples, key=lambda x: x[3], reverse=True)
        # the last column was only needed for sorting
        for t in tuples:
            t.pop()
        return tabulate(tuples, tablefmt='fancy_grid', floatfmt='.4f')
class PriorityDict(MutableMapping):
    """
    A PriorityDict provides the same methods as a dict. Additionally, a
    PriorityDict efficiently maintains its keys in value sorted order.
    Consequently, the keys method will return the keys in value sorted order,
    the popitem method will remove the item with the highest value, etc.

    Internally the mapping is held twice: ``_dict`` maps key -> value and
    ``_list`` is a sorted list of ``(value, key)`` pairs. Every mutating
    method must keep the two structures in step (see ``_check``).
    """

    def __init__(self, *args, **kwargs):
        """
        A PriorityDict provides the same methods as a dict. Additionally, a
        PriorityDict efficiently maintains its keys in value sorted order.
        Consequently, the keys method will return the keys in value sorted
        order, the popitem method will remove the item with the highest value,
        etc.

        If the first argument is the boolean value False, then it indicates
        that keys are not comparable. By default this setting is True and
        duplicate values are tie-breaked on the key. Using comparable keys
        improves the performance of the PriorityDict.

        An optional *iterable* argument provides an initial series of items to
        populate the PriorityDict.  Each item in the sequence must itself
        contain two items. The first is used as a key in the new dictionary,
        and the second as the key's value. If a given key is seen more than
        once, the last value associated with it is retained in the new
        dictionary.

        If keyword arguments are given, the keywords themselves with their
        associated values are added as items to the dictionary. If a key is
        specified both in the positional argument and as a keyword argument,
        the value associated with the keyword is retained in the dictionary.
        For example, these all return a dictionary equal to ``{"one": 2,
        "two": 3}``:

        * ``PriorityDict(one=2, two=3)``
        * ``PriorityDict({'one': 2, 'two': 3})``
        * ``PriorityDict(zip(('one', 'two'), (2, 3)))``
        * ``PriorityDict([['two', 3], ['one', 2]])``

        The first example only works for keys that are valid Python
        identifiers; the others work with any valid keys.

        Note that this constructor mimics the Python dict constructor. If
        you're looking for a constructor like collections.Counter(...), see
        PriorityDict.count(...).
        """
        self._dict = dict()

        # A leading boolean is a configuration flag, not mapping data. It has
        # to be stripped before the remaining arguments reach ``update``:
        # forwarding it made ``dict(True, ...)`` raise TypeError.
        comparable_keys = True
        if len(args) > 0 and isinstance(args[0], bool):
            comparable_keys = args[0]
            args = args[1:]

        if comparable_keys:
            self._list = SortedList()
        else:
            # Order by value only so that keys are never compared.
            self._list = SortedListWithKey(key=lambda tup: tup[0])

        self.iloc = _IlocWrapper(self)
        self.update(*args, **kwargs)

    def clear(self):
        """Remove all elements from the dictionary."""
        self._dict.clear()
        self._list.clear()

    def clean(self, value=0):
        """
        Remove all items with value less than or equal to `value`.
        Default `value` is 0.
        """
        _list, _dict = self._list, self._dict
        pos = self.bisect_right(value)
        for key in (key for value, key in _list[:pos]):
            del _dict[key]
        del _list[:pos]

    def __contains__(self, key):
        """Return True if and only if *key* is in the dictionary."""
        return key in self._dict

    def __delitem__(self, key):
        """
        Remove ``d[key]`` from *d*.  Raises a KeyError if *key* is not in the
        dictionary.
        """
        value = self._dict[key]
        self._list.remove((value, key))
        del self._dict[key]

    def __getitem__(self, key):
        """
        Return the priority of *key* in *d*.  Raises a KeyError if *key* is not
        in the dictionary.
        """
        return self._dict[key]

    def __iter__(self):
        """
        Create an iterator over the keys of the dictionary ordered by the value
        sort order.
        """
        return iter(key for value, key in self._list)

    def __reversed__(self):
        """
        Create an iterator over the keys of the dictionary ordered by the
        reversed value sort order.
        """
        return iter(key for value, key in reversed(self._list))

    def __len__(self):
        """Return the number of (key, value) pairs in the dictionary."""
        return len(self._dict)

    def __setitem__(self, key, value):
        """Set `d[key]` to *value*."""
        if key in self._dict:
            # Drop the stale (value, key) pair before adding the new one.
            old_value = self._dict[key]
            self._list.remove((old_value, key))
        self._list.add((value, key))
        self._dict[key] = value

    def copy(self):
        """Create a shallow copy of the dictionary."""
        result = PriorityDict()
        result._dict = self._dict.copy()
        result._list = self._list.copy()
        result.iloc = _IlocWrapper(result)
        return result

    def __copy__(self):
        """Create a shallow copy of the dictionary."""
        return self.copy()

    @classmethod
    def fromkeys(cls, iterable, value=0):
        """
        Create a new dictionary with keys from `iterable` and values set to
        `value`. The default *value* is 0.
        """
        return cls((key, value) for key in iterable)

    def get(self, key, default=None):
        """
        Return the value for *key* if *key* is in the dictionary, else
        *default*.  If *default* is not given, it defaults to ``None``,
        so that this method never raises a KeyError.
        """
        return self._dict.get(key, default)

    def has_key(self, key):
        """Return True if and only if *key* is in the dictionary."""
        return key in self._dict

    def pop(self, key, default=_NotGiven):
        """
        If *key* is in the dictionary, remove it and return its value,
        else return *default*. If *default* is not given and *key* is not in
        the dictionary, a KeyError is raised.
        """
        if key in self._dict:
            value = self._dict[key]
            self._list.remove((value, key))
            return self._dict.pop(key)
        # Identity test: ``==`` would invoke arbitrary ``__eq__`` on the
        # caller's default (e.g. array-like objects).
        if default is _NotGiven:
            raise KeyError(key)
        return default

    def popitem(self, index=-1):
        """
        Remove and return item at *index* (default: -1). Raises IndexError if
        dict is empty or index is out of range. Negative indices are supported
        as for slice indices.
        """
        value, key = self._list.pop(index)
        del self._dict[key]
        return key, value

    def setdefault(self, key, default=0):
        """
        If *key* is in the dictionary, return its value.  If not, insert *key*
        with a value of *default* and return *default*.  *default* defaults to
        ``0``.
        """
        if key in self._dict:
            return self._dict[key]
        self._dict[key] = default
        self._list.add((default, key))
        return default

    def elements(self):
        """
        Return an iterator over elements repeating each as many times as its
        count. Elements are returned in value sort-order. If an element's count
        is less than one, elements() will ignore it.
        """
        values = (repeat(key, value) for value, key in self._list)
        return chain.from_iterable(values)

    def most_common(self, count=None):
        """
        Return a list of the `count` highest priority elements with their
        priority. If `count` is not specified, `most_common` returns *all*
        elements in the dict. Elements with equal counts are ordered by key.

        A `count` larger than the dictionary returns every element; a `count`
        of zero or less returns an empty list.
        """
        _list, _dict = self._list, self._dict

        if count is None:
            return [(key, value) for value, key in reversed(_list)]

        end = len(_dict)
        # Clamp at zero: a negative start would be read as an offset from the
        # end of the list and silently drop the lowest-priority items.
        start = max(end - count, 0)
        return [(key, value) for value, key in reversed(_list[start:end])]

    def subtract(self, elements):
        """
        Elements are subtracted from an iterable or from another mapping (or
        counter). Like dict.update() but subtracts counts instead of replacing
        them. Both inputs and outputs may be zero or negative.
        """
        self -= Counter(elements)

    def tally(self, *args, **kwargs):
        """
        Elements are counted from an iterable or added-in from another mapping
        (or counter). Like dict.update() but adds counts instead of replacing
        them. Also, the iterable is expected to be a sequence of elements, not
        a sequence of (key, value) pairs.
        """
        self += Counter(*args, **kwargs)

    @classmethod
    def count(cls, *args, **kwargs):
        """
        Consume `args` and `kwargs` with a Counter and use that mapping to
        initialize a PriorityDict.
        """
        return cls(Counter(*args, **kwargs))

    def update(self, *args, **kwargs):
        """
        Update the dictionary with the key/value pairs from *other*,
        overwriting existing keys.

        *update* accepts either another dictionary object or an iterable of
        key/value pairs (as a tuple or other iterable of length two).  If
        keyword arguments are specified, the dictionary is then updated with
        those key/value pairs: ``d.update(red=1, blue=2)``.
        """
        _list, _dict = self._list, self._dict

        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
            items = args[0]
        else:
            items = dict(*args, **kwargs)

        if (10 * len(items)) > len(_dict):
            # Large update relative to current size: rebuild the sorted list
            # in one pass instead of doing per-item remove/add.
            _dict.update(items)
            _list.clear()
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(items):
                # Only existing keys have a stale pair to remove; a new key
                # previously raised KeyError here.
                if key in _dict:
                    _list.remove((_dict[key], key))
                _dict[key] = value
                _list.add((value, key))

    def index(self, key):
        """
        Return the smallest *i* such that `d.iloc[i] == key`.  Raises KeyError
        if *key* is not present.
        """
        value = self._dict[key]
        return self._list.index((value, key))

    def bisect_left(self, value):
        """
        Similar to the ``bisect`` module in the standard library, this returns
        an appropriate index to insert *value* in PriorityDict. If *value* is
        already present in PriorityDict, the insertion point will be before (to
        the left of) any existing entries.
        """
        # The 1-tuple probe sorts before every (value, key) pair.
        return self._list.bisect_left((value,))

    def bisect(self, value):
        """
        Same as bisect_left when keys are comparable (the default): the
        ``(value,)`` probe sorts before every ``(value, key)`` pair, so there
        are never equal entries to step over.
        """
        return self._list.bisect((value,))

    def bisect_right(self, value):
        """
        Same as `bisect_left`, but if *value* is already present in
        PriorityDict, the insertion point will be after (to the right of) any
        existing entries.
        """
        # NOTE(review): relies on ``_Biggest`` comparing greater than any key;
        # it is defined elsewhere in this module.
        return self._list.bisect_right((value, _Biggest))

    def __iadd__(self, that):
        """Add values from `that` mapping; keys missing here are inserted."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.update(that)
            _list.update((value, key) for key, value in iteritems(_dict))
        elif len(that) * 3 > len(_dict):
            # Many changes: update the dict, then rebuild the list once.
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    _dict[key] += value
                else:
                    _dict[key] = value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value + value
                _dict[key] = value
                _list.add((value, key))
        return self

    def __isub__(self, that):
        """
        Subtract values from `that` mapping. Only keys already present are
        changed; keys found only in `that` are ignored.
        """
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.clear()
            _list.clear()
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    _dict[key] -= value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value - value
                    _dict[key] = value
                    _list.add((value, key))
        return self

    def __ior__(self, that):
        """Or values from `that` mapping (max(v1, v2))."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.update(that)
            _list.update((value, key) for key, value in iteritems(_dict))
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _dict[key] = old_value if old_value > value else value
                else:
                    _dict[key] = value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value if old_value > value else value
                _dict[key] = value
                _list.add((value, key))
        return self

    def __iand__(self, that):
        """
        And values from `that` mapping (min(v1, v2)). Only keys present in
        both mappings are changed; other keys of this mapping are kept as is.
        """
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.clear()
            _list.clear()
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _dict[key] = old_value if old_value < value else value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value if old_value < value else value
                    _dict[key] = value
                    _list.add((value, key))
        return self

    def __add__(self, that):
        """Add values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                _dict[key] += value
            else:
                _dict[key] = value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __sub__(self, that):
        """Subtract values in `that` mapping from this."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                _dict[key] -= value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __or__(self, that):
        """Or values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                old_value = _dict[key]
                _dict[key] = old_value if old_value > value else value
            else:
                _dict[key] = value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __and__(self, that):
        """And values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                old_value = _dict[key]
                _dict[key] = old_value if old_value < value else value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __eq__(self, that):
        """Compare two mappings for equality."""
        if isinstance(that, PriorityDict):
            that = that._dict
        return self._dict == that

    def __ne__(self, that):
        """Compare two mappings for inequality."""
        if isinstance(that, PriorityDict):
            that = that._dict
        return self._dict != that

    def __lt__(self, that):
        """Compare two mappings for less than."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (_dict != that and self <= that)

    def __le__(self, that):
        """Compare two mappings for less than equal."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        # Every key here must exist in `that` with a value at least as large.
        return (len(_dict) <= len(that) and
                all(_dict[key] <= that[key] if key in that else False
                    for key in _dict))

    def __gt__(self, that):
        """Compare two mappings for greater than."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (_dict != that and self >= that)

    def __ge__(self, that):
        """Compare two mappings for greater than equal."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        # Every key of `that` must exist here with a value at least as large.
        return (len(_dict) >= len(that) and
                all(_dict[key] >= that[key] if key in _dict else False
                    for key in that))

    def isdisjoint(self, that):
        """
        Return True if no key in `self` is also in `that`. This doesn't check
        that the value is greater than zero. To remove keys with value less
        than or equal to zero see *clean*.
        """
        return not any(key in self for key in that)

    def items(self):
        """
        Return a list of the dictionary's items (``(key, value)``
        pairs). Items are ordered by their value from least to greatest.
        """
        return list((key, value) for value, key in self._list)

    def iteritems(self):
        """
        Return an iterable over the items (``(key, value)`` pairs) of the
        dictionary. Items are ordered by their value from least to greatest.
        """
        return iter((key, value) for value, key in self._list)

    @not26
    def viewitems(self):
        """
        In Python 2.7 and later, return a new `ItemsView` of the dictionary's
        items. Beware iterating the `ItemsView` as items are unordered.

        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewitems()
        else:
            return self._dict.items()

    def keys(self):
        """
        Return a list of the dictionary's keys. Keys are ordered
        by their corresponding value from least to greatest.
        """
        return list(key for value, key in self._list)

    def iterkeys(self):
        """
        Return an iterable over the keys of the dictionary. Keys are ordered
        by their corresponding value from least to greatest.
        """
        return iter(key for value, key in self._list)

    @not26
    def viewkeys(self):
        """
        In Python 2.7 and later, return a new `KeysView` of the dictionary's
        keys. Beware iterating the `KeysView` as keys are unordered.

        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewkeys()
        else:
            return self._dict.keys()

    def values(self):
        """
        Return a list of the dictionary's values. Values are
        ordered from least to greatest.
        """
        return list(value for value, key in self._list)

    def itervalues(self):
        """
        Return an iterable over the values of the dictionary. Values are
        iterated from least to greatest.
        """
        return iter(value for value, key in self._list)

    @not26
    def viewvalues(self):
        """
        In Python 2.7 and later, return a `ValuesView` of the dictionary's
        values. Beware iterating the `ValuesView` as values are unordered.

        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewvalues()
        else:
            return self._dict.values()

    def __repr__(self):
        """Return a string representation of PriorityDict."""
        return 'PriorityDict({0})'.format(repr(dict(self)))

    def _check(self):
        """Assert that ``_dict`` and ``_list`` describe the same items."""
        self._list._check()
        assert len(self._dict) == len(self._list)
        assert all(key in self._dict and self._dict[key] == value
                   for value, key in self._list)
class FileLabels():
    """Track the labels of a single source file and write changes back.

    Two sorted collections, both ordered by ``lineno``, are maintained:

    * ``current_labels`` -- the labels as last read from the file.
    * ``updated_labels`` -- the labels the file should contain after the
      next ``update_file()``.
    """

    def __init__(self, filename, path):
        """Load the labels of ``path/filename``.

        Raises:
            Exception: if a loaded label reports a different filename/path.
        """
        self.filename = filename
        self.path = path
        self.current_labels = SortedList(key=self._labelsortkey)
        self.updated_labels = SortedList(key=self._labelsortkey)
        self.get_labels_from_file()
        for l in self.current_labels:
            if not ((l.filename == self.filename) and (l.path == self.path)):
                raise Exception("Found label with incorrect filename/path")

    def get_labels_from_file(self):
        """Reset both label collections from what is currently in the file."""
        self.updated_labels = SortedList(key=self._labelsortkey)
        self.current_labels = SortedList(key=self._labelsortkey)
        self.current_labels.update(
            SrcLabelTool._get_labels(self.filename, self.path, "", "", True,
                                     None))
        self.updated_labels.update(self.current_labels)

    def insert_label(self, label):
        """Schedule *label* for insertion, shifting later labels down a line.

        Does nothing when the label is already present in the file.

        Raises:
            Exception: if *label* belongs to a different filename/path.
        """
        if not ((label.filename == self.filename) and
                (label.path == self.path)):
            raise Exception(
                "Trying to insert label with incorrect filename/path")
        if label in self.current_labels:
            return
        # NOTE(review): the original nesting of the next three statements
        # could not be recovered with certainty; this keeps the reading in
        # which an already-pending label is removed and re-added before the
        # shift. With this reading such a label ends up stored twice --
        # confirm the intent.
        if label in self.updated_labels:
            self.updated_labels.remove(label)
            self.updated_labels.add(label)
        # update the lineno in the following labels
        # NOTE(review): lineno is also the sort key of updated_labels;
        # mutating it on stored items may leave the container's key
        # bookkeeping stale -- verify against the sortedcontainers docs.
        i = self.updated_labels.bisect(label)
        for l in self.updated_labels[i:]:
            l.lineno += 1
        self.updated_labels.add(label)

    def remove_label(self, label):
        """Schedule *label* for removal, shifting later labels up a line."""
        i = self.updated_labels.bisect(label)
        # update the lineno in the following labels
        for l in self.updated_labels[i:]:
            l.lineno -= 1
        self.updated_labels.remove(label)

    def insert_label_list(self, labels):
        """Insert several labels, processing the highest line number first."""
        sortedlabels = SortedList(key=self._revlabelsortkey)
        sortedlabels.update(labels)
        for i in sortedlabels:
            self.insert_label(i)

    def _labelsortkey(self, l):
        """Sort key: ascending line number."""
        return l.lineno

    def _revlabelsortkey(self, l):
        """Sort key: descending line number."""
        return -l.lineno

    def update_file(self):
        """Rewrite the file so it contains exactly ``updated_labels``.

        All existing label lines are stripped, each pending label is inserted
        at its ``lineno`` (1-based), and the label collections are reloaded
        from the rewritten file.
        """
        fullpath = os.path.join(self.path, self.filename)
        # Context managers guarantee the handles are closed even when
        # reading, label rendering or writing raises.
        with open(fullpath, "r") as f:
            lines = f.readlines()
        nolabels = [l for l in lines if not SrcLabelTool.is_any_label(l)]
        for l in self.updated_labels:
            nolabels.insert(l.lineno - 1, l.filerepr())
        # rewrite file
        with open(fullpath, "w") as f:
            f.writelines(nolabels)
        self.get_labels_from_file()
class InclusionTreeBuilder:
    """
    this class builds a tree of polygons included in one another.
    it works through a sweeping line algorithm.
    also identifies each as a hole or a polygon.

    The whole sweep runs inside ``__init__``; afterwards the result is
    available as ``self.tree`` and ``self.nodes``.
    """
    def __init__(self, polygons):
        """
        Build the inclusion tree for *polygons*.

        *polygons* is a mapping whose ``.items()`` yields
        ``(height, iterable of polygons)`` pairs (see ``_create_events``).
        """
        # the algorithm works in O(n) (times sorted container's costs) in this way:
        # we have a SortedList of all currently crossed paths
        self.crossed_paths = SortedList()
        # for each polygon, a SortedList of all of its currently crossed paths
        self.polygons = defaultdict(SortedList)
        # when meeting a new polygon for the first time
        # we will insert it in the crossed_paths list ; we get it's top neighbour (smaller)
        # and get the corresponding polygon
        # now if we are contained inside it, we are its child
        # if we are not contained inside it, we are its brother
        # to figure out whether we are inside or not, we look at #paths smaller than us
        # in the neighbour polygon's SortedList

        # NOTE(review): set_comparer is defined elsewhere; presumably it makes
        # path comparisons go through self.key() -- confirm.
        set_comparer(self)
        # we store all keys used for comparing paths
        # this speeds up keys computations and more importantly removes
        # rounding errors
        self.sweeping_keys = dict()
        polygons_number = self._create_events(polygons)
        self.current_point = None
        self.tree = InclusionTree()
        self.nodes = dict()  # store for each poly its node and father node

        for event in self.events:
            self.execute_event(event)
            if len(self.nodes) == polygons_number:
                return  # no need to finish the sweep once everyone is identified

    def _create_events(self, polygons):
        """
        create all start/end events for each path.
        each event is : a comparison key ; the path.

        Fills ``self.events`` (sorted by key) and ``self.sweeping_keys`` and
        returns the number of polygons seen.
        """
        self.events = []
        polygons_number = 0
        # note: the loop variable below rebinds the ``polygons`` parameter;
        # ``.items()`` has already been evaluated on the original mapping.
        for height, polygons in polygons.items():
            for polygon in polygons:
                polygons_number += 1
                for segment in polygon_segments(height, polygon):
                    angle = segment.key_angle()
                    print("angle for", segment, "is", angle)
                    # the smaller endpoint starts the path, the larger ends it
                    for point, event_type in zip(sorted(segment.endpoints),
                                                 (START_EVENT, END_EVENT)):
                        key = (point, event_type, -height)
                        # NOTE(review): this unconditional raise aborts on the
                        # first endpoint processed, so the two statements
                        # below are unreachable and no event is ever stored.
                        # It looks like a work-in-progress marker (the event
                        # key carries no angle) -- confirm before relying on
                        # this class.
                        raise Exception("we lack an angle here")
                        self.events.append((key, segment))
                        self.sweeping_keys[(id(segment), point)] =\
                            (point.coordinates[1], angle, -height)

        self.events.sort(key=lambda e: e[0])
        return polygons_number

    def key(self, path):
        """
        returns key at current point for given path.

        A key cached in ``sweeping_keys`` for (path, current point) is used
        when available; otherwise the key is computed from the path's
        vertical intersection at the current x coordinate.
        """
        key_id = (id(path), self.current_point)
        if key_id in self.sweeping_keys:
            return self.sweeping_keys[key_id]
        else:
            current_x = self.current_point.coordinates[0]
            return (path.vertical_intersection_at(current_x), path.key_angle(), -path.height)

    def execute_event(self, event):
        """
        execute start path or end path event

        *event* is a ``(key, path)`` pair as stored in ``self.events``.
        """
        event_key, event_path = event
        event_point, event_type = event_key[0:2]

        if event_type == START_EVENT:
            # move the sweep position first so the new path is inserted
            # using keys computed at its own start point
            self.current_point = event_point
            self.start_path(event_path)
        else:
            # remove the path first, while keys are still computed at the
            # previous sweep position, then advance
            self.end_path(event_path)
            self.current_point = event_point

        if __debug__:
            # very slow
            # sanity check: keys of crossed paths must be strictly increasing
            paths = iter(self.crossed_paths)
            previous_path = next(paths, None)
            for path in paths:
                if self.key(previous_path) >= self.key(path):
                    paths = list(self.crossed_paths)
                    print(paths)
                    print("previous", previous_path, self.key(previous_path))
                    print("current", path, self.key(path))
                    tycat(self.current_point, paths, previous_path, path)
                    raise Exception("pb ordre")
                previous_path = path

    def start_path(self, path):
        """
        handles incoming path

        Registers the path as crossed and, the first time its polygon is
        seen, places that polygon in the tree.
        """
        index = self.crossed_paths.bisect(path)
        self.crossed_paths.insert(index, path)
        polygon = path.polygon_id()
        self.polygons[polygon].add(path)

        if polygon not in self.nodes:
            father_node = self.identify_father_node(path, index)
            new_node = father_node.add_child(path)
            self.nodes[polygon] = (new_node, father_node)
            print("adding", polygon, "as child of", id(father_node.content))

    def identify_father_node(self, path, index):
        """
        identify where polygon is in tree.
        we need the path and its position in crossed paths

        Returns the tree node under which the polygon of *path* belongs.
        """
        if index == 0:
            # no one above us, we are below root
            return self.tree
        else:
            neighbour_polygon = self.crossed_paths[index - 1].polygon_id()
            # number of the neighbour polygon's crossed paths sorted before us
            above_paths = self.polygons[neighbour_polygon].bisect(path)
            if above_paths % 2:
                # odd neighbour's paths above us
                # we are inside him
                return self.nodes[neighbour_polygon][0]
            else:
                # even neighbour's paths above us
                # we are beside him
                return self.nodes[neighbour_polygon][1]

    def end_path(self, path):
        """
        handles ending path

        Removes the path from the global and per-polygon crossed lists.
        """
        print("removing", path, "from", self.crossed_paths)
        self.crossed_paths.remove(path)
        self.polygons[path.polygon_id()].remove(path)
class ParetoFrontier():
    """Pareto frontier of two-dimensional labels ``(x, y)``.

    A label is dominated when some frontier label at or to the left of its
    ``x`` has a ``y`` that is less than or equal to its own (see
    ``check_dominance``).

    State kept in parallel:

    * ``pareto_map``        -- x -> y for every frontier label, plus the
                               sentinel entry ``0 -> inf``.
    * ``sorted_list``       -- the x values added to the frontier, sorted.
    * ``contenedor``        -- the same x values, in insertion order.
    * ``sucesores_map`` / ``predecesores_map`` -- linked list over the x
                               values; the last x points to ``inf``.
    * ``info_label``        -- x -> tracer supplied when the label was added
                               (only for labels added with a tracer).
    """

    def __init__(self, vertex, lista=None):
        """Create a frontier for *vertex*, optionally seeded with *lista*.

        Labels in *lista* are added one by one; ``(0, 0)`` gets the tracer
        ``(None, None)`` and every other label gets no tracer.
        """
        # The successor of the last element is always infinity.
        self.sucesores_map = {0: np.inf}
        self.predecesores_map = {np.inf: 0, 0: None}
        self.contenedor = []
        self.sorted_list = SortedList(self.contenedor)
        self.pareto_map = {0: np.inf}
        self.vertex = vertex
        # Maps each label's x to its tracer (in the author's words, a pair of
        # previous vertex and previous x).
        self.info_label = {}
        self.lista_labels = []
        if lista is not None:
            for label in lista:
                if label == (0, 0):
                    trazador = (None, None)
                else:
                    trazador = None
                self.add(label, trazador)

    def show_pareto(self):
        """Return the x -> y mapping of the frontier."""
        return self.pareto_map

    def show_pareto2(self):
        """Return the frontier mapping with its successor/predecessor maps."""
        return self.pareto_map, self.sucesores_map, self.predecesores_map

    def list_frontlabels(self):
        """Return (and print) the frontier as a list of ``(x, y)`` by x.

        The list is rebuilt from scratch on every call; intended for
        debugging output only.
        """
        self.lista_labels = []
        for x in self.sorted_list:
            self.lista_labels.append((x, self.pareto_map[x]))
        print('lista_labels', self.lista_labels)
        return self.lista_labels

    def x_in_pareto(self, x):
        """Return True if a label with this x is stored in ``pareto_map``."""
        return x in self.pareto_map

    def _xleft(self, x):
        """Return x itself if stored, else the closest stored x to its left.

        Falls back to the sentinel 0 when nothing lies to the left.
        """
        if self.x_in_pareto(x):
            return x
        i = self.sorted_list.bisect(x)
        if i == 0:
            return 0
        return self.sorted_list[i - 1]

    def _yleft(self, x):
        """Return the y of the label found by ``_xleft(x)``."""
        return self.pareto_map[self._xleft(x)]

    def check_dominance(self, label):
        """Return True if *label* is dominated by the current frontier."""
        x = label[0]
        y = label[1]
        return self._yleft(x) <= y

    # The following method assumes labels are added to the frontier one at
    # a time.
    def add(self, label, trazadorx=None, pure_pareto=True):
        """Try to add *label* ``(x, y)`` to the frontier.

        Args:
            label: the ``(x, y)`` pair to add.
            trazadorx: optional tracer stored in ``info_label[x]``.
            pure_pareto: when true, labels to the right that become
                dominated are removed; when false, their y is lowered to
                the new y instead.

        Returns:
            ``(self, modified, discarded)`` where *modified* tells whether
            the frontier changed and *discarded* is the set of ``(x, y)``
            labels removed because the new label dominates them.
        """
        indi_pareto_modified = False
        discard_set = set()
        x = label[0]
        y = label[1]
        x_left = self._xleft(x)

        # The label is dominated by something already on the frontier.
        if self.check_dominance(label):
            return self, indi_pareto_modified, discard_set

        if x_left != x:
            # New x: splice it into the linked list right after x_left.
            next_x = self.sucesores_map[x_left]
            self.sucesores_map[x] = next_x
            self.predecesores_map[next_x] = x
            self.sucesores_map[x_left] = x
            self.predecesores_map[x] = x_left
            self.contenedor.append(x)
            self.sorted_list.add(x)
        elif x not in self.sorted_list:
            # x is already linked (the sentinel 0) but not yet listed.
            self.contenedor.append(x)
            self.sorted_list.add(x)

        self.pareto_map[x] = y
        if trazadorx is not None:
            self.info_label[x] = trazadorx
        indi_pareto_modified = True

        sucesor = self.sucesores_map[x]
        if not pure_pareto:
            # Flatten: lower every dominated successor to the new y.
            while sucesor < np.inf:
                if self.pareto_map[sucesor] > y:
                    self.pareto_map[sucesor] = y
                else:
                    break
                sucesor = self.sucesores_map[sucesor]
        else:
            # Prune: unlink every successor the new label dominates.
            while sucesor < np.inf:
                if self.pareto_map[sucesor] > y:
                    discard_set.add((sucesor, self.pareto_map[sucesor]))
                    del self.pareto_map[sucesor]
                    # Tracer data must go with the label. Labels added
                    # without a tracer have no entry, so a plain ``del``
                    # raised KeyError here.
                    self.info_label.pop(sucesor, None)
                    nextt_x = self.sucesores_map[sucesor]
                    self.sucesores_map[x] = nextt_x
                    self.predecesores_map[nextt_x] = x
                    del self.sucesores_map[sucesor]
                    del self.predecesores_map[sucesor]
                    self.contenedor.remove(sucesor)
                    self.sorted_list.remove(sucesor)
                else:
                    break
                sucesor = self.sucesores_map[x]
        return self, indi_pareto_modified, discard_set

    def Delete_label(self, label):
        """Remove the label whose x is ``label[0]`` from the frontier.

        Only x is used to locate the label. Returns ``self`` whether or not
        the label was present.
        """
        print('Entramos a Delete_label')
        x = label[0]
        if self.x_in_pareto(x):
            print(x, 'está en pareto')
            del self.pareto_map[x]
            print('así queda pareto map después de borrar', x, self.pareto_map)
            # The label may have been added without a tracer, in which case
            # there is nothing to remove.
            self.info_label.pop(x, None)
            print('asi queda info_label después de borrar', x, self.info_label)
            # Unlink x by joining its predecessor and successor.
            predx = self.predecesores_map[x]
            sucx = self.sucesores_map[x]
            self.sucesores_map[predx] = sucx
            self.predecesores_map[sucx] = predx
            del self.sucesores_map[x]
            del self.predecesores_map[x]
            self.contenedor.remove(x)
            self.sorted_list.remove(x)
            print('esta es sorted_list después de borrar', x, self.sorted_list)
            return self
        else:
            print('No está en Pareto y retorno el mismo ParetoFrontier')
            return self

    def label_track(self, x):
        """Return the tracer stored for x; raises KeyError if there is none."""
        return self.info_label[x]
class DataPack(BasePack[Entry, Link, Group]):
    # pylint: disable=too-many-public-methods
    r"""A :class:`DataPack` contains a piece of natural language text and a
    collection of NLP entries (annotations, links, groups and generics). The
    natural language text could be a document, paragraph or in any other
    granularity.

    Args:
        pack_name (str, optional): A name for this data pack.
    """

    def __init__(self, pack_name: Optional[str] = None):
        super().__init__(pack_name)
        self._text = ""

        # One sorted container per top-level entry kind.
        self.annotations: SortedList[Annotation] = SortedList()
        self.links: SortedList[Link] = SortedList()
        self.groups: SortedList[Group] = SortedList()
        self.generics: SortedList[Generics] = SortedList()

        # Bookkeeping filled in by `set_text` so that the original text and
        # original spans can be recovered later.
        self.__replace_back_operations: ReplaceOperationsType = []
        self.__processed_original_spans: List[Tuple[Span, Span]] = []
        self.__orig_text_len: int = 0

        self._index: DataIndex = DataIndex()

    def __getstate__(self):
        r"""
        In serialization,
        1) will serialize the annotation sorted list as a normal list;
        2) will not serialize the indices
        """
        state = super().__getstate__()
        state['annotations'] = list(state['annotations'])
        state['links'] = list(state['links'])
        state['groups'] = list(state['groups'])
        state['generics'] = list(state['generics'])
        return state

    def __setstate__(self, state):
        r"""
        In deserialization, we
        1) transform the annotation list back to a sorted list;
        2) initialize the indexes.
        3) Obtain the pack ids.
        """
        super().__setstate__(state)

        # For backward compatibility: older states stored these fields under
        # un-mangled names, move them to the private attributes.
        if 'replace_back_operations' in self.__dict__:
            self.__replace_back_operations = self.__dict__.pop(
                'replace_back_operations')
        if 'processed_original_spans' in self.__dict__:
            self.__processed_original_spans = self.__dict__.pop(
                'processed_original_spans')
        if 'orig_text_len' in self.__dict__:
            self.__orig_text_len = self.__dict__.pop('orig_text_len')

        self.annotations = SortedList(self.annotations)
        self.links = SortedList(self.links)
        self.groups = SortedList(self.groups)
        self.generics = SortedList(self.generics)

        # The index is not serialized, so rebuild it from the entries.
        self._index = DataIndex()
        self._index.update_basic_index(list(self.annotations))
        self._index.update_basic_index(list(self.links))
        self._index.update_basic_index(list(self.groups))
        self._index.update_basic_index(list(self.generics))

        for a in self.annotations:
            a.set_pack(self)

        for a in self.links:
            a.set_pack(self)

        for a in self.groups:
            a.set_pack(self)

        for a in self.generics:
            a.set_pack(self)

    def __iter__(self):
        """Iterate over annotations, links, groups and generics, in that
        order."""
        yield from self.annotations
        yield from self.links
        yield from self.groups
        yield from self.generics

    def _init_meta(self, pack_name: Optional[str] = None) -> Meta:
        """Create the :class:`Meta` object for this pack."""
        return Meta(pack_name)

    def _validate(self, entry: EntryType) -> bool:
        """Return True if ``entry`` is an instance of ``SinglePackEntries``."""
        return isinstance(entry, SinglePackEntries)

    @property
    def text(self) -> str:
        r"""Return the text of the data pack"""
        return self._text

    @property
    def all_annotations(self) -> Iterator[Annotation]:
        """
        An iterator of all annotations in this data pack.

        Returns: Iterator of all annotations, of
        type :class:`~forte.data.ontology.top.Annotation`.

        """
        yield from self.annotations

    @property
    def num_annotations(self) -> int:
        """
        Number of annotations in this data pack.

        Returns: (int) Number of the annotations.

        """
        return len(self.annotations)

    @property
    def all_links(self) -> Iterator[Link]:
        """
        An iterator of all links in this data pack.

        Returns: Iterator of all links, of
        type :class:`~forte.data.ontology.top.Link`.

        """
        yield from self.links

    @property
    def num_links(self) -> int:
        """
        Number of links in this data pack.

        Returns: Number of the links.

        """
        return len(self.links)

    @property
    def all_groups(self) -> Iterator[Group]:
        """
        An iterator of all groups in this data pack.

        Returns: Iterator of all groups, of
        type :class:`~forte.data.ontology.top.Group`.

        """
        yield from self.groups

    @property
    def num_groups(self) -> int:
        """
        Number of groups in this data pack.

        Returns: Number of groups.

        """
        return len(self.groups)

    @property
    def all_generic_entries(self) -> Iterator[Generics]:
        """
        An iterator of all generic entries in this data pack.

        Returns: Iterator of generic entries.

        """
        yield from self.generics

    @property
    def num_generics_entries(self) -> int:
        """
        Number of generics entries in this data pack.

        Returns: Number of generics entries.

        """
        return len(self.generics)

    def get_span_text(self, span: Span) -> str:
        r"""Get the text in the data pack contained in the span

        Args:
            span (Span): Span object which contains a `begin` and an `end`
                index

        Returns:
            The text within this span
        """
        return self._text[span.begin:span.end]

    def set_text(
            self, text: str,
            replace_func: Optional[
                Callable[[str], ReplaceOperationsType]] = None):
        r"""Set the text of this data pack.

        Args:
            text: The new text.
            replace_func: Optional callable that receives ``text`` and
                returns the replace operations to apply to it. When it is
                `None`, no replace operation is applied.

        Raises:
            ProcessExecutionException: If ``text`` is shorter than the text
                currently stored in the pack.
        """
        if len(text) < len(self._text):
            raise ProcessExecutionException(
                "The new text is overwriting the original one with shorter "
                "length, which might cause unexpected behavior.")

        if len(self._text):
            logging.warning("Need to be cautious when changing the text of a "
                            "data pack, existing entries may get affected. ")

        span_ops = [] if replace_func is None else replace_func(text)

        # The spans should be mutually exclusive
        (self._text, self.__replace_back_operations,
         self.__processed_original_spans,
         self.__orig_text_len) = data_utils_io.modify_text_and_track_ops(
            text, span_ops)

    def get_original_text(self):
        r"""Get original unmodified text from the :class:`DataPack` object.

        Returns:
            Original text after applying the `replace_back_operations` of
            :class:`DataPack` object to the modified text
        """
        original_text, _, _, _ = data_utils_io.modify_text_and_track_ops(
            self._text, self.__replace_back_operations)
        return original_text

    def get_original_span(self, input_processed_span: Span,
                          align_mode: str = "relaxed"):
        r"""Function to obtain span of the original text that aligns with the
        given span of the processed text.

        Args:
            input_processed_span: Span of the processed text for which the
                corresponding span of the original text is desired
            align_mode: The strictness criteria for alignment in the
                ambiguous cases, that is, if a part of input_processed_span
                spans a part of the inserted span, then align_mode controls
                whether to use the span fully or ignore it completely
                according to the following possible values

                - "strict" - do not allow ambiguous input, give ValueError
                - "relaxed" - consider spans on both sides
                - "forward" - align looking forward, that is, ignore the span
                  towards the left, but consider the span towards the right
                - "backward" - align looking backwards, that is, ignore the
                  span towards the right, but consider the span towards the
                  left

        Returns:
            Span of the original text that aligns with input_processed_span

        Raises:
            ValueError: If an index of the input span cannot be aligned under
                ``align_mode`` or lies outside the processed text.

        Example:
            * Let o-up1, o-up2, ... and m-up1, m-up2, ... denote the
              unprocessed spans of the original and modified string
              respectively. Note that each o-up would have a corresponding
              m-up of the same size.
            * Let o-pr1, o-pr2, ... and m-pr1, m-pr2, ... denote the
              processed spans of the original and modified string
              respectively. Note that each o-pr is modified to a
              corresponding m-pr that may be of a different size than o-pr.
            * Original string:
              <--o-up1--> <-o-pr1-> <----o-up2----> <----o-pr2----> <-o-up3->
            * Modified string:
              <--m-up1--> <----m-pr1----> <----m-up2----> <-m-pr2-> <-m-up3->
            * Note that the internal list of processed/original span pairs
              is iterated as (processed span, original span) pairs, i.e.
              [(m-pr1, o-pr1), (m-pr2, o-pr2)]

            >> data_pack = DataPack()
            >> original_text = "He plays in the park"
            >> data_pack.set_text(original_text,\
            >>                    lambda _: [(Span(0, 2), "She")])
            >> data_pack.text
            "She plays in the park"
            >> input_processed_span = Span(0, len("She plays"))
            >> orig_span = data_pack.get_original_span(input_processed_span)
            >> data_pack.get_original_text()[orig_span.begin: orig_span.end]
            "He plays"
        """
        assert align_mode in ["relaxed", "strict", "backward", "forward"]

        req_begin = input_processed_span.begin
        req_end = input_processed_span.end

        def get_original_index(input_index: int, is_begin_index: bool,
                               mode: str) -> int:
            r"""
            Args:
                input_index: begin or end index of the input span
                is_begin_index: if the index is the begin index of the input
                    span or the end index of the input span
                mode: alignment mode
            Returns:
                Original index that aligns with input_index
            """
            # Without recorded replacements the two texts are identical.
            if len(self.__processed_original_spans) == 0:
                return input_index

            len_processed_text = len(self._text)
            orig_index = None
            prev_end = 0
            for (inverse_span, original_span) in \
                    self.__processed_original_spans:
                # check if the input_index lies between one of the unprocessed
                # spans
                if prev_end <= input_index < inverse_span.begin:
                    increment = original_span.begin - inverse_span.begin
                    orig_index = input_index + increment
                # check if the input_index lies between one of the processed
                # spans
                elif inverse_span.begin <= input_index < inverse_span.end:
                    # look backward - backward shift of input_index
                    if is_begin_index and mode in ["backward", "relaxed"]:
                        orig_index = original_span.begin
                    if not is_begin_index and mode == "backward":
                        orig_index = original_span.begin - 1

                    # look forward - forward shift of input_index
                    if is_begin_index and mode == "forward":
                        orig_index = original_span.end
                    if not is_begin_index and mode in ["forward", "relaxed"]:
                        orig_index = original_span.end - 1

                # break if the original index is populated
                if orig_index is not None:
                    break
                prev_end = inverse_span.end

            if orig_index is None:
                # check if the input_index lies between the last unprocessed
                # span
                inverse_span, original_span = \
                    self.__processed_original_spans[-1]
                if inverse_span.end <= input_index < len_processed_text:
                    increment = original_span.end - inverse_span.end
                    orig_index = input_index + increment
                else:
                    # check if there input_index is not valid given the
                    # alignment mode or lies outside the processed string
                    raise ValueError(f"The input span either does not adhere "
                                     f"to the {align_mode} alignment mode or "
                                     f"lies outside to the processed string.")
            return orig_index

        orig_begin = get_original_index(req_begin, True, align_mode)
        # The end index is exclusive: align the last covered character and
        # convert back to an exclusive end.
        orig_end = get_original_index(req_end - 1, False, align_mode) + 1

        return Span(orig_begin, orig_end)

    @classmethod
    def deserialize(cls, data_pack_string: str) -> "DataPack":
        """
        Deserialize a Data Pack from a string. This internally calls the
        internal :meth:`~forte.data.base_pack.BasePack._deserialize` function
        from :class:`~forte.data.base_pack.BasePack`.

        Args:
            data_pack_string: The serialized string of a data pack to be
              deserialized.

        Returns:
            An data pack object deserialized from the string.
        """
        return cls._deserialize(data_pack_string)

    def _add_entry(self, entry: EntryType) -> EntryType:
        r"""Force add an :class:`~forte.data.ontology.core.Entry` object to
        the :class:`DataPack` object. Allow duplicate entries in a pack.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.core.Entry`
                object to be added to the pack.

        Returns:
            The input entry itself
        """
        return self.__add_entry_with_check(entry, True)

    def __add_entry_with_check(self, entry: EntryType,
                               allow_duplicate: bool = True) -> EntryType:
        r"""Internal method to add an :class:`Entry` object to the
        :class:`DataPack` object.

        Args:
            entry (Entry): An :class:`Entry` object to be added to the
                datapack.
            allow_duplicate (bool): Whether we allow duplicate in the
                datapack.

        Returns:
            The input entry itself, or the entry already stored in the pack
            when duplicates are not allowed and an equal entry exists.

        Raises:
            ValueError: If the entry is an annotation whose span does not fit
                the text, or if the entry type is not supported.
        """
        if isinstance(entry, Annotation):
            target = self.annotations

            begin, end = entry.span.begin, entry.span.end

            if begin < 0:
                raise ValueError(f'The begin {begin} is smaller than 0, this '
                                 f'is not a valid begin.')

            if end > len(self.text):
                if len(self.text) == 0:
                    raise ValueError(
                        f"The end {end} of span is greater than the text "
                        f"length {len(self.text)}, which is invalid. The text "
                        f"length is 0, so it may be the case the you haven't "
                        f"set text for the data pack. Please set the text "
                        f"before calling `add_entry` on the annotations.")
                else:
                    raise ValueError(
                        f"The end {end} of span is greater than the text "
                        f"length {len(self.text)}, which is invalid. The "
                        f"problematic entry is of type {entry.__class__} "
                        f"at [{begin}:{end}]")
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        elif isinstance(entry, Generics):
            target = self.generics
        else:
            raise ValueError(
                f"Invalid entry type {type(entry)}. A valid entry "
                f"should be an instance of Annotation, Link, Group or Generics."
            )

        # TODO: duplicate is ill-defined.
        add_new = allow_duplicate or (entry not in target)

        if add_new:
            target.add(entry)

            # update the data pack index if needed
            self._index.update_basic_index([entry])
            if self._index.link_index_on and isinstance(entry, Link):
                self._index.update_link_index([entry])
            if self._index.group_index_on and isinstance(entry, Group):
                self._index.update_group_index([entry])
            self._index.deactivate_coverage_index()

            self._pending_entries.pop(entry.tid)

            return entry
        else:
            return target[target.index(entry)]

    def delete_entry(self, entry: EntryType):
        r"""Delete an :class:`~forte.data.ontology.core.Entry` object from the
        :class:`DataPack`. This find out the entry in the index and remove it
        from the index. Note that entries will only appear in the index if
        `add_entry` (or _add_entry_with_check) is called.

        Please note that deleting a entry do not guarantee the deletion of
        the related entries.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.core.Entry`
                object to be deleted from the pack.

        Raises:
            ValueError: If the entry type is not supported.
        """
        if isinstance(entry, Annotation):
            target = self.annotations
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        elif isinstance(entry, Generics):
            target = self.generics
        else:
            raise ValueError(
                f"Invalid entry type {type(entry)}. A valid entry "
                f"should be an instance of Annotation, Link, Group or "
                f"Generics.")

        # Entries that compare equal are adjacent in the sorted list, so
        # start at the leftmost candidate and look for the matching tid.
        begin: int = target.bisect_left(entry)

        index_to_remove = -1
        for i, e in enumerate(target[begin:]):
            if e.tid == entry.tid:
                index_to_remove = begin + i
                break

        if index_to_remove < 0:
            logger.warning(
                "The entry with id %d that you are trying to removed "
                "does not exists in the data pack's index. Probably it is "
                "created but not added in the first place.", entry.tid)
        else:
            target.pop(index_to_remove)

        # update basic index
        self._index.remove_entry(entry)

        # set other index invalid
        self._index.turn_link_index_switch(on=False)
        self._index.turn_group_index_switch(on=False)
        self._index.deactivate_coverage_index()

    @classmethod
    def validate_link(cls, entry: EntryType) -> bool:
        """Return True if ``entry`` is a :class:`Link`."""
        return isinstance(entry, Link)

    @classmethod
    def validate_group(cls, entry: EntryType) -> bool:
        """Return True if ``entry`` is a :class:`Group`."""
        return isinstance(entry, Group)

    def get_data(self, context_type: Type[Annotation],
                 request: Optional[DataRequest] = None,
                 skip_k: int = 0) -> Iterator[Dict[str, Any]]:
        r"""Fetch entries from the data_pack of type `context_type`.

        Currently, we do not support Groups and Generics in the request.

        Example:

            .. code-block:: python

                requests = {
                    base_ontology.Sentence:
                        {
                            "component": ["dummy"],
                            "fields": ["speaker"],
                        },
                    base_ontology.Token: ["pos", "sense"],
                    base_ontology.EntityMention: {
                        "unit": "Token",
                    },
                }
                pack.get_data(base_ontology.Sentence, requests)

        Args:
            context_type (type): The granularity of the data context, which
                could be any ``Annotation`` type.
            request (dict): The entry types and fields required.
                The keys of the requests dict are the required entry types
                and the value should be either:

                - a list of field names or
                - a dict which accepts three keys: `"fields"`, `"component"`,
                  and `"unit"`.

                    - By setting `"fields"` (list), users
                      specify the requested fields of the entry. If "fields"
                      is not specified, only the default fields will be
                      returned.
                    - By setting `"component"` (list), users
                      can specify the components by which the entries are
                      generated. If `"component"` is not specified, will
                      return entries generated by all components.
                    - By setting `"unit"` (string), users can
                      specify a unit by which the annotations are indexed.

                Note that for all annotation types, `"text"` and `"span"`
                fields are returned by default; for all link types, `"child"`
                and `"parent"` fields are returned by default.
            skip_k (int): Will skip the first `skip_k` instances and generate
                data from the (`skip_k` + 1)th instance.

        Returns:
            A data generator, which generates one piece of data (a dict
            containing the required entries, fields, and context).

        Raises:
            KeyError: If two requested entry types share a class name.
            NotImplementedError: If groups or generics are requested.
        """
        annotation_types: Dict[Type[Annotation], Union[Dict, List]] = dict()
        link_types: Dict[Type[Link], Union[Dict, List]] = dict()
        group_types: Dict[Type[Group], Union[Dict, List]] = dict()
        generics_types: Dict[Type[Generics], Union[Dict, List]] = dict()

        # Bucket the request by the top-level kind of each requested type.
        if request is not None:
            for key, value in request.items():
                if issubclass(key, Annotation):
                    annotation_types[key] = value
                elif issubclass(key, Link):
                    link_types[key] = value
                elif issubclass(key, Group):
                    group_types[key] = value
                elif issubclass(key, Generics):
                    generics_types[key] = value

        context_args = annotation_types.get(context_type)

        context_components, _, context_fields = self._parse_request_args(
            context_type, context_args)

        valid_context_ids: Set[int] = self.get_ids_by_type_subtype(
            context_type)
        if context_components:
            valid_component_id: Set[int] = set()
            for component in context_components:
                valid_component_id |= self.get_ids_by_creator(component)
            valid_context_ids &= valid_component_id

        skipped = 0
        # must iterate through a copy here because self.annotations is changing
        for context in list(self.annotations):
            if (context.tid not in valid_context_ids or
                    not isinstance(context, context_type)):
                continue
            if skipped < skip_k:
                skipped += 1
                continue

            data: Dict[str, Any] = dict()
            data["context"] = self.text[context.span.begin:context.span.end]
            data["offset"] = context.span.begin

            for field in context_fields:
                data[field] = getattr(context, field)

            if annotation_types:
                for a_type, a_args in annotation_types.items():
                    # The context itself is already described above.
                    if issubclass(a_type, context_type):
                        continue
                    if a_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {a_type.__name__} at the "
                            f"same time is not allowed")
                    data[a_type.__name__] = \
                        self._generate_annotation_entry_data(
                            a_type, a_args, data, context)

            if link_types:
                for l_type, l_args in link_types.items():
                    if l_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {l_type.__name__} at the "
                            f"same time is not allowed")
                    data[l_type.__name__] = self._generate_link_entry_data(
                        l_type, l_args, data, context)

            # TODO: Getting Group based on range is not done yet.
            if group_types:
                raise NotImplementedError("Querying groups based on ranges is "
                                          "currently not supported.")

            if generics_types:
                raise NotImplementedError(
                    "Querying generic types based on ranges is "
                    "currently not supported.")

            yield data

    def _parse_request_args(self, a_type, a_args):
        """Split the request arguments of one entry type.

        Args:
            a_type: The requested entry type (only used in error messages).
            a_args: Either `None`, an iterable of field names, or a dict
                with the optional keys "component", "unit" and "fields".

        Returns:
            A tuple ``(components, unit, fields)``; ``fields`` is a set that
            always contains "tid".

        Raises:
            TypeError: If any part of the request has an unexpected type.
        """
        # request which fields generated by which component
        components = None
        unit = None
        fields = set()
        if isinstance(a_args, dict):
            components = a_args.get("component")
            # pylint: disable=isinstance-second-argument-not-valid-type
            # TODO: until fix: https://github.com/PyCQA/pylint/issues/3507
            if components is not None and not isinstance(components, Iterable):
                raise TypeError(
                    "Invalid request format for 'components'. "
                    "The value of 'components' should be of an iterable type.")
            unit = a_args.get("unit")
            if unit is not None and not isinstance(unit, str):
                raise TypeError("Invalid request format for 'unit'. "
                                "The value of 'unit' should be a string.")
            a_args = a_args.get("fields", set())

        # pylint: disable=isinstance-second-argument-not-valid-type
        # TODO: disable until fix: https://github.com/PyCQA/pylint/issues/3507
        if isinstance(a_args, Iterable):
            fields = set(a_args)
        elif a_args is not None:
            raise TypeError(
                f"Invalid request format for '{a_type}'. "
                f"The request should be of an iterable type or a dict.")

        fields.add("tid")
        return components, unit, fields

    def _generate_annotation_entry_data(self, a_type: Type[Annotation],
                                        a_args: Union[Dict, Iterable],
                                        data: Dict,
                                        cont: Optional[Annotation]) -> Dict:
        """Collect the requested fields of the ``a_type`` annotations found
        via ``self.get(a_type, cont, components)``.

        Args:
            a_type: The annotation type to collect.
            a_args: The request arguments for ``a_type``.
            data: The data collected so far; must already contain the
                requested unit when a unit is given.
            cont: The context annotation, or `None`.

        Returns:
            A dict from field name to a numpy array of values. "span" and
            "text" are always present; "unit_span" is present when a unit
            is requested.

        Raises:
            KeyError: If the requested unit is not in ``data``.
        """
        components, unit, fields = self._parse_request_args(a_type, a_args)

        a_dict: Dict[str, Any] = dict()

        a_dict["span"] = []
        a_dict["text"] = []
        for field in fields:
            a_dict[field] = []

        unit_begin = 0
        if unit is not None:
            if unit not in data.keys():
                raise KeyError(f"{unit} is missing in data. You need to "
                               f"request {unit} before {a_type}.")
            a_dict["unit_span"] = []

        cont_begin = cont.span.begin if cont else 0

        annotation: Annotation
        for annotation in self.get(a_type, cont, components):
            # we provide span, text (and also tid) by default
            a_dict["span"].append((annotation.span.begin, annotation.span.end))
            a_dict["text"].append(annotation.text)

            for field in fields:
                if field in ("span", "text"):
                    continue
                if field == "context_span":
                    a_dict[field].append((annotation.span.begin - cont_begin,
                                          annotation.span.end - cont_begin))
                    continue

                a_dict[field].append(getattr(annotation, field))

            if unit is not None:
                unit_tids = data[unit]["tid"]
                # Advance to the first unit that lies inside the annotation.
                while not self._index.in_span(unit_tids[unit_begin],
                                              annotation.span):
                    unit_begin += 1

                unit_span_begin = unit_begin
                unit_span_end = unit_span_begin + 1

                # Extend the exclusive end while the next unit is still
                # inside the annotation. The bound check avoids indexing past
                # the last unit when the annotation covers the final unit.
                while (unit_span_end < len(unit_tids) and
                       self._index.in_span(unit_tids[unit_span_end],
                                           annotation.span)):
                    unit_span_end += 1

                a_dict["unit_span"].append((unit_span_begin, unit_span_end))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)

        return a_dict

    def _generate_link_entry_data(self, a_type: Type[Link],
                                  a_args: Union[Dict, Iterable], data: Dict,
                                  cont: Optional[Annotation]) -> Dict:
        """Collect the requested fields of the ``a_type`` links found via
        ``self.get(a_type, cont, components)``.

        "parent" and "child" hold, for every link, the position of the
        parent/child tid inside the already collected ``data`` of the
        parent/child type.

        Raises:
            ValueError: If a unit is requested for a link type.
            KeyError: If the parent or child type was not requested.
        """
        components, unit, fields = self._parse_request_args(a_type, a_args)

        if unit is not None:
            raise ValueError(f"Link entries cannot be indexed by {unit}.")

        a_dict: Dict[str, Any] = dict()
        for field in fields:
            a_dict[field] = []
        a_dict["parent"] = []
        a_dict["child"] = []

        link: Link
        for link in self.get(a_type, cont, components):
            parent_type = link.ParentType.__name__
            child_type = link.ChildType.__name__

            if parent_type not in data.keys():
                raise KeyError(
                    f"The Parent entry of {a_type} is not requested."
                    f" You should also request {parent_type} with "
                    f"{a_type}")
            if child_type not in data.keys():
                raise KeyError(f"The child entry of {a_type} is not requested."
                               f" You should also request {child_type} with "
                               f"{a_type}")

            a_dict["parent"].append(
                np.where(data[parent_type]["tid"] == link.parent)[0][0])
            a_dict["child"].append(
                np.where(data[child_type]["tid"] == link.child)[0][0])

            for field in fields:
                if field in ("parent", "child"):
                    continue

                a_dict[field].append(getattr(link, field))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)
        return a_dict

    def build_coverage_for(self, context_type: Type[Annotation],
                           covered_type: Type[EntryType]):
        """
        User can call this function to build coverage index for specific
        types. The index provide a in-memory mapping from entries of
        `context_type` to the entries "covered" by it.
        See :class:`forte.data.data_pack.DataIndex` for more details.

        Args:
            context_type: The context/covering type.
            covered_type: The entry to find under the context type.

        """
        if self._index.coverage_index(context_type, covered_type) is None:
            self._index.build_coverage_index(self, context_type, covered_type)

    def iter_in_range(self, entry_type: Type[EntryType],
                      range_annotation: Annotation) -> Iterator[EntryType]:
        """
        Iterate the entries of the provided type within or fulfill the
        constraints of the `range_annotation`. The constraint is True if
        an entry is `in_span` of the provided `range_annotation`.

        Internally, if the coverage index between the entry type and the
        type of the `range_annotation` is built, then this will create the
        iterator from the index. Otherwise, the function will iterate them
        from scratch (which is slower). If there are frequent usage of this
        function, it is suggested to build the coverage index.

        Args:
            entry_type: The type of entry to iterate over.
            range_annotation: The range annotation that serve as the
                constraint.

        Returns:
            An iterator of the entries with in the `range_annotation`.

        """
        use_coverage = self._index.coverage_index_is_valid
        coverage_index: Optional[Dict[int, Set[int]]] = {}

        if use_coverage:
            coverage_index = self._index.coverage_index(
                type(range_annotation), entry_type)
            if coverage_index is None:
                use_coverage = False

        if use_coverage and coverage_index is not None:
            for tid in coverage_index[range_annotation.tid]:
                yield self.get_entry(tid)  # type: ignore
        else:
            if issubclass(entry_type, Annotation):
                range_begin = (range_annotation.span.begin
                               if range_annotation else 0)
                range_end = (range_annotation.span.end if range_annotation
                             else self.annotations[-1].span.end)

                # Locate the slice of the sorted annotations with temporary
                # zero-length annotations placed at both range boundaries.
                # NOTE(review): the slice is selected by sort position only;
                # confirm it matches the `in_span` contract documented above.
                temp_begin = Annotation(self, range_begin, range_begin)
                begin_index = self.annotations.bisect(temp_begin)

                temp_end = Annotation(self, range_end, range_end)
                end_index = self.annotations.bisect(temp_end)

                # Make sure these temporary annotations are not part of the
                # actual data.
                temp_begin.regret_creation()
                temp_end.regret_creation()
                yield from self.annotations[begin_index:end_index]
            elif issubclass(entry_type, Link):
                for link in self.links:
                    if self._index.in_span(link, range_annotation.span):
                        yield link
            elif issubclass(entry_type, Group):
                for group in self.groups:
                    if self._index.in_span(group, range_annotation.span):
                        yield group

    def get(
            self, entry_type: Type[EntryType],  # type: ignore
            range_annotation: Optional[Annotation] = None,
            components: Optional[Union[str, Iterable[str]]] = None,
            include_sub_type=True) -> Iterable[EntryType]:
        r"""This function is used to get data from a data pack with various
        methods.

        Depending on the provided arguments, the function will perform
        several different filtering of the returned data.

        The `entry_type` is mandatory, where all the entries matching this
        type will be returned. The sub-types of the provided entry type will
        be also returned if `include_sub_type` is set to True (which is the
        default behavior).

        The `range_annotation` controls the search area of the sub-types. An
        entry `E` will be returned if `in_span(E, range_annotation)` returns
        True. If this function is called frequently with queries related to
        the `range_annotation`, please consider to build the coverage index
        regarding the related entry types.

        The `components` list will filter the results by the `component`
        (i.e the creator of the entry). If `components` is provided, only
        the entries created by one of the `components` will be returned.

        Example:

            .. code-block:: python

                # Iterate through all the sentences in the pack.
                for sentence in input_pack.get(Sentence):
                    # Take all tokens from a sentence created by NLTKTokenizer.
                    token_entries = input_pack.get(
                        entry_type=Token,
                        range_annotation=sentence,
                        component='NLTKTokenizer')
                    ...

            In the above code snippet, we get entries of type ``Token`` within
            each ``sentence`` which were generated by ``NLTKTokenizer``. You
            can consider build coverage index between `Token` and `Sentence`
            if this snippet is frequently used.

        Args:
            entry_type (type): The type of entries requested.
            range_annotation (Annotation, optional): The range of entries
                requested. If `None`, will return valid entries in the range
                of whole data_pack.
            components (str or list, optional): The component (creator)
                generating the entries requested. If `None`, will return
                valid entries generated by any component.
            include_sub_type (bool): whether to consider the sub types of
                the provided entry type. Default `True`.

        Raises:
            ValueError: If `entry_type` is not a sub-type of Annotation,
                Link, Group or Generics. Since this is a generator, the
                error is raised when iteration starts.
        """

        def require_annotations() -> bool:
            if issubclass(entry_type, Annotation):
                return True
            if issubclass(entry_type, Link):
                return (issubclass(entry_type.ParentType, Annotation) and
                        issubclass(entry_type.ChildType, Annotation))
            if issubclass(entry_type, Group):
                return issubclass(entry_type.MemberType, Annotation)
            return False

        # If we don't have any annotations but the items to check requires
        # them, then we simply yield from an empty list.
        if len(self.annotations) == 0 and range_annotation is not None:
            if require_annotations():
                yield from []
                return

        # Valid entry ids based on type.
        all_types: Set[Type]
        if include_sub_type:
            all_types = self._expand_to_sub_types(entry_type)
        else:
            all_types = {entry_type}

        entry_iter: Iterator[Entry]
        if issubclass(entry_type, Generics):
            entry_iter = self.generics
        elif not issubclass(entry_type, (Annotation, Link, Group)):
            # Reject unsupported types up front, also when a range is given,
            # so that `entry_iter` can never be left unbound.
            raise ValueError(
                f"The requested type {str(entry_type)} is not supported.")
        elif range_annotation is not None:
            entry_iter = self.iter_in_range(entry_type, range_annotation)
        elif issubclass(entry_type, Annotation):
            entry_iter = self.annotations
        elif issubclass(entry_type, Link):
            entry_iter = self.links
        else:
            entry_iter = self.groups

        for entry in entry_iter:
            # Filter by type and components.
            if type(entry) not in all_types:
                continue
            if components is not None:
                if not self.is_created_by(entry, components):
                    continue
            yield entry  # type: ignore
# 2012. Sum of Beauty in the Array # https://leetcode.com/problems/sum-of-beauty-in-the-array/ from sortedcontainers import SortedList class Solution: def sumOfBeauties(self, nums: List[int]) -> int: n = len(nums) left = SortedList() left.add(nums[0]) right = SortedList() for i in range(2, n): right.add(nums[i]) res = 0 for i in range(1, n - 1): left_index = left.bisect(nums[i]) right_index = right.bisect_left(nums[i]) if left_index == len(left) and left[left_index - 1] < nums[i] and right_index == 0 and right[0] > nums[i]: res += 2 elif nums[i - 1] < nums[i] < nums[i + 1]: res += 1 left.add(nums[i]) right.remove(nums[i + 1]) return res