def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
    r"""Convert a raw document string into a single yielded MultiPack.

    The multipack carries up to three packs: the most recent
    ``user_utterance`` and ``bot_utterance`` packs taken from the
    resources (when present), plus a new pack holding ``data_source``
    annotated with one :class:`Utterance` spanning the full text.

    Args:
        data_source: str that contains text of a document.

    Yields:
        A MultiPack containing a datapack for the current query.
    """
    multi_pack = MultiPack()

    # Attach conversational context: the latest utterance stored under
    # each slot name, if the resources provide one.
    for slot in ("user_utterance", "bot_utterance"):
        history = None if self.resources is None else self.resources.get(slot)
        if history:
            multi_pack.add_pack_(history[-1], slot)

    query_pack = multi_pack.add_pack(self.configs.pack_name)
    query_pack.set_text(data_source, replace_func=self.text_replace_operation)
    # Cover the whole text with a single Utterance annotation.
    Utterance(query_pack, 0, len(data_source))

    yield multi_pack
def cast(self, pack: DataPack) -> MultiPack:
    """Box the given data pack into a fresh multi pack.

    Args:
        pack: The data pack to be boxed.

    Returns:
        A new multi pack wrapping the input pack under the configured
        pack name.
    """
    boxed = MultiPack()
    boxed.add_pack_(pack, self.configs.pack_name)
    return boxed
def cast(self, pack: DataPack) -> MultiPack:
    """Auto-box the data-pack into a multi-pack by simple wrapping.

    Args:
        pack: The data pack to be boxed.

    Returns:
        The multi pack that wraps the input pack, registered under
        ``self.configs.pack_name``.
    """
    wrapper = MultiPack()
    wrapper.add_pack_(pack, self.configs.pack_name)
    return wrapper
def _process(self, input_pack: MultiPack):
    r"""Searches `Elasticsearch` indexer to fetch documents for a query.

    The query is read from the pack named
    ``self.configs.query_pack_name`` inside ``input_pack``. Every hit
    is recorded on the query entry and added to ``input_pack`` as a new
    pack — either a fresh pack containing only the indexed text field,
    or (when ``indexed_text_only`` is off) a full pack deserialized
    from the stored ``pack_info``.

    Args:
        input_pack: A multipack containing query as a pack.
    """
    query_pack = input_pack.get_pack(self.configs.query_pack_name)

    # ElasticSearchQueryCreator put exactly one Query entry in the pack.
    query: Query = query_pack.get_single(Query)

    # pylint: disable=isinstance-second-argument-not-valid-type
    # TODO: until fix: https://github.com/PyCQA/pylint/issues/3507
    if not isinstance(query.value, Dict):
        raise ValueError(
            "The query to the elastic indexer need to be a dictionary.")

    response = self.index.search(query.value)
    for rank, hit in enumerate(response["hits"]["hits"]):
        source = hit["_source"]
        query.add_result(source["doc_id"], hit["_score"])
        response_name = f"{self.configs.response_pack_name_prefix}_{rank}"

        if self.configs.indexed_text_only:
            # Build a minimal pack holding just the indexed text field,
            # covered by a single Document annotation.
            pack: DataPack = input_pack.add_pack(response_name)
            pack.pack_name = source["doc_id"]
            text = source[self.configs.field]
            pack.set_text(text)
            Document(pack=pack, begin=0, end=len(text))
        else:
            # Restore the originally indexed pack in full.
            pack = DataPack.deserialize(source["pack_info"])
            input_pack.add_pack_(pack, response_name)
            pack.pack_name = source["doc_id"]
def _process(self, input_pack: MultiPack):
    """Augment the selected data packs and attach the results.

    Packs listed in the ``augment_pack_names`` config (and present in
    ``input_pack``) are augmented; when the config is empty, every pack
    is augmented. Each augmented pack is added back to ``input_pack``
    under its configured (or ``augmented_``-prefixed) name, multipack
    links/groups are copied over, and internal state is reset.
    """
    name_map = self.configs["augment_pack_names"]["kwargs"]

    # Only augment configured packs that actually exist in the input;
    # an empty configuration means "augment everything".
    aug_pack_names: List[str] = [
        name for name in name_map.keys() if name in input_pack.pack_names
    ]
    if not name_map:
        aug_pack_names = list(input_pack.pack_names)

    self._augment(input_pack, aug_pack_names)

    new_packs: List[Tuple[str, DataPack]] = []
    for aug_pack_name in aug_pack_names:
        new_pack_name = name_map.get(
            aug_pack_name, "augmented_" + aug_pack_name
        )
        data_pack = input_pack.get_pack(aug_pack_name)
        aligned_pack = self._auto_align_annotations(
            data_pack=data_pack,
            replaced_annotations=self._replaced_annos[
                data_pack.meta.pack_id
            ],
        )
        new_packs.append((new_pack_name, aligned_pack))

    for new_pack_name, aligned_pack in new_packs:
        input_pack.add_pack_(aligned_pack, new_pack_name)

    # Copy the MultiPackLinks/MultiPackGroups.
    # NOTE(review): this iterates input_pack.get(...) while the copy
    # helper presumably adds entries to the same multipack — confirm the
    # getter snapshots its results before mutation.
    for mpl in input_pack.get(MultiPackLink):
        self._copy_multi_pack_link_or_group(mpl, input_pack)
    for mpg in input_pack.get(MultiPackGroup):
        self._copy_multi_pack_link_or_group(mpg, input_pack)

    # Must be called after processing each multipack to reset
    # internal states.
    self._clear_states()