def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
    """Build a one-pack ``MultiPack`` from a tab-separated record.

    Args:
        data_source: A tab-separated string. The first field is used as
            the document id and the second field as the document text;
            any further fields are ignored.

    Yields:
        A single ``MultiPack`` that holds the newly created ``DataPack``
        under the name ``self.config.pack_name``.

    Raises:
        IndexError: If ``data_source`` contains no tab, so there is no
            second field to use as the text.
    """
    columns = data_source.split("\t")

    doc_pack = DataPack(doc_id=columns[0])
    container = MultiPack()

    # One Document annotation spans the whole text field.
    body = columns[1]
    whole_doc = Document(pack=doc_pack, begin=0, end=len(body))
    doc_pack.add_entry(whole_doc)
    doc_pack.set_text(body)

    container.update_pack({self.config.pack_name: doc_pack})
    yield container
def _process(self, input_pack: MultiPack):
    """Search the index with the first query and attach the hits as packs.

    The query is the first ``Query`` entry found in the pack named
    ``self.configs.query_pack_name``. Its ``value`` is passed to
    ``self.index.search`` together with ``self.k``. For every hit in the
    (nested) search result, the element at index 1 is used as the text of
    a new pack added to ``input_pack``.

    Args:
        input_pack: The multi pack that holds the query pack and receives
            the response packs.

    Raises:
        IndexError: If the query pack contains no ``Query`` entry.
    """
    source_pack = input_pack.get_pack(self.configs.query_pack_name)
    queries = list(source_pack.get(Query))
    query = queries[0]
    hits_per_query = self.index.search(query.value, self.k)

    # Flatten the nested result, keeping only element 1 of each hit.
    texts = []
    for hits in hits_per_query:
        for hit in hits:
            texts.append(hit[1])

    named_packs = {}
    for rank, text in enumerate(texts):
        hit_pack = input_pack.add_pack()
        hit_pack.set_text(text)
        # The Document is built for its effect on the pack and is not kept
        # here -- presumably it registers itself on construction; confirm.
        Document(hit_pack, 0, len(text))
        pack_name = self.configs.response_pack_name_prefix + f"_{rank}"
        named_packs[pack_name] = hit_pack

    input_pack.update_pack(named_packs)
def _parse_pack(self, file_path: str) -> Iterator[MultiPack]:  # type: ignore
    """Yield one ``MultiPack`` for every non-blank line of a text file.

    The file is read as UTF-8. Each line is stripped of surrounding
    whitespace; lines that are empty after stripping are skipped. Every
    remaining line becomes the text of a fresh ``DataPack`` (whose
    ``doc_id`` is ``file_path``) covered by a single ``Sentence``, and
    that pack is stored in a new ``MultiPack`` under the name ``"pack"``.
    ``self.count`` is incremented once per yielded pack.

    Args:
        file_path: Path of the text file to read.

    Yields:
        A ``MultiPack`` for each non-blank line, in file order.
    """
    # NOTE(review): the ``# type: ignore`` on the signature is kept because
    # the base-class signature is not visible from here -- confirm whether
    # it is still needed now that the annotation matches what is yielded.
    with open(file_path, "r", encoding="utf8") as doc:
        for raw_line in doc:
            line = raw_line.strip()
            if not line:
                # Skip blank lines before allocating any pack objects.
                continue

            m_pack = MultiPack()
            pack = DataPack(doc_id=file_path)

            sent = Sentence(pack, 0, len(line))
            pack.add_entry(sent)
            pack.set_text(line)
            self.count += 1

            m_pack.update_pack({"pack": pack})
            yield m_pack
def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
    """Wrap a raw utterance string, plus stored context, in a ``MultiPack``.

    Args:
        data_source: Text of the current utterance.

    Yields:
        A single ``MultiPack``. It contains a new ``DataPack`` for
        ``data_source`` under the name ``self.config.pack_name`` and, when
        ``self.resource`` holds a non-empty value for them, the last
        element stored under ``"user_utterance"`` and ``"bot_utterance"``
        under those same names.
    """
    query_multi_pack = MultiPack()

    # Use context to build the query: carry over the most recent stored
    # entry for each speaker, if there is one.
    for context_key in ("user_utterance", "bot_utterance"):
        if self.resource.get(context_key):
            latest = self.resource.get(context_key)[-1]
            query_multi_pack.update_pack({context_key: latest})

    current = DataPack()
    # NOTE(review): the span uses the length of the raw string, while the
    # text is set through ``replace_func`` -- confirm the span stays valid
    # if the replacement changes the text length.
    span = Utterance(current, 0, len(data_source))
    current.add_entry(span)
    current.set_text(data_source, replace_func=self.text_replace_operation)

    query_multi_pack.update_pack({self.config.pack_name: current})
    yield query_multi_pack