Example #1
File: processor.py Project: svmihar/FARM
    def _dict_to_samples(self, dictionary: dict, **kwargs) -> List[Sample]:  # requires `from typing import List`
        # Account for special tokens (e.g. [CLS], [SEP]) added for a text pair,
        # so document chunks still fit within max_seq_len after encoding.
        n_special_tokens = self.tokenizer.num_added_tokens(pair=True)
        samples = create_samples_squad(dictionary=dictionary,
                                       max_query_len=self.max_query_length,
                                       max_seq_len=self.max_seq_len,
                                       doc_stride=self.doc_stride,
                                       n_special_tokens=n_special_tokens)
        return samples
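
A minimal, hypothetical usage sketch for this variant. It assumes `processor` is an already-configured FARM SquadProcessor-style instance (tokenizer, max_seq_len, doc_stride set) and that the dictionary follows the SQuAD paragraph layout; the exact keys expected by create_samples_squad vary across FARM versions.

# Hypothetical usage sketch; `processor` stands in for a configured
# SquadProcessor-style instance. The dict below follows the SQuAD format.
squad_dict = {
    "paragraphs": [{
        "context": "Berlin is the capital of Germany.",
        "qas": [{
            "question": "What is the capital of Germany?",
            "id": "q1",
            "answers": [{"text": "Berlin", "answer_start": 0}],
        }],
    }]
}

samples = processor._dict_to_samples(dictionary=squad_dict)
print(len(samples))  # one Sample per question / document-chunk pair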
Example #2
    def _dict_to_samples(self, dictionary: dict, **kwargs) -> List[Sample]:
        if "paragraphs" not in dictionary:  # TODO change this inference mode hack
            dictionary = self._convert_rest_api_dict(infer_dict=dictionary)
        samples = create_samples_squad(entry=dictionary)
        for sample in samples:
            # Rejoin the whitespace-split document tokens and tokenize them,
            # keeping per-token metadata alongside the subword tokens.
            tokenized = tokenize_with_metadata(
                text=" ".join(sample.clear_text["doc_tokens"]),
                tokenizer=self.tokenizer,
            )
            sample.tokenized = tokenized

        return samples
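
For intuition on what gets attached to `sample.tokenized` here, a sketch of the dictionary that FARM's `tokenize_with_metadata` returns. The field names match the FARM codebase, but treat the exact contents as version-dependent; `tokenizer` is assumed to be an already-loaded tokenizer instance.

# Illustrative only: shape of the dict produced by tokenize_with_metadata.
tokenized = tokenize_with_metadata(
    text="Berlin is the capital of Germany.",
    tokenizer=tokenizer,  # assumed: a loaded FARM/transformers tokenizer
)
print(tokenized["tokens"])         # subword tokens
print(tokenized["offsets"])        # character offset of each token in `text`
print(tokenized["start_of_word"])  # True where a token starts a new word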
Example #3
    def _dict_to_samples(cls, dict: dict, **kwargs) -> List[Sample]:
        # TODO split samples that are too long in this function, related to todo in self._sample_to_features
        # Note: the parameter name `dict` shadows the built-in type.
        if "paragraphs" not in dict:  # TODO change this inference mode hack
            dict = cls._convert_inference(infer_dict=dict)
        samples = create_samples_squad(entry=dict)
        for sample in samples:
            # Unlike Example #2, max_seq_len is passed here, so tokenization
            # is bounded to the configured sequence length.
            tokenized = tokenize_with_metadata(
                text=" ".join(sample.clear_text["doc_tokens"]),
                tokenizer=cls.tokenizer,
                max_seq_len=cls.max_seq_len,
            )
            sample.tokenized = tokenized

        return samples
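
The `"paragraphs" not in dict` branch exists because QA inference in FARM accepts a flatter input than SQuAD files. Below is a hypothetical sketch of the two shapes this method must reconcile; the flat format follows FARM's documented Inferencer QA input, and `_convert_inference` is assumed to perform roughly this mapping (its real implementation may differ).

# Flat inference-style input (as used by FARM's QA Inferencer):
infer_dict = {
    "qas": ["What is the capital of Germany?"],
    "context": "Berlin is the capital of Germany.",
}

# Equivalent SQuAD-style entry that create_samples_squad expects;
# _convert_inference is assumed to produce something like this.
squad_entry = {
    "paragraphs": [{
        "context": infer_dict["context"],
        "qas": [{"question": q, "id": str(i), "answers": []}
                for i, q in enumerate(infer_dict["qas"])],
    }]
}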