Пример #1
0
    def rank_sentences(self, dataset, document_column_name, **kwargs):
        """Rank sentences by position, with the earliest sentence scoring highest.

        Every document in ``dataset[document_column_name]`` is split with
        ``sent_tokenize``. A document with ``n`` sentences receives the
        scores ``n - 1, n - 2, ..., 0`` in sentence order.

        Args:
            dataset: Object indexable by column name; it is also handed
                unchanged to ``Baseline.append_column``.
            document_column_name: Name of the column holding the documents.
            **kwargs: Accepted but not used by this method.

        Returns:
            The result of ``Baseline.append_column(dataset, data, self.name)``,
            where ``data`` holds one ``{"sentences", "scores"}`` record per
            document.
        """
        data = []
        for document in dataset[document_column_name]:
            sentences = sent_tokenize(document)
            # Count down from the last index so the first sentence ranks first.
            descending = list(range(len(sentences) - 1, -1, -1))
            data.append({"sentences": sentences, "scores": descending})
        return Baseline.append_column(dataset, data, self.name)
Пример #2
0
    def rank_sentences(self, dataset, document_column_name, seed=42, **kwargs):
        """Assign every sentence a pseudo-random score.

        Every document in ``dataset[document_column_name]`` is split with
        ``sent_tokenize`` and each resulting sentence gets one value drawn
        from ``random.random()``.

        Args:
            dataset: Object indexable by column name; it is also handed
                unchanged to ``Baseline.append_column``.
            document_column_name: Name of the column holding the documents.
            seed: Value passed to ``random.seed`` before any score is drawn.
            **kwargs: Accepted but not used by this method.

        Returns:
            The result of ``Baseline.append_column(dataset, data, self.name)``,
            where ``data`` holds one ``{"sentences", "scores"}`` record per
            document.
        """
        # NOTE: this reseeds the module-level generator of ``random``, so the
        # effect is visible to any other code that uses that shared generator.
        random.seed(seed)

        # Tokenize everything first; scores are then drawn document by
        # document, sentence by sentence.
        tokenized = [
            sent_tokenize(document) for document in dataset[document_column_name]
        ]

        data = []
        for sentences in tokenized:
            drawn = [random.random() for _ in sentences]
            data.append({"sentences": sentences, "scores": drawn})
        return Baseline.append_column(dataset, data, self.name)
Пример #3
0
    def rank_sentences(self, dataset, document_column_name, **kwargs):
        """Score every document's sentences via ``self.run_single``.

        Iterates over ``dataset[document_column_name]`` (wrapped in ``tqdm``,
        presumably to show progress) and calls ``self.run_single`` on each
        document. The call must yield exactly two items, which are taken as
        the document's sentences and their scores.

        Args:
            dataset: Object indexable by column name; it is also handed
                unchanged to ``Baseline.append_column``.
            document_column_name: Name of the column holding the documents.
            **kwargs: Accepted but not used by this method.

        Returns:
            The result of ``Baseline.append_column(dataset, data, self.name)``,
            where ``data`` holds one ``{"sentences", "scores"}`` record per
            document.
        """
        data = []
        for document in tqdm(dataset[document_column_name]):
            sentences, scores = self.run_single(document)
            data.append({"sentences": sentences, "scores": scores})
        return Baseline.append_column(dataset, data, self.name)