Exemplos de Summarizer._sent_len em Python

Linguagem de programação: Python

Namespace / nome do pacote: summarizer

Classe / Tipo: Summarizer

Método / Função: _sent_len

Exemplos em hotexamples.com: 2

Summarizer._sent_len em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de summarizer.Summarizer._sent_len em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

Summarizer(30)

keywords(3)

keyToVideo(3)

generate_summary(2)

get_sentence_scores(2)

format_summary(2)

info(2)

key_noun_phrases(2)

from_file(2)

_sent_len(2)

_preprocess(2)

parse(2)

generate(2)

make_density_map(1)

hidden_concat(1)

retrieve_query_summary(1)

get_summary(1)

retrieve_summary(1)

ForFull(1)

from_text(1)

ForFullRough(1)

fit(1)

facts(1)

df(1)

count_with_median(1)

count_tp(1)

check_mat(1)

_deduplicate(1)

ForRough(1)

ForIntro(1)

rouge(1)

Métodos Frequentes

Summarizer (30)

keywords (3)

keyToVideo (3)

generate_summary (2)

get_sentence_scores (2)

format_summary (2)

info (2)

key_noun_phrases (2)

from_file (2)

_sent_len (2)

Métodos Frequentes

_preprocess (2)

parse (2)

generate (2)

make_density_map (1)

hidden_concat (1)

retrieve_query_summary (1)

get_summary (1)

retrieve_summary (1)

ForFull (1)

from_text (1)

ForFullRough (1)

fit (1)

facts (1)

df (1)

count_with_median (1)

count_tp (1)

check_mat (1)

_deduplicate (1)

ForRough (1)

ForIntro (1)

Métodos Frequentes

ForFullRough (1)

fit (1)

facts (1)

df (1)

count_with_median (1)

count_tp (1)

check_mat (1)

_deduplicate (1)

ForRough (1)

ForIntro (1)

rouge (1)

Métodos Frequentes

rouge (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: oracles.py Projeto: xiamenwcy/wcep-mds-dataset

class LeadOracle():
    """Pick the best-scoring article lead as the summary.

    For each article, sentences are taken in document order until the next
    one would exceed the length budget. Each resulting lead is scored against
    the reference with ``compute_rouge_n`` and the highest-scoring lead wins.
    """

    def __init__(self, rouge_n=1, metric='f'):
        """Store the scoring configuration and create the summarizer helper.

        Args:
            rouge_n: n-gram order forwarded to ``compute_rouge_n``.
            metric: key used to read the score out of the result returned by
                ``compute_rouge_n`` (presumably 'f' selects the F-score --
                confirm against ``compute_rouge_n``).
        """
        self.rouge_n = rouge_n
        self.metric = metric
        self.summarizer = Summarizer()

    def summarize(self, ref, articles, max_len=40, len_type='words',
                  in_titles=False, out_titles=False,
                  min_sent_tokens=7, max_sent_tokens=40):
        """Return the lead (joined sentence texts) with the highest score.

        Args:
            ref: reference summary passed to ``compute_rouge_n``.
            articles: raw articles, passed through
                ``self.summarizer._preprocess`` before use.
            max_len: length budget, in the unit given by ``len_type``.
            len_type: forwarded to ``self.summarizer._sent_len``.
            in_titles: title sentences are kept only when both ``in_titles``
                and ``out_titles`` are truthy.
            out_titles: see ``in_titles``.
            min_sent_tokens: unused here; kept for interface compatibility.
            max_sent_tokens: unused here; kept for interface compatibility.

        Returns:
            The best-scoring lead as a single space-joined string, or ``''``
            when no article yields a lead that fits within ``max_len``.
        """
        articles = self.summarizer._preprocess(articles)
        drop_titles = not (in_titles and out_titles)

        scored_summaries = []
        for article in articles:
            sents = article.sents
            if drop_titles:
                sents = [s for s in sents if not s.is_title]

            selected_sents = []
            current_len = 0
            for sent in sents:
                new_len = current_len + self.summarizer._sent_len(sent, len_type)
                # A lead is contiguous: stop at the first sentence that
                # would push the summary over the budget.
                if new_len > max_len:
                    break
                selected_sents.append(sent.text)
                current_len = new_len

            if selected_sents:
                summary = ' '.join(selected_sents)
                rouge_scores = compute_rouge_n(
                    summary, ref, self.rouge_n, tokenize=True
                )
                scored_summaries.append((summary, rouge_scores[self.metric]))

        # No article produced a usable lead (no articles, or every first
        # sentence is already too long): return an empty summary instead of
        # failing with IndexError.
        if not scored_summaries:
            return ''

        # max() returns the first maximal item, matching a stable
        # descending sort followed by taking element 0.
        best_summary, _ = max(scored_summaries, key=lambda item: item[1])
        return best_summary

Exemplo n.º 2

0

Exibir arquivo

Arquivo: oracles.py Projeto: xiamenwcy/wcep-mds-dataset

class Oracle():
    """Greedy extractive oracle over the sentences of all articles.

    Sentences are added one at a time, each step choosing the candidate whose
    addition gives the highest ``compute_rouge_n`` score against the
    reference, until the length budget is reached or nothing else fits.
    """

    def __init__(self, rouge_n=1, metric='f', early_stopping=True):
        """Store the scoring configuration and create the summarizer helper.

        Args:
            rouge_n: n-gram order forwarded to ``compute_rouge_n``.
            metric: key used to read the score out of the result returned by
                ``compute_rouge_n`` (presumably 'f' selects the F-score --
                confirm against ``compute_rouge_n``).
            early_stopping: when truthy, the best-scoring intermediate
                selection is returned; when falsy, only the longest
                selection reached by the greedy search is considered.
        """
        self.rouge_n = rouge_n
        self.metric = metric
        self.early_stopping = early_stopping
        self.summarizer = Summarizer()

    def summarize(self, ref, articles, max_len=40, len_type='words',
                  in_titles=False, out_titles=False,
                  min_sent_tokens=7, max_sent_tokens=40):
        """Return the greedily selected sentences joined by spaces.

        Args:
            ref: reference summary; tokenized with ``word_tokenize``.
            articles: raw articles, passed through
                ``self.summarizer._preprocess`` before use.
            max_len: length budget, in the unit given by ``len_type``.
            len_type: forwarded to ``self.summarizer._sent_len``.
            in_titles: title sentences are kept only when both ``in_titles``
                and ``out_titles`` are truthy.
            out_titles: see ``in_titles``.
            min_sent_tokens: unused here; kept for interface compatibility.
            max_sent_tokens: unused here; kept for interface compatibility.

        Returns:
            The selected sentence texts, in selection order, joined by a
            single space; ``''`` when nothing could be selected.
        """
        articles = self.summarizer._preprocess(articles)
        sents = [s for a in articles for s in a.sents]
        sents = self.summarizer._deduplicate(sents)
        if not (in_titles and out_titles):
            sents = [s for s in sents if not s.is_title]

        # Nothing to choose from: skip tokenizing and scoring entirely.
        if not sents:
            return ''

        sent_lens = [self.summarizer._sent_len(s, len_type) for s in sents]
        ref_words = word_tokenize(ref)

        current_len = 0
        remaining = list(range(len(sents)))
        selected = []
        # One (selection, score) entry per greedy step; selections grow by
        # exactly one sentence index per step.
        scored_selections = []

        while current_len < max_len and remaining:
            current_summary_words = [
                tok for i in selected for tok in sents[i].words
            ]
            scored = []
            for i in remaining:
                if current_len + sent_lens[i] > max_len:
                    continue
                try:
                    rouge_scores = compute_rouge_n(
                        current_summary_words + sents[i].words,
                        ref_words,
                        rouge_n=self.rouge_n,
                        tokenize=False,
                    )
                    scored.append((i, rouge_scores[self.metric]))
                except Exception:
                    # Best effort: a candidate that cannot be scored is
                    # skipped. Exception (not a bare except) so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    continue

            if not scored:
                break

            # max() returns the first maximal item, matching a stable
            # descending sort followed by taking element 0.
            best_idx, best_score = max(scored, key=lambda item: item[1])
            selected.append(best_idx)
            scored_selections.append((list(selected), best_score))
            current_len += sent_lens[best_idx]
            remaining.remove(best_idx)

        # Checked before the early_stopping filter so that max() below never
        # sees an empty sequence.
        if not scored_selections:
            return ''

        if not self.early_stopping:
            # Remove shorter summaries: keep only the longest selection(s).
            # The previous `<` comparison discarded exactly those and kept
            # the shorter ones instead.
            max_sents = max(len(sel) for sel, _ in scored_selections)
            scored_selections = [
                entry for entry in scored_selections
                if len(entry[0]) == max_sents
            ]

        best_selection, _ = max(scored_selections, key=lambda item: item[1])
        return ' '.join(sents[i].text for i in best_selection)