Exemplos de sentence_cut em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: data_processing.txt_file_processing.basic_nlp_func

Método / Função: sentence_cut

Exemplos em hotexamples.com: 5

sentence_cut em Python - 5 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de data_processing.txt_file_processing.basic_nlp_func.sentence_cut em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Relacionados

timer_stop

create_root_node

log

_get_commands

Hook

quota_create

formatPlayer

validate_config_against_schema

binary_read

Percolator

Related in langs

ScalarCompiler (PHP)

replaceFields (PHP)

WPFUtil.BitmapBuffer (C#)

FastFileReading (C#)

RTMP_IO_WRITE32 (C++)

SDL_SetPaletteColors (C++)

Policy (Go)

Copy (Go)

DebugKeysListener (Java)

IllegalFieldValueException (Java)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: based_on_features_with_paragraph.py Projeto: kiseliu/Naturallanguageprocessing

def main():
    """Interactively build an extractive summary of ``data/01.txt``.

    Prompts for a compression ratio, ranks the document's sentences with
    ``compute_sentences_weigths``, keeps the top share given by the ratio,
    and prints the kept sentences joined together in the order in which
    they were produced by ``sentence_cut`` over the paragraphs.
    """
    # Ask for the compression ratio (kept as text until it is needed).
    ratio = raw_input("Please enter the compressed ratio: ")
    title, paragraghs = load_data("data/01.txt")

    # Record the order in which sentences appear in the text.
    sentences = []
    for paragragh in paragraghs:
        sentences += sentence_cut(paragragh, punctuation_list='!！。')
    # A repeated sentence keeps the index of its last occurrence.
    sentences_with_indices = {
        sentence: index for index, sentence in enumerate(sentences)
    }

    # Extract the keywords and rank the sentences by weight.
    keywords = get_key_words(title, paragraghs)
    key_sentences = compute_sentences_weigths(keywords, paragraghs)

    # The compression ratio determines how many sentences are extracted.
    topK = int(len(key_sentences) * float(ratio))
    positions = {
        sentence: sentences_with_indices[sentence]
        for sentence in key_sentences[:topK]
    }

    # Put the extracted sentences back into document order and print them.
    ordered = sorted(positions, key=positions.get)
    summary = ''.join(ordered)
    print(summary)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: based_on_features_with_paragraph.py Projeto: kiseliu/Naturallanguageprocessing

def get_key_sentences(content_weights, content, p_weight=1.2, s_bias=1, s_weight = 1.2):
    """Register the sentences of one paragraph in ``content_weights``.

    ``content`` is split with ``sentence_cut`` on ``!``, ``！`` and ``。``.
    Each resulting sentence becomes a key of ``content_weights`` whose value
    is a dict with an initial ``'weight'`` of 0, the given ``p_weight`` and a
    sentence-position weight: ``s_weight`` for the first and last ``s_bias``
    sentences, 1 for the sentences in between.

    Args:
        content_weights: dict updated in place (and also returned).
        content: paragraph text handed to ``sentence_cut``.
        p_weight: paragraph-position weight stored with every sentence.
        s_bias: number of leading and trailing sentences that get ``s_weight``.
        s_weight: weight stored for those leading/trailing sentences.

    Returns:
        The same ``content_weights`` dict.
    """
    sentences = sentence_cut(content, punctuation_list='!！。')
    total = len(sentences)

    # Clamp to the number of sentences so a paragraph shorter than s_bias
    # (including one that yields no sentences) does not raise IndexError.
    for i in range(min(s_bias, total)):
        content_weights[sentences[i]] = {'weight': 0, 'p_weight': p_weight, 's_weight': s_weight}
        content_weights[sentences[-i-1]] = {'weight': 0, 'p_weight': p_weight, 's_weight': s_weight}

    # Use an explicit end index: sentences[s_bias:-s_bias] is empty when
    # s_bias == 0 (because -0 == 0), which would silently drop every sentence.
    # This loop runs last on purpose, so a middle sentence whose text equals
    # an edge sentence ends up with s_weight 1, as before.
    middle_end = max(s_bias, total - s_bias)
    for sentence in sentences[s_bias:middle_end]:
        content_weights[sentence] = {'weight': 0, 'p_weight': p_weight, 's_weight': 1}
    return content_weights

Exemplo n.º 3

0

Exibir arquivo

Arquivo: based_on_features_with_paragraph.py Projeto: kiseliu/Naturallanguageprocessing

def compute_sentences_weigths(keywords, paragraphs, p_bias=1, p_weight=1.2, s_bias=1, s_weight = 1.2):
    """Rank every sentence of ``paragraphs`` by keyword-based score.

    Sentences are registered through ``get_key_sentences``: the first and
    last ``p_bias`` paragraphs are registered with ``p_weight``, the
    paragraphs in between with a paragraph weight of 1. A sentence's score
    is the sum of ``keywords[word]`` for every keyword contained in it,
    multiplied by its paragraph and sentence weights and divided by the
    number of pieces ``sentence_cut`` yields for its inner punctuation.

    Args:
        keywords: mapping of keyword -> numeric weight.
        paragraphs: sequence of paragraph strings.
        p_bias: number of leading and trailing paragraphs given ``p_weight``.
        p_weight: weight for those leading/trailing paragraphs.
        s_bias: forwarded to ``get_key_sentences``.
        s_weight: forwarded to ``get_key_sentences``.

    Returns:
        List of sentences ordered from highest to lowest score.
    """
    content_weights = {}
    total = len(paragraphs)

    # Clamp so a document with fewer than p_bias paragraphs cannot raise
    # IndexError.
    for i in range(min(p_bias, total)):
        content_weights = get_key_sentences(
            content_weights, paragraphs[i],
            p_weight=p_weight, s_bias=s_bias, s_weight=s_weight)
        content_weights = get_key_sentences(
            content_weights, paragraphs[-i-1],
            p_weight=p_weight, s_bias=s_bias, s_weight=s_weight)

    # Explicit end index: paragraphs[p_bias:-p_bias] is empty when
    # p_bias == 0 (because -0 == 0), which would skip every paragraph.
    middle_end = max(p_bias, total - p_bias)
    for paragraph in paragraphs[p_bias:middle_end]:
        content_weights = get_key_sentences(
            content_weights, paragraph,
            p_weight=1, s_bias=s_bias, s_weight=s_weight)

    scores = {}
    for sentence, info in content_weights.items():
        weight = info['weight']
        for word, word_weight in keywords.items():
            if word in sentence:
                weight += word_weight
        inner_num = len(sentence_cut(sentence, punctuation_list=',;，:：；… '))
        # Guard against a zero piece count, and divide by a float so the
        # score is not truncated under Python 2 when every factor is an int.
        divisor = float(max(inner_num, 1))
        scores[sentence] = weight * info['p_weight'] * info['s_weight'] / divisor

    # Stable sort over the dict's own key order keeps ties in the same
    # relative order as sorting its (sentence, score) items would.
    return sorted(content_weights, key=lambda sentence: scores[sentence], reverse=True)

Exemplo n.º 4

0

Exibir arquivo

def main():
    """Interactively summarize ``data/01.txt`` with TextRank scores.

    Reads the GBK-encoded file, splits every non-blank line into sentences,
    scores them via ``get_keywords`` / ``construct_matrix`` / ``textrank``,
    then asks for a compression ratio and prints the summary returned by
    ``summaly`` for the corresponding number of sentences.
    """
    sentences = []
    with codecs.open("data/01.txt", 'r', encoding='GBK') as fr:
        for raw_line in fr.readlines():
            line = raw_line.strip()
            if not line:
                # Blank lines carry no sentences.
                continue
            sentences.extend(sentence_cut(line, punctuation_list='。！!'))

    words, text = get_keywords(sentences, ['ns', 'nr', 'n'])
    weights = construct_matrix(words)
    num = len(text)

    # Start from a 1 x num row of ones; 0.85 and 100 are passed to textrank
    # in the positions the original named `d` and `iters`.
    start_tr = np.ones((1, num))
    d = 0.85
    iters = 100
    tr = textrank(start_tr, iters, d, weights).tolist()[0]

    # The ratio is requested only after the scores have been computed.
    ratio = float(raw_input("Please enter the compressed ratio: "))
    topK = int(num * ratio)
    summary = summaly(text, tr, topK)
    print(summary)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: based_on_textrank.py Projeto: kiseliu/Naturallanguageprocessing

def main():
    """Print a TextRank-based extractive summary of ``data/01.txt``.

    The GBK-encoded input is read line by line; each non-empty stripped line
    is cut into sentences on ``。``, ``！`` and ``!``. The sentences are
    passed through ``get_keywords``, ``construct_matrix`` and ``textrank``,
    after which the user is prompted for a compression ratio that decides
    how many entries ``summaly`` is asked to keep.
    """
    with codecs.open("data/01.txt", 'r', encoding='GBK') as fr:
        stripped_lines = [raw.strip() for raw in fr.readlines()]

    sentences = []
    for line in stripped_lines:
        if line:
            sentences.extend(sentence_cut(line, punctuation_list='。！!'))

    words, text = get_keywords(sentences, ['ns', 'nr', 'n'])
    weights = construct_matrix(words)
    num = len(text)

    # Initial score row of ones, one column per entry of `text`.
    start_tr = np.ones((1, num))
    d = 0.85
    iters = 100
    scores = textrank(start_tr, iters, d, weights)
    tr = scores.tolist()[0]

    # Prompt last, once ranking is done.
    answer = raw_input("Please enter the compressed ratio: ")
    ratio = float(answer)
    topK = int(num * ratio)
    summary = summaly(text, tr, topK)
    print(summary)