Exemplos de TextRank.summarize em Python, exemplos de textrank.TextRank.summarize em Python

Exemplo n.º 1

0

Exibir arquivo

 def 처지(self):
     """Build a TextRank model from ``x.txt`` and return its summary.

     A ``RawSentenceReader`` over ``'x.txt'`` and a tokenizer callback are
     passed to ``tr.loadSents``. The callback tags each sentence with
     Komoran and keeps only tokens whose tag is NNG, NNP, VV or VA and
     that are not in the stopword set.

     Returns:
         The value of ``tr.summarize(0.4)``, or the string ``"모름"`` when
         that value is ``None``.
     """
     from konlpy.tag import Komoran

     tr = TextRank()
     tagger = Komoran()
     stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}
     kept_tags = ('NNG', 'NNP', 'VV', 'VA')
     tr.loadSents(
         RawSentenceReader('x.txt'),
         lambda sent: filter(
             lambda x: x not in stopword and x[1] in kept_tags,
             tagger.pos(sent)))
     tr.build()
     # The return value of rank() was never used; the call is kept in case
     # it populates state that summarize() relies on -- TODO confirm.
     tr.rank()

     # Summarize once and reuse the result instead of recomputing it for
     # the return statement.
     summary = tr.summarize(0.4)
     if summary is None:
         return "모름"
     return summary

Exemplo n.º 2

0

Exibir arquivo

Arquivo: textrank_v01.py Projeto: ExcelsiorCJH/textrank

    def clck_summary(self):
        """Summarize the text from ``self.file_name`` and display it.

        The output of ``pdf_to_text`` is passed to a ``TextRank`` built with
        ``self.language``, no tokenizer and ``STOPWORDS``; the five key
        sentences it returns are joined with newlines and written to
        ``self.ui.textBrowser``.
        """
        document_sents = pdf_to_text(self.file_name)

        summarizer = TextRank(
            language=self.language, tokenizer=None, stopwords=STOPWORDS
        )
        summary_text = "\n".join(summarizer.summarize(document_sents, topk=5))

        self.ui.textBrowser.setText(summary_text)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ats.py Projeto: agikarasugi/FS-n-TextRank-Text-Summarizer

    def summary(self):
        """Summarize the parent's input and store it in ``self.parent.out``.

        Nothing happens when ``self.parent.file_path`` is empty. Otherwise
        the text is loaded according to the combo box selection:

        * ``'File'`` -- the file at ``file_path`` is read into
          ``self.parent.text``.
        * ``'URL'``  -- ``get_text(file_path)`` is stored in
          ``self.parent.text``; if the internet status label reads
          ``'OFFLINE'`` the method returns without touching ``out``.

        The text is then summarized with ``fs`` or ``TextRank`` depending on
        ``self.parent.set_algorithm``. ``self.parent.out`` is set to the
        result, or to ``None`` when the algorithm is neither of the two.
        """
        parent = self.parent
        if parent.file_path == '':
            return

        out = None

        source = str(parent.comboBox.currentText())
        if source == 'File':
            # Context manager so the file handle is closed deterministically
            # (the bare open(...).read() leaked it).
            with open(parent.file_path, 'r') as handle:
                parent.text = handle.read()
        elif source == 'URL':
            if str(parent.internet_status.text()) == 'OFFLINE':
                return
            parent.text = get_text(parent.file_path)

        if parent.set_algorithm == 'FS':
            out = fs(parent.text, parent.set_language,
                     int(parent.set_count))
        elif parent.set_algorithm == 'TextRank':
            tr = TextRank(parent.text, int(parent.set_count),
                          parent.set_language, parent.set_metric,
                          parent.set_graph)
            out = tr.summarize()

        parent.out = out

Exemplo n.º 4

0

Exibir arquivo

Arquivo: textrank_v02.py Projeto: ExcelsiorCJH/textrank

    def click_summary(self):
        """Refresh keywords and key sentences from the current UI selections.

        Reads the language and the two top-k values from the combo boxes,
        runs ``TextRank`` on the output of ``pdf_to_text(self.file_name)``,
        and writes the key sentences (newline-joined) to ``textBrowser`` and
        the keywords (comma-joined) to ``textBrowser_2``.
        """
        ui = self.ui
        self.language = str(ui.comboBox.currentText())
        self.top_k_word = int(ui.comboBox_2.currentText())
        self.top_k_sent = int(ui.comboBox_3.currentText())

        document_sents = pdf_to_text(self.file_name)

        # Only the "ko" language selection uses the "mecab" tokenizer.
        tokenizer_name = "mecab" if self.language == "ko" else None
        ranker = TextRank(
            language=self.language,
            tokenizer=tokenizer_name,
            stopwords=STOPWORDS,
        )

        top_words = ranker.keywords(document_sents, topk=self.top_k_word)
        top_sents = ranker.summarize(document_sents, topk=self.top_k_sent)

        ui.textBrowser.setText("\n".join(top_sents))
        ui.textBrowser_2.setText(", ".join(top_words))

Exemplo n.º 5

0

Exibir arquivo

from textrank import TextRank, RawSentenceReader
from konlpy.tag import Kkma
import sys

# Command line: <input file> <summary rate>
filename, rate = sys.argv[1], float(sys.argv[2])

tr = TextRank()
from konlpy.tag import Komoran
tagger = Komoran()
stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}


def _is_kept_token(token):
    """Return True for a non-stopword token tagged NNG, NNP, VV or VA."""
    return token not in stopword and token[1] in ('NNG', 'NNP', 'VV', 'VA')


# Tokenize every sentence with Komoran and keep only the accepted tokens.
tr.loadSents(
    RawSentenceReader(filename),
    lambda sent: filter(_is_kept_token, tagger.pos(sent)),
)
tr.build()
ranks = tr.rank()

# Keep only the text before the first '. ' of the summary and re-append
# the period.
sentence = '{}.'.format(tr.summarize(rate).split('. ')[0])

kkma = Kkma()
print(sentence)
# Surface forms of the tokens whose Kkma tag starts with 'N'.
print([token[0] for token in kkma.pos(sentence) if token[1][0] == 'N'])

Exemplo n.º 6

0

Exibir arquivo

Arquivo: test.py Projeto: taebbang/study

        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        method=args.method,
        stopwords=None,
    )

    data = get_data(args.test_path)

    output_path = args.output_path
    hyp_path = f"{output_path}/hyp"
    abs_ref_path = f"{output_path}/abs_ref"

    if not os.path.exists(output_path):
        os.makedirs(output_path)
    if not os.path.exists(hyp_path):
        os.makedirs(hyp_path)
    if not os.path.exists(abs_ref_path):
        os.makedirs(abs_ref_path)

    for articles in tqdm(data):
        doc_id, sents, gold = articles

        hyp = model.summarize(sents, args.topk)

        with open(f"{abs_ref_path}/{doc_id}.txt", "w", encoding="utf8") as f:
            f.write(gold)
        with open(f"{hyp_path}/{doc_id}.txt", "w", encoding="utf8") as f:
            f.write(hyp)

Exemplo n.º 7

0

Exibir arquivo

# coding: utf-8
from textrank import TextRank  #textrank 모듈 불러오기

# Read the whole input document. The context manager guarantees the file is
# closed even if reading raises.
with open("text.txt", 'r', encoding='utf-8') as f:
    text = f.read()

tr = TextRank(text)  # run TextRank on the document

# Print the summarized sentences as a numbered list, then the keywords.
for i, row in enumerate(tr.summarize(3), start=1):
    print(str(i) + '. ' + row)
print('keywords :', tr.keywords())

Exemplo n.º 8

0

Exibir arquivo

from textrank import TextRank
from article import get_text

# Fetch the article text, build a TextRank configured with lang='english',
# metric='log' and graph='HITS', and run the summarizer (its return value is
# discarded here).
article_url = 'https://www.bbc.com/news/world-us-canada-47848619'
text = get_text(article_url)
tr = TextRank(
    text,
    lang='english',
    metric='log',
    graph='HITS',
)
tr.summarize()


'''
Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
In February Mr Sakurada had to make another apology, after arriving three minutes late to a parliamentary meeting.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP  Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.

Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP  Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
The 2011 tsunami left more than 20,000 dead and caused a meltdown at the Fukushima Daiichi nuclear plant.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.

'''

Exemplo n.º 9

0

Exibir arquivo

Arquivo: main.py Projeto: ExcelsiorCJH/textrank

    else:
        sents = get_data("data/sents.txt", "news")
        # Korean stopwords
        stopwords = ["뉴스", "기자", "그리고", "연합뉴스"]

    # initialize Textrank
    textrank = TextRank(
        min_count=args.min_count,
        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        max_iter=args.max_iter,
        method=args.method,
        stopwords=stopwords,
    )

    # extract sentences or keywords
    if args.mode == "sentences":
        results = textrank.summarize(sents, topk=args.topk)
        results = [sent for _, sent in results]
        results = "\n".join(results)
    else:
        args.mode = "words"
        results = textrank.keywords(sents, topk=args.topk)

    print(f"{args.mode}")
    print("=" * 20)
    print(f"{results}")