Exemplo n.º 1
0
 def 처지(self):
     """Summarise ``x.txt`` with TextRank and return the result.

     Each sentence read from ``x.txt`` is tagged with Komoran; only
     (token, tag) pairs whose tag is NNG, NNP, VV or VA and which are not
     in the stop-word set are handed to TextRank.

     Returns:
         Whatever ``tr.summarize(0.4)`` returns, or the string "모름" when
         that value is ``None``.
     """
     from konlpy.tag import Komoran

     tr = TextRank()
     tagger = Komoran()
     stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}
     kept_tags = ('NNG', 'NNP', 'VV', 'VA')

     def tokenize(sent):
         # Still a lazy ``filter`` object, exactly as the original passed on.
         return filter(
             lambda pair: pair not in stopword and pair[1] in kept_tags,
             tagger.pos(sent))

     tr.loadSents(RawSentenceReader('x.txt'), tokenize)
     tr.build()
     # The ranking result is not used here. The call is kept in case
     # summarize() relies on it having run — TODO confirm against TextRank
     # and drop the call if it is redundant.
     tr.rank()

     # Summarise once and reuse the value instead of calling summarize()
     # a second time just to return it.
     summary = tr.summarize(0.4)
     return "모름" if summary is None else summary
Exemplo n.º 2
0
    def clck_summary(self):
        """Summarise the current PDF and show the result in the text browser.

        The output of ``pdf_to_text(self.file_name)`` is passed to a
        ``TextRank`` built with ``self.language``, no tokenizer and the
        module's ``STOPWORDS``. The ``topk=5`` summary it returns is joined
        with newlines and written to ``self.ui.textBrowser``.
        """
        extracted = pdf_to_text(self.file_name)

        summarizer = TextRank(
            language=self.language,
            tokenizer=None,
            stopwords=STOPWORDS,
        )
        top_sentences = summarizer.summarize(extracted, topk=5)

        display_text = "\n".join(top_sentences)
        self.ui.textBrowser.setText(display_text)
    def summary(self):
        """Summarise the selected input and store it in ``self.parent.out``.

        Does nothing when ``self.parent.file_path`` is empty. Otherwise the
        text to summarise is loaded according to the combo-box selection:
        ``'File'`` reads the file at ``file_path``; ``'URL'`` passes
        ``file_path`` to ``get_text`` unless the internet status label reads
        ``'OFFLINE'``, in which case the method returns without touching
        ``self.parent.out``. For any other selection ``self.parent.text`` is
        left as it was.

        The text is then handed to ``fs`` (algorithm ``'FS'``) or to
        ``TextRank`` (algorithm ``'TextRank'``). For any other algorithm
        ``self.parent.out`` is set to ``None``.
        """
        parent = self.parent
        if parent.file_path == '':
            return

        source = str(parent.comboBox.currentText())
        if source == 'File':
            # Context manager so the handle is always closed; the original
            # left it open until garbage collection.
            with open(parent.file_path, 'r') as handle:
                parent.text = handle.read()
        elif source == 'URL':
            if str(parent.internet_status.text()) == 'OFFLINE':
                return
            parent.text = get_text(parent.file_path)

        out = None
        if parent.set_algorithm == 'FS':
            out = fs(parent.text, parent.set_language, int(parent.set_count))
        elif parent.set_algorithm == 'TextRank':
            tr = TextRank(parent.text, int(parent.set_count),
                          parent.set_language, parent.set_metric,
                          parent.set_graph)
            out = tr.summarize()

        parent.out = out
Exemplo n.º 4
0
    def click_summary(self):
        """Extract keywords and key sentences from the PDF and display them.

        Reads the language and the two ``topk`` values from the UI combo
        boxes, stores them on ``self``, runs ``TextRank`` over the output of
        ``pdf_to_text(self.file_name)``, then writes the key sentences
        (newline-joined) to ``textBrowser`` and the keywords (comma-joined)
        to ``textBrowser_2``.
        """
        self.language = str(self.ui.comboBox.currentText())
        self.top_k_word = int(self.ui.comboBox_2.currentText())
        self.top_k_sent = int(self.ui.comboBox_3.currentText())

        document = pdf_to_text(self.file_name)

        # Only "ko" selects the "mecab" tokenizer; every other language
        # passes tokenizer=None.
        chosen_tokenizer = "mecab" if self.language == "ko" else None
        ranker = TextRank(
            language=self.language,
            tokenizer=chosen_tokenizer,
            stopwords=STOPWORDS,
        )

        top_words = ranker.keywords(document, topk=self.top_k_word)
        top_sentences = ranker.summarize(document, topk=self.top_k_sent)

        sentence_text = "\n".join(top_sentences)
        keyword_text = ", ".join(top_words)
        self.ui.textBrowser.setText(sentence_text)
        self.ui.textBrowser_2.setText(keyword_text)
Exemplo n.º 5
0
from textrank import TextRank, RawSentenceReader
from konlpy.tag import Kkma
import sys

# Command line: <input file> <summary ratio>
filename = sys.argv[1]
rate = float(sys.argv[2])

tr = TextRank()
from konlpy.tag import Komoran
tagger = Komoran()
stopword = {('있', 'VV'), ('하', 'VV'), ('되', 'VV')}
KEPT_TAGS = ('NNG', 'NNP', 'VV', 'VA')


def _keep_token(pair):
    """Return True for a (token, tag) pair that is not a stop word and has a kept tag."""
    return pair not in stopword and pair[1] in KEPT_TAGS


def _tokenize(sent):
    """Tag *sent* with Komoran and lazily filter the pairs through ``_keep_token``."""
    return filter(_keep_token, tagger.pos(sent))


tr.loadSents(RawSentenceReader(filename), _tokenize)
tr.build()
ranks = tr.rank()

# Keep only the text before the first '. ' of the summary and append a period.
leading_part = tr.summarize(rate).split('. ')[0]
sentence = f"{leading_part}."

kkma = Kkma()
print(sentence)
# Tokens of that sentence whose Kkma tag starts with 'N'.
print([pair[0] for pair in kkma.pos(sentence) if pair[1][0] == 'N'])

Exemplo n.º 6
0
        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        method=args.method,
        stopwords=None,
    )

    data = get_data(args.test_path)

    output_path = args.output_path
    hyp_path = f"{output_path}/hyp"
    abs_ref_path = f"{output_path}/abs_ref"

    if not os.path.exists(output_path):
        os.makedirs(output_path)
    if not os.path.exists(hyp_path):
        os.makedirs(hyp_path)
    if not os.path.exists(abs_ref_path):
        os.makedirs(abs_ref_path)

    for articles in tqdm(data):
        doc_id, sents, gold = articles

        hyp = model.summarize(sents, args.topk)

        with open(f"{abs_ref_path}/{doc_id}.txt", "w", encoding="utf8") as f:
            f.write(gold)
        with open(f"{hyp_path}/{doc_id}.txt", "w", encoding="utf8") as f:
            f.write(hyp)
Exemplo n.º 7
0
# coding: utf-8
from textrank import TextRank  #textrank 모듈 불러오기

# Read the input document. The context manager closes the file even when
# read() raises, which the manual open()/close() pair did not guarantee.
with open("text.txt", 'r', encoding='utf-8') as f:
    text = f.read()
tr = TextRank(text)  # run TextRank on the document

# Print the summary rows numbered from 1, then the keywords.
for i, row in enumerate(tr.summarize(3), start=1):
    print(f"{i}. {row}")
print('keywords :', tr.keywords())
Exemplo n.º 8
0
from textrank import TextRank
from article import get_text

# Fetch the article text and run TextRank on it with the 'english',
# 'log' and 'HITS' options.
article_url = 'https://www.bbc.com/news/world-us-canada-47848619'
text = get_text(article_url)
tr = TextRank(
    text,
    lang='english',
    metric='log',
    graph='HITS',
)
# The return value is discarded; presumably summarize() prints its result
# (see the sample output below) — TODO confirm.
tr.summarize()


'''
Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
In February Mr Sakurada had to make another apology, after arriving three minutes late to a parliamentary meeting.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP  Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.

Shunichi Suzuki, who had been Olympics minister before Mr Sakurada was appointed last October, will return to the post.
"I deeply apologise for his remark to the people in the disaster-hit areas," said Mr Abe.
It is not the first time Mr Sakurada has been forced to apologise.
After accepting Mr Sakurada's resignation, Prime Minister Shinzo Abe apologised for appointing him.
Image copyright AFP  Japan's Olympics Minister Yoshitaka Sakurada has resigned over comments that offended people affected by a huge tsunami and earthquake in 2011.
The 2011 tsunami left more than 20,000 dead and caused a meltdown at the Fukushima Daiichi nuclear plant.
Mr Sakurada also admitted last year to never having used a computer, despite being Japan's cyber security minister.

'''
Exemplo n.º 9
0
    else:
        sents = get_data("data/sents.txt", "news")
        # stopwords of korean
        stopwords = ["뉴스", "기자", "그리고", "연합뉴스"]

    # initialize Textrank
    textrank = TextRank(
        min_count=args.min_count,
        min_sim=args.min_sim,
        tokenizer=args.tokenizer,
        noun=args.noun,
        similarity=args.similarity,
        df=args.df,
        max_iter=args.max_iter,
        method=args.method,
        stopwords=stopwords,
    )

    # extraction setences or keywords
    if args.mode == "sentences":
        results = textrank.summarize(sents, topk=args.topk)
        results = [sent for _, sent in results]
        results = "\n".join(results)
    else:
        args.mode = "words"
        results = textrank.keywords(sents, topk=args.topk)

    print(f"{args.mode}")
    print("=" * 20)
    print(f"{results}")