示例#1
0
def test_default(spacy_doc):
    """Check the shape of ``ke.scake`` output when called with default arguments.

    The result must be a non-empty list of 2-tuples whose first item is a
    ``compat.unicode_`` instance and whose second item is a ``float``.
    """
    keyterms = ke.scake(spacy_doc)
    assert isinstance(keyterms, list)
    assert len(keyterms) > 0
    # First pass: every entry is a pair.
    for entry in keyterms:
        assert isinstance(entry, tuple) and len(entry) == 2
    # Second pass: each pair is (term, score) with the expected types.
    for term, score in keyterms:
        assert isinstance(term, compat.unicode_)
        assert isinstance(score, float)
示例#2
0
def test_single_sentence_doc():
    """Check that ``ke.scake`` returns a non-empty list for a one-sentence doc."""
    text = "This is a document with a single sentence."
    single_sentence_doc = textacy.make_spacy_doc(text, lang="en")
    keyterms = ke.scake(single_sentence_doc)
    assert isinstance(keyterms, list) and len(keyterms) > 0
示例#3
0
def test_empty_doc(empty_spacy_doc):
    """Check that ``ke.scake`` returns an empty list for an empty doc."""
    keyterms = ke.scake(empty_spacy_doc)
    assert isinstance(keyterms, list) and len(keyterms) == 0
示例#4
0
def test_topn_float(spacy_doc):
    """Check ``ke.scake`` with a float ``topn``.

    ``topn=0.2`` must yield at least one result, while ``topn=2.0`` must
    raise ``ValueError``.
    """
    keyterms = ke.scake(spacy_doc, topn=0.2)
    assert len(keyterms) > 0
    # The return value is irrelevant here; only the raised error matters.
    with pytest.raises(ValueError):
        ke.scake(spacy_doc, topn=2.0)
示例#5
0
def test_n_topn(spacy_doc):
    """Check that an integer ``topn`` caps the number of results from ``ke.scake``.

    For each tested limit, the result must be non-empty and contain at most
    that many entries.
    """
    for limit in (5, 25):
        keyterms = ke.scake(spacy_doc, topn=limit)
        count = len(keyterms)
        assert count > 0 and count <= limit
示例#6
0
def test_include_pos(spacy_doc):
    """Check that changing ``include_pos`` changes the ``ke.scake`` output.

    Both part-of-speech sets must produce results, and the two results must
    differ from each other.
    """
    with_adjectives = ke.scake(spacy_doc, include_pos={"NOUN", "PROPN", "ADJ"})
    without_adjectives = ke.scake(spacy_doc, include_pos={"NOUN", "PROPN"})
    assert len(with_adjectives) > 0
    assert len(without_adjectives) > 0
    assert with_adjectives != without_adjectives
示例#7
0


# Load the article text from a .txt file.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default (the text is known to contain non-ASCII
# characters such as non-breaking spaces).
# NOTE(review): assumes the file is saved as UTF-8 -- confirm.
with open('news_article.txt', 'r', encoding='utf-8') as file:
    # Replace newlines with a space (not an empty string) so the last word
    # of one line is not fused with the first word of the next line.
    data = file.read().replace('\n', ' ')
# Normalize non-breaking spaces to regular spaces.
article = data.replace(u'\xa0', u' ')

# Create the doc object.
doc = textacy.make_spacy_doc(article, lang='en_core_web_sm')

# KEYTERM EXTRACTION
# Each algorithm returns a list of tuples, containing the keyterm and a score.
textrank = ke.textrank(doc, normalize="lemma")
yake = ke.yake(doc, normalize="lemma")
scake = ke.scake(doc, normalize="lemma")
sgrank = ke.sgrank(doc, normalize="lemma")

# Separate terms and relevancy scores.
# (decompose_keyterms is defined outside this section of the file.)
terms_textrank, scores_textrank = decompose_keyterms(textrank)
terms_yake, scores_yake = decompose_keyterms(yake)
terms_scake, scores_scake = decompose_keyterms(scake)
terms_sgrank, scores_sgrank = decompose_keyterms(sgrank)

# Save results to a dataframe.
# (keyterm_dataframe is defined outside this section of the file.)
df = keyterm_dataframe(scake, 'scake')
print(df)
    


#Make plot