Пример #1
0
def test_process_with_exclude():
    """Check the ranking produced when the word 'and' is excluded.

    A miner for the shortform 'INDRA' is created with ``exclude='and'``,
    fed the four example texts, and the two leading entries returned by
    ``top()`` are compared against fixed (longform, score) pairs.
    """
    miner = AdeftMiner('INDRA', exclude='and')
    corpus = [example_text1, example_text2, example_text3, example_text4]
    miner.process_texts(corpus)

    # Expected entries, listed in rank order starting from the best one.
    expected_leaders = [
        ('dynamical reasoning assembler', 2.0),
        ('indonesian debt restructuring agency', 1.0),
    ]
    for rank, expected in enumerate(expected_leaders):
        assert miner.top()[rank] == expected
Пример #2
0
def test_miner_to_dict():
    """Round-trip a miner through its dictionary representation.

    A miner is converted with ``to_dict`` and rebuilt with
    ``load_adeft_miner_from_dict``. The rebuilt miner must agree with the
    original on ``top()``, on ``get_longforms`` with alignment based
    scoring disabled, and on ``get_longforms`` with default arguments
    after ``compute_alignment_scores`` has been called on the original.
    """
    original = AdeftMiner('INDRA')
    original.process_texts(
        [example_text1, example_text2, example_text3, example_text4])
    restored = load_adeft_miner_from_dict(original.to_dict())

    assert original.top() == restored.top()

    plain_original = original.get_longforms(use_alignment_based_scoring=False)
    plain_restored = restored.get_longforms(use_alignment_based_scoring=False)
    assert plain_original == plain_restored

    # Alignment scores are computed explicitly on the original only; the
    # restored miner is queried exactly as it was loaded.
    original.compute_alignment_scores()
    assert original.get_longforms() == restored.get_longforms()
Пример #3
0
def test_prune():
    """Check that pruning keeps only candidates of at most five words.

    The candidates reported by ``top()`` before pruning, restricted to
    those with no more than five whitespace-separated tokens, must equal
    the candidates reported by ``top()`` after ``prune(5)``, in the same
    order.
    """
    max_words = 5
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])
    # Only the leading field (the candidate text) is needed. Starred
    # unpacking avoids hard-coding how many trailing fields each entry of
    # ``top()`` carries.
    candidates = [candidate for candidate, *_ in miner.top()]
    miner.prune(max_words)
    pruned_candidates = [candidate for candidate, *_ in miner.top()]
    assert pruned_candidates == [
        candidate for candidate in candidates
        if len(candidate.split()) <= max_words
    ]
Пример #4
0
def test_serialize_adeft_miner():
    """Round-trip a miner through ``dump`` and ``load_adeft_miner``.

    The miner is written to a uniquely named file under ``SCRATCH_PATH``,
    read back, and compared with the original on ``top()`` and
    ``get_longforms()``. The scratch file is deleted afterwards whether or
    not dumping and loading succeed.
    """
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])
    temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex)
    try:
        with open(temp_filename, 'w') as f:
            miner.dump(f)
        with open(temp_filename) as f:
            miner2 = load_adeft_miner(f)
    finally:
        # Clean up so repeated test runs do not accumulate scratch files.
        # The existence check covers the case where opening for writing
        # itself failed and no file was created.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
    assert miner.top() == miner2.top()
    assert miner.get_longforms() == miner2.get_longforms()
Пример #5
0
def test_process_texts():
    """Test processing of texts.

    After the four example texts are processed, selected positions of the
    ranking returned by ``top()`` are compared against fixed
    (longform, score) pairs, and ``top(limit=5)`` is checked against the
    first five entries of the unlimited ranking.
    """
    corpus = [example_text1, example_text2, example_text3, example_text4]
    miner = AdeftMiner('INDRA')
    miner.process_texts(corpus)

    # Position in the ranking -> entry expected at that position.
    expected_at_rank = {
        0: ('indonesian debt restructuring agency', 1.0),
        3: ('integrated network and dynamical reasoning assembler', 1.0),
        7: ('reasoning assembler', 0.0),
    }
    for rank, expected in expected_at_rank.items():
        assert miner.top()[rank] == expected

    # A limited query must be a prefix of the unlimited one.
    limited = miner.top(limit=5)
    assert limited == miner.top()[0:5]
Пример #6
0
def test_add():
    """Test the addition of candidates to the trie

    First add one maximal candidate. All nested parent candidates will be
    added as well. Check that the candidates are contained in the trie and
    that likelihood calculations are correct. Then add the parent of the
    original maximal candidate and check that likelihood has been updated
    correctly.
    """
    miner = AdeftMiner('INDRA')
    candidate = [
        'the', 'integrated', 'network', 'and', 'dynamical', 'reasoning',
        'assembler'
    ]
    # Expected path through the trie: the candidate's tokens, stemmed and
    # listed in reverse order (last word of the candidate first).
    stemmed = ['assembl', 'reason', 'dynam', 'and', 'network', 'integr', 'the']

    def check_path(counts, penalties):
        """Walk the trie along ``stemmed`` and check every node's score.

        The node reached by the i-th token must exist and have a score
        equal to ``counts[i] - penalties[i]``.
        """
        current = miner._internal_trie
        for count, penalty, token in zip(counts, penalties, stemmed):
            assert token in current.children
            current = current.children[token]
            assert current.score == count - penalty

    # After adding the full candidate once, every node has count 1.
    miner._add(candidate)
    check_path(counts=[1] * 7, penalties=[1] * 6 + [0])

    # Adding the candidate without its first word: only the deepest node
    # ('the') keeps count 1, every other node on the path now has count 2.
    miner._add(candidate[1:])
    check_path(counts=[2] * 6 + [1], penalties=[2] * 5 + [1, 0])
Пример #7
0
def test_get_longforms():
    """Test extraction of longforms above a score cutoff.

    A new miner must report an empty ranking. After the four example
    texts are processed, ``get_longforms(cutoff=0.5)`` must return exactly
    two (longform, score) pairs in the expected order.
    """
    miner = AdeftMiner('INDRA')
    # Nothing has been processed yet, so the ranking starts out empty.
    assert miner.top() == []

    corpus = [example_text1, example_text2, example_text3, example_text4]
    miner.process_texts(corpus)
    longforms = miner.get_longforms(cutoff=0.5)

    expected = (
        ('indonesian debt restructuring agency', 1.0),
        ('integrated network and dynamical reasoning assembler', 1.0),
    )
    assert len(longforms) == 2
    for position, pair in enumerate(expected):
        assert longforms[position] == pair
0
def test_compose_adeft_miners():
    """Check that composing two miners matches one miner fed all texts.

    Two miners each process half of the example texts and are combined
    with ``compose``. The combined ranking from ``top()`` must equal that
    of a third miner which processed all four texts itself.
    """
    corpus = [example_text1, example_text2, example_text3, example_text4]
    first_half, second_half, reference = (
        AdeftMiner('INDRA'), AdeftMiner('INDRA'), AdeftMiner('INDRA'))

    first_half.process_texts(corpus[:2])
    second_half.process_texts(corpus[2:])
    reference.process_texts(corpus)

    combined = compose(first_half, second_half)
    print(combined)
    assert combined.top() == reference.top()