def test_miner_to_dict():
    """Round-trip an AdeftMiner through to_dict / load_adeft_miner_from_dict.

    The miner rebuilt from the dictionary must report the same ``top()``
    result and the same longforms as the miner it was created from.
    """
    corpus = [example_text1, example_text2, example_text3, example_text4]
    original = AdeftMiner('INDRA')
    original.process_texts(corpus)

    restored = load_adeft_miner_from_dict(original.to_dict())

    top_before = original.top()
    top_after = restored.top()
    assert top_before == top_after

    # Compare longforms with alignment based scoring switched off first.
    plain_before = original.get_longforms(use_alignment_based_scoring=False)
    plain_after = restored.get_longforms(use_alignment_based_scoring=False)
    assert plain_before == plain_after

    # Alignment scores are computed explicitly on the original miner only;
    # the default get_longforms() output of both miners must still agree.
    original.compute_alignment_scores()
    scored_before = original.get_longforms()
    scored_after = restored.get_longforms()
    assert scored_before == scored_after
def test_serialize_adeft_miner():
    """Round-trip an AdeftMiner through dump / load_adeft_miner via a file.

    The miner is dumped to a uniquely named file under SCRATCH_PATH, loaded
    back, and the loaded miner must report the same ``top()`` and
    ``get_longforms()`` results as the original.  The scratch file is
    removed afterwards so test runs do not leave files behind.
    """
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])
    # A random hex name avoids collisions with other files in SCRATCH_PATH.
    temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex)
    try:
        with open(temp_filename, 'w') as f:
            miner.dump(f)
        with open(temp_filename) as f:
            miner2 = load_adeft_miner(f)
    finally:
        # Clean up whether or not the round trip succeeded.  The existence
        # check keeps a failed open() from being masked by a second error.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
    assert miner.top() == miner2.top()
    assert miner.get_longforms() == miner2.get_longforms()
def test_get_longforms():
    """Check the longforms AdeftMiner reports for 'INDRA' at cutoff 0.5.

    NOTE(review): the extraction is reportedly a breadth first search inside
    AdeftMiner; that is not visible from this test.
    """
    indra_miner = AdeftMiner('INDRA')
    # Before any text has been processed, top() must return an empty list.
    assert indra_miner.top() == []

    corpus = [example_text1, example_text2, example_text3, example_text4]
    indra_miner.process_texts(corpus)
    found = indra_miner.get_longforms(cutoff=0.5)

    # Each entry is compared against a (longform, score) tuple.
    expected_first = ('indonesian debt restructuring agency', 1.0)
    expected_second = ('integrated network and dynamical'
                       ' reasoning assembler', 1.0)

    assert len(found) == 2
    assert found[0] == expected_first
    assert found[1] == expected_second