def test_search():
    """Check that a trie search over a token list returns the longform"""
    recognizer = AdeftRecognizer('ER', grounding_map)
    tokens = [
        'for', 'women', ',', 'mandatory', 'hmo', 'programs', 'reduce',
        'some', 'types', 'of', 'non', 'emergency', 'room',
    ]
    # The token list ends with the words of the expected longform
    found = recognizer._search(tokens)
    assert found == 'emergency room'
def test_recognizer():
    """Run the recognizer end to end over the example texts"""
    recognizer = AdeftRecognizer('ER', grounding_map)
    examples = (example1, example2, example3, example4, example5)
    for text, result in examples:
        longforms = recognizer.recognize(text)
        # Each example pairs a text with the value popped from the result
        assert longforms.pop() == result
    # Defining pattern at the very start of the fragment: nothing precedes
    # it, so the recognizer is expected to return an empty (falsy) result
    leading_pattern = recognizer.recognize('(ER) stress')
    assert not leading_pattern
def test_recognizer():
    """Run the recognizer end to end and check the reported groundings"""
    recognizer = AdeftRecognizer('ER', grounding_map)
    examples = (example1, example2, example3, example4, example5)
    for text, expected in examples:
        matches = recognizer.recognize(text)
        # Each popped match is indexed by its 'grounding' key
        match = matches.pop()
        assert match['grounding'] == expected
    # Defining pattern at the very start of the fragment: nothing precedes
    # it, so the recognizer is expected to return an empty (falsy) result
    leading_pattern = recognizer.recognize('(ER) stress')
    assert not leading_pattern
def test_strip_defining_patterns():
    """Check the output of strip_defining_patterns on sample sentences"""
    recognizer = AdeftRecognizer('ER', grounding_map)
    # (input text, expected stripped text) pairs; the variants differ only
    # in the character that follows the closing parenthesis
    expectations = [
        ('The endoplasmic reticulum (ER) is a transmembrane',
         'The ER is a transmembrane'),
        ('The endoplasmic reticulum (ER)is a transmembrane',
         'The ER is a transmembrane'),
        ('The endoplasmic reticulum (ER)-is a transmembrane',
         'The ER -is a transmembrane'),
    ]
    for text, stripped in expectations:
        assert recognizer.strip_defining_patterns(text) == stripped

    # Here the preceding words are expected to stay in place and only the
    # parentheses go away (presumably because 'extended release' is not a
    # longform in grounding_map -- confirm against the fixture)
    null_case = 'Newly developed extended release (ER) medications'
    null_result = 'Newly developed extended release ER medications'
    stripped_null = recognizer.strip_defining_patterns(null_case)
    assert stripped_null == null_result
def __init__(self, classifier, grounding_dict, names):
    """Store the classifier and build per-shortform recognizers.

    Parameters
    ----------
    classifier : object
        Must expose ``shortforms`` and ``pos_labels`` attributes; both are
        copied onto this instance.
    grounding_dict : dict
        Maps each shortform to a grounding map (itself a mapping whose
        values are collected into ``self.labels``).
    names : object
        Stored unchanged as ``self.names``.
    """
    self.classifier = classifier
    self.shortforms = classifier.shortforms

    # One recognizer per shortform, in the iteration order of grounding_dict
    recognizers = []
    for shortform, grounding_map in grounding_dict.items():
        recognizers.append(AdeftRecognizer(shortform, grounding_map))
    self.recognizers = recognizers

    self.grounding_dict = grounding_dict
    self.names = names

    # Union of the values of every grounding map
    labels = set()
    for grounding_map in grounding_dict.values():
        labels.update(grounding_map.values())
    self.labels = labels

    self.pos_labels = classifier.pos_labels
def test_init():
    """Check that the recognizer's internal trie is built as expected"""
    recognizer = AdeftRecognizer('ER', grounding_map)
    root = recognizer._trie
    for longform in grounding_map:
        # Stem each token of the longform, then reverse: the path is
        # followed from the last token of the longform to the first
        stems = tuple(_stemmer.stem(token)
                      for token, _ in tokenize(longform))
        path = stems[::-1]
        final_depth = len(path) - 1
        node = root
        for depth, stem in enumerate(path):
            assert stem in node.children
            child = node.children[stem]
            if depth == final_depth:
                # Only the node ending the path carries the longform
                assert child.longform == longform
            else:
                assert child.longform is None
            node = child
def test_exclude():
    """Check that passing excluded words suppresses recognition"""
    excluded_words = ['emergency']
    recognizer = AdeftRecognizer('ER', grounding_map, exclude=excluded_words)
    # First element of example3 is the text handed to the recognizer
    recognized = recognizer.recognize(example3[0])
    assert not recognized
def __init__(self, grounding_dict):
    """Store the grounding dict and build one recognizer per shortform.

    Parameters
    ----------
    grounding_dict : dict
        Maps each shortform to the grounding map handed to its
        AdeftRecognizer.
    """
    self.grounding_dict = grounding_dict
    # Recognizers follow the iteration order of grounding_dict
    recognizers = []
    for shortform, grounding_map in grounding_dict.items():
        recognizers.append(AdeftRecognizer(shortform, grounding_map))
    self.recognizers = recognizers