Example #1
0
def test_extractor_1():
    """Dependency extraction chains A->B->C triples once per A-entity.

    Two 'A' entities (Tom, Bob) share the same B ('cancer') and C
    ('treatment'), so two parallel triple groups are expected.
    """
    ctx = Context(
        'Tom and Bob declined cancer treatment.',
        ['Tom', 'and', 'Bob', 'declined', 'cancer', 'treatment', '.'])
    ctx.add('entities', ['B-A', '', 'B-A', '', 'B-B', 'B-C', ''])

    extract = StrictDependencyRelationshipExtract(
        spacy=SpacyIntegration('en_core_web_lg'),
        attribute='entities',
        relationships=[
            ('A', 'has_B', 'B'),
            ('B', 'has_C', 'C'),
        ])

    relationships = extract.evaluate(ctx)
    assert len(relationships) == 2

    # One (subj, obj) pair per triple, one group per 'A' entity.
    expected_groups = [
        [('Tom', 'cancer'), ('cancer', 'treatment')],
        [('Bob', 'cancer'), ('cancer', 'treatment')],
    ]
    for kb_triples, expected in zip(relationships, expected_groups):
        for triple, (subj, obj) in zip(kb_triples, expected):
            assert triple.subj == subj
            assert triple.obj == obj
Example #2
0
def test_extractor_3():
    """Non-overlapping scopes produce an all-empty overlap tag list."""
    tokens = ['Tom', 'declined', 'cancer', 'treatment', '.']
    context = Context('Tom declined cancer treatment.', tokens)
    # scope1 covers the first two tokens, scope2 only the fourth —
    # the two scopes never intersect.
    context.add('scope1', ['S1', 'S1', '', '', ''])
    context.add('scope2', ['', '', '', 'S2', ''])

    extract = ScopeOverlapExtract('scope1', 'scope2')
    assert extract.evaluate(context) == [''] * 5
Example #3
0
def test_chunker_single_term_phrase():
    """TreeChunker tags both multi-token and single-token NP phrases."""
    np_chunker = TreeChunker("NP: {<DT>?<JJ>*<NN>}", attribute='pos')

    sentence = Context(
        'learn php from guru99',
        ['learn', 'php', 'from', 'guru99'],
    )
    sentence.add('pos', ['JJ', 'NN', 'IN', 'NN'])

    # 'learn php' matches JJ+NN; 'guru99' matches as a lone NN.
    tags = np_chunker.tag(sentence)
    assert tags == ['B-NP', 'I-NP', '', 'B-NP']
Example #4
0
def test_chunker_single_term_phrase_not_pos():
    """TreeChunker works over arbitrary attributes, not just POS tags."""
    ab_chunker = TreeChunker("AB: {<A><B>}", attribute='outer')

    sentence = Context(
        'learn php from guru99',
        ['learn', 'php', 'from', 'guru99'],
    )
    # Custom tag layer: only the first two tokens carry labels.
    sentence.add('outer', ['A', 'B', '', ''])

    tags = ab_chunker.tag(sentence)
    assert tags == ['B-AB', 'I-AB', '', '']
Example #5
0
def test_extractor():
    """TagExtract groups contiguous matching tags into (index, token) runs."""
    tokens = ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']
    context = Context('We are not going to New York.', tokens)
    context.add('negative', ['', '', 'NEG', 'NEG', '', '', '', ''])
    context.add('negative_scope',
                ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'])
    context.add('entities', ['', '', '', '', '', 'B-LOC', 'I-LOC', ''])

    # A short NEG run: just the two negated tokens.
    assert TagExtract('negative', ['NEG']).evaluate(context) == {
        'NEG': [
            [(2, 'not'), (3, 'going')],
        ]
    }

    # A NEG run that extends to the end of the sentence.
    assert TagExtract('negative_scope', ['NEG']).evaluate(context) == {
        'NEG': [
            [(2, 'not'), (3, 'going'), (4, 'to'), (5, 'New'), (6, 'York'),
             (7, '.')],
        ]
    }

    # BIO-tagged entities collapse to their bare label ('LOC').
    assert TagExtract('entities', ['LOC']).evaluate(context) == {
        'LOC': [[(5, 'New'), (6, 'York')]]
    }
Example #6
0
def test_combine():
    """Merger.combine emits one dict per token, omitting empty attributes."""
    tokens = ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']
    context = Context('We are not going to New York.', tokens)
    context.add('entities', ['', '', '', '', '', 'LOC', 'LOC', ''])
    context.add('negatives', ['', '', 'NEG', '', '', '', '', ''])

    merged = Merger.combine(context, ['tokens', 'entities', 'negatives'])

    # Blank attribute values are dropped; every dict keeps its token.
    expected = [
        {'tokens': 'We'},
        {'tokens': 'are'},
        {'tokens': 'not', 'negatives': 'NEG'},
        {'tokens': 'going'},
        {'tokens': 'to'},
        {'tokens': 'New', 'entities': 'LOC'},
        {'tokens': 'York', 'entities': 'LOC'},
        {'tokens': '.'},
    ]
    assert merged == expected
Example #7
0
def test_context_add_and_get():
    """Context exposes the implicit 'tokens' key plus each added attribute."""
    tokens = ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']
    context = Context('We are not going to New York.', tokens)
    context.add('entities', ['', '', '', '', '', 'LOC', 'LOC', ''])
    context.add('negatives', ['', '', 'NEG', '', '', '', '', ''])
    context.add('negative_scope',
                ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'])

    # 'tokens' plus the three added attributes.
    assert len(context.keys) == 4
    for key in ('tokens', 'entities', 'negatives', 'negative_scope'):
        assert len(context.get(key)) == 8