Exemplo n.º 1
0
def parse_semantic_tag(tokenstring):
    """ Extension that appends the SEMANTIC tag to the output of the parser.

        The words of the given token string are sent to the MBSP.Mbt client on port 6066.
        The SEMANTIC tag values in the reply are appended as the last tag of each token.
        Returns the joined token string, e.g. "macrophage/NN/I-NP/O/O/O/macrophage/protein".

        tokenstring: object whose split() result supports reduce(), tags and join()
                     (presumably a MBSP.TokenString -- confirm against the caller).

        The client is disconnected even when sending or retagging raises an error.
    """
    client = MBSP.Mbt(port=6066)
    try:
        # Find the semantic tag of words in the sentence.
        # Example: "macrophage/NN/I-NP/O/O/O/macrophage".
        tokens = tokenstring.split()
        # => [[[u'macrophage', u'NN', u'I-NP', u'O', u'O', u'O', u'macrophage']]]
        words = tokens.reduce([MBSP.WORD])
        # => [[[u'macrophage']]]
        reply = MBSP.TokenString(client.send(words.join()), tags=[MBSP.WORD, SEMANTIC])
        # => macrophage/protein
        tagged = reply.split()
        # => [[[u'macrophage', u'protein']]]
        # Move the SEMANTIC column out of the reply and onto the original tokens.
        semantic = tagged.tags.pop(tagged.tags.index(SEMANTIC))
        tokens.tags.append(SEMANTIC, values=semantic)
        # => [[[u'macrophage', u'NN', u'I-NP', u'O', u'O', u'O', u'macrophage', u'protein']]]
        return tokens.join()
        # => macrophage/NN/I-NP/O/O/O/macrophage/protein
    finally:
        # Always release the client, also when an exception propagates.
        client.disconnect()
Exemplo n.º 2
0
def update_pos_tag(tokenstring):
    """ Event handler that fires when the MBSP parser is done tagging and chunking.
        Updates the part-of-speech tags from a specialized biomedical corpus.
        Returns the updated string to the parser.

        The words of the given token string are sent to the MBSP.Mbt client on port 6065.
        The reply supplies the new part-of-speech tags; the chunk tags are carried over
        from the original tokens.

        tokenstring: object whose split() result supports reduce(), tags and join()
                     (presumably a MBSP.TokenString -- confirm against the caller).

        The client is disconnected even when sending or retagging raises an error.
    """
    client = MBSP.Mbt(port=6065)
    try:
        # Retag the part-of-speech tags with the GENIA corpus.
        # Example: "TGF-beta1-transcribing/NN/I-NP macrophages/NNS/I-NP"
        tokens = tokenstring.split()
        # => [[[u'TGF-beta1-transcribing', u'NN', u'I-NP'], [u'macrophages', u'NNS', u'I-NP']]]
        words = tokens.reduce([MBSP.WORD])
        # => [[[u'TGF-beta1-transcribing'], [u'macrophages']]]
        reply = MBSP.TokenString(client.send(words.join()), tags=[MBSP.WORD, MBSP.PART_OF_SPEECH])
        # => TGF-beta1-transcribing/JJ macrophages/NNS
        retagged = reply.split()
        # => [[[u'TGF-beta1-transcribing', u'JJ'], [u'macrophages', u'NNS']]]
        # Move the CHUNK column from the original tokens onto the retagged ones.
        chunks = tokens.tags.pop(tokens.tags.index(MBSP.CHUNK))
        retagged.tags.append(MBSP.CHUNK, values=chunks)
        # => [[[u'TGF-beta1-transcribing', u'JJ', u'I-NP'], [u'macrophages', u'NNS', u'I-NP']]]
        return retagged.join()
        # => TGF-beta1-transcribing/JJ/I-NP macrophages/NNS/I-NP
    finally:
        # Always release the client, also when an exception propagates.
        client.disconnect()