Пример #1
0
def classify_semantic_backoff_pos(segment):
    """Return one tag for the segment: semantic if possible, else syntactic.

    The tag is resolved in this order:

    1. If ``DictionaryTag.is_gap`` accepts the segment's ``dictset_id``, the
       tag comes from ``refine_gap``.
       NOTE(review): the numberN / charN / specialN classes (N being the
       segment length) are presumably produced there -- confirm.
    2. If the part-of-speech is a proper-noun tag ('np', 'np1', 'np2') or is
       missing, and the ``dictset_id`` has an entry in ``DictionaryTag.map``,
       the mapped tag is returned (month, fname, mname, surname, city or
       country, as suitable).
    3. Otherwise the WordNet synset of the word is looked up; verb and noun
       synsets are generalized and the generalized tag is returned.
    4. In every other case, including when ``generalize`` yields nothing,
       the function backs off to the part-of-speech tag.

    Examples:
        loved -> s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5

    Args:
        segment: object exposing ``dictset_id``, ``pos`` and ``word``.

    Returns:
        The tag. It may still be None when the segment has no
        part-of-speech and nothing better was found.
    """
    if DictionaryTag.is_gap(segment.dictset_id):
        return refine_gap(segment)

    if (segment.pos in ['np', 'np1', 'np2', None]
            and segment.dictset_id in DictionaryTag.map):
        return DictionaryTag.map[segment.dictset_id]

    synset = semantics.synset(segment.word, segment.pos)
    # Only verbs and nouns are generalized.
    if synset is not None and synset.pos() in ['v', 'n']:
        tag = generalize(synset)
        # generalize() is known to return None occasionally; honour the
        # "back off to POS" contract instead of leaking None as a tag.
        if tag is not None:
            return tag

    return segment.pos
Пример #2
0
def classify_pos_semantic(segment):
    """Fully classify the segment into a single tag.

    The tag may combine a syntactic (part-of-speech) and a semantic symbol:

    * Gap segments (per ``DictionaryTag.is_gap``) are tagged by
      ``refine_gap``.
      NOTE(review): numberN / charN / specialN presumably originate there,
      N being the segment length -- confirm.
    * Segments whose part-of-speech is 'np', 'np1', 'np2' or None, and whose
      ``dictset_id`` is a key of ``DictionaryTag.map``, get the mapped tag
      (month, fname, mname, surname, city or country, as suitable).
    * Words with a verb or noun WordNet synset get ``pos_synset``, where
      ``synset`` is the generalized synset. If ``generalize`` returns None,
      the tag ends in the literal text ``None``.
    * Anything else gets the bare part-of-speech tag.

    Examples:
        loved -> vvd_s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5
    """
    if DictionaryTag.is_gap(segment.dictset_id):
        return refine_gap(segment)

    proper_noun_or_untagged = segment.pos in ('np', 'np1', 'np2', None)
    if proper_noun_or_untagged and segment.dictset_id in DictionaryTag.map:
        return DictionaryTag.map[segment.dictset_id]

    synset = semantics.synset(segment.word, segment.pos)
    # Generalization is attempted for verbs and nouns only.
    if synset is None or synset.pos() not in ('v', 'n'):
        return segment.pos

    # TODO: generalize sometimes returns None, which renders as "pos_None".
    return '{}_{}'.format(segment.pos, generalize(synset))
Пример #3
0
def sample(db):
    """Print a table of words, their tags and the corresponding synsets.

    Consumes ``db`` until ``db.hasNext()`` is false. For every segment of
    every password returned by ``db.nextPwd()`` one tab-separated row is
    written to standard output: password, word, tag, synset.

    The synset column is only looked up when the tag looks like a synset
    name (three dot-separated, non-empty parts); otherwise it is None.

    Args:
        db: object exposing ``hasNext()`` and ``nextPwd()``; the latter
            returns an iterable of segments with ``password``, ``word``
            and ``pos`` attributes.
    """
    while db.hasNext():
        for s in db.nextPwd():
            tag = classify(s)
            # classify() is defined elsewhere; guard in case it yields None
            # (e.g. a segment with no POS), which the regex cannot handle.
            is_synset = tag is not None and re.search(r'.+\..+\..+', tag)
            synset = semantics.synset(s.word, s.pos) if is_synset else None
            # Single-argument call form: same output on Python 2 and 3.
            print("{}\t{}\t{}\t{}".format(s.password, s.word, tag, synset))
Пример #4
0
def sample(db, noun_treecut, verb_treecut):
    """Print a table of words, their tags and the corresponding synsets.

    Consumes ``db`` until ``db.hasNext()`` is false. For every segment of
    every password returned by ``db.nextPwd()`` one tab-separated row is
    written to standard output: password, word, tag, synset.

    The synset column is only looked up when the tag looks like a synset
    name (three dot-separated, non-empty parts); otherwise it is None.

    Args:
        db: object exposing ``hasNext()`` and ``nextPwd()``; the latter
            returns an iterable of segments with ``password``, ``word``
            and ``pos`` attributes.
        noun_treecut: forwarded unchanged to ``classify``.
        verb_treecut: forwarded unchanged to ``classify``.
    """
    while db.hasNext():
        for s in db.nextPwd():
            tag = classify(s, noun_treecut, verb_treecut)
            # classify() is defined elsewhere; guard in case it yields None
            # (e.g. a segment with no POS), which the regex cannot handle.
            is_synset = tag is not None and re.search(r'.+\..+\..+', tag)
            synset = semantics.synset(s.word, s.pos) if is_synset else None
            # Single-argument call form: same output on Python 2 and 3.
            print("{}\t{}\t{}\t{}".format(s.password, s.word, tag, synset))
Пример #5
0
def classify_semantic_backoff_pos(segment,
                                  noun_treecut,
                                  verb_treecut,
                                  lowres=False):
    """Return a list of tags that are EITHER semantic OR syntactic.

    The tags are resolved in this order:

    1. If ``DictionaryTag.is_gap`` accepts the segment's ``dictset_id``, the
       single tag comes from ``classify_gap(segment, lowres)``.
       NOTE(review): the numberN / charN / specialN classes (N being the
       segment length) are presumably produced there -- confirm.
    2. If the part-of-speech is a proper-noun tag ('np', 'np1', 'np2') or is
       missing, and the ``dictset_id`` has an entry in ``DictionaryTag.map``,
       the mapped tag is returned (month, fname, mname, surname, city or
       country, as suitable).
    3. Otherwise the WordNet synset of the word is looked up; for verb and
       noun synsets the tags produced by ``generalize`` are returned as-is.
    4. In every other case the function backs off to the part-of-speech tag.

    Examples:
        loved -> s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5

    Args:
        segment: object exposing ``dictset_id``, ``pos`` and ``word``.
        noun_treecut: forwarded unchanged to ``generalize``.
        verb_treecut: forwarded unchanged to ``generalize``.
        lowres: forwarded unchanged to ``classify_gap``.

    Returns:
        list of str -- tags
    """
    if DictionaryTag.is_gap(segment.dictset_id):
        tags = [classify_gap(segment, lowres)]
    elif segment.pos in ['np', 'np1', 'np2', None
                         ] and segment.dictset_id in DictionaryTag.map:
        tags = [DictionaryTag.map[segment.dictset_id]]
    else:
        synset = semantics.synset(segment.word, segment.pos)
        # only tries to generalize verbs and nouns
        if synset is not None and synset.pos() in ['v', 'n']:
            # Semantic tags only: the combined pos_synset form belongs to
            # classify_pos_semantic, not to this back-off classifier.
            tags = list(generalize(synset, noun_treecut, verb_treecut))
        else:
            tags = [segment.pos]

    return tags
Пример #6
0
def classify_pos_semantic(segment, noun_treecut, verb_treecut, lowres=False):
    """Fully classify the segment into a list of tags.

    Tags may combine a syntactic (part-of-speech) AND a semantic symbol:

    * Gap segments (per ``DictionaryTag.is_gap``) get the single tag
      returned by ``classify_gap(segment, lowres)``.
      NOTE(review): numberN / charN / specialN presumably originate there,
      N being the segment length -- confirm.
    * Segments whose part-of-speech is 'np', 'np1', 'np2' or None, and whose
      ``dictset_id`` is a key of ``DictionaryTag.map``, get the mapped tag
      (month, fname, mname, surname, city or country, as suitable).
    * Words with a verb or noun WordNet synset get one ``pos_synset`` tag
      per item produced by ``generalize``.
    * Anything else gets the bare part-of-speech tag.

    Examples:
        loved -> vvd_s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5

    Returns:
        list of str -- tags
    """
    if DictionaryTag.is_gap(segment.dictset_id):
        return [classify_gap(segment, lowres)]

    proper_noun_or_untagged = segment.pos in ('np', 'np1', 'np2', None)
    if proper_noun_or_untagged and segment.dictset_id in DictionaryTag.map:
        return [DictionaryTag.map[segment.dictset_id]]

    synset = semantics.synset(segment.word, segment.pos)
    # Generalization is attempted for verbs and nouns only.
    if synset is None or synset.pos() not in ('v', 'n'):
        return [segment.pos]

    combined = []
    for generalized in generalize(synset, noun_treecut, verb_treecut):
        combined.append('{}_{}'.format(segment.pos, generalized))
    return combined