def classify_semantic_backoff_pos(segment):
    """ Returns a single tag that is either a semantic or a syntactic
    (part-of-speech) symbol.

    Classification proceeds in this order:

    1. Gap segments (as reported by DictionaryTag.is_gap) are delegated to
       refine_gap. According to the original documentation of this function,
       these yield numberN, charN and specialN for numbers, character
       sequences and sequences of special characters, where N denotes the
       length of the segment.
    2. Segments tagged as proper nouns ('np', 'np1', 'np2') or with no POS at
       all, whose dictset_id is a key of DictionaryTag.map, get the mapped
       tag (documented as month, fname, mname, surname, city or country).
    3. Any other word is looked up with semantics.synset. If a verb or noun
       synset is found, the tag is its generalization; otherwise the
       function backs off to the segment's POS tag.

    If generalize() yields None for a synset (a known issue), the function
    also backs off to the POS tag instead of returning None.

    Examples (from the original documentation):
        loved -> s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5

    Returns:
        the tag; this is segment.pos (which may itself be None) whenever no
        semantic tag could be produced.
    """
    if DictionaryTag.is_gap(segment.dictset_id):
        return refine_gap(segment)

    if (segment.pos in ('np', 'np1', 'np2', None)
            and segment.dictset_id in DictionaryTag.map):
        return DictionaryTag.map[segment.dictset_id]

    synset = semantics.synset(segment.word, segment.pos)

    # only tries to generalize verbs and nouns
    if synset is not None and synset.pos() in ('v', 'n'):
        tag = generalize(synset)
        # generalize() sometimes returns None; honour the documented backoff
        # rather than handing None to the caller.
        if tag is not None:
            return tag

    return segment.pos
def classify_pos_semantic(segment):
    """ Fully classify the segment, returning a single tag that may combine a
    syntactic (part-of-speech) and a semantic symbol.

    Classification proceeds in this order:

    1. Gap segments (as reported by DictionaryTag.is_gap) are delegated to
       refine_gap. According to the original documentation of this function,
       these yield numberN, charN and specialN for numbers, character
       sequences and sequences of special characters, where N denotes the
       length of the segment.
    2. Segments tagged as proper nouns ('np', 'np1', 'np2') or with no POS at
       all, whose dictset_id is a key of DictionaryTag.map, get the mapped
       tag (documented as month, fname, mname, surname, city or country).
    3. Any other word is looked up with semantics.synset. If a verb or noun
       synset is found, the tag has the form pos_synset, where pos is the
       segment's POS tag and synset is the generalization of the synset.
       Otherwise the bare POS tag is returned.

    Examples (from the original documentation):
        loved -> vvd_s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5
    """
    dictset_id = segment.dictset_id

    if DictionaryTag.is_gap(dictset_id):
        return refine_gap(segment)

    looks_like_proper_noun = segment.pos in ('np', 'np1', 'np2', None)
    if looks_like_proper_noun and segment.dictset_id in DictionaryTag.map:
        return DictionaryTag.map[segment.dictset_id]

    found = semantics.synset(segment.word, segment.pos)

    # only verbs and nouns are generalized; everything else keeps its POS tag
    if found is None or found.pos() not in ('v', 'n'):
        return segment.pos

    # TODO: sometimes generalize is returning None (the tag then ends in
    # '_None'). #fixit
    return '{}_{}'.format(segment.pos, generalize(found))
def sample(db):
    """ Prints data for a table that shows words, the corresponding synsets,
    and their generalizations.

    Walks every password provided by db (db.hasNext / db.nextPwd) and, for
    each of its segments, prints one tab-separated row:

        password <TAB> word <TAB> tag <TAB> synset

    The synset column is only looked up when the tag looks like a synset
    name (three dot-separated parts); otherwise it is None.

    NOTE(review): classify is not defined in this part of the file; it is
    assumed to return a single string tag -- confirm.
    """
    synset_shape = r'.+\..+\..+'

    while db.hasNext():
        for seg in db.nextPwd():
            tag = classify(seg)

            # test if it's a synset
            looks_like_synset = re.search(synset_shape, tag) is not None
            synset = (semantics.synset(seg.word, seg.pos)
                      if looks_like_synset else None)

            # single-argument print(...) behaves the same as the print
            # statement under Python 2
            print("{}\t{}\t{}\t{}".format(seg.password, seg.word, tag, synset))
def sample(db, noun_treecut, verb_treecut):
    """ Prints data for a table that shows words, the corresponding synsets,
    and their generalizations.

    Walks every password provided by db (db.hasNext / db.nextPwd) and, for
    each of its segments, prints one tab-separated row:

        password <TAB> word <TAB> tag <TAB> synset

    noun_treecut and verb_treecut are passed through to classify unchanged.
    The synset column is only looked up when the tag looks like a synset
    name (three dot-separated parts); otherwise it is None.

    NOTE(review): classify is not defined in this part of the file; it is
    assumed to return a single string tag -- confirm.
    """
    synset_shape = r'.+\..+\..+'

    while db.hasNext():
        for seg in db.nextPwd():
            tag = classify(seg, noun_treecut, verb_treecut)

            # test if it's a synset
            looks_like_synset = re.search(synset_shape, tag) is not None
            synset = (semantics.synset(seg.word, seg.pos)
                      if looks_like_synset else None)

            # single-argument print(...) behaves the same as the print
            # statement under Python 2
            print("{}\t{}\t{}\t{}".format(seg.password, seg.word, tag, synset))
def classify_semantic_backoff_pos(segment, noun_treecut, verb_treecut,
                                  lowres=False):
    """ Returns a list of tags containing EITHER semantic OR syntactic
    (part-of-speech) symbols, never a combination of both.

    Classification proceeds in this order:

    1. Gap segments (as reported by DictionaryTag.is_gap) are delegated to
       classify_gap, which also receives lowres. According to the original
       documentation of this function, these yield numberN, charN and
       specialN for numbers, character sequences and sequences of special
       characters, where N denotes the length of the segment.
    2. Segments tagged as proper nouns ('np', 'np1', 'np2') or with no POS at
       all, whose dictset_id is a key of DictionaryTag.map, get the mapped
       tag (documented as month, fname, mname, surname, city or country).
    3. Any other word is looked up with semantics.synset. If a verb or noun
       synset is found, the tags are its generalizations under the given
       tree cuts; otherwise the function backs off to the POS tag.

    Examples (from the original documentation):
        loved -> s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5

    Args:
        segment: the segment to classify; its dictset_id, pos and word
            attributes are read.
        noun_treecut: passed through to generalize.
        verb_treecut: passed through to generalize.
        lowres: passed through to classify_gap for gap segments.

    Returns:
        list of str -- tags
    """
    if DictionaryTag.is_gap(segment.dictset_id):
        return [classify_gap(segment, lowres)]

    if (segment.pos in ('np', 'np1', 'np2', None)
            and segment.dictset_id in DictionaryTag.map):
        return [DictionaryTag.map[segment.dictset_id]]

    synset = semantics.synset(segment.word, segment.pos)

    # only tries to generalize verbs and nouns
    if synset is not None and synset.pos() in ('v', 'n'):
        # Semantic tags are returned as-is, WITHOUT a POS prefix: that is
        # what distinguishes this classifier from classify_pos_semantic,
        # which emits pos_synset tags.
        return list(generalize(synset, noun_treecut, verb_treecut))

    return [segment.pos]
def classify_pos_semantic(segment, noun_treecut, verb_treecut, lowres=False):
    """ Fully classify the segment, returning a list of tags that may combine
    syntactic (part-of-speech) AND semantic symbols.

    Classification proceeds in this order:

    1. Gap segments (as reported by DictionaryTag.is_gap) are delegated to
       classify_gap, which also receives lowres. According to the original
       documentation of this function, these yield numberN, charN and
       specialN for numbers, character sequences and sequences of special
       characters, where N denotes the length of the segment.
    2. Segments tagged as proper nouns ('np', 'np1', 'np2') or with no POS at
       all, whose dictset_id is a key of DictionaryTag.map, get the mapped
       tag (documented as month, fname, mname, surname, city or country).
    3. Any other word is looked up with semantics.synset. If a verb or noun
       synset is found, one tag of the form pos_synset is produced for each
       generalization of the synset under the given tree cuts, where pos is
       the segment's POS tag. Otherwise the only tag is the bare POS tag.

    Examples (from the original documentation):
        loved -> vvd_s.love.v.01
        paris -> city
        jonas -> mname
        cindy -> fname
        aaaaa -> char5

    Returns:
        list of str -- tags
    """
    dictset_id = segment.dictset_id

    if DictionaryTag.is_gap(dictset_id):
        return [classify_gap(segment, lowres)]

    looks_like_proper_noun = segment.pos in ('np', 'np1', 'np2', None)
    if looks_like_proper_noun and segment.dictset_id in DictionaryTag.map:
        return [DictionaryTag.map[segment.dictset_id]]

    found = semantics.synset(segment.word, segment.pos)

    # only verbs and nouns are generalized; everything else keeps its POS tag
    if found is None or found.pos() not in ('v', 'n'):
        return [segment.pos]

    generalized = generalize(found, noun_treecut, verb_treecut)
    return ['{}_{}'.format(segment.pos, semantic_tag)
            for semantic_tag in generalized]