def add_k_word_features_count_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    """Accumulate bag-of-words counts for the context window into ``vector``.

    The context words are obtained from
    ``A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens,
    window_size)``; each occurrence of a word increments ``vector[word]``,
    starting from 1 when the word is not yet a key.

    If ``head`` is truthy, ``vector[head]`` is then set to 1 (overwriting any
    count accumulated for that same key).

    ``vector`` is modified in place; nothing is returned.
    """
    context_words = A.k_nearest_words_vector_from_tokens(
        left_tokens, right_tokens, window_size
    )

    for token in context_words:
        if token in vector:
            vector[token] = vector[token] + 1
        else:
            vector[token] = 1

    if not head:
        return
    # The head word is recorded as a plain presence flag, not a count.
    vector[head] = 1
def add_k_word_features_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    """Add positional context-word features to ``vector`` in place.

    The context words come from
    ``A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens,
    window_size)``. That list is split at its midpoint:

    * the first half is stored under ``'w_b<i>'`` keys, numbered so that the
      last word of the first half is ``'w_b1'``;
    * the second half is stored under ``'w_a<i>'`` keys, numbered from
      ``'w_a1'`` upwards in list order.

    If ``head`` is truthy it is stored under ``'w_head'``.

    NOTE(review): splitting at the midpoint presumes the helper returns the
    left-context words followed by an equally sized block of right-context
    words -- confirm against the helper's implementation.

    Nothing is returned.
    """
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)

    # Floor division keeps ``mid`` an int; ``/`` yields a float on Python 3
    # and a float is not a valid slice index.
    mid = len(words) // 2
    left = words[:mid]
    right = words[mid:]

    for idx, word in enumerate(left):
        vector['w_b' + str(len(left) - idx)] = word

    for idx, word in enumerate(right, 1):
        vector['w_a' + str(idx)] = word

    if head:
        vector['w_head'] = head
def add_k_word_POS_features_to_vector(vector, left_tokens, right_tokens, window_size, tagger, head_tag=None):
    """Add positional part-of-speech features to ``vector`` in place.

    The context words come from
    ``A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens,
    window_size)``. The list is split at its midpoint and each half is tagged
    separately with ``tagger.tag(...)``, which must return a sequence of
    ``(word, tag)`` pairs:

    * tags of the first half are stored under ``'pos_b<i>'`` keys, numbered so
      that the last entry of the first half is ``'pos_b1'``;
    * tags of the second half are stored under ``'pos_a<i>'`` keys, numbered
      from ``'pos_a1'`` upwards in list order.

    If ``head_tag`` is truthy, its first element is unpacked as a
    ``(word, tag)`` pair and the tag is stored under ``'pos_head'``.

    NOTE(review): splitting at the midpoint presumes the helper returns the
    left-context words followed by an equally sized block of right-context
    words -- confirm against the helper's implementation.

    Nothing is returned.
    """
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)

    # Floor division keeps ``mid`` an int; ``/`` yields a float on Python 3
    # and a float is not a valid slice index.
    mid = len(words) // 2
    left = words[:mid]
    right = words[mid:]

    left_tagged = tagger.tag(left)
    right_tagged = tagger.tag(right)

    for idx, (_, tag) in enumerate(left_tagged):
        vector['pos_b' + str(len(left_tagged) - idx)] = tag

    for idx, (_, tag) in enumerate(right_tagged, 1):
        vector['pos_a' + str(idx)] = tag

    # add POS tag for head
    if head_tag:
        _, tag = head_tag[0]
        vector['pos_head'] = tag
def add_synonym_counts(tagger, left_tokens, right_tokens, vector, window):
    """Count WordNet synset names related to the context words into ``vector``.

    For every word returned by
    ``A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens,
    window)``, the word is tagged on its own with ``tagger.tag([w])``, the
    resulting tag is converted by ``wordnet_tag_from_penn_tag``, and
    ``wordnet.synsets(w, pos=...)`` is queried. For each synset found:

    * if ``ADD_SYNONYMS`` is truthy, the synset's own name is counted;
    * if ``ADD_HYPONYMS`` is truthy, the name of each hyponym is counted;
    * if ``ADD_HYPERNYMS`` is truthy, the name of each hypernym is counted.

    "Counted" means ``vector[name]`` is incremented, starting from 1 when the
    name is not yet a key. ``vector`` is modified in place; nothing is
    returned.
    """

    def bump(key):
        # Increment an existing count or start a new one at 1.
        if key in vector:
            vector[key] = vector[key] + 1
        else:
            vector[key] = 1

    context_words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window)

    for w in context_words:
        # Tag the word in isolation; only the tag of the single pair is used.
        _, penn_tag = tagger.tag([w])[0]
        wn_pos = wordnet_tag_from_penn_tag(penn_tag)

        for synset in wordnet.synsets(w, pos=wn_pos):
            if ADD_SYNONYMS:
                bump(synset.name())

            if ADD_HYPONYMS:
                for hyponym in synset.hyponyms():
                    bump(hyponym.name())

            if ADD_HYPERNYMS:
                for hypernym in synset.hypernyms():
                    bump(hypernym.name())