Пример #1
0
def add_k_word_features_count_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    """Add occurrence counts of the context-window words to ``vector`` in place.

    The words are obtained from ``A.k_nearest_words_vector_from_tokens`` called
    with ``left_tokens``, ``right_tokens`` and ``window_size``. Every returned
    word becomes a key of ``vector`` whose value is incremented once per
    occurrence (starting at 1 for a key that is not present yet).

    If ``head`` is truthy, ``vector[head]`` is set to 1 afterwards.

    Returns nothing; ``vector`` is mutated.
    """
    context = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    for token in context:
        if token in vector:
            vector[token] = vector[token] + 1
        else:
            vector[token] = 1

    if not head:
        return
    # NOTE: this assignment overwrites any count already stored under the
    # same key by the loop above; the head entry always ends up as exactly 1.
    vector[head] = 1
Пример #2
0
def add_k_word_features_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    """Add positional word features for the context window to ``vector`` in place.

    The word list returned by ``A.k_nearest_words_vector_from_tokens`` is split
    at its midpoint. The first half is stored under keys ``'w_b<k>'`` and the
    second half under keys ``'w_a<k>'``:

    * first half: the last element gets ``'w_b1'``, the one before it
      ``'w_b2'``, and so on;
    * second half: the first element gets ``'w_a1'``, the next ``'w_a2'``,
      and so on.

    If ``head`` is truthy it is stored under ``'w_head'``.

    Returns nothing; ``vector`` is mutated.
    """
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)

    # Floor division keeps the split index an int. With true division
    # (Python 3) ``len(words) / 2`` is a float and slicing raises TypeError.
    mid = len(words) // 2
    left = words[:mid]
    right = words[mid:]

    # Number the first half counting down, so the element adjacent to the
    # split point is always 'w_b1'.
    for idx, word in enumerate(left):
        vector['w_b' + str(len(left) - idx)] = word

    # Number the second half counting up from the split point.
    for idx, word in enumerate(right):
        vector['w_a' + str(idx + 1)] = word

    if head:
        vector['w_head'] = head
Пример #3
0
def add_k_word_POS_features_to_vector(vector, left_tokens, right_tokens, window_size, tagger, head_tag=None):
    """Add positional part-of-speech features for the context window to ``vector``.

    The word list returned by ``A.k_nearest_words_vector_from_tokens`` is split
    at its midpoint and each half is tagged separately with ``tagger.tag``.
    The tagger result is iterated as ``(word, tag)`` pairs; only the tag is
    stored:

    * first half: keys ``'pos_b<k>'``, with the last element as ``'pos_b1'``;
    * second half: keys ``'pos_a<k>'``, with the first element as ``'pos_a1'``.

    If ``head_tag`` is truthy, its first element is unpacked as a
    ``(word, tag)`` pair and the tag is stored under ``'pos_head'``.

    Returns nothing; ``vector`` is mutated.
    """
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)

    # Floor division keeps the split index an int. With true division
    # (Python 3) ``len(words) / 2`` is a float and slicing raises TypeError.
    mid = len(words) // 2
    left = words[:mid]
    right = words[mid:]

    left_tagged = tagger.tag(left)
    right_tagged = tagger.tag(right)

    # Number the first half counting down, so the element adjacent to the
    # split point is always 'pos_b1'.
    for idx, (_word, tag) in enumerate(left_tagged):
        vector['pos_b' + str(len(left_tagged) - idx)] = tag

    # Number the second half counting up from the split point.
    for idx, (_word, tag) in enumerate(right_tagged):
        vector['pos_a' + str(idx + 1)] = tag

    # Add the POS tag for the head; only the first pair of head_tag is used.
    if head_tag:
        _word, tag = head_tag[0]
        vector['pos_head'] = tag
Пример #4
0
def add_synonym_counts(tagger, left_tokens, right_tokens, vector, window):
    """Add WordNet-derived name counts for the context-window words to ``vector``.

    For each word returned by ``A.k_nearest_words_vector_from_tokens``:

    1. the word is tagged on its own with ``tagger.tag([w])``;
    2. the resulting tag is converted with ``wordnet_tag_from_penn_tag``;
    3. ``wordnet.synsets(w, pos=...)`` is queried with the converted tag.

    For every synset found, the counter stored in ``vector`` under a name is
    incremented (starting at 1 for a new key) for:

    * the synset's own name, when ``ADD_SYNONYMS`` is truthy;
    * the name of each of its hyponyms, when ``ADD_HYPONYMS`` is truthy;
    * the name of each of its hypernyms, when ``ADD_HYPERNYMS`` is truthy.

    Returns nothing; ``vector`` is mutated.
    """
    def _bump(name):
        # Increment the counter for ``name``, creating it at 1 if absent.
        if name in vector:
            vector[name] = vector[name] + 1
        else:
            vector[name] = 1

    for w in A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window):
        # Tag the word in isolation; only the tag of the single pair is needed.
        _, penn_tag = tagger.tag([w])[0]
        wn_pos = wordnet_tag_from_penn_tag(penn_tag)

        for synset in wordnet.synsets(w, pos=wn_pos):
            if ADD_SYNONYMS:
                _bump(synset.name())

            if ADD_HYPONYMS:
                for hyponym in synset.hyponyms():
                    _bump(hyponym.name())

            if ADD_HYPERNYMS:
                for hypernym in synset.hypernyms():
                    _bump(hypernym.name())