def trigger_description(trig_sentdict, ant_sentdict, trigger, POS_TAGS,
                        AUX_WORDS):
    """Assemble the feature list that describes ``trigger``.

    The returned list is the concatenation, in this order, of:

    1. ``truth(...)`` of whether the trigger's subtree has exactly as many
       leaves as the trigger has words;
    2. the number of trigger words;
    3. one counter per entry of ``POS_TAGS``, tallied over a slice of the
       trigger context's ``'pos'`` sequence;
    4. ``VC.lemmacategoryvector`` of the lemma at the trigger index;
    5. ``VC.lemmavector`` of that same lemma;
    6. ``VC.auxwordvector`` of the word at the trigger index.

    Args:
        trig_sentdict: Mapping with ``'words'`` and ``'lemmas'`` sequences,
            both indexed by ``trigger.get_idx()``.
        ant_sentdict: Not used by this function.
        trigger: Object providing ``get_words()``, ``get_subtree()``,
            ``get_context()`` and ``get_idx()``.
        POS_TAGS: Iterable of tags; each one gets a counter in the output.
        AUX_WORDS: Passed through unchanged to ``VC.auxwordvector``.

    Returns:
        A list holding the features described above.

    Raises:
        KeyError: If a tag inside the counted slice is not in ``POS_TAGS``.
    """
    words_in_trigger = trigger.get_words()
    tree = trigger.get_subtree()

    # Find the first context word equal to the word at the trigger index.
    # ``start`` ends up as that word's position, or as the total number of
    # context words when none of them matches.
    start = 0
    for seen, word in enumerate(trigger.get_context()['words'], 1):
        if word == trig_sentdict['words'][trigger.get_idx()]:
            break
        start = seen

    # Features 1,2
    features = [
        truth(len(tree.leaves()) == len(words_in_trigger)),
        len(words_in_trigger),
    ]

    # Feature set 3.
    tag_counts = dict.fromkeys(POS_TAGS, 0)

    # NOTE(review): the slice end is len(trigger words) used as an absolute
    # index, not ``start + len(...)``; whenever ``start`` is at or past that
    # value nothing is counted. Confirm this is the intended window.
    for tag in trigger.get_context()['pos'][start:len(words_in_trigger)]:
        tag_counts[tag] += 1

    features.extend(tag_counts.values())

    # Feature sets 4,5,6. Description of the auxiliary.
    idx = trigger.get_idx()
    lemma = trig_sentdict['lemmas'][idx]
    features.extend(VC.lemmacategoryvector(lemma))
    features.extend(VC.lemmavector(lemma))
    features.extend(VC.auxwordvector(trig_sentdict['words'][idx], AUX_WORDS))

    return features
示例#2
0
def trigger_description(trig_sentdict, ant_sentdict, trigger, POS_TAGS, AUX_WORDS):
    """Return the list of features computed for ``trigger``.

    Layout of the result, in order:

    * features 1,2 -- ``truth`` of "subtree leaf count equals trigger word
      count", followed by the trigger word count itself;
    * feature set 3 -- a counter for every tag in ``POS_TAGS``, filled from a
      slice of ``trigger.get_context()['pos']``;
    * feature sets 4,5,6 -- the outputs of ``VC.lemmacategoryvector`` and
      ``VC.lemmavector`` for ``trig_sentdict['lemmas'][trigger.get_idx()]``,
      then ``VC.auxwordvector`` for ``trig_sentdict['words'][trigger.get_idx()]``
      together with ``AUX_WORDS``.

    ``ant_sentdict`` is accepted but never read here.

    Raises ``KeyError`` when a tag in the counted slice is missing from
    ``POS_TAGS``.
    """
    trigger_tokens = trigger.get_words()
    trigger_tree = trigger.get_subtree()

    # Count the context words that come before the first one equal to the
    # word at the trigger index (all of them if there is no such word).
    preceding = 0
    for token in trigger.get_context()['words']:
        is_trigger_word = token == trig_sentdict['words'][trigger.get_idx()]
        if is_trigger_word:
            break
        preceding += 1

    description = []

    # Features 1,2
    same_size = len(trigger_tree.leaves()) == len(trigger_tokens)
    description.append(truth(same_size))
    description.append(len(trigger_tokens))

    # Feature set 3.
    counts = {tag: 0 for tag in POS_TAGS}

    # NOTE(review): the upper bound below is the trigger word count taken as
    # an absolute index rather than ``preceding + len(trigger_tokens)``, so
    # the window is empty once ``preceding`` reaches it -- confirm intent.
    window = trigger.get_context()['pos'][preceding:len(trigger_tokens)]
    for tag in window:
        counts[tag] += 1

    description.extend(counts[tag] for tag in counts)

    # Feature sets 4,5,6. Description of the auxiliary.
    position = trigger.get_idx()
    trigger_lemma = trig_sentdict['lemmas'][position]
    description.extend(VC.lemmacategoryvector(trigger_lemma))
    description.extend(VC.lemmavector(trigger_lemma))
    description.extend(
        VC.auxwordvector(trig_sentdict['words'][position], AUX_WORDS))

    return description
示例#3
0
def make_vector(sentdict,
                aux,
                features,
                aux_categories,
                aux_lemmas,
                aux_words,
                surrounding_words,
                pos_tags,
                pos_bigrams,
                make_old=False):
    """Build the feature vector for ``aux`` within ``sentdict``.

    With ``make_old`` truthy, the vector comes only from the ``old_vc``
    helpers: ``lemmacategoryvector`` and ``lemmavector`` on ``aux.lemma``,
    ``auxwordvector`` on ``aux.word`` with ``aux_words``, and
    ``myfeaturesvector`` on ``sentdict``, ``aux.wordnum`` and ``features``.
    The remaining vocabulary arguments are ignored on that path.

    Otherwise each feature group is included only when its name is found in
    ``features`` (checked with ``in``):

    * ``'aux'``     -- ``lemma_category_vector``, ``lemma_vector``,
      ``aux_vector``;
    * ``'words'``   -- ``aux_structure_vector`` over ``'words'`` with
      ``surrounding_words``;
    * ``'pos'``     -- ``aux_structure_vector`` over ``'pos'`` with
      ``pos_tags``;
    * ``'bigrams'`` -- ``aux_pos_bigrams_vector`` with ``pos_bigrams``.

    ``linguistic_features_vector(sentdict, aux, features)`` is always added
    last on this path.

    Returns:
        A new list with the selected features, in the order given above.
    """
    # Legacy path first, so the current path below reads without nesting.
    if make_old:
        legacy = []
        legacy.extend(old_vc.lemmacategoryvector(aux.lemma))
        legacy.extend(old_vc.lemmavector(aux.lemma))
        legacy.extend(old_vc.auxwordvector(aux.word, aux_words))
        legacy.extend(
            old_vc.myfeaturesvector(sentdict, aux.wordnum, features))
        return legacy

    current = []

    if 'aux' in features:
        current.extend(lemma_category_vector(aux, aux_categories))
        current.extend(lemma_vector(aux, aux_lemmas))
        current.extend(aux_vector(aux, aux_words))

    if 'words' in features:
        current.extend(
            aux_structure_vector(sentdict, aux, 'words', surrounding_words))

    if 'pos' in features:
        current.extend(aux_structure_vector(sentdict, aux, 'pos', pos_tags))

    if 'bigrams' in features:
        current.extend(aux_pos_bigrams_vector(sentdict, aux, pos_bigrams))

    # Appended regardless of which named groups were requested.
    current.extend(linguistic_features_vector(sentdict, aux, features))

    return current
示例#4
0
def make_vector(sentdict, aux, features, aux_categories, aux_lemmas, aux_words, surrounding_words, pos_tags, pos_bigrams, make_old=False):
    """Return the list of features for the auxiliary ``aux``.

    Two construction paths exist, chosen by ``make_old``:

    * truthy -- concatenates ``old_vc.lemmacategoryvector(aux.lemma)``,
      ``old_vc.lemmavector(aux.lemma)``,
      ``old_vc.auxwordvector(aux.word, aux_words)`` and
      ``old_vc.myfeaturesvector(sentdict, aux.wordnum, features)``;
    * falsy (default) -- adds the ``'aux'``, ``'words'``, ``'pos'`` and
      ``'bigrams'`` groups, each only if that name is ``in features``, and
      then always adds ``linguistic_features_vector(sentdict, aux, features)``.

    ``aux_categories``, ``aux_lemmas``, ``surrounding_words``, ``pos_tags``
    and ``pos_bigrams`` are only consulted on the default path.
    """
    collected = []

    if make_old:
        # Legacy feature set, driven by the attributes of ``aux``.
        collected.extend(old_vc.lemmacategoryvector(aux.lemma))
        collected.extend(old_vc.lemmavector(aux.lemma))
        collected.extend(old_vc.auxwordvector(aux.word, aux_words))
        collected.extend(old_vc.myfeaturesvector(sentdict, aux.wordnum, features))
    else:
        if 'aux' in features:
            collected.extend(lemma_category_vector(aux, aux_categories))
            collected.extend(lemma_vector(aux, aux_lemmas))
            collected.extend(aux_vector(aux, aux_words))

        if 'words' in features:
            collected.extend(aux_structure_vector(sentdict, aux, 'words', surrounding_words))

        if 'pos' in features:
            collected.extend(aux_structure_vector(sentdict, aux, 'pos', pos_tags))

        if 'bigrams' in features:
            collected.extend(aux_pos_bigrams_vector(sentdict, aux, pos_bigrams))

        # This group is not gated by a name check in ``features``.
        collected.extend(linguistic_features_vector(sentdict, aux, features))

    return collected