示例#1
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Tokens are visited in this order:
        s1, s2, s3, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2),
        lc1(lc1(s1)), rc1(rc1(s1)), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:36]   word id and POS id, interleaved token by token
        [36:48]  label ids of the 12 child tokens (everything after b3)

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start
    stack_top = [configuration.get_stack(depth) for depth in range(3)]

    # First and second left/right children of s1, then of s2.
    near_children = []
    for head in stack_top[:2]:
        for rank in (1, 2):
            near_children.append(configuration.get_left_child(head, rank))
            near_children.append(configuration.get_right_child(head, rank))

    # near_children[0:2] are lc1/rc1 of s1, near_children[4:6] those of s2;
    # they are the parents of the grandchild tokens.
    far_children = []
    for start in (0, 4):
        far_children.append(
            configuration.get_left_child(near_children[start], 1))
        far_children.append(
            configuration.get_right_child(near_children[start + 1], 1))

    buffer_front = [configuration.get_buffer(offset) for offset in range(3)]
    tokens = stack_top + buffer_front + near_children + far_children

    features = []
    for token in tokens:
        features.append(
            vocabulary.get_word_id(configuration.get_word(token)))
        features.append(vocabulary.get_pos_id(configuration.get_pos(token)))

    # Only the child tokens (index 6 onwards) contribute an arc label.
    features += [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in tokens[6:]
    ]
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order (note that the buffer comes before the stack here):
        b1, b2, b3, s1, s2, s3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2),
        lc1(lc1(s1)), rc1(rc1(s1)), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   POS ids of all 18 tokens
        [18:36]  word ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start
    front_tokens = [configuration.get_buffer(offset) for offset in range(3)]
    front_tokens += [configuration.get_stack(depth) for depth in range(3)]
    top, second = front_tokens[3], front_tokens[4]

    dependents = []
    # First and second left/right children of the two stack heads.
    for head in (top, second):
        for rank in (1, 2):
            dependents.append(configuration.get_left_child(head, rank))
            dependents.append(configuration.get_right_child(head, rank))
    # Left child of the left child, right child of the right child.
    for head in (top, second):
        dependents.append(
            configuration.get_left_child(
                configuration.get_left_child(head, 1), 1))
        dependents.append(
            configuration.get_right_child(
                configuration.get_right_child(head, 1), 1))

    every_token = front_tokens + dependents
    pos_ids = [
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in every_token
    ]
    word_ids = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in every_token
    ]
    label_ids = [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in dependents
    ]
    features = pos_ids + word_ids + label_ids

    # TODO(Students) End

    assert len(features) == 48
    return features
示例#3
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order:
        s1, s2, s3, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1), lc1(lc1(s1)), rc1(rc1(s1)),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start
    stack_tokens = [configuration.get_stack(i) for i in range(3)]
    buffer_tokens = [configuration.get_buffer(i) for i in range(3)]

    # Six child tokens for each of the two stack heads s1 and s2.
    child_tokens = []
    for head in stack_tokens[:2]:
        child_tokens.extend([
            configuration.get_left_child(head, 1),
            configuration.get_right_child(head, 1),
            configuration.get_left_child(head, 2),
            configuration.get_right_child(head, 2),
            configuration.get_left_child(
                configuration.get_left_child(head, 1), 1),
            configuration.get_right_child(
                configuration.get_right_child(head, 1), 1),
        ])

    tokens = stack_tokens + buffer_tokens + child_tokens

    # Keep token indices and ids in separate lists instead of overwriting
    # the token list in place.
    word_ids = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens
    ]
    pos_tags = [
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in tokens
    ]
    arc_labels = [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in child_tokens
    ]

    features = word_ids + pos_tags + arc_labels

    # TODO(Students) End

    assert len(features) == 48

    return features
示例#4
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order:
        s1, s2, s3, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2),
        lc1(lc1(s1)), rc1(rc1(s1)), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start

    heads = [configuration.get_stack(depth) for depth in range(3)]
    lookahead = [configuration.get_buffer(offset) for offset in range(3)]

    # Children are gathered for the first two stack entries only.
    dependents = []
    for head in heads[:2]:
        for rank in (1, 2):
            dependents += [
                configuration.get_left_child(head, rank),
                configuration.get_right_child(head, rank),
            ]
    for head in heads[:2]:
        dependents += [
            configuration.get_left_child(
                configuration.get_left_child(head, 1), 1),
            configuration.get_right_child(
                configuration.get_right_child(head, 1), 1),
        ]

    tokens = heads + lookahead + dependents

    # Word block first, then the POS block, then the label block.
    features = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens
    ]
    features.extend(
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in tokens)
    features.extend(
        vocabulary.get_label_id(configuration.get_label(token))
        for token in dependents)

    # TODO(Students) End

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order:
        s1, s2, s3, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2),
        lc1(lc1(s1)), rc1(rc1(s1)), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start

    def word_of(token):
        return vocabulary.get_word_id(configuration.get_word(token))

    def tag_of(token):
        return vocabulary.get_pos_id(configuration.get_pos(token))

    def arc_of(token):
        return vocabulary.get_label_id(configuration.get_label(token))

    # s1, s2, s3, b1, b2, b3
    plain_tokens = [configuration.get_stack(depth) for depth in (0, 1, 2)]
    plain_tokens += [configuration.get_buffer(offset) for offset in (0, 1, 2)]

    child_tokens = []
    # lc1(si), rc1(si), lc2(si), rc2(si), i = 1, 2
    for head in plain_tokens[:2]:
        for rank in (1, 2):
            child_tokens.append(configuration.get_left_child(head, rank))
            child_tokens.append(configuration.get_right_child(head, rank))
    # lc1(lc1(si)), rc1(rc1(si)), i = 1, 2
    for head in plain_tokens[:2]:
        child_tokens.append(
            configuration.get_left_child(
                configuration.get_left_child(head, 1), 1))
        child_tokens.append(
            configuration.get_right_child(
                configuration.get_right_child(head, 1), 1))

    every_token = plain_tokens + child_tokens
    features = [word_of(token) for token in every_token]
    features += [tag_of(token) for token in every_token]
    features += [arc_of(token) for token in child_tokens]
    assert len(features) == 48

    return features
示例#6
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order (the stack is read from position 2 down to position 0):
        s3, s2, s1, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1), lc1(lc1(s1)), rc1(rc1(s1)),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start
    tokens = [configuration.get_stack(depth) for depth in reversed(range(3))]
    tokens += [configuration.get_buffer(offset) for offset in range(3)]

    children = []
    for depth in range(2):
        head = configuration.get_stack(depth)
        nearest_left = configuration.get_left_child(head, 1)
        nearest_right = configuration.get_right_child(head, 1)
        children += [
            nearest_left,
            nearest_right,
            configuration.get_left_child(head, 2),
            configuration.get_right_child(head, 2),
            configuration.get_left_child(nearest_left, 1),
            configuration.get_right_child(nearest_right, 1),
        ]
    tokens += children

    word_ids = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens
    ]
    pos_ids = [
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in tokens
    ]
    label_ids = [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in children
    ]
    return word_ids + pos_ids + label_ids
示例#7
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order (the stack is read from position 2 down to position 0):
        s3, s2, s1, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1), lc1(lc1(s1)), rc1(rc1(s1)),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start
    stack_tokens = [configuration.get_stack(j) for j in (2, 1, 0)]
    buffer_tokens = [configuration.get_buffer(j) for j in (0, 1, 2)]

    child_tokens = []
    # stack_tokens is deepest-first, so reversing it yields s1, s2, s3;
    # only s1 and s2 contribute child features.
    for head in stack_tokens[::-1][:2]:
        left_1 = configuration.get_left_child(head, 1)
        right_1 = configuration.get_right_child(head, 1)
        child_tokens.append(left_1)
        child_tokens.append(right_1)
        child_tokens.append(configuration.get_left_child(head, 2))
        child_tokens.append(configuration.get_right_child(head, 2))
        child_tokens.append(configuration.get_left_child(left_1, 1))
        child_tokens.append(configuration.get_right_child(right_1, 1))

    tokens = stack_tokens + buffer_tokens + child_tokens

    features = []
    features.extend(
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens)
    features.extend(
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in tokens)
    features.extend(
        vocabulary.get_label_id(configuration.get_label(token))
        for token in child_tokens)

    # TODO(Students) End

    assert len(features) == 48
    return features
示例#8
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order (all left children first, then all right children):
        s1, s2, s3, b1, b2, b3,
        lc1(s1), lc2(s1), lc1(s2), lc2(s2),
        rc1(s1), rc2(s1), rc1(s2), rc2(s2),
        lc1(lc1(s1)), lc1(lc1(s2)),
        rc1(rc1(s1)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS (part-of-speech) ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """
    # TODO(Students) Start
    stack_top = [configuration.get_stack(depth) for depth in range(3)]
    buffer_front = [configuration.get_buffer(offset) for offset in range(3)]

    # Child features are taken for every stack entry except the last one
    # collected, i.e. for s1 and s2.
    heads = stack_top[:-1]

    lefts = [
        configuration.get_left_child(head, rank)
        for head in heads for rank in (1, 2)
    ]
    rights = [
        configuration.get_right_child(head, rank)
        for head in heads for rank in (1, 2)
    ]
    left_of_left = [
        configuration.get_left_child(
            configuration.get_left_child(head, 1), 1)
        for head in heads
    ]
    right_of_right = [
        configuration.get_right_child(
            configuration.get_right_child(head, 1), 1)
        for head in heads
    ]

    dependents = lefts + rights + left_of_left + right_of_right
    tokens = stack_top + buffer_front + dependents

    features = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens
    ]
    features += [
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in tokens
    ]
    features += [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in dependents
    ]
    # TODO(Students) End

    assert len(features) == 48
    return features
示例#9
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 features of "A Fast and Accurate Dependency Parser
    using Neural Networks" (2014) for one parser configuration.

    Notation: s1..s3 are ``get_stack(0..2)``, b1..b3 are
    ``get_buffer(0..2)`` and ``lcK(x)`` / ``rcK(x)`` stand for
    ``get_left_child(x, K)`` / ``get_right_child(x, K)``.

    Token order:
        s1, s2, s3, b1, b2, b3,
        lc1(s1), rc1(s1), lc2(s1), rc2(s1), lc1(lc1(s1)), rc1(rc1(s1)),
        lc1(s2), rc1(s2), lc2(s2), rc2(s2), lc1(lc1(s2)), rc1(rc1(s2))

    Layout of the returned list:
        [0:18]   word ids of all 18 tokens
        [18:36]  POS ids of all 18 tokens
        [36:48]  label ids of the 12 child tokens

    NOTE(review): the signature advertises ``List[List[int]]`` but a flat
    list is built here -- confirm against the caller.
    """

    #Reference: Understood the features from the github implementation of:
    #akjindal53244/dependency_parsing_tf/utils/feature_extraction.py

    # TODO(Students) Start
    direct_tokens = [configuration.get_stack(i) for i in range(3)]
    direct_tokens += [configuration.get_buffer(i) for i in range(3)]

    children_token = []
    for head in direct_tokens[:2]:
        leftmost = configuration.get_left_child(head, 1)
        rightmost = configuration.get_right_child(head, 1)
        children_token.extend([
            leftmost,
            rightmost,
            configuration.get_left_child(head, 2),
            configuration.get_right_child(head, 2),
            # The grandchildren must hang off THIS head's own children.
            # Indexing children_token[0] / children_token[1] here would
            # always pick the children of s1 and duplicate s1's
            # grandchildren in the slots meant for s2.
            configuration.get_left_child(leftmost, 1),
            configuration.get_right_child(rightmost, 1),
        ])

    all_tokens = direct_tokens + children_token

    features = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in all_tokens
    ]
    features.extend(
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in all_tokens)
    features.extend(
        vocabulary.get_label_id(configuration.get_label(token))
        for token in children_token)
    # TODO(Students) End

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================

    Build the 48-entry feature list described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)

    Layout of the returned flat list:
      * 18 word ids, then 18 POS ids, then 12 label ids;
      * the 18 tokens are stack positions 0-2, buffer positions 0-2 and
        12 child tokens of stack positions 0 and 1;
      * label ids are produced for the 12 child tokens only.

    =================================================================
    """
    # TODO(Students) Start
    stack_tokens = [configuration.get_stack(depth) for depth in range(3)]
    buffer_tokens = [configuration.get_buffer(offset) for offset in range(3)]
    heads = stack_tokens[:2]

    # Children number 1 and 2 on each side of the two heads.
    left_pairs = [[configuration.get_left_child(head, nth) for nth in (1, 2)]
                  for head in heads]
    right_pairs = [[configuration.get_right_child(head, nth) for nth in (1, 2)]
                   for head in heads]

    # Left child of each first left child / right child of each first right child.
    left_of_left = [configuration.get_left_child(pair[0], 1)
                    for pair in left_pairs]
    right_of_right = [configuration.get_right_child(pair[0], 1)
                      for pair in right_pairs]

    # Per head: left1, right1, left2, right2; the nested children follow
    # after both heads have been listed.
    child_tokens = []
    for (left_1, left_2), (right_1, right_2) in zip(left_pairs, right_pairs):
        child_tokens += [left_1, right_1, left_2, right_2]
    for inner_left, inner_right in zip(left_of_left, right_of_right):
        child_tokens += [inner_left, inner_right]

    tokens = stack_tokens + buffer_tokens + child_tokens

    # Resolve tags and labels first, then map everything to ids.
    pos_tags = [configuration.get_pos(token) for token in tokens]
    arc_labels = [configuration.get_label(token) for token in child_tokens]

    features = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens
    ]
    features.extend(vocabulary.get_pos_id(tag) for tag in pos_tags)
    features.extend(vocabulary.get_label_id(label) for label in arc_labels)
    # TODO(Students) End

    assert len(features) == 48
    return features
示例#11
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================

    Build the 48-entry feature list described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)

    Layout of the returned flat list:
      * 18 word ids, then 18 POS ids, then 12 label ids;
      * the 18 tokens are stack positions 2, 1, 0 (in that order),
        buffer positions 0-2, then six child tokens for stack position 0
        followed by six for stack position 1;
      * label ids are produced for the 12 child tokens only.

    =================================================================
    """
    # TODO(Students) Start
    word_ids = []
    pos_ids = []
    label_ids = []

    def record(token, with_label):
        """Append the ids for one token (word, optional label, POS)."""
        word_ids.append(vocabulary.get_word_id(configuration.get_word(token)))
        if with_label:
            label_ids.append(
                vocabulary.get_label_id(configuration.get_label(token)))
        pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(token)))

    def child_tokens(head):
        """Lazily yield the six child tokens of *head* in feature order."""
        yield configuration.get_left_child(head, 1)
        yield configuration.get_right_child(head, 1)
        yield configuration.get_left_child(head, 2)
        yield configuration.get_right_child(head, 2)
        yield configuration.get_left_child(
            configuration.get_left_child(head, 1), 1)
        yield configuration.get_right_child(
            configuration.get_right_child(head, 1), 1)

    # Stack positions are visited deepest-first: 2, 1, 0.
    for depth in reversed(range(3)):
        record(configuration.get_stack(depth), with_label=False)

    for offset in range(3):
        record(configuration.get_buffer(offset), with_label=False)

    # Only the children of stack positions 0 and 1 contribute labels.
    for depth in range(2):
        for child in child_tokens(configuration.get_stack(depth)):
            record(child, with_label=True)

    features = word_ids + pos_ids + label_ids
    # TODO(Students) End

    assert len(features) == 48
    return features
示例#12
0
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================

    Build the 48-entry feature list described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)

    Layout of the returned flat list (note that labels precede POS ids
    in this implementation):
      * 18 word ids, then 12 label ids, then 18 POS ids;
      * the 18 tokens are six child tokens for stack position 0, six for
        stack position 1, then stack positions 0-2 and buffer
        positions 0-2;
      * label ids are produced for the 12 child tokens only.

    =================================================================
    """

    # TODO(Students) Start
    child_tokens = []
    for depth in range(2):
        # Look the head up once instead of once per child query.
        head = configuration.get_stack(depth)
        first_left = configuration.get_left_child(head, 1)
        first_right = configuration.get_right_child(head, 1)
        child_tokens.extend([
            first_left,
            configuration.get_left_child(head, 2),
            first_right,
            configuration.get_right_child(head, 2),
            configuration.get_left_child(first_left, 1),
            configuration.get_right_child(first_right, 1),
        ])

    # Children come first, then the stack and buffer tokens themselves.
    tokens = list(child_tokens)
    tokens.extend(configuration.get_stack(depth) for depth in range(3))
    tokens.extend(configuration.get_buffer(offset) for offset in range(3))

    word_ids = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in tokens
    ]
    label_ids = [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in child_tokens
    ]
    pos_ids = [
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in tokens
    ]

    features = word_ids + label_ids + pos_ids

    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================

    Build the 48-entry feature list described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)

    Layout of the returned flat list:
      * 18 word ids, then 18 POS ids, then 12 label ids;
      * the first six tokens interleave stack and buffer positions:
        s1, b1, s2, b2, s3, b3;
      * the remaining twelve are six child tokens for stack position 0
        followed by six for stack position 1 (lc1, rc1, lc2, rc2,
        lc1(lc1), rc1(rc1));
      * label ids are produced for the 12 child tokens only.

    =================================================================
    """
    # TODO(Students) Start
    word_ids = []
    pos_ids = []
    label_ids = []

    # Stack/buffer tokens at the same position are emitted as a pair.
    for depth in range(3):
        pair = (configuration.get_stack(depth),
                configuration.get_buffer(depth))
        word_ids.extend(
            vocabulary.get_word_id(configuration.get_word(token))
            for token in pair)
        pos_ids.extend(
            vocabulary.get_pos_id(configuration.get_pos(token))
            for token in pair)

    left = configuration.get_left_child
    right = configuration.get_right_child
    for depth in range(2):
        head = configuration.get_stack(depth)

        # lc1, rc1, lc2, rc2 of the head.
        direct = [side(head, nth) for nth in (1, 2) for side in (left, right)]
        # lc1(lc1(head)) and rc1(rc1(head)).
        nested = [left(left(head, 1), 1), right(right(head, 1), 1)]

        for child in direct + nested:
            word_ids.append(
                vocabulary.get_word_id(configuration.get_word(child)))
            pos_ids.append(
                vocabulary.get_pos_id(configuration.get_pos(child)))
            label_ids.append(
                vocabulary.get_label_id(configuration.get_label(child)))

    features = word_ids + pos_ids + label_ids

    # TODO(Students) End

    assert len(features) == 48
    return features