Example No. 1
def fit(words, tags, labels, model, builders):
    """
    compute joint error of the
    :param words: list of indices
    :param tags: list of indices
    :param labels: index
    :param model: current model to access parameters
    :param builders: builder to create state combinations
    :return: joint error
    """
    # retrieve model parameters
    if MLP:
        H = pycnn.parameter(pH)
        O = pycnn.parameter(pO)
    else:
        O = pycnn.parameter(pO)

    errs = []
    for (forward_state, backward_state), tag in zip(build_tagging_graph(words, model, builders), tags):
        f_b = pycnn.concatenate([forward_state, backward_state])
        if MLP:
            # TODO: add bias terms
            r_t = O * (pycnn.tanh(H * f_b))
        else:
            r_t = O * f_b
        err = pycnn.pickneglogsoftmax(r_t, tag)
        errs.append(err)

    return pycnn.esum(errs)
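For context, this is how a per-sentence loss like fit is typically driven; a minimal sketch, assuming a pycnn SimpleSGDTrainer and a train_data iterable of (words, tags, labels) triples (both are assumptions, not part of the example above):

trainer = pycnn.SimpleSGDTrainer(model)
for words, tags, labels in train_data:
    pycnn.renew_cg()                  # fresh computation graph per sentence
    loss = fit(words, tags, labels, model, builders)
    loss.value()                      # forward pass
    loss.backward()                   # backward pass
    trainer.update()                  # parameter update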
Example No. 2
    def calc_sentence_error(self, sentence):
        word_expression_list = self._build_word_expression_list(sentence, is_train=True)

        sentence_errors = []
        for word, word_expression in zip(sentence, word_expression_list):
            gold_label_index = self.tag_indexer.get_index(word.gold_label)
            word_error = pickneglogsoftmax(word_expression, gold_label_index)
            sentence_errors.append(word_error)
        return esum(sentence_errors)
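The two building blocks here, pickneglogsoftmax and esum, reduce to elementary operations; a NumPy sketch of their semantics, written from the standard definitions rather than taken from this codebase:

import numpy as np

def pick_neg_log_softmax(scores, index):
    # -log(softmax(scores)[index]), computed in a numerically stable way
    shifted = scores - scores.max()
    log_probs = shifted - np.log(np.exp(shifted).sum())
    return -log_probs[index]

# esum then simply sums the per-word loss expressions:
# sentence_error = sum(pick_neg_log_softmax(s, g) for s, g in zip(scores_list, gold_indices))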
Example No. 3
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats,
                 word, alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn,
                                 encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        current_loss = pc.pickneglogsoftmax(readout,
                                            alphabet_index[output_char])
        loss.append(current_loss)

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[output_char]]

    total_sequence_loss = pc.esum(loss)

    return total_sequence_loss
Example No. 4
def attend(model, vectors, state):
    w = pc.parameter(model['attention_w'])
    attention_weights = []
    for vector in vectors:
        # concatenate each encoded vector with the current decoder state
        attention_input = pc.concatenate([vector, pc.concatenate(list(state.s()))])
        # get the attention weight for the encoded vector
        attention_weights.append(w * attention_input)
    # normalize the weights
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    # apply the weights to form a weighted sum of the encoded vectors
    vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(vectors, attention_weights)])
    return vectors
Example No. 5
def attend(model, input_vectors, state):
    w1 = pc.parameter(model['attention_w1'])
    w2 = pc.parameter(model['attention_w2'])
    v = pc.parameter(model['attention_v'])
    attention_weights = []

    w2dt = w2*pc.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v*pc.tanh(w1*input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    return output_vectors
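This is the additive (Bahdanau-style) attention: a one-hidden-layer MLP scores each encoder vector against the decoder state, with w2dt hoisted out of the loop because it does not depend on the input vector. A NumPy sketch of the same computation (the shapes are assumptions: w1 is (a, d), w2 is (a, s), v is (a,)):

import numpy as np

def additive_attention(input_vectors, state, w1, w2, v):
    w2dt = w2 @ state                                        # computed once per decoder step
    scores = np.array([v @ np.tanh(w1 @ x + w2dt) for x in input_vectors])
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                                 # softmax over input positions
    return sum(wt * x for wt, x in zip(weights, input_vectors))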
Example No. 6
    def fit(self, list_folders_name, num_iterations, train_algo, dev=None):
        """
        train the tagger
        """
        print("read training data",file=sys.stderr)

        nb_tasks = len( list_folders_name )

        train_X, train_Y, task_labels, w2i, c2i, task2t2i = self.get_train_data(list_folders_name)

        ## after calling get_train_data we have self.tasks_ids
        self.task2layer = {task_id: out_layer for task_id, out_layer in zip(self.tasks_ids, self.pred_layer)}
        print("task2layer", self.task2layer, file=sys.stderr)

        # store mappings of words and tags to indices
        self.set_indices(w2i, c2i, task2t2i)

        # init lookup parameters and define graph
        print("build graph",file=sys.stderr)
        
        num_words = len(self.w2i)
        num_chars = len(self.c2i)
        
        assert(nb_tasks==len(self.pred_layer))
        
        self.predictors, self.char_rnn, self.wembeds, self.cembeds = self.build_computation_graph(num_words, num_chars)

        if train_algo == "sgd":
            trainer = pycnn.SimpleSGDTrainer(self.model)
        elif train_algo == "adam":
            trainer = pycnn.AdamTrainer(self.model)

        train_data = list(zip(train_X, train_Y, task_labels))

        for iteration in range(num_iterations):
            total_loss = 0.0
            total_tagged = 0.0
            random.shuffle(train_data)
            for ((word_indices, char_indices), y, task_of_instance) in train_data:
                # use same predict function for training and testing
                output = self.predict(word_indices, char_indices, task_of_instance, train=True)

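                # per-word negative log likelihood of the gold tag, summed over the sentence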
                loss1 = pycnn.esum([self.pick_neg_log(pred, gold) for pred, gold in zip(output, y)])
                lv = loss1.value()
                total_loss += lv
                total_tagged += len(word_indices)

                loss1.backward()
                trainer.update()

            print("iter {2} {0:>12}: {1:.2f}".format("total loss",total_loss/total_tagged,iter), file=sys.stderr)
Example No. 7
    def calc_sentence_error(self, sentence):
        renew_cg()

        for word in sentence:
            # word.vector = noise(self._get_word_vector(word), 0.1)
            word.vector = self._get_word_vector(word, use_dropout=True)
        sentence_expressions = self._build_sentence_expressions(sentence)

        sentence_errors = []
        for word, word_expression in zip(sentence, sentence_expressions):
            gold_label_index = self.tag_indexer.get_index(word.gold_label)
            word_error = pickneglogsoftmax(word_expression, gold_label_index)
            sentence_errors.append(word_error)
        return esum(sentence_errors)
Example No. 8
def decode(model, dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))

    loss = []
    for char in output:
        vector = attend(model, vectors, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        loss.append(-pc.log(pc.pick(probs, char)))
    loss = pc.esum(loss)
    return loss
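A hedged sketch of the matching greedy generation loop (int2char, the maximum length, and the .npvalue() accessor are assumptions here, not part of the example above):

def generate(model, dec_lstm, vectors, max_len=50):
    w = pc.parameter(model["decoder_w"])
    b = pc.parameter(model["decoder_b"])
    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    out = []
    for _ in range(max_len):
        s = s.add_input(attend(model, vectors, s))
        probs = pc.softmax(w * s.output() + b).npvalue()  # assumed numpy accessor
        next_char = int(probs.argmax())
        if next_char == char2int[EOS]:                    # stop at end-of-sequence
            break
        out.append(int2char[next_char])
    return ''.join(out)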
Example No. 9
    def train(
        feature_mapper,
        word_dims,
        tag_dims,
        lstm_units,
        hidden_units,
        epochs,
        batch_size,
        train_data_file,
        dev_data_file,
        model_save_file,
        droprate,
        unk_param,
        alpha=1.0,
        beta=0.0,
    ):

        start_time = time.time()

        fm = feature_mapper
        word_count = fm.total_words()
        tag_count = fm.total_tags()

        network = Network(
            word_count=word_count,
            tag_count=tag_count,
            word_dims=word_dims,
            tag_dims=tag_dims,
            lstm_units=lstm_units,
            hidden_units=hidden_units,
            struct_out=2,
            label_out=fm.total_label_actions(),
            droprate=droprate,
        )
        network.init_params()

        print('Hidden units: {},  per-LSTM units: {}'.format(
            hidden_units,
            lstm_units,
        ))
        print('Embeddings: word={}  tag={}'.format(
            (word_count, word_dims),
            (tag_count, tag_dims),
        ))
        print('Dropout rate: {}'.format(droprate))
        print('Parameters initialized in [-0.01, 0.01]')
        print('Random UNKing parameter z = {}'.format(unk_param))
        print('Exploration: alpha={} beta={}'.format(alpha, beta))

        training_data = fm.gold_data_from_file(train_data_file)
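        # -(-n // b) is ceiling division: the number of batches, rounded up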
        num_batches = -(-len(training_data) // batch_size)
        print('Loaded {} training sentences ({} batches of size {})!'.format(
            len(training_data),
            num_batches,
            batch_size,
        ))
        parse_every = -(-num_batches // 4)

        dev_trees = PhraseTree.load_treefile(dev_data_file)
        print('Loaded {} validation trees!'.format(len(dev_trees)))

        best_acc = FScore()

        for epoch in xrange(1, epochs + 1):
            print('........... epoch {} ...........'.format(epoch))

            total_cost = 0.0
            total_states = 0
            training_acc = FScore()

            np.random.shuffle(training_data)

            for b in xrange(num_batches):
                batch = training_data[(b * batch_size):((b + 1) * batch_size)]

                explore = [
                    Parser.exploration(
                        example,
                        fm,
                        network,
                        alpha=alpha,
                        beta=beta,
                    ) for example in batch
                ]
                for (_, acc) in explore:
                    training_acc += acc

                batch = [example for (example, _) in explore]

                pycnn.renew_cg()
                network.prep_params()

                errors = []

                for example in batch:

                    ## random UNKing ##
                    for (i, w) in enumerate(example['w']):
                        if w <= 2:
                            continue

                        freq = fm.word_freq_list[w]
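                        # rarer words are dropped to UNK (index 0) with probability z / (z + freq)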
                        drop_prob = unk_param / (unk_param + freq)
                        r = np.random.random()
                        if r < drop_prob:
                            example['w'][i] = 0

                    fwd, back = network.evaluate_recurrent(
                        example['w'],
                        example['t'],
                    )

                    for (left,
                         right), correct in example['struct_data'].items():
                        scores = network.evaluate_struct(
                            fwd, back, left, right)

                        probs = pycnn.softmax(scores)
                        loss = -pycnn.log(pycnn.pick(probs, correct))
                        errors.append(loss)
                    total_states += len(example['struct_data'])

                    for (left,
                         right), correct in example['label_data'].items():
                        scores = network.evaluate_label(fwd, back, left, right)

                        probs = pycnn.softmax(scores)
                        loss = -pycnn.log(pycnn.pick(probs, correct))
                        errors.append(loss)
                    total_states += len(example['label_data'])

                batch_error = pycnn.esum(errors)
                total_cost += batch_error.scalar_value()
                batch_error.backward()
                network.trainer.update()

                mean_cost = total_cost / total_states

                print(
                    '\rBatch {}  Mean Cost {:.4f} [Train: {}]'.format(
                        b,
                        mean_cost,
                        training_acc,
                    ),
                    end='',
                )
                sys.stdout.flush()

                if ((b + 1) % parse_every) == 0 or b == (num_batches - 1):
                    dev_acc = Parser.evaluate_corpus(
                        dev_trees,
                        fm,
                        network,
                    )
                    print('  [Val: {}]'.format(dev_acc))

                    if dev_acc > best_acc:
                        best_acc = dev_acc
                        network.save(model_save_file)
                        print('    [saved model: {}]'.format(model_save_file))

            current_time = time.time()
            runmins = (current_time - start_time) / 60.
            print('  Elapsed time: {:.2f}m'.format(runmins))
Example No. 10
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats,
                 word, alphabet_index, feat_index, feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = encode_chars(alphabet_index, char_lookup, encoder_frnn,
                                 encoder_rrnn, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # convert features to matching embeddings, if UNK handle properly
    feats_input = encode_feats(feat_index, feat_lookup, feats, feature_types)

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = list(set([padded_template[i]] + [output_char]))

        # get current h of the decoder
        s = s.add_input(
            pc.concatenate([prev_output_vec, prev_char_vec, feats_input]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(
            blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        # choose the feedback action by minimum neg. log likelihood,
        # initialized with the loss of the gold character
        min_neg_log_loss = pc.pickneglogsoftmax(readout,
                                                alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout,
                                                alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            # compare scalar loss values, not Expression objects
            if current_loss.value() < min_neg_log_loss.value():
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)

    return total_sequence_loss
Example No. 11
def compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word, alphabet_index, feat_index,
                 feature_types, alignment):
    pc.renew_cg()

    # read the parameters
    char_lookup = model["char_lookup"]
    feat_lookup = model["feat_lookup"]
    R = pc.parameter(model["R"])
    bias = pc.parameter(model["bias"])
    W_c = pc.parameter(model["W_c"])
    W__a = pc.parameter(model["W__a"])
    U__a = pc.parameter(model["U__a"])
    v__a = pc.parameter(model["v__a"])

    template = task1_ms2s.generate_template_from_alignment(alignment)

    blstm_outputs = task1_attention_implementation.encode_feats_and_chars(
        alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index,
        feat_lookup, feats, feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    prev_char_vec = char_lookup[alphabet_index[BEGIN_WORD]]

    loss = []
    padded_word = word + END_WORD
    padded_template = template + [END_WORD]

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # find all possible actions - copy from index, output specific character etc.
        possible_outputs = [padded_template[i]]  # unlike the variant above, the gold output_char is not included

        # get current h of the decoder
        s = s.add_input(pc.concatenate([prev_output_vec, prev_char_vec]))
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = task1_attention_implementation.attend(blstm_outputs, decoder_rnn_output,
                                                                                   W_c, v__a, W__a, U__a)

        # compute output probabilities
        readout = R * attention_output_vector + bias

        # choose the feedback action by minimum neg. log likelihood,
        # initialized with the loss of the gold character
        min_neg_log_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        prev_output_char = output_char
        prev_output_action = output_char
        for output in possible_outputs:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output])

            # append the loss of all options
            loss.append(current_loss)
            # compare scalar loss values, not Expression objects
            if current_loss.value() < min_neg_log_loss.value():
                min_neg_log_loss = current_loss
                prev_output_action = output

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[alphabet_index[prev_output_action]]
        prev_char_vec = char_lookup[alphabet_index[prev_output_char]]

    total_sequence_loss = pc.esum(loss)

    return total_sequence_loss