Example #1
def run_training(data):
    with tf.Graph().as_default():
        global_step = tf.Variable(0, name='global_step', trainable=False)
        images_pl = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 32, 32, 3])
        labels_pl = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        logits = graph.inference(images_pl)
        loss = graph.loss(logits, labels_pl)
        train_op = graph.train(loss, global_step)
        eval_correct = graph.evaluate(logits, labels_pl)

        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver(tf.all_variables())

        init = tf.initialize_all_variables()
        sess = tf.Session()

        sess.run(init)

        summary_writer = tf.train.SummaryWriter(SUMMARY_DIR, sess.graph)

        max_steps = N_EPOCH * (DS_SIZE // BATCH_SIZE)
        for step in range(max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data.train, images_pl, labels_pl)
            _, loss_val = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            assert not np.isnan(loss_val), 'Model diverged with loss = NaN'

            if step % 10 == 0 or step == max_steps - 1:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_val, duration))
                if step > 0:
                    summary_str = sess.run(summary_op, feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

            if step > 0:
                if step < 1000 and step % 200 == 0:
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.train)

                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.validation)

                if step % 1000 == 0 or step == max_steps - 1:
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.train)

                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.validation)

            if step == max_steps - 1:
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_pl, labels_pl, data.test)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or step == max_steps - 1:
                # Save under a named checkpoint file inside CHECKPOINT_DIR.
                checkpoint_path = os.path.join(CHECKPOINT_DIR, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
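
The example above calls a fill_feed_dict helper that is not shown. A minimal sketch of what it could look like, assuming the data object exposes a next_batch(batch_size) method returning an (images, labels) pair as in the TensorFlow MNIST tutorial (that method and this exact body are assumptions inferred from the usage above):

def fill_feed_dict(data_set, images_pl, labels_pl):
    # Pull the next batch from the data set and map it onto the two
    # placeholders created in run_training. Assumes data_set.next_batch
    # returns (images, labels) arrays shaped to match the placeholders.
    images_feed, labels_feed = data_set.next_batch(BATCH_SIZE)
    return {images_pl: images_feed, labels_pl: labels_feed}
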
Example #2
def run_training(data):
    with tf.Graph().as_default():
        images_pl = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 512, 512, 3])
        labels_pl = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        logits = graph.inference(images_pl)

        loss = graph.loss(logits, labels_pl)
        train_op = graph.train(loss, 0.0001)
        eval_correct = graph.evaluate(logits, labels_pl)
        saver = tf.train.Saver(tf.trainable_variables())
        summary_op = tf.merge_all_summaries()

        init = tf.initialize_all_variables()
        sess = tf.Session()

        sess.run(init)
        # c,d = data.train.next_batch(BATCH_SIZE)
        # a = sess.run(logits,feed_dict={images_pl: c})
        # print a
        summary_writer = tf.train.SummaryWriter("summary", sess.graph)

        max_steps = N_EPOCH * (DS_SIZE // BATCH_SIZE)
        for step in range(max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data.train, images_pl, labels_pl)
            _, loss_val = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            assert not np.isnan(loss_val), 'Model diverged with loss = NaN'

            if step % 10 == 0 or step == max_steps - 1:
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_val, duration))
                if step > 0:
                    summary_str = sess.run(summary_op, feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

            if step % 100 == 0 or step == max_steps - 1:
                save_path = saver.save(sess, "model.ckpt")
                print("Model saved in file: %s" % save_path)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_pl, labels_pl, data.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_pl, labels_pl,
                        data.validation)
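
Both examples also rely on a do_eval helper that is not shown. A hedged sketch under the same assumptions (that eval_correct returns the number of correct predictions in a batch and that the data set exposes num_examples, as in the TensorFlow MNIST tutorial):

def do_eval(sess, eval_correct, images_pl, labels_pl, data_set):
    # Run one pass over data_set in BATCH_SIZE chunks, summing the number
    # of correct predictions, and print precision@1.
    true_count = 0
    steps_per_epoch = data_set.num_examples // BATCH_SIZE
    num_examples = steps_per_epoch * BATCH_SIZE
    for _ in range(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_pl, labels_pl)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print('Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
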
Example #3
File: nn.py  Project: sinopeus/thrax
    def train(self, correct_sequences, noise_sequences, weights, learning_rate, embedding_learning_rate):
        r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences), learning_rate * weights[0])
        (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss, l1penaltys, correct_scores, noise_scores) = r

        to_normalize = set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]

            dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
            dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

            self.trainer.update(loss, correct_score, noise_score, unpenalized_loss, l1penalty)

            for w in weights:
                assert w == weights[0]
            # Scale the embedding learning rate once per example; using a local
            # name avoids compounding the scaling across loop iterations.
            scaled_embedding_lr = embedding_learning_rate * weights[0]
            if loss == 0:
                for di in dcorrect_inputs + dnoise_inputs:
                    assert (di == 0).all()

            if loss != 0:
                for (i, di) in zip(correct_sequence, dcorrect_inputs):
                    assert di.shape == (self.parameters.embedding_size,)
                    self.parameters.embeddings[i] -= scaled_embedding_lr * di
                    if NORMALIZE_EMBEDDINGS:
                        to_normalize.add(i)
                for (i, di) in zip(noise_sequence, dnoise_inputs):
                    assert di.shape == (self.parameters.embedding_size,)
                    self.parameters.embeddings[i] -= scaled_embedding_lr * di
                    if NORMALIZE_EMBEDDINGS:
                        to_normalize.add(i)

        if to_normalize:
            self.parameters.normalize(list(to_normalize))
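
The inner update above is plain per-row SGD on the embedding matrix, followed by an optional renormalization of the touched rows. A small, self-contained NumPy sketch of the same idea (the unit-L2 renormalization is an assumption about what parameters.normalize does, and the function name is illustrative):

import numpy as np

def sgd_update_rows(embeddings, indices, grads, lr, normalize=True):
    # Apply embeddings[i] -= lr * grad for each touched row, mirroring the
    # zip over (sequence, d*_inputs) above, then optionally rescale each
    # touched row to unit L2 norm.
    touched = set()
    for i, di in zip(indices, grads):
        embeddings[i] -= lr * di
        touched.add(i)
    if normalize:
        for i in touched:
            norm = np.linalg.norm(embeddings[i])
            if norm > 0:
                embeddings[i] /= norm
    return embeddings
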
Example #4
def main(job_id, params):
    #re_load = False
    #save_file_name = 'bpe2char_biscale_decoder_adam'
    save_file_name = 'planning_10_true'
    source_dataset = params['train_data_path'] + params['source_dataset']
    target_dataset = params['train_data_path'] + params['target_dataset']
    valid_source_dataset = params['dev_data_path'] + params['valid_source_dataset']
    valid_target_dataset = params['dev_data_path'] + params['valid_target_dataset']
    source_dictionary = params['train_data_path'] + params['source_dictionary']
    target_dictionary = params['train_data_path'] + params['target_dictionary']

    decoder_type = params['decoder_type']
    save_file_name = 'bpe2char_{}_adam.last'.format(decoder_type)

    print "We are using th decoder:", decoder_type

    layers = {
        'ff': ('param_init_fflayer', 'fflayer'),
        'fff': ('param_init_ffflayer', 'ffflayer'),
        'gru': ('param_init_gru', 'gru_layer'),
        '{}'.format(decoder_type): ('param_init_{}'.format(decoder_type),
                                    '{}_decoder'.format(decoder_type)),
    }

    re_load = params['reload'] == 'True'

    print(params, params['save_path'], save_file_name)

    validerr = train(
        max_epochs=int(params['max_epochs']),
        patience=int(params['patience']),
        dim_word=int(params['dim_word']),
        dim_word_src=int(params['dim_word_src']),
        save_path=params['save_path'],
        save_file_name=save_file_name,
        re_load=re_load,
        enc_dim=int(params['enc_dim']),
        dec_dim=int(params['dec_dim']),
        n_words=int(params['n_words']),
        n_words_src=int(params['n_words_src']),
        decay_c=float(params['decay_c']),
        lrate=float(params['learning_rate']),
        optimizer=params['optimizer'],
        maxlen=int(params['maxlen']),
        maxlen_trg=int(params['maxlen_trg']),
        maxlen_sample=int(params['maxlen_sample']),
        batch_size=int(params['batch_size']),
        valid_batch_size=int(params['valid_batch_size']),
        sort_size=int(params['sort_size']),
        validFreq=int(params['validFreq']),
        dispFreq=int(params['dispFreq']),
        saveFreq=int(params['saveFreq']),
        sampleFreq=int(params['sampleFreq']),
        clip_c=int(params['clip_c']),
        datasets=[source_dataset, target_dataset],
        valid_datasets=[valid_source_dataset, valid_target_dataset],
        dictionaries=[source_dictionary, target_dictionary],
        use_dropout=int(params['use_dropout']),
        source_word_level=int(params['source_word_level']),
        target_word_level=int(params['target_word_level']),
        layers=layers,
        save_every_saveFreq=1,
        save_burn_in=0,
        use_bpe=1,
        init_params=init_params,
        build_model=build_model,
        build_sampler=build_sampler,
        gen_sample=gen_sample,
        c_lb=float(params['c_lb']),
        st_estimator=params['st_estimator'],
        use_gate=params['use_gate'] == 'True',
        learn_t=params['learn_t'] == 'True',
        plan_step=int(params['plan_step']),
        shuffle_dataset=params['shuffle_dataset'] == 'True',
        only_use_w=params['only_use_w'] == 'True',
        nb_cumulate=int(params['nb_cumulate']),
        repeat_actions=params['repeat_actions'] == 'True',
        decoder_type=decoder_type,
        layer_norm='layer_norm' in params and params['layer_norm'] == 'True')
    return validerr
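
Every hyper-parameter reaches main as a string and is cast explicitly (int(...), float(...), == 'True'), so params is typically a flat, string-valued dict loaded from a config file. A hypothetical loader sketch illustrating that shape (the JSON format, file name, and load_params helper are illustrative assumptions, not part of the project above):

import json

def load_params(config_path):
    # Read a flat JSON config and coerce every value to a string, matching
    # the int()/float()/== 'True' casts performed inside main above.
    with open(config_path) as f:
        raw = json.load(f)
    return {key: str(value) for key, value in raw.items()}

# Hypothetical driver call:
# validerr = main(job_id=0, params=load_params('experiment.json'))
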