Example #1
def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout_keep_prob=config.dropout_keep_prob)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    with tf.name_scope('input'):
        inputs = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='inputs')
        labels = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='labels')
        input_sample = tf.placeholder(tf.int32,
                                      shape=[config.batch_size, 1],
                                      name='input_sample')
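        # The recurrent state travels through one placeholder with layout
        # [num_layers, 2, batch_size, num_hidden]; axis 1 holds the LSTM's
        # (c, h) pair, unpacked into LSTMStateTuples below.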
        state = tf.placeholder(tf.float32, [
            config.lstm_num_layers, 2, config.batch_size,
            config.lstm_num_hidden
        ])

    # Unstack the flat state placeholder into per-layer LSTMStateTuples
    layer = tf.unstack(state, axis=0)
    rnn_tuple_state = tuple([
        tf.nn.rnn_cell.LSTMStateTuple(layer[i][0], layer[i][1])
        for i in range(config.lstm_num_layers)
    ])

    # Logits
    with tf.name_scope('logits'):
        logits, _ = model._build_model(inputs, rnn_tuple_state)

    # Loss
    with tf.name_scope('loss'):
        loss = model._compute_loss(logits, labels)
    tf.summary.scalar('loss', loss)

    # Generate text
    with tf.name_scope('sample_logits'):
        sample_logits, final_state = model._build_model(
            input_sample, rnn_tuple_state)

    # Predictions
    with tf.name_scope('predictions'):
        predictions = model.predictions(sample_logits)
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Decaying learning rate
    decaying_learning_rate = tf.train.exponential_decay(
        config.learning_rate,
        global_step,
        config.learning_rate_step,
        config.learning_rate_decay,
        name='decaying_eta')
    # Track the decayed learning rate under a string collection key
    # (tf.GraphKeys is a class of standard key names, not itself a valid key).
    tf.add_to_collection('learning_rate', decaying_learning_rate)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(decaying_learning_rate)

    # Compute the gradients for each variable
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped,
                                                       variables),
                                                   global_step=global_step)

    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                        graph=tf.get_default_graph())

    # Initial zero state
    init_state = np.zeros(
        (config.lstm_num_layers, 2, config.batch_size, config.lstm_num_hidden))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################

        x_train, y_train = dataset.batch(config.batch_size, config.seq_length)
        # Train step
        sess.run(apply_gradients_op,
                 feed_dict={
                     inputs: x_train,
                     labels: y_train,
                     state: init_state
                 })

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:

            l, summary = sess.run([loss, merged],
                                  feed_dict={
                                      inputs: x_train,
                                      labels: y_train,
                                      state: init_state
                                  })
            test_writer.add_summary(summary, train_step)

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {}"
                .format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                        train_step + 1, int(config.train_steps),
                        config.batch_size, examples_per_second, l))

        if train_step % config.sample_every == 0:
            sample_inputs = (np.random.randint(0,
                                               dataset.vocab_size,
                                               size=config.batch_size))
            new_sample = np.reshape(sample_inputs, (sample_inputs.shape[0], 1))
            new_sentence = np.empty([(config.batch_size), (config.seq_length)])
            # Generate a new sequence of length seq_length
            for i in range(config.seq_length):
                # Feed the zero state on the first step; afterwards feed the
                # state returned by the previous step back into the model so
                # the network stays unrolled across sampling steps. (The
                # original `i >= 30 & i < 60` branch was both a precedence
                # bug and identical to the else branch, so the cases merge.)
                feed_state = init_state if i == 0 else final
                pred, final = sess.run([predictions, final_state],
                                       feed_dict={
                                           input_sample: new_sample,
                                           state: feed_state
                                       })
                new_sample = pred.T
                new_sentence[:, i] = new_sample[:, 0]

            for idx, elem in enumerate(new_sentence):
                # .encode('utf-8') could be dropped for nicer-looking output;
                # it is kept here to avoid encoding errors when running on SURFsara.
                print('Sentence {}:{} {}'.format(
                    idx,
                    dataset.convert_to_string(sample_inputs)[idx].encode(
                        'utf-8'),
                    dataset.convert_to_string(elem).encode('utf-8')))

    test_writer.close()
    sess.close()
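
The single state placeholder plus tf.unstack/LSTMStateTuple trick above is worth seeing in isolation. Below is a minimal, self-contained sketch of the same pattern, assuming TensorFlow 1.x and hypothetical sizes (2 layers, batch 4, 8 hidden units):

import numpy as np
import tensorflow as tf

num_layers, batch_size, num_hidden = 2, 4, 8  # hypothetical sizes

# One placeholder carries (c, h) for every layer: [layers, 2, batch, hidden].
state_ph = tf.placeholder(tf.float32,
                          [num_layers, 2, batch_size, num_hidden])
layers = tf.unstack(state_ph, axis=0)
tuple_state = tuple(
    tf.nn.rnn_cell.LSTMStateTuple(layers[i][0], layers[i][1])
    for i in range(num_layers))

cell = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(num_hidden) for _ in range(num_layers)])
x = tf.placeholder(tf.float32, [batch_size, 1, num_hidden])
outputs, final_state = tf.nn.dynamic_rnn(cell, x, initial_state=tuple_state)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    zero_state = np.zeros((num_layers, 2, batch_size, num_hidden))
    _, fs = sess.run([outputs, final_state],
                     feed_dict={x: np.zeros((batch_size, 1, num_hidden)),
                                state_ph: zero_state})
    # fs is a tuple of LSTMStateTuples; np.array(fs) restacks it to the
    # placeholder layout so it can be fed back on the next step.
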
Example #2
def train(config):
    tf.reset_default_graph()
    # Initialize the text dataset
    dataset = TextDataset(config.txt_file, config.clean_data)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                embed_dim=config.embed_dim,
                                decoding_model=config.decoding_mode)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    warmup_seq = tf.placeholder(dtype=tf.int32,
                                shape=(None, 1),
                                name='warmup_decoding_sequences')
    warmup_decodes = model.decode_warmup(warmup_seq, config.decode_length)

    init_decode_char = tf.placeholder(dtype=tf.int32,
                                      shape=(config.num_rand_samples),
                                      name='rand_init_decoding')
    random_decodes = model.decode(decode_batch_size=config.num_rand_samples,
                                  init_input=init_decode_char,
                                  decode_length=config.decode_length,
                                  init_state=None)

    # Reproducibility
    # tf.set_random_seed(42)
    # np.random.seed(42)

    # Utility vars and ops
    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=config.gpu_mem_frac, allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # logging
    train_logdir = os.path.join(config.summary_path,
                                '{}_train'.format(config.model_name))
    train_log_writer = init_summary_writer(session, train_logdir)

    # Define the optimizer
    if config.optimizer.lower() == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=config.learning_rate,
            decay=config.learning_rate_decay)
    elif config.optimizer.lower() == 'adam':
        optimizer = tf.train.AdamOptimizer(config.learning_rate)
    else:
        raise ValueError('Unknown optimizer: {}'.format(config.optimizer))

    # Compute the gradients for each variable
    grads_and_vars = optimizer.compute_gradients(model.loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped,
                                                       variables),
                                                   global_step=global_step)
    saver = tf.train.Saver(max_to_keep=50)
    save_path = os.path.join(config.checkpoint_path,
                             '{}/model.ckpt'.format(config.model_name))
    _ensure_path_exists(save_path)

    # Summaries
    summary_op = tf.summary.merge_all()
    session.run(fetches=[
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ])

    for train_step in range(int(config.train_steps)):

        # dim: [batch_size, time_step]
        batch_inputs, batch_labels = dataset.batch(
            batch_size=config.batch_size, seq_length=config.seq_length)

        # Time-major: [time_step, batch_size]
        batch_inputs = batch_inputs.T

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here
        #######################################################################
        train_feed = {model.inputs: batch_inputs, model.labels: batch_labels}
        fetches = [model.loss, apply_gradients_op]
        if train_step % config.print_every == 0:
            fetches += [summary_op]
            loss, _, summary = session.run(feed_dict=train_feed,
                                           fetches=fetches)
            train_log_writer.add_summary(summary, train_step)
        else:
            loss, _ = session.run(feed_dict=train_feed, fetches=fetches)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {}"
                .format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                        train_step + 1, int(config.train_steps),
                        config.batch_size, examples_per_second, loss))

        # Decode
        if train_step % config.sample_every == 0:

            # random character sampling
            print('Random character sampling')
            rand_chars = np.random.choice(a=dataset.vocab_size,
                                          size=(config.num_rand_samples))
            decode_feed = {init_decode_char: rand_chars}
            decoded_tokens = session.run(fetches=[random_decodes],
                                         feed_dict=decode_feed)[0]
            decoded_tokens = np.array(decoded_tokens).T
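            # Decodes come back time-major ([time, batch]); transpose to
            # [batch, time] before converting each row to a string.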
            for i in range(decoded_tokens.shape[0]):
                print('{}|{}'.format(
                    dataset._ix_to_char[rand_chars[i]],
                    dataset.convert_to_string(decoded_tokens[i, :])))

            print('Warmup sequence sampling')
            warmups = [
                'Welcome to the planet Earth ',
                'Human beings grew up in forests ', 'Satan said ',
                'God is not ', 'theory of evolution ',
                'whole groups of species '
            ]

            for warmup in warmups:
                warmup_tokens = np.array([
                    dataset._char_to_ix[x] for x in warmup.lower()
                    if x in dataset._char_to_ix
                ]).reshape((-1, 1))
                feed = {warmup_seq: warmup_tokens}
                decoded_tokens = session.run(fetches=[warmup_decodes],
                                             feed_dict=feed)[0]
                print('{}|{}'.format(
                    warmup,
                    dataset.convert_to_string(
                        decoded_tokens.squeeze().tolist())))

        if train_step % config.checkpoint_every == 0:
            saver.save(session, save_path=save_path)

    train_log_writer.close()
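
All three examples share the same clipped-gradient update: compute per-variable gradients, rescale them jointly with tf.clip_by_global_norm, then apply them while advancing global_step. A minimal sketch of just that step, assuming TF 1.x and a hypothetical clip norm of 5.0:

import tensorflow as tf

x = tf.Variable(3.0)
loss = tf.square(x)  # d(loss)/dx = 2x = 6.0 at x = 3.0
global_step = tf.Variable(0, trainable=False)

optimizer = tf.train.RMSPropOptimizer(learning_rate=0.01)
grads, variables = zip(*optimizer.compute_gradients(loss))
# All gradients are rescaled together only if their global norm exceeds 5.0.
clipped, pre_clip_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = optimizer.apply_gradients(zip(clipped, variables),
                                     global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, norm = sess.run([train_op, pre_clip_norm])
    print(norm)  # 6.0 > 5.0, so this step's gradients were scaled down
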
Example #3
def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout_keep_prob=config.dropout_keep_prob,
                                prediction_mode=config.prediction_mode)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Placeholders for model sampling
    init_sample_char = tf.placeholder(dtype=tf.int32,
                                      shape=(config.num_samples))
    seq_samples = model._sample(init_input=init_sample_char,
                                num_samples=config.num_samples,
                                sample_length=config.sample_length,
                                init_state=None)

    init_sentence = tf.placeholder(dtype=tf.int32, shape=(None, 1))
    completed_sentence = model._complete_sentence(init_sentence,
                                                  config.sample_length)

    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=config.gpu_mem_frac, allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))

    # Setup global step
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Define the optimizer
    if config.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=config.learning_rate,
            decay=config.learning_rate_decay)
    elif config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(config.learning_rate)
    else:
        raise ValueError('Unknown optimizer: {}'.format(config.optimizer))

    # Compute the gradients for each variable
    grads_and_vars = optimizer.compute_gradients(model._loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped,
                                                       variables),
                                                   global_step=global_step)

    # Saver
    saver = tf.train.Saver(max_to_keep=50)
    save_path = os.path.join(config.save_path,
                             '{}/model.ckpt'.format(config.name))
    _check_path(save_path)

    # Initialization
    init_op = tf.global_variables_initializer()
    local_init_op = tf.local_variables_initializer()
    sess.run(fetches=[init_op, local_init_op])

    # Define summary operation
    summary_op = tf.summary.merge_all()

    # Logs
    train_log_path = os.path.join(config.summary_path,
                                  '{}'.format(config.name))
    _check_path(train_log_path)
    train_log_writer = tf.summary.FileWriter(train_log_path, graph=sess.graph)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    print(" ******* DICTIONARY ******* ")
    print(dataset._ix_to_char)

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################
        x, y = dataset.batch(batch_size=config.batch_size,
                             seq_length=config.seq_length)

        tr_feed = {model._inputs: x, model._targets: y}
        fetches = [apply_gradients_op, model._loss]

        if train_step % config.print_every == 0:
            fetches += [summary_op]
            _, train_loss, summary = sess.run(feed_dict=tr_feed,
                                              fetches=fetches)
            train_log_writer.add_summary(summary, train_step)
        else:
            _, train_loss = sess.run(feed_dict=tr_feed, fetches=fetches)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {:.4f}"
                .format(datetime.now().strftime("%Y-%m-%d %H:%M"), train_step,
                        int(config.train_steps), config.batch_size,
                        examples_per_second, train_loss))

        # Sample sentences from the model
        if train_step % config.sample_every == 0:

            # Random initial character
            init_chars = np.random.choice(a=dataset.vocab_size,
                                          size=(config.num_samples))
            sampled_seq = sess.run(fetches=[seq_samples],
                                   feed_dict={init_sample_char: init_chars})[0]
            sampled_seq = np.array(sampled_seq).T
            print("\n ******* Random Initial Character *******")
            for i in range(config.num_samples):
                print('{} - {}|{}'.format(
                    i, dataset._ix_to_char[init_chars[i]],
                    dataset.convert_to_string(sampled_seq[i, :])))

            # Custom sentences
            custom_inits = [
                'To be, or not to be, that is the question: Whether ',
                'History will be kind to me for I intend to ', 'Hansel and Gr',
                'Democracy is ',
                'Let T be a bounded linear operator in V, a vector space.',
                'Mas vale pajaro en mano que ver un ciento v'
            ]

            print("\n ******* Sentence Completion *******")
            for init_seq in custom_inits:
                init_vec = np.array([
                    dataset._char_to_ix[x] for x in init_seq
                    if x in dataset._char_to_ix
                ]).reshape((-1, 1))
                sampled_seq = sess.run(fetches=[completed_sentence],
                                       feed_dict={init_sentence: init_vec})[0]
                print('{}|{}'.format(
                    init_seq,
                    dataset.convert_to_string(sampled_seq.squeeze().tolist())))

            print("\n")
        # Save checkpoint
        if train_step % config.save_every == 0 and train_step > 1:
            saver.save(sess, save_path=save_path)

    train_log_writer.close()
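
Examples #2 and #3 periodically write checkpoints with tf.train.Saver, so sampling can later resume without retraining. A hedged sketch of the restore side, assuming the same graph-building code runs first and using a hypothetical checkpoint directory:

import tensorflow as tf

# Rebuild the same graph as in train() before restoring; a single variable
# stands in for the full model here.
x = tf.get_variable('x', shape=[], initializer=tf.zeros_initializer())
saver = tf.train.Saver()

with tf.Session() as sess:
    # 'checkpoints/model' is a hypothetical directory used during training.
    ckpt = tf.train.latest_checkpoint('checkpoints/model')
    if ckpt is not None:
        saver.restore(sess, ckpt)
    else:
        sess.run(tf.global_variables_initializer())
    # With weights restored, the sampling ops (seq_samples,
    # completed_sentence) can be run exactly as inside the training loop.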