train_set, dev_set, vocab_processor, sum_no_of_batches = inpH.getDataSets(FLAGS.training_files, max_document_length, 10, FLAGS.batch_size)

# Training
# ==================================================
print("starting graph def")
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("started session")
    with sess.as_default():
        siameseModel = SiameseLSTM(
            sequence_length=max_document_length,
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            hidden_units=FLAGS.hidden_units,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            batch_size=FLAGS.batch_size)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        print("initialized siameseModel object")
    
    grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
    tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
    print("defined training_ops")
    # Keep track of gradient values and sparsity (optional)
    grad_summaries = []
    for g, v in grads_and_vars:
Example #2
def train_network(config=global_config.FLAGS):

    embedding_matrix = load_embeddings("embedding_matrix.p")

    txt_suffix = (
        """layers_%(layers)s-dense_units_%(dense_units)s-hidden_%(hidden)s-l2_%(l2)s-dropout_%(dropout)s-multiply%(multiply)s-basiclstm_%(basic_lstm)s-ignore_%(ignore)s"""
        % {
            "layers": config.num_layers,
            "dense_units": config.dense_units,
            "hidden": config.hidden_units,
            "l2": config.l2_reg_lambda,
            "dropout": config.siamese_keep_prob,
            "multiply": config.multiply,
            "basic_lstm": config.basic_lstm,
            "ignore": config.ignore_one_in_every
        }).replace('\n', ' ').replace('\r', '')

    txt_suffix = txt_suffix + "-" + str(
        datetime.datetime.now().isoformat()) + ".txt"
    print("Text file name: ", txt_suffix)
    txt_file = open(txt_suffix, 'w')

    print("starting graph def")
    with tf.Graph().as_default(), tf.device("/gpu:0"):
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=True))
        print("started session")
        with sess.as_default():
            siamese_model = SiameseLSTM(config,
                                        vocab_size=len(embedding_matrix))

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(config.lr)
            print("initialized siameseModel object")

        grads_and_vars = optimizer.compute_gradients(siamese_model.loss)
        tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                              global_step=global_step)
        print("defined training_ops")

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", siamese_model.loss)
        # acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge(
            [loss_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                     sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        print("init all variables")
        graph_def = tf.get_default_graph().as_graph_def()
        graphpb_txt = str(graph_def)
        with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
            f.write(graphpb_txt)

        last_validation_loss = 1000
        checkpoint_model = ''
        for epoch in range(config.num_epochs):
            batches = train_batch_iter(config.batch_size)
            loss = 0.0
            num = 0
            i = 0
            print('Starting epoch: {} at {}'.format(
                epoch,
                datetime.datetime.now().isoformat()))
            for batch in tqdm(batches):
                i += 1
                if i % config.ignore_one_in_every == 0:
                    continue
                x1_batch, x2_batch, x1_len, x2_len, y_batch, ids = zip(*batch)
                step_loss = step(x1_batch, x2_batch, x1_len, x2_len, y_batch,
                                 siamese_model, sess, global_step, tr_op_set,
                                 config.dropout_keep_prob, embedding_matrix,
                                 train_summary_writer, train_summary_op)
                loss += step_loss
                num += len(batch) / config.batch_size

                txt_file.write('Train loss at iteration {} is {}'.format(
                    i, loss / num))
                txt_file.write("\n")
                txt_file.flush()

                if num % 100 == 0:
                    print('Train [{}] loss at step {} is {}'.format(
                        datetime.datetime.now().isoformat(), num, loss / num))
            current_step = tf.train.global_step(sess, global_step)
            print("Train [{}]: after epoch {} loss is {}".format(
                datetime.datetime.now().isoformat(), epoch, loss / num))

            if epoch % config.evaluate_every == 0:
                print("\n Evaluation after epoch: {}".format(epoch))
                dev_batches = val_batch_iter(config.batch_size)
                loss = 0.0
                num = 0
                i = 0
                for db in tqdm(dev_batches):
                    if len(db) < 1:
                        continue
                    x1_dev, x2_dev, x1_len_dev, x2_len_dev, y_dev, id_dev = zip(
                        *db)
                    if len(y_dev) < 1:
                        continue
                    step_loss = step(x1_dev,
                                     x2_dev,
                                     x1_len_dev,
                                     x2_len_dev,
                                     y_dev,
                                     siamese_model,
                                     sess,
                                     global_step,
                                     tr_op_set,
                                     config.siamese_keep_prob,
                                     embedding_matrix,
                                     dev_summary_writer,
                                     dev_summary_op,
                                     evaluate=True)
                    loss += step_loss
                    num += len(db) / config.batch_size
                    txt_file.write(
                        'Validation loss at iteration {} is {}'.format(
                            i, loss / num))
                    txt_file.write("\n")
                    txt_file.flush()
                    i += 1
                print("Validation [{}]:  after epoch {} loss is {}".format(
                    datetime.datetime.now().isoformat(), epoch, loss / num))

                saver.save(sess, checkpoint_prefix, global_step=current_step)
                tf.train.write_graph(sess.graph.as_graph_def(),
                                     checkpoint_prefix,
                                     "graph" + str(epoch) + ".pb",
                                     as_text=False)
                print(
                    "Saved model {} with validation loss ={} checkpoint to {}\n"
                    .format(epoch, loss / num, checkpoint_prefix))
                checkpoint_model = checkpoint_prefix + "-" + str(current_step)

                if loss > last_validation_loss:
                    if config.early_stopping:
                        return last_checkpoint_model, last_validation_loss

                last_validation_loss = loss
                last_checkpoint_model = checkpoint_model

        print("Done!!!")
        txt_file.close()
        return checkpoint_model, last_validation_loss
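
A minimal sketch of driving this routine, assuming global_config.FLAGS supplies the fields train_network reads (num_epochs, batch_size, lr, evaluate_every, early_stopping, ...) and that the embedding pickle and batch iterators are already in place:

# Hypothetical driver for train_network; the surrounding config and data setup are assumed.
if __name__ == "__main__":
    best_checkpoint, best_val_loss = train_network(global_config.FLAGS)
    print("Best checkpoint: {} (validation loss: {})".format(best_checkpoint, best_val_loss))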
Example #3
    def __launch_from_build(self, vocab_processor, trainableEmbeddings,
                            out_dir, checkpoint_dir_abs, initW):
        # ==================================================
        print("starting graph def")
        graph = tf.Graph()

        with graph.as_default():
            # tf.Session uses the current default graph, which here is `graph`
            sess = tf.Session(graph=graph, config=self.session_conf)
            print("started session")
            with sess.as_default():
                if self.FLAGS.is_char_based:
                    siameseModel = SiameseLSTM(
                        sequence_length=self.FLAGS.max_document_length,
                        vocab_size=len(vocab_processor.vocabulary_),
                        embedding_size=self.FLAGS.embedding_dim,
                        hidden_units=self.FLAGS.hidden_units,
                        l2_reg_lambda=self.FLAGS.l2_reg_lambda,
                        batch_size=self.FLAGS.batch_size)
                else:
                    siameseModel = SiameseLSTMw2v(
                        sequence_length=self.FLAGS.max_document_length,
                        vocab_size=len(vocab_processor.vocabulary_),
                        embedding_size=self.FLAGS.embedding_dim,
                        hidden_units=self.FLAGS.hidden_units,
                        l2_reg_lambda=self.FLAGS.l2_reg_lambda,
                        batch_size=self.FLAGS.batch_size,
                        trainableEmbeddings=trainableEmbeddings)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            print("initialized siameseModel object")

            grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
            tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                                  global_step=global_step,
                                                  name='tr_op_set')
            print("defined training_ops")
            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)
            print("defined gradient summaries")

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", siameseModel.loss)
            acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_op = tf.identity(train_summary_op,
                                           'train_summary_op')
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_op = tf.identity(dev_summary_op, 'dev_summary_op')
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
            sess.run(tf.global_variables_initializer())
            if initW is not None:
                sess.run(siameseModel.W.assign(initW))

            graphpb_txt = str(graph.as_graph_def())
            with open(os.path.join(checkpoint_dir_abs, "graphpb.txt"),
                      'w') as f:
                f.write(graphpb_txt)

        input_tensors = InputTensors(siameseModel.input_x1,
                                     siameseModel.input_x2,
                                     siameseModel.input_y,
                                     siameseModel.dropout_keep_prob)
        result_tensors = ResultTensors(global_step, siameseModel.loss,
                                       siameseModel.accuracy,
                                       siameseModel.distance,
                                       siameseModel.temp_sim)
        metric_ops = MetricOps(tr_op_set, train_summary_op, dev_summary_op,
                               train_summary_writer, dev_summary_writer)
        return saver, sess, input_tensors, result_tensors, metric_ops
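
The returned handles are enough to drive a single training step from the caller. A minimal sketch, assuming InputTensors, ResultTensors, and MetricOps expose their constructor arguments as attributes (these attribute names are assumptions, not taken from the snippet above) and that x1_batch, x2_batch, y_batch come from the caller's batch iterator:

# Hedged sketch: attribute names on the returned tuples are assumed for illustration.
def run_train_step(sess, inputs, results, metrics, x1_batch, x2_batch, y_batch, keep_prob):
    feed_dict = {
        inputs.input_x1: x1_batch,
        inputs.input_x2: x2_batch,
        inputs.input_y: y_batch,
        inputs.dropout_keep_prob: keep_prob,
    }
    # One optimizer step plus the merged training summaries.
    _, step, loss, summaries = sess.run(
        [metrics.tr_op_set, results.global_step, results.loss, metrics.train_summary_op],
        feed_dict=feed_dict)
    metrics.train_summary_writer.add_summary(summaries, step)
    return loss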
Example #4
# train_set, dev_set, vocab_processor,sum_no_of_batches = inpH.getDataSets(FLAGS.training_files,max_document_length, 10, FLAGS.batch_size)

# Training
# ==================================================
print("starting graph def")
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("started session")
    with sess.as_default():
        siameseModel = SiameseLSTM(
            sequence_length=max_document_length,
            embedding_size=FLAGS.embedding_dim,
            hidden_units=FLAGS.hidden_units,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            batch_size=FLAGS.batch_size,
            word_embeddings=word_embeddings)
 
        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        print("initialized siameseModel object")
     
    grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
    tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
    print("defined training_ops")
    # Keep track of gradient values and sparsity (optional)
    grad_summaries = []
    for g, v in grads_and_vars:
Example #5
# ==================================================
print("starting graph def")
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("started session")
    with sess.as_default():
        siameseModel = SiameseLSTM(
            sequence_length=max_document_length,
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            hidden_units=FLAGS.hidden_units,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            batch_size=FLAGS.batch_size,
            embedding_matrix=embedding_matrix,
            entity_embedding_matrix=entity_embedding_matrix,
            entity_embedding_dim=FLAGS.entity_embedding_dim,
            entity_vocab_size=entity_vocab_size,
            n_entity=n_entity,
            mode=mode)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(5e-4)
        print("initialized siameseModel object")

    grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
    tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                          global_step=global_step)