Example #1
    # (snippet truncated above: these are the trailing arguments of the model
    # constructor; Example #4 shows the full ElmoModel(...) call)
    max_word_length,
    0.015,
    0.5)

print('Start training...')
print('Train size = %d' % len(train_x))
print('Val size = %d' % len(val_x))
print('Test size = %d' % len(test_x))
print('Num classes = %d' % num_classes)

start_epoch = 1
max_epoch = 100

saver = tf.train.Saver()
best_saver = BestCheckpointSaver(save_dir='checkpoints/best',
                                 num_to_keep=1,
                                 maximize=True)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(message)s',
    datefmt='%m-%d %H:%M',
    handlers=[logging.FileHandler('logs/train.log'),
              logging.StreamHandler()])

latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir='checkpoints')
if latest_checkpoint:
    saver.restore(sess, latest_checkpoint)
else:
    sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
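All of these snippets follow the same checkmate-style pattern: construct a BestCheckpointSaver, then call handle() with a validation metric once per epoch so that only improving checkpoints are written. Below is a minimal sketch of that loop, assuming the checkmate import path and the handle(value, sess, global_step) form seen in Example #5; the forks in Examples #3 and #4 take a plain epoch int or an extra epoch argument, so check your fork's signature.

import tensorflow as tf
from checkmate import BestCheckpointSaver, get_best_checkpoint  # assumed import path

global_step = tf.train.get_or_create_global_step()
best_saver = BestCheckpointSaver(save_dir='checkpoints/best',
                                 num_to_keep=1,
                                 maximize=True)  # higher metric == better

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(1, 4):
        # ... one training epoch would go here ...
        val_score = float(epoch)  # stand-in for a real validation metric
        # handle() writes a checkpoint only if val_score beats the best so far
        best_saver.handle(val_score, sess, global_step)

best_ckpt = get_best_checkpoint('checkpoints/best')  # path to the best model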
Example #2
    # (snippet truncated above: trailing model-constructor arguments,
    # as in Example #1)
    max_word_length,
    0.015,
    0.5)

print('Start training...')
print('Train size = %d' % len(train_x))
print('Val size = %d' % len(val_x))
print('Test size = %d' % len(test_x))
print('Num classes = %d' % num_classes)

start_epoch = 1
max_epoch = 1000

saver = tf.train.Saver()
best_saver = BestCheckpointSaver(save_dir='checkpoints/best',
                                 num_to_keep=1,
                                 maximize=True)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

best_ckpt = best_checkpoint('checkpoints/best/', True)  # avoid shadowing the best_checkpoint() helper
sess.run(tf.tables_initializer())
saver.restore(sess, best_ckpt)

train_feeder = LSTMCNNCRFeeder(train_x, train_chars, train_la, max_seq_length,
                               max_word_length, 16)
val_feeder = LSTMCNNCRFeeder(val_x, val_chars, val_la, max_seq_length,
                             max_word_length, 16)
test_feeder = LSTMCNNCRFeeder(test_x, test_chars, test_la, max_seq_length,
                              max_word_length, 16)
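Examples #1 and #2 differ mainly in how session state is initialized: #1 resumes from the most recent training checkpoint, while #2 restores the best-scoring one tracked by BestCheckpointSaver. Side by side, reusing the sess, saver, and best_checkpoint() helper from the snippets above (paths illustrative):

# Option A (Example #1): resume the most recent checkpoint, else cold-start
latest = tf.train.latest_checkpoint('checkpoints')
if latest:
    saver.restore(sess, latest)
else:
    sess.run(tf.global_variables_initializer())

# Option B (Example #2): load the best-scoring checkpoint kept by BestCheckpointSaver
saver.restore(sess, best_checkpoint('checkpoints/best/', True))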
Example #3
def main(_):
    # specify GPU
    if FLAGS.gpu_index:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    tf.reset_default_graph()
    X = tf.placeholder(tf.float32,
                       shape=[None, FLAGS.img_size, FLAGS.img_size, 3],
                       name="X")
    GT = tf.placeholder(tf.float32,
                        shape=[None, FLAGS.label_size, FLAGS.label_size, 1],
                        name="GT")
    mode = tf.placeholder(tf.bool, name="mode")  # training or not

    if FLAGS.use_64_channel:
        pred = Unet_64_1024(X, mode, FLAGS)
    else:
        pred = Unet_32_512(X, mode, FLAGS)

    tf.add_to_collection("inputs", X)
    tf.add_to_collection("inputs", mode)
    tf.add_to_collection("outputs", pred)

    tf.summary.histogram("Predicted Mask", pred)
    tf.summary.image("Predicted Mask", pred)

    # IOU is
    #
    # (the area of intersection)
    # --------------------------
    # (the area of the union of the two masks)
    iou_op = IOU(pred, GT)

    loss = -iou_op
    tf.summary.scalar("loss", loss)

    # Updates moving mean and moving variance for BatchNorm (train/inference)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # another optimizer could be substituted here
        train_op = tf.train.MomentumOptimizer(0.001, 0.99).minimize(loss)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    summary_op = tf.summary.merge_all()
    train_summary_writer = tf.summary.FileWriter(FLAGS.logdir + '/train',
                                                 sess.graph)
    val_summary_writer = tf.summary.FileWriter(FLAGS.logdir + '/validation')

    saver = tf.train.Saver()

    # Best-checkpoint saver (keeps the top-N checkpoints by validation score)
    if FLAGS.best_train_dir:
        best_ckpt_saver = BestCheckpointSaver(title='unet.ckpt',
                                              save_dir=FLAGS.best_train_dir,
                                              num_to_keep=3,
                                              maximize=True)

    start_epoch = 1
    epoch_from_ckpt = 0
    if FLAGS.ckpt_path:
        saver.restore(sess, FLAGS.ckpt_path)
        tmp = FLAGS.ckpt_path
        tmp = tmp.split('-')
        tmp.reverse()
        epoch_from_ckpt = int(tmp[0])
        start_epoch = epoch_from_ckpt + 1

    if epoch_from_ckpt != FLAGS.epochs + 1:
        tf.logging.info('Training from epoch: %d ', start_epoch)

    # Save the graph definition as a text-format protocol buffer (pbtxt)
    tf.train.write_graph(sess.graph_def,
                         FLAGS.train_dir,
                         'unet.pbtxt',
                         as_text=True)

    ############################
    # Get data
    ############################
    raw = Data(FLAGS.data_dir, FLAGS.validation_percentage)
    tr_data = DataLoader(raw.data_dir, raw.get_data('training'),
                         FLAGS.img_size, FLAGS.label_size, FLAGS.batch_size)
    val_data = DataLoader(raw.data_dir, raw.get_data('validation'),
                          FLAGS.img_size, FLAGS.label_size, FLAGS.batch_size)

    iterator = tf.data.Iterator.from_structure(tr_data.dataset.output_types,
                                               tr_data.dataset.output_shapes)
    next_batch = iterator.get_next()

    # Ops for initializing the two different iterators
    tr_init_op = iterator.make_initializer(tr_data.dataset)
    val_init_op = iterator.make_initializer(val_data.dataset)

    tr_batches_per_epoch = int(tr_data.data_size / FLAGS.batch_size)
    if tr_data.data_size % FLAGS.batch_size > 0:
        tr_batches_per_epoch += 1
    val_batches_per_epoch = int(val_data.data_size / FLAGS.batch_size)
    if val_data.data_size % FLAGS.batch_size > 0:
        val_batches_per_epoch += 1

    ############################
    # Training
    ############################
    print("{} Training start ... ".format(datetime.datetime.now()))
    for epoch in range(start_epoch, FLAGS.epochs + 1):
        print('{} Training epoch-{} start >> '.format(datetime.datetime.now(),
                                                      epoch))

        sess.run(tr_init_op)
        for step in range(tr_batches_per_epoch):
            X_train, y_train = sess.run(next_batch)
            train_summary, accuracy, _, _ = \
                sess.run([summary_op, iou_op, train_op, increment_global_step],
                         feed_dict={X: X_train,
                                    GT: y_train,
                                    mode: True}
                         )

            train_summary_writer.add_summary(
                train_summary,
                (epoch - start_epoch) * tr_batches_per_epoch + step)
            tf.logging.info('epoch #%d, step #%d/%d, accuracy(iou) %.5f%%' %
                            (epoch, step, tr_batches_per_epoch,
                             accuracy * 100))

        print("{} Validation start ... ".format(datetime.datetime.now()))
        total_val_accuracy = 0
        val_count = 0
        sess.run(val_init_op)
        for n in range(val_batches_per_epoch):
            X_val, y_val = sess.run(next_batch)
            val_summary, val_accuracy = \
                sess.run([summary_op, iou_op],
                         feed_dict={X: X_val,
                                    GT: y_val,
                                    mode: False}
                         )

            # total_val_accuracy += val_step_iou * X_val.shape[0]
            total_val_accuracy += val_accuracy
            val_count += 1

            val_summary_writer.add_summary(
                val_summary, (epoch - start_epoch) * val_batches_per_epoch + n)
            tf.logging.info('step #%d/%d, accuracy(iou) %.5f%%' %
                            (n, val_batches_per_epoch, val_accuracy * 100))

        total_val_accuracy /= val_count
        tf.logging.info(
            'epoch %d: Validation accuracy = %.2f%% (N=%d)' %
            (epoch, total_val_accuracy * 100, raw.get_size('validation')))

        # save checkpoint
        checkpoint_path = os.path.join(FLAGS.train_dir, 'unet.ckpt')
        tf.logging.info('Saving to "%s-%d"', checkpoint_path, epoch)
        saver.save(sess, checkpoint_path, global_step=epoch)

        # save best checkpoint
        if FLAGS.best_train_dir:
            best_ckpt_saver.handle(total_val_accuracy, sess, global_step,
                                   epoch)
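The IOU() helper used in Example #3 is not shown on this page. For orientation, here is a minimal soft-IOU sketch matching the comment above (intersection over the union of the predicted and ground-truth masks); this is an assumption about its shape, not the project's actual implementation:

import tensorflow as tf

def soft_iou(pred, gt, eps=1e-7):
    # pred, gt: [batch, H, W, 1] float masks in [0, 1]
    inter = tf.reduce_sum(pred * gt, axis=[1, 2, 3])
    union = tf.reduce_sum(pred, axis=[1, 2, 3]) \
          + tf.reduce_sum(gt, axis=[1, 2, 3]) - inter
    # mean over the batch; eps guards against empty masks
    return tf.reduce_mean((inter + eps) / (union + eps))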
Example #4
    def train(self, data, *args, **kwargs):

        if not os.path.isfile(
                kwargs.get("parsedDumpPath", '../dev/parsedDataDump.pkl')):
            self.data_converter(data, *args, **kwargs)

        with open(kwargs.get("parsedDumpPath", '../dev/parsedDataDump.pkl'),
                  'rb') as fp:
            train_set, val_set, test_set, dicts = pickle.load(fp)

        w2idx, la2idx = dicts['words2idx'], dicts['labels2idx']
        idx2w = {w2idx[k]: k for k in w2idx}
        idx2la = {la2idx[k]: k for k in la2idx}

        train_x, train_chars, train_la = train_set
        val_x, val_chars, val_la = val_set
        test_x, test_chars, test_la = test_set

        self.log.debug('Loading elmo!')
        elmo_batcher = Batcher(kwargs.get("vocabPath", '../dev/vocab.txt'), 50)
        elmo_bilm = BidirectionalLanguageModel(
            kwargs.get(
                "elmoOptionsFile",
                '../resources/elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'
            ),
            kwargs.get(
                "elmoWeightFile",
                '../resources/elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
            ))

        self.log.debug('Loading model!')

        num_classes = len(la2idx.keys())
        max_seq_length = max(
            max(map(len, train_x)),
            max(map(len, test_x)),
        )
        max_word_length = max(
            max([len(ssc) for sc in train_chars for ssc in sc]),
            max([len(ssc) for sc in test_chars for ssc in sc]))

        model = ElmoModel(
            True,
            kwargs.get("wordEmbeddingSize", 50),  # Word embedding size
            kwargs.get("charEmbeddingSize", 16),  # Character embedding size
            kwargs.get("LSTMStateSize", 200),  # LSTM state size
            kwargs.get("filterNum", 128),  # Filter num
            kwargs.get("filterSize", 3),  # Filter size
            num_classes,
            max_seq_length,
            max_word_length,
            kwargs.get("learningRate", 0.015),
            kwargs.get("dropoutRate", 0.5),
            elmo_bilm,
            1,  # elmo_mode
            elmo_batcher,
            **kwargs)

        self.log.debug('Start training...')
        self.log.debug('Train size = %d' % len(train_x))
        self.log.debug('Val size = %d' % len(val_x))
        self.log.debug('Test size = %d' % len(test_x))
        self.log.debug('Num classes = %d' % num_classes)

        start_epoch = 1
        max_epoch = kwargs.get("maxEpoch", 100)

        self.log.debug('Epoch = %d' % max_epoch)

        saver = tf.train.Saver()
        best_saver = BestCheckpointSaver(save_dir=kwargs.get(
            "bestCheckpointPath", "../results/checkpoints/best"),
                                         num_to_keep=1,
                                         maximize=True)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        latest_checkpoint = tf.train.latest_checkpoint(
            checkpoint_dir=kwargs.get("checkpointPath",
                                      "../results/checkpoints"))
        if latest_checkpoint:
            saver.restore(sess, latest_checkpoint)
        else:
            sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        train_feeder = LSTMCNNCRFeeder(train_x, train_chars, train_la,
                                       max_seq_length, max_word_length,
                                       kwargs.get("epochWidth", 16))
        val_feeder = LSTMCNNCRFeeder(val_x, val_chars, val_la, max_seq_length,
                                     max_word_length,
                                     kwargs.get("epochWidth", 16))

        for epoch in range(start_epoch, max_epoch + 1):
            loss = 0
            for step in range(train_feeder.step_per_epoch):
                tokens, chars, labels = train_feeder.feed()

                step_loss = model.train_step(sess, tokens, chars, labels)
                loss += step_loss

                self.log.debug(
                    'epoch: %d, size: %d/%d, step_loss: %f, epoch_loss: %f',
                    epoch, train_feeder.offset, train_feeder.size, step_loss,
                    loss)

            preds = []
            for step in range(val_feeder.step_per_epoch):
                tokens, chars, labels = val_feeder.feed()
                pred = model.test(sess, tokens, chars)
                preds.extend(pred)
            true_seqs = [idx2la[la] for sl in val_la for la in sl]
            pred_seqs = [idx2la[la] for sl in preds for la in sl]
            ll = min(len(true_seqs), len(pred_seqs))

            self.log.debug(true_seqs[:ll])
            self.log.debug(pred_seqs[:ll])

            prec, rec, f1 = evaluate(true_seqs[:ll], pred_seqs[:ll], False)

            self.log.debug("Epoch: %d, val_p: %f, val_r: %f, val_f1: %f",
                           epoch, prec, rec, f1)

            val_feeder.next_epoch(False)

            saver.save(sess,
                       kwargs.get("checkpointPath", "../results/checkpoints") +
                       '/model.ckpt',
                       global_step=epoch)
            best_saver.handle(f1, sess, epoch)

            self.log.debug('')  # blank line between epoch logs
            train_feeder.next_epoch()

        self.log.debug("Training done! ... Saving trained model")
        return model, sess, saver
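Example #4 scores validation by flattening the per-sentence label indices into flat tag sequences before calling evaluate(). A tiny self-contained illustration of that flattening, with made-up indices and a hypothetical idx2la map:

idx2la = {0: 'O', 1: 'B-PER', 2: 'I-PER'}   # hypothetical label map
val_la = [[1, 2, 0], [0, 1]]                # gold indices, two sentences
preds = [[1, 0, 0], [0, 1]]                 # predicted indices

true_seqs = [idx2la[la] for sl in val_la for la in sl]
pred_seqs = [idx2la[la] for sl in preds for la in sl]
ll = min(len(true_seqs), len(pred_seqs))    # guard against length mismatch
print(true_seqs[:ll])  # ['B-PER', 'I-PER', 'O', 'O', 'B-PER']
print(pred_seqs[:ll])  # ['B-PER', 'O', 'O', 'O', 'B-PER']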
Example #5
File: model.py  Project: qinxie/GM-DGM
    def train(self, Data, n_epochs, l_bs, u_bs, lr, eval_samps=None,
              binarize=False, verbose=1):
        """ Method for training the models """
        self.data_init(Data, eval_samps, l_bs, u_bs)
        self.lr = self.set_learning_rate(lr)
        # define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        gvs = optimizer.compute_gradients(self.loss)
        # clip gradients
        capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                      for grad, var in gvs]
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = optimizer.apply_gradients(
                capped_gvs, global_step=self.global_step)

        self.y_pred = self.predict(self.x)
        self.curve_array = np.zeros((n_epochs + 1, 14))
        if self.learning_paradigm == 'unsupervised':
            self.elbo_l_curve = tf.reduce_mean(
                self.unlabelled_loss(self.x))
            self.qy_ll_curve = tf.reduce_mean(
                self.qy_loss(self.x))
            self.elbo_u_curve = tf.reduce_mean(
                self.unlabelled_loss(self.x))
        else:
            self.elbo_l_curve = tf.reduce_mean(
                self.labelled_loss(self.x, self.y))
            self.qy_ll_curve = tf.reduce_mean(
                self.qy_loss(self.x, self.y))
            self.elbo_u_curve = tf.reduce_mean(
                self.unlabelled_loss(self.x))

        self.compute_accuracies()

        # initialize session and train
        epoch = 0
        with self.session as sess:
            sess.run(tf.global_variables_initializer())
            self.curve_array[epoch] = self.calc_curve_vals(sess, Data)
            saver = BestCheckpointSaver(save_dir=self.ckpt_dir,
                                        num_to_keep=5,
                                        maximize=True)
            while epoch < n_epochs:

                x_labelled, labels, x_unlabelled, _ = \
                    Data.next_batch(l_bs, u_bs)

                if binarize is True:
                    x_labelled = self.binarize(x_labelled)
                    x_unlabelled = self.binarize(x_unlabelled)

                fd = self.training_fd(x_labelled, labels, x_unlabelled)
                _, loss_batch = sess.run([self.optimizer, self.loss], fd)

                if Data._epochs_unlabelled > epoch:
                    self.curve_array[epoch + 1] = \
                        self.calc_curve_vals(sess, Data)

                    saver.handle(self.curve_array[epoch, 6],
                                 sess, self.global_step)
                    epoch += 1
                    if verbose == 1:
                        fd = self._printing_feed_dict(Data, x_labelled,
                                                      x_unlabelled, labels,
                                                      eval_samps, binarize)
                        self.print_verbose1(epoch, fd, sess)
                    elif verbose == 2:
                        fd = self._printing_feed_dict(Data, x_labelled,
                                                      x_unlabelled, labels,
                                                      eval_samps, binarize)
                        self.print_verbose2(epoch, fd, sess)
                    elif verbose == 3:
                        self.print_verbose3(epoch)
                        y_pred_test = sess.run([self.y_pred],
                                               {self.x: Data.data['x_test'],
                                                K.learning_phase(): 0})[0]

                        conf_mat = confusion_matrix(
                            Data.data['y_test'].argmax(1),
                            y_pred_test.argmax(1))

                        np.save(os.path.join(
                                self.output_dir,
                                'conf_mat_' + self.name + '_' + str(epoch) + '.npy'),
                                conf_mat)

                        np.save(os.path.join(
                                self.output_dir,
                                'y_pred' + self.name + '_' + str(epoch) + '.npy'),
                                y_pred_test)

                        np.save(os.path.join(
                                self.output_dir,
                                'y_true' + self.name + '_' + str(epoch) + '.npy'),
                                Data.data['y_test'])

        return self.curve_array

    def train(self,
              Data,
              n_epochs,
              l_bs,
              u_bs,
              lr,
              eval_samps=None,
              binarize=False,
              verbose=1,
              decay_ratio=0.75,
              decay_period=200,
              h_opt=False,
              keep_ckpt=True,
              restore=False):
        """ Method for training the models """
        self.data_init(Data, eval_samps, l_bs, u_bs)
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        # self.global_epoch = tf.Variable(0, trainable=False, name='global_epoch')
        self.epoch = 0
        #self.lr = self.set_learning_rate([lr[0], 1600, lr[0] / 10.0])
        self.lr = self.set_learning_rate(
            [lr[0], lr[0] / 10.0, decay_period, decay_ratio], 'exp')
        # define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        gvs = optimizer.compute_gradients(self.loss)
        # clip gradients
        capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                      for grad, var in gvs]
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = optimizer.apply_gradients(
                capped_gvs, global_step=self.global_step)

        self.y_pred = self.predict(self.x)
        self.curve_array = np.zeros((n_epochs + 1, 14))
        if self.learning_paradigm == 'unsupervised':
            self.elbo_l_curve = tf.reduce_mean(self.unlabelled_loss(self.x))
            self.qy_ll_curve = tf.reduce_mean(self.qy_loss(self.x))
            self.elbo_u_curve = tf.reduce_mean(self.unlabelled_loss(self.x))
        else:
            if self.model_name == 'adgm' or self.model_name == 'adg_dgm':
                self.elbo_l_curve = tf.reduce_mean(
                    self.labelled_loss(self.x, self.y)[0])
                self.qy_ll_curve = tf.reduce_mean(
                    self.labelled_loss(self.x, self.y)[1])
            else:
                self.elbo_l_curve = tf.reduce_mean(
                    self.labelled_loss(self.x, self.y))
                self.qy_ll_curve = tf.reduce_mean(self.qy_loss(self.x, self.y))

            self.elbo_u_curve = tf.reduce_mean(self.unlabelled_loss(self.x))

        self.compute_accuracies()

        # initialize session and train
        with self.session as sess:
            sess.run(tf.global_variables_initializer())
            if restore == True:
                saver_for_restore = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(self.ckpt_dir)
                best_ckpt = get_best_checkpoint(self.ckpt_dir)
                best_epoch = int(re.match('.*?([0-9]+)$', best_ckpt).group(1))
                best_ckpt_usable = re.sub('-([0-9]+)$', "", best_ckpt)
                saver_for_restore.restore(sess, best_ckpt_usable)
                self.epoch = best_epoch
            self.curve_array[self.epoch] = self.calc_curve_vals(sess, Data)
            if verbose == 3:
                self.print_verbose3(self.epoch)
            if keep_ckpt == True:
                saver = BestCheckpointSaver(save_dir=self.ckpt_dir,
                                            num_to_keep=2,
                                            maximize=True)
            while self.epoch < n_epochs:

                x_labelled, labels, x_unlabelled, _ = \
                    Data.next_batch(l_bs, u_bs)

                if binarize is True:
                    x_labelled = self.binarize(x_labelled)
                    x_unlabelled = self.binarize(x_unlabelled)

                fd = self.training_fd(x_labelled, labels, x_unlabelled)
                _, loss_batch = sess.run([self.optimizer, self.loss], fd)

                if Data._epochs_unlabelled > self.epoch:
                    self.epoch += 1
                    # sess.run(self.global_epoch.assign(self.epoch))
                    self.curve_array[self.epoch] = \
                        self.calc_curve_vals(sess, Data)
                    if h_opt == True and self.epoch > 20:
                        if self.curve_array[self.epoch, 12] < 0.07:
                            raise Exception('results too bad')
                    if h_opt == True and self.epoch > 40:
                        if self.curve_array[self.epoch, 12] < 0.1:
                            raise Exception('results too bad')
                    if keep_ckpt == True:
                        saver.handle(self.curve_array[self.epoch, 6], sess,
                                     self.global_step, self.epoch)
                    if verbose == 1:
                        fd = self._printing_feed_dict(Data, x_labelled,
                                                      x_unlabelled, labels,
                                                      eval_samps, binarize)
                        self.print_verbose1(self.epoch, fd, sess)
                    elif verbose == 2:
                        fd = self._printing_feed_dict(Data, x_labelled,
                                                      x_unlabelled, labels,
                                                      eval_samps, binarize)
                        self.print_verbose2(self.epoch, fd, sess)
                    elif verbose == 3:
                        self.print_verbose3(self.epoch)
                        if self.epoch % 10 == 0:
                            y_pred_test = sess.run([self.y_pred], {
                                self.x: Data.data['x_test'],
                                K.learning_phase(): 0
                            })[0]
                            conf_mat = confusion_matrix(
                                Data.data['y_test'].argmax(1),
                                y_pred_test.argmax(1))
                            np.save(
                                os.path.join(
                                    self.output_dir, 'conf_mat_' + self.name +
                                    '_' + str(self.epoch) + '.npy'), conf_mat)
                            np.save(
                                os.path.join(
                                    self.output_dir, 'y_pred_' + self.name +
                                    '_' + str(self.epoch) + '.npy'),
                                y_pred_test)
                            np.save(
                                os.path.join(
                                    self.output_dir, 'y_true_' + self.name +
                                    '_' + str(self.epoch) + '.npy'),
                                Data.data['y_test'])
                    if np.sum(np.isnan(self.curve_array)) > 0:
                        print(
                            'loss is nan, going back to previous best checkpoint'
                        )
                        best_ckpt = get_best_checkpoint(self.ckpt_dir)
                        best_epoch = int(
                            re.match('.*?([0-9]+)$', best_ckpt).group(1))
                        best_ckpt_usable = re.sub('-([0-9]+)$', "", best_ckpt)
                        self.epoch = best_epoch
                        saver._saver.restore(sess, best_ckpt_usable)
        return self.curve_array
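Example #5 recovers the epoch number and a restorable path from the best checkpoint's filename with two regexes. A standalone reproduction of that logic on a hypothetical path:

import re

best_ckpt = 'checkpoints/best/model.ckpt-37'  # hypothetical path
best_epoch = int(re.match('.*?([0-9]+)$', best_ckpt).group(1))
best_ckpt_usable = re.sub('-([0-9]+)$', '', best_ckpt)
print(best_epoch)        # 37
print(best_ckpt_usable)  # checkpoints/best/model.ckpt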