Example #1
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import utility  # project-local helpers (llprint, tb_flush, tb_setup_BasicFFClassifier)
# make_parallel (the multi-GPU model wrapper used below) is assumed to be
# defined or imported elsewhere in this project.
def save_model(session, save_path, ckpt_filename):
    utility.llprint('Storing snapshot')
    if '.ckpt' not in ckpt_filename:
        raise ValueError('.ckpt should be in ckpt_filename.')
    saver = tf.train.Saver()
    saver.save(session, os.path.join(save_path, ckpt_filename))
    utility.llprint("Done!\n")
def fftrain(**kwargs):

    session = kwargs['session']
    model_gpu_addresses = kwargs['model_gpu_addresses']

    raw_input_size = kwargs['raw_input_size']
    batch_size = kwargs['batch_size']
    num_categories = kwargs['num_categories']
    learning_rate = kwargs['learning_rate']
    clip_gradient = kwargs['clip_gradient']
    dropout_keep_prob = kwargs['dropout_keep_prob']
    threshold_loss = kwargs['threshold_loss']
    num_min_train_imgs = kwargs['num_min_train_imgs']
    num_max_train_imgs = kwargs['num_max_train_imgs']
    num_val_period_imgs = kwargs['num_val_period_imgs']
    num_val_imgs = kwargs['num_val_imgs']

    model_obj = kwargs['model_obj']
    model_init_args = kwargs['model_init_args']
    model_name = kwargs['model_name']
    train_data_obj = kwargs['train_data_obj']
    train_data_init_args = kwargs['train_data_init_args']
    val_data_obj = kwargs['val_data_obj']
    val_data_init_args = kwargs['val_data_init_args']

    tb_logs_dir = kwargs.get('tb_logs_dir')
    extra_tensors_to_grab = kwargs.get('extra_tensors_to_grab')
    save_ckpt_as = kwargs.get('save_ckpt_as')
    save_learningcurve_as = kwargs.get('save_learningcurve_as')
    learningcurve_type = kwargs.get('learningcurve_type')
    save_textsummary_as = kwargs.get('save_textsummary_as')
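    # NOTE: every kwargs['...'] access above is required and raises KeyError
    # if missing; the six kwargs.get(...) entries are optional and default to
    # None, which disables the corresponding feature below.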

    ############### SET UP GRAPH ###############
    utility.llprint("Building Computational Graph... ")

    ##### SETUP INPUT & OUTPUT PLACEHOLDERS
    input_placeholder = tf.placeholder(tf.float32,
                                       [batch_size] + raw_input_size,
                                       name='input_placeholder')
    keep_prob_placeholder = tf.placeholder(tf.float32,
                                           name='keep_prob_placeholder')
    target_output_placeholder = tf.placeholder(
        tf.float32, [batch_size] + [1, 1, num_categories],
        name='target_output_placeholder')

    ##### SETUP DATA FETCHER
    train_data_generator = train_data_obj(raw_input_size, batch_size)
    train_data_generator.initialize_vars(**train_data_init_args)
    val_data_generator = val_data_obj(raw_input_size, batch_size)
    val_data_generator.initialize_vars(**val_data_init_args)
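    # NOTE: the data objects are duck-typed; this function only relies on
    # obj(raw_input_size, batch_size), initialize_vars(**kwargs),
    # single_batch() -> (inputs, onehot_targets), and get_tracker(), which is
    # read back after training.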

    ##### SETUP MODEL (THIS PART IS MODEL-SPECIFIC)
    model = model_obj(name=model_name,
                      input_size=raw_input_size,
                      batch_size=batch_size,
                      gpu_addresses=model_gpu_addresses)
    model.initialize_vars(**model_init_args)

    ##### COMPUTE OUTPUT, LOSS and GRADIENTS
    model_output = make_parallel(model,
                                 model_gpu_addresses,
                                 keep_prob_placeholder,
                                 name='model_output',
                                 X=input_placeholder)
    model_output_argmax = tf.cast(tf.argmax(model_output, axis=-1), tf.float32)

    def ace(output, target, name=None):
        # Elementwise binary cross-entropy between the softmaxed output and
        # the softmaxed target; the epsilon keeps tf.log away from log(0).
        eps = 1e-8
        output_softmax = tf.nn.softmax(output, dim=-1)
        target_softmax = tf.nn.softmax(target, dim=-1)
        return -tf.reduce_mean(
            target_softmax * tf.log(output_softmax + eps) +
            (1 - target_softmax) * tf.log(1 - output_softmax + eps),
            name=name)

    def mse(output, target, name=None):
        return tf.reduce_mean(tf.square(target - output), name=name)

    def acc(output, target, name=None):
        # Fraction of examples whose predicted and target classes agree.
        return tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(output, axis=-1),
                             tf.argmax(target, axis=-1)), tf.float32),
            name=name)

    loss = ace(model_output, target_output_placeholder)
    accuracy = acc(model_output, target_output_placeholder, name='accuracy')
    average = tf.reduce_mean(model_output_argmax)
    variance = tf.reduce_mean(tf.square(model_output_argmax - average))
    # Build the optimizer and gradients unconditionally so that `gradients`
    # is also available to the TensorBoard summaries below.
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss)
    if clip_gradient:
        # Clip each per-variable gradient norm to limit exploding updates.
        gradients = [(tf.clip_by_norm(grad, 1e+3), var) if grad is not None
                     else (grad, var) for grad, var in gradients]
    apply_gradients = optimizer.apply_gradients(gradients)
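    # An alternative (not used here) is clipping by the global norm across
    # all gradients, a sketch:
    #     grads, variables = zip(*gradients)
    #     grads, _ = tf.clip_by_global_norm(grads, 1e+3)
    #     gradients = list(zip(grads, variables))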
    utility.llprint("Done!\n")

    ############### SET UP TENSORBOARD AND CHRONICLER ###############
    if tb_logs_dir is not None:
        if not os.path.exists(tb_logs_dir):
            os.makedirs(tb_logs_dir)
        utility.tb_flush(tb_logs_dir)
        summarize_op, summarizer, summaries = utility.tb_setup_BasicFFClassifier(
            tb_logs_dir,
            tf.reduce_sum(input_placeholder, axis=3, keep_dims=True),
            model_output, target_output_placeholder, gradients, loss, accuracy,
            average, variance, session)
    # TODO: Not implemented yet
    # if history_obj is not None:
    #     history_obj.initialize_vars(session, graph, **history_init_args)
    #     history_op = history_obj.get_history_op()
    no_op = tf.no_op()

    ############### TRAIN ###############
    utility.llprint("Initializing Variables ... ")
    session.run(tf.global_variables_initializer())
    if save_ckpt_as is not None:
        saver = tf.train.Saver()
    utility.llprint("Done!\n")

    learned = False
    max_train_iters = int(num_max_train_imgs / batch_size)
    min_train_iters = int(num_min_train_imgs / batch_size)
    val_iters = int(num_val_imgs / batch_size)
    val_period_iters = int(num_val_period_imgs / batch_size)
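    # For example (hypothetical numbers): num_max_train_imgs=1000000 with
    # batch_size=50 gives max_train_iters=20000, and num_val_period_imgs=50000
    # gives a validation pass every 1000 training iterations.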
    titer = 0
    imgs_to_acquisition = 0
    learning_curve = []
    time_per_iter = []
    mean_accuracy = np.nan  # stays NaN if no validation pass ever runs
    while titer < min_train_iters or (titer < max_train_iters and not learned):
        titer += 1
        utility.llprint("\r(T) Iteration %d/%d" % (titer, max_train_iters))
        validation_period_reached = ((titer + 1) % val_period_iters == 0)

        # READ DATA FROM TRAINING DATASET
        data_obj_out = train_data_generator.single_batch()
        input_data_fetched = data_obj_out[0]
        target_output_onehot_fetched = data_obj_out[1]

        # RUN NETWORK (fetch the summary op only on validation iterations)
        t = time.time()
        write_summaries = validation_period_reached and tb_logs_dir is not None
        session_results = session.run(
            [apply_gradients, summarize_op if write_summaries else no_op],
            feed_dict={
                input_placeholder: input_data_fetched,
                keep_prob_placeholder: dropout_keep_prob,
                target_output_placeholder: target_output_onehot_fetched
            })
        time_per_iter.append(time.time() - t)
        summarize_op_fetched = session_results[-1]

        ############### ACCUMULATE LEARNING CURVE & DISPLAY PROGRESS ###############
        if validation_period_reached:
            accuracies = []
            utility.llprint("\n")
            print('Average per-iteration runtime: ' +
                  str(np.mean(time_per_iter)))
            utility.llprint("\n")
            if tb_logs_dir is not None:
                summarizer.add_summary(summarize_op_fetched, titer)
            for viter in range(val_iters):
                utility.llprint("\r(V) Iteration %d/%d" % (viter, val_iters))

                # READ DATA FROM VALIDATION DATASET
                data_obj_out = val_data_generator.single_batch()
                input_data_fetched = data_obj_out[0]
                target_output_onehot_fetched = data_obj_out[1]

                # RUN NETWORK
                session_results = session.run(
                    [accuracy],
                    feed_dict={
                        input_placeholder: input_data_fetched,
                        keep_prob_placeholder: 1.,
                        target_output_placeholder: target_output_onehot_fetched
                    })
                accuracies.append(session_results[0])
            mean_accuracy = np.mean(accuracies)
            utility.llprint("\n\tValidation Avg. Accuracy: %.4f\n" %
                            (mean_accuracy))
            learning_curve.append(mean_accuracy)
            utility.llprint("\n")
            # Despite its name, threshold_loss is compared against validation
            # accuracy: training may stop early once accuracy exceeds it.
            if mean_accuracy > threshold_loss:
                learned = True
                imgs_to_acquisition = titer * batch_size
    utility.llprint("\nTraining Complete.\n")

    ############### GET TRAIN DATA DISTRIBUTION ###############
    train_data_tracker = train_data_generator.get_tracker()

    ############### DETERMINE IMGS TO ACQUISITION ###############
    if imgs_to_acquisition == 0:
        # The accuracy threshold was never reached within max_train_iters.
        imgs_to_acquisition = np.inf

    ############### GRAB EXTRA TENSORS AS REQUESTED ###############
    utility.llprint("\nGrabbing extra tensors.\n")
    tensors_grabbed_extra = []
    if extra_tensors_to_grab is not None:
        for tn in extra_tensors_to_grab:
            tensors_grabbed_extra.append(
                tf.get_default_graph().get_tensor_by_name(tn))

    ############### DRAW AND SAVE LEARNING CURVE (VALIDATION ACC) ###############

    if save_learningcurve_as is not None:
        # CREATE THE OUTPUT DIRECTORY IF NEEDED
        if not os.path.exists(os.path.split(save_learningcurve_as)[0]):
            os.makedirs(os.path.split(save_learningcurve_as)[0])
        if learningcurve_type is None or learningcurve_type == 'figure':
            utility.llprint("\nRendering learning curve.\n")
            tick_value_multiplier = 1. / 1000000
            num_ticks = 6
            plt.figure(figsize=(8, 6))  # roughly in inches
            plt.plot(learning_curve, color='blue', alpha=0.6)
            plt.xlabel('(x' + str(1. / tick_value_multiplier) + ') Images',
                       fontsize=16)
            plt.ylabel('Accuracy', fontsize=16)
            plt.xticks(
                np.linspace(0, max_train_iters / val_period_iters, num_ticks),
                ((10 * np.linspace(0, max_train_iters / val_period_iters,
                                   num_ticks)).astype(int)).astype(float) / 10)
            plt.tick_params(axis='both', which='major', labelsize=16)
            plt.xlim((0, max_train_iters / val_period_iters))
            plt.ylim((0.45, 1.05))
            plt.yticks([0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
                       [0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
            plt.savefig(save_learningcurve_as)
            plt.clf()
        elif learningcurve_type == 'array':
            np.save(save_learningcurve_as, learning_curve)
        else:
            raise ValueError(
                'learningcurve_type should be None, figure, or array.')

    ############### SAVE TEXT SUMMARY ###############
    if save_textsummary_as is not None:
        with open(save_textsummary_as, 'w') as text_summary:
            text_summary.write('Final accuracy: ' + str(mean_accuracy) + '\n')
            text_summary.write('Total iterations: ' + str(titer) + '\n')
            text_summary.write('Total images: ' + str(imgs_to_acquisition))

    ############### SAVE TRAINED NET ###############
    utility.llprint("\nSaving checkpoint.\n")
    if save_ckpt_as is not None:
        # SAVE HISTORY
        if not os.path.exists(os.path.split(save_ckpt_as)[0]):
            os.makedirs(os.path.split(save_ckpt_as)[0])
        saver.save(session, save_ckpt_as + '.ckpt')

    return input_placeholder, target_output_placeholder, keep_prob_placeholder, \
           accuracy, tensors_grabbed_extra, \
           train_data_tracker, imgs_to_acquisition
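# A minimal invocation sketch (the class names and numbers below are
# hypothetical; the model/data classes must provide the duck-typed
# interfaces noted inside fftrain):
#
#     with tf.Session() as sess:
#         results = fftrain(
#             session=sess, model_gpu_addresses=[0],
#             raw_input_size=[224, 224, 3], batch_size=32, num_categories=10,
#             learning_rate=1e-4, clip_gradient=True, dropout_keep_prob=0.5,
#             threshold_loss=0.95, num_min_train_imgs=10000,
#             num_max_train_imgs=1000000, num_val_period_imgs=50000,
#             num_val_imgs=10000, model_obj=MyModel, model_init_args={},
#             model_name='my_model', train_data_obj=MyDataset,
#             train_data_init_args={}, val_data_obj=MyDataset,
#             val_data_init_args={}, save_ckpt_as='checkpoints/my_model')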