Example No. 1
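All of the examples below are TF 1.x, queue-runner-era code excerpted from a larger project, and none of them shows its imports. A minimal sketch of the shared preamble they assume; the project-local helper modules named in the trailing comments are inferred from usage, not shown in the source:

import math
import os
import pickle
import tempfile
import time
from textwrap import wrap

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf  # TF 1.x; these examples predate tf.data / TF 2

# Project-local helpers (hypothetical module names, inferred from usage):
# from data_loader import inputs
# from models import cnn_model_struct, cnn_reverse_model, classification_model
# from losses import (kl_divergence, kl_divergence_test,
#                     heteroskedastic_loss, heteroskedastic_cov_loss)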
def test_model_eval(config):
    test_files = os.path.join(
                        config.base_dir,
                        config.tfrecord_dir,
                        config.test_tfrecords)

    errors = []
    data, labels, preds = [], [], []

    with tf.device('/cpu:0'): 
        test_data, test_labels = inputs(
                                        tfrecord_file=test_files,
                                        num_epochs=1,
                                        batch_size=config.test_batch,
                                        target_data_dims=config.param_dims,
                                        target_label_dims=config.output_hist_dims)
    with tf.device('/gpu:0'):
        with tf.variable_scope("model") as scope:
            model = cnn_model_struct()
            model.build(test_data, config.param_dims[1:],
                        config.output_hist_dims[1:], train_mode=False)
            y_conv = model.output
            error = kl_divergence_test(
                y_conv,
                tf.reshape(test_labels,
                           [-1, np.prod(config.output_hist_dims[1:])]))

        gpuconfig = tf.ConfigProto()
        gpuconfig.gpu_options.allow_growth = True
        gpuconfig.allow_soft_placement = True
        saver = tf.train.Saver()

        with tf.Session(config=gpuconfig) as sess:
            init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            step = 0
            # restore the latest checkpoint once, before iterating over batches
            ckpts = tf.train.latest_checkpoint(config.model_output)
            saver.restore(sess, ckpts)
            try:
                while not coord.should_stop():
                    ip, op, pred, err = sess.run(
                        [test_data, test_labels, y_conv, error])
                    batch_err = np.sum(err, axis=1)
                    errors.append(batch_err)
                    data.append(ip)
                    labels.append(op)
                    preds.append(pred)
                    print('{} batches complete..'.format(len(errors)))
            except tf.errors.OutOfRangeError:
                print('Epoch limit reached!')
            finally:
                coord.request_stop()
            coord.join(threads)
    
    err_vals = np.array(errors).reshape((-1,))
    plt.hist(err_vals, bins=1000)
    plt.title('Model: %s, min error=%0.3f, max error=%0.3f'%(config.model_name,np.min(err_vals), np.max(err_vals)), fontsize=12)
    plt.gca().tick_params(axis='both', which='major', labelsize=6)
    plt.gca().tick_params(axis='both', which='minor', labelsize=6)
    plt.savefig(os.path.join(config.results_dir, '{}_eval.png'.format(config.model_name)), dpi=300)
    plt.close()

    inp_data = np.array(data)
    inp_data = inp_data.reshape((inp_data.shape[0]*inp_data.shape[1],inp_data.shape[2],inp_data.shape[3]))
    inp_labs = np.array(labels)
    inp_labs = inp_labs.reshape((inp_labs.shape[0]*inp_labs.shape[1],inp_labs.shape[2],inp_labs.shape[3]))
    idx = np.argsort(err_vals)
    net_preds = np.array(preds)
    net_preds = net_preds.reshape((net_preds.shape[0]*net_preds.shape[1],net_preds.shape[2]))
    net_preds = net_preds.reshape(inp_labs.shape)

    # draw a 3x3 grid of the nine highest-error examples
    fig, ax = plt.subplots(3, 3)
    for k in range(9):
        r, c = k // 3, k % 3
        cur_idx = idx[-1 * (k + 1)]
        parameters = np.around(inp_data[cur_idx].flatten(),decimals=2)
        err = err_vals[cur_idx]
        ax[r,c].plot(inp_labs[cur_idx],'r',alpha=0.5)
        ax[r,c].plot(net_preds[cur_idx],'-.g',alpha=0.5)
        mystr = 'err=%0.2f' % err
        ax[r,c].text(0.9, 0.9,
                     "\n".join(wrap('{}, params:{}'.format(mystr, parameters), 30)),
                     fontsize=6, horizontalalignment='right',
                     verticalalignment='center', transform=ax[r,c].transAxes)
        ax[r,c].tick_params(axis='both', which='major', labelsize=6)
        ax[r,c].tick_params(axis='both', which='minor', labelsize=6)
    plt.savefig(os.path.join(config.results_dir, '{}_debug.png'.format(config.model_name)),dpi=300)
    plt.close()
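The helpers kl_divergence and kl_divergence_test are project code that the excerpts never define. Given that evaluation sums the result over axis 1 to get one error per batch element, while training treats it as a scalar loss, a plausible sketch is the following; the epsilon smoothing and the exact reductions are assumptions:

def kl_divergence_test(preds, labels, eps=1e-8):
    # elementwise terms of D_KL(labels || preds); summing over axis 1
    # yields one divergence value per batch element
    return labels * (tf.log(labels + eps) - tf.log(preds + eps))


def kl_divergence(preds, labels, eps=1e-8):
    # scalar training loss: batch mean of the per-example divergence
    return tf.reduce_mean(
        tf.reduce_sum(labels * (tf.log(labels + eps) - tf.log(preds + eps)),
                      axis=1))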
Example No. 2
def train_reverse_model(config):

    train_files = os.path.join(config.base_dir, config.tfrecord_dir,
                               config.train_tfrecords)
    val_files = os.path.join(config.base_dir, config.tfrecord_dir,
                             config.val_tfrecords)

    with tf.device('/cpu:0'):
        train_labels, train_data = inputs(
            tfrecord_file=train_files,
            num_epochs=config.epochs,
            batch_size=config.train_batch,
            target_data_dims=config.param_dims,
            target_label_dims=config.output_hist_dims)
        val_labels, val_data = inputs(
            tfrecord_file=val_files,
            num_epochs=config.epochs,
            batch_size=config.val_batch,
            target_data_dims=config.param_dims,
            target_label_dims=config.output_hist_dims)
    with tf.device('/gpu:0'):
        with tf.variable_scope("reversemodel") as scope:
            print("creating the model")
            model = cnn_reverse_model()
            model.build(train_data,
                        config.output_hist_dims[1:],
                        config.param_dims[1:],
                        train_mode=True,
                        full_cov=config.full_cov_matrix)
            y_conv = model.output
            nparams = np.prod(config.param_dims[1:])

            # Define loss and optimizer
            with tf.name_scope('loss'):
                labels = tf.reshape(train_labels, [-1, nparams])

                #### depending on the config, use the appropriate loss
                if config.full_cov_matrix:
                    hke_loss, cov_sym = heteroskedastic_cov_loss(
                        y_conv, labels, nparams)
                else:
                    hke_loss = heteroskedastic_loss(y_conv, labels, nparams)

            with tf.name_scope('adam_optimizer'):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_step = tf.train.AdamOptimizer(1e-4).minimize(
                        hke_loss)

            #####
            ## VALIDATION
            #####
            print("building a validation model")
            scope.reuse_variables()
            val_model = cnn_reverse_model()
            val_model.build(val_data,
                            config.output_hist_dims[1:],
                            config.param_dims[1:],
                            train_mode=False,
                            full_cov=config.full_cov_matrix)
            val_res = val_model.output
            norm_val_labels = tf.reshape(val_labels, [-1, nparams])

            #### select loss function for the val model as well
            if config.full_cov_matrix:
                val_loss, _ = heteroskedastic_cov_loss(val_res,
                                                       norm_val_labels,
                                                       nparams)
            else:
                val_loss = heteroskedastic_loss(val_res, norm_val_labels,
                                                nparams)

            tf.summary.scalar("loss", hke_loss)
            summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

    gpuconfig = tf.ConfigProto()
    gpuconfig.gpu_options.allow_growth = True
    gpuconfig.allow_soft_placement = True

    with tf.Session(config=gpuconfig) as sess:
        train_writer = tf.summary.FileWriter(
            os.path.join(config.base_dir, config.summary_dir,
                         config.model_name))
        train_writer.add_graph(tf.get_default_graph())

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        step = 0
        start = time.time()
        try:
            while not coord.should_stop():
                # train for a step
                if config.full_cov_matrix:
                    _, loss, outputs, tr_data, tr_labels, norm_tr_labels, cov_mat = sess.run(
                        [
                            train_step, hke_loss, y_conv, train_data,
                            train_labels, labels, cov_sym
                        ])
                else:
                    _, loss, outputs, tr_data, tr_labels, norm_tr_labels = sess.run(
                        [
                            train_step, hke_loss, y_conv, train_data,
                            train_labels, labels
                        ])

                step += 1
                if step % config.print_iters == 0:
                    finish = time.time()
                    print("step={}, loss={}, time_elapsed={} s/step".format(
                        step, loss,
                        (finish - start) / float(config.print_iters)))
                    start = finish
                    saver.save(sess,
                               os.path.join(
                                   config.model_output, config.model_name +
                                   '_' + str(step) + '.ckpt'),
                               global_step=step)
                    if config.full_cov_matrix:
                        print(cov_mat)

                if step % config.val_iters == 0:
                    val_forward_pass_time = time.time()
                    v_data, v_labels, norm_v_labels, v_res, v_loss = sess.run([
                        val_data, val_labels, norm_val_labels, val_res,
                        val_loss
                    ])

                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                    print("\t val loss = {}, time_elapsed = {}s".format(
                        v_loss,
                        time.time() - val_forward_pass_time))
                    if config.full_cov_matrix:
                        data_dump = {
                            'predictions': outputs,
                            'labels': norm_tr_labels,
                            'cov': cov_mat
                        }
                        pickle.dump(
                            data_dump,
                            open(
                                os.path.join(config.base_dir,
                                             config.summary_dir,
                                             config.model_name,
                                             'step%d.pickle' % step), 'wb'))

        except tf.errors.OutOfRangeError:
            print("Finished training for %d epochs" % config.epochs)
        finally:
            coord.request_stop()
            coord.join(threads)
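heteroskedastic_loss is likewise project-local. If, as the name and the reverse model's setup suggest, the network predicts a mean and a log-variance for each of the nparams targets (so its output has 2 * nparams columns), a Gaussian negative log-likelihood in the style of Kendall & Gal is the standard choice. The output layout below is an assumption, and the full-covariance variant (which additionally returns the covariance tensor) is not sketched:

def heteroskedastic_loss(preds, labels, nparams):
    # assumed layout: columns [0, nparams) are means,
    # columns [nparams, 2 * nparams) are log-variances
    mu = preds[:, :nparams]
    log_var = preds[:, nparams:2 * nparams]
    # per-dimension Gaussian NLL (up to a constant), averaged over
    # batch and parameter dimensions
    nll = 0.5 * (tf.exp(-log_var) * tf.square(labels - mu) + log_var)
    return tf.reduce_mean(nll)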
Example No. 3
def train_model(config):

    train_files = os.path.join(
                        config.base_dir,
                        config.tfrecord_dir,
                        config.train_tfrecords)
    val_files = os.path.join(
                        config.base_dir,
                        config.tfrecord_dir,
                        config.val_tfrecords)

    with tf.device('/cpu:0'): 
        train_data, train_labels = inputs(
                                        tfrecord_file=train_files,
                                        num_epochs=config.epochs,
                                        batch_size=config.train_batch,
                                        target_data_dims=config.param_dims,
                                        target_label_dims=config.output_hist_dims)
        val_data, val_labels = inputs(
                                        tfrecord_file=val_files,
                                        num_epochs=config.epochs,
                                        batch_size=config.val_batch,
                                        target_data_dims=config.param_dims,
                                        target_label_dims=config.output_hist_dims)

    with tf.device('/gpu:0'):
        with tf.variable_scope("model") as scope:
            print ("creating the model")
            model = cnn_model_struct()
            model.build(train_data, config.param_dims[1:], config.output_hist_dims[1:],train_mode=True)
            y_conv = model.output

            # Define loss and optimizer
            with tf.name_scope('loss'):
                kl_divergence_loss = kl_divergence(
                    y_conv,
                    tf.reshape(train_labels,
                               [-1, np.prod(config.output_hist_dims[1:])]))

            with tf.name_scope('adam_optimizer'):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_step = tf.train.AdamOptimizer(1e-4).minimize(kl_divergence_loss)

            #####
            ## VALIDATION
            #####
            print("building a validation model")
            scope.reuse_variables()
            val_model = cnn_model_struct()
            val_model.build(val_data, config.param_dims[1:],
                            config.output_hist_dims[1:], train_mode=False)
            val_res = val_model.output
            val_loss = kl_divergence(
                val_res,
                tf.reshape(val_labels,
                           [-1, np.prod(config.output_hist_dims[1:])]))

            tf.summary.scalar("loss", kl_divergence_loss)
            summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

    gpuconfig = tf.ConfigProto()
    gpuconfig.gpu_options.allow_growth = True
    gpuconfig.allow_soft_placement = True

    with tf.Session(config=gpuconfig) as sess:
        train_writer = tf.summary.FileWriter(
            os.path.join(config.base_dir, config.summary_dir))
        train_writer.add_graph(tf.get_default_graph())

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        step = 0
        start = time.time()
        try:
            while not coord.should_stop():
                # train for a step
                _, loss, softmax_outputs, tr_data, tr_labels = sess.run(
                    [train_step, kl_divergence_loss, y_conv, train_data,
                     train_labels])
                step += 1
                if math.isnan(loss):
                    raise ValueError('loss became NaN at step {}'.format(step))
                if step % config.print_iters == 0:
                    finish = time.time()
                    print("step={}, loss={}, time_elapsed={} s/step".format(step,loss,(finish-start)/float(config.print_iters)))
                    start = finish
                    saver.save(sess,os.path.join(
                        config.model_output,
                        config.model_name+'_'+str(step)+'.ckpt'
                    ),global_step=step)

                if step % config.val_iters == 0:
                    val_forward_pass_time = time.time()
                    v_data, v_labels, v_res, v_loss = sess.run([val_data, val_labels, val_res, val_loss])

                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                    print("\t val loss = {}, time_elapsed = {}s".format(v_loss, time.time() - val_forward_pass_time))
                    
                    # plot the first validation example: predictions vs. data
                    X = v_res[0].reshape(-1, config.output_hist_dims[-1])
                    plt.plot(X, color='r', alpha=0.5, label='Predictions')
                    plt.plot(v_labels[0], 'g', alpha=0.5, label='Data')
                    plt.legend()
                    plt.pause(1)
                    plt.clf()
                    
        except tf.errors.OutOfRangeError:
            print("Finished training for %d epochs" % config.epochs)
        finally:
            coord.request_stop()
            coord.join(threads)
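None of the excerpts defines the config object they all thread through. For a quick smoke test of the histogram-regression functions (Examples 1-3), a plain namespace carrying the referenced fields is enough; every value below is a placeholder guess, not the project's actual setting:

from types import SimpleNamespace

config = SimpleNamespace(
    base_dir='/path/to/project',            # placeholder paths
    tfrecord_dir='tfrecords',
    train_tfrecords='train.tfrecords',
    val_tfrecords='val.tfrecords',
    test_tfrecords='test.tfrecords',
    epochs=10,
    train_batch=32, val_batch=32, test_batch=32,
    param_dims=[None, 1, 7, 1],             # guessed shapes
    output_hist_dims=[None, 32, 16],
    model_output='checkpoints',
    model_name='cnn_model',
    summary_dir='summaries',
    results_dir='results',
    print_iters=100, val_iters=500,
    full_cov_matrix=False,
)
train_model(config)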
Example No. 4
def test_model_eval(config):
    test_data = os.path.join(config.tfrecord_dir, config.test_tfrecords)
    with tf.device('/cpu:0'):
        test_images, test_labels = inputs(
            tfrecord_file=test_data,
            num_epochs=None,
            image_target_size=config.image_target_size,
            label_shape=config.num_classes,
            batch_size=config.test_batch,
            augmentation=False)

    with tf.device('/gpu:0'):
        with tf.variable_scope("model") as scope:
            model = cnn_model_struct()
            model.build(test_images, config.num_classes, train_mode=False)
            results = tf.argmax(model.output, 1)
            # note: despite the name, this measures accuracy (fraction correct)
            error = tf.reduce_mean(
                tf.cast(tf.equal(results, tf.cast(test_labels, tf.int64)),
                        tf.float32))

        gpuconfig = tf.ConfigProto()
        gpuconfig.gpu_options.allow_growth = True
        gpuconfig.allow_soft_placement = True
        saver = tf.train.Saver()

        with tf.Session(config=gpuconfig) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            step = 0
            # restore the latest checkpoint once, before iterating over batches
            ckpts = tf.train.latest_checkpoint(config.model_output)
            saver.restore(sess, ckpts)
            try:
                while not coord.should_stop():
                    ims, labs, probs, err, res = sess.run([
                        test_images, test_labels, model.output, error, results
                    ])
                    print('batch accuracy = {}'.format(err))
            except tf.errors.OutOfRangeError:
                print('Epoch limit reached!')
            finally:
                coord.request_stop()
            coord.join(threads)


def get_model_predictions(config, patches):
    # input placeholder for a batch of image patches
    input_ph = tf.placeholder(
        tf.float32,
        [None, config.image_target_size[0], config.image_target_size[1],
         config.image_target_size[2]],
        name='ip_placeholder')
    with tf.device('/gpu:0'):
        with tf.variable_scope("model"):
            model = cnn_model_struct()
            model.build(input_ph, config.num_classes, train_mode=False)

        gpuconfig = tf.ConfigProto()
        gpuconfig.gpu_options.allow_growth = True
        gpuconfig.allow_soft_placement = True
        saver = tf.train.Saver()

        with tf.Session(config=gpuconfig) as sess:
            # restore the latest checkpoint and run a single forward pass
            ckpts = tf.train.latest_checkpoint(config.model_output)
            saver.restore(sess, ckpts)
            probs = sess.run(model.output, feed_dict={input_ph: patches})
    return probs
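A hypothetical invocation of get_model_predictions, using dummy zero-valued patches purely to illustrate the expected shapes (the patch count of 8 is arbitrary):

# hypothetical usage: dummy patches with the shape the placeholder expects
patches = np.zeros([8] + list(config.image_target_size), dtype=np.float32)
probs = get_model_predictions(config, patches)
print(probs.argmax(axis=1))  # predicted class per patch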
Example No. 5
def train_model(config):
    train_data = os.path.join(config.tfrecord_dir, config.train_tfrecords)
    val_data = os.path.join(config.tfrecord_dir, config.val_tfrecords)

    with tf.device('/cpu:0'):
        train_images, train_labels = inputs(
            tfrecord_file=train_data,
            num_epochs=config.epochs,
            image_target_size=config.image_target_size,
            label_shape=config.num_classes,
            batch_size=config.train_batch,
            augmentation=True)

        val_images, val_labels = inputs(
            tfrecord_file=val_data,
            num_epochs=config.epochs,
            image_target_size=config.image_target_size,
            label_shape=config.num_classes,
            batch_size=config.val_batch)

    with tf.device('/gpu:0'):
        with tf.variable_scope("model") as scope:
            print("creating the model")
            # Create the model
            # x = tf.placeholder(tf.float32, [None, config.image_target_size[0],config.image_target_size[1],config.image_target_size[2]])
            # y_ = tf.placeholder(tf.int64, [None,1])

            # Build the graph for the deep net
            #y_conv, keep_prob = deepnn(train_images)
            #y_conv = deepnn(train_images)
            model = cnn_model_struct()
            model.build(train_images, config.num_classes, train_mode=True)
            y_conv = model.output

            y_ = tf.cast(train_labels, tf.int64)
            yhat = tf.argmax(y_conv, 1)

            # Define loss and optimizer
            with tf.name_scope('loss'):
                cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                    labels=y_, logits=y_conv)
            cross_entropy = tf.reduce_mean(cross_entropy)

            with tf.name_scope('adam_optimizer'):
                train_step = tf.train.AdamOptimizer(1e-4).minimize(
                    cross_entropy)

            with tf.name_scope('accuracy'):
                correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
                correct_prediction = tf.cast(correct_prediction, tf.float32)
            accuracy = tf.reduce_mean(correct_prediction)

            print("using validation")
            scope.reuse_variables()
            val_model = cnn_model_struct()
            val_model.build(val_images, config.num_classes, train_mode=False)
            val_results = tf.argmax(val_model.output, 1)
            # despite the name, val_error measures accuracy (fraction correct)
            val_error = tf.reduce_mean(
                tf.cast(tf.equal(val_results, tf.cast(val_labels, tf.int64)),
                        tf.float32))

            tf.summary.scalar("loss", cross_entropy)
            tf.summary.scalar("train accuracy", accuracy)
            tf.summary.scalar("validation accuracy", val_error)
            summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

    gpuconfig = tf.ConfigProto()
    gpuconfig.gpu_options.allow_growth = True
    gpuconfig.allow_soft_placement = True

    with tf.Session(config=gpuconfig) as sess:
        graph_location = tempfile.mkdtemp()
        print('Saving graph to: %s' % graph_location)
        train_writer = tf.summary.FileWriter(graph_location)
        train_writer.add_graph(tf.get_default_graph())

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)


        step = 0
        try:
            while not coord.should_stop():
                #train for a step
                _, tr_images, tr_labels, loss, softmax_outputs, pred_labels, acc = sess.run(
                    [
                        train_step, train_images, train_labels, cross_entropy,
                        y_conv, yhat, accuracy
                    ])
                print("step={}, loss={}, accuracy={}".format(step, loss, acc))
                step += 1
                #validate model
                if step % 200 == 0:
                    vl_img, vl_lab, vl_res, vl_err = sess.run(
                        [val_images, val_labels, val_results, val_error])
                    print("\t val error = {}".format(vl_err))
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                # save the model check point
                if step % 1000 == 0:
                    saver.save(sess,
                               os.path.join(
                                   config.model_output, config.model_name +
                                   '_' + str(step) + '.ckpt'),
                               global_step=step)

        except tf.errors.OutOfRangeError:
            print("Finished training for %d epochs" % config.epochs)
        finally:
            coord.request_stop()
            coord.join(threads)
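The image/label variant of inputs used by Examples 4, 5, and 7 is also not shown. A minimal TF 1.x queue-based reader with a compatible signature might look as follows; the tfrecord feature keys, the raw-float encoding, and the flip-only augmentation are all assumptions:

def inputs(tfrecord_file, num_epochs, image_target_size, label_shape,
           batch_size, augmentation=False):
    # classic TF1 pipeline: filename queue -> reader -> parser -> batcher
    filename_queue = tf.train.string_input_producer(
        [tfrecord_file], num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized,
        features={
            'image': tf.FixedLenFeature([], tf.string),  # assumed keys
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, image_target_size)
    if augmentation:
        image = tf.image.random_flip_left_right(image)
    # label_shape is accepted for signature compatibility; this sketch
    # assumes scalar integer class labels
    images, labels = tf.train.shuffle_batch(
        [image, features['label']], batch_size=batch_size,
        capacity=1000 + 3 * batch_size, min_after_dequeue=1000)
    return images, labels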
Example No. 6
def train_classification_model(config):
    train_files = os.path.join(
                        config.base_dir,
                        config.tfrecord_dir,
                        'train_model_classifier.tfrecords')
    val_files = os.path.join(
                        config.base_dir,
                        config.tfrecord_dir,
                        'val_model_classifier.tfrecords')

    with tf.device('/cpu:0'):
        train_data, train_labels = inputs(
                                        tfrecord_file=train_files,
                                        num_epochs=config.epochs,
                                        batch_size=config.train_batch,
                                        target_data_dims=[None, 1, 256, 2],
                                        target_label_dims=[None, 1, 1])
        val_data, val_labels = inputs(
                                        tfrecord_file=val_files,
                                        num_epochs=config.epochs,
                                        batch_size=config.val_batch,
                                        target_data_dims=[None, 1, 256, 2],
                                        target_label_dims=[None, 1, 1])
    with tf.device('/gpu:0'):
        with tf.variable_scope("classmodel") as scope:
            print ("creating the model")
            model = classification_model()
            model.build(train_data, [1, 256, 2], [1, 5, 1], train_mode=True, full_cov=config.full_cov_matrix)
            y_conv = model.output

            # Define loss and optimizer
            with tf.name_scope('loss'):
                labels = tf.one_hot(
                    tf.cast(tf.squeeze(train_labels), dtype=tf.uint8), 5)
                loss = tf.reduce_sum(
                    tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                            logits=y_conv))

            with tf.name_scope('adam_optimizer'):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

            #####
            ## VALIDATION
            #####
            print("building a validation model")
            scope.reuse_variables()
            val_model = classification_model()
            val_model.build(val_data, [1, 256, 2], [1, 5, 1],
                            train_mode=False, full_cov=config.full_cov_matrix)
            val_res = val_model.output
            val_loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=tf.one_hot(
                        tf.cast(tf.squeeze(val_labels), dtype=tf.uint8), 5),
                    logits=val_res))

            tf.summary.scalar("loss", val_loss)
            summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

    gpuconfig = tf.ConfigProto()
    gpuconfig.gpu_options.allow_growth = True
    gpuconfig.allow_soft_placement = True

    with tf.Session(config=gpuconfig) as sess:
        train_writer = tf.summary.FileWriter(os.path.join(config.base_dir,config.summary_dir,config.model_name))
        train_writer.add_graph(tf.get_default_graph())

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        step = 0
        start = time.time()
        try:
            while not coord.should_stop():
                # train for a step
                _, loss_np, outputs, tr_data, tr_labels = sess.run([train_step, loss, y_conv, train_data, train_labels])

                step += 1
                if step % config.print_iters == 0:
                    finish = time.time()
                    print("step={}, loss={}, time_elapsed={} s/step".format(
                        step, loss_np,
                        (finish - start) / float(config.print_iters)))
                    start = finish
                    saver.save(sess,
                               os.path.join(
                                   config.model_output, config.model_name +
                                   '_' + str(step) + '.ckpt'),
                               global_step=step)

                if step % config.val_iters == 0:
                    val_forward_pass_time = time.time()
                    v_data, v_labels, v_res, v_loss = sess.run(
                        [val_data, val_labels, val_res, val_loss])

                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                    print("\t val loss = {}, time_elapsed = {}s".format(
                        v_loss,
                        time.time() - val_forward_pass_time))
                    if config.full_cov_matrix:
                        # dump the latest training predictions and labels
                        data_dump = {'predictions': outputs,
                                     'labels': tr_labels}
                        pickle.dump(
                            data_dump,
                            open(
                                os.path.join(config.base_dir,
                                             config.summary_dir,
                                             config.model_name,
                                             'step%d.pickle' % step), 'wb'))
		    
        except tf.errors.OutOfRangeError:
            print("Finished training for %d epochs" % config.epochs)
        finally:
            coord.request_stop()
            coord.join(threads)
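Because train_labels in Example 6 are integer class ids, the one-hot-plus-softmax construction can be written more directly with TensorFlow's sparse variant; a drop-in equivalent for the loss block above:

with tf.name_scope('loss'):
    # equivalent to one-hot encoding followed by
    # tf.nn.softmax_cross_entropy_with_logits
    loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.cast(tf.squeeze(train_labels), tf.int32),
            logits=y_conv))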
Example No. 7
def train_model(config):
    train_data = os.path.join(config.tfrecord_dir, config.train_tfrecords)
    val_data = os.path.join(config.tfrecord_dir, config.val_tfrecords)

    with tf.device('/cpu:0'):
        train_images, train_labels = inputs(tfrecord_file=train_data,
                                            num_epochs=config.epochs,
                                            image_target_size=config.image_target_size,
                                            label_shape=config.label_shape,
                                            batch_size=config.train_batch,
                                            augmentation=True)

        val_images, val_labels = inputs(tfrecord_file=val_data,
                                        num_epochs=config.epochs,
                                        image_target_size=config.image_target_size,
                                        label_shape=config.label_shape,
                                        batch_size=config.val_batch)

    with tf.device('/gpu:0'):
        with tf.variable_scope("model") as scope:
            print ("creating the model")
            model = cnn_model_struct()
            model.build(train_images,config.num_classes,train_mode=True)
            y_conv = model.output

            # Define loss and optimizer
            with tf.name_scope('loss'):
                reg_loss = tf.nn.l2_loss(y_conv - train_labels)

            with tf.name_scope('adam_optimizer'):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_step = tf.train.AdamOptimizer(1e-4).minimize(reg_loss)


            print("using validation")
            # scope.reuse_variables()
            with tf.variable_scope('val_model', reuse=tf.AUTO_REUSE):
                val_model = cnn_model_struct()
                val_model.build(val_images, config.num_classes, train_mode=False)
                val_res = val_model.output
                # val_res_shaped = tf.reshape(val_model.output, [config.val_batch, config.num_classes])
                # val_lab_shaped = tf.reshape(val_labels, [config.val_batch, config.num_classes])
                val_error =  tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(val_labels-val_res))))

            tf.summary.scalar("loss", reg_loss)
            #tf.summary.scalar("train error", accuracy)
            #tf.summary.scalar("validation error", val_error)
            summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

    gpuconfig = tf.ConfigProto()
    gpuconfig.gpu_options.allow_growth = True
    gpuconfig.allow_soft_placement = True

    with tf.Session(config=gpuconfig) as sess:
        graph_location = tempfile.mkdtemp()
        print('Saving graph to: %s' % graph_location)
        train_writer = tf.summary.FileWriter(graph_location)
        train_writer.add_graph(tf.get_default_graph())

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        step = 0
        try:
            while not coord.should_stop():
                # train for a step
                _, tr_images, tr_labels, loss, outputs = sess.run(
                    [train_step, train_images, train_labels, reg_loss, y_conv])
                print("step={}, loss={}".format(step, loss))
                step += 1

                # validate the model; weights are shared with the training
                # model via scope.reuse_variables()
                if step % 200 == 0:
                    vl_img, vl_lab, vl_res, vl_err = sess.run([val_images,val_labels,val_res,val_error])
                    print("\t validating")
                    print("\t val error = {}".format(vl_err))

                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str,step)
                # save the model check point
                if step % 250 == 0:
                    saver.save(sess,os.path.join(
                        config.model_output,
                        config.model_name+'_'+str(step)+'.ckpt'
                    ),global_step=step)

        except tf.errors.OutOfRangeError:
            print("Finished training for %d epochs" % config.epochs)
        finally:
            coord.request_stop()
            coord.join(threads)
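Finally, none of the training loops resumes from an existing checkpoint: a restart always reinitializes the weights. A small optional addition, placed right after sess.run(init_op) in any of the loops, restores the newest checkpoint when one exists. It assumes the checkpoints were written by saver.save(..., global_step=step), so the filenames end in '-<step>':

# after sess.run(init_op): optionally resume from the newest checkpoint
ckpt = tf.train.latest_checkpoint(config.model_output)
if ckpt is not None:
    saver.restore(sess, ckpt)
    # continue step numbering from the checkpoint's global step
    step = int(ckpt.split('-')[-1])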