Example #1
def getdebug(cont):
    return model.debug(cont)
Example #2
def main(_):
    if gfile.Exists(TRAIN_DIR):
        gfile.DeleteRecursively(TRAIN_DIR)
    gfile.MakeDirs(TRAIN_DIR)

    # locally
    #train()

    print("ps: %s" % (DFLAGS.task_index))

    ps_hosts = DFLAGS.ps_hosts.split(",")
    worker_hosts = DFLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=DFLAGS.job_name,
                             task_index=DFLAGS.task_index)

    # training data
    filename_queue = tf.train.string_input_producer(
        ["output/data/airquality.csv"])
    datas, targets = dataset.mini_batch(filename_queue, BATCH_SIZE)

    if DFLAGS.job_name == "ps":
        server.join()
    elif DFLAGS.job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % DFLAGS.task_index,
                    cluster=cluster)):

            # global step counter
            global_step = tf.Variable(0, trainable=False)

            # inference
            logits = model.inference(datas)

            debug_value = model.debug(logits)

            # loss: uses the graph outputs and the labels
            loss = model.loss(logits, targets)

            #train_op = tf.train.AdagradOptimizer(0.0001).minimize(
            #    loss, global_step=global_step)
            train_op = op.train(loss, global_step)

            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()

        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(DFLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 init_feed_dict=None,
                                 summary_op=summary_op,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=60)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or an error occurs.
        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor shuts down or 1000000 steps have completed.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            step = 0
            while not sv.should_stop() and step < 1000000:
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional details on how to
                # perform *synchronous* training.
                start_time = time.time()
                _, loss_value, predict_value, targets_eval, step = sess.run(
                    [train_op, loss, debug_value, targets, global_step])
                #_, step = sess.run([train_op, global_step])
                duration = time.time() - start_time

                if step % 100 == 0:
                    # mini-batch size
                    num_examples_per_step = BATCH_SIZE

                    # examples processed per second
                    examples_per_sec = num_examples_per_step / duration

                    # time per batch
                    sec_per_batch = float(duration)

                    # time, step, loss, examples per second, time per batch
                    format_str = '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    print("predict: %s" % predict_value)
                    print("targets: %s" % targets_eval)

            coord.request_stop()
            coord.join(threads)
            sess.close()

        # Ask for all the services to stop.
        sv.stop()
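
Note: the DFLAGS object read above (ps_hosts, worker_hosts, job_name, task_index) is not defined in this example. A minimal sketch of how such flags are commonly declared with tf.app.flags follows; the defaults here are placeholder assumptions, not values from the original code:

# Sketch only -- not part of the original example; defaults are made up.
import tensorflow as tf

tf.app.flags.DEFINE_string("ps_hosts", "localhost:2222",
                           "Comma-separated parameter server host:port pairs")
tf.app.flags.DEFINE_string("worker_hosts", "localhost:2223,localhost:2224",
                           "Comma-separated worker host:port pairs")
tf.app.flags.DEFINE_string("job_name", "worker", "Either 'ps' or 'worker'")
tf.app.flags.DEFINE_integer("task_index", 0, "Index of this task within its job")
DFLAGS = tf.app.flags.FLAGS

Each machine then runs the script once per task, e.g. the parameter server with --job_name=ps --task_index=0 and each worker with --job_name=worker and its own task_index.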
Example #3
def main():
    homedir = os.environ['HOME']
    confdir = os.path.join(homedir, ".elshelves")
    dbfile = os.path.join(confdir, "elshelves.sqlite3")

    parser = optparse.OptionParser()
    parser.add_option("--importorg", action="store", default=None)
    opts, args = parser.parse_args()

    # let me use different database file
    if args:
        dbfile = args[0]

    try:
        os.makedirs(confdir)
    except OSError as e:
        if e.errno != 17: # Already existing dir
            raise

    errlog = open(os.path.join(confdir, "error_log"), "w")
    model.debug(errlog)

    store = model.getStore("sqlitefk:%s" % dbfile,
                           create = not os.path.exists(dbfile))

    schema_version = store.get(model.Meta, u"version").value

    text_header = "Shelves %s (db %s)" % (__version__, schema_version)

    a = app.App(text_header)
    actions_screen = Actions(a, store)
    a.switch_screen_with_return(actions_screen)

    if opts.importorg:
        try:
            data = open(opts.importorg, "r").readlines()
            parts = []
            for l in data:
                if l.strip() == "":
                    continue
                l = [i.strip() for i in l[1:].split("|", 7)]

                source = store.find(model.Source,
                                    model.Source.name.like("%%%s%%" % l[3].decode("utf8"), "$", False)
                                    ).one()

                part = model.RawPart({
                    "search_name": l[0].decode("utf8"),
                    "count": int(l[1]),
                    "manufacturer": l[2].decode("utf8"),
                    "source": source,
                    "summary": l[4].decode("utf8"),
                    "footprint": l[5].decode("utf8"),
                    "description": l[6].decode("utf8")
                    })
                parts.append(part)

            dlg = SearchForParts(a, store,
                                 back=None, action=PartCreator,
                                 parts = parts)
            a.switch_screen_with_return(dlg)
        except:
            raise

    a.run()
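
Note: the --importorg loop above splits each org-mode table row into seven fields (search name, count, manufacturer, source, summary, footprint, description). A hypothetical input line, with made-up values, would look like:

| 1N4148 | 100 | Vishay | GME | switching diode | DO-35 | small-signal switching diode |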
Example #4
def getdebug(catalog):
    return model.debug(catalog)
Example #5
def train():
    '''
    Train the SegNet model.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)

        csv_train = FLAGS.train_csv
        csv_test = FLAGS.eval_csv

        image_input = DataSet()
        #images, targets = image_input.csv_inputs(csv_train, FLAGS.batch_size)
        #images_val, targets_val = image_input.csv_inputs(csv_test, FLAGS.num_examples)
        images, targets = image_input.csv_inputs_augumentation(csv_train, FLAGS.batch_size)
        images_val, targets_val = image_input.csv_inputs_augumentation(csv_test, FLAGS.num_examples)
        images_val_debug = model.debug(images_val)
        targets_val_debug = model.debug(targets_val)

        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)

        # graph outputs
        print("train.")
        encoder_output = model.inference_segnet_former(images, keep_conv, keep_hidden)
        encoder_output_val = model.inference_segnet_former(images_val, keep_conv, keep_hidden, reuse=True)
        logits, logits_argmax = model.inference_segnet_latter(images, encoder_output, FLAGS.num_classes, keep_conv, keep_hidden, batch_size=FLAGS.batch_size)
        logits_val, logits_argmax_val = model.inference_segnet_latter(images_val, encoder_output_val, FLAGS.num_classes, keep_conv, keep_hidden, batch_size=FLAGS.num_examples, reuse=True)


        # loss: uses the graph outputs and the labels
        loss = model.loss_segnet(logits, targets)
        loss_val = model.loss_segnet(logits_val, targets_val)
        tf.scalar_summary("validation", loss_val)
        # training operation
        train_op = op.train(loss, global_step)

        # summaries
        summary_op = tf.merge_all_summaries()


        # initialization operation
        init_op = tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))

        summary_writer = tf.train.SummaryWriter(LOG_DIR, graph_def=sess.graph_def)

        # saver
        #saver = tf.train.Saver(tf.all_variables())

        sess.run(init_op)

        segnet_params = {}
        #if FLAGS.refine_train:
        #    for variable in tf.all_variables():
        #        variable_name = variable.name
        #        print("parameter: %s" % (variable_name))
        #        if variable_name.find("/") < 0 or variable_name.count("/") != 1:
        #            print("ignore.")
        #            continue
        #        if variable_name.find('coarse') >= 0:
        #            print("coarse parameter: %s" % (variable_name))
        #            coarse_params[variable_name] = variable
        #        print("parameter: %s" %(variable_name))
        #        if variable_name.find('fine') >= 0:
        #            print("refine parameter: %s" % (variable_name))
        #            refine_params[variable_name] = variable
        #else:
        print("Create variable list for saver.")
        for variable in tf.trainable_variables():
            variable_name = variable.name
            if variable_name.find("/") < 0 or variable_name.count("/") > 2:
                print("ignore parameter: %s" % (variable_name))
                continue
            if variable_name.find('en_conv') >= 0:
                print("en_conv parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable
            if variable_name.find('de_conv') >= 0:
                print("de_conv parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable

        print("=="*100)

        #for variable in tf.get_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES):
        for variable in tf.all_variables():
            variable_name = variable.name
            #print("MOVING_AVERAGE_VARIABLES collection: %s" % (variable_name))
            if variable_name.find("moving_mean") >= 0:
                print("moving mean parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable
            elif variable_name.find("moving_variance") >= 0:
                print("moving variance parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable

        print("=="*100)

        # define saver
        #saver = tf.train.Saver(segnet_params)
        saver = tf.train.Saver(tf.all_variables())

        # fine tune
        if FLAGS.fine_tune:
            # load pretrained segnet parameters
            segnet_ckpt = tf.train.get_checkpoint_state(TRAIN_DIR)
            if segnet_ckpt and segnet_ckpt.model_checkpoint_path:
                print("Pretrained segnet Model Loading.")
                print("model path: %s" % (segnet_ckpt.model_checkpoint_path))
                saver.restore(sess, segnet_ckpt.model_checkpoint_path)
                print("Pretrained segnet Model Restored.")
            else:
                print("No Pretrained segnet Model.")
            
        # TODO train coarse or refine (change trainable)
        #if not FLAGS.coarse_train:
        #    for val in coarse_params:
        #        print val
        #if not FLAGS.refine_train:
        #    for val in coarse_params:
        #        print val

        # train refine
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        dropout_flag = False
        # repeat training up to MAX_STEPS
        avg_loss_list = []
        previous_loss = 10000000
        saturate_step = 0
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            loss_list = []
            for i in xrange(100):
                if dropout_flag:
                    _, loss_value = sess.run([train_op, loss], feed_dict={keep_conv: 0.5, keep_hidden: 0.5})
                else:
                    _, loss_value = sess.run([train_op, loss], feed_dict={keep_conv: 1.0, keep_hidden: 1.0})

                if i == 0 and dropout_flag:
                    print("------------------- using Dropout ---------------------")
                    print("saturate step is %d" % saturate_step)

                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]" % (datetime.now(), step, index, loss_value, num_examples_per_step, examples_per_sec, duration))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    previous_time = end_time

                index += 1
                loss_list.append(loss_value)

            # average training loss over the last 100 iterations
            avg_loss_list.append(sum(loss_list) / len(loss_list))

            summary_str = sess.run(summary_op, feed_dict={keep_conv: 1.0, keep_hidden: 1.0})
            summary_writer.add_summary(summary_str, step)

            if step % 1 == 0 or (step + 1) == MAX_STEPS:
                images_debug_eval, depths_debug_eval, output_vec, output_vec_argmax, cost_value = sess.run([images_val_debug, targets_val_debug, logits_val, logits_argmax_val, loss_val], feed_dict={keep_conv: 1.0, keep_hidden: 1.0})
                print("%s: %d[epoch]: %d[iteration]: validation loss: %f" % (datetime.now(), step, index, cost_value))

            if step % 10 == 0 or (step + 1) == MAX_STEPS:
                output_dir = "predicts_%05dstep" % (step)
                print("predicts output: %s" % output_dir)
                image_input.output_predict(output_vec_argmax, output_dir)
                image_input.output_images(images_debug_eval, output_dir)
                image_input.output_depths(depths_debug_eval, output_dir)

                # if FLAGS.scale3_train:
                #     scale1and2_output_dir = "scale1and2_%05dstep" % (step)
                #     print("scale1and2 output: %s" % scale1and2_output_dir)
                #     dataset.output_predict(coarse_output_vec, coarse_output_dir)

            if step % 30 == 0 or (step + 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver.save(sess, checkpoint_path, global_step=step)

                if previous_loss < sum(avg_loss_list) / len(avg_loss_list) and saturate_step == 0:
                    checkpoint_path = EX_DIR + '/model.ckpt'
                    saver.save(sess, checkpoint_path, global_step=step)
                    dropout_flag = True
                    saturate_step = step
                previous_loss = sum(avg_loss_list) / len(avg_loss_list)
                print("30steps average loss: %f " % previous_loss)
                avg_loss_list = []

        coord.request_stop()
        coord.join(threads)
        sess.close()
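
Note: train() relies on module-level flags and constants (FLAGS.train_csv, FLAGS.eval_csv, FLAGS.batch_size, FLAGS.num_examples, FLAGS.num_classes, FLAGS.fine_tune, plus TRAIN_DIR, EX_DIR, LOG_DIR, MAX_STEPS, BATCH_SIZE, LOG_DEVICE_PLACEMENT) whose definitions are not shown in this example. A minimal sketch with assumed placeholder values:

# Sketch only -- names come from the example above, all values are assumptions.
import tensorflow as tf

tf.app.flags.DEFINE_string("train_csv", "data/train.csv", "Training CSV file")
tf.app.flags.DEFINE_string("eval_csv", "data/eval.csv", "Validation CSV file")
tf.app.flags.DEFINE_integer("batch_size", 8, "Mini-batch size")
tf.app.flags.DEFINE_integer("num_examples", 8, "Number of validation examples")
tf.app.flags.DEFINE_integer("num_classes", 21, "Number of segmentation classes")
tf.app.flags.DEFINE_boolean("fine_tune", False, "Restore a pretrained checkpoint")
FLAGS = tf.app.flags.FLAGS

TRAIN_DIR = "train"
EX_DIR = "ex"
LOG_DIR = "log"
MAX_STEPS = 10000
BATCH_SIZE = 8
LOG_DEVICE_PLACEMENT = False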