def getdebug(cont): return model.debug(cont)
def main(_):
    if gfile.Exists(TRAIN_DIR):
        gfile.DeleteRecursively(TRAIN_DIR)
    gfile.MakeDirs(TRAIN_DIR)

    # locally
    #train()

    print("ps: %s" % (DFLAGS.task_index))
    ps_hosts = DFLAGS.ps_hosts.split(",")
    worker_hosts = DFLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    # training data
    filename_queue = tf.train.string_input_producer(
        ["output/data/airquality.csv"])
    datas, targets = dataset.mini_batch(filename_queue, BATCH_SIZE)

    if DFLAGS.job_name == "ps":
        server.join()
    elif DFLAGS.job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % DFLAGS.task_index,
                    cluster=cluster)):
            # global step counter
            global_step = tf.Variable(0, trainable=False)

            # inference
            logits = model.inference(datas)
            debug_value = model.debug(logits)

            # loss: computed from the graph output and the labels
            loss = model.loss(logits, targets)

            #train_op = tf.train.AdagradOptimizer(0.0001).minimize(
            #    loss, global_step=global_step)
            train_op = op.train(loss, global_step)

            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()

        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 init_feed_dict=None,
                                 summary_op=summary_op,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=60)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or an error occurs.
        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor shuts down or 1000000 steps have completed.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            step = 0
            while not sv.should_stop() and step < 1000000:
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional details on how to
                # perform *synchronous* training.
                start_time = time.time()
                _, loss_value, predict_value, targets_eval, step = sess.run(
                    [train_op, loss, debug_value, targets, global_step])
                #_, step = sess.run([train_op, global_step])
                duration = time.time() - start_time

                if step % 100 == 0:
                    # mini batch size
                    num_examples_per_step = BATCH_SIZE
                    # examples per second
                    examples_per_sec = num_examples_per_step / duration
                    # duration per batch
                    sec_per_batch = float(duration)
                    # time, step number, loss, examples per second, time per batch
                    format_str = '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    print("predict: %s" % predict_value)
                    print("targets: %s" % targets_eval)

            coord.request_stop()
            coord.join(threads)
            sess.close()

        # Ask for all the services to stop.
        sv.stop()
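# The distributed main() above reads its cluster description from command-line
# flags, but the flag definitions are not part of this snippet. The following
# is a minimal sketch of what those definitions could look like, assuming the
# standard tf.app.flags mechanism; the defaults and the DFLAGS alias are
# illustrative guesses, not the original configuration.
import tensorflow as tf

tf.app.flags.DEFINE_string("ps_hosts", "localhost:2222",
                           "Comma-separated list of parameter server host:port pairs.")
tf.app.flags.DEFINE_string("worker_hosts", "localhost:2223,localhost:2224",
                           "Comma-separated list of worker host:port pairs.")
tf.app.flags.DEFINE_string("job_name", "worker", "Either 'ps' or 'worker'.")
tf.app.flags.DEFINE_integer("task_index", 0, "Index of this task within its job.")

# main() refers to both FLAGS and DFLAGS; this sketch assumes both names point
# at the same tf.app.flags.FLAGS object.
FLAGS = tf.app.flags.FLAGS
DFLAGS = tf.app.flags.FLAGS

# One process per cluster member would then be started, for example:
#   python trainer.py --job_name=ps --task_index=0
#   python trainer.py --job_name=worker --task_index=0
#   python trainer.py --job_name=worker --task_index=1
# ("trainer.py" is a placeholder for whatever this file is called.)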
def main():
    homedir = os.environ['HOME']
    confdir = os.path.join(homedir, ".elshelves")
    dbfile = os.path.join(confdir, "elshelves.sqlite3")

    parser = optparse.OptionParser()
    parser.add_option("--importorg", action="store", default=None)
    opts, args = parser.parse_args()

    # let me use different database file
    if args:
        dbfile = args[0]

    try:
        os.makedirs(confdir)
    except OSError as e:
        if e.errno != 17:  # Already existing dir
            raise

    errlog = open(os.path.join(confdir, "error_log"), "w")
    model.debug(errlog)

    store = model.getStore("sqlitefk:%s" % dbfile,
                           create=not os.path.exists(dbfile))
    schema_version = store.get(model.Meta, u"version").value

    text_header = "Shelves %s (db %s)" % (__version__, schema_version)
    a = app.App(text_header)
    actions_screen = Actions(a, store)
    a.switch_screen_with_return(actions_screen)

    if opts.importorg:
        try:
            data = open(opts.importorg, "r").readlines()
            parts = []
            for l in data:
                if l.strip() == "":
                    continue
                l = [i.strip() for i in l[1:].split("|", 7)]
                source = store.find(
                    model.Source,
                    model.Source.name.like("%%%s%%" % l[3].decode("utf8"),
                                           "$", False)).one()
                part = model.RawPart({
                    "search_name": l[0].decode("utf8"),
                    "count": int(l[1]),
                    "manufacturer": l[2].decode("utf8"),
                    "source": source,
                    "summary": l[4].decode("utf8"),
                    "footprint": l[5].decode("utf8"),
                    "description": l[6].decode("utf8")
                })
                parts.append(part)

            dlg = SearchForParts(a, store, back=None,
                                 action=PartCreator, parts=parts)
            a.switch_screen_with_return(dlg)
        except:
            raise

    a.run()
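# The --importorg branch in main() above splits each non-empty line on "|"
# after dropping the first character, i.e. it expects org-mode style table
# rows. The helper below mirrors that parsing in isolation; the example row
# and its values are made up for illustration and are not from any original
# data file.
EXAMPLE_ORG_ROW = ("| BC547 | 10 | Fairchild | Farnell | NPN transistor | "
                   "TO-92 | general purpose small-signal NPN |")

def parse_org_row(line):
    """Split one org table row the same way main() does."""
    fields = [i.strip() for i in line[1:].split("|", 7)]
    return {
        "search_name": fields[0],
        "count": int(fields[1]),
        "manufacturer": fields[2],
        "source_name": fields[3],   # matched against model.Source.name in main()
        "summary": fields[4],
        "footprint": fields[5],
        "description": fields[6],
    }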
def getdebug(catalog): return model.debug(catalog)
def train():
    """Train."""
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)

        # input data
        csv_train = FLAGS.train_csv
        csv_test = FLAGS.eval_csv
        image_input = DataSet()
        #images, targets = image_input.csv_inputs(csv_train, FLAGS.batch_size)
        #images_val, targets_val = image_input.csv_inputs(csv_test, FLAGS.num_examples)
        images, targets = image_input.csv_inputs_augumentation(csv_train, FLAGS.batch_size)
        images_val, targets_val = image_input.csv_inputs_augumentation(csv_test, FLAGS.num_examples)
        images_val_debug = model.debug(images_val)
        targets_val_debug = model.debug(targets_val)

        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)

        # graph output (inference)
        print("train.")
        encoder_output = model.inference_segnet_former(images, keep_conv, keep_hidden)
        encoder_output_val = model.inference_segnet_former(images_val, keep_conv, keep_hidden, reuse=True)
        logits, logits_argmax = model.inference_segnet_latter(
            images, encoder_output, FLAGS.num_classes, keep_conv, keep_hidden,
            batch_size=FLAGS.batch_size)
        logits_val, logits_argmax_val = model.inference_segnet_latter(
            images_val, encoder_output_val, FLAGS.num_classes, keep_conv, keep_hidden,
            batch_size=FLAGS.num_examples, reuse=True)

        # loss: computed from the graph output and the labels
        loss = model.loss_segnet(logits, targets)
        loss_val = model.loss_segnet(logits_val, targets_val)
        tf.scalar_summary("validation", loss_val)

        # training op
        train_op = op.train(loss, global_step)

        # summary op
        summary_op = tf.merge_all_summaries()

        # initialization op
        init_op = tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        summary_writer = tf.train.SummaryWriter(LOG_DIR, graph_def=sess.graph_def)

        # saver
        #saver = tf.train.Saver(tf.all_variables())

        sess.run(init_op)

        segnet_params = {}
        #if FLAGS.refine_train:
        #    for variable in tf.all_variables():
        #        variable_name = variable.name
        #        print("parameter: %s" % (variable_name))
        #        if variable_name.find("/") < 0 or variable_name.count("/") != 1:
        #            print("ignore.")
        #            continue
        #        if variable_name.find('coarse') >= 0:
        #            print("coarse parameter: %s" % (variable_name))
        #            coarse_params[variable_name] = variable
        #        print("parameter: %s" % (variable_name))
        #        if variable_name.find('fine') >= 0:
        #            print("refine parameter: %s" % (variable_name))
        #            refine_params[variable_name] = variable
        #else:
        print("Create variable list for saver.")
        for variable in tf.trainable_variables():
            variable_name = variable.name
            if variable_name.find("/") < 0 or variable_name.count("/") > 2:
                print("ignore parameter: %s" % (variable_name))
                continue
            if variable_name.find('en_conv') >= 0:
                print("en_conv parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable
            if variable_name.find('de_conv') >= 0:
                print("de_conv parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable
        print("==" * 100)
        #for variable in tf.get_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES):
        for variable in tf.all_variables():
            variable_name = variable.name
            #print("MOVING_AVERAGE_VARIABLES collection: %s" % (variable_name))
            if variable_name.find("moving_mean") >= 0:
                print("moving mean parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable
            elif variable_name.find("moving_variance") >= 0:
                print("moving variance parameter: %s" % (variable_name))
                segnet_params[variable_name] = variable
        print("==" * 100)

        # define saver
        #saver = tf.train.Saver(segnet_params)
        saver = tf.train.Saver(tf.all_variables())

        # fine tune
        if FLAGS.fine_tune:
            # load pretrained parameters
            segnet_ckpt = tf.train.get_checkpoint_state(TRAIN_DIR)
            if segnet_ckpt and segnet_ckpt.model_checkpoint_path:
                print("Pretrained segnet Model Loading.")
                print("model path: %s" % (segnet_ckpt.model_checkpoint_path))
                saver.restore(sess, segnet_ckpt.model_checkpoint_path)
                print("Pretrained segnet Model Restored.")
            else:
                print("No Pretrained segnet Model.")

        # TODO train coarse or refine (change trainable)
        #if not FLAGS.coarse_train:
        #    for val in coarse_params:
        #        print val
        #if not FLAGS.refine_train:
        #    for val in coarse_params:
        #        print val

        # train refine
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        dropout_flag = False

        # iterate training up to MAX_STEPS
        avg_loss_list = []
        previous_loss = 10000000
        saturate_step = 0
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            loss_list = []
            for i in xrange(100):
                if dropout_flag:
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict={keep_conv: 0.5, keep_hidden: 0.5})
                else:
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict={keep_conv: 1.0, keep_hidden: 1.0})
                #_, loss_value = sess.run([train_op, loss], feed_dict={keep_conv: 0.5, keep_hidden: 0.5})
                if i == 0:
                    if dropout_flag:
                        print("------------------- using Dropout ---------------------")
                        print("saturate step is %d" % saturate_step)
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]"
                          % (datetime.now(), step, index, loss_value,
                             num_examples_per_step, examples_per_sec, duration))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    previous_time = end_time
                index += 1
                loss_list.append(loss_value)
            avg_loss_list.append(sum(loss_list) / len(loss_list))

            summary_str = sess.run(summary_op, feed_dict={keep_conv: 1.0, keep_hidden: 1.0})
            summary_writer.add_summary(summary_str, step)

            if step % 1 == 0 or (step + 1) == MAX_STEPS:
                images_debug_eval, depths_debug_eval, output_vec, output_vec_argmax, cost_value = sess.run(
                    [images_val_debug, targets_val_debug, logits_val, logits_argmax_val, loss_val],
                    feed_dict={keep_conv: 1.0, keep_hidden: 1.0})
                print("%s: %d[epoch]: %d[iteration]: validation loss: %f"
                      % (datetime.now(), step, index, cost_value))
                if step % 10 == 0 or (step + 1) == MAX_STEPS:
                    output_dir = "predicts_%05dstep" % (step)
                    print("predicts output: %s" % output_dir)
                    image_input.output_predict(output_vec_argmax, output_dir)
                    image_input.output_images(images_debug_eval, output_dir)
                    image_input.output_depths(depths_debug_eval, output_dir)
                    # if FLAGS.scale3_train:
                    #     scale1and2_output_dir = "scale1and2_%05dstep" % (step)
                    #     print("scale1and2 output: %s" % scale1and2_output_dir)
                    #     dataset.output_predict(coarse_output_vec, coarse_output_dir)

            if step % 30 == 0 or (step + 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver.save(sess, checkpoint_path, global_step=step)
                if previous_loss < sum(avg_loss_list) / len(avg_loss_list) and saturate_step == 0:
                    checkpoint_path = EX_DIR + '/model.ckpt'
                    saver.save(sess, checkpoint_path, global_step=step)
                    dropout_flag = True
                    saturate_step = step
                previous_loss = sum(avg_loss_list) / len(avg_loss_list)
                print("30steps average loss: %f " % previous_loss)
                avg_loss_list = []

        coord.request_stop()
        coord.join(threads)
        sess.close()
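# Both training loops above delegate the optimizer step to op.train(), which
# is not included in this snippet. Below is a minimal stand-in, assuming it
# does nothing more than apply one optimizer step that advances global_step
# (the commented-out line in the distributed main() suggests Adagrad with a
# 0.0001 learning rate); the real op.train() may additionally handle
# learning-rate decay, gradient clipping or summaries. The name op_train is
# used here only to avoid shadowing train() above.
import tensorflow as tf

def op_train(total_loss, global_step, learning_rate=0.0001):
    """Stand-in for op.train(): minimize total_loss and advance global_step."""
    optimizer = tf.train.AdagradOptimizer(learning_rate)
    return optimizer.minimize(total_loss, global_step=global_step)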