def main(_):
    """Train the `deepnn` MNIST model with full tracing and `_swap_to_host` tags.

    The function builds the classifier graph, tags five gradient ops with a
    `_swap_to_host` attribute, runs `FLAGS.iteration_count` training steps
    with `FULL_TRACE` enabled, and records every step both in a merged Chrome
    timeline and in the TensorFlow profiler.  After the session block it
    writes the graph as text and saves the merged timeline.

    Args:
      _: Unused positional argument.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Small stand-alone graph: relu((a + 0.2) * 10) with its own Adam op.
    # `train_relu1` is only referenced by the commented-out `sess.run` in the
    # training loop below; the nodes are still part of the default graph.
    a = bias_variable([3, 3])
    b = tf.constant(0.2, shape=[3, 3])
    c = tf.constant(10.0, shape=[3, 3])
    d = a + b
    e = tf.multiply(d, c)
    relu1 = tf.nn.relu(e, name='relu1')
    train_relu1 = tf.train.AdamOptimizer(1e-4).minimize(relu1)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_,
                                                               logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    # The 'adam_optimizer' scope name matters: the gradient ops created here
    # are looked up by their full names inside the session block below.
    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # `accuracy` is built but never evaluated in this function.
    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    from tensorflow.python.profiler import model_analyzer
    from tensorflow.python.profiler import option_builder
    with tf.Session(config=get_sess_config()) as sess:

        # Accumulates the per-step Chrome traces into one timeline.
        many_runs_timeline = TimeLiner()

        # Tag selected gradient ops with a `_swap_to_host` attribute, either
        # a list of ints or a single int.
        # NOTE(review): the values look like input indices of the tagged op,
        # presumably consumed by a custom memory-swapping pass -- confirm.
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/pool1/MaxPool_grad/MaxPoolGrad'
        )._set_attr(
            '_swap_to_host',
            attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(
                i=[0, 1])))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/conv1/Relu_grad/ReluGrad')._set_attr(
                '_swap_to_host', attr_value_pb2.AttrValue(i=1))

        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/pool2/MaxPool_grad/MaxPoolGrad'
        )._set_attr(
            '_swap_to_host',
            attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(
                i=[0, 1])))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/conv2/Relu_grad/ReluGrad')._set_attr(
                '_swap_to_host', attr_value_pb2.AttrValue(i=1))
        sess.graph.get_operation_by_name(
            'adam_optimizer/gradients/conv2/Conv2D_grad/Conv2DBackpropInput'
        )._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=2))
        #sess.graph.get_operation_by_name('pool1/MaxPool')._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=0))
        #gradient_ops = sess.graph.get_operation_by_name('adam_optimizer/gradients/conv2/Conv2D_grad/ShapeN')
        #gradient_ops._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=0))
        #gradient_ops._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=1))
        sess.run(tf.global_variables_initializer())
        profiler = model_analyzer.Profiler(sess.graph)
        #for i in range(20000):
        for i in range(FLAGS.iteration_count):
            batch = mnist.train.next_batch(FLAGS.batch_size)
            # Fresh metadata per step so each trace covers exactly one run.
            run_metadata = tf.RunMetadata()
            sess.run(
                train_step,
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 0.5
                },
                options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata)
            #sess.run(train_relu1, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5}, options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), run_metadata=run_metadata)

            # Convert this step's stats to Chrome trace JSON and merge it
            # into the multi-run timeline.
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            chrome_trace = trace.generate_chrome_trace_format(
                show_dataflow=True, show_memory=True)
            many_runs_timeline.update_timeline(chrome_trace)

            profiler.add_step(i, run_metadata)

            # profile the timing of your model operations.
            #opts = (tf.profiler.ProfileOptionBuilder(
            #  option_builder.ProfileOptionBuilder.time_and_memory())
            #  .select(['micros', 'bytes', 'occurrence', 'peak_bytes', 'residual_bytes', 'output_bytes'])
            #  .order_by('name').build())
            #profiler.profile_operations(options=opts)

            # can generate a timeline:
            # The output path is built from FLAGS.mem_opt, the batch size and
            # the iteration count; it does not include the step index `i`.
            opts = (option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.time_and_memory()
            ).with_step(i).with_timeline_output(
                "./timeline_output/step_" + FLAGS.mem_opt +
                str(FLAGS.batch_size) + str(FLAGS.iteration_count)).build())
            profiler.profile_graph(options=opts)
    # The session block has ended here; only `sess.graph_def` and the
    # accumulated timeline object are used below.
    chrome_trace_filename = str(FLAGS.batch_size) + str(FLAGS.mem_opt) + "new"
    graph_location = str(FLAGS.batch_size) + str(
        FLAGS.mem_opt) + "_swap_test.pbtxt"
    print('Saving graph to: %s' % graph_location)
    tf.train.write_graph(sess.graph_def, '.', graph_location, as_text=True)
    many_runs_timeline.save(chrome_trace_filename + '.ctf.json')
Exemplo n.º 2
0
def main():
    """Train a WaveNet model, logging to TensorBoard and checkpointing.

    Reads the command-line arguments and the JSON model parameters, builds
    the audio input queue and the network, then runs optimisation steps from
    the restored global step (or from 0 for a new/overwritten training) up to
    `args.num_steps`.  A checkpoint is written every `args.checkpoint_every`
    steps and once more on exit if steps ran since the last save.

    Returns:
      None.  Returns early (after printing the reason) when
      `validate_directories` rejects the arguments.

    Raises:
      Whatever `load` raises while restoring a checkpoint; the error is
      re-raised after a message so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        gc_enabled = args.gc_channels is not None
        lc_enabled = args.lc_channels is not None
        lc_channels = args.lc_channels
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            lc_enabled=lc_enabled,
            lc_channels=lc_channels,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"], wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None
        if lc_enabled:
            lc_batch = reader.dequeue_lc(args.batch_size)
        else:
            lc_batch = None
    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality,
        local_condition_channels=args.lc_channels)

    # A strength of exactly 0 is passed on as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    local_condition_batch=lc_batch,
                    l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate, momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Re-raised immediately below, so nothing is swallowed; the handler
        # only adds context before the traceback.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                # The trace file is overwritten on every metadata step.
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (for example the
        # restored global step is already >= num_steps).  Comparing None with
        # an int raises TypeError on Python 3, which would hide the real
        # outcome and skip the coordinator shutdown below.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
Exemplo n.º 3
0
    def testing(self, sess, test_writer):
        """Run inference over `self.epoch` minibatches and visualise the results.

        For each minibatch the method fetches the summed yaw tensor, the 3-D
        RPN rois and the merged summaries with full tracing enabled.
        Depending on module/config flags it also writes a Chrome trace every
        third iteration, prints the average iteration time, publishes the
        results over ROS or renders them with vispy, and logs the summary to
        TensorBoard.

        Args:
          sess: TensorFlow session used to restore the weights and run the net.
          test_writer: summary writer that receives the merged summaries when
            `cfg.TEST.TENSORBOARD` is set.

        Returns:
          0 when `self.args.weights` does not end with '.ckpt' (nothing is
          run); otherwise None.
        """
        ##=======================================
        if USE_ROS:
            import rospy
            from sensor_msgs.msg import PointCloud,Image
            from visualization_msgs.msg import MarkerArray, Marker
            from tools.data_visualize import Boxes_labels_Gen, Image_Gen,PointCloud_Gen

            rospy.init_node('rostensorflow')
            pub = rospy.Publisher('prediction', PointCloud, queue_size=1000)
            img_pub = rospy.Publisher('images_rgb', Image, queue_size=1000)
            box_pub = rospy.Publisher('label_boxes', MarkerArray, queue_size=1000)
            rospy.loginfo("ROS begins ...")
        #=======================================
        with tf.name_scope("Inference"):
            RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1]
            RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1]
            RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred+RNet_rpn_yaw_gt_delta
            rpn_rois_3d = self.net.get_output('rpn_rois')[1]
        with tf.name_scope('view_rpn_bv_tb'):
            roi_bv = self.net.get_output('rpn_rois')[0]
            data_bv = self.net.lidar_bv_data
            image_rpn = tf.reshape(test_show_rpn_tf(data_bv,roi_bv), (1, 601, 601, -1))
            tf.summary.image('lidar_bv_test', image_rpn)

            merged = tf.summary.merge_all()

        with tf.name_scope('load_weights'):
            weights = self.args.weights
            if weights.endswith('.ckpt'):
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print('Loading test model weights from {:s}'.format(self.args.weights))
                self.saver.restore(sess, weights)
            else:
                print("error: Function [combinet_test.testing] can not load weights {:s}!".format(self.args.weights))
                return 0

        vispy_init()  # TODO: Essential step (before sess.run) for using vispy because of an OpenGL or TensorFlow bug
        timer = Timer()

        for idx in range(0,self.epoch):
            # index_ = input('Type a new index: ')
            blobs = self.dataset.get_minibatch(idx)
            feed_dict = {
                self.net.lidar3d_data: blobs['lidar3d_data'],
                self.net.lidar_bv_data: blobs['lidar_bv_data'],
                self.net.im_info: blobs['im_info'],
                self.net.calib: blobs['calib']}
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            timer.tic()
            pred_yaw_toshow_,rpn_rois_3d_,summary = \
                sess.run([RNet_rpn_yaw_pred_toshow,rpn_rois_3d,merged],
                         feed_dict=feed_dict, options=run_options, run_metadata=run_metadata)
            timer.toc()

            if idx % 3 ==0 and cfg.TEST.DEBUG_TIMELINE:
                # chrome://tracing
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                trace_path = cfg.LOG_DIR + '/' +'testing-step-'+ str(idx).zfill(7) + '.ctf.json'
                # Context manager closes the file even if writing fails.
                with open(trace_path, 'w') as trace_file:
                    trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
            if idx % cfg.TEST.ITER_DISPLAY == 0:
                print('Test: %06d/%06d  speed: %.4f s / iter' % (idx+1, self.epoch, timer.average_time))
            if VISION_DEBUG:
                scan = blobs['lidar3d_data']
                img = blobs['image_data']
                # All-zero class scores, one per post-NMS proposal slot.
                cubic_cls_value = np.ones([cfg.TRAIN.RPN_POST_NMS_TOP_N],dtype=np.float32)*0
                boxes=BoxAry_Theta(pre_box3d=rpn_rois_3d_,pre_theta_value=pred_yaw_toshow_,pre_cube_cls=cubic_cls_value)# RNet_rpn_yaw_pred_toshow_  rpn_rois_3d_[:,-1]
                if USE_ROS:
                    # The *_Gen helpers and publishers were set up once under
                    # the same USE_ROS check at the top of the method.
                    pointcloud = PointCloud_Gen(scan)
                    label_boxes = Boxes_labels_Gen(boxes, ns='Predict')
                    img_ros = Image_Gen(img)
                    pub.publish(pointcloud)
                    img_pub.publish(img_ros)
                    box_pub.publish(label_boxes)
                else:
                    pcd_vispy(scan, img, boxes,index=idx,
                              save_img=True,#cfg.TEST.SAVE_IMAGE,
                              visible=False,
                              name='CubicNet testing')
            # Summaries are logged every iteration when enabled.
            if cfg.TEST.TENSORBOARD:
                test_writer.add_summary(summary, idx)
        print('Testing process has done, happy every day !')
    def train(self, train_data, train_label, valid_data=None, valid_label=None, learning_rate=0.01,
              max_epochs=1000, keep_training=False):
        """Train the two-layer classifier and log metrics, traces and a checkpoint.

        Builds a fresh graph (one 300-unit sigmoid hidden layer followed by a
        softmax output layer), trains it in minibatches of 512 samples and,
        every 30 epochs, evaluates loss/accuracy, writes TensorBoard
        summaries, appends to a text log and dumps a Chrome timeline.  The
        model is saved once after the last epoch.

        :param train_data: the input training data, must be a shape like [sample_number, data]
        :param train_label: the input label of the training data, must be a shape like [sample_number, labels]
        :param valid_data: the given validation data, must be a shape like [sample_number, data]
        :param valid_label: the given validation labels, must be a shape like [sample_number, labels]
            If either validation argument is None, both are replaced by
            ``self.fashion_data.test`` images/labels.
        :param learning_rate: the learning rate of the optimizer
        :param max_epochs: maximum epoch of the training
        :param keep_training: when True, restore the latest checkpoint from
            ``FLAGS.CHECKINGPOINT_FOLDER`` before training
        """

        # Use the bundled test split unless BOTH validation arrays were given.
        # (Explicit `is None` checks: truth-testing an array raises, and the
        # previous `valid_data or ...` form discarded caller-supplied data.)
        if valid_data is None or valid_label is None:
            valid_data = self.fashion_data.test.images
            valid_label = self.fashion_data.test.labels
        # Initialize all parameters
        # Fashion data images size
        pixel_size = train_data.shape[1]
        class_number = train_label.shape[1]
        use_dropout = FLAGS.REGULARIZATION == 'drop out'
        graph = tf.Graph()
        with graph.as_default(), tf.device('cpu:0'):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            # Define input sample for each layers
            with tf.name_scope('Inputs'):
                x_sample, y_sample = self.create_placeholders(pixel_size, class_number)
            # Define hidden and output layers
            hidden_layer1 = self.add_layer(x_sample, pixel_size, 300, 'layer1', activation_function=tf.nn.sigmoid)
            if use_dropout:
                keep_prob = tf.placeholder(tf.float32)
                probability = 0.5
                hidden_layer1 = tf.nn.dropout(hidden_layer1, keep_prob)
            prediction = self.add_layer(hidden_layer1, 300, class_number, 'layer2', activation_function=tf.nn.softmax)
            if use_dropout:
                # NOTE(review): dropout is also applied to the softmax output
                # here; kept as-is, confirm this is intended.
                prediction = tf.nn.dropout(prediction, keep_prob)

            with tf.name_scope("Training"):
                # Define loss function using cross entropy
                with tf.name_scope("Loss_Function"):
                    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_sample *
                        tf.log(prediction + FLAGS.EPSI), reduction_indices=1))
                    tf.summary.scalar("Loss/train", cross_entropy)
                if FLAGS.OPTIMIZER == 'Adam':
                    training = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy, global_step=global_step)

                elif FLAGS.OPTIMIZER == 'Momentum':
                    training = tf.train.MomentumOptimizer(learning_rate, momentum=0.7).minimize(cross_entropy,
                                                                                  global_step=global_step)
                else:
                    # Any other optimizer name falls back to plain SGD.
                    training = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy,
                                                                                         global_step=global_step)
                with tf.name_scope("Accuracy"):
                    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_sample, 1))
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
                    tf.summary.scalar('accuracy', accuracy)

            # Create Data Pipeline
            batch_size = 512
            minibatch = InputPipeline(batch_size, train_data, train_label)
            minibatch.schedule(buffer_size=10000)
            x_batch, y_batch = minibatch.next()
            iterator_initializer, mini_dict = minibatch.initializer()
            num_minibatch = int(train_data.shape[0] / batch_size)

            folder_name = (FLAGS.SUMMARY_FOLDER, FLAGS.TIMELINE_FOLDER, FLAGS.LOG_FOLDER, FLAGS.CHECKINGPOINT_FOLDER)
            for folder in folder_name:
                if not os.path.exists(folder):
                    os.makedirs(folder)
            f_result = open(FLAGS.LOG_FOLDER + '/training result_{:.0f}.txt'.format(time.time()), 'a')

            # Initialize a session and saver
            sess = tf.Session()
            init = tf.global_variables_initializer()
            sess.run(init)
            saver = tf.train.Saver()

            # Initialize Tensorboard Summary
            merged_train = tf.summary.merge_all()
            merged_test = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(FLAGS.SUMMARY_FOLDER + '/train', sess.graph)
            test_writer = tf.summary.FileWriter(FLAGS.SUMMARY_FOLDER + '/test', sess.graph)

            # Set runtime statistics option
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            # Evaluation feeds are the same every time, so build them once.
            # Dropout is disabled (keep_prob = 1) during evaluation.
            train_feed = {x_sample: train_data, y_sample: train_label}
            valid_feed = {x_sample: valid_data, y_sample: valid_label}
            if use_dropout:
                train_feed[keep_prob] = 1
                valid_feed[keep_prob] = 1

            start_time = time.time()
            if FLAGS.VERBOSE:
                print('Training Start!')
                print('Optimizer:', FLAGS.OPTIMIZER)
                print('Regularization method:', FLAGS.REGULARIZATION)
            # Start training process and iteration is determined by max_epochs
            if keep_training is True:
                saver.restore(sess, tf.train.latest_checkpoint(FLAGS.CHECKINGPOINT_FOLDER))
                if FLAGS.VERBOSE:
                    print("Restore model from ", FLAGS.CHECKINGPOINT_FOLDER)
            for epoch in range(max_epochs):
                sess.run(iterator_initializer, mini_dict)
                # Start an epoch and training through all mini batches
                for step in range(num_minibatch):
                    global_step_value = sess.run(global_step)
                    batch_x_sample, batch_y_sample = sess.run([x_batch, y_batch])
                    step_feed = {x_sample: batch_x_sample, y_sample: batch_y_sample}
                    if use_dropout:
                        step_feed[keep_prob] = probability
                    sess.run(training, feed_dict=step_feed)
                # Recompute the epoch number from the global step, which is
                # part of the checkpoint and so carries over on restore.
                epoch = int(global_step_value / num_minibatch) + 1
                # Start learning process and writer summary every 30 epochs
                if epoch % 30 == 0 or epoch == max_epochs:
                    loss_train = sess.run(cross_entropy, feed_dict=train_feed)
                    loss_test = sess.run(cross_entropy, feed_dict=valid_feed)
                    acc_test = sess.run(accuracy, feed_dict=valid_feed)
                    train_summary = sess.run(merged_train, feed_dict=train_feed,
                                             options=run_options, run_metadata=run_metadata)
                    test_summary = sess.run(merged_test, feed_dict=valid_feed,
                                            options=run_options, run_metadata=run_metadata)

                    # Add tensorboard summary
                    train_writer.add_run_metadata(run_metadata, 'epoch%d' % epoch)
                    train_writer.add_summary(train_summary, global_step=epoch)
                    test_writer.add_summary(test_summary, global_step=epoch)
                    result_log = "Epoch: {}, Accuracy: {:.3f}, Loss train: {:.3f}," \
                                 " Loss test: {:.3f}\n".format(epoch, acc_test, loss_train, loss_test)
                    if FLAGS.VERBOSE:
                        print('Adding run metadata for epoch:', epoch)
                        print(result_log)
                    # Save training logs to txt file
                    f_result.write('Adding run metadata for epoch:{}\n'.format(epoch))
                    f_result.write(result_log)
                    # Save runtime statistic results
                    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    with open(FLAGS.TIMELINE_FOLDER + '/timeline_epoch_{}.json'.format(epoch), 'w') as f_tracing:
                        f_tracing.write(chrome_trace)
            duration = time.time() - start_time
            if FLAGS.VERBOSE:
                print('Training for {} epochs took {:.3f} sec.\n'.format(epoch, duration))
                print('Training process finished')
            f_result.write('Training for {} epochs took {:.3f} sec.'.format(epoch, duration))
            f_result.close()
            # Save trained model to .ckpt files
            saver.save(sess, FLAGS.CHECKINGPOINT_FOLDER + '/project1_trained_model')
            sess.close()
Exemplo n.º 5
0
 def profile(run_metadata, epoch=0):
     """Write the Chrome trace of one traced run to a per-epoch JSON file.

     Args:
       run_metadata: run metadata whose `step_stats` feed the timeline.
       epoch: value appended to the file name
         (``profs/timeline_step<epoch>.json``).
     """
     trace_path = 'profs/timeline_step' + str(epoch) + '.json'
     with open(trace_path, 'w') as trace_file:
         # Build the timeline from the step stats and dump it as JSON.
         trace_file.write(
             timeline.Timeline(run_metadata.step_stats)
             .generate_chrome_trace_format())
        result = tf.matmul(matrix1, matrix2)
    return result



if __name__ == "__main__":
    # Benchmark script: build `batch_run` ops with `matrix_mul`, run them as
    # one grouped op with full tracing, and dump a Chrome timeline.
    batch_run = 4
    image_size = 32
    # Define the graph: one op per call to matrix_mul.
    results = [matrix_mul(image_size) for _ in range(batch_run)]
    # build option for perf
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    # build tensorboard
    tf_writer = tf.summary.FileWriter("./tensorboard_multi_ops_group", graph=tf.get_default_graph())
    # The context manager closes the session once the timed run is done.
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        start = time.time()
        # Building the group op is inside the timed region, as before.
        sess.run(tf.group(*results), options=options, run_metadata=run_metadata)
        end = time.time()
    # Close the writer so the graph event is flushed to disk.
    tf_writer.close()
    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    # NOTE(review): the '%d' in this file name is never formatted, so the
    # file is literally 'timeline_02_step_%d.json'; kept for compatibility.
    with open('timeline_02_step_%d.json', 'w') as f:
        f.write(chrome_trace)
    print("benchmark time: {}".format(end - start))
Exemplo n.º 7
0
  def run_op_benchmark(self,
                       sess,
                       op_or_tensor,
                       feed_dict=None,
                       burn_iters=2,
                       min_iters=10,
                       store_trace=False,
                       store_memory_usage=True,
                       name=None,
                       extras=None,
                       mbs=0):
    """Run an op or tensor in the given session.  Report the results.

    Args:
      sess: `Session` object to use for timing.
      op_or_tensor: `Operation` or `Tensor` to benchmark.
      feed_dict: A `dict` of values to feed for each op iteration (see the
        `feed_dict` parameter of `Session.run`).
      burn_iters: Number of burn-in iterations to run.
      min_iters: Minimum number of iterations to use for timing.
      store_trace: Boolean, whether to run an extra untimed iteration and
        store the trace of iteration in returned extras.
        The trace will be stored as a string in Google Chrome trace format
        in the extras field "full_trace_chrome_format". Note that trace
        will not be stored in test_log_pb2.TestResults proto.
      store_memory_usage: Boolean, whether to run an extra untimed iteration,
        calculate memory usage, and store that in extras fields.
      name: (optional) Override the BenchmarkEntry name with `name`.
        Otherwise it is inferred from the top-level method name.
      extras: (optional) Dict mapping string keys to additional benchmark info.
        Values may be either floats or values that are convertible to strings.
        The dict is updated in place with the collected statistics.
      mbs: (optional) The number of megabytes moved by this op, used to
        calculate the ops throughput.

    Returns:
      A `dict` containing the key-value pairs that were passed to
      `report_benchmark`. If `store_trace` option is used, then
      `full_chrome_trace_format` will be included in return dictionary even
      though it is not passed to `report_benchmark` with `extras`.
      `throughput` is `mbs / wall_time`, or 0.0 when the median wall time is
      not positive (no timed iterations, or a delta below timer resolution).
    """
    for _ in range(burn_iters):
      sess.run(op_or_tensor, feed_dict=feed_dict)

    deltas = [None] * min_iters

    for i in range(min_iters):
      start_time = time.time()
      sess.run(op_or_tensor, feed_dict=feed_dict)
      end_time = time.time()
      delta = end_time - start_time
      deltas[i] = delta

    extras = extras if extras is not None else {}
    unreported_extras = {}
    if store_trace or store_memory_usage:
      # One extra, untimed run with full tracing supplies both the Chrome
      # trace and the allocator statistics.
      run_options = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)
      run_metadata = config_pb2.RunMetadata()
      sess.run(op_or_tensor, feed_dict=feed_dict,
               options=run_options, run_metadata=run_metadata)
      tl = timeline.Timeline(run_metadata.step_stats)

      if store_trace:
        unreported_extras["full_trace_chrome_format"] = (
            tl.generate_chrome_trace_format())

      if store_memory_usage:
        step_stats_analysis = tl.analyze_step_stats(show_memory=True)
        allocator_maximums = step_stats_analysis.allocator_maximums
        for k, v in allocator_maximums.items():
          extras["allocator_maximum_num_bytes_%s" % k] = v.num_bytes

    def _median(x):
      # Median of x, or the sentinel -1 when x is empty.
      if not x:
        return -1
      s = sorted(x)
      l = len(x)
      lm1 = l - 1
      return (s[l//2] + s[lm1//2]) / 2.0

    def _mean_and_stdev(x):
      # (mean, sample standard deviation); -1 marks an undefined value.
      if not x:
        return -1, -1
      l = len(x)
      mean = sum(x) / l
      if l == 1:
        return mean, -1
      variance = sum([(e - mean) * (e - mean) for e in x]) / (l - 1)
      return mean, math.sqrt(variance)

    median_delta = _median(deltas)

    # Guard the division: a zero median (op faster than the timer's
    # resolution) would raise ZeroDivisionError, and the -1 "no samples"
    # sentinel would produce a meaningless negative throughput.
    throughput = mbs / median_delta if median_delta > 0 else 0.0

    benchmark_values = {
        "iters": min_iters,
        "wall_time": median_delta,
        "extras": extras,
        "name": name,
        "throughput": throughput
    }
    self.report_benchmark(**benchmark_values)

    # Added after reporting so these entries are only part of the returned
    # dict (see the Returns section).
    mean_delta, stdev_delta = _mean_and_stdev(deltas)
    unreported_extras["wall_time_mean"] = mean_delta
    unreported_extras["wall_time_stdev"] = stdev_delta
    benchmark_values["extras"].update(unreported_extras)
    return benchmark_values
    def prediction_callback(self, input_msg):
        """Run the two-stage prediction for one input message and publish it.

        Stage one evaluates `self.c1top32` on the message's inputs.  Stage
        two feeds that result back in as `self.car1_future`, with the sample
        count multiplied by 64, and evaluates `self.c1best` and `self.r`,
        which are published as a `prediction_output` message.  When the
        module-level `profile` flag is truthy, both stages run with full
        tracing and each writes a Chrome timeline file.  Timing information
        is printed along the way.

        Args:
          input_msg: incoming message providing `car1`, `car2`, `extras`,
            `traj_lengths`, `sample_ct` and either `car1_future` or the pair
            `car1_future_x` / `car1_future_y`.
        """
        tic = timeit.default_timer()
        print("subscribed to prediction input")
        tic0 = timeit.default_timer()
        feed_dict = {
            self.car1: multiarray_to_numpy(input_msg.car1),
            self.car2: multiarray_to_numpy(input_msg.car2),
            self.extras: multiarray_to_numpy(input_msg.extras),
            self.traj_lengths: multiarray_to_numpy(input_msg.traj_lengths),
            self.sample_ct: [input_msg.sample_ct]
        }
        if input_msg.car1_future.data:
            feed_dict[self.car1_future] = multiarray_to_numpy(
                input_msg.car1_future)
        else:
            feed_dict[self.car1_future_x] = multiarray_to_numpy(
                input_msg.car1_future_x)
            feed_dict[self.car1_future_y] = multiarray_to_numpy(
                input_msg.car1_future_y)
        toc0 = timeit.default_timer()

        print("constructing feed_dict took: ", toc0 - tic0,
              " (s), running tf!")

        tic0 = timeit.default_timer()
        tracing = profile
        # One RunOptions object is shared by both traced runs.
        options = (tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                   if tracing else None)

        def run_stage(fetches, trace_suffix):
            # Evaluate `fetches` with the current feed_dict; when tracing,
            # also dump the run's Chrome timeline to a per-GPU file.
            if not tracing:
                return self.sess.run(fetches, feed_dict=feed_dict)
            run_metadata = tf.RunMetadata()
            values = self.sess.run(fetches,
                                   feed_dict=feed_dict,
                                   options=options,
                                   run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(
                    '/home/schmrlng/Dropbox/timeline' +
                    os.environ["CUDA_VISIBLE_DEVICES"] + trace_suffix,
                    'w') as f:
                f.write(chrome_trace)
            return values

        c1top32 = run_stage(self.c1top32, '_0.json')

        feed_dict[self.car1_future] = c1top32
        feed_dict[self.sample_ct] = [input_msg.sample_ct * 64]
        # The x/y entries exist only when the message carried no
        # `car1_future`; pop with a default so the other case does not raise
        # KeyError.
        feed_dict.pop(self.car1_future_x, None)  # should be unnecessary
        feed_dict.pop(self.car1_future_y, None)  # should be unnecessary

        c1best, r = run_stage([self.c1best, self.r], '_1.json')
        toc0 = timeit.default_timer()

        print("done running tf!, took (s): ", toc0 - tic0)

        tic0 = timeit.default_timer()
        output_msg = prediction_output()
        output_msg.y = numpy_to_multiarray(c1best)
        output_msg.r = numpy_to_multiarray(r)
        self.pub.publish(output_msg)
        toc0 = timeit.default_timer()
        toc = timeit.default_timer()

        print("output_msg constructed and published, took (s): ", toc0 - tic0)
        print("total time taken (s): ", toc - tic)
def main_LSTM(args):
    '''
    implementation of the original LSTM approach (https://github.com/KhaledSaleh/driving_behaviour_classification)

    For each of ``args.stat_iterations`` repetitions, and for every
    train/test instance returned by ``load_dataset``, this builds a fresh
    graph around ``LSTM_Network``, trains it (or restores
    ./weights/LSTM_model when ``args.test`` is set), evaluates it on the test
    split, runs a traced inference-time measurement and logs accuracy, F1 and
    the confusion matrix. Aggregated results are written to a .mat file and
    appended to a tab-separated text file under ``results/``.

    Args:
        args: namespace of run options. Attributes read here are
            ``training_volume``, ``input_dim``, ``encoding_dim``, ``scale``,
            ``runtime_measurement``, ``dataset``, ``stat_iterations`` and
            ``test``.
    '''
    # set config params specific for the original code
    config = Config()
    config.training_volume = args.training_volume
    config.input_dim = args.input_dim
    config.encoding_dim = args.encoding_dim
    config.scale = args.scale
    # repeat the timed inference only when a runtime measurement is requested
    config.n_time_measures = 10 if args.runtime_measurement else 1

    # load preprocessed data; load_dataset also returns the config object to use from here on
    data = load_dataset(args.dataset, config)
    X_train = data[0]
    X_test = data[1]
    y_train = data[2]
    y_test = data[3]
    config = data[4]

    # per-epoch wall-clock times, accumulated over every instance and repetition
    logs = []
    # the loops below iterate over instances, so wrap single arrays in a list
    if isinstance(X_train, list):
        print("given data is a list")
        X_train_list = X_train
        X_test_list = X_test
        y_train_list = y_train
        y_test_list = y_test
    else:
        X_train_list = [X_train]
        X_test_list = [X_test]
        y_train_list = [y_train]
        y_test_list = [y_test]

    #######################################################################################
    # statistical iteration
    #######################################################################################
    acc_mean = []
    f1_mean = []

    for stat_it in range(args.stat_iterations):
        logger.info('Statistial iteration: ' + str(stat_it))

        # train for each element in list (that is why we need list form, even if it contains only one element)
        logger.info('Training data contains ' + str(len(X_train_list)) + ' training instances...')
        scores = []
        accs = []
        for it, (X_train, X_test, y_train, y_test) in enumerate(
                zip(X_train_list, X_test_list, y_train_list, y_test_list)):
            logger.info(('.......'))
            logger.info('instance ' + str(it) + ':')

            # use only fraction of training samples (if given)
            # NOTE(review): both slices start at index 1, so the first training
            # sample is always dropped -- confirm whether that is intended.
            X_train = X_train[1:int(X_train.shape[0] * config.training_volume), :]
            y_train = y_train[1:int(y_train.shape[0] * config.training_volume), :]

            config.n_inputs = X_train.shape[2]
            config.train_count = len(X_train)
            config.test_data_count = len(X_test)
            config.n_steps = len(X_train[0])
            config.n_classes = len(np.unique(y_train))

            logger.info('Training dataset shape: ' + str(X_train.shape) + str(y_train.shape))
            logger.info('Test dataset shape: ' + str(X_test.shape) + str(y_test.shape))
            graph = tf.Graph()
            with graph.as_default():

                X = tf.compat.v1.placeholder(tf.float32, [None, config.n_steps, config.n_inputs], name="X")
                Y = tf.compat.v1.placeholder(tf.float32, [None, config.n_classes], name="Y")

                pred_Y = LSTM_Network(X, config)

                # Loss,optimizer,evaluation
                l2 = config.lambda_loss_amount * \
                     sum(tf.nn.l2_loss(tf_var) for tf_var in tf.compat.v1.trainable_variables())
                # Softmax loss and L2
                cost = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(logits=pred_Y, labels=Y), name="cost") + l2
                optimizer = tf.compat.v1.train.AdamOptimizer(
                    learning_rate=config.learning_rate).minimize(cost)

                correct_pred = tf.equal(tf.argmax(pred_Y, 1), tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

                saver = tf.compat.v1.train.Saver()

            with tf.compat.v1.Session(graph=graph, config=tf.compat.v1.ConfigProto(log_device_placement=False)) as sess:
                if not args.test:
                    init_op = tf.compat.v1.global_variables_initializer()
                    sess.run(init_op)
                    best_accuracy = 0.0
                    # Start training for each batch and loop epochs
                    for i in range(config.training_epochs):
                        starttime = time()
                        # only full batches are used; a trailing partial batch is skipped
                        for start, end in zip(range(0, config.train_count, config.batch_size),
                                              range(config.batch_size, config.train_count + 1, config.batch_size)):
                            sess.run(optimizer, feed_dict={X: X_train[start:end],
                                                           Y: one_hot(y_train[start:end], config.n_classes)})
                            # checkpoint is rewritten after every batch
                            saver.save(sess, os.path.join("./weights", 'LSTM_model'))
                        # Test completely at every epoch: calculate accuracy
                        pred_out, accuracy_out, loss_out = sess.run([pred_Y, accuracy, cost], feed_dict={
                            X: X_test, Y: one_hot(y_test, config.n_classes)})
                        logs.append(time() - starttime)
                        print("Training iter: {},".format(i) + \
                              " Test accuracy : {},".format(accuracy_out) + \
                              " Loss : {}".format(loss_out))
                        best_accuracy = max(best_accuracy, accuracy_out)
                    print("")
                    mean_epoch_time = np.mean(logs)
                    overall_time = np.sum(logs)
                    logger.info("Mean Epoch time: " + str(mean_epoch_time))
                    logger.info("overall training time: " + str(overall_time))
                    logger.info("Final test accuracy: {}".format(accuracy_out))
                    logger.info("Best epoch's test accuracy: {}".format(best_accuracy))

                    print("")
                # start testing the trained model
                else:
                    saver.restore(sess, os.path.join("./weights", 'LSTM_model'))
                    t1 = time()
                    pred_out, accuracy_out, loss_out = sess.run([pred_Y, accuracy, cost], feed_dict={
                        X: X_test, Y: one_hot(y_test, config.n_classes)})
                    inference_time = time() - t1
                    print(" Test accuracy : {},".format(accuracy_out) + \
                          " Loss : {}".format(loss_out))

            #############################################################################################
            # evaluation of results
            #############################################################################################

            pred_test_bool = pred_out.argmax(1)

            # runtime measurement: each repetition opens a new session, re-initialises
            # the variables and times one traced forward pass over the test set
            inference_times = []
            traces = []
            options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
            run_metadata = tf.compat.v1.RunMetadata()
            for _ in range(config.n_time_measures):
                with tf.compat.v1.Session(graph=graph, config=tf.compat.v1.ConfigProto(log_device_placement=False)) as timing_sess:
                    init_op = tf.compat.v1.global_variables_initializer()
                    timing_sess.run(init_op)
                    t1 = time()
                    timing_sess.run([pred_Y, accuracy, cost], feed_dict={
                        X: X_test, Y: one_hot(y_test, config.n_classes)}, options=options, run_metadata=run_metadata)
                    inference_time = time() - t1
                    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    traces.append(chrome_trace)
                    inference_times.append(inference_time)
            # only the trace of the last measurement is kept on disk
            with open('./logs/LSTM_ts_preproc_timeline_test.json', 'w') as f:
                f.write(traces[-1])
            # median over all measurements (previously only the last value was used)
            inference_time = np.median(inference_times)
            logger.info("Inference time: " + str(inference_time))
            logger.info("Inference time of one sequence [ms]: " + str(inference_time*1000/X_test.shape[0]))

            # NOTE(review): the message says "training data" but the report below
            # is computed on y_test.
            logger.info('Accuracy on training data: ')
            report = classification_report(y_test.astype(int), pred_test_bool, output_dict=True)
            logger.info(classification_report(y_test.astype(int), pred_test_bool))

            accs.append((report['accuracy']))

            logger.info("Confusion matrix:")
            confusion_matrix = metrics.confusion_matrix(y_test.astype(int), pred_test_bool)
            logger.info(confusion_matrix)

            # f1 score
            f1 = f1_score(y_test.astype(int), pred_test_bool, average='weighted')
            scores.append(f1)
            logger.info("F1 Score: " + str(f1))

        # add results to statistical result array
        acc_mean.append(np.mean(accs))
        f1_mean.append(np.mean(scores))

    # save as mat files (report / confusion matrix / predictions are those of the last instance evaluated)
    save_dic = {"report": report, "confusion_matrix": confusion_matrix, "config": config, "pred": pred_out,
                "label": y_test, "f1": np.mean(f1_mean), "acc_mean": np.mean(acc_mean)}
    savemat("results/" + args.dataset + "/results_origNet_" + str(config.training_volume) + ".mat", save_dic)

    logger.info('Accuracy results of statistical repetitions: ' + str(acc_mean))
    logger.info('F1 scores of statistical repetitions: ' + str(f1_mean))

    # write all scores to extra file
    logger.info('Mean Score: ' + str(np.mean(f1_mean)))
    logger.info('Mean Accuracy: ' + str(np.mean(acc_mean)))
    with open("results/results_" + args.dataset + "_LSTM.txt", 'a') as file:
        file.write(str(args.stat_iterations) + '\t'
                   + str(round(np.mean(f1_mean), 3)) + '\t'
                   + str(round(np.mean(acc_mean), 3)) + '\t'
                   + str(round(np.std(f1_mean), 3)) + '\t'
                   + str(round(np.std(acc_mean), 3)) + '\t'
                   + str(args.training_volume) + '\n'
                   )
Exemplo n.º 10
0
# One worker thread per enqueue op; `reverse` flips the order in which they are launched.
threads = [
    threading.Thread(target=run_op, args=(op, ))
    for op in (enqueue_zeros, enqueue_ones)
]
if reverse:
    threads = threads[::-1]

# Launch every worker before waiting on any of them so the enqueues can overlap.
for t in threads:
    t.start()

# wait for threads to finish
for t in threads:
    t.join()

# generate merged timeline: fold the per-run metadata protos into a single one
merged_metadata = tf.RunMetadata()
for run_metadata in run_metadatas:
    merged_metadata.MergeFrom(run_metadata)

tl = timeline.Timeline(merged_metadata.step_stats)
ctf = tl.generate_chrome_trace_format()
trace_path = sys.argv[0] + '_%s_timeline.json' % (reverse)
with open(trace_path, 'w') as f:
    f.write(ctf)

# Both workers are expected to have enqueued n elements each.
expected_size = 2 * n
assert sess.run(queue.size()) == expected_size
result = sess.run(queue.dequeue_many(expected_size))
padding = np.array([0])

# Difference between neighbouring dequeued values (zero-padded at both ends);
# a total absolute change above 2 is reported as interleaving.
shifted_right = np.concatenate([padding, result])
shifted_left = np.concatenate([result, padding])
diffs = shifted_right - shifted_left
interleaved = abs(diffs).sum() > 2
print("Interleaving detected: %s" % (interleaved))
Exemplo n.º 11
0
    def train_model(self, sess, max_iters):
        """Network training loop.

        Builds the four loss terms (RPN classification, RPN box regression,
        R-CNN classification, R-CNN box regression) from the outputs of
        ``self.net``, attaches a momentum optimizer with a staircase
        exponentially decayed learning rate, then runs ``max_iters`` update
        steps on ``sess``, printing the losses every ``cfg.TRAIN.DISPLAY``
        steps and snapshotting every ``cfg.TRAIN.SNAPSHOT_ITERS`` steps.

        Args:
            sess: TensorFlow session used for initialization and every step.
            max_iters: number of training iterations to run.
        """

        # Original author's note: returns a RoIDataLayer object holding
        # self._roidb, self._num_classes, self._perm and self._cur.
        data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

        # RPN
        # classification loss
        # Reshape the 'rpn_cls_score_reshape' output to (-1, 2); per the original
        # author the input is (1, n, n, 18) and the 2 columns are the
        # foreground / background scores.
        rpn_cls_score = tf.reshape(
            self.net.get_output('rpn_cls_score_reshape'), [-1, 2])

        # Output [0] of 'rpn-data' is rpn_label; per the original author its
        # shape is (1, 1, A * height, width) and it holds the label (-1, 0, 1)
        # of every anchor.
        # Open question 1 (original author): the read order of the labels may
        # be wrong here.
        rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])

        # Keep only the scores whose label is not -1 and reshape back to (-1, 2).
        rpn_cls_score = tf.reshape(
            tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))),
            [-1, 2])

        # Keep only the labels that are not -1 and flatten them again.
        rpn_label = tf.reshape(
            tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])

        # Score loss: logits are the layer output, labels the anchor labels;
        # both must have the same leading dimension.
        # NOTE (original author): the op returns a vector; use tf.reduce_sum
        # for the cross entropy, tf.reduce_mean for the loss.
        # Open question 2 (original author): logits and labels have different
        # shapes here, which looked suspicious to them.
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=rpn_cls_score, labels=rpn_label))

        # bounding box regression L1 loss
        # 'rpn_bbox_pred' holds the bbox regression output (dx, dy, dw, dh).
        rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
        # 'rpn-data'[1]: per the original author, the regression targets
        # (dx, dy, dw, dh) of every anchor; transposed below so that, per their
        # note, the result is (1, height, width, A * 4).
        rpn_bbox_targets = tf.transpose(
            self.net.get_output('rpn-data')[1], [0, 2, 3, 1])
        # rpn_bbox_inside_weights: per the original author, anchors labelled 1
        # get (1.0, 1.0, 1.0, 1.0). Same transpose as above.
        rpn_bbox_inside_weights = tf.transpose(
            self.net.get_output('rpn-data')[2], [0, 2, 3, 1])
        # rpn_bbox_outside_weights: per the original author, anchors labelled
        # 0 or 1 get 1 / num_examples in each component, num_examples being the
        # number of anchors labelled 0 or 1. Same transpose as above.
        rpn_bbox_outside_weights = tf.transpose(
            self.net.get_output('rpn-data')[3], [0, 2, 3, 1])

        # Smooth-L1 loss of the RPN box regression.
        rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred,
                                                 rpn_bbox_targets,
                                                 rpn_bbox_inside_weights,
                                                 rpn_bbox_outside_weights)
        # Reduce the element-wise result to a scalar loss.
        rpn_loss_box = tf.reduce_mean(
            tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

        # R-CNN
        # classification loss
        # Output of the final classification branch.
        cls_score = self.net.get_output('cls_score')
        # label: per the original author, proposals are merged with the ground
        # truth, the qualifying RoIs are sampled and these are their labels.
        label = tf.reshape(self.net.get_output('roi-data')[1], [-1])
        # Compare the RoI labels with the classification output.
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                           labels=label))

        # bounding box regression L1 loss
        # Output of the final bbox branch.
        bbox_pred = self.net.get_output('bbox_pred')
        bbox_targets = self.net.get_output('roi-data')[2]
        bbox_inside_weights = self.net.get_output('roi-data')[3]
        bbox_outside_weights = self.net.get_output('roi-data')[4]

        # Smooth-L1 loss of the R-CNN box regression.
        smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets,
                                             bbox_inside_weights,
                                             bbox_outside_weights)
        # Reduce the element-wise result to a scalar loss.
        loss_box = tf.reduce_mean(
            tf.reduce_sum(smooth_l1, reduction_indices=[1]))

        # final loss: sum of the four terms
        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

        # optimizer and learning rate
        global_step = tf.Variable(0, trainable=False)
        # Original author's note: cfg.TRAIN.LEARNING_RATE is 0.001 and
        # cfg.TRAIN.STEPSIZE is 50000. With staircase=True the rate is
        # multiplied by 0.1 once every STEPSIZE steps instead of decaying
        # continuously.
        lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,
                                        global_step,
                                        cfg.TRAIN.STEPSIZE,
                                        0.1,
                                        staircase=True)
        # Original author's note: cfg.TRAIN.MOMENTUM is 0.9.
        momentum = cfg.TRAIN.MOMENTUM
        train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
            loss, global_step=global_step)

        # initialize variables
        sess.run(tf.global_variables_initializer())
        if self.pretrained_model is not None:  # load pretrained weights when given
            print('Loading pretrained model weights from {:s}'.format(
                self.pretrained_model))
            self.net.load(self.pretrained_model, sess, self.saver, True)

        last_snapshot_iter = -1
        timer = Timer()
        # Start at -1 so the final-snapshot check below is well defined (and a
        # no-op) when max_iters is 0.
        step = -1
        for step in range(max_iters):
            # get one batch
            blobs = data_layer.forward()

            # Make one SGD update: feed the placeholders of the network
            feed_dict = {
                self.net.data: blobs['data'],
                self.net.im_info: blobs['im_info'],
                self.net.keep_prob: 0.5,
                self.net.gt_boxes: blobs['gt_boxes'],
            }

            run_options = None
            run_metadata = None
            # Original author's note: DEBUG_TIMELINE is False by default.
            if cfg.TRAIN.DEBUG_TIMELINE:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            timer.tic()

            rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, _ = sess.run(
                [
                    rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box,
                    train_op
                ],
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata)

            timer.toc()

            if cfg.TRAIN.DEBUG_TIMELINE:
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                # int() instead of the Python-2-only long(); the millisecond
                # timestamp keeps the file name unique per step.
                trace_path = (str(int(time.time() * 1000)) +
                              '-train-timeline.ctf.json')
                with open(trace_path, 'w') as trace_file:
                    trace_file.write(
                        trace.generate_chrome_trace_format(show_memory=False))

            if (step + 1) % (cfg.TRAIN.DISPLAY) == 0:
                total_loss_value = (rpn_loss_cls_value + rpn_loss_box_value +
                                    loss_cls_value + loss_box_value)
                # Evaluate the learning rate on the session we train with rather
                # than relying on an implicit default session.
                print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\
                        (step + 1, max_iters, total_loss_value, rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, sess.run(lr)))
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if (step + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = step
                self.snapshot(sess, step)

        # Always leave a snapshot of the final iteration.
        if last_snapshot_iter != step:
            self.snapshot(sess, step)
def _record_run_timeline(many_runs_timeline, run_metadata):
    """Merge the step stats of the most recent traced run into the timeline."""
    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    many_runs_timeline.update_timeline(chrome_trace)


def train_loop(sess, train_step, global_step, optlist, args, trainset,
               validationset, disable_training, enable_tf_timeline):
    """Run training iterations until ``sess.should_stop()``, validating after every epoch.

    Besides its parameters the function reads several module-level names:
    ``variables`` (placeholder dict), ``loss_fn``, ``train_summary``,
    ``validation_summary``, ``accuracy_fn``, ``auc_fn``, ``model_saver``,
    ``logger`` and ``timeliner``.

    Args:
        sess: session-like object exposing ``run`` and ``should_stop``.
        train_step: op that performs one optimizer update.
        global_step: tensor holding the global step counter.
        optlist: optional sequence of extra ops; ``optlist[0]`` is run every
            step and ``optlist[1]`` every ``args["kfac_inv_update_frequency"]``
            global steps. Falsy to disable.
        args: dict of run options (``task_index``, ``restart``, ``modelpath``,
            ``train_batch_size_per_node``, ``validation_batch_size_per_node``,
            ``dropout_p``, ``create_summary``, ``display_interval``,
            ``kfac_inv_update_frequency``).
        trainset: training data iterator (``reset``, ``next_batch``,
            ``_epochs_completed``).
        validationset: validation data iterator with the same interface.
        disable_training: when true, batches are fetched but no graph op is
            run; the loss is reported as 0 and the local batch counter stands
            in for the global step.
        enable_tf_timeline: when true, every run is fully traced and the
            merged trace is written to ``Timeliner_output.json``.
    """
    train_loop_logger = logger(int(args["task_index"]), "Train Loop")
    train_loop_logger.start_timer()

    options = None
    run_metadata = None
    many_runs_timeline = None

    if enable_tf_timeline:
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        many_runs_timeline = timeliner()

    #counter stuff
    trainset.reset()
    validationset.reset()

    #restore weights belonging to graph
    epochs_completed = 0
    if not args['restart']:
        last_model = tf.train.latest_checkpoint(args['modelpath'])
        print("Restoring model %s." % last_model)
        model_saver.restore(sess, last_model)

    #losses
    train_loss = 0.
    train_batches = 0
    total_batches = 0
    train_time = 0

    #do training
    while not sess.should_stop():
        train_iteration_logger = logger(int(args['task_index']),
                                        "Training Iteration", epochs_completed)
        train_iteration_logger.start_timer()

        #increment total batch counter
        total_batches += 1

        #get next batch
        images, labels, normweights, _, _ = trainset.next_batch(
            args['train_batch_size_per_node'])
        #set weights to 1
        normweights[:] = 1.
        #set up feed dict:
        feed_dict = {
            variables['images_']: images,
            variables['labels_']: labels,
            variables['weights_']: normweights,
            variables['keep_prob_']: args['dropout_p']
        }

        start_time = time.time()
        if disable_training:
            # Nothing is executed on the graph: keep the bookkeeping below
            # well defined instead of failing on unbound names.
            gstep = total_batches
            tmp_loss = 0.
        else:
            #update weights
            if args['create_summary']:
                _, gstep, _, tmp_loss = sess.run(
                    [train_step, global_step, train_summary, loss_fn],
                    feed_dict=feed_dict,
                    options=options,
                    run_metadata=run_metadata)
            else:
                _, gstep, tmp_loss = sess.run(
                    [train_step, global_step, loss_fn],
                    feed_dict=feed_dict,
                    options=options,
                    run_metadata=run_metadata)

            if enable_tf_timeline:
                _record_run_timeline(many_runs_timeline, run_metadata)

            #update kfac parameters
            if optlist:
                sess.run(optlist[0],
                         feed_dict=feed_dict,
                         options=options,
                         run_metadata=run_metadata)

                if enable_tf_timeline:
                    _record_run_timeline(many_runs_timeline, run_metadata)

                if gstep % args["kfac_inv_update_frequency"] == 0:
                    sess.run(optlist[1],
                             feed_dict=feed_dict,
                             options=options,
                             run_metadata=run_metadata)

                    if enable_tf_timeline:
                        _record_run_timeline(many_runs_timeline, run_metadata)

        end_time = time.time()
        train_time += end_time - start_time

        #increment train loss and batch number
        train_loss += tmp_loss
        train_batches += 1

        #determine if we give a short update:
        if gstep % args['display_interval'] == 0:
            print(
                time.time(), "REPORT rank", args["task_index"],
                "global step %d., average training loss %g (%.3f sec/batch)" %
                (gstep, train_loss / float(train_batches),
                 train_time / float(train_batches)))

        #check if epoch is done
        if trainset._epochs_completed > epochs_completed:
            epochs_completed = trainset._epochs_completed
            print(
                time.time(), "COMPLETED rank", args["task_index"],
                "epoch %d, average training loss %g (%.3f sec/batch)" %
                (epochs_completed, train_loss / float(train_batches),
                 train_time / float(train_batches)))

            #reset counters
            train_loss = 0.
            train_batches = 0
            train_time = 0

            #compute validation loss:
            #reset variables
            validation_loss = 0.
            validation_batches = 0

            #iterate over batches
            while True:
                #get next batch
                images, labels, normweights, weights, _ = validationset.next_batch(
                    args['validation_batch_size_per_node'])
                #set weights to 1:
                normweights[:] = 1.
                weights[:] = 1.

                if not disable_training:
                    # same inputs for the loss and both metric updates; dropout off
                    validation_feed = {
                        variables['images_']: images,
                        variables['labels_']: labels,
                        variables['weights_']: normweights,
                        variables['keep_prob_']: 1.0
                    }

                    #compute loss (unpacked to a scalar in both branches)
                    if args['create_summary']:
                        _, batch_loss = sess.run(
                            [validation_summary, loss_fn],
                            feed_dict=validation_feed)
                    else:
                        batch_loss, = sess.run([loss_fn],
                                               feed_dict=validation_feed)

                    #add loss
                    validation_loss += batch_loss
                    validation_batches += 1

                    #update accuracy
                    sess.run(accuracy_fn[1], feed_dict=validation_feed)

                    #update auc
                    sess.run(auc_fn[1], feed_dict=validation_feed)

                #check if full pass done
                if validationset._epochs_completed > 0:
                    validationset.reset()
                    break

            # No batches are evaluated when training is disabled; skip the
            # report rather than divide by zero.
            if validation_batches > 0:
                print(
                    time.time(), "COMPLETED epoch %d, average validation loss %g" %
                    (epochs_completed,
                     validation_loss / float(validation_batches)))
                validation_accuracy = sess.run(accuracy_fn[0])
                print(
                    time.time(), "COMPLETED epoch %d, average validation accu %g" %
                    (epochs_completed, validation_accuracy))
                validation_auc = sess.run(auc_fn[0])
                print(
                    time.time(), "COMPLETED epoch %d, average validation auc %g" %
                    (epochs_completed, validation_auc))

        # Written after every iteration so a trace exists even if the run is interrupted.
        if enable_tf_timeline:
            many_runs_timeline.save('Timeliner_output.json')

        train_iteration_logger.end_timer()

    if enable_tf_timeline:
        many_runs_timeline.save('Timeliner_output.json')

    train_loop_logger.end_timer()
Exemplo n.º 13
0
  def benchmark_model(self, warmup_runs, bm_runs, num_threads,
                      trace_filename=None):
    """Benchmark model.

    Builds the model in a fresh graph, runs `warmup_runs` untimed-for-stats
    warm-up steps on a random input, then times `bm_runs` steps and prints
    mean/std/min/max of the step time. When more than four timings exist the
    two fastest and two slowest are discarded first.

    Args:
      warmup_runs: number of warm-up runs before timing starts.
      bm_runs: number of timed runs.
      num_threads: intra-op thread count; values <= 0 keep the default
        session config.
      trace_filename: optional path; if set, one extra fully traced run is
        executed halfway through the benchmark and its Chrome trace is
        written there.
    """
    if self.tensorrt:
      print('Using tensorrt ', self.tensorrt)
      self.build_and_save_model()
      graphdef = self.freeze_model()

    if num_threads > 0:
      print('num_threads for benchmarking: {}'.format(num_threads))
      sess_config = tf.ConfigProto(
          intra_op_parallelism_threads=num_threads,
          inter_op_parallelism_threads=1)
    else:
      sess_config = tf.ConfigProto()

    # rewriter_config_pb2.RewriterConfig.OFF
    sess_config.graph_options.rewrite_options.dependency_optimization = 2
    if self.use_xla:
      sess_config.graph_options.optimizer_options.global_jit_level = (
          tf.OptimizerOptions.ON_2)

    with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
      inputs = tf.placeholder(tf.float32, name='input', shape=self.inputs_shape)
      output = self.build_model(inputs, is_training=False)

      img = np.random.uniform(size=self.inputs_shape)

      sess.run(tf.global_variables_initializer())
      if self.tensorrt:
        fetches = [inputs.name] + [i.name for i in output]
        goutput = self.convert_tr(graphdef, fetches)
        inputs, output = goutput[0], goutput[1:]

      if not self.use_xla:
        # Don't use tf.group because XLA removes the whole graph for tf.group.
        output = tf.group(*output)
      for i in range(warmup_runs):
        start_time = time.time()
        sess.run(output, feed_dict={inputs: img})
        print('Warm up: {} {:.4f}s'.format(i, time.time() - start_time))
      print('Start benchmark runs total={}'.format(bm_runs))
      timev = []
      for i in range(bm_runs):
        if trace_filename and i == (bm_runs // 2):
          # One additional traced run; it is not part of the timing samples.
          run_options = tf.RunOptions()
          run_options.trace_level = tf.RunOptions.FULL_TRACE
          run_metadata = tf.RunMetadata()
          sess.run(output, feed_dict={inputs: img},
                   options=run_options, run_metadata=run_metadata)
          tf.logging.info('Dumping trace to %s' % trace_filename)
          trace_dir = os.path.dirname(trace_filename)
          # A bare file name has an empty dirname; there is nothing to create then.
          if trace_dir and not tf.io.gfile.exists(trace_dir):
            tf.io.gfile.makedirs(trace_dir)
          with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
            from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file.write(
                trace.generate_chrome_trace_format(show_memory=True))

        start_time = time.time()
        sess.run(output, feed_dict={inputs: img})
        timev.append(time.time() - start_time)

      timev.sort()
      # Drop the two fastest and two slowest runs as outliers, but only when
      # that leaves at least one sample to report.
      if len(timev) > 4:
        timev = timev[2:bm_runs - 2]
      if not timev:
        print('{} {}runs {}threads: no timing samples collected'.format(
            self.model_name, 0, num_threads))
        return
      print('{} {}runs {}threads: mean {:.4f} std {:.4f} min {:.4f} max {:.4f}'
            .format(self.model_name, len(timev), num_threads, np.mean(timev),
                    np.std(timev), np.min(timev), np.max(timev)))
Exemplo n.º 14
0
    def run(self, *args_, **kwargs_):
        """Run the wrapped session, tracing the steps in [start_step, end_step).

        Steps before ``self.start_step`` and every call made once tracing is
        over are forwarded to ``self.sess.run`` unchanged. Inside the tracing
        window the call is made with ``self.run_options`` and
        ``self.run_metadata``; the resulting Chrome-trace events are appended
        to ``self.traces`` and, on the first traced step, a dependency DAG is
        built from them and checked for cycles. When the last traced step
        completes, tensor shapes, fetch names and feed-dict metadata are
        collected and ``self.output_traces()`` is called.

        Args:
            *args_: positional arguments for ``sess.run`` (fetches first).
            **kwargs_: keyword arguments for ``sess.run`` (e.g. ``feed_dict``).

        Returns:
            Whatever ``self.sess.run`` returns for this call.

        Raises:
            AssertionError: if the dependency DAG contains a cycle.
        """
        if self._end_trace:
            return self.sess.run(*args_, **kwargs_)

        if self.step_cnt < self.start_step:
            # Warm-up step: run untraced, only advance the counter.
            ret = self.sess.run(*args_, **kwargs_)
            self.step_cnt += 1
            return ret

        if self.step_cnt >= self.end_step:
            # Tracing window already passed (or empty): plain pass-through
            # instead of falling off the end with no result.
            return self.sess.run(*args_, **kwargs_)

        ret = self.sess.run(*args_,
                            options=self.run_options,
                            run_metadata=self.run_metadata,
                            **kwargs_)
        # Create the Timeline object and collect its events
        tl = timeline.Timeline(self.run_metadata.step_stats)
        ctf = json.loads(tl.generate_chrome_trace_format())
        self.traces["traceEvents"] += ctf["traceEvents"]
        print("Add the {}th step of traces".format(self.step_cnt))
        self.step_cnt += 1

        ### Create the DAG (once, from the first traced step)
        if self.dag is None:
            self.dag = nx.DiGraph()
            for trace in ctf["traceEvents"]:
                # skip metadata events and events without arguments
                if trace["ph"] == "M" or "args" not in trace:
                    continue
                name = trace["args"]["name"]

                ### Add nodes to the DAG
                if name not in self.dag.nodes:
                    self.dag.add_node(name)

                ### Add dependency info: every "input*" argument is a predecessor
                for k, v in trace["args"].items():
                    if "input" in k:
                        self.dag.add_edge(v, name)

        # find_cycle raises NetworkXNoCycle when the graph is acyclic.
        try:
            cycle = nx.find_cycle(self.dag)
        except nx.NetworkXNoCycle:
            cycle = None
        assert cycle is None, "dependency graph contains a cycle: {}".format(cycle)

        def flatten_fetch_list(fetch_list):
            """Flatten arbitrarily nested lists/tuples of fetches into one list."""
            if not isinstance(fetch_list, (list, tuple)):
                return [fetch_list]
            result_list = []
            for item in fetch_list:
                result_list += flatten_fetch_list(item)
            return result_list

        ### Output traces
        if self.step_cnt == self.end_step:
            fd = kwargs_.get("feed_dict")
            tensor_names, tensor_shape_ops = self.tensor_shape_ops
            out_shapes = self.sess.run(tensor_shape_ops, feed_dict=fd)
            self.tensor_shapes = {}
            for name, shape in zip(tensor_names, out_shapes):
                self.tensor_shapes[name] = [int(s) for s in list(shape)]
            # collect fetch names; fetches may be positional or passed by keyword
            fetches = args_[0] if args_ else kwargs_["fetches"]
            self.fetches = [
                tensor.name for tensor in flatten_fetch_list(fetches)
            ]
            # collect feed dict meta (nothing to record when no feed_dict was given)
            for key, tensor in (fd or {}).items():
                shape_as_list = [int(dim) for dim in tensor.shape]
                # str(dtype) may look like "<dtype: 'float32'>"; keep only the
                # bare name and strip a trailing "_ref"
                dtype_as_str = (str(tensor.dtype).split("\'")[1]
                                if "\'" in str(tensor.dtype) else str(
                                    tensor.dtype)).split("_ref")[0]
                self.feed_dict_meta[key.op.name] = {
                    "shape": shape_as_list,
                    "dtype": dtype_as_str
                }
            self._end_trace = True
            self.output_traces()

        ### Return all fetches
        return ret
Exemplo n.º 15
0
def main():
    """Train a WaveNet model on a MIDI corpus with periodic validation.

    Reads the run configuration from ``get_arguments()``, builds the training
    and validation losses, restores the latest checkpoint from the log
    directory (if ``load`` finds one), and optimizes until ``args.num_steps``.

    Every 50th step -- and only when ``args.store_metadata`` is set -- a full
    trace is recorded, every validation file is evaluated, the running
    validation-loss array is saved to ``validation.npz``, a Chrome timeline is
    written, and a checkpoint is saved if the mean validation loss improved.
    On exit (including Ctrl-C) a final checkpoint is written when steps were
    taken since the last save.
    """
    args = get_arguments()
    data_dir = 'midi-Corpus/' + args.data_set + '/'
    logdir = data_dir + 'max_dilation=%d_reps=%d/' % (args.max_dilation_pow,
                                                      args.expansion_reps)
    print('*************************************************')
    print(logdir)
    print('*************************************************')
    sys.stdout.flush()
    restore_from = logdir
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    # NOTE(review): restore_from is always logdir here, so this is False.
    is_overwritten_training = logdir != restore_from

    wavenet_params = loadParams(args.max_dilation_pow, args.expansion_reps,
                                args.dil_chan, args.res_chan, args.skip_chan)

    # Persist the hyper-parameters next to the checkpoints.
    with open(logdir + 'wavenet_params.json', 'w') as outfile:
        json.dump(wavenet_params, outfile)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Build the input queue for the training set.
    with tf.name_scope('create_inputs'):
        # Global conditioning is disabled for this script.
        gc_enabled = False
        train_dir = data_dir + 'train/'
        train_reader = MidiReader(
            train_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"], wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size)
        train_batch = train_reader.dequeue(args.batch_size)
        if gc_enabled:
            # Fixed: the only reader in scope is ``train_reader``.
            gc_id_batch = train_reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=BATCH_SIZE,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=False,
        global_condition_channels=None,
        global_condition_cardinality=train_reader.gc_category_cardinality)
    # A strength of exactly 0 is passed to the model as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    print('constructing training loss')
    sys.stdout.flush()
    train_loss, target_output, prediction = net.loss(
        input_batch=train_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=args.l2_regularization_strength)
    print('constructing validation loss')
    sys.stdout.flush()

    print('making optimizer')
    sys.stdout.flush()
    optimizer = optimizer_factory['adam'](learning_rate=args.learning_rate,
                                          momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(train_loss, var_list=trainable)

    print('setting up tensorboard')
    sys.stdout.flush()
    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Validation loss is fed one whole file at a time through a placeholder.
    valid_input = tf.placeholder(dtype=tf.float32, shape=(1, None, 88))
    valid_loss, valid_target_output, valid_prediction = net.loss(
        input_batch=valid_input,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=args.l2_regularization_strength)
    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    print('saver')
    sys.stdout.flush()
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(), max_to_keep=5)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    print('thread stuff')
    sys.stdout.flush()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_reader.start_threads(sess)

    # ``step`` stays None until the first loop iteration starts.
    step = None
    last_saved_step = saved_global_step

    # load validation data
    validation_audio = load_all_audio(data_dir + 'valid/')
    num_valid_files = len(validation_audio)
    # One slot per potential validation pass (every 50th step).
    valid_loss_values = np.zeros((int(np.ceil(args.num_steps / 50)), ))
    vl_ind = 0
    print('optimization time')
    sys.stdout.flush()
    min_valid_loss = 1e10
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, train_loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))

                # Evaluate every validation file and average the losses.
                valid_losses_step = np.zeros((num_valid_files, ))
                for i in range(num_valid_files):
                    audio_i = np.expand_dims(validation_audio[i], 0)
                    valid_losses_step[i] = sess.run(valid_loss,
                                                    {valid_input: audio_i})
                valid_loss_value_step = np.mean(valid_losses_step)
                valid_loss_values[vl_ind] = valid_loss_value_step
                np.savez(logdir + 'validation.npz',
                         validation_loss=valid_loss_values)
                vl_ind += 1

                # Dump the trace of this step in Chrome trace format.
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))

                # Checkpoint only when validation improved.
                if valid_loss_value_step < min_valid_loss:
                    min_valid_loss = valid_loss_value_step
                    save(saver, sess, logdir, step)
                    last_saved_step = step
            else:
                summary, loss_value, _ = sess.run(
                    [summaries, train_loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
            sys.stdout.flush()

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # ``step`` is None when the loop body never ran; comparing None with
        # an int would raise TypeError on Python 3 and mask the real error.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        # Flush pending summaries to disk.
        writer.close()
        coord.request_stop()
        coord.join(threads)
Exemplo n.º 16
0
    def train(self, data, valid_data):
        stop_batch = self.stop_batch
        if self.run_opt.is_distrib:
            self._init_sess_distrib()
        else:
            self._init_sess_serial()

        self.print_head()
        fp = None
        if self.run_opt.is_chief:
            fp = open(self.disp_file, "a")

        cur_batch = self.sess.run(self.global_step)
        is_first_step = True
        self.cur_batch = cur_batch
        self.run_opt.message(
            "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
            % (self.sess.run(self.learning_rate), self.lr.value(cur_batch),
               self.lr.decay_steps_, self.lr.decay_rate_,
               self.lr.value(stop_batch)))

        prf_options = None
        prf_run_metadata = None
        if self.profiling:
            prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            prf_run_metadata = tf.RunMetadata()

        train_time = 0
        while cur_batch < stop_batch:
            batch_data = data.get_batch(sys_probs=self.sys_probs,
                                        auto_prob_style=self.auto_prob_style)
            feed_dict_batch = {}
            for kk in batch_data.keys():
                if kk == 'find_type' or kk == 'type':
                    continue
                if 'find_' in kk:
                    feed_dict_batch[self.place_holders[kk]] = batch_data[kk]
                else:
                    feed_dict_batch[self.place_holders[kk]] = np.reshape(
                        batch_data[kk], [-1])
            for ii in ['type']:
                feed_dict_batch[self.place_holders[ii]] = np.reshape(
                    batch_data[ii], [-1])
            for ii in ['natoms_vec', 'default_mesh']:
                feed_dict_batch[self.place_holders[ii]] = batch_data[ii]
            feed_dict_batch[self.place_holders['is_training']] = True

            if self.display_in_training and is_first_step:
                self.test_on_the_fly(fp, valid_data, feed_dict_batch)
                is_first_step = False
            if self.timing_in_training: tic = time.time()
            self.sess.run([self.train_op],
                          feed_dict=feed_dict_batch,
                          options=prf_options,
                          run_metadata=prf_run_metadata)
            if self.timing_in_training: toc = time.time()
            if self.timing_in_training: train_time += toc - tic
            cur_batch = self.sess.run(self.global_step)
            self.cur_batch = cur_batch

            if self.display_in_training and (cur_batch % self.disp_freq == 0):
                tic = time.time()
                self.test_on_the_fly(fp, valid_data, feed_dict_batch)
                toc = time.time()
                test_time = toc - tic
                if self.timing_in_training:
                    self._message(
                        "batch %7d training time %.2f s, testing time %.2f s" %
                        (cur_batch, train_time, test_time))
                    train_time = 0
                if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.run_opt.is_chief:
                    if self.saver is not None:
                        self.saver.save(self.sess,
                                        os.getcwd() + "/" + self.save_ckpt)
                        self._message("saved checkpoint %s" % self.save_ckpt)
        if self.run_opt.is_chief:
            fp.close()
        if self.profiling and self.run_opt.is_chief:
            fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(self.profiling_file, 'w') as f:
                f.write(chrome_trace)
Exemplo n.º 17
0
def run_model(model,
              horovod=False,
              gpu_num=1,
              output=None,
              steptime=False,
              profile=False,
              timeline=False,
              loss=False,
              session=1,
              step=1,
              batchsize=None,
              graph=False):
    """Run a benchmark model for a number of sessions and steps.

    Args:
        model: model identifier passed to ``tf_model.get_model`` and used as
            the prefix of every output file name.
        horovod: when True, pin the session to ``hvd.local_rank()`` and tag
            output files with ``hvd.rank()``.
        gpu_num: number of GPUs made visible (``'0,1,...'``) when not using
            horovod; also part of the output file names.
        output: directory for result files; nothing is written when None.
        steptime: when True, write per-step wall times (in ms) to a CSV.
        profile: when True, trace every step and feed it to a tfprof profiler.
        timeline: when True, trace every step and write the last run's Chrome
            timeline.
        loss: when True, also fetch the loss each step and write it to a CSV.
        session: number of fresh sessions to run.
        step: number of steps per session.
        batchsize: batch size passed to ``tf_model.get_model``; None means
            the model default.
        graph: when True, collect partition graphs instead of traces.

    Raises:
        ValueError: if ``graph`` is combined with ``timeline`` or ``profile``.
    """
    # cannot dump graph if timeline or profile is On
    if graph and (timeline or profile):
        raise ValueError("cannot dump graph togother with timeline or tfprof")

    with tf.Graph().as_default():

        times_list = []
        losses_list = []
        op, _loss = tf_model.get_model(model, batchsize, horovod=horovod)

        # set gpus available
        config = tf.ConfigProto()
        if horovod is True:
            config.gpu_options.allow_growth = False
            config.gpu_options.visible_device_list = str(hvd.local_rank())
        else:
            # buildup gpus='0,1,2...'
            config.gpu_options.allow_growth = False
            gpus = ','.join(map(str, range(gpu_num)))
            print('DEBUG: gpus=%s' % gpus)
            config.gpu_options.visible_device_list = gpus

        for i in range(session):

            sess = tf.Session(config=config)
            try:
                sess.run(tf.global_variables_initializer())
                times = []
                losses = []

                opts = None
                run_metadata = None

                # the dump graph mode on
                if graph:
                    opts = tf.RunOptions(output_partition_graphs=True)
                    run_metadata = tf.RunMetadata()
                # the profile mode on
                elif profile or timeline:
                    # create runOptions and run_metadata object
                    opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                    if profile:
                        # Create a profiler.
                        profiler = model_analyzer.Profiler(sess.graph)

                for n in range(step):
                    start_time = time.time()

                    # run model
                    if loss is True:
                        res = sess.run([op, _loss],
                                       options=opts,
                                       run_metadata=run_metadata)
                        losses.append(res[1])
                    else:
                        res = sess.run(op,
                                       options=opts,
                                       run_metadata=run_metadata)

                    train_time = time.time() - start_time
                    times.append(train_time)

                    # print steptime and loss at realtime
                    if loss is True:
                        print('Sess%d/%d Step%d/%d: time=%.2fms loss=%.2f' %
                              (i + 1, session, n + 1, step,
                               train_time * 1000, res[1]))
                    else:
                        print('Sess%d/%d Step%d/%d: time=%.2fms' %
                              (i + 1, session, n + 1, step,
                               train_time * 1000))
                    if (not graph) and profile:
                        # Fixed: identify each trace by the current step
                        # index.  The original passed ``step`` (the total
                        # step count), so every call used the same id.
                        profiler.add_step(step=n, run_meta=run_metadata)
            finally:
                # Release the session's resources before starting the next.
                sess.close()

            times_list.append(times)
            losses_list.append(losses)

        if output is not None:

            # make folder if it not exist; exist_ok covers the case where
            # another process creates it between the check and the call.
            if not os.path.exists(output):
                os.makedirs(output, exist_ok=True)

            file_loss = '_lossOn' if loss else ''
            file_trace = '_traceOn' if profile or timeline else ''
            file_horovod = '_hvdRank%d' % hvd.rank() if horovod else ''
            file_batchsize = '_bs%d' % batchsize if batchsize is not None\
                else '_bsDefault'
            file_gpunum = '_gpunum%d' % gpu_num

            if steptime is True:
                filename = '%s%s%s%s%s%s_steptime.csv' %\
                    (model, file_batchsize, file_loss, file_trace,
                        file_horovod, file_gpunum)
                # scale=1000: times were measured in seconds.
                output_csv(filename, times_list, path=output, scale=1000)

            if loss is True:
                filename = '%s%s%s%s%s%s_loss.csv' % \
                    (model, file_batchsize, file_loss, file_trace,
                        file_horovod, file_gpunum)
                output_csv(filename, losses_list, path=output)

            if graph:
                # save each partition of graph with _output_shapes attr

                if horovod:
                    # One sub-directory per horovod rank.
                    graph_dir = os.path.join(
                        output, '%s%s%s%s_partitionGraph' %
                        (model, file_batchsize, file_loss, file_gpunum),
                        str(hvd.rank()))
                    if not os.path.exists(graph_dir):
                        os.makedirs(graph_dir, exist_ok=True)
                    save_partition_graph_shapes(run_metadata, graph_dir,
                                                'graph')
                else:
                    save_partition_graph_shapes(
                        run_metadata, output, '%s%s%s%s%s_partitionGraph' %
                        (model, file_batchsize, file_loss, file_horovod,
                         file_gpunum))

            if profile is True:
                filename = '%s%s%s%s%s_gpunum%d.profile' % \
                    (model, file_batchsize, file_loss, file_trace,
                        file_horovod, gpu_num)
                filepath = output + '/' + filename
                generate_tfprof_profile(profiler, filepath)

            if timeline is True:
                filename = '%s%s%s%s%s_gpunum%d.timeline' % \
                    (model, file_batchsize, file_loss, file_trace,
                        file_horovod, gpu_num)
                filepath = output + '/' + filename
                # ``_timeline`` is the module; ``timeline`` is the flag above.
                tl = _timeline.Timeline(run_metadata.step_stats)
                ctf = tl.generate_chrome_trace_format()
                with open(filepath, 'w') as f:
                    f.write(ctf)
Exemplo n.º 18
0
def _make_neighbor_sampler(sampler_name, adj_info, features):
    """Instantiate the neighbor sampler selected by ``sampler_name``."""
    if sampler_name == 'Uniform':
        return UniformNeighborSampler(adj_info)
    if sampler_name == 'ML':
        return MLNeighborSampler(adj_info, features)
    if sampler_name == 'FastML':
        return FastMLNeighborSampler(adj_info, features)
    raise ValueError('Error: sampler name unrecognized: %r' % (sampler_name, ))


def _make_layer_infos(sampler, layer_specs):
    """Build one ``SAGEInfo("node", ...)`` per ``(num_samples, dim)`` pair."""
    return [
        SAGEInfo("node", sampler, num_samples, dim)
        for num_samples, dim in layer_specs
    ]


def train(train_data, test_data=None, sampler_name='Uniform'):
    """Train a supervised GraphSAGE model, then evaluate it ten times.

    Args:
        train_data: indexable whose items 0..4 are used as graph, feature
            matrix (or None), id map, context pairs and class map.
        test_data: unused; kept for interface compatibility.
        sampler_name: ``'Uniform'``, ``'ML'`` or ``'FastML'``.  For the two
            ML samplers the sampler variables are restored from a checkpoint
            under ``./model/MLsampler-...``.

    Side effects: writes TensorBoard summaries, a model checkpoint under
    ``./model/``, accumulated per-node loss / count matrices under
    ``./loss_node/``, ``test_stats.txt`` and (if ``FLAGS.timeline``) a
    ``timeline.json`` in the log directory; resets the default graph on exit.

    Raises:
        Exception: if ``FLAGS.model`` is not a recognized model name.
        ValueError: if ``sampler_name`` is not a recognized sampler name.
    """
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]
    class_map = train_data[4]

    # List-valued labels: the list length is the class count.  Otherwise
    # count the distinct label values.
    first_label = list(class_map.values())[0]
    if isinstance(first_label, list):
        num_classes = len(first_label)
    else:
        num_classes = len(set(class_map.values()))

    if features is not None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1], ))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders(num_classes)
    minibatch = NodeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      class_map,
                                      num_classes,
                                      batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      context_pairs=context_pairs)
    # The adjacency table lives in a non-trainable variable that is
    # initialized from a placeholder and swapped between the train and test
    # adjacency with the assign ops created further down.
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    adj_shape = adj_info.get_shape().as_list()

    # Create model
    model_name = FLAGS.model
    aggregator_by_model = {
        'gcn': "gcn",
        'graphsage_seq': "seq",
        'graphsage_maxpool': "maxpool",
        'graphsage_meanpool': "meanpool",
    }

    if model_name in ('mean_concat', 'mean_add'):
        sampler = _make_neighbor_sampler(sampler_name, adj_info, features)

        # Depth is 3, 2 or 1 depending on which sample counts are non-zero.
        layer_specs = [(FLAGS.samples_1, FLAGS.dim_1)]
        if FLAGS.samples_3 != 0:
            layer_specs.append((FLAGS.samples_2, FLAGS.dim_2))
            layer_specs.append((FLAGS.samples_3, FLAGS.dim_3))
        elif FLAGS.samples_2 != 0:
            layer_specs.append((FLAGS.samples_2, FLAGS.dim_2))
        layer_infos = _make_layer_infos(sampler, layer_specs)

        model = SupervisedGraphsage(num_classes,
                                    placeholders,
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                    layer_infos,
                                    concat=(model_name == 'mean_concat'),
                                    model_size=FLAGS.model_size,
                                    sigmoid_loss=FLAGS.sigmoid,
                                    identity_dim=FLAGS.identity_dim,
                                    logging=True)

    elif model_name in aggregator_by_model:
        # The sequence aggregator always uses the uniform sampler.
        if model_name == 'graphsage_seq':
            sampler = UniformNeighborSampler(adj_info)
        else:
            sampler = _make_neighbor_sampler(sampler_name, adj_info, features)

        # GCN doubles the layer dimensions and disables concatenation.
        dim_scale = 2 if model_name == 'gcn' else 1
        layer_infos = _make_layer_infos(
            sampler, [(FLAGS.samples_1, dim_scale * FLAGS.dim_1),
                      (FLAGS.samples_2, dim_scale * FLAGS.dim_2)])
        extra_kwargs = {'concat': False} if model_name == 'gcn' else {}

        model = SupervisedGraphsage(
            num_classes,
            placeholders,
            features,
            adj_info,
            minibatch.deg,
            layer_infos=layer_infos,
            aggregator_type=aggregator_by_model[model_name],
            model_size=FLAGS.model_size,
            sigmoid_loss=FLAGS.sigmoid,
            identity_dim=FLAGS.identity_dim,
            logging=True,
            **extra_kwargs)

    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    # Initialize session
    sess = tf.Session(config=config)
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir(sampler_name), sess.graph)

    # Save model
    model_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    saver = tf.train.Saver(var_list=model_vars)

    model_path = './model/' + FLAGS.train_prefix.split(
        "/")[-1] + '-' + model_prefix() + '-' + sampler_name
    model_path += hyper_prefix()

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Init variables
    sess.run(tf.global_variables_initializer(),
             feed_dict={adj_info_ph: minibatch.adj})

    # Restore params of ML sampler model
    if sampler_name == 'ML' or sampler_name == 'FastML':
        sampler_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope="MLsampler")
        saver_sampler = tf.train.Saver(var_list=sampler_vars)

        hop_suffix = '-allhops' if FLAGS.allhop_rewards else '-lasthop'
        sampler_model_path = './model/MLsampler-' + FLAGS.train_prefix.split(
            '/')[-1] + '-' + model_prefix() + hop_suffix
        sampler_model_path += hyper_prefix()

        saver_sampler.restore(sess, sampler_model_path + 'model.ckpt')

    # Train model

    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    train_adj_info = tf.assign(adj_info, minibatch.adj)
    val_adj_info = tf.assign(adj_info, minibatch.test_adj)

    val_cost_ = []
    val_f1_mic_ = []
    val_f1_mac_ = []
    duration_ = []
    epoch_laps_ = []

    # Accumulators for the per-node loss and its count; LIL format allows
    # the in-place fancy-indexed updates done in the training loop.
    ln_acc = sparse.csr_matrix((adj_shape[0], adj_shape[0]), dtype=np.float32)
    lnc_acc = sparse.csr_matrix((adj_shape[0], adj_shape[0]), dtype=np.int32)

    ln_acc = ln_acc.tolil()
    lnc_acc = lnc_acc.tolil()

    learning_rate = [FLAGS.learning_rate]

    for lr_value in learning_rate:

        for epoch in range(FLAGS.epochs):

            epoch_time = time.time()

            minibatch.shuffle()

            batch_idx = 0
            print('Epoch: %04d' % (epoch + 1))
            epoch_val_costs.append(0)

            while not minibatch.end():
                # Construct feed dictionary
                feed_dict, labels = minibatch.next_minibatch_feed_dict()

                # Stop the epoch on a short batch.  ``dict.values()`` is not
                # subscriptable on Python 3, hence the list().
                # NOTE(review): assumes the first feed entry is the batch
                # size -- confirm against next_minibatch_feed_dict.
                if list(feed_dict.values())[0] != FLAGS.batch_size:
                    break

                feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                feed_dict.update({placeholders['learning_rate']: lr_value})

                t = time.time()

                # Training step
                outs = sess.run([
                    merged, model.opt_op, model.loss, model.preds,
                    model.loss_node, model.loss_node_count
                ],
                                feed_dict=feed_dict)
                train_cost = outs[2]

                if batch_idx % FLAGS.validate_iter == 0:
                    # Validation: switch to the test adjacency, evaluate,
                    # then switch back to the training adjacency.
                    sess.run(val_adj_info.op)
                    if FLAGS.validate_batch_size == -1:
                        val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(
                            sess, model, minibatch, FLAGS.batch_size)
                    else:
                        val_cost, val_f1_mic, val_f1_mac, duration = evaluate(
                            sess, model, minibatch, FLAGS.validate_batch_size)

                    # accumulate val results
                    val_cost_.append(val_cost)
                    val_f1_mic_.append(val_f1_mic)
                    val_f1_mac_.append(val_f1_mac)
                    duration_.append(duration)

                    sess.run(train_adj_info.op)
                    epoch_val_costs[-1] += val_cost

                if total_steps % FLAGS.print_every == 0:
                    summary_writer.add_summary(outs[0], total_steps)

                # Running mean of the time spent per training iteration.
                avg_time = (avg_time * total_steps + time.time() -
                            t) / (total_steps + 1)

                # Accumulate per-node loss and count at their (row, col)
                # indices.
                ln = outs[4].values
                ln_idx = outs[4].indices
                ln_acc[ln_idx[:, 0], ln_idx[:, 1]] += ln

                lnc = outs[5].values
                lnc_idx = outs[5].indices
                lnc_acc[lnc_idx[:, 0], lnc_idx[:, 1]] += lnc

                # Print results
                if total_steps % FLAGS.print_every == 0:
                    train_f1_mic, train_f1_mac = calc_f1(labels, outs[3])
                    print("Iter:", '%04d' % batch_idx, "train_loss=",
                          "{:.5f}".format(train_cost), "train_f1_mic=",
                          "{:.5f}".format(train_f1_mic), "val_loss=",
                          "{:.5f}".format(val_cost), "val_f1_mic=",
                          "{:.5f}".format(val_f1_mic), "time per iter=",
                          "{:.5f}".format(avg_time))

                batch_idx += 1
                total_steps += 1

                if total_steps > FLAGS.max_total_steps:
                    break

            epoch_laps = time.time() - epoch_time
            epoch_laps_.append(epoch_laps)
            print("Epoch time=", "{:.5f}".format(epoch_laps))

            if total_steps > FLAGS.max_total_steps:
                break

    print("avg time per epoch=", "{:.5f}".format(np.mean(epoch_laps_)))

    # Save model
    save_path = saver.save(sess, model_path + 'model.ckpt')
    print('model is saved at %s' % save_path)

    # Save loss node and count
    loss_node_path = './loss_node/' + FLAGS.train_prefix.split(
        '/')[-1] + '-' + model_prefix() + '-' + sampler_name
    loss_node_path += hyper_prefix()

    if not os.path.exists(loss_node_path):
        os.makedirs(loss_node_path)

    sparse.save_npz(loss_node_path + 'loss_node.npz',
                    sparse.csr_matrix(ln_acc))
    sparse.save_npz(loss_node_path + 'loss_node_count.npz',
                    sparse.csr_matrix(lnc_acc))
    print('loss and count per node is saved at %s' % loss_node_path)

    print("Optimization Finished!")
    sess.run(val_adj_info.op)

    # test: start fresh result lists for the final evaluation runs
    val_cost_ = []
    val_f1_mic_ = []
    val_f1_mac_ = []
    duration_ = []

    print("Writing test set stats to file (don't peak!)")

    # timeline
    if FLAGS.timeline == True:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
    else:
        run_options = None
        run_metadata = None

    for _ in range(10):

        val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(
            sess,
            model,
            minibatch,
            FLAGS.batch_size,
            run_options,
            run_metadata,
            test=True)

        print("Full validation stats:", "loss=", "{:.5f}".format(val_cost),
              "f1_micro=", "{:.5f}".format(val_f1_mic), "time=",
              "{:.5f}".format(duration))

        val_cost_.append(val_cost)
        val_f1_mic_.append(val_f1_mic)
        duration_.append(duration)

    print("mean: loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format(
        np.mean(val_cost_), np.mean(val_f1_mic_), np.mean(duration_)))

    # write test results
    with open(log_dir(sampler_name) + "test_stats.txt", "w") as fp:
        for run_cost, run_f1_mic, run_duration in zip(val_cost_, val_f1_mic_,
                                                      duration_):
            fp.write("loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format(
                run_cost, run_f1_mic, run_duration))

        fp.write("mean: loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format(
            np.mean(val_cost_), np.mean(val_f1_mic_), np.mean(duration_)))
        fp.write("variance: loss={:.5f} f1_micro={:.5f} time={:.5f}\n".format(
            np.var(val_cost_), np.var(val_f1_mic_), np.var(duration_)))

    # create timeline object, and write it to a json
    if FLAGS.timeline == True:
        tl = timeline.Timeline(run_metadata.step_stats)
        ctf = tl.generate_chrome_trace_format(show_memory=True)
        timeline_path = log_dir(sampler_name) + 'timeline.json'
        with open(timeline_path, 'w') as f:
            # Fixed: report the path that is actually written.
            print('timeline written at %s' % timeline_path)
            f.write(ctf)

    sess.close()
    tf.reset_default_graph()
Exemplo n.º 19
0
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess: TensorFlow session used to run the forward pass
        net: network to use; it must expose the ``data``, ``im_info``,
            ``rois`` and ``keep_prob`` feed tensors and ``get_output``
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals; when
            cfg.TEST.HAS_RPN is set they are replaced by the network's
            'rois' output divided by the image scale
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {
            net.data: blobs['data'],
            net.im_info: blobs['im_info'],
            net.keep_prob: 1.0
        }
    else:
        feed_dict = {
            net.data: blobs['data'],
            net.rois: blobs['rois'],
            net.keep_prob: 1.0
        }

    # Tracing is only enabled on request; otherwise both stay None, which
    # sess.run accepts as "no tracing".
    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois = sess.run(
        [
            net.get_output('cls_score'),
            net.get_output('cls_prob'),
            net.get_output('bbox_pred'),
            net.get_output('rois')
        ],
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # int() replaces the Python 2-only long(); the millisecond timestamp
        # keeps trace files from successive calls apart.
        trace_path = str(int(time.time() * 1000)) + '-test-timeline.ctf.json'
        # The context manager closes the file even if the write fails.
        with open(trace_path, 'w') as trace_file:
            trace_file.write(
                trace.generate_chrome_trace_format(show_memory=False))

    return scores, pred_boxes
Exemplo n.º 20
0
    def train(self,
              names_train,
              y_train,
              names_valid,
              y_valid,
              model_path,
              batch_size=128,
              patience=1024000,
              stat_interval=1000,
              valid_interval=1000,
              summary_interval=1000,
              valid_batch_size=2048,
              profile=False):
        """Train a gender classifier on the name/gender pairs.

        Args:
            names_train: training names; encoded with ``_encode_chars``.
            y_train: training targets, batched alongside the names.
            names_valid: validation names.
            y_valid: validation targets.
            model_path: directory handed to ``_save`` and used as the parent
                of the TensorBoard directory.
            batch_size: training batch size; also the amount the sample
                counter ``iteration`` advances per training step.
            patience: training stops once ``iteration`` exceeds this value.
                It is raised to ``2 * iteration`` whenever a new best
                validation score is reached and that value is larger.
            stat_interval: print training statistics every this many batches.
            valid_interval: run validation every this many batches.
            summary_interval: write graph summaries every this many batches.
            valid_batch_size: batch size used during validation.
            profile: when true, every ``session.run`` is traced with
                FULL_TRACE and Chrome trace files are written to
                ``_TRAIN_PROFILE_FILE`` / ``_VALID_PROFILE_FILE``.

        Returns:
            The best validation score (accuracy minus mean loss) observed.
        """
        start_time = time()

        def add_metric_summaries(mode, iteration, name2metric):
            """Add summary for metric."""
            metric_summary = tf.Summary()
            for name, metric in name2metric.items():
                metric_summary.value.add(tag='{}_{}'.format(mode, name),
                                         simple_value=metric)
            summary_writer.add_summary(metric_summary, global_step=iteration)

        def show_train_stats(epoch, iteration, losses, y_cat, y_cat_pred):
            """Log the accumulated training statistics.

            Returns three fresh empty lists so the caller can reset its
            accumulators in a single assignment.
            """
            # compute mean statistics
            loss = np.mean(losses)
            accuracy = accuracy_score(y_cat, y_cat_pred)
            score = accuracy - loss

            _LOGGER.info(
                'Epoch={}, Iter={:,}, Mean Training Loss={:.4f}, Accuracy={:.4f}, '
                'Accuracy - Loss={:.4f}'.format(epoch, iteration, loss,
                                                accuracy, score))
            add_metric_summaries(
                'train', iteration, {
                    'cross_entropy': loss,
                    'accuracy': accuracy,
                    'accuracy - loss': score
                })
            _LOGGER.info('\n{}'.format(
                classification_report(y_cat, y_cat_pred, digits=3)))
            return list(), list(), list()

        def validate(epoch, iteration, X, y, best_score, patience):
            """Validate the model on validation set."""
            batch_generator = BatchGenerator(X,
                                             y,
                                             batch_size=valid_batch_size,
                                             valid=True)
            losses, y_cat, y_cat_pred = list(), list(), list()
            for X_batch, y_batch in batch_generator:
                X_batch, word_lens, char_lens = self._add_padding(X_batch)
                loss, y_pred = session.run(
                    [nodes['loss'], nodes['y_pred']],
                    feed_dict={
                        nodes['X']: X_batch,
                        nodes['y']: y_batch,
                        nodes['word_lens']: word_lens,
                        nodes['char_lens']: char_lens,
                        nodes['is_train']: False
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                losses.append(loss)
                y_cat.extend(self._categorize_y(y_batch))
                y_cat_pred.extend(self._categorize_y(y_pred))

            # compute mean statistics
            loss = np.mean(losses)
            accuracy = accuracy_score(y_cat, y_cat_pred)
            score = accuracy - loss

            _LOGGER.info(
                'Epoch={}, Iter={:,}, Validation Loss={:.4f}, Accuracy={:.4f}, '
                'Accuracy - Loss={:.4f}'.format(epoch, iteration, loss,
                                                accuracy, score))
            add_metric_summaries(
                'valid', iteration, {
                    'cross_entropy': loss,
                    'accuracy': accuracy,
                    'accuracy - loss': score
                })
            _LOGGER.info('\n{}'.format(
                classification_report(y_cat, y_cat_pred, digits=3)))

            if score > best_score:
                _LOGGER.info(
                    'Best score (Accuracy - Loss) so far, save the model.')
                self._save(model_path, session)
                best_score = score

                # A new best score extends the training budget.
                if iteration * 2 > patience:
                    patience = iteration * 2
                    _LOGGER.info('Increased patience to {:,}'.format(patience))

            if run_metadata:
                with open(_VALID_PROFILE_FILE, 'w') as file_:
                    file_.write(
                        timeline.Timeline(run_metadata.step_stats).
                        generate_chrome_trace_format())

            return best_score, patience

        _LOGGER.info('Prepare inputs and other variables for the model...')
        self._fit_encoder(names_train + names_valid)
        X_train = self._encode_chars(names_train)
        X_valid = self._encode_chars(names_valid)
        train_size = len(X_train)
        train_batch_generator = BatchGenerator(X_train, y_train, batch_size)
        best_valid_score = np.float64('-inf')
        losses = list()
        y_cat = list()
        y_cat_pred = list()
        iteration = 0

        # profiler
        run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE) if profile else None
        run_metadata = tf.RunMetadata() if profile else None

        _LOGGER.info('Building the tensorflow graph...')
        self._build_graph()
        nodes = self._nodes
        session = tf.Session(graph=self._graph)
        summary_writer = None
        # try/finally guarantees that the session and the summary writer are
        # released (and pending summaries flushed) even when training fails.
        try:
            summary_writer = tf.summary.FileWriter(
                os.path.join(model_path, self._tensorboard_dir),
                session.graph)
            self._visualize_embedding(model_path, summary_writer)
            session.run(nodes['init'])
            _LOGGER.info('Start fitting a model...')

            # iterate over batches
            for batch_id, (X_batch,
                           y_batch) in enumerate(train_batch_generator):
                # `iteration` counts samples, so this is the 1-based epoch.
                epoch = 1 + iteration // train_size

                if batch_id % summary_interval == 0:
                    summaries = session.run(nodes['summaries'])
                    summary_writer.add_summary(summaries,
                                               global_step=iteration)

                X_batch, word_lens, char_lens = self._add_padding(X_batch)

                # Predict labels and update the parameters
                _, loss, y_pred = session.run(
                    [nodes['optimizer'], nodes['loss'], nodes['y_pred']],
                    feed_dict={
                        nodes['X']: X_batch,
                        nodes['y']: y_batch,
                        nodes['word_lens']: word_lens,
                        nodes['char_lens']: char_lens,
                        nodes['is_train']: True
                    },
                    options=run_options,
                    run_metadata=run_metadata)

                losses.append(loss)
                y_cat.extend(self._categorize_y(y_batch))
                y_cat_pred.extend(self._categorize_y(y_pred))
                iteration += batch_size

                # The profile file is overwritten each step, so it always
                # holds the trace of the most recent training step.
                if run_metadata:
                    with open(_TRAIN_PROFILE_FILE, 'w') as file_:
                        file_.write(
                            timeline.Timeline(run_metadata.step_stats).
                            generate_chrome_trace_format())

                if batch_id % stat_interval == 0:
                    losses, y_cat, y_cat_pred = show_train_stats(
                        epoch, iteration, losses, y_cat, y_cat_pred)

                if batch_id % valid_interval == 0:
                    best_valid_score, patience = validate(
                        epoch, iteration, X_valid, y_valid, best_valid_score,
                        patience)

                if iteration > patience:
                    _LOGGER.info(
                        'Iteration is more than patience, finish training.')
                    break

            _LOGGER.info('Finished fitting the model.')
            _LOGGER.info(
                'Best Validation Score (Accuracy - Cross-entropy Loss): {:.4f}'
                .format(best_valid_score))
        finally:
            # flush and close the event file, then close the session
            if summary_writer is not None:
                summary_writer.close()
            session.close()

        end_time = time()
        _LOGGER.info('Took {:,} seconds to train the model.'.format(
            int(end_time - start_time)))
        return best_valid_score
def main():
    """Train a WaveNet model configured from command-line options.

    Parses the options, validates the log/restore directories, builds the
    training network and its input readers, attempts to restore a
    checkpoint, builds a separate batch-size-1 test network for incremental
    generation, and then runs the training loop. The loop saves a
    checkpoint every ``--checkpoint_every`` steps and calls ``eval_step``
    every ``--eval_every`` steps. It ends by raising an exception once
    ``default_hparams.num_steps`` is reached; the handler logs it and asks
    the coordinator to stop.
    """

    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError(
                'Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    # Comma-separated list of data directories; it is split further below
    # and each entry counts as one speaker.
    DATA_DIRECTORY = 'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon,D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\son'
    #DATA_DIRECTORY =  'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon'
    parser.add_argument('--data_dir',
                        type=str,
                        default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    #LOGDIR = None
    LOGDIR = './/logdir-wavenet//train//2019-03-27T20-27-18'

    parser.add_argument(
        '--logdir',
        type=str,
        default=LOGDIR,
        help=
        'Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.'
    )

    parser.add_argument(
        '--logdir_root',
        type=str,
        default=None,
        help=
        'Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.'
    )
    parser.add_argument(
        '--restore_from',
        type=str,
        default=None,
        help=
        'Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.'
    )

    CHECKPOINT_EVERY = 1000  # checkpoint save interval (in steps)
    parser.add_argument(
        '--checkpoint_every',
        type=int,
        default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' +
        str(CHECKPOINT_EVERY) + '.')

    parser.add_argument('--eval_every',
                        type=int,
                        default=2,
                        help='Steps between eval on test data')

    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, default_hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # A strength of exactly 0 is replaced by None before it is passed on.
    if default_hparams.l2_regularization_strength == 0:
        default_hparams.l2_regularization_strength = None

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)
    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        # NOTE(review): silence_threshold is computed but not used again in
        # this function.
        silence_threshold = default_hparams.silence_threshold if default_hparams.silence_threshold > EPSILON else None
        gc_enable = True  # Before: num_speakers > 1    After: always True

        # AudioReader slices the wav files to build the inputs. The front is either padded by receptive_field samples or taken from the previous piece, and pieces are cut to (receptive_field + sample_size).
        reader = DataFeederWavenet(
            coord,
            config.data_dir,
            batch_size=default_hparams.wavenet_batch_size,
            gc_enable=gc_enable,
            test_mode=False)

        # Create a second DataFeederWavenet for testing. It fetches exactly one file.
        reader_test = DataFeederWavenet(coord,
                                        config.data_dir,
                                        batch_size=1,
                                        gc_enable=gc_enable,
                                        test_mode=True,
                                        queue_size=1)

        audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id

    # Create train network.
    net = create_network(default_hparams,
                         default_hparams.wavenet_batch_size,
                         num_speakers,
                         is_training=True)
    net.add_loss(
        input_batch=audio_batch,
        local_condition=lc_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=default_hparams.l2_regularization_strength,
        upsample_type=default_hparams.upsample_type)
    net.add_optimizer(default_hparams, global_step)

    # Reused by every traced step in the training loop below.
    run_metadata = tf.RunMetadata()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)
                      )  # log_device_placement=False --> device placement is not logged.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(
        var_list=tf.global_variables(),
        max_to_keep=default_hparams.max_checkpoints)  # maximum number of checkpoints to keep

    try:
        start_step = load(saver, sess, restore_from)  # checkpoint load
        if is_overwritten_training or start_step is None:
            # New or overwritten trainings restart the step counter, so
            # global_step is reset to 0 here.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
            start_step = 0
    except:
        print(
            "Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model."
        )
        raise

    ###########

    reader.start_in_session(sess, start_step)
    reader_test.start_in_session(sess, start_step)

    ################### Create test network.  <---- Built after the session restore because it creates queues.
    net_test = create_network(default_hparams,
                              1,
                              num_speakers,
                              is_training=False)

    if default_hparams.scalar_input:
        samples = tf.placeholder(tf.float32, shape=[net_test.batch_size, None])
        # One random seed sample per batch entry, drawn from [-1, 1).
        waveform = 2 * np.random.rand(net_test.batch_size).reshape(
            net_test.batch_size, -1) - 1

    else:
        samples = tf.placeholder(tf.int32, shape=[
            net_test.batch_size, None
        ])  # samples: mu-law encoded values, before one-hot conversion. (batch_size, length)
        waveform = np.random.randint(default_hparams.quantization_channels,
                                     size=net_test.batch_size).reshape(
                                         net_test.batch_size, -1)
    upsampled_local_condition = tf.placeholder(
        tf.float32, shape=[net_test.batch_size, default_hparams.num_mels])

    speaker_id = tf.placeholder(tf.int32, shape=[net_test.batch_size])
    next_sample = net_test.predict_proba_incremental(
        samples, upsampled_local_condition, speaker_id
    )  # applies the Fast Wavenet Generation Algorithm (arXiv 1611.09482)

    sess.run(net_test.queue_initializer)

    # There are three placeholders for testing: samples, speaker_id, upsampled_local_condition
    # Draw one mel-spectrogram for testing. If it is not pinned here, the reader thread keeps running and reading new data. reader_test's job ends here.

    mel_input_test, speaker_id_test = sess.run(
        [reader_test.local_condition, reader_test.speaker_id])

    with tf.variable_scope('wavenet', reuse=tf.AUTO_REUSE):
        upsampled_local_condition_data = net_test.create_upsample(
            mel_input_test, upsample_type=default_hparams.upsample_type)
        upsampled_local_condition_data_ = sess.run(
            upsampled_local_condition_data
        )  # upsampled_local_condition_data_ is fed via feed_dict into the upsampled_local_condition placeholder.

    ######################################################

    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:

        while not coord.should_stop():

            start_time = time.time()
            if default_hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)

                # The same trace file is overwritten on every traced step.
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step % config.eval_every == 0:  # config.eval_every
                eval_step(sess, logdir, step, waveform,
                          upsampled_local_condition_data_, speaker_id_test,
                          mel_input_test, samples, speaker_id,
                          upsampled_local_condition, next_sample)

            if step >= default_hparams.num_steps:
                # An error message is printed, but stopping here is intended.
                raise Exception('End xxx~~~yyy')

    except Exception as e:
        # Also reached on the deliberate end-of-training exception above.
        print('finally')
        log('Exiting due to exception: %s' % e, slack=True)
        #if step > last_saved_step:
        #    save(saver, sess, logdir, step)
        traceback.print_exc()
        coord.request_stop(e)
Exemplo n.º 22
0
def train_mnist_cnn(FLAGS):
    """Train the MNIST CNN while profiling every step, then test it.

    Every training step runs with FULL_TRACE; its run metadata is added to
    the summary writer, and Chrome timeline files are written for steps 100
    through 104. Summaries, metadata and timelines go to
    ``./tf-profile-train``.

    Args:
        FLAGS: options object. The attributes read here are
            ``make_deterministic``, ``optimizer`` ("adam" or "sgd"),
            ``data_dir``, ``train_loop_count``, ``batch_size``,
            ``test_image_count`` and ``model_dir``.

    Returns:
        A ``(loss_values, test_accuracy)`` tuple: the per-step training
        losses and the accuracy on the first ``test_image_count`` test
        images.

    Raises:
        AssertionError: if ``FLAGS.optimizer`` is not supported.
    """
    # Config
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False,
                            inter_op_parallelism_threads=1)
    # Enable the custom optimizer using the rewriter config options
    # CRL-ORIG: config = ngraph_bridge.update_config(config)

    # Note: Additional configuration option to boost performance is to set the
    # following environment for the run:
    # OMP_NUM_THREADS=44 KMP_AFFINITY=granularity=fine,scatter
    # The OMP_NUM_THREADS number should correspond to the number of
    # cores in the system

    # Set Seed
    shuffle_batch = True

    if FLAGS.make_deterministic:
        seed = 1
        tf.random.set_random_seed(seed)
        shuffle_batch = False

    supported_optimizers = ["adam", "sgd"]

    assert (FLAGS.optimizer in supported_optimizers), "Optimizer not supported"

    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    optimizer_scope = FLAGS.optimizer + "_optimizer"
    with tf.name_scope(optimizer_scope):
        if FLAGS.optimizer == "adam":
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
        elif FLAGS.optimizer == "sgd":
            train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(
                cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)
    tf.summary.scalar('Training accuracy', accuracy)
    tf.summary.scalar('Loss function', cross_entropy)

    graph_location = "./tf-profile-train"
    print('Saving graph to: %s' % graph_location)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(graph_location)
    # try/finally makes sure the event file is flushed and closed on every
    # exit path, including the early return and any training failure.
    try:
        train_writer.add_graph(tf.get_default_graph())

        saver = tf.train.Saver()

        # The trace options are identical for every step; build them once.
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            train_loops = FLAGS.train_loop_count
            loss_values = []
            for i in range(train_loops):
                batch = mnist.train.next_batch(FLAGS.batch_size,
                                               shuffle=shuffle_batch)
                if i % 10 == 0:
                    t = time.time()
                    # keep_prob=1.0 disables dropout while evaluating
                    train_accuracy = accuracy.eval(feed_dict={
                        x: batch[0],
                        y_: batch[1],
                        keep_prob: 1.0
                    })
                    #tf.summary.scalar('Training accuracy', train_accuracy)
                    print('step %d, training accuracy %g, %g sec to evaluate'
                          % (i, train_accuracy, time.time() - t))
                t = time.time()

                # A fresh RunMetadata per step keeps each trace separate.
                run_metadata = tf.RunMetadata()
                _, summary, loss = sess.run(
                    [train_step, merged, cross_entropy],
                    feed_dict={
                        x: batch[0],
                        y_: batch[1],
                        keep_prob: 0.5
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata,
                                              'cnn_' + "step_{}".format(i), i)

                if (i >= 100) and (
                        i < 105
                ):  # Only write timelines for steps 100 through 104
                    fetched_timeline = timeline.Timeline(
                        run_metadata.step_stats)
                    chrome_trace = (
                        fetched_timeline.generate_chrome_trace_format())
                    with open(graph_location + '/timeline_{}.json'.format(i),
                              'w') as f:
                        f.write(chrome_trace)

                loss_values.append(loss)
                print('step %d, loss %g, %g sec for training step' %
                      (i, loss, time.time() - t))
                train_writer.add_summary(summary, i)

            print("Training finished. Running test")

            num_test_images = FLAGS.test_image_count
            x_test = mnist.test.images[:num_test_images]
            y_test = mnist.test.labels[:num_test_images]

            test_accuracy = accuracy.eval(feed_dict={
                x: x_test,
                y_: y_test,
                keep_prob: 1.0
            })
            print('test accuracy %g' % test_accuracy)
            saver.save(sess, FLAGS.model_dir)
            return loss_values, test_accuracy
    finally:
        train_writer.close()
Exemplo n.º 23
0
def train_model(job_id):
    """Train the model configured for ``job_id`` and report its step time.

    The hyper-parameters are read from the ``cfg_para.multi_*`` lists at
    index ``job_id``. The first step of every epoch is excluded from the
    timing. When ``cfg_para.multi_use_tb_timeline`` is set, each step is
    traced with FULL_TRACE and a Chrome trace is written to
    ``cfg_path.profile_path``.

    Args:
        job_id: index into the per-job configuration lists; also part of
            the reported model name.

    Returns:
        A string of the form
        ``'average step time (ms) of <model_name>: <value>'``. The value is
        ``nan`` when no step was timed (at most one batch per epoch, or no
        epochs).
    """
    model_type_list = cfg_para.multi_model_type
    num_layer_list = cfg_para.multi_num_layer
    activation_list = cfg_para.multi_activation
    batch_size_list = cfg_para.multi_batch_size
    learning_rate_list = cfg_para.multi_learning_rate
    optimizer_list = cfg_para.multi_opt

    model_type = model_type_list[job_id]
    num_layer = num_layer_list[job_id]
    activation = activation_list[job_id]
    batch_size = batch_size_list[job_id]
    learning_rate = learning_rate_list[job_id]
    optimizer = optimizer_list[job_id]

    num_epoch = cfg_para.multi_num_epoch
    train_dataset = cfg_para.multi_train_dataset
    use_tf_timeline = cfg_para.multi_use_tb_timeline
    use_cpu = cfg_para.multi_use_cpu

    if use_cpu:
        train_device = '/cpu:0'
    else:
        train_device = '/gpu:0'

    model_name = '{0}-{1}-{2}-{3}-{4}-{5}-{6}-{7}'.format(
        job_id, model_type, num_layer, batch_size, learning_rate, optimizer,
        num_epoch, train_dataset)

    ##########################################
    # load dataset
    ##########################################

    img_width, img_height, num_channel, num_class = load_dataset_para(
        train_dataset)
    train_feature_input, train_label_input = load_train_dataset(train_dataset)

    ##########################################
    # build model
    ##########################################

    features = tf.placeholder(tf.float32,
                              [None, img_width, img_height, num_channel])
    labels = tf.placeholder(tf.int64, [None, num_class])

    dm = ModelImporter(model_type,
                       str(job_id),
                       num_layer,
                       img_height,
                       img_width,
                       num_channel,
                       num_class,
                       batch_size,
                       optimizer,
                       learning_rate,
                       activation,
                       batch_padding=False)

    model_entity = dm.get_model_entity()
    model_logit = model_entity.build(features, is_training=True)
    train_op = model_entity.train(model_logit, labels)

    ##########################################
    # train model
    ##########################################

    step_time = 0
    step_count = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    # For imagenet, train_feature_input is used as a directory and image
    # batches are loaded from it by file name.
    if train_dataset == 'imagenet':
        image_list = sorted(os.listdir(train_feature_input))

    with tf.device(train_device):
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            num_batch = train_label_input.shape[0] // batch_size

            for e in range(num_epoch):
                for i in range(num_batch):
                    print('epoch %d / %d, step %d / %d' %
                          (e + 1, num_epoch, i + 1, num_batch))

                    # The first step of each epoch is not timed.
                    if i != 0:
                        start_time = timer()

                    batch_offset = i * batch_size
                    batch_end = (i + 1) * batch_size
                    if train_dataset == 'imagenet':
                        batch_list = image_list[batch_offset:batch_end]
                        train_feature_batch = load_imagenet_raw(
                            train_feature_input, batch_list, img_height,
                            img_width)
                    else:
                        train_feature_batch = train_feature_input[
                            batch_offset:batch_end]

                    train_label_batch = train_label_input[
                        batch_offset:batch_end]

                    if use_tf_timeline:
                        profile_path = cfg_path.profile_path
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        sess.run(train_op,
                                 feed_dict={
                                     features: train_feature_batch,
                                     labels: train_label_batch
                                 },
                                 options=run_options,
                                 run_metadata=run_metadata)

                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        trace_path = (profile_path + '/' + str(model_type) +
                                      '-' + str(batch_size) + '-' + str(i) +
                                      '.json')
                        # Close the trace file after each step instead of
                        # leaking one open handle per step.
                        with open(trace_path, 'w') as trace_file:
                            trace_file.write(
                                trace.generate_chrome_trace_format(
                                    show_dataflow=True, show_memory=True))
                    else:
                        sess.run(train_op,
                                 feed_dict={
                                     features: train_feature_batch,
                                     labels: train_label_batch
                                 })

                    if i != 0:
                        end_time = timer()
                        dur_time = end_time - start_time
                        print("step time:", dur_time)
                        step_time += dur_time
                        step_count += 1

    # Without a timed step there is no average; report nan instead of
    # raising ZeroDivisionError.
    if step_count:
        average_step_ms = step_time / step_count * 1000
    else:
        average_step_ms = float('nan')

    step_time_result = f'average step time (ms) of {model_name}: {average_step_ms}'
    return step_time_result
Exemplo n.º 24
0
def main():
    """Run 400 training steps of ``graph_moudle.Model`` on synthetic data.

    A session is configured from the module-level ``args``; the name and
    shape of every entry in the second value returned by
    ``model.model_setup()`` is printed, and the script then waits for the
    user to press Enter.  Ten random batches are generated once (fixed
    seed) and cycled through; every tenth step the examples are permuted
    across the batches.  When the module-level ``show_run_meta`` flag is
    set, each step is traced and the Chrome trace is written to
    ``<args.log_dir_path>/timeline.json`` (overwritten every step).
    """
    configproto = tf.ConfigProto()
    configproto.gpu_options.allow_growth = True
    configproto.log_device_placement = args.log_device
    configproto.allow_soft_placement = args.soft_placement
    configproto.inter_op_parallelism_threads = args.num_cores
    configproto.intra_op_parallelism_threads = args.num_cores
    with tf.Graph().as_default(), tf.Session(config=configproto) as sess:
        # Tracing objects exist only when run metadata was requested.
        run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE) if show_run_meta else None
        run_metadata = tf.RunMetadata() if show_run_meta else None

        model = graph_moudle.Model()
        model.init_global_step()

        vt, vs, vo = model.model_setup()
        # NOTE(review): tf.initialize_all_variables is the legacy spelling of
        # tf.global_variables_initializer -- kept for compatibility.
        tf.initialize_all_variables().run()
        for cnt, var in enumerate(vs, start=1):
            print(str(cnt) + '. ' + str(var.name) + ': ' +
                  str(var.get_shape()))
        # Debugging pause: block until the user presses Enter.
        input('aaaaa')

        np.random.seed(1234567890)
        num_examples = 10 * args.batchsize
        qs = np.random.randint(0, args.vocab_size,
                               [10, args.batchsize, args.max_sent_length])
        qsm = np.ones_like(qs, dtype=np.float32)
        qsm[:, :, -1:] = 0
        ts = np.random.randint(0, args.vocab_size,
                               [10, args.batchsize, 3, args.max_sent_length])
        tsm = np.ones_like(ts, dtype=np.float32)
        g = np.random.randint(1, 3, (10, args.batchsize))

        for i in range(400):
            bs = i % 10
            if bs == 0:
                # np.random.shuffle works in place and returns None, so using
                # its result as an index never reordered anything; draw an
                # explicit permutation instead.
                idx = np.random.permutation(num_examples)
                qs = np.reshape(qs, [num_examples, -1])[idx]
                ts = np.reshape(ts, [num_examples, -1])[idx]
                g = np.reshape(g, [num_examples, -1])[idx]
                # The masks are identical for every example, so their shapes
                # can be reused to restore the batched layout.
                qs = np.reshape(qs, qsm.shape)
                ts = np.reshape(ts, tsm.shape)
                g = np.reshape(g, (10, args.batchsize))

            stime = time.time()

            # The trailing True is the last element of the feed list; the
            # previously commented-out variant passed False and unpacked only
            # (pair_loss, acc, acc01, score).
            step, loss, pair_loss, regu_loss, acc, acc01, score, _ = \
                model.run_epoch(sess, [qs[bs], qsm[bs], ts[bs], tsm[bs], g[bs], True],
                                run_options=run_options, run_metadata=run_metadata)
            print(loss, pair_loss, regu_loss, acc, acc01, time.time() - stime)
            print(score[0, :], g[bs][0])
            if show_run_meta:
                tl = timeline.Timeline(run_metadata.step_stats)
                ctf = tl.generate_chrome_trace_format(show_memory=True)
                with open(args.log_dir_path + '/timeline.json', 'w') as f:
                    f.write(ctf)
Exemplo n.º 25
0
def train(hparams):
    """Build and train the model as specified in hparams.

    Two graphs are created with ``create_model`` (TRAIN and EVAL).  The
    training loop runs until the training iterator raises
    ``tf.errors.OutOfRangeError``.  Summaries are written at step 1 and
    every 15 steps; every 100 steps a checkpoint is saved under
    ``<hparams.modeldir>/ckpts``, restored into the eval session and one
    pass over the eval iterator is run.

    Args:
        hparams: Hyper-parameter object; this function reads
            ``hparams.modeldir`` and ``hparams.saved`` and forwards the
            object to ``create_model``.
    """

    ckptsdir = str(Path(hparams.modeldir, "ckpts"))

    # build training and eval graphs
    train_tuple = create_model(hparams, tf.contrib.learn.ModeKeys.TRAIN)
    eval_tuple = create_model(hparams, tf.contrib.learn.ModeKeys.EVAL)

    with train_tuple.graph.as_default():
        initializer = tf.global_variables_initializer()
        train_tables_initializer = tf.tables_initializer()

    with eval_tuple.graph.as_default():
        local_initializer = tf.local_variables_initializer()
        eval_tables_initializer = tf.tables_initializer()

    # Summary writers
    summary_writer = tf.summary.FileWriter(hparams.modeldir,
                                           train_tuple.graph,
                                           max_queue=25,
                                           flush_secs=30)

    if hparams.saved is not None:
        # load checkpoint
        train_tuple.model.saver.restore(train_tuple.session, hparams.saved)
    else:
        train_tuple.session.run([initializer])

    start_time = process_time()
    # initialize the training dataset
    train_tuple.session.run([train_tables_initializer])
    train_tuple.session.run([train_tuple.iterator.initializer])
    # initialize the eval table only once
    eval_tuple.session.run([eval_tables_initializer])
    # finalize the graph
    train_tuple.graph.finalize()

    profile_next_step = False
    profiled = False
    # Durations of the steps since the last reset.  This list must live
    # outside the loop: re-creating it on every iteration made the reported
    # "Avg Sec/Step" equal to the duration of the latest step only.
    step_time = []
    # Train until the dataset throws an error (at the end of num_epochs)
    while True:
        try:
            curr_time = process_time()
            # Profiling is deliberately disabled; the intended condition was
            # `not profiled and profile_next_step`.
            if False:
                print("Running training step with profiling")
                # run profiling
                _, train_loss, global_step, _, summary, metadata = train_tuple.model.\
                        train_with_profile(train_tuple.session, summary_writer)
                # write the metadata out to a chrome trace file
                trace = timeline.Timeline(step_stats=metadata.step_stats)
                with open(hparams.modeldir + "/timeline.ctf.json",
                          "w") as tracefile:
                    tracefile.write(trace.generate_chrome_trace_format())
                profile_next_step = False
                profiled = True
            else:
                _, train_loss, global_step, _, summary = train_tuple.model.train(
                    train_tuple.session)
            step_time.append(process_time() - curr_time)

            # write train summaries
            if global_step == 1:
                summary_writer.add_summary(summary, global_step)
            if global_step % 15 == 0:
                summary_writer.add_summary(summary, global_step)
                print("Step: %d, Training Loss: %f, Avg Sec/Step: %2.2f" %
                      (global_step, train_loss, np.mean(step_time)))

            if global_step % 100 == 0:
                step_time = []
                profile_next_step = True
                # Do one evaluation
                checkpoint_path = train_tuple.model.saver.save(
                    train_tuple.session,
                    ckptsdir + "/ckpt",
                    global_step=global_step)
                print(checkpoint_path)
                eval_tuple.model.saver.restore(eval_tuple.session,
                                               checkpoint_path)
                eval_tuple.session.run(
                    [eval_tuple.iterator.initializer, local_initializer])
                # Drain the eval iterator, remembering the last batch result.
                # Only those final values are reported (presumably they are
                # running metrics, since local variables are reset above --
                # confirm against the model).
                eval_result = None
                while True:
                    try:
                        eval_result = eval_tuple.model.eval(
                            eval_tuple.session)
                    except tf.errors.OutOfRangeError:
                        break
                # An empty eval dataset yields no result; skip reporting
                # instead of failing on unbound names.
                if eval_result is not None:
                    eval_loss, eval_acc, eval_summary, _ = eval_result
                    print("Step: %d, Eval Loss: %f, Eval Accuracy: %f" %
                          (global_step, eval_loss, eval_acc))
                    summary_writer.add_summary(eval_summary, global_step)

        except tf.errors.OutOfRangeError:
            print("- End of Trainig -")
            break

    # End of training
    summary_writer.close()
    print("Total Training Time: %4.2f" % (process_time() - start_time))
Exemplo n.º 26
0
    def train_step(self, sess, train_op, global_step, train_step_kwargs):
        """Run one optimisation step and report whether training should end.

        Args:
            sess: Session on which every `run` call is made.
            train_op: Fetch evaluated together with `global_step`; its value
                is returned as the total loss.
            global_step: Fetch whose value labels trace files and log lines.
            train_step_kwargs: Dict that may hold the optional entries
                'should_trace', 'logdir', 'summary_writer', 'should_log'
                and 'should_stop'.

        Returns:
            A `(total_loss, should_stop)` tuple; `should_stop` is False when
            no 'should_stop' entry was supplied.

        Raises:
            ValueError: when 'should_trace' is supplied without 'logdir'.
        """
        began = time.time()

        options = None
        metadata = None
        wants_trace = 'should_trace' in train_step_kwargs
        if wants_trace and 'logdir' not in train_step_kwargs:
            raise ValueError(
                'logdir must be present in train_step_kwargs when '
                'should_trace is present')
        # Full tracing is switched on only when the should_trace fetch is truthy.
        if wants_trace and sess.run(train_step_kwargs['should_trace']):
            options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            metadata = config_pb2.RunMetadata()

        fetched = sess.run([train_op, global_step],
                           options=options,
                           run_metadata=metadata)
        total_loss, np_global_step = fetched
        time_elapsed = time.time() - began

        if metadata is not None:
            # Dump a Chrome trace for this step and, if a writer was given,
            # attach the raw run metadata to it.
            chrome_trace = timeline.Timeline(
                metadata.step_stats).generate_chrome_trace_format()
            trace_filename = os.path.join(train_step_kwargs['logdir'],
                                          'tf_trace-%d.json' % np_global_step)
            logging.info('Writing trace to %s', trace_filename)
            file_io.write_string_to_file(trace_filename, chrome_trace)
            if 'summary_writer' in train_step_kwargs:
                writer = train_step_kwargs['summary_writer']
                writer.add_run_metadata(metadata,
                                        'run_metadata-%d' % np_global_step)

        if 'should_log' in train_step_kwargs and sess.run(
                train_step_kwargs['should_log']):
            logging.info('global step %d: loss = %.4f (%.2f sec/step)',
                         np_global_step, total_loss, time_elapsed)

        # TODO(nsilberman): the stop check is evaluated in its own sess.run
        # because it depends on the global step, which running `train_op`
        # increments; folding it into the main call via a control dependency
        # on train_op reportedly does not work on certain platforms.
        should_stop = (sess.run(train_step_kwargs['should_stop'])
                       if 'should_stop' in train_step_kwargs else False)

        return total_loss, should_stop
Exemplo n.º 27
0
                              feed_dict={
                                  x: batch_xs,
                                  y_true: batch_ys,
                                  keep_prob: 0.5
                              },
                              options=run_options,
                              run_metadata=run_metadata)
    # Record the summary for step `i` (the sess.run producing `summary_str`
    # starts above this fragment -- presumably the training step; confirm).
    writer.add_summary(summary_str, i)
    # Every 10th step, report the error rate on the current training batch
    # and on a batch of 100 test examples, both fed with keep_prob = 1.
    if (i % 10) == 0:
        test_xs, test_ys = mnist.test.next_batch(100)
        #test_xs, test_ys = [mnist.test.images, mnist.test.labels]
        train_acc = sess.run(accuracy,
                             feed_dict={
                                 x: batch_xs,
                                 y_true: batch_ys,
                                 keep_prob: 1
                             })
        test_acc = sess.run(accuracy,
                            feed_dict={
                                x: test_xs,
                                y_true: test_ys,
                                keep_prob: 1
                            })
        # Errors are printed as percentages: (1 - accuracy) * 100.
        print('Step %.4d : train_err = %.2f%% ; test_err = %.2f%%' %
              (i, (1 - train_acc) * 100, (1 - test_acc) * 100))

# Convert the step statistics held by `run_metadata` into a Chrome trace
# (presumably those of the last traced sess.run above -- confirm).
trace = timeline.Timeline(step_stats=run_metadata.step_stats)

# NOTE(review): the output directory /tmp/layers is not created here; it
# must already exist or open() will fail.
with open('/tmp/layers/timeline.ctf.json', 'w') as outfile:
    outfile.write(trace.generate_chrome_trace_format())
Exemplo n.º 28
0
# Training-loop fragment: runs steps saved_global_step + 1 .. args.num_steps - 1,
# writing a summary every step and, optionally, a full trace every 50 steps.
# NOTE(review): `run_metadata`, `sess`, `writer` and the fetched tensors are
# not created in this fragment -- presumably defined earlier; confirm.
step = None
last_saved_step = saved_global_step
# Starting value for the lowest validation loss seen so far (presumably a
# sentinel larger than any real loss -- confirm).
minvalloss = 10000
try:
    for step in range(saved_global_step + 1, args.num_steps):
        start_time = time.time()
        if args.store_metadata and step % 50 == 0:
            # Slow run that stores extra information for debugging.
            print('Storing metadata')
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            summary, trloss_value, _ = sess.run([summaries, trloss, optim],
                                                options=run_options,
                                                run_metadata=run_metadata)
            writer.add_summary(summary, step)
            writer.add_run_metadata(run_metadata, 'step_{:04d}'.format(step))
            # The Chrome trace always goes to the same file, so each traced
            # step overwrites the previous trace.
            tl = timeline.Timeline(run_metadata.step_stats)
            timeline_path = os.path.join(logdir, 'timeline.trace')
            with open(timeline_path, 'w') as f:
                f.write(tl.generate_chrome_trace_format(show_memory=True))
        else:
            # Regular step: same fetches, no tracing.
            summary, trloss_value, _ = sess.run([summaries, trloss, optim])
            writer.add_summary(summary, step)
        duration = time.time() - start_time
        print('step {:d} - trloss = {:.3f}, ({:.3f} sec/step)'.format(
            step, trloss_value, duration))

        # Evaluate the validation loss at every checkpoint interval.
        # NOTE(review): `duration` printed below is the training-step time
        # measured above, not the time of the validation run.
        if step % args.checkpoint_every == 0:
            valloss_value = sess.run(valloss)
            print('validateLoss = {:.3f}, ({:.3f} sec/step)'.format(
                valloss_value, duration))
            if (valloss_value < minvalloss):
    def training(self, sess, train_writer):
        with tf.name_scope('loss_function'):
            RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1]
            RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1]
            RNet_rpn_yaw_gt = self.net.get_output(
                'rpn_rois'
            )[1][:,
                 -1]  #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw
            RNet_rpn_yaw_gt_new = RNet_rpn_yaw_gt - RNet_rpn_yaw_gt_delta
            RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred + RNet_rpn_yaw_gt_delta
            rpn_cls_labels = self.net.get_output(
                'rpn_rois'
            )[1][:,
                 -2]  #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw

            RNet_rpn_yaw_pred = self.angle_trans(RNet_rpn_yaw_pred)
            RNet_rpn_yaw_gt_new = self.angle_trans(RNet_rpn_yaw_gt_new)

            debug_pred = tf.multiply(rpn_cls_labels,
                                     self.angle_trans(RNet_rpn_yaw_pred))
            debug_gt = tf.multiply(rpn_cls_labels,
                                   self.angle_trans(RNet_rpn_yaw_gt_new))

            tower_l1_loss = self.Rnet_modified_smooth_l1(
                sigma=3,
                bbox_pred=RNet_rpn_yaw_pred,
                bbox_targets=RNet_rpn_yaw_gt_new)
            tower_l1_loss_keep_positive = tf.multiply(rpn_cls_labels,
                                                      tower_l1_loss)
            loss = tf.reduce_sum(tower_l1_loss_keep_positive) / (
                1e-5 + tf.reduce_sum(
                    tf.cast(tf.not_equal(tower_l1_loss_keep_positive, 0.0),
                            dtype=tf.float32)))

        with tf.name_scope('train_op'):
            global_step = tf.Variable(1, trainable=False, name='Global_Step')
            lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,
                                            global_step,
                                            10000,
                                            0.90,
                                            name='decay-Lr')
            Optimizer = tf.train.AdamOptimizer(lr)
            var_and_grad = Optimizer.compute_gradients(
                loss, var_list=tf.trainable_variables())
            train_op = Optimizer.minimize(loss, global_step=global_step)

        with tf.name_scope('debug_board'):
            tf.summary.scalar('total_loss', loss)
            glb_var = tf.trainable_variables()
            for i in range(len(glb_var)):
                tf.summary.histogram(glb_var[i].name, glb_var[i])
            tf.summary.image('theta',
                             self.net.get_output('RNet_theta')[0],
                             max_outputs=50)
            merged = tf.summary.merge_all()  #hxd: before the next summary ops

        with tf.name_scope('epoch_valid'):
            epoch_cube_theta = tf.placeholder(dtype=tf.float32)
            epoch_cube_theta_sum_op = tf.summary.scalar(
                'valid_los', epoch_cube_theta)

        sess.run(tf.global_variables_initializer())
        if self.args.fine_tune:
            if True:
                # #full graph restore
                print 'Loading pre-trained model weights from {:s}'.format(
                    self.args.weights)
                self.net.load(self.args.weights, sess, self.saver, True)
            else:  # #part graph restore
                #  # METHOD one
                # ref_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=['vgg_feat_fc'])
                # saver1 = tf.train.Saver(ref_vars)
                # saver1.restore(sess, self.args.weights)
                #  # METHOD two
                reader = pywrap_tensorflow.NewCheckpointReader(
                    self.args.weights)
                var_to_shape_map = reader.get_variable_to_shape_map()
                with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope:
                    for key in var_to_shape_map:
                        try:
                            var = tf.get_variable(key, trainable=False)
                            sess.run(var.assign(reader.get_tensor(key)))
                            print "    Assign pretrain model: " + key
                        except ValueError:
                            print "    Ignore variable:" + key
        trainable_var_for_chk = tf.trainable_variables(
        )  #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
        print 'Variables to train: ', trainable_var_for_chk

        timer = Timer()
        rpn_rois_3d = self.net.get_output('rpn_rois')[1]

        if DEBUG:
            pass  # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow
            vispy_init()
        i = 0
        training_series = range(self.epoch)  #self.epoch
        for epo_cnt in range(self.args.epoch_iters):
            for data_idx in training_series:  # DO NOT EDIT the "training_series",for the latter shuffle
                iter = global_step.eval(
                )  # function "minimize()"will increase global_step
                blobs = self.dataset.get_minibatch(data_idx,
                                                   'train')  # get one batch
                feed_dict = {
                    self.net.lidar3d_data: blobs['lidar3d_data'],
                    self.net.lidar_bv_data: blobs['lidar_bv_data'],
                    self.net.im_info: blobs['im_info'],
                    self.net.keep_prob: 0.5,
                    self.net.gt_boxes_bv: blobs['gt_boxes_bv'],
                    self.net.gt_boxes_3d: blobs['gt_boxes_3d'],
                    self.net.gt_boxes_corners: blobs['gt_boxes_corners'],
                    self.net.calib: blobs['calib'],
                }

                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                timer.tic()
                debug_pred_,delta_,RNet_rpn_yaw_gt_delta_,rpn_rois_3d_,loss_,RNet_rpn_yaw_pred_toshow_,debug_gt_,merged_,_ = \
                    sess.run([debug_pred, tower_l1_loss_keep_positive, RNet_rpn_yaw_gt_delta, rpn_rois_3d, loss, RNet_rpn_yaw_pred_toshow, debug_gt, merged, train_op, ]
                             , feed_dict=feed_dict, options=run_options, run_metadata=run_metadata)
                # debug_pred_,delta_,RNet_rpn_yaw_gt_delta_,rpn_rois_3d_,RNet_rpn_yaw_pred_toshow_,debug_gt_,merged_, = \
                #     sess.run([debug_pred,tower_l1_loss_keep_positive,RNet_rpn_yaw_gt_delta,rpn_rois_3d,RNet_rpn_yaw_pred_toshow,debug_gt,merged,]
                #              ,feed_dict=feed_dict,options=run_options, run_metadata=run_metadata)
                timer.toc()

                if iter % cfg.TRAIN.ITER_DISPLAY == 0:
                    print 'Iter: %d/%d, Serial_num: %s, Speed: %.3fs/iter, Loss: %.3f ' % (
                        iter, self.args.epoch_iters * self.epoch,
                        blobs['serial_num'], timer.average_time, loss_)
                    print 'theta_delta:     ',
                    for i in range(50):
                        if delta_[i] != 0.0:
                            print '%6.3f' % (delta_[i]),
                    print '\nPredicted angle: ',
                    for j in range(50):
                        if debug_pred_[j] != 0.0:
                            print '%6.3f' % (debug_pred_[j]),
                    print '\nGt yaw angle:    ',
                    for j in range(50):
                        if debug_gt_[j] != 0.0:
                            print '%6.3f' % (debug_gt_[j]),
                    print '\n'
                if iter % 20 == 0 and cfg.TRAIN.TENSORBOARD:
                    train_writer.add_summary(merged_, iter)
                    pass
                if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter
                                                                       == 100):
                    #chrome://tracing
                    trace = timeline.Timeline(
                        step_stats=run_metadata.step_stats)
                    trace_file = open(
                        cfg.LOG_DIR + '/' + 'training-step-' +
                        str(iter).zfill(7) + '.ctf.json', 'w')
                    trace_file.write(
                        trace.generate_chrome_trace_format(show_memory=False))
                    trace_file.close()
                if DEBUG:
                    scan = blobs['lidar3d_data']
                    cubic_cls_value = np.ones([cfg.TRAIN.RPN_POST_NMS_TOP_N],
                                              dtype=np.float32) * 0
                    boxes = BoxAry_Theta(
                        gt_box3d=blobs['gt_boxes_3d'],
                        pre_box3d=rpn_rois_3d_,
                        pre_theta_value=RNet_rpn_yaw_pred_toshow_,
                        pre_cube_cls=cubic_cls_value
                    )  # RNet_rpn_yaw_pred_toshow_  rpn_rois_3d_[:,-1]
                    pcd_vispy(scan,
                              boxes=boxes,
                              name='CubicNet training',
                              index=i,
                              vis_size=(800, 600),
                              save_img=False,
                              visible=False)
                    i += 1
            if cfg.TRAIN.EPOCH_MODEL_SAVE:  #iter % 2000==0 and :
                self.snapshot(sess, iter)
                pass
            if cfg.TRAIN.USE_VALID and True:  #TODO: to complete the valid process
                with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)):
                    print 'Valid the net at the end of epoch_{} ...'.format(
                        epo_cnt + 1)
                    valid_loss_total = 0.0
                    for data_idx in range(self.val_epoch):  # self.val_epoch
                        blobs = self.dataset.get_minibatch(data_idx, 'valid')
                        feed_dict_ = {
                            self.net.lidar3d_data: blobs['lidar3d_data'],
                            self.net.lidar_bv_data: blobs['lidar_bv_data'],
                            self.net.im_info: blobs['im_info'],
                            self.net.keep_prob: 0.5,
                            self.net.gt_boxes_bv: blobs['gt_boxes_bv'],
                            self.net.gt_boxes_3d: blobs['gt_boxes_3d'],
                            self.net.gt_boxes_corners:
                            blobs['gt_boxes_corners'],
                            self.net.calib: blobs['calib'],
                        }
                        loss_valid = sess.run(loss, feed_dict=feed_dict_)
                        # train_writer.add_summary(valid, data_idx)

                        valid_loss_total += loss_valid
                        if cfg.TRAIN.VISUAL_VALID and data_idx % 20 == 0:
                            print 'Valid step: {:d}/{:d} , theta_loss = {:.3f}'\
                                  .format(data_idx + 1,self.val_epoch,float(loss_valid))

                        if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD:
                            pass
                            # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000)

                valid_summary = tf.summary.merge([epoch_cube_theta_sum_op])
                valid_res = sess.run(valid_summary,
                                     feed_dict={
                                         epoch_cube_theta:
                                         float(valid_loss_total) /
                                         self.val_epoch
                                     })
                train_writer.add_summary(valid_res, epo_cnt + 1)
                print 'Validation of epoch_{}:theta_loss_total = {:.3f}\n'\
                      .format(epo_cnt + 1,float(valid_loss_total)/self.val_epoch)
            random.shuffle(training_series)  # shuffle the training series
        print 'Training process has done, enjoy every day !'
Exemplo n.º 30
0
def main():
    """Parse command-line options and train the WaveNet vocoder.

    Steps: parse ``--data_dir`` (comma-separated, one directory per
    speaker), ``--logdir``/``--logdir_root``/``--restore_from`` and
    ``--checkpoint_every``; validate the directories; build the data feeder
    and ``WaveNetModel``; restore a checkpoint if one is found; then run
    training steps until the coordinator asks to stop or
    ``hparams.num_steps`` is reached, saving a checkpoint every
    ``--checkpoint_every`` steps.  Returns early (``None``) when the
    directory arguments are invalid.
    """
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError(
                'Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    DATA_DIRECTORY = '.\\data\\moon,.\\data\\son'
    parser.add_argument('--data_dir',
                        type=str,
                        default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    #LOGDIR = './/logdir-wavenet//train//2018-12-21T22-58-10'

    parser.add_argument(
        '--logdir',
        type=str,
        default=LOGDIR,
        help=
        'Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.'
    )

    parser.add_argument(
        '--logdir_root',
        type=str,
        default=None,
        help=
        'Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.'
    )
    parser.add_argument(
        '--restore_from',
        type=str,
        default=None,
        help=
        'Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.'
    )

    CHECKPOINT_EVERY = 1000  # checkpoint save interval (in steps)
    parser.add_argument(
        '--checkpoint_every',
        type=int,
        default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' +
        str(CHECKPOINT_EVERY) + '.')

    config = parser.parse_args()  # options that can be given on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create coordinator.
    coord = tf.train.Coordinator()
    # One data directory per speaker.
    num_speakers = len(config.data_dir)
    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        # Global (speaker) conditioning only makes sense with several speakers.
        gc_enable = num_speakers > 1

        # The feeder cuts the wav files into input pieces: receptive_field
        # samples are either padded at the front or taken from the previous
        # piece, and each piece has size (receptive_field + sample_size).
        reader = DataFeederWavenet(
            coord,
            config.data_dir,
            batch_size=hparams.wavenet_batch_size,
            receptive_field=WaveNetModel.calculate_receptive_field(
                hparams.filter_width, hparams.dilations, hparams.scalar_input,
                hparams.initial_filter_width),
            gc_enable=gc_enable)
        if gc_enable:
            audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id
        else:
            # Single speaker: there is no speaker-id batch.  (This branch used
            # to read `self.local_condition`, which does not exist in a plain
            # function, and left gc_id_batch undefined.)
            audio_batch, lc_batch = reader.inputs_wav, reader.local_condition
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=hparams.wavenet_batch_size,
        dilations=hparams.dilations,
        filter_width=hparams.filter_width,
        residual_channels=hparams.residual_channels,
        dilation_channels=hparams.dilation_channels,
        quantization_channels=hparams.quantization_channels,
        out_channels=hparams.out_channels,
        skip_channels=hparams.skip_channels,
        use_biases=hparams.use_biases,  #  True
        scalar_input=hparams.scalar_input,
        initial_filter_width=hparams.initial_filter_width,
        global_condition_channels=hparams.gc_channels,
        global_condition_cardinality=num_speakers,
        local_condition_channels=hparams.num_mels,
        upsample_factor=hparams.upsample_factor,
        train_mode=True)

    # A strength of 0 is passed on as None.
    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    net.add_loss(input_batch=audio_batch,
                 local_condition=lc_batch,
                 global_condition_batch=gc_id_batch,
                 l2_regularization_strength=hparams.l2_regularization_strength)
    net.add_optimizer(hparams, global_step)

    run_metadata = tf.RunMetadata()

    # Set up session
    # (log_device_placement=False: do not log the automatic cpu/gpu placement.)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(
        var_list=tf.global_variables(),
        max_to_keep=hparams.max_checkpoints)  # maximum number of checkpoints kept

    try:
        start_step = load(saver, sess, restore_from)  # checkpoint load
        if is_overwritten_training or start_step is None:
            # New or overwritten trainings restart the global step at 0.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)

    except:
        # Any failure (including an interrupt) is reported and re-raised.
        print(
            "Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model."
        )
        raise

    ###########

    start_step = sess.run(global_step)
    last_saved_step = start_step
    # `step` is read by the metadata check below before the first sess.run
    # assigns it, so seed it with the restored global step.
    step = start_step
    try:
        reader.start_in_session(sess, start_step)
        while not coord.should_stop():

            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step >= hparams.num_steps:
                # An error message is shown, but stopping here is intended:
                # the exception is how the loop is left once num_steps is
                # reached.
                raise Exception('End xxx~~~yyy')

    except Exception as e:
        print('finally')
        #if step > last_saved_step:
        #    save(saver, sess, logdir, step)

        # Hand the exception to the coordinator so the feeder threads stop.
        coord.request_stop(e)