def main():
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    tf.set_random_seed(args.random_seed)
    
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)])
        image_batch05 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)])
    
    # Create network.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel({'data': image_batch075}, is_training=args.is_training, num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel({'data': image_batch05}, is_training=args.is_training, num_classes=args.num_classes)
    # For a small batch size, it is better to keep 
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model. 
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = net075.layers['fc1_voc12']
    raw_output05 = net05.layers['fc1_voc12']
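    # Fuse the three scales: upsample the 0.75x and 0.5x logits to the
    # full-resolution shape and take the element-wise maximum over scales.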
    raw_output = tf.reduce_max(tf.stack([raw_output100,
                                         tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]),
                                         tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0)
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
    assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
    
    
    # Predictions: ignore all predictions whose label is greater than or equal to num_classes.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    raw_prediction100 = tf.reshape(raw_output100, [-1, args.num_classes])
    raw_prediction075 = tf.reshape(raw_output075, [-1, args.num_classes])
    raw_prediction05 = tf.reshape(raw_output05, [-1, args.num_classes])
    
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
    label_proc075 = prepare_label(label_batch, tf.stack(raw_output075.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False)
    label_proc05 = prepare_label(label_batch, tf.stack(raw_output05.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False)
    
    raw_gt = tf.reshape(label_proc, [-1,])
    raw_gt075 = tf.reshape(label_proc075, [-1,])
    raw_gt05 = tf.reshape(label_proc05, [-1,])
    
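    # Keep only the pixels whose label is a valid class (< num_classes);
    # ignored labels (e.g. 255) are filtered out.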
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, args.num_classes - 1)), 1)
    indices05 = tf.squeeze(tf.where(tf.less_equal(raw_gt05, args.num_classes - 1)), 1)
    
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
    gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)
    
    prediction = tf.gather(raw_prediction, indices)
    prediction100 = tf.gather(raw_prediction100, indices)
    prediction075 = tf.gather(raw_prediction075, indices075)
    prediction05 = tf.gather(raw_prediction05, indices05)
                                                  
                                                  
    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
    loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
    loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
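    # L2 weight decay on the 'weights' variables only (biases and BN
    # parameters are excluded).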
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(l2_losses)
    
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)
    
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
    
    total_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 
                                     max_outputs=args.save_num_images) # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())
   
    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
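    # Polynomial decay: lr = base_lr * (1 - step / num_steps) ** power.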
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    
    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

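    # Gradient accumulation: sum the gradients of grad_update_every sub-batches
    # before applying a single optimiser update, emulating an effective batch
    # size of batch_size * grad_update_every.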
    # Define a variable to accumulate gradients.
    accum_grads = [tf.Variable(tf.zeros_like(v.initialized_value()),
                               trainable=False) for v in conv_trainable + fc_w_trainable + fc_b_trainable]

    # Define an operation to clear the accumulated gradients for next batch.
    zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]

    # Compute gradients.
    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
   
    # Accumulate and normalise the gradients.
    accum_grads_op = [accum_grads[i].assign_add(grad / args.grad_update_every) for i, grad in
                       enumerate(grads)]

    grads_conv = accum_grads[:len(conv_trainable)]
    grads_fc_w = accum_grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = accum_grads[(len(conv_trainable) + len(fc_w_trainable)):]

    # Apply the gradients.
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    
    
    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)
    
    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)
    
    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = { step_ph : step }
        loss_value = 0

        # Clear the accumulated gradients.
        sess.run(zero_op, feed_dict=feed_dict)
       
        # Accumulate gradients.
        for i in range(args.grad_update_every):
            _, l_val = sess.run([accum_grads_op, reduced_loss], feed_dict=feed_dict)
            loss_value += l_val

        # Normalise the loss.
        loss_value /= args.grad_update_every

        # Apply gradients.
        if step % args.save_pred_every == 0:
            images, labels, summary, _ = sess.run([image_batch, label_batch, total_summary, train_op], feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            sess.run(train_op, feed_dict=feed_dict)

        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)
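
# Note: the `save` and `load` helpers called by these scripts are assumed to
# come from the repo's utility module and are not shown in this section. A
# minimal sketch of what they typically look like (assuming `os` is imported
# at module level; some variants accept extra keyword arguments, e.g. val_iou):
def save(saver, sess, logdir, step):
    """Save a checkpoint as <logdir>/model.ckpt-<step> (sketch)."""
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    saver.save(sess, os.path.join(logdir, 'model.ckpt'), global_step=step)
    print('The checkpoint has been created.')

def load(loader, sess, ckpt_path):
    """Restore model parameters from a checkpoint file (sketch)."""
    loader.restore(sess, ckpt_path)
    print('Restored model parameters from {}'.format(ckpt_path))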

# Example 2
def main():
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)])
        image_batch05 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)])
    
    # Create network.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel({'data': image_batch075}, is_training=args.is_training)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel({'data': image_batch05}, is_training=args.is_training)
    # For a small batch size, it is better to keep 
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model. 
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = net075.layers['fc1_voc12']
    raw_output05 = net05.layers['fc1_voc12']
    raw_output = tf.reduce_max(tf.stack([raw_output100,
                                         tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]),
                                         tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0)
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = tf.global_variables()
    all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
    assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
    
    
    # Predictions: ignore all predictions whose label is greater than or equal to n_classes.
    raw_prediction = tf.reshape(raw_output, [-1, n_classes])
    raw_prediction100 = tf.reshape(raw_output100, [-1, n_classes])
    raw_prediction075 = tf.reshape(raw_output075, [-1, n_classes])
    raw_prediction05 = tf.reshape(raw_output05, [-1, n_classes])
    
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), one_hot=False) # [batch_size, h, w]
    label_proc075 = prepare_label(label_batch, tf.stack(raw_output075.get_shape()[1:3]), one_hot=False)
    label_proc05 = prepare_label(label_batch, tf.stack(raw_output05.get_shape()[1:3]), one_hot=False)
    
    raw_gt = tf.reshape(label_proc, [-1,])
    raw_gt075 = tf.reshape(label_proc075, [-1,])
    raw_gt05 = tf.reshape(label_proc05, [-1,])
    
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, n_classes - 1)), 1)
    indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, n_classes - 1)), 1)
    indices05 = tf.squeeze(tf.where(tf.less_equal(raw_gt05, n_classes - 1)), 1)
    
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
    gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)
    
    prediction = tf.gather(raw_prediction, indices)
    prediction100 = tf.gather(raw_prediction100, indices)
    prediction075 = tf.gather(raw_prediction075, indices075)
    prediction05 = tf.gather(raw_prediction05, indices05)
                                                  
                                                  
    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
    loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
    loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(l2_losses)
    
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)
    
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images], tf.uint8)
    
    total_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 
                                     max_outputs=args.save_num_images) # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir)
   
    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    
    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    
    
    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=restore_var, max_to_keep=10)
    
    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)
    
    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = { step_ph : step }
        
        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, train_op], feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)
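
# Example 3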
def main():
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    tf.set_random_seed(args.random_seed)
    
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    
    # Create network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
    # For a small batch size, it is better to keep 
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model. 
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    # Restore all variables, or all except the last ones.
    restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name] # Fine-tune only the last layers.
    
    prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes)
    gt = tf.reshape(label_proc, [-1, args.num_classes])
    
    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)
    
    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)
    
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
    
    total_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 
                                     max_outputs=args.save_num_images) # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())
   
    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable)
    
    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40)
    
    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)
    
    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        
    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        
        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, optim])
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, optim])
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)

# Example 4
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             RANDOM_SCALE, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch})

    # Predictions.
    raw_output = net.layers['fc1_voc12']

    prediction = tf.reshape(raw_output, [-1, n_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]))
    gt = tf.reshape(label_proc, [-1, n_classes])

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    trainable = tf.trainable_variables()
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=trainable, max_to_keep=40)
    if args.restore_from is not None:
        load(saver, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, _ = sess.run(
                [reduced_loss, image_batch, label_batch, pred, optim])
            fig, axes = plt.subplots(args.save_num_images, 3, figsize=(16, 12))
            for i in range(args.save_num_images):
                axes.flat[i * 3].set_title('data')
                axes.flat[i * 3].imshow(
                    (images[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8))

                axes.flat[i * 3 + 1].set_title('mask')
                axes.flat[i * 3 + 1].imshow(decode_labels(labels[i, :, :, 0]))

                axes.flat[i * 3 + 2].set_title('pred')
                axes.flat[i * 3 + 2].imshow(decode_labels(preds[i, :, :, 0]))
            plt.savefig(args.save_dir + str(start_time) + ".png")
            plt.close(fig)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, optim])
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)

# Example 5
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    # Restore all variables, or all except the last ones.
    restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name] # Fine-tune only the last layers.

    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)


    # Pixel-wise softmax loss.
    if not args.class_weights:
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)

    # Otherwise, weight each pixel's loss by the class weight of its ground-truth label.
    else:
        raw_weights = tf.gather(args.class_weights, tf.cast(raw_gt, tf.int32))
        weights = tf.gather(raw_weights, indices)
        loss = tf.losses.sparse_softmax_cross_entropy(logits=prediction, labels=gt, weights=weights)

    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)

    total_summary = tf.summary.image('images',
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
                                     max_outputs=args.save_num_images) # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    #  Prep val data
    if args.val_list:
        val_steps = int(args.val_size / args.batch_size)
        with tf.name_scope("get_val"):
            reader_val = ImageReader(
                args.data_dir,
                args.val_list,
                input_size,
                False,
                False,
                args.ignore_label,
                IMG_MEAN,
                coord)
            val_image_batch, val_label_batch = reader_val.dequeue(args.batch_size)

        # Val predictions.
        val_raw_output = tf.image.resize_bilinear(raw_output, tf.shape(val_image_batch)[1:3,])
        val_raw_output = tf.argmax(val_raw_output, axis=3)
        val_pred = tf.expand_dims(val_raw_output, axis=3) # Create 4-d tensor.

        # mIoU
        val_pred = tf.reshape(val_pred, [-1,])
        val_gt = tf.reshape(val_label_batch, [-1,])
        weights = tf.cast(tf.less_equal(val_gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes.
        mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(val_pred, val_gt, num_classes=args.num_classes, weights=weights)
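        # update_op accumulates a running confusion matrix across batches;
        # mIoU reads the metric computed from it.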

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    if args.val_list:
        sess.run(tf.local_variables_initializer())

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, optim])
            summary_writer.add_summary(summary, step)

            # Print the validation mean IoU (Jaccard index).
            if args.val_list:
                for vstep in range(val_steps):
                    val_preds, _ = sess.run([val_pred, update_op])
                viou = mIoU.eval(session=sess)
                print('Mean IoU: {:.6f}'.format(viou))
                save(saver, sess, args.snapshot_dir, step, val_iou=viou)
            else:
                save(saver, sess, args.snapshot_dir, step)

        else:
            loss_value, _ = sess.run([reduced_loss, optim])
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)

# Example 6
def main():
    """Create the model and start the training."""
    args = get_arguments()

    # setup used GPU
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=args.is_training,
                             num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    # Restore all variables, or all except the last ones.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    trainable = [v for v in tf.trainable_variables()
                 if 'fc1_voc12' in v.name]  # Fine-tune only the last layers.

    prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes)
    gt = tf.reshape(label_proc, [-1, args.num_classes])

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction,
                                                   labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Define loss and optimisation parameters.
    global_step = tf.Variable(0,
                              dtype=tf.int32,
                              trainable=False,
                              name='global_step')
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss,
                               var_list=trainable,
                               global_step=global_step)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    image_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    loss_summary = tf.summary.scalar("loss", reduced_loss)

    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    print("Setting up summary op...")
    total_summary = tf.summary.merge_all()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40)

    # Resume from an existing snapshot if one is found; otherwise restore the pre-trained weights.
    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    elif args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Initial status
    loss_value, summary, itr = sess.run(
        [reduced_loss, total_summary, global_step])
    print('step {:d} \t loss = {:.3f}'.format(itr, loss_value))
    summary_writer.add_summary(summary, itr)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        _, itr = sess.run([optim, global_step])

        # save summary file
        if itr % 100 == 0:
            duration = time.time() - start_time
            loss_value, summary = sess.run([reduced_loss, total_summary])
            summary_writer.add_summary(summary, itr)
            print('step {:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
                itr, loss_value, duration))

        # save checkpoint
        if itr % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, itr)

    # final status
    loss_value, summary, itr = sess.run(
        [reduced_loss, total_summary, global_step])
    print('step {:d} \t loss = {:.3f}'.format(itr, loss_value))
    save(saver, sess, args.snapshot_dir, global_step)
    summary_writer.add_summary(summary, itr)

    coord.request_stop()
    coord.join(threads)

# Example 7
def main():
    """主函数:模型构建和训练"""
    
    # Parse the training arguments.
    args = get_arguments()
    
    # Parse the input size.
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    # Set the random seed.
    tf.set_random_seed(args.random_seed)
    
    # Create the queue coordinator.
    coord = tf.train.Coordinator()
    
    # Load the reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    
    # Build the DeepLab-ResNet-101 network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)

    # Fetch the network output.
    raw_output = net.layers['fc1_voc12']

    # Collect the different groups of variables to restore and to train.
    restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
    assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
    
    
    # Gather the predictions at pixels with a valid ground-truth label.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)
                                                  
                                                  
    # Per-pixel loss: softmax cross-entropy plus L2 weight decay.
    # Softmax cross-entropy loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    # L2 weight-decay loss.
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    # Combine the losses.
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)
    
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)    
    total_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 
                                     max_outputs=args.save_num_images) # Concatenate row-wise.                               
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())
   
    # Define loss and optimisation parameters.
    # Polynomial learning-rate schedule.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    
    # Define the optimisers.
    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    # Compute the loss gradients.
    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    # Apply the gradients.
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
    
    
    # Set up the tf session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    
    # Initialise the variables.
    init = tf.global_variables_initializer()    
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)
    
    # Load variables if a checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)
    
    # Start the queue-runner threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = { step_ph : step }
        
        # Run a training step and save a snapshot and summary.
        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, train_op], feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        # Run a training step only, without saving.
        else:
            loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    
    # Stop the coordinator.
    coord.request_stop()
    coord.join(threads)
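
# Example 8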
def main():
    """Create the model and start the evaluation process."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            False,  # No random scale.
            False,  # No random mirror.
            args.ignore_label,
            IMG_MEAN,
            coord,
            ADV_FLAG,
            MASK_FLAG,
            args.eps,
            args.attack,
            args.targeted)
        # image, label = reader.image, reader.label
        image_batch, label_batch = reader.dequeue(args.batch_size)
    # image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.

    image_name_list = reader.image_list
    print(image_name_list[0])

    # Create network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=False,
                             num_classes=args.num_classes)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['fc1_voc12']

    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices_0 = tf.squeeze(
        tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt_0 = tf.cast(tf.gather(raw_gt, indices_0), tf.int32)
    prediction = tf.gather(raw_prediction, indices_0)

    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, axis=3)  # Create 4-d tensor.

    # mIoU
    pred = tf.reshape(pred, [-1,])
    gt = tf.reshape(label_batch, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)),
                         1)  ## ignore all labels >= num_classes
    gt = tf.cast(tf.gather(gt, indices), tf.int32)
    pred = tf.gather(pred, indices)
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=args.num_classes)
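    # update_op accumulates the confusion matrix batch by batch; the final
    # mIoU is evaluated after all evaluation steps have run.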

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over evaluation steps.
    for step in range(args.num_steps):
        preds, _, X, Y = sess.run([pred, update_op, image_batch, label_batch])
        if step % 100 == 0:
            print('step {:d}'.format(step))
    print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
    coord.request_stop()
    coord.join(threads)

# Example 9
def main():
    """Create the model and start the training."""
    args = get_arguments()

    # setup used GPU
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    image_batch_up = tf.image.resize_bilinear(
        image_batch, [h * args.up_scale, w * args.up_scale])
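    # Upsample the input by up_scale before feeding the network, so it
    # effectively processes a higher-resolution image.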

    # Create network.
    net = DeepLabResNetModel({'data': image_batch_up},
                             is_training=args.is_training,
                             num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'beta' not in v.name and 'gamma' not in v.name
    ]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable
                      if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignore all predictions whose label is greater than or equal to num_classes.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    # Mean cross-entropy, defined once here so that tf.reduce_mean(loss) is
    # not re-created inside the training loop (which would grow the graph).
    entropy_loss_op = tf.reduce_mean(loss)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())

    # learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    learning_rate = base_lr
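    # The polynomial schedule above is disabled: a constant learning rate is
    # used, so the value fed to step_ph has no effect on the updates.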
    global_step = tf.Variable(0,
                              dtype=tf.int32,
                              trainable=False,
                              name='global_step')

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    grads = tf.gradients(reduced_loss,
                         conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                            len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable),
                                             global_step=global_step)
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    image_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    loss_summary = tf.summary.scalar("loss", reduced_loss)
    entropy_summary = tf.summary.scalar("entropy", entropy_loss_op)
    l2_loss_summary = tf.summary.scalar("L2_loss", tf.add_n(l2_losses))
    learning_rate_summary = tf.summary.scalar(
        "learning_rate", learning_rate)  # summary recording learning rate

    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    print("Setting up summary op...")
    total_summary = tf.summary.merge_all()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)

    # load weights from saved checkpoint or initial pre-trained model
    if os.path.isdir(args.snapshot_dir):
        # search checkpoint at given path
        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # load checkpoint file
            load(saver, sess, ckpt.model_checkpoint_path)
    elif os.path.isfile(args.snapshot_dir):
        # load checkpoint file
        load(saver, sess, args.snapshot_dir)
    elif args.restore_from is not None:
        loader = tf.train.Saver(
            var_list=restore_var)  # loader for part of pre-trained model
        load(loader, sess, args.restore_from)
    else:
        print("No model found at{}".format(args.restore_from))

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Initial status
    loss_value, entropy_loss, summary, itr = sess.run(
        [reduced_loss, entropy_loss_op, total_summary, global_step])
    print('step {:d} \t loss = {:.3f}, entropy_loss = {:.3f}'.format(
        itr, loss_value, entropy_loss))
    summary_writer.add_summary(summary, itr)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}

        _, itr = sess.run([train_op, global_step], feed_dict=feed_dict)

        # save summary file
        if itr % 100 == 0:
            duration = time.time() - start_time
            loss_value, entropy_loss, summary, itr = sess.run(
                [reduced_loss, entropy_loss_op, total_summary, global_step])
            summary_writer.add_summary(summary, itr)
            print(
                'step {:d} \t loss = {:.3f}, entropy_loss = {:.3f}, ({:.3f} sec/step)'
                .format(itr, loss_value, entropy_loss, duration))

        # save checkpoint
        if itr % args.save_pred_every == 0:
            # images, labels, preds = sess.run([image_batch, label_batch, pred])
            save(saver, sess, args.snapshot_dir, global_step)

    # final status
    loss_value, entropy_loss, summary, itr = sess.run(
        [reduced_loss, entropy_loss_op, total_summary, global_step])
    print('step {:d} \t loss = {:.3f}, entropy_loss = {:.3f}'.format(
        itr, loss_value, entropy_loss))
    save(saver, sess, args.snapshot_dir, global_step)
    summary_writer.add_summary(summary, itr)

    coord.request_stop()
    coord.join(threads)

# Example 10
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=args.is_training)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = tf.global_variables()
    # Fine-tune only the last layers.
    trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name]

    prediction = tf.reshape(raw_output, [-1, n_classes])
    label_proc = prepare_label(
        label_batch, tf.stack(raw_output.get_shape()[1:3]))
    gt = tf.reshape(label_proc, [-1, n_classes])

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(
        raw_output, tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Image summary.
    images_summary = tf.py_func(
        inv_preprocess, [image_batch, args.save_num_images], tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images], tf.uint8)
    preds_summary = tf.py_func(
        decode_labels, [pred, args.save_num_images], tf.uint8)

    total_summary = tf.summary.image('images',
                                     tf.concat(
                                         [images_summary, labels_summary, preds_summary], axis=2),
                                     max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=restore_var, max_to_keep=40)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run(
                [reduced_loss, image_batch, label_batch, pred, total_summary, optim])
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, optim])
        duration = time.time() - start_time
        print(
            'step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)