Exemplo n.º 1
0
    print('Precrop: {}'.format(PRECROP))
    print('Total Epochs: {}'.format(NUM_EPOCHS))


if __name__ == '__main__':
    # Script entry point: builds the generator, restores its saved weights,
    # moves it to the selected device, and prepares the loss function and the
    # output directory.
    # NOTE(review): the previous description here spoke of a training loop
    # and data loaders; the statements below only load weights and create
    # `output_video_dir` -- confirm the intent against the rest of the script.
    if PRINT_PARAMS:
        print_params()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # generator
    model = FullNetwork(output_shape=(BATCH_SIZE, CHANNELS, FRAMES, HEIGHT, WIDTH))
    # map_location remaps the stored tensors onto the device chosen above, so
    # a checkpoint saved on a GPU can still be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    # print('Model Built.')
    model = model.to(device)

    print(model)

    if device == 'cuda':
        # `net` is only bound on the CUDA path.
        net = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    criterion = nn.MSELoss()

    # exist_ok avoids the check-then-create race and also creates any missing
    # parent directories.
    os.makedirs(output_video_dir, exist_ok=True)
Exemplo n.º 2
0
    print('Total Epochs: {}'.format(NUM_EPOCHS))
    print('Learning Rate: {}'.format(LR))


if __name__ == '__main__':
    """
    Main function to carry out the training loop. 
    This function creates the generator and data loaders. Then, it trains the generator.
    """
    if PRINT_PARAMS:
        print_params()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # generator
    model = FullNetwork(output_shape=(BATCH_SIZE, CHANNELS, FRAMES, HEIGHT,
                                      WIDTH))
    model = model.to(device)

    print(model)

    if device == 'cuda':
        net = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    # data
    trainset = NTUDataset(root_dir=data_root_dir,
                          data_file=train_splits,
                          resize_height=HEIGHT,
Exemplo n.º 3
0
def _scoped_name_map(variables, scope, checkpoint_scope=''):
    """Map checkpoint variable names to the graph variables under ``scope``.

    For every variable whose name starts with ``scope``, the scope prefix is
    removed and everything from the first ``":"`` on (e.g. ``":0"``) is
    dropped; the result is used as the key. Only names that, once un-scoped,
    start with ``checkpoint_scope`` are kept (the default ``''`` keeps all).

    Args:
        variables: iterable of objects exposing a ``name`` string attribute.
        scope: name prefix selecting the variables of interest.
        checkpoint_scope: prefix the un-scoped name must start with.

    Returns:
        dict mapping the derived name to the corresponding variable.
    """
    name_map = {}
    for variable in variables:
        if not variable.name.startswith(scope):
            continue
        unscoped = variable.name[len(scope):]
        if unscoped.startswith(checkpoint_scope):
            name_map[unscoped.split(":")[0]] = variable
    return name_map


def _restore_scope(sess, variables, scope, checkpoint_path):
    """Restore the variables under ``scope`` from ``checkpoint_path``."""
    saver = tf.train.Saver(_scoped_name_map(variables, scope))
    saver.restore(sess, checkpoint_path)


def _try_save(net, sess, path):
    """Save ``net`` to ``path``; report a failure instead of raising.

    Saving is best-effort so that a failed checkpoint write does not abort
    training. Only ``Exception`` subclasses are caught, so Ctrl-C
    (``KeyboardInterrupt``) and ``SystemExit`` still propagate.
    """
    try:
        net.save(sess, path)
        # config.write_output('Saved Network\n')
    except Exception as exc:
        print('Failed to save network!!!')
        print('Reason: %r' % (exc,))
        sys.stdout.flush()


def train_network(gpu_config):
    """Build a ``FullNetwork`` and train it for ``config.n_epochs`` epochs.

    Optionally initialises both encoders from ``config.resnet_file_name`` and
    optionally loads previously saved weights, then runs ``train_one_epoch``
    once per epoch, saving periodically and whenever the returned loss
    reaches a new minimum. The default TensorFlow graph is reset on normal
    completion.

    Args:
        gpu_config: passed as ``config=`` to ``tf.Session``.

    Returns:
        None.
    """
    net = FullNetwork(input_shape=(config.img_height, config.img_width))

    with tf.Session(graph=net.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()

        if config.use_resnet_weights:
            # Both encoders are initialised from the same checkpoint file.
            for encoder_scope in ('mem_encoder/', 'curr_encoder/'):
                _restore_scope(sess, net.variables_to_restore, encoder_scope,
                               config.resnet_file_name)

        writer = tf.summary.FileWriter(
            '{0}model_{1}'.format(config.tf_logs_dir, config.model_num),
            sess.graph)
        # try/finally makes sure the summary writer is closed even if
        # training raises part-way through.
        try:
            prev_batch_num = 0
            get_num_params()

            if config.use_trained_weights:
                net.load(
                    sess,
                    config.save_file_best_name % config.epoch_save)
                print('Loaded in old weights')
            # else:
            # config.clear_output()

            # Infinity guarantees the first finite loss counts as the best.
            best_loss = float('inf')
            print('Training on %s' % config.data_dir)
            for ep in range(1, config.n_epochs + 1):
                print(20 * '*', 'epoch', ep, 20 * '*')
                sys.stdout.flush()

                # Trains network for 1 epoch
                data_gen = TrainDataGen(config.wait_for_data,
                                        crop_size=(config.img_height,
                                                   config.img_width),
                                        n_frames=config.n_frames,
                                        rand_frame_skip=config.rand_frame_skip,
                                        use_all=config.use_all_frames)
                seg_loss, prev_batch_num = train_one_epoch(
                    sess, net, data_gen, writer, prev_batch_num)

                # config.write_output('Epoch%d: SL: %.4f.\n' % (ep, seg_loss))

                # Periodic save every `config.save_every_n_epochs` epochs.
                # NOTE(review): the file name is always formatted with 1, so
                # each periodic save uses the same name -- confirm intended.
                if ep % config.save_every_n_epochs == 0:
                    _try_save(net, sess, config.save_file_name % 1)

                # Save whenever the loss returned by train_one_epoch reaches
                # a new minimum.
                if seg_loss < best_loss:
                    best_loss = seg_loss
                    _try_save(net, sess, config.save_file_best_name % 0)
        finally:
            writer.close()

    tf.reset_default_graph()