Example #1
def main(args):
    test_path = get_dset_path(args.dataset_name, 'test')

    logger.info("Initializing test dataset")
    test_dset, test_loader = data_loader(args, test_path)

    net = LSTM_model(args)
    net = net.cuda()

    checkpoint_path = ".\model\lstm767.tar"
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['state_dict'])
    net.eval()

    batch_error = 0
    batch_fde = 0
    for idx, batch in enumerate(test_loader):

        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, non_linear_ped,
         loss_mask, seq_start_end) = batch
        num_ped = obs_traj.size(1)   # obs_traj has shape (obs_len=8, num_ped, 2)
        pred_traj_gt = pred_traj_gt.cuda()
        pred_traj = net(obs_traj.cuda(), num_ped, pred_traj_gt)

        ade_1 = get_mean_error(pred_traj, pred_traj_gt)
        ade_2 = displacement_error(pred_traj, pred_traj_gt) / (pred_traj.size(1) * 12)
        fde = final_displacement_error(pred_traj, pred_traj_gt) / pred_traj.size(1)

        batch_error += ade_2
        batch_fde += fde
    ade = batch_error / (idx+1)
    fin_fde = batch_fde / (idx+1)
    logger.info("ade is {:.2f}".format(ade))
    logger.info("ade is {:.2f}".format(fin_fde))
Example #2
def main(args):
    if os.path.isdir(args.model_path):
        filenames = os.listdir(args.model_path)
        filenames.sort()
        paths = [os.path.join(args.model_path, file_) for file_ in filenames]
    else:
        paths = [args.model_path]

    for path in paths:
        checkpoint = torch.load(path)
        generator = get_generator(checkpoint)
        _args = AttrDict(checkpoint['args'])
        path = get_dset_path(_args.dataset_name, args.dset_type)
        _, loader = data_loader(_args, path)
        ade, fde, trajs = evaluate(_args, loader, generator, args.num_samples)
        print('Dataset: {}, Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(
            _args.dataset_name, _args.pred_len, ade, fde))

        path = "trajs_dumped/" + "/".join(_args.dataset_name.split("/")[:-1])
        pathlib.Path(path).mkdir(parents=True, exist_ok=True)
        with open(
                "trajs_dumped/" +
                args.model_path.split("/")[-1].split(".")[0] + "_" +
                args.dset_type + "_trajs.pkl", 'wb+') as f:
            pickle.dump(trajs, f)
        print(
            "trajs dumped at ",
            args.model_path.split("/")[-1].split(".")[0] + "_" +
            args.dset_type + "_trajs.pkl")
Example #3
def main(args):
    checkpoint = torch.load(args.resume)
    generator = get_generator(checkpoint)
    path = get_dset_path(args.dataset_name, args.dset_type)

    _, loader = data_loader(args, path)
    plot_trajectory(args, loader, generator)
Example #4
def main(args):
    checkpoint = torch.load(args.resume)
    generator = get_generator(checkpoint)
    path = get_dset_path(args.dataset_name, args.dset_type)

    _, loader = data_loader(args, path)
    ade, fde = evaluate(args, loader, generator)
    print("Dataset: {}, Pred Len: {}, ADE: {:.12f}, FDE: {:.12f}".format(
        args.dataset_name, args.pred_len, ade, fde))
Example #5
def main(args):
    checkpoint = torch.load(args.resume)
    generator = get_generator(checkpoint)
    path = get_dset_path(args.dataset_name, args.dset_type)

    _, loader = data_loader(args, path)
    prediction = evaluate(args, loader, generator)
    print(len(prediction))
    print(prediction[0])
Example #6
def main(args):
    if os.path.isdir(args.model_path):
        filenames = os.listdir(args.model_path)
        filenames.sort()
        paths = [os.path.join(args.model_path, file_) for file_ in filenames]
    else:
        paths = [args.model_path]

    for path in paths:
        checkpoint = torch.load(path)
        generator = get_generator(checkpoint)
        _args = AttrDict(checkpoint['args'])
        path = get_dset_path(_args.dataset_name, args.dset_type)
        _, loader = data_loader(_args, path)
        ade, fde = evaluate(_args, loader, generator, args.num_samples)
        print('Dataset: {}, Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(
            _args.dataset_name, _args.pred_len, ade, fde))
Example #7
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    device_ids = [0, 1]
    test_path = get_dset_path(args.dataset_name, 'test')

    logger.info("Initializing test dataset")
    test_dset, test_loader = data_loader(args, test_path)

    net = LSTM_model(args)
    #net = net.cuda(device_ids[1])

    checkpoint_path = "./model/lstm348.tar"
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['state_dict'])
    net.eval()

    count = 0
    total_ade = 0
    total_fde = 0
    for batch in test_loader:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, non_linear_ped,
         loss_mask, seq_start_end) = batch
        num_ped = obs_traj.size(1)   # obs_traj has shape (obs_len=8, num_ped, 2)
        #pred_traj_gt = pred_traj_gt.cuda(device_ids[1])
        pred_traj = net(obs_traj, num_ped, pred_traj_gt, seq_start_end)
        ade = get_mean_error(pred_traj, pred_traj_gt)
        total_ade += ade
        fde = final_displacement_error(pred_traj[-1], pred_traj_gt[-1])
        total_fde += (fde / num_ped)
        #logger.info("ade is {:.2f}".format(ade))
        count += 1

    ade_fin = total_ade / count
    fde_fin = total_fde / count
    logger.info("ade is {:.2f}".format(ade_fin))
    logger.info("fde is {:.2f}".format(fde_fin))
Example #8
def objective(trial):

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    train_path = get_dset_path(args.dataset_name, 'train')
    val_path = get_dset_path(args.dataset_name, 'val')

    long_dtype, float_dtype = get_dtypes(args)

    discriminator_weight = trial.suggest_categorical('discriminator_weight',
                                                     [0, 1])
    optim_name = trial.suggest_categorical('optim_name',
                                           ['Adam', 'Adamax', 'RMSprop'])

    # args.batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    args.dropout = trial.suggest_categorical('drop_out', [0, 0.2, 0.5])
    args.batch_norm = trial.suggest_categorical('batch_norm', [0, 1])

    N_TRAIN_EXAMPLES = args.batch_size * 30
    N_VALID_EXAMPLES = args.batch_size * 10

    logger.info("Initializing train dataset")
    train_dset, train_loader = data_loader(args, train_path)
    logger.info("Initializing val dataset")
    _, val_loader = data_loader(args, val_path)

    generator = TrajectoryGenerator(
        obs_len=args.obs_len,
        pred_len=args.pred_len,
        embedding_dim=args.embedding_dim,
        encoder_h_dim=args.encoder_h_dim_g,
        decoder_h_dim=args.decoder_h_dim_g,
        mlp_dim=args.mlp_dim,
        num_layers=args.num_layers,
        noise_dim=args.noise_dim,
        noise_type=args.noise_type,
        noise_mix_type=args.noise_mix_type,
        pooling_type=args.pooling_type,
        pool_every_timestep=args.pool_every_timestep,
        dropout=args.dropout,
        bottleneck_dim=args.bottleneck_dim,
        neighborhood_size=args.neighborhood_size,
        grid_size=args.grid_size,
        batch_norm=args.batch_norm,
        use_cuda=args.use_gpu)

    generator.apply(init_weights)
    generator.type(float_dtype).train()
    logger.info('Here is the generator:')
    logger.info(generator)

    discriminator = TrajectoryDiscriminator(obs_len=args.obs_len,
                                            pred_len=args.pred_len,
                                            embedding_dim=args.embedding_dim,
                                            h_dim=args.encoder_h_dim_d,
                                            mlp_dim=args.mlp_dim,
                                            num_layers=args.num_layers,
                                            dropout=args.dropout,
                                            batch_norm=args.batch_norm,
                                            d_type=args.d_type,
                                            use_cuda=args.use_gpu)

    discriminator.apply(init_weights)
    discriminator.type(float_dtype).train()
    logger.info('Here is the discriminator:')
    logger.info(discriminator)

    g_loss_fn = gan_g_loss
    d_loss_fn = gan_d_loss

    if optim_name == 'Adam':
        optimizer_g = optim.Adam([{
            'params': generator.parameters(),
            'initial_lr': args.g_learning_rate
        }],
                                 lr=args.g_learning_rate)
        optimizer_d = optim.Adam([{
            'params': discriminator.parameters(),
            'initial_lr': args.d_learning_rate
        }],
                                 lr=args.d_learning_rate)

    elif optim_name == 'Adamax':
        optimizer_g = optim.Adamax([{
            'params': generator.parameters(),
            'initial_lr': args.g_learning_rate
        }],
                                   lr=args.g_learning_rate)
        optimizer_d = optim.Adamax([{
            'params': discriminator.parameters(),
            'initial_lr': args.d_learning_rate
        }],
                                   lr=args.d_learning_rate)
    else:
        optimizer_g = optim.RMSprop([{
            'params': generator.parameters(),
            'initial_lr': args.g_learning_rate
        }],
                                    lr=args.g_learning_rate)
        optimizer_d = optim.RMSprop([{
            'params': discriminator.parameters(),
            'initial_lr': args.d_learning_rate
        }],
                                    lr=args.d_learning_rate)

    scheduler_g = optim.lr_scheduler.StepLR(optimizer_g,
                                            step_size=100,
                                            gamma=0.5,
                                            last_epoch=-1)
    scheduler_d = optim.lr_scheduler.StepLR(optimizer_d,
                                            step_size=100,
                                            gamma=0.5,
                                            last_epoch=-1)

    t, epoch = 0, 0

    while t < 50:
        gc.collect()
        d_steps_left = args.d_steps
        g_steps_left = args.g_steps

        for batch_idx, batch in enumerate(train_loader):

            # Limiting training data for faster epochs.
            if batch_idx * args.batch_size >= N_TRAIN_EXAMPLES:
                break

            # Decide whether to use the batch for stepping on discriminator or
            # generator; an iteration consists of args.d_steps steps on the
            # discriminator followed by args.g_steps steps on the generator.
            if d_steps_left > 0:
                step_type = 'd'
                losses_d = discriminator_step(args, batch, generator,
                                              discriminator, d_loss_fn,
                                              optimizer_d)

                d_steps_left -= 1
            elif g_steps_left > 0:
                step_type = 'g'
                losses_g = generator_step(args, batch, generator,
                                          discriminator, g_loss_fn,
                                          optimizer_g, discriminator_weight)

                g_steps_left -= 1

            # Skip the rest if we are not at the end of an iteration
            if d_steps_left > 0 or g_steps_left > 0:
                continue

            t += 1
            d_steps_left = args.d_steps
            g_steps_left = args.g_steps
            if t >= args.num_iterations:
                break

        scheduler_g.step()
        scheduler_d.step()

        metrics_val = check_accuracy(args, val_loader, generator,
                                     discriminator, d_loss_fn,
                                     N_VALID_EXAMPLES)

        ade = metrics_val['ade']

        trial.report(ade, t)

    return ade
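Example #8 defines an Optuna objective; the study that drives it is not shown. A minimal sketch of how such an objective is typically launched (the trial count and study settings here are illustrative assumptions):

import optuna

# Minimize validation ADE over the categorical search space defined in objective().
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)
print('Best hyperparameters:', study.best_params)
print('Best ADE:', study.best_value)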
Example #9
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    train_path = get_dset_path(args.dataset_name, 'train')
    val_path = get_dset_path(args.dataset_name, 'val')

    long_dtype, float_dtype = get_dtypes(args)

    logger.info("Initializing train dataset")
    train_dset, train_loader = data_loader(args, train_path)
    logger.info("Initializing val dataset")
    _, val_loader = data_loader(args, val_path)

    iterations_per_epoch = len(train_dset) / args.batch_size / args.d_steps
    if args.num_epochs:
        args.num_iterations = int(iterations_per_epoch * args.num_epochs)

    logger.info(
        'There are {} iterations per epoch'.format(iterations_per_epoch))

    generator = TrajectoryGenerator(
        obs_len=args.obs_len,
        pred_len=args.pred_len,
        embedding_dim=args.embedding_dim,
        encoder_h_dim=args.encoder_h_dim_g,
        decoder_h_dim=args.decoder_h_dim_g,
        mlp_dim=args.mlp_dim,
        num_layers=args.num_layers,
        noise_dim=args.noise_dim,
        noise_type=args.noise_type,
        noise_mix_type=args.noise_mix_type,
        pooling_type=args.pooling_type,
        pool_every_timestep=args.pool_every_timestep,
        dropout=args.dropout,
        bottleneck_dim=args.bottleneck_dim,
        neighborhood_size=args.neighborhood_size,
        grid_size=args.grid_size,
        batch_norm=args.batch_norm)

    generator.apply(init_weights)
    generator.type(float_dtype).train()
    logger.info('Here is the generator:')
    logger.info(generator)

    # discriminator = TrajectoryDiscriminator(
    #     obs_len=args.obs_len,
    #     pred_len=args.pred_len,
    #     embedding_dim=args.embedding_dim,
    #     h_dim=args.encoder_h_dim_d,
    #     mlp_dim=args.mlp_dim,
    #     num_layers=args.num_layers,
    #     dropout=args.dropout,
    #     batch_norm=args.batch_norm,
    #     d_type=args.d_type)

    # discriminator.apply(init_weights)
    # discriminator.type(float_dtype).train()
    logger.info('Here is the discriminator:')
    # logger.info(discriminator)

    g_loss_fn = gan_g_loss
    d_loss_fn = gan_d_loss

    optimizer_g = optim.Adam(generator.parameters(), lr=args.g_learning_rate)
    # optimizer_d = optim.Adam(
    # discriminator.parameters(), lr=args.d_learning_rate
    # )

    # Maybe restore from checkpoint
    restore_path = None
    if args.checkpoint_start_from is not None:
        restore_path = args.checkpoint_start_from
    elif args.restore_from_checkpoint == 1:
        restore_path = os.path.join(args.output_dir,
                                    '%s_with_model.pt' % args.checkpoint_name)

    if restore_path is not None and os.path.isfile(restore_path):
        logger.info('Restoring from checkpoint {}'.format(restore_path))
        checkpoint = torch.load(restore_path)
        generator.load_state_dict(checkpoint['g_state'])
        # discriminator.load_state_dict(checkpoint['d_state'])
        optimizer_g.load_state_dict(checkpoint['g_optim_state'])
        # optimizer_d.load_state_dict(checkpoint['d_optim_state'])
        t = checkpoint['counters']['t']
        epoch = checkpoint['counters']['epoch']
        checkpoint['restore_ts'].append(t)
    else:
        # Starting from scratch, so initialize checkpoint data structure
        t, epoch = 0, 0
        checkpoint = {
            'args': args.__dict__,
            'G_losses': defaultdict(list),
            'D_losses': defaultdict(list),
            'losses_ts': [],
            'metrics_val': defaultdict(list),
            'metrics_train': defaultdict(list),
            'sample_ts': [],
            'restore_ts': [],
            'norm_g': [],
            'norm_d': [],
            'counters': {
                't': None,
                'epoch': None,
            },
            'g_state': None,
            'g_optim_state': None,
            'd_state': None,
            'd_optim_state': None,
            'g_best_state': None,
            'd_best_state': None,
            'best_t': None,
            'g_best_nl_state': None,
            'd_best_state_nl': None,
            'best_t_nl': None,
        }
    t0 = None
    while t < args.num_iterations:
        gc.collect()
        d_steps_left = args.d_steps
        g_steps_left = args.g_steps
        epoch += 1
        logger.info('Starting epoch {}'.format(epoch))
        for batch in train_loader:
            # if args.timing == 1:
            #     torch.cuda.synchronize()
            #     t1 = time.time()

            # Decide whether to use the batch for stepping on discriminator or
            # generator; an iteration consists of args.d_steps steps on the
            # discriminator followed by args.g_steps steps on the generator.
            # if d_steps_left > 0:
            #     step_type = 'd'
            # losses_d = discriminator_step(args, batch, generator,
            #                               discriminator, d_loss_fn,
            #                               optimizer_d)
            # checkpoint['norm_d'].append(
            #     get_total_norm(discriminator.parameters()))
            # d_steps_left -= 1
            # elif g_steps_left > 0:
            step_type = 'g'
            losses_g = generator_step(args, batch, generator, optimizer_g)
            checkpoint['norm_g'].append(get_total_norm(generator.parameters()))
            g_steps_left -= 1

            # if args.timing == 1:
            #     torch.cuda.synchronize()
            #     t2 = time.time()
            #     logger.info('{} step took {}'.format(step_type, t2 - t1))

            # Skip the rest if we are not at the end of an iteration
            # if d_steps_left > 0 or g_steps_left > 0:
            #     continue

            # if args.timing == 1:
            #     if t0 is not None:
            #         logger.info('Iteration {} took {}'.format(
            #             t - 1, time.time() - t0
            #         ))
            #     t0 = time.time()

            # Maybe save loss
            if t % args.print_every == 0:
                logger.info('t = {} / {}'.format(t + 1, args.num_iterations))
                # for k, v in sorted(losses_d.items()):
                #     logger.info('  [D] {}: {:.3f}'.format(k, v))
                #     checkpoint['D_losses'][k].append(v)
                for k, v in sorted(losses_g.items()):
                    logger.info('  [G] {}: {:.3f}'.format(k, v))
                    checkpoint['G_losses'][k].append(v)
                checkpoint['losses_ts'].append(t)

            # Maybe save a checkpoint
            # if t > 0 and t % args.checkpoint_every == 0:
            if t > 0:

                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint['sample_ts'].append(t)

                # Check stats on the validation set
                logger.info('Checking stats on val ...')
                metrics_val = check_accuracy(args, val_loader, generator)
                logger.info('Checking stats on train ...')
                metrics_train = check_accuracy(
                    args,
                    train_loader,
                    generator,
                    # d_loss_fn,
                    limit=True)

                for k, v in sorted(metrics_val.items()):
                    logger.info('  [val] {}: {:.3f}'.format(k, v))
                    checkpoint['metrics_val'][k].append(v)
                for k, v in sorted(metrics_train.items()):
                    logger.info('  [train] {}: {:.3f}'.format(k, v))
                    checkpoint['metrics_train'][k].append(v)

                min_ade = min(checkpoint['metrics_val']['ade'])
                min_ade_nl = min(checkpoint['metrics_val']['ade_nl'])

                if metrics_val['ade'] == min_ade:
                    logger.info('New low for avg_disp_error')
                    checkpoint['best_t'] = t
                    checkpoint['g_best_state'] = generator.state_dict()
                    # checkpoint['d_best_state'] = discriminator.state_dict()

                if metrics_val['ade_nl'] == min_ade_nl:
                    logger.info('New low for avg_disp_error_nl')
                    checkpoint['best_t_nl'] = t
                    checkpoint['g_best_nl_state'] = generator.state_dict()
                    # checkpoint['d_best_nl_state'] = discriminator.state_dict()

                # Save another checkpoint with model weights and
                # optimizer state
                checkpoint['g_state'] = generator.state_dict()
                checkpoint['g_optim_state'] = optimizer_g.state_dict()
                # checkpoint['d_state'] = discriminator.state_dict()
                # checkpoint['d_optim_state'] = optimizer_d.state_dict()
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_with_model.pt' % args.checkpoint_name)
                logger.info('Saving checkpoint to {}'.format(checkpoint_path))
                torch.save(checkpoint, checkpoint_path)
                logger.info('Done.')

                # Save a checkpoint with no model weights by making a shallow
                # copy of the checkpoint excluding some items
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_no_model.pt' % args.checkpoint_name)
                logger.info('Saving checkpoint to {}'.format(checkpoint_path))
                key_blacklist = [
                    'g_state', 'd_state', 'g_best_state', 'g_best_nl_state',
                    'g_optim_state', 'd_optim_state', 'd_best_state',
                    'd_best_nl_state'
                ]
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
                logger.info('Done.')

            t += 1
            # d_steps_left = args.d_steps
            g_steps_left = args.g_steps
            if t >= args.num_iterations:
                break
Example #10
def main(args):
    train_path = get_dset_path(args.dataset_name, 'train')
    val_path = get_dset_path(args.dataset_name, 'val')

    # Random seeds
    # torch.manual_seed(2)
    # np.random.seed(2)
    # if args.use_gpu:
    #     torch.cuda.manual_seed_all(2)

    logger.info("Initializing train dataset")
    train_dset, train_loader = data_loader(args, train_path)
    logger.info("Initializing val dataset")
    _, val_loader = data_loader(args, val_path)

    log_path = './log/'
    log_file_curve = open(os.path.join(log_path, 'log_loss.txt'), 'w+')
    log_file_curve_val = open(os.path.join(log_path, 'log_loss_val.txt'), 'w+')
    log_file_curve_val_ade = open(
        os.path.join(log_path, 'log_loss_val_ade.txt'), 'w+')

    net = LSTM_model(args)
    if args.use_gpu:
        net = net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

    # Resume training from the last saved checkpoint
    # restore_path = './model/lstm294.tar'
    # logger.info('Restoring from checkpoint {}'.format(restore_path))
    # checkpoint = torch.load(restore_path)
    # net.load_state_dict(checkpoint['state_dict'])
    # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    #
    # for i_epoch in range(checkpoint['epoch']+1):
    #     if (i_epoch + 1) % 100 == 0:
    #         args.learning_rate *= 0.98

    epoch_loss_min = 160
    epoch_smallest = 0
    #for epoch in range(checkpoint['epoch']+1, args.num_epochs):
    for epoch in range(args.num_epochs):
        count = 0
        batch_loss = 0

        for batch in train_loader:
            # Zero out gradients
            net.zero_grad()
            optimizer.zero_grad()

            (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel,
             non_linear_ped, loss_mask, seq_start_end) = batch
            num_ped = obs_traj.size(1)
            pred_traj_gt = pred_traj_gt

            #model_teacher.py
            pred_traj = net(obs_traj, num_ped, pred_traj_gt, seq_start_end)
            loss = displacement_error(pred_traj, pred_traj_gt)
            #loss = get_mean_error(pred_traj, pred_traj_gt)

            # Compute gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip)
            # Update parameters
            optimizer.step()

            batch_loss += loss
            count += 1

            #print(loss / num_ped)
        if (epoch + 1) % 6 == 0:
            pass
            #scheduler.step()
        logger.info('epoch {} train loss is {}'.format(epoch,
                                                       batch_loss / count))
        log_file_curve.write(str(batch_loss.item() / count) + "\n")

        batch_loss = 0
        count = 0  # reset the batch counter for the validation pass
        val_ade = 0
        total_ade = 0
        for idx, batch in enumerate(val_loader):
            (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel,
             non_linear_ped, loss_mask, seq_start_end) = batch
            num_ped = obs_traj.size(1)
            pred_traj_gt = pred_traj_gt

            # model_teacher.py
            pred_traj = net(obs_traj, num_ped, pred_traj_gt, seq_start_end)
            loss = displacement_error(pred_traj, pred_traj_gt)

            batch_loss += loss
            val_ade += loss / (num_ped * 12)
            total_ade += val_ade

            count += 1

        fin_ade = total_ade / (idx + 1)
        log_file_curve_val_ade.write(str(fin_ade.item()) + "\n")

        epoch_loss = batch_loss / count
        if epoch_loss_min > epoch_loss:
            epoch_loss_min = epoch_loss
            epoch_smallest = epoch

            logger.info('Saving model')
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': net.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, checkpoint_path(epoch))
        logger.info('epoch {} val loss is {}'.format(epoch, epoch_loss))
        log_file_curve_val.write(str(epoch_loss.item()) + "\n")
        logger.info('epoch {} has the smallest val loss: {}'.format(
            epoch_smallest, epoch_loss_min))
        logger.info('epoch {} val ade is {}'.format(epoch, fin_ade))
        logger.info("-" * 50)
Example #11
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    train_path = get_dset_path(args.dataset_name, "train")
    val_path = get_dset_path(args.dataset_name, "test")

    logging.info("Initializing train dataset")
    train_dset, train_loader = data_loader(args, train_path)
    logging.info("Initializing val dataset")
    _, val_loader = data_loader(args, val_path)

    writer = SummaryWriter()

    n_units = ([args.traj_lstm_hidden_size] +
               [int(x) for x in args.hidden_units.strip().split(",")] +
               [args.graph_lstm_hidden_size])
    n_heads = [int(x) for x in args.heads.strip().split(",")]

    model = TrajectoryGenerator(
        obs_len=args.obs_len,
        pred_len=args.pred_len,
        traj_lstm_input_size=args.traj_lstm_input_size,
        traj_lstm_hidden_size=args.traj_lstm_hidden_size,
        n_units=n_units,
        n_heads=n_heads,
        graph_network_out_dims=args.graph_network_out_dims,
        dropout=args.dropout,
        alpha=args.alpha,
        graph_lstm_hidden_size=args.graph_lstm_hidden_size,
        noise_dim=args.noise_dim,
        noise_type=args.noise_type,
    )
    model.cuda()
    optimizer = optim.Adam(
        [
            {
                "params": model.traj_lstm_model.parameters(),
                "lr": 1e-2
            },
            {
                "params": model.traj_hidden2pos.parameters()
            },
            {
                "params": model.gatencoder.parameters(),
                "lr": 3e-2
            },
            {
                "params": model.graph_lstm_model.parameters(),
                "lr": 1e-2
            },
            {
                "params": model.traj_gat_hidden2pos.parameters()
            },
            {
                "params": model.pred_lstm_model.parameters()
            },
            {
                "params": model.pred_hidden2pos.parameters()
            },
        ],
        lr=args.lr,
    )
    global best_ade
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info("Restoring from checkpoint {}".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint["epoch"]
            model.load_state_dict(checkpoint["state_dict"])
            logging.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint["epoch"]))
        else:
            logging.info("=> no checkpoint found at '{}'".format(args.resume))

    training_step = 1
    for epoch in range(args.start_epoch, args.num_epochs + 1):
        if epoch < 150:
            training_step = 1
        elif epoch < 250:
            training_step = 2
        else:
            if epoch == 250:
                for param_group in optimizer.param_groups:
                    param_group["lr"] = 5e-3
            training_step = 3
        train(args, model, train_loader, optimizer, epoch, training_step,
              writer)
        if training_step == 3:
            ade = validate(args, model, val_loader, epoch, writer)
            is_best = ade < best_ade
            best_ade = min(ade, best_ade)

            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "state_dict": model.state_dict(),
                    "best_ade": best_ade,
                    "optimizer": optimizer.state_dict(),
                },
                is_best,
                f"./checkpoint/checkpoint{epoch}.pth.tar",
            )
    writer.close()
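Example #11 delegates persistence to a save_checkpoint helper that also tracks the best model. A minimal sketch following the common PyTorch pattern of copying the latest checkpoint to a separate "best" file; both the helper body and the best-model path are assumptions, not the project's actual code:

import shutil
import torch

def save_checkpoint(state, is_best, filename):
    # Persist the latest state; keep a separate copy of the best model so far
    # (the destination path below is an assumption).
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, './checkpoint/model_best.pth.tar')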
Example #12
def main(args):

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    train_path = get_dset_path(args.dataset_name, 'train')
    val_path = get_dset_path(args.dataset_name, 'val')

    long_dtype, float_dtype = get_dtypes(args)

    logger.info("Initializing train dataset")
    train_dset, train_loader = data_loader(args, train_path)
    logger.info("Initializing val dataset")
    _, val_loader = data_loader(args, val_path)

    iterations_per_epoch = len(train_dset) / args.batch_size
    if args.num_epochs:
        args.num_iterations = int(iterations_per_epoch * args.num_epochs)

    logger.info(
        'There are {} iterations per epoch'.format(iterations_per_epoch))

    lstm = My_Net_V2(seq_len=args.pred_len,
                     mlp_dim=args.mlp_dim,
                     dropout=args.dropout,
                     use_cuda=args.use_gpu)

    lstm.apply(init_weights)
    lstm.type(float_dtype).train()
    logger.info('Here is the lstm:')
    logger.info(lstm)

    # optimizer = optim.Adam(lstm.parameters(), lr=args.g_learning_rate)
    optimizer = optim.RMSprop(lstm.parameters(), lr=args.g_learning_rate)

    # Maybe restore from checkpoint
    restore_path = None
    if args.checkpoint_start_from is not None:
        restore_path = args.checkpoint_start_from
    elif args.restore_from_checkpoint == 1:
        restore_path = os.path.join(args.output_dir,
                                    '%s_with_model.pt' % args.checkpoint_name)

    if restore_path is not None and os.path.isfile(restore_path):
        logger.info('Restoring from checkpoint {}'.format(restore_path))
        checkpoint = torch.load(restore_path)
        lstm.load_state_dict(checkpoint['state'])
        optimizer.load_state_dict(checkpoint['optim_state'])
        t = checkpoint['counters']['t']
        epoch = checkpoint['counters']['epoch']
        checkpoint['restore_ts'].append(t)
    else:
        # Starting from scratch, so initialize checkpoint data structure
        t, epoch = 0, 0
        checkpoint = {
            'args': args.__dict__,
            'losses': defaultdict(list),
            'losses_ts': [],
            'metrics_val': defaultdict(list),
            'metrics_train': defaultdict(list),
            'sample_ts': [],
            'restore_ts': [],
            'norm': [],
            'counters': {
                't': None,
                'epoch': None,
            },
            'state': None,
            'optim_state': None,
            'best_state': None,
            'best_t': None
        }
    t0 = None
    while t < args.num_iterations:
        gc.collect()
        epoch += 1
        logger.info('Starting epoch {}'.format(epoch))
        for batch in train_loader:
            if args.timing == 1:
                torch.cuda.synchronize()
                t1 = time.time()

            # Single optimization step on the LSTM model; this variant has no
            # discriminator, so every batch updates the same network.
            losses = generator_step(args, batch, lstm, optimizer)
            # checkpoint['norm_g'].append(
            #     get_total_norm(lstm.parameters())
            # )

            if args.timing == 1:
                if t0 is not None:
                    logger.info('Iteration {} took {}'.format(
                        t - 1,
                        time.time() - t0))
                t0 = time.time()

            # Maybe save loss
            if t % args.print_every == 0:
                logger.info('t = {} / {}'.format(t + 1, args.num_iterations))
            #     for k, v in sorted(losses.items()):
            #         logger.info('  [D] {}: {:.7f}'.format(k, v))
            #         checkpoint['losses'][k].append(v)
            #     checkpoint['losses_ts'].append(t)

            # Maybe save a checkpoint
            if t > 0 and t % args.checkpoint_every == 0:
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint['sample_ts'].append(t)

                # Check stats on the validation set
                logger.info('Checking stats on val ...')
                metrics_val = check_accuracy(args,
                                             val_loader,
                                             lstm,
                                             is_train=False)
                logger.info('Checking stats on train ...')
                metrics_train = check_accuracy(args,
                                               train_loader,
                                               lstm,
                                               limit=True,
                                               is_train=True)

                for k, v in sorted(metrics_val.items()):
                    logger.info('  [val] {}: {:.7f}'.format(k, v))
                    checkpoint['metrics_val'][k].append(v)
                for k, v in sorted(metrics_train.items()):
                    logger.info('  [train] {}: {:.7f}'.format(k, v))
                    checkpoint['metrics_train'][k].append(v)

                min_ade = min(checkpoint['metrics_val']['ade'])

                if metrics_val['ade'] == min_ade:
                    logger.info('New low for avg_disp_error')
                    checkpoint['best_t'] = t
                    checkpoint['best_state'] = lstm.state_dict()

                # Save another checkpoint with model weights and
                # optimizer state
                checkpoint['state'] = lstm.state_dict()
                checkpoint['optim_state'] = optimizer.state_dict()

                checkpoint_path = os.path.join(
                    args.output_dir, '%s_with_model.pt' % args.checkpoint_name)
                logger.info('Saving checkpoint to {}'.format(checkpoint_path))
                torch.save(checkpoint, checkpoint_path)
                logger.info('Done.')

            t += 1
            if t >= args.num_iterations:
                break
Example #13
def main():
    train_path = get_dset_path(DATASET_NAME, 'train')
    val_path = get_dset_path(DATASET_NAME, 'val')
    long_dtype, float_dtype = get_dtypes()

    print("Initializing train dataset")
    train_dset, train_loader = data_loader(train_path)
    print("Initializing val dataset")
    _, val_loader = data_loader(val_path)

    iterations_per_epoch = len(train_dset) / D_STEPS
    NUM_ITERATIONS = int(iterations_per_epoch * NUM_EPOCHS)
    print('There are {} iterations per epoch'.format(iterations_per_epoch))

    generator = TrajectoryGenerator()
    generator.apply(init_weights)
    generator.type(float_dtype).train()
    print('Here is the generator:')
    print(generator)

    discriminator = TrajectoryDiscriminator()
    discriminator.apply(init_weights)
    discriminator.type(float_dtype).train()
    print('Here is the discriminator:')
    print(discriminator)

    optimizer_g = optim.Adam(generator.parameters(), lr=G_LR)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=D_LR)

    t, epoch = 0, 0
    t0 = None
    min_ade = None
    while t < NUM_ITERATIONS:
        gc.collect()
        d_steps_left = D_STEPS
        g_steps_left = G_STEPS
        epoch += 1
        print('Starting epoch {}'.format(epoch))
        for batch in train_loader:

            if d_steps_left > 0:
                losses_d = discriminator_step(batch, generator, discriminator,
                                              gan_d_loss, optimizer_d)
                d_steps_left -= 1
            elif g_steps_left > 0:
                losses_g = generator_step(batch, generator, discriminator,
                                          gan_g_loss, optimizer_g)
                g_steps_left -= 1

            if d_steps_left > 0 or g_steps_left > 0:
                continue

            if t % PRINT_EVERY == 0:
                print('t = {} / {}'.format(t + 1, NUM_ITERATIONS))
                for k, v in sorted(losses_d.items()):
                    print('  [D] {}: {:.3f}'.format(k, v))
                for k, v in sorted(losses_g.items()):
                    print('  [G] {}: {:.3f}'.format(k, v))

                print('Checking stats on val ...')
                metrics_val = check_accuracy(val_loader, generator,
                                             discriminator, gan_d_loss)

                print('Checking stats on train ...')
                metrics_train = check_accuracy(train_loader,
                                               generator,
                                               discriminator,
                                               gan_d_loss,
                                               limit=True)

                for k, v in sorted(metrics_val.items()):
                    print('  [val] {}: {:.3f}'.format(k, v))
                for k, v in sorted(metrics_train.items()):
                    print('  [train] {}: {:.3f}'.format(k, v))

                if min_ade is None or metrics_val['ade'] < min_ade:
                    min_ade = metrics_val['ade']
                    checkpoint = {
                        't': t,
                        'g': generator.state_dict(),
                        'd': discriminator.state_dict(),
                        'g_optim': optimizer_g.state_dict(),
                        'd_optim': optimizer_d.state_dict()
                    }
                    print("Saving checkpoint to model.pt")
                    torch.save(checkpoint, "model.pt")
                    print("Done.")

            t += 1
            d_steps_left = D_STEPS
            g_steps_left = G_STEPS
            if t >= NUM_ITERATIONS:
                break
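Examples #8, #9, #12 and #13 cast their models with the dtypes returned by get_dtypes. A minimal sketch of what that helper is assumed to return; the signature here (a plain use_gpu flag) is an assumption, since the examples call it either with an args namespace or with no arguments:

import torch

def get_dtypes(use_gpu=True):
    # Returns (long_dtype, float_dtype) matching the target device.
    if use_gpu and torch.cuda.is_available():
        return torch.cuda.LongTensor, torch.cuda.FloatTensor
    return torch.LongTensor, torch.FloatTensor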
Example #14
def load_and_evaluate(generator, version):
    print("Initializing {} dataset".format(version))
    path = get_dset_path(DATASET_NAME, version)
    _, loader = data_loader(path)
    ade, fde = evaluate(loader, generator)
    print('{} Dataset: {}, Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(version, DATASET_NAME, PRED_LEN, ade, fde))
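All of the main(args) entry points above expect an argparse-style namespace. A minimal sketch of how such a script is usually wired up; the argument names and defaults are illustrative assumptions, not the project's full CLI:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_name', default='zara1', type=str)
    parser.add_argument('--dset_type', default='test', type=str)
    parser.add_argument('--resume', default='', type=str)
    args = parser.parse_args()
    main(args)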