Example #1
def flickr_objective_50(param):
    from model import KBPRModel
    from utils import early_stop, flickr, preprocess
    import gc, theano.misc.pkl_utils
    from hyperopt import STATUS_OK
    import cStringIO
    # split the Flickr interactions 8/1/1/0 into train/validation/test/exclude
    n_items, n_users, train_dict, valid_dict, test_dict, exclude_dict = \
        preprocess(flickr(), portion=[8, 1, 1, 0])
    flickr_50 = KBPRModel(50, n_users, n_items,
                          per_user_sample=50,
                          learning_rate=0.1,
                          variance_mu=1,
                          update_mu=True,
                          lambda_variance=1,
                          use_warp=True, **param)
    best_metric, best_model = early_stop(flickr_50, train_dict,
                                         lambda m: -m.recall(valid_dict, train_dict, n_users=3000)[0],
                                         n_epochs=10000,
                                         patience=500, validation_frequency=100)
    # pickle the best model so it can be returned as a hyperopt trial attachment
    output = cStringIO.StringIO()
    theano.misc.pkl_utils.dump(best_model, output)
    del flickr_50
    gc.collect()

    return {"loss": best_metric, "attachments": {"model": output.getvalue()}, "status": STATUS_OK}
Example #2
def main():
    args = parse_args()
    train_dataset, test_dataset = dataset.get_dataset(args.path,
                                                      args.use_augmentation,
                                                      args.use_fivecrop)
    train_loader = DataLoader(train_dataset,
                              args.batch,
                              True,
                              num_workers=args.worker,
                              pin_memory=True)
    test_loader = DataLoader(test_dataset,
                             args.batch,
                             False,
                             num_workers=args.worker,
                             pin_memory=True)
    if args.cuda:
        torch.cuda.set_device(0)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    if args.model == 'ResNet18':
        mymodel = model.ResNet18(args.frozen_layers).to(device)
    elif args.model == 'ResNet34':
        mymodel = model.ResNet34(args.frozen_layers).to(device)
    elif args.model == 'ResNet50':
        mymodel = model.ResNet50(args.frozen_layers).to(device)
    elif args.model == 'DenseNet':
        mymodel = model.DenseNet().to(device)
    else:
        raise ValueError(f'unknown model: {args.model}')
    op = optim.Adam(mymodel.parameters(), lr=args.lr)
    train_losses, test_mF1s, test_precisions, test_recalls = [], [], [], []
    early = args.early
    for i in range(args.epoch):
        train_loss = train.train(mymodel, op, train_loader, i, device,
                                 args.log, utils.pos_weight)
        mF1, recall, precision = test.test(mymodel, test_loader, device,
                                           args.use_fivecrop)
        train_losses.append(train_loss)
        test_mF1s.append(mF1)
        test_precisions.append(precision)
        test_recalls.append(recall)
        early = utils.early_stop(test_mF1s, early)
        if early <= 0:
            break
    utils.save_log(mymodel, train_losses, test_mF1s, test_precisions,
                   test_recalls)
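utils.early_stop here (and early_stop in Example #5 below) acts as a countdown: it receives the history of the monitored metric together with the remaining patience budget and returns a decremented budget whenever the newest value fails to improve on the best seen so far; training stops once the budget reaches zero. A minimal sketch of that behaviour, assuming a higher-is-better metric and an optional improvement threshold (not the actual utils implementation):

def early_stop(metric_history, budget, threshold=0.0):
    # Countdown-style early stopping: `metric_history` holds the monitored
    # values so far (higher is better), `budget` is the number of
    # non-improving evaluations still allowed. Sketch only; the real helper
    # may compare against the previous value instead of the best one.
    if len(metric_history) < 2:
        return budget
    if metric_history[-1] > max(metric_history[:-1]) + threshold:
        return budget      # improved: keep the remaining budget
    return budget - 1      # no improvement: burn one unit of patience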
Example #3
        else:
            metrics_te = defaultdict(float)
        metrics_tr = {'loss': loss}
        metrics_all = (metrics, metrics_te, metrics_tr)

        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        save_everything(args, metrics_hist_all, model, model_dir, None,
                        args.criterion, test_only)

        sys.stdout.flush()

        if test_only:
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # stop training, run tests on the train and test sets, then stop the script
                print("%s hasn't improved in %d epochs, early stopping..." %
                      (args.criterion, args.patience))
                test_only = True
                args.test_model = '%s/model_best_%s.pth' % (model_dir,
                                                            args.criterion)
                model = pick_model(args, dicts)
Example #4
    def train_f(self, x, y, validation_split=0.2, batch_size=32, epochs=1, verbose=1):
        n_train = int(len(y) * (1 - validation_split))
        xe_train = x[:n_train]
        yf_train = y[:n_train]

        xe_val = x[n_train:]
        yf_val = y[n_train:]

        n_batches = int(n_train / batch_size)
        if n_train % batch_size != 0:
            n_batches += 1

        history = {
            'loss': [],
            'mae': [],
            'rmse': [],
            'val_loss': [],
            'val_mae': [],
            'val_rmse': []
        }

        for e in range(epochs):
            start_epoch_time = time.time()
            loss = 0.0
            mae = 0.0
            for b in range(n_batches):
                xe = xe_train[b * batch_size: (b + 1) * batch_size]
                yf = yf_train[b * batch_size: (b + 1) * batch_size]

                input_feed = {
                    self._xe: xe,
                    self._yf: yf
                }
                output_feed = [self._loss_f, self._mae_f, self._train_op_f]
                l, m, _ = self._sess.run(output_feed, input_feed)

                loss += l
                mae += m
            loss /= n_batches
            mae /= n_batches
            history['loss'].append(loss)
            history['mae'].append(mae)

            val_loss, val_mae = self._sess.run([self._loss_f, self._mae_f],
                                              {
                                                  self._xe: xe_val,
                                                  self._yf: yf_val
                                              })
            history['val_loss'].append(val_loss)
            history['val_mae'].append(val_mae)
            epoch_time = time.time() - start_epoch_time
            if verbose > 0:
                print(
                    "Epoch {}/{}: time={:.2f}s, loss={:.5f}, mae={:.5f}, val_loss={:.5f}, val_mae={:.5f}".format(
                        e + 1, epochs, epoch_time,
                        loss, mae, val_loss, val_mae))
            if utils.early_stop(history['val_loss'], e, patience=self._patience):
                print('Early stop at epoch', (e + 1))
                break
            if np.isnan(loss):
                break
        return history
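Examples #3 and #4 rely on a boolean variant instead: early_stop receives the full validation history plus a patience value and returns True once the best value is at least `patience` epochs old. Below is a minimal sketch matching the call in Example #4 (lower-is-better val_loss); Example #3 applies the same idea to a configurable criterion looked up in a metrics dictionary.

def early_stop(val_losses, epoch, patience=10):
    # Return True when `val_losses` (lower is better) has not reached a new
    # minimum within the last `patience` epochs. Sketch of the behaviour the
    # call above assumes, not the actual utils implementation.
    if epoch < patience:
        return False
    best_epoch = val_losses.index(min(val_losses))
    return epoch - best_epoch >= patience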
Example #5
def main():
    #  set env
    setproctitle.setproctitle(f"train{CONFIG['name']}")
    os.environ["CUDA_VISIBLE_DEVICES"] = CONFIG['gpu_id']
    device = torch.device('cuda')

    #  fix seed
    seed = 123
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    #  load data
    bundle_train_data, bundle_test_data, item_data, assist_data = \
            dataset.get_dataset(CONFIG['path'], CONFIG['dataset_name'], task=CONFIG['task'])

    train_loader = DataLoader(bundle_train_data,
                              2048,
                              True,
                              num_workers=8,
                              pin_memory=True)
    test_loader = DataLoader(bundle_test_data,
                             4096,
                             False,
                             num_workers=16,
                             pin_memory=True)

    #  pretrain
    if 'pretrain' in CONFIG:
        pretrain = torch.load(CONFIG['pretrain'], map_location='cpu')
        print('load pretrain')

    #  graph
    ub_graph = bundle_train_data.ground_truth_u_b
    ui_graph = item_data.ground_truth_u_i
    bi_graph = assist_data.ground_truth_b_i

    #  metric
    metrics = [
        Recall(20),
        NDCG(20),
        Recall(40),
        NDCG(40),
        Recall(80),
        NDCG(80)
    ]
    TARGET = 'Recall@20'  # must match one of the metric titles above

    #  loss
    loss_func = loss.BPRLoss('mean')

    #  log
    log = logger.Logger(os.path.join(CONFIG['log'], CONFIG['dataset_name'],
                                     f"{CONFIG['model']}_{CONFIG['task']}",
                                     TAG),
                        'best',
                        checkpoint_target=TARGET)

    theta = 0.6

    time_path = time.strftime("%y%m%d-%H%M%S", time.localtime(time.time()))

    for lr, decay, message_dropout, node_dropout \
            in product(CONFIG['lrs'], CONFIG['decays'], CONFIG['message_dropouts'], CONFIG['node_dropouts']):
        # vis = VisShow('localhost', 16666,
        #               f'{CONFIG['dataset_name']}-{MODELTYPE.__name__}-{decay}-{lr}-{theta}-3layer')

        visual_path = os.path.join(
            CONFIG['visual'], CONFIG['dataset_name'],
            f"{CONFIG['model']}_{CONFIG['task']}",
            f"{time_path}@{CONFIG['note']}",
            f"lr{lr}_decay{decay}_medr{message_dropout}_nodr{node_dropout}")

        # model
        if CONFIG['model'] == 'BGCN':
            model_type = '1model_3feature'
            graph = [ub_graph, ui_graph, bi_graph]
            info = BGCN_Info(64, decay, message_dropout, node_dropout, 1)
            model = BGCN(info, assist_data, graph, device,
                         pretrain=None).to(device)

        assert model.__class__.__name__ == CONFIG['model']

        # op
        op = optim.Adam(model.parameters(), lr=lr)
        # env
        env = {
            'lr': lr,
            'op': str(op).split(' ')[0],  # Adam
            'dataset': CONFIG['dataset_name'],
            'model': CONFIG['model'],
            'sample': CONFIG['sample'],
        }
        #  print(info)

        #  continue train
        if CONFIG['sample'] == 'hard' and 'conti_train' in CONFIG:
            model.load_state_dict(torch.load(CONFIG['conti_train']))
            print('load model and continue to train')

        retry = CONFIG['retry']  # =1
        while retry >= 0:
            # log
            log.update_modelinfo(info, env, metrics)
            try:
                # train & test
                early = CONFIG['early']
                train_writer = SummaryWriter(log_dir=visual_path,
                                             comment='train')
                test_writer = SummaryWriter(log_dir=visual_path,
                                            comment='test')
                for epoch in range(CONFIG['epochs']):
                    # train
                    trainloss = train(model, epoch + 1, train_loader, op,
                                      device, CONFIG, loss_func)
                    train_writer.add_scalars('loss/single',
                                             {"loss": trainloss}, epoch)
                    # vis.update('train loss', [epoch], [trainloss])

                    # test
                    if epoch % CONFIG['test_interval'] == 0:
                        output_metrics = test(model, epoch + 1, test_loader,
                                              device, CONFIG, metrics)

                        for metric in output_metrics:
                            test_writer.add_scalars(
                                'metric/all',
                                {metric.get_title(): metric.metric}, epoch)
                            if metric == output_metrics[0]:
                                test_writer.add_scalars(
                                    'metric/single',
                                    {metric.get_title(): metric.metric}, epoch)

                        # log
                        log.update_log(metrics, model)
                        #  # show(log.metrics_log)

                        # check overfitting
                        if epoch > 10:
                            if check_overfitting(log.metrics_log,
                                                 TARGET,
                                                 1,
                                                 show=False):
                                break
                        # early stop
                        early = early_stop(log.metrics_log[TARGET],
                                           early,
                                           threshold=0)
                        if early <= 0:
                            break
                train_writer.close()
                test_writer.close()

                log.close_log(TARGET)
                retry = -1
            except RuntimeError:
                retry -= 1
    log.close()