Example no. 1
def main():
    args = get_args()
    config = process_config(args)
    print(config)

    if config.get('seed') is not None:
        random.seed(config.seed)
        torch.manual_seed(config.seed)
        np.random.seed(config.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(config.seed)

    dataset = get_dataset(config.dataset_name).shuffle()
    run_model(Net, dataset, config=config)
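Most of the examples on this page depend on project-specific helpers such as get_args and process_config that are not shown here. Purely as a hypothetical sketch (argparse plus a JSON file wrapped in a dict that also allows attribute access, matching the config.get('seed') / config.seed usage above), they might look like this:

# Hypothetical sketch only; the real get_args/process_config differ per repository.
import argparse
import json


class Config(dict):
    """A dict that also supports attribute access, so both
    config.get('seed') and config.seed work on top-level keys."""
    __getattr__ = dict.__getitem__


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', required=True,
                        help='path to a JSON configuration file')
    return parser.parse_args()


def process_config(args):
    with open(args.config) as f:
        return Config(json.load(f))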
Example no. 2
def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(get_datetime_str())
    create_dir(output_dir)  # Create directory to save log files and outputs
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level='INFO')
    _logger = logging.getLogger(__name__)
    _logger.info("Finished setting up the logger.")

    # Save configs
    save_yaml_config(vars(args), path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Load dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.noise_type, args.B_scale, args.seed)
    _logger.info("Finished loading the dataset.")

    # Load B_init for initialization
    if args.init:
        if args.init_path is None:
            args.init_path = get_init_path('output/')

        B_init = np.load('{}'.format(args.init_path))
        _logger.info("Finished loading B_init from {}.".format(args.init_path))
    else:
        B_init = None

    # GOLEM
    B_est = golem(dataset.X, args.lambda_1, args.lambda_2,
                  args.equal_variances, args.num_iter, args.learning_rate,
                  args.seed, args.checkpoint_iter, output_dir, B_init)
    _logger.info("Finished training the model.")

    # Post-process estimated solution
    B_processed = postprocess(B_est, args.graph_thres)
    _logger.info("Finished post-processing the estimated graph.")

    # Checkpoint
    checkpoint_after_training(output_dir, dataset.X, dataset.B, B_init, B_est,
                              B_processed, _logger.info)
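set_seed is defined elsewhere in that repository; a minimal sketch that is consistent with the seeding blocks in Examples 1 and 6 (assuming Python's random, NumPy and PyTorch are the only sources of randomness) could be:

# Minimal set_seed sketch, mirroring the seeding calls shown in Examples 1 and 6.
import random

import numpy as np
import torch


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)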
Example no. 3
def main():
    args = get_args()
    config = process_config(args.config)

    # logging to the file and stdout
    logger = get_logger(config.log_dir, config.experiment)

    # fix random seed to reproduce results
    random.seed(config.random_seed)
    logger.info('Random seed: {:d}'.format(config.random_seed))

    model = Deeplab(config, logger)

    # Get train dataloader
    target_loader = get_target_train_dataloader(config.datasets.target)

    # Get validation dataloader
    val_loader = get_target_val_dataloader(config.datasets.target)

    if config.mode == 'train':
        model.train(target_loader, val_loader)
Example no. 4
def step_one_test():
    args = get_args()

    print(args)
    one_quest_test_list = read_data_json("./data/test.json")
    print(one_quest_test_list)
    #Checkpoint.CHECKPOINT_DIR_NAME = "0120_0030"
    Checkpoint.CHECKPOINT_DIR_NAME = args.checkpoint_dir_name
    checkpoint_path = os.path.join("./experiment",
                                   Checkpoint.CHECKPOINT_DIR_NAME, "best")
    print('-----', args.checkpoint_dir_name, checkpoint_path)
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model
    if args.cuda_use:
        seq2seq = seq2seq.cuda()
    data_loader = DataLoader(args)
    seq2seq.eval()
    evaluator = Evaluator(vocab_dict=data_loader.vocab_dict,
                          vocab_list=data_loader.vocab_list,
                          decode_classes_dict=data_loader.decode_classes_dict,
                          decode_classes_list=data_loader.decode_classes_list,
                          loss=NLLLoss(),
                          cuda_use=args.cuda_use)
    name = args.run_flag
    test_temp_acc, test_ans_acc = evaluator.evaluate(
        model=seq2seq,
        data_loader=data_loader,
        data_list=one_quest_test_list,
        template_flag=False,  # TODO: changed this to False
        batch_size=64,
        evaluate_type=0,
        use_rule=False,
        mode=args.mode,
        post_flag=args.post_flag,
        name_save=name)
    print(test_temp_acc, test_ans_acc)
Example no. 5
def main():
    # capture the config path from the run arguments
    # then process the json configuration file

    try:
        args = get_args()
        config = process_config(args)
        print(config)

    except Exception as e:
        print("missing or invalid arguments: %s" % e)
        exit(0)

    torch.manual_seed(100)
    np.random.seed(100)
    # torch.backends.cudnn.deterministic = True  # can impact performance
    # torch.backends.cudnn.benchmark = False  # can impact performance

    print("lr = {0}".format(config.hyperparams.learning_rate))
    print("decay = {0}".format(config.hyperparams.decay_rate))
    if config.target_param is not False:  # (0 == False) while (0 is not False)
        print("target parameter: {0}".format(config.target_param))
    print(config.architecture)
    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])
    doc_utils.doc_used_config(config)

    data = DataGenerator(config)
    # create an instance of the model you want
    model_wrapper = ModelWrapper(config, data)
    # create trainer and pass all the previous components to it
    trainer = Trainer(model_wrapper, data, config)
    # here you train your model
    trainer.train()
    # test model, restore best model
    test_dists, test_loss = trainer.test(load_best_model=True)
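The two cuDNN flags commented out above trade training speed for bit-exact reproducibility. If determinism matters more than throughput, they can be enabled next to the manual seeding (standard PyTorch flags, shown here as a standalone sketch):

import torch

# Optional: force deterministic cuDNN kernels; this may slow down training.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False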
Example no. 6
            "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (dev_cost, speed, acc, p, r, f))
        if current_score > best_dev:
            print("Exceed previous best f score:", best_dev)
            if not os.path.exists(args.param_stored_directory +
                                  args.dataset_name + "_param"):
                os.makedirs(args.param_stored_directory + args.dataset_name +
                            "_param")
            model_name = "{}epoch_{}_f1_{}.model".format(
                args.param_stored_directory + args.dataset_name + "_param/",
                idx, current_score)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        gc.collect()


if __name__ == '__main__':
    args, unparsed = get_args()
    for arg in vars(args):
        print(arg, ":", getattr(args, arg))
    # os.environ["CUDA_VISIBLE_DEVICES"] = str(args.visible_gpu)
    seed = args.random_seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = False  # True
    data = data_initialization(args)
    model = BLSTM_GAT_CRF(data, args)
    train(data, model, args)
Example no. 7
    return retrievers, rankers, reformulators, formatters


def set_width(max_width):
    st.markdown(
        f"""
    <style>
        .reportview-container .main .block-container{{
            max-width: {max_width}px;
            padding-top: {5}rem;
            padding-right: {5}rem;
            padding-left: {5}rem;
            padding-bottom: {5}rem;
        }}
        .reportview-container .main {{
            color: BLACK;
            background-color: WHITE;
        }}
    </style>
    """,
        unsafe_allow_html=True,
    )


if __name__ == '__main__':
    args = get_args()
    retriever_models_dict, ranker_models_dict, reformulator_dict, formatter_dict = load_models_on_start(
        args)
    main(args, retriever_models_dict, ranker_models_dict, reformulator_dict,
         formatter_dict)
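set_width simply injects CSS into the running Streamlit app to override the default content width; a typical call (the 1200 px value here is an arbitrary, hypothetical choice) would be:

set_width(1200)  # widen the main block-container to 1200 px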
Example no. 8
def main():
    # Get the config file path
    # Run: python main.py -c configs/ed_config.json          # for softmax
    #      python main.py -c configs/ed_siamese_config.json  # for siamese

    #  Or: python main.py -c configs/who_config.json  # for WHO
    # Visualize with: tensorboard --logdir=experiments/Compare/logs
    try:
        args = get_args()
        config = process_config(args.config)
    except:
        print("missing or invalid arguments")
        exit(0)

    create_dirs([])
    No, max_score = -1, 0
    save_tag = config.exp_name
    # Repeat the experiment 10 times
    for i in range(config.repeat):
        # (1) Load the data
        print('Create the data generator.')
        train_dataset = MRIData(config, train=True)
        test_dataset = MRIData(config, train=False)

        # printData(test_dataset, type='normal')
        '''
        data_num
        (5L, 5L, 32L, 32L) 0.0
        (5L, 5L, 32L, 32L) 2.0
                ...
        (5L, 5L, 32L, 32L) 1.0
        (5L, 5L, 32L, 32L) 3.0
                ...
        '''
        if config.exp_name == 'siamese' or config.exp_name == 'compare':
            # Set up data loaders
            # Returns pairs of images and target same/different
            # if config.isSelect and False:
            # from data_loader.datasets import BalancedBatchSampler
            # train_batch_dataset = BalancedBatchSampler(train_dataset, n_classes=config.classes, n_samples=16)
            # test_batch_dataset = BalancedBatchSampler(test_dataset, n_classes=config.classes, n_samples=16)
            # else:
            train_dataset = SiameseMRI(train_dataset)
            test_dataset = SiameseMRI(test_dataset)

            # printData(test_dataset, type=config.exp_name, only_shape=True)
            '''
            data_num
            (5L, 5L, 32L, 32L) (5L, 5L, 32L, 32L) 1 [2.0, 2.0]
            (5L, 5L, 32L, 32L) (5L, 5L, 32L, 32L) 1 [0.0, 0.0]
                              ...
            (5L, 5L, 32L, 32L) (5L, 5L, 32L, 32L) 0 [3.0, 0.0]
            (5L, 5L, 32L, 32L) (5L, 5L, 32L, 32L) 0 [1.0, 0.0]
                              ...
            '''
        elif config.exp_name == 'triplet':
            # Set up data loaders
            # Returns triplets of images
            train_dataset = TripletMRI(train_dataset)
            test_dataset = TripletMRI(test_dataset)

        # Batch the data
        kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
        # if config.exp_name == 'siamese' and config.isSelect and False:
        # train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_dataset, **kwargs)
        # test_loader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_batch_dataset, **kwargs)
        # else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.batch_size,
            shuffle=True,
            **kwargs)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=config.batch_size,
                                                  shuffle=False,
                                                  **kwargs)

        # printData(test_loader, type=config.exp_name, only_shape = False)
        '''
        for siamese:
        data_num/batch_size
        (16L, 5L, 5L, 32L, 32L) (16L, 5L, 5L, 32L, 32L) (16L,) [(16L,), (16L,)]
                                ...
        (8L, 5L, 5L, 32L, 32L) (8L, 5L, 5L, 32L, 32L) (16L,) [(8L,), (8L,)]
        '''

        # (2) Build the model
        # Set up the network and training parameters
        embedding_net = EmbeddingNet3D(len(config.Fusion),
                                       config.embedding_size)
        if config.exp_name == 'softmax':
            model = ClassificationNet(embedding_net, n_classes=config.classes)
            model.apply(weights_init)
            print(model)

            if cuda:
                model.cuda()
            loss_fn = torch.nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=1e-2)
            scheduler = lr_scheduler.StepLR(optimizer,
                                            8,
                                            gamma=0.1,
                                            last_epoch=-1)
            fit(train_loader, test_loader, model, loss_fn, optimizer, scheduler, config.num_epochs,\
                cuda, log_interval = config.log_interval, metrics=[AccuracyMetric()])
        elif config.exp_name == 'siamese':
            # if config.isSelect and False:
            # model = embedding_net
            # print(model)
            # if cuda:
            # model.cuda()
            # from models.losses import OnlineContrastiveLoss
            # # Strategies for selecting pairs within a minibatch
            # from utils.selector import AllPositivePairSelector, HardNegativePairSelector
            # loss_fn = OnlineContrastiveLoss(margin, HardNegativePairSelector())
            # else:
            model = SiameseNet(embedding_net)
            model.apply(weights_init)
            print(model)
            if cuda:
                model.cuda()
            loss_fn = ContrastiveLoss(config.margin)

            optimizer = optim.Adam(model.parameters(), lr=1e-2)
            scheduler = lr_scheduler.StepLR(optimizer,
                                            8,
                                            gamma=0.1,
                                            last_epoch=-1)
            fit(train_loader, test_loader, model, loss_fn, optimizer, scheduler, config.num_epochs,\
                cuda, log_interval = config.log_interval, obj_label=True)

        elif config.exp_name == 'triplet':
            model = TripletNet(embedding_net)
            model.apply(weights_init)
            print(model)
            if cuda:
                model.cuda()
            loss_fn = TripletLoss(config.margin)

            optimizer = optim.Adam(model.parameters(), lr=1e-2)
            scheduler = lr_scheduler.StepLR(optimizer,
                                            8,
                                            gamma=0.1,
                                            last_epoch=-1)
            fit(train_loader, test_loader, model, loss_fn, optimizer, scheduler, config.num_epochs,\
                cuda, log_interval = config.log_interval, obj_label=True)

        elif config.exp_name == 'compare':
            model = CompareNet(embedding_net, 2 * config.embedding_size)
            model.apply(weights_init)
            print(model)
            if cuda:
                model.cuda()
            loss_fn = torch.nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=1e-2)
            scheduler = lr_scheduler.StepLR(optimizer,
                                            8,
                                            gamma=0.1,
                                            last_epoch=-1)
            fit(train_loader, test_loader, model, loss_fn, optimizer, scheduler, config.num_epochs,\
                cuda, log_interval = config.log_interval, metrics=[AccuracyMetric()], obj_label=True)

        # Reload the data without augmentation or resampling
        # Set up data loaders
        kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
        config.isSample = False  # no more resampling
        config.isAug = False  # no augmentation
        train_dataset = MRIData(config, train=True)
        test_dataset = MRIData(config, train=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=1000,
                                                   shuffle=False,
                                                   **kwargs)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=1000,
                                                  shuffle=False,
                                                  **kwargs)

        # Feature visualization
        train_embeddings_cl, train_labels_cl = extract_embeddings(
            train_loader, model)
        plot_embeddings(train_embeddings_cl,
                        train_labels_cl,
                        save_tag='train',
                        n_classes=config.classes)
        val_embeddings_cl, val_labels_cl = extract_embeddings(
            test_loader, model)
        plot_embeddings(val_embeddings_cl,
                        val_labels_cl,
                        save_tag='test',
                        n_classes=config.classes)

        # Save the whole model
        torch.save(model, 'models/model.pkl')

        # (3) Evaluate the siamese model
        model = torch.load('models/model.pkl')  # load the model
        if config.exp_name == 'compare':
            siamese_eval.score(config, train_loader, test_loader, model, cuda)
        else:
            siamese_eval.eval(config, train_loader, test_loader, model, cuda)
Example no. 9
def main():
    args = get_args()
    config = process_config(args)
    print(config)

    if config.get('seed') is not None:
        random.seed(config.seed)
        torch.manual_seed(config.seed)
        np.random.seed(config.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(config.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ### automatic dataloading and splitting

    sys.stdin = In()

    dataset = PygGraphPropPredDataset(name=config.dataset_name)

    if config.feature == 'full':
        pass
    elif config.feature == 'simple':
        print('using simple feature')
        # only retain the top two node/edge features
        dataset.data.x = dataset.data.x[:, :2]
        dataset.data.edge_attr = dataset.data.edge_attr[:, :2]

    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(config.dataset_name)

    train_loader = DataLoader(dataset[split_idx["train"]],
                              batch_size=config.hyperparams.batch_size,
                              shuffle=True,
                              num_workers=config.num_workers)
    valid_loader = DataLoader(dataset[split_idx["valid"]],
                              batch_size=config.hyperparams.batch_size,
                              shuffle=False,
                              num_workers=config.num_workers)
    test_loader = DataLoader(dataset[split_idx["test"]],
                             batch_size=config.hyperparams.batch_size,
                             shuffle=False,
                             num_workers=config.num_workers)

    model = Net(config.architecture, num_tasks=dataset.num_tasks).to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=config.hyperparams.learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.hyperparams.step_size,
        gamma=config.hyperparams.decay_rate)

    valid_curve = []
    test_curve = []
    train_curve = []
    trainL_curve = []

    writer = SummaryWriter(config.directory)

    ts_fk_algo_hp = str(config.time_stamp) + '_' \
                    + str(config.commit_id[0:7]) + '_' \
                    + str(config.architecture.methods) + '_' \
                    + str(config.architecture.pooling) + '_' \
                    + str(config.architecture.JK) + '_' \
                    + str(config.architecture.layers) + '_' \
                    + str(config.architecture.hidden) + '_' \
                    + str(config.architecture.variants.BN) + '_' \
                    + str(config.architecture.dropout) + '_' \
                    + str(config.hyperparams.learning_rate) + '_' \
                    + str(config.hyperparams.step_size) + '_' \
                    + str(config.hyperparams.decay_rate) + '_' \
                    + 'B' + str(config.hyperparams.batch_size) + '_' \
                    + 'S' + str(config.seed if config.get('seed') is not None else "na") + '_' \
                    + 'W' + str(config.num_workers if config.get('num_workers') is not None else "na")

    for epoch in range(1, config.hyperparams.epochs + 1):
        print("Epoch {} training...".format(epoch))
        train_loss = train(model, device, train_loader, optimizer,
                           dataset.task_type)

        scheduler.step()

        print('Evaluating...')
        train_perf = eval(model, device, train_loader, evaluator)
        valid_perf = eval(model, device, valid_loader, evaluator)
        test_perf = eval(model, device, test_loader, evaluator)

        print('Train:', train_perf[dataset.eval_metric], 'Validation:',
              valid_perf[dataset.eval_metric], 'Test:',
              test_perf[dataset.eval_metric], 'Train loss:', train_loss)

        train_curve.append(train_perf[dataset.eval_metric])
        valid_curve.append(valid_perf[dataset.eval_metric])
        test_curve.append(test_perf[dataset.eval_metric])
        trainL_curve.append(train_loss)

        writer.add_scalars(
            config.dataset_name,
            {ts_fk_algo_hp + '/traP': train_perf[dataset.eval_metric]}, epoch)
        writer.add_scalars(
            config.dataset_name,
            {ts_fk_algo_hp + '/valP': valid_perf[dataset.eval_metric]}, epoch)
        writer.add_scalars(
            config.dataset_name,
            {ts_fk_algo_hp + '/tstP': test_perf[dataset.eval_metric]}, epoch)
        writer.add_scalars(config.dataset_name,
                           {ts_fk_algo_hp + '/traL': train_loss}, epoch)

    writer.close()

    if 'classification' in dataset.task_type:
        best_val_epoch = np.argmax(np.array(valid_curve))
        best_train = max(train_curve)
    else:
        best_val_epoch = np.argmin(np.array(valid_curve))
        best_train = min(train_curve)

    print(
        'Finished test: {}, Validation: {}, epoch: {}, best train: {}, best loss: {}'
        .format(test_curve[best_val_epoch], valid_curve[best_val_epoch],
                best_val_epoch, best_train, min(trainL_curve)))
Example no. 10
def main():
    args = get_args()
    config = process_config(args)
    print(config)

    if config.get('seed') is not None:
        torch.manual_seed(config.seed)
        np.random.seed(config.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(config.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ### automatic dataloading and splitting
    dataset = PygGraphPropPredDataset(name=config.dataset_name)

    seq_len_list = np.array([len(seq) for seq in dataset.data.y])
    print('Target sequences of length <= {}: {:.2%}.'.format(config.max_seq_len, np.sum(seq_len_list <= config.max_seq_len) / len(seq_len_list)))

    split_idx = dataset.get_idx_split()

    # print(split_idx['train'])
    # print(split_idx['valid'])
    # print(split_idx['test'])

    # train_method_name = [' '.join(dataset.data.y[i]) for i in split_idx['train']]
    # valid_method_name = [' '.join(dataset.data.y[i]) for i in split_idx['valid']]
    # test_method_name = [' '.join(dataset.data.y[i]) for i in split_idx['test']]
    # print('#train')
    # print(len(train_method_name))
    # print('#valid')
    # print(len(valid_method_name))
    # print('#test')
    # print(len(test_method_name))

    # train_method_name_set = set(train_method_name)
    # valid_method_name_set = set(valid_method_name)
    # test_method_name_set = set(test_method_name)

    # # unique method name
    # print('#unique train')
    # print(len(train_method_name_set))
    # print('#unique valid')
    # print(len(valid_method_name_set))
    # print('#unique test')
    # print(len(test_method_name_set))

    # # unique valid/test method name
    # print('#valid unseen during training')
    # print(len(valid_method_name_set - train_method_name_set))
    # print('#test unseen during training')
    # print(len(test_method_name_set - train_method_name_set))


    ### building vocabulary for sequence prediction. Only use training data.

    vocab2idx, idx2vocab = get_vocab_mapping([dataset.data.y[i] for i in split_idx['train']], config.num_vocab)

    # test encoder and decoder
    # for data in dataset:
    #     # PyG >= 1.5.0
    #     print(data.y)
    #
    #     # PyG 1.4.3
    #     # print(data.y[0])
    #     data = encode_y_to_arr(data, vocab2idx, config.max_seq_len)
    #     print(data.y_arr[0])
    #     decoded_seq = decode_arr_to_seq(data.y_arr[0], idx2vocab)
    #     print(decoded_seq)
    #     print('')

    ## test augment_edge
    # data = dataset[2]
    # print(data)
    # data_augmented = augment_edge(data)
    # print(data_augmented)

    ### set the transform function
    # augment_edge: add next-token edge as well as inverse edges. add edge attributes.
    # encode_y_to_arr: add y_arr to PyG data object, indicating the array representation of a sequence.
    dataset.transform = transforms.Compose([augment_edge, lambda data: encode_y_to_arr(data, vocab2idx, config.max_seq_len)])

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(config.dataset_name)

    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=config.hyperparams.batch_size, shuffle=True, num_workers=config.num_workers)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=config.hyperparams.batch_size, shuffle=False, num_workers=config.num_workers)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=config.hyperparams.batch_size, shuffle=False, num_workers=config.num_workers)

    nodetypes_mapping = pd.read_csv(os.path.join(dataset.root, 'mapping', 'typeidx2type.csv.gz'))
    nodeattributes_mapping = pd.read_csv(os.path.join(dataset.root, 'mapping', 'attridx2attr.csv.gz'))

    ### Encoding node features into emb_dim vectors.
    ### The following three node features are used.
    # 1. node type
    # 2. node attribute
    # 3. node depth
    node_encoder = ASTNodeEncoder(config.architecture.hidden, num_nodetypes=len(nodetypes_mapping['type']), num_nodeattributes=len(nodeattributes_mapping['attr']), max_depth=20)

    model = Net(config.architecture,
                num_vocab=len(vocab2idx),
                max_seq_len=config.max_seq_len,
                node_encoder=node_encoder).to(device)

    # optimizer = optim.Adam(model.parameters(), lr=0.001)
    optimizer = optim.Adam(model.parameters(), lr=config.hyperparams.learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config.hyperparams.step_size,
                                                gamma=config.hyperparams.decay_rate)

    valid_curve = []
    test_curve = []
    train_curve = []
    trainL_curve = []

    writer = SummaryWriter(config.directory)

    ts_fk_algo_hp = str(config.time_stamp) + '_' \
                    + str(config.commit_id[0:7]) + '_' \
                    + str(config.architecture.nonlinear_conv) + '_' \
                    + str(config.architecture.variants.fea_activation) + '_' \
                    + str(config.architecture.pooling) + '_' \
                    + str(config.architecture.JK) + '_' \
                    + str(config.architecture.layers) + '_' \
                    + str(config.architecture.hidden) + '_' \
                    + str(config.architecture.variants.BN) + '_' \
                    + str(config.architecture.dropout) + '_' \
                    + str(config.hyperparams.learning_rate) + '_' \
                    + str(config.hyperparams.step_size) + '_' \
                    + str(config.hyperparams.decay_rate) + '_' \
                    + 'B' + str(config.hyperparams.batch_size) + '_' \
                    + 'S' + str(config.seed)

    for epoch in range(1, config.hyperparams.epochs + 1):
        print("Epoch {} training...".format(epoch))
        train_loss = train(model, device, train_loader, optimizer)

        scheduler.step()

        print('Evaluating...')
        train_perf = eval(model, device, train_loader, evaluator, arr_to_seq=lambda arr: decode_arr_to_seq(arr, idx2vocab))
        valid_perf = eval(model, device, valid_loader, evaluator, arr_to_seq=lambda arr: decode_arr_to_seq(arr, idx2vocab))
        test_perf = eval(model, device, test_loader, evaluator, arr_to_seq=lambda arr: decode_arr_to_seq(arr, idx2vocab))

        # print({'Train': train_perf, 'Validation': valid_perf, 'Test': test_perf})
        print('Train:', train_perf[dataset.eval_metric],
              'Validation:', valid_perf[dataset.eval_metric],
              'Test:', test_perf[dataset.eval_metric],
              'Train loss:', train_loss)

        train_curve.append(train_perf[dataset.eval_metric])
        valid_curve.append(valid_perf[dataset.eval_metric])
        test_curve.append(test_perf[dataset.eval_metric])
        trainL_curve.append(train_loss)

        writer.add_scalars(config.dataset_name, {ts_fk_algo_hp + '/traP': train_perf[dataset.eval_metric]}, epoch)
        writer.add_scalars(config.dataset_name, {ts_fk_algo_hp + '/valP': valid_perf[dataset.eval_metric]}, epoch)
        writer.add_scalars(config.dataset_name, {ts_fk_algo_hp + '/tstP': test_perf[dataset.eval_metric]}, epoch)
        writer.add_scalars(config.dataset_name, {ts_fk_algo_hp + '/traL': train_loss}, epoch)
    writer.close()

    print('F1')
    best_val_epoch = np.argmax(np.array(valid_curve))
    best_train = max(train_curve)
    print('Finished training!')
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    print('Test score: {}'.format(test_curve[best_val_epoch]))

    print('Finished test: {}, Validation: {}, Train: {}, epoch: {}, best train: {}, best loss: {}'
          .format(test_curve[best_val_epoch], valid_curve[best_val_epoch], train_curve[best_val_epoch],
                  best_val_epoch, best_train, min(trainL_curve)))
Example no. 11
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time, os, csv, random, pdb, sys  # sys is needed for sys.argv below
from pylab import *

from utils.accumulator import Accumulator
from utils.train import *
from utils.mnist import mnist_subset
from utils.config import get_args, setup

args = get_args(sys.argv[1:])
savedir = setup(args)

# To protect against human error!
if args.model == 'dpse':
    from model.vsepro.vsepro_nway_kl import combine
else:
    print("select proper args.model!")
    raise NotImplementedError()

xtr, xtrte, xte, nxtr, nxtrte, nxte = mnist_subset([100] * 5)

# set-embedding placeholder x 10
x_emb = [
    tf.placeholder(tf.float32, [None, 784], name='x_emb_%d' % k)
    for k in range(args.way)
]

# query placeholder x 10
x_qry = [
Example no. 12
        output = [m(x) for m in self.children()]
        if scores is not None:
            scores = scores[:, self.body_parts]
            scores = torch.transpose(scores, 1, 0).long() - 1
            loss = torch.stack([F.cross_entropy(out, tar) for out, tar in zip(output, scores)]).sum()
        else:
            loss = 0

        risks = torch.stack([torch.max(out, dim=1)[1] + 1 for out in output]).transpose(1, 0).detach()
        return risks, loss


if __name__ == '__main__':
    from dataloaders import make_dataloaders
    from utils.config import get_args
    from utils.misc import get_available_device

    args, _ = get_args()
    args.device = get_available_device(0)
    _, _, _, test_loader, num_classes = make_dataloaders(args)

    data = torch.randn((args.test_batch_size, 1024))
    # targets = torch.FloatTensor([[2, 3, 4], [2, 3, 4]])
    _, scores, targets = test_loader.get_batch()
    del test_loader

    model = Classifiers(in_dim=1024, k=num_classes, body_parts=args.body_parts)
    output, loss = model(data, targets=scores)

    print('done')