import argparse

import torch

import data_loader
import net
import utils

from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--data_set', default='val', choices=['train', 'val', 'test'],
                        help='The data set you want to evaluate')
arg_parser.add_argument('--model', default='bilstm_mlp_elmo.pt', help='Model name')


if __name__ == '__main__':
    args = arg_parser.parse_args()
    data_set = args.data_set
    model = args.model

    params = utils.Params('data/balanced/dataset_params.json')
    params.update('experiments/elmo_model/params.json')

    dl = data_loader.DataLoader('data/averaged_elmo/', params)
    data = dl.load_elmo_data([data_set], 'data/averaged_elmo')

    network = net.Network(params)

    network.load_state_dict(torch.load(model))

    # Evaluation
    val_data_iter = dl.elmo_iterator(data[data_set], params, shuffle=False)

    total_correct = 0

    predictions = torch.tensor([], dtype=torch.long)
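Note: every example on this page leans on a project-local utils.Params helper whose exact definition varies from project to project and is not shown here. As a point of reference only, a minimal sketch of the common JSON-backed variant (attribute access plus save/update/dict) might look like the following; the method set is an assumption, not the definition used by any particular example above or below.

import json


class Params:
    """Minimal sketch (assumed API) of a JSON-backed hyperparameter container."""

    def __init__(self, json_path):
        # Load every key of the JSON file as an attribute of this object.
        with open(json_path) as f:
            self.__dict__.update(json.load(f))

    def save(self, json_path):
        # Write the current attributes back to disk.
        with open(json_path, 'w') as f:
            json.dump(self.__dict__, f, indent=4)

    def update(self, json_path):
        # Overlay additional settings from another JSON file.
        with open(json_path) as f:
            self.__dict__.update(json.load(f))

    @property
    def dict(self):
        # Dict-style access, e.g. params.dict['learning_rate'].
        return self.__dict__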
Example #2
    x += tuple(d[f].flat_values for f in (EMT, DMT))
    if group in (qs.QAS, qs.FIX):
        y = (d[OUT].to_tensor(), )
    else:
        y = (d[TGT].to_tensor(), )
    return x, y


def dset_for(ps, root=None, group=None, adapter=adapter, count=None):
    ds = load(ps, root, group, count=count)
    ds = ds.map(lambda x: adapter(x, group), -1)
    return ds.shuffle(1000)


if __name__ == '__main__':
    np.random.seed(12345)
    import utils as qu
    ps = dict(
        dim_batch=5,
        dim_pool=10,
        max_val=1000,
        num_samples=20,
        num_shards=3,
    )
    ps = qu.Params(**ps)
    ss = [s for s in dump(ps)]
    ds = load(ps, shards=ss).map(adapter, -1)
    for i, _ in enumerate(ds):
        pass
    print(f'dumped {i + 1} batches of {ps.dim_batch} samples each')
Example #3
    return metrics_mean, AUROCs


if __name__ == '__main__':
    """
    Evaluates the model on the test set.
    """
    # Load user arguments
    arguments = argument_parser.parse_args()

    # Load hyperparameters from JSON file
    json_path = os.path.join(arguments.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), 'No json configuration file found at {}'.format(json_path)
    parameters = utils.Params(json_path)

    # Record whether GPU is available
    parameters.cuda = torch.cuda.is_available()

    # Set random seed for reproducible experiments
    torch.manual_seed(230)
    if parameters.cuda: torch.cuda.manual_seed(230)

    # Configure logger
    utils.set_logger(os.path.join(arguments.model_dir,
                                  'evaluate_ensemble.log'))

    # Create data loaders for test data
    logging.info('Loading test dataset...')
    test_dataloader = data_loader.fetch_dataloader(
Example #4
            pre_result = pre_result.append(
                {
                    'example_id': int(example_id),
                    'tags': pred_tag,
                    'split_to_ori': s_to_o
                },
                ignore_index=True)

    pre_result.to_csv(path_or_buf=params.params_path / f'{mode}_tags_pre.csv',
                      encoding='utf-8',
                      index=False)


if __name__ == '__main__':
    args = parser.parse_args()
    params = utils.Params(args.pre_model_type, args.ex_index)
    # set type
    params.ds_encoder_type = args.ds_encoder_type

    # Set the GPU used by the model
    torch.cuda.set_device(args.device_id)
    # Print the device currently in use
    print('current device:', torch.cuda.current_device())
    # Predict on the validation set or the test set
    mode = args.mode
    # Set the random seed for reproducible experiments
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    params.seed = args.seed

    # Set the logger
Example #5
        net_classes = {'no-batch-norm': BiggerLeakyUnet,
                       'batch-norm': BiggerLeakyBNUnet}

        for normalization in normalizations:
            print(f'============== normalization: {normalization} ==============')
            self.params.normalization = normalization
            if normalization == 'batch-norm':
                self.params.learning_rate = .1
            else:
                self.params.learning_rate = 1e-5
            self.trainer = Trainer(params=self.params,
                                   net_class=net_classes[normalization],
                                   experiment_dir=self.experiment_dir,
                                   is_toy=self.is_toy,
                                   set_seed=self.set_seed)
            history = self.trainer.train()
            utils.save_history(history, self.trainer, param_name='normalization', name_modifier=name_modifier)


if __name__ == '__main__':

    experiment_dir = Path('experiments/transf_learn_resnet_toy')
    params = utils.Params(experiment_dir / 'params.json')
    tuner = Tuner(params=params,
                  net_class=FullUnetResnet,
                  experiment_dir=experiment_dir,
                  is_toy=True,
                  set_seed=True)
    tuner.tune_lr(rates=(1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6))
Example #6
        # last_report_path = os.path.join(model_dir, f"report_{epoch}.txt")
        # utils.save_report(report, last_report_path)


if __name__ == '__main__':
    # Load the parameters from json file
    args = parser.parse_args()
    model_params_json_path = os.path.join(args.model_dir, 'params.json')
    data_params_json_path = os.path.join(args.data_dir, 'params.json')
    assert os.path.isfile(
        model_params_json_path), "No json configuration file found at {}".format(model_params_json_path)
    assert os.path.isfile(
        data_params_json_path), "No json configuration file found at {}".format(data_params_json_path)

    data_params = utils.DataParams.from_json(data_params_json_path)
    model_params = utils.Params(cuda=torch.cuda.is_available(), src='en', trg='hu')
    model_params.update(model_params_json_path)

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if model_params.cuda:
        torch.cuda.manual_seed(230)

    # Create tensorboard summary writer
    tb = SummaryWriter(args.tensorboard_dir)

    # Set the logger
    utils.set_logger(os.path.join(args.tensorboard_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")
Example #7
def setup_and_train(args):

    #set up the bb run, can choose different algorithm to select next param to try
    bb.run(alg="tree_structured_parzen_estimator")

    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)
    # params.loss_fns = [net.negative_log_partial_likelihood_loss] * (1 if params.linear_output_size > 0 else 0) + [nn.MSELoss()] * (
    #         # params.linear_output_size - 1) + [nn.BCEWithLogitsLoss()] * (params.binary_output_size)
    #         params.linear_output_size - 1) + [nn.BCELoss()] * (params.binary_output_size)
    # params.survival_indices = eval(params.survival_indices)
    # params.continuous_phenotype_indices = eval(params.continuous_phenotype_indices)
    # params.binary_phentoype_indices = eval(params.binary_phentoype_indices)

    # params.loss_excluded_from_training = eval(params.loss_excluded_from_training)
    # params.metrics = eval(params.metrics)

    params.loss_fns, params.mask, linear_output_size, binary_output_size = net.create_lossfns_mask(
        params)
    print(params.loss_fns)
    print(params.mask)

    # use GPU if available
    params.cuda = torch.cuda.is_available()
    # print(params.cuda)

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    # if params.cuda:
    # torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))
    tensorboard_dir = os.path.join(
        args.model_dir, 'tensorboardLog', args.tensorboard_prefix +
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    writer = SummaryWriter(tensorboard_dir)
    copy(json_path, tensorboard_dir)
    copy(args.data_dir, tensorboard_dir)
    logging.info("Tensorboard logging directory {}".format(tensorboard_dir))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders
    datasets = data_generator.fetch_dataloader_list(args.prefix,
                                                    ['train', 'val'],
                                                    args.data_dir, params)
    _, train_input_size, _ = datasets[0][0]['train']
    # _, _, val_dl = dataloaders['val']
    # train_dl = dataloaders['train']
    # val_dl = dataloaders['val']
    input_size = train_input_size
    # params.dict['num_batches_per_epoch'] = train_steps_gen
    logging.info("- done.")

    # Define the model and optimizer
    # if len(params.out_channels_list) > 0:
    embedding_model = net.EmbeddingNet(
        net.ConvolutionBlock,
        input_size,
        out_channels_list=params.out_channels_list,
        FC_size_list=params.FC_size_list,
        embedding_size=params.embedding_size,
        kernel_sizes=params.kernel_sizes,
        strides=params.strides,
        dropout_rate=params.dropout_rate)
    # else:
    #     embedding_model = net.EmbeddingNet_FC(
    #         net.FullConnectedBlock, input_size, FC_size_list=params.FC_size_list, embedding_size=params.embedding_size, dropout_rate=params.dropout_rate)

    outputs = net.outputLayer_simple(params.embedding_size,
                                     linear_output_size=linear_output_size,
                                     binary_output_size=binary_output_size)

    if params.cuda:
        # model = model.cuda()
        embedding_model = embedding_model.cuda()
        outputs = outputs.cuda()

    ### TODO: change other params to bb modifiable params
    lr = bb.loguniform("lr", 10e-4, 10e-2)

    #use the bbopt params for learning rate
    embedding_optimizer = optim.Adam(embedding_model.parameters(),
                                     lr=lr,
                                     weight_decay=params.weight_decay)
    outputs_optimizer = optim.Adam(outputs.parameters(),
                                   lr=lr,
                                   weight_decay=params.weight_decay)

    # fetch loss function and metrics
    # loss_fn = net.negative_log_partial_likelihood
    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    val_metrics = train_and_evaluate(embedding_model, outputs, datasets,
                                     embedding_optimizer, outputs_optimizer,
                                     metrics, params, args.model_dir,
                                     tensorboard_dir, args.restore_file)
    # writer.export_scalars_to_json("./all_scalars.json")
    writer.close()

    bb.remember(val_metrics)
    bb.maximize(val_metrics[params.best_model_metric])
Example #8
    model.save_weights(PREFIX + '_weights.hdf5')
    """

    with open(PREFIX + '_trainhist.keras', 'wb') as f:
        pickle.dump(history.history, f)

    return model, history


if __name__ == "__main__":
    from keras.backend import tensorflow_backend
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    if TRAIN:

        params = utils.Params("./configurations/example2.json")

        neural_network_2c(params)

    else:
        model = load_model(PREFIX + '_model.hdf5',
                           custom_objects={"rmse": rmse})
        with open("snp_X3k.keras", 'rb') as f:
            X = pickle.load(f)
        with open("snp_y3k.keras", 'rb') as f:
            y = pickle.load(f)

        y_pred = model.predict(X, batch_size=32)
        diff = y_pred - y
        mean_diff = np.mean(diff, axis=0)
        print(mean_diff)
Example #9
def autosim(args, eng):

    os.makedirs(args.output_dir, exist_ok=True)
    # Set the logger
    utils.set_logger(os.path.join(args.output_dir, 'train.log'))

    copyfile(args.output_dir)

    # Load parameters from json file
    json_path = os.path.join(args.output_dir, 'Params.json')
    assert os.path.isfile(json_path), "No json file found at {}".format(
        json_path)
    params = utils.Params(json_path)

    # Add attributes to params
    params.output_dir = args.output_dir
    params.cuda = torch.cuda.is_available()
    params.restore_from = args.restore_from
    params.numIter = int(params.numIter)
    params.noise_dims = int(params.noise_dims)
    params.gkernlen = int(params.gkernlen)
    params.step_size = int(params.step_size)
    params.gen_ver = int(args.gen_ver)
    params.dime = 1
    if args.wavelength is not None:
        params.wavelength = int(args.wavelength)
    if args.angle is not None:
        params.angle = int(args.angle)
    # build tools: a max recorder and a tensorboard writer
    max_recorder = utils.max_recorder()
    params.recorder = max_recorder
    writer = SummaryWriter(log_dir=r'./scan/runs')
    params.writer = writer

    # make directory
    os.makedirs(args.output_dir + '/outputs', exist_ok=True)
    os.makedirs(args.output_dir + '/model', exist_ok=True)
    os.makedirs(args.output_dir + '/figures/histogram', exist_ok=True)
    os.makedirs(args.output_dir + '/figures/deviceSamples', exist_ok=True)
    os.makedirs(args.output_dir + '/figures/deviceSamples_max', exist_ok=True)
    os.makedirs(args.output_dir + '/deg{}_wl{}_gen_ver{}'.format(
        params.angle, params.wavelength, params.gen_ver),
                exist_ok=True)
    # Define the models
    if params.gen_ver == 0:
        generator = Generator0(params)
    else:
        generator = Generator(params)

    # Move to gpu if possible
    if params.cuda:
        generator.cuda()

    # Define the optimizer
    optimizer = torch.optim.Adam(generator.parameters(),
                                 lr=params.lr,
                                 betas=(params.beta1, params.beta2))

    # Define the scheduler
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=params.step_size,
                                                gamma=params.gamma)

    # Load model data
    if args.restore_from is not None:
        params.checkpoint = utils.load_checkpoint(args.restore_from, generator,
                                                  optimizer, scheduler)
        logging.info('Model data loaded')

    #set the timer
    timer = utils.timer()

    # Train the model and save
    if params.numIter != 0:
        logging.info('Start training')
        train(generator, optimizer, scheduler, eng, params)

    # Generate images and save
    logging.info('Start generating devices')
    evaluate(generator, eng, numImgs=500, params=params)

    timer.out()
    writer.close()
Example #10
import os
import utils
import matplotlib.pyplot as plt
import numpy as np
from YOLO import YOLO

params = utils.Params('experiment/params.json')
params.device = "cpu"

images = []
for i in range(32):
	name = utils.get_image_name(i)
	image = plt.imread('./data/raw_GTSDB/' + name)
	images.append(image)
images = np.array(images)

yolo = YOLO(params)
output = yolo.predict(images)

for i in range(output.shape[0]):
	plt.subplot(4, 8, i+1)
	plt.imshow(output[i])
plt.show()

Example #11
    for i in torch.arange(x.shape[0]):
        include = np.delete(np.arange(x.shape[0]), i)
        # p = get_class_probs(out_z[i,:], c[include,:], l[include], out_w[include], params)
        p = get_class_probs(out_z[i, :], c[include, :], l[include], None,
                            params)
        loss += loss_fn(p, l[i], params)

    print(", loss: {}".format(loss.item()))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


if __name__ == "__main__":
    # Load the parameters from json file
    args = parser.parse_args()

    params = utils.Params("params.json")

    model = Net(params)
    optimizer = optim.Adam(model.parameters(), params.lr)

    x, t = simulate_data(params)
    data = {"x": x, "target": t}
    storage = {}

    for epoch in range(params.epochs):
        train(data, model, optimizer, storage, args, params, epoch + 1)
Example #12
def train_from_workspace(workspace_dir):
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters from json file
    args = parser.parse_args()
    src_json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(src_json_path), "No json configuration file found at {}".format(src_json_path)

    trgt_json_path = os.path.join(model_dir, 'params.json')
    if not os.path.exists(model_dir):
        print("Workspace Model Directory does not exist! Making directory {}".format(model_dir))
        os.mkdir(model_dir)
    else:
        print("Workspace Model Directory exists! ")

    shutil.copyfile(src_json_path, trgt_json_path)

    params = utils.Params(trgt_json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(params.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_from_dir(['train', 'val'], params.data_dir)
    train_data = data['train']
    val_data = data['val']

    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']

    logging.info("- done.")

    # Define the model and optimizer
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
    loss_fn = net.loss_fn
    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    best_eval_acc = train_and_evaluate(model, train_data, val_data, optimizer, loss_fn, metrics, params, params.model_dir,
                       args.restore_file)

    return best_eval_acc
Example #13
def main():
    # Training settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        type=str,
                        default='tire',
                        help='Dataset name (default: tire)')
    parser.add_argument(
        '--root_path',
        default=
        r'D:\2020\project_small_data\Tire_inspection\tire_inspection_cropped_data_final',
        help="Directory containing the dataset")

    parser.add_argument('--experiment_path',
                        type=str,
                        default='exp_1',
                        help='Name of the experiment (dir where all the log files '
                             'and trained weights of the experiment will be saved)')

    parser.add_argument(
        '--restore_file',
        default='rotNet_tire_resnet-18_4rot_epoch0_lr_checkpoint.pth',
        help='Name of the file in --experiment_path containing weights to load')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='Random seed (default: 1)')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    args.experiment_path = r'D:\2020\project_small_data\Small_Data\pretraining\experiment_dir\exp_1'
    args.restore_file = 'rotNet_tire_resnet-18_lr_best.pth'

    yaml_path = os.path.join(args.experiment_path, 'params.yaml')
    assert os.path.isfile(
        yaml_path), "No parameters config file found at {}".format(yaml_path)
    params = utils.Params(yaml_path)

    use_cuda = params.use_cuda and torch.cuda.is_available()
    device = torch.device(
        "cuda:{}".format(params.cuda_num) if use_cuda else "cpu")
    params.use_cuda = use_cuda
    ## get the dataloaders
    params.root_path = args.root_path
    params.pretraining = None
    dloader_train, dloader_val, dloader_test = get_data(params)

    # Load the model
    params.num_classes = 4
    model = get_model(params,
                      os.path.join(args.experiment_path, args.restore_file))
    model = model.to(device)

    layer_name = model.avg_pool
    sf = SaveFeatures(layer_name)  ## Output before the last FC layer

    # save the feature embeddings for every image
    train_feat_path = os.path.join(args.experiment_path,
                                   'train_features_dict.p')
    val_feat_path = os.path.join(args.experiment_path, 'val_features_dict.p')
    test_feat_path = os.path.join(args.experiment_path, 'test_features_dict.p')

    img_names,features_dict = save_features_as_dict(model,dloader_train,sf,\
                                            save_path=train_feat_path,num_batch='all')
    img_names,features_dict = save_features_as_dict(model,dloader_val,sf,\
                                            save_path=val_feat_path,num_batch='all')
    img_names,features_dict = save_features_as_dict(model,dloader_test,sf,\
                                            save_path=test_feat_path,num_batch='all')
    hash_params = {'hash_size': 20, 'num_tables': 5, 'dim': 18432}
    hash_path = os.path.join(args.experiment_path, 'features_hash.p')
    save_embedding_hash(hash_params, hash_path, img_names, features_dict)
Example #14
File: train.py  Project: ygcinar/SmoothI
def main(args):
    if args.cv:
        folds = range(args.fold, args.nr_folds + 1)
    else:
        folds = [args.fold]
    for fold in folds:
        loop_restore_file = args.restore_file
        if args.load_params:
            json_path = os.path.join(args.model_dir, 'params.json')
            assert os.path.isfile(
                json_path), "No json configuration file found at {}".format(
                    json_path)
            params = utils.Params(json_path)
            params.tensortype = torch.float32
            args.model_dir_fold = os.path.join(args.model_dir,
                                               'fold%s/' % fold)
        else:
            params, exp_path = args_to_params(args)
            if params.tensortype == 'float32':
                params.tensortype = torch.float32
            args.model_dir_fold = os.path.join(exp_path, 'fold%s/' % fold)
            args.model_dir = exp_path
            if args.cv:
                args.load_params = True
        # Set the random seed for reproducible experiments
        torch.manual_seed(SEED)
        if params.cuda: torch.cuda.manual_seed_all(SEED)
        #
        if not os.path.exists(args.model_dir_fold):
            os.makedirs(args.model_dir_fold)
        #
        # reset logger
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
        # Set the logger
        utils.set_logger(os.path.join(args.model_dir_fold, 'train.log'))
        #
        # parent_dir = [folder for folder in args.model_dir_fold.split('/') if 'experiment' in folder][0]
        tb_dir = args.model_dir_fold  #args.model_dir_fold.replace(parent_dir, parent_dir + '/tb_logs').replace('/fold', '_fold')
        logging.info('Saving tensorboard logs to {}'.format(tb_dir))
        tb_writer = SummaryWriter(tb_dir)
        #
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logging.info('using {}'.format(device))
        if args.gpu:
            assert device != 'cpu'
        # save model parameters before training
        if args.save_first:
            model = initialize_model(params, device=device)
            criterion, optimizer = initialize_loss_and_optimizer(params,
                                                                 model,
                                                                 device=device)
            utils.save_checkpoint(
                {
                    'epoch': 0,
                    'state_dict': model.state_dict(),
                    'optim_dict': optimizer.state_dict()
                },
                is_best=False,
                checkpoint=args.model_dir_fold,
                save_last=False,
                is_first=True)
        logging.info("Loading the datasets...")
        #  getting training data in minibatches
        train_dataloader, val_dataloader = initialize_dataloader(params, fold)
        # initialize model torch.nn layer
        model = initialize_model(params, device=device)
        # initialize training criterion and optimizer
        criterion, optimizer = initialize_loss_and_optimizer(params,
                                                             model,
                                                             device=device)
        #
        logging.info('parameters: {}'.format(
            params.__dict__))  # log parameters
        #
        if args.dont_continue:
            loop_restore_file = None
        else:
            restore_path = os.path.join(args.model_dir_fold, 'last.pth.tar')
            if os.path.exists(restore_path):
                logging.info('Restoring from last.pth.tar')
                loop_restore_file = 'last'
        # Train the model
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        main_train_and_evaluate(model,
                                train_dataloader,
                                val_dataloader,
                                optimizer,
                                criterion,
                                params,
                                args.model_dir_fold,
                                loop_restore_file,
                                tb_writer=tb_writer,
                                device=device,
                                evol_val=True)
        logging.info("- done.")
Example #15
def predict_from_workspace(workspace_dir, input_data):
    """
        Evaluate the model on the test set.
    """
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters
    args = parser.parse_args()
    trgt_json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        trgt_json_path), "No json configuration file found at {}".format(
            trgt_json_path)

    params = utils.Params(trgt_json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_for_predict(input_data)
    batch_sentences = data["predict"]["data"]

    # compute length of longest sentence in batch
    batch_max_len = max([len(s) for s in batch_sentences])

    # prepare a numpy array with the data, initialising the data with pad_ind and all labels with -1
    # initialising labels to -1 differentiates tokens with tags from PADding tokens
    batch_data = data_loader.pad_ind * np.ones(
        (len(batch_sentences), batch_max_len))

    # copy the data to the numpy array
    for j in range(len(batch_sentences)):
        cur_len = len(batch_sentences[j])
        batch_data[j][:cur_len] = batch_sentences[j]

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    logging.info("Starting prediction")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    results = predict(model, batch_data)

    return results
Example #16
                pre_result = pre_result.append(
                    {
                        'example_id': int(example_id),
                        'tags': pre_bio_labels,
                        'split_to_ori': s_t_o[:act_len]
                    },
                    ignore_index=True)

    pre_result.to_csv(path_or_buf=params.params_path / f'{mode}_tags_pre.csv',
                      encoding='utf-8',
                      index=False)


if __name__ == '__main__':
    args = parser.parse_args()
    params = utils.Params(args.ex_index)

    # Set the GPU used by the model
    torch.cuda.set_device(args.device_id)
    # Print the device currently in use
    print('current device:', torch.cuda.current_device())
    # Predict on the validation set or the test set
    mode = args.mode
    # Set the random seed for reproducible experiments
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    params.seed = args.seed

    # Set the logger
    utils.set_logger()
Example #17
        # save histories to csv
        utils.save_metric_histories(train_histories, valid_histories,
                                    results_path)


if __name__ == '__main__':
    args = parser.parse_args()

    # load json params
    params_path = args.experiment
    assert os.path.isfile(
        params_path), "No json configuration file found at {}".format(
            params_path)

    params = utils.Params()
    params.load(params_path)

    # load json features
    features_path = os.path.join(args.path_to_data, params['dataset'],
                                 'features.json')
    assert os.path.isfile(
        features_path), "No json features file found at {}".format(
            features_path)

    features = utils.Features()
    features.load(features_path)

    # update params with features - needed for network construction
    params.update(features)
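Note: Example #17 assumes a different Params variant from the JSON-path constructor sketched after Example #1: a no-argument constructor, a load(path) method, dict-style subscripting (params['dataset']), and an update() that accepts another mapping-like object (here a Features instance). A hedged sketch of such a variant, assuming Features is also mapping-like, could be:

import json


class Params(dict):
    """Hypothetical dict-based variant with load() and attribute access."""

    def load(self, json_path):
        # Read a JSON file and merge its keys into this mapping.
        with open(json_path) as f:
            self.update(json.load(f))

    def __getattr__(self, name):
        # Allow params.dataset as well as params['dataset'].
        try:
            return self[name]
        except KeyError as err:
            raise AttributeError(name) from err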
Example #18
                prompt: d.get(prompt)['predictions']
                for prompt in d
            }

        return self


if __name__ == '__main__':
    args = parser.parse_args()
    # Loading the evaluation dataset
    print("Loading dataset")
    data_params_json_path = os.path.join(args.data_dir, 'params.json')
    data_params = utils.DataParams.from_json(data_params_json_path)
    val_dataset = DoulingoDataset(data_params, split='val')
    # Loading the model
    print("Loading model...")
    checkpoint = os.path.join(args.model_dir,
                              f"runs/{args.checkpoint}.pth.tar")
    config = utils.Params(cuda=torch.cuda.is_available(), src='en', trg='hu')
    model = Net(config)
    checkpoint = torch.load(checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    print("Finished Loading")
    # Evaluation ...
    print("Starting Evaluation..")
    if not os.path.exists(args.results_dir):
        os.mkdir(args.results_dir)
    metrics = evaluate_model(model.cuda(), val_dataset, args.results_dir)
    result_json = os.path.join(args.results_dir, 'metrics.json')
    utils.save_dict_to_json(metrics, result_json)
Example #19
            print('repeat: ', i)
            self._reset_params()
            max_test_acc, max_f1 = self._train(criterion, optimizer, max_test_acc_overall=max_test_acc_overall)
            print('max_test_acc: {0}     max_f1: {1}'.format(max_test_acc, max_f1))
            max_test_acc_overall = max(max_test_acc, max_test_acc_overall)
            max_f1_overall = max(max_f1, max_f1_overall)
            print('#' * 100)
        print("max_test_acc_overall:", max_test_acc_overall)
        print("max_f1_overall:", max_f1_overall)


if __name__ == '__main__':
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path),  'No json configuration file found at {}'.format(json_path)
    opt = utils.Params(json_path)
    
    model_classes = {
        'base_model': Cabasc, 
        'cabasc': Cabasc
    }
    dataset_files = {
        'twitter': {
            'train': 'data/datasets/Twitter_Train.raw',
            'test': 'data/datasets/Twitter_Test.raw'
        },
        'restaurant': {
            'train': 'data/datasets/Restaurants_Train.xml.seg',
            'test': 'data/datasets/Restaurants_Test.xml.seg'
        },
        'laptop': {
Example #20
def mle_k(dataset_name,
          target_model,
          task='classification',
          sampled_number=10,
          without_wne=False,
          k=16,
          s=0,
          print_iter=10,
          debug=False):
    X = []
    y = []
    params = utils.Params(target_model)
    ps = params.arg_names
    total_t = 0.0
    info = []
    X_t, res_t = None, -1.0
    if without_wne:
        gp = utils.GaussianProcessRegressor()
    else:
        K = utils.K(len(ps))
        gp = utils.GaussianProcessRegressor(K)
    for t in range(sampled_number):
        b_t = time.time()
        i = t
        wne = get_wne(dataset_name, 'sampled/s{}'.format(i), cache=True)
        for v in range(k):
            kargs = params.random_args(ps)
            res = get_result(dataset_name, target_model, task, kargs,
                             'sampled/s{}'.format(i))
            if without_wne:
                X.append([kargs[p] for p in ps])
            else:
                X.append(np.hstack(([kargs[p] for p in ps], wne)))
            if debug:
                print('sample {}, {}/{}, kargs: {}, res: {}, time: {:.4f}s'.
                      format(t, v, k, [kargs[p] for p in ps], res,
                             time.time() - b_t))
            y.append(res)

    for t in range(s):
        b_t = time.time()
        gp.fit(np.vstack(X), y)
        X_temp, res_temp = _get_mle_result(gp, dataset_name, target_model,
                                           task, without_wne, params, ps, 0, X,
                                           y)
        if without_wne:
            X.append(X_temp)
        else:
            X.append(np.hstack((X_temp, wne)))
        y.append(res_temp)
        if res_t < res_temp:
            res_t = res_temp
            X_t = X_temp
        e_t = time.time()
        total_t += e_t - b_t
        info.append([res_temp, total_t])
        print('iters: {}/{}, params: {}, res: {}, time: {:.4f}s'.format(
            t, s, X_temp, res_temp, total_t))
    if debug:
        return X_t, res_t, info
    return X_t, res_t
Example #21
                patience_counter += 1
            else:
                patience_counter = 0
        else:
            patience_counter += 1

        # Early stopping and logging best f1
        if (patience_counter > params.patience_num
                and epoch > params.min_epoch_num) or epoch == args.epoch_num:
            logging.info("Best val f1: {:05.2f}".format(best_val_f1))
            break


if __name__ == '__main__':
    args = parser.parse_args()
    params = utils.Params(ex_index=args.ex_index)
    utils.set_logger(log_path=os.path.join(params.params_path, 'train.log'),
                     save=True)

    if args.multi_gpu:
        params.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        n_gpu = torch.cuda.device_count()
        params.n_gpu = n_gpu
    else:
        # Set the GPU used by the model
        torch.cuda.set_device(3)
        # Print the device currently in use
        print('current device:', torch.cuda.current_device())
        n_gpu = 1
        params.n_gpu = n_gpu
Example #22
def main(stride):
    logger = logging.getLogger('DeepAR.Train')

    arg = {'model_name' : f'base_stock_stride={stride}',
           'data_folder' : 'data',
           'dataset': 'stock',
           'relative_metrics' : 0,
           'sampling' : 0,
           'restore_file' : None,
           'save_best' : 0,
           'generate_features' : 0,
           'default_base' : 1,
           'save_directory' : 'stock',
           'stride_size' : 8
           }

    train_files, test_files = prepare_data_main(stride, arg)

    model_dir = os.path.join('experiments', arg['model_name'])
    json_path = os.path.join(model_dir, 'params.json')
    data_dir = os.path.join(arg['data_folder'], arg['dataset'])
    assert os.path.isfile(json_path), f'No json configuration file found at {json_path}'
    params = utils.Params(json_path)

    params.relative_metrics = arg['relative_metrics']
    params.sampling = arg['sampling']
    params.model_dir = model_dir
    params.plot_dir = os.path.join(model_dir, 'figures')

    # create missing directories
    try:
        os.mkdir(params.plot_dir)
    except FileExistsError:
        pass

    utils.set_logger(os.path.join(model_dir, 'train.log'))

    # use GPU if available
    cuda_exist = torch.cuda.is_available()
    # Set random seeds for reproducible experiments if necessary

    if cuda_exist:
        params.device = torch.device('cuda')
        # torch.cuda.manual_seed(240)
        logger.info('Using Cuda...')
        model = net.Net(params).cuda()
    else:
        params.device = torch.device('cpu')
        # torch.manual_seed(230)
        logger.info('Not using cuda...')
        model = net.Net(params)

    torch.manual_seed(777)
    torch.cuda.manual_seed(777)
    np.random.seed(777)

    logger.info('Loading the datasets...')

    train_set = TrainDataset(data_dir, arg['dataset'], params.num_class, data = train_files[0], label = train_files[-1])
    test_set = TestDataset(data_dir, arg['dataset'], params.num_class, data = test_files[0],
                           v = test_files[1], label = test_files[-1])
    sampler = WeightedSampler(data_dir, arg['dataset'], v = train_files[1]) # Use weighted sampler instead of random sampler
    train_loader = DataLoader(train_set, batch_size=params.batch_size, sampler=sampler, num_workers=4)
    test_loader = DataLoader(test_set, batch_size=params.predict_batch, sampler=RandomSampler(test_set), num_workers=4)
    logger.info('Loading complete.')

    logger.info(f'Model: \n{str(model)}')
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function
    loss_fn = net.loss_fn

    # Train the model
    logger.info('Starting training for {} epoch(s) with stride_size {}'.format(params.num_epochs,
                                                                               stride))
    train_and_evaluate(model,
                       train_loader,
                       test_loader,
                       optimizer,
                       loss_fn,
                       params,
                       arg['restore_file'],
                       arg)

    logger.info(f'Finished processing {stride}')
    return True
Example #23
def set_params():

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        default='elect',
                        help='Name of the dataset')
    parser.add_argument('--data-folder',
                        default='data',
                        help='Parent dir of the dataset')
    parser.add_argument('--model-name',
                        default='base_model',
                        help='Directory containing params.json')
    parser.add_argument(
        '--relative-metrics',
        action='store_true',
        help='Whether to normalize the metrics by label scales')
    parser.add_argument(
        '--restore-file',
        default='best',
        help='Optional, name of the file in --model_dir containing weights '
             'to reload before training')  # 'best' or 'epoch_#'
    parser.add_argument('--output_folder', help='Output folder for plots')

    # Attack parameters
    parser.add_argument('--c',
                        nargs='+',
                        type=float,
                        default=[0.01, 0.1, 1, 10, 100],
                        help='list of c coefficients (see Carlini et al.)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--batch_size',
                        nargs='+',
                        type=int,
                        default=50,
                        help='Batch size for perturbation generation')
    parser.add_argument('--n_iterations',
                        type=int,
                        default=1000,
                        help='Number of iterations for attack')
    parser.add_argument('--target',
                        type=int,
                        default=-7,
                        help='Attacking output time')
    parser.add_argument('--tolerance',
                        nargs='+',
                        type=float,
                        default=[0.01, 0.1, 1],
                        help='Max perturbation L2 norm')

    parser.add_argument('--debug', action="store_true", help='Debug mode')

    # Batching
    parser.add_argument('--batch_c',
                        type=int,
                        default=6,
                        help='Number of c values batched together')

    # Load the parameters
    args = parser.parse_args()
    model_dir = os.path.join('experiments', args.model_name)
    json_path = os.path.join(model_dir, 'params.json')
    data_dir = os.path.join(args.data_folder, args.dataset)
    assert os.path.isfile(
        json_path), 'No json configuration file found at {}'.format(json_path)

    params = utils.Params(json_path)

    params.model_dir = model_dir
    params.plot_dir = os.path.join(model_dir, 'figures')
    params.c = args.c
    params.n_iterations = args.n_iterations
    params.tolerance = args.tolerance
    params.batch_size = args.batch_size
    params.learning_rate = args.lr
    params.output_folder = os.path.join("attack_logs", args.output_folder)
    params.batch_c = args.batch_c
    params.target = args.target

    if not os.path.exists(params.output_folder):
        os.makedirs(params.output_folder)

    with open(os.path.join(params.output_folder, "params.txt"),
              'w') as param_file:
        json.dump(params.dict, param_file)

    return params, model_dir, args, data_dir
Example #24
def params():
    """ read params from json file """
    return utils.Params('../experiments/base-model/params.json')
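Note: Example #24 reads like a pytest fixture whose @pytest.fixture decorator fell outside the captured snippet. Assuming that decorator and an importable utils module, a test consumes it simply by naming the fixture as an argument; a small illustrative sketch (the 'learning_rate' key is an assumption):

import pytest

import utils


@pytest.fixture
def params():
    """Read params from the base-model JSON file, as in Example #24."""
    return utils.Params('../experiments/base-model/params.json')


def test_params_exposes_learning_rate(params):
    # pytest injects the fixture's return value; adjust the key to whatever
    # the experiment's params.json actually defines.
    assert hasattr(params, 'learning_rate')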
Example #25
    def __init__(self,
                 params=None,
                 experiment_dir=Path('experiments/bigger_leaky_unet'),
                 net_class=None,
                 set_seed=False,
                 is_toy=False
                 ):

        tf.keras.backend.clear_session()

        # parameters
        if params:
            self.params = params
        else:
            self.params = utils.Params(experiment_dir / 'params.json')

        # net and model
        self.net = net_class(params=self.params, set_seed=set_seed)
        self.model = self.net.get_model()

        # directories and files
        self.is_toy = is_toy
        if not is_toy:
            self.data_dir = Path.home() / 'data/isic_2018'
        else:
            self.data_dir = Path.home() / 'data/isic_2018/toy'

        self.experiment_dir = experiment_dir
        self.weight_file = self.experiment_dir / 'weights'

        # data generators
        self.data_gen = SkinLesionDataGen(params=self.params,
                                          data_dir=self.data_dir)
        self.train_gen = self.data_gen.get_train_gen()
        self.val_gen = self.data_gen.get_val_gen()

        # optimizer
        if self.params.optimizer == 'adam':
            self.optimizer = tf.keras.optimizers.Adam(lr=self.params.learning_rate)
        elif self.params.optimizer == 'sgd':
            self.optimizer = tf.keras.optimizers.SGD(learning_rate=self.params.learning_rate,
                                                     momentum=.9,
                                                     nesterov=True)
        else:
            raise ValueError

        # metrics and loss
        # self.metrics = ['accuracy', pixel_diff]
        # self.loss = self.params.loss
        self.metrics = [utils.jaccard_coef]
        self.loss = utils.jaccard_coef_loss
        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss,
                           metrics=self.metrics)

        # callbacks
        self.callbacks = [
            tf.keras.callbacks.ModelCheckpoint(str(self.weight_file),
                                               save_weights_only=True,
                                               monitor='val_loss',
                                               save_best_only=True,
                                               verbose=1),
            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.75,
                                                 patience=5,
                                                 min_lr=1e-6,
                                                 verbose=1),
            tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=1e-3,
                                             patience=15,
                                             mode='min',
                                             verbose=1)
        ]
Example #26
        y = pickle.load(f)

    history = model.fit(X, y, epochs=NUMEPOCHS, callbacks=[early_stop, check, metric])
    
    with open(PREFIX + '_trainhist.keras', 'wb') as f:
        pickle.dump(history.history, f)
    
    return model, history

if __name__ == "__main__":
    from keras.backend import tensorflow_backend
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    if TRAIN:
            
        params = utils.Params("./configurations/reverse.json")
        
        neural_network_2c(params)
    
    else:
        model = load_model(PREFIX + '_model.hdf5', custom_objects={"rmse": rmse})
        with open("snp_X3k.keras", 'rb') as f:
            X = pickle.load(f)
        with open("snp_y3k.keras", 'rb') as f:
            y = pickle.load(f)

        y_pred = model.predict(X, batch_size=32)
        diff = y_pred - y
        mean_diff = np.mean(diff, axis=0)
        print(mean_diff)
        print(np.mean(mean_diff))
Example #27
    Returns: (float) accuracy in [0,1]
    """
    # print('outputs', outputs)
    # print('labels', labels)
    outputs = np.argmax(outputs, axis=1)
    # print('outputs', outputs)
    return np.sum(outputs == labels) / float(labels.size)


# maintain all metrics required in this dictionary- these are used in the training and evaluation loops
metrics = {
    'accuracy': accuracy,
    # could add more metrics such as accuracy for each token type
}

if __name__ == '__main__':
    # Test for class `LeNet5`
    import torch
    import sys
    sys.path.append(".")
    import utils

    params = utils.Params('./experiments/cifar10_lenet5/params.json')
    model = LeNet5(params)
    print(model)
    x = torch.randn(2, 3, 32, 32)
    print(x)
    y = model(x)
    print(y)
    print(y.size())
Example #28
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage = usage)

    parser.add_option("--test", action = "store_true", dest = "test", default = False)

    # Paramsfile includes hyperparameters for training
    parser.add_option('--params_file', dest = "params_file", default = './params/exp_params.json',
                      help = "Path to the file  containing the training settings")
    parser.add_option('--data_dir', dest = "data_dir", default = './trees',
                      help = "Directory containing the trees")

    # Directory containing the model to test
    parser.add_option("--model_directory", dest = "test_dir", type = "string")
    parser.add_option("--data", dest = "data", type = "string", default = "train")

    (opts, args) = parser.parse_args(args)

    results_dir = "./results"
    if opts.test:
        pass
    else:
        results_dir_current_job = os.path.join(results_dir, utils.now_as_str_f())
        while os.path.isdir(results_dir_current_job):  # generate a new timestamp if the current one already exists
            results_dir_current_job = os.path.join(results_dir, utils.now_as_str_f())
        os.makedirs(results_dir_current_job)

    # Load training settings (e.g. hyperparameters)
    params = utils.Params(opts.params_file)

    if opts.test:
        pass
    else:
        # Copy the settings file into the results directory
        copyfile(opts.params_file, os.path.join(results_dir_current_job, os.path.basename(opts.params_file)))

    # Get the logger
    if opts.test:
        log_path = os.path.join(opts.test_dir, 'testing.log')
    else:
        log_path = os.path.join(results_dir_current_job, 'training.log')
    log_level = params.log_level if hasattr(params, 'log_level') else logging.DEBUG
    log = utils.get_logger(log_path, log_level)

    if opts.test:
        log.info("Testing directory: " + opts.test_dir)
        log.info("Dataset used for testing: " + opts.data)
    else:
        log.info("Results directory: " + results_dir_current_job)
        log.info("Minibatch: " + str(params.optimizer_settings['minibatch']))
        log.info("Optimizer: " + params.optimizer)
        log.info("Epsilon: " + str(params.optimizer_settings['epsilon']))
        log.info("Alpha: " + str(params.optimizer_settings['alpha']))
        log.info("Number of samples used: " + str(params.sample_size))

    # Testing
    if opts.test:
        test(opts.test_dir, opts.data)
        return

    log.info("Loading data...")
    # load training data
    trees = tr.loadTrees(sample_size = params.sample_size)
    params.numWords = len(tr.loadWordMap())
    overall_performance = pd.DataFrame()

    rnn = nnet.RNN(params.wvecDim, params.outputDim, params.numWords, params.optimizer_settings['minibatch'])
    rnn.initParams()

    sgd = optimizer.SGD(rnn, alpha = params.optimizer_settings['alpha'],
                        minibatch = params.optimizer_settings['minibatch'],
                        optimizer = params.optimizer, epsilon = params.optimizer_settings['epsilon'])

    best_val_cost = float('inf')
    best_epoch = 0

    for e in range(params.num_epochs):
        start = time.time()
        log.info("Running epoch %d" % e)
        df, updated_model, train_cost, train_acc = sgd.run(trees)
        end = time.time()
        log.info("Time per epoch : %f" % (end - start))
        log.info("Training accuracy : %f" % train_acc)
        # VALIDATION
        val_df, val_cost, val_acc = validate(updated_model, results_dir_current_job)

        if val_cost < best_val_cost:
            # best validation cost we have seen so far
            log.info("Validation score improved, saving model")
            best_val_cost = val_cost
            best_epoch = e
            best_epoch_row = {"epoch": e, "train_cost": train_cost, "val_cost": val_cost, "train_acc": train_acc,
                              "val_acc": val_acc}
            with open(results_dir_current_job + "/checkpoint.bin", 'wb') as fid:
                pickle.dump(params, fid)
                pickle.dump(sgd.costt, fid)
                rnn.toFile(fid)

        val_df.to_csv(results_dir_current_job + "/validation_preds_epoch_ " + str(e) + ".csv", header = True, index = False)
        df.to_csv(results_dir_current_job + "/training_preds_epoch_" + str(e) + ".csv", header = True, index = False)

        row = {"epoch": e, "train_cost": train_cost, "val_cost": val_cost, "train_acc": train_acc, "val_acc": val_acc}
        overall_performance = overall_performance.append(row, ignore_index = True)

        # break if no val loss improvement in the last epochs
        if (e - best_epoch) >= params.num_epochs_early_stop:
            log.info("No improvement in the last {num_epochs_early_stop} epochs, stop training.".format(num_epochs_early_stop=params.num_epochs_early_stop))
            break

    overall_performance = overall_performance.append(best_epoch_row, ignore_index = True)
    overall_performance.to_csv(results_dir_current_job + "/train_val_costs.csv", header = True, index = False)
    log.info("Experiment end")
Example #29
if __name__ == '__main__':

    # Setup Slack
    # sm = SlackManager(channel='#temp')
    sm = SlackManager(channel='#dl-model-progress')
    if 'SLACK_API_TOKEN' in os.environ:
        sm.setup(slack_api_token=os.environ['SLACK_API_TOKEN'])

    # Collect arguments from command-line options
    args = parser.parse_args()

    # Load the parameters from json file
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'test.log'))

    slack_message = "*Testing of {} started*".format(args.model_dir)
    sm.post_slack_message(slack_message)

    # Set variables
    data_dir = "./data/"
    model_name = params.model_name
    batch_size = params.test_batch_size
    num_workers = params.num_workers

    # Get the required input size of the network for resizing images
    input_size = mh.input_size_of_model(model_name)
Example #30
    for var in dep_vars:
        print("mean (sd) {}: {:.3f} ({:.3f})".format(var,
                                                     X[:, var2idx[var]].mean(),
                                                     X[:, var2idx[var]].std()))

    return X, var2idx, idx2var


if __name__ == '__main__':
    # Load the parameters from json file
    args = parser.parse_args()

    # Load information from last setting if none provided:
    if args.setting == "" and Path('last-defaults.json').exists():
        print("using last default setting")
        last_defaults = utils.Params("last-defaults.json")
        args.setting = last_defaults.dict["setting"]
        for param, value in last_defaults.dict.items():
            print("{}: {}".format(param, value))
    else:
        with open("last-defaults.json", "r+") as jsonFile:
            defaults = json.load(jsonFile)
            tmp = defaults["setting"]
            defaults["setting"] = args.setting
            jsonFile.seek(0)  # rewind
            json.dump(defaults, jsonFile)
            jsonFile.truncate()

    setting_home = os.path.join(args.setting_dir, args.setting)
    setting = utils.Params(os.path.join(setting_home, "setting.json"))
    data_dir = os.path.join(setting_home, "data")