Example #1
    rSVMRawfname = "svm_results/randSVMRaw_"+str(sampleSize)+".p"
    aSVMF1fname = "svm_results/activeSVMF1_"+str(sampleSize)+".p"
    aSVMRawfname = "svm_results/activeSVMRaw_"+str(sampleSize)+".p"

    # run with this sample size this many times
    for _ in range(150):
        #getting test data to use for both models
        (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)

        #make random train data and models
        rand_train_PCA = train_pca.random_sample(size=sampleSize)

        TESTDATA.append(test_pca)
        RANDTRAIN.append(rand_train_PCA)

        rand_SVM = Model('SVM')
        rand_SVM.fit(rand_train_PCA.get_x(), rand_train_PCA.get_y())
        randSVMF1s.append(rand_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True, avg='weighted'))
        randSVMRaw.append(rand_SVM.predict(test_pca.get_x()))

        #make active model for step size 5, 10, 15
        for stepSize in [5,10,15]:
            active_SVM = Model('SVM', sample='Active')
            active_SVM.activeLearn(train_pca.get_x(), train_pca.get_y(), start_size=startSize, end_size=sampleSize, step_size=stepSize)
            activeSVMF1s[stepSize].append(active_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True, avg='weighted'))
            activeSVMRaw[stepSize].append(active_SVM.predict(test_pca.get_x()))

    pickle.dump(TESTDATA, open(testfnam, "wb"))
    pickle.dump(RANDTRAIN, open(randtrainfnam, "wb"))

    pickle.dump(randSVMF1s, open(rSVMF1fname, "wb" ))
Example #2
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from models.model import Model
import os
from models.test import Test
from sklearn.feature_selection import chi2

pd.set_option('display.max_columns', 500)
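# pipeline: drop identifier columns, keep the 3 best features via chi2,
# split and scale the data, reduce with LDA, then fit and evaluate the random forest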
to_drop = ["ht", "at", "Unnamed: 0"]
results = "home_team_won"

clf = RandomForestClassifier(max_depth=2, random_state=0)
data = os.path.abspath("./data/big_data.csv")
clf = Model(clf, data)

clf.drop_columns(to_drop)
clf.get_X_y(results)

tester = Test(clf)
tester.test_k_best()

clf.k_best(chi2, 3)
clf.split_data(results, 0.20)
clf.standard_scale()
clf.lda(1)
clf.fit_clf()
clf.pred_clf()
cm, cr = clf.eval_clf()

print(cm)
print(cr)
Example #3
def main():
    # ====================== Parameters ======================
    name_subtask = "pretraining"

    test_every = 1
    save_every = 5

    smpc = False
    output_folder = "output"

    # ====================== User inputs =====================

    parser = argparse.ArgumentParser()
    parser.add_argument("-d",
                        "--data",
                        help="Name of the dataset",
                        type=str,
                        default="sleep",
                        choices=["sleep", "mnist"])
    parser.add_argument("-a",
                        "--algo",
                        help="Federated algorithm",
                        type=str,
                        default="fedavg",
                        choices=["fedavg", "scaffold"])
    parser.add_argument("-c",
                        "--clients",
                        help="Number of clients",
                        type=int,
                        default=2)
    parser.add_argument("-s",
                        "--samples",
                        help="Number of samples per clients",
                        type=int,
                        default=1000)
    parser.add_argument("-k",
                        help="Number of clients per round",
                        type=int,
                        default=2)
    parser.add_argument("-r",
                        "--rounds",
                        help="Number of rounds",
                        type=int,
                        default=20)
    parser.add_argument("-e",
                        "--epochs",
                        help="Number of local epochs (client epochs)",
                        type=int,
                        default=1)
    parser.add_argument("-b", help="Batch size", type=int, default=32)
    parser.add_argument("--lr", help="Learning rate", type=float, default=0.01)

    args = parser.parse_args()

    problem_name = args.data
    algo = args.algo
    scaffold = True if algo == "scaffold" else False
    n_rounds = args.rounds
    n_local_epochs = args.epochs

    n_clients = args.clients
    n_clients_round = args.k
    max_samples = args.samples

    lr = args.lr
    batch_size = args.b

    subtask_folder = os.path.join(output_folder, f"{n_clients}-clients",
                                  f"{n_local_epochs}-epochs", algo,
                                  name_subtask)

    # ================== Create clients ======================

    hook = sy.TorchHook(torch)
    clients = [
        sy.VirtualWorker(hook, id=f"client{i}") for i in range(n_clients)
    ]
    crypto_provider = sy.VirtualWorker(hook, id="crypto_provider")

    # ===================== Load data =======================

    data_loader = DataLoader(problem_name,
                             clients,
                             max_samples_per_client=max_samples)
    data_loader.send_data_to_clients()

    #  ==================== Load model ======================

    model = Model("EEG_CNN", clients)  # ["MNIST_CNN", "EEG_CNN"]
    if smpc:
        model.send_model_to_clients()

    #  ==================== Train model =====================

    save_folder = os.path.join(subtask_folder, "model")
    trainer = FedAvg(model, data_loader, crypto_provider, save_folder)
    trainer.train(n_rounds,
                  n_local_epochs,
                  n_clients_round,
                  lr,
                  batch_size,
                  test_every,
                  save_every,
                  scaffold=scaffold,
                  smpc=smpc)

    #  =================== Plot results ======================

    list_test_loss_client = trainer.list_test_loss_client
    list_train_loss_client = trainer.list_train_loss_client
    list_accuracy_client = trainer.list_accuracy_client
    list_test_rounds = trainer.list_test_rounds

    # list_test_loss_client = [[4, 2, 1, 0.5, 0.25]] * n_clients
    # list_test_rounds = list(range(0, n_rounds*2, test_every))
    plotter = Plotter(subtask_folder)

    # Loss learning curve
    plotter.plot_learning_curve_avg(list_test_rounds, list_test_loss_client,
                                    list_train_loss_client)
    plotter.plot_learning_curve_clients(list_test_rounds,
                                        list_test_loss_client,
                                        list_train_loss_client,
                                        n_clients=n_clients)

    # Accuracy learning curve
    plotter.plot_learning_curve_avg(list_test_rounds,
                                    list_accuracy_client,
                                    label="accuracy",
                                    filename="accuracy-avg")
    plotter.plot_learning_curve_clients(list_test_rounds,
                                        list_accuracy_client,
                                        n_clients=n_clients,
                                        label="accuracy",
                                        filename="accuracy-clients")
Example #4
    opt_v.phase = 'val'
    torch.cuda.set_device(opt.gpu_ids[0])
    torch.backends.cudnn.deterministic = opt.deterministic
    torch.backends.cudnn.benchmark = not opt.deterministic

    vis = Visualizer(opt)
    if not opt.debug:
        wandb.init(project="depth_refine", name=opt.name)
        wandb.config.update(opt)
    dataset = Dataloader(opt)
    dataset_size = len(dataset)
    print('The number of training images = {}'.format(dataset_size))
    dataset_v = Dataloader(opt_v)
    dataset_size_v = len(dataset_v)
    print('The number of test images = {}'.format(dataset_size_v))
    model = Model(opt)
    model.setup()
    if not opt.debug:
        wandb.watch(model)
    global_iter = 0
    for epoch in range(opt.epoch_count, opt.n_epochs + opt.n_epochs_decay + 1):
        model.train_mode()
        epoch_start_time = time.time()
        for i, data in enumerate(dataset):
            iter_start_time = time.time()
            global_iter += 1
            model.set_input(data)
            model.optimize_param()
            iter_finish_time = time.time()
            if global_iter % opt.loss_freq == 0:
                if not opt.debug:
Example #5
def train(cfg):
    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        torch.distributed.init_process_group(backend="nccl",
                                             world_size=num_gpus)

    # set logger
    log_dir = os.path.join("logs/", cfg.source_dataset, cfg.prefix)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(format="%(asctime)s %(message)s",
                        filename=log_dir + "/" + "log.txt",
                        filemode="a")

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)

    # writer = SummaryWriter(log_dir, purge_step=0)

    if dist.is_initialized() and dist.get_rank() != 0:
        logger = writer = None
    else:
        logger.info(pprint.pformat(cfg))

    # training data loader
    if not cfg.joint_training:  # single domain
        train_loader = get_train_loader(root=os.path.join(
            cfg.source.root, cfg.source.train),
                                        batch_size=cfg.batch_size,
                                        image_size=cfg.image_size,
                                        random_flip=cfg.random_flip,
                                        random_crop=cfg.random_crop,
                                        random_erase=cfg.random_erase,
                                        color_jitter=cfg.color_jitter,
                                        padding=cfg.padding,
                                        num_workers=4)
    else:  # cross domain
        source_root = os.path.join(cfg.source.root, cfg.source.train)
        target_root = os.path.join(cfg.target.root, cfg.target.train)

        train_loader = get_cross_domain_train_loader(
            source_root=source_root,
            target_root=target_root,
            batch_size=cfg.batch_size,
            random_flip=cfg.random_flip,
            random_crop=cfg.random_crop,
            random_erase=cfg.random_erase,
            color_jitter=cfg.color_jitter,
            padding=cfg.padding,
            image_size=cfg.image_size,
            num_workers=8)

    # evaluation data loader
    query_loader = None
    gallery_loader = None
    if cfg.eval_interval > 0:
        query_loader = get_test_loader(root=os.path.join(
            cfg.target.root, cfg.target.query),
                                       batch_size=512,
                                       image_size=cfg.image_size,
                                       num_workers=4)

        gallery_loader = get_test_loader(root=os.path.join(
            cfg.target.root, cfg.target.gallery),
                                         batch_size=512,
                                         image_size=cfg.image_size,
                                         num_workers=4)

    # model
    num_classes = cfg.source.num_id
    num_cam = cfg.source.num_cam + cfg.target.num_cam
    cam_ids = train_loader.dataset.target_dataset.cam_ids if cfg.joint_training else train_loader.dataset.cam_ids
    num_instances = len(
        train_loader.dataset.target_dataset) if cfg.joint_training else None

    model = Model(num_classes=num_classes,
                  drop_last_stride=cfg.drop_last_stride,
                  joint_training=cfg.joint_training,
                  num_instances=num_instances,
                  cam_ids=cam_ids,
                  num_cam=num_cam,
                  neighbor_mode=cfg.neighbor_mode,
                  neighbor_eps=cfg.neighbor_eps,
                  scale=cfg.scale,
                  mix=cfg.mix,
                  alpha=cfg.alpha)

    model.cuda()

    # optimizer
    ft_params = model.backbone.parameters()
    new_params = [
        param for name, param in model.named_parameters()
        if not name.startswith("backbone.")
    ]
    param_groups = [{
        'params': ft_params,
        'lr': cfg.ft_lr
    }, {
        'params': new_params,
        'lr': cfg.new_params_lr
    }]

    optimizer = optim.SGD(param_groups, momentum=0.9, weight_decay=cfg.wd)

    # convert model for mixed precision distributed training

    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      enabled=cfg.fp16,
                                      opt_level="O2")
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                  milestones=cfg.lr_step,
                                                  gamma=0.1)

    if dist.is_initialized():
        model = parallel.DistributedDataParallel(model, delay_allreduce=True)

    # engine
    checkpoint_dir = os.path.join("checkpoints", cfg.source_dataset,
                                  cfg.prefix)
    engine = get_trainer(
        model=model,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        logger=logger,
        # writer=writer,
        non_blocking=True,
        log_period=cfg.log_period,
        save_interval=10,
        save_dir=checkpoint_dir,
        prefix=cfg.prefix,
        eval_interval=cfg.eval_interval,
        query_loader=query_loader,
        gallery_loader=gallery_loader)

    # training
    engine.run(train_loader, max_epochs=cfg.num_epoch)

    if dist.is_initialized():
        dist.destroy_process_group()
Example #6
def pretraining_model(dataset, cfg, args):
    nasbench = api.NASBench('data/nasbench_only108.tfrecord')
    train_ind_list, val_ind_list = range(int(len(dataset)*0.9)), range(int(len(dataset)*0.9), len(dataset))
    X_adj_train, X_ops_train, indices_train = _build_dataset(dataset, train_ind_list)
    X_adj_val, X_ops_val, indices_val = _build_dataset(dataset, val_ind_list)
    model = Model(input_dim=args.input_dim, hidden_dim=args.hidden_dim, latent_dim=args.dim,
                   num_hops=args.hops, num_mlp_layers=args.mlps, dropout=args.dropout, **cfg['GAE']).cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08)
    epochs = args.epochs
    bs = args.bs
    loss_total = []
    for epoch in range(0, epochs):
        chunks = len(train_ind_list) // bs
        if len(train_ind_list) % bs > 0:
            chunks += 1
        X_adj_split = torch.split(X_adj_train, bs, dim=0)
        X_ops_split = torch.split(X_ops_train, bs, dim=0)
        indices_split = torch.split(indices_train, bs, dim=0)
        loss_epoch = []
        Z = []
        for i, (adj, ops, ind) in enumerate(zip(X_adj_split, X_ops_split, indices_split)):
            optimizer.zero_grad()
            adj, ops = adj.cuda(), ops.cuda()
            # preprocessing
            adj, ops, prep_reverse = preprocessing(adj, ops, **cfg['prep'])
            # forward
            ops_recon, adj_recon, mu, logvar = model(ops, adj.to(torch.long))
            Z.append(mu)
            adj_recon, ops_recon = prep_reverse(adj_recon, ops_recon)
            adj, ops = prep_reverse(adj, ops)
            loss = VAEReconstructed_Loss(**cfg['loss'])((ops_recon, adj_recon), (ops, adj), mu, logvar)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
            loss_epoch.append(loss.item())
            if i%1000==0:
                print('epoch {}: batch {} / {}: loss: {:.5f}'.format(epoch, i, chunks, loss.item()))
        Z = torch.cat(Z, dim=0)
        z_mean, z_std = Z.mean(0), Z.std(0)
        validity_counter = 0
        buckets = {}
        model.eval()
        for _ in range(args.latent_points):
            z = torch.randn(7, args.dim).cuda()
            z = z * z_std + z_mean
            op, ad = model.decoder(z.unsqueeze(0))
            op = op.squeeze(0).cpu()
            ad = ad.squeeze(0).cpu()
            max_idx = torch.argmax(op, dim=-1)
            one_hot = torch.zeros_like(op)
            for i in range(one_hot.shape[0]):
                one_hot[i][max_idx[i]] = 1
            op_decode = transform_operations(max_idx)
            ad_decode = (ad>0.5).int().triu(1).numpy()
            ad_decode = np.ndarray.tolist(ad_decode)
            spec = api.ModelSpec(matrix=ad_decode, ops=op_decode)
            if nasbench.is_valid(spec):
                validity_counter += 1
                fingerprint = graph_util.hash_module(np.array(ad_decode), one_hot.numpy().tolist())
                if fingerprint not in buckets:
                    buckets[fingerprint] = (ad_decode, one_hot.numpy().astype('int8').tolist())
        validity = validity_counter / args.latent_points
        print('Ratio of valid decodings from the prior: {:.4f}'.format(validity))
        print('Ratio of unique decodings from the prior: {:.4f}'.format(len(buckets) / (validity_counter+1e-8)))
        acc_ops_val, mean_corr_adj_val, mean_fal_pos_adj_val, acc_adj_val = get_val_acc_vae(model, cfg, X_adj_val, X_ops_val, indices_val)
        print('validation set: acc_ops:{0:.4f}, mean_corr_adj:{1:.4f}, mean_fal_pos_adj:{2:.4f}, acc_adj:{3:.4f}'.format(
                acc_ops_val, mean_corr_adj_val, mean_fal_pos_adj_val, acc_adj_val))
        print('epoch {}: average loss {:.5f}'.format(epoch, sum(loss_epoch)/len(loss_epoch)))
        loss_total.append(sum(loss_epoch) / len(loss_epoch))
        save_checkpoint_vae(model, optimizer, epoch, sum(loss_epoch) / len(loss_epoch), args.dim, args.name, args.dropout, args.seed)
    print('loss for epochs: \n', loss_total)
Example #7
from data.dataset import Dataset
from models.model import Model

print(datetime.datetime.now())

mnist_pca = pickle.load(open( "../../data/pickled/mnist_data_pca50.p", "rb" ))

mnist_pca_sample = mnist_pca.random_sample(percent=.5) #24 instances

for sampleSize in range(50, 401, 30):
    print(sampleSize)
    sysSVMF1s = []
    sysSVMF1fname = "svm_results/sysSVMF1_"+str(sampleSize)+".p"

    for _ in range(150):
        #getting test data to use for models
        (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)

        #make systematic train data and model
        sys_train_PCA = train_pca.systematic_sample(size=sampleSize, sort='magnitude')

        sys_SVM = Model('SVM')
        sys_SVM.fit(sys_train_PCA.get_x(), sys_train_PCA.get_y())

        sysSVMF1s.append(sys_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))


    pickle.dump(sysSVMF1s, open(sysSVMF1fname, "wb" ))

print(datetime.datetime.now())
Example #8
    status = "running"

    parser = argparse.ArgumentParser()
    parser.add_argument('--containers_manager', type=str, required=True)
    args = parser.parse_args()

    # init log
    log_format = "%(asctime)s:%(levelname)s:%(name)s:" \
                 "%(filename)s:%(lineno)d:%(message)s"
    logging.basicConfig(level='DEBUG', format=log_format)

    # get models information
    models_endpoint = args.containers_manager + "/models"
    logging.info("Getting models from: %s", models_endpoint)
    models = [
        Model(json_data=json_model) for json_model in get_data(models_endpoint)
    ]
    logging.info("Models: %s", [model.to_json() for model in models])

    # get containers information
    containers_endpoint = args.containers_manager + "/containers"
    logging.info("Getting containers from: %s", containers_endpoint)
    containers = [
        Container(json_data=json_container)
        for json_container in get_data(containers_endpoint)
    ]
    logging.info("Containers: %s",
                 [container.to_json() for container in containers])

    app.run(host='0.0.0.0', port=5002)
Example #9
def train(args):
    train_path = args['--train-src']
    dev_path = args['--dev-src']
    vocab_path = args['--vocab-src']
    lr = float(args['--lr'])
    log_every = int(args['--log-every'])
    model_path = args['--model-path']
    optim_path = args['--optim-path']
    max_patience = int(args['--patience'])
    max_num_trials = int(args['--max-num-trial'])
    clip_grad = float(args['--clip-grad'])
    valid_iter = int(args['--valid-niter'])

    if args['--data'] == 'quora':
        train_data = utils.read_data(train_path, 'quora')
        dev_data = utils.read_data(dev_path, 'quora')
        vocab_data = utils.load_vocab(vocab_path)
        network = Model(args, vocab_data, 2)

    if args['--cuda'] == str(1):
        network.model = network.model.cuda()

    epoch = 0
    train_iter = 0
    report_loss = 0
    cum_loss = 0
    rep_examples = 0
    cum_examples = 0
    batch_size = int(args['--batch-size'])
    optimiser = torch.optim.Adam(list(network.model.parameters()), lr=lr)
    begin_time = time.time()
    prev_acc = 0
    val_hist = []
    num_trial = 0
    patience = 0
    softmax = torch.nn.Softmax(dim=1)

    if args['--cuda'] == str(1):
        softmax = softmax.cuda()

    while True:
        epoch += 1

        for labels, p1, p2, idx in utils.batch_iter(train_data, batch_size):
            optimiser.zero_grad()
            train_iter += 1

            _, iter_loss = network.forward(labels, p1, p2)
            report_loss += iter_loss.item()
            cum_loss += iter_loss.item()

            iter_loss.backward()
            nn.utils.clip_grad_norm_(list(network.model.parameters()),
                                     clip_grad)
            optimiser.step()

            rep_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss, %.4f, cum. examples %d, time elapsed %.2f' %\
                     (epoch, train_iter, report_loss, cum_examples, time.time() - begin_time), file=sys.stderr)

                report_loss, rep_examples = 0, 0

            if train_iter % valid_iter == 0:
                print('epoch %d, iter %d, avg. loss, %.4f, cum. examples %d, time elapsed %.2f' %\
                     (epoch, train_iter, cum_loss / train_iter, cum_examples, time.time() - begin_time), file=sys.stderr)

                cum_loss, cum_examples = 0, 0
                print('Begin Validation .. ', file=sys.stderr)
                network.model.eval()
                total_examples = 0
                total_correct = 0
                val_loss, val_examples = 0, 0
                for val_labels, valp1, valp2, idx in utils.batch_iter(
                        dev_data, batch_size):
                    total_examples += len(val_labels)
                    pred, _ = network.forward(val_labels, valp1, valp2)
                    pred = softmax(pred)
                    _, pred = pred.max(dim=1)
                    label_cor = network.get_label(val_labels)
                    total_correct += (pred == label_cor).sum().float()
                final_acc = total_correct / total_examples

                val_hist.append(final_acc)
                val_acc = final_acc
                print('Validation: iter %d, val_acc %.4f' %
                      (train_iter, val_acc),
                      file=sys.stderr)
                if val_acc > prev_acc:
                    patience = 0
                    prev_acc = val_acc
                    print('Saving model and optimiser state', file=sys.stderr)
                    torch.save(network.model, model_path)
                    torch.save(optimiser.state_dict(), optim_path)
                else:
                    patience += 1
                    print('hit patience %d' % (patience), file=sys.stderr)
                    if patience == max_patience:
                        num_trial += 1
                        print('hit #%d' % (num_trial), file=sys.stderr)
                        if num_trial == max_num_trials:
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        lr = lr * float(args['--lr-decay'])
                        print(
                            'load previously best model and decay learning rate to %f'
                            % (lr),
                            file=sys.stderr)

                        network.model = torch.load(model_path)
                        if args['--cuda'] == str(1):
                            network.model = network.model.cuda()

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        optimiser = torch.optim.Adam(list(
                            network.model.parameters()),
                                                     lr=lr)
                        optimiser.load_state_dict(torch.load(optim_path))
                        for state in optimiser.state.values():
                            for k, v in state.items():
                                if isinstance(v, torch.Tensor):
                                    state[k] = v
                        for group in optimiser.param_groups:
                            group['lr'] = lr

                        patience = 0
                network.model.train()

        if epoch == int(args['--max-epoch']):
            print('reached maximum number of epochs!', file=sys.stderr)
            exit(0)
Example #10
        csv_params = {
            'index_col': ESSAY_INDEX,
            'dtype': {'domain1_score': np.float}
        }
        y_preds = []
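        # for each feature set, fit an LGB model and an ElasticNet model on the
        # combined train+valid data and collect their test-set predictions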
        for feature_set in feature_sets:
            train_data = pd.read_csv(f"../{feature_set}/TrainSet{set_id}.csv", **csv_params)
            train_label = pd.read_csv(f"../{feature_set}/TrainLabel{set_id}.csv", **csv_params)
            valid_data = pd.read_csv(f"../{feature_set}/ValidSet{set_id}.csv", **csv_params)
            valid_label = pd.read_csv(f"../{feature_set}/ValidLabel{set_id}.csv", **csv_params)
            test_data = pd.read_csv(f"../{feature_set}/TestSet{set_id}.csv", **csv_params)

            data = pd.concat([train_data, valid_data])
            label = pd.concat([train_label, valid_label])

            model1 = Model({}, LgbClassifier)
            model1.fit((data, label[ESSAY_LABEL]))
            y_preds.append(model1.predict(test_data))
            model2 = Model({}, ElasticNetClassifier, hyper_search=False)
            model2.fit((data, label[ESSAY_LABEL]))
            y_preds.append(model2.predict(test_data))
        y_hat = np.average([y_preds[i] for i in final_choose[set_id]], axis=0)

        tmp = pd.DataFrame({ESSAY_INDEX: test_data.index})
        tmp.set_index(ESSAY_INDEX, drop=True, inplace=True)
        tmp['essay_set'] = set_id
        tmp['pred'] = y_hat
        result.append(tmp)
        # weights[set_id - start] = len(valid_label)
    result = pd.concat(result)
    result['pred'] = result['pred'].apply(np.round)
Example #11
 def make_model(self):
     return Model(model=GradientBoostingClassifier(),
                  vectorizer=Vectorizer(pca=True))
Example #12
        print("Training Done")


if __name__ == "__main__":

    from models.model import Model

    path_weights = sys.argv[1]
    path_node_partition = sys.argv[2]
    path_edge_partition = sys.argv[3]
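    # load the node feature table (columns 0..1433, indexed by node id) and the edge list for this partition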

    nodes = pd.read_csv(path_node_partition,
                        sep='\t',
                        lineterminator='\n',
                        header=None).loc[:, 0:1433]
    nodes.set_index(0, inplace=True)

    edges = pd.read_csv(path_edge_partition,
                        sep='\s+',
                        lineterminator='\n',
                        header=None)
    edges.columns = ["source", "target"]

    model = Model(nodes, edges)
    model.initialize()

    client = Client(model, weights_path=path_weights)

    client.run()
Example #13
from models.queues_policies import QueuesPolicies, QueuesPolicy
from models.model import Model
import logging
import random
import threading
import time
import queue
import statistics
import matplotlib.pyplot as plt

GPUS = [1, 1, 1, 1, 1]  # speeds of GPUs
MODELS = [Model("m1", 1, 0.5, 1), Model("m2", 1, 0.5, 1)]  # models
AVG_RESPONSE_TIME = {
    "m1": 0.05,
    "m2": 0.01
}  # avg response time for the app [s]
STDEV = [0.6, 1]  # standard deviation, min max
ARRIVAL_RATES = {"m1": 50, "m2": 100}  # arrival rate [req/s]
SIM_DURATION = 5  # simulation duration [s]
QUEUES_POLICY = QueuesPolicy.HEURISTIC_1


class Req:
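    # a queued request: the target model plus enqueue (ts_in) and completion (ts_out) timestamps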
    model = None
    ts_in = None
    ts_out = None

    def __init__(self, model):
        self.ts_in = time.time()
        self.model = model
Example #14
def verify_password(email, password):
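    # look up the user by email and check the submitted password against the stored hash
    # (hashpw, presumably bcrypt); on success a field of the matched record is kept in session['user']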
    user = Model(table='users').read(email)
    if not user or not hashpw(password, str(user[0][1])) == user[0][1]:
        return False
    session['user'] = user[0][2]
    return True
Example #15
def main():
    # Init the class DataManager
    print("===================== load data =========================")
    dataManager = DataManager(img_height, img_width)
    # Get data
    train_data, validation_data = dataManager.get_train_data(
        train_data_dir, validation_data_dir, train_batch_size, val_batch_size)
    # Get class name:id
    label_map = (train_data.class_indices)
    # save model class id
    with open(saved_model_classid_path, 'w') as outfile:
        json.dump(label_map, outfile)
    # Init the class ScratchModel

    model = Model(image_shape, class_number)
    # Get model architecture

    print(
        "===================== load model architecture ========================="
    )
    loaded_model = model.get_model_architecture()
    # plot the model
    plot_model(loaded_model, to_file=model_png)  # not working with windows
    # serialize model to JSON
    model_json = loaded_model.to_json()
    with open(saved_model_arch_path, "w") as json_file:
        json_file.write(model_json)

    # Delete the last summary file
    delete_file(model_summary_file)
    # Add the new model summary
    loaded_model.summary(print_fn=save_summary)
    print("===================== compile model =========================")

    # Compile the model
    loaded_model = model.compile_model(loaded_model, model_loss_function,
                                       model_optimizer_rmsprop, model_metrics)

    # prepare weights for the model
    Kernels = np.empty([5, 5, 4], dtype=np.float32)
    for i in range(0, 5):
        row = np.empty([5, 4], dtype=np.float32)
        for j in range(0, 5):
            row[j][0] = KV[i][j]
            row[j][1] = KM[i][j]
            row[j][2] = GH[i][j]
            row[j][3] = GV[i][j]
        Kernels[i] = row

    preprocess_weights = np.reshape(Kernels, (5, 5, 1, 4))

    #loaded_model.summary()

    #loaded_model.set_weights([preprocess_weights])

    loaded_model.load_weights(best_weights)

    loaded_model = model.compile_model(loaded_model, model_loss_function,
                                       model_optimizer_rmsprop, model_metrics)

    # Prepare callbacks
    csv_log = callbacks.CSVLogger(train_log_path, separator=',', append=False)
    early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=0,
                                             patience=0,
                                             verbose=0,
                                             mode='auto')
    checkpoint = callbacks.ModelCheckpoint(train_checkpoint_path,
                                           monitor='val_loss',
                                           verbose=1,
                                           save_best_only=True,
                                           mode='min')
    tensorboard = TensorBoard(log_dir=model_tensorboard_log +
                              "{}".format(time()))
    callbacks_list = [csv_log, tensorboard, checkpoint]

    print(
        "===================== start training model =========================")
    # start training

    history = loaded_model.fit_generator(
        train_data,
        steps_per_epoch=num_of_train_samples // train_batch_size,
        epochs=num_of_epoch,
        validation_data=validation_data,
        validation_steps=num_of_validation_samples // val_batch_size,
        verbose=1,
        callbacks=callbacks_list)

    print(history)
    print(
        "========================= training process completed! ==========================="
    )
Example #16
from models.model import Model
from models.activelearn import Active_Learner

# delete old results file
if os.path.isfile('results.txt'):
    os.remove('results.txt')

# Make our data
data = Dataset('SUSY_100k.csv').random_sample(.01)  #1k points
data = data.pca(n_components=5)
(total_train, total_test) = data.test_train_split(train_percent=.8)
train160 = total_train.random_sample(.05)
sys_train160 = total_train.systematic_sample(percent=0.05)

# Make our models
rand_SVM800 = Model('SVM')
rand_SVM800.fit(total_train.get_x(), total_train.get_y())

rand_SVM160 = Model('SVM')
rand_SVM160.fit(train160.get_x(), train160.get_y())

sys_SVM160 = Model('SVM', sample='Systematic')
sys_SVM160.fit(sys_train160.get_x(), sys_train160.get_y())

active_SVM = Model('SVM', sample='Active')
AL_SVM = Active_Learner(model=active_SVM,
                        start_size=.01,
                        end_size=.05,
                        step_size=.005)
active_SVM = AL_SVM.fit(total_train.get_x(), total_train.get_y())
Example #17
def inference_net(cfg):
    dataset_loader = dataloader_jt.DATASET_LOADER_MAPPING[
        cfg.DATASET.TEST_DATASET](cfg)
    test_data_loader = dataset_loader.get_dataset(
        dataloader_jt.DatasetSubset.TEST, batch_size=1, shuffle=False)

    model = Model(dataset=cfg.DATASET.TEST_DATASET)

    assert 'WEIGHTS' in cfg.CONST and cfg.CONST.WEIGHTS
    print('loading: ', cfg.CONST.WEIGHTS)
    model.load(cfg.CONST.WEIGHTS)

    # Switch models to evaluation mode
    model.eval()

    # The inference loop
    n_samples = len(test_data_loader)
    t_obj = tqdm(test_data_loader)

    for model_idx, (taxonomy_id, model_id, data) in enumerate(t_obj):
        taxonomy_id = taxonomy_id[0] if isinstance(
            taxonomy_id[0], str) else taxonomy_id[0].item()
        model_id = model_id[0]

        partial = jittor.array(data['partial_cloud'])
        partial = random_subsample(
            partial.repeat((1, 8, 1)).reshape(-1, 16384, 3))  # b*8, 2048, 3
        pcds = model(partial)[0]

        pcd1 = pcds[0].reshape(-1, 16384, 3)
        pcd2 = pcds[1].reshape(-1, 16384, 3)
        pcd3 = pcds[2].reshape(-1, 16384, 3)

        output_folder = os.path.join(cfg.DIR.OUT_PATH, 'benchmark',
                                     taxonomy_id)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        output_folder_pcd1 = os.path.join(output_folder, 'pcd1')
        output_folder_pcd2 = os.path.join(output_folder, 'pcd2')
        output_folder_pcd3 = os.path.join(output_folder, 'pcd3')
        if not os.path.exists(output_folder_pcd1):
            os.makedirs(output_folder_pcd1)
            os.makedirs(output_folder_pcd2)
            os.makedirs(output_folder_pcd3)

        output_file_path = os.path.join(output_folder, 'pcd1',
                                        '%s.h5' % model_id)
        utils.io.IO.put(output_file_path, pcd1.squeeze(0).detach().numpy())

        output_file_path = os.path.join(output_folder, 'pcd2',
                                        '%s.h5' % model_id)
        utils.io.IO.put(output_file_path, pcd2.squeeze(0).detach().numpy())

        output_file_path = os.path.join(output_folder, 'pcd3',
                                        '%s.h5' % model_id)
        utils.io.IO.put(output_file_path, pcd3.squeeze(0).detach().numpy())

        t_obj.set_description(
            'Test[%d/%d] Taxonomy = %s Sample = %s File = %s' %
            (model_idx + 1, n_samples, taxonomy_id, model_id,
             output_file_path))
Example #18
import unittest

import numpy as np
import pandas as pd
from models.model import Model
import models.features as feat
from models.features import Features
from models.prerocesing import PreprocessTags
from models.sentence_processor import FinkMos
import os

os.chdir(r'C:\Users\amoscoso\Documents\Technion\nlp\nlp_hw\tests')
# %%
data = PreprocessTags(True).load_data(r'..\data\train.wtag')
word_num = 500
# generate tests - (comment out if file is updated)
feat_generator = Features()
feat_generator.generate_tuple_corpus(data.x[0:word_num], data.y[0:word_num])
for template in feat.templates_dict.values():
    feat_generator.generate_lambdas(template['func'], template['tuples'])
feat_generator.save_tests()
test_data = PreprocessTags(True).load_data(r'..\data\test.wtag')
# %%
word_num = 500
test_number = 50
model1 = Model()
model1.fit(data.x[0:word_num], data.y[0:word_num])

y_hat = model1.predict(test_data.x[:test_number])
model1.confusion(y_hat, data.y[:test_number])
Example #19
args = parser.parse_args()

# some hyperparms
original_height = 1400
original_width = 2100
objective_height = 350
objective_width = 525
type_list = ['Fish', 'Flower', 'Gravel', 'Sugar']

test_dataset = CloudDataset(root_dataset=args.test_dataset,
                            list_data=args.list_test,
                            phase='test',
                            mode=args.mode)

model = Model(num_class=args.num_class,
              encoder=args.encoder,
              decoder=args.decoder,
              mode=args.mode)
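# move the model to the GPU, restore the trained checkpoint, and switch to evaluation mode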
model = model.cuda()
model.load_state_dict(torch.load(args.checkpoint)['state_dict'])
model.eval()
criterion = Criterion(mode=args.mode)

test_loader = DataLoader(test_dataset,
                         batch_size=args.batch_size,
                         shuffle=False,
                         num_workers=args.num_workers)

submission = pd.read_csv(args.list_test)


def get_transforms():
Example #20
def main(json_path: str = 'options/train_denoising.json'):
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt',
                        type=str,
                        default=json_path,
                        help='Path to option JSON file.')

    opt = option.parse(parser.parse_args().opt, is_train=True)
    util.makedirs(
        [path for key, path in opt['path'].items() if 'pretrained' not in key])

    current_step = 0

    option.save(opt)

    # logger
    logger_name = 'train'
    utils_logger.logger_info(
        logger_name, os.path.join(opt['path']['log'], logger_name + '.log'))
    logger = logging.getLogger(logger_name)
    logger.info(option.dict2str(opt))

    # seed
    seed = opt['train']['manual_seed']
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    cuda.manual_seed_all(seed)

    # data
    opt_data_train: Dict[str, Any] = opt["data"]["train"]
    train_set: DatasetDenoising = select_dataset(opt_data_train, "train")

    train_loader: DataLoader[DatasetDenoising] = DataLoader(
        train_set,
        batch_size=opt_data_train['batch_size'],
        shuffle=True,
        num_workers=opt_data_train['num_workers'],
        drop_last=True,
        pin_memory=True)

    opt_data_test = opt["data"]["test"]
    test_sets: List[DatasetDenoising] = select_dataset(opt_data_test, "test")
    test_loaders: List[DataLoader[DatasetDenoising]] = []
    for test_set in test_sets:
        test_loaders.append(
            DataLoader(test_set,
                       batch_size=1,
                       shuffle=False,
                       num_workers=1,
                       drop_last=True,
                       pin_memory=True))

    # model
    model = Model(opt)
    model.init()

    # train
    start = time.time()
    for epoch in range(1000000):  # keep running
        for train_data in tqdm(train_loader):
            current_step += 1

            model.feed_data(train_data)

            model.train()

            model.update_learning_rate(current_step)

            if current_step % opt['train']['checkpoint_log'] == 0:
                model.log_train(current_step, epoch, logger)

            if current_step % opt['train']['checkpoint_test'] == 0:
                avg_psnrs: Dict[str, List[float]] = {}
                avg_ssims: Dict[str, List[float]] = {}
                tags: List[str] = []
                test_index = 0
                for test_loader in tqdm(test_loaders):
                    test_set: DatasetDenoising = test_loader.dataset
                    avg_psnr = 0.
                    avg_ssim = 0.
                    for test_data in tqdm(test_loader):
                        test_index += 1
                        model.feed_data(test_data)
                        model.test()

                        psnr, ssim = model.cal_metrics()
                        avg_psnr += psnr
                        avg_ssim += ssim

                        if current_step % opt['train'][
                                'checkpoint_saveimage'] == 0:
                            model.save_visuals(test_set.tag)

                    avg_psnr = round(avg_psnr / len(test_loader), 2)
                    avg_ssim = round(avg_ssim * 100 / len(test_loader), 2)

                    name = test_set.name

                    if name in avg_psnrs:
                        avg_psnrs[name].append(avg_psnr)
                        avg_ssims[name].append(avg_ssim)
                    else:
                        avg_psnrs[name] = [avg_psnr]
                        avg_ssims[name] = [avg_ssim]
                    if test_set.tag not in tags:
                        tags.append(test_set.tag)

                header = ['Dataset'] + tags
                t = PrettyTable(header)
                for key, value in avg_psnrs.items():
                    t.add_row([key] + value)
                logger.info(f"Test PSNR:\n{t}")

                t = PrettyTable(header)
                for key, value in avg_ssims.items():
                    t.add_row([key] + value)
                logger.info(f"Test SSIM:\n{t}")

                logger.info(f"Time elapsed: {time.time() - start:.2f}")
                start = time.time()

                model.save(logger)
Example #21
def main(options):
    if not os.path.exists(options.checkpoint_dir):
        os.system("mkdir -p %s"%options.checkpoint_dir)
        pass
    if not os.path.exists(options.test_dir):
        os.system("mkdir -p %s"%options.test_dir)
        pass

    dataset = FloorplanDataset(options, split='train', random=True)

    print('the number of images', len(dataset))    

    dataloader = DataLoader(dataset, batch_size=options.batchSize, shuffle=True, num_workers=16)

    model = Model(options)
    model.cuda()
    model.train()

    if options.restore == 1:
        print('restore')
        model.load_state_dict(torch.load(options.checkpoint_dir + '/checkpoint.pth'))
        pass

    
    if options.task == 'test':
        dataset_test = FloorplanDataset(options, split='test', random=False)
        testOneEpoch(options, model, dataset_test)
        exit(1)
    
    optimizer = torch.optim.Adam(model.parameters(), lr = options.LR)
    if options.restore == 1 and os.path.exists(options.checkpoint_dir + '/optim.pth'):
        optimizer.load_state_dict(torch.load(options.checkpoint_dir + '/optim.pth'))
        pass

    for epoch in range(options.numEpochs):
        epoch_losses = []
        data_iterator = tqdm(dataloader, total=len(dataset) // options.batchSize + 1)
        for sampleIndex, sample in enumerate(data_iterator):
            optimizer.zero_grad()
            
            images, corner_gt, icon_gt, room_gt = sample[0].cuda(), sample[1].cuda(), sample[2].cuda(), sample[3].cuda()

            corner_pred, icon_pred, room_pred = model(images)
            #print([(v.shape, v.min(), v.max()) for v in [corner_pred, icon_pred, room_pred, corner_gt, icon_gt, room_gt]])
            #exit(1)
            #print(corner_pred.shape, corner_gt.shape)
            #exit(1)
            corner_loss = torch.nn.functional.binary_cross_entropy(corner_pred, corner_gt)
            icon_loss = torch.nn.functional.cross_entropy(icon_pred.view(-1, NUM_ICONS + 2), icon_gt.view(-1))
            room_loss = torch.nn.functional.cross_entropy(room_pred.view(-1, NUM_ROOMS + 2), room_gt.view(-1))            
            losses = [corner_loss, icon_loss, room_loss]
            loss = sum(losses)

            loss_values = [l.data.item() for l in losses]
            epoch_losses.append(loss_values)
            status = str(epoch + 1) + ' loss: '
            for l in loss_values:
                status += '%0.5f '%l
                continue
            data_iterator.set_description(status)
            loss.backward()
            optimizer.step()

            if sampleIndex % 500 == 0:
                visualizeBatch(
                    options, images.detach().cpu().numpy(),
                    [('gt', {
                        'corner': corner_gt.detach().cpu().numpy(),
                        'icon': icon_gt.detach().cpu().numpy(),
                        'room': room_gt.detach().cpu().numpy()
                    }),
                     ('pred', {
                         'corner': corner_pred.max(-1)[1].detach().cpu().numpy(),
                         'icon': icon_pred.max(-1)[1].detach().cpu().numpy(),
                         'room': room_pred.max(-1)[1].detach().cpu().numpy()
                     })])
                if options.visualizeMode == 'debug':
                    exit(1)
                    pass
            continue
        print('loss', np.array(epoch_losses).mean(0))
        if True:
            torch.save(model.state_dict(), options.checkpoint_dir + '/checkpoint.pth')
            torch.save(optimizer.state_dict(), options.checkpoint_dir + '/optim.pth')
            pass

        #testOneEpoch(options, model, dataset_test)        
        continue
    return
Example #22
def main(options):
    if not os.path.exists(options.checkpoint_dir):
        os.system("mkdir -p %s" % options.checkpoint_dir)
        pass
    if not os.path.exists(options.test_dir):
        os.system("mkdir -p %s" % options.test_dir)
        pass

    model = Model(options)
    model.cuda()
    model.train()

    base = 'best'

    if options.restore == 1:
        print('restore from ' + options.checkpoint_dir + '/checkpoint_%s.pth' %
              (base))
        model.load_state_dict(
            torch.load(options.checkpoint_dir + '/checkpoint_%s.pth' % (base)))
        pass

    if options.task == 'test':
        print('-' * 20, 'test')
        dataset_test = FloorplanDataset(options, split='test_3', random=False)
        print('the number of test images', len(dataset_test))
        testOneEpoch(options, model, dataset_test)
        exit(1)

    if options.task == 'test_batch':
        print('-' * 20, 'test_batch')
        dataset_test = FloorplanDataset(options,
                                        split='test_batch',
                                        random=False,
                                        test_batch=True)
        print('the number of test_batch images', len(dataset_test))
        testBatch_unet(options, model, dataset_test)
        exit(1)

    dataset = FloorplanDataset(options,
                               split='sb_train++',
                               random=True,
                               augment=options.augment)
    print('the number of training images', len(dataset), ', batch size: ',
          options.batchSize, ' augment: ', options.augment)
    dataloader = DataLoader(dataset,
                            batch_size=options.batchSize,
                            shuffle=True,
                            num_workers=16)

    optimizer = torch.optim.Adam(model.parameters(), lr=options.LR)
    if options.restore == 1 and os.path.exists(options.checkpoint_dir +
                                               '/optim_%s.pth' % (base)):
        print('optimizer using ' + options.checkpoint_dir + '/optim_%s.pth' %
              (base))
        optimizer.load_state_dict(
            torch.load(options.checkpoint_dir + '/optim_%s.pth' % (base)))
        pass

    with open('loss_file.csv', 'w') as loss_file:
        writer = csv.writer(loss_file, delimiter=',', quotechar='"')
        best_loss = float('inf')
        for epoch in range(options.numEpochs):
            epoch_losses = []
            data_iterator = tqdm(dataloader,
                                 total=len(dataset) // options.batchSize + 1)
            for sampleIndex, sample in enumerate(data_iterator):
                optimizer.zero_grad()

                images, corner_gt, icon_gt, room_gt = sample[0].cuda(
                ), sample[1].cuda(), sample[2].cuda(), sample[3].cuda()

                corner_pred, icon_pred, room_pred = model(images)
                #print([(v.shape, v.min(), v.max()) for v in [corner_pred, icon_pred, room_pred, corner_gt, icon_gt, room_gt]])
                #print([(v.shape, v.type()) for v in [corner_pred, icon_pred, room_pred, corner_gt, icon_gt, room_gt]]);exit(1)
                #print(corner_pred.shape, corner_gt.shape)
                corner_loss = NF.binary_cross_entropy_with_logits(
                    corner_pred, corner_gt)
                #icon_loss = NF.cross_entropy(icon_pred.view(-1, NUM_ICONS + 2), icon_gt.view(-1))
                icon_loss = NF.binary_cross_entropy_with_logits(
                    icon_pred, icon_gt)
                #room_loss = NF.cross_entropy(room_pred.view(-1, NUM_ROOMS + 2), room_gt.view(-1))
                room_loss = NF.binary_cross_entropy_with_logits(
                    room_pred, room_gt)
                losses = [corner_loss, icon_loss, room_loss]
                loss = sum(losses)

                loss_values = [l.data.item() for l in losses]
                writer.writerow(loss_values)
                loss_file.flush()

                epoch_losses.append(loss_values)
                status = str(epoch + 1) + ' loss: '
                for l in loss_values:
                    status += '%0.5f ' % l
                    continue
                data_iterator.set_description(status)
                loss.backward()
                optimizer.step()

                if sampleIndex % 500 == 0:
                    visualizeBatch(
                        options,
                        images.detach().cpu().numpy(),
                        [('gt', {
                            'corner': corner_gt.detach().cpu().numpy(),
                            'icon': icon_gt.detach().cpu().numpy(),
                            'room': room_gt.detach().cpu().numpy()
                        }),
                         ('pred', {
                             'corner':
                             corner_pred.max(-1)[1].detach().cpu().numpy(),
                             'icon':
                             icon_pred.max(-1)[1].detach().cpu().numpy(),
                             'room':
                             room_pred.max(-1)[1].detach().cpu().numpy()
                         })])
                    if options.visualizeMode == 'debug':
                        exit(1)
                        pass
                continue
            print('loss', np.array(epoch_losses).mean(0))
            if (epoch + 1) % 100 == 0:
                torch.save(
                    model.state_dict(),
                    options.checkpoint_dir + '/checkpoint_%d.pth' % (epoch + 1))
                torch.save(
                    optimizer.state_dict(),
                    options.checkpoint_dir + '/optim_%d.pth' % (epoch + 1))
                pass

            if loss.item() < best_loss:
                best_loss = loss.item()
                torch.save(model.state_dict(),
                           options.checkpoint_dir + '/checkpoint_best.pth')
                torch.save(optimizer.state_dict(),
                           options.checkpoint_dir + '/optim_best.pth')
                print('best loss: ', best_loss)
            #testOneEpoch(options, model, dataset_test)
            continue
        return
Example #23
tr_config = TrainConfig('model1.cfg')
tr_config.show_config()

# setup model input tensors
x = tf.placeholder(tf.float32, [None, 784])
y_hat = tf.placeholder(tf.int32, [
    None,
])
keep_prob = tf.placeholder(tf.float32)
input_tensors = {}
input_tensors['x'] = x
input_tensors['y_hat'] = y_hat
input_tensors['keep_prob'] = keep_prob

# init model
model = Model(tr_config, input_tensors)
sess = tf.Session()
model.init_vars(sess)

# python3
#batches = batch_iter(list(zip(training_data[0], training_data[1])),
batches = batch_iter(zip(training_data[0], training_data[1]),
                     tr_config.batch_size, tr_config.num_epochs)
step = 0
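# training loop: one train step per batch; evaluate every eval_every steps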
for batch in batches:
    x_batch, y_hat_batch = zip(*batch)
    x_batch, y_hat_batch = np.array(x_batch), np.array(y_hat_batch)

    model.train(sess, x_batch, y_hat_batch, tr_config.keep_prob)
    step += 1
    if step % tr_config.eval_every == 0:
Example #24
def create_app(
        containers_manager="http://localhost:5001",
        requests_store="http://localhost:5002",
        verbose=1,
        gpu_queues_policy=QueuesPolicy.HEURISTIC_1,
        cpu_queues_policy=QueuesPolicy.ROUND_ROBIN,
        max_log_consumers=1,
        max_polling=1,  # the number of threads waiting for requests
        max_consumers_cpu=100,
        max_consumers_gpu=100):  # the number of concurrent request-consumer threads
    global reqs_queues, requests_store_host, status, gpu_policy, cpu_policy, responses_list
    requests_store_host = requests_store + "/requests"

    # init log
    coloredlogs.install(level='DEBUG', milliseconds=True)
    # log_format = "%(asctime)s:%(levelname)s:%(name)s: %(filename)s:%(lineno)d:%(message)s"
    # logging.basicConfig(level='DEBUG', format=log_format)

    # init models and containers
    status = "Init models and containers"
    logging.info(status)
    models_endpoint = containers_manager + "/models"
    containers_endpoint = containers_manager + "/containers"
    logging.info("Getting models from: %s", models_endpoint)
    logging.info("Getting containers from: %s", containers_endpoint)

    models = [
        Model(json_data=json_model) for json_model in get_data(models_endpoint)
    ]
    logging.info("Models: %s", [model.to_json() for model in models])
    containers = [
        Container(json_data=json_container)
        for json_container in get_data(containers_endpoint)
    ]
    logging.info("Containers: %s",
                 [container.to_json() for container in containers])
    logging.info("Found %d models and %d containers", len(models),
                 len(containers))

    # init reqs queues
    reqs_queues = {model.name: queue.Queue() for model in models}
    responses_list = {model.name: [] for model in models}

    # init policy
    queues_policies = QueuesPolicies(reqs_queues, responses_list, models,
                                     logging)
    gpu_policy = queues_policies.policies.get(gpu_queues_policy)
    cpu_policy = queues_policies.policies.get(cpu_queues_policy)
    logging.info("Policy for GPUs: %s", gpu_queues_policy)
    logging.info("Policy for CPUs: %s", cpu_queues_policy)

    # disable logging if verbose == 0
    logging.info("Verbose: %d", verbose)
    if verbose == 0:
        app.logger.disabled = True
        logging.getLogger('werkzeug').setLevel(logging.WARNING)

    # init dispatchers
    status = "Init dispatchers"
    logging.info(status)
    dispatcher_gpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.GPU)
    dispatcher_cpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.CPU)

    # start the send requests thread
    status = "Start send reqs thread"
    logging.info(status)
    log_consumer_threads_pool = ThreadPoolExecutor(
        max_workers=max_log_consumers)
    for i in range(max_log_consumers):
        log_consumer_threads_pool.submit(log_consumer)

    # start the queues consumer threads
    status = "Start queues consumer threads"
    logging.info(status)

    if list(filter(lambda c: c.device == Device.GPU and c.active, containers)):
        # threads that poll the app queues and dispatch to GPUs
        polling_gpu_threads_pool = ThreadPoolExecutor(max_workers=max_polling)
        for i in range(max_polling):
            polling_gpu_threads_pool.submit(queues_pooling, dispatcher_gpu,
                                            gpu_policy, max_consumers_gpu)

    if list(filter(lambda c: c.device == Device.CPU and c.active, containers)):
        # threads that poll the app queues and dispatch to CPUs
        pooling_cpu_threads_pool = ThreadPoolExecutor(max_workers=max_polling)
        for i in range(max_polling):
            pooling_cpu_threads_pool.submit(queues_pooling, dispatcher_cpu,
                                            cpu_policy, max_consumers_cpu)

    # start
    status = "Running"
    logging.info(status)
    return app
Example #25
def __init__(self, view):
    super().__init__()
    self.view = view
    self.model = Model()

def make_model(self):
    return Model(model=LogisticRegression(),
                 vectorizer=BagOfWordsAutoEncoder(num_epochs=1))
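The Model wrapper above pairs a text vectorizer with a classifier; BagOfWordsAutoEncoder is project-specific and not shown here. Assuming ordinary scikit-learn components, the same composition can be sketched with a standard Pipeline (CountVectorizer stands in for the project's vectorizer, and the sample texts are made up):

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Hypothetical stand-in for Model(model=..., vectorizer=...):
# vectorize raw text, then classify with logistic regression.
pipeline = Pipeline([
    ("vectorizer", CountVectorizer()),
    ("classifier", LogisticRegression(max_iter=1000)),
])

texts = ["great movie", "terrible plot", "loved it", "awful acting"]
labels = [1, 0, 1, 0]
pipeline.fit(texts, labels)
print(pipeline.predict(["really great acting"]))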
Пример #27
0
from visualizer.visualizer import Visualizer
from sklearn.preprocessing import label_binarize
from options.configer import Configer
import torch
import torch.nn as nn
from data.datarecorder import DataRecorder
from data.dataprober import DataProber
import utils
from models.model import Model
from options.test_options import TestOptions
from data.datasets import ISICDataset
from torch.utils.data import DataLoader
options = TestOptions()
logger = DataRecorder()
configer = Configer().get_configer()
args = options.get_args()
model = Model(args)
# load the previously trained model
model.load_model(args.date, args.time)
image_path = configer['testImagePath']
label_path = configer['testLabelPath']
test_csv = utils.get_csv_by_path_name(label_path)
dataprober = DataProber(image_path, test_csv[0])
# dataprober.get_size_profile()
# dataprober.get_type_profile()
# dataprober.get_data_difference()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
transforms = utils.get_transforms(args)
visualizer = Visualizer()
isic = ISICDataset(image_path, test_csv[0], transforms)
testdata_loader = DataLoader(isic, batch_size=args.batchsize)
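The test script stops after building testdata_loader; the evaluation loop itself is not included in the snippet. A minimal sketch of such a loop, assuming the loader yields (image, label) batches and that the underlying network is available as a plain torch module (the project's Model class may expose it differently):

import torch

def evaluate(net, loader, device):
    # Run the network over the loader without gradients and report accuracy.
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            preds = net(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / max(total, 1)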
Пример #28
0
import torch
import utils
from models.model import Model
from options.configer import Configer
from options.train_options import TrainingOptions
from data.datarecorder import DataRecorder
from data.dataprober import DataProber
from data.datasets import ISICDataset
from torch.utils.data import DataLoader
from data.autoaugment import *
from visualizer.visualizer import Visualizer
# model = torchvision.models.resnet18(pretrained=True).cuda()
options = TrainingOptions()
logger = DataRecorder()  # initialize the data recorder
visualizer = Visualizer()  # initialize the visualizer
args = options.get_args()  # parse the command-line arguments
auto_augment = AutoAugment()  # initialize the data augmenter
args.augment_policy = auto_augment.policy_detail  # record the augmentation policy
model = Model(args)  # build the model from the arguments
# continue training if date and time are specified
if args.date and args.time:
    model.load_model(args.date, args.time)
configer = Configer().get_configer()  # load the environment configuration
# dataprober.get_data_difference()
transforms = utils.get_transforms(args)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
image_path = configer['trainingImagePath']
label_path = configer['trainingLabelPath']
training_csv = utils.get_csv_by_path_name(label_path)
dataprober = DataProber(image_path, training_csv[0])  # initialize the data prober
isic = ISICDataset(image_path, training_csv[0], transforms)
isic.__assert_equality__()
trainingdata_loader = DataLoader(isic,
                                 batch_size=args.batchsize)  # batch size assumed to mirror the test script above
Пример #29
0
def make_model(self):
    return Model(model=AutoSklearnClassifier(),
                 vectorizer=Vectorizer(pca=True))
Пример #30
0
def train(args):
    # Get hardware device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Check if weights and biases integration is enabled.
    if args.wandb == 1:
        import wandb
        wandb.init(entity='surajpai',
                   project='FacialEmotionRecognition',
                   config=vars(args))

    # Get the dataset with "Training" usage.
    dataset = FER2013Dataset(args.data_path, "Training")

    # Randomly split the dataset into train and validation based on the specified train_split argument
    train_dataset, validation_dataset = torch.utils.data.random_split(
        dataset, [
            int(len(dataset) * args.train_split),
            len(dataset) - int(len(dataset) * args.train_split)
        ])

    logging.info(
        'Samples in the training set: {}\n Samples in the validation set: {} \n\n'
        .format(len(train_dataset), len(validation_dataset)))

    # Get class weights as inverse of frequencies from class occurences in the dataset.
    dataset_summary = dataset.get_summary_statistics()
    class_weights = (1 / dataset_summary["class_occurences"])
    class_weights = torch.Tensor(class_weights /
                                 np.sum(class_weights)).to(device)

    # Train loader and validation loader initialized with batch_size as specified and randomly shuffled
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True)
    val_loader = DataLoader(validation_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            pin_memory=True)

    # Model initialization
    model = torch.nn.DataParallel(Model(args.model_config)).to(device)

    # Set torch optimizer
    optimizer = torch.optim.Adam(model.parameters())

    # Get loss for training the network from the utils get_loss function
    criterion = get_loss(args, class_weights)
    bestLoss = float("inf")  # lowest validation loss seen so far; +inf so the first epoch always checkpoints

    # Create metric logger object
    metrics = Metrics(upload=args.wandb)

    # Define augmentation transforms, if --augment is enabled
    if args.augment == 1:
        transform = transforms.RandomChoice([
            transforms.RandomHorizontalFlip(p=0.75),
            transforms.RandomAffine(15,
                                    translate=(0.1, 0.1),
                                    scale=(1.2, 1.2),
                                    shear=15),
            transforms.ColorJitter()
        ])

    # Start iterating over the total number of epochs set by epochs argument
    for n_epoch in range(args.epochs):

        # Reset running metrics at the beginning of each epoch.
        metrics.reset()

        # Utils logger
        logging.info(' Starting Epoch: {}/{} \n'.format(n_epoch, args.epochs))
        '''

        TRAINING

        '''

        # Model in train mode for batch-norm and dropout related ops.
        model.train()

        # Iterate over each batch in the train loader
        for idx, batch in enumerate(tqdm(train_loader)):

            # Reset gradients
            optimizer.zero_grad()

            # Apply augmentation transforms, if --augment is enabled
            if args.augment == 1 and n_epoch % 2 == 0:
                batch = apply_transforms(batch, transform)

            # Move the batch to the device, needed explicitly if GPU is present
            image, target = batch["image"].to(device), batch["emotion"].to(
                device)

            # Run a forward pass over images from the batch
            out = model(image)

            # Calculate loss based on the criterion set
            loss = criterion(out, target)

            # Backward pass from the final loss
            loss.backward()

            # Update the optimizer
            optimizer.step()

            # Update metrics for this batch
            metrics.update_train({
                "loss": loss.item(),
                "predicted": out,
                "ground_truth": target
            })
        '''

        VALIDATION

        '''

        logging.info(' Validating on the validation split ... \n \n')

        # Model in eval mode.
        model.eval()

        # Set no grad to disable gradient saving.
        with torch.no_grad():

            # Iterate over each batch in the val loader
            for idx, batch in enumerate(val_loader):

                # Move the batch to the device, needed explicitly if GPU is present
                image, target = batch["image"].to(device), batch["emotion"].to(
                    device)

                # Forward pass
                out = model(image)

                # Calculate loss based on the criterion set
                loss = criterion(out, target)

                # Metrics and sample predictions updated for validation batch
                metrics.update_val({
                    "loss": loss.item(),
                    "predicted": out,
                    "ground_truth": target,
                    "image": image,
                    "class_mapping": dataset.get_class_mapping()
                })

        # Display metrics at the end of each epoch
        metrics.display()

        # Weight Checkpointing to save the best model on validation loss
        save_path = "./saved_models/{}.pth.tar".format(
            args.model_config.split('/')[-1].split('.')[0])
        bestLoss = min(bestLoss, metrics.metric_dict["loss@val"])
        is_best = (bestLoss == metrics.metric_dict["loss@val"])
        save_checkpoint(
            {
                'epoch': n_epoch,
                'state_dict': model.state_dict(),
                'bestLoss': bestLoss,
                'optimizer': optimizer.state_dict(),
            }, is_best, save_path)

    # After training is completed, if weights and biases is enabled, visualize filters and upload final model.
    if args.wandb == 1:
        visualize_filters(model.modules())
        wandb.save(save_path)

    # Get report from the metrics logger
    train_report, val_report = metrics.get_report()

    # Save the reports to CSV files; strip the ".pth.tar" suffix explicitly,
    # since str.rstrip removes a set of characters rather than a suffix
    report_prefix = save_path[:-len(".pth.tar")]
    train_report.to_csv("{}_trainreport.csv".format(report_prefix))
    val_report.to_csv("{}_valreport.csv".format(report_prefix))
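For completeness, a possible entry point that supplies the arguments read inside train(); the flag names follow the attributes accessed above (data_path, model_config, train_split, batch_size, epochs, augment, wandb), while the default values are only illustrative:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, required=True)
    parser.add_argument("--model_config", type=str, required=True)
    parser.add_argument("--train_split", type=float, default=0.8)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=50)
    parser.add_argument("--augment", type=int, default=0, choices=[0, 1])
    parser.add_argument("--wandb", type=int, default=0, choices=[0, 1])
    train(parser.parse_args())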