def run(dataset='mnist', batch_size=64, n_features=200, n_layers=6, n_bins=4,
        optimizer='adam', learnrate=1e-4, dropout=0.9, exp_name='pixelCNN',
        exp_dir='~/experiments/conditional-pixelcnn/', cuda=True,
        resume=False):

    exp_name += '_%s_%ifeat_%ilayers_%ibins'%(
        dataset, n_features, n_layers, n_bins)
    exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    # Data loaders
    train_loader, val_loader, onehot_fcn, n_classes = data.loader(dataset,
                                                                  batch_size)

    if not resume:
        # Store experiment params in params.json
        params = {'batch_size':batch_size, 'n_features':n_features,
                  'n_layers':n_layers, 'n_bins':n_bins, 'optimizer': optimizer,
                  'learnrate':learnrate, 'dropout':dropout, 'cuda':cuda}
        with open(os.path.join(exp_dir,'params.json'),'w') as f:
            json.dump(params,f)

        # Model
        net = model.PixelCNN(1, n_classes, n_features, n_layers, n_bins,
                             dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir,'params.json'))
                and os.path.isfile(os.path.join(exp_dir,'stats.json'))
                and os.path.isfile(os.path.join(exp_dir,'last_checkpoint'))):
            raise Exception('Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(torch.round((n_bins-1)*x).type(torch.LongTensor),1)
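    # With n_bins = 4, a pixel value of 0.7 maps to round(3 * 0.7) = 2, so each
    # input in [0, 1] becomes an integer bin label in {0, ..., n_bins - 1};
    # squeezing dim 1 gives NLLLoss2d its expected [batch, H, W] target shape.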
    loss_fcn = torch.nn.NLLLoss2d()

    # Train
    train.fit(train_loader, val_loader, net, exp_dir, input2label, loss_fcn,
              onehot_fcn, n_classes, optimizer, learnrate=learnrate, cuda=cuda,
              resume=resume)

    # Generate some between-class examples
    generate_between_classes(net, [28, 28], [1, 7],
                             os.path.join(exp_dir,'1-7.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [3, 8],
                             os.path.join(exp_dir,'3-8.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [4, 9],
                             os.path.join(exp_dir,'4-9.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [5, 6],
                             os.path.join(exp_dir,'5-6.jpeg'), n_classes, cuda)
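
# A minimal sketch of the conditioning assumed behind generate_between_classes
# above: average the one-hot vectors of the two target classes so the
# conditional PixelCNN is driven by an interpolated label (the real
# implementation of generate_between_classes is not shown here, so treat this
# as an illustration only).
import numpy as np

def blended_onehot(classes, n_classes):
    onehot = np.zeros(n_classes, dtype='float32')
    onehot[list(classes)] = 1.0 / len(classes)
    return onehot

print(blended_onehot([1, 7], n_classes=10))  # 0.5 weight on digits 1 and 7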
Example #2
def model_pipeline(train_dataset,
                   test_dataset,
                   batch_size,
                   num_epochs,
                   optimizer,
                   weights=None):
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=4)
    net = model.MaskDetector(train_dataset.df)
    if weights is not None:
        net.load_state_dict(
            torch.load(weights, map_location=lambda storage, loc: storage))
    print(
        summary(net,
                torch.zeros((1, 3, 100, 100)),
                show_input=False,
                show_hierarchical=True))
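    # summary() here (presumably pytorch-model-summary) prints a per-layer
    # parameter/output-shape table for a dummy 1x3x100x100 input batch.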
    model_net = train.fit(net,
                          train_loader,
                          test_loader,
                          num_epochs,
                          optimizer,
                          plot=True,
                          save=True,
                          checkpoint=True)
    net.visualize_conv2d_features('convLayer1', 'convLayer1')
    return model_net
Example #3
def run_experiments(finetune, kernel_sizes, filters, lr, pooling, weight_decay,
                    other_params):
    global embeddings_matrix, training_set, validation_set

    other_params['commit_hash'] = commit_hash

    (vocab_size, dimensions) = embeddings_matrix.shape
    net = models.Net(dimensions=dimensions,
                     finetune=finetune,
                     vocab_size=vocab_size,
                     kernel_sizes=kernel_sizes,
                     filters=filters,
                     dropout_rate=0.5,
                     pooling=pooling,
                     lr=lr,
                     weight_decay=weight_decay,
                     embeddings_matrix=embeddings_matrix)

    hyperparams = util.fill_dict(net.hyperparameters, other_params)
    logger.info('experiment with hyperparameters: {}'.format(
        json.dumps(hyperparams, sort_keys=True, indent=None)))

    with get_archiver(datadir='data/models',
                      suffix="_" + commit_hash[:6]) as a1, get_archiver(
                          datadir='data/results',
                          suffix="_" + commit_hash[:6]) as a:

        save_model(hyperparams, net, a.getFilePath)

        early_stopping = train.EarlyStopping(c.monitor, c.patience,
                                             c.monitor_objective)
        model_checkpoint = train.ModelCheckpoint(a1.getFilePath('checkpoint'))
        csv_logger = train.CSVLogger(a.getFilePath('logger.csv'))

        adam_config = train.AdamConfig(
            lr=net.hyperparameters['lr'],
            beta_1=net.hyperparameters['beta_1'],
            beta_2=net.hyperparameters['beta_2'],
            epsilon=net.hyperparameters['epsilon'],
            weight_decay=net.hyperparameters['weight_decay'])

        history = train.fit(
            net,
            training_set,
            validation_set,
            batch_size=c.batch_size,
            epochs=c.epochs,
            validation_split=0.2,
            callbacks=[early_stopping, model_checkpoint, csv_logger],
            optimizer=adam_config)

        save_history(history, a.getDirPath())

    return
def cross_validation_loop(X, Y, leave_out=5):
    assert 1 <= leave_out <= 5 and type(leave_out) == int
    n_features = X.shape[1]
    n_targets = Y.shape[1]
    assert len(X) == len(Y)
    averaged_scores = {
        'MSE': np.zeros((n_targets, )),
        'feature_grad': np.zeros((n_targets, n_features)),
        'feature_grad_abs': np.zeros((n_targets, n_features))
    }
    cross_validation_times = len(X) // leave_out
    for i in range(cross_validation_times):
        print("Leave %d out [%d/%d]" %
              (leave_out, i + 1, cross_validation_times))
        test_indices = [m for m in range(i * leave_out, (i + 1) * leave_out)]
        train_indices = [i for i in range(len(X)) if i not in test_indices]
        X_train = X[train_indices, :]
        Y_train = Y[train_indices, :]
        X_test = X[test_indices, :]
        Y_test = Y[test_indices, :]

        net = model.FCModel(n_in_features=X.shape[1],
                            n_out_features=Y.shape[1])
        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        net = net.to(device)
        params = {
            'n_epoch': 200,
            'lr': 1e-2,
            'beta': 0,
            'batch_size': 8,
        }
        train.fit(net, X_train, Y_train, params, verbose=False)
        scores = train.score(net, X_test, Y_test)
        for k, v in scores.items():
            averaged_scores[k] += scores[k] / cross_validation_times

    return averaged_scores
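
# Worked illustration of the fold construction in cross_validation_loop: with
# 10 samples and leave_out=5 there are len(X) // leave_out = 2 folds, and fold
# i holds out indices [i * leave_out, (i + 1) * leave_out):
#   fold 0: test [0, 1, 2, 3, 4]  train [5, 6, 7, 8, 9]
#   fold 1: test [5, 6, 7, 8, 9]  train [0, 1, 2, 3, 4]
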
def permutation_loop(X, Y, permutation_times=10):
    n_features = X.shape[1]
    n_targets = Y.shape[1]
    assert len(X) == len(Y)
    permutation_scores = {'performance_gain': np.zeros((n_targets, ))}
    for i in range(permutation_times):
        print("Permutation [%d/%d]" % (i + 1, permutation_times))
        X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
            X, Y, test_size=0.25)

        net = model.FCModel(n_in_features=X.shape[1],
                            n_out_features=Y.shape[1])
        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        net = net.to(device)
        params = {
            'n_epoch': 200,
            'lr': 1e-2,
            'beta': 0,
            'batch_size': 8,
        }
        train.fit(net, X_train, Y_train, params, verbose=False)
        scores_on_original = train.score(net, X_test, Y_test)

        perm_indices = np.arange(len(Y))
        np.random.shuffle(perm_indices)
        Y_shuffled = Y[perm_indices].copy()

        X_train, X_test, Y_shuffled_train, Y_shuffled_test = sklearn.model_selection.train_test_split(
            X, Y_shuffled, test_size=0.25)

        train.fit(net, X_train, Y_shuffled_train, params, verbose=False)
        scores_on_shuffled = train.score(net, X_test, Y_shuffled_test)
        permutation_scores["performance_gain"] += (
            scores_on_shuffled["MSE"] -
            scores_on_original["MSE"]) * (1 / permutation_times)
    return permutation_scores
Example #6
def start():
    # produce_data()
    model = Bert_CRF()
    print('create_iter')
    train_iter, num_train_steps = create_batch_iter("train")
    eval_iter = create_batch_iter("valid")
    print('create_iter finished')

    epoch_size = num_train_steps * args.train_batch_size * args.gradient_accumulation_steps / args.num_train_epochs
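    # epoch_size above recovers roughly the number of training examples per
    # epoch, assuming num_train_steps counts optimiser updates for the whole run:
    # steps * batch size * accumulation steps / number of epochs.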

    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)

    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         print(name)
    print('fit')

    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)
    percentages = [(1.0 / 7), (2.0 / 7), (3.0 / 7), (4.0 / 7), (5.0 / 7),
                   (6.0 / 7), 1]

    train_losses = []
    test_losses = []

    train_accs = []
    test_accs = []
    metrics = []
    for p in percentages:
        tidx = train_idx[np.random.permutation(int(p * train_idx.shape[0]))]

        best = fit(args.model_type,
                   dataset,
                   tidx,
                   val_idx,
                   device,
                   save,
                   args,
                   stopping=8)
        print(best)
        metrics.append(best)
    print(metrics)
    # n = len(train_losses)
    # xs = np.arange(n)

    # # plot losses
    # fig, ax = plt.subplots()
    # ax.plot(xs, train_losses, '--', linewidth=2, label='train')
    # ax.plot(xs, test_losses, '-', linewidth=2, label='validation')
    # ax.set_xlabel("Epoch")
    # ax.set_ylabel("Training Loss")
# Test Dataset & Loader
validset = Dataset(config.validdata_dir)
validloader = create_loader(dataset=validset,
                            input_size=(3, 224, 224),
                            batch_size=config.batch_size,
                            interpolation="bicubic",
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225),
                            num_workers=2,
                            crop_pct=1.0)
#validloader = torch.utils.data.DataLoader(validset, batch_size=32, shuffle=False, num_workers=2)
print("Loaded %d Train Images, %d Validation images" %
      (len(trainset), len(validset)))

# # Train Dataset & Loader
# trainset = Dataset(traindata_dir, transform = transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=config.batch_size, shuffle=True, num_workers=2, drop_last= True)

# # Test Dataset & Loader
# validset = Dataset(validdata_dir, transform = transform)
# validloader = torch.utils.data.DataLoader(validset, batch_size=config.batch_size, shuffle=False, num_workers=2)

# Tensorboard
train_writer = SummaryWriter('./checkpoint/logs/')

# Train, Validate
print("Start Training")
#fit(config.save_dir, train_writer, trainloader, validloader, model, model2, loss_fn, optimizer, optimizer_smoothing, scheduler, n_epochs, cuda, log_interval)
fit(config.save_dir, train_writer, trainloader, validloader, model, loss_fn,
    optimizer, scheduler, n_epochs, cuda, log_interval)
Example #9
def create_run_ensemble(model_state_list, 
                        n_layers,
                        grad_clip_value=5, 
                        seed=0, 
                        num_epochs=20,
                        learning_rate=0.001,
                        init_channels=get('init_channels'), 
                        batch_size=get('batch_size'), 
                        genotype_class='PCDARTS'):
    
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled=True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)
    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    genotype = eval("genotypes.%s" % genotype_class)
    dataset = {'trn_features': [], 'test_features': []}
    dims = []
    for i, model_state in enumerate(model_state_list):
        model = Network(init_channels, train_dataset.n_classes, n_layers, genotype)
        model.load_state_dict(torch.load(model_state))
        model.cuda()
        for p in model.parameters():
            p.requires_grad = False
        trn_labels = []
        trn_features = []
        if i == 0:
            for d,la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                trn_labels.extend(la)
                trn_features.extend(o.cpu().data)
            test_labels = []
            test_features = []
            for d,la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                test_labels.extend(la)
                test_features.extend(o.cpu().data)
            dataset['trn_labels'] = trn_labels
            dataset['test_labels'] = test_labels

        else:
            for d,la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                trn_features.extend(o.cpu().data)
            test_labels = []
            test_features = []
            for d,la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                test_features.extend(o.cpu().data)            
        dataset['trn_features'].append(trn_features)
        dims.append(dataset['trn_features'][i][0].size(0))
        dataset['test_features'].append(test_features)
    

    trn_feat_dset = FeaturesDataset(dataset['trn_features'][0],dataset['trn_features'][1],dataset['trn_features'][2],dataset['trn_labels'])
    test_feat_dset = FeaturesDataset(dataset['test_features'][0],dataset['test_features'][1],dataset['test_features'][2],dataset['test_labels'])
    trn_feat_loader = DataLoader(trn_feat_dset,batch_size=64,shuffle=True)
    test_feat_loader = DataLoader(test_feat_dset, batch_size=64)
    model = EnsembleModel(dims, out_size=train_dataset.n_classes)
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=0.9)
    
    for epoch in range(num_epochs):
        epoch_loss, epoch_accuracy = fit(epoch, model, trn_feat_loader, criterion, training=True)
        val_epoch_loss, val_epoch_accuracy = fit(epoch, model, test_feat_loader, criterion, training=False)


    if save_model_str:
        # Save the model checkpoint; restore by rebuilding the model and calling model.load_state_dict(torch.load(checkpoint_path))
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        
        torch.save(model.state_dict(), os.path.join(save_model_str, time.ctime())) 
Example #10
def grid_search_motifs(min_motifs,
                       max_motifs,
                       device=torch.device('cpu'),
                       num_epochs=30):
    total_motifs = (max_motifs - min_motifs + 1) * 5

    Seq_train, Seq_test, fam_train, fam_test, class_weights, family_set, family_counts = load_data(
    )

    parent_directory = 'Grid-Search'
    if not os.path.exists(parent_directory):
        os.mkdir(parent_directory)

    for min_len in range(min_motifs, max_motifs + 2):
        for max_len in range(min_len + 1, max_motifs + 2):
            motif_lengths = [*range(min_len, max_len)]
            motifs_per_length, remainder = divmod(total_motifs,
                                                  len(motif_lengths))
            num_motifs_of_length = [
                motifs_per_length + (0 if i <
                                     (len(motif_lengths) - remainder) else 1)
                for i, _ in enumerate(motif_lengths)
            ]

            print("Training for motif lengths: {} - {}".format(
                min_len, max_len - 1))
            net = ProtClassifier(len(alphabet), num_motifs_of_length,
                                 motif_lengths, len(family_set))

            results = fit(net, (Seq_train, fam_train), (Seq_test, fam_test),
                          num_epochs=num_epochs,
                          class_weights=class_weights,
                          device=device,
                          parent_directory=parent_directory)
            loss_history, accuracy_history, precision_history, \
            recall_history, f1_score_history, roc_auc_history, \
                mcc_history = results

            save_file_name = '{}/{}-{}'.format(parent_directory, min_len,
                                               max_len - 1)
            if not os.path.exists(save_file_name):
                os.mkdir(save_file_name)

            save_file_name = save_file_name + '/Metrics'
            with open(save_file_name + '.txt', 'w') as f:
                f.write("Loss\n")
                f.write(str(loss_history.tolist()))
                f.write("\nAccuracy\n")
                f.write(str(accuracy_history.tolist()))
                f.write("\nPrecision\n")
                f.write(str(precision_history.tolist()))
                f.write("\nRecall\n")
                f.write(str(recall_history.tolist()))
                f.write("\nF1-Score\n")
                f.write(str(f1_score_history.tolist()))
                f.write("\nROC-AUC\n")
                f.write(str(roc_auc_history.tolist()))
                f.write("\nMCC\n")
                f.write(str(mcc_history.tolist()))

            np.save(
                save_file_name,
                np.stack((loss_history, accuracy_history, precision_history,
                          recall_history, f1_score_history, roc_auc_history,
                          mcc_history)))
Example #11
def main():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    parser = argparse.ArgumentParser()
    parser.add_argument("--K", type=int, default=5, help="Number of topics")
    parser.add_argument(
        "--model",
        choices=["slda", "pfslda"],
        default="pfslda",
        help="Specify which model to train",
    )
    parser.add_argument("--p",
                        type=float,
                        default=0.15,
                        help="Value for the switch prior for pf-sLDA")
    parser.add_argument("--alpha",
                        type=bool,
                        default=True,
                        help="Specify if alpha is fixed")
    parser.add_argument(
        "--path",
        type=str,
        default=None,
        help="Path to saved model to load before training",
    )
    parser.add_argument("--lr",
                        type=float,
                        default=0.025,
                        help="Initial learning rate")
    parser.add_argument("--lambd",
                        type=float,
                        default=0,
                        help="Supervised task regularizer weight")
    parser.add_argument("--num_epochs",
                        type=int,
                        default=500,
                        help="Number of epochs to train")
    parser.add_argument(
        "--check",
        type=int,
        default=10,
        help="Number of epochs per stats check (print/save)",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--y_thresh",
        type=float,
        default=None,
        help="Threshold for yscore (RMSE or AUC) to save model.",
    )
    parser.add_argument(
        "--c_thresh",
        type=float,
        default=None,
        help="Threshold for topic coherence to save model.",
    )

    args = parser.parse_args()

    # make sure args valid
    if args.K < 1:
        raise ValueError("Invalid number of topics.")

    p = args.p
    if p > 1 or p < 0:
        raise ValueError("Invalid switch prior p.")
    p = torch.tensor(p).to(device)
    p = torch.log(p / (1 - p))
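    # map the switch prior from probability space to log-odds (logit) space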

    # load dataset and specify target type
    d = load_Pang_Lee()
    W = d["W"]
    W_val = d["W_val"]
    y = d["y"]
    y_val = d["y_val"]
    W_test = d["W_test"]
    y_test = d["y_test"]
    vocab = d["vocab"]
    version = "real"

    V = W.shape[1]
    M = W.shape[0]
    M_val = W_val.shape[0]

    # instantiate model
    if args.model == "slda":
        model = sLDA(args.K, V, M, M_val, args.alpha, device)
    elif args.model == "pfslda":
        model = pfsLDA(args.K, V, M, M_val, p, args.alpha, device)
    model.to(device)

    # load saved model if path specified
    if args.path:
        state_dict = torch.load(args.path, map_location=device)
        model.load_state_dict(state_dict)

    kwargs = {
        "W": W,
        "y": y,
        "lr": args.lr,
        "lambd": args.lambd,
        "num_epochs": args.num_epochs,
        "check": args.check,
        "batch_size": args.batch_size,
        "version": version,
        "W_val": W_val,
        "y_val": y_val,
        "device": device,
        "y_thresh": args.y_thresh,
        "c_thresh": args.c_thresh,
    }

    fit(model, **kwargs)
    print_topics(model, 10, vocab)
Example #12
import os
import json
import torch
import wandb

from train import fit, init_experiment


if __name__ == "__main__":
    os.environ['PYTHONWARNINGS'] = 'ignore:semaphore_tracker:UserWarning'

    config = json.load(open("config/local_config.json", "r"))
    init_experiment(config)

    print("------------------")
    print(config)
    print("------------------")

    config["device"] = torch.device(config["device"] if torch.cuda.is_available() else 'cpu')

    if config["wandb_enable"]:
        wandb.init(project=config["wandb_project"], entity=config["wandb_entity"], name=config["name_run"])

    fit(config)
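
# A minimal sketch of what config/local_config.json might contain, inferred
# from the keys this script reads (name_run, device, wandb_enable,
# wandb_project, wandb_entity); init_experiment/fit may expect further keys.
import json
import os

example_config = {
    "name_run": "baseline",         # hypothetical run name
    "device": "cuda:0",             # falls back to CPU when CUDA is unavailable
    "wandb_enable": False,          # set True to log to Weights & Biases
    "wandb_project": "my-project",  # hypothetical project / entity
    "wandb_entity": "my-team",
}

os.makedirs("config", exist_ok=True)
with open("config/local_config.json", "w") as f:
    json.dump(example_config, f, indent=2)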
Example #13
    #real_image = tf.divide(real_image,255.0)

    return input_image, real_image


#train_dataset = tf.data.Dataset.list_files(PATH+'train/*/*.jpg')
train_dataset = tf.data.Dataset.list_files(
    settings.config['paths']['train_dataset'])
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.map(
    load, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.batch(
    settings.config.getint('training', 'batch_size'))

test_dataset = tf.data.Dataset.list_files(
    settings.config['paths']['test_dataset'])
#test_dataset = test_dataset.shuffle(BUFFER_SIZE)
test_dataset = test_dataset.map(
    load, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(
    settings.config.getint('training', 'batch_size'))

logdir = os.path.join(settings.config.get('paths', 'tb_logs'),
                      settings.config.get('paths', 'log_tag'))
writer = tf.summary.create_file_writer(logdir)
writer.set_as_default()
pr = cProfile.Profile()

train.fit(train_dataset, test_dataset,
          settings.config.getint('training', 'epochs'))
Example #14
def run(batch_size=128,
        n_features=64,
        n_layers=6,
        n_scales=1,
        n_bins=16,
        exp_name='pixelCNN',
        exp_dir='/home/jason/experiments/pytorch_pixelcnn/',
        optimizer='adam',
        learnrate=1e-4,
        dropout=0.5,
        cuda=True,
        resume=False):

    exp_name += '_%ifeat_%iscales_%ilayers_%ibins' % (n_features, n_scales,
                                                      n_layers, n_bins)
    exp_dir = os.path.join(exp_dir, exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    if not resume:
        # Store experiment params in params.json
        params = {
            'batch_size': batch_size,
            'n_features': n_features,
            'n_layers': n_layers,
            'n_scales': n_scales,
            'n_bins': n_bins,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'dropout': dropout,
            'cuda': cuda
        }
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)

        # Model
        net = model.PixelCNN(1, n_features, n_layers, n_scales, n_bins,
                             dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception(
                'Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size)

    # Up-weight 1s (~8x rarer) to balance loss, interpolate intermediate values
    weight = torch.from_numpy(np.linspace(1, 8, n_bins, dtype='float32'))
    if cuda:
        weight = weight.cuda()

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(
            torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcn = torch.nn.NLLLoss2d(torch.autograd.Variable(weight))

    # Train
    train.fit(train_loader,
              val_loader,
              net,
              exp_dir,
              input2label,
              loss_fcn,
              optimizer,
              learnrate=learnrate,
              cuda=cuda,
              resume=resume)
Example #15
dataset = pd.read_csv(
    "/home/singh/PycharmProjects/MachineLearning_from_Scratch/Datasets/linear_regression"
    + "/one_var/train.csv")

print("Shape of dataset: " + str(dataset.shape))

data_x = dataset['x'][:50]
data_y = dataset['y'][:50]

data_x = data_x.to_numpy()
data_y = data_y.to_numpy()
data_x = data_x.reshape(len(data_x), 1)
data_y = data_y.reshape(len(data_y), 1)

train.fit()


# hypothesis function of the form y = c1 + c2*x
def h(c1, c2, xh):
    size = xh.shape[0]
    a1 = np.ones((size, 1), dtype=float)
    a1 = np.concatenate((a1, xh), axis=1)
    a2 = np.array(([c1], [c2]))
    yh = a1.dot(a2)

    return yh
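
# e.g. h(1.0, 2.0, np.array([[0.], [1.], [2.]])) builds the design matrix
# [[1, 0], [1, 1], [1, 2]] and returns [[1.], [3.], [5.]], i.e. y = 1 + 2*x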


def plot_all(para1, para2, x_data, y_data):
    y_pred = h(para1, para2, x_data)
Example #16
def run(dataset='mnist',
        n_samples=50000,
        n_bins=4,
        n_features=200,
        batch_size=64,
        n_layers=6,
        loss='standard',
        optimizer='adam',
        learnrate=1e-4,
        dropout=0.9,
        max_epochs=35,
        cuda=True,
        resume=False,
        exp_dir='out',
        note=''):

    # Data
    if dataset == 'mnistog':
        train_data, val_data, onehot_fcn, n_classes = data.get_loaders(
            'mnist', batch_size)
    if dataset == 'mnist':
        train_data, val_data, onehot_fcn, n_classes = data.get_sorted_data(
            'mnist', batch_size)

    if not resume:
        # Make dir
        exp_name = datetime.datetime.now().strftime("%m_%d_%y-%H_%M_%S")
        exp_name += '_{}_{}samples_{}_{}'.format(dataset, n_samples, loss,
                                                 note)
        print("Out directory: " + exp_name)
        exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
        if not os.path.isdir(exp_dir):
            os.makedirs(exp_dir)
        # Store experiment params in params.json
        params = {
            'data': dataset,
            'n_samples': n_samples,
            'loss': loss,
            'batch_size': batch_size,
            'n_features': n_features,
            'n_layers': n_layers,
            'n_bins': n_bins,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'dropout': dropout,
            'cuda': cuda,
            'note': note
        }
        print("Params: " + str(params.items()))
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)
        net = model.PixelCNN(1, n_classes, n_features, n_layers, n_bins,
                             dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception(
                'Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(
            torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcns = {
        'official': losses.official_loss_function,
        'standard': losses.standard_loss_function,
        'sum': losses.sum_loss_function,
        'min': losses.min_loss_function,
        'debug': torch.nn.NLLLoss()
    }
    loss_fcn = loss_fcns[loss]

    # Train
    train.fit(train_data,
              val_data,
              n_samples,
              net,
              exp_dir,
              input2label,
              loss_fcn,
              onehot_fcn,
              n_classes,
              optimizer,
              learnrate=learnrate,
              cuda=cuda,
              max_epochs=max_epochs,
              resume=resume)
Example #17
def run(
        pixelcnn_ckpt,
        vgg_ckpt=None,
        adversarial_range=0.2,
        train_dataset='mnist',
        test_dataset='emnist',
        img_size=28,
        vgg_params={
            'batch_size': 16,
            'base_f': 16,
            'n_layers': 9,
            'dropout': 0.8,
            'optimizer': 'adam',
            'learnrate': 1e-4
        },
        exp_name='domain-prior',
        exp_dir='~/experiments/domain-prior/',
        cuda=True,
        resume=False):

    # Set up experiment directory
    exp_name += '_%s-to-%s_vgg%i-%i_adv%.2f' % (
        train_dataset, test_dataset, vgg_params['n_layers'],
        vgg_params['base_f'], adversarial_range)
    exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    # Train a VGG classifier if not already done
    if vgg_ckpt is None:
        train_loader, val_loader, n_classes = data.loader(
            train_dataset, vgg_params['batch_size'])
        if not resume:
            with open(os.path.join(exp_dir, 'vgg_params.json'), 'w') as f:
                json.dump(vgg_params, f)
            vgg = model.VGG(img_size, 1, vgg_params['base_f'],
                            vgg_params['n_layers'], n_classes,
                            vgg_params['dropout'])
        else:
            vgg = torch.load(os.path.join(exp_dir, 'best_checkpoint'))

        train.fit(train_loader,
                  val_loader,
                  vgg,
                  exp_dir,
                  torch.nn.CrossEntropyLoss(),
                  vgg_params['optimizer'],
                  vgg_params['learnrate'],
                  cuda,
                  resume=resume)
    else:
        vgg = torch.load(vgg_ckpt)

    pixelcnn = torch.load(pixelcnn_ckpt)
    pixelcnn_params = os.path.join(os.path.dirname(pixelcnn_ckpt),
                                   'params.json')
    with open(pixelcnn_params, 'r') as f:
        pixelcnn_params = json.load(f)
    n_bins = pixelcnn_params['n_bins']

    if cuda:
        vgg = vgg.cuda()
        pixelcnn = pixelcnn.cuda()

    # Run the datasets through the networks and calculate 3 pixelcnn losses:
    # 1. Average: mean across the image
    # 2. High-pass filtered: weight by difference to upper- and left- neighbors
    # 3. Saliency: weight by pixel saliency (vgg backprop-to-input)
    _, loader, _ = data.loader(train_dataset, 1)
    print('Calculating losses for ' + train_dataset)
    dom_avg, dom_hp, dom_sw, dom_sal, dom_var = calc_losses(
        vgg, pixelcnn, loader, n_bins, cuda)
    print('Calculating losses for adversarial images')
    adv_avg, adv_hp, adv_sw, adv_sal, adv_var = adversarial(
        vgg, pixelcnn, loader, n_bins, adversarial_range, cuda)
    _, loader, _ = data.loader(test_dataset, 1)
    print('Calculating losses for ' + test_dataset)
    ext_avg, ext_hp, ext_sw, ext_sal, ext_var = calc_losses(
        vgg, pixelcnn, loader, n_bins, cuda)

    # Loss histograms
    n_bins = 100
    all_losses = np.concatenate((dom_avg, adv_avg, ext_avg, dom_hp, adv_hp,
                                 ext_hp, dom_sw, adv_sw, ext_sw))
    edges = np.linspace(0, np.percentile(all_losses, 95), n_bins + 1)
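    # bin edges span 0 to the 95th percentile of all losses so a few extreme
    # values do not stretch the histogram axes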
    # average loss
    vis.histogram(dom_avg, edges, train_dataset + ' average loss', exp_dir)
    vis.histogram(adv_avg, edges, 'adversarial average loss', exp_dir)
    vis.histogram(ext_avg, edges, test_dataset + ' average loss', exp_dir)
    # high-pass weighted loss
    vis.histogram(dom_hp, edges, train_dataset + ' highpass loss', exp_dir)
    vis.histogram(adv_hp, edges, 'adversarial highpass loss', exp_dir)
    vis.histogram(ext_hp, edges, test_dataset + ' highpass loss', exp_dir)
    # saliency weighted loss
    vis.histogram(dom_sw, edges, train_dataset + ' saliency loss', exp_dir)
    vis.histogram(adv_sw, edges, 'adversarial saliency loss', exp_dir)
    vis.histogram(ext_sw, edges, test_dataset + ' saliency loss', exp_dir)
    # loss variances
    loss_variances = np.concatenate((dom_var, adv_var, ext_var))
    edges = np.linspace(0, np.percentile(loss_variances, 95), n_bins + 1)
    vis.histogram(dom_var, edges, train_dataset + ' loss variance', exp_dir)
    vis.histogram(adv_var, edges, 'adversarial loss variance', exp_dir)
    vis.histogram(ext_var, edges, test_dataset + ' loss variance', exp_dir)

    # Calculate epistemic uncertainties for each dataset for each model
    _, loader, _ = data.loader(train_dataset, 1)
    dom_class_epi = epistemic(vgg, loader, cuda)
    adv_class_epi = epistemic_adversarial(vgg, adversarial_range, loader, cuda)
    _, loader, _ = data.loader(test_dataset, 1)
    ext_class_epi = epistemic(vgg, loader, cuda)

    # Classifier uncertainty histograms
    n_bins = 100
    all_class_epi = dom_class_epi + adv_class_epi + ext_class_epi
    edges = np.linspace(0, np.percentile(all_class_epi, 95), n_bins + 1)
    vis.histogram(dom_class_epi, edges,
                  train_dataset + ' classifier uncertainty', exp_dir)
    vis.histogram(adv_class_epi, edges, 'adversarial classifier uncertainty',
                  exp_dir)
    vis.histogram(ext_class_epi, edges,
                  test_dataset + ' classifier uncertainty', exp_dir)

    # ROC curves
    vis.roc(dom_avg, ext_avg, 'out-of-domain: average loss', exp_dir)
    vis.roc(dom_hp, ext_hp, 'out-of-domain: high-pass filtered loss', exp_dir)
    vis.roc(dom_sw, ext_sw, 'out-of-domain: saliency-weighted loss', exp_dir)
    vis.roc(dom_class_epi, ext_class_epi,
            'out-of-domain: epistemic uncertainty', exp_dir)
    vis.roc(dom_avg, adv_avg, 'adversarial: average loss', exp_dir)
    vis.roc(dom_hp, adv_hp, 'adversarial: high-pass filtered loss', exp_dir)
    vis.roc(dom_sw, adv_sw, 'adversarial: saliency-weighted loss', exp_dir)
    vis.roc(dom_class_epi, adv_class_epi, 'adversarial: epistemic uncertainty',
            exp_dir)
Example #18
        experiment.set_name(args.namestr)
        args.experiment = experiment

    # Because we all like reproducibility (...and also know where we keep our towels)
    # ------------------------------------------------------------------------------
    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)

    # Obtain and train our model here:
    # ------------------------------------------------------------------------------
    model, optim = get_model()
    if use_cuda:
        model.cuda()

    training_loader, validation_loader = _dataloader(args)

    # load trained model if necessary
    if args.load_dir is not None:
        model, optim, start_epoch = load_session(model, optim, args)
    else:
        start_epoch = 0

    fit(model, training_loader, validation_loader, optim, start_epoch, args)

    args.experiment.end()

    # ------------------------------------------------------------------------------
    # So Long, and Thanks for All the Fish!   >< ((('>    >< ((('>    >< ((('>
    # ------------------------------------------------------------------------------
Example #19
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=True,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )

    # Other parameters
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval or not.")
    parser.add_argument("--eval_on",
                        default="dev",
                        help="Whether to run eval on the dev set or test set.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--overwrite_output_dir",
                        action="store_true",
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--weight_decay",
                        default=0.01,
                        type=float,
                        help="Weight decay to apply, if any.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--fp16_opt_level',
        type=str,
        default='O1',
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()

    # ------------------Parameter Valid Check-------------------------------------------
    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    processors = {"CoNLL2003_NER": CoNLL2003NerProcessor}
    # ------------------Prepare Data-------------------------------------------
    task_name = args.task_name
    if task_name not in processors:
        raise ValueError("Task not found: %s" % task_name)

    data_processor = processors[task_name]()
    label_list = data_processor.get_labels()
    num_labels = len(label_list)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
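    # per-step batch size = requested batch size / accumulation steps, so each
    # accumulated update still covers train_batch_size examples in total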

    train_iter = None
    num_train_optimization_steps = 0
    if args.do_train:
        train_examples = data_processor.get_train_examples(args.data_dir)
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size(
            )
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        train_iter = prepare_data_loader(train_features, args, 'train')

    eval_examples = data_processor.get_dev_examples(args.data_dir)
    eval_features = convert_examples_to_features(eval_examples, label_list,
                                                 args.max_seq_length,
                                                 tokenizer)
    eval_iter = prepare_data_loader(eval_features, args, 'eval')

    # Prepare model
    if args.do_train:
        config = BertConfig.from_pretrained(args.bert_model,
                                            num_labels=num_labels,
                                            finetuning_task=args.task_name,
                                            output_hidden_states=True)
        model = BertNer.from_pretrained(args.bert_model, config=config)
        fit(model, train_iter, eval_iter, num_train_optimization_steps,
            label_list, args)
    else:
        # Load a trained model and vocabulary that you have fine-tuned
        model = BertNer.from_pretrained(args.output_dir)
        fit(model, train_iter, eval_iter, num_train_optimization_steps,
            label_list, args)
# In[6]:

# for name,param in model.named_parameters():
#     param.requires_grad = True
# opt = optim.Adam(model.parameters())

# Train the model. We automatically save the model with the lowest val_loss. If you want to continue the training and keep the loss history, just pass it as an additional argument as shown below.

# In[7]:

#!export CUDA_LAUNCH_BLOCKING = 1;

# In[8]:

model, val_hist = fit(10, model, custom_loss, opt, train_dl, valid_dl)

# In[9]:

# model, val_hist = fit(1, model, custom_loss, opt, train_dl, valid_dl, val_hist=val_hist)

# In[10]:

val_hist

# In[11]:

plt.plot(val_hist)

# #### evalute the model
Example #21
    # For songs sampling
    "TEMPERATURE": 1,
    "TAKE_MAX_PROBABLE": False,
    "LIMIT_LEN": 300
}
print(config)

# model = VanillaRNN(config["VOCAB_SIZE"], config["HIDDEN"], config["VOCAB_SIZE"]).to(get_device())
model = LSTMSimple(config["VOCAB_SIZE"], config["HIDDEN"],
                   config["VOCAB_SIZE"]).to(get_device())

criterion = CrossEntropyLoss()

# Fit Model
fit(model, train_encoded, val_encoded, config)

# Report NLL for validation and test
nll_val = negative_log_likelihood(model, val_encoded, criterion, config)
nll_test = negative_log_likelihood(model, test_encoded, criterion, config)
print("NLL Validation: {}".format(nll_val))
print("NLL Test: {}".format(nll_test))

# Save error plot to file
save_loss_graph(model)

# Save model to file
print("Saving model...")
now = datetime.now().strftime('%Y-%m-%d-%H-%M')
torch.save(model.state_dict(), "model" + now + ".pth")
print("Saved!")
Example #22
        device=device,
        order=args.order,
    ),
    ModelCheckpoint(filepath=PATH + f'/models/maml/{param_str}.pth',
                    monitor=f'val_{args.n}-shot_{args.k}-way_acc'),
    ReduceLROnPlateau(patience=10, factor=0.5, monitor=f'val_loss'),
    CSVLogger(PATH + f'/logs/maml/{param_str}.csv'),
]

fit(
    meta_model,
    meta_optimiser,
    loss_fn,
    epochs=args.epochs,
    dataloader=background_taskloader,
    prepare_batch=prepare_meta_batch(args.n, args.k, args.q,
                                     args.meta_batch_size),
    callbacks=callbacks,
    metrics=['categorical_accuracy'],
    fit_function=meta_gradient_step,
    fit_function_kwargs={
        'n_shot': args.n,
        'k_way': args.k,
        'q_queries': args.q,
        'train': True,
        'order': args.order,
        'device': device,
        'inner_train_steps': args.inner_train_steps,
        'inner_lr': args.inner_lr
    },
)
Example #23
def train_model(exp_name, train_tfrecord, val_tfrecord, dictionary_file,
                n_hidden, learn_rate, batch_size, decouple_split=200,
                patience=10, max_epochs=200, sample_length=16, resume=False):
    """
    Train a GRU on some text data

    :param exp_name: experiment name (saved to ~/experiments/story-gen/exp_name)
    :param train_tfrecord: path to tfrecord of training set
    :param val_tfrecord: path to tfrecord of validation set
    :param dictionary_file: path to dictionary json file
    :param n_hidden: number of hidden units in GRU
    :param learn_rate: learning rate
    :param batch_size: batch size
    :param decouple_split: subsequence length between decoupled neural interface
                           or None to not use decoupled neural interfaces
    :param patience: early stopping limit
    :param max_epochs: maximum number of epochs to run
    :param sample_length: length of sample to generate after each epoch
    :param resume: resume from previous run
    :return:
    """

    exp_dir = os.path.join(os.path.expanduser('~/experiments/story-gen/'),
                           exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    with open(dictionary_file,'r') as f:
        reverse_dict = json.load(f)  # word -> int
    reverse_dict = {v+1:k for k,v in reverse_dict.items()}  # int -> word
    # note: sequences are padded with zero, add to dict_size (for embedding)
    reverse_dict[0] = '_END_'  # this should be removed from sampled output
    dict_size = max(reverse_dict.keys())+1

    if not resume:
        max_sequence = 20000 if decouple_split is not None else 100
        pipeline = Vector_Pipeline(train_tfrecord, val_tfrecord, batch_size,
                                   max_sequence=max_sequence)
        init_train, init_val = pipeline.init_train, pipeline.init_val

        model_input = tf.placeholder_with_default(pipeline.output[:,:-1],
                                                  [None, None], 'input')

        # Embedding
        embedding = orthogonal([dict_size, n_hidden], 'embedding')
        embedded_input = tf.nn.embedding_lookup(embedding, model_input)
        int_label = pipeline.output[:,1:]

        # Decoupled neural interface (optional)
        decoupled = decouple_split is not None
        if decoupled:
            # Split subsequences, reshape to [slow_time, batch, fast_time, feat]
            seq_len = tf.shape(embedded_input)[1]
            # pad so sequence length is divisible by subsequence length
            pad_len = decouple_split-tf.mod(seq_len,tf.constant(decouple_split))
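            # note: when seq_len is already a multiple of decouple_split this
            # pads one full extra block of decouple_split zeros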
            embedded_input = tf.pad(embedded_input, [[0,0], [0,pad_len], [0,0]],
                                    mode='CONSTANT', constant_values=0)
            int_label = tf.pad(int_label, [[0,0], [0,pad_len]])
            # batch x features x time
            dni_input = tf.transpose(embedded_input, [0,2,1])
            # batch x features x slow_time x fast_time
            dni_input = tf.reshape(
                dni_input,
                [-1, n_hidden,
                 (seq_len+pad_len)//decouple_split, decouple_split])
            # fast_time x features x batch x slow_time
            dni_input = tf.transpose(dni_input, [3,1,0,2])
            # fast_time x features x (batch x slow_time)
            dni_input = tf.reshape(dni_input, [decouple_split, n_hidden, -1])
            # (batch x slow_time) x fast_time x features
            dni_input = tf.transpose(dni_input, [2,0,1])
            # (batch x slow_time) x (fast_time x features)
            dni_input = tf.reshape(dni_input, [tf.shape(dni_input)[0],-1])

            # Decoupled neural interface: simplify to single dense layer
            dni = Dense(dni_input, n_hidden, tf.nn.relu, name='dni',
                        init='uniform', n_in=n_hidden*decouple_split)

            # Reshape DNI out & embedded_input to new_batch x fast_time for GRU
            gru_hidden = tf.reshape(dni.output, [-1, n_hidden])
            embedded_input = tf.reshape(embedded_input,
                                        [-1, decouple_split, n_hidden])
            int_label = tf.reshape(int_label, [-1, decouple_split])
        else:
            gru_hidden = None

        # model part2: GRU
        # transpose: tf.scan needs time x batch x features
        embedded_input = tf.transpose(embedded_input, [1,0,2])
        training_toggle = tf.placeholder(tf.int32, name='training_toggle')
        gru = GRU(embedded_input, n_hidden, training_toggle, h0=gru_hidden,
                  name='gru')
        gru_h0 = gru.h0
        gru_output = gru.output
        # model part3: dropout and dense layer
        dropout_rate = tf.placeholder(tf.float32, name='dropout_rate')
        dropped = tf.nn.dropout(gru_output, 1-dropout_rate)
        dense = Dense(dropped, dict_size)
        model_output = tf.identity(dense.output, 'output')

        # cross-entropy loss
        # note: sequences padded with -1, mask these entries
        mask = tf.not_equal(int_label, -1)
        # swap -1's to avoid error in loss fcn, even though we're ignoring these
        int_label = tf.where(mask, int_label, tf.zeros_like(int_label))
        # mean over entries with mask==1
        mask = tf.cast(mask, dtype=tf.float32)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=int_label, logits=model_output)
        loss = tf.reduce_sum(mask*loss)/tf.reduce_sum(mask)

        if decoupled:
            # decoupled neural interface loss
            dni_label = tf.stop_gradient(gru.output)
            dni_loss = tf.reduce_mean(tf.square(dni_label-dni.output),
                                      name='dni_loss')
        else:
            dni_loss = tf.constant(0., dtype=tf.float32)

        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            loss+dni_loss,name='train_step')
    else:
        (model_input, training_toggle, dropout_rate, train_step, init_train,
         init_val, loss, dni_loss, gru_output, gru_h0, model_output
            ) = reload_graph(exp_dir)
    n_examples = tf.shape(model_input)[0]

    sampled_out = tf.multinomial(model_output[0,:1,:],num_samples=1)
    def epoch_callback(sess):
        # TODO: not sure how to initialize this since it's usually from the DNI
        h0 = np.random.rand(1, n_hidden)
        sampled_text = [np.random.randint(0,dict_size,size=(1,1))]
        for i in range(sample_length+1):
            out,h0 = sess.run([sampled_out, gru_output],
                              feed_dict={gru_h0:h0,
                                         model_input:sampled_text[i],
                                         dropout_rate:0,
                                         training_toggle:0})
            h0 = h0[0]
            sampled_text.append(out)
        sampled_text = sampled_text[1:]
        # temp bugfix: screwed up the reverse dictionary, missing keys
        if any([int(o) not in reverse_dict.keys() for o in sampled_text]):
            sampled_text = [
                o if int(o) in reverse_dict.keys()
                else int(np.random.choice(list(reverse_dict.keys())))
                for o in sampled_text]
        print(' '.join([reverse_dict[int(o)] for o in sampled_text]))
        print('')

    fit(training_toggle, dropout_rate, train_step, init_train, init_val, loss,
        dni_loss, n_examples, patience, max_epochs, exp_dir, epoch_callback,
        resume)
Example #24
            train_dataloader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True, num_workers=12, pin_memory=True, drop_last=True)
            val_dataloader = DataLoader(val_dataset, batch_size=args['batch_size'], num_workers=12, pin_memory=True)
            V = len(dataset.vocab.keys())
            P = len(dataset.pos_set.keys())

            model, criterion, optimizer = prepare_model(
                V, 
                P, 
                args['embed'],
                args['hidden'], 
                args['layers'], 
                args['nhead'], 
                dropout=args['dropout'],
                smoothing=args['label_smoothing'], 
                lr=args['lr'], 
                device=device    
            )

            best_loss, best_jacc = fit(model, train_dataloader, val_dataloader, criterion, optimizer, device, args['epoch'], model_prefix + '_' + str(i))
            fold_stats.append([best_loss, best_jacc])

            print('Fold {} - Best Loss: {}, Best Jacc: {}'.format(i, best_loss, best_jacc))
        fold_stats = np.array(fold_stats)
        mean = np.mean(fold_stats, axis=0)
        
        std = np.std(fold_stats, axis=0)

        print(mean)
        print(std)

Example #25
    def test_build(self):
        fit("fake")

        assert True
Example #26
def fit(self, input_path, model):
    data = train.prepare(input_path)
    self.func = train.fit(data, model)
    return self.func