Example #1
0
def train_and_report(model_name, kernel, warp, ard):
    dataset_dir = os.path.join(MODEL_DIR, DATASET)
    try: 
        os.makedirs(dataset_dir)
    except OSError:
        print "skipping output folder"
    for fold in xrange(10):
        fold_dir = os.path.join(SPLIT_DIR, DATASET, str(fold))
        train_data = np.loadtxt(os.path.join(fold_dir, 'train'))
        test_data = np.loadtxt(os.path.join(fold_dir, 'test'))
        params_file = None
        output_dir = os.path.join(dataset_dir, str(fold))
        try: 
            os.makedirs(output_dir)
        except OSError:
            print "skipping output folder"

        if ard:
            iso_dir = output_dir.replace('True', 'False')
            params_file = os.path.join(iso_dir, 'params')
        gp = util.train_gp_model(train_data, kernel, warp, ard, params_file)
        util.save_parameters(gp, os.path.join(output_dir, 'params'))
        util.save_gradients(gp, os.path.join(output_dir, 'grads'))
        metrics = util.get_metrics(gp, test_data)



        util.save_metrics(metrics, os.path.join(output_dir, 'metrics'))
        util.save_cautious_curves(gp, test_data, os.path.join(output_dir, 'curves'))
        util.save_predictions(gp, test_data, os.path.join(output_dir, 'preds'))

        asym_metrics = util.get_asym_metrics(gp, test_data)
        util.save_asym_metrics(asym_metrics, os.path.join(output_dir, 'asym_metrics'))
        gc.collect(2) # buggy GPy has allocation cycles...
Example #2
0
def train_and_report(model_name, kernel, warp, ard, likelihood='gaussian'):
    dataset_dir = os.path.join(MODEL_DIR, DATASET)
    try: 
        os.makedirs(dataset_dir)
    except OSError:
        print "skipping output folder"
    for fold in xrange(10):
        fold_dir = os.path.join(SPLIT_DIR, DATASET, str(fold))
        train_data = np.loadtxt(os.path.join(fold_dir, 'train'))
        test_data = np.loadtxt(os.path.join(fold_dir, 'test'))
        output_dir = os.path.join(dataset_dir, str(fold))
        params_file = None
        if ard:
            iso_dir = output_dir.replace('True', 'False')
            params_file = os.path.join(iso_dir, 'params')
        gp = util.train_gp_model(train_data, kernel, warp, ard, params_file, likelihood=likelihood)
        metrics = util.get_metrics(gp, test_data)

        try: 
            os.makedirs(output_dir)
        except OSError:
            print "skipping output folder"
        util.save_parameters(gp, os.path.join(output_dir, 'params'))
        util.save_metrics(metrics, os.path.join(output_dir, 'metrics'))
        #util.save_gradients(gp, os.path.join(output_dir, 'grads'))
        util.save_cautious_curves(gp, test_data, os.path.join(output_dir, 'curves'))
        util.save_predictions(gp, test_data, os.path.join(output_dir, 'preds'))
Example #3
0
def main(args):
    """Train an SNLI classifier, logging metrics and checkpoints per epoch."""
    os.makedirs(args.exp_dir, exist_ok=True)

    # ==== LOAD DATA ====
    max_data = 1000 if args.debug else None
    train = SNLI("data/snli_1.0/", "train", max_data=max_data)
    val = SNLI("data/snli_1.0/", "dev", max_data=max_data,
               vocab=(train.stoi, train.itos))

    shared_kwargs = dict(batch_size=100, num_workers=0, collate_fn=pad_collate)
    dataloaders = {
        "train": DataLoader(train, shuffle=True, pin_memory=False,
                            **shared_kwargs),
        "val": DataLoader(val, shuffle=False, pin_memory=True,
                          **shared_kwargs),
    }

    # ==== BUILD MODEL ====
    model = build_model(len(train.stoi), args.model_type,
                        embedding_dim=args.embedding_dim,
                        hidden_dim=args.hidden_dim)
    if args.cuda:
        model = model.cuda()

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    # Per-epoch series live in the defaultdict lists; best-* are scalars.
    metrics = defaultdict(list)
    metrics["best_val_epoch"] = 0
    metrics["best_val_acc"] = 0
    metrics["best_val_loss"] = np.inf

    # Snapshot the untrained model.
    util.save_checkpoint(serialize(model, train), False, args.exp_dir,
                         filename="0.pth")

    # ==== TRAIN ====
    for epoch in range(args.epochs):
        train_metrics = run("train", epoch, model, optimizer, criterion,
                            dataloaders, args)
        val_metrics = run("val", epoch, model, optimizer, criterion,
                          dataloaders, args)

        for key, value in train_metrics.items():
            metrics[f"train_{key}"].append(value)
        for key, value in val_metrics.items():
            metrics[f"val_{key}"].append(value)

        is_best = val_metrics["acc"] > metrics["best_val_acc"]
        if is_best:
            metrics["best_val_epoch"] = epoch
            metrics["best_val_acc"] = val_metrics["acc"]
            metrics["best_val_loss"] = val_metrics["loss"]

        util.save_metrics(metrics, args.exp_dir)
        util.save_checkpoint(serialize(model, train), is_best, args.exp_dir)
        if epoch % args.save_every == 0:
            util.save_checkpoint(serialize(model, train), False, args.exp_dir,
                                 filename=f"{epoch}.pth")
Example #4
0
    # Fit the transfer-learning classifier on the cached bottleneck features.
    print("Starting training for %s steps max" % args.num_steps)
    classifier.fit(
        x=train_bottlenecks.astype(np.float32),
        y=train_ground_truth, batch_size=10,
        max_steps=args.num_steps)

    # We've completed our training, so run a test evaluation on some new images we haven't used before.
    test_bottlenecks, test_ground_truth, image_paths = util.get_all_cached_bottlenecks(
                                                          sess, image_lists, 'testing',
                                                          args.bottleneck_dir, args.image_dir, jpeg_data_tensor,
                                                          bottleneck_tensor)
    test_bottlenecks = np.array(test_bottlenecks)
    test_ground_truth = np.array(test_ground_truth)
    print("evaluating....")
    classifier.evaluate(test_bottlenecks.astype(np.float32), test_ground_truth)

    # write the output labels file if it doesn't already exist
    if gfile.Exists(output_labels_file):
      print("Labels list file already exists; not writing.")
    else:
      # Persist label names as a JSON list so inference can decode outputs.
      output_labels = json.dumps(list(image_lists.keys()))
      with gfile.FastGFile(output_labels_file, 'w') as f:
        f.write(output_labels)

    print("\nSaving metrics...")
    # NOTE(review): exemplars/all_label_names come from the enclosing scope,
    # which is not visible in this chunk.
    util.save_metrics(args, classifier, test_bottlenecks.astype(np.float32), all_label_names, test_ground_truth, \
                      image_paths, image_lists, exemplars)

    util_plot.plot_metrics(args.model_dir)

    print("Done !")
Example #5
0
def main(args):
    """Train an IMDB sentiment model, logging metrics and checkpoints per epoch."""
    os.makedirs(args.exp_dir, exist_ok=True)

    # ==== LOAD DATA ====
    train = IMDB("data/imdb/", "train")
    # FIXME: sample a real dev set later (this isn't too bad because we are not
    # trying to optimize for test set perf at all)
    val = IMDB("data/imdb/", "test")

    shared_kwargs = dict(batch_size=100, pin_memory=True, num_workers=0,
                         collate_fn=pad_collate)
    dataloaders = {
        "train": DataLoader(train, shuffle=True, **shared_kwargs),
        "val": DataLoader(val, shuffle=False, **shared_kwargs),
    }

    # ==== BUILD MODEL ====
    model = build_model(len(train.stoi), args.model_type,
                        embedding_dim=args.embedding_dim,
                        hidden_dim=args.hidden_dim)
    if args.cuda:
        model = model.cuda()

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()

    # Per-epoch series live in the defaultdict lists; best-* are scalars.
    metrics = defaultdict(list)
    metrics["best_val_epoch"] = 0
    metrics["best_val_acc"] = 0
    metrics["best_val_loss"] = np.inf

    # Snapshot the untrained model.
    util.save_checkpoint(serialize(model, train), False, args.exp_dir,
                         filename="0.pth")

    # ==== TRAIN ====
    for epoch in range(args.epochs):
        train_metrics = run("train", epoch, model, optimizer, criterion,
                            dataloaders, args)
        val_metrics = run("val", epoch, model, optimizer, criterion,
                          dataloaders, args)

        for key, value in train_metrics.items():
            metrics[f"train_{key}"].append(value)
        for key, value in val_metrics.items():
            metrics[f"val_{key}"].append(value)

        is_best = val_metrics["acc"] > metrics["best_val_acc"]
        if is_best:
            metrics["best_val_epoch"] = epoch
            metrics["best_val_acc"] = val_metrics["acc"]
            metrics["best_val_loss"] = val_metrics["loss"]

        util.save_metrics(metrics, args.exp_dir)
        util.save_checkpoint(serialize(model, train), is_best, args.exp_dir)
        if epoch % args.save_every == 0:
            util.save_checkpoint(serialize(model, train), False, args.exp_dir,
                                 filename=f"{epoch}.pth")
Example #6
0
File: train.py  Project: jayelm/m2vae
                          random_state=random)

        # Append this epoch's train/val metrics to their running series.
        for metric, value in train_metrics.items():
            try:
                metrics['train_{}'.format(metric)].append(value)
            except KeyError:
                pass  # Could be missing due to resuming from older code
        for metric, value in val_metrics.items():
            try:
                metrics['val_{}'.format(metric)].append(value)
            except KeyError:
                pass
        metrics['current_epoch'] = epoch

        # Track the best epoch by validation F1.
        is_best = val_metrics['f1'] > metrics['best_f1']
        if is_best:
            metrics['best_f1'] = val_metrics['f1']
            metrics['best_loss'] = val_metrics['loss']
            metrics['best_epoch'] = epoch

        # Save model
        util.save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }, is_best, args.exp_dir)

        # Save metrics
        util.save_metrics(metrics, args.exp_dir)
def main(file_path, batch_size, base_model, num_epochs):
    """Train movie sentiment model.

    Fine-tunes a RoBERTa sentiment classifier on IMDB, writing the best
    checkpoint ('model.pt') and the loss curves ('metrics.pt') under
    ``file_path``.

    Args:
        file_path: output directory for the checkpoint/metrics files.
        batch_size: minibatch size for the bucket iterators.
        base_model: HuggingFace model name for the tokenizer and encoder.
        num_epochs: number of passes over the training set.
    """
    print("Initializing models")

    tokenizer = RobertaTokenizerFast.from_pretrained(base_model)
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = RoBERTaSentimentClassifier(device=device, base_model=base_model)

    print(f"Using device {model.device}")

    train_cache = Path(".data/cache/train_data")
    val_cache = Path(".data/cache/validate_data")

    if train_cache.exists() and val_cache.exists():
        print("Load cached datasets")
        train = load_cached_dataset(train_cache)
        val = load_cached_dataset(val_cache)
    else:
        print("Generating datasets")
        PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
        UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

        # Text is pre-tokenized by the RoBERTa tokenizer, hence use_vocab=False.
        TEXT = data.Field(use_vocab=False,
                          include_lengths=False,
                          batch_first=True,
                          lower=False,
                          fix_length=512,
                          tokenize=tokenizer.encode,
                          pad_token=PAD_INDEX,
                          unk_token=UNK_INDEX)

        LABEL = data.LabelField()

        # make splits for data
        train, test = datasets.IMDB.splits(TEXT, LABEL)

        LABEL.build_vocab(train)

        # Carve a validation set (10%) out of the IMDB test split.
        test, val = test.split(split_ratio=0.9)

        print("Cache train and validate sets")

        save_cached_dataset(train, train_cache)
        save_cached_dataset(val, val_cache)

    print("Prepare dataset iterators")
    # make iterator for splits
    train_iter, val_iter = data.BucketIterator.splits((train, val),
                                                      batch_size=batch_size,
                                                      device=device)

    # Debug sanity pass: report any batch whose text/label sizes disagree.
    for batch in val_iter:
        if batch.text.shape[0] != batch.label.shape[0]:
            print(batch)

    # initialize running values
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []
    best_valid_loss = float("Inf")

    model.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

    # Debug: show one training batch before the loop starts.
    for item in train_iter:
        print(item)
        break

    print("Start training")
    for epoch in range(1, num_epochs + 1):

        print(f"Epoch {epoch}")

        train_iter.init_epoch()
        val_iter.init_epoch()

        for i, (text, labels) in enumerate(tqdm(train_iter, desc="train")):
            labels = labels.type(torch.LongTensor)
            labels = labels.to(device)
            output = model(text, labels)

            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update running values
            running_loss += loss.item()
            global_step += 1

        model.eval()
        with torch.no_grad():

            answers = []

            # validation loop
            for i, (text, labels) in enumerate(tqdm(val_iter,
                                                    desc="validate")):
                labels = labels.type(torch.LongTensor)
                labels = labels.to(device)
                output = model(text, labels)
                loss, preds = output

                correct = torch.argmax(preds, dim=1) == labels

                answers.extend(correct.cpu().tolist())

                valid_running_loss += loss.item()

            average_accuracy = sum([1 for a in answers if a]) / len(answers)

            # BUGFIX: the running sums accumulate one term per batch, so
            # average per batch count — not by epoch index or a magic "10".
            average_train_loss = running_loss / len(train_iter)
            average_valid_loss = valid_running_loss / len(val_iter)
            train_loss_list.append(average_train_loss)
            valid_loss_list.append(average_valid_loss)
            global_steps_list.append(global_step)

            # resetting running values
            running_loss = 0.0
            valid_running_loss = 0.0
            model.train()

            # print progress (BUGFIX: epoch already starts at 1, print as-is)
            print(
                'Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}, Valid Acc: {:.4f}'
                .format(epoch, num_epochs, global_step,
                        num_epochs * len(train_iter), average_train_loss,
                        average_valid_loss, average_accuracy))

            # checkpoint whenever validation loss improves
            if best_valid_loss > average_valid_loss:
                best_valid_loss = average_valid_loss
                save_checkpoint(file_path + '/' + 'model.pt', model,
                                best_valid_loss)
                save_metrics(file_path + '/' + 'metrics.pt', train_loss_list,
                             valid_loss_list, global_steps_list)

    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list,
                 valid_loss_list, global_steps_list)
    print('Finished Training!')
Example #8
0
def train_net(model, args):
    """Train a segmentation model with validation-based checkpointing and
    early stopping.

    Args:
        model: the network to train (wrapped in DataParallel when several
            GPU device ids are configured).
        args: namespace providing data_path, epochs, gpu, classes, width,
            height, device1..device4, and model (one of 'UNet', 'SegNet',
            'DenseNet').

    Returns:
        loss_history: best validation loss recorded after each epoch.

    Raises:
        SystemExit: if gpu mode is requested but CUDA is unavailable.
        ValueError: if args.model names an unknown architecture.
    """
    data_path = args.data_path
    num_epochs = args.epochs
    gpu = args.gpu
    n_classes = args.classes
    data_width = args.width
    data_height = args.height

    # set device configuration
    device_ids = []

    if gpu == 'gpu':
        if not torch.cuda.is_available():
            print("No cuda available")
            raise SystemExit

        device = torch.device(args.device1)
        device_ids.append(args.device1)

        # device2..device4 default to -1 meaning "unused".
        if args.device2 != -1:
            device_ids.append(args.device2)
        if args.device3 != -1:
            device_ids.append(args.device3)
        if args.device4 != -1:
            device_ids.append(args.device4)
    else:
        device = torch.device("cpu")

    if len(device_ids) > 1:
        model = nn.DataParallel(model, device_ids=device_ids)

    model = model.to(device)

    # split images into training and validation subsets
    train_dataset = SampleDataset(data_path)

    print('total image : {}'.format(len(train_dataset)))

    train_indices, val_indices = train_test_split(np.arange(
        len(train_dataset)),
                                                  test_size=0.2,
                                                  random_state=42)

    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_loader = DataLoader(train_dataset,
                              batch_size=20,
                              num_workers=4,
                              sampler=train_sampler)

    val_loader = DataLoader(train_dataset,
                            batch_size=10,
                            num_workers=4,
                            sampler=valid_sampler)

    model_folder = os.path.abspath('./checkpoints')
    if not os.path.exists(model_folder):
        os.mkdir(model_folder)

    if args.model == 'UNet':
        model_path = os.path.join(model_folder, 'UNet.pth')
    elif args.model == 'SegNet':
        model_path = os.path.join(model_folder, 'SegNet.pth')
    elif args.model == 'DenseNet':
        model_path = os.path.join(model_folder, 'DenseNet.pth')
    else:
        # BUGFIX: previously model_path stayed undefined and only blew up
        # with a NameError at the first checkpoint save. Fail fast instead.
        raise ValueError('unknown model: {}'.format(args.model))

    # set optimizer
    optimizer = torch.optim.Adam(model.parameters())

    # main train
    display_steps = 30
    best_loss = 1e10
    loss_history = []

    ## for early stopping
    early_stop = False
    patience = 7
    counter = 0

    for epoch in range(num_epochs):
        print('Starting epoch {}/{}'.format(epoch + 1, num_epochs))

        # train
        model.train()

        metrics = defaultdict(float)
        epoch_size = 0

        # train model
        for batch_idx, (images, masks) in enumerate(train_loader):
            images = images.to(device).float()
            masks = masks.to(device).long()

            optimizer.zero_grad()
            outputs = model(images)

            loss, cross, dice = combined_loss(outputs, masks.squeeze(1),
                                              device, n_classes)

            save_metrics(metrics, images.size(0), loss, cross, dice)

            loss.backward()
            optimizer.step()

            # statistics
            epoch_size += images.size(0)

            if batch_idx % display_steps == 0:
                print('    ', end='')
                print('batch {:>3}/{:>3} cross: {:.4f} , dice {:.4f} , combined_loss {:.4f}\r'\
                      .format(batch_idx+1, len(train_loader), cross.item(), dice.item(),loss.item()))

            # drop tensor refs eagerly to keep GPU memory bounded
            del images, masks, outputs, loss, cross, dice

        print_metrics(metrics, epoch_size, 'train')

        # evaluate
        print('Finished epoch {}, starting evaluation'.format(epoch + 1))
        model.eval()

        # NOTE(review): metrics and epoch_size keep accumulating from the
        # training phase here, so the 'val' report and epoch_loss mix train
        # and val statistics — confirm whether that is intended.
        # BUGFIX: run validation under no_grad so autograd graphs are not
        # built (same numbers, much lower memory use).
        with torch.no_grad():
            for images, masks in val_loader:
                images = images.to(device).float()
                masks = masks.to(device).long()

                outputs = model(images)

                loss, cross, dice = combined_loss(outputs, masks.squeeze(1),
                                                  device, n_classes)

                save_metrics(metrics, images.size(0), loss, cross, dice)

                # statistics
                epoch_size += images.size(0)

                del images, masks, outputs, loss, cross, dice

        print_metrics(metrics, epoch_size, 'val')

        epoch_loss = metrics['loss'] / epoch_size

        # save model if best validation loss
        if epoch_loss < best_loss:
            print("saving best model")
            best_loss = epoch_loss

            # deep-copy to CPU so saving doesn't disturb the live model
            model_copy = copy.deepcopy(model)
            model_copy = model_copy.cpu()

            # unwrap DataParallel before saving the state dict
            model_state_dict = model_copy.module.state_dict(
            ) if len(device_ids) > 1 else model_copy.state_dict()
            torch.save(model_state_dict, model_path)

            del model_copy

            counter = 0
        else:
            counter += 1
            print('EarlyStopping counter : {:>3} / {:>3}'.format(
                counter, patience))

            if counter >= patience:
                early_stop = True

        # NOTE(review): this records the best-so-far loss, not this epoch's
        # loss — confirm that is what downstream plots expect.
        loss_history.append(best_loss)
        print('Best val loss: {:4f}'.format(best_loss))

        if early_stop:
            print('Early Stopping')
            break

    return loss_history