def main_fewshot():
    """Run the MNIST experiment."""
    log_file = os.path.join(ARGS.result_dir, ARGS.experiment_name, "log.txt")
    print("Result dir: %s", ARGS.result_dir)
    print("Log file: %s", log_file)

    # Setup logging in base_dir/log.txt
    setup_logging(level=ARGS.log_level, filename=log_file)
    logger.info(" -- MNIST Few Shot Experiment -- Started ")
    tstart = time.time()

    try:
        if not ARGS.cuda:
            # Set number of CPU threads
            torch.set_num_threads(1)

        # Create and run experiment
        experiment = FewShotExperiment(ARGS)
        experiment.run()
    except Exception as e:
        # logger.exception already logs the traceback once.
        logger.exception("Experiment crashed: %s", e)

    # Measure time
    tstr = time_delta_now(tstart)
    logger.info(" -- MNIST -- Finished, took %s", tstr)
def train_multilabel(model,
                     device,
                     train_loader,
                     optimizer,
                     epoch,
                     log_interval=10):
    """
    Train the model for one epoch.

    Args:
        model (nn.Module): Network model.
        device: Device to train on.
        train_loader: Torch data loader for training set.
        optimizer: Torch opitimizer.
        epoch: Current epoch.
    """

    model.train()

    # Create clipper
    dist_clipper = DistributionClipper(device)

    n_samples = get_n_samples_from_loader(train_loader)
    loss_fn = nn.BCELoss()
    t_start = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Send data to correct device
        data, target = data.to(device), target.to(device)

        # Reset gradients
        optimizer.zero_grad()

        # Inference
        output = model(data)

        # Compute loss (sigmoid + BCE; nn.BCEWithLogitsLoss is the numerically
        # stabler equivalent)
        loss = loss_fn(output.sigmoid(), target)

        # Backprop
        loss.backward()
        optimizer.step()

        # Clip distribution values and weights
        model.apply(dist_clipper)

        # Log stuff
        if batch_idx % log_interval == 0:
            logger.info(
                "Train Epoch: {} [{: >5}/{: <5} ({:.0f}%)]\tLoss: {:.6f}".
                format(
                    epoch,
                    batch_idx * len(data),
                    n_samples,
                    100.0 * batch_idx / len(train_loader),
                    loss.item(),
                ))

    t_delta = time_delta_now(t_start)
    logger.info("Train Epoch: {} took {}".format(epoch, t_delta))
def main():
    """Run the MNIST experiment."""
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
        [str(x) for x in ARGS.cuda_device_id])
    float_formatter = lambda x: "%.3f" % x
    np.set_printoptions(formatter={"float_kind": float_formatter})

    # Setup logging in base_dir/log.txt
    log_file = os.path.join(ARGS.result_dir, ARGS.experiment_name, "log.txt")
    setup_logging(level=ARGS.log_level, filename=log_file)
    logger.info(" -- MNIST Multilabel -- Started ")
    print("Result dir: ", ARGS.result_dir)
    print("Log file: ", log_file)


    tstart = time.time()
    try:
        if not ARGS.cuda:
            # Set number of CPU threads
            torch.set_num_threads(ARGS.njobs)
        else:
            ARGS.cuda_device_id = ARGS.cuda_device_id[0]

        if ARGS.reuse_base_dir is not None:
            base_dir = ARGS.reuse_base_dir
        else:
            base_dir = generate_run_base_dir(
                suffix="debug",
                experiment="multilabel-mnist",
                result_dir=ARGS.result_dir,
                timestamp=tstart,
            )
        exp_dir = generate_experiment_dir(base_dir, ARGS.net, "test")
        save_args(ARGS, exp_dir)
        # Create and run experiment
        run_multilabel_mnist(ARGS, exp_dir)
    except Exception as e:
        # logger.exception already logs the traceback once.
        logger.exception("Experiment crashed: %s", e)

    # Measure time
    tstr = time_delta_now(tstart)
    logger.info(" -- MNIST -- Finished, took %s", tstr)
def run(main_method, exp_dir, args, cuda_queue):
    """Run the MNIST experiment."""
    # Get cuda device from multiplrocessing queue
    cuda_device_id = cuda_queue.get()
    args.cuda_device_id = cuda_device_id
    print("Starting {} with args \n{}\non device {}.".format(
        main_method.__name__, args, cuda_device_id))
    print("os.environ[CUDA_VISIBLE_DEVICES]=",
          os.environ["CUDA_VISIBLE_DEVICES"])
    float_formatter = lambda x: "%.3f" % x
    np.set_printoptions(formatter={"float_kind": float_formatter})

    # Setup logging in exp_dir/log.txt
    log_file = os.path.join(exp_dir, "log.txt")
    setup_logging(level=args.log_level, filename=log_file)
    logger.info(" -- MNIST Multilabel -- Started ")
    print("Result dir: ", args.result_dir)
    print("Base dir: ", exp_dir)
    print("Log file: ", log_file)

    # Save commandline arguments
    save_args(args, exp_dir)

    tstart = time.time()
    try:
        # Set number of CPU threads
        torch.set_num_threads(args.njobs)

        # Create and run experiment
        main_method(args, exp_dir)
    except Exception as e:
        # logger.exception already logs the traceback once.
        logger.exception("Experiment crashed: %s", e)

    # Measure time
    tstr = time_delta_now(tstart)
    logger.info(" -- MNIST -- Finished, took %s", tstr)

    # Free up cuda device
    cuda_queue.put(cuda_device_id)
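

# NOTE: `run` implements a simple device pool: each worker takes a CUDA
# device id off `cuda_queue` and puts it back when finished. The usage sketch
# below is an assumption; `experiments` is a hypothetical list of
# (exp_dir, args) pairs and `main_method` any of the main functions above.
import multiprocessing as mp


def dispatch(main_method, experiments, device_ids):
    """Run experiments in parallel, one per free CUDA device (sketch)."""
    manager = mp.Manager()
    cuda_queue = manager.Queue()
    for dev_id in device_ids:
        cuda_queue.put(dev_id)

    with mp.Pool(processes=len(device_ids)) as pool:
        results = [
            pool.apply_async(run, (main_method, exp_dir, args, cuda_queue))
            for exp_dir, args in experiments
        ]
        for res in results:
            res.get()  # re-raise any worker exception in the parent
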
def run_torch(n_epochs=100, batch_size=256):
    """Run the torch code.

    Args:
        n_epochs (int, optional): Number of epochs.
        batch_size (int, optional): Batch size.
    """
    from src.spn.rat_spn import RatSpnConstructor
    from torch import optim
    from torch import nn

    assert len(sys.argv) == 2, "Usage: train.mnist cuda/cpu"
    dev = sys.argv[1]

    rg = RatSpnConstructor(in_features=28 * 28, C=10, S=10, I=20, dropout=0.0)
    n_splits = 2
    for _ in range(0, n_splits):
        rg.random_split(2, 1)

    if dev == "cpu":
        device = torch.device("cpu")
        use_cuda = False
    else:
        device = torch.device("cuda:0")
        use_cuda = True
        torch.backends.cudnn.benchmark = True  # enable the cuDNN autotuner

    print("Using device:", device)

    model = rg.build().to(device)
    model.train()
    print(model)
    print(f"Layer 0: {count_params(model.region_spns[0]._leaf) * n_splits}")
    for i in range(1, len(model.region_spns[0]._inner_layers) + 1):
        print(
            f"Layer {i}: {count_params(model.region_spns[0]._inner_layers[i - 1]) * n_splits}"
        )
    print("Number of pytorch parameters: ", count_params(model))

    # Define optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    train_loader, test_loader = get_mnist_loaders(use_cuda,
                                                  batch_size=batch_size,
                                                  device=device)

    log_interval = 100

    for epoch in range(n_epochs):
        t_start = time.time()
        running_loss = 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            # Send data to correct device
            data, target = data.to(device), target.to(device)
            data = data.view(data.shape[0], -1)

            # Reset gradients
            optimizer.zero_grad()

            # Inference
            output = model(data)

            # Compute loss
            loss = loss_fn(output, target)

            # Backprop
            loss.backward()
            optimizer.step()

            # Log stuff
            running_loss += loss.item()
            if batch_idx % log_interval == (log_interval - 1):
                accuracy = (output.argmax(1).eq(target).sum().item() /
                            data.shape[0] * 100)
                print(
                    "Train Epoch: {} [{: >5}/{: <5} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.0f}%"
                    .format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        running_loss / log_interval,
                        accuracy,
                    ),
                    end="\r",
                )
                running_loss = 0.0

        t_delta = time_delta_now(t_start)
        print("Train Epoch: {} took {}".format(epoch, t_delta))
        if epoch % 5 == 4:
            print("Evaluating model ...")
            evaluate_model(model, device, train_loader, "Train")
            evaluate_model(model, device, test_loader, "Test")
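

# NOTE: `get_mnist_loaders` and `evaluate_model` are helpers not shown in
# this snippet. Minimal sketches follow, assuming standard torchvision MNIST
# (the "../data" path is a placeholder) and plain accuracy evaluation; the
# normalization constants are the usual MNIST values.
from torchvision import datasets, transforms


def get_mnist_loaders(use_cuda, batch_size, device=None):
    """Return (train_loader, test_loader) for MNIST (sketch).

    `device` is kept only to mirror the call site above.
    """
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST("../data", train=True, download=True,
                       transform=transform),
        batch_size=batch_size, shuffle=True, **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST("../data", train=False, transform=transform),
        batch_size=batch_size, shuffle=False, **kwargs,
    )
    return train_loader, test_loader


def evaluate_model(model, device, loader, tag):
    """Print the accuracy of `model` on `loader` (sketch)."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data.view(data.shape[0], -1))
            correct += output.argmax(1).eq(target).sum().item()
            total += target.shape[0]
    model.train()
    print("{} accuracy: {:.2f}%".format(tag, 100.0 * correct / total))
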

def get_model_by_tag(tag, device, args, in_features, n_labels):
    """Return a model for `tag`. Only this tail survived in the source; the
    signature is inferred from the calls in __main__ below, and the
    constructor name/arguments are partly assumptions."""
    in_channels = 1  # assumption: single-channel input, as in __main__
    if tag == "spn-shallow":
        model = SPNNetworkShallow(  # hypothetical constructor name
            spnneuron=SPNNeuronShallow,
            in_channels=in_channels,
        ).to(device)
    else:
        raise Exception("Invalid network: %s" % tag)

    return model


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    args = parser.parse_args(args=[])
    args.resnet_arch = "resnet18"
    dev = "cuda:0"
    resnet = get_model_by_tag("resnet", torch.device(dev), args, 50 ** 2, 10)
    resnetspn = get_model_by_tag("resnet+spn", torch.device(dev), args, 50 ** 2, 10)
    shallow = get_model_by_tag("spn-shallow", torch.device(dev), args, 50 ** 2, 10)

    x = torch.rand(3, 1, 50, 50).to(torch.device(dev))
    for net, name in [
        (resnet, "resnet"),
        (resnetspn, "resnetspn"),
        (shallow, "shallow"),
    ]:
        print(f"{name}: {count_params(net)}")
        t = time.time()
        net(x)
        print(name, "took", time_delta_now(t))