Example no. 1
def calculate_p_values(marking_percentages, batch_size):
    logfile_path = f"experiments/table1_imagenet/detect_radioactivity.log"
    setup_logger_tqdm(logfile_path)

    p_values = []

    for run in marking_percentages:
        run_name = f"{run}_percent"
        carrier_path = f"experiments/table1_imagenet/{run_name}/carriers.pth"

        target_checkpoint_path = f"experiments/table1_imagenet/{run_name}/marked_classifier/rank_0/checkpoint.pth"
        target_checkpoint = torch.load(target_checkpoint_path)
        target_checkpoint['model_state_dict'] = {
            k.replace("module.", ""): v
            for k, v in target_checkpoint['model_state_dict'].items()
        }

        (scores, p_vals,
         combined_pval) = detect_radioactivity(carrier_path,
                                               None,
                                               None,
                                               target_checkpoint,
                                               batch_size=batch_size,
                                               align=False,
                                               test_set_loader=None)
        p_values.append(combined_pval)

    return p_values
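
A minimal invocation sketch for the function above (the percentages and batch size are illustrative, not taken from the source):

marking_percentages = [1, 2, 5, 10]
p_values = calculate_p_values(marking_percentages, batch_size=64)
for pct, pval in zip(marking_percentages, p_values):
    print(f"{pct}% marked -> combined p-value: {pval:.3g}")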
Example no. 2
def calculate_p_values(marking_percentages, marking_checkpoint_path,
                       table_number, align):
    logfile_path = f"experiments/table{table_number}_imagenette/detect_radioactivity.log"
    setup_logger_tqdm(logfile_path)

    p_values = []

    # Load Marking Network and remove fully connected layer
    marking_network = torchvision.models.resnet18(pretrained=False,
                                                  num_classes=10)
    marking_checkpoint = torch.load(marking_checkpoint_path)
    marking_network.load_state_dict(marking_checkpoint["model_state_dict"])
    marking_network.fc = nn.Sequential()

    for run in marking_percentages:
        run_name = f"{run}_percent"
        carrier_path = f"experiments/table1_imagenette/{run_name}/carriers.pth"

        target_network = torchvision.models.resnet18(pretrained=False,
                                                     num_classes=10)
        target_checkpoint_path = f"experiments/table{table_number}_imagenette/{run_name}/marked_classifier/checkpoint.pth"
        target_checkpoint = torch.load(target_checkpoint_path)
        target_network.load_state_dict(target_checkpoint["model_state_dict"])
        target_network.fc = nn.Sequential()

        (scores, p_vals,
         combined_pval) = detect_radioactivity(carrier_path,
                                               marking_network,
                                               target_network,
                                               target_checkpoint,
                                               align=align)
        p_values.append(combined_pval)

    return p_values
Example no. 3
def calculate_p_values(marking_percentages):
    logfile_path = f"experiments/cifar100/table1/detect_radioactivity.log"
    setup_logger_tqdm(logfile_path)

    p_values = []

    for run in marking_percentages:
        run_name = f"{run}_percent"
        carrier_path = f"experiments/cifar100/table1/{run_name}/carriers.pth"

        # target_network = torchvision.models.resnet18(pretrained=False, num_classes=10)
        target_network = resnet(num_classes=100,
                                depth=164,
                                block_name='bottleneck')
        target_checkpoint_path = f"experiments/cifar100/table1/{run_name}/marked_classifier/checkpoint.pth"
        target_checkpoint = torch.load(target_checkpoint_path)
        target_network.load_state_dict({
            k.replace("module.", ""): v
            for k, v in target_checkpoint["model_state_dict"].items()
        })
        target_network.fc = nn.Sequential()

        # No need to align when only retraining the logistic regression
        (scores, p_vals,
         combined_pval) = detect_radioactivity(carrier_path,
                                               None,
                                               None,
                                               target_checkpoint,
                                               align=False)
        p_values.append(combined_pval)

    return p_values
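
detect_radioactivity returns per-carrier p-values plus a combined value. The combination step is not shown in these snippets; a plausible sketch, assuming Fisher's method as implemented by scipy.stats.combine_pvalues:

from scipy.stats import combine_pvalues

per_class_pvals = [0.04, 0.20, 0.01, 0.33]  # illustrative values
# Fisher's method is the scipy default; returns (statistic, combined p-value)
statistic, combined_pval = combine_pvalues(per_class_pvals)
print(combined_pval)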
Example no. 4
def do_marking_run_multiclass(overall_marking_percentage, experiment_directory,
                              tensorboard_log_directory, marking_network,
                              training_set, mp_args):

    # Setup experiment directory
    if os.path.isdir(experiment_directory):
        error_message = f"Directory {experiment_directory} already exists. By default we assume you don't want to "\
                        "repeat the marking stage."
        logger.info(error_message)
        return

    os.makedirs(experiment_directory)

    logfile_path = os.path.join(experiment_directory, 'marking.log')
    setup_logger_tqdm(filepath=logfile_path)

    # Carriers
    marking_network_fc_feature_size = 512
    carriers = torch.randn(len(training_set.classes),
                           marking_network_fc_feature_size)
    carriers /= torch.norm(carriers, dim=1, keepdim=True)
    torch.save(carriers, os.path.join(experiment_directory, "carriers.pth"))

    # { 0 : [(image1, original_index1),(image2, original_index2)...], 1 : [....] }
    image_data = get_images_for_marking_multiclass(training_set,
                                                   tensorboard_log_directory,
                                                   overall_marking_percentage)

    for class_id, image_list in image_data.items():
        if image_list:
            images, original_indexes = map(list, zip(*image_list))
            epochs = 250
            batch_size = 8
            output_directory = os.path.join(experiment_directory,
                                            "marked_images")
            #augmentation = differentiable_augmentations.CenterCrop(256, 224)
            augmentation = differentiable_augmentations.RandomResizedCropFlip(
                256)

            tensorboard_class_log = os.path.join(tensorboard_log_directory,
                                                 f"class_{class_id}")
            do_marking_dist(mp_args,
                            images,
                            original_indexes,
                            output_directory,
                            marking_network,
                            carriers,
                            class_id,
                            NORMALIZE_IMAGENET,
                            tensorboard_class_log,
                            epochs=epochs,
                            batch_size=batch_size,
                            overwrite=False,
                            augmentation=augmentation)

    # Record marking completion
    with open(os.path.join(experiment_directory, "marking.complete"),
              "w") as fh:
        fh.write("1")
Example no. 5
def main():
    logfile_path = "download_pushshift_dumps.log"
    setup_logger_tqdm(
        logfile_path)  # Logger will write messages using tqdm.write

    args = parser.parse_args()

    start_month, start_year = tuple(map(int, args.start_period.split(",")))
    start_date = datetime.datetime(start_year, start_month, 1)

    if args.finish_period:
        finish_month, finish_year = tuple(
            map(int, args.finish_period.split(",")))
        end_date = datetime.datetime(finish_year, finish_month, 1)
    else:
        end_date = datetime.datetime.now()

    logger.info("Running Script - PushShift submission dumps to sqlite")
    logger.info("Downloading and processing dumps in the following range:")
    logger.info(start_date.strftime("Start Period: %m-%Y"))
    logger.info(end_date.strftime("End Period: %m-%Y"))

    dumps_directory = os.path.join(args.output_directory, "dumps")

    if os.path.isdir(dumps_directory):
        message = f"Directory '{dumps_directory}' already exists, if there are done files" \
                   " in the directory then these particular months will be skipped. Delete" \
                   " these files or the directory to avoid this."
        logger.info(message)
        if not cutie.prompt_yes_or_no('Do you want to continue?'):
            sys.exit(0)

    os.makedirs(dumps_directory, exist_ok=True)

    logger.info("Building PushShift submission dump file list...")
    url_list = build_file_list(start_date, end_date)

    logger.info("Getting sha256sums")
    sha256sums = get_sha256sums()

    # Download and Process
    logger.info("Commencing download and processing into sqlite.")
    results = []
    for url in url_list:
        result = reddit_processing(url, sha256sums, dumps_directory,
                                   args.keep_dumps)
        results.append(result)
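
The --start_period and --finish_period arguments follow a "month,year" convention; the parsing above reduces to (values illustrative):

import datetime

start_month, start_year = map(int, "6,2015".split(","))
print(datetime.datetime(start_year, start_month, 1))  # 2015-06-01 00:00:00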
Example no. 6
def calculate_p_values(marking_percentages, batch_size, test_set_loader,
                       num_classes):
    logfile_path = f"experiments/table2_imagenet/detect_radioactivity.log"
    setup_logger_tqdm(logfile_path)

    p_values = []

    # Load Marking Network and remove fully connected layer
    marking_network = torchvision.models.resnet18(pretrained=True)
    marking_network.fc = nn.Sequential()

    for run in marking_percentages:
        run_name = f"{run}_percent"
        carrier_path = f"experiments/table1_imagenet/{run_name}/carriers.pth"  # Reuse table 1 carrier

        target_network = torchvision.models.resnet18(pretrained=False,
                                                     num_classes=num_classes)
        target_checkpoint_path = f"experiments/table2_imagenet/{run_name}/marked_classifier/rank_0/checkpoint.pth"
        target_checkpoint = torch.load(target_checkpoint_path)
        target_checkpoint['model_state_dict'] = {
            k.replace("module.", ""): v
            for k, v in target_checkpoint['model_state_dict'].items()
        }

        target_network.load_state_dict(target_checkpoint['model_state_dict'])
        target_network.fc = nn.Sequential()

        (scores, p_vals,
         combined_pval) = detect_radioactivity(carrier_path,
                                               marking_network,
                                               target_network,
                                               target_checkpoint,
                                               batch_size=batch_size,
                                               align=True,
                                               test_set_loader=test_set_loader)
        p_values.append(combined_pval)

    return p_values
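
Several snippets strip the "module." prefix that nn.parallel.DistributedDataParallel adds to parameter names when a checkpoint is saved from the wrapped model. As a reusable helper (a sketch, not part of the source):

def strip_ddp_prefix(state_dict):
    # DDP stores parameters under e.g. "module.fc.weight";
    # an unwrapped model expects "fc.weight".
    return {k.replace("module.", "", 1): v for k, v in state_dict.items()}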
Example no. 7
def do_marking_run_multiclass(overall_marking_percentage, experiment_directory,
                              tensorboard_log_directory, marking_network,
                              training_set):

    # Setup experiment directory
    if os.path.isdir(experiment_directory):
        error_message = f"Directory {experiment_directory} already exists. By default we assume you don't want to "\
                        "repeat the marking stage."
        logger.info(error_message)
        return

    os.makedirs(experiment_directory)

    logfile_path = os.path.join(experiment_directory, 'marking.log')
    setup_logger_tqdm(filepath=logfile_path)

    # Carriers
    marking_network_fc_feature_size = 512
    carriers = torch.randn(len(training_set.classes),
                           marking_network_fc_feature_size)
    carriers /= torch.norm(carriers, dim=1, keepdim=True)
    torch.save(carriers, os.path.join(experiment_directory, "carriers.pth"))

    # { 0 : [(image1, original_index1),(image2, original_index2)...], 1 : [....] }
    image_data = get_images_for_marking_multiclass(training_set,
                                                   tensorboard_log_directory,
                                                   overall_marking_percentage)

    marked_images = []
    for class_id, image_list in image_data.items():
        if image_list:
            images, original_indexes = map(list, zip(*image_list))
            optimizer = lambda x: torch.optim.AdamW(x)
            epochs = 250
            batch_size = 8
            output_directory = os.path.join(experiment_directory,
                                            "marked_images")
            augmentation = differentiable_augmentations.CenterCrop(256, 224)
            tensorboard_class_log = os.path.join(tensorboard_log_directory,
                                                 f"class_{class_id}")
            marked_images_temp = do_marking(output_directory,
                                            marking_network,
                                            images,
                                            original_indexes,
                                            carriers,
                                            class_id,
                                            NORMALIZE_IMAGENETTE,
                                            optimizer,
                                            tensorboard_class_log,
                                            epochs=epochs,
                                            batch_size=batch_size,
                                            overwrite=False,
                                            augmentation=augmentation)

            marked_images.extend(marked_images_temp)

    # Show marked images in Tensorboard - centercrop for grid
    from PIL import Image as im
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)
    transform = transforms.Compose(
        [transforms.CenterCrop(256),
         transforms.ToTensor()])
    images_for_tensorboard = [
        transform(im.fromarray(x)) for x in marked_images
    ]
    img_grid = torchvision.utils.make_grid(images_for_tensorboard, nrow=3)
    tensorboard_summary_writer.add_image('marked_images', img_grid)

    # Record marking completion
    with open(os.path.join(experiment_directory, "marking.complete"),
              "w") as fh:
        fh.write("1")
Example no. 8
def do_marking_run(overall_marking_percentage,
                   experiment_directory,
                   tensorboard_log_directory,
                   augment=True):

    # Setup experiment directory
    if os.path.isdir(experiment_directory):
        error_message = f"Directory {experiment_directory} already exists. By default we assume you don't want to "\
                        "repeat the marking stage."
        logger.info(error_message)
        return

    os.makedirs(experiment_directory)

    logfile_path = os.path.join(experiment_directory, 'marking.log')
    setup_logger_tqdm(filepath=logfile_path)

    training_set = torchvision.datasets.CIFAR10(root="experiments/datasets",
                                                download=True)

    # Marking network is the resnet18 we trained on CIFAR10
    marking_network = torchvision.models.resnet18(pretrained=False,
                                                  num_classes=10)
    checkpoint_path = "experiments/table2/step1/checkpoint.pth"
    marking_network_checkpoint = torch.load(checkpoint_path)
    marking_network.load_state_dict(
        marking_network_checkpoint["model_state_dict"])

    # Carriers
    marking_network_fc_feature_size = 512
    carriers = torch.randn(len(training_set.classes),
                           marking_network_fc_feature_size)
    carriers /= torch.norm(carriers, dim=1, keepdim=True)
    torch.save(carriers, os.path.join(experiment_directory, "carriers.pth"))

    # Load randomly sampled images from random class along with list of original indexes
    # Assume each class has equal number of images, adjust class_marking_percentage to
    # fit overall marking_percentage
    class_marking_percentage = overall_marking_percentage * len(
        training_set.classes)
    class_id, images, original_indexes = get_images_for_marking_cifar10(
        training_set, tensorboard_log_directory, class_marking_percentage)

    optimizer = lambda x: torch.optim.AdamW(x, lr=0.1)
    epochs = 100
    batch_size = 32
    output_directory = os.path.join(experiment_directory, "marked_images")
    # The snippet never sets an augmentation for augment=True, which would
    # raise a NameError below; default to None so the name is always bound.
    augmentation = None
    marked_images = do_marking(output_directory,
                               marking_network,
                               images,
                               original_indexes,
                               carriers,
                               class_id,
                               NORMALIZE_CIFAR10,
                               optimizer,
                               tensorboard_log_directory,
                               epochs=epochs,
                               batch_size=batch_size,
                               overwrite=True,
                               augmentation=augmentation)

    # Show marked images in Tensorboard
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)
    images_for_tensorboard = [transforms.ToTensor()(x) for x in marked_images]
    img_grid = torchvision.utils.make_grid(images_for_tensorboard, nrow=16)
    tensorboard_summary_writer.add_image('marked_images', img_grid)

    # Record marking completion
    with open(os.path.join(experiment_directory, "marking.complete"),
              "w") as fh:
        fh.write("1")
Example no. 9
def main(device, mp_args, dataloader_func, model, optimizer_callback,
         output_directory, tensorboard_log_directory, epochs):

    global_rank = mp_args.nr * mp_args.gpus + device
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=mp_args.world_size,
                            rank=global_rank)

    output_directory = os.path.join(output_directory, f"rank_{global_rank}")
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)

    # Setup TensorBoard logging
    tensorboard_log_directory = os.path.join(tensorboard_log_directory,
                                             f"rank_{global_rank}")
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Dataloaders
    train_set_loader, test_set_loader = dataloader_func(
        mp_args.world_size, global_rank)

    # Model & Optimizer
    model.to(device)
    optimizer = optimizer_callback(model)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    logger.info(f"Epoch Count: {epochs}")

    # Load Checkpoint
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")

        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    #progress = tqdm(total=epochs, initial=start_epoch, desc="Epochs")
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info(f"Commence EPOCH {epoch}")

        # Train
        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy", train_accuracy,
                                              epoch)

        # Test
        test_accuracy = test_model(device, model, test_set_loader)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
Example no. 10
        with open(done_file_path, "w") as fh:
            json.dump(len(url_data), fh)

    progress.close()
    logger.info("Done!")


parser_description = 'Scrape urls extracted from Reddit.'
parser = argparse.ArgumentParser(description=parser_description)
parser.add_argument("-dir", "--job_directory", default="")
parser.add_argument("-procs", "--process_count", type=int, default=60)
parser.add_argument("-timeout", "--request_timeout", type=int, default=30)

if __name__ == "__main__":
    logfile_name = "scrape_urls.log"
    setup_logger_tqdm(logfile_name)

    args = parser.parse_args()

    urls_directory = os.path.join(args.job_directory, "urls")
    if not os.path.exists(urls_directory):
        logger.info(
            f"No 'urls' directory found in '{args.job_directory}', aborting")
        sys.exit(0)

    scrapes_directory = os.path.join(args.job_directory, "scrapes")
    os.makedirs(scrapes_directory, exist_ok=True)

    logger.info(f"Scrapes outputting to: '{scrapes_directory}'")

    scrape_urls(urls_directory, scrapes_directory, args.process_count,
Example no. 11
def main(experiment_name,
         optimizer,
         output_directory_root="experiments/resnet18_logistic_cifar10",
         epochs=60,
         batch_size=512,
         num_workers=1):

    output_directory = os.path.join(output_directory_root, experiment_name)
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file + tensorboard
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)

    tensorboard_log_directory = os.path.join("runs",
                                             "resnet18_logistic_cifar10",
                                             experiment_name)
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Choose Training Device
    use_cuda = torch.cuda.is_available()
    logger.info(f"CUDA Available? {use_cuda}")
    device = "cuda" if use_cuda else "cpu"

    # Datasets and Loaders
    train_set_loader, test_set_loader = get_data_loaders(
        batch_size, num_workers)

    # Create Model & Optimizer
    model = torchvision.models.resnet18(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    num_classes = 10
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model.to(device)
    optimizer = optimizer(model.parameters())

    logger.info("=========== Commencing Training ===========")
    logger.info(f"Epoch Count: {epochs}")
    logger.info(f"Batch Size: {batch_size}")

    # Load Checkpoint
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")

        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info("-" * 10)
        logger.info(f"Epoch {epoch}")
        logger.info("-" * 10)

        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy", train_accuracy,
                                              epoch)

        test_accuracy = test_model(device, model, test_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
        logger.info("")
Example no. 12
                    default=1,
                    type=int,
                    help='number of gpus per node')
parser.add_argument('-nr',
                    '--nr',
                    default=0,
                    type=int,
                    help='ranking within the nodes')


class AnonObject(object):
    def __init__(self):
        pass


if __name__ == '__main__':
    setup_logger_tqdm()  # Commence logging to console

    assert torch.cuda.is_available()
    assert torch.distributed.is_available()
    args = parser.parse_args()
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '8888'

    mp_args = AnonObject()
    mp_args.nr = args.nr
    mp_args.gpus = args.gpus
    mp_args.world_size = args.gpus * args.nodes

    main(args.imagenet_path, args.batch_size_step_3, mp_args)
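
The distributed workers in Examples no. 9 and no. 19 take the local device index as their first parameter, which matches the standard torch.multiprocessing.spawn launch pattern. A sketch, assuming main wraps such a launcher (worker is hypothetical here):

import torch.multiprocessing as mp

# spawn runs worker(i, *args) once per process, i = 0..nprocs-1,
# so the device index arrives as the first positional argument
mp.spawn(worker, nprocs=mp_args.gpus, args=(mp_args,))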
Example no. 13
def do_marking_run_multiclass(overall_marking_percentage,
                              experiment_directory,
                              tensorboard_log_directory,
                              augment=True):
    # Setup experiment directory
    if os.path.isdir(experiment_directory):
        error_message = f"Directory {experiment_directory} already exists. By default we assume you don't want to "\
                        "repeat the marking stage."
        logger.info(error_message)
        return

    os.makedirs(experiment_directory)

    logfile_path = os.path.join(experiment_directory, 'marking.log')
    setup_logger_tqdm(filepath=logfile_path)

    training_set = torchvision.datasets.CIFAR100(root="experiments/datasets",
                                                 download=True)

    # Marking network is the resnet18 we trained on CIFAR10
    # marking_network = torchvision.models.resnet18(pretrained=False, num_classes=10)
    marking_network = resnet(num_classes=100,
                             depth=164,
                             block_name='bottleneck')
    checkpoint_path = "experiments/cifar100/table1/step1/checkpoint.pth"
    marking_network_checkpoint = torch.load(checkpoint_path)
    marking_network.load_state_dict({
        k.replace("module.", ""): v
        for k, v in marking_network_checkpoint["model_state_dict"].items()
    })

    # Carriers
    # marking_network_fc_feature_size = 512
    marking_network_fc_feature_size = 256
    carriers = torch.randn(len(training_set.classes),
                           marking_network_fc_feature_size)
    carriers /= torch.norm(carriers, dim=1, keepdim=True)
    torch.save(carriers, os.path.join(experiment_directory, "carriers.pth"))

    # { 0 : [(image1, original_index1),(image2, original_index2)...], 1 : [....] }
    image_data = get_images_for_marking_multiclass_cifar10(
        training_set, tensorboard_log_directory, overall_marking_percentage)

    marked_images = []
    for class_id, image_list in image_data.items():
        if image_list:
            images, original_indexes = map(list, zip(*image_list))
            optimizer = lambda x: torch.optim.AdamW(x, lr=0.1)
            epochs = 100
            batch_size = 32
            output_directory = os.path.join(experiment_directory,
                                            "marked_images")
            # As in Example no. 8, no augmentation is set for augment=True;
            # default to None so the name is always bound.
            augmentation = None

            tensorboard_class_log = os.path.join(tensorboard_log_directory,
                                                 f"class_{class_id}")
            marked_images_temp = do_marking(output_directory,
                                            marking_network,
                                            images,
                                            original_indexes,
                                            carriers,
                                            class_id,
                                            NORMALIZE_CIFAR10,
                                            optimizer,
                                            tensorboard_class_log,
                                            epochs=epochs,
                                            batch_size=batch_size,
                                            overwrite=False,
                                            augmentation=augmentation)

            marked_images.extend(marked_images_temp)

    # Show marked images in Tensorboard
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)
    images_for_tensorboard = [transforms.ToTensor()(x) for x in marked_images]
    img_grid = torchvision.utils.make_grid(images_for_tensorboard, nrow=16)
    tensorboard_summary_writer.add_image('marked_images', img_grid)

    # Record marking completion
    with open(os.path.join(experiment_directory, "marking.complete"),
              "w") as fh:
        fh.write("1")
Example no. 14
        on_error = on_done
        result = pool.map(process_count, progress, tasks, on_error, on_done)

    return result


parser_description = 'Generate minhashes for all documents found.'
parser = argparse.ArgumentParser(description=parser_description)
parser.add_argument("-dir", "--scrape_directory", default="")
parser.add_argument("-procs", "--process_count", type=int, default=4)

if __name__ == '__main__':
    args = parser.parse_args()
    if not os.path.isdir(args.scrape_directory):
        print("Scrape directory doesn't exist, exiting.")
        sys.exit(0)

    with redirect_stdout(open(os.devnull, "w")):
        nltk.download('punkt')

    log_file = "generate_minhashes.log"
    setup_logger_tqdm(log_file)

    logger.info("Generating document level minhashes from 5 gram sets")
    minhashes_by_file = generate_minhashes(args.scrape_directory,
                                           args.process_count)

    output_pickle_path = os.path.join(args.scrape_directory, "minhashes.pkl")
    timed_pickle_dump(minhashes_by_file, output_pickle_path,
                      "minhashes_by_file")
Example no. 15
def main(imagenette_path, step_3_batch_size):
    setup_logger_tqdm()  # Commence logging to console
    table_1_work(imagenette_path, step_3_batch_size)
    table_2_work(imagenette_path, step_3_batch_size)
Example no. 16
        if document_count > (batch_count + 1) * documents_per_batch:
            batch_pickle_file_path = os.path.join(batch_directory, f"batch{batch_count}.pkl")
            timed_pickle_dump(current_batch, batch_pickle_file_path, f"batch {batch_count} minhashes")
            current_batch = []
            batch_count += 1

    if current_batch:
        batch_pickle_file_path = os.path.join(batch_directory, f"batch{batch_count}.pkl")
        timed_pickle_dump(current_batch, batch_pickle_file_path, f"batch {batch_count} minhashes") 
        current_batch = None

    file_name_lookup = [file_name for file_name, documents in minhashes]
    file_name_lookup_path = os.path.join(batch_directory, "file_name_lookup.pkl")
    timed_pickle_dump(file_name_lookup, file_name_lookup_path, "Filename lookup")

    document_count_path = os.path.join(batch_directory, "document_count.pkl")
    with open(document_count_path, "wb") as fh:
        pickle.dump(total_documents, fh)


parser = argparse.ArgumentParser(description='Generate batches of minhashes for cassandra lsh dedupe.')
parser.add_argument("-dir", "--directory", default="")
parser.add_argument("-batches", "--number_of_batches", type=int, required=True)

if __name__ == '__main__':
    logfile_path = "minhash_lsh_batching.log"
    setup_logger_tqdm(logfile_path)

    args = parser.parse_args()    

    main(args.number_of_batches, args.directory)
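
documents_per_batch comes from context elided above; presumably it is the plain split of the document count across batches, e.g.:

total_documents, number_of_batches = 1_000_000, 10  # illustrative counts
documents_per_batch = total_documents // number_of_batches
print(documents_per_batch)  # 100000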
Example no. 17
def main(dataloader_func,
         model,
         optimizer_callback,
         output_directory,
         tensorboard_log_directory,
         lr_scheduler=None,
         epochs=150):

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)

    # Setup TensorBoard logging
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Choose Training Device
    use_cuda = torch.cuda.is_available()
    logger.info(f"CUDA Available? {use_cuda}")
    device = "cuda" if use_cuda else "cpu"

    # Dataloaders
    train_set_loader, test_set_loader = dataloader_func()

    # Model & Optimizer
    model.to(device)
    optimizer = optimizer_callback(model)
    if lr_scheduler:
        lr_scheduler = lr_scheduler(optimizer)

    logger.info(f"Epoch Count: {epochs}")

    # Load Checkpoint
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")

        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        if lr_scheduler:
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info(f"Commence EPOCH {epoch}")

        # Train
        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy", train_accuracy,
                                              epoch)

        # Test
        test_accuracy = test_model(device, model, test_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        scheduler_dict = None
        if lr_scheduler:
            lr_scheduler.step()
            scheduler_dict = lr_scheduler.state_dict()

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'lr_scheduler_state_dict': scheduler_dict,
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
Example no. 18
            for index in original_indexes:
                images.append(transforms.ToTensor()(training_set.data[index]))
    img_grid = torchvision.utils.make_grid(images, nrow=16)
    tensorboard_summary_writer.add_image('images_for_marking', img_grid)

    return image_data

if __name__ == '__main__':

    # Setup experiment directory, logging
    experiment_directory = "experiments/radioactive"
    if not os.path.isdir(experiment_directory):
        os.makedirs(experiment_directory)

    logfile_path = os.path.join(experiment_directory, 'marking.log')
    setup_logger_tqdm(filepath=logfile_path)

    # Clear old tensorboard logs
    our_tensorboard_logs = glob.glob('runs/radioactive*') # main creates extra log dirs
    for tensorboard_log in our_tensorboard_logs:
        shutil.rmtree(tensorboard_log)
    tensorboard_log_directory="runs/radioactive"

    # Load randomly sampled images from random class along with list of original indexes 
    training_set = torchvision.datasets.CIFAR10(root="experiments/datasets", download=True)
    class_marking_percentage = 10
    class_id, images, original_indexes = get_images_for_marking_cifar10(training_set, tensorboard_log_directory, 
                                                                        class_marking_percentage)

    # Marking network is a pretrained resnet18
    marking_network = torchvision.models.resnet18(pretrained=True)
Example no. 19
def main(device,
         mp_args,
         experiment_name,
         optimizer,
         output_directory_root="experiments/resnet18_distributed",
         lr_scheduler=None,
         epochs=150,
         batch_size=512,
         num_workers=1):

    global_rank = mp_args.nr * mp_args.gpus + device
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=mp_args.world_size,
                            rank=global_rank)

    output_directory = os.path.join(output_directory_root, experiment_name,
                                    f"rank_{global_rank}")
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory, exist_ok=True)

    # Setup regular log file + tensorboard
    logfile_path = os.path.join(output_directory, "logfile.txt")
    setup_logger_tqdm(logfile_path)

    tensorboard_log_directory = os.path.join("runs", "resnet18_distributed",
                                             experiment_name,
                                             f"rank_{global_rank}")
    tensorboard_summary_writer = SummaryWriter(
        log_dir=tensorboard_log_directory)

    # Datasets and Loaders
    train_set_loader, test_set_loader = get_data_loaders(
        mp_args.world_size, global_rank, batch_size, num_workers)

    # Create Model & Optimizer (uses Partial Functions)
    model = torchvision.models.resnet18(pretrained=False, num_classes=10)
    model.to(device)
    optimizer = optimizer(model.parameters())
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    if lr_scheduler:
        lr_scheduler = lr_scheduler(optimizer)

    logger.info("=========== Commencing Training ===========")
    logger.info(f"Epoch Count: {epochs}")
    logger.info(f"Batch Size: {batch_size}")

    # Load Checkpoint
    checkpoint_file_path = os.path.join(output_directory, "checkpoint.pth")
    start_epoch = 0
    if os.path.exists(checkpoint_file_path):
        logger.info("Checkpoint Found - Loading!")
        checkpoint = torch.load(checkpoint_file_path)
        logger.info(f"Last completed epoch: {checkpoint['epoch']}")
        logger.info(f"Average Train Loss: {checkpoint['train_loss']}")
        logger.info(f"Top-1 Train Accuracy: {checkpoint['train_accuracy']}")
        logger.info(f"Top-1 Test Accuracy: {checkpoint['test_accuracy']}")
        start_epoch = checkpoint["epoch"] + 1
        logger.info(f"Resuming at epoch {start_epoch}")

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        if lr_scheduler:
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"])
    else:
        logger.info("No checkpoint found, starting from scratch.")

    # Training Loop
    t = Timer()
    for epoch in range(start_epoch, epochs):
        t.start()
        logger.info("-" * 10)
        logger.info(f"Epoch {epoch}")
        logger.info("-" * 10)

        train_loss, train_accuracy = train_model(device, model,
                                                 train_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("train_loss", train_loss, epoch)
        tensorboard_summary_writer.add_scalar("train_accuracy", train_accuracy,
                                              epoch)

        test_accuracy = test_model(device, model, test_set_loader, optimizer)
        tensorboard_summary_writer.add_scalar("test_accuracy", test_accuracy,
                                              epoch)

        scheduler_dict = None
        if lr_scheduler:
            lr_scheduler.step()
            scheduler_dict = lr_scheduler.state_dict()

        # Save Checkpoint
        logger.info("Saving checkpoint.")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'lr_scheduler_state_dict': scheduler_dict,
                'train_loss': train_loss,
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }, checkpoint_file_path)

        elapsed_time = t.stop()
        logger.info(f"End of epoch {epoch}, took {elapsed_time:0.4f} seconds.")
        logger.info(f"Average Train Loss: {train_loss}")
        logger.info(f"Top-1 Train Accuracy: {train_accuracy}")
        logger.info(f"Top-1 Test Accuracy: {test_accuracy}")
        logger.info("")