Example #1
0
def download_cub():
    """ Download the birds dataset (CUB-200-2011) and its captions.

        Uses a local backup copy of the archive when one exists; otherwise
        downloads the archive and stores a backup for future runs. The
        archive is extracted under data/CUB_200_2011_with_text/images/ and
        then deleted, and the caption download is delegated to
        download_captions.
    """
    BIRDS_DATASET_URL = (
        "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz"
    )

    cub_download_location = "data/CUB_200_2011.tgz"
    cub_backup_location = "data/backup/CUB_200_2011.tgz"

    if os.path.exists(cub_backup_location):
        print("Retrieving CUB dataset from: {}".format(cub_backup_location))
        shutil.copy(cub_backup_location, cub_download_location)
    else:
        print("Downloading CUB dataset from: {}".format(BIRDS_DATASET_URL))
        urllib.request.urlretrieve(BIRDS_DATASET_URL, cub_download_location)
        # Keep a backup so repeated runs do not re-download the archive.
        mkdir("data/backup")
        shutil.copy(cub_download_location, cub_backup_location)

    # Context manager guarantees the tar file is closed even if
    # extraction raises (the original open/close pair leaked on error).
    with tarfile.open(cub_download_location, "r:gz") as tar:
        tar.extractall("data/CUB_200_2011_with_text/images/")
    os.remove(cub_download_location)

    download_captions(
        GDRIVE_ID="0B3y_msrWZaXLT1BZdVdycDY5TEE",
        text_download_location="data/birds.zip",
        backup_location="data/backup/birds.zip",
        res_subdir="CUB_200_2011_with_text",
    )
Example #2
0
def download_captions(GDRIVE_ID: str, text_download_location: str,
                      backup_location: str, res_subdir: str):
    """ Download and unpack the captions / text part of the dataset.

        Arguments:
            GDRIVE_ID: Google Drive file id of the zipped captions.
            text_download_location: Path the zip archive is written to.
            backup_location: Path of the local backup copy of the archive.
            res_subdir: Subdirectory of data/ the text is moved into.

        Raises:
            Exception: if the directory the archive was expected to
                extract into does not exist afterwards.
    """
    # e.g. "data/birds.zip" -> "data/birds": the directory the archive
    # extracts to. splitext replaces the original magic [:-4] slice.
    extracted_text_dir = os.path.splitext(text_download_location)[0]

    if os.path.exists(backup_location):
        print("Retrieving dataset from: {}".format(backup_location))
        shutil.copy(backup_location, text_download_location)
        with zipfile.ZipFile(backup_location, "r") as zipfd:
            zipfd.extractall("data/")
    else:
        print("Downloading text from Google Drive ID: {}".format(GDRIVE_ID))
        # unzip=True extracts next to dest_path as part of the download.
        gdd.download_file_from_google_drive(file_id=GDRIVE_ID,
                                            dest_path=text_download_location,
                                            unzip=True)
        # Keep a backup so repeated runs do not re-download the archive.
        mkdir("data/backup")
        shutil.copy(text_download_location, backup_location)

    # Move and clean up data
    if os.path.isdir(extracted_text_dir):
        os.rename(extracted_text_dir, f"data/{res_subdir}/text")
    else:
        raise Exception(
            "Expected to find directory {}, but it does not exist".format(
                extracted_text_dir))
    os.remove(text_download_location)
Example #3
0
def compare_generated_to_real(
    dataloader,
    num_images: int,
    noise_size: int,
    model: tf.keras.Model,
    save_location: str,
    img_size: int,
    subsequent_model: Optional[tf.keras.Model] = None,
):
    """ Generate stage-1 (and, when subsequent_model is given, stage-2)
        StackGAN outputs for randomly sampled dataloader entries, saving
        each generated image side-by-side with its real counterpart as
        fake-vs-real-{i}.png inside save_location.
    """
    # Start from a clean output directory.
    rmdir(save_location)
    mkdir(save_location)

    # Draw all the latent noise vectors up front, one per requested image.
    noise_batches = [
        np.random.normal(0, 1, (1, noise_size)).astype("float32")
        for _ in range(num_images)
    ]
    samples = sample_data(dataloader,
                          num_samples=num_images,
                          img_size=img_size)
    real_tensors, real_embeddings = zip(*samples)

    stage1_tensors = []
    for embedding, noise in zip(real_embeddings, noise_batches):
        stage1_tensors.append(
            model.generator([embedding, noise], training=False)[0])

    real_images = format_as_images(real_tensors, is_real=True)
    stage1_images = format_as_images(stage1_tensors, is_real=False)

    if subsequent_model is None:
        # Stage-1 only: pair each real image with its generated one.
        for idx, (real_img, fake_img) in enumerate(
                zip(real_images, stage1_images)):
            combined = concate_horizontallly(real_img, stage1_img=fake_img)
            combined.save(os.path.join(save_location,
                                       f"fake-vs-real-{idx}.png"))
        return

    # Stage 2 refines each stage-1 output conditioned on the embedding.
    stage2_tensors = []
    for stage1_tensor, embedding in zip(stage1_tensors, real_embeddings):
        stage2_tensors.append(
            subsequent_model.generator([stage1_tensor, embedding],
                                       training=False)[0])
    stage2_images = format_as_images(stage2_tensors, is_real=False)

    for idx, (real_img, stage1_img, stage2_img) in enumerate(
            zip(real_images, stage1_images, stage2_images)):
        combined = concate_horizontallly(real_img,
                                         stage1_img=stage1_img,
                                         stage2_img=stage2_img)
        combined.save(os.path.join(save_location, f"fake-vs-real-{idx}.png"))
Example #4
0
def check_for_xrays(directory: str):
    """ Check that the CheXpert xray dataset has been downloaded.

        If the train/valid splits are present, move them (and their csv
        index files) into a "raw" subdirectory of *directory*.

        Arguments:
            directory: Root directory the dataset was downloaded into.

        Raises:
            FileNotFoundError: if either the train or valid split is
                missing. (FileNotFoundError subclasses Exception, so
                callers catching the previous generic Exception still work.)
    """
    train_location = os.path.join(directory, "train")
    valid_location = os.path.join(directory, "valid")
    raw_location = os.path.join(directory, "raw")

    if not os.path.isdir(train_location) or not os.path.isdir(valid_location):
        raise FileNotFoundError("Please first download the CheXpert dataset")

    mkdir(raw_location)
    # Move each split directory together with its csv label file.
    for location in (train_location, valid_location):
        shutil.move(location, raw_location)
        shutil.move(f"{location}.csv", raw_location)
Example #5
0
def write_records_to_file(example_iterable: Iterable, subset_name: str,
                          tfrecords_dir: str):
    """ Save the TFRecord dataset with each example in its own TFRecord file.

        Arguments:
            example_iterable: Iterable (e.g. zip object)
                Each iteration yields a 7-tuple:
                (file_name, image_small, image_large, wrong_image_small,
                 wrong_image_large, text_embedding, label)
            subset_name: str
                Name of the subset (train/test)
            tfrecords_dir: str
                Directory in which to save the TFRecords
    """
    # Create the output directory once, instead of on every iteration.
    subset_dir = os.path.join(tfrecords_dir, subset_name)
    mkdir(subset_dir)

    for i, (
            file_name,
            image_small,
            image_large,
            wrong_image_small,
            wrong_image_large,
            text_embedding,
            label,
    ) in enumerate(example_iterable):
        example = tf.train.Example(features=tf.train.Features(
            feature={
                "image_small": _bytes_feature(image_small),
                "image_large": _bytes_feature(image_large),
                "wrong_image_small": _bytes_feature(wrong_image_small),
                "wrong_image_large": _bytes_feature(wrong_image_large),
                "name": _bytes_feature(file_name),
                "text": _bytes_feature(text_embedding),
                "label": _int64_feature(label),
            }))

        # Write a separate file to disk for each example
        record_path_name = os.path.join(subset_dir,
                                        "example-{}.tfrecord".format(i))
        with tf.io.TFRecordWriter(record_path_name) as writer:
            writer.write(example.SerializeToString())
Example #6
0
 def __init__(self, root_path: str):
     """ Record the output root and ensure its "plots" subdirectory exists. """
     plot_dir = os.path.join(root_path, "plots")
     mkdir(plot_dir)
     self.root_path = root_path
     self.plot_dir = plot_dir