Example #1
from typing import Optional

import numpy as np
import tensorflow as tf  # TF 1.x API (tf.placeholder)

import batcher

# DATASET_NAME, BATCH_SIZE, and LABEL_NAME are assumed module-level constants.

def get_batcher(ls_bands: Optional[str], nl_band: Optional[str], num_epochs: int):
    '''
    Args
    - ls_bands: one of [None, 'ms', 'rgb']
    - nl_band: one of [None, 'merge', 'split']
    - num_epochs: int

    Returns
    - b: Batcher
    - size: int, length of dataset
    - feed_dict: dict, feed_dict for initializing the dataset iterator
    '''
    tfrecord_paths = np.asarray(batcher.get_tfrecord_paths(
        DATASET_NAME, 'all'))
    size = len(tfrecord_paths)
    tfrecord_paths_ph = tf.placeholder(tf.string, shape=[size])
    feed_dict = {tfrecord_paths_ph: tfrecord_paths}

    # Pass the placeholder (not the numpy array) so that feed_dict actually
    # takes effect when the iterator is initialized.
    b = batcher.Batcher(tfrecord_files=tfrecord_paths_ph,
                        dataset=DATASET_NAME,
                        batch_size=BATCH_SIZE,
                        label_name=LABEL_NAME,
                        num_threads=4,
                        epochs=num_epochs,
                        ls_bands=ls_bands,
                        nl_band=nl_band,
                        shuffle=False,
                        augment=False,
                        normalize=True,
                        cache=(num_epochs > 1))
    return b, size, feed_dict
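
The returned feed_dict is only useful once the placeholder is run as part of iterator initialization. A minimal consumption sketch in a TF 1.x session, assuming (hypothetically) that Batcher exposes a get_batch() method returning an iterator-initializer op and a dict of batch tensors:

b, size, feed_dict = get_batcher(ls_bands='ms', nl_band='merge', num_epochs=1)
init_iter, batch_op = b.get_batch()  # hypothetical Batcher API

with tf.Session() as sess:
    sess.run(init_iter, feed_dict=feed_dict)  # feeds the TFRecord paths
    batch = sess.run(batch_op)  # one batch of examples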
Example #2
import tensorflow as tf

import batcher

# ls_bands, nl_label, batch_size, num_threads, and dataset are assumed to be
# defined in the enclosing scope (e.g., module-level configuration); this
# snippet appears to have been excerpted from a larger module or class.

def get_batcher(tfrecord_paths: tf.Tensor, shuffle: bool, augment: bool,
                epochs: int, cache: bool) -> batcher.Batcher:
    return batcher.Batcher(tfrecord_files=tfrecord_paths,
                           ls_bands=ls_bands,
                           nl_label=nl_label,
                           batch_size=batch_size,
                           epochs=epochs,
                           normalize=dataset,  # dataset name string, e.g. 'DHS'
                           shuffle=shuffle,
                           augment=augment,
                           clipneg=True,
                           cache=cache,
                           num_threads=num_threads)
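
Unlike Example #1, this variant receives the TFRecord paths as a tf.Tensor (typically a placeholder) instead of building one itself, so a single graph can be re-initialized on different splits at run time. A hedged call-site sketch; the split and placeholder names here are illustrative, not from this snippet:

paths = tfrecord_paths_utils.dhs()  # any list/array of TFRecord paths
paths_ph = tf.placeholder(tf.string, shape=[None])  # length varies per split
train_b = get_batcher(paths_ph, shuffle=True, augment=True, epochs=30,
                      cache=True)
# At run time, feed {paths_ph: paths} when initializing the iterator.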
Example #3
from typing import Optional

import tensorflow as tf  # TF 1.x API (tf.placeholder)

import batcher
import tfrecord_paths_utils

# BATCH_SIZE is assumed to be a module-level constant.

def get_batcher(dataset: str, ls_bands: Optional[str], nl_band: Optional[str],
                num_epochs: int, cache: bool) -> tuple[batcher.Batcher, int, dict]:
    '''Gets the batcher for a given dataset.

    Args
    - dataset: str, one of ['dhs', 'lsms'] # TODO
    - ls_bands: one of [None, 'ms', 'rgb']
    - nl_band: one of [None, 'merge', 'split']
    - num_epochs: int
    - cache: bool, whether to cache the dataset in memory if num_epochs > 1

    Returns
    - b: Batcher
    - size: int, length of dataset
    - feed_dict: dict, feed_dict for initializing the dataset iterator
    '''
    if dataset == 'dhs':
        tfrecord_paths = tfrecord_paths_utils.dhs()
    elif dataset == 'lsms':  # TODO
        tfrecord_paths = tfrecord_paths_utils.lsms()
    else:
        raise ValueError(f'dataset={dataset} is unsupported')

    size = len(tfrecord_paths)
    tfrecord_paths_ph = tf.placeholder(tf.string, shape=[size])
    feed_dict = {tfrecord_paths_ph: tfrecord_paths}

    if dataset == 'dhs':
        b = batcher.Batcher(tfrecord_files=tfrecord_paths_ph,
                            label_name='wealthpooled',
                            ls_bands=ls_bands,
                            nl_band=nl_band,
                            nl_label=None,
                            batch_size=BATCH_SIZE,
                            epochs=num_epochs,
                            normalize='DHS',
                            shuffle=False,
                            augment=False,
                            clipneg=True,
                            cache=(num_epochs > 1) and cache,
                            num_threads=5)
    else:  # LSMS, TODO
        raise NotImplementedError
        # b = delta_batcher.DeltaBatcher()

    return b, size, feed_dict
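
Because num_epochs is baked into the input pipeline, a typical TF 1.x loop simply runs batches until the iterator is exhausted. A sketch under the same hypothetical get_batch() API as in Example #1:

b, size, feed_dict = get_batcher('dhs', ls_bands='ms', nl_band=None,
                                 num_epochs=1, cache=False)
init_iter, batch_op = b.get_batch()  # hypothetical Batcher API

with tf.Session() as sess:
    sess.run(init_iter, feed_dict=feed_dict)
    num_batches = 0
    try:
        while True:
            sess.run(batch_op)
            num_batches += 1
    except tf.errors.OutOfRangeError:  # raised once all epochs are consumed
        pass
print(f'ran {num_batches} batches over {size} examples')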