Example #1
0
def test_existing_dataset(monkeypatch):
    """End-to-end check of `fetch_and_convert_dataset` over an
    `ExistingSourceFile`: when the converter simply returns the source
    path, the decorated fetcher should hand that same path back.

    NOTE(review): relies on `test_config` helpers defined elsewhere in
    this file for temp-dir setup/teardown.
    """
    from batchup.datasets import dataset
    import hashlib

    tdir = test_config._setup_batchup_temp(monkeypatch)

    # Create the source file the ExistingSourceFile will point at
    source_path = os.path.join(tdir, 'source.txt')
    with open(source_path, 'w') as f:
        f.write('hello world')

    # SHA-256 of the file contents, for checksum verification
    hasher = hashlib.sha256()
    hasher.update(b'hello world')
    expected_sha = hasher.hexdigest()
    f1 = dataset.ExistingSourceFile(source_path, sha256=expected_sha)

    @dataset.fetch_and_convert_dataset([f1], 'ds.txt')
    def existing_dataset(source_paths, target_path):
        return source_paths[0]

    dest = existing_dataset()

    # Check the resulting file
    assert dest == source_path
    # FIX: close the file handle deterministically; the original used
    # open(dest).read(), leaking the handle to the garbage collector.
    with open(dest, 'r') as f:
        assert f.read() == 'hello world'

    test_config._teardown_batchup_temp(tdir)
Example #2
0
def test_ExistingSourceFile_acquire(monkeypatch):
    """`ExistingSourceFile.acquire()` on a file that exists should return
    the file's own path, and `clean_up()` must NOT delete the file (it is
    a pre-existing source, not a downloaded temporary).

    NOTE(review): relies on `test_config` helpers defined elsewhere in
    this file for temp-dir setup/teardown.
    """
    from batchup.datasets import dataset
    import hashlib

    tdir = test_config._setup_batchup_temp(monkeypatch)

    # Create the source file on disk
    source_path = os.path.join(tdir, 'source.txt')
    with open(source_path, 'w') as f:
        f.write('hello world')

    # SHA-256 of the file contents, for checksum verification
    hasher = hashlib.sha256()
    hasher.update(b'hello world')
    expected_sha = hasher.hexdigest()

    f1 = dataset.ExistingSourceFile(source_path, sha256=expected_sha)
    assert f1.path == source_path

    dest = f1.acquire()
    assert dest == source_path

    assert os.path.exists(dest)
    # FIX: close the file handle deterministically; the original used
    # open(dest).read(), leaking the handle to the garbage collector.
    with open(dest, 'r') as f:
        assert f.read() == 'hello world'

    # clean up - should NOT remove the file
    f1.clean_up()
    assert os.path.exists(dest)

    test_config._teardown_batchup_temp(tdir)
Example #3
0
def test_ExistingSourceFile_constructor(monkeypatch):
    """`ExistingSourceFile` should accept a string path or a callable
    returning a path, and reject anything else with TypeError.

    NOTE(review): relies on `_patch_config_datadir` defined elsewhere in
    this file.
    """
    from batchup.datasets import dataset

    _patch_config_datadir(monkeypatch)

    # String path
    f1 = dataset.ExistingSourceFile(
        path=os.path.join('some_place', 'other.txt'))
    assert f1.path == os.path.join('some_place', 'other.txt')
    assert str(f1) == 'file at {}'.format(
        os.path.join('some_place', 'other.txt'))

    # Callable path: FIX — the original asserted on `f1` here (copy-paste
    # bug), so the lambda-path variant was never actually checked.
    f2 = dataset.ExistingSourceFile(
        path=lambda: os.path.join('some_place', 'other.txt'))
    assert f2.path == os.path.join('some_place', 'other.txt')
    assert str(f2) == 'file at {}'.format(
        os.path.join('some_place', 'other.txt'))

    # Anything that is neither a string nor a callable is rejected
    with pytest.raises(TypeError):
        dataset.ExistingSourceFile(1)
Example #4
0
def test_ExistingSourceFile_acquire_nonexistant(monkeypatch):
    """`acquire()` on an `ExistingSourceFile` whose path does not exist
    on disk should return None rather than raising."""
    from batchup.datasets import dataset
    import hashlib

    tdir = test_config._setup_batchup_temp(monkeypatch)

    # Note: the file at this path is deliberately never created
    missing_path = os.path.join(tdir, 'source.txt')

    content_sha = hashlib.sha256(b'hello world').hexdigest()

    src = dataset.ExistingSourceFile(missing_path, sha256=content_sha)
    assert src.path == missing_path

    # Acquisition of a missing file must fail gracefully with None
    assert src.acquire() is None

    test_config._teardown_batchup_temp(tdir)
Example #5
0

def _syndigits_train_path():
    """Return the path of the Syn-Digits training-set Matlab file."""
    data_dir = get_data_dir('syn_digits')
    return os.path.join(data_dir, 'synth_train_32x32.mat')


def _syndigits_test_path():
    """Return the path of the Syn-Digits test-set Matlab file."""
    data_dir = get_data_dir('syn_digits')
    return os.path.join(data_dir, 'synth_test_32x32.mat')


def _syndigits_h5_path():
    """Return the absolute path of the converted Syn-Digits HDF5 file."""
    h5_path = os.path.join(get_data_dir('syn_digits'), 'syn_digits.h5')
    return os.path.abspath(h5_path)


# Source-file descriptors for the Syn-Digits Matlab files. Callables are
# passed as the path argument so the data directory is resolved lazily at
# acquire time. The second positional argument is None — presumably the
# sha256 checksum, disabling verification (TODO confirm against
# ExistingSourceFile's signature).
_TRAIN_SRC = dataset.ExistingSourceFile(_syndigits_train_path, None)
_TEST_SRC = dataset.ExistingSourceFile(_syndigits_test_path, None)


@dataset.fetch_and_convert_dataset([_TRAIN_SRC, _TEST_SRC], _syndigits_h5_path)
def fetch_syn_digits(source_paths, target_path):
    train_path, test_path = source_paths

    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'syn_digits', 'Syn-Digits data')

    # Load in the training data Matlab file
    print('Converting {} to HDF5...'.format(train_path))
    train_X_u8, train_y = svhn._read_svhn_matlab(train_path)
    f_out.create_array(g_out, 'train_X_u8', train_X_u8)
    f_out.create_array(g_out, 'train_y', train_y)