def scale(filename, factor, Bfile):
    """Copy grid coordinates and a scaled magnetic vector potential.

    Reads the 'x'/'y'/'z' coordinate datasets and the three
    'magnetic_vector_potential_*' components from *Bfile*, multiplies each
    vector-potential component by *factor*, and writes everything into
    *filename*.

    Parameters
    ----------
    filename : str
        Path of the destination HDF5 file (opened in append mode).
    factor : float
        Multiplier applied to each vector-potential component.
    Bfile : str
        Path of the source HDF5 file.
    """
    # h5py has no `open` function; files are opened with h5py.File.
    # Context managers close both handles (the original leaked them).
    with h5py.File(Bfile, mode='r') as b, \
            h5py.File(filename, mode='a') as bnew:
        # Bug fix: 'y' and 'z' previously copied b['x'].
        for axis in ('x', 'y', 'z'):
            bnew.create_dataset(axis, data=b[axis])
        for axis in ('x', 'y', 'z'):
            key = 'magnetic_vector_potential_' + axis
            # Read into memory ([:]) so the scalar multiply operates on a
            # numpy array; h5py Dataset objects do not support arithmetic.
            bnew.create_dataset(key, data=b[key][:] * factor)
def reviews_to_dataset(dataset_filename='/Users/rob/Downloads/aclImdb/arpabetic.hdf'):
    """Encode aclImdb review text files into an HDF5 dataset.

    For each split ('train', 'test') creates two gzip-compressed datasets:
      * ``<split>_x`` : (n_files, maxlen) uint8 matrix of character indices
      * ``<split>_y`` : (n_files,) bool vector, True for positive reviews

    Relies on module-level ``maxlen`` and ``read_review``.
    """
    # TODO: use process pool, write to index in HDF5 file
    # h5py exposes File, not open; 'w' truncates any existing file.
    with h5py.File(dataset_filename, 'w') as dataset:
        for d in ['train', 'test']:
            filenames = glob.glob('/Users/rob/Downloads/aclImdb/{}/???/*.txt'.format(d))
            dataset.create_dataset(
                d + '_x',
                (len(filenames), maxlen),
                dtype=np.uint8,
                compression="gzip",
                compression_opts=7
            )
            dataset.create_dataset(
                d + '_y',
                (len(filenames),),
                # np.bool was a removed alias for the builtin bool;
                # np.bool_ is the supported numpy scalar type.
                dtype=np.bool_,
                compression="gzip",
                compression_opts=7
            )
            np.random.shuffle(filenames)
            # NOTE(review): only the first 100 files are processed — looks
            # like a debugging limit; confirm before a full run.
            for i, filename in enumerate(filenames[:100]):
                char_indecies = read_review(filename)
                if i % 10 == 0:
                    print("Processed file {}/{}".format(i, len(filenames)))
                dataset[d + '_x'][i] = char_indecies
                # Label: a 'pos' path component marks a positive review.
                # (filenames[i] == filename inside this loop; use the
                # loop variable directly.)
                dataset[d + '_y'][i] = 'pos' in filename
def sanity_check_dataset(dataset_filename, arpabetic=True):
    """Spot-check a dataset by decoding 10 random train and 10 random
    test samples via the module-level ``one_hot_to_string`` helper.

    Parameters
    ----------
    dataset_filename : str
        Path of the HDF5 file produced by ``reviews_to_dataset``.
    arpabetic : bool
        Passed through to ``one_hot_to_string``.
    """
    # h5py has no `open` function; files are opened with h5py.File.
    with h5py.File(dataset_filename, 'r') as dataset:
        # Pick 10 random row indices for the given dataset key.
        sample_indecies = lambda k: np.random.choice(dataset[k].shape[0], size=10)
        print("Checking train...")
        indecies = sample_indecies('train_x')
        one_hot_to_string(dataset['train_x'][indecies], dataset['train_y'][indecies], arpabetic)
        print("Checking test...")
        # Bug fix: resample against the test split — reusing the train
        # indices could exceed the test set's length.
        indecies = sample_indecies('test_x')
        one_hot_to_string(dataset['test_x'][indecies], dataset['test_y'][indecies], arpabetic)
# Exemplo n.º 4
# 0
def read_xovers(xover_dir):
    """Load crossover data from every ``.h5`` tile in *xover_dir*.

    Returns
    -------
    D : list
        One ``[point_data, point_data]`` pair per tile — the 'D0' and 'D1'
        groups read with the full field list.
    X : list
        One ``point_data`` per tile with the root-level 'x'/'y' coordinates.
    """
    tiles = glob.glob(xover_dir + '/*.h5')
    # Discover the field names once, from the first tile's D0 group.
    with h5py.File(tiles[0], 'r') as h5f:
        fields = list(h5f['D0'].keys())
    D = []
    X = []
    # Reuse the already-globbed tile list instead of globbing again.
    for tile in tiles:
        D.append([
            point_data(list_of_fields=fields).from_file(
                tile, field_dict={gr: fields}) for gr in ['D0', 'D1']
        ])
        # point_data.from_file opens the tile itself; the original also
        # opened an unused handle via the nonexistent h5py.open — removed.
        X.append(
            point_data(list_of_fields=['x', 'y']).from_file(
                tile, field_dict={None: ['x', 'y']}))
    return D, X