import glob

import h5py
import numpy as np


def scale(filename, factor, Bfile):
    """Copy the grid from Bfile into filename and scale the vector potential by factor."""
    with h5py.File(Bfile, 'r') as b, h5py.File(filename, mode='a') as bnew:
        # Copy the coordinate grids through unchanged.
        bnew.create_dataset('x', data=b['x'][...])
        bnew.create_dataset('y', data=b['y'][...])
        bnew.create_dataset('z', data=b['z'][...])
        # Scale each component of the magnetic vector potential.
        bnew.create_dataset('magnetic_vector_potential_x', data=b['magnetic_vector_potential_x'][...] * factor)
        bnew.create_dataset('magnetic_vector_potential_y', data=b['magnetic_vector_potential_y'][...] * factor)
        bnew.create_dataset('magnetic_vector_potential_z', data=b['magnetic_vector_potential_z'][...] * factor)
        bnew.flush()
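# A minimal usage sketch (hypothetical filenames): double the vector potential stored
# in 'B_run1.h5' and write it, along with the copied grid, to a new file 'B_run1_x2.h5'.
# scale('B_run1_x2.h5', 2.0, 'B_run1.h5')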
def reviews_to_dataset(dataset_filename='/Users/rob/Downloads/aclImdb/arpabetic.hdf'):
    # TODO: use process pool, write to index in HDF5 file
    # maxlen and read_review are expected to be defined elsewhere in this module.
    with h5py.File(dataset_filename, 'w') as dataset:
        for d in ['train', 'test']:
            filenames = glob.glob('/Users/rob/Downloads/aclImdb/{}/???/*.txt'.format(d))
            dataset.create_dataset(
                d + '_x', (len(filenames), maxlen), dtype=np.uint8,
                # chunks=True, compression="gzip", compression_opts=7
            )
            dataset.create_dataset(
                d + '_y', (len(filenames),), dtype=bool,
                # chunks=True, compression="gzip", compression_opts=7
            )
            np.random.shuffle(filenames)
            for i, filename in enumerate(filenames[:100]):
                char_indices = read_review(filename)
                if i % 10 == 0:
                    print("Processed file {}/{}".format(i, len(filenames)))
                dataset[d + '_x'][i] = char_indices
                dataset[d + '_y'][i] = 'pos' in filename
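# Example invocation (assumes the aclImdb corpus is unpacked at the default path
# above, and that maxlen and read_review are defined in this module):
# reviews_to_dataset('/Users/rob/Downloads/aclImdb/arpabetic.hdf')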
def sanity_check_dataset(dataset_filename, arpabetic=True):
    with h5py.File(dataset_filename, 'r') as dataset:
        # h5py fancy indexing requires sorted, unique indices.
        sample_indices = lambda k: np.sort(np.random.choice(dataset[k].shape[0], size=10, replace=False))
        indices = sample_indices('train_x')
        print("Checking train...")
        one_hot_to_string(dataset['train_x'][indices], dataset['train_y'][indices], arpabetic)
        indices = sample_indices('test_x')
        print("Checking test...")
        one_hot_to_string(dataset['test_x'][indices], dataset['test_y'][indices], arpabetic)
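# Example spot check of a previously built dataset (path is hypothetical):
# sanity_check_dataset('/Users/rob/Downloads/aclImdb/arpabetic.hdf', arpabetic=True)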
def read_xovers(xover_dir):
    # point_data is assumed to be imported from the project's point-data module.
    tiles = glob.glob(xover_dir + '/*.h5')
    # Use the first tile to discover which fields each crossover group contains.
    with h5py.File(tiles[0], 'r') as h5f:
        fields = list(h5f['D0'].keys())
    D = []
    X = []
    for tile in tiles:
        # Read both crossover groups ('D0' and 'D1') with the full field list.
        D.append([
            point_data(list_of_fields=fields).from_file(tile, field_dict={gr: fields})
            for gr in ['D0', 'D1']
        ])
        # Read just the crossover locations from the file's root group.
        X.append(point_data(list_of_fields=['x', 'y']).from_file(tile, field_dict={None: ['x', 'y']}))
    return D, X
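# Example usage (hypothetical directory of crossover tiles produced elsewhere):
# D, X = read_xovers('/data/xovers')
# len(D) and len(X) both match the number of *.h5 tiles found in the directory.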