def test_filters():
    from tables import Filters, open_file

    class TestContainer(Container):
        value = Field(-1, "test")

    no_comp = Filters(complevel=0)
    zstd = Filters(complevel=5, complib="blosc:zstd")

    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:
        with HDF5TableWriter(f.name, group_name="data", mode="w", filters=no_comp) as writer:
            assert writer._h5file.filters.complevel == 0

            c = TestContainer(value=5)
            writer.write("default", c)

            writer.filters = zstd
            writer.write("zstd", c)

            writer.filters = no_comp
            writer.write("nocomp", c)

        with open_file(f.name) as h5file:
            assert h5file.root.data.default.filters.complevel == 0
            assert h5file.root.data.zstd.filters.complevel == 5
            assert h5file.root.data.zstd.filters.complib == "blosc:zstd"
            assert h5file.root.data.nocomp.filters.complevel == 0
def test_filters(tmp_path):
    from tables import Filters, open_file

    path = tmp_path / "test_time.hdf5"

    class TestContainer(Container):
        value = Field(-1, "test")

    no_comp = Filters(complevel=0)
    zstd = Filters(complevel=5, complib="blosc:zstd")

    with HDF5TableWriter(path, group_name="data", mode="w", filters=no_comp) as writer:
        assert writer.h5file.filters.complevel == 0

        c = TestContainer(value=5)
        writer.write("default", c)

        writer.filters = zstd
        writer.write("zstd", c)

        writer.filters = no_comp
        writer.write("nocomp", c)

    with open_file(path) as h5file:
        assert h5file.root.data.default.filters.complevel == 0
        assert h5file.root.data.zstd.filters.complevel == 5
        assert h5file.root.data.zstd.filters.complib == "blosc:zstd"
        assert h5file.root.data.nocomp.filters.complevel == 0
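# A minimal standalone sketch of the same idea with plain PyTables (no
# HDF5TableWriter dependency): each create_* call can take its own Filters
# instance, overriding the file-wide default. The file name "example.h5" and
# node names "plain"/"compressed" are illustrative, not from the tests above.
import numpy as np
import tables

with tables.open_file("example.h5", mode="w", filters=tables.Filters(complevel=0)) as h5:
    zstd = tables.Filters(complevel=5, complib="blosc:zstd")
    # inherits the file-wide default (no compression)
    h5.create_carray("/", "plain", obj=np.arange(10))
    # per-node override: compressed with blosc:zstd
    h5.create_carray("/", "compressed", obj=np.arange(10), filters=zstd)

with tables.open_file("example.h5") as h5:
    assert h5.root.plain.filters.complevel == 0
    assert h5.root.compressed.filters.complib == "blosc:zstd"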
def savanalysis(self, adataname, adatarray):
    '''
    prerequisite: accesstructure, mkanalysis
    '''
    m, n = adatarray.shape[0], adatarray.shape[1]
    with open_file(self.analysispath / (self.analysisfolder + ".h5"), 'w') as f:
        filters = Filters(complevel=5, complib='blosc')
        acontainer = f.create_carray(f.root, adataname, Float64Atom(),
                                     shape=(m, n), filters=filters)
        acontainer[:, :] = adatarray

        # Create a table in the root directory and append data...
        class About(IsDescription):
            task = StringCol(len(self.task), pos=1)        # N-character String
            comment = StringCol(len(self.comment), pos=2)  # N-character String

        tableroot = f.create_table(f.root, 'info', About, "A table at root", Filters(1))
        tableroot.append([(self.task, self.comment)])
        # , ("Mediterranean", 11, -1, 11*11, 11**2), ("Adriatic", 12, -2, 12*12, 12**2)])
    return
def init_table(self):
    filters = Filters(complevel=3, fletcher32=True)
    table = self.file.create_table(self.file.root, "deposit",
                                   description=self.dtype, filters=filters)
    return table
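# A short usage sketch for a table like the one above, assuming self.dtype is
# a numpy record dtype; the column names ("energy", "z") and file name are
# illustrative. fletcher32=True adds a per-chunk checksum, so corruption is
# detected when the rows are read back.
import numpy as np
import tables

with tables.open_file("deposit.h5", mode="w") as h5:
    filters = tables.Filters(complevel=3, fletcher32=True)
    dtype = np.dtype([("energy", "f8"), ("z", "f8")])
    table = h5.create_table(h5.root, "deposit", description=dtype, filters=filters)
    row = table.row
    for energy, z in [(1.0, 0.5), (2.5, 0.7)]:
        row["energy"] = energy
        row["z"] = z
        row.append()
    table.flush()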
def save_pytables(self, filename, title='SVD results', filters=None, **kw):
    from csc.divisi.pyt_utils import get_pyt_handle
    from tables import ObjectAtom, Filters, Atom

    fileh = get_pyt_handle(filename, title)
    if filters is None and kw:
        filters = Filters(**kw)
    try:
        root = fileh.root

        def store_tensor(name, tensor):
            data = tensor._data
            arr = fileh.createCArray(root, name, Atom.from_dtype(data.dtype),
                                     tensor.shape, filters=filters)
            arr[:] = data

        # Labeled stuff
        for name in ('u', 'v', 'weighted_u', 'weighted_v'):
            store_tensor(name, getattr(self, name).tensor)
        # Unlabeled stuff
        for name in ('svals', 'core'):
            store_tensor(name, getattr(self, name))

        # Ordered sets
        def write_labels(name, view):
            arr = fileh.createVLArray(root, name, ObjectAtom(), filters=filters)
            for label in view.label_lists():
                arr.append(label)

        write_labels('u_labels', self.u)
        write_labels('v_labels', self.v)
    finally:
        fileh.close()
def __ssc(self, l2f):
    data = gdal.Open(l2f)
    ds = data.GetSubDatasets()
    ssc_sert = gdal.Open(ds[self.dic['ssc']][0]).ReadAsArray() * 1000
    sscfname = '%s_ssc_sert.l2' % (l2f.split('.')[0])
    if os.path.exists(sscfname):
        return
    h5file_l2 = tables.open_file(sscfname, 'w')
    shape = (self.lines, self.pixels)
    atom = Float32Atom()
    filters = Filters(complevel=5, complib='zlib')
    h5file_l2.root._v_attrs.title = 'GOCI SSC product produced by SKLEC,Yanqun Pan'
    grpChla = h5file_l2.create_group(h5file_l2.root, 'SSC', 'SSC')
    grpChla._v_attrs.Scans = self.lines
    grpChla._v_attrs.Pixels = self.pixels
    grpChla._v_attrs.AlgorithmName = 'ATC_MPL'
    ca = h5file_l2.create_carray(grpChla, 'SSC_SERT', atom, shape, filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:] = ssc_sert
    h5file_l2.close()
def start(self, filename=None):
    """
    Starts recording incoming events to a file. If no filename is
    given, a new timestamped file is created in the directory that was
    specified to the constructor.
    """
    if filename is None:
        datestring = time.strftime("%Y-%m-%dT%H:%M:%S")
        filename = os.path.abspath(
            os.path.join(self.base_dir, "zeodata_%s.h5" % datestring))
    filters = Filters(complevel=self.compression_level,
                      fletcher32=self.checksum)
    h5file = tables.openFile(filename, mode="w", filters=filters)
    group = h5file.createGroup("/", "zeolinkdata", "Zeo Raw Data Link Recording")
    self.replay_data = h5file.createVLArray(
        group, 'data', VLStringAtom(), "Link Replay Data",
        expectedsizeinMB=(self.expected_hours * 3600 * 300) / (1024.0 ** 2))
    self.replay_metadata = h5file.createTable(
        group, 'metadata', TimestampedZeoDesc, "Link Replay Metadata",
        expectedrows=self.expected_hours * 3600 * 5)
    self.h5file = h5file
    print("Recording to %s started." % filename)
def save_hdf(data, fn, complevel=9, key='data'):
    filters = Filters(complevel=complevel, complib='blosc')
    with open_file(fn, mode="w") as f:
        _ = f.create_carray('/', key, Atom.from_dtype(data.dtype),
                            filters=filters, obj=data)
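# A round-trip sketch for save_hdf, assuming numpy and tables are importable
# (as the Atom/Filters/open_file names above suggest); the load_hdf helper is
# illustrative and not part of the original code.
import numpy as np
from tables import open_file

def load_hdf(fn, key='data'):
    # Read the whole compressed carray back into memory as a numpy array.
    with open_file(fn) as f:
        return f.get_node('/', key)[:]

# save_hdf(np.arange(12, dtype=np.float64).reshape(3, 4), "example.h5")
# assert load_hdf("example.h5").shape == (3, 4)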
def __init__(self, meta):
    self.reader = ProtoSetReader("stacking_simple.bin", CylinderProtoSet)
    filters = Filters(complevel=3, fletcher32=True)
    self.reader.set_filters(filters)
    self.path_hdf5 = "result.hdf5"
    self.counter = 0
    self.mess_templte = Template(MESSEGE)
    self.meta = meta
    self.step = 0.001
def make_arrays(h5file, g, size, atom, settings):
    chunk_elements = settings['chunk_size'] // atom.itemsize
    chunk_shapes = settings['chunk_shapes']
    subsamples = settings['subsamples']
    min_subsample_elements = settings['min_subsample_elements']
    if 'compress' in settings and settings['compress']:
        method = settings.get('compress_method', 'zlib')
        level = settings.get('compress_level', 5)
        shuffle = settings.get('compress_shuffle', True)
        from tables import Filters
        filters = Filters(complevel=level, complib=method, shuffle=shuffle)
    else:
        filters = None

    arrays = []
    isize, jsize, ksize = size
    shape = (ksize, jsize, isize)
    cshapes = {}  # Avoid duplicating chunk shapes
    for csname in chunk_shapes:
        cshape = chunk_shape(shape, csname, chunk_elements)
        if cshape not in cshapes:
            a = h5file.create_carray(g, 'data_' + csname, atom, shape,
                                     chunkshape=cshape, filters=filters)
            arrays.append(((1, 1, 1), a))
            cshapes[cshape] = True

    # Compute step sizes to use.
    steps = list(subsamples)
    istep, jstep, kstep = tuple(2 * s for s in subsamples[-1]) if subsamples else (2, 2, 2)
    from numpy import array, int32
    while (isize >= istep and jsize >= jstep and ksize >= kstep and
           (isize // istep) * (jsize // jstep) * (ksize // kstep) >= min_subsample_elements):
        steps.append((istep, jstep, kstep))
        istep *= 2
        jstep *= 2
        kstep *= 2

    # Make subsample arrays.
    for step in steps:
        istep, jstep, kstep = step
        shape = (1 + (ksize - 1) // kstep,
                 1 + (jsize - 1) // jstep,
                 1 + (isize - 1) // istep)
        cshapes = {}  # Avoid duplicating chunk shapes
        for csname in chunk_shapes:
            cshape = chunk_shape(shape, csname, chunk_elements)
            if cshape not in cshapes:
                sstep = '%d_%d_%d' % tuple(step)
                a = h5file.create_carray(g, 'data_%s_%s' % (csname, sstep), atom, shape,
                                         chunkshape=cshape, filters=filters)
                a._v_attrs.subsample_spacing = array(step, int32)
                arrays.append((step, a))
                cshapes[cshape] = True
    return arrays
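# An illustrative settings dict and call for make_arrays above; the key names
# mirror the lookups the function performs. It assumes a chunk_shape(shape,
# name, chunk_elements) helper exists in the same module (it is referenced but
# not shown here), so the call itself is left commented out.
from tables import open_file, Float32Atom

settings = {
    'chunk_size': 64 * 1024,         # target chunk size in bytes
    'chunk_shapes': ['zyx'],         # names understood by chunk_shape()
    'subsamples': [],                # explicit subsample steps, if any
    'min_subsample_elements': 4096,  # stop subsampling below this size
    'compress': True,
    'compress_method': 'zlib',
    'compress_level': 5,
    'compress_shuffle': True,
}
# with open_file("grid.h5", "w") as h5:
#     arrays = make_arrays(h5, h5.root, (256, 256, 256), Float32Atom(), settings)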
def create_correlation_matrix(infiles, roi, out_type, package):
    import os
    import numpy as np
    import scipy.io as sio
    import nibabel as nb
    from nipype.utils.filemanip import split_filename, filename_to_list

    for idx, fname in enumerate(filename_to_list(infiles)):
        data = np.squeeze(nb.load(fname).get_data())
        if idx == 0:
            timeseries = data
        else:
            timeseries = np.vstack((timeseries, data))

    roi_data = np.genfromtxt(roi)
    if not len(roi_data.shape) == 2:
        roi_data = roi_data[:, None]
    corrmat = np.zeros((roi_data.shape[1], timeseries.shape[0]))
    print(timeseries.shape)
    for i in range(roi_data.shape[1]):
        for j in range(timeseries.shape[0]):
            r = np.corrcoef(timeseries[j, :], roi_data[:, i])[0][1]
            # Fisher z-transform of r, scaled by sqrt(n - 3)
            corrmat[i, j] = np.sqrt(timeseries.shape[1] - 3) * 0.5 * np.log(
                (1 + r) / (1 - r))
    #corrmat = np.corrcoef(timeseries,roi_data.T)
    print(corrmat.shape)

    _, name, _ = split_filename(filename_to_list(infiles)[0])
    if len(filename_to_list(infiles)) > 1:
        name = 'combined_' + name
    if 'mat' in out_type:
        matfile = os.path.abspath(name + '.mat')
        sio.savemat(matfile, {'corrmat': corrmat})
        output = matfile
    elif 'hdf5' in out_type:
        hdf5file = os.path.abspath(name + '.hf5')
        if package == 'h5py':
            import h5py
            f = h5py.File(hdf5file, 'w')
            f.create_dataset('corrmat', data=corrmat, compression=5)
            f.close()
        else:
            from tables import openFile, Float64Atom, Filters
            h5file = openFile(hdf5file, 'w')
            arr = h5file.createCArray(h5file.root, 'corrmat', Float64Atom(),
                                      corrmat.shape, filters=Filters(complevel=5))
            arr[:] = corrmat
            h5file.close()
        output = hdf5file
    else:
        raise Exception('Unknown output type')
    return output
def test_proto_set_convertor(self):
    readers = [
        ProtoSetReader("gammaSeed.bin", CylinderProtoSet),
        ProtoSetReader("positronSeed.bin", CylinderProtoSet),
        ProtoSetReader("histogram.bin", HistogramProtoSet)
    ]
    path = "/home/zelenyy/npm/phd/phd-code/cxx/thunderstorm/run"
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    convertor.convert(path, "./test.hdf5")
def getAve(self, year, filter, productName):
    l2files = glob.glob(os.path.join(self.l2dir, str(year), filter))
    # files_stat.append([year,mon,len(l2files)])
    if len(l2files) == 0:
        print('no data in %s!' % (year))
        return
    print(len(l2files))
    values = self.process(l2files, productName, [0, 4000])
    values = cv2.blur(values, (5, 5))
    if year == "*":
        l2binfile = os.path.join(
            self.l2dir,
            'COMS%s%s_bin.l2' % ("2012-2016", productName.split('/')[-1]))
    else:
        l2binfile = os.path.join(
            self.l2dir,
            '%s/COMS%s%s_bin.l2' % (year, year, productName.split('/')[-1]))
    h5file_l2 = tables.open_file(l2binfile, 'w')
    atom = Float32Atom()
    filters = Filters(complevel=5, complib='zlib')
    h5file_l2.root._v_attrs.title = 'L2 bin product(%s) produced by SKLEC' % (
        productName.split('/')[-1])
    h5file_l2.root._v_attrs.Scans = self.LINES
    h5file_l2.root._v_attrs.Pixels = self.PIXELS
    h5file_l2.root._v_attrs.AlgorithmName = 'ATC_MPL'
    h5file_l2.root._v_attrs.AlgorithmAuthor = 'Yanqun Pan, State Key Laboratory of Estuarine and Coastal Research'
    ca = h5file_l2.create_carray(h5file_l2.root, productName.split('/')[-1],
                                 atom, (self.LINES, self.PIXELS), filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:, :] = values
    ca = h5file_l2.create_carray(h5file_l2.root, 'longitude', atom,
                                 (self.LINES, self.PIXELS), filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:, :] = self.longitude
    ca = h5file_l2.create_carray(h5file_l2.root, 'latitude', atom,
                                 (self.LINES, self.PIXELS), filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:, :] = self.latitude
    h5file_l2.close()
    print(values.shape)
def test_cumulator2D(self):
    if os.path.exists("test_cumulator2d.hdf5"):
        os.remove("test_cumulator2d.hdf5")
    path = "/home/zelenyy/npm/phd/phd-code/cxx/thunderstorm/run"
    readers = [
        ProtoSetReader("electron_deposit_cumulator2d.bin", Cumulator2DProtoSet),
        ProtoSetReader("electron_number_cumulator2d.bin", Cumulator2DProtoSet)
    ]
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    convertor.convert(path, "./test_cumulator2d.hdf5")
def get_convertor(readers: list, path_h5file, clear=False):
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        logging.root.debug("Reader: {} {}".format(type(reader), reader.filename))
        reader.set_filters(filters)

    def post_run_processor(input_data: InputData):
        path = input_data.path
        convertor.convert(path, path_h5file, meta=input_data.to_meta())
        if clear:
            shutil.rmtree(path)

    return post_run_processor
def make_arrays(h5file, g, size, atom, settings):
    # Use integer division throughout; under Python 3 the original "/" would
    # produce floats where element counts and array shapes are expected.
    chunk_elements = settings['chunk_size'] // atom.itemsize
    chunk_shapes = settings['chunk_shapes']
    min_subsample_elements = settings['min_subsample_elements']
    if 'compress' in settings and settings['compress']:
        from tables import Filters
        filters = Filters(complevel=9)
    else:
        filters = None

    arrays = []
    isize, jsize, ksize = size
    shape = (ksize, jsize, isize)
    cshapes = {}  # Avoid duplicating chunk shapes
    for csname in chunk_shapes:
        cshape = chunk_shape(shape, csname, chunk_elements)
        if cshape not in cshapes:
            a = h5file.createCArray(g, 'data_' + csname, atom, shape,
                                    chunkshape=cshape, filters=filters)
            arrays.append((1, a))
            cshapes[cshape] = True

    # Make subsample arrays.
    step = 2
    from numpy import array, int32
    while (isize >= step and jsize >= step and ksize >= step and
           (isize // step) * (jsize // step) * (ksize // step) >= min_subsample_elements):
        shape = (1 + (ksize - 1) // step,
                 1 + (jsize - 1) // step,
                 1 + (isize - 1) // step)
        cshapes = {}  # Avoid duplicating chunk shapes
        for csname in chunk_shapes:
            cshape = chunk_shape(shape, csname, chunk_elements)
            if cshape not in cshapes:
                a = h5file.createCArray(g, 'data_%s_%d' % (csname, step), atom, shape,
                                        chunkshape=cshape, filters=filters)
                a._v_attrs.subsample_spacing = array((step, step, step), int32)
                arrays.append((step, a))
                cshapes[cshape] = True
        step *= 2
    return arrays
def save(self, db):
    """Save the input data to disk.

    Notes
    -----
    Saves predictions, measurements, observables, and prior_pops
    to the HDF5 PyMC database.
    """
    if db != "hdf5":
        return
    from tables import Float64Atom, Filters
    compression = Filters(complevel=9, complib='blosc', shuffle=True)
    F = self.mcmc.db._h5file
    F.createCArray("/", "predictions", Float64Atom(), self.predictions.shape,
                   filters=compression)
    F.root.predictions[:] = self.predictions
    F.createCArray("/", "measurements", Float64Atom(), self.measurements.shape,
                   filters=compression)
    F.root.measurements[:] = self.measurements
    F.createCArray("/", "uncertainties", Float64Atom(), self.uncertainties.shape,
                   filters=compression)
    F.root.uncertainties[:] = self.uncertainties
    F.createCArray("/", "prior_pops", Float64Atom(), self.prior_pops.shape,
                   filters=compression)
    F.root.prior_pops[:] = self.prior_pops
def create_correlation_matrix(infiles, out_type, package):
    import os
    import numpy as np
    import scipy.io as sio
    import nibabel as nb
    from nipype.utils.filemanip import split_filename, filename_to_list

    for idx, fname in enumerate(filename_to_list(infiles)):
        data = np.squeeze(nb.load(fname).get_data())
        if idx == 0:
            timeseries = data
        else:
            timeseries = np.vstack((timeseries, data))

    corrmat = np.corrcoef(timeseries)
    _, name, _ = split_filename(filename_to_list(infiles)[0])
    if len(filename_to_list(infiles)) > 1:
        name = 'combined_' + name
    if 'mat' in out_type:
        matfile = os.path.abspath(name + '.mat')
        sio.savemat(matfile, {'corrmat': corrmat})
        output = matfile
    elif 'hdf5' in out_type:
        hdf5file = os.path.abspath(name + '.hf5')
        if package == 'h5py':
            import h5py
            f = h5py.File(hdf5file, 'w')
            f.create_dataset('corrmat', data=corrmat, compression=5)
            f.close()
        else:
            from tables import openFile, Float64Atom, Filters
            h5file = openFile(hdf5file, 'w')
            arr = h5file.createCArray(h5file.root, 'corrmat', Float64Atom(),
                                      corrmat.shape, filters=Filters(complevel=5))
            arr[:] = corrmat
            h5file.close()
        output = hdf5file
    else:
        raise Exception('Unknown output type')
    return output
def open_data(self, path, mode='r'):
    if self.repository:
        out = os.path.join(self.workspace_root, path)
        path = os.path.join(self.repository.root, path)
        if not os.path.isfile(out):
            self.info('copying {} to repository {}'.format(
                path, os.path.dirname(out)))
            if not self.repository.retrieveFile(path, out):
                return False
        path = out
    try:
        self._frame = open_file(
            path, mode,
            filters=Filters(complevel=self.compression_level))
        return True
    except Exception:
        self._frame = None
        import traceback
        traceback.print_exc()
    return True
def test_cumulator(self):
    if os.path.exists("test_cumulator.hdf5"):
        os.remove("test_cumulator.hdf5")
    path = "/home/zelenyy/data/thunderstorm/test"
    readers = [
        ProtoSetReader("electron_z_cumulator.bin", Cumulator1DProtoSet),
        ProtoSetReader("electron_time_cumulator.bin", Cumulator1DProtoSet)
    ]
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    convertor.convert(path, "./test_cumulator.hdf5")
    with tables.open_file("test_cumulator.hdf5") as h5file:
        for i in range(10):
            name = "event" + str(i).rjust(5, "0")
            # data = h5file.get_node("/test/electron_z_cumulator", name)
            data = h5file.get_node("/test/electron_time_cumulator", name)
            plt.plot(data)
        plt.show()
def main(argv):
    # Manually change the list we iterate through to select between the data
    # and masks. (Doing both kills the node.)
    for file, saveTarget in SKIN_SUBFOLDERS:
        h5file = saveTarget
        h5 = open_file(h5file, "w")
        X = unionJackPrep(file)
        atom = Atom.from_dtype(X.dtype)
        flt = Filters(complevel=0)
        h5data = h5.create_carray(h5.root, "data", atom, X.shape, filters=flt)
        h5data[:] = X
        h5data.attrs.mean = None
        h5data.attrs.std = None
        h5.flush()
        h5.close()
        del h5
        del X
        del atom
        del flt
        del h5data
        gc.collect()
        print("No mean or std to compute.")
def __init__(self, parent, p, m, complevel):
    self._file = open_file(p, m, filters=Filters(complevel=complevel))
    self._parent = parent
    self._parent._frame = self._file
def __init__(self, p, t, g, complevel, mode):
    self._file = open_file(p, mode, filters=Filters(complevel=complevel))
    self._t = t
    self._g = g
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function

# Copyright 2008-2014 Michael M. Hoffman <*****@*****.**>

from argparse import ArgumentParser, FileType
from contextlib import closing
from gzip import open as _gzip_open
from os import extsep
import sys

from numpy import append, array, empty
from tables import Filters

FILTERS_GZIP = Filters(complevel=1)

EXT_GZ = "gz"
SUFFIX_GZ = extsep + EXT_GZ

GENOMEDATA_ENCODING = "ascii"
DEFAULT_CHROMOSOME_NAME_STYLE = "UCSC-style-name"

chromosome_name_map_parser = ArgumentParser(add_help=False)
chromsome_names = chromosome_name_map_parser.add_argument_group(
    "Chromosome naming")
chromsome_names.add_argument("-r", "--assembly-report",
                             dest="assembly_report", type=FileType('r'),
def _inner_hdf5_output(meta, result):
    # noinspection PyProtectedMember
    meta_str = '_'.join(
        k + '_' + ('%09d' % v if type(v) == int else v.__name__)
        for k, v in sorted(meta._asdict().items(), key=lambda x: x[0])
    )
    prefix = '/results/'
    if immediate_prefix != '':
        prefix += immediate_prefix
    prefix += '/' + meta_str + '/'
    success = False
    local_timeout = timeout
    base_filename = _filename
    lock_file = None
    while not success:
        filename, lock_file = wait_for_lock_and_prepare_filename(base_filename, local_timeout)
        compression_type = 'zlib'
        compression_level = 6
        compression_filter = Filters(complib=compression_type, complevel=compression_level)
        try:
            # race conditions
            # open(lock_file, 'w+')
            # noinspection PyUnusedLocal
            with acquire_lock(lock_file) as lock:
                store = HDFStore(filename, complevel=compression_level, complib=compression_type)
                # noinspection PyProtectedMember
                h5 = store._handle

                # cache for palettes
                # currently unused
                # palette_written = {}

                def store_image(h5path, name, data, upsample_binary=True):
                    h5path = h5path.replace('//', '/')
                    # hdf5 stores bitfields as well, but default 0,1 will be invisible on a fixed 0-255 palette ...
                    if data.dtype == bool and upsample_binary:
                        data = (data * 255).astype(np.uint8)
                    arr = h5.create_carray(h5path, name, obj=data, createparents=True,
                                           filters=compression_filter)
                    arr.attrs.CLASS = 'IMAGE'
                    arr.attrs.IMAGE_SUBCLASS = 'IMAGE_GRAYSCALE'
                    arr.attrs.IMAGE_VERSION = '1.2'

                def store_data(h5path, name, data):
                    h5path = h5path.replace('//', '/')
                    h5path_splits = [x for x in h5path.split('/') if x != '']
                    for i in range(len(h5path_splits)):
                        try:
                            h5.create_group('/' + '/'.join(h5path_splits[:i]), h5path_splits[i])
                        except NodeError:
                            pass
                    f = filenode.new_node(h5, where=h5path, name=name, filters=compression_filter)
                    if type(data) == str:
                        data = data.encode('utf-8')
                    f.write(data)
                    f.close()

                def store_table(name, data):
                    _frame = DataFrame(data)
                    store[name] = _frame
                    # .append(name, _frame, data_columns=_frame.columns)

                image_counter = {}
                data_counter = {}
                table_counter = {}

                def process_row(result_table_rows, m, row):
                    cresults = []
                    # noinspection PyProtectedMember
                    tmp = {('meta_' + mk): (mv if type(mv) == int else -1)
                           for mk, mv in m._asdict().items()}
                    if type(result_table_rows) == list:
                        result_table_rows = {key: True for key in result_table_rows}
                    if '_plain' in result_table_rows:
                        for v in result_table_rows['_plain']:
                            result_table_rows[v] = True
                        del result_table_rows['_plain']

                    def is_wildcarded(s):
                        return '*' in s

                    for k, v in list(result_table_rows.items()):
                        if is_wildcarded(k):
                            del result_table_rows[k]
                            for row_key in row.keys():
                                if fnmatch(row_key, k):
                                    result_table_rows[row_key] = v
                    for k, v in result_table_rows.items():
                        if v == 'table':
                            if k not in table_counter:
                                table_counter[k] = 0
                            if k in row and len(row[k]) > 0:
                                if type(row[k][0]) == list:
                                    # it's a list of lists
                                    # create a mapping table
                                    # point to the mapping table
                                    the_counter = table_counter[k]
                                    new_path = '/tables/_mapping_%s' % (k,)
                                    new_name = '%s_%09d' % (k, the_counter)
                                    tmp[k] = -1
                                    tmp['_mapping_%s' % k] = the_counter
                                    table_counter[k] += 1
                                    i_mapping = []
                                    for n, i_table in enumerate(row[k]):
                                        i_new_path = '/tables/_individual_%s' % (k,)
                                        i_new_name = '%s_%09d' % (k, table_counter[k])
                                        store_table(prefix + i_new_path + '/' + i_new_name, i_table)
                                        i_mapping.append({
                                            '_index': n,
                                            'individual_table': table_counter[k]
                                        })
                                        table_counter[k] += 1
                                    store_table(prefix + new_path + '/' + new_name, i_mapping)
                                    tmp[k] = table_counter[k]
                                    table_counter[k] += 1
                                else:
                                    new_path = '/tables/%s' % (k,)
                                    new_name = '%s_%09d' % (k, table_counter[k])
                                    store_table(prefix + new_path + '/' + new_name, row[k])
                                    tmp[k] = table_counter[k]
                                    table_counter[k] += 1
                            else:
                                tmp[k] = table_counter[k]
                                table_counter[k] += 1
                        elif v == 'image':
                            if k not in image_counter:
                                image_counter[k] = 0
                            if k in row:
                                new_path = '/images/%s' % (k,)
                                new_name = '%s_%09d' % (k, image_counter[k])
                                store_image(prefix + new_path, new_name, return_or_uncompress(row[k]))
                                tmp[k] = image_counter[k]
                                image_counter[k] += 1
                        elif v == 'data':
                            if k not in data_counter:
                                data_counter[k] = 0
                            if k in row:
                                new_path = '/data/%s' % (k,)
                                new_name = '%s_%09d' % (k, data_counter[k])
                                store_data(prefix + new_path, new_name, return_or_uncompress(row[k]))
                                tmp[k] = data_counter[k]
                                data_counter[k] += 1
                        else:
                            if k in row:
                                tmp[k] = row[k]
                            else:
                                tmp[k] = float('nan')
                    cresults.append(tmp)
                    return cresults

                if 'collected' in result:
                    collected = []
                    for m, row in result['collected'].items():
                        if tabular_name in row:
                            result_table_rows = row[tabular_name]
                            collected += process_row(result_table_rows, m, row)
                    store_table(prefix + 'result_table_collected', collected)
                if tabular_name in result:
                    store_table(prefix + 'result_table',
                                process_row(result[tabular_name], meta, result))
                store.close()
                success = True
        except NodeError:
            print("NodeError Exception occurred while writing, " +
                  "apparently the file has already been used to store similar results.")
            # print("Leaving it LOCKED (remove manually!) and trying to write to another file!")
            local_timeout = 0
            release_lock(lock_file)
        except Exception as e:
            print("Exception occurred while writing results: " + repr(e))
            release_lock(lock_file)
            return
    release_lock(lock_file)
    return result
def log(self, key, item, T_env):
    import uuid
    T_env_str = "{}_{}".format(T_env, uuid.uuid4().hex[:6])
    try:
        from tables import open_file, Filters
        file_T_id = T_env // self.T_per_file
        file_path = os.path.join(
            self.folder_name,
            "T_env_{}:{}.h5".format(file_T_id * self.T_per_file,
                                    (file_T_id + 1) * self.T_per_file))
        self.h5file = open_file(file_path, mode="a",
                                title="Experiment results: {}".format(self.name))
        if isinstance(item, BatchEpisodeBuffer):
            group = "learner_samples" + key
            if not hasattr(self.h5file.root, group):
                self.h5file.create_group("/", group, 'Learner samples')
            if not hasattr(getattr(self.h5file.root, group), "T{}".format(T_env_str)):
                self.h5file.create_group("/{}/".format(group), "T{}".format(T_env_str),
                                         'Learner samples T_env:{}'.format(T_env))
            if not hasattr(getattr(getattr(self.h5file.root, group), "T{}".format(T_env_str)),
                           "_transition"):
                self.h5file.create_group("/{}/T{}".format(group, T_env_str), "_transition",
                                         'Transition-wide data')
            if not hasattr(getattr(getattr(self.h5file.root, group), "T{}".format(T_env_str)),
                           "_episode"):
                self.h5file.create_group("/{}/T{}".format(group, T_env_str), "_episode",
                                         'Episode-wide data')
            filters = Filters(complevel=5, complib='blosc')

            # if table layout has not been created yet, do it now:
            for _c, _pos in item.columns._transition.items():
                it = item.get_col(_c)[0].cpu().numpy()
                if not hasattr(getattr(self.h5file.root, group), _c):
                    self.h5file.create_carray(
                        getattr(getattr(self.h5file.root, group),
                                "T{}".format(T_env_str))._transition,
                        _c, obj=it, filters=filters)
                else:
                    getattr(getattr(self.h5file.root, group)._transition, _c).append(it)
                    getattr(getattr(self.h5file.root, group)._transition, _c).flush()

            # if table layout has not been created yet, do it now:
            for _c, _pos in item.columns._episode.items():
                it = item.get_col(_c, scope="episode")[0].cpu().numpy()
                if not hasattr(getattr(self.h5file.root, group), _c):
                    self.h5file.create_carray(
                        getattr(getattr(self.h5file.root, group),
                                "T{}".format(T_env_str))._episode,
                        _c, obj=it, filters=filters)
                else:
                    getattr(getattr(self.h5file.root, group)._episode, _c).append(it)
                    getattr(getattr(self.h5file.root, group)._episode, _c).flush()
        else:
            key = "__".join(key.split(" "))
            # item needs to be scalar!
            import torch as th
            import numpy as np
            if isinstance(item, th.Tensor):
                item = np.array([item.cpu().clone().item()])
            elif not isinstance(item, np.ndarray):
                item = np.array([item])
            if not hasattr(self.h5file.root, "log_values"):
                self.h5file.create_group("/", "log_values", 'Log Values')
            if not hasattr(self.h5file.root.log_values, key):
                from tables import Float32Atom, IntAtom
                self.h5file.create_earray(self.h5file.root.log_values, key,
                                          atom=Float32Atom(), shape=[0])
                self.h5file.create_earray(self.h5file.root.log_values,
                                          "{}_T_env".format(key),
                                          atom=IntAtom(), shape=[0])
            else:
                getattr(self.h5file.root.log_values, key).append(item)
                getattr(self.h5file.root.log_values, key).flush()
                getattr(self.h5file.root.log_values,
                        "{}_T_env".format(key)).append(np.array([T_env]))
                getattr(self.h5file.root.log_values, "{}_T_env".format(key)).flush()
        self.h5file.close()
    except Exception as e:
        self.logging_struct.py_logger.warning(
            "Could not execute HDF logger save - no disk space, or no permissions? "
            "Error message: {}, T_env: {}, key: {}, item: {}".format(e, T_env, key, str(item)))
    return
def __poc(self, l2f):
    data = gdal.Open(l2f)
    ds = data.GetSubDatasets()
    Rrs412 = gdal.Open(ds[self.dic['Rrs_412']][0]).ReadAsArray() * self.slope + self.intercept
    Rrs443 = gdal.Open(ds[self.dic['Rrs_443']][0]).ReadAsArray() * self.slope + self.intercept
    Rrs490 = gdal.Open(ds[self.dic['Rrs_490']][0]).ReadAsArray() * self.slope + self.intercept
    Rrs555 = gdal.Open(ds[self.dic['Rrs_555']][0]).ReadAsArray() * self.slope + self.intercept
    Rrs660 = gdal.Open(ds[self.dic['Rrs_660']][0]).ReadAsArray() * self.slope + self.intercept
    Rrs680 = gdal.Open(ds[self.dic['Rrs_680']][0]).ReadAsArray() * self.slope + self.intercept
    ssc_sert = gdal.Open(ds[self.dic['ssc']][0]).ReadAsArray()
    chla_oc3 = gdal.Open(ds[self.dic['chl_oc3']][0]).ReadAsArray()
    m412, m443, m490, m555, m660, m680 = (
        Rrs412 < 0, Rrs443 < 0, Rrs490 < 0, Rrs555 < 0, Rrs660 < 0, Rrs680 < 0)
    mask = m412 | m443 | m490 | m555
    Rrs412_m = ma.array(Rrs412, mask=mask)
    Rrs443_m = ma.array(Rrs443, mask=mask)
    Rrs490_m = ma.array(Rrs490, mask=mask)
    Rrs555_m = ma.array(Rrs555, mask=mask)
    Rrs660_m = ma.array(Rrs660, mask=mask)
    Rrs680_m = ma.array(Rrs680, mask=mask)
    tempR = (Rrs443_m / Rrs555_m) * np.power(Rrs412_m / Rrs490_m, -1.012)
    temp = 0.342 - 2.511 * np.log10(tempR) - 0.277 * np.power(np.log10(tempR), 2)
    chla = np.power(10, temp)
    print(chla.data)
    chla_data = chla.data
    chla_data[mask] = self._fillvalue
    # plt.imshow(chla_data)
    # plt.show()
    pocfname = '%s_POC.l2' % (l2f.split('.')[0])
    if os.path.exists(pocfname):
        return
    h5file_l2 = tables.open_file(pocfname, 'w')
    shape = (self.lines, self.pixels)
    atom = Float32Atom()
    filters = Filters(complevel=5, complib='zlib')
    h5file_l2.root._v_attrs.title = 'GOCI POC product produced by SKLEC,Yanqun Pan'
    grpChla = h5file_l2.create_group(h5file_l2.root, 'Chla',
                                     'remote sensing reflectance')
    grpChla._v_attrs.Scans = self.lines
    grpChla._v_attrs.Pixels = self.pixels
    grpChla._v_attrs.AlgorithmName = 'ATC_MPL'
    ca = h5file_l2.create_carray(grpChla, 'Chla-OC3', atom, shape, filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:] = chla_oc3
    ca = h5file_l2.create_carray(grpChla, 'Chla-YOC', atom, shape, filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:] = chla_data
    mtemp1 = Rrs660 > Rrs490
    mtemp2 = Rrs660 > Rrs680
    ratio = Rrs490_m / Rrs555_m
    POC = np.zeros((self.lines, self.pixels), dtype=float)  # np.float was removed in newer numpy
    POC[mtemp1] = ssc_sert[mtemp1] * 1000 * 5.06 + 37.33
    POC[(~mtemp1) & (mtemp2)] = 87.3 * np.power(ratio.data[(~mtemp1) & (mtemp2)], -2.04)
    POC[(~mtemp1) & (~mtemp2)] = 69.9 * np.power(chla_data[(~mtemp1) & (~mtemp2)], 0.63)
    POC[mask] = self._fillvalue
    h5file_l2.root._v_attrs.title = 'GOCI POC product produced by SKLEC,Yanqun Pan'
    grpPOC = h5file_l2.create_group(h5file_l2.root, 'POC',
                                    'remote sensing reflectance')
    grpPOC._v_attrs.Scans = self.lines
    grpPOC._v_attrs.Pixels = self.pixels
    grpPOC._v_attrs.AlgorithmName = 'ATC_MPL'
    ca = h5file_l2.create_carray(grpPOC, 'POC', atom, shape, filters=filters)
    ca._v_attrs._FillValue = self._fillvalue
    ca[:] = POC
    h5file_l2.close()
def _make_feature_hdf5(self):
    with gzip_open(self._gff3_gz_file_path) as gff3_gz_file:
        print("Getting data-start position ...")
        data_start_position = None
        line = gff3_gz_file.readline().decode()
        while line.startswith("#"):
            data_start_position = gff3_gz_file.tell()
            line = gff3_gz_file.readline().decode()

        print("Counting features per seqid ...")
        seqid_n_row = defaultdict(lambda: 0)
        n = 0
        seqid = None
        while line:
            n += 1
            if not line.startswith("#"):
                seqid_ = line.split(sep="\t")[0]
                if seqid_ != seqid:
                    print("\t{} ...".format(seqid_))
                    seqid = seqid_
                seqid_n_row[seqid_] += 1
            line = gff3_gz_file.readline().decode()

        print("Making {} ...".format(self._feature_hdf5_file_path))
        with open_file(
            self._feature_hdf5_file_path,
            mode="w",
            filters=Filters(complevel=1, complib="blosc"),
        ) as feature_hdf5:
            seqid_table_row = {}
            n_per_print = max(1, n // 10)
            gff3_gz_file.seek(data_start_position)
            for i, line in enumerate(gff3_gz_file):
                if i % n_per_print == 0:
                    print("\t{:,}/{:,} ...".format(i, n))
                line = line.decode(errors="replace")
                if line.startswith("#"):
                    continue
                seqid, source, type_, start, end, score, strand, phase, attributes = line.split(
                    "\t")
                if type_ not in self._types:
                    continue
                if seqid not in seqid_table_row:
                    print("\t\tMaking {} table ...".format(seqid))
                    seqid_table = feature_hdf5.create_table(
                        "/",
                        "seqid_{}_features".format(seqid),
                        description=self._FeatureDescription,
                        expectedrows=seqid_n_row[seqid],
                    )
                    seqid_table_row[seqid] = seqid_table.row
                cursor = seqid_table_row[seqid]
                cursor["seqid"] = seqid
                cursor["start"] = start
                cursor["end"] = end
                name = None
                biotype = None
                for attribute in attributes.split(sep=";"):
                    field, value = attribute.split(sep="=")
                    if field == "Name":
                        name = value
                    elif field == "biotype":
                        biotype = value
                cursor["Name"] = name
                cursor["biotype"] = biotype
                cursor.append()
                self._name_seqid[name] = seqid

            print("\tFlushing tables and making column indices ...")
            for seqid in seqid_table_row:
                print("\t\t{} table ...".format(seqid))
                seqid_table = feature_hdf5.get_node(
                    "/", "seqid_{}_features".format(seqid))
                seqid_table.flush()
                for column in ("seqid", "start", "end", "Name", "biotype"):
                    seqid_table.cols._f_col(column).create_csindex()
            self._feature_hdf5 = feature_hdf5
            print(self._feature_hdf5)

    print("Writing {} ...".format(self._name_seqid_pickle_gz_file_path))
    with gzip_open(self._name_seqid_pickle_gz_file_path,
                   mode="wb") as name_seqid_pickle_gz_file:
        dump(self._name_seqid, name_seqid_pickle_gz_file)
def get_convertor(readers: list):
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    return convertor