def test_removal_of_error_parameter(self):
    filename = make_temp_dir('remove_errored.hdf5')
    traj = Trajectory(name='traj', add_time=True, filename=filename)
    traj.f_add_result('iii', 42)
    traj.f_add_result(FakeResult, 'j.j.josie', 43)

    file = traj.v_storage_service.filename
    traj.f_store(only_init=True)
    with self.assertRaises(RuntimeError):
        traj.f_store()

    with pt.open_file(file, mode='r') as fh:
        jj = fh.get_node(where='/%s/results/j/j' % traj.v_name)
        self.assertTrue('josie' not in jj)

    traj.j.j.f_remove_child('josie')
    traj.j.j.f_add_result(FakeResult2, 'josie2', 444)

    traj.f_store()
    with self.assertRaises(pex.NoSuchServiceError):
        traj.f_store_child('results', recursive=True)

    with pt.open_file(file, mode='r') as fh:
        jj = fh.get_node(where='/%s/results/j/j' % traj.v_name)
        self.assertTrue('josie2' in jj)
        josie2 = jj._f_get_child('josie2')
        self.assertTrue('hey' in josie2)
        self.assertTrue('fail' not in josie2)
def __init__(self, filename, model_state, proposal_state):
    """
    Initialize the object.
    """
    self.filename = filename
    self.ChainRecordDType = state_to_table_dtype(model_state)
    self.ChainRecordDType['step'] = pt.UInt32Col()
    self.ChainRecordDType['accepted'] = pt.UInt32Col()
    self.ChainRecordDType['proposal'] = pt.UInt16Col()
    self.ProposalRecordDType = state_to_table_dtype(proposal_state)
    self.ChainCounterDType = {'id': pt.UInt16Col(),
                              'name': pt.StringCol(itemsize=32),
                              'date': pt.StringCol(itemsize=26)}
    if os.path.exists(filename) and pt.is_pytables_file(filename):
        self.fd = pt.open_file(filename, mode='a')
    else:
        self.fd = pt.open_file(filename, mode='w')
        self.fd.create_group('/', 'mcmc', 'Metropolis-Hastings Algorithm Data')
        self.fd.create_table('/mcmc', 'proposals', self.ProposalRecordDType,
                             'MCMC Proposals')
        self.fd.create_table('/mcmc', 'chain_counter', self.ChainCounterDType,
                             'Chain Counter')
        self.fd.create_group('/mcmc', 'data', 'Collection of Chains')
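# The ``state_to_table_dtype`` helper used above is defined elsewhere in the
# project; the following is a minimal sketch of what such a helper might look
# like (an assumption for illustration, not the original implementation),
# mapping each numpy-compatible value in a state dict to a PyTables column:
import numpy as np
import tables as pt

def state_to_table_dtype_sketch(state):
    """Build a PyTables table description from a dict of numpy scalars/arrays."""
    description = {}
    for name, value in state.items():
        arr = np.asarray(value)
        # Col.from_dtype picks the appropriate Col subclass (Float64Col,
        # Int32Col, ...) and carries the shape for array-valued entries.
        description[name] = pt.Col.from_dtype(np.dtype((arr.dtype, arr.shape)))
    return description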
def validate_results_node(test, expected_path, actual_path, expected_node,
                          actual_node):
    """Validate results by comparing two specific nodes

    :param test: instance of the TestCase.
    :param expected_path: path to the reference data.
    :param actual_path: path to the output from the test.
    :param expected_node: path to the reference node.
    :param actual_node: path to the output node from the test.

    """
    with tables.open_file(expected_path, 'r') as expected_file, \
            tables.open_file(actual_path, 'r') as actual_file:
        expected = expected_file.get_node(expected_node)
        try:
            actual = actual_file.get_node(actual_node)
        except tables.NoSuchNodeError:
            test.fail("Node '%s' does not exist in datafile" % actual_node)
        if type(expected) is tables.table.Table:
            validate_tables(test, expected, actual)
        elif type(expected) is tables.vlarray.VLArray:
            validate_vlarrays(test, expected, actual)
        elif type(expected) is tables.array.Array:
            validate_arrays(test, expected, actual)
        else:
            raise NotImplementedError
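# Usage sketch (file and node paths are hypothetical): inside a TestCase
# method, compare a single table in the produced file against the reference.
#
#     validate_results_node(self, 'reference/sim.h5', 'output/sim.h5',
#                           '/analysis/results', '/analysis/results')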
def test_maximum_overview_size(self):
    filename = make_temp_dir('maxisze.hdf5')

    env = Environment(trajectory='Testmigrate', filename=filename,
                      log_config=get_log_config(), add_time=True)

    traj = env.v_trajectory
    for irun in range(pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH):
        traj.f_add_parameter('f%d.x' % irun, 5)

    traj.f_store()

    store = pt.open_file(filename, mode='r+')
    table = store.root._f_get_child(traj.v_name).overview.parameters_overview
    self.assertEquals(table.nrows, pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH)
    store.close()

    for irun in range(pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH,
                      2 * pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH):
        traj.f_add_parameter('f%d.x' % irun, 5)

    traj.f_store()

    store = pt.open_file(filename, mode='r+')
    table = store.root._f_get_child(traj.v_name).overview.parameters_overview
    self.assertEquals(table.nrows, pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH)
    store.close()

    env.f_disable_logging()
def validate_results(test, expected_path, actual_path):
    """Validate results by comparing in and output HDF5 files

    :param test: instance of the TestCase.
    :param expected_path: path to the reference data.
    :param actual_path: path to the output from the test.

    """
    with tables.open_file(expected_path, 'r') as expected_file, \
            tables.open_file(actual_path, 'r') as actual_file:
        for expected_node in expected_file.walk_nodes('/', 'Leaf'):
            try:
                actual_node = actual_file.get_node(expected_node._v_pathname)
            except tables.NoSuchNodeError:
                test.fail("Node '%s' does not exist in datafile" %
                          expected_node._v_pathname)
            if type(expected_node) is tables.table.Table:
                validate_tables(test, expected_node, actual_node)
            elif type(expected_node) is tables.vlarray.VLArray:
                validate_vlarrays(test, expected_node, actual_node)
            elif type(expected_node) is tables.array.Array:
                validate_arrays(test, expected_node, actual_node)
            else:
                raise NotImplementedError
            validate_attributes(test, expected_node, actual_node)
        validate_attributes(test, expected_file.root, actual_file.root)
def test03b_Compare64EArray(self):
    "Comparing several written and read 64-bit time values in an EArray."

    # Create test EArray with data.
    h5file = tables.open_file(self.h5fname, "w",
                              title="Test for comparing Time64 E arrays")
    ea = h5file.create_earray("/", "test", tables.Time64Atom(), shape=(0, 2))

    # Size of the test.
    nrows = ea.nrowsinbuf + 34  # Add some more rows than buffer.
    # Only for home checks; the value above should check better
    # the I/O with multiple buffers.
    # nrows = 10

    for i in range(nrows):
        j = i * 2
        ea.append(((j + 0.012, j + 1 + 0.012),))
    h5file.close()

    # Check the written data.
    h5file = tables.open_file(self.h5fname)
    arr = h5file.root.test.read()
    h5file.close()

    orig_val = numpy.arange(0, nrows * 2, dtype=numpy.int32) + 0.012
    orig_val.shape = (nrows, 2)
    if common.verbose:
        print("Original values:", orig_val)
        print("Retrieved values:", arr)
    self.assertTrue(allequal(arr, orig_val),
                    "Stored and retrieved values do not match.")
def test02_copy(self):
    """Checking (X)Array.copy() method ('numeric' flavor)"""

    srcfile = self._testFilename("oldflavor_numeric.h5")
    tmpfile = tempfile.mktemp(".h5")
    shutil.copy(srcfile, tmpfile)
    try:
        # Open the HDF5 file with old numeric flavor
        with tables.open_file(tmpfile, "r+") as h5file:
            # Copy to another location
            self.assertWarns(FlavorWarning,
                             h5file.root.array1.copy, '/', 'array1copy')
            h5file.root.array2.copy('/', 'array2copy')
            h5file.root.carray1.copy('/', 'carray1copy')
            h5file.root.carray2.copy('/', 'carray2copy')
            h5file.root.vlarray1.copy('/', 'vlarray1copy')
            h5file.root.vlarray2.copy('/', 'vlarray2copy')

            if self.close:
                h5file.close()
                h5file = tables.open_file(tmpfile)
            else:
                h5file.flush()

            # Assert other properties in array
            self.assertEqual(h5file.root.array1copy.flavor, 'numeric')
            self.assertEqual(h5file.root.array2copy.flavor, 'python')
            self.assertEqual(h5file.root.carray1copy.flavor, 'numeric')
            self.assertEqual(h5file.root.carray2copy.flavor, 'python')
            self.assertEqual(h5file.root.vlarray1copy.flavor, 'numeric')
            self.assertEqual(h5file.root.vlarray2copy.flavor, 'python')
    finally:
        os.remove(tmpfile)
def WriteRead(filename, testTuple):
    if common.verbose:
        print('\n', '-=' * 30)
        print("Running test for object %s" % type(testTuple))

    # Create an instance of HDF5 Table
    fileh = tables.open_file(filename, mode="w")
    root = fileh.root
    try:
        # Create the array under root and name 'somearray'
        a = testTuple
        fileh.create_array(root, 'somearray', a, "Some array")
    finally:
        # Close the file
        fileh.close()

    # Re-open the file in read-only mode
    fileh = tables.open_file(filename, mode="r")
    root = fileh.root

    # Read the saved array
    try:
        b = root.somearray.read()
        # Compare them. They should be equal.
        if not a == b and common.verbose:
            print("Write and read lists/tuples differ!")
            print("Object written:", a)
            print("Object read:", b)

        # Check strictly the array equality
        assert a == b
    finally:
        # Close the file
        fileh.close()
def test_2D_multiphase(self):
    # RELOAD MODULES
    self.reload_modules()
    pnList = [(twp_navier_stokes_p, twp_navier_stokes_n),
              (clsvof_p, clsvof_n)]
    self.so = multiphase_so
    pList = []
    nList = []
    sList = []
    for (pModule, nModule) in pnList:
        pList.append(pModule)
        if pList[-1].name == None:
            pList[-1].name = pModule.__name__
        nList.append(nModule)
    for i in range(len(pnList)):
        sList.append(default_s)
    self.so.name += "_2D_falling_bubble"
    # NUMERICAL SOLUTION #
    ns = proteus.NumericalSolution.NS_base(self.so,
                                           pList,
                                           nList,
                                           sList,
                                           opts)
    ns.calculateSolution('2D_falling_bubble')
    # COMPARE VS SAVED FILES #
    expected_path = 'comparison_files/multiphase_2D_falling_bubble.h5'
    expected = tables.open_file(os.path.join(self._scriptdir, expected_path))
    actual = tables.open_file('multiphase_2D_falling_bubble.h5', 'r')
    assert np.allclose(expected.root.phi_t2,
                       actual.root.phi_t2,
                       atol=1e-10)
    expected.close()
    actual.close()
def compute_rab(self):
    '''
    Uses the current set of ICA realizations (pytabled) to compute K*(K-1)/2
    cross-correlation matrices; they are indexed via tuples.  R(a,b) is much
    smaller than the ICA realizations (all R(a,b) matrices are generally
    smaller than ONE realization), so R(a,b) is also retained in memory.
    Recomputation of the R(a,b) matrices is forced.
    '''
    if not os.path.exists(self.rabDirectory):
        try:
            os.mkdir(self.rabDirectory)
        except OSError:
            pass
    icaFiles = sorted(os.listdir(self.icaDirectory))
    if len(icaFiles) == 0:
        raise RAICARICAException
    for fi in icaFiles:
        fiPtr = tb.open_file(os.path.join(self.icaDirectory, fi), 'r')
        si = fiPtr.get_node('/decomps/sources').read()
        fiPtr.close()
        i = np.int(deconstruct_file_name(fi)[1])
        print 'Working on R(%d,b)' % i
        for fj in icaFiles:
            j = np.int(deconstruct_file_name(fj)[1])
            if j > i:
                # sources assumed to have unit std. dev. but nonzero mean -
                # will behave badly if not!
                fjPtr = tb.open_file(os.path.join(self.icaDirectory, fj), 'r')
                sj = fjPtr.get_node('/decomps/sources').read()
                fjPtr.close()
                self.RabDict[(i, j)] = np.abs(corrmatrix(si, sj))
    # pickle the result
    rabPtr = open(os.path.join(self.rabDirectory, 'rabmatrix.db'), 'wb')
    cPickle.dump(self.RabDict, rabPtr, protocol=-1)
    rabPtr.close()
def test02_CompareTable(self):
    "Comparing written time data with read data in a Table."

    wtime = 1234567890.123456

    # Create test Table with data.
    h5file = tables.open_file(
        self.h5fname, 'w', title="Test for comparing Time tables")
    tbl = h5file.create_table('/', 'test', self.MyTimeRow)
    row = tbl.row
    row['t32col'] = int(wtime)
    row['t64col'] = (wtime, wtime)
    row.append()
    h5file.close()

    # Check the written data.
    h5file = tables.open_file(self.h5fname)
    recarr = h5file.root.test.read(0)
    h5file.close()

    self.assertEqual(recarr['t32col'][0], int(wtime),
                     "Stored and retrieved values do not match.")

    comp = (recarr['t64col'][0] == numpy.array((wtime, wtime)))
    self.assertTrue(numpy.alltrue(comp),
                    "Stored and retrieved values do not match.")
def test01b_Compare64VLArray(self):
    "Comparing several written and read 64-bit time values in a VLArray."

    # Create test VLArray with data.
    h5file = tables.open_file(
        self.h5fname, 'w', title="Test for comparing Time64 VL arrays")
    vla = h5file.create_vlarray('/', 'test', self.myTime64Atom)

    # Size of the test.
    nrows = vla.nrowsinbuf + 34  # Add some more rows than buffer.
    # Only for home checks; the value above should check better
    # the I/O with multiple buffers.
    # nrows = 10

    for i in xrange(nrows):
        j = i * 2
        vla.append((j + 0.012, j + 1 + 0.012))
    h5file.close()

    # Check the written data.
    h5file = tables.open_file(self.h5fname)
    arr = h5file.root.test.read()
    h5file.close()

    arr = numpy.array(arr)
    orig_val = numpy.arange(0, nrows * 2, dtype=numpy.int32) + 0.012
    orig_val.shape = (nrows, 1, 2)
    if common.verbose:
        print "Original values:", orig_val
        print "Retrieved values:", arr
    self.assertTrue(allequal(arr, orig_val),
                    "Stored and retrieved values do not match.")
def test_case_2(self): # Set parameters for this test parameters.ct.test_case=2 # RELOAD MODULES self.reload_modules() pnList = [(clsvof_p, clsvof_n)] self.so = default_so self.so.tnList = clsvof_n.tnList pList=[] nList=[] sList=[] for (pModule,nModule) in pnList: pList.append(pModule) if pList[-1].name == None: pList[-1].name = pModule.__name__ nList.append(nModule) for i in range(len(pnList)): sList.append(default_s) self.so.name = "clsvof_test_case_2" # NUMERICAL SOLUTION # ns = proteus.NumericalSolution.NS_base(self.so, pList, nList, sList, opts) ns.calculateSolution('test_case_2') # COMPARE VS SAVED FILES # expected_path = 'comparison_files/clsvof_test_case_2.h5' expected = tables.open_file(os.path.join(self._scriptdir,expected_path)) actual = tables.open_file('clsvof_test_case_2.h5','r') assert np.allclose(expected.root.u_t2,actual.root.u_t2,atol=1e-10) expected.close() actual.close()
def test_EV2(self):
    thelper_vof.ct.STABILIZATION_TYPE = 1  # EV
    thelper_vof.ct.ENTROPY_TYPE = 2  # logarithmic
    thelper_vof.ct.cE = 0.1
    thelper_vof.ct.FCT = True
    reload(thelper_vof_p)
    reload(thelper_vof_n)
    self.so.name = self.pList[0].name + "_EV2"
    # NUMERICAL SOLUTION #
    ns = proteus.NumericalSolution.NS_base(self.so,
                                           self.pList,
                                           self.nList,
                                           self.sList,
                                           opts)
    self.sim_names.append(ns.modelList[0].name)
    ns.calculateSolution('vof')
    # COMPARE VS SAVED FILES #
    expected_path = 'comparison_files/vof_level_3_EV2.h5'
    expected = tables.open_file(os.path.join(self._scriptdir, expected_path))
    actual = tables.open_file('vof_level_3_EV2.h5', 'r')
    assert np.allclose(expected.root.u_t2,
                       actual.root.u_t2,
                       atol=1e-10)
    expected.close()
    actual.close()
def main(argv):
    args = parse_args(argv[1:])
    fileout = os.path.abspath(args.output)
    start = time()

    for fin in args.inputs:
        filein = os.path.abspath(fin)
        print 'Concatenating %s' % filein
        if not os.path.exists(fileout):
            copyfile(filein, fileout)
        else:
            # Can't use HdfStorage.readCoordinates because it needs an
            # Ice.Communicator object, so there's no point using the
            # OMERO.tables interface
            tout = tables.open_file(fileout, 'r+')
            tin = tables.open_file(filein, 'r')
            nrows = tin.root.OME.Measurements.nrows
            for a in range(0, nrows, ROW_CHUNK):
                b = min(nrows, a + ROW_CHUNK)
                print '\tRows %d:%d' % (a, b)
                rows = tin.root.OME.Measurements.read_coordinates(range(a, b))
                tout.root.OME.Measurements.append(rows)
            tin.close()
            tout.close()
        print '\tCumulative time: %d seconds' % (time() - start)

    print 'Done'
def _runTest(self):
    self.ns = NumericalSolution.NS_base(self.so,
                                        self.pList,
                                        self.nList,
                                        self.so.sList,
                                        opts)
    self.ns.calculateSolution('stokes')

    relpath = 'comparison_files/drivenCavityNSE_LSC_expected.h5'
    expected = tables.open_file(os.path.join(self._scriptdir, relpath))
    actual = tables.open_file('drivenCavityNSETrial.h5', 'r')
    assert numpy.allclose(expected.root.velocity_t7,
                          actual.root.velocity_t7,
                          atol=1e-2)
    expected.close()
    actual.close()

    relpath = 'comparison_files/drivenCavityNSE_LSC_expected.log'
    actual_log = TestTools.NumericResults.build_from_proteus_log('proteus.log')
    expected_log = TestTools.NumericResults.build_from_proteus_log(
        os.path.join(self._scriptdir, relpath))

    plot_lst = [(3.7, 0, 3), (3.2, 0, 2), (2.7, 0, 2), (2.2, 0, 1), (1.7, 0, 1)]
    L1 = expected_log.get_ksp_resid_it_info(plot_lst)
    L2 = actual_log.get_ksp_resid_it_info(plot_lst)
    assert L1 == L2
def test_estimator_pytables():
    m1 = MyEstimator(a=1, b='a', c=None, d=False, e=np.zeros(3)).fit(None)

    f = tables.open_file(fn, 'w')
    m1.to_pytables(f.root)
    f.close()

    g = tables.open_file(fn)
    m2 = MyEstimator.from_pytables(g.root.MyEstimator)
    print m1.__dict__
    print m2.__dict__

    for key, value in m1.get_params().iteritems():
        if any(isinstance(value, t) for t in [int, float, str]):
            assert value == getattr(m2, key, object())
        else:
            eq(value, getattr(m2, key, object()),
               err_msg='error on param key=%s' % key)

    for key in m1._get_estimate_names():
        value = getattr(m1, key)
        if any(isinstance(value, t) for t in [int, float, str]):
            assert value == getattr(m2, key, object())
        else:
            eq(value, getattr(m2, key, object()),
               err_msg='error on estimate key=%s' % key)

    g.close()
def test_3D_hex(self): # Set parameters for test parameters_for_poisson.ct.nd = 3 parameters_for_poisson.useHex = True # Reload _p and _n modules reload(poisson_p) reload(poisson_n) poisson_n.nnx=poisson_p.nn poisson_n.nny=poisson_p.nn poisson_n.nnz=poisson_p.nn # Update name self.so.name = "3D_"+self.pList[0].name+"_hex_degree2" # NUMERICAL SOLUTION # ns = proteus.NumericalSolution.NS_base(self.so, self.pList, self.nList, self.sList, opts) self.sim_names.append(ns.modelList[0].name) ns.calculateSolution('poisson') # COMPARE VS SAVED FILES # expected_path = 'comparison_files/'+self.so.name+'.h5' expected = tables.open_file(os.path.join(self._scriptdir,expected_path)) actual = tables.open_file(self.so.name+'.h5','r') assert np.allclose(expected.root.u0_t1, actual.root.u0_t1, atol=1e-10)
def store_and_sort_corsika_data(source, destination, overwrite=False,
                                progress=False):
    """First convert the data to HDF5 and create a sorted version"""
    if os.path.exists(destination):
        if not overwrite:
            if progress:
                raise Exception("Destination already exists, doing nothing")
            return
        else:
            os.remove(destination)
    corsika_data = CorsikaFile(source)
    temp_dir = os.path.dirname(destination)
    temp_path = create_tempfile_path(temp_dir)
    with tables.open_file(temp_path, 'a') as hdf_temp:
        store_corsika_data(corsika_data, hdf_temp, progress=progress)
    with tables.open_file(temp_path, 'a') as hdf_temp:
        create_index(hdf_temp, progress=progress)
    with tables.open_file(temp_path, 'r') as hdf_temp, \
            tables.open_file(destination, 'w') as hdf_data:
        copy_and_sort_node(hdf_temp, hdf_data, progress=progress)
    os.remove(temp_path)
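# Usage sketch (file names are hypothetical): convert a raw CORSIKA DAT file
# into a sorted HDF5 file, reporting progress along the way.
#
#     store_and_sort_corsika_data('DAT000000', 'corsika.h5', progress=True)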
def __init__(self, h5_filename_queue):
    """
    param h5_filename_queue: a queue of temporary hdf5 files
    """
    self.h5_filename_queue = h5_filename_queue
    tables.open_file(table_path, 'w').close()  # creates a new file
    super(WriteHDF5Thread, self).__init__()
def dropfields(input_path, output_path, todrop):
    input_file = tables.open_file(input_path, mode="r")
    input_root = input_file.root

    output_file = tables.open_file(output_path, mode="w")
    output_globals = output_file.create_group("/", "globals", "Globals")

    print(" * copying globals ...", end=' ')
    copy_table(input_root.globals.periodic, output_globals)
    print("done.")

    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iterNodes(input_root.entities):
        table_fields = get_fields(table)
        table_fields = [(fname, ftype) for fname, ftype in table_fields
                        if fname not in todrop]
        size = (len(table) * table.dtype.itemsize) / 1024.0 / 1024.0
        # noinspection PyProtectedMember
        print(" * copying table %s (%.2f Mb) ..." % (table._v_name, size),
              end=' ')
        copy_table(table, output_entities, table_fields)
        print("done.")

    input_file.close()
    output_file.close()
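# Usage sketch (paths and field names are hypothetical): write a copy of the
# input file without the 'wealth' and 'income' columns.
#
#     dropfields('simulation.h5', 'simulation_stripped.h5',
#                ['wealth', 'income'])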
def __init__(self, parent, filename): if not isinstance(filename, string_types): raise ValueError( 'Pytables requires filename parameter as string. Got {} instead.' .format(filename.__class__)) self.parent = parent self.version = HDFPartition.VERSION self.n_rows = 0 self.n_cols = 0 self.cache = [] if os.path.exists(filename): self._h5_file = open_file(filename, mode='a') self.meta = HDFReader._read_meta(self._h5_file) self.version, self.n_rows, self.n_cols = _get_file_header( self._h5_file.root.partition.file_header) else: # No, doesn't exist self._h5_file = open_file(filename, mode='w') self.meta = deepcopy(MPRowsFile.META_TEMPLATE) self.header_mangler = lambda name: re.sub('_+', '_', re.sub('[^\w_]', '_', name).lower()).rstrip('_') if self.n_rows == 0: self.meta['about']['create_time'] = time.time()
def __init__( self, fnContigLengths, fnWssd, overwrite, openMode, groupsToCheck=[], compression=False ): self.compress = compression assert os.path.exists(fnContigLengths) if openMode=='r': assert not overwrite assert os.path.exists(fnWssd), fnWssd debug_output('WssdBase: reading contig lengths from file %s'%fnContigLengths) self.mContigNameLen = {} for l in open(fnContigLengths,'r'): l=l.replace('\n','').split('\t') self.mContigNameLen[l[0]]=int(l[1]) debug_output('WSSD space: %d contigs totaling %d bp'%( len(self.mContigNameLen), sum(self.mContigNameLen.values()) )) if overwrite or not os.path.exists(fnWssd): self.tbl = tables.open_file( fnWssd, 'w' ) else: if openMode=='r': self.tbl = tables.open_file( fnWssd, 'r' ) else: self.tbl = tables.open_file( fnWssd, 'a' )
def create_synth(kind, prec): prefix_orig = "cellzome/cellzome-" iname = dirname + prefix_orig + "none-" + prec + ".h5" f = tb.open_file(iname, "r") if prec == "single": type_ = tb.Float32Atom() else: type_ = tb.Float64Atom() prefix = "synth/synth-" for clevel in range(10): oname = "%s/%s-%s%d-%s.h5" % (dirname, prefix, kind, clevel, prec) # print "creating...", iname f2 = tb.open_file(oname, "w") if kind in ["none", "numpy"]: filters = None else: filters = tb.Filters(complib=kind, complevel=clevel, shuffle=shuffle) for name in ["maxarea", "mascotscore"]: col = f.get_node("/", name) r = f2.create_carray("/", name, type_, col.shape, filters=filters) if name == "maxarea": r[:] = np.arange(col.nrows, dtype=type_.dtype) else: r[:] = np.arange(col.nrows, 0, dtype=type_.dtype) f2.close() if clevel == 0: size = 1.5 * float(os.stat(oname)[6]) f.close() return size
def open(self, mode, ncols=1, nrows=1, xll=0, yll=0, cellsize=1, nodatavalue=-9999.0, dataset_name="dummy", group_prefix="row", table_prefix="col", index_format="04i", variables=[], units=[]): # Initialise fpath = os.path.join(self.folder, self.name); if (mode[0] == 'w'): # Open the file self.__datafile = tables.open_file(fpath, 'w'); # Assign the data attributes self.ncols = ncols; self.nrows = nrows; self.xll = xll; self.yll = yll; self.cellsize = cellsize; self.nodatavalue = nodatavalue; self.dataset_name = dataset_name; self.group_prefix = group_prefix; self.table_prefix = table_prefix; self.index_format = index_format; self.variables = variables; self.units = units; self.writeheader(); else: # If file does not exist, then ... if os.path.exists(fpath): # Retrieve the data attributes from the header file self.readheader(); GridEnvelope2D.__init__(self, self.ncols, self.nrows, self.xll, self.yll, self.cellsize, self.cellsize); self.__datafile = tables.open_file(fpath, 'r'); return True; else: return False;
def __init__(self, cfg): self.cfg = cfg self.path = os.path.join(self.cfg.subsets_path, 'data.db') self.results = None if os.path.exists(self.path): try: self.h5 = tables.open_file(self.path, 'a') self.results = self.h5.root.results except: # If anything fails, we just create a new database... log.warning("""Failed to open existing database at %s, or database is corrupted. Creating a new one""", self.path) self.results = None # Something went wrong! if not self.results: try: # Try closing this, just in case self.h5.close() except: pass # Compression is good -- and faster, according to the pytables docs... f = tables.Filters(complib='blosc', complevel=5) self.h5 = tables.open_file(self.path, 'w', filters=f) self.results = self.h5.create_table( '/', 'results', cfg.data_layout.data_type) self.results.cols.subset_id.create_csindex() assert isinstance(self.results, tables.Table) assert self.results.indexed
def h5_apply_func(input_path, output_path, node_func):
    """
    Apply node_func to all nodes of input_path and store the result in
    output_path

    Parameters
    ----------
    input_path : str
        path to .h5 input file
    output_path : str
        path to .h5 output file
    node_func : function
        function that will be applied to all nodes
        func(node, new_parent) -> new_node
        new_node must be
            node if node must be copied
            None if node must not be copied
            another Node if node must not be copied (was already
            handled/copied/modified by func)
    """
    with tables.open_file(input_path) as input_file, \
            tables.open_file(output_path, mode="w") as output_file:
        for node in input_file.walk_nodes(classname='Leaf'):
            if node is not input_file.root:
                print(node._v_pathname, "...", end=' ')
                parent_path = node._v_parent._v_pathname
                if parent_path in output_file:
                    new_parent = output_file.get_node(parent_path)
                else:
                    new_parent = output_file._create_path(parent_path)
                new_node = node_func(node, new_parent)
                if new_node is node:
                    print("copying (without modifications) ...", end=' ')
                    node._f_copy(new_parent)
                print("done.")
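# Usage sketch (file names are hypothetical): per the docstring above,
# returning the node itself from node_func copies every leaf unchanged.
#
#     h5_apply_func('input.h5', 'output.h5', lambda node, new_parent: node)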
def process(self, rows_slice):
    with Worker.hdf5_lock:
        with tables.open_file(self.hdf5_file, 'r+') as fileh:
            T = fileh.get_node(self.path + '/temporaries')
            tmp = T[rows_slice, ...]

    ind = np.arange(0, rows_slice.stop - rows_slice.start)

    # tmp = - A_new
    tmp -= self.rows_sum
    diag_A = tmp[ind, rows_slice.start + ind].copy()
    np.clip(tmp, 0, np.inf, tmp)
    tmp[ind, rows_slice.start + ind] = diag_A

    Worker.hdf5_lock.acquire()
    with tables.open_file(self.hdf5_file, 'r+') as fileh:
        A = fileh.get_node(self.path + '/availabilities')
        a = A[rows_slice, ...]
    Worker.hdf5_lock.release()

    # yet more damping
    a = a * self.damping - tmp * (1 - self.damping)

    with Worker.hdf5_lock:
        with tables.open_file(self.hdf5_file, 'r+') as fileh:
            A = fileh.get_node(self.path + '/availabilities')
            T = fileh.get_node(self.path + '/temporaries')
            A[rows_slice, ...] = a
            T[rows_slice, ...] = tmp

    del a, tmp
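# Note on the damped update above: since ``tmp`` holds the negated candidate
# availabilities (see the "tmp = - A_new" comment), the line
#     a = a * self.damping - tmp * (1 - self.damping)
# is a convex combination of the old and newly computed availabilities, the
# usual affinity-propagation trick to avoid oscillations. For example, with
# damping = 0.9, an old availability of 1.0 and a new candidate of 0.5
# (i.e. tmp = -0.5) give 0.9 * 1.0 + 0.1 * 0.5 = 0.95.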
def ptconcat(output_file, input_files, overwrite=False):
    """Concatenate HDF5 Files"""
    filt = tb.Filters(
        complevel=5, shuffle=True, fletcher32=True, complib='zlib'
    )
    out_tabs = {}
    dt_file = input_files[0]
    log.info("Reading data struct '%s'..." % dt_file)
    h5struc = tb.open_file(dt_file, 'r')
    log.info("Opening output file '%s'..." % output_file)
    if overwrite:
        outmode = 'w'
    else:
        outmode = 'a'
    h5out = tb.open_file(output_file, outmode)

    for node in h5struc.walk_nodes('/', classname='Table'):
        path = node._v_pathname
        log.debug(path)
        dtype = node.dtype
        p, n = os.path.split(path)
        out_tabs[path] = h5out.create_table(
            p, n, description=dtype, filters=filt, createparents=True
        )
    h5struc.close()

    for fname in input_files:
        log.info('Reading %s...' % fname)
        h5 = tb.open_file(fname)
        for path, out in out_tabs.items():
            tab = h5.get_node(path)
            out.append(tab[:])
        h5.close()

    h5out.close()
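# Usage sketch (file names are hypothetical): merge the tables of several
# runs into one output file, overwriting any existing output.
#
#     ptconcat('merged.h5', ['run1.h5', 'run2.h5', 'run3.h5'], overwrite=True)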
def __init__(self, output_dir, chrom_list): # combined allele-specific read counts as_count_filename = "%s/combined_as_count.h5" % output_dir self.as_count_h5 = tables.open_file(as_count_filename, "w") # combined mapped read counts read_count_filename = "%s/combined_read_count.h5" % output_dir self.read_count_h5 = tables.open_file(read_count_filename, "w") # counts of genotypes ref_count_filename = "%s/combined_ref_count.h5" % output_dir self.ref_count_h5 = tables.open_file(ref_count_filename, "w") alt_count_filename = "%s/combined_alt_count.h5" % output_dir self.alt_count_h5 = tables.open_file(alt_count_filename, "w") het_count_filename = "%s/combined_het_count.h5" % output_dir self.het_count_h5 = tables.open_file(het_count_filename, "w") self.filenames = [as_count_filename, read_count_filename, ref_count_filename, alt_count_filename, het_count_filename] self.h5_files = [self.as_count_h5, self.read_count_h5, self.ref_count_h5, self.alt_count_h5, self.het_count_h5] # initialize all of these files atom = tables.UInt16Atom(dflt=0) for h5f in self.h5_files: for chrom in chrom_list: self.create_carray(h5f, chrom, atom)
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # Author: Vicent Mas - [email protected] # # This script is based on a set of scripts by Francesc Alted. """Several simple EArrays.""" import tables import numpy fileh = tables.open_file('earray_samples.h5', mode='w') root = fileh.root a = tables.StringAtom(itemsize=8) # Use ``a`` as the object type for the enlargeable array. array_c = fileh.create_earray(root, 'array_c', a, (0, ), "Chars") array_c.append(numpy.array(['a' * 2, 'b' * 4], dtype='S8')) array_c.append(numpy.array(['a' * 6, 'b' * 8, 'c' * 10], dtype='S8')) # Create an string atom a = tables.StringAtom(itemsize=1) # Use it as a type for the enlargeable array hdfarray = fileh.create_earray(root, 'array_char', a, (0, ), "Character array") hdfarray.append(numpy.array(['a', 'b', 'c'])) # The next is legal: hdfarray.append(numpy.array(['c', 'b', 'c', 'd']))
from snn.optimizer.snnsgd import SNNSGD import tables from neurodata.load_data import create_dataloader from snn.utils.utils_snn import get_acc_and_loss from snn.utils.utils_snn import get_acc_layered sample_length = 2000000 # length of samples during training in ms dt = 25000 # us polarity = False T = int(sample_length / dt) # number of timesteps in a sample input_size = (1 + polarity) * 26 * 26 dataset_path = r"C:\Users\K1804053\OneDrive - King's College London\PycharmProjects\datasets\mnist-dvs\mnist_dvs_events_new.hdf5" ds = 1 dataset = tables.open_file(dataset_path) x_max = dataset.root.stats.train_data[1] // ds dataset.close() n_outputs = 2 n_hidden = 16 n_neurons_per_layer = [64] network = LayeredSNN(input_size, n_neurons_per_layer, n_outputs, synaptic_filter=filters.raised_cosine_pillow_08, n_basis_feedforward=[8], n_basis_feedback=[1], tau_ff=[10], tau_fb=[10], mu=[0.5], device='cpu') topology = torch.zeros([n_hidden + n_outputs, n_hidden + input_size + n_outputs]) topology[:n_hidden, :input_size] = 1 topology[n_hidden:, input_size:-n_outputs] = 1 network2 = BinarySNN(**make_network_parameters(network_type='snn',
def setUp(self):
    self.data_path = self.create_tempfile_from_testdata()
    self.data = tables.open_file(self.data_path, 'a')
def __init__(self,parent,subgui=True): # get the current path curpath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) constpath = os.path.join(os.path.split(curpath)[0],'SimISR','const') # set the root self.parent = parent self.subgui = subgui # set up frames for list self.frame1 = Tkinter.Frame(self.parent) self.frame1.grid(row=0,column=0) self.frame2 = Tkinter.Frame(self.parent) self.frame2.grid(row=0,column=1) self.output = [] self.beamhandle = None if subgui: self.sizecanv = [500,500] self.beamcodeent= Tkinter.Entry(self.frame1) self.beamcodeent.grid(row=1,column=1) self.beamcodeentlabel = Tkinter.Label(self.frame1,text="Enter Beamcodes") self.beamcodeentlabel.grid(row=1,column=0,sticky='e') self.beambuttex = Tkinter.Button(self.frame1, text="Read", command=self.readbcobar) self.beambuttex.grid(row=1,column=2,sticky='w') self.beambutt = Tkinter.Button(self.frame1, text="Import", command=self.beambuttonClick) self.beambutt.grid(row=2,column=2,sticky='w') canvrow = 3 else: self.sizecanv = [1000,1000] self.leb = Tkinter.Label(self.frame1, text="Beam Selector",font=("Helvetica", 16)) self.leb.grid(row=0, sticky=Tkinter.W+Tkinter.E+Tkinter.N+Tkinter.S,columnspan=2) self.butt = Tkinter.Button(self.frame1, text="Finished", command=self.buttonClick) self.butt.grid(row=1,column=1,sticky='w') self.beamcodeent= Tkinter.Entry(self.frame1) self.beamcodeent.grid(row=2,column=1,sticky='w') self.beamcodeentlabel = Tkinter.Label(self.frame1,text="Enter Beamcodes") self.beamcodeentlabel.grid(row=2,column = 0,sticky='e') self.beambuttex = Tkinter.Button(self.frame1, text="Read", command=self.readbcobar) self.beambuttex.grid(row=2,column=2,sticky='w') self.beambutt = Tkinter.Button(self.frame1, text="Import", command=self.beambuttonClick) self.beambutt.grid(row=3,column=2,sticky='w') canvrow = 4 self.off_x = self.sizecanv[0]/2 self.off_y = self.sizecanv[1]/2 self.div = 75.0*self.sizecanv[0]/1000.0 self.lat = [80,70,60,50,40,30] self.angles = np.arange(0,180,30) self.var = Tkinter.StringVar() self.var.set("PFISR") self.choices = {"PFISR":get_files('PFISR_PARAMS.h5'), "RISR-N":get_files('RISR_PARAMS.h5'), "Sondrestrom":get_files('Sondrestrom_PARAMS.h5'), "Millstone":get_files('Millstone_PARAMS.h5')}#, "RISR-S":'file3'} self.option = Tkinter.OptionMenu(self.frame1, self.var, *self.choices) self.option.grid(row=1,column=0,sticky='w') hfile=tables.open_file(self.choices[self.var.get()]) self.lines = hfile.root.Params.Kmat.read() hfile.close() self.readfile = Tkinter.StringVar() # set up the canvas self.canv = Tkinter.Canvas(self.frame1 , width=self.sizecanv[0], height=self.sizecanv[1],background='white') self.canv.grid(row=canvrow,column=0,columnspan=2) self.Drawlines() self.Drawbeams() self.canv.bind('<ButtonPress-1>', self.onCanvasClick) self.canv.bind('<ButtonPress-2>', self.onCanvasRightClick) self.var.trace('w', self.Changefile) self.canv.update() # beam list self.bidlabel = Tkinter.Label(self.frame2,text="Beam ID") self.bidlabel.grid(row=0,column=0) self.azlabel = Tkinter.Label(self.frame2,text="Azimuth") self.azlabel.grid(row=0,column=1) self.ellabel = Tkinter.Label(self.frame2,text="Elevation") self.ellabel.grid(row=0,column=2) self.scroll = Tkinter.Scrollbar(self.frame2) self.scroll.grid(row=1,column=3) self.beamtext = Tkinter.Text(self.frame2,yscrollcommand=self.scroll.set) self.beamtext.config(width=50,state=Tkinter.DISABLED) self.beamtext.grid(row = 1,column = 0,columnspan=3) self.beamlines = [] self.scroll.config(command=self.beamtext.yview) # bounding box 
self.boxbutton= Tkinter.Button(self.frame2, text="Angle Box", command=self.boxbuttonClick) self.boxbutton.grid(row=2,column=0,sticky='w') self.azminmaxlabel = Tkinter.Label(self.frame2,text="Az min and max") self.azminmaxlabel.grid(row=3,column=0,sticky='e') self.azmin= Tkinter.Entry(self.frame2) self.azmin.grid(row=3,column=1,sticky='w') self.azmax= Tkinter.Entry(self.frame2) self.azmax.grid(row=3,column=2,sticky='w') self.elminmaxlabel = Tkinter.Label(self.frame2,text="El min and max") self.elminmaxlabel.grid(row=4,column=0,sticky='e') self.elmin= Tkinter.Entry(self.frame2) self.elmin.grid(row=4,column=1,sticky='w') self.elmax= Tkinter.Entry(self.frame2) self.elmax.grid(row=4,column=2,sticky='w') # Az choice self.azbutton=Tkinter.Button(self.frame2, text="Az Choice", command=self.azbuttonClick) self.azbutton.grid(row=5,column=0,sticky='w') self.azchoice= Tkinter.Entry(self.frame2) self.azchoice.grid(row=5,column=1,sticky='w') # Az choice self.elbutton=Tkinter.Button(self.frame2, text="El Choice", command=self.elbuttonClick) self.elbutton.grid(row=6,column=0,sticky='w') self.elchoice= Tkinter.Entry(self.frame2) self.elchoice.grid(row=6,column=1,sticky='w') self.azsortbutton=Tkinter.Button(self.frame2, text="Az sort", command=self.azsortbuttonClick) self.azsortbutton.grid(row=7,column=0,sticky='w') self.elsortbutton=Tkinter.Button(self.frame2, text="El Sort", command=self.elsortbuttonClick) self.elsortbutton.grid(row=7,column=1,sticky='w')
def h5_read(file):
    h5file = tables.open_file(file, driver="H5FD_CORE")
    array = h5file.root.somename.read()
    # h5file.close()
    return array, h5file
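# Usage sketch (file name is hypothetical): the H5FD_CORE driver loads the
# file into memory, and the open handle is returned alongside the data so
# the caller is responsible for closing it.
#
#     array, h5file = h5_read('somefile.h5')
#     ...  # work with array
#     h5file.close()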
def test_source_to_sink(): """Tests simulations with one facility that has a conversion factor. The trivial cycle simulation involves only one KFacility which provides what it requests itself. The conversion factors for requests and bids are kept the same so that the facility provides exactly what it requests. The amount of the transactions follow a power law. Amount = InitialAmount * ConversionFactor ^ Time This equation is used to test each transaction amount. """ if not cyclus_has_coin(): raise SkipTest("Cyclus does not have COIN") # A reference simulation input for the trivial cycle simulation. ref_input = os.path.join(INPUT, "trivial_cycle.xml") # Conversion factors for the three simulations k_factors = [0.95, 1, 2] for k_factor in k_factors: clean_outs() sim_input = create_sim_input(ref_input, k_factor, k_factor) holdsrtn = [1] # needed because nose does not send() to test generator outfile = which_outfile() cmd = ["cyclus", "-o", outfile, "--input-file", sim_input] yield check_cmd, cmd, '.', holdsrtn rtn = holdsrtn[0] if rtn != 0: return # don't execute further commands # tables of interest paths = ["/AgentEntry", "/Resources", "/Transactions", "/Info"] # Check if these tables exist yield assert_true, tables_exist(outfile, paths) if not tables_exist(outfile, paths): outfile.close() clean_outs() return # don't execute further commands # Get specific tables and columns if outfile == h5out: output = tables.open_file(h5out, mode="r") agent_entry = output.get_node("/AgentEntry")[:] info = output.get_node("/Info")[:] resources = output.get_node("/Resources")[:] transactions = output.get_node("/Transactions")[:] output.close() else: conn = sqlite3.connect(sqliteout) conn.row_factory = sqlite3.Row cur = conn.cursor() exc = cur.execute agent_entry = exc('SELECT * FROM AgentEntry').fetchall() info = exc('SELECT * FROM Info').fetchall() resources = exc('SELECT * FROM Resources').fetchall() transactions = exc('SELECT * FROM Transactions').fetchall() conn.close() # Find agent ids agent_ids = to_ary(agent_entry, "AgentId") spec = to_ary(agent_entry, "Spec") facility_id = find_ids(":agents:KFacility", spec, agent_ids) # Test for only one KFacility yield assert_equal, len(facility_id), 1 sender_ids = to_ary(transactions, "SenderId") receiver_ids = to_ary(transactions, "ReceiverId") expected_sender_array = np.empty(sender_ids.size) expected_sender_array.fill(facility_id[0]) expected_receiver_array = np.empty(receiver_ids.size) expected_receiver_array.fill(facility_id[0]) yield assert_array_equal, sender_ids, expected_sender_array yield assert_array_equal, receiver_ids, expected_receiver_array # Transaction ids must be equal range from 1 to the number of rows expected_trans_ids = np.arange(0, sender_ids.size, 1) yield assert_array_equal, \ to_ary(transactions, "TransactionId"), \ expected_trans_ids # Track transacted resources resource_ids = to_ary(resources, "ResourceId") quantities = to_ary(resources, "Quantity") # Almost equal cases due to floating point k_factors i = 0 initial_capacity = quantities[0] for q in quantities: yield assert_almost_equal, q, initial_capacity * k_factor**i i += 1 clean_outs() os.remove(sim_input)
def create_trx(path, n_clusters=6, weights='', window=1): try: trx_ = sio.loadmat(path + "/trx.mat") trx = trx_['trx'] data_types = ( 'full_path', 'id', 'numero_larva', 'numero_larva_num', 'protocol', 'pipeline', 'stimuli', 'neuron', 't', 'x_spine', 'y_spine', 'x_contour', 'y_contour', 'x_center', 'y_center', 'straight_proba', 'bend_proba', 'curl_proba', 'ball_proba', 'straight_and_light_bend_proba', 'global_state', 'x_neck_down', 'y_neck_down', 'x_neck_top', 'y_neck_top', 'x_neck', 'y_neck', 'x_head', 'y_head', 'x_tail', 'y_tail', 'S', 'prod_scal_1', 'prod_scal_2', 'S_smooth_5', 'S_deriv_smooth_5', 'angle_upper_lower_smooth_5', 'angle_upper_lower_deriv_smooth_5', 'angle_downer_upper_smooth_5', 'angle_downer_upper_deriv_smooth_5', 'eig_smooth_5', 'eig_deriv_smooth_5', 'head_velocity_norm_smooth_5', 'tail_velocity_norm_smooth_5', 'motion_velocity_norm_smooth_5', 'motion_to_u_tail_head_smooth_5', 'motion_to_v_tail_head_smooth_5', 'd_eff_tail_norm_smooth_5', 'd_eff_tail_norm_deriv_smooth_5', 'd_eff_head_norm_smooth_5', 'd_eff_head_norm_deriv_smooth_5', 'larva_length_smooth_5', 'larva_length_deriv_smooth_5', 'proba_global_state', 'run', 'cast', 'stop', 'hunch', 'back', 'roll', 'small_motion', 'start_stop', 't_start_stop', 'n_duration', 'nb_action', 'As_smooth_5', 'global_state_large_state', 'global_state_small_large_state', 'start_stop_large', 't_start_stop_large', 'duration_large', 'n_duration_large', 'nb_action_large', 'start_stop_large_small', 't_start_stop_large_small', 'duration_large_small', 'n_duration_large_small', 'nb_action_large_small', 'run_large', 'cast_large', 'stop_large', 'hunch_large', 'back_large', 'roll_large', 'run_weak', 'cast_weak', 'stop_weak', 'hunch_weak', 'back_weak', 'roll_weak', 'run_strong', 'cast_strong', 'stop_strong', 'hunch_strong', 'back_strong', 'roll_strong', 'global_state_clustering', 'start_stop_clustering', 't_start_stop_clustering', 'duration_clustering', 'n_duration_clustering', 'nb_action_clustering') + tuple( 'clustering_' + str(i) for i in range(n_clusters)) x = load_transform(path, window=window) trx_new = [] ae, decoder = autoencoder( dims=[x[0].shape[-1] - 2, 500, 500, 2000, 10]) n_stacks = len([x[0].shape[-1] - 2, 500, 500, 2000, 10]) - 1 hidden = ae.get_layer(name='encoder_%d' % (n_stacks - 1)).output clustering_layer = ClusteringLayer(n_clusters, name='clustering')(hidden) modele = Model(inputs=ae.input, outputs=[clustering_layer, ae.output]) modele.load_weights(weights) for j, larva in enumerate(x): t = larva[:, -1] X = larva[:, :-2] res = modele.predict(X) predictions = res[0].argmax(axis=1) if predictions[0] != predictions[1]: predictions[0] = predictions[1] if predictions[-1] != predictions[-2]: predictions[-1] = predictions[-2] for i in range(1, len(predictions) - 1): if (predictions[i] != predictions[i + 1]) & ( predictions[i] != predictions[i - 1]): predictions[i] = predictions[i - 1] predictions = pd.DataFrame(predictions) global_state_clustering = np.array( [i + 1 for i in predictions.values]) start_stop_clustering = [[] for i in range(n_clusters)] t_start_stop_clustering = [[] for i in range(n_clusters)] duration_clustering = [[] for i in range(n_clusters)] n_duration_clustering = [[] for i in range(n_clusters)] nb_action_clustering = [[] for i in range(n_clusters)] states = [] for i in range(n_clusters): indices = list(predictions[predictions[0] == i].index) if len(indices) > 0: indices_change = [ indices[i] + 1 for i in range(len(indices) - 1) if (indices[i] + 1 != indices[i + 1]) or (indices[i - 1] + 1 != indices[i]) ] + 
[indices[-1] + 1] times_change = [t[i - 1] for i in indices_change] indices_change = [[ indices_change[i], indices_change[i + 1] ] for i in range(len(indices_change)) if (i % 2 == 0)] times_change = [[times_change[i], times_change[i + 1]] for i in range(len(times_change)) if (i % 2 == 0)] len_behavior = [[sublist[1] - sublist[0]] for sublist in times_change] n_duration = [[sublist[1] - sublist[0]] for sublist in indices_change] nb_action = len(indices_change) start_stop_clustering[i] = indices_change t_start_stop_clustering[i] = times_change duration_clustering[i] = len_behavior n_duration_clustering[i] = n_duration nb_action_clustering[i] = nb_action states.append([[1] if predictions.values[j] == i else [-1] for j in range(len(predictions))]) start_stop_clustering = np.array( [[i for i in start_stop_clustering]]) t_start_stop_clustering = np.array( [[i for i in t_start_stop_clustering]]) duration_clustering = np.array( [[i if (len(i) > 0) else [[0]] for i in duration_clustering]]) n_duration_clustering = np.array( [i if (len(i) > 0) else [[0]] for i in n_duration_clustering]) nb_action_clustering = np.array([[ np.array([i]).astype('O') if (i) else np.array([0]).astype('O') for i in nb_action_clustering ]]) tmp = [ global_state_clustering, start_stop_clustering, t_start_stop_clustering, duration_clustering, n_duration_clustering, nb_action_clustering ] tmp += states tmp = tuple(tmp) trx_new.append( np.reshape( np.array(tuple(trx[j][0]) + tmp, dtype=[(n, d) for (n, d) in zip(data_types, ["O"] * len(data_types))]), [ 1, ])) trx_new = np.array(trx_new) trx_['trx'] = trx_new sio.savemat(path + '/trx_new_%d_clusters' % n_clusters, trx_, long_field_names=True) except NotImplementedError: trx_ = tables.open_file(path + "trx.mat") data_titles_old = ( 'full_path', 'id', 'numero_larva', 'numero_larva_num', 'protocol', 'pipeline', 'stimuli', 'neuron', 't', 'x_spine', 'y_spine', 'x_contour', 'y_contour', 'x_center', 'y_center', 'straight_proba', 'bend_proba', 'curl_proba', 'ball_proba', 'straight_and_light_bend_proba', 'global_state', 'x_neck_down', 'y_neck_down', 'x_neck_top', 'y_neck_top', 'x_neck', 'y_neck', 'x_head', 'y_head', 'x_tail', 'y_tail', 'S', 'prod_scal_1', 'prod_scal_2', 'S_smooth_5', 'S_deriv_smooth_5', 'angle_upper_lower_smooth_5', 'angle_upper_lower_deriv_smooth_5', 'angle_downer_upper_smooth_5', 'angle_downer_upper_deriv_smooth_5', 'eig_smooth_5', 'eig_deriv_smooth_5', 'head_velocity_norm_smooth_5', 'tail_velocity_norm_smooth_5', 'motion_velocity_norm_smooth_5', 'motion_to_u_tail_head_smooth_5', 'motion_to_v_tail_head_smooth_5', 'd_eff_tail_norm_smooth_5', 'd_eff_tail_norm_deriv_smooth_5', 'd_eff_head_norm_smooth_5', 'd_eff_head_norm_deriv_smooth_5', 'larva_length_smooth_5', 'larva_length_deriv_smooth_5', 'proba_global_state', 'run', 'cast', 'stop', 'hunch', 'back', 'roll', 'small_motion', 'start_stop', 't_start_stop', 'n_duration', 'nb_action', 'As_smooth_5', 'global_state_large_state', 'global_state_small_large_state', 'start_stop_large', 't_start_stop_large', 'duration_large', 'n_duration_large', 'nb_action_large', 'start_stop_large_small', 't_start_stop_large_small', 'duration_large_small', 'n_duration_large_small', 'nb_action_large_small', 'run_large', 'cast_large', 'stop_large', 'hunch_large', 'back_large', 'roll_large', 'run_weak', 'cast_weak', 'stop_weak', 'hunch_weak', 'back_weak', 'roll_weak', 'run_strong', 'cast_strong', 'stop_strong', 'hunch_strong', 'back_strong', 'roll_strong') data_titles = ( 'full_path', 'id', 'numero_larva', 'numero_larva_num', 'protocol', 
'pipeline', 'stimuli', 'neuron', 't', 'x_spine', 'y_spine', 'x_contour', 'y_contour', 'x_center', 'y_center', 'straight_proba', 'bend_proba', 'curl_proba', 'ball_proba', 'straight_and_light_bend_proba', 'global_state', 'x_neck_down', 'y_neck_down', 'x_neck_top', 'y_neck_top', 'x_neck', 'y_neck', 'x_head', 'y_head', 'x_tail', 'y_tail', 'S', 'prod_scal_1', 'prod_scal_2', 'S_smooth_5', 'S_deriv_smooth_5', 'angle_upper_lower_smooth_5', 'angle_upper_lower_deriv_smooth_5', 'angle_downer_upper_smooth_5', 'angle_downer_upper_deriv_smooth_5', 'eig_smooth_5', 'eig_deriv_smooth_5', 'head_velocity_norm_smooth_5', 'tail_velocity_norm_smooth_5', 'motion_velocity_norm_smooth_5', 'motion_to_u_tail_head_smooth_5', 'motion_to_v_tail_head_smooth_5', 'd_eff_tail_norm_smooth_5', 'd_eff_tail_norm_deriv_smooth_5', 'd_eff_head_norm_smooth_5', 'd_eff_head_norm_deriv_smooth_5', 'larva_length_smooth_5', 'larva_length_deriv_smooth_5', 'proba_global_state', 'run', 'cast', 'stop', 'hunch', 'back', 'roll', 'small_motion', 'start_stop', 't_start_stop', 'n_duration', 'nb_action', 'As_smooth_5', 'global_state_large_state', 'global_state_small_large_state', 'start_stop_large', 't_start_stop_large', 'duration_large', 'n_duration_large', 'nb_action_large', 'start_stop_large_small', 't_start_stop_large_small', 'duration_large_small', 'n_duration_large_small', 'nb_action_large_small', 'run_large', 'cast_large', 'stop_large', 'hunch_large', 'back_large', 'roll_large', 'run_weak', 'cast_weak', 'stop_weak', 'hunch_weak', 'back_weak', 'roll_weak', 'run_strong', 'cast_strong', 'stop_strong', 'hunch_strong', 'back_strong', 'roll_strong', 'global_state_clustering', 'start_stop_clustering', 't_start_stop_clustering', 'duration_clustering', 'n_duration_clustering', 'nb_action_clustering') + tuple( 'clustering_' + str(i) for i in range(n_clusters)) x = load_transform(path, window=window) trx_new = [] ae, decoder = autoencoder( dims=[x[0].shape[-1] - 2, 500, 500, 2000, 10]) n_stacks = len([x[0].shape[-1] - 2, 500, 500, 2000, 10]) - 1 hidden = ae.get_layer(name='encoder_%d' % (n_stacks - 1)).output clustering_layer = ClusteringLayer(n_clusters, name='clustering')(hidden) modele = Model(inputs=ae.input, outputs=[clustering_layer, ae.output]) modele.load_weights(weights) for j, larva in enumerate(x): t = larva[:, -1] X = larva[:, :-2] res = modele.predict(X) predictions = res[0].argmax(axis=1) if predictions[0] != predictions[1]: predictions[0] = predictions[1] if predictions[-1] != predictions[-2]: predictions[-1] = predictions[-2] for i in range(1, len(predictions) - 1): if (predictions[i] != predictions[i + 1]) & ( predictions[i] != predictions[i - 1]): predictions[i] = predictions[i - 1] predictions = pd.DataFrame(predictions) global_state_clustering = np.array( [i + 1 for i in predictions.values]) start_stop_clustering = [[] for i in range(n_clusters)] t_start_stop_clustering = [[] for i in range(n_clusters)] duration_clustering = [[] for i in range(n_clusters)] n_duration_clustering = [[] for i in range(n_clusters)] nb_action_clustering = [[] for i in range(n_clusters)] states = [] for i in range(n_clusters): indices = list(predictions[predictions[0] == i].index) if len(indices) > 0: indices_change = [ indices[i] + 1 for i in range(len(indices) - 1) if (indices[i] + 1 != indices[i + 1]) or (indices[i - 1] + 1 != indices[i]) ] + [indices[-1] + 1] times_change = [t[i - 1] for i in indices_change] indices_change = [[ indices_change[i], indices_change[i + 1] ] for i in range(len(indices_change)) if (i % 2 == 0)] times_change = 
[[times_change[i], times_change[i + 1]] for i in range(len(times_change)) if (i % 2 == 0)] len_behavior = [[sublist[1] - sublist[0]] for sublist in times_change] n_duration = [[sublist[1] - sublist[0]] for sublist in indices_change] nb_action = len(indices_change) start_stop_clustering[i] = indices_change t_start_stop_clustering[i] = times_change duration_clustering[i] = len_behavior n_duration_clustering[i] = n_duration nb_action_clustering[i] = nb_action states.append([[1] if predictions.values[j] == i else [-1] for j in range(len(predictions))]) start_stop_clustering = np.array( [[i for i in start_stop_clustering]]) t_start_stop_clustering = np.array( [[i for i in t_start_stop_clustering]]) duration_clustering = np.array( [[i if (len(i) > 0) else [[0]] for i in duration_clustering]]) n_duration_clustering = np.array( [i if (len(i) > 0) else [[0]] for i in n_duration_clustering]) nb_action_clustering = np.array([[ np.array([i]).astype('O') if (i) else np.array([0]).astype('O') for i in nb_action_clustering ]]) tmp = [ global_state_clustering, start_stop_clustering, t_start_stop_clustering, duration_clustering, n_duration_clustering, nb_action_clustering ] tmp += states tmp = tuple(tmp) trx_new.append( np.reshape( np.array(tuple(trx_.root.trx[k][0][j][0].T for k in data_titles_old) + tmp, dtype=[(n, d) for (n, d) in zip(data_titles, ["O"] * len(data_titles))]), [ 1, ])) trx_new = np.array(trx_new, ndmin=2) trx_new = {'__header__': '', '__version__': '', 'trx': trx_new} sio.savemat(path + '/trx_new_%d_clusters' % n_clusters, trx_new, long_field_names=True) trx_.close()
def test_pmap_writer(config_tmpdir, s1_dataframe_converted, s2_dataframe_converted, s2si_dataframe_converted): # setup temporary file filename = 'test_pmaps_auto.h5' PMP_file_name = os.path.join(config_tmpdir, filename) # Get test data s1_dict, _ = s1_dataframe_converted s2_dict, _ = s2_dataframe_converted s2si_dict, _ = s2si_dataframe_converted #P = PMaps(s1_dict, s2_dict, s2si_dict) event_numbers = sorted(set(s1_dict).union(set(s2si_dict))) timestamps = {e: int(time.time() % 1 * 10**9) for e in event_numbers} run_number = 632 # Write pmaps to disk. with tb.open_file(PMP_file_name, 'w') as h5out: write_pmap = pmap_writer(h5out) write_run_and_event = run_and_event_writer(h5out) for event_no in event_numbers: timestamp = timestamps[event_no] s1 = s1_dict[event_no] s2 = s2_dict[event_no] s2si = s2si_dict[event_no] write_pmap(event_no, s1, s2, s2si) write_run_and_event(run_number, event_no, timestamp) # Read back the data we have just written S1D, S2D, S2SiD = load_pmaps(PMP_file_name) rundf, evtdf = read_run_and_event_from_pmaps_file(PMP_file_name) # Convert them into our transient format # S1D = df_to_s1_dict (s1df) # S2D = df_to_s2_dict (s2df) # S2SiD = df_to_s2si_dict(s2df, s2sidf) ###################################################################### # Compare original data to those read back for event_no, s1 in s1_dict.items(): s1 = s1_dict[event_no] S1 = S1D[event_no] for peak_no in s1.peak_collection(): PEAK = S1.peak_waveform(peak_no) peak = s1.peak_waveform(peak_no) np.testing.assert_allclose(peak.t, PEAK.t) np.testing.assert_allclose(peak.E, PEAK.E) for event_no, s2 in s2_dict.items(): s2 = s2_dict[event_no] S2 = S2D[event_no] for peak_no in s2.peak_collection(): PEAK = S2.peak_waveform(peak_no) peak = s2.peak_waveform(peak_no) np.testing.assert_allclose(peak.t, PEAK.t) np.testing.assert_allclose(peak.E, PEAK.E) for event_no, si in s2si_dict.items(): si = s2si_dict[event_no] Si = S2SiD[event_no] for peak_no in si.peak_collection(): PEAK = Si.peak_waveform(peak_no) peak = si.peak_waveform(peak_no) np.testing.assert_allclose(peak.t, PEAK.t) np.testing.assert_allclose(peak.E, PEAK.E) for sipm_no in si.sipms_in_peak(peak_no): sipm_wfm = si.sipm_waveform(peak_no, sipm_no) SIPM_wfm = Si.sipm_waveform(peak_no, sipm_no) np.testing.assert_allclose(sipm_wfm.t, SIPM_wfm.t) # Event numbers np.testing.assert_equal(evtdf.evt_number.values, np.array(event_numbers, dtype=np.int32)) # Run numbers np.testing.assert_equal( rundf.run_number.values, np.full(len(event_numbers), run_number, dtype=np.int32))
@author: seb18121
"""
import numpy as np
# import matplotlib.pyplot as plt
import tables, sys, gc

# python /mnt/d/My_python_script/feedback_aperp_1D.py XXX_aperp_NO feedbackfactor detuning

basename = sys.argv[1]  # retrieve the base name
# filename1 = 'CEP_aperp_41'
filename = basename + ".h5"
outfilename = "entrance.h5"

print("Reading the field file ...\n")
h5 = tables.open_file(filename, 'r')

wavelength = h5.root.runInfo._v_attrs.lambda_r
nx = h5.root.runInfo._v_attrs.nX
ny = h5.root.runInfo._v_attrs.nY
nz = h5.root.runInfo._v_attrs.nZ2
Lc = h5.root.runInfo._v_attrs.Lc
Lg = h5.root.runInfo._v_attrs.Lg
meshsizeX = h5.root.runInfo._v_attrs.sLengthOfElmX
meshsizeY = h5.root.runInfo._v_attrs.sLengthOfElmY
meshsizeZ2 = h5.root.runInfo._v_attrs.sLengthOfElmZ2
meshsizeXSI = meshsizeX * np.sqrt(Lc * Lg)
meshsizeYSI = meshsizeY * np.sqrt(Lc * Lg)
meshsizeZSI = meshsizeZ2 * Lc

fieldin = h5.root.aperp.read()
#print("Adding cut: %d" % (storeIdx)) self.add(constraint=cplex.SparsePair(thevars,thecoefs), sense = "G", rhs = (b_Lext[0,minIdx])) full = int(sys.argv[1]) feuer = int(sys.argv[2]) xnL = int(sys.argv[3]) xnU = int(sys.argv[4]) tnL = int(sys.argv[5]) tnU = int(sys.argv[6]) stepX = int(sys.argv[7]) stepT = int(sys.argv[8]) for timeVar in range(tnL,tnU+1,stepT): for countVar in range(xnL,xnU+1,stepX): if feuer: matlabData = tables.open_file('data/feuerData%d_%d_%d.mat' % (countVar,timeVar,2)) else: matlabData = tables.open_file('data/contaData%d_%d_%d.mat' % (countVar,timeVar,5)) A2=matlabData.root.A2.data[...] Amipred=scipy.sparse.csc_matrix((matlabData.root.A2.data[...],matlabData.root.A2.ir[...], matlabData.root.A2.jc[...])) Amipred=scipy.sparse.lil_matrix(Amipred) b_Lred=matlabData.root.b_L2[0] b_Ured=matlabData.root.b_U2[0] c=matlabData.root.c[0] xn=int(matlabData.root.xn[0,0]) tn=int(matlabData.root.tn[0,0]) Amipext=scipy.sparse.csc_matrix((matlabData.root.Aext.data[...],matlabData.root.Aext.ir[...], matlabData.root.Aext.jc[...])) Amipext=scipy.sparse.lil_matrix(Amipext) b_Uext=matlabData.root.b_Uext b_Lext=matlabData.root.b_Lext intVarN=int(matlabData.root.intVarN[0])
def load(self): """ Loads a matrix stored in h5 format :param matrix_filename: :return: matrix, cut_intervals, nan_bins, distance_counts, correction_factors """ log.debug('Load in h5 format') with tables.open_file(self.matrixFileName, 'r') as f: parts = {} try: for matrix_part in ('data', 'indices', 'indptr', 'shape'): parts[matrix_part] = getattr(f.root.matrix, matrix_part).read() except Exception as e: log.info( 'No h5 file. Please check parameters concerning the file type!' ) e matrix = csr_matrix(tuple( [parts['data'], parts['indices'], parts['indptr']]), shape=parts['shape']) # matrix = hiCMatrix.fillLowerTriangle(matrix) # get intervals intvals = {} for interval_part in ('chr_list', 'start_list', 'end_list', 'extra_list'): if toString(interval_part) == toString('chr_list'): chrom_list = getattr(f.root.intervals, interval_part).read() intvals[interval_part] = toString(chrom_list) else: intvals[interval_part] = getattr(f.root.intervals, interval_part).read() cut_intervals = list( zip(intvals['chr_list'], intvals['start_list'], intvals['end_list'], intvals['extra_list'])) assert len(cut_intervals) == matrix.shape[0], \ "Error loading matrix. Length of bin intervals ({}) is different than the " \ "size of the matrix ({})".format(len(cut_intervals), matrix.shape[0]) # get nan_bins try: if hasattr(f.root, 'nan_bins'): nan_bins = f.root.nan_bins.read() else: nan_bins = np.array([]) except Exception: nan_bins = np.array([]) # get correction factors try: if hasattr(f.root, 'correction_factors'): correction_factors = f.root.correction_factors.read() assert len(correction_factors) == matrix.shape[0], \ "Error loading matrix. Length of correction factors does not" \ "match size of matrix" correction_factors = np.array(correction_factors) mask = np.isnan(correction_factors) correction_factors[mask] = 0 mask = np.isinf(correction_factors) correction_factors[mask] = 0 else: correction_factors = None except Exception: correction_factors = None try: # get correction factors if hasattr(f.root, 'distance_counts'): distance_counts = f.root.correction_factors.read() else: distance_counts = None except Exception: distance_counts = None return matrix, cut_intervals, nan_bins, distance_counts, correction_factors
def main(): dummy_parser = argparse.ArgumentParser(description='train_mm.py') opts.model_opts(dummy_parser) dummy_opt = dummy_parser.parse_known_args([])[0] os.makedirs(os.path.dirname(opt.output), exist_ok=True) device = torch.device("cpu") # opt.cuda = opt.gpu > -1 opt.cuda = False if opt.gpuid: device = torch.device("cuda:{}".format(opt.gpuid[0])) cuda.set_device(device) opt.cuda = True # torch.cuda.set_device(opt.gpu) # loading checkpoint just to find multimodal model type checkpoint = torch.load(opt.model, map_location=lambda storage, loc: storage) opt.multimodal_model_type = checkpoint['opt'].multimodal_model_type del checkpoint if opt.batch_size > 1: print( "Batch size > 1 not implemented! Falling back to batch_size = 1 ..." ) opt.batch_size = 1 # load test image features test_file = tables.open_file(opt.path_to_test_img_feats, mode='r') if opt.multimodal_model_type in ['imgd', 'imge', 'imgw']: test_img_feats = test_file.root.global_feats[:] elif opt.multimodal_model_type in ['src+img']: test_img_feats = test_file.root.local_feats[:] else: raise Exception("Model type not implemented: %s" % opt.multimodal_model_type) test_file.close() # Load the model. fields, model, model_opt = \ onmt.ModelConstructor.load_test_model(opt, dummy_opt.__dict__) #opt.multimodal_model_type = checkpoint['opt'].multimodal_model_type # File to write sentences to. out_file = codecs.open(opt.output, 'w', 'utf-8') # Test data data = onmt.io.build_dataset(fields, opt.data_type, opt.src, opt.tgt, src_dir=opt.src_dir, sample_rate=opt.sample_rate, window_size=opt.window_size, window_stride=opt.window_stride, window=opt.window, use_filter_pred=False) # Sort batch by decreasing lengths of sentence required by pytorch. # sort=False means "Use dataset's sortkey instead of iterator's". data_iter = onmt.io.OrderedIterator(dataset=data, device=device, batch_size=opt.batch_size, train=False, sort=False, sort_within_batch=True, shuffle=False) # Translator scorer = onmt.translate.GNMTGlobalScorer(opt.alpha, opt.beta) translator = onmt.translate.TranslatorMultimodal( model, fields, beam_size=opt.beam_size, n_best=opt.n_best, global_scorer=scorer, max_length=opt.max_length, copy_attn=model_opt.copy_attn, cuda=opt.cuda, beam_trace=opt.dump_beam != "", min_length=opt.min_length, test_img_feats=test_img_feats, multimodal_model_type=opt.multimodal_model_type) builder = onmt.translate.TranslationBuilder(data, translator.fields, opt.n_best, opt.replace_unk, opt.tgt) # Statistics counter = count(1) pred_score_total, pred_words_total = 0, 0 gold_score_total, gold_words_total = 0, 0 for sent_idx, batch in enumerate(data_iter): batch_data = translator.translate_batch(batch, data, sent_idx) translations = builder.from_batch(batch_data) for trans in translations: pred_score_total += trans.pred_scores[0] pred_words_total += len(trans.pred_sents[0]) if opt.tgt: gold_score_total += trans.gold_score gold_words_total += len(trans.gold_sent) n_best_preds = [ " ".join(pred) for pred in trans.pred_sents[:opt.n_best] ] out_file.write('\n'.join(n_best_preds)) out_file.write('\n') out_file.flush() if opt.verbose: sent_number = next(counter) output = trans.log(sent_number) os.write(1, output.encode('utf-8')) _report_score('PRED', pred_score_total, pred_words_total) if opt.tgt: _report_score('GOLD', gold_score_total, gold_words_total) if opt.report_bleu: _report_bleu() if opt.report_rouge: _report_rouge() if opt.dump_beam: import json json.dump(translator.beam_accum, codecs.open(opt.dump_beam, 'w', 'utf-8'))
def get_metadata(): """ Reads metadata content (i. e. model parametrizations and objectives) of specified .h5 file. GET parameters: - datasetName with "dataset". - drKernelName with "drk". :return: """ app.config["DATASET_NAME"] = InputDataset.check_dataset_name( request.args.get('datasetName')) app.config[ "DR_KERNEL_NAME"] = DimensionalityReductionKernel.check_kernel_name( request.args.get('drKernelName')) app.config["CACHE_ROOT"] = "/tmp/" + app.config[ "DATASET_NAME"] + "_" + app.config["DR_KERNEL_NAME"] # Update root storage path with new dataset name. app.config["STORAGE_PATH"] = app.config["ROOT_STORAGE_PATH"] + app.config[ "DATASET_NAME"] + "/" app.config["SURROGATE_MODELS_PATH"] = app.config["STORAGE_PATH"] + app.config["DR_KERNEL_NAME"] + \ "_surrogatemodels.pkl" app.config["EXPLAINER_VALUES_PATH"] = app.config["STORAGE_PATH"] + app.config["DR_KERNEL_NAME"] + \ "_explainervalues.pkl" dataset_name_class_links = { "movie": MovieDataset, "happiness": HappinessDataset } app.config["DATASET_CLASS"] = dataset_name_class_links[ app.config["DATASET_NAME"]] # Compile metadata template. update_and_get_metadata_template() # Build file name. file_name: str = (app.config["STORAGE_PATH"] + "embedding_" + app.config["DR_KERNEL_NAME"] + ".h5") app.config["FULL_FILE_NAME"] = file_name # Open .h5 file, if dataset name and DR kernel name are valid and file exists. if app.config["DATASET_NAME"] is not None and \ app.config["DR_KERNEL_NAME"] is not None and \ os.path.isfile(file_name): ################################################### # Load dataset. ################################################### h5file = tables.open_file(filename=file_name, mode="r") # Cast to dataframe, then return as JSON. df = pandas.DataFrame(h5file.root.metadata[:]).set_index("id") # Close file. h5file.close() ################################################### # Preprocess and cache dataset. ################################################### # Prepare dataframe for ratings. app.config["RATINGS"] = df.copy(deep=True) app.config["RATINGS"]["rating"] = 0 # Assemble embedding-level metadata. app.config["EMBEDDING_METADATA"]["original"] = df app.config["EMBEDDING_METADATA"]["features_preprocessed"], \ app.config["EMBEDDING_METADATA"]["labels"], \ app.config["EMBEDDING_METADATA"]["features_categorical_encoding_translation"] = \ Utils.preprocess_embedding_metadata_for_predictor( metadata_template=app.config["METADATA_TEMPLATE"], embeddings_metadata=df ) ################################################### # Load global surrogate models and local # explainer values. ################################################### # Compute regressor for each objective. with open(app.config["SURROGATE_MODELS_PATH"], "rb") as file: app.config["GLOBAL_SURROGATE_MODELS"] = pickle.load(file) # Load explainer values. # Replace specific metric references with an arbitrary "metric" for parsing in frontend. app.config["EXPLAINER_VALUES"] = pd.read_pickle( app.config["EXPLAINER_VALUES_PATH"]) # Return JSON-formatted embedding data. df["rating"] = app.config["RATINGS"]["rating"] # todo (remove, generate data cleanly) Hack: Rename target_domain_performance and n_components here, dismiss # b_nx. return jsonify( df.rename( columns={ "target_domain_performance": "rdp", "separability_metric": "separability" }).drop(["b_nx"], axis=1).to_json(orient='index')) else: return "File/kernel does not exist.", 400
def save(self, filename, pSymmetric=True, pApplyCorrection=None): """ Saves a matrix using hdf5 format :param filename: :return: None """ log.debug('Save in h5 format') # self.restoreMaskedBins() if not filename.endswith(".h5"): filename += ".h5" # if the file name already exists # try to find a new suitable name if os.path.isfile(filename): log.warning("*WARNING* File already exists {}\n " "Overwriting ...\n".format(filename)) unlink(filename) if self.nan_bins is None: self.nan_bins = np.array([]) elif not isinstance(self.nan_bins, np.ndarray): self.nan_bins = np.array(self.nan_bins) # save only the upper triangle of the if pSymmetric: # symmetric matrix matrix = triu(self.matrix, k=0, format='csr') else: matrix = self.matrix matrix.eliminate_zeros() filters = tables.Filters(complevel=5, complib='blosc') with tables.open_file(filename, mode="w", title="HiCExplorer matrix") as h5file: matrix_group = h5file.create_group( "/", "matrix", ) # save the parts of the csr matrix for matrix_part in ('data', 'indices', 'indptr', 'shape'): arr = np.array(getattr(matrix, matrix_part)) atom = tables.Atom.from_dtype(arr.dtype) ds = h5file.create_carray(matrix_group, matrix_part, atom, shape=arr.shape, filters=filters) ds[:] = arr # save the matrix intervals intervals_group = h5file.create_group( "/", "intervals", ) chr_list, start_list, end_list, extra_list = zip( *self.cut_intervals) for interval_part in ('chr_list', 'start_list', 'end_list', 'extra_list'): arr = np.array(eval(interval_part)) atom = tables.Atom.from_dtype(arr.dtype) ds = h5file.create_carray(intervals_group, interval_part, atom, shape=arr.shape, filters=filters) ds[:] = arr # save nan bins if len(self.nan_bins): atom = tables.Atom.from_dtype(self.nan_bins.dtype) ds = h5file.create_carray(h5file.root, 'nan_bins', atom, shape=self.nan_bins.shape, filters=filters) ds[:] = self.nan_bins # save corrections factors if self.correction_factors is not None and len( self.correction_factors): self.correction_factors = np.array(self.correction_factors) mask = np.isnan(self.correction_factors) self.correction_factors[mask] = 0 atom = tables.Atom.from_dtype(self.correction_factors.dtype) ds = h5file.create_carray(h5file.root, 'correction_factors', atom, shape=self.correction_factors.shape, filters=filters) ds[:] = np.array(self.correction_factors) # save distance counts if self.distance_counts is not None and len(self.distance_counts): atom = tables.Atom.from_dtype(self.distance_counts.dtype) ds = h5file.create_carray(h5file.root, 'distance_counts', atom, shape=self.distance_counts.shape, filters=filters) ds[:] = np.array(self.distance_counts)
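# Not part of the original code: a minimal hedged sketch showing how the layout written
# by save() above ('/matrix/{data,indices,indptr,shape}') can be read back and rebuilt
# into a sparse matrix. The file name 'matrix.h5' is an assumption for illustration.
import tables
from scipy.sparse import csr_matrix

with tables.open_file('matrix.h5', mode='r') as f:
    parts = {p: getattr(f.root.matrix, p).read()
             for p in ('data', 'indices', 'indptr', 'shape')}
    # rebuild the (upper-triangular, if pSymmetric was True) CSR matrix
    m = csr_matrix((parts['data'], parts['indices'], parts['indptr']),
                   shape=parts['shape'])
    print(m.shape, m.nnz)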
def read_data(fname,args,p): data = dict() c51_data = h5.open_file(fname) # check how many Nbs are in file if args.Nbs == None: Nbs = c51_data.get_node('/gA/'+p['ensembles'][0]+'/bs').read().shape[0] else: Nbs = args.Nbs p['Nbs'] = Nbs print('using Nbs = %d samples' %Nbs) ga_bs = np.zeros([Nbs,p['l_d']]) ga_b0 = np.zeros([p['l_d']]) epi_bs = np.zeros_like(ga_bs) epi_b0 = np.zeros_like(ga_b0) mL_b0 = np.zeros([p['l_d']]) mL_bs = np.zeros([Nbs,p['l_d']]) aw0_b0 = np.zeros([p['l_d']]) aw0_bs = np.zeros([Nbs,p['l_d']]) aSaw0_b0 = np.zeros([p['l_d']]) aSaw0_bs = np.zeros([Nbs,p['l_d']]) eju_bs = np.zeros_like(ga_bs) eju_b0 = np.zeros_like(ga_b0) epqsq_bs = np.zeros_like(ga_bs) epqsq_b0 = np.zeros_like(ga_b0) for i,ens in enumerate(p['ensembles']): ga_bs[:,i] = c51_data.get_node('/gA/'+ens+'/bs').read()[0:Nbs] ga_b0[i] = float(c51_data.get_node('/gA/'+ens+'/b0').read()) epi_bs[:,i] = c51_data.get_node('/epi/'+ens+'/bs').read()[0:Nbs] epi_b0[i] = float(c51_data.get_node('/epi/'+ens+'/b0').read()) mL_bs[:,i] = c51_data.get_node('/mpiL/'+ens+'/bs').read()[0:Nbs] mL_b0[i] = float(c51_data.get_node('/mpiL/'+ens+'/b0').read()) aw0_bs[:,i] = c51_data.get_node('/aw0/'+ens+'/bs').read()[0:Nbs] aw0_b0[i] = float(c51_data.get_node('/aw0/'+ens+'/b0').read()) # we have to multiply a by sqrt(alpha_S) as a is squared in the extrapolation functions # to swap sqrt(alpha_S) a in for a aSaw0_bs[:,i] = aw0_bs[:,i] * np.sqrt(p['afs'][ens]) aSaw0_b0[i] = aw0_b0[i] * np.sqrt(p['afs'][ens]) eju_bs[:,i] = c51_data.get_node('/eju/'+ens+'/bs').read()[0:Nbs] eju_b0[i] = float(c51_data.get_node('/eju/'+ens+'/b0').read()) epqsq_bs[:,i] = c51_data.get_node('/epqsq/'+ens+'/bs').read()[0:Nbs] epqsq_b0[i] = float(c51_data.get_node('/epqsq/'+ens+'/b0').read()) print('%s gA = %.4f +- %.4f, epi = %.5f +- %.5f, mpiL = %.4f +- %.4f' \ %(ens,ga_b0[i],ga_bs.std(axis=0)[i],epi_b0[i],epi_bs.std(axis=0)[i], mL_b0[i],mL_bs.std(axis=0)[i])) data['ga_bs'] = ga_bs data['ga_b0'] = ga_b0 data['epi_bs'] = epi_bs data['epi_b0'] = epi_b0 data['mL_bs'] = mL_bs data['mL_b0'] = mL_b0 data['aw0_bs'] = aw0_bs data['aw0_b0'] = aw0_b0 data['eju_b0'] = eju_b0 data['eju_bs'] = eju_bs data['epqsq_b0'] = epqsq_b0 data['epqsq_bs'] = epqsq_bs data['aSaw0_bs'] = aSaw0_bs data['aSaw0_b0'] = aSaw0_b0 c51_data.close() return data
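# Hedged sketch, not in the original: inspecting the node layout read_data() expects
# (/gA/<ensemble>/{b0,bs}, /epi/..., /mpiL/..., etc.). 'c51_data.h5' is an assumed
# file name; the walk simply lists every leaf node and its shape.
import tables as h5

with h5.open_file('c51_data.h5', mode='r') as f:
    for node in f.walk_nodes('/', classname='Leaf'):
        print(node._v_pathname, node.shape)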
__author__ = 'zelalem'

import fastcluster
import numpy
import scipy.spatial.distance as dist
import csv
import tables

hdf5_path = "/home/zelalem/Downloads/popular_year.hdf5"
# The stored values are condensed euclidean distances (floats), so use a float atom.
a = tables.Float64Atom()
hdf5_file = tables.open_file(hdf5_path, mode='w')
data_storage = hdf5_file.create_earray(hdf5_file.root, 'e_array', a, (0,))

f = open('/home/zelalem/Desktop/PS_year_month.csv', 'rb')
reader = list(csv.reader(f))

dataMatrix = []
for row in reader[1:]:
    dataMatrix.append(row[1:])
dataMatrix = numpy.array(dataMatrix, dtype=float)

# distanceMatrix = dist.pdist(dataMatrix,'euclidean')
# Metric: 'euclidean', 'seuclidean', 'cosine', 'hamming', 'correlation'
data_storage.append(dist.pdist(dataMatrix, 'euclidean'))

# Read the condensed distance matrix back into memory for clustering.
# Method: 'single', 'complete', 'average', 'weighted', 'ward', 'centroid', 'median'
linkage = fastcluster.linkage(data_storage[:], method='median')

num_of_cluster = 8
clust_dict = {i: [i] for i in xrange(len(linkage) + 1)}
for i in xrange(len(linkage) - num_of_cluster + 1):
    clust1 = int(linkage[i][0])
#print(y)
#print('After processing, sample:', X[0])
# print('After processing, labels:', y_batch[0])
#print('y SHAPE AFTER', np.array(y_batch, dtype=object).shape)

# ======================================================================================================================
# Write pre-processed TRAIN data to HDF5 file (one carray per batch)
# ======================================================================================================================

# Set the compression level
filters = tables.Filters(complib='blosc', complevel=5)

# Save X batches into file
f = tables.open_file(x_filename + '.hdf', 'a')
ds = f.create_carray('/', 'x_data' + str(i), obj=X_batch, filters=filters)
ds[:] = X_batch
#print(ds)
f.close()

if x_filename != 'data\\preprocessed_data\\x_TEST_SENTENC_data_preprocessed':  # do NOT write for TEST DATA
    # Save y batches into file
    f = tables.open_file(y_filename + '.hdf', 'a')
    ds = f.create_carray('/', 'y_data' + str(i),
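# Hedged sketch, not in the original: reading the per-batch carrays written above back
# into one array. 'x_filename' is the same variable as above; 'n_batches' is a
# hypothetical count of how many batches were written.
import numpy as np
import tables

with tables.open_file(x_filename + '.hdf', 'r') as f:
    batches = [f.get_node('/', 'x_data' + str(i)).read()
               for i in range(n_batches)]  # n_batches: assumed to be known by the caller
X_all = np.concatenate(batches, axis=0)    # assumes all batches share trailing dimensions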
print('BAD FILE: ' + str(filelist[nums])) print("Done!") print(featuresarray.shape) return group, truth #pr = cProfile.Profile() #pr.enable() group, truth = test_my_little_function() #pr.disable() #s = StringIO.StringIO() #sortby = 'cumulative' #ps = pstats.Stats(pr,stream=s).sort_stats(sortby) #ps.print_stats() #print(s.getvalue()) print(group.shape) print(truth.shape) h5file = 1 if h5file == 1: import tables f = tables.open_file(outputname, 'w', filters=tables.Filters(complib='zlib', complevel=6)) f.create_carray('/', 'features', obj=group) f.create_carray('/', 'truth', obj=truth) f.close()
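# Hedged sketch, not in the original: reopening the file written above and reading the
# 'features' and 'truth' carrays back. 'outputname' is the same variable used in the
# snippet above.
import tables

with tables.open_file(outputname, 'r') as f:
    features = f.root.features.read()
    truth = f.root.truth.read()
print(features.shape, truth.shape)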
def sub_wrapper(day_datetimes, j): print day_datetimes[j] date = day_datetimes[j] try: date_1 = date + datetime.timedelta(minutes=13) global_data, time_slot = pinkdust.load_channels(date, date_1) data_087 = global_data[8.7].data data_108 = global_data[10.8].data data_120 = global_data[12.0].data data = np.zeros( (data_087.shape[0], data_087.shape[1], 3)) data[:, :, 0] = data_087 data[:, :, 1] = data_108 data[:, :, 2] = data_120 except: print 'Adding date to list of missing dates' with open('/soge-home/projects/seviri_dust' '/raw_seviri_data/bt_native/missing_msgnative_dates' '.txt', 'a') as my_file: my_file.write('\n') my_file.write(date.strftime('%Y%m%d%H%M%S')) data = np.zeros( (msg_area.shape[0], msg_area.shape[1], 3)) data[:] = np.nan if (data.shape[0] != msg_area.shape[0]) or ( data.shape[1] != msg_area.shape[1]): print 'Native file data has wrong dimensions - using the second half of the array' data_copy = deepcopy(data) data = np.zeros( (msg_area.shape[0], msg_area.shape[1], 3)) data[:] = data_copy[msg_area.shape[0]:, :] msg_con_nn = image.ImageContainerNearest(data, msg_area, radius_of_influence=50000) area_con_nn = msg_con_nn.resample(target_area) result_data_nn = area_con_nn.image_data bt_data = np.zeros((3, lons.shape[0], lons.shape[1])) bt_data[0] = result_data_nn[:, :, 0] bt_data[1] = result_data_nn[:, :, 1] bt_data[2] = result_data_nn[:, :, 2] cloudmask = None # Now, instead of writing to day array, you write to hdf f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data' '/intermediary_files/BT_087_' + date.strftime( '%Y%m%d%H%M%S.hdf'), 'w') atom = tables.Atom.from_dtype(bt_data[0].dtype) filters = tables.Filters(complib='blosc', complevel=5) ds = f.create_carray(f.root, 'data', atom, bt_data[0].shape, filters=filters) ds[:] = bt_data[0] f.close() f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data' '/intermediary_files/BT_108_' + date.strftime( '%Y%m%d%H%M%S.hdf'), 'w') atom = tables.Atom.from_dtype(bt_data[1].dtype) filters = tables.Filters(complib='blosc', complevel=5) ds = f.create_carray(f.root, 'data', atom, bt_data[0].shape, filters=filters) ds[:] = bt_data[1] f.close() f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data' '/intermediary_files/BT_120_' + date.strftime( '%Y%m%d%H%M%S.hdf'), 'w') atom = tables.Atom.from_dtype(bt_data[2].dtype) filters = tables.Filters(complib='blosc', complevel=5) ds = f.create_carray(f.root, 'data', atom, bt_data[0].shape, filters=filters) ds[:] = bt_data[2] f.close() print 'Wrote', day_datetimes[j]
def __enter__(self):
    self.file = open_file(self.out_fp, 'w')
    self.group = self.file.create_group("/", 'experiments', '')
    self.table = self.file.create_table(self.group, self.table_name,
                                        self.row_class, "")
    return self
def __enter__(self):
    """ Enter context """
    if self.hdf is None:
        self.hdf = tables.open_file(self.source)
    return self
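# Illustrative sketch only: 'HDFSource' is a hypothetical name for the class owning the
# __enter__ above, and the constructor argument is assumed. The point is the 'with'
# pattern these __enter__ methods enable (a matching __exit__ is assumed to close the
# underlying tables.File handle).
with HDFSource(source='data.h5') as src:
    print(src.hdf.root)  # the open tables.File handle set by __enter__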
def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
                       merge_arrays=False, filters=HDF5_ZSTD_FILTERS):
    """
    Automatic merge of HDF5 files.
    A list of node keys can be provided to merge only those nodes. If None, all nodes
    are merged.

    Parameters
    ----------
    file_list: list of path
    output_filename: path
    nodes_keys: list of path
    merge_arrays: bool
        If True, arrays are created as enlargeable EArrays and appended across files;
        otherwise plain arrays are copied from the first file only.
    filters: tables.Filters
        Compression filters applied to the output file.
    """
    if nodes_keys is None:
        keys = set(get_dataset_keys(file_list[0]))
    else:
        keys = set(nodes_keys)

    bar = tqdm(total=len(file_list))
    with open_file(output_filename, 'w', filters=filters) as merge_file:
        with open_file(file_list[0]) as f1:
            for k in keys:
                if type(f1.root[k]) == tables.table.Table:
                    merge_file.create_table(os.path.join('/', k.rsplit('/', maxsplit=1)[0]),
                                            os.path.basename(k),
                                            createparents=True,
                                            obj=f1.root[k].read())
                if type(f1.root[k]) == tables.array.Array:
                    if merge_arrays:
                        merge_file.create_earray(os.path.join('/', k.rsplit('/', maxsplit=1)[0]),
                                                 os.path.basename(k),
                                                 createparents=True,
                                                 obj=f1.root[k].read())
                    else:
                        merge_file.create_array(os.path.join('/', k.rsplit('/', maxsplit=1)[0]),
                                                os.path.basename(k),
                                                createparents=True,
                                                obj=f1.root[k].read())
        bar.update(1)
        for filename in file_list[1:]:
            common_keys = keys.intersection(get_dataset_keys(filename))
            with open_file(filename) as file:
                for k in common_keys:
                    try:
                        if merge_arrays:
                            merge_file.root[k].append(file.root[k].read())
                        else:
                            if type(file.root[k]) == tables.table.Table:
                                merge_file.root[k].append(file.root[k].read())
                    except Exception:
                        print("Can't append node {} from file {}".format(k, filename))
            bar.update(1)
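# Hedged usage sketch for auto_merge_h5files() defined above; the input/output file
# names are assumptions for illustration.
from glob import glob

run_files = sorted(glob('dl1_run*.h5'))
auto_merge_h5files(run_files,
                   output_filename='dl1_merged.h5',
                   nodes_keys=None,      # merge every node found in the first file
                   merge_arrays=False)   # tables are appended; plain arrays are copied from the first file only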
def run_case(self, stop_at_ignition=False, restart=False): """Run simulation case set up ``setup_case``. :param bool stop_at_ignition: If ``True``, stop integration at ignition point. :param bool restart: If ``True``, skip if results file exists. """ if restart and os.path.isfile(self.meta['save-file']): print('Skipped existing case ', self.meta['id']) return # Save simulation results in hdf5 table format. table_def = { 'time': tables.Float64Col(pos=0), 'temperature': tables.Float64Col(pos=1), 'pressure': tables.Float64Col(pos=2), 'mass_fractions': tables.Float64Col(shape=(self.reac.thermo.n_species), pos=3), } with tables.open_file(self.save_file, mode='w', title=str(self.idx)) as h5file: table = h5file.create_table(where=h5file.root, name='simulation', description=table_def) # Row instance to save timestep information to timestep = table.row # Save initial conditions timestep['time'] = self.reac_net.time timestep['temperature'] = self.reac.T timestep['pressure'] = self.reac.thermo.P timestep['mass_fractions'] = self.reac.Y # Add ``timestep`` to table timestep.append() ignition_flag = False # Main time integration loop; continue integration while time of # the ``ReactorNet`` is less than specified end time. while self.reac_net.time < self.time_end: self.reac_net.step() # Save new timestep information timestep['time'] = self.reac_net.time timestep['temperature'] = self.reac.T timestep['pressure'] = self.reac.thermo.P timestep['mass_fractions'] = self.reac.Y if self.reac.T > self.properties[ 'temperature'] + 400.0 and not ignition_flag: self.ignition_delay = self.reac_net.time ignition_flag = True if stop_at_ignition: continue # Add ``timestep`` to table timestep.append() # Write ``table`` to disk table.flush() return self.ignition_delay
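# Hedged sketch, not in the original: reading back the 'simulation' table that
# run_case() writes at the file root. The path 'case_0.h5' is an assumed save-file name.
import tables

with tables.open_file('case_0.h5', mode='r') as h5file:
    sim = h5file.root.simulation
    time = sim.col('time')
    temperature = sim.col('temperature')
    print(time[-1], temperature.max())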
def wrapper(y): Year_lower = years_list[y] Year_upper = years_list[y] Month_lower = 9 Month_upper = 9 Day_lower = 1 Day_upper = 8 Hour_lower = 0 Hour_upper = 23 Minute_lower = 0 Minute_upper = 45 # Generate datetime objects corresponding to these time bounds time_params = np.array( [Year_lower, Year_upper, Month_lower, Month_upper, Day_lower, Day_upper, Hour_lower, Hour_upper, Minute_lower, Minute_upper]) datetimes = utilities.get_datetime_objects(time_params) years = np.unique(np.asarray(([j.year for j in datetimes]))) months = np.unique(np.asarray(([j.month for j in datetimes]))) days = np.unique(np.asarray(([j.day for j in datetimes]))) for m in np.arange(0, len(years)): for k in np.arange(0, len(months)): for i in np.arange(0, len(days)): day_datetimes_bool = np.asarray( [j.day == days[i] and j.month == months[k] and j.year == years[m] for j in datetimes]) day_datetimes = datetimes[day_datetimes_bool] if len(day_datetimes) == 0: continue ncfile = pinkdust.create_time_nc_file( '/soge-home/projects/seviri_dust/raw_seviri_data/bt_nc/' + day_datetimes[0].strftime( "%B%Y") + '/BT_' + day_datetimes[ 0].strftime( '%Y%m%d') + '.nc', day_datetimes, lats, lons) day_array[:] = 0 pool = multiprocessing.Pool() for j in np.arange(0, len(day_datetimes)): pool.apply_async(sub_wrapper, args=(day_datetimes, j,)) pool.close() pool.join() # Now read back in all the intermediaries and write to nc for j in np.arange(0, len(day_datetimes)): f = tables.open_file( '/soge-home/projects/seviri_dust/raw_seviri_data' '/intermediary_files/BT_087_' + day_datetimes[ j].strftime( '%Y%m%d%H%M%S.hdf')) arrobj = f.get_node('/data') bt_087 = arrobj.read() f.close() f = tables.open_file( '/soge-home/projects/seviri_dust/raw_seviri_data' '/intermediary_files/BT_108_' + day_datetimes[ j].strftime( '%Y%m%d%H%M%S.hdf')) arrobj = f.get_node('/data') bt_108 = arrobj.read() f.close() f = tables.open_file( '/soge-home/projects/seviri_dust/raw_seviri_data' '/intermediary_files/BT_120_' + day_datetimes[ j].strftime( '%Y%m%d%H%M%S.hdf')) arrobj = f.get_node('/data') bt_120 = arrobj.read() f.close() ncfile = pinkdust.save_to_existing_nc(ncfile, bt_087, bt_108, bt_120, None, day_datetimes[j]) print 'Day ' + str(days[i]) + ' done' ncfile.close() print 'Month ' + str(months[k]) + ' done'
# FILE INFO ETC.

s = sys.stdin.read()

if not s.strip():
    raise ValueError("Please provide at least one directory.")
else:
    DIR_TOREAD = s.split(" ")

if CF.VERBOSE:
    print("Directories to be read:")
    print(DIR_TOREAD)

##### Initialize the file. #####

# Open file connection, writing new file to disk.
myh5 = tables.open_file(CF.FILE_NAME, mode="w", title=CF.FILE_TITLE)

# Create the EArray for actual values.
a = tables.UInt8Atom()
myh5.create_earray(myh5.root, name="riskLevels", atom=a,
                   shape=(0, CF.NUMLAT, CF.NUMLON), title=CF.DATA_TITLE)

# Create the EArray for grid information, and populate it.
a = tables.Float64Atom()
myh5.create_earray(myh5.root, name="gridInfo", atom=a, shape=(0, len(CF.INFO_LIST)),
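# Hedged sketch, not in the original: appending one frame to the enlargeable
# 'riskLevels' EArray created above (shape (0, CF.NUMLAT, CF.NUMLON)). The zero-filled
# frame is purely illustrative.
import numpy as np

frame = np.zeros((1, CF.NUMLAT, CF.NUMLON), dtype=np.uint8)  # dtype matches UInt8Atom
myh5.root.riskLevels.append(frame)
myh5.flush()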
elif args.flux == "GaisserHillas" : flux = GaisserHillas() elif args.flux == "Hoerandel" : flux = Hoerandel() elif args.flux == "Hoerandel5" : flux = Hoerandel5() elif args.flux == "Hoerandel_IT" : flux = Hoerandel_IT() eventcount = 0 #generator = weighting.from_simprod(21269,False,'vm-simprod2.icecube.wisc.edu') #generator = weighting.icetop_mc_weights(21269,'/home/tmcelroy/icecube/domeff/datasetConfig.json') nfiles = len(file_list) rand = ROOT.TRandom3() for filename in file_list : h5file = 0 try : h5file = open_file(filename, mode="r") except : continue domtable = h5file.root.doms eventtable = h5file.root.events runtable = h5file.root.runinfo domindex = 0 eventindex = -1 for event in eventtable.iterrows() : eventcount += 1 weight = 1.0 if args.flux != "data" :
import tables import pandas as pd import numpy as np h5_file = tables.open_file("../../../../../restricted_images_national.h5") table_train = h5_file.get_node("/{}".format("train")) #len=58263 table_validate = h5_file.get_node("/{}".format("validate")) #len=7744 table_test = h5_file.get_node("/{}".format("test")) #len= 13509 #table_train.colnames #Table[0][0:11] ['img0','img1','img_id','inc0','inc1','lat','lng','pop0','pop1','pop_centile','sol0','sol1'] #print(len(table_train[0]), len(table_train[0][0]), len(table_train[0][0][0]), len(table_train[0][0][0][0])) #(12, 94, 94, 7) #table.colnames #Table[0][0:11] lats = [] #latitudes of points lngs = [] #longitudes of points pop0 = [] #Population of 2000 of points pop1 = [] #Population of 2010 of points popdiff = [] #Pop1 - Pop0 inc0 = [] #Income of 2000 inc1 = [] #Income of 2010 of points incdiff = [] #inc1 - inc0 of points for table in [table_train, table_validate, table_test]: for row in table: #row[3]=inc0, row[4]=inc1, row[5]=lat, row[6]=lng, row[7]=pop0, row[8]=pop1 inc0.append(row[3]) inc1.append(row[4]) incdiff.append(row[4] - row[3]) lats.append(row[5]) lngs.append(row[6]) pop0.append(row[7]) pop1.append(row[8])
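# Hedged sketch, not in the original: assembling the lists collected above into a
# pandas DataFrame and closing the HDF5 file once the rows have been copied. popdiff is
# recomputed here because the loop above never fills it.
df = pd.DataFrame({'lat': lats, 'lng': lngs,
                   'pop0': pop0, 'pop1': pop1,
                   'inc0': inc0, 'inc1': inc1, 'incdiff': incdiff})
df['popdiff'] = df['pop1'] - df['pop0']
h5_file.close()
print(df.describe())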
def main(): #reads from hdf5 file (must already exist) h5file = tables.open_file("t", mode="a", title="Mag Data for AUTUMNX") stationlist = [ 'SALU', 'AKUL', 'PUVR', 'INUK', 'KJPK', 'RADI', 'VLDR', 'STFL', 'SEPT', 'SCHF' ] table = {} #get yesterday's date date = datetime.datetime.utcnow() - datetime.timedelta(days=1) strd = date.strftime('%Y_%m_%d') year, month, day = re.split('_', strd) #main loop for station in stationlist: #should probably clean this up, this is pretty bad right now if station == 'SALU': table[station] = h5file.root.magnetometer.SALU elif station == 'AKUL': table[station] = h5file.root.magnetometer.AKUL elif station == 'PUVR': table[station] = h5file.root.magnetometer.PUVR elif station == 'INUK': table[station] = h5file.root.magnetometer.INUK elif station == 'KJPK': table[station] = h5file.root.magnetometer.KJPK elif station == 'RADI': table[station] = h5file.root.magnetometer.RADI elif station == 'VLDR': table[station] = h5file.root.magnetometer.VLDR elif station == 'STFL': table[station] = h5file.root.magnetometer.STFL elif station == 'SEPT': table[station] = h5file.root.magnetometer.SEPT elif station == 'SCHF': table[station] = h5file.root.magnetometer.SCHF print station #read data from http url = 'http://autumn.athabascau.ca/magdata/L1/{0}/fluxgate/{1}/{2}/{3}/AUTUMNX_{0}_TGBO_{4}_PT0,5S.txt'.format( station, year, month, day, strd) try: response = urllib2.urlopen(url) except: continue cd = re.split('\n', response.read()) del cd[0:13] data = [] #parse datetime object into seconds from epoch for line in cd: ld = re.split('\s+', line) try: datet = ld[0] + ' ' + ld[1] + '000' datet = ( datetime.datetime.strptime(datet, '%Y-%m-%d %H:%M:%S.%f') - datetime.datetime.utcfromtimestamp(0)).total_seconds() ld = [datet, float(ld[3]), float(ld[4]), float(ld[5])] data.append(ld) except: continue #append data to table in hdf5 file mag = table[station].row then = datetime.datetime.now() print "Starting Data append..." for line in data: mag['time'] = line[0] mag['Bx'] = line[1] mag['By'] = line[2] mag['Bz'] = line[3] mag.append() #flush table buffer table[station].flush() print "{0} s to complete".format(datetime.datetime.now() - then) #close file h5file.close()
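# Hedged sketch, not in the original: querying one station's table after main() has
# appended its rows. The column names (time, Bx, By, Bz) follow the rows written above;
# the numeric cutoff is an illustrative epoch-seconds threshold, not a real value.
import tables

with tables.open_file("t", mode="r") as h5file:
    salu = h5file.root.magnetometer.SALU
    recent = salu.read_where('time > 1.4e9')  # illustrative cutoff in epoch seconds
    print(len(recent))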
"length", "x", "y", "z", "r", "energy", "xmin", "ymin", "zmin", "rmin", "xmax", "ymax", "zmax", "rmax", "xb1", "yb1", "zb1", "eb1", "xb2", "yb2", "zb2", "eb2", "ovlp_e" ] data = namedtuple("data", columns) # auxiliar namedtuple paolina_algorithm = track_blob_info_creator_extractor(**paolina_params) out_df = pd.DataFrame(columns=columns) try: DECO = pd.read_hdf(in_filename, "DECO/Events") except KeyError: # save empty file with tb.open_file(out_filename, "w") as h5out: df_writer(h5out, out_df, group_name="tracks", table_name="events") index_tables(out_filename) sys.exit() for (event, peak), deco in DECO.groupby(["event", "npeak"]): # pre-proccess deco.loc[:, "time"] = 0 deco.loc[:, "Ec"] = deco["E"] deco.loc[:, "Ep"] = deco["E"] deco.loc[:, ("Q", "Xrms", "Yrms", "nsipm")] = np.nan # Paolina hitc = hits_from_df(deco)[event] df, voxels, track_hitc, out_of_map = paolina_algorithm(hitc)
print(args) local_data_path = r'/path/to/datasets' if args.dataset[:3] == 'shd': dataset = local_data_path + r'/shd/' + args.dataset elif args.dataset[:5] == 'mnist': dataset = local_data_path + r'/mnist-dvs/' + args.dataset elif args.dataset[:11] == 'dvs_gesture': dataset = local_data_path + r'/DvsGesture/' + args.dataset elif args.dataset[:7] == 'swedish': dataset = local_data_path + r'/SwedishLeaf_processed/' + args.dataset else: print('Error: dataset not found') args.dataset = tables.open_file(dataset) args.disable_cuda = str2bool(args.disable_cuda) args.device = None if not args.disable_cuda and torch.cuda.is_available(): args.device = torch.device('cuda') else: args.device = torch.device('cpu') ### Network parameters args.n_input_neurons = dataset.root.stats.train_data[1] args.n_output_neurons = dataset.root.stats.train_label[1] args.n_hidden_neurons = args.n_h ### Learning parameters if not args.num_samples_train: