def writeBarcodeH5(labeledZmws, labeler, outFile, writeExtendedInfo=False):
    """Write a barcode file from a list of labeled ZMWs. In addition to
    labeledZmws, this function takes a pbbarcode.BarcodeLabeler."""
    bestScores = [z.toBestRecord() for z in labeledZmws]
    outDta = n.vstack(bestScores)
    outH5 = h5py.File(outFile, 'a')

    if BC_DS_PATH in outH5:
        del outH5[BC_DS_PATH]

    bestDS = outH5.create_dataset(BC_DS_PATH, data=outDta, dtype="int32")
    bestDS.attrs['movieName'] = labeler.movieName
    bestDS.attrs['barcodes'] = n.array(labeler.barcodeLabels,
                                       dtype=h5py.new_vlen(str))
    bestDS.attrs['columnNames'] = n.array(
        ['holeNumber', 'nAdapters', 'barcodeIdx1', 'barcodeScore1',
         'barcodeIdx2', 'barcodeScore2'], dtype=h5py.new_vlen(str))
    bestDS.attrs['scoreMode'] = labeler.scoreMode

    if writeExtendedInfo:
        # here we use the 'names' because each barcode is scored
        # individually.
        nBarcodes = len(labeler.barcodeNames)

        def makeArray(l, v):
            a = n.zeros(l, dtype=type(v))
            a.fill(v)
            return a

        def makeRecord(lZmw):
            zmws = makeArray(nBarcodes * lZmw.nScored, lZmw.holeNumber)
            adapters = n.concatenate([makeArray(nBarcodes, i)
                                      for i in range(1, lZmw.nScored + 1)])
            idxs = n.concatenate([list(range(0, nBarcodes))
                                  for i in range(0, lZmw.nScored)])
            scores = n.concatenate(lZmw.allScores)
            return n.transpose(n.vstack((zmws, adapters, idxs, scores)))

        records = [makeRecord(lZmw) for lZmw in labeledZmws if lZmw.allScores]
        records = n.vstack(records)

        if BC_DS_ALL_PATH in outH5:
            del outH5[BC_DS_ALL_PATH]

        allDS = outH5.create_dataset(BC_DS_ALL_PATH, data=records,
                                     dtype='int32')
        allDS.attrs['movieName'] = labeler.movieName
        # note names versus labels.
        allDS.attrs['barcodes'] = n.array(labeler.barcodeNames,
                                          dtype=h5py.new_vlen(str))
        allDS.attrs['columnNames'] = n.array(
            ['holeNumber', 'adapter', 'barcodeIdx', 'score'],
            dtype=h5py.new_vlen(str))

    # close the file at the very end.
    outH5.close()
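# Every snippet in this section builds its variable-length string dtype with the
# legacy h5py.new_vlen(str) helper, which has long been deprecated in favour of
# h5py.special_dtype(vlen=str) and, on recent releases, h5py.string_dtype().
# A minimal sketch of the equivalent modern pattern; the file and dataset names
# below are only illustrative, not taken from any of the snippets:

import h5py
import numpy as np

str_type = h5py.special_dtype(vlen=str)            # works on h5py 2.x and 3.x
# str_type = h5py.string_dtype(encoding='utf-8')   # preferred on h5py >= 2.10

with h5py.File('example.h5', 'w') as f:
    ds = f.create_dataset('labels', (3,), dtype=str_type)
    ds[:] = ['alpha', 'beta', 'gamma']
    ds.attrs['columnNames'] = np.array(['name'], dtype=str_type)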
def create_neurohdf_file(filename, data):

    with closing(h5py.File(filename, 'w')) as hfile:
        hfile.attrs['neurohdf_version'] = '0.1'
        mcgroup = hfile.create_group("Microcircuit")
        mcgroup.attrs['node_type'] = 'irregular_dataset'
        vert = mcgroup.create_group("vertices")
        conn = mcgroup.create_group("connectivity")

        vert.create_dataset("id", data=data['vert']['id'])
        vert.create_dataset("location", data=data['vert']['location'])
        verttype = vert.create_dataset("type", data=data['vert']['type'])
        # create rec array with two columns, value and name
        my_dtype = np.dtype([('value', 'l'), ('name', h5py.new_vlen(str))])
        helpdict = {VerticesTypeSkeletonRootNode['id']: VerticesTypeSkeletonRootNode['name'],
                    VerticesTypeSkeletonNode['id']: VerticesTypeSkeletonNode['name'],
                    VerticesTypeConnectorNode['id']: VerticesTypeConnectorNode['name']}
        arr = np.recarray(len(helpdict), dtype=my_dtype)
        for i, kv in enumerate(helpdict.items()):
            arr[i][0] = kv[0]
            arr[i][1] = kv[1]
        verttype.attrs['value_name'] = arr

        vert.create_dataset("confidence", data=data['vert']['confidence'])
        vert.create_dataset("userid", data=data['vert']['userid'])
        vert.create_dataset("radius", data=data['vert']['radius'])
        vert.create_dataset("skeletonid", data=data['vert']['skeletonid'])
        vert.create_dataset("creation_time", data=data['vert']['creation_time'])
        vert.create_dataset("modification_time", data=data['vert']['modification_time'])

        conn.create_dataset("id", data=data['conn']['id'])
        if 'type' in data['conn']:
            conntype = conn.create_dataset("type", data=data['conn']['type'])
            helpdict = {ConnectivityNeurite['id']: ConnectivityNeurite['name'],
                        ConnectivityPresynaptic['id']: ConnectivityPresynaptic['name'],
                        ConnectivityPostsynaptic['id']: ConnectivityPostsynaptic['name']}
            arr = np.recarray(len(helpdict), dtype=my_dtype)
            for i, kv in enumerate(helpdict.items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]
            conntype.attrs['value_name'] = arr
        if 'skeletonid' in data['conn']:
            conn.create_dataset("skeletonid", data=data['conn']['skeletonid'])

        if 'meta' in data:
            metadata = mcgroup.create_group('metadata')
            # create recarray with two columns, skeletonid and string
            my_dtype = np.dtype([('skeletonid', 'l'), ('name', h5py.new_vlen(str))])
            arr = np.recarray(len(data['meta']), dtype=my_dtype)
            for i, kv in enumerate(data['meta'].items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]
            metadata.create_dataset('skeleton_name', data=arr)
def add_mesh_from_string(self, name, shape_data, scale=None,
                         insideMargin=None, outsideMargin=None):
    """
    Add a mesh shape from a string.
    Accepted format : mesh encoded in VTK .vtp format
    """
    if name not in self._ref:
        shape = self._ref.create_dataset(name, (1,),
                                         dtype=h5py.new_vlen(str))
        shape[:] = shape_data
        shape.attrs['id'] = self._number_of_shapes
        shape.attrs['type'] = 'vtp'
        if scale is not None:
            shape.attrs['scale'] = scale
        if insideMargin is not None:
            shape.attrs['insideMargin'] = insideMargin
        if outsideMargin is not None:
            shape.attrs['outsideMargin'] = outsideMargin
        self._number_of_shapes += 1
def add_occ_shape(self, name, occ_shape):
    """
    Add an OpenCascade TopoDS_Shape.
    """
    if name not in self._ref:
        from OCC.STEPControl import STEPControl_Writer, STEPControl_AsIs
        # step format is used for the storage.
        step_writer = STEPControl_Writer()
        step_writer.Transfer(occ_shape, STEPControl_AsIs)
        shape_data = None
        with tmpfile() as tmpf:
            step_writer.Write(tmpf[1])
            tmpf[0].flush()
            shape_data = str_of_file(tmpf[1])
            shape = self._ref.create_dataset(name, (1,),
                                             dtype=h5py.new_vlen(str))
            shape[:] = shape_data
            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'step'
            self._number_of_shapes += 1
def add_interaction(self, name, body1_name, contactor1_name=None,
                    body2_name=None, contactor2_name=None,
                    distance_calculator='cadmbtb',
                    offset1=0.0, offset2=0.0):
    """
    Add permanent interactions between two objects contactors.
    """
    if name not in self.permanent_interactions():
        pinter = self.permanent_interactions().create_dataset(
            name, (1,), dtype=h5py.new_vlen(str))
        pinter.attrs['id'] = self._number_of_permanent_interactions
        pinter.attrs['type'] = 'permanent_interaction'
        pinter.attrs['body1_name'] = body1_name
        pinter.attrs['body2_name'] = body2_name
        if contactor1_name is not None:
            pinter.attrs['contactor1_name'] = contactor1_name
        if contactor2_name is not None:
            pinter.attrs['contactor2_name'] = contactor2_name
        pinter.attrs['distance_calculator'] = distance_calculator
        pinter.attrs['offset1'] = offset1
        pinter.attrs['offset2'] = offset2

        self._number_of_permanent_interactions += 1
def _save_hdf5(self, filename, group = "OrbitResponseMatrix"): """ save data in hdf5 format in HDF5 group (h5py.Group object). Note ----- h5py before v2.0 does not accept unicode directly. """ h5zip = None # 'gzip' works in default install f = h5py.File(filename, 'w') grp = f.create_group(group) str_type = h5py.new_vlen(str) m, n = np.shape(self.m) dst = grp.create_dataset('m', (m,n), data=self.m, compression=h5zip) # name, spos, plane = zip(*self.bpm) name = [v.encode('ascii') for v in name] dst.attrs["bpm_name"] = name dst.attrs["bpm_field"] = plane name, spos, plane = zip(*self.trim) dst.attrs["cor_name"] = name dst.attrs["cor_field"] = plane if self.bpm_pv: dst.attrs["bpm_pv"] = self.bpm_pv if self.cor_pv: dst.attrs["cor_pv"] = self.cor_pv if self.cor_pvrb: dst.attrs["cor_pvrb"] = self.cor_pvrb f.close()
def addOccShape(self, name, occ_shape): """ Add an OpenCascade TopoDS_Shape """ if name not in self._ref: from OCC.STEPControl import STEPControl_Writer, STEPControl_AsIs # step format is used for the storage. step_writer = STEPControl_Writer() step_writer.Transfer(occ_shape, STEPControl_AsIs) shape_data = None with tmpfile() as tmpf: status = step_writer.Write(tmpf[1]) tmpf[0].flush() shape_data = str_of_file(tmpf[1]) shape = self._ref.create_dataset(name, (1,), dtype=h5py.new_vlen(str)) shape[:] = shape_data shape.attrs['id'] = self._number_of_shapes shape.attrs['type'] = 'step' self._shapeid[name] = shape.attrs['id'] self._number_of_shapes += 1
def filter_vocab(sample_hdf_fname, tdict_pkl_fname, filtered_hdf_fname):
    log.info("opening original samples file " + sample_hdf_fname)
    sample_hdfile = h5py.File(sample_hdf_fname, "r")

    columns_selector, filtered_vocab = make_new_vocab(sample_hdfile,
                                                      tdict_pkl_fname)

    log.info("creating filtered samples file " + filtered_hdf_fname)
    filtered_hdfile = h5py.File(filtered_hdf_fname, "w")

    log.info("storing filtered vocabulary ({0} terms)".format(len(filtered_vocab)))
    # create new type for variable-length strings
    # see http://code.google.com/p/h5py/wiki/HowTo#Variable-length_strings
    str_type = h5py.new_vlen(str)
    # hdf5 can't handle unicode strings, so encode terms as utf-8 byte strings
    filtered_hdfile.create_dataset("vocab",
                                   data=[t.encode("utf-8") for t in filtered_vocab],
                                   dtype=str_type)

    make_new_samples(sample_hdfile, filtered_hdfile, columns_selector)

    log.info("closing " + sample_hdf_fname)
    sample_hdfile.close()

    log.info("closing " + filtered_hdf_fname)
    filtered_hdfile.close()
def write_probe_map(h5, probe_map):
    if "probe_map" not in h5:
        h5.create_group("probe_map")
    probe_type = numpy.dtype([
        ('hugo', 'S'),
        ('chrome', 'S'),
        ('start', 'i'),
        ('stop', 'i'),
        ('strand', 'S'),
    ])
    h_probe_type = h5py.new_vlen(probe_type)
    print(probe_type)
    probes = sorted(probe_map.geneMap.keys())
    pm_count = len(probes)
    ds = h5.create_dataset("/probe_map/%s" % (probe_map['name']),
                           [pm_count], dtype=h_probe_type)
    i = 0
    val = numpy.zeros(1, dtype=probe_type)
    for probe in probes:
        ds[i] = i
        i += 1
def filter_sample_vocab(lang_pair): """ Filter vocabulary words which do not occur in the translation lexicon. This reduces the size of the vocabulary and adjusts the context samples accordingly. Assumes that vocab dos NOT contain: - POS tags (i.e. lempos combination) - multi-word units (MWUs) """ sample_hdf_fname = config["sample"][lang_pair]["samples_fname"] log.info("opening original samples file " + sample_hdf_fname) sample_hdfile = h5py.File(sample_hdf_fname, "r") filtered_hdf_fname = config["sample"][lang_pair]["samples_filt_fname"] log.info("creating filtered samples file " + filtered_hdf_fname) filtered_hdfile = h5py.File(filtered_hdf_fname, "w") tdict_pkl_fname = config["dict"][lang_pair]["pkl_fname"] columns_selector, filtered_vocab = make_new_vocab(sample_hdfile, tdict_pkl_fname) log.info("storing filtered vocabulary") # create new type for variable-length strings # see http://code.google.com/p/h5py/wiki/HowTo#Variable-length_strings str_type = h5py.new_vlen(str) # hdf5 can't handle unicode strings, so encode terms as utf-8 byte strings filtered_hdfile.create_dataset("vocab", data=[t.encode("utf-8") for t in filtered_vocab], dtype=str_type) make_new_samples(sample_hdfile, filtered_hdfile, columns_selector) log.info("closing " + sample_hdf_fname) sample_hdfile.close() log.info("closing " + filtered_hdf_fname) filtered_hdfile.close()
def _createDatasetInFile(self, hdf5File, datasetName, roi):
    shape = tuple(roi[1] - roi[0])
    chunks = self._description.chunks
    if chunks is not None:
        # chunks must not be bigger than the data in any dim
        chunks = numpy.minimum(chunks, shape)
        chunks = tuple(chunks)
    compression = self._description.compression
    compression_opts = self._description.compression_opts

    dtype = self._description.dtype
    if dtype == object:
        dtype = h5py.new_vlen(str)
    dataset = hdf5File.create_dataset(datasetName,
                                      shape=shape,
                                      dtype=dtype,
                                      chunks=chunks,
                                      compression=compression,
                                      compression_opts=compression_opts)

    # Set data attributes
    if self._description.drange is not None:
        dataset.attrs['drange'] = self._description.drange
    if _use_vigra:
        dataset.attrs['axistags'] = vigra.defaultAxistags(
            self._description.axes).toJSON()
def _createEmptyDataset_(self, parent, dataset_name, shape, dtype, **kwargs):
    """ Creates an empty dataset in the data file and returns a pointer
    to it. Raises IOError exception if the dataset already exists.
    """
    dataset_key = self.hdfObjectKey(dataset_name)
    if dataset_key in parent.keys():
        errmsg = "'%s' dataset already exists in current data file."
        raise IOError(errmsg % dataset_name)

    create_args = {}
    attributes = {}
    for name in kwargs:
        if name in DATASET_CREATE_ARGS:
            create_args[safestring(name)] = safevalue(kwargs[name])
        else:
            attributes[safestring(name)] = safevalue(kwargs[name])
    if 'created' not in attributes:
        attributes['created'] = self._timestamp_()

    if dtype == N.dtype(object):
        create_args['dtype'] = h5py.new_vlen(str)
    else:
        create_args['dtype'] = dtype

    dataset = parent.create_dataset(dataset_key, shape, **create_args)

    for attr_name, attr_value in attributes.items():
        dataset.attrs[attr_name] = attr_value

    return dataset
def _save_hdf5(self, filename, group="OrbitResponseMatrix"): """ save data in hdf5 format in HDF5 group (h5py.Group object). Note ----- h5py before v2.0 does not accept unicode directly. """ h5zip = None # 'gzip' works in default install f = h5py.File(filename, 'w') grp = f.create_group(group) str_type = h5py.new_vlen(str) m, n = np.shape(self.m) dst = grp.create_dataset('m', (m, n), data=self.m, compression=h5zip) # name, plane = zip(*self.bpm) name = [v.encode('ascii') for v in name] dst.attrs["bpm_name"] = name dst.attrs["bpm_field"] = plane name, plane = zip(*self.cor) dst.attrs["cor_name"] = name dst.attrs["cor_field"] = plane if self.bpm_pv: dst.attrs["bpm_pv"] = self.bpm_pv if self.cor_pv: dst.attrs["cor_pv"] = self.cor_pv if self.cor_pvrb: dst.attrs["cor_pvrb"] = self.cor_pvrb f.close()
def _recursive_write(self, f, struct): for thing in dir(struct): #Skip everything starting with '_' if thing.startswith('_'): continue try: #Get the attribute value = getattr(struct, thing) except (AttributeError,ValueError) as E: print(thing, E) #If it can't get the attribute, just go to the next thing continue #If it is an integer, floating point value, or numpy array if isinstance(value,(int, float)): #Save it as a value, go to next thing f.create_dataset(thing, data = value) continue elif isinstance(value, np.ndarray): if not value.shape: # value.shape is an empty tuple # It's a one-element numpy array f.create_dataset(thing, data = value) else: #Save it with compression, go to next thing f.create_dataset(thing, data = value, compression = 'gzip') continue elif isinstance(value, basestring): str_type = h5py.new_vlen(str) f.create_dataset(thing, dtype=str_type, data = value) continue import inspect #Skip methods, functions, built-in functions, routines, and modules if (inspect.ismethod(value) or inspect.isfunction(value) or inspect.isbuiltin(value) or inspect.isroutine(value) or inspect.ismodule(value)): continue if type(value) is types.DictType: dict_group = f.create_group(thing) # Recurse into the entries in the dictionary by turning the # dictionary into a class self._recursive_write(dict_group, StubClass(value)) elif isinstance(value, (list,tuple)): dict_group = f.create_group(thing) #Convert to numpy array #List/Tuple to a class cls = StubClass({str(i):v for i,v in enumerate(value)}) #Write class recursively self._recursive_write(dict_group, cls) else: f.create_group(thing) #Recurse into the class self._recursive_write(f[thing], value)
def generate_random_hdf5(fname, templatefn): """ sample HDF5 file generation Generates sample HDF5 files with random data and metadata Parameters ---------- fname: str Name of the HDF5 to be generated templatefn : str Name of the configuration file describing HDF5 structure Examples -------- >>> fname = generate_random_hdf5('fname.h5', 'template.cfg') """ # read configuration file config = ConfigParser.SafeConfigParser() config.read(templatefn) ini = config._sections # open hdf5 file f = h5py.File(fname, 'w') # fill data and metadata meta = config.items("h5meta") strdt = h5py.new_vlen(str) for val in meta: # If it is data, generate a random dataset if val[0][-5:] == "/data": shape_tmp = map(int, val[1].strip("(").strip(")").split(",")) shape = [i for i in shape_tmp] data = np.random.rand(*shape) group = f.require_group(val[0].rsplit('/', 1)[0]) dset = group.require_dataset(val[0].rsplit('/', 1)[1], shape, dtype=float) dset[:] = data continue if len(val[0].split('/')) > 1: groupname, dsetname = val[0].rsplit('/', 1) group = f.require_group(groupname) dset = group.require_dataset(dsetname, (1, ), strdt) else: dset = f.require_dataset(val[0], (1, ), strdt) rndval = val[1].split(',')[random.randint(0, len(val[1].split(',')) - 1)].strip() print dset, rndval dset[...] = rndval ini['h5meta'][val[0]] = rndval f.close() # what could I return? print "Done" return 0
def save_xml_str_to_hdf5_dataset(file_path, xml='', dataset_name='something.xml'):
    # Write the xml file...
    with h5py.File(file_path, 'a') as f:
        str_type = h5py.new_vlen(str)
        ds = f.create_dataset(dataset_name, shape=(2,), dtype=str_type)
        ds[:] = xml
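# A hypothetical call to the helper above; the file name, dataset name and XML
# payload are made up for illustration. Because the dataset has a fixed
# shape=(2,), the scalar assignment broadcasts the same string into both rows:

save_xml_str_to_hdf5_dataset('results.h5',
                             xml='<run id="1"><status>ok</status></run>',
                             dataset_name='metadata.xml')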
def _recursive_write(self, f, struct): for thing in dir(struct): #Skip everything starting with '_' if thing.startswith('_'): continue try: #Get the attribute value = getattr(struct, thing) except AttributeError: #If it can't get the attribute, just go to the next thing continue #If it is an integer, floating point value, or numpy array if isinstance(value,(int, float)): #Save it as a value, go to next thing f.create_dataset(thing, data = value) continue elif isinstance(value, np.ndarray): if not value.shape: # value.shape is an empty tuple # It's a one-element numpy array f.create_dataset(thing, data = value) else: #Save it with compression, go to next thing f.create_dataset(thing, data = value, compression = 'gzip') continue elif isinstance(value, basestring): str_type = h5py.new_vlen(str) f.create_dataset(thing, dtype=str_type, data = value) continue import inspect #Skip methods, functions, built-in functions and routines if (inspect.ismethod(value) or inspect.isfunction(value) or inspect.isbuiltin(value) or inspect.isroutine(value)): continue if type(value) is types.DictType: dict_group = f.create_group(thing) # Recurse into the entries in the dictionary by turning the # dictionary into a class self._recursive_write(dict_group, StubClass(value)) elif isinstance(value, (list,tuple)): dict_group = f.create_group(thing) #Convert to numpy array #List/Tuple to a class cls = StubClass({str(i):v for i,v in enumerate(value)}) #Write class recursively self._recursive_write(dict_group, cls) else: f.create_group(thing) #Recurse into the class self._recursive_write(f[thing], value)
def link_file(self, label, array_idx, filename):
    dtype = h5py.new_vlen(type(filename))
    dataset = self._get_dataset(label, dtype=dtype)

    # And update the HDF5 dataset with the new data
    try:
        self._set_data_point(dataset, array_idx, filename)
    except:
        logger.error(u'Error updating dataset', exc_info=True)
def add_plugin_source(self, name, filename):
    """
    Add C source plugin
    """
    if name not in self._plugins:
        plugin_src = self._plugins.create_dataset(name, (1,),
                                                  dtype=h5py.new_vlen(str))
        plugin_src[:] = str_of_file(filename)
        plugin_src.attrs['filename'] = filename
def labelAlignments(): logging.info("Labeling alignments using: %s" % runner.args.inputFofn) bcFofn = BarcodeH5Fofn(runner.args.inputFofn) with CmpH5Reader(runner.args.cmpH5) as cmpH5: bcDS = n.zeros((len(cmpH5), 5), dtype="int32") for (i, aln) in enumerate(cmpH5): bcReader = bcFofn.readerForMovie(aln.movieInfo.Name) try: lZmw = bcReader.labeledZmwFromHoleNumber(aln.HoleNumber) if lZmw.nScored < runner.args.minNumBarcodes or \ lZmw.averageScore < runner.args.minAvgBarcodeScore or \ lZmw.scoreRatio < runner.args.minScoreRatio: lZmw = None except KeyError: lZmw = None if lZmw: bcDS[i, :] = n.array([ lZmw.nScored, lZmw.bestIdx, lZmw.bestScore, lZmw.secondBestIdx, lZmw.secondBestScore ]) else: # either no barcode was found for this guy or they got # filtered, hence the NULL_BARCODE bcDS[i, :] = n.array([ 0, len(bcReader.barcodeLabels), 0, len(bcReader.barcodeLabels), 0 ]) # write to the cmp.h5 file. H5 = h5.File(runner.args.cmpH5, 'r+') if BC_INFO_ID in H5: del H5[BC_INFO_ID] if BC_INFO_NAME in H5: del H5[BC_INFO_NAME] # we use the first one to get the labels, if somehow they # don't have all of the same stuff that will be an issue. bcLabels = n.concatenate( (bcFofn.barcodeLabels, n.array([BARCODE_DELIMITER]))) H5.create_dataset(BC_INFO_ID, data=n.array(range(0, len(bcLabels))), dtype='int32') H5.create_dataset(BC_INFO_NAME, data=bcLabels, dtype=h5.new_vlen(str)) if BC_ALN_INFO_DS in H5: del H5[BC_ALN_INFO_DS] bcDS = H5.create_dataset(BC_ALN_INFO_DS, data=bcDS, dtype='int32') bcDS.attrs['ColumnNames'] = n.array( ['count', 'index1', 'score1', 'index2', 'score2']) #force BarcodeMode to have numpy dtype for CmpH5Sort 'extra datasets' routine bcDS.attrs['BarcodeMode'] = n.array(bcFofn.scoreMode) H5.close()
def _write_without_iterate(self, D, group_prefix="/"): for k in D.keys(): if isinstance(D[k],dict): group_prefix_new = group_prefix + k + "/" log_debug(logger, "Writing group %s" % group_prefix_new) if k not in self._f[group_prefix]: self._f.create_group(group_prefix_new) self._write_without_iterate(D[k], group_prefix_new) else: name = group_prefix + k log_debug(logger, "Writing dataset %s" % name) data = D[k] if k not in self._f[group_prefix]: if numpy.isscalar(data): maxshape = (None,) shape = (self._chunksize,) if (isinstance(data, str)): dtype = numpy.dtype(type(data.encode('utf8'))) else: dtype = numpy.dtype(type(data)) if dtype == "S": dtype = h5py.new_vlen(str) axes = "experiment_identifier:value" else: data = numpy.asarray(data) try: h5py.h5t.py_create(data.dtype, logical=1) except TypeError: log_warning(logger, "Could not save dataset %s. Conversion to numpy array failed" % name) continue maxshape = tuple([None]+list(data.shape)) shape = tuple([self._chunksize]+list(data.shape)) dtype = data.dtype ndim = data.ndim axes = "experiment_identifier" if ndim == 1: axes = axes + ":x" elif ndim == 2: axes = axes + ":y:x" elif ndim == 3: axes = axes + ":z:y:x" log_debug(logger, "Create dataset %s [shape=%s, dtype=%s]" % (name,str(shape),str(dtype))) self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, **self._create_dataset_kwargs) self._f[name].attrs.modify("axes",[axes.encode('utf8')]) if self._f[name].shape[0] <= self._i: if numpy.isscalar(data): data_shape = [] else: data_shape = data.shape new_shape = tuple([self._chunksize*(self._i/self._chunksize+1)]+list(data_shape)) log_debug(logger, "Resize dataset %s [old shape: %s, new shape: %s]" % (name,str(self._f[name].shape),str(new_shape))) self._f[name].resize(new_shape) log_debug(logger, "Write to dataset %s at stack position %i" % (name, self._i)) if numpy.isscalar(data): self._f[name][self._i] = data else: self._f[name][self._i,:] = data[:]
def note(self, note):
    """Add a timestamped note to HDF file, in a dataset called 'notes'"""
    ts = datetime.datetime.now()
    try:
        ds = self['notes']
    except:
        ds = self.create_dataset('notes', (0,), maxshape=(None,),
                                 dtype=h5py.new_vlen(str))
    shape = list(ds.shape)
    shape[0] = shape[0] + 1
    ds.resize(shape)
    ds[-1] = str(ts) + ' -- ' + note
    self.flush()
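# The method above calls self.create_dataset and self.flush directly, so it
# presumably lives on a subclass of h5py.File. A hedged usage sketch; the
# LogFile class and file name are purely hypothetical placeholders:

logfile = LogFile('experiment.h5', 'a')   # hypothetical h5py.File subclass
logfile.note('started temperature sweep')
logfile.note('sweep finished without errors')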
def _write_without_iterate(self, D, group_prefix="/"): for k in D.keys(): if isinstance(D[k],dict): group_prefix_new = group_prefix + k + "/" log.log_debug(logger, "Writing group %s" % group_prefix_new) if k not in self._f[group_prefix]: self._f.create_group(group_prefix_new) self._write_without_iterate(D[k], group_prefix_new) else: name = group_prefix + k log.log_debug(logger, "Writing dataset %s" % name) data = D[k] if k not in self._f[group_prefix]: if numpy.isscalar(data): maxshape = (None,) shape = (self._chunksize,) dtype = numpy.dtype(type(data)) if dtype == "S": dtype = h5py.new_vlen(str) axes = "experiment_identifier:value" else: data = numpy.asarray(data) try: h5py.h5t.py_create(data.dtype, logical=1) except TypeError: log.log_warning(logger, "Could not save dataset %s. Conversion to numpy array failed" % name) continue maxshape = tuple([None]+list(data.shape)) shape = tuple([self._chunksize]+list(data.shape)) dtype = data.dtype ndim = data.ndim axes = "experiment_identifier" if ndim == 1: axes = axes + ":x" elif ndim == 2: axes = axes + ":y:x" elif ndim == 3: axes = axes + ":z:y:x" log.log_debug(logger, "Create dataset %s [shape=%s, dtype=%s]" % (name,str(shape),str(dtype))) self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, **self._create_dataset_kwargs) self._f[name].attrs.modify("axes",[axes]) if self._f[name].shape[0] <= self._i: if numpy.isscalar(data): data_shape = [] else: data_shape = data.shape new_shape = tuple([self._chunksize*(self._i/self._chunksize+1)]+list(data_shape)) log.log_debug(logger, "Resize dataset %s [old shape: %s, new shape: %s]" % (name,str(self._f[name].shape),str(new_shape))) self._f[name].resize(new_shape) log.log_debug(logger, "Write to dataset %s at stack position %i" % (name, self._i)) if numpy.isscalar(data): self._f[name][self._i] = data else: self._f[name][self._i,:] = data[:]
def addMeshFromString(self, name, shape_data): """ Add a mesh shape from a string. Accepted format : mesh encoded in VTK .vtp format """ if name not in self._ref: shape = self._ref.create_dataset(name, (1,), dtype=h5py.new_vlen(str)) shape[:] = shape_data shape.attrs['id'] = self._number_of_shapes shape.attrs['type'] = 'vtp' self._shapeid[name] = shape.attrs['id'] self._number_of_shapes += 1
def write_proceesed_data(prdata, grp):
    str_type = h5py.new_vlen(str)
    # (hdf_path, samplerate, samplename, objref)
    meta_info = numpy.dtype([
        ('Path', str_type),
        ('SampleRate', int),
        ('SampleName', str_type),
        ('objref', h5py.h5t.special_dtype(ref=h5py.Reference))])
    if 'raw_sample_info' not in grp:
        # simple case
        grp.create_dataset("raw_sample_info", (len(prdata),), meta_info,
                           numpy.array(prdata, dtype=meta_info),
                           chunks=True, maxshape=None)
def add_shape_data_from_file(self, name, filename):
    """
    Add shape data from a file.
    """
    if name not in self._ref:
        shape = self._ref.create_dataset(name, (1,),
                                         dtype=h5py.new_vlen(str))
        shape[:] = str_of_file(filename)
        shape.attrs['id'] = self._number_of_shapes
        try:
            shape.attrs['type'] = os.path.splitext(filename)[1][1:]
        except:
            shape.attrs['type'] = 'unknown'

        self._number_of_shapes += 1
def writeRegionsTable(regions, fileName, types=PLS_REGION_TYPES,
                      descriptions=PLS_REGION_DESC, sources=PLS_REGION_SRC):
    """Writes out a pls.h5 file containing a regions table defined by the
    arguments to this function."""
    outFile = h5py.File(fileName, 'w')
    shape = (max(1, len(regions)), len(PlsRegion.TABLE_COLUMNS))
    pd = outFile.create_group("PulseData")
    regionTable = pd.create_dataset("Regions", shape, numpy.int32,
                                    maxshape=(None, shape[1]))

    rows = numpy.zeros(shape=shape, dtype=numpy.int32)
    for i, row in enumerate([region.toTableRow() for region in regions]):
        rows[i] = row
    regionTable[:] = rows

    regionTable.attrs["ColumnNames"] = numpy.array(PlsRegion.TABLE_COLUMNS,
                                                   dtype=h5py.new_vlen(str))
    regionTable.attrs["RegionTypes"] = numpy.array(types,
                                                   dtype=h5py.new_vlen(str))
    regionTable.attrs["RegionDescriptions"] = numpy.array(descriptions,
                                                          dtype=h5py.new_vlen(str))
    regionTable.attrs["RegionSources"] = numpy.array(sources,
                                                     dtype=h5py.new_vlen(str))
    outFile.close()
def __init__(self, fname, mode="a"): """ open the hdf5 file depending on the mode defaults are set NEW 2014: if the opened file exists 8and contains data read it in this is used in the pdlp script (not with pydlpoly) """ self.verbose = 0 # self.fname = fname self.mode = mode # self.h5file = h5py.File(fname, mode) # self.file_version = 1.1 # if "version" in self.h5file.attrs.keys(): if (self.mode == "a") or (self.mode == "w"): assert self.file_version == self.h5file.attrs[ "version"], "Exisiting file has a different version! Can not add data" else: self.h5file.attrs["version"] = self.file_version # defaults self.pd = None self.stagelist = [] self.track_data = None self.traj_nstep = 1 self.rest_nstep = 1 # # if "system" in self.h5file.keys(): # ok there is some system so initalize data from here self.stagelist = self.h5file.keys() self.stagelist.remove("system") self.system = self.h5file["system"] self.natoms = self.system["elems"].shape[0] self.bcd = self.system.attrs["bcd"] else: self.system = self.h5file.require_group("system") self.natoms = 0 self.bcd = 0 # # helper object for hdf5 variable length strings self.str_dt = h5py.new_vlen(str) # track charges if floating_charges is True self.floating_charges = False return
def __set_tree(self):
    '''Setup/Check the tree structure of the file'''
    self.param = self.f.require_group('param')  # Setup param group
    self.param.require_group('dim')             # Read parameters from dim.dat files
    self.param.require_group('init')            # Read parameters from params.log file
    self.param.require_group('run')
    self.param.require_group('index')           # Read parameters from index.pro file
    self.data = self.f.require_group('data')    # Setup the data group
    self.etc = self.f.require_group('etc')      # Setup the notes group
    self.etc.require_group('ext')
    try:
        dt = h5py.new_vlen(str)
        self.notes = self.etc.require_dataset('notes', (1,), dtype=dt,
                                              maxshape=(None,))
    except TypeError:
        # additional notes already inserted
        self.notes = self.etc['notes']
    self.__accessed()
def addContactFromOccShape(self, name, occ_shape_name, contact_type,
                           index, collision_group=0, associated_shape=None):
    """
    Add contact reference from a previously added brep.
    """
    if name not in self._ref:
        shape = self._ref.create_dataset(name, (1,),
                                         dtype=h5py.new_vlen(str))
        shape.attrs['id'] = self._number_of_shapes
        shape.attrs['type'] = 'step'
        shape.attrs['contact'] = contact_type
        shape.attrs['step'] = occ_shape_name
        shape.attrs['index'] = index
        if associated_shape is not None:
            shape.attrs['associated_shape'] = associated_shape
        self._shapeid[name] = shape.attrs['id']
        self._number_of_shapes += 1
def write_to_dataset(self, name, data, i): if self.logger is not None: self.logger.debug("Write dataset %s of event %i." % (name, i)) if name not in self.f: # print name t0 = time.time() if numpy.isscalar(data): if i == -1: s = [1] else: s = [self.N] t = numpy.dtype(type(data)) if t == "S": t = h5py.new_vlen(str) axes = "experiment_identifier:value" else: data = numpy.array(data) s = list(data.shape) ndims = len(s) axes = "experiment_identifier" if ndims == 1: axes = axes + ":x" elif ndims == 2: axes = axes + ":y:x" elif ndims == 3: axes = axes + ":z:y:x" if i != -1: s.insert(0, self.N) t = data.dtype self.f.create_dataset(name, s, t) self.f[name].attrs.modify("axes", [axes]) t1 = time.time() if self.logger != None: self.logger.debug("Create dataset %s within %.1f sec.", name, t1 - t0) if i == -1: if numpy.isscalar(data): self.f[name][0] = data else: self.f[name][:] = data[:] else: if numpy.isscalar(data): self.f[name][i] = data else: self.f[name][i, :] = data[:]
def write_to_dataset(self,name,data,i): if self.logger is not None: self.logger.debug("Write dataset %s of event %i." % (name,i)) if name not in self.f: #print name t0 = time.time() if numpy.isscalar(data): if i == -1: s = [1] else: s= [self.N] t=numpy.dtype(type(data)) if t == "S": t = h5py.new_vlen(str) axes = "experiment_identifier:value" else: data = numpy.array(data) s = list(data.shape) ndims = len(s) axes = "experiment_identifier" if ndims == 2: axes = axes + ":x" elif ndims == 3: axes = axes + ":y:x" elif ndims == 4: axes = axes + ":z:y:x" if i != -1: s.insert(0,self.N) t=data.dtype self.f.create_dataset(name,s,t) self.f[name].attrs.modify("axes",[axes]) t1 = time.time() if self.logger != None: self.logger.debug("Create dataset %s within %.1f sec.",name,t1-t0) if i == -1: if numpy.isscalar(data): self.f[name][0] = data else: self.f[name][:] = data[:] else: if numpy.isscalar(data): self.f[name][i] = data else: #print name,data,numpy.array(self.f[name]).shape self.f[name][i,:] = data[:]
def _rdkfeats_writer(output_file=None, features=None): """Returns a (molindex, molid, smiles) processor that computes descriptors using RDKit and stores then in a h5 file. Parameters: - output_file: where the descriptors will be written; this file will be overwritten. - features: a list of the names of the RDKit features that will be computed (by default all the descriptors exposed by the Descriptor class in RDKit) Returns: - a processor function ready to be used as a parameter to _molidsmiles_it. The h5 file has the following data: - 'rdkdescs': a float matrix num_mols x num_descs this will all be nans if the computation failed completely - 'fnames': the name of the feature in each column (num_cols) - 'molids': the molid corresponding to each row (num_rows) """ ensure_dir(op.dirname(output_file)) h5 = h5py.File(output_file, mode='w', dtype=np.float32) computer = RDKitDescriptorsComputer(features) fnames = computer.fnames() nf = len(fnames) descs = h5.create_dataset('rdkdescs', (0, nf), maxshape=(None, nf), compression='lzf') str_type = h5py.new_vlen(str) h5.create_dataset('fnames', data=fnames) molids = h5.create_dataset('molids', shape=(0,), maxshape=(None,), dtype=str_type) def process(molid, smiles): if molid is _END_MOLID: h5.close() return ne = len(molids) try: molids.resize((ne + 1,)) molids[ne] = molid mol = to_rdkit_mol(smiles) descs.resize((ne + 1, nf)) descs[ne, :] = computer.compute(mol)[0] except: info('Failed molecule %s: %s' % (molid, smiles)) descs[ne, :] = [np.nan] * nf return process
def __init__(self, filename, mode):
    self._h5FileHandle = h5py.File(filename, mode)
    self._assignAlignmentIndexCol()
    self._refSeqName2Id = {}
    self._readGroupPath2Id = {}
    self.mode = mode
    self._vlType = h5py.new_vlen(str)
    if mode in ['a', 'w']:
        self._createIndexTables()
    if mode != 'w':
        self._globalIndex = self._h5FileHandle["AlignmentIndex"]
        self._refSeqName = self._h5FileHandle["RefSeqName"]
        self._refSeqID = self._h5FileHandle["RefSeqID"]
        self._readGroupPath = self._h5FileHandle["ReadGroupPath"]
        self._readGroupPathID = self._h5FileHandle["ReadGroupPathID"]
        self._updateRefSeqDict()
        self._updateReadGroupDict()
def add_brep_from_string(self, name, shape_data):
    """
    Add a brep contained in a string.
    """
    if name not in self._ref:
        shape = self._ref.create_dataset(name, (1,),
                                         dtype=h5py.new_vlen(str))
        if type(shape_data) == str:
            # raw str
            shape[:] = shape_data
        else:
            # __getstate__ as with pythonocc
            shape[:] = shape_data[0]
            shape.attrs['occ_indx'] = shape_data[1]

        shape.attrs['id'] = self._number_of_shapes
        shape.attrs['type'] = 'brep'

        self._number_of_shapes += 1
def _createDataset_(self, parent, dataset_name, numpy_array_or_shape, **kwargs):
    """ Creates a new dataset in the data file and returns a pointer to
    it. Raises IOError exception if the dataset already exists.
    """
    dataset_key = self.hdfObjectKey(dataset_name)
    if dataset_key in parent.keys():
        errmsg = "'%s' dataset already exists in current data file."
        raise IOError(errmsg % dataset_name)

    create_args = {}
    attributes = {}
    for name in kwargs:
        if name in DATASET_CREATE_ARGS:
            create_args[safestring(name)] = safevalue(kwargs[name])
        else:
            attributes[safestring(name)] = safevalue(kwargs[name])
    if 'created' not in attributes:
        attributes['created'] = self._timestamp_()

    if 'fillvalue' in create_args:
        if 'dtype' not in create_args:
            errmsg = "'dtype' is required for empty or extendable datasets."
            raise IOError(errmsg)
        shape = numpy_array_or_shape
        dataset = parent.create_dataset(dataset_key, shape, **create_args)
    else:
        # TODO: need to have a better set of checks here
        if 'dtype' not in create_args \
           and numpy_array_or_shape.dtype == N.dtype(object):
            create_args['dtype'] = h5py.new_vlen(str)
        dataset = parent.create_dataset(dataset_key,
                                        data=numpy_array_or_shape,
                                        **create_args)

    for attr_name, attr_value in attributes.items():
        dataset.attrs[attr_name] = attr_value

    return dataset
def __init__(self, sys, filename, mode='w'):
    self.hdf = hd = h5py.File(filename, mode)

    if mode == 'w':
        # Variable length string
        str_type = h5py.new_vlen(str)

        tl = hd.create_dataset('types', (sys.n,), str_type)
        tl[:] = [at.type for at in sys.atoms]

        bsize = hd.create_dataset('boxsize', (1,), 'f')
        bsize[0] = sys.boxsize

        # The coordinate list
        cl = hd.create_dataset('coordlist', (1, sys.n, 3), 'f',
                               maxshape=(None, sys.n, 3),
                               compression='gzip', compression_opts=4)
        cl[0] = sys.rarray

        # The velocity list
        vl = hd.create_dataset('velocitylist', (1, sys.n, 3), 'f',
                               maxshape=(None, sys.n, 3),
                               compression='gzip', compression_opts=4)
        vl[0] = sys.varray
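# A minimal sketch of reading the trajectory layout written above back with
# plain h5py; 'traj.h5' is an assumed file name, and only the datasets created
# in __init__ are accessed:

import h5py

with h5py.File('traj.h5', 'r') as hd:
    types = hd['types'][:]                    # per-atom type strings
    boxsize = hd['boxsize'][0]
    n_frames = hd['coordlist'].shape[0]
    last_positions = hd['coordlist'][n_frames - 1]       # (n_atoms, 3)
    last_velocities = hd['velocitylist'][n_frames - 1]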
def write_to_dataset(self, data, name, i=None): if self.logger != None: self.logger.debug("Write dataset %s of event %i." % (name, i)) if name not in self.f: t0 = time.time() if numpy.isscalar(data): if i is None: s = [1] else: s = [self.N] t = numpy.dtype(type(data)) if t == "S": t = h5py.new_vlen(str) axes = "experiment_identifier:value" else: s = list(data.shape) if i is not None: s.insert(0, self.N) t = data.dtype axes = "experiment_identifier:y:x" self.f.create_dataset(name, s, t) self.f[name].attrs.modify("axes", [axes]) t1 = time.time() if self.logger != None: self.logger.debug("Create dataset %s within %.1f sec.", name, t1 - t0) if i is None: if numpy.isscalar(data): self.f[name][0] = data else: self.f[name][:] = data[:] else: if numpy.isscalar(data): self.f[name][i] = data else: self.f[name][i, :] = data[:]
def _createDataset_(self, parent, dataset_name, numpy_array, attributes, **kwargs):
    """ Creates a new dataset in the data file and returns a pointer to
    it. Raises IOError exception if the dataset already exists.
    """
    dataset_key = safeDataKey(dataset_name)
    if dataset_key in parent.keys():
        errmsg = "'%s' dataset already exists in current data file."
        raise IOError(errmsg % dataset_name)

    create_args = {}
    for arg_name in kwargs:
        create_args[safe_name(arg_name)] = kwargs[arg_name]

    if 'maxshape' in create_args:
        if 'dtype' not in create_args:
            raise IOError("'dtype' is required for extendable datasets.")
        if len(numpy_array) != len(create_args['maxshape']):
            errmsg = '3rd argument must be the initial shape of the array.'
            raise IOError(errmsg)
        initial_shape = numpy_array
        dataset = parent.create_dataset(dataset_key, initial_shape,
                                        **create_args)
    else:
        if 'dtype' not in create_args \
           and numpy_array.dtype == N.dtype(object):
            create_args['dtype'] = h5py.new_vlen(str)
        dataset = parent.create_dataset(dataset_key, data=numpy_array,
                                        **create_args)

    for attr_name, attr_value in attributes.items():
        if attr_name != 'dtype' and attr_value is not None:
            dataset.attrs[safeDataKey(attr_name)] = safevalue(attr_value)

    return dataset
def write_exchange_dataset_from_stack(file_name, image_stack, energies): #And adds the necessary groups, datasets and attributes(Scientific Data Exchange standard HDF5 format data) xnumber = np.arange(image_stack.shape[0] * 1.0) ynumber = np.arange(image_stack.shape[1] * 1.0) enumber = np.arange(image_stack.shape[2] * 1.0) #print xnumber.shape,ynumber.shape,xnumber,ynumber inumber = np.ones(image_stack.shape[2]) comment = '' f1 = h5py.File(file_name, 'w') dset = f1.create_group("exchange") dset2 = dset.create_dataset("data", data=image_stack) dset2.attrs['axes'] = 'x:y' dset2.attrs['signal'] = 1 dset3 = dset.create_dataset("energy", data=energies) dset3.attrs['units'] = 'eV' dset4 = dset.create_dataset("x", data=xnumber) dset5 = dset.create_dataset("y", data=ynumber) str_type = h5py.new_vlen(str) eset = f1.create_dataset("implements", shape=(1, ), dtype=str_type) eset[:] = 'information:exchange:spectromicroscopy' fset = f1.create_group("information") fset2 = fset.create_dataset("comment", shape=(1, ), dtype=str_type) fset2[:] = comment fset3 = fset.create_dataset("file_creation_datetime", shape=(1, ), dtype=str_type) fset3[:] = "2012-07-11T09:15" fset3.attrs['file_creation_datetime'] = 'time' gset = f1.create_group("spectromicroscopy") gset2 = gset.create_group("normalization") gset3 = gset2.create_dataset("white_spectrum", data=inumber) gset4 = gset2.create_dataset("white_spectrum_energy", data=enumber) gset4.attrs['units'] = 'eV' hset = f1.create_dataset("version", shape=(1, ), dtype=str_type) hset[:] = '1.0' f1.close()
def postprocessClusterSubResult(self, roi, result, blockwise_fileset): """ This function is only used by special cluster scripts. When the batch-processing mechanism was rewritten, this function broke. It could probably be fixed with minor changes. """ # TODO: Here, we hard-code to select from the first lane only. opBatchClassify = self.opBatchClassify[0] from lazyflow.utility.io_uti.blockwiseFileset import vectorized_pickle_dumps # Assume that roi always starts as a multiple of the blockshape block_shape = opBatchClassify.get_blockshape() assert all(block_shape == blockwise_fileset.description.sub_block_shape), "block shapes don't match" assert all((roi[0] % block_shape) == 0), "Sub-blocks must exactly correspond to the blockwise object classification blockshape" sub_block_index = roi[0] / blockwise_fileset.description.sub_block_shape sub_block_start = sub_block_index sub_block_stop = sub_block_start + 1 sub_block_roi = (sub_block_start, sub_block_stop) # FIRST, remove all objects that lie outside the block (i.e. remove the ones in the halo) region_features = opBatchClassify.BlockwiseRegionFeatures( *sub_block_roi ).wait() region_features_dict = region_features.flat[0] region_centers = region_features_dict['Default features']['RegionCenter'] opBlockPipeline = opBatchClassify._blockPipelines[ tuple(roi[0]) ] # Compute the block offset within the image coordinates halo_roi = opBlockPipeline._halo_roi translated_region_centers = region_centers + halo_roi[0][1:-1] # TODO: If this is too slow, vectorize this mask = numpy.zeros( region_centers.shape[0], dtype=numpy.bool_ ) for index, translated_region_center in enumerate(translated_region_centers): # FIXME: Here we assume t=0 and c=0 mask[index] = opBatchClassify.is_in_block( roi[0], (0,) + tuple(translated_region_center) + (0,) ) # Always exclude the first object (it's the background??) mask[0] = False # Remove all 'negative' predictions, emit only 'positive' predictions # FIXME: Don't hardcode this? POSITIVE_LABEL = 2 objectwise_predictions = opBlockPipeline.ObjectwisePredictions([]).wait()[0] assert objectwise_predictions.shape == mask.shape mask[objectwise_predictions != POSITIVE_LABEL] = False filtered_features = {} for feature_group, feature_dict in region_features_dict.items(): filtered_group = filtered_features[feature_group] = {} for feature_name, feature_array in feature_dict.items(): filtered_group[feature_name] = feature_array[mask] # SECOND, translate from block-local coordinates to global (file) coordinates. # Unfortunately, we've got multiple translations to perform here: # Coordinates in the region features are relative to their own block INCLUDING HALO, # so we need to add the start of the block-with-halo as an offset. 
# BUT the image itself may be offset relative to the BlockwiseFileset coordinates # (due to the view_origin setting), so we also need to add an offset for that, too # Get the image offset relative to the file coordinates image_offset = blockwise_fileset.description.view_origin total_offset_5d = halo_roi[0] + image_offset total_offset_3d = total_offset_5d[1:-1] filtered_features["Default features"]["RegionCenter"] += total_offset_3d filtered_features["Default features"]["Coord<Minimum>"] += total_offset_3d filtered_features["Default features"]["Coord<Maximum>"] += total_offset_3d # Finally, write the features to hdf5 h5File = blockwise_fileset.getOpenHdf5FileForBlock( roi[0] ) if 'pickled_region_features' in h5File: del h5File['pickled_region_features'] # Must use str dtype dtype = h5py.new_vlen(str) dataset = h5File.create_dataset( 'pickled_region_features', shape=(1,), dtype=dtype ) pickled_features = vectorized_pickle_dumps(numpy.array((filtered_features,))) dataset[0] = pickled_features object_centers_xyz = filtered_features["Default features"]["RegionCenter"].astype(int) object_min_coords_xyz = filtered_features["Default features"]["Coord<Minimum>"].astype(int) object_max_coords_xyz = filtered_features["Default features"]["Coord<Maximum>"].astype(int) object_sizes = filtered_features["Default features"]["Count"][:,0].astype(int) # Also, write out selected features as a 'point cloud' csv file. # (Store the csv file next to this block's h5 file.) dataset_directory = blockwise_fileset.getDatasetDirectory(roi[0]) pointcloud_path = os.path.join( dataset_directory, "block-pointcloud.csv" ) logger.info("Writing to csv: {}".format( pointcloud_path )) with open(pointcloud_path, "w") as fout: csv_writer = csv.DictWriter(fout, OUTPUT_COLUMNS, **CSV_FORMAT) csv_writer.writeheader() for obj_id in range(len(object_sizes)): fields = {} fields["x_px"], fields["y_px"], fields["z_px"], = object_centers_xyz[obj_id] fields["min_x_px"], fields["min_y_px"], fields["min_z_px"], = object_min_coords_xyz[obj_id] fields["max_x_px"], fields["max_y_px"], fields["max_z_px"], = object_max_coords_xyz[obj_id] fields["size_px"] = object_sizes[obj_id] csv_writer.writerow( fields ) #fout.flush() logger.info("FINISHED csv export")
#processing potentials voltage_path = os.path.join(path_folder, 'v', 'tmp') voltage_name = 'v' h = read_analog(voltage_path, voltage_name, t_max, all_pops, h) print('Done processing all voltages') #processing input spike data spike_path = os.path.join(path_folder, 'input.dat') spike_name = 'input' h = read_digital(spike_path, spike_name, t_max, all_pops, h) print('Done processing all input spikes') #processing output spike data spike_path = os.path.join(path_folder, 'output.dat') spike_name = 'output' h = read_digital(spike_path, spike_name, t_max, all_pops, h) print('Done processing all output spikes') #processing log information log_file = os.path.join(path_folder, log_filename) f = open(log_file, 'r') str_type = h5py.new_vlen(str) #ds = h.create_dataset('/model/log.txt', data=f.read().replace('\n',''), dtype=str_type) ds = h.create_dataset('/model/log.txt', data=f.readlines(), dtype=str_type) f.close() # #process time data add_time(h, t_max, t_step) h.close()
def _h5write(filename, mode, *args, **kwargs): """\ _h5write(filename, mode, {'var1'=..., 'var2'=..., ...}) _h5write(filename, mode, var1=..., var2=..., ...) _h5write(filename, mode, dict, var1=..., var2=...) Writes variables var1, var2, ... to file filename. The file mode can be chosen according to the h5py documentation. The key-value arguments have precedence on the provided dictionnary. supported variable types are: * scalars * numpy arrays * strings * lists * dictionaries (if the option UNSUPPORTED is equal to 'pickle', any other type is pickled and saved. UNSUPPORTED = 'ignore' silently eliminates unsupported types. Default is 'fail', which raises an error.) The file mode can be chosen according to the h5py documentation. It defaults to overwriting an existing file. """ filename = os.path.abspath(os.path.expanduser(filename)) ctime = time.asctime() mtime = ctime # Update input dictionnary if args: d = args[0].copy() # shallow copy else: d = {} d.update(kwargs) # List of object ids to make sure we are not saving something twice. ids = [] # This is needed to store strings dt = h5py.new_vlen(str) def check_id(id): if id in ids: raise RuntimeError('Circular reference detected! Aborting save.') else: ids.append(id) def pop_id(id): ids[:] = [x for x in ids if x != id] #@sdebug def _store_numpy(group, a, name, compress=True): if compress: dset = group.create_dataset(name, data=a, compression='gzip') else: dset = group.create_dataset(name, data=a) dset.attrs['type'] = 'array' return dset #@sdebug def _store_string(group, s, name): dset = group.create_dataset(name, data=np.asarray(s), dtype=dt) dset.attrs['type'] = 'string' return dset #@sdebug def _store_unicode(group, s, name): dset = group.create_dataset(name, data=np.asarray(s.encode('utf8')), dtype=dt) dset.attrs['type'] = 'unicode' return dset #@sdebug def _store_list(group, l, name): check_id(id(l)) arrayOK = len(set([type(x) for x in l])) == 1 if arrayOK: try: # Try conversion to a numpy array la = np.array(l) if la.dtype.type is np.string_: arrayOK = False else: dset = _store_numpy(group, la, name) dset.attrs['type'] = 'arraylist' except: arrayOK = False if not arrayOK: # inhomogenous list. 
Store all elements individually dset = group.create_group(name) for i, v in enumerate(l): _store(dset, v, '%05d' % i) dset.attrs['type'] = 'list' pop_id(id(l)) return dset #@sdebug def _store_tuple(group, t, name): dset = _store_list(group, list(t), name) dset_type = dset.attrs['type'] dset.attrs[ 'type'] = 'arraytuple' if dset_type == 'arraylist' else 'tuple' return dset #@sdebug def _store_dict(group, d, name): check_id(id(d)) if any([type(k) not in [str, unicode] for k in d.keys()]): raise RuntimeError( 'Only dictionaries with string keys are supported.') dset = group.create_group(name) dset.attrs['type'] = 'dict' for k, v in d.iteritems(): if k.find('/') > -1: k = k.replace('/', h5options['SLASH_ESCAPE']) ndset = _store(dset, v, k) if ndset is not None: ndset.attrs['escaped'] = '1' else: _store(dset, v, k) pop_id(id(d)) return dset def _store_dict_new(group, d, name): check_id(id(d)) dset = group.create_group(name) dset.attrs['type'] = 'dict' for i, kv in enumerate(d.iteritems()): _store(dset, kv, '%05d' % i) pop_id(id(d)) return dset #@sdebug def _store_None(group, a, name): dset = group.create_dataset(name, data=np.zeros((1, ))) dset.attrs['type'] = 'None' return dset #@sdebug def _store_pickle(group, a, name): apic = cPickle.dumps(a) dset = group.create_dataset(name, data=np.asarray(apic), dtype=dt) dset.attrs['type'] = 'pickle' return dset #@sdebug def _store(group, a, name): if type(a) is str: dset = _store_string(group, a, name) elif type(a) is unicode: dset = _store_unicode(group, a, name) elif type(a) is dict: dset = _store_dict(group, a, name) elif type(a) is list: dset = _store_list(group, a, name) elif type(a) is tuple: dset = _store_tuple(group, a, name) elif type(a) is np.ndarray: dset = _store_numpy(group, a, name) elif np.isscalar(a): dset = _store_numpy(group, np.asarray(a), name, compress=False) dset.attrs['type'] = 'scalar' elif a is None: dset = _store_None(group, a, name) else: if h5options['UNSUPPORTED'] == 'fail': raise RuntimeError('Unsupported data type : %s' % type(a)) elif h5options['UNSUPPORTED'] == 'pickle': dset = _store_pickle(group, a, name) else: dset = None return dset # Open the file and save everything with h5py.File(filename, mode) as f: f.attrs['h5rw_version'] = h5options['H5RW_VERSION'] f.attrs['ctime'] = ctime f.attrs['mtime'] = mtime for k, v in d.iteritems(): _store(f, v, k) return
def agglomeration(options, agglom_stack, supervoxels, prediction, image_stack,
                  session_location, sp_outs, master_logger):
    seg_thresholds = sorted(options.segmentation_thresholds)
    for threshold in seg_thresholds:
        if threshold != 0 or not options.use_neuroproof:
            master_logger.info("Starting agglomeration to threshold " + str(threshold)
                               + " with " + str(agglom_stack.number_of_nodes()))
            agglom_stack.agglomerate(threshold)
            master_logger.info("Finished agglomeration to threshold " + str(threshold)
                               + " with " + str(agglom_stack.number_of_nodes()))

            if options.inclusion_removal:
                inclusion_removal(agglom_stack, master_logger)

        segmentation = agglom_stack.get_segmentation()

        if options.h5_output:
            imio.write_image_stack(segmentation,
                                   session_location + "/agglom-" + str(threshold) + ".lzf.h5",
                                   compression='lzf')

        md5hex = hashlib.md5(' '.join(sys.argv)).hexdigest()
        file_base = os.path.abspath(session_location) + "/seg_data/seg-" + str(threshold) + "-" + md5hex + "-"
        transforms = imio.compute_sp_to_body_map(supervoxels, segmentation)
        seg_loc = file_base + "v1.h5"
        if not os.path.exists(session_location + "/seg_data"):
            os.makedirs(session_location + "/seg_data")
        imio.write_mapped_segmentation(supervoxels, transforms, seg_loc)

        if options.synapse_file is not None:
            h5temp = h5py.File(seg_loc, 'a')
            syn_data = json.load(open(options.synapse_file))
            meta = syn_data['metadata']
            meta['username'] = "******"
            syn_data_str = json.dumps(syn_data, indent=4)
            str_type = h5py.new_vlen(str)
            ds = h5temp.create_dataset("synapse-annotations", data=syn_data_str,
                                       shape=(1,), dtype=str_type)

        graph_loc = file_base + "graphv1.json"

        json_data = {}
        json_data['graph'] = graph_loc
        json_data['border'] = options.border_size

        subvolume = {}
        subvolume['segmentation-file'] = seg_loc
        subvolume['prediction-file'] = os.path.abspath(session_location) + "/STACKED_prediction.h5"

        gray_file_whole = os.path.abspath(glob.glob(options.image_stack)[0])
        gray_path = os.path.dirname(gray_file_whole)
        gray_file = os.path.basename(gray_file_whole)
        field_width = len(re.findall(r'\d', gray_file))
        field_rep = "%%0%dd" % field_width
        gray_file = re.sub(r'\d+', field_rep, gray_file)
        subvolume['grayscale-files'] = gray_path + "/" + gray_file

        # get extent
        x1 = options.border_size
        y1 = options.border_size
        z1 = options.border_size
        z2, y2, x2 = supervoxels.shape
        z2 = z2 - options.border_size - 1
        y2 = y2 - options.border_size - 1
        x2 = x2 - options.border_size - 1

        extent = re.findall(r'\d+-\d+_\d+-\d+_\d+-\d+', gray_path)
        if len(extent) > 0:
            bbox = extent[0]
            x1, x2, y1, y2, z1, z2 = re.findall(r'\d+', bbox)

        subvolume["far-upper-right"] = [int(x2), int(y2), int(z2)]
        subvolume["near-lower-left"] = [int(x1), int(y1), int(z1)]
        json_data['subvolumes'] = [subvolume]

        agglom_stack.write_plaza_json(graph_loc, options.synapse_file,
                                      int(z1) - options.border_size)

        # write out json file
        json_str = json.dumps(json_data, indent=4)
        json_file = session_location + "/seg-" + str(threshold) + "-" + md5hex + "-v1.json"
        jw = open(json_file, 'w')
        jw.write(json_str)
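
# --- Hedged sketch (not part of the original pipeline) --------------------
# Distills the "synapse-annotations" pattern used above: serialize a dict to
# JSON and store it as a one-element variable-length string dataset. The file
# name 'annotations_demo.h5' and the dict contents are invented; the pipeline
# writes into the per-threshold seg_loc file instead. Newer h5py releases
# spell the dtype h5py.special_dtype(vlen=str) rather than h5py.new_vlen(str).
import json
import h5py

annotations = {'metadata': {'username': 'anonymous'}, 'synapses': []}
with h5py.File('annotations_demo.h5', 'a') as fh:
    str_type = h5py.new_vlen(str)
    if 'synapse-annotations' in fh:
        del fh['synapse-annotations']
    dset = fh.create_dataset('synapse-annotations', shape=(1,), dtype=str_type)
    dset[0] = json.dumps(annotations, indent=4)
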
def postprocessClusterSubResult(self, roi, result, blockwise_fileset):
    """
    This function is only used by special cluster scripts.

    When the batch-processing mechanism was rewritten, this function broke.
    It could probably be fixed with minor changes.
    """
    # TODO: Here, we hard-code to select from the first lane only.
    opBatchClassify = self.opBatchClassify[0]

    from lazyflow.utility.io.blockwiseFileset import vectorized_pickle_dumps

    # Assume that roi always starts as a multiple of the blockshape
    block_shape = opBatchClassify.get_blockshape()
    assert all(block_shape == blockwise_fileset.description.sub_block_shape), \
        "block shapes don't match"
    assert all((roi[0] % block_shape) == 0), \
        "Sub-blocks must exactly correspond to the blockwise object classification blockshape"
    sub_block_index = roi[0] / blockwise_fileset.description.sub_block_shape

    sub_block_start = sub_block_index
    sub_block_stop = sub_block_start + 1
    sub_block_roi = (sub_block_start, sub_block_stop)

    # FIRST, remove all objects that lie outside the block (i.e. remove the ones in the halo)
    region_features = opBatchClassify.BlockwiseRegionFeatures(*sub_block_roi).wait()
    region_features_dict = region_features.flat[0]
    region_centers = region_features_dict['Default features']['RegionCenter']

    opBlockPipeline = opBatchClassify._blockPipelines[tuple(roi[0])]

    # Compute the block offset within the image coordinates
    halo_roi = opBlockPipeline._halo_roi

    translated_region_centers = region_centers + halo_roi[0][1:-1]

    # TODO: If this is too slow, vectorize this
    mask = numpy.zeros(region_centers.shape[0], dtype=numpy.bool_)
    for index, translated_region_center in enumerate(translated_region_centers):
        # FIXME: Here we assume t=0 and c=0
        mask[index] = opBatchClassify.is_in_block(roi[0], (0,) + tuple(translated_region_center) + (0,))

    # Always exclude the first object (it's the background??)
    mask[0] = False

    # Remove all 'negative' predictions, emit only 'positive' predictions
    # FIXME: Don't hardcode this?
    POSITIVE_LABEL = 2
    objectwise_predictions = opBlockPipeline.ObjectwisePredictions([]).wait()[0]
    assert objectwise_predictions.shape == mask.shape
    mask[objectwise_predictions != POSITIVE_LABEL] = False

    filtered_features = {}
    for feature_group, feature_dict in region_features_dict.items():
        filtered_group = filtered_features[feature_group] = {}
        for feature_name, feature_array in feature_dict.items():
            filtered_group[feature_name] = feature_array[mask]

    # SECOND, translate from block-local coordinates to global (file) coordinates.
    # Unfortunately, we've got multiple translations to perform here:
    # Coordinates in the region features are relative to their own block INCLUDING HALO,
    # so we need to add the start of the block-with-halo as an offset.
    # BUT the image itself may be offset relative to the BlockwiseFileset coordinates
    # (due to the view_origin setting), so we need to add an offset for that, too.

    # Get the image offset relative to the file coordinates
    image_offset = blockwise_fileset.description.view_origin

    total_offset_5d = halo_roi[0] + image_offset
    total_offset_3d = total_offset_5d[1:-1]

    filtered_features["Default features"]["RegionCenter"] += total_offset_3d
    filtered_features["Default features"]["Coord<Minimum>"] += total_offset_3d
    filtered_features["Default features"]["Coord<Maximum>"] += total_offset_3d

    # Finally, write the features to hdf5
    h5File = blockwise_fileset.getOpenHdf5FileForBlock(roi[0])
    if 'pickled_region_features' in h5File:
        del h5File['pickled_region_features']

    # Must use str dtype
    dtype = h5py.new_vlen(str)
    dataset = h5File.create_dataset('pickled_region_features', shape=(1,), dtype=dtype)
    pickled_features = vectorized_pickle_dumps(numpy.array((filtered_features,)))
    dataset[0] = pickled_features

    object_centers_xyz = filtered_features["Default features"]["RegionCenter"].astype(int)
    object_min_coords_xyz = filtered_features["Default features"]["Coord<Minimum>"].astype(int)
    object_max_coords_xyz = filtered_features["Default features"]["Coord<Maximum>"].astype(int)
    object_sizes = filtered_features["Default features"]["Count"][:, 0].astype(int)

    # Also, write out selected features as a 'point cloud' csv file.
    # (Store the csv file next to this block's h5 file.)
    dataset_directory = blockwise_fileset.getDatasetDirectory(roi[0])
    pointcloud_path = os.path.join(dataset_directory, "block-pointcloud.csv")

    logger.info("Writing to csv: {}".format(pointcloud_path))
    with open(pointcloud_path, "w") as fout:
        csv_writer = csv.DictWriter(fout, OUTPUT_COLUMNS, **CSV_FORMAT)
        csv_writer.writeheader()
        for obj_id in range(len(object_sizes)):
            fields = {}
            fields["x_px"], fields["y_px"], fields["z_px"] = object_centers_xyz[obj_id]
            fields["min_x_px"], fields["min_y_px"], fields["min_z_px"] = object_min_coords_xyz[obj_id]
            fields["max_x_px"], fields["max_y_px"], fields["max_z_px"] = object_max_coords_xyz[obj_id]
            fields["size_px"] = object_sizes[obj_id]

            csv_writer.writerow(fields)
        #fout.flush()

    logger.info("FINISHED csv export")
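
# --- Hedged read-back sketch (not part of the original workflow) ----------
# Shows how the 'pickled_region_features' dataset written above might be
# loaded again. It assumes vectorized_pickle_dumps wraps cPickle, so
# cPickle.loads is taken to be the matching inverse; depending on how the
# features were wrapped, the result is either the filtered feature dict or a
# one-element array containing it. The path 'block.h5' is invented.
import cPickle
import h5py

with h5py.File('block.h5', 'r') as blockFile:
    pickled = blockFile['pickled_region_features'][0]
    filtered_features = cPickle.loads(pickled)
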
import numpy as np
import h5py
import argparse
import sys
import os

import west

# h5py storage types
vstr_dtype = h5py.new_vlen(str)
idtype = np.dtype([('iter_name', vstr_dtype), ('string_index', np.int32)])

print '-----------------------'
print os.path.basename(__file__)
print '-----------------------'

env = os.environ
for k in env:
    if 'WEST' in k:
        print k, env[k]

parser = argparse.ArgumentParser('get_strings', description='''\
Retrieve strings from the west.h5 file and write them to a new file''')
west.rc.add_args(parser)
parser.add_argument('-o', dest='h5out', help='name of output file')

args = parser.parse_args()
west.rc.process_args(args)
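
# --- Hedged continuation sketch (not from the original script) ------------
# Illustrates how the idtype compound records defined above could be written
# to the output file named by -o. The iteration names, indices, and dataset
# name are invented; the real script presumably pulls them from the opened
# west.h5 data manager.
out = h5py.File(args.h5out or 'strings_out.h5', 'w')
records = np.zeros(2, dtype=idtype)
records[0] = ('iter_00000001', 0)
records[1] = ('iter_00000002', 3)
out.create_dataset('iter_strings', data=records)
out.close()
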