def go(self):
    """Trace each recorded endpoint segment back through its history.

    For every ``(n_iter, seg_id)`` pair in ``self.endpoints``, writes a text
    trace file (``<trajname>_trace.txt``) and an HDF5 group under
    ``/trajectories`` in ``self.output_file``, then attaches any auxiliary
    per-timepoint datasets requested in ``self.datasets``.

    Fix vs. previous version: auxiliary HDF5 files were opened but never
    closed, and the cache was reset per endpoint so the same file could be
    reopened many times. The cache now spans all endpoints and every handle
    is closed on exit.
    """
    self.data_reader.open('r')

    # Create a new 'trajectories' group if this is the first trace;
    # on subsequent traces reuse the existing group.
    try:
        trajs_group = h5io.create_hdf5_group(self.output_file, 'trajectories', replace=False, creating_program=self.prog)
    except ValueError:
        trajs_group = self.output_file['trajectories']

    # Cache of auxiliary HDF5 files, keyed by filename; opened lazily,
    # shared across all endpoints, and always closed in the finally block.
    aux_h5files = {}
    try:
        for n_iter, seg_id in self.endpoints:
            trajname = self.output_pattern % (n_iter, seg_id)
            trajgroup = trajs_group.create_group(trajname)

            trace = Trace.from_data_manager(n_iter, seg_id, self.data_reader.data_manager)

            with open(trajname + '_trace.txt', 'wt') as trace_output:
                self.emit_trace_text(trace, trace_output)

            self.emit_trace_h5(trace, trajgroup)

            for dsinfo in self.datasets:
                dsname = dsinfo['dsname']
                filename = dsinfo.get('file')
                if filename:
                    try:
                        aux_h5file = aux_h5files[filename]
                    except KeyError:
                        aux_h5file = aux_h5files[filename] = h5py.File(filename, 'r')
                else:
                    aux_h5file = None

                slice_ = dsinfo.get('slice')
                alias = dsinfo.get('alias', dsname)
                index = dsinfo.get('index')

                data, weights = trace.trace_timepoint_dataset(dsname, auxfile=aux_h5file, slice_=slice_, index_ds=index)

                # Save data to HDF5, replacing any existing dataset of the same name
                try:
                    del trajgroup[alias]
                except KeyError:
                    pass
                trajgroup[alias] = data

                # All weight vectors will be the same length, so only store in HDF5 once
                if not ('weights' in trajgroup and trajgroup['weights'].shape == weights.shape):
                    try:
                        del trajgroup['weights']
                    except KeyError:
                        pass
                    trajgroup['weights'] = weights
    finally:
        # Close every auxiliary file we opened, even on error.
        for aux_h5file in aux_h5files.values():
            aux_h5file.close()
def go(self):
    """Write a text trace and an HDF5 trajectory group for every endpoint.

    Iterates ``self.endpoints`` (pairs of ``n_iter, seg_id``), emitting
    ``<trajname>_trace.txt`` plus a group under ``/trajectories`` of
    ``self.output_file``, including any auxiliary datasets configured in
    ``self.datasets``.

    Fix: auxiliary ``h5py.File`` handles were previously leaked (never
    closed) and reopened for every endpoint; they are now cached once and
    closed deterministically.
    """
    self.data_reader.open('r')

    # Create a new 'trajectories' group if this is the first trace,
    # falling back to the existing group otherwise.
    try:
        trajs_group = h5io.create_hdf5_group(self.output_file, 'trajectories', replace=False, creating_program=self.prog)
    except ValueError:
        trajs_group = self.output_file['trajectories']

    aux_h5files = {}  # filename -> open h5py.File; closed in the finally block
    try:
        for n_iter, seg_id in self.endpoints:
            trajname = self.output_pattern % (n_iter, seg_id)
            trajgroup = trajs_group.create_group(trajname)

            trace = Trace.from_data_manager(n_iter, seg_id, self.data_reader.data_manager)

            with open(trajname + '_trace.txt', 'wt') as trace_output:
                self.emit_trace_text(trace, trace_output)

            self.emit_trace_h5(trace, trajgroup)

            for dsinfo in self.datasets:
                dsname = dsinfo['dsname']
                filename = dsinfo.get('file')
                if filename:
                    try:
                        aux_h5file = aux_h5files[filename]
                    except KeyError:
                        aux_h5file = aux_h5files[filename] = h5py.File(filename, 'r')
                else:
                    aux_h5file = None

                slice_ = dsinfo.get('slice')
                alias = dsinfo.get('alias', dsname)
                index = dsinfo.get('index')

                data, weights = trace.trace_timepoint_dataset(dsname, auxfile=aux_h5file, slice_=slice_, index_ds=index)

                # Save data to HDF5, overwriting a pre-existing dataset if present
                try:
                    del trajgroup[alias]
                except KeyError:
                    pass
                trajgroup[alias] = data

                # All weight vectors will be the same length, so only store in HDF5 once
                if not ('weights' in trajgroup and trajgroup['weights'].shape == weights.shape):
                    try:
                        del trajgroup['weights']
                    except KeyError:
                        pass
                    trajgroup['weights'] = weights
    finally:
        for aux_h5file in aux_h5files.values():
            aux_h5file.close()
def calc_store_flux_data(self):
    """Compute mean target fluxes with bootstrap confidence intervals.

    Stores per-target time series, a normalized flux autocorrelation
    function, and a summary index under the 'target_flux' group of
    ``self.output_h5file``, plus an 'avg_flux' summary dataset.
    """
    iter_start = self.iter_range.iter_start
    iter_stop = self.iter_range.iter_stop
    westpa.rc.pstatus('Calculating mean flux and confidence intervals for iterations [{},{})'.format(iter_start, iter_stop))

    fluxdata = extract_fluxes(iter_start, iter_stop, self.data_reader)

    # Group that will hold everything this analysis produces
    output_group = h5io.create_hdf5_group(self.output_h5file, 'target_flux', replace=False, creating_program=self.prog)
    self.output_group = output_group
    output_group.attrs['version_code'] = self.output_format_version
    self.iter_range.record_data_iter_range(output_group)

    n_targets = len(fluxdata)
    index = numpy.empty((n_targets,), dtype=target_index_dtype)
    avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

    for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
        # Per-target group plus its entry in the summary index
        index[itarget]['target_label'] = str(target_label)
        target_group = output_group.create_group('target_{}'.format(itarget))
        self.target_groups[target_label] = target_group

        # Raw per-iteration values
        target_group['n_iter'] = target_fluxdata['n_iter']
        target_group['count'] = target_fluxdata['count']
        target_group['flux'] = target_fluxdata['flux']
        h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

        # Normalized flux autocorrelation via FFT-based convolution
        fluxes = target_fluxdata['flux']
        deviations = fluxes - fluxes.mean()
        acorr = fftconvolve(deviations, deviations[::-1])
        acorr = acorr[len(acorr) // 2:]
        acorr /= acorr[0]
        acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
        h5io.label_axes(acorr_ds, ['lag'], ['tau'])

        # Bootstrap the overall mean flux and its confidence interval
        avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
            {'dataset': fluxes},
            estimator=(lambda stride, dataset: numpy.mean(dataset)),
            alpha=self.alpha,
            n_sets=self.n_sets,
            autocorrel_alpha=self.autocorrel_alpha,
            subsample=numpy.mean,
            do_correl=self.do_correl,
            mcbs_enable=self.mcbs_enable,
        )
        avg_fluxdata[itarget] = (iter_start, iter_stop, avg, lb_ci, ub_ci, sterr, correl_len)

        westpa.rc.pstatus('target {!r}:'.format(target_label))
        westpa.rc.pstatus(' correlation length = {} tau'.format(correl_len))
        westpa.rc.pstatus(' mean flux and CI = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))

        index[itarget]['mean_flux'] = avg
        index[itarget]['mean_flux_ci_lb'] = lb_ci
        index[itarget]['mean_flux_ci_ub'] = ub_ci
        index[itarget]['mean_flux_correl_len'] = correl_len

    # Write the summary index along with the bootstrap parameters used
    index_ds = output_group.create_dataset('index', data=index)
    index_ds.attrs['mcbs_alpha'] = self.alpha
    index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
    index_ds.attrs['mcbs_n_sets'] = self.n_sets

    self.fluxdata = fluxdata
    self.output_h5file['avg_flux'] = avg_fluxdata
def calc_store_flux_data(self):
    """Compute mean target fluxes with bootstrap confidence intervals.

    For each target in the flux data extracted over the configured
    iteration range, stores per-iteration counts/fluxes, a normalized flux
    autocorrelation function, and bootstrap statistics under the
    'target_flux' group of ``self.output_h5file``; also writes an
    'avg_flux' summary dataset.

    Fix: ``dict.iteritems()`` does not exist in Python 3 (the sibling
    implementation in this file already uses ``items()``); it raised
    ``AttributeError`` here.
    """
    westpa.rc.pstatus('Calculating mean flux and confidence intervals for iterations [{},{})'
                      .format(self.iter_range.iter_start, self.iter_range.iter_stop))

    fluxdata = extract_fluxes(self.iter_range.iter_start, self.iter_range.iter_stop, self.data_reader)

    # Create a group to store data in
    output_group = h5io.create_hdf5_group(self.output_h5file, 'target_flux', replace=False, creating_program=self.prog)
    self.output_group = output_group
    output_group.attrs['version_code'] = self.output_format_version
    self.iter_range.record_data_iter_range(output_group)

    n_targets = len(fluxdata)
    index = numpy.empty((n_targets,), dtype=target_index_dtype)
    avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

    # Python 3: items(), not the removed iteritems()
    for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
        # Create group and index entry
        index[itarget]['target_label'] = str(target_label)
        target_group = output_group.create_group('target_{}'.format(itarget))
        self.target_groups[target_label] = target_group

        # Store per-iteration values
        target_group['n_iter'] = target_fluxdata['n_iter']
        target_group['count'] = target_fluxdata['count']
        target_group['flux'] = target_fluxdata['flux']
        h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

        # Calculate normalized flux autocorrelation
        fluxes = target_fluxdata['flux']
        mean_flux = fluxes.mean()
        fmm = fluxes - mean_flux
        acorr = fftconvolve(fmm, fmm[::-1])
        acorr = acorr[len(acorr) // 2:]
        acorr /= acorr[0]
        acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
        h5io.label_axes(acorr_ds, ['lag'], ['tau'])

        # Calculate overall averages and CIs via Monte Carlo bootstrap
        avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
            {'dataset': fluxes},
            estimator=(lambda stride, dataset: numpy.mean(dataset)),
            alpha=self.alpha,
            n_sets=self.n_sets,
            autocorrel_alpha=self.autocorrel_alpha,
            subsample=numpy.mean,
            do_correl=self.do_correl,
            mcbs_enable=self.mcbs_enable,
        )
        avg_fluxdata[itarget] = (self.iter_range.iter_start, self.iter_range.iter_stop, avg, lb_ci, ub_ci, sterr, correl_len)

        westpa.rc.pstatus('target {!r}:'.format(target_label))
        westpa.rc.pstatus(' correlation length = {} tau'.format(correl_len))
        westpa.rc.pstatus(' mean flux and CI = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))

        index[itarget]['mean_flux'] = avg
        index[itarget]['mean_flux_ci_lb'] = lb_ci
        index[itarget]['mean_flux_ci_ub'] = ub_ci
        index[itarget]['mean_flux_correl_len'] = correl_len

    # Write index and summary
    index_ds = output_group.create_dataset('index', data=index)
    index_ds.attrs['mcbs_alpha'] = self.alpha
    index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
    index_ds.attrs['mcbs_n_sets'] = self.n_sets

    self.fluxdata = fluxdata
    self.output_h5file['avg_flux'] = avg_fluxdata