Example #1
    def calc_store_flux_data(self):
        westpa.rc.pstatus(
            'Calculating mean flux and confidence intervals for iterations [{},{})'
            .format(self.iter_range.iter_start, self.iter_range.iter_stop))

        fluxdata = extract_fluxes(self.iter_range.iter_start,
                                  self.iter_range.iter_stop, self.data_reader)

        # Create a group to store data in
        output_group = h5io.create_hdf5_group(self.output_h5file,
                                              'target_flux',
                                              replace=False,
                                              creating_program=self.prog)
        self.output_group = output_group
        output_group.attrs['version_code'] = self.output_format_version
        self.iter_range.record_data_iter_range(output_group)

        n_targets = len(fluxdata)
        index = numpy.empty((len(fluxdata), ), dtype=target_index_dtype)
        avg_fluxdata = numpy.empty((n_targets, ), dtype=ci_dtype)

        for itarget, (target_label,
                      target_fluxdata) in enumerate(fluxdata.items()):
            # Create group and index entry
            index[itarget]['target_label'] = str(target_label)
            target_group = output_group.create_group(
                'target_{}'.format(itarget))

            self.target_groups[target_label] = target_group

            # Store per-iteration values
            target_group['n_iter'] = target_fluxdata['n_iter']
            target_group['count'] = target_fluxdata['count']
            target_group['flux'] = target_fluxdata['flux']
            h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

            # Calculate flux autocorrelation
            fluxes = target_fluxdata['flux']
            mean_flux = fluxes.mean()
            fmm = fluxes - mean_flux
            acorr = fftconvolve(fmm, fmm[::-1])
            acorr = acorr[len(acorr) // 2:]
            acorr /= acorr[0]
            acorr_ds = target_group.create_dataset('flux_autocorrel',
                                                   data=acorr)
            h5io.label_axes(acorr_ds, ['lag'], ['tau'])

            # Calculate overall averages and CIs
            #avg, lb_ci, ub_ci, correl_len = mclib.mcbs_ci_correl(fluxes, numpy.mean, self.alpha, self.n_sets,
            #                                                     autocorrel_alpha=self.autocorrel_alpha, subsample=numpy.mean)
            avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
                {'dataset': fluxes},
                estimator=(lambda stride, dataset: numpy.mean(dataset)),
                alpha=self.alpha,
                n_sets=self.n_sets,
                autocorrel_alpha=self.autocorrel_alpha,
                subsample=numpy.mean,
                do_correl=self.do_correl,
                mcbs_enable=self.mcbs_enable)
            avg_fluxdata[itarget] = (self.iter_range.iter_start,
                                     self.iter_range.iter_stop, avg, lb_ci,
                                     ub_ci, sterr, correl_len)
            westpa.rc.pstatus('target {!r}:'.format(target_label))
            westpa.rc.pstatus(
                '  correlation length = {} tau'.format(correl_len))
            westpa.rc.pstatus(
                '  mean flux and CI   = {:e} ({:e},{:e}) tau^(-1)'.format(
                    avg, lb_ci, ub_ci))
            index[itarget]['mean_flux'] = avg
            index[itarget]['mean_flux_ci_lb'] = lb_ci
            index[itarget]['mean_flux_ci_ub'] = ub_ci
            index[itarget]['mean_flux_correl_len'] = correl_len

        # Write index and summary
        index_ds = output_group.create_dataset('index', data=index)
        index_ds.attrs['mcbs_alpha'] = self.alpha
        index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
        index_ds.attrs['mcbs_n_sets'] = self.n_sets

        self.fluxdata = fluxdata
        self.output_h5file['avg_flux'] = avg_fluxdata
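
A minimal, self-contained sketch of the normalized flux autocorrelation computed in the example above; the helper name and the random test series are illustrative, not part of the WESTPA API:

    import numpy
    from scipy.signal import fftconvolve

    def normalized_autocorrelation(series):
        """Autocorrelation of a 1-D series, normalized so that lag 0 equals 1."""
        fluctuations = series - series.mean()                  # subtract the mean flux
        full = fftconvolve(fluctuations, fluctuations[::-1])   # full correlation, length 2N-1
        acorr = full[len(full) // 2:]                          # keep lags >= 0
        return acorr / acorr[0]                                # normalize by the zero-lag value

    # For an uncorrelated series the result drops to roughly zero after lag 0.
    rng = numpy.random.default_rng(0)
    print(normalized_autocorrelation(rng.normal(size=1000))[:3])
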
Example #2
    def go(self):
        pi = self.progress.indicator
        pi.new_operation('Initializing')
        with pi:
            self.data_reader.open('r')
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            state_map = self.assignments_file['state_map'][...]
            nstates = len(state_labels)
            start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop  # h5io.get_iter_range(self.assignments_file)
            iter_count = stop_iter - start_iter

            weights_ring = deque(maxlen=self.window_size)
            parent_ids_ring = deque(maxlen=self.window_size)
            bin_assignments_ring = deque(maxlen=self.window_size)
            label_assignments_ring = deque(maxlen=self.window_size)

            labeled_matrix_shape = (iter_count, nstates, nstates, nbins, nbins)
            unlabeled_matrix_shape = (iter_count, nbins, nbins)
            labeled_matrix_chunks = (1, nstates, nstates, nbins, nbins)
            unlabeled_matrix_chunks = (1, nbins, nbins)

            labeled_bin_fluxes_ds = self.output_file.create_dataset(
                'labeled_bin_fluxes',
                shape=labeled_matrix_shape,
                chunks=labeled_matrix_chunks if self.do_compression else None,
                compression=9 if self.do_compression else None,
                dtype=weight_dtype)
            labeled_bin_rates_ds = self.output_file.create_dataset(
                'labeled_bin_rates',
                shape=labeled_matrix_shape,
                chunks=labeled_matrix_chunks if self.do_compression else None,
                compression=9 if self.do_compression else None,
                dtype=weight_dtype)
            unlabeled_bin_rates_ds = self.output_file.create_dataset(
                'bin_rates',
                shape=unlabeled_matrix_shape,
                chunks=unlabeled_matrix_chunks
                if self.do_compression else None,
                compression=9 if self.do_compression else None,
                dtype=weight_dtype)

            fluxes = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
            labeled_rates = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
            unlabeled_rates = numpy.empty(unlabeled_matrix_shape[1:],
                                          weight_dtype)

            for ds in (self.output_file, labeled_bin_fluxes_ds,
                       labeled_bin_rates_ds, unlabeled_bin_rates_ds):
                h5io.stamp_iter_range(ds, start_iter, stop_iter)

            for ds in (labeled_bin_fluxes_ds, labeled_bin_rates_ds):
                h5io.label_axes(ds, [
                    'iteration', 'initial state', 'final state', 'initial bin',
                    'final bin'
                ])

            for ds in (unlabeled_bin_rates_ds, ):
                h5io.label_axes(ds, ['iteration', 'initial bin', 'final bin'])

            pi.new_operation('Calculating flux matrices', iter_count)
            # Calculate instantaneous rate matrices and trace trajectories
            for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
                # Get data from the main HDF5 file
                iter_group = self.data_reader.get_iter_group(n_iter)
                seg_index = iter_group['seg_index']
                nsegs, npts = iter_group['pcoord'].shape[0:2]
                weights = seg_index['weight']
                parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(
                    n_iter)

                # Get bin and traj. ensemble assignments from the previously-generated assignments file
                assignment_iiter = h5io.get_iteration_entry(
                    self.assignments_file, n_iter)
                bin_assignments = numpy.require(
                    self.assignments_file['assignments'][
                        assignment_iiter + numpy.s_[:nsegs, :npts]],
                    dtype=index_dtype)
                label_assignments = numpy.require(
                    self.assignments_file['trajlabels'][
                        assignment_iiter + numpy.s_[:nsegs, :npts]],
                    dtype=index_dtype)
                labeled_pops = self.assignments_file['labeled_populations'][
                    assignment_iiter]

                # Prepare to run analysis
                weights_ring.append(weights)
                parent_ids_ring.append(parent_ids)
                bin_assignments_ring.append(bin_assignments)
                label_assignments_ring.append(label_assignments)

                # Estimate rates using bin-to-bin fluxes
                estimate_rates(nbins, state_labels, weights_ring,
                               parent_ids_ring, bin_assignments_ring,
                               label_assignments_ring, state_map, labeled_pops,
                               self.all_lags, fluxes, labeled_rates,
                               unlabeled_rates)

                # Store bin-based kinetics data
                labeled_bin_fluxes_ds[iiter] = fluxes
                labeled_bin_rates_ds[iiter] = labeled_rates
                unlabeled_bin_rates_ds[iiter] = unlabeled_rates

                # Do a little manual clean-up to prevent memory explosion
                del iter_group, weights, parent_ids, bin_assignments, label_assignments, labeled_pops
                pi.progress += 1
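
The four `*_ring` buffers above rely on `collections.deque(maxlen=...)`. A minimal sketch of that sliding-window behaviour, with placeholder strings standing in for the per-iteration arrays:

    from collections import deque

    window_size = 3
    weights_ring = deque(maxlen=window_size)
    for n_iter in range(1, 6):
        weights_ring.append('weights_for_iter_{}'.format(n_iter))
        print(list(weights_ring))
    # Once full, each append silently evicts the oldest entry, so the window
    # always covers the last `window_size` iterations.
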
Example #3
    def go(self):
        assert self.data_reader.parent_id_dsspec._h5file is None
        assert self.data_reader.weight_dsspec._h5file is None
        if hasattr(self.dssynth.dsspec, '_h5file'):
            assert self.dssynth.dsspec._h5file is None
        pi = self.progress.indicator
        pi.operation = 'Initializing'
        with pi, self.data_reader, WESTPAH5File(
                self.output_filename, 'w',
                creating_program=True) as self.output_file:
            assign = self.binning.mapper.assign

            # We always assign the entire simulation, so that no trajectory appears to start
            # in a transition region that doesn't get initialized in one.
            iter_start = 1
            iter_stop = self.data_reader.current_iteration

            h5io.stamp_iter_range(self.output_file, iter_start, iter_stop)

            nbins = self.binning.mapper.nbins
            self.output_file.attrs['nbins'] = nbins

            state_map = numpy.empty((self.binning.mapper.nbins + 1, ),
                                    index_dtype)
            state_map[:] = 0  # state_id == nstates => unknown state

            # Recursive mappers produce a generator rather than a list of labels
            # so consume the entire generator into a list
            labels = [
                numpy.string_(label) for label in self.binning.mapper.labels
            ]

            self.output_file.create_dataset('bin_labels',
                                            data=labels,
                                            compression=9)

            if self.states:
                nstates = len(self.states)
                state_map[:] = nstates  # state_id == nstates => unknown state
                state_labels = [
                    numpy.string_(state['label']) for state in self.states
                ]

                for istate, sdict in enumerate(self.states):
                    assert state_labels[istate] == numpy.string_(
                        sdict['label'])  #sanity check
                    state_assignments = assign(sdict['coords'])
                    for assignment in state_assignments:
                        state_map[assignment] = istate
                self.output_file.create_dataset('state_map',
                                                data=state_map,
                                                compression=9,
                                                shuffle=True)
                self.output_file[
                    'state_labels'] = state_labels  #+ ['(unknown)']
            else:
                nstates = 0
            self.output_file.attrs['nstates'] = nstates
            # Stamp if this has been subsampled.
            self.output_file.attrs['subsampled'] = self.subsample

            iter_count = iter_stop - iter_start
            nsegs = numpy.empty((iter_count, ), seg_id_dtype)
            npts = numpy.empty((iter_count, ), seg_id_dtype)

            # scan for largest number of segments and largest number of points
            pi.new_operation('Scanning for segment and point counts',
                             iter_stop - iter_start)
            for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
                iter_group = self.data_reader.get_iter_group(n_iter)
                nsegs[iiter], npts[iiter] = iter_group['pcoord'].shape[0:2]
                pi.progress += 1
                del iter_group

            pi.new_operation('Preparing output')

            # create datasets
            self.output_file.create_dataset('nsegs',
                                            data=nsegs,
                                            shuffle=True,
                                            compression=9)
            self.output_file.create_dataset('npts',
                                            data=npts,
                                            shuffle=True,
                                            compression=9)

            max_nsegs = nsegs.max()
            max_npts = npts.max()

            assignments_shape = (iter_count, max_nsegs, max_npts)
            assignments_dtype = numpy.min_scalar_type(nbins)
            assignments_ds = self.output_file.create_dataset(
                'assignments',
                dtype=assignments_dtype,
                shape=assignments_shape,
                compression=4,
                shuffle=True,
                chunks=h5io.calc_chunksize(assignments_shape,
                                           assignments_dtype),
                fillvalue=nbins)
            if self.states:
                trajlabel_dtype = numpy.min_scalar_type(nstates)
                trajlabels_ds = self.output_file.create_dataset(
                    'trajlabels',
                    dtype=trajlabel_dtype,
                    shape=assignments_shape,
                    compression=4,
                    shuffle=True,
                    chunks=h5io.calc_chunksize(assignments_shape,
                                               trajlabel_dtype),
                    fillvalue=nstates)
                statelabels_ds = self.output_file.create_dataset(
                    'statelabels',
                    dtype=trajlabel_dtype,
                    shape=assignments_shape,
                    compression=4,
                    shuffle=True,
                    chunks=h5io.calc_chunksize(assignments_shape,
                                               trajlabel_dtype),
                    fillvalue=nstates)

            pops_shape = (iter_count, nstates + 1, nbins + 1)
            pops_ds = self.output_file.create_dataset(
                'labeled_populations',
                dtype=weight_dtype,
                shape=pops_shape,
                compression=4,
                shuffle=True,
                chunks=h5io.calc_chunksize(pops_shape, weight_dtype))
            h5io.label_axes(
                pops_ds,
                [numpy.string_(i) for i in ['iteration', 'state', 'bin']])

            pi.new_operation('Assigning to bins', iter_stop - iter_start)
            last_labels = None  # mapping of seg_id to last macrostate inhabited
            for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
                #get iteration info in this block

                if iiter == 0:
                    last_labels = numpy.empty((nsegs[iiter], ), index_dtype)
                    last_labels[:] = nstates  #unknown state

                #Slices this iteration into n_workers groups of segments, submits them to wm, splices results back together
                assignments, trajlabels, pops, statelabels = self.assign_iteration(
                    n_iter, nstates, nbins, state_map, last_labels)

                ##Do stuff with this iteration's results

                last_labels = trajlabels[:, -1].copy()
                assignments_ds[iiter, 0:nsegs[iiter],
                               0:npts[iiter]] = assignments
                pops_ds[iiter] = pops
                if self.states:
                    trajlabels_ds[iiter, 0:nsegs[iiter],
                                  0:npts[iiter]] = trajlabels
                    statelabels_ds[iiter, 0:nsegs[iiter],
                                   0:npts[iiter]] = statelabels

                pi.progress += 1
                del assignments, trajlabels, pops, statelabels

            for dsname in 'assignments', 'npts', 'nsegs', 'labeled_populations', 'statelabels':
                h5io.stamp_iter_range(self.output_file[dsname], iter_start,
                                      iter_stop)
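
A small sketch of the dtype selection used for the 'assignments' and 'trajlabels' datasets above: `numpy.min_scalar_type` returns the narrowest integer type that can hold the fill value, which keeps the large (iteration, segment, timepoint) arrays compact on disk.

    import numpy

    for nbins in (10, 300, 70000):
        print(nbins, numpy.min_scalar_type(nbins))
    # -> uint8 (up to 255 bins), uint16 (up to 65535), uint32 beyond that
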
Example #4
    def go(self):
        pi = self.progress.indicator
        pi.new_operation('Initializing')
        with pi:
            self.data_reader.open('r')
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            state_map = self.assignments_file['state_map'][...]
            nstates = len(state_labels)
            start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop # h5io.get_iter_range(self.assignments_file)
            iter_count = stop_iter - start_iter

            weights_ring = deque(maxlen=self.window_size)
            parent_ids_ring = deque(maxlen=self.window_size)
            bin_assignments_ring = deque(maxlen=self.window_size)
            label_assignments_ring = deque(maxlen=self.window_size)

            labeled_matrix_shape = (iter_count,nstates,nstates,nbins,nbins)
            unlabeled_matrix_shape = (iter_count,nbins,nbins)
            labeled_matrix_chunks = (1, nstates, nstates, nbins, nbins)
            unlabeled_matrix_chunks = (1, nbins, nbins)

            labeled_bin_fluxes_ds = self.output_file.create_dataset('labeled_bin_fluxes',
                                                                    shape=labeled_matrix_shape,
                                                                    chunks=labeled_matrix_chunks if self.do_compression else None,
                                                                    compression=9 if self.do_compression else None,
                                                                    dtype=weight_dtype)
            labeled_bin_rates_ds = self.output_file.create_dataset('labeled_bin_rates',
                                                                   shape=labeled_matrix_shape,
                                                                   chunks=labeled_matrix_chunks if self.do_compression else None,
                                                                   compression=9 if self.do_compression else None,
                                                                   dtype=weight_dtype)
            unlabeled_bin_rates_ds = self.output_file.create_dataset('bin_rates', shape=unlabeled_matrix_shape,
                                                                     chunks=unlabeled_matrix_chunks if self.do_compression else None,
                                                                     compression=9 if self.do_compression else None,
                                                                     dtype=weight_dtype)

            fluxes = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
            labeled_rates = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
            unlabeled_rates = numpy.empty(unlabeled_matrix_shape[1:], weight_dtype)

            for ds in (self.output_file, labeled_bin_fluxes_ds, labeled_bin_rates_ds, unlabeled_bin_rates_ds):
                h5io.stamp_iter_range(ds, start_iter, stop_iter)

            for ds in (labeled_bin_fluxes_ds, labeled_bin_rates_ds):
                h5io.label_axes(ds, ['iteration','initial state','final state','initial bin','final bin'])

            for ds in (unlabeled_bin_rates_ds,):
                h5io.label_axes(ds, ['iteration', 'initial bin', 'final bin'])

            pi.new_operation('Calculating flux matrices', iter_count)
            # Calculate instantaneous rate matrices and trace trajectories
            for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
                # Get data from the main HDF5 file
                iter_group = self.data_reader.get_iter_group(n_iter)
                seg_index = iter_group['seg_index']
                nsegs, npts = iter_group['pcoord'].shape[0:2] 
                weights = seg_index['weight']
                parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

                # Get bin and traj. ensemble assignments from the previously-generated assignments file
                assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
                bin_assignments = numpy.require(self.assignments_file['assignments'][assignment_iiter + numpy.s_[:nsegs,:npts]],
                                                dtype=index_dtype)
                label_assignments = numpy.require(self.assignments_file['trajlabels'][assignment_iiter + numpy.s_[:nsegs,:npts]],
                                                  dtype=index_dtype)
                labeled_pops = self.assignments_file['labeled_populations'][assignment_iiter]

                # Prepare to run analysis
                weights_ring.append(weights)
                parent_ids_ring.append(parent_ids)
                bin_assignments_ring.append(bin_assignments)
                label_assignments_ring.append(label_assignments)

                # Estimate rates using bin-to-bin fluxes
                estimate_rates(nbins, state_labels,
                               weights_ring, parent_ids_ring, bin_assignments_ring, label_assignments_ring, state_map,
                               labeled_pops,
                               self.all_lags,
                               fluxes, labeled_rates, unlabeled_rates)

                # Store bin-based kinetics data
                labeled_bin_fluxes_ds[iiter] = fluxes
                labeled_bin_rates_ds[iiter] = labeled_rates
                unlabeled_bin_rates_ds[iiter] = unlabeled_rates

                # Do a little manual clean-up to prevent memory explosion
                del iter_group, weights, parent_ids, bin_assignments, label_assignments, labeled_pops
                pi.progress += 1
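
A minimal sketch of the index arithmetic used when reading `assignments` and `trajlabels` above. It assumes `h5io.get_iteration_entry` returns a tuple-style selector for one iteration; adding `numpy.s_[:nsegs, :npts]` extends it with per-segment and per-timepoint slices so a single fancy index reads the whole block.

    import numpy

    # Stand-in for the selector returned by h5io.get_iteration_entry (assumed tuple form)
    assignment_iiter = numpy.index_exp[2]          # == (2,)
    nsegs, npts = 5, 3
    selector = assignment_iiter + numpy.s_[:nsegs, :npts]

    data = numpy.arange(4 * 10 * 6).reshape(4, 10, 6)
    print(data[selector].shape)                    # (5, 3): iteration 2, first 5 segments, first 3 points
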
Example #5
    def go(self):
        assert self.data_reader.parent_id_dsspec._h5file is None
        assert self.data_reader.weight_dsspec._h5file is None
        if hasattr(self.dssynth.dsspec, '_h5file'):
            assert self.dssynth.dsspec._h5file is None
        pi = self.progress.indicator
        pi.operation = 'Initializing'
        with pi, self.data_reader, WESTPAH5File(self.output_filename, 'w', creating_program=True) as self.output_file:
            assign = self.binning.mapper.assign

            # We always assign the entire simulation, so that no trajectory appears to start
            # in a transition region that doesn't get initialized in one.
            iter_start = 1 
            iter_stop =  self.data_reader.current_iteration

            h5io.stamp_iter_range(self.output_file, iter_start, iter_stop)

            nbins = self.binning.mapper.nbins
            self.output_file.attrs['nbins'] = nbins 

            state_map = numpy.empty((self.binning.mapper.nbins+1,), index_dtype)
            state_map[:] = 0 # state_id == nstates => unknown state

            # Recursive mappers produce a generator rather than a list of labels
            # so consume the entire generator into a list
            labels = [label for label in self.binning.mapper.labels]

            self.output_file.create_dataset('bin_labels', data=labels, compression=9)

            if self.states:
                nstates = len(self.states)
                state_map[:] = nstates # state_id == nstates => unknown state
                state_labels = [state['label'] for state in self.states]

                for istate, sdict in enumerate(self.states):
                    assert state_labels[istate] == sdict['label'] #sanity check
                    state_assignments = assign(sdict['coords'])
                    for assignment in state_assignments:
                        state_map[assignment] = istate
                self.output_file.create_dataset('state_map', data=state_map, compression=9, shuffle=True)
                self.output_file['state_labels'] = state_labels #+ ['(unknown)']
            else:
                nstates = 0
            self.output_file.attrs['nstates'] = nstates

            iter_count = iter_stop - iter_start
            nsegs = numpy.empty((iter_count,), seg_id_dtype)
            npts = numpy.empty((iter_count,), seg_id_dtype)

            # scan for largest number of segments and largest number of points
            pi.new_operation ('Scanning for segment and point counts', iter_stop-iter_start)
            for iiter, n_iter in enumerate(xrange(iter_start,iter_stop)):
                iter_group = self.data_reader.get_iter_group(n_iter)
                nsegs[iiter], npts[iiter] = iter_group['pcoord'].shape[0:2]
                pi.progress += 1
                del iter_group

            pi.new_operation('Preparing output')

            # create datasets
            self.output_file.create_dataset('nsegs', data=nsegs, shuffle=True, compression=9)
            self.output_file.create_dataset('npts', data=npts, shuffle=True, compression=9)

            max_nsegs = nsegs.max()
            max_npts = npts.max()

            assignments_shape = (iter_count,max_nsegs,max_npts)
            assignments_dtype = numpy.min_scalar_type(nbins)
            assignments_ds = self.output_file.create_dataset('assignments', dtype=assignments_dtype, shape=assignments_shape,
                                                             compression=4, shuffle=True,
                                                             chunks=h5io.calc_chunksize(assignments_shape, assignments_dtype),
                                                             fillvalue=nbins)
            if self.states:
                trajlabel_dtype = numpy.min_scalar_type(nstates)
                trajlabels_ds = self.output_file.create_dataset('trajlabels', dtype=trajlabel_dtype, shape=assignments_shape,
                                                                compression=4, shuffle=True,
                                                                chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
                                                                fillvalue=nstates)

            pops_shape = (iter_count,nstates+1,nbins+1)
            pops_ds = self.output_file.create_dataset('labeled_populations', dtype=weight_dtype, shape=pops_shape,
                                                      compression=4, shuffle=True,
                                                      chunks=h5io.calc_chunksize(pops_shape, weight_dtype))
            h5io.label_axes(pops_ds, ['iteration', 'state', 'bin'])

            pi.new_operation('Assigning to bins', iter_stop-iter_start)
            last_labels = None # mapping of seg_id to last macrostate inhabited      
            for iiter, n_iter in enumerate(xrange(iter_start,iter_stop)):
                #get iteration info in this block

                if iiter == 0:
                    last_labels = numpy.empty((nsegs[iiter],), index_dtype)
                    last_labels[:] = nstates #unknown state

                #Slices this iteration into n_workers groups of segments, submits them to wm, splices results back together
                assignments, trajlabels, pops = self.assign_iteration(n_iter, nstates, nbins, state_map, last_labels)

                ##Do stuff with this iteration's results

                last_labels = trajlabels[:,-1].copy()
                assignments_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]] = assignments
                pops_ds[iiter] = pops
                if self.states:
                    trajlabels_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]]  = trajlabels

                pi.progress += 1
                del assignments, trajlabels, pops

            for dsname in 'assignments', 'npts', 'nsegs', 'labeled_populations':
                h5io.stamp_iter_range(self.output_file[dsname], iter_start, iter_stop)
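
A minimal sketch of the `state_map` lookup table built above: one entry per bin plus one overflow slot, initialized to `nstates` (the "unknown state" sentinel) and overwritten for bins whose coordinates fall inside a defined macrostate. The bin indices and the stand-in `index_dtype` below are illustrative.

    import numpy

    nbins, nstates = 6, 2
    index_dtype = numpy.uint16                 # stand-in for the module-level index_dtype
    state_map = numpy.empty((nbins + 1,), index_dtype)
    state_map[:] = nstates                     # nstates == "unknown state"
    state_map[[0, 1]] = 0                      # bins assigned to state 0
    state_map[[4, 5]] = 1                      # bins assigned to state 1

    bin_assignments = numpy.array([0, 3, 5, 6])
    print(state_map[bin_assignments])          # -> [0 2 1 2]
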
Example #6
    def w_postanalysis_matrix(self):
        pi = self.progress.indicator
        pi.new_operation('Initializing')
        
        self.data_reader.open('r')
        nbins = self.assignments_file.attrs['nbins']

        state_labels = self.assignments_file['state_labels'][...]
        state_map = self.assignments_file['state_map'][...]
        nstates = len(state_labels)

        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop # h5io.get_iter_range(self.assignments_file)
        iter_count = stop_iter - start_iter

        nfbins = nbins * nstates

        flux_shape = (iter_count, nfbins, nfbins)
        pop_shape = (iter_count, nfbins)

        h5io.stamp_iter_range(self.output_file, start_iter, stop_iter)

        bin_populations_ds = self.output_file.create_dataset('bin_populations', shape=pop_shape, dtype=weight_dtype)
        h5io.stamp_iter_range(bin_populations_ds, start_iter, stop_iter)
        h5io.label_axes(bin_populations_ds, ['iteration', 'bin'])


        flux_grp = self.output_file.create_group('iterations')
        self.output_file.attrs['nrows'] = nfbins
        self.output_file.attrs['ncols'] = nfbins


        fluxes = np.empty(flux_shape[1:], weight_dtype)
        populations = np.empty(pop_shape[1:], weight_dtype)
        trans = np.empty(flux_shape[1:], np.int64)

        # Check to make sure this isn't a data set with target states
        #tstates = self.data_reader.data_manager.get_target_states(0)
        #if len(tstates) > 0:
        #    raise ValueError('Postanalysis reweighting analysis does not support WE simulation run under recycling conditions')

        pi.new_operation('Calculating flux matrices', iter_count)
        # Calculate instantaneous statistics
        for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2] 
            weights = seg_index['weight']


            # Get bin and traj. ensemble assignments from the previously-generated assignments file
            assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            bin_assignments = np.require(self.assignments_file['assignments'][assignment_iiter + np.s_[:nsegs,:npts]],
                                            dtype=index_dtype)

            mask_unknown = np.zeros_like(bin_assignments, dtype=np.uint16)

            macrostate_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            macrostate_assignments = np.require(self.assignments_file['trajlabels'][macrostate_iiter + np.s_[:nsegs,:npts]],
                                        dtype=index_dtype)

            # Transform bin_assignments to take macrostate membership into account
            bin_assignments = nstates * bin_assignments + macrostate_assignments

            mask_indx = np.where(macrostate_assignments == nstates)
            mask_unknown[mask_indx] = 1

            # Calculate bin-to-bin fluxes, bin populations and number of obs transitions
            calc_stats(bin_assignments, weights, fluxes, populations, trans, mask_unknown, self.sampling_frequency)

            # Store bin-based kinetics data
            bin_populations_ds[iiter] = populations

            # Setup sparse data structures for flux and obs
            fluxes_sp = sp.coo_matrix(fluxes)
            trans_sp = sp.coo_matrix(trans)

            assert fluxes_sp.nnz == trans_sp.nnz

            flux_iter_grp = flux_grp.create_group('iter_{:08d}'.format(n_iter))
            flux_iter_grp.create_dataset('flux', data=fluxes_sp.data, dtype=weight_dtype)
            flux_iter_grp.create_dataset('obs', data=trans_sp.data, dtype=np.int32)
            flux_iter_grp.create_dataset('rows', data=fluxes_sp.row, dtype=np.int32)
            flux_iter_grp.create_dataset('cols', data=fluxes_sp.col, dtype=np.int32)
            flux_iter_grp.attrs['nrows'] = nfbins
            flux_iter_grp.attrs['ncols'] = nfbins

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, bin_assignments
            del macrostate_assignments

            pi.progress += 1

            # Check and save the number of intermediate time points; this will be used to normalize the
            # flux and kinetics to tau in w_postanalysis_reweight.
            if self.assignments_file.attrs['subsampled'] or self.sampling_frequency == 'iteration':
                self.output_file.attrs['npts'] = 2
            else:
                #self.output_file.attrs['npts'] = npts if self.sampling_frequency == 'timepoint' else 2
                self.output_file.attrs['npts'] = npts
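
A minimal sketch of the sparse COO storage used for the per-iteration flux matrices above: only the nonzero values plus their row/column indices are written, and the dense matrix can be rebuilt later from the stored `nrows`/`ncols` attributes. The small matrix here is illustrative.

    import numpy as np
    import scipy.sparse as sp

    fluxes = np.zeros((4, 4))
    fluxes[0, 2] = 0.25
    fluxes[3, 1] = 0.10

    fluxes_sp = sp.coo_matrix(fluxes)
    print(fluxes_sp.data, fluxes_sp.row, fluxes_sp.col)   # the triplets actually stored

    rebuilt = sp.coo_matrix((fluxes_sp.data, (fluxes_sp.row, fluxes_sp.col)),
                            shape=(4, 4)).toarray()
    assert np.array_equal(rebuilt, fluxes)
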
Example #7
    def calc_store_flux_data(self):         
        westpa.rc.pstatus('Calculating mean flux and confidence intervals for iterations [{},{})'
                        .format(self.iter_range.iter_start, self.iter_range.iter_stop))
        
        fluxdata = extract_fluxes(self.iter_range.iter_start, self.iter_range.iter_stop, self.data_reader)
        
        # Create a group to store data in
        output_group = h5io.create_hdf5_group(self.output_h5file, 'target_flux', replace=False, creating_program=self.prog)        
        self.output_group = output_group
        output_group.attrs['version_code'] = self.output_format_version
        self.iter_range.record_data_iter_range(output_group)
        
        n_targets = len(fluxdata)
        index = numpy.empty((len(fluxdata),), dtype=target_index_dtype)
        avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)
        

        for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.iteritems()):
            # Create group and index entry
            index[itarget]['target_label'] = str(target_label)
            target_group = output_group.create_group('target_{}'.format(itarget))

            self.target_groups[target_label] = target_group
            
            # Store per-iteration values
            target_group['n_iter'] = target_fluxdata['n_iter']
            target_group['count'] = target_fluxdata['count']
            target_group['flux'] = target_fluxdata['flux']
            h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])
            
            
            # Calculate flux autocorrelation
            fluxes = target_fluxdata['flux']
            mean_flux = fluxes.mean()
            fmm = fluxes - mean_flux
            acorr = fftconvolve(fmm,fmm[::-1])
            acorr = acorr[len(acorr)//2:]
            acorr /= acorr[0]
            acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
            h5io.label_axes(acorr_ds, ['lag'], ['tau'])
            
            # Calculate overall averages and CIs
            #avg, lb_ci, ub_ci, correl_len = mclib.mcbs_ci_correl(fluxes, numpy.mean, self.alpha, self.n_sets,
            #                                                     autocorrel_alpha=self.autocorrel_alpha, subsample=numpy.mean)
            avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl({'dataset': fluxes}, estimator=(lambda stride, dataset: numpy.mean(dataset)), alpha=self.alpha, n_sets=self.n_sets,
                                                                 autocorrel_alpha=self.autocorrel_alpha, subsample=numpy.mean, do_correl=self.do_correl, mcbs_enable=self.mcbs_enable )
            avg_fluxdata[itarget] = (self.iter_range.iter_start, self.iter_range.iter_stop, avg, lb_ci, ub_ci, sterr, correl_len)
            westpa.rc.pstatus('target {!r}:'.format(target_label))
            westpa.rc.pstatus('  correlation length = {} tau'.format(correl_len))
            westpa.rc.pstatus('  mean flux and CI   = {:e} ({:e},{:e}) tau^(-1)'.format(avg,lb_ci,ub_ci))
            index[itarget]['mean_flux'] = avg
            index[itarget]['mean_flux_ci_lb'] = lb_ci
            index[itarget]['mean_flux_ci_ub'] = ub_ci
            index[itarget]['mean_flux_correl_len'] = correl_len

        # Write index and summary        
        index_ds = output_group.create_dataset('index', data=index)
        index_ds.attrs['mcbs_alpha'] = self.alpha
        index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
        index_ds.attrs['mcbs_n_sets'] = self.n_sets
        
        self.fluxdata = fluxdata
        self.output_h5file['avg_flux'] = avg_fluxdata
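
A minimal sketch of the structured-array index record filled above. The field names match what the loop assigns, but the dtypes below are illustrative stand-ins for `target_index_dtype`, which is defined elsewhere in the module:

    import numpy

    # Illustrative stand-in for target_index_dtype
    target_index_dtype = numpy.dtype([('target_label', 'S64'),
                                      ('mean_flux', numpy.float64),
                                      ('mean_flux_ci_lb', numpy.float64),
                                      ('mean_flux_ci_ub', numpy.float64),
                                      ('mean_flux_correl_len', numpy.int64)])

    index = numpy.zeros((2,), dtype=target_index_dtype)
    index[0]['target_label'] = b'bound'
    index[0]['mean_flux'] = 1.5e-4
    print(index[0])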