Example #1
 def stamp_hash(self, h5file_name, new_hash):
     '''Load a file, stamp it with the given hash, and return the file reopened read-only.'''
     h5file = h5io.WESTPAH5File(h5file_name, 'r+')
     h5file.attrs['arg_hash'] = new_hash
     h5file.close()
     h5file = h5io.WESTPAH5File(h5file_name, 'r')
     return h5file
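A minimal usage sketch (the file name, the new_hash value, and the tool object are hypothetical): stamp the analysis file, then keep only the read-only handle that stamp_hash returns.

# Hypothetical usage: the attribute is written and flushed before the file is
# reopened, so it is still visible on the read-only handle.
assign_file = tool.stamp_hash('ANALYSIS/DEFAULT/assign.h5', new_hash)
print(assign_file.attrs['arg_hash'])
assign_file.close()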
Example #2
 def open_files(self):
     self.output_file = h5io.WESTPAH5File(self.output_filename, 'a', creating_program=True)
     h5io.stamp_creator_data(self.output_file)
     self.assignments_file = h5io.WESTPAH5File(self.assignments_filename, 'r')#, driver='core', backing_store=False)
     self.kinetics_file = h5io.WESTPAH5File(self.kinetics_filename, 'r')#, driver='core', backing_store=False)
     if not self.iter_range.check_data_iter_range_least(self.assignments_file):
         raise ValueError('assignments data do not span the requested iterations')
Example #3
    def open_files(self):
        self.output_file = h5io.WESTPAH5File(self.output_file,
                                             'w',
                                             creating_program=True)
        h5io.stamp_creator_data(self.output_file)

        opened_files = self.generate_file_list([self.west])
        self.westH5 = opened_files[self.west]
Example #4
    def generate_file_list(self, key_list):
        '''A convenience function that takes a list of filenames and returns a nested dictionary of the opened
        files, keyed first by filename and then by trial number.'''
        return_dict = {}
        if self.ntrials == 0:
            raise self.NoSimulationsException('You must specify the number of simulations.')

        for key in key_list:
            return_dict[key] = {}
        for i in range(1, self.ntrials + 1):
            # TODO: the two-digit zero-padding of the trial directory names is hard-coded for now.
            for key in key_list:
                return_dict[key][i] = h5io.WESTPAH5File(os.path.join(self.master, str(i).zfill(2), key), 'r')
        return return_dict
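A sketch of how the returned structure might be consumed, assuming ntrials is 3 and master points at a directory containing per-trial subdirectories 01/, 02/, 03/, each holding a west.h5 file (all hypothetical):

# Hypothetical usage: the result is keyed first by filename, then by trial number.
files = self.generate_file_list(['west.h5'])
for trial, h5file in sorted(files['west.h5'].items()):
    print(trial, h5file.filename)  # e.g. 1  .../01/west.h5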
Example #5
def _find_matching_segments(west_datafile_name, n_iter, predicate, invert=False):
    '''Find all segments in iteration ``n_iter`` that match (or do not match, if
    ``invert`` is true) the given ``predicate``. Returns a sequence of matching
    seg_ids.'''

    with h5io.WESTPAH5File(west_datafile_name, 'r') as west_datafile:
        iter_group = west_datafile.get_iter_group(n_iter)
        nsegs = iter_group['seg_index'].shape[0]
        matching_ids = set(map(int, predicate(n_iter, iter_group)))

        if invert:
            matching_ids = set(range(nsegs)) - matching_ids

        matchvec = sorted(np.fromiter(matching_ids, dtype=seg_id_dtype, count=len(matching_ids)))
        return n_iter, matchvec
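A usage sketch with a simple hypothetical predicate that selects segments whose weight exceeds a threshold; the west.h5 path and iteration number are placeholders:

import numpy as np

def heavy_segments(n_iter, iter_group):
    # Hypothetical predicate: seg_ids of segments with weight above 1e-3.
    weights = iter_group['seg_index']['weight']
    return np.flatnonzero(weights > 1e-3)

n_iter, heavy_ids = _find_matching_segments('west.h5', 10, heavy_segments)
n_iter, light_ids = _find_matching_segments('west.h5', 10, heavy_segments, invert=True)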
Example #6
    def process_args(self, args):
        self.progress.process_args(args)
        self.kinetics_filename = args.kinetics
        self.istate = args.istate
        self.fstate = args.fstate
        self.kinetics_file = h5io.WESTPAH5File(self.kinetics_filename, 'r')

        self.iter_start = args.iter_start
        if args.iter_stop is None:
            self.iter_stop = self.kinetics_file.attrs['iter_stop']
        else:
            self.iter_stop = args.iter_stop + 1

        self.binspec = args.bins
        self.output_filename = args.output
        self.ignore_out_of_range = bool(args.ignore_out_of_range)
        self.compress_output = args.compress or False
Example #7
    def go(self):
        self.data_reader.open('r')
        assignments_file = h5py.File(self.assignments_filename, mode='r')
        output_file = h5io.WESTPAH5File(self.output_filename, mode='w')
        pi = self.progress.indicator
        count = self.count
        timepoint = self.timepoint

        nbins = assignments_file.attrs['nbins'] + 1
        assignments_ds = assignments_file['assignments']

        iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = iter_stop - iter_start
        h5io.check_iter_range_least(assignments_ds, iter_start, iter_stop)
        nsegs = assignments_file['nsegs'][h5io.get_iteration_slice(
            assignments_file['nsegs'], iter_start, iter_stop)]

        output_file.create_dataset('n_iter',
                                   dtype=n_iter_dtype,
                                   data=list(range(iter_start, iter_stop)))

        seg_count_ds = output_file.create_dataset('nsegs',
                                                  dtype=np.uint,
                                                  shape=(iter_count, nbins))
        matching_segs_ds = output_file.create_dataset(
            'seg_ids',
            shape=(iter_count, nbins, count),
            dtype=seg_id_dtype,
            chunks=h5io.calc_chunksize((iter_count, nbins, count),
                                       seg_id_dtype),
            shuffle=True,
            compression=9)
        weights_ds = output_file.create_dataset('weights',
                                                shape=(iter_count, nbins,
                                                       count),
                                                dtype=weight_dtype,
                                                chunks=h5io.calc_chunksize(
                                                    (iter_count, nbins, count),
                                                    weight_dtype),
                                                shuffle=True,
                                                compression=9)
        what = self.what

        with pi:
            pi.new_operation('Finding matching segments', extent=iter_count)
            for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
                assignments = np.require(
                    assignments_ds[h5io.get_iteration_entry(assignments_ds, n_iter) + np.index_exp[:, timepoint]],
                    dtype=westpa.binning.index_dtype,
                )
                all_weights = self.data_reader.get_iter_group(n_iter)['seg_index']['weight']

                # the following Cython function just executes this loop:
                # for iseg in range(nsegs[iiter]):
                #     segs_by_bin[iseg, assignments[iseg]] = True
                segs_by_bin = assignments_list_to_table(nsegs[iiter], nbins, assignments)
                for ibin in range(nbins):
                    segs = np.nonzero(segs_by_bin[:, ibin])[0]

                    seg_count_ds[iiter, ibin] = min(len(segs), count)

                    if len(segs):
                        weights = all_weights.take(segs)

                        if what == 'lowweight':
                            indices = np.argsort(weights)[:count]
                        elif what == 'highweight':
                            indices = np.argsort(weights)[::-1][:count]
                        else:
                            assert what == 'random'
                            indices = np.random.permutation(len(weights))

                        matching_segs_ds[iiter, ibin, :len(segs)] = segs.take(indices)
                        weights_ds[iiter, ibin, :len(segs)] = weights.take(indices)
                        del segs, weights

                del assignments, segs_by_bin, all_weights
                pi.progress += 1
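A short sketch of reading the resulting file back, assuming self.output_filename was 'ntop.h5' (the name is hypothetical; the dataset names are the ones created above):

import h5py

# Hypothetical read-back of the datasets written by the routine above.
with h5py.File('ntop.h5', 'r') as f:
    nsegs = f['nsegs'][:]             # (iteration, bin) -> number of stored segments
    k = nsegs[0, 0]
    seg_ids = f['seg_ids'][0, 0, :k]  # selected seg_ids for the first iteration and bin
    weights = f['weights'][0, 0, :k]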
Example #8
    def go(self):
        self.data_reader.open('r')
        output_file = h5io.WESTPAH5File(self.output_filename, mode='w')
        pi = self.progress.indicator

        iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = iter_stop - iter_start

        output_file.create_dataset('n_iter',
                                   dtype=n_iter_dtype,
                                   data=list(range(iter_start, iter_stop)))
        current_seg_count = 0
        seg_count_ds = output_file.create_dataset('n_segs',
                                                  dtype=np.uint,
                                                  shape=(iter_count, ))
        matching_segs_ds = output_file.create_dataset(
            'seg_ids',
            shape=(iter_count, 0),
            maxshape=(iter_count, None),
            dtype=seg_id_dtype,
            chunks=h5io.calc_chunksize((iter_count, 1000000), seg_id_dtype),
            shuffle=True,
            compression=9,
        )
        weights_ds = output_file.create_dataset(
            'weights',
            shape=(iter_count, 0),
            maxshape=(iter_count, None),
            dtype=weight_dtype,
            chunks=h5io.calc_chunksize((iter_count, 1000000), weight_dtype),
            shuffle=True,
            compression=9,
        )

        with pi:
            pi.new_operation('Finding matching segments', extent=iter_count)
            #             futures = set()
            #             for n_iter in xrange(iter_start,iter_stop):
            #                 futures.add(self.work_manager.submit(_find_matching_segments,
            #                                                      args=(self.data_reader.we_h5filename,n_iter,self.predicate,self.invert)))

            #             for future in self.work_manager.as_completed(futures):
            for future in self.work_manager.submit_as_completed(
                ((_find_matching_segments,
                  (self.data_reader.we_h5filename, n_iter, self.predicate,
                   self.invert), {})
                 for n_iter in range(iter_start, iter_stop)),
                    self.max_queue_len,
            ):
                n_iter, matching_ids = future.get_result()
                n_matches = len(matching_ids)

                if n_matches:
                    if n_matches > current_seg_count:
                        matching_segs_ds.resize((iter_count, n_matches))
                        weights_ds.resize((iter_count, n_matches))
                        current_seg_count = n_matches

                    seg_count_ds[n_iter - iter_start] = n_matches
                    matching_segs_ds[n_iter - iter_start, :n_matches] = matching_ids
                    weights_ds[n_iter - iter_start, :n_matches] = (
                        self.data_reader.get_iter_group(n_iter)['seg_index']['weight'][sorted(matching_ids)]
                    )
                del matching_ids
                pi.progress += 1

            if self.include_ancestors:
                pi.new_operation('Tracing ancestors of matching segments',
                                 extent=iter_count)
                from_previous = set()
                current_seg_count = matching_segs_ds.shape[1]
                for n_iter in range(iter_stop - 1, iter_start - 1, -1):
                    iiter = n_iter - iter_start
                    n_matches = seg_count_ds[iiter]
                    matching_ids = set(from_previous)
                    if n_matches:
                        matching_ids.update(
                            matching_segs_ds[iiter, :seg_count_ds[iiter]])
                    from_previous.clear()

                    n_matches = len(matching_ids)
                    if n_matches > current_seg_count:
                        matching_segs_ds.resize((iter_count, n_matches))
                        weights_ds.resize((iter_count, n_matches))
                        current_seg_count = n_matches

                    if n_matches > 0:
                        seg_count_ds[iiter] = n_matches
                        matching_ids = sorted(matching_ids)
                        matching_segs_ds[iiter, :n_matches] = matching_ids
                        weights_ds[iiter, :n_matches] = (
                            self.data_reader.get_iter_group(n_iter)['seg_index']['weight'][sorted(matching_ids)]
                        )
                        parent_ids = self.data_reader.get_iter_group(n_iter)['seg_index']['parent_id'][sorted(matching_ids)]
                        from_previous.update(
                            parent_id for parent_id in parent_ids
                            if parent_id >= 0)  # filter initial states
                        del parent_ids
                    del matching_ids
                    pi.progress += 1
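The ancestor-tracing pass relies on the seg_index 'parent_id' field: non-negative values point at the parent segment in the previous iteration, while negative values mark segments started from initial states (hence the filter above). A small sketch of walking one segment's history under the same convention; data_reader, the iteration number, and the seg_id are placeholders:

# Hypothetical: follow a single segment back through its parents until an
# initial state (negative parent_id) is reached.
seg_id, n_iter = 5, 100
while n_iter > 1 and seg_id >= 0:
    parent_id = int(data_reader.get_iter_group(n_iter)['seg_index']['parent_id'][seg_id])
    print(n_iter, seg_id, '<-', parent_id)
    seg_id, n_iter = parent_id, n_iter - 1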
Example #9
    def analysis_structure(self):
        '''
        Run automatically on startup.  Parses the configuration file and loads the data files from the different
        analysis schemes.  If they don't exist, it creates them automatically by hooking into the existing analysis
        routines and going from there.

        It does this by calling the make_parser_and_process function for w_{assign,reweight,direct} with a custom-built
        list of args.  The user can specify everything in the configuration file that would have been specified on the command line.

        For instance, were one to call w_direct as follows:

            w_direct --evolution cumulative --step-iter 1 --disable-correl

        the west.cfg would look as follows:

        west:
          analysis:
            w_direct:
              evolution: cumulative
              step_iter: 1
              extra: ['disable-correl']

        Alternatively, if one wishes to use the same options for both w_direct and w_reweight, the key 'w_direct' can be replaced
        with 'kinetics'.
        '''
        # Make sure everything exists.
        try:
            os.mkdir(self.__settings['directory'])
        except Exception:
            pass
        # Now, check to see whether they exist, and then load them.
        self.__analysis_schemes__ = {}
        # We really need to implement some sort of default behavior if an analysis scheme isn't set.
        # Right now, we just crash.  That isn't really graceful.
        for scheme in self.__settings['analysis_schemes']:
            if self.__settings['analysis_schemes'][scheme]['enabled']:
                if self.work_manager.running is False:
                    self.work_manager.startup()
                path = os.path.join(os.getcwd(), self.__settings['directory'],
                                    scheme)
                # if 'postanalysis' in self.__settings['analysis_schemes'][scheme] and 'postanalysis' in self.__settings['postanalysis']:
                # Should clean this up.  But it uses the default global setting if a by-scheme one isn't set.
                if 'postanalysis' in self.__settings:
                    if 'postanalysis' in self.__settings['analysis_schemes'][
                            scheme]:
                        pass
                    else:
                        self.__settings['analysis_schemes'][scheme][
                            'postanalysis'] = self.__settings['postanalysis']
                try:
                    os.mkdir(path)
                except Exception:
                    pass
                self.__analysis_schemes__[scheme] = {}
                try:
                    if (self.__settings['analysis_schemes'][scheme]
                        ['postanalysis'] is True
                            or self.__settings['postanalysis'] is True):
                        analysis_files = ['assign', 'direct', 'reweight']
                    else:
                        analysis_files = ['assign', 'direct']
                except Exception:
                    analysis_files = ['assign', 'direct']
                    self.__settings['analysis_schemes'][scheme][
                        'postanalysis'] = False
                reanalyze_kinetics = False
                assign_hash = None
                for name in analysis_files:
                    arg_hash = None
                    if self.reanalyze is True:
                        reanalyze_kinetics = True
                        try:
                            os.remove(os.path.join(path, '{}.h5'.format(name)))
                        except Exception:
                            pass
                    else:
                        try:
                            # Try to load the hash.  If we fail to load the hash or the file, we need to reload.
                            # if self.reanalyze == True:
                            #    raise ValueError('Reanalyze set to true.')
                            self.__analysis_schemes__[scheme][
                                name] = h5io.WESTPAH5File(
                                    os.path.join(path, '{}.h5'.format(name)),
                                    'r')
                            arg_hash = self.__analysis_schemes__[scheme][
                                name].attrs['arg_hash']
                            if name == 'assign':
                                assign_hash = arg_hash
                        except Exception:
                            pass
                            # We shouldn't rely on this.
                            # self.reanalyze = True
                    if True:
                        if name == 'assign':
                            assign = w_assign.WAssign()

                            w_assign_config = {
                                'output': os.path.join(path,
                                                       '{}.h5'.format(name))
                            }
                            try:
                                w_assign_config.update(
                                    self.__settings['w_assign'])
                            except Exception:
                                pass
                            try:
                                w_assign_config.update(
                                    self.__settings['analysis_schemes'][scheme]
                                    ['w_assign'])
                            except Exception:
                                pass
                            args = []
                            for key, value in w_assign_config.items():
                                if key != 'extra':
                                    args.append(
                                        str('--') + str(key).replace('_', '-'))
                                    args.append(str(value))
                            # This is for stuff like disabling correlation analysis, etc.
                            if 'extra' in list(w_assign_config.keys()):
                                # We're sorting to ensure that the order doesn't matter.
                                for value in sorted(w_assign_config['extra']):
                                    args.append(
                                        str('--') +
                                        str(value).replace('_', '-'))
                            # We're just calling the built in function.
                            # This is a lot cleaner than what we had in before, and far more workable.
                            args.append('--config-from-file')
                            args.append('--scheme-name')
                            args.append('{}'.format(scheme))
                            # Why are we calling this if we're not sure we're remaking the file?
                            # We need to load up the bin mapper and states and see if they're the same.
                            assign.make_parser_and_process(args=args)
                            import pickle

                            # new_hash = self.hash_args(args=args, path=path, extra=[self.niters, pickle.dumps(assign.binning.mapper), assign.states])
                            # We need to encode it properly to ensure that some OS specific thing doesn't kill us.  Same goes for the args, ultimately.
                            # Mostly, we just need to ensure that we're consistent.
                            new_hash = self.hash_args(
                                args=args,
                                path=path,
                                extra=[
                                    int(self.niters),
                                    codecs.encode(
                                        pickle.dumps(assign.binning.mapper),
                                        "base64"),
                                    base64.b64encode(
                                        str(assign.states).encode()),
                                ],
                            )
                            # Let's check the hash.  If the hash is the same, we don't need to reload.
                            if self.debug_mode is True:
                                print('{:<10}: old hash, new hash -- {}, {}'.
                                      format(name, arg_hash, new_hash))
                            if self.ignore_hash is False and (
                                    arg_hash != new_hash
                                    or self.reanalyze is True):
                                # If the hashes are different, or we need to reanalyze, delete the file.
                                try:
                                    os.remove(
                                        os.path.join(path,
                                                     '{}.h5'.format(name)))
                                except Exception:
                                    pass
                                print('Reanalyzing file {}.h5 for scheme {}.'.
                                      format(name, scheme))
                                # reanalyze_kinetics = True
                                # We want to use the work manager we have here.  Otherwise, just let the tool sort out what it needs, honestly.
                                assign.work_manager = self.work_manager

                                assign.go()
                                assign.data_reader.close()

                                # Stamp w/ hash, then reload as read only.
                                self.__analysis_schemes__[scheme][
                                    name] = self.stamp_hash(
                                        os.path.join(path,
                                                     '{}.h5'.format(name)),
                                        new_hash)
                            del assign
                            # Update the assignment hash.
                            assign_hash = new_hash

                        # Since these are all contained within one tool, now, we want it to just... load everything.
                        if name == 'direct' or name == 'reweight':
                            if name == 'direct':
                                analysis = w_direct.WDirect()
                            if name == 'reweight':
                                analysis = w_reweight.WReweight()

                            analysis_config = {
                                'assignments':
                                os.path.join(path, '{}.h5'.format('assign')),
                                'output':
                                os.path.join(path, '{}.h5'.format(name)),
                                'kinetics':
                                os.path.join(path, '{}.h5'.format(name)),
                            }

                            # Pull from general analysis options, then general SPECIFIC options for each analysis,
                            # then general options for that analysis scheme, then specific options for the analysis type in the scheme.

                            try:
                                analysis_config.update(
                                    self.__settings['kinetics'])
                            except Exception:
                                pass
                            try:
                                analysis_config.update(
                                    self.__settings['w_{}'.format(name)])
                            except Exception:
                                pass
                            try:
                                analysis_config.update(
                                    self.__settings['analysis_schemes'][scheme]
                                    ['kinetics'])
                            except Exception:
                                pass
                            try:
                                analysis_config.update(
                                    self.__settings['analysis_schemes'][scheme]
                                    ['w_{}'.format(name)])
                            except Exception:
                                pass

                            # We're pulling in a default set of arguments, then updating them with arguments from the west.cfg file, if appropriate, after setting the appropriate command
                            # Then, we call the magic function 'make_parser_and_process' with the arguments we've pulled in.
                            # The tool has no real idea it's being called outside of its actual function, and we're good to go.
                            args = ['all']
                            for key, value in analysis_config.items():
                                if key != 'extra':
                                    args.append(
                                        str('--') + str(key).replace('_', '-'))
                                    args.append(str(value))
                            # This is for stuff like disabling correlation analysis, etc.
                            if 'extra' in list(analysis_config.keys()):
                                for value in sorted(analysis_config['extra']):
                                    args.append(
                                        str('--') +
                                        str(value).replace('_', '-'))
                            # We want to not display the averages, so...
                            args.append('--disable-averages')
                            new_hash = self.hash_args(
                                args=args,
                                path=path,
                                extra=[int(self.niters), assign_hash])
                            # if arg_hash != new_hash or self.reanalyze == True or reanalyze_kinetics == True:
                            if self.debug_mode is True:
                                print('{:<10}: old hash, new hash -- {}, {}'.
                                      format(name, arg_hash, new_hash))
                            if self.ignore_hash is False and (
                                    arg_hash != new_hash
                                    or reanalyze_kinetics is True):
                                try:
                                    os.remove(
                                        os.path.join(path,
                                                     '{}.h5'.format(name)))
                                except Exception:
                                    pass
                                print('Reanalyzing file {}.h5 for scheme {}.'.
                                      format(name, scheme))
                                analysis.make_parser_and_process(args=args)
                                # We want to hook into the existing work manager.
                                analysis.work_manager = self.work_manager

                                analysis.go()

                                # Open!
                                self.__analysis_schemes__[scheme][
                                    name] = self.stamp_hash(
                                        os.path.join(path,
                                                     '{}.h5'.format(name)),
                                        new_hash)
                            del analysis

        # Make sure this doesn't get too far out, here.  We need to keep it alive as long as we're actually analyzing things.
        # self.work_manager.shutdown()
        print("")
        print("Complete!")
Example #10
from westpa import rc
from westpa.core import h5io

data_manager = rc.get_data_manager()

# Store west.h5 file in RAM for testing
west_file_name = 'west.h5'
west_file = h5io.WESTPAH5File(west_file_name,
                              driver='core',
                              backing_store=False)

data_manager.we_h5file = west_file
data_manager.we_h5file_version = int(west_file['/'].attrs.get(
    'west_file_format_version', 0))
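h5py's driver='core' keeps the entire file image in memory, and backing_store=False means it is never flushed to disk, so everything vanishes when the file object is closed. A minimal sketch of the same idea with a throwaway scratch file (the name is hypothetical):

# Hypothetical: an in-memory HDF5 file; closing it discards all data.
scratch = h5io.WESTPAH5File('scratch.h5', 'w', driver='core', backing_store=False)
scratch.create_dataset('x', data=[1, 2, 3])
scratch.close()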
Example #11
 def process_args(self, args):
     self.output_file = h5io.WESTPAH5File(args.output, 'w', creating_program=True)
     self.assignments_file = h5io.WESTPAH5File(args.assignments, 'r')
     # Force a build of the transition matrix at the iteration level.
     self.sampling_frequency = 'iteration' if self.assignments_file.attrs['subsampled'] == True else args.sampling_frequency