def run(self, summary_file_name, n_trajs_per_block=None):
        """
        Convert the moves listed in a summary file, block by block.

        Every line of the summary file is parsed with
        ``self.parse_summary_line``; the resulting move information is
        handed to the parent class's ``run`` in blocks of at most
        ``n_trajs_per_block`` lines.

        Parameters
        ----------
        summary_file_name : str
            path of the summary file to read. If ``self.summary_root_dir``
            is None it is set to the directory part of this path.
        n_trajs_per_block : int, optional
            number of summary lines converted per call to the parent's
            ``run``. Default is None, which processes the whole file as a
            single block.

        Raises
        ------
        ValueError
            if ``n_trajs_per_block`` is given but smaller than 1 (the loop
            below could otherwise never advance).
        """
        # read all lines up front and close the file right away instead of
        # leaking the handle for the duration of the conversion
        with open(summary_file_name, 'r') as summary:
            lines = list(summary)

        if self.summary_root_dir is None:
            self.summary_root_dir = os.path.dirname(summary_file_name)

        n_steps = len(lines)

        if n_trajs_per_block is None:
            n_trajs_per_block = n_steps
        elif n_trajs_per_block < 1:
            raise ValueError(
                "n_trajs_per_block must be at least 1, got %r"
                % (n_trajs_per_block,)
            )

        line_num = 0
        while line_num < n_steps:
            if self.report_progress is not None:
                refresh_output("Working on MC step " + str(line_num) + "\n",
                               output_stream=self.report_progress)

            # the last block may be shorter than n_trajs_per_block
            end = min(line_num + n_trajs_per_block, n_steps)
            moves = [self.parse_summary_line(line)
                     for line in lines[line_num:end]]
            super(OneWayTPSConverter, self).run(moves)
            line_num += n_trajs_per_block
    def run(self, summary_file_name, n_trajs_per_block=None):
        """
        Convert the moves listed in a summary file, block by block.

        Every line of the summary file is parsed with
        ``self.parse_summary_line``; the resulting move information is
        handed to the parent class's ``run`` in blocks of at most
        ``n_trajs_per_block`` lines.

        Parameters
        ----------
        summary_file_name : str
            path of the summary file to read. If ``self.summary_root_dir``
            is None it is set to the directory part of this path.
        n_trajs_per_block : int, optional
            number of summary lines converted per call to the parent's
            ``run``. Default is None, which processes the whole file as a
            single block.

        Raises
        ------
        ValueError
            if ``n_trajs_per_block`` is given but smaller than 1 (the loop
            below could otherwise never advance).
        """
        # read all lines up front and close the file right away instead of
        # leaking the handle for the duration of the conversion
        with open(summary_file_name, 'r') as summary:
            lines = list(summary)

        if self.summary_root_dir is None:
            self.summary_root_dir = os.path.dirname(summary_file_name)

        n_steps = len(lines)

        if n_trajs_per_block is None:
            n_trajs_per_block = n_steps
        elif n_trajs_per_block < 1:
            raise ValueError(
                "n_trajs_per_block must be at least 1, got %r"
                % (n_trajs_per_block,)
            )

        line_num = 0
        while line_num < n_steps:
            if self.report_progress is not None:
                refresh_output("Working on MC step " + str(line_num) + "\n",
                               output_stream=self.report_progress)

            # the last block may be shorter than n_trajs_per_block
            end = min(line_num + n_trajs_per_block, n_steps)
            moves = [self.parse_summary_line(line)
                     for line in lines[line_num:end]]
            super(OneWayTPSConverter, self).run(moves)
            line_num += n_trajs_per_block
Exemplo n.º 3
0
    def generate_from_trajectories(
            self,
            ensembles,
            trajectories,
            preconditions=None,
            strategies=None,
            reuse_strategy='avoid',
            engine=None):
        """
        Fill this SampleSet with as many initial samples as possible.

        For every requested ensemble (or group of alternative ensembles)
        that is still missing, the strategies are tried in order until one
        of them yields a sample, which is then appended to this sample set
        under a fresh replica ID.

        Parameters
        ----------
        ensembles : list of :class:`Ensemble` or list of :class:`Ensemble`
            the list of ensembles to be generated. If an element is itself
            a list then one sample for one of the ensembles in that list is
            generated
        trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
            the input trajectories to use
        preconditions : list of str
            steps applied, in the given order, to the list of input
            trajectories before any sample is searched. Possible choices
            are

                1.  `sort-shortest` - sorting by shortest first,
                2.  `sort-median` - the two halves of the length-sorted
                    list interleaved, starting at the median and moving
                    away from it
                3.  `sort-longest` - sorting by the longest first
                4.  `reverse` - reverse the order and
                5.  `mirror` which will add the reversed trajectories to
                    the list in the same order

            Default is `None` which is treated as ``['mirror']``.
        strategies : list of str or list of (str, dict)
            the strategies to try, in order. Each entry is either a
            strategy name or a ``(name, options)`` tuple. Known names are
            `get`, `split`, `extend-complex` and `extend-minimal`. The
            options are passed as keyword arguments to the ensemble
            function, except for the key `exclude`, which holds the
            Ensemble class (or tuple of classes) to be skipped by this
            strategy. Default is `None` which means ``['get', 'split']``.
            The list passed in is not modified.
        reuse_strategy : str
            handed to the `get` and `split` ensemble functions together
            with the list of already used trajectories. If `avoid` then in
            a second attempt the used trajectories are tried. Unless it is
            `all`, trajectories of newly found samples may be recorded as
            used.
        engine : :class:`openpathsampling.engines.DynamicsEngine`
            the engine used for extending moves. The `extend-*` strategies
            are skipped when no engine is given.

        Returns
        -------
        :class:`.SampleSet`
            this sample set (``self``), with the newly found samples
            appended

        Raises
        ------
        RuntimeError
            if an unknown precondition or strategy name is given

        See Also
        --------
        list_initial_ensembles
        """

        implemented_strategies = [
            'get',              # look for existing trajectories
            'split',            # look for existing sub-trajectories
            'extend-complex',   # try to extend long sub-trajectories
            'extend-minimal'    # try to extend short sub-trajectories
        ]

        implemented_preconditions = [
            'sort-shortest',
            'sort-median',
            'sort-longest',
            'reverse',
            'mirror'
        ]

        # strategy name -> `level` argument of the extend function.
        # NOTE(review): `extend-native` is not in implemented_strategies, so
        # the validation below currently rejects it before it gets here.
        extend_levels = {
            'extend-complex': 'complex',
            'extend-minimal': 'minimal',
            'extend-native': 'native'
        }

        if preconditions is None:
            preconditions = ['mirror']

        # create a list of trajectories
        trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

        for pre in preconditions:
            if pre not in implemented_preconditions:
                raise RuntimeError(
                    '%s is not a valid precondition strategy. Choose from %s.' %
                    (pre, implemented_preconditions)
                )

            if pre == 'sort-shortest':
                trajectories = sorted(trajectories, key=len)
            elif pre == 'sort-longest':
                # longest first needs the reversed sort order
                trajectories = sorted(trajectories, key=len, reverse=True)
            elif pre == 'sort-median':
                by_length = sorted(trajectories, key=len)
                # floor division: a slice index must be an integer
                half = len(by_length) // 2
                trajectories = [
                    traj
                    for pair in zip(by_length[half:],
                                    reversed(by_length[:half]))
                    for traj in pair
                ]

                # for an odd count zip drops the longest one; add it back
                if len(by_length) & 1:
                    trajectories.append(by_length[-1])
            elif pre == 'reverse':
                trajectories = list(reversed(trajectories))
            elif pre == 'mirror':
                trajectories = trajectories + \
                    [traj.reversed for traj in trajectories]

        # trajectories of samples that are already in this sample set count
        # as used right from the start
        used_trajectories = []
        for s in self:
            traj = s.trajectory
            if traj in trajectories:
                used_trajectories.append(traj)

        used_trajectories = sorted(used_trajectories, key=len)

        ensembles = [[x] if type(x) is not list else x for x in ensembles]

        # 1. look in the existing sample_set
        ensembles_to_fill, extra_ensembles = self.check_ensembles(ensembles)

        # we reverse because we want to be able to remove elements
        # from the list as we discover samples. This is easier to do
        # when the list is traversed backwards since indices do not
        # change, hence we reverse the list of ensembles and then traverse
        # it in reversed order
        ensembles_to_fill = list(reversed(ensembles_to_fill))

        # 2. try strategies
        if strategies is None:
            # this is the default
            strategies = ['get', 'split']

        # normalize into a new list of (name, options) tuples so that the
        # list handed in by the caller is left untouched
        normalized_strategies = []
        for strategy in strategies:
            if isinstance(strategy, str):
                strategy = (strategy, dict())

            if strategy[0] not in implemented_strategies:
                raise RuntimeError(
                    'Strategy `%s` is not known. Chose from %s.' % (
                        strategy[0],
                        implemented_strategies
                    )
                )

            normalized_strategies.append(strategy)

        strategies = normalized_strategies

        # one character per requested ensemble group for the progress line:
        # '+' already present, '.' missing, '?' being tried,
        # a digit for the number of the strategy that found the sample
        found_samples_str = ''.join(
            '.' if ens_list in ensembles_to_fill else '+'
            for ens_list in ensembles
        )

        for str_idx, (strategy, options) in enumerate(strategies):
            # the options passed on only depend on the strategy;
            # `exclude` is consumed here and not passed on
            opts = {key: value for key, value in options.items()
                    if key not in ['exclude']}

            for idx, ens_list in reversed(list(enumerate(ensembles_to_fill))):
                pos = ensembles.index(ens_list)

                found_samples_str = \
                    found_samples_str[:pos] + \
                    '?' + found_samples_str[pos + 1:]

                refresh_output((
                    '# trying strategy #%d `%s`: still missing %d samples\n'
                    '%s\n'
                ) % (
                        str_idx + 1,
                        strategy,
                        len(ensembles_to_fill),
                        found_samples_str
                    ), ipynb_display_only=True, print_anyway=False)
                if type(ens_list) is not list:
                    ens_list = [ens_list]

                found = False

                # fill only the first in ens_list that can be filled
                for ens in ens_list:
                    # exclude contains the Ensemble classes to be ignored
                    if 'exclude' in options:
                        if isinstance(ens, options['exclude']):
                            continue

                    sample = None

                    if strategy == 'get':
                        sample = ens.get_sample_from_trajectories(
                            trajectories=trajectories,
                            used_trajectories=used_trajectories,
                            reuse_strategy=reuse_strategy,
                            **opts
                        )
                    elif strategy == 'split':
                        sample = ens.split_sample_from_trajectories(
                            trajectories=trajectories,
                            used_trajectories=used_trajectories,
                            reuse_strategy=reuse_strategy,
                            **opts
                        )
                    elif strategy in extend_levels and engine:
                        # not every ensemble provides an extend function
                        if hasattr(ens, 'extend_sample_from_trajectories'):
                            sample = ens.extend_sample_from_trajectories(
                                trajectories=trajectories,
                                engine=engine,
                                level=extend_levels[strategy],
                                **opts
                            )

                    # now, if we've found a sample, add it and
                    # make sure we chose a proper replica ID
                    if sample is not None:
                        found = True

                        # another way would be to look for the smallest not
                        # taken id. This one is simpler
                        if len(self.replicas) > 0:
                            replica_idx = max(0, max(self.replicas) + 1)
                        else:
                            replica_idx = 0

                        sample.replica = replica_idx

                        logger.info((
                            'generating - ensemble `%s` found sample '
                            'replica %d, length %d\n')
                            % (
                                ens.name, sample.replica, len(sample)
                            ))

                        self.append(sample)
                        if reuse_strategy != 'all':
                            # mark the trajectory as used.
                            # NOTE(review): for `*symmetric` strategies the
                            # trajectory is only recorded when its reversed
                            # copy is ALREADY in the used list - this
                            # presumably should be `not in`; confirm against
                            # the ensemble functions before changing it.
                            if sample.trajectory not in used_trajectories and (
                                not reuse_strategy.endswith('symmetric') or
                                sample.trajectory.reversed in used_trajectories
                            ):
                                used_trajectories.append(sample.trajectory)

                        # found a sample in this category so remove it for
                        # other tries
                        del ensembles_to_fill[idx]

                        # do not try other ensembles in this category
                        break

                found_samples_str = \
                    found_samples_str[:pos] + \
                    (str(str_idx + 1)[0] if found else '.') + \
                    found_samples_str[pos + 1:]

        refresh_output((
            '# finished generating: still missing %d samples\n'
            '%s\n'
        ) % (
            len(ensembles_to_fill),
            found_samples_str
        ), ipynb_display_only=True, print_anyway=False)

        return self
Exemplo n.º 4
0
    def initial_conditions_from_trajectories(self, trajectories,
                                             sample_set=None,
                                             strategies=None,
                                             preconditions=None,
                                             reuse_strategy='avoid-symmetric',
                                             engine=None):
        """
        Build initial conditions for this scheme from given trajectories.

        The initial ensembles of this scheme are taken from
        ``list_initial_ensembles`` and the actual search for samples is
        delegated to ``generate_from_trajectories`` of the sample set.
        Afterwards the initial conditions report for the result is shown.

        Parameters
        ----------
        trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
            the input trajectories to use
        sample_set : :class:`.SampleSet`, optional
            if given, samples are added to this sample set. Default is
            None, in which case a new, empty sample set is started.
        strategies : dict
            specifies the options used when ensemble functions are used to
            create a new sample; passed on unchanged.
        preconditions : list of str
            steps used to modify the initial list of trajectories; passed
            on unchanged. Possible choices are

                1.  `sort-shortest` - sorting by shortest first,
                2.  `sort_median` - sorting by the middle one first and
                    then moving away from the median length
                3.  `sort-longest` - sorting by the longest first
                4.  `reverse` - reverse the order and
                5.  `mirror` which will add the reversed trajectories to
                    the list in the same order

            Default is `None` which means to do nothing.
        reuse_strategy : str
            `avoid` avoids using the same trajectory twice and
            `avoid-symmetric` additionally removes reversed copies if
            possible. `all` does not attempt to avoid already existing
            ones. `once` strictly never reuses a trajectory and
            `once-symmetric` also does not use reversed copies.
        engine : :class:`openpathsampling.engines.DyanmicsEngine`
            the engine used for extending moves

        Returns
        -------
        :class:`.SampleSet`
            sampleset with samples for every initial ensemble for this
            scheme that could be satisfied by the given trajectories

        See Also
        --------
        list_initial_ensembles
        check_initial_conditions
        assert_initial_conditions
        """

        # start from an empty sample set unless the caller supplied one
        starting_set = paths.SampleSet([]) if sample_set is None \
            else sample_set

        initial_ensembles = self.list_initial_ensembles()

        filled_set = starting_set.generate_from_trajectories(
            initial_ensembles,
            trajectories,
            preconditions,
            strategies,
            reuse_strategy,
            engine
        )

        report = self.initial_conditions_report(filled_set)
        refresh_output(report, ipynb_display_only=True, print_anyway=False)

        return filled_set
Exemplo n.º 5
0
    def initial_conditions_from_trajectories(self, trajectories,
                                             sample_set=None,
                                             strategies=None,
                                             preconditions=None,
                                             reuse_strategy='avoid-symmetric',
                                             engine=None):
        """
        Build initial conditions for this scheme from given trajectories.

        The initial ensembles of this scheme are taken from
        ``list_initial_ensembles`` and the actual search for samples is
        delegated to ``generate_from_trajectories`` of the sample set.
        Afterwards the initial conditions report for the result is shown.

        Parameters
        ----------
        trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
            the input trajectories to use
        sample_set : :class:`.SampleSet`, optional
            if given, samples are added to this sample set. Default is
            None, in which case a new, empty sample set is started.
        strategies : dict
            specifies the options used when ensemble functions are used to
            create a new sample; passed on unchanged.
        preconditions : list of str
            steps used to modify the initial list of trajectories; passed
            on unchanged. Possible choices are

                1.  `sort-shortest` - sorting by shortest first,
                2.  `sort_median` - sorting by the middle one first and
                    then moving away from the median length
                3.  `sort-longest` - sorting by the longest first
                4.  `reverse` - reverse the order and
                5.  `mirror` which will add the reversed trajectories to
                    the list in the same order

            Default is `None` which means to do nothing.
        reuse_strategy : str
            `avoid` avoids using the same trajectory twice and
            `avoid-symmetric` additionally removes reversed copies if
            possible. `all` does not attempt to avoid already existing
            ones. `once` strictly never reuses a trajectory and
            `once-symmetric` also does not use reversed copies.
        engine : :class:`openpathsampling.engines.DyanmicsEngine`
            the engine used for extending moves

        Returns
        -------
        :class:`.SampleSet`
            sampleset with samples for every initial ensemble for this
            scheme that could be satisfied by the given trajectories

        See Also
        --------
        list_initial_ensembles
        check_initial_conditions
        assert_initial_conditions
        """

        # start from an empty sample set unless the caller supplied one
        starting_set = paths.SampleSet([]) if sample_set is None \
            else sample_set

        initial_ensembles = self.list_initial_ensembles()

        filled_set = starting_set.generate_from_trajectories(
            initial_ensembles,
            trajectories,
            preconditions,
            strategies,
            reuse_strategy,
            engine
        )

        report = self.initial_conditions_report(filled_set)
        refresh_output(report, ipynb_display_only=True, print_anyway=False)

        return filled_set
Exemplo n.º 6
0
    def generate_from_trajectories(self,
                                   ensembles,
                                   trajectories,
                                   preconditions=None,
                                   strategies=None,
                                   reuse_strategy='avoid',
                                   engine=None):
        """
        Fill this SampleSet with as many initial samples as possible.

        For every requested ensemble (or group of alternative ensembles)
        that is still missing, the strategies are tried in order until one
        of them yields a sample, which is then appended to this sample set
        under a fresh replica ID.

        Parameters
        ----------
        ensembles : list of :class:`Ensemble` or list of :class:`Ensemble`
            the list of ensembles to be generated. If an element is itself
            a list then one sample for one of the ensembles in that list is
            generated
        trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
            the input trajectories to use
        preconditions : list of str
            steps applied, in the given order, to the list of input
            trajectories before any sample is searched. Possible choices
            are

                1.  `sort-shortest` - sorting by shortest first,
                2.  `sort-median` - the two halves of the length-sorted
                    list interleaved, starting at the median and moving
                    away from it
                3.  `sort-longest` - sorting by the longest first
                4.  `reverse` - reverse the order and
                5.  `mirror` which will add the reversed trajectories to
                    the list in the same order

            Default is `None` which is treated as ``['mirror']``.
        strategies : list of str or list of (str, dict)
            the strategies to try, in order. Each entry is either a
            strategy name or a ``(name, options)`` tuple. Known names are
            `get`, `split`, `extend-complex` and `extend-minimal`. The
            options are passed as keyword arguments to the ensemble
            function, except for the key `exclude`, which holds the
            Ensemble class (or tuple of classes) to be skipped by this
            strategy. Default is `None` which means ``['get', 'split']``.
            The list passed in is not modified.
        reuse_strategy : str
            handed to the `get` and `split` ensemble functions together
            with the list of already used trajectories. If `avoid` then in
            a second attempt the used trajectories are tried. Unless it is
            `all`, trajectories of newly found samples may be recorded as
            used.
        engine : :class:`openpathsampling.engines.DynamicsEngine`
            the engine used for extending moves. The `extend-*` strategies
            are skipped when no engine is given.

        Returns
        -------
        :class:`.SampleSet`
            this sample set (``self``), with the newly found samples
            appended

        Raises
        ------
        RuntimeError
            if an unknown precondition or strategy name is given

        See Also
        --------
        list_initial_ensembles
        """

        implemented_strategies = [
            'get',  # look for existing trajectories
            'split',  # look for existing sub-trajectories
            'extend-complex',  # try to extend long sub-trajectories
            'extend-minimal'  # try to extend short sub-trajectories
        ]

        implemented_preconditions = [
            'sort-shortest', 'sort-median', 'sort-longest', 'reverse', 'mirror'
        ]

        # strategy name -> `level` argument of the extend function.
        # NOTE(review): `extend-native` is not in implemented_strategies, so
        # the validation below currently rejects it before it gets here.
        extend_levels = {
            'extend-complex': 'complex',
            'extend-minimal': 'minimal',
            'extend-native': 'native'
        }

        if preconditions is None:
            preconditions = ['mirror']

        # create a list of trajectories
        trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

        for pre in preconditions:
            if pre not in implemented_preconditions:
                raise RuntimeError(
                    '%s is not a valid precondition strategy. Choose from %s.'
                    % (pre, implemented_preconditions))

            if pre == 'sort-shortest':
                trajectories = sorted(trajectories, key=len)
            elif pre == 'sort-longest':
                # longest first needs the reversed sort order
                trajectories = sorted(trajectories, key=len, reverse=True)
            elif pre == 'sort-median':
                by_length = sorted(trajectories, key=len)
                # floor division: a slice index must be an integer
                half = len(by_length) // 2
                trajectories = [
                    traj
                    for pair in zip(by_length[half:],
                                    reversed(by_length[:half]))
                    for traj in pair
                ]

                # for an odd count zip drops the longest one; add it back
                if len(by_length) & 1:
                    trajectories.append(by_length[-1])
            elif pre == 'reverse':
                trajectories = list(reversed(trajectories))
            elif pre == 'mirror':
                trajectories = trajectories + \
                    [traj.reversed for traj in trajectories]

        # trajectories of samples that are already in this sample set count
        # as used right from the start
        used_trajectories = []
        for s in self:
            traj = s.trajectory
            if traj in trajectories:
                used_trajectories.append(traj)

        used_trajectories = sorted(used_trajectories, key=len)

        ensembles = [[x] if type(x) is not list else x for x in ensembles]

        # 1. look in the existing sample_set
        ensembles_to_fill, extra_ensembles = self.check_ensembles(ensembles)

        # we reverse because we want to be able to remove elements
        # from the list as we discover samples. This is easier to do
        # when the list is traversed backwards since indices do not
        # change, hence we reverse the list of ensembles and then traverse
        # it in reversed order
        ensembles_to_fill = list(reversed(ensembles_to_fill))

        # 2. try strategies
        if strategies is None:
            # this is the default
            strategies = ['get', 'split']

        # normalize into a new list of (name, options) tuples so that the
        # list handed in by the caller is left untouched
        normalized_strategies = []
        for strategy in strategies:
            if isinstance(strategy, str):
                strategy = (strategy, dict())

            if strategy[0] not in implemented_strategies:
                raise RuntimeError(
                    'Strategy `%s` is not known. Chose from %s.' %
                    (strategy[0], implemented_strategies))

            normalized_strategies.append(strategy)

        strategies = normalized_strategies

        # one character per requested ensemble group for the progress line:
        # '+' already present, '.' missing, '?' being tried,
        # a digit for the number of the strategy that found the sample
        found_samples_str = ''.join(
            '.' if ens_list in ensembles_to_fill else '+'
            for ens_list in ensembles
        )

        for str_idx, (strategy, options) in enumerate(strategies):
            # the options passed on only depend on the strategy;
            # `exclude` is consumed here and not passed on
            opts = {
                key: value
                for key, value in options.items()
                if key not in ['exclude']
            }

            for idx, ens_list in reversed(list(enumerate(ensembles_to_fill))):
                pos = ensembles.index(ens_list)

                found_samples_str = \
                    found_samples_str[:pos] + \
                    '?' + found_samples_str[pos + 1:]

                refresh_output(
                    ('# trying strategy #%d `%s`: still missing %d samples\n'
                     '%s\n') % (str_idx + 1, strategy, len(ensembles_to_fill),
                                found_samples_str),
                    ipynb_display_only=True,
                    print_anyway=False)
                if type(ens_list) is not list:
                    ens_list = [ens_list]

                found = False

                # fill only the first in ens_list that can be filled
                for ens in ens_list:
                    # exclude contains the Ensemble classes to be ignored
                    if 'exclude' in options:
                        if isinstance(ens, options['exclude']):
                            continue

                    sample = None

                    if strategy == 'get':
                        sample = ens.get_sample_from_trajectories(
                            trajectories=trajectories,
                            used_trajectories=used_trajectories,
                            reuse_strategy=reuse_strategy,
                            **opts)
                    elif strategy == 'split':
                        sample = ens.split_sample_from_trajectories(
                            trajectories=trajectories,
                            used_trajectories=used_trajectories,
                            reuse_strategy=reuse_strategy,
                            **opts)
                    elif strategy in extend_levels and engine:
                        # not every ensemble provides an extend function
                        if hasattr(ens, 'extend_sample_from_trajectories'):
                            sample = ens.extend_sample_from_trajectories(
                                trajectories=trajectories,
                                engine=engine,
                                level=extend_levels[strategy],
                                **opts)

                    # now, if we've found a sample, add it and
                    # make sure we chose a proper replica ID
                    if sample is not None:
                        found = True

                        # another way would be to look for the smallest not
                        # taken id. This one is simpler
                        if len(self.replicas) > 0:
                            replica_idx = max(0, max(self.replicas) + 1)
                        else:
                            replica_idx = 0

                        sample.replica = replica_idx

                        logger.info(('generating - ensemble `%s` found sample '
                                     'replica %d, length %d\n') %
                                    (ens.name, sample.replica, len(sample)))

                        self.append(sample)
                        if reuse_strategy != 'all':
                            # mark the trajectory as used.
                            # NOTE(review): for `*symmetric` strategies the
                            # trajectory is only recorded when its reversed
                            # copy is ALREADY in the used list - this
                            # presumably should be `not in`; confirm against
                            # the ensemble functions before changing it.
                            if sample.trajectory not in used_trajectories and (
                                    not reuse_strategy.endswith('symmetric')
                                    or sample.trajectory.reversed
                                    in used_trajectories):
                                used_trajectories.append(sample.trajectory)

                        # found a sample in this category so remove it for
                        # other tries
                        del ensembles_to_fill[idx]

                        # do not try other ensembles in this category
                        break

                found_samples_str = \
                    found_samples_str[:pos] + \
                    (str(str_idx + 1)[0] if found else '.') + \
                    found_samples_str[pos + 1:]

        refresh_output(('# finished generating: still missing %d samples\n'
                        '%s\n') % (len(ensembles_to_fill), found_samples_str),
                       ipynb_display_only=True,
                       print_anyway=False)

        return self