Example #1
    async def _handle_job_query(self, stream, message_length):
        """An asynchronous routine for handling the receiving and processing
        of job queries from a client.

        Parameters
        ----------
        stream: IOStream
            An IO stream used to pass messages between the
            server and client.
        message_length: int
            The length of the message being received.
        """

        encoded_request_id = await stream.read_bytes(message_length)
        client_request_id = encoded_request_id.decode()

        response = None

        if client_request_id not in self._server_request_ids_per_client_id:

            response = PropertyEstimatorException(
                directory='',
                message=f'The {client_request_id} request id was not found '
                f'on the server.')

        else:
            response = self._query_client_request_status(client_request_id)

        encoded_response = response.json().encode()
        length = pack_int(len(encoded_response))

        await stream.write(length + encoded_response)
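
For context, a rough sketch of the client side of this length-prefixed exchange is given below. It is purely illustrative: the `pack_int`/`unpack_int` helpers are re-defined here under an assumed four-byte format, and any message-type header the real wire protocol prepends is omitted.

import struct

# Assumed length-prefix helpers; the real pack_int lives alongside the server
# code and its exact format (width / endianness) is not shown above.
def pack_int(value):
    return struct.pack('<i', value)

def unpack_int(data):
    return struct.unpack('<i', data)

async def query_request_status(stream, client_request_id):
    """Hypothetical client-side counterpart to _handle_job_query, assuming
    `stream` is an already-connected tornado IOStream."""

    encoded_request_id = client_request_id.encode()

    # Send the length-prefixed request id that the server handler expects.
    await stream.write(pack_int(len(encoded_request_id)) + encoded_request_id)

    # Read the length prefix of the reply, then the JSON-encoded response body.
    response_length, = unpack_int(await stream.read_bytes(4))
    encoded_response = await stream.read_bytes(response_length)

    return encoded_response.decode()
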
Example #2
    def execute(self, directory, available_resources):

        if self.substance is None:
            return PropertyEstimatorException(directory=directory,
                                              message='The substance input is non-optional')

        if self.solute_coordinate_file is None:
            return PropertyEstimatorException(directory=directory,
                                              message='The solute coordinate file input is non-optional')

        logging.info(f'Generating coordinates for {self.substance.identifier}: {self.id}')

        molecules, number_of_molecules, exception = self._build_molecule_arrays(directory)

        if exception is not None:
            return exception

        packmol_directory = path.join(directory, 'packmol_files')

        # Create packed box
        topology, positions = packmol.pack_box(molecules=molecules,
                                               number_of_copies=number_of_molecules,
                                               structure_to_solvate=self.solute_coordinate_file,
                                               mass_density=self.mass_density,
                                               verbose=self.verbose_packmol,
                                               working_directory=packmol_directory,
                                               retain_working_files=self.retain_packmol_files)

        if topology is None or positions is None:
            return PropertyEstimatorException(directory=directory,
                                              message='Packmol failed to complete.')

        self._save_results(directory, topology, positions)

        return self._get_output_dictionary()
Example #3
    def execute(self, directory, available_resources):

        import mdtraj

        if len(self.input_coordinate_paths) != len(self.input_trajectory_paths):

            return PropertyEstimatorException(directory=directory, message='There should be the same number of '
                                                                           'coordinate and trajectory paths.')

        if len(self.input_trajectory_paths) == 0:

            return PropertyEstimatorException(directory=directory, message='No trajectories were '
                                                                           'given to concatenate.')

        trajectories = []

        output_coordinate_path = None

        for coordinate_path, trajectory_path in zip(self.input_coordinate_paths,
                                                    self.input_trajectory_paths):

            output_coordinate_path = output_coordinate_path or coordinate_path
            trajectories.append(mdtraj.load_dcd(trajectory_path, coordinate_path))

        self.output_coordinate_path = output_coordinate_path
        output_trajectory = (trajectories[0] if len(trajectories) == 1 else
                             mdtraj.join(trajectories, check_topology=False, discard_overlapping_frames=False))

        self.output_trajectory_path = path.join(directory, 'output_trajectory.dcd')
        output_trajectory.save_dcd(self.output_trajectory_path)

        return self._get_output_dictionary()
Example #4
    def execute(self, directory, available_resources):

        assert len(self.component.components) == 1

        main_component = self.component.components[0]
        amounts = self.full_substance.get_amounts(main_component)

        if len(amounts) != 1:

            return PropertyEstimatorException(
                directory=directory,
                message=
                f'More than one type of amount was defined for component '
                f'{main_component}. Only a single mole fraction should be '
                f'defined.')

        amount = next(iter(amounts))

        if not isinstance(amount, Substance.MoleFraction):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The component {main_component} was given as an '
                f'exact amount, and not a mole fraction')

        self.weighted_value = self._weight_values(amount.value)
        return self._get_output_dictionary()
Example #5
    def execute(self, directory, available_resources):

        if len(self._reference_observables) == 0:

            return PropertyEstimatorException(directory=directory,
                                              message='There were no observables to reweight.')

        if not isinstance(self._reference_observables[0], unit.Quantity):

            return PropertyEstimatorException(directory=directory,
                                              message='The reference_observables input should be '
                                                      'a list of unit.Quantity wrapped ndarrays.')

        observables = self._prepare_observables_array(self._reference_observables)
        observable_unit = self._reference_observables[0].units

        if self.bootstrap_uncertainties:
            error = self._execute_with_bootstrapping(observable_unit, observables=observables)
        else:
            error = self._execute_without_bootstrapping(observable_unit, observables=observables)

        if error is not None:

            error.directory = directory
            return error

        return self._get_output_dictionary()
Example #6
    def execute(self, directory, available_resources):

        if len(self.input_data_path) != 3:

            return PropertyEstimatorException(
                directory=directory,
                message='The input data path should be a tuple '
                'of a path to the data object, directory, and a path '
                'to the force field used to generate it.')

        data_object_path = self.input_data_path[0]
        data_directory = self.input_data_path[1]
        force_field_path = self.input_data_path[2]

        if not path.isfile(data_object_path):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the data object '
                'is invalid: {}'.format(data_object_path))

        if not path.isdir(data_directory):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the data directory '
                'is invalid: {}'.format(data_directory))

        if not path.isfile(force_field_path):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the force field '
                'is invalid: {}'.format(force_field_path))

        with open(data_object_path, 'r') as file:
            data_object = json.load(file, cls=TypedJSONDecoder)

        if not isinstance(data_object, StoredDataCollection):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The data object must be a `StoredDataCollection` '
                f'and not a {type(data_object)}')

        self.collection_data_paths = {}

        for data_key, inner_data_object in data_object.data.items():

            inner_object_path = path.join(directory, f'{data_key}.json')
            inner_directory_path = path.join(data_directory, data_key)

            with open(inner_object_path, 'w') as file:
                json.dump(inner_data_object, file, cls=TypedJSONEncoder)

            self.collection_data_paths[data_key] = (inner_object_path,
                                                    inner_directory_path,
                                                    force_field_path)

        return self._get_output_dictionary()
Example #7
    def execute(self, directory, available_resources):

        if len(self.simulation_data_path) != 3:

            return PropertyEstimatorException(
                directory=directory,
                message='The simulation data path should be a tuple '
                'of a path to the data object, directory, and a path '
                'to the force field used to generate it.')

        data_object_path = self.simulation_data_path[0]
        data_directory = self.simulation_data_path[1]
        force_field_path = self.simulation_data_path[2]

        if not path.isdir(data_directory):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the data directory '
                'is invalid: {}'.format(data_directory))

        if not path.isfile(force_field_path):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the force field '
                'is invalid: {}'.format(force_field_path))

        with open(data_object_path, 'r') as file:
            data_object = json.load(file, cls=TypedJSONDecoder)

        self.substance = data_object.substance
        self.total_number_of_molecules = data_object.total_number_of_molecules

        self.thermodynamic_state = data_object.thermodynamic_state

        self.statistical_inefficiency = data_object.statistical_inefficiency

        self.coordinate_file_path = path.join(data_directory,
                                              data_object.coordinate_file_name)
        self.trajectory_file_path = path.join(data_directory,
                                              data_object.trajectory_file_name)

        self.statistics_file_path = path.join(data_directory,
                                              data_object.statistics_file_name)

        self.force_field_path = force_field_path

        return self._get_output_dictionary()
Example #8
    def _build_molecule_arrays(self, directory):
        """Converts the input substance into a list of openeye OEMol objects and a list
        of counts for how many of each there should be, as determined by the `max_molecules`
        input and the molecules' respective mole fractions.

        Parameters
        ----------
        directory: str
            The directory in which this protocol's working files are being saved.

        Returns
        -------
        list of openeye.oechem.OEMol
            The list of openeye molecules.
        list of int
            The number of each molecule which should be added to the system.
        PropertyEstimatorException, optional
            None if no exceptions occurred, otherwise the exception.
        """

        molecules = []

        for component in self.substance.components:

            molecule = create_molecule_from_smiles(component.smiles)

            if molecule is None:

                return None, None, PropertyEstimatorException(directory=directory,
                                                              message=f'{component} could not be converted '
                                                                      f'to a Molecule')

            molecules.append(molecule)

        # Determine how many molecules of each type will be present in the system.
        molecules_per_component = self.substance.get_molecules_per_component(self.max_molecules)
        number_of_molecules = [0] * self.substance.number_of_components

        for index, component in enumerate(self.substance.components):
            number_of_molecules[index] = molecules_per_component[component.identifier]

        if sum(number_of_molecules) > self.max_molecules:

            return None, None, PropertyEstimatorException(directory=directory,
                                                          message=f'The number of molecules to create '
                                                                  f'({sum(number_of_molecules)}) is greater '
                                                                  f'than the maximum number requested '
                                                                  f'({self.max_molecules}).')

        return molecules, number_of_molecules, None
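
A quick, purely illustrative sketch of the counting logic this method relies on (the exact rounding scheme lives in `Substance.get_molecules_per_component`, so the helper and numbers below are assumptions):

# Illustrative only: a 1:3 binary mixture with max_molecules=1000 should give
# counts that scale with the mole fractions and never exceed max_molecules.
mole_fractions = {'CO': 0.25, 'O': 0.75}  # hypothetical SMILES -> mole fraction
max_molecules = 1000

number_of_molecules = {smiles: int(round(fraction * max_molecules))
                       for smiles, fraction in mole_fractions.items()}

assert sum(number_of_molecules.values()) <= max_molecules  # mirrors the check above
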
Example #9
    def _execute_without_bootstrapping(self, observable_unit, **observables):
        """Calculates the average reweighted observables at the target state,
        using the built-in pymbar method to estimate uncertainties.

        Parameters
        ----------
        observable_unit: propertyestimator.unit.Unit
            The expected unit of the reweighted observable.
        observables: dict of str and numpy.ndarray
            The observables to reweight which have been stripped of their units.

        Returns
        -------
        PropertyEstimatorException, optional
            None if the method executed normally, otherwise the exception that was raised.
        """

        if len(observables) > 1:

            raise ValueError('Currently only a single observable can be reweighted at '
                             'any one time.')

        reference_reduced_potentials, target_reduced_potentials = self._load_reduced_potentials()

        values, uncertainties, self.effective_samples = self._reweight_observables(reference_reduced_potentials,
                                                                                   target_reduced_potentials,
                                                                                   **observables)

        observable_key = next(iter(observables))
        uncertainty = uncertainties[observable_key]

        if self.effective_samples < self.required_effective_samples:

            return PropertyEstimatorException(message=f'{self.id}: There were not enough effective samples '
                                                      f'to reweight - {self.effective_samples} < '
                                                      f'{self.required_effective_samples}')

        self.value = EstimatedQuantity(values[observable_key] * observable_unit,
                                       uncertainty * observable_unit,
                                       self.id)
Example #10
    def execute(self, directory, available_resources):

        logging.info('Subsampling statistics: {}'.format(self.id))

        if self.input_statistics_path is None:

            return PropertyEstimatorException(directory=directory,
                                              message='The ExtractUncorrelatedStatisticsData protocol '
                                                      'requires a previously calculated statistics file')

        statistics_array = StatisticsArray.from_pandas_csv(self.input_statistics_path)

        uncorrelated_indices = timeseries.get_uncorrelated_indices(len(statistics_array) - self.equilibration_index,
                                                                   self.statistical_inefficiency)

        uncorrelated_indices = [index + self.equilibration_index for index in uncorrelated_indices]
        uncorrelated_statistics = StatisticsArray.from_existing(statistics_array, uncorrelated_indices)

        self.output_statistics_path = path.join(directory, 'uncorrelated_statistics.csv')
        uncorrelated_statistics.to_pandas_csv(self.output_statistics_path)

        logging.info('Statistics subsampled: {}'.format(self.id))

        self.number_of_uncorrelated_samples = len(uncorrelated_statistics)

        return self._get_output_dictionary()
Example #11
    def execute(self, directory, available_resources):

        logging.info('Extracting {}: {}'.format(self.statistics_type, self.id))

        if self.statistics_path is None:

            return PropertyEstimatorException(directory=directory,
                                              message='The ExtractAverageStatistic protocol '
                                                       'requires a previously calculated statistics file')

        self._statistics = statistics.StatisticsArray.from_pandas_csv(self.statistics_path)

        if self.statistics_type not in self._statistics:

            return PropertyEstimatorException(directory=directory,
                                              message=f'The {self.statistics_path} statistics file contains no '
                                                      f'data of type {self.statistics_type}.')

        values = self._statistics[self.statistics_type]

        statistics_unit = values[0].units
        unitless_values = values.to(statistics_unit).magnitude

        divisor = self.divisor

        if isinstance(self.divisor, unit.Quantity):
            statistics_unit /= self.divisor.units
            divisor = self.divisor.magnitude

        unitless_values = np.array(unitless_values) / divisor

        unitless_values, self.equilibration_index, self.statistical_inefficiency = \
            timeseries.decorrelate_time_series(unitless_values)

        final_value, final_uncertainty = bootstrap(self._bootstrap_function,
                                                   self.bootstrap_iterations,
                                                   self.bootstrap_sample_size,
                                                   values=unitless_values)

        self.uncorrelated_values = unitless_values * statistics_unit

        self.value = EstimatedQuantity(final_value * statistics_unit,
                                       final_uncertainty * statistics_unit, self.id)

        logging.info('Extracted {}: {}'.format(self.statistics_type, self.id))

        return self._get_output_dictionary()
Example #12
    def execute(self, directory, available_resources):

        if self.trajectory_path is None:

            return PropertyEstimatorException(directory=directory,
                                              message='The AverageTrajectoryProperty protocol '
                                                       'requires a previously calculated trajectory')

        return self._get_output_dictionary()
Example #13
    def execute(self, directory, available_resources):

        if len(self.values) < 1:
            return PropertyEstimatorException(
                directory, 'There were no gradients to add together')

        if not all(isinstance(x, type(self.values[0])) for x in self.values):

            return PropertyEstimatorException(
                directory, f'All values to add together must be '
                f'the same type ({" ".join(map(str, self.values))}).')

        self.result = self.values[0]

        for value in self.values[1:]:
            self.result += value

        return self._get_output_dictionary()
Example #14
    def _execute_with_bootstrapping(self, observable_unit, **observables):
        """Calculates the average reweighted observables at the target state,
        using bootstrapping to estimate uncertainties.

        Parameters
        ----------
        observable_unit: propertyestimator.unit.Unit
            The expected unit of the reweighted observable.
        observables: dict of str and numpy.ndarray
            The observables to reweight which have been stripped of their units.

        Returns
        -------
        PropertyEstimatorException, optional
            None if the method executed normally, otherwise the exception that was raised.
        """

        reference_reduced_potentials, target_reduced_potentials = self._load_reduced_potentials()

        frame_counts = np.array([len(observable) for observable in self._reference_observables])

        # Construct a dummy mbar object to get out the number of effective samples.
        mbar = self._construct_mbar_object(reference_reduced_potentials)

        (self.effective_samples,
         effective_sample_indices) = self._compute_effective_samples(mbar, target_reduced_potentials)

        if self.effective_samples < self.required_effective_samples:

            return PropertyEstimatorException(message=f'{self.id}: There were not enough effective samples '
                                                      f'to reweight - {self.effective_samples} < '
                                                      f'{self.required_effective_samples}')

        # Transpose the observables ready for bootstrapping.
        reference_reduced_potentials = np.transpose(reference_reduced_potentials)
        target_reduced_potentials = np.transpose(target_reduced_potentials)

        transposed_observables = {}

        for observable_key in observables:
            transposed_observables[observable_key] = np.transpose(observables[observable_key])

        value, uncertainty = bootstrap(self._bootstrap_function,
                                       self.bootstrap_iterations,
                                       self.bootstrap_sample_size,
                                       frame_counts,
                                       reference_reduced_potentials=reference_reduced_potentials,
                                       target_reduced_potentials=target_reduced_potentials,
                                       **transposed_observables)

        self.effective_sample_indices = effective_sample_indices

        self.value = EstimatedQuantity(value * observable_unit,
                                       uncertainty * observable_unit,
                                       self.id)
Example #15
    def process_failed_property(physical_property, **_):
        """Return a result as if the property could not be estimated.
        """

        return_object = CalculationLayerResult()
        return_object.property_id = physical_property.id

        return_object.exception = PropertyEstimatorException(
            directory='', message='Failure Message')

        return return_object
Example #16
    def execute(self, directory, available_resources):

        if self.statistics_paths is None or len(self.statistics_paths) == 0:
            return PropertyEstimatorException(directory, 'No statistics paths were provided.')

        if len(self.frame_counts) > 0 and len(self.statistics_paths) != 1:
            return PropertyEstimatorException(directory, 'The frame counts input may only be used when '
                                                         'a single path is passed to the `statistics_paths` '
                                                         'input.')

        if self.statistics_type == ObservableType.KineticEnergy:
            return PropertyEstimatorException(directory, 'Kinetic energies cannot be reweighted.')

        statistics_arrays = [StatisticsArray.from_pandas_csv(file_path) for file_path in self.statistics_paths]

        self._reference_observables = []

        if len(self.frame_counts) > 0:

            statistics_array = statistics_arrays[0]
            current_index = 0

            for frame_count in self.frame_counts:

                if frame_count <= 0:
                    return PropertyEstimatorException(directory, 'The frame counts must be > 0.')

                observables = statistics_array[self.statistics_type][current_index:current_index + frame_count]
                self._reference_observables.append(observables)

                current_index += frame_count

        else:

            for statistics_array in statistics_arrays:

                observables = statistics_array[self.statistics_type]
                self._reference_observables.append(observables)

        return super(ReweightStatistics, self).execute(directory, available_resources)
Example #17
    def execute(self, directory, available_resources):

        import mdtraj
        from mdtraj.formats.dcd import DCDTrajectoryFile
        from mdtraj.utils import in_units_of

        logging.info('Subsampling trajectory: {}'.format(self.id))

        if self.input_trajectory_path is None:

            return PropertyEstimatorException(directory=directory,
                                              message='The ExtractUncorrelatedTrajectoryData protocol '
                                                       'requires a previously calculated trajectory')

        # Set the output path.
        self.output_trajectory_path = path.join(directory, 'uncorrelated_trajectory.dcd')

        # Load in the trajectories topology.
        topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology
        # Parse the internal mdtraj distance unit. While private access is undesirable,
        # this is never publicly defined and I believe this route to be preferable
        # over hard coding this unit.
        base_distance_unit = mdtraj.Trajectory._distance_unit

        # Determine the stride that needs to be taken to yield uncorrelated frames.
        stride = timeseries.get_uncorrelated_stride(self.statistical_inefficiency)
        frame_count = 0

        with DCDTrajectoryFile(self.input_trajectory_path, 'r') as input_file:

            # Skip the equilibration configurations.
            if self.equilibration_index > 0:
                input_file.seek(self.equilibration_index)

            with DCDTrajectoryFile(self.output_trajectory_path, 'w') as output_file:

                for frame in self._yield_frame(input_file, topology, stride):

                    output_file.write(
                        xyz=in_units_of(frame.xyz, base_distance_unit, output_file.distance_unit),
                        cell_lengths=in_units_of(frame.unitcell_lengths, base_distance_unit, output_file.distance_unit),
                        cell_angles=frame.unitcell_angles[0]
                    )

                    frame_count += 1

        self.number_of_uncorrelated_samples = frame_count

        logging.info('Trajectory subsampled: {}'.format(self.id))

        return self._get_output_dictionary()
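
The number of frames written by the loop above follows from the equilibration index and the stride. A minimal sketch of that arithmetic, under the assumption that `timeseries.get_uncorrelated_stride` simply rounds the statistical inefficiency up to a whole number of frames:

import math

# Approximate frame count after discarding equilibration and striding, under
# the assumption stated above (stride = ceil(statistical inefficiency)).
def expected_uncorrelated_frames(total_frames, equilibration_index, statistical_inefficiency):
    stride = math.ceil(statistical_inefficiency)
    return len(range(equilibration_index, total_frames, stride))
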
Example #18
    def execute(self, directory, available_resources):

        filtered_components = []
        total_mole_fraction = 0.0

        for component in self.input_substance.components:

            if component.role != self.component_role:
                continue

            filtered_components.append(component)

            amounts = self.input_substance.get_amounts(component)

            for amount in amounts:

                if not isinstance(amount, Substance.MoleFraction):
                    continue

                total_mole_fraction += amount.value

        if (self.expected_components != UNDEFINED
                and self.expected_components != len(filtered_components)):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The filtered substance does not contain the expected '
                f'number of components ({self.expected_components}) - '
                f'{filtered_components}')

        inverse_mole_fraction = 1.0 if np.isclose(
            total_mole_fraction, 0.0) else 1.0 / total_mole_fraction

        self.filtered_substance = Substance()

        for component in filtered_components:

            amounts = self.input_substance.get_amounts(component)

            for amount in amounts:

                if isinstance(amount, Substance.MoleFraction):
                    amount = Substance.MoleFraction(amount.value *
                                                    inverse_mole_fraction)

                self.filtered_substance.add_component(component, amount)

        return self._get_output_dictionary()
Example #19
    def execute(self, directory, available_resources):

        yaml_filename = os.path.join(directory, 'yank.yaml')

        # Create the yank yaml input file from a dictionary of options.
        with open(yaml_filename, 'w') as file:
            yaml.dump(self._get_full_input_dictionary(available_resources),
                      file,
                      sort_keys=False)

        setup_only = self.setup_only

        # Yank is not safe to be called from anything other than the main thread.
        # If the current thread is not detected as the main one, then yank should
        # be spun up in a new process which should itself be safe to run yank in.
        if threading.current_thread() is threading.main_thread():
            logging.info('Launching YANK in the main thread.')
            free_energy, free_energy_uncertainty = self._run_yank(
                directory, available_resources, setup_only)
        else:

            from multiprocessing import Process, Queue

            logging.info('Launching YANK in a new process.')

            # Create a queue to pass the results back to the main process.
            queue = Queue()
            # Create the process within which yank will run.
            process = Process(
                target=BaseYankProtocol._run_yank_as_process,
                args=[queue, directory, available_resources, setup_only])

            # Start the process and gather back the output.
            process.start()
            free_energy, free_energy_uncertainty, error = queue.get()
            process.join()

            if error is not None:
                return PropertyEstimatorException(directory, error)

        self.estimated_free_energy = EstimatedQuantity(
            openmm_quantity_to_pint(free_energy),
            openmm_quantity_to_pint(free_energy_uncertainty), self._id)

        return self._get_output_dictionary()
Example #20
def test_serialize_layer_result():
    """Tests that the `CalculationLayerResult` can be properly
    serialized and deserialized."""

    dummy_result = CalculationLayerResult()

    dummy_result.property_id = str(uuid.uuid4())

    dummy_result.calculated_property = create_dummy_property(Density)
    dummy_result.exception = PropertyEstimatorException()

    dummy_result.data_to_store = [('dummy_object_path', 'dummy_directory')]

    dummy_result_json = json.dumps(dummy_result, cls=TypedJSONEncoder)

    recreated_result = json.loads(dummy_result_json, cls=TypedJSONDecoder)
    recreated_result_json = json.dumps(recreated_result, cls=TypedJSONEncoder)

    assert recreated_result_json == dummy_result_json
Example #21
    def execute(self, directory, available_resources):

        if len(self.input_statistics_paths) == 0:

            return PropertyEstimatorException(directory=directory, message='No statistics arrays were '
                                                                           'given to concatenate.')

        arrays = [StatisticsArray.from_pandas_csv(file_path) for
                  file_path in self.input_statistics_paths]

        if len(arrays) > 1:
            output_array = StatisticsArray.join(*arrays)
        else:
            output_array = arrays[0]

        self.output_statistics_path = path.join(directory, 'output_statistics.csv')
        output_array.to_pandas_csv(self.output_statistics_path)

        return self._get_output_dictionary()
Example #22
    def execute(self, directory, available_resources):

        if self.forward_parameter_value < self.reverse_parameter_value:

            return PropertyEstimatorException(directory=directory,
                                              message=f'The forward parameter value ({self.forward_parameter_value}) must '
                                                      f'be larger than the reverse value ({self.reverse_parameter_value}).')

        reverse_value = self.reverse_observable_value
        forward_value = self.forward_observable_value

        if isinstance(reverse_value, EstimatedQuantity):
            reverse_value = reverse_value.value

        if isinstance(forward_value, EstimatedQuantity):
            forward_value = forward_value.value

        gradient = ((forward_value - reverse_value) /
                    (self.forward_parameter_value - self.reverse_parameter_value))

        self.gradient = ParameterGradient(self.parameter_key, gradient)

        return self._get_output_dictionary()
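
As a concrete check of the finite-difference arithmetic above (purely illustrative numbers, not taken from any real estimation):

# The gradient is the change in the observable over the change in the perturbed
# parameter, i.e. a central difference about the unperturbed value.
forward_observable, reverse_observable = 10.4, 10.0  # observable at the +/- perturbed parameter
forward_parameter, reverse_parameter = 1.05, 0.95    # the perturbed parameter values

gradient = (forward_observable - reverse_observable) / (forward_parameter - reverse_parameter)
assert abs(gradient - 4.0) < 1e-12
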
Example #23
    def execute(self, directory, available_resources):

        from simtk.openmm import XmlSerializer

        solute_components = [
            component for component in self.solute.components
            if component.role == Substance.ComponentRole.Solute
        ]

        solvent_1_components = [
            component for component in self.solvent_1.components
            if component.role == Substance.ComponentRole.Solvent
        ]

        solvent_2_components = [
            component for component in self.solvent_2.components
            if component.role == Substance.ComponentRole.Solvent
        ]

        if len(solute_components) != 1:
            return PropertyEstimatorException(
                directory,
                'There must only be a single component marked as a solute.')
        if len(solvent_1_components) == 0 and len(solvent_2_components) == 0:
            return PropertyEstimatorException(
                directory, 'At least one of the solvents must not be vacuum.')

        # Because of quirks in where Yank looks for files while doing temporary
        # directory changes, we need to copy the coordinate files locally so
        # they are correctly found.
        shutil.copyfile(
            self.solvent_1_coordinates,
            os.path.join(directory, self._local_solvent_1_coordinates))
        shutil.copyfile(self.solvent_1_system,
                        os.path.join(directory, self._local_solvent_1_system))

        shutil.copyfile(
            self.solvent_2_coordinates,
            os.path.join(directory, self._local_solvent_2_coordinates))
        shutil.copyfile(self.solvent_2_system,
                        os.path.join(directory, self._local_solvent_2_system))

        # Disable the PBC of any solvents which should be treated
        # as vacuum.
        vacuum_system_path = None

        if len(solvent_1_components) == 0:
            vacuum_system_path = self._local_solvent_1_system
        elif len(solvent_2_components) == 0:
            vacuum_system_path = self._local_solvent_2_system

        if vacuum_system_path is not None:

            logging.info(
                f'Disabling the periodic boundary conditions in {vacuum_system_path} '
                f'by setting the cutoff type to NoCutoff')

            with open(os.path.join(directory, vacuum_system_path),
                      'r') as file:
                vacuum_system = XmlSerializer.deserialize(file.read())

            disable_pbc(vacuum_system)

            with open(os.path.join(directory, vacuum_system_path),
                      'w') as file:
                file.write(XmlSerializer.serialize(vacuum_system))

        # Set up the yank input file.
        result = super(SolvationYankProtocol,
                       self).execute(directory, available_resources)

        if isinstance(result, PropertyEstimatorException):
            return result

        if self.setup_only:
            return self._get_output_dictionary()

        solvent_1_yank_path = os.path.join(directory, 'experiments',
                                           'solvent1.nc')
        solvent_2_yank_path = os.path.join(directory, 'experiments',
                                           'solvent2.nc')

        self.solvent_1_trajectory_path = os.path.join(directory,
                                                      'solvent1.dcd')
        self.solvent_2_trajectory_path = os.path.join(directory,
                                                      'solvent2.dcd')

        self._extract_trajectory(solvent_1_yank_path,
                                 self.solvent_1_trajectory_path)
        self._extract_trajectory(solvent_2_yank_path,
                                 self.solvent_2_trajectory_path)

        return self._get_output_dictionary()
Example #24
    def execute(self, directory, available_resources):

        import mdtraj

        from openforcefield.topology import Molecule, Topology

        logging.info(f'Calculating the reduced gradient potentials for {self.parameter_key}: {self._id}')

        if len(self.reference_force_field_paths) != 1 and self.use_subset_of_force_field:

            return PropertyEstimatorException(directory, 'A single reference force field must be '
                                                         'provided when calculating the reduced '
                                                         'potentials using a subset of the full force field.')

        if len(self.reference_statistics_path) <= 0 and self.use_subset_of_force_field:

            return PropertyEstimatorException(directory, 'The path to the statistics evaluated using '
                                                         'the full force field must be provided.')

        with open(self.force_field_path) as file:
            target_force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(target_force_field_source, SmirnoffForceFieldSource):

            return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                         'this protocol.')

        target_force_field = target_force_field_source.to_force_field()

        trajectory = mdtraj.load_dcd(self.trajectory_file_path,
                                     self.coordinate_file_path)

        unique_molecules = []

        for component in self.substance.components:

            molecule = Molecule.from_smiles(smiles=component.smiles)
            unique_molecules.append(molecule)

        pdb_file = app.PDBFile(self.coordinate_file_path)
        topology = Topology.from_openmm(pdb_file.topology, unique_molecules=unique_molecules)

        # If we are using only a subset of the system object, load in the reference
        # statistics containing the full system energies to correct the output
        # forward and reverse potential energies.
        reference_statistics = None
        subset_energy_corrections = None

        if self.use_subset_of_force_field:
            reference_statistics = StatisticsArray.from_pandas_csv(self.reference_statistics_path)

        # Compute the reduced reference energy if any reference force field files
        # have been provided.
        self.reference_potential_paths = []

        for index, reference_force_field_path in enumerate(self.reference_force_field_paths):

            with open(reference_force_field_path) as file:
                reference_force_field_source = ForceFieldSource.parse_json(file.read())

            if not isinstance(reference_force_field_source, SmirnoffForceFieldSource):
                return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                             'this protocol.')

            reference_force_field = reference_force_field_source.to_force_field()
            reference_system, _ = self._build_reduced_system(reference_force_field, topology)

            reference_potentials_path = path.join(directory, f'reference_{index}.csv')

            self._evaluate_reduced_potential(reference_system, trajectory,
                                             reference_potentials_path,
                                             available_resources)

            self.reference_potential_paths.append(reference_potentials_path)

            if reference_statistics is not None:

                subset_energies = StatisticsArray.from_pandas_csv(reference_potentials_path)
                subset_energy_corrections = (reference_statistics[ObservableType.PotentialEnergy] -
                                             subset_energies[ObservableType.PotentialEnergy])

                subset_energies[ObservableType.PotentialEnergy] = reference_statistics[ObservableType.PotentialEnergy]
                subset_energies.to_pandas_csv(reference_potentials_path)

        # Build the slightly perturbed system.
        reverse_system, reverse_parameter_value = self._build_reduced_system(target_force_field,
                                                                             topology,
                                                                             -self.perturbation_scale)

        forward_system, forward_parameter_value = self._build_reduced_system(target_force_field,
                                                                             topology,
                                                                             self.perturbation_scale)

        self.reverse_parameter_value = openmm_quantity_to_pint(reverse_parameter_value)
        self.forward_parameter_value = openmm_quantity_to_pint(forward_parameter_value)

        # Calculate the reduced potentials.
        self.reverse_potentials_path = path.join(directory, 'reverse.csv')
        self.forward_potentials_path = path.join(directory, 'forward.csv')

        self._evaluate_reduced_potential(reverse_system, trajectory, self.reverse_potentials_path,
                                         available_resources, subset_energy_corrections)
        self._evaluate_reduced_potential(forward_system, trajectory, self.forward_potentials_path,
                                         available_resources, subset_energy_corrections)

        logging.info('Finished calculating the reduced gradient potentials.')

        return self._get_output_dictionary()
Example #25
    def _query_client_request_status(self, client_request_id):
        """Queries the current status of a client request by querying
        the state of the individual server requests it was split into.

        Parameters
        ----------
        client_request_id: str
            The id of the client request to query.

        Returns
        -------
        PropertyEstimatorResult
            The current results of the client request.
        """

        request_results = PropertyEstimatorResult(result_id=client_request_id)

        for server_request_id in self._server_request_ids_per_client_id[
                client_request_id]:

            server_request = None

            if server_request_id in self._queued_calculations:
                server_request = self._queued_calculations[server_request_id]

            elif server_request_id in self._finished_calculations:

                server_request = self._finished_calculations[server_request_id]

                if len(server_request.queued_properties) > 0:

                    return PropertyEstimatorException(
                        message=
                        f'An internal error occurred - the {server_request_id} '
                        f'request was prematurely marked as finished.')

            else:

                return PropertyEstimatorException(
                    message=
                    f'An internal error occurred - the {server_request_id} '
                    f'request was not found on the server.')

            for physical_property in server_request.queued_properties:

                substance_id = physical_property.substance.identifier

                if substance_id not in request_results.queued_properties:
                    request_results.queued_properties[substance_id] = []

                request_results.queued_properties[substance_id].append(
                    physical_property)

            for substance_id in server_request.unsuccessful_properties:

                physical_property = server_request.unsuccessful_properties[
                    substance_id]

                if substance_id not in request_results.unsuccessful_properties:
                    request_results.unsuccessful_properties[substance_id] = []

                request_results.unsuccessful_properties[substance_id].append(
                    physical_property)

            for substance_id in server_request.estimated_properties:

                physical_properties = server_request.estimated_properties[
                    substance_id]

                if substance_id not in request_results.estimated_properties:
                    request_results.estimated_properties[substance_id] = []

                request_results.estimated_properties[substance_id].extend(
                    physical_properties)

            request_results.exceptions.extend(server_request.exceptions)

        return request_results
Example #26
    def execute(self, directory, available_resources):

        # We handle most things in OMM units here.
        temperature = self.thermodynamic_state.temperature
        openmm_temperature = pint_quantity_to_openmm(temperature)

        pressure = None if self.ensemble == Ensemble.NVT else self.thermodynamic_state.pressure
        openmm_pressure = pint_quantity_to_openmm(pressure)

        if openmm_temperature is None:

            return PropertyEstimatorException(
                directory=directory,
                message='A temperature must be set to perform '
                'a simulation in any ensemble')

        if Ensemble(self.ensemble) == Ensemble.NPT and openmm_pressure is None:

            return PropertyEstimatorException(
                directory=directory,
                message='A pressure must be set to perform an NPT simulation')

        if Ensemble(
                self.ensemble) == Ensemble.NPT and self.enable_pbc is False:

            return PropertyEstimatorException(
                directory=directory,
                message='PBC must be enabled when running in the NPT ensemble.'
            )

        logging.info(f'Performing a simulation in the {self.ensemble} ensemble: {self.id}')

        # Set up the internal file paths
        self._checkpoint_path = os.path.join(directory, 'checkpoint.json')
        self._state_path = os.path.join(directory, 'checkpoint_state.xml')

        self._local_trajectory_path = os.path.join(directory, 'trajectory.dcd')
        self._local_statistics_path = os.path.join(directory,
                                                   'openmm_statistics.csv')

        # Set up the simulation objects.
        if self._context is None or self._integrator is None:

            self._context, self._integrator = self._setup_simulation_objects(
                openmm_temperature, openmm_pressure, available_resources)

        # Save a copy of the starting configuration if it doesn't already exist
        local_input_coordinate_path = os.path.join(directory, 'input.pdb')

        if not os.path.isfile(local_input_coordinate_path):

            input_pdb_file = app.PDBFile(self.input_coordinate_file)

            with open(local_input_coordinate_path, 'w+') as configuration_file:
                app.PDBFile.writeFile(input_pdb_file.topology,
                                      input_pdb_file.positions,
                                      configuration_file)

        # Run the simulation.
        result = self._simulate(directory, self._context, self._integrator)

        if isinstance(result, PropertyEstimatorException):
            return result

        # Set the output paths.
        self.trajectory_file_path = self._local_trajectory_path
        self.statistics_file_path = os.path.join(directory, 'statistics.csv')

        # Save out the final statistics in the property estimator format
        self._save_final_statistics(self.statistics_file_path, temperature,
                                    pressure)

        return self._get_output_dictionary()
Example #27
    def execute(self, directory, available_resources):

        logging.info('Reweighting dielectric: {}'.format(self.id))

        if len(self.reference_dipole_moments) == 0:
            return PropertyEstimatorException(
                directory=directory,
                message='There were no dipole moments to reweight.')

        if len(self.reference_volumes) == 0:
            return PropertyEstimatorException(
                directory=directory,
                message='There were no volumes to reweight.')

        if (not isinstance(self.reference_dipole_moments[0], unit.Quantity)
                or not isinstance(self.reference_volumes[0], unit.Quantity)):

            return PropertyEstimatorException(
                directory=directory,
                message='The reference observables should be '
                'a list of unit.Quantity wrapped ndarrays.')

        if len(self.reference_dipole_moments) != len(self.reference_volumes):
            return PropertyEstimatorException(
                directory=directory,
                message='The number of reference dipoles does '
                'not match the number of reference volumes.')

        for reference_dipoles, reference_volumes in zip(
                self.reference_dipole_moments, self.reference_volumes):

            if len(reference_dipoles) == len(reference_volumes):
                continue

            return PropertyEstimatorException(
                directory=directory,
                message='The number of reference dipoles does '
                'not match the number of reference volumes.')

        self._reference_observables = self.reference_dipole_moments

        dipole_moments = self._prepare_observables_array(
            self.reference_dipole_moments)
        dipole_moments_sqr = np.array([[
            np.dot(dipole, dipole) for dipole in np.transpose(dipole_moments)
        ]])

        volumes = self._prepare_observables_array(self.reference_volumes)

        if self.bootstrap_uncertainties:
            error = self._execute_with_bootstrapping(
                unit.dimensionless,
                dipoles=dipole_moments,
                dipoles_sqr=dipole_moments_sqr,
                volumes=volumes)
        else:

            return PropertyEstimatorException(
                directory=directory,
                message=
                'Dielectric constant can only be reweighted in conjunction '
                'with bootstrapped uncertainties.')

        if error is not None:

            error.directory = directory
            return error

        return self._get_output_dictionary()
Example #28
    def _schedule_server_request(self, server_request):
        """Schedules the estimation of the requested properties.

        This method will recursively cascade through all allowed calculation
        layers or until all properties have been calculated.

        Parameters
        ----------
        server_request : PropertyEstimatorServer.ServerEstimationRequest
            The object containing instructions about which calculations
            should be performed.
        """

        if len(server_request.options.allowed_calculation_layers) == 0 or \
           len(server_request.queued_properties) == 0:

            # Move any remaining properties to the unsuccessful list.
            for physical_property in server_request.queued_properties:

                substance_id = physical_property.substance.identifier

                if substance_id not in server_request.unsuccessful_properties:
                    server_request.unsuccessful_properties[substance_id] = []

                server_request.unsuccessful_properties[substance_id].append(
                    physical_property)

            server_request.queued_properties = []

            self._queued_calculations.pop(server_request.id)
            self._finished_calculations[server_request.id] = server_request

            logging.info(f'Finished server request {server_request.id}')
            return

        current_layer_type = server_request.options.allowed_calculation_layers.pop(
            0)

        if current_layer_type not in available_layers:

            # Kill all remaining properties if we reach an unsupported calculation layer.
            error_object = PropertyEstimatorException(
                message=f'The {current_layer_type} layer is not '
                f'supported by / available on the server.')

            server_request.exceptions.append(error_object)

            server_request.options.allowed_calculation_layers.append(
                current_layer_type)
            server_request.queued_properties = []

            self._schedule_server_request(server_request)
            return

        logging.info(
            f'Launching server request {server_request.id} using the {current_layer_type} layer'
        )

        layer_directory = path.join(self._working_directory,
                                    current_layer_type, server_request.id)

        if not path.isdir(layer_directory):
            makedirs(layer_directory)

        current_layer = available_layers[current_layer_type]

        current_layer.schedule_calculation(self._calculation_backend,
                                           self._storage_backend,
                                           layer_directory, server_request,
                                           self._schedule_server_request)
Example #29
    def execute(self, directory, available_resources):

        logging.info('Reweighting dielectric: {}'.format(self.id))

        if len(self._reference_observables) == 0:
            return PropertyEstimatorException(
                directory=directory,
                message='There were no dipole moments to reweight.')

        if len(self._reference_volumes) == 0:
            return PropertyEstimatorException(
                directory=directory,
                message='There were no volumes to reweight.')

        if (not isinstance(self._reference_observables[0], unit.Quantity)
                or not isinstance(self._reference_volumes[0], unit.Quantity)):

            return PropertyEstimatorException(
                directory=directory,
                message='The reference observables should be '
                'a list of unit.Quantity wrapped ndarrays.')

        if len(self._reference_observables) != len(self._reference_volumes):
            return PropertyEstimatorException(
                directory=directory,
                message='The number of reference dipoles does '
                'not match the number of reference volumes.')

        for reference_dipoles, reference_volumes in zip(
                self._reference_observables, self._reference_volumes):

            if len(reference_dipoles) == len(reference_volumes):
                continue

            return PropertyEstimatorException(
                directory=directory,
                message='The number of reference dipoles does '
                'not match the number of reference volumes.')

        dipole_moments = self._prepare_observables_array(
            self._reference_observables)
        dipole_moments_sqr = np.array([[
            np.dot(dipole, dipole) for dipole in np.transpose(dipole_moments)
        ]])

        volumes = self._prepare_observables_array(self._reference_volumes)

        if self._bootstrap_uncertainties:

            reference_potentials = np.transpose(
                np.array(self._reference_reduced_potentials))
            target_potentials = np.transpose(
                np.array(self._target_reduced_potentials))

            frame_counts = np.array([
                len(observable) for observable in self._reference_observables
            ])

            # Construct an mbar object to get out the number of effective samples.
            import pymbar
            mbar = pymbar.MBAR(self._reference_reduced_potentials,
                               frame_counts,
                               verbose=False,
                               relative_tolerance=1e-12)

            effective_samples = mbar.computeEffectiveSampleNumber().max()

            value, uncertainty = bootstrap(
                self._bootstrap_function,
                self._bootstrap_iterations,
                self._bootstrap_sample_size,
                frame_counts,
                reference_reduced_potentials=reference_potentials,
                target_reduced_potentials=target_potentials,
                dipoles=np.transpose(dipole_moments),
                dipoles_sqr=np.transpose(dipole_moments_sqr),
                volumes=np.transpose(volumes))

            if effective_samples < self._required_effective_samples:
                uncertainty = sys.float_info.max

            self._value = EstimatedQuantity(unit.Quantity(value, None),
                                            unit.Quantity(uncertainty, None),
                                            self.id)

        else:

            return PropertyEstimatorException(
                directory=directory,
                message='Dielectric uncertainties may only '
                'be bootstrapped.')

        logging.info('Dielectric reweighted: {}'.format(self.id))

        return self._get_output_dictionary()
Example #30
    def _simulate(self, directory, context, integrator):
        """Performs the simulation using a given context
        and integrator.

        Parameters
        ----------
        directory: str
            The directory in which the simulation is being run.
        context: simtk.openmm.Context
            The OpenMM context to run with.
        integrator: simtk.openmm.Integrator
            The integrator to evolve the simulation with.
        """

        # Define how many steps should be taken.
        total_number_of_steps = self.total_number_of_iterations * self.steps_per_iteration

        # Try to load the current state from any available checkpoint information
        current_step = self._resume_from_checkpoint(context)

        if current_step == total_number_of_steps:
            return None

        # Build the reporters which we will use to report the state
        # of the simulation.
        append_trajectory = os.path.isfile(self._local_trajectory_path)
        dcd_reporter = app.DCDReporter(self._local_trajectory_path, 0,
                                       append_trajectory)

        statistics_file = open(self._local_statistics_path, 'a+')

        statistics_reporter = app.StateDataReporter(statistics_file,
                                                    0,
                                                    step=True,
                                                    potentialEnergy=True,
                                                    kineticEnergy=True,
                                                    totalEnergy=True,
                                                    temperature=True,
                                                    volume=True,
                                                    density=True)

        # Create the object which will transfer simulation output to the
        # reporters.
        topology = app.PDBFile(self.input_coordinate_file).topology

        with open(self.system_path, 'r') as file:
            system = openmm.XmlSerializer.deserialize(file.read())

        simulation = self._Simulation(integrator, topology, system,
                                      current_step)

        # Perform the simulation.
        checkpoint_counter = 0

        try:

            while current_step < total_number_of_steps:

                steps_to_take = min(self.output_frequency,
                                    total_number_of_steps - current_step)
                integrator.step(steps_to_take)

                current_step += steps_to_take

                state = context.getState(getPositions=True,
                                         getEnergy=True,
                                         getVelocities=False,
                                         getForces=False,
                                         getParameters=False,
                                         enforcePeriodicBox=self.enable_pbc)

                simulation.currentStep = current_step

                # Write out the current state using the reporters.
                dcd_reporter.report(simulation, state)
                statistics_reporter.report(simulation, state)

                if checkpoint_counter >= self.checkpoint_frequency:
                    # Save to the checkpoint file if needed.
                    self._write_checkpoint_file(current_step, context)
                    checkpoint_counter = 0

                checkpoint_counter += 1

        except Exception as e:

            formatted_exception = f'{traceback.format_exception(None, e, e.__traceback__)}'

            return PropertyEstimatorException(
                directory=directory,
                message=f'The simulation failed unexpectedly: '
                f'{formatted_exception}')

        # Save out the final positions.
        self._write_checkpoint_file(current_step, context)

        final_state = context.getState(getPositions=True)

        positions = final_state.getPositions()
        topology.setPeriodicBoxVectors(final_state.getPeriodicBoxVectors())

        self.output_coordinate_file = os.path.join(directory, 'output.pdb')

        with open(self.output_coordinate_file, 'w+') as configuration_file:
            app.PDBFile.writeFile(topology, positions, configuration_file)

        logging.info(
            f'Simulation performed in the {str(self.ensemble)} ensemble: {self._id}'
        )
        return None