def test_nested_replicators():
    """Check that a protocol targeted by two replicators is expanded once
    for each combination of the replicators' template values."""

    # Describe the two replicators whose values will be combined.
    letter_replicator = ProtocolReplicator(replicator_id='rep_a')
    letter_replicator.template_values = ['a', 'b']

    number_replicator = ProtocolReplicator(replicator_id='rep_b')
    number_replicator.template_values = [1, 2]

    # The template protocol carries both replicator placeholders in its id.
    template_protocol = DummyReplicableProtocol('dummy_$(rep_a)_$(rep_b)')
    template_protocol.replicated_value_a = ReplicatorValue('rep_a')
    template_protocol.replicated_value_b = ReplicatorValue('rep_b')

    schema = WorkflowSchema()
    schema.protocols[template_protocol.id] = template_protocol.schema
    schema.replicators = [letter_replicator, number_replicator]
    schema.validate_interfaces()

    physical_property = create_dummy_property(Density)
    metadata = Workflow.generate_default_metadata(
        physical_property, 'smirnoff99Frosst-1.1.0.offxml', [])

    workflow = Workflow(physical_property, metadata, '')
    workflow.schema = schema

    # (protocol id, expected `replicated_value_a`, expected `replicated_value_b`)
    expected_values = (
        ('dummy_0_0', 'a', 1),
        ('dummy_0_1', 'a', 2),
        ('dummy_1_0', 'b', 1),
        ('dummy_1_1', 'b', 2),
    )

    assert len(workflow.protocols) == 4

    for protocol_id, expected_a, _ in expected_values:
        assert workflow.protocols[protocol_id].replicated_value_a == expected_a

    for protocol_id, _, expected_b in expected_values:
        assert workflow.protocols[protocol_id].replicated_value_b == expected_b

    print(workflow.schema)
def test_group_replicators():
    """Check that replicating a protocol nested inside of a group also
    updates the protocols which reference it from outside of the group."""

    replicator_id = 'replicator'

    # The protocol to replicate lives inside of a protocol group.
    grouped_protocol = DummyInputOutputProtocol(f'dummy_$({replicator_id})')
    grouped_protocol.input_value = ReplicatorValue(replicator_id)

    group = ProtocolGroup('dummy_group')
    group.add_protocols(grouped_protocol)

    # A consumer whose own id contains the replicator placeholder.
    single_value_protocol = DummyInputOutputProtocol(
        f'dummy_single_$({replicator_id})')
    single_value_protocol.input_value = ProtocolPath(
        'output_value', group.id, grouped_protocol.id)

    # A consumer whose id does not contain the placeholder; the assertions
    # below expect its `values` to end up holding one path per replica.
    list_value_protocol = AddValues('dummy_list')
    list_value_protocol.values = ProtocolPath(
        'output_value', group.id, grouped_protocol.id)

    replicator = ProtocolReplicator(replicator_id)
    replicator.template_values = [
        EstimatedQuantity(value * unit.kelvin, value * unit.kelvin, 'dummy_source')
        for value in (1.0, 2.0)
    ]

    schema = WorkflowSchema()

    for protocol in (group, single_value_protocol, list_value_protocol):
        schema.protocols[protocol.id] = protocol.schema

    schema.replicators.append(replicator)
    schema.validate_interfaces()

    physical_property = create_dummy_property(Density)
    metadata = Workflow.generate_default_metadata(
        physical_property, 'smirnoff99Frosst-1.1.0.offxml', [])

    workflow = Workflow(physical_property, metadata, '')
    workflow.schema = schema

    assert len(workflow.protocols) == 4

    # The grouped protocol should have been cloned once per template value.
    for index in range(2):
        replica = workflow.protocols[group.id].protocols[f'dummy_{index}']
        assert replica.input_value == replicator.template_values[index]

    # Each replicated consumer should point at the matching replica.
    for index in range(2):
        expected_path = ProtocolPath('output_value', group.id, f'dummy_{index}')
        assert workflow.protocols[f'dummy_single_{index}'].input_value == expected_path

    # The non-replicated consumer should reference every replica.
    assert len(workflow.protocols['dummy_list'].values) == 2

    for index in range(2):
        expected_path = ProtocolPath('output_value', group.id, f'dummy_{index}')
        assert workflow.protocols['dummy_list'].values[index] == expected_path
def test_advanced_nested_replicators():
    """Check replication when one replicator's id and template values embed
    the placeholder of another replicator."""

    outer_replicator = ProtocolReplicator(replicator_id='replicator_a')
    outer_replicator.template_values = ['a', 'b']

    outer_placeholder = outer_replicator.placeholder_id

    # The inner replicator is itself parameterised by the outer placeholder,
    # both in its id and in the metadata entry it draws its values from.
    inner_replicator = ProtocolReplicator(
        replicator_id=f'replicator_b_{outer_placeholder}')
    inner_replicator.template_values = ProtocolPath(
        f'dummy_list[{outer_placeholder}]', 'global')

    inner_placeholder = inner_replicator.placeholder_id

    template_protocol = DummyReplicableProtocol(
        f'dummy_{outer_placeholder}_{inner_placeholder}')
    template_protocol.replicated_value_a = ReplicatorValue(outer_replicator.id)
    template_protocol.replicated_value_b = ReplicatorValue(inner_replicator.id)

    schema = WorkflowSchema()
    schema.protocols[template_protocol.id] = template_protocol.schema
    schema.replicators = [outer_replicator, inner_replicator]
    schema.validate_interfaces()

    physical_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(
        physical_property, 'smirnoff99Frosst-1.1.0.offxml', [])
    metadata['dummy_list'] = [[1], [2]]

    workflow = Workflow(physical_property, metadata, '')
    workflow.schema = schema

    assert len(workflow.protocols) == 2

    # (protocol id, expected `replicated_value_a`, expected `replicated_value_b`)
    expected_values = (
        ('dummy_0_0', 'a', 1),
        ('dummy_1_0', 'b', 2),
    )

    for protocol_id, expected_a, expected_b in expected_values:
        assert workflow.protocols[protocol_id].replicated_value_a == expected_a
        assert workflow.protocols[protocol_id].replicated_value_b == expected_b

    print(workflow.schema)
def test_index_replicated_protocol():
    """Build a schema in which extra protocols reference the individual
    replicas of a replicated protocol by index, then validate it and attach
    it to a workflow.

    The test makes no explicit assertions; it passes provided that nothing
    raises.
    """

    schema = WorkflowSchema()

    replicator = ProtocolReplicator('dummy_replicator')
    replicator.template_values = ['a', 'b', 'c', 'd']

    schema.replicators = [replicator]

    template_protocol = DummyInputOutputProtocol(
        f'protocol_{replicator.placeholder_id}')
    template_protocol.input_value = ReplicatorValue(replicator.id)

    schema.protocols[template_protocol.id] = template_protocol.schema

    # Add one protocol per template value, each of which points directly at
    # the id that the corresponding replica is expected to receive.
    for index, _ in enumerate(replicator.template_values):

        index_consumer = DummyInputOutputProtocol(f'indexing_protocol_{index}')
        index_consumer.input_value = ProtocolPath('output_value', f'protocol_{index}')

        schema.protocols[index_consumer.id] = index_consumer.schema

    schema.validate_interfaces()

    physical_property = create_dummy_property(Density)

    workflow = Workflow(physical_property, {})
    workflow.schema = schema
def get_default_reweighting_workflow_schema(options=None):
    """Build the default schema used to estimate this property by
    reweighting existing data.

    Parameters
    ----------
    options: WorkflowOptions
        The default options to use when setting up the estimation workflow.
        These are forwarded to `ExcessMolarVolume._get_reweighting_protocols`.

    Returns
    -------
    WorkflowSchema
        The schema to follow when estimating this property.
    """

    # One replicator re-runs the component reweighting workflow for each
    # component in the system, the other iterates over the gradient keys.
    component_replicator = ProtocolReplicator(
        replicator_id='component_replicator')
    component_replicator.template_values = ProtocolPath('components', 'global')

    gradient_replicator = ProtocolReplicator('gradient')
    gradient_replicator.template_values = ProtocolPath(
        'parameter_gradient_keys', 'global')

    component_placeholder = component_replicator.placeholder_id
    gradient_placeholder = gradient_replicator.placeholder_id

    # The protocols which will reweight data for the full system.
    full_outputs = ExcessMolarVolume._get_reweighting_protocols(
        '_full',
        gradient_replicator.id,
        'full_data_replicator',
        options=options)

    (full_protocols,
     full_volume,
     full_data_replicator,
     full_gradient_group,
     full_gradient_source) = full_outputs

    # The protocols which will reweight data for each component.
    component_outputs = ExcessMolarVolume._get_reweighting_protocols(
        '_component',
        gradient_replicator.id,
        f'component_{component_placeholder}_data_replicator',
        replicator_id=component_replicator.id,
        weight_by_mole_fraction=True,
        substance_reference=ReplicatorValue(component_replicator.id),
        options=options)

    (component_protocols,
     component_volumes,
     component_data_replicator,
     component_gradient_group,
     component_gradient_source) = component_outputs

    # Make sure the replicator is only replicating over component data.
    component_data_replicator.template_values = ProtocolPath(
        f'component_data[$({component_replicator.id})]', 'global')

    # Sum the component volumes and subtract the full system volume.
    sum_component_volumes = miscellaneous.AddValues('add_component_molar_volumes')
    sum_component_volumes.values = component_volumes

    # NOTE(review): the id below reads 'calculate_excess_potential' although
    # the inputs are volumes - kept unchanged here; confirm it is intended.
    excess_volume = miscellaneous.SubtractValues('calculate_excess_potential')
    excess_volume.value_b = full_volume
    excess_volume.value_a = ProtocolPath('result', sum_component_volumes.id)

    # Combine the gradients in the same manner.
    sum_component_gradients = miscellaneous.AddValues(
        f'add_component_gradients_{gradient_placeholder}')
    sum_component_gradients.values = component_gradient_source

    excess_gradient = miscellaneous.SubtractValues(
        f'combine_gradients_{gradient_placeholder}')
    excess_gradient.value_b = full_gradient_source
    excess_gradient.value_a = ProtocolPath('result', sum_component_gradients.id)

    # Build the final workflow schema.
    schema = WorkflowSchema(property_type=ExcessMolarVolume.__name__)
    schema.id = ExcessMolarVolume.__name__ + 'Schema'

    schema.protocols = dict()

    protocols_to_register = (
        *full_protocols,
        *component_protocols,
        sum_component_volumes,
        excess_volume,
        full_gradient_group,
        component_gradient_group,
        sum_component_gradients,
        excess_gradient,
    )

    for protocol in protocols_to_register:
        schema.protocols[protocol.id] = protocol.schema

    schema.replicators = [
        full_data_replicator,
        component_replicator,
        component_data_replicator,
        gradient_replicator,
    ]

    schema.gradients_sources = [ProtocolPath('result', excess_gradient.id)]
    schema.final_value_source = ProtocolPath('result', excess_volume.id)

    return schema
def get_default_simulation_workflow_schema(options=None):
    """Build the default schema used to estimate this property from direct
    simulations.

    Parameters
    ----------
    options: WorkflowOptions
        The default options to use when setting up the estimation workflow.
        These are forwarded to `ExcessMolarVolume._get_simulation_protocols`.

    Returns
    -------
    WorkflowSchema
        The schema to follow when estimating this property.
    """

    # The ids of the replicators which will clone the gradient protocols for
    # each gradient key, and the component protocols for each component.
    gradient_replicator_id = 'gradient_replicator'
    component_replicator_id = 'component_replicator'

    # Set up a workflow to calculate the molar volume of the full, mixed system.
    full_outputs = ExcessMolarVolume._get_simulation_protocols(
        '_full', gradient_replicator_id, options=options)

    (full_system_protocols,
     full_system_molar_molecules,
     full_system_volume,
     full_output,
     full_system_gradient_group,
     _unused_full_gradient_replicator,
     full_system_gradient) = full_outputs

    # Set up a general workflow for calculating the molar volume of one of
    # the system components.
    component_substance = ReplicatorValue(component_replicator_id)

    # Make sure to weight by the mole fractions of the actual full system as
    # these may be slightly different to the mole fractions of the measured
    # property due to rounding.
    full_substance = ProtocolPath(
        'output_substance', full_system_protocols.build_coordinates.id)

    component_outputs = ExcessMolarVolume._get_simulation_protocols(
        '_component',
        gradient_replicator_id,
        replicator_id=component_replicator_id,
        weight_by_mole_fraction=True,
        component_substance_reference=component_substance,
        full_substance_reference=full_substance,
        options=options)

    (component_protocols,
     component_molar_molecules,
     component_volumes,
     component_output,
     component_gradient_group,
     _unused_component_gradient_replicator,
     component_gradient) = component_outputs

    # Add together the component molar volumes, and subtract the mixed system
    # molar volume from the total.
    sum_component_volumes = miscellaneous.AddValues('add_component_molar_volumes')
    sum_component_volumes.values = component_volumes

    excess_volume = miscellaneous.SubtractValues('calculate_excess_volume')
    excess_volume.value_b = full_system_volume
    excess_volume.value_a = ProtocolPath('result', sum_component_volumes.id)

    # The replicator which defines how the pure component protocols will be
    # replicated for each component.
    component_replicator = ProtocolReplicator(replicator_id=component_replicator_id)
    component_replicator.template_values = ProtocolPath('components', 'global')

    # Combine the gradients.
    sum_component_gradients = miscellaneous.AddValues(
        f'add_component_gradients_$({gradient_replicator_id})')
    sum_component_gradients.values = component_gradient

    excess_gradient = miscellaneous.SubtractValues(
        f'combine_gradients_$({gradient_replicator_id})')
    excess_gradient.value_b = full_system_gradient
    excess_gradient.value_a = ProtocolPath('result', sum_component_gradients.id)

    # The replicator which iterates over the parameter gradient keys.
    gradient_replicator = ProtocolReplicator(replicator_id=gradient_replicator_id)
    gradient_replicator.template_values = ProtocolPath(
        'parameter_gradient_keys', 'global')

    # Build the final workflow schema. The tuple order fixes the order in
    # which the protocol schemas are inserted.
    protocols_to_register = (
        component_protocols.build_coordinates,
        component_protocols.assign_parameters,
        component_protocols.energy_minimisation,
        component_protocols.equilibration_simulation,
        component_protocols.converge_uncertainty,
        component_molar_molecules,
        full_system_protocols.build_coordinates,
        full_system_protocols.assign_parameters,
        full_system_protocols.energy_minimisation,
        full_system_protocols.equilibration_simulation,
        full_system_protocols.converge_uncertainty,
        full_system_molar_molecules,
        component_protocols.extract_uncorrelated_trajectory,
        component_protocols.extract_uncorrelated_statistics,
        full_system_protocols.extract_uncorrelated_trajectory,
        full_system_protocols.extract_uncorrelated_statistics,
        sum_component_volumes,
        excess_volume,
        component_gradient_group,
        full_system_gradient_group,
        sum_component_gradients,
        excess_gradient,
    )

    schema = WorkflowSchema(property_type=ExcessMolarVolume.__name__)
    schema.id = ExcessMolarVolume.__name__ + 'Schema'

    schema.protocols = {
        protocol.id: protocol.schema for protocol in protocols_to_register
    }

    schema.replicators = [gradient_replicator, component_replicator]

    # Finally, tell the schema where to look for its final values.
    schema.gradients_sources = [ProtocolPath('result', excess_gradient.id)]
    schema.final_value_source = ProtocolPath('result', excess_volume.id)

    schema.outputs_to_store = {
        'full_system': full_output,
        f'component_$({component_replicator_id})': component_output,
    }

    return schema
def get_enthalpy_workflow(id_prefix='', weight_by_mole_fraction=False, options=None):
    """Returns the set of protocols which, when combined in a workflow, will
    yield the enthalpy of a substance.

    Parameters
    ----------
    id_prefix: str
        A prefix to prepend to the id of each of the returned protocols.
    weight_by_mole_fraction: bool
        If true, an extra protocol will be added inside of the convergence
        group to weight the calculated enthalpy by the mole fraction of the
        component.
    options: PropertyWorkflowOptions
        The options to use when setting up the workflows. This argument is
        not referenced by the body of this function.

    Returns
    -------
    EnthalpyOfMixing.EnthalpyWorkflow
        The protocols used to estimate the enthalpy of a substance.
    """

    def prefixed(name):
        """Returns `name` with the requested id prefix prepended."""
        return id_prefix + name

    # System set-up.
    coordinates = protocols.BuildCoordinatesPackmol(prefixed('build_coordinates'))
    coordinates.substance = ProtocolPath('substance', 'global')

    topology = protocols.BuildSmirnoffSystem(prefixed('build_topology'))
    topology.force_field_path = ProtocolPath('force_field_path', 'global')
    topology.coordinate_file_path = ProtocolPath('coordinate_file_path', coordinates.id)
    topology.substance = ProtocolPath('substance', 'global')

    # Equilibration.
    minimisation = protocols.RunEnergyMinimisation(prefixed('energy_minimisation'))
    minimisation.input_coordinate_file = ProtocolPath('coordinate_file_path', coordinates.id)
    minimisation.system_path = ProtocolPath('system_path', topology.id)

    equilibration = protocols.RunOpenMMSimulation(prefixed('npt_equilibration'))
    equilibration.ensemble = Ensemble.NPT
    # Debug settings.
    equilibration.steps = 100000
    equilibration.output_frequency = 5000
    equilibration.thermodynamic_state = ProtocolPath('thermodynamic_state', 'global')
    equilibration.input_coordinate_file = ProtocolPath('output_coordinate_file', minimisation.id)
    equilibration.system_path = ProtocolPath('system_path', topology.id)

    # Production.
    production = protocols.RunOpenMMSimulation(prefixed('npt_production'))
    production.ensemble = Ensemble.NPT
    # Debug settings.
    production.steps = 500000
    production.output_frequency = 5000
    production.thermodynamic_state = ProtocolPath('thermodynamic_state', 'global')
    production.input_coordinate_file = ProtocolPath('output_coordinate_file', equilibration.id)
    production.system_path = ProtocolPath('system_path', topology.id)

    # Analysis.
    enthalpy = protocols.ExtractAverageStatistic(prefixed('extract_enthalpy'))
    enthalpy.statistics_type = ObservableType.Enthalpy
    enthalpy.statistics_path = ProtocolPath('statistics_file_path', production.id)

    # Wrap production and analysis in a conditional group which compares the
    # uncertainty in the enthalpy against the requested per-component value.
    convergence_group = groups.ConditionalGroup(prefixed('converge_uncertainty'))
    convergence_group.add_protocols(production, enthalpy)
    convergence_group.max_iterations = 1

    uncertainty_condition = groups.ConditionalGroup.Condition()
    uncertainty_condition.left_hand_value = ProtocolPath(
        'value.uncertainty', convergence_group.id, enthalpy.id)
    uncertainty_condition.right_hand_value = ProtocolPath(
        'per_component_uncertainty', 'global')
    uncertainty_condition.condition_type = groups.ConditionalGroup.ConditionType.LessThan

    convergence_group.add_condition(uncertainty_condition)

    # These two paths are shared by both of the decorrelation protocols below.
    inefficiency_path = ProtocolPath(
        'statistical_inefficiency', convergence_group.id, enthalpy.id)
    equilibration_index_path = ProtocolPath(
        'equilibration_index', convergence_group.id, enthalpy.id)

    if weight_by_mole_fraction:

        # The component workflows need an extra step to multiply their
        # enthalpies by their relative mole fraction.
        weighting_protocol = WeightValueByMoleFraction(prefixed('weight_by_mole_fraction'))
        weighting_protocol.value = ProtocolPath('value', enthalpy.id)
        weighting_protocol.full_substance = ProtocolPath('substance', 'global')

        # The component is a placeholder which will be set by the replicator.
        weighting_protocol.component = ReplicatorValue('repl')

        convergence_group.add_protocols(weighting_protocol)

    # Extract the uncorrelated trajectory.
    uncorrelated_trajectory = protocols.ExtractUncorrelatedTrajectoryData(
        prefixed('extract_traj'))
    uncorrelated_trajectory.statistical_inefficiency = inefficiency_path
    uncorrelated_trajectory.equilibration_index = equilibration_index_path
    uncorrelated_trajectory.input_coordinate_file = ProtocolPath(
        'output_coordinate_file', convergence_group.id, production.id)
    uncorrelated_trajectory.input_trajectory_path = ProtocolPath(
        'trajectory_file_path', convergence_group.id, production.id)

    # Extract the uncorrelated statistics.
    uncorrelated_statistics = protocols.ExtractUncorrelatedStatisticsData(
        prefixed('extract_stats'))
    uncorrelated_statistics.statistical_inefficiency = inefficiency_path
    uncorrelated_statistics.equilibration_index = equilibration_index_path
    uncorrelated_statistics.input_statistics_path = ProtocolPath(
        'statistics_file_path', convergence_group.id, production.id)

    # noinspection PyCallByClass
    return EnthalpyOfMixing.EnthalpyWorkflow(
        coordinates,
        topology,
        minimisation,
        equilibration,
        convergence_group,
        uncorrelated_trajectory,
        uncorrelated_statistics)
def get_default_reweighting_workflow_schema(options=None):
    """Build the default schema used to estimate this property by
    reweighting existing data.

    Parameters
    ----------
    options: PropertyWorkflowOptions
        The default options to use when setting up the estimation workflow.
        This argument is not referenced by the body of this function.

    Returns
    -------
    WorkflowSchema
        The schema to follow when estimating this property.
    """

    # Set up the protocols which will reweight data for the full system. The
    # statistics path can only be set once the base protocols exist.
    mixture_enthalpy = protocols.ExtractAverageStatistic(
        'extract_enthalpy_$(mix_data_repl)_mixture')
    mixture_enthalpy.statistics_type = ObservableType.Enthalpy

    mixture_protocols, mixture_data_replicator = generate_base_reweighting_protocols(
        mixture_enthalpy, 'mix_data_repl', '_mixture')

    mixture_enthalpy.statistics_path = ProtocolPath(
        'statistics_file_path', mixture_protocols.unpack_stored_data.id)

    # Set up the protocols which will reweight data for each of the components.
    pure_enthalpy = protocols.ExtractAverageStatistic(
        'extract_enthalpy_$(pure_data_repl)_comp_$(comp_repl)')
    pure_enthalpy.statistics_type = ObservableType.Enthalpy

    pure_protocols, pure_data_replicator = generate_base_reweighting_protocols(
        pure_enthalpy, 'pure_data_repl', '_pure_$(comp_repl)')

    pure_enthalpy.statistics_path = ProtocolPath(
        'statistics_file_path', pure_protocols.unpack_stored_data.id)

    # Make sure the replicator is only replicating over data from the pure
    # component.
    pure_data_replicator.template_values = ProtocolPath(
        'component_data[$(comp_repl)]', 'global')

    # Weight each component enthalpy by its mole fraction, add the weighted
    # values together, and take the difference with the mixture enthalpy.
    weight_component = WeightValueByMoleFraction('weight_comp_$(comp_repl)')
    weight_component.value = ProtocolPath('value', pure_protocols.mbar_protocol.id)
    weight_component.full_substance = ProtocolPath('substance', 'global')
    weight_component.component = ReplicatorValue('comp_repl')

    sum_component_enthalpies = protocols.AddQuantities('add_component_enthalpies')
    sum_component_enthalpies.values = [
        ProtocolPath('weighted_value', weight_component.id)
    ]

    enthalpy_of_mixing = protocols.SubtractQuantities('calculate_enthalpy_of_mixing')
    enthalpy_of_mixing.value_b = ProtocolPath(
        'value', mixture_protocols.mbar_protocol.id)
    enthalpy_of_mixing.value_a = ProtocolPath('result', sum_component_enthalpies.id)

    # Set up a replicator that will re-run the pure reweighting workflow for
    # each component in the system.
    component_replicator = ProtocolReplicator(replicator_id='comp_repl')
    component_replicator.protocols_to_replicate = [
        ProtocolPath('', weight_component.id)
    ]

    for protocol in pure_protocols:
        component_replicator.protocols_to_replicate.append(
            ProtocolPath('', protocol.id))

    component_replicator.template_values = ProtocolPath('components', 'global')

    # Build the final workflow schema.
    schema = WorkflowSchema(property_type=EnthalpyOfMixing.__name__)
    schema.id = EnthalpyOfMixing.__name__ + 'Schema'

    schema.protocols = {}

    protocols_to_register = (
        *mixture_protocols,
        *pure_protocols,
        weight_component,
        sum_component_enthalpies,
        enthalpy_of_mixing,
    )

    for protocol in protocols_to_register:
        schema.protocols[protocol.id] = protocol.schema

    schema.replicators = [
        mixture_data_replicator,
        component_replicator,
        pure_data_replicator,
    ]

    schema.final_value_source = ProtocolPath('result', enthalpy_of_mixing.id)

    return schema
def get_default_simulation_workflow_schema(options=None):
    """Build the default schema used to estimate this property from direct
    simulations.

    Parameters
    ----------
    options: PropertyWorkflowOptions
        The default options to use when setting up the estimation workflow.
        These are forwarded to `EnthalpyOfMixing.get_enthalpy_workflow`.

    Returns
    -------
    WorkflowSchema
        The schema to follow when estimating this property.
    """

    # Set up a general workflow for calculating the enthalpy of one of the
    # system components. The id prefix contains the `$(repl)` tag, which is
    # the placeholder of the component replicator created further below.
    component_workflow = EnthalpyOfMixing.get_enthalpy_workflow(
        'component_$(repl)_', True, options)

    # Set the substance of the build_coordinates and assign_topology protocols
    # as a placeholder for now - these will be later set by the replicator.
    component_workflow.build_coordinates.substance = ReplicatorValue('repl')
    component_workflow.assign_topology.substance = ReplicatorValue('repl')

    # Set up a workflow to calculate the enthalpy of the full, mixed system.
    mixed_workflow = EnthalpyOfMixing.get_enthalpy_workflow('mixed_', False, options)

    component_group_id = component_workflow.converge_uncertainty.id
    mixed_group_id = mixed_workflow.converge_uncertainty.id

    # Add together the component enthalpies. Only a single path to the
    # template `weight_by_mole_fraction` protocol is listed here; the original
    # author notes that the replicator populates this list with references to
    # each of the generated component protocols.
    sum_component_enthalpies = protocols.AddQuantities('add_component_enthalpies')
    sum_component_enthalpies.values = [
        ProtocolPath('weighted_value',
                     component_group_id,
                     'component_$(repl)_weight_by_mole_fraction')
    ]

    # Take the difference between the summed component enthalpies and the
    # mixed system enthalpy.
    enthalpy_of_mixing = protocols.SubtractQuantities('calculate_enthalpy_of_mixing')
    enthalpy_of_mixing.value_b = ProtocolPath(
        'value', mixed_group_id, 'mixed_extract_enthalpy')
    enthalpy_of_mixing.value_a = ProtocolPath('result', sum_component_enthalpies.id)

    schema = WorkflowSchema(property_type=EnthalpyOfMixing.__name__)
    schema.id = EnthalpyOfMixing.__name__ + 'Schema'

    protocols_to_register = (
        sum_component_enthalpies,
        enthalpy_of_mixing,
        *component_workflow,
        *mixed_workflow,
    )

    for protocol in protocols_to_register:
        schema.protocols[protocol.id] = protocol.schema

    # Create the replicator object which defines how the pure component
    # enthalpy estimation workflow will be replicated for each component.
    component_replicator = ProtocolReplicator(replicator_id='repl')
    component_replicator.protocols_to_replicate = []

    # Pass it paths to the top level protocols to be replicated...
    for protocol in component_workflow:
        component_replicator.protocols_to_replicate.append(
            ProtocolPath('', protocol.id))

    # ...as well as to those nested inside of the convergence group.
    for nested_protocol_id in component_workflow.converge_uncertainty.protocols:
        component_replicator.protocols_to_replicate.append(
            ProtocolPath('', component_group_id, nested_protocol_id))

    # Tell the replicator to take its template values from the components of
    # the property's substance.
    component_replicator.template_values = ProtocolPath('components', 'global')

    schema.replicators = [component_replicator]

    # Finally, tell the schema where to look for its final value.
    schema.final_value_source = ProtocolPath('result', enthalpy_of_mixing.id)

    # Describe the outputs of the mixed system workflow to store.
    mixed_output = WorkflowOutputToStore()

    mixed_output.trajectory_file_path = ProtocolPath(
        'output_trajectory_path', mixed_workflow.subsample_trajectory.id)
    mixed_output.coordinate_file_path = ProtocolPath(
        'output_coordinate_file', mixed_group_id, 'mixed_npt_production')
    mixed_output.statistics_file_path = ProtocolPath(
        'output_statistics_path', mixed_workflow.subsample_statistics.id)
    mixed_output.statistical_inefficiency = ProtocolPath(
        'statistical_inefficiency', mixed_group_id, 'mixed_extract_enthalpy')

    # Describe the outputs of each of the component workflows to store.
    component_output = WorkflowOutputToStore()

    component_output.substance = ReplicatorValue('repl')

    component_output.trajectory_file_path = ProtocolPath(
        'output_trajectory_path', component_workflow.subsample_trajectory.id)
    component_output.coordinate_file_path = ProtocolPath(
        'output_coordinate_file', component_group_id, 'component_$(repl)_npt_production')
    component_output.statistics_file_path = ProtocolPath(
        'output_statistics_path', component_workflow.subsample_statistics.id)
    component_output.statistical_inefficiency = ProtocolPath(
        'statistical_inefficiency', component_group_id, 'component_$(repl)_extract_enthalpy')

    schema.outputs_to_store = {
        'mixed_system': mixed_output,
        'component_$(repl)': component_output,
    }

    return schema
def test_nested_replicators():
    """Check that a protocol listed under two replicators is expanded once
    for each combination of the replicators' template values."""

    template_protocol = DummyReplicableProtocol('dummy_$(rep_a)_$(rep_b)')
    template_protocol.replicated_value_a = ReplicatorValue('rep_a')
    template_protocol.replicated_value_b = ReplicatorValue('rep_b')

    schema = WorkflowSchema()
    schema.protocols[template_protocol.id] = template_protocol.schema
    schema.final_value_source = ProtocolPath('final_value', template_protocol.id)

    # Both replicators explicitly target the same template protocol.
    letter_replicator = ProtocolReplicator(replicator_id='rep_a')
    letter_replicator.template_values = ['a', 'b']
    letter_replicator.protocols_to_replicate = [
        ProtocolPath('', template_protocol.id)
    ]

    number_replicator = ProtocolReplicator(replicator_id='rep_b')
    number_replicator.template_values = [1, 2]
    number_replicator.protocols_to_replicate = [
        ProtocolPath('', template_protocol.id)
    ]

    schema.replicators = [letter_replicator, number_replicator]
    schema.validate_interfaces()

    physical_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(
        physical_property,
        get_data_filename('forcefield/smirnoff99Frosst.offxml'),
        PropertyEstimatorOptions())

    workflow = Workflow(physical_property, metadata)
    workflow.schema = schema

    # (id suffix, expected `replicated_value_a`, expected `replicated_value_b`)
    # The protocol ids are looked up with the workflow's uuid prepended.
    expected_values = (
        ('|dummy_0_0', 'a', 1),
        ('|dummy_0_1', 'a', 2),
        ('|dummy_1_0', 'b', 1),
        ('|dummy_1_1', 'b', 2),
    )

    assert len(workflow.protocols) == 4

    for id_suffix, expected_a, _ in expected_values:
        assert workflow.protocols[workflow.uuid + id_suffix].replicated_value_a == expected_a

    for id_suffix, _, expected_b in expected_values:
        assert workflow.protocols[workflow.uuid + id_suffix].replicated_value_b == expected_b

    print(workflow.schema)
def generate_base_reweighting_protocols(analysis_protocol, replicator_id='data_repl', id_suffix=''):
    """Builds the protocols required to estimate a property by reweighting
    a set of existing data.

    The observable to reweight is produced by the supplied `analysis_protocol`,
    which is returned alongside the protocols created here.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which consumes the stored data and generates the
        set of observables to reweight.
    replicator_id: str
        The id to assign to the data replicator.
    id_suffix: str
        A suffix which is appended to the id of each created protocol.

    Returns
    -------
    BaseReweightingProtocols:
        A named tuple of the protocols which should form the bulk of
        a property estimation workflow.
    ProtocolReplicator:
        A replicator whose template values point at the global
        `full_system_data`, and which replicates every returned protocol
        whose id contains the replicator placeholder.
    """

    assert isinstance(analysis_protocol, protocols.AveragePropertyProtocol)

    # Protocols which are created once per piece of stored data carry the
    # replicator placeholder in their id.
    replicator_suffix = '_$({}){}'.format(replicator_id, id_suffix)

    # Unpack each piece of stored data.
    unpack_stored_data = protocols.UnpackStoredSimulationData('unpack_data' + replicator_suffix)
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    unpack_id = unpack_stored_data.id
    analysis_id = analysis_protocol.id

    # Extract the uncorrelated frames of each stored trajectory, using the
    # statistics reported by the analysis protocol.
    decorrelate_trajectory = protocols.ExtractUncorrelatedTrajectoryData(
        'decorrelate_traj' + replicator_suffix)

    decorrelate_trajectory.statistical_inefficiency = ProtocolPath('statistical_inefficiency', analysis_id)
    decorrelate_trajectory.equilibration_index = ProtocolPath('equilibration_index', analysis_id)
    decorrelate_trajectory.input_coordinate_file = ProtocolPath('coordinate_file_path', unpack_id)
    decorrelate_trajectory.input_trajectory_path = ProtocolPath('trajectory_file_path', unpack_id)

    # Join the decorrelated trajectories into a single trajectory.
    concatenate_trajectories = protocols.ConcatenateTrajectories('concat_traj' + id_suffix)

    concatenate_trajectories.input_coordinate_paths = [
        ProtocolPath('coordinate_file_path', unpack_id)
    ]
    concatenate_trajectories.input_trajectory_paths = [
        ProtocolPath('output_trajectory_path', decorrelate_trajectory.id)
    ]

    concatenate_id = concatenate_trajectories.id

    # Evaluate the reduced potentials of the joined trajectory at each
    # of the reference states.
    build_reference_system = protocols.BuildSmirnoffSystem('build_system' + replicator_suffix)

    build_reference_system.force_field_path = ProtocolPath('force_field_path', unpack_id)
    build_reference_system.substance = ProtocolPath('substance', unpack_id)
    build_reference_system.coordinate_file_path = ProtocolPath('coordinate_file_path', unpack_id)

    reduced_reference_potential = protocols.CalculateReducedPotentialOpenMM(
        'reduced_potential' + replicator_suffix)

    reduced_reference_potential.system_path = ProtocolPath('system_path', build_reference_system.id)
    reduced_reference_potential.thermodynamic_state = ProtocolPath('thermodynamic_state', unpack_id)
    reduced_reference_potential.coordinate_file_path = ProtocolPath('coordinate_file_path', unpack_id)
    reduced_reference_potential.trajectory_file_path = ProtocolPath('output_trajectory_path', concatenate_id)

    # Evaluate the reduced potentials of the joined trajectory at the target state.
    build_target_system = protocols.BuildSmirnoffSystem('build_system_target' + id_suffix)

    build_target_system.force_field_path = ProtocolPath('force_field_path', 'global')
    build_target_system.substance = ProtocolPath('substance', 'global')
    build_target_system.coordinate_file_path = ProtocolPath('output_coordinate_path', concatenate_id)

    reduced_target_potential = protocols.CalculateReducedPotentialOpenMM(
        'reduced_potential_target' + id_suffix)

    reduced_target_potential.thermodynamic_state = ProtocolPath('thermodynamic_state', 'global')
    reduced_target_potential.system_path = ProtocolPath('system_path', build_target_system.id)
    reduced_target_potential.coordinate_file_path = ProtocolPath('output_coordinate_path', concatenate_id)
    reduced_target_potential.trajectory_file_path = ProtocolPath('output_trajectory_path', concatenate_id)

    # Reweight the observables to the target state with MBAR.
    mbar_protocol = protocols.ReweightWithMBARProtocol('mbar' + id_suffix)

    mbar_protocol.reference_reduced_potentials = [
        ProtocolPath('reduced_potentials', reduced_reference_potential.id)
    ]
    mbar_protocol.reference_observables = [
        ProtocolPath('uncorrelated_values', analysis_id)
    ]
    mbar_protocol.target_reduced_potentials = [
        ProtocolPath('reduced_potentials', reduced_target_potential.id)
    ]

    base_protocols = BaseReweightingProtocols(
        unpack_stored_data,
        analysis_protocol,
        decorrelate_trajectory,
        concatenate_trajectories,
        build_reference_system,
        reduced_reference_potential,
        build_target_system,
        reduced_target_potential,
        mbar_protocol)

    # Set up the replicator, and register with it only those protocols
    # whose id contains the replicator placeholder.
    component_replicator = ProtocolReplicator(replicator_id=replicator_id)
    component_replicator.protocols_to_replicate = []

    replicator_placeholder = '$({})'.format(replicator_id)

    component_replicator.protocols_to_replicate.extend(
        ProtocolPath('', protocol.id)
        for protocol in base_protocols
        if replicator_placeholder in protocol.id)

    component_replicator.template_values = ProtocolPath('full_system_data', 'global')

    return base_protocols, component_replicator