def test_simple_store_and_retrieve(data_class):
    """Round-trip a simple object through a `LocalFileStorage` backend.

    The test checks that storing the freshly constructed object raises a
    `ValueError`, that the object can be stored, found and retrieved
    unchanged once `some_attribute` has been set, and that storing the
    same data a second time yields the key which was originally issued.

    Parameters
    ----------
    data_class
        The class of the object to store. It is constructed without
        arguments and must expose a `some_attribute` attribute and a
        `json()` method.
    """

    with tempfile.TemporaryDirectory() as storage_root:

        backend = LocalFileStorage(storage_root)
        original = data_class()

        # The store is expected to be rejected until the attribute is set.
        with pytest.raises(ValueError):
            backend.store_object(original)

        # With the attribute populated the store should succeed.
        original.some_attribute = 10
        first_key = backend.store_object(original)

        assert backend.has_object(original)

        round_tripped, _ = backend.retrieve_object(first_key)

        assert round_tripped is not None
        assert original.json() == round_tripped.json()

        # Storing duplicate data must hand back the key issued the
        # first time around.
        second_key = backend.store_object(original)
        assert first_key == second_key
def __init__(
    self,
    calculation_backend,
    storage_backend=None,
    port=8000,
    working_directory="working-data",
    enable_data_caching=True,
    delete_working_files=True,
):
    """Constructs a new EvaluatorServer object.

    Parameters
    ----------
    calculation_backend: CalculationBackend
        The backend to use for executing calculations. It must not be
        `None` and must already have been started.
    storage_backend: StorageBackend, optional
        The backend to use for storing information from any calculations.
        If `None`, a default `LocalFileStorage` backend will be used.
    port: int
        The port on which to listen for incoming client requests.
    working_directory: str
        The local directory in which to store all local, temporary
        calculation data. It is created if it does not already exist.
    enable_data_caching: bool
        Whether the server should attempt to cache any data, mainly the
        output of simulations, produced by estimation requests for future
        re-processing (e.g. for reweighting).
    delete_working_files: bool
        Whether to delete the working files produced while estimating a
        batch of properties using a specific calculation layer.
    """

    # Attributes describing the state of the 'server' itself.
    self._port = port

    self._server_thread = None
    self._socket = None

    self._started, self._stopped = False, True

    # The internal components. A calculation backend is mandatory and
    # has to be running before the server is constructed.
    assert calculation_backend is not None
    assert calculation_backend.started

    self._calculation_backend = calculation_backend

    # Fall back to a default local file store when none was provided.
    self._storage_backend = (
        LocalFileStorage() if storage_backend is None else storage_backend
    )
    self._enable_data_caching = enable_data_caching

    self._working_directory = working_directory
    os.makedirs(working_directory, exist_ok=True)

    self._delete_working_files = delete_working_files

    # Book-keeping of the batches handled by this server.
    self._queued_batches = {}
    self._finished_batches = {}

    self._batch_ids_per_client_id = {}
def test_base_simulation_data_storage():
    """Store a dummy simulation data object together with its data
    directory, and check that it can be found, re-stored under the same
    key and retrieved again from a `LocalFileStorage` backend."""

    substance = Substance.from_components("C")

    with tempfile.TemporaryDirectory() as root_directory:

        source_directory = os.path.join(root_directory, "data_directory")
        storage_directory = os.path.join(root_directory, "storage_dir")

        dummy_data = create_dummy_simulation_data(source_directory, substance)

        backend = LocalFileStorage(storage_directory)
        first_key = backend.store_object(dummy_data, source_directory)

        # Regenerate the data directory so that it exists for the second
        # store call below (presumably the first store consumed it -
        # confirm against `store_object`).
        os.makedirs(source_directory, exist_ok=True)

        assert backend.has_object(dummy_data)

        # Storing the same data again must return the original key.
        second_key = backend.store_object(dummy_data, source_directory)
        assert first_key == second_key

        restored_data, restored_directory = backend.retrieve_object(
            first_key, StoredSimulationData
        )

        # The retrieved directory should live inside of the backend's root.
        assert storage_directory in restored_directory
        assert dummy_data.json() == restored_data.json()
def test_base_layer():
    """Test scheduling a batch of properties on a `DummyCalculationLayer`.

    Two dummy `Density` properties are queued in a single batch, and the
    callback passed to the layer asserts that the returned request contains
    exactly one estimated property and two exceptions.
    """

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    dummy_options = RequestOptions()

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = dummy_options
    batch.force_field_id = ""
    batch.options.calculation_schemas = {
        "Density": {"DummyCalculationLayer": CalculationLayerSchema()}
    }

    def dummy_callback(returned_request):

        assert len(returned_request.estimated_properties) == 1
        assert len(returned_request.exceptions) == 2

    with tempfile.TemporaryDirectory() as temporary_directory:

        with temporarily_change_directory(temporary_directory):

            # Create a simple storage backend to test with.
            test_storage = LocalFileStorage()

            layer_directory = "dummy_layer"
            makedirs(layer_directory)

            # Create a simple calculation backend to test with. Using the
            # backend as a context manager (as `test_workflow_layer` does)
            # ensures that the local cluster is shut down again even when
            # an assertion fails, rather than being left running for the
            # remainder of the test session.
            with DaskLocalCluster() as test_backend:

                dummy_layer = DummyCalculationLayer()

                # NOTE(review): the trailing `True` is presumably the
                # 'synchronous' flag, so the calculation has finished by
                # the time the backend is closed - confirm.
                dummy_layer.schedule_calculation(
                    test_backend,
                    test_storage,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
def __init__(
    self,
    calculation_backend,
    storage_backend=None,
    port=8000,
    working_directory="working-data",
):
    """Constructs a new EvaluatorServer object.

    Parameters
    ----------
    calculation_backend: CalculationBackend
        The backend to use for executing calculations. It must not be
        `None` and must already have been started.
    storage_backend: StorageBackend, optional
        The backend to use for storing information from any calculations.
        If `None`, a default `LocalFileStorage` backend will be used.
    port: int
        The port on which to listen for incoming client requests.
    working_directory: str
        The local directory in which to store all local, temporary
        calculation data. It is created if it does not already exist.
    """

    # The main 'server' attributes.
    self._port = port

    self._server_thread = None
    self._socket = None

    self._started = False
    self._stopped = True

    # The internal components - a running calculation backend is required.
    assert calculation_backend is not None
    assert calculation_backend.started

    self._calculation_backend = calculation_backend

    # Only construct the default storage backend when one is needed.
    self._storage_backend = (
        storage_backend if storage_backend is not None else LocalFileStorage()
    )

    self._working_directory = working_directory
    os.makedirs(working_directory, exist_ok=True)

    # Containers tracking the batches known to this server.
    self._queued_batches = {}
    self._finished_batches = {}

    self._batch_ids_per_client_id = {}
def test_force_field_storage():
    """A simple test that force fields can be stored and retrieved using
    the local storage backend, including from a second backend object
    which points at the same directory."""

    source = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as storage_root:

        first_backend = LocalFileStorage(storage_root)

        stored_id = first_backend.store_force_field(source)
        round_tripped = first_backend.retrieve_force_field(stored_id)

        assert source.json() == round_tripped.json()

        # A new backend created on the same directory should already know
        # about the force field, and so re-use the existing id.
        second_backend = LocalFileStorage(storage_root)
        assert second_backend.has_force_field(source)

        restored_id = second_backend.store_force_field(source)
        assert restored_id == stored_id
def test_base_simulation_data_query():
    """Test querying a `LocalFileStorage` backend for simulation data.

    Dummy data is stored for two pure substances and their mixture. Each
    substance is then queried for in turn and is expected to match exactly
    one result, while a 'components only' query built from the mixture is
    expected to match two.
    """

    pure_a = Substance.from_components("C")
    pure_b = Substance.from_components("CO")

    full_mixture = Substance.from_components("C", "CO")

    all_substances = [pure_a, pure_b, full_mixture]

    with tempfile.TemporaryDirectory() as root_directory:

        storage = LocalFileStorage(os.path.join(root_directory, "storage_dir"))

        # Populate the backend with one piece of data per substance.
        for substance in all_substances:

            data_directory = os.path.join(root_directory, f"{substance.identifier}")
            dummy_data = create_dummy_simulation_data(data_directory, substance)

            storage.store_object(dummy_data, data_directory)

        # Each substance should be matched by exactly one query result.
        for substance in all_substances:

            query = SimulationDataQuery()
            query.substance = substance

            matches = storage.query(query)

            assert matches is not None and len(matches) == 1

            first_match_list = next(iter(matches.values()))
            assert len(first_match_list[0]) == 3

        # Query for the components of the mixture rather than the mixture
        # itself.
        components_query = SimulationDataQuery()
        components_query.substance = full_mixture
        components_query.substance_query = SubstanceQuery()
        components_query.substance_query.components_only = True

        matches = storage.query(components_query)
        assert matches is not None and len(matches) == 2
def test_storage_retrieval():
    """Check that `ReweightingLayer._get_workflow_metadata` returns the
    stored data which is expected for each type of property.

    A storage backend is populated with dummy simulation data for several
    substances and phases (including some which no property should match).
    For each property the storage keys found in the generated metadata are
    then compared against the keys of the data which is expected.
    """

    # Create some dummy substances.
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")

    mixture = Substance.from_components("C", "CO")

    # Add extra unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    # Each piece of data is described by a (substance, phase, n_mol) tuple.
    liquid_methane = (methane, PropertyPhase.Liquid, 1000)
    liquid_methanol = (methanol, PropertyPhase.Liquid, 1000)
    gas_methanol = (methanol, PropertyPhase.Gas, 1)
    liquid_mixture = (mixture, PropertyPhase.Liquid, 1000)

    data_to_store = [
        liquid_methane,
        liquid_methanol,
        gas_methanol,
        liquid_mixture,
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless,
            substance=methane,
            thermodynamic_state=state,
        ),
        # Property with a multi-phase query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3,
            substance=mixture,
            thermodynamic_state=state,
        ),
    ]

    expected_data_per_property = {
        Density: {"full_system_data": [liquid_methanol]},
        DielectricConstant: {"full_system_data": [liquid_methane]},
        EnthalpyOfVaporization: {
            "liquid_data": [liquid_methanol],
            "gas_data": [gas_methanol],
        },
        EnthalpyOfMixing: {
            "full_system_data": [liquid_mixture],
            "component_data": [[liquid_methane], [liquid_methanol]],
        },
        ExcessMolarVolume: {
            "full_system_data": [liquid_mixture],
            "component_data": [[liquid_methane], [liquid_methanol]],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        # Maps each (substance, phase, n_mol) tuple to its storage key.
        storage_keys = {}

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)

            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )

            storage_keys[(substance, phase, n_mol)] = storage_backend.store_object(
                data, data_directory
            )

        for physical_property in properties:

            property_class = physical_property.__class__

            schema = registered_calculation_schemas["ReweightingLayer"][
                property_class.__name__
            ]

            # The registered schema may be a factory rather than an instance.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory,
                physical_property,
                "",
                [],
                storage_backend,
                schema,
            )

            assert metadata is not None

            expected_data = expected_data_per_property[property_class]

            for data_key, expected_entries in expected_data.items():

                assert data_key in metadata
                stored_entries = metadata[data_key]

                assert len(stored_entries) == len(expected_entries)

                if isinstance(stored_entries[0], list):

                    # Flatten any lists of lists.
                    stored_entries = [
                        entry for group in stored_entries for entry in group
                    ]
                    expected_entries = [
                        entry for group in expected_entries for entry in group
                    ]

                # The storage key is taken from the final component of the
                # first element of each stored entry.
                found_keys = sorted(
                    os.path.basename(path) for path, _, _ in stored_entries
                )
                wanted_keys = sorted(storage_keys[entry] for entry in expected_entries)

                assert found_keys == wanted_keys
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer. As the `SimulationLayer`
    is the simplest implementation of the abstract layer, we settle for
    testing this.

    Two dummy `Density` properties are scheduled using a workflow which
    contains a single `DummyProtocol`, and the callback asserts that both
    properties were estimated without any exceptions being returned.
    """

    properties_to_estimate = [create_dummy_property(Density) for _ in range(2)]

    # Create a very simple workflow which just returns some placeholder
    # value.
    placeholder_value = Observable((1 * unit.kelvin).plus_minus(0.1 * unit.kelvin))

    dummy_protocol = DummyProtocol("protocol_a")
    dummy_protocol.input_value = placeholder_value

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [dummy_protocol.schema]
    workflow_schema.final_value_source = ProtocolPath(
        "output_value", dummy_protocol.id
    )

    simulation_schema = SimulationSchema()
    simulation_schema.workflow_schema = workflow_schema

    request_options = RequestOptions()
    request_options.add_schema("SimulationLayer", "Density", simulation_schema)

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = request_options

    def check_returned_request(returned_request):

        assert len(returned_request.estimated_properties) == 2
        assert len(returned_request.exceptions) == 0

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):

        # Create a directory for the layer.
        layer_directory = "simulation_layer"
        os.makedirs(layer_directory)

        # Set-up a simple storage backend and add a force field to it.
        storage_backend = LocalFileStorage()

        batch.force_field_id = storage_backend.store_force_field(
            SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")
        )

        # Create a simple calculation backend to test with.
        with DaskLocalCluster() as calculation_backend:

            SimulationLayer().schedule_calculation(
                calculation_backend,
                storage_backend,
                layer_directory,
                batch,
                check_returned_request,
                True,
            )
def test_duplicate_simulation_data_storage(reverse_order):
    """Test storing several pieces of data which only differ in their
    statistical inefficiency.

    Three data objects are created with statistical inefficiencies of 0.0,
    1.0 and 2.0. After each store the test checks which of the objects is
    returned by the backend, and finally that every store returned one and
    the same storage key.

    Parameters
    ----------
    reverse_order: bool
        If true, the data is stored in order of decreasing statistical
        inefficiency, otherwise in order of increasing inefficiency.
    """

    substance = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as root_directory:

        backend = LocalFileStorage(os.path.join(root_directory, "storage"))

        # Construct some data to store with increasing
        # statistical inefficiencies.
        data_to_store = []

        for data_index in range(3):

            source_directory = os.path.join(root_directory, f"data_{data_index}")

            dummy_data = create_dummy_simulation_data(
                directory_path=source_directory,
                substance=substance,
                force_field_id="ff_id_1",
                coordinate_file_name=f"data_{data_index}.pdb",
                statistical_inefficiency=float(data_index),
                calculation_id="id",
            )

            data_to_store.append((dummy_data, source_directory))

        # Decide on the order in which to store the data, retaining the
        # original index of each piece of data.
        store_order = list(enumerate(data_to_store))

        if reverse_order:
            store_order.reverse()

        # Keep a track of the storage keys.
        seen_keys = set()

        # Store the data
        for index, (dummy_data, source_directory) in store_order:

            storage_key = backend.store_object(dummy_data, source_directory)
            seen_keys.add(storage_key)

            retrieved_object, stored_directory = backend.retrieve_object(storage_key)

            # When the order has been reversed each new piece of data should
            # replace the last, as it will have a lower statistical
            # inefficiency. Otherwise only the first object in the list
            # should be stored and never replaced, as it has the lowest
            # statistical inefficiency.
            expected_index = index if reverse_order else 0

            expected_object = data_to_store[expected_index][0]
            assert retrieved_object.json() == expected_object.json()

            # Make sure the directory has been correctly overwritten / retained
            # depending on the data order.
            coordinate_path = os.path.join(
                stored_directory, f"data_{expected_index}.pdb"
            )
            assert os.path.isfile(coordinate_path)

        # Make sure all pieces of data got assigned the same key,
        # regardless of the order in which they were stored.
        assert len(seen_keys) == 1
def main():
    """Estimate a data set with both the simulation and reweighting layers.

    A directory named after the installed `evaluator` version (or "latest"
    when the version string contains a "+") is created and entered. Within
    it a `DaskLSFBackend` and an `EvaluatorServer` are started, and for each
    of the "SimulationLayer" and "ReweightingLayer" an estimation request
    (including gradients with respect to a set of vdW parameters) is
    submitted and waited upon. The results of each request are written to
    `results/<layer name>.json` inside of the version directory.
    """

    setup_timestamp_logging()

    # Retrieve the current version.
    version = evaluator.__version__.replace(".", "-").replace("v", "")

    if "+" in version:
        version = "latest"

    # Create a new directory to run the current versions results in. Note
    # that this raises if the directory already exists.
    results_directory = os.path.join(version, "results")
    os.makedirs(results_directory)

    # The vdW parameters to compute gradients with respect to.
    gradient_smirks = [
        "[#1:1]-[#6X4]",
        "[#1:1]-[#6X4]-[#7,#8,#9,#16,#17,#35]",
        "[#1:1]-[#8]",
        "[#6X4:1]",
        "[#8X2H1+0:1]",
        "[#1]-[#8X2H2+0:1]-[#1]",
    ]
    gradient_attributes = ["epsilon", "rmin_half"]

    with temporarily_change_directory(version):

        worker_resources = QueueWorkerResources(
            number_of_gpus=1,
            preferred_gpu_toolkit=QueueWorkerResources.GPUToolkit.CUDA,
            per_thread_memory_limit=5 * unit.gigabyte,
            wallclock_time_limit="05:59",
        )

        lsf_backend = DaskLSFBackend(
            minimum_number_of_workers=1,
            maximum_number_of_workers=12,
            resources_per_worker=worker_resources,
            setup_script_commands=[
                f"conda activate openff-evaluator-{version}",
                "module load cuda/10.0",
            ],
            queue_name="gpuqueue",
        )

        with lsf_backend as calculation_backend:

            evaluator_server = EvaluatorServer(
                calculation_backend,
                working_directory="outputs",
                storage_backend=LocalFileStorage("cached-data"),
            )

            with evaluator_server:

                client = EvaluatorClient()

                for allowed_layer in ["SimulationLayer", "ReweightingLayer"]:

                    is_reweighting = allowed_layer == "ReweightingLayer"
                    data_set = define_data_set(is_reweighting)

                    # Only allow the current layer, and start from an empty
                    # set of schemas for each type of property.
                    options = RequestOptions()
                    options.calculation_layers = [allowed_layer]
                    options.calculation_schemas = {
                        property_type: {} for property_type in data_set.property_types
                    }

                    if allowed_layer == "SimulationLayer":

                        options.add_schema(
                            "SimulationLayer",
                            "SolvationFreeEnergy",
                            solvation_free_energy_schema(),
                        )

                    gradient_keys = [
                        ParameterGradientKey("vdW", smirks, attribute)
                        for smirks in gradient_smirks
                        for attribute in gradient_attributes
                    ]

                    request, _ = client.request_estimate(
                        data_set,
                        ForceField("openff-1.2.0.offxml"),
                        options,
                        parameter_gradient_keys=gradient_keys,
                    )

                    # Block until the server has finished with the request.
                    results, _ = request.results(synchronous=True, polling_interval=60)

                    output_path = os.path.join("results", f"{allowed_layer}.json")
                    results.json(output_path)