def test_add_dataset(self):
     """Adds one new dataset file and checks the files produced by the run.

     Verifies the reported reaction ID changes, that the submitted file no
     longer exists afterwards, that the output dataset has dataset and
     reaction IDs, and that a `.pb` file exists next to the `.pbtxt` output.
     """
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     moles = component.amount.moles
     moles.value = 2
     moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     created = reaction.provenance.record_created
     created.time.value = '2020-01-01'
     created.person.username = '******'
     created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test'
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     # Check for assignment of dataset and reaction IDs; drop
     # self.dataset_filename first so only the other file remains.
     filenames.remove(self.dataset_filename)
     self.assertLen(filenames, 1)
     output_filename = filenames[0]
     processed = message_helpers.load_message(output_filename,
                                              dataset_pb2.Dataset)
     self.assertNotEmpty(processed.dataset_id)
     self.assertLen(processed.reactions, 1)
     self.assertNotEmpty(processed.reactions[0].reaction_id)
     # Check for binary output.
     stem, extension = os.path.splitext(output_filename)
     self.assertEqual(extension, '.pbtxt')
     self.assertTrue(os.path.exists(stem + '.pb'))
示例#2
0
 def test_add_dataset_with_large_data(self):
     """Checks that image bytes end up in a data file referenced by URL.

     Runs with `min_size=0.0`, then expects the image's `url` field to match
     the expected GitHub URL and the raw bytes to be readable from the
     corresponding path under `self.test_subdirectory`.
     """
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     image = reaction.observations.add().image
     image.bytes_value = b'test data value'
     image.format = 'png'
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     filenames = self._run_main(min_size=0.0)
     self.assertLen(filenames, 2)
     filenames.remove(self.dataset_filename)
     processed = message_helpers.load_message(filenames[0],
                                              dataset_pb2.Dataset)
     digest = (
         '36443a1839bf1160087422b7468a93c7b97dac7eea423bfac189208a15823139')
     relative_path = f'data/36/ord_data-{digest}.png'
     expected = ('https://github.com/Open-Reaction-Database/'
                 'ord-submissions-test/tree/' + relative_path)
     actual_url = processed.reactions[0].observations[0].image.url
     self.assertEqual(actual_url, expected)
     data_path = os.path.join(self.test_subdirectory, relative_path)
     with open(data_path, 'rb') as data_file:
         stored_bytes = data_file.read()
     self.assertEqual(b'test data value', stored_bytes)
 def test_add_sharded_dataset(self):
     """Adds two single-reaction dataset files and expects one merged output.

     The same Reaction message is reused for both files; only the creation
     time and reaction ID are changed between the two writes.
     """
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     moles = component.amount.moles
     moles.value = 2
     moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     created = reaction.provenance.record_created
     # First shard.
     created.time.value = '2020-01-02'
     created.person.username = '******'
     created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test1'
     first_shard = dataset_pb2.Dataset(reactions=[reaction])
     dataset1_filename = os.path.join(self.test_subdirectory, 'test1.pbtxt')
     message_helpers.write_message(first_shard, dataset1_filename)
     # Second shard.
     created.time.value = '2020-01-03'
     created.person.username = '******'
     created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test2'
     second_shard = dataset_pb2.Dataset(reactions=[reaction])
     dataset2_filename = os.path.join(self.test_subdirectory, 'test2.pbtxt')
     message_helpers.write_message(second_shard, dataset2_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test1', 'test2'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     filenames.remove(self.dataset_filename)
     self.assertLen(filenames, 1)
     merged = message_helpers.load_message(filenames[0],
                                           dataset_pb2.Dataset)
     self.assertLen(merged.reactions, 2)
     for shard_filename in (dataset1_filename, dataset2_filename):
         self.assertFalse(os.path.exists(shard_filename))
示例#4
0
 def test_resolver(self):
     """Submits a NAME-only compound and checks the identifiers after the run.

     Expects three identifiers afterwards: the second is a SMILES entry
     marked 'NAME resolved by PubChem' and the third has type RDKIT_BINARY.
     """
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='NAME', value='ethylamine')
     component.is_limiting = True
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     filenames.remove(self.dataset_filename)
     self.assertLen(filenames, 1)
     processed = message_helpers.load_message(filenames[0],
                                              dataset_pb2.Dataset)
     self.assertLen(processed.reactions, 1)
     resolved_component = (
         processed.reactions[0].inputs['ethylamine'].components[0])
     identifiers = resolved_component.identifiers
     self.assertLen(identifiers, 3)
     expected_smiles = reaction_pb2.CompoundIdentifier(
         type='SMILES', value='CCN', details='NAME resolved by PubChem')
     self.assertEqual(identifiers[1], expected_smiles)
     self.assertEqual(identifiers[2].type,
                      reaction_pb2.CompoundIdentifier.RDKIT_BINARY)
示例#5
0
 def test_modify_dataset(self):
     """Edits the stored dataset in place and checks that existing IDs survive.

     One existing reaction is modified and one new reaction is appended; the
     run is expected to report only `self.dataset_filename`.
     """
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     # Modify the existing reaction...
     existing_component = (
         dataset.reactions[0].inputs['methylamine'].components[0])
     existing_component.moles.value = 2
     # ...and add a new reaction.
     new_reaction = reaction_pb2.Reaction()
     component = new_reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = new_reaction.outcomes.add()
     outcome.conversion.value = 25
     dataset.reactions.add().CopyFrom(new_reaction)
     message_helpers.write_message(dataset, self.dataset_filename)
     filenames = self._run_main()
     self.assertCountEqual([self.dataset_filename], filenames)
     # Check for preservation of dataset and record IDs.
     updated = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     self.assertLen(updated.reactions, 2)
     self.assertEqual(dataset.dataset_id, updated.dataset_id)
     original_id = dataset.reactions[0].reaction_id
     self.assertEqual(original_id, updated.reactions[0].reaction_id)
     self.assertNotEmpty(updated.reactions[1].reaction_id)
 def test_modify_dataset(self):
     """Edits the stored dataset and checks reported changes and kept IDs.

     One existing reaction is modified and a new reaction with ID 'test' is
     appended; the run should report 'test' as added and the existing
     reaction ID as changed.
     """
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     # Modify the existing reaction...
     existing_component = (
         dataset.reactions[0].inputs['methylamine'].components[0])
     existing_component.amount.moles.value = 2
     # ...and add a new reaction.
     new_reaction = reaction_pb2.Reaction()
     component = new_reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     moles = component.amount.moles
     moles.value = 2
     moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = new_reaction.outcomes.add()
     outcome.conversion.value = 25
     created = new_reaction.provenance.record_created
     created.time.value = '2020-01-01'
     created.person.username = '******'
     created.person.email = '*****@*****.**'
     new_reaction.reaction_id = 'test'
     dataset.reactions.add().CopyFrom(new_reaction)
     message_helpers.write_message(dataset, self.dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEqual(changed, {'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'})
     self.assertCountEqual([self.dataset_filename], filenames)
     # Check for preservation of dataset and record IDs.
     updated = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     self.assertLen(updated.reactions, 2)
     self.assertEqual(dataset.dataset_id, updated.dataset_id)
     original_id = dataset.reactions[0].reaction_id
     self.assertEqual(original_id, updated.reactions[0].reaction_id)
     self.assertNotEmpty(updated.reactions[1].reaction_id)
示例#7
0
 def test_add_dataset_with_existing_reaction_ids(self):
     """Adds a dataset whose reaction already has an ID and checks it is kept.

     Also expects that no `record_modified` entries are present on the
     reaction after the run.
     """
     reaction_id = 'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     reaction.reaction_id = reaction_id
     reaction.provenance.record_created.time.value = '2020-01-01 11 am'
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     filenames.remove(self.dataset_filename)
     self.assertLen(filenames, 1)
     processed = message_helpers.load_message(filenames[0],
                                              dataset_pb2.Dataset)
     processed_reaction = processed.reactions[0]
     # Check that existing record IDs for added datasets are not overridden.
     self.assertEqual(processed_reaction.reaction_id, reaction_id)
     self.assertLen(processed_reaction.provenance.record_modified, 0)
示例#8
0
 def setUp(self):
     """Creates a temporary git repository holding one committed dataset.

     Changes the working directory to the new temporary directory, runs
     `git init`, sets a local git identity, writes a validated dataset under
     `data/64/`, and commits it. The dataset path is stored on
     `self.dataset_filename`.
     """
     super().setUp()
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     os.chdir(self.test_subdirectory)
     subprocess.run(['git', 'init'], check=True)
     git_identity = (
         ('user.email', 'test@ord-schema'),
         ('user.name', 'Test Runner'),
     )
     for key, value in git_identity:
         subprocess.run(['git', 'config', '--local', key, value],
                        check=True)
     # Add some initial data.
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['methylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 1
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 75
     reaction.provenance.record_created.time.value = '2020-01-01'
     reaction.reaction_id = 'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'
     dataset_id = 'ord_dataset-64b14868c5cd46dd8e75560fd3589a6b'
     initial_dataset = dataset_pb2.Dataset(reactions=[reaction],
                                           dataset_id=dataset_id)
     # Make sure the initial dataset is valid.
     validations.validate_message(initial_dataset)
     # Relative path: resolved against the working directory set above.
     os.makedirs(os.path.join('data', '64'))
     self.dataset_filename = os.path.join(self.test_subdirectory, 'data',
                                          '64', f'{dataset_id}.pbtxt')
     message_helpers.write_message(initial_dataset, self.dataset_filename)
     for git_args in (['add', 'data'], ['commit', '-m', 'Initial commit']):
         subprocess.run(['git'] + git_args, check=True)
示例#9
0
 def setUp(self):
     """Writes two dataset files into a fresh temporary directory.

     `dataset1.pbtxt` holds one populated reaction; `dataset2.pbtxt` holds
     that reaction plus an empty one. Both paths are stored on `self`.
     """
     super().setUp()
     # Suppress RDKit warnings to clean up the test output.
     RDLogger.logger().setLevel(RDLogger.CRITICAL)
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     populated = reaction_pb2.Reaction()
     dummy_component = populated.inputs['dummy_input'].components.add()
     identifier = dummy_component.identifiers.add(type='CUSTOM')
     identifier.details = 'custom_identifier'
     identifier.value = 'custom_value'
     dummy_component.is_limiting = reaction_pb2.Boolean.TRUE
     dummy_component.mass.value = 1
     dummy_component.mass.units = reaction_pb2.Mass.GRAM
     outcome = populated.outcomes.add()
     outcome.conversion.value = 75
     self.dataset1_filename = os.path.join(self.test_subdirectory,
                                           'dataset1.pbtxt')
     message_helpers.write_message(
         dataset_pb2.Dataset(reactions=[populated]), self.dataset1_filename)
     # The second reaction is left empty.
     empty = reaction_pb2.Reaction()
     self.dataset2_filename = os.path.join(self.test_subdirectory,
                                           'dataset2.pbtxt')
     message_helpers.write_message(
         dataset_pb2.Dataset(reactions=[populated, empty]),
         self.dataset2_filename)
def dataset_filename(tmp_path) -> str:
    """Creates a `test` database and a one-reaction dataset file, then cleans up.

    This is a generator: it yields the path of the written dataset file
    (`test.pb` inside `tmp_path`) and drops the `test` database once it is
    resumed or closed.

    Args:
        tmp_path: Directory object supporting `/` and `.as_posix()` in which
            the dataset file is written.

    Yields:
        The POSIX-style path of the dataset file.
    """
    # Create a test database.
    connection = connect(ord_interface.client.POSTGRES_DB)
    try:
        connection.set_session(autocommit=True)
        with connection.cursor() as cursor:
            cursor.execute("CREATE DATABASE test;")
    finally:
        # Close the connection even if the statement fails.
        connection.close()
    try:
        # Create a test dataset.
        reaction = reaction_pb2.Reaction()
        reaction.reaction_id = "test"
        reaction.identifiers.add(value="reaction", type="REACTION_SMILES")
        input1 = reaction.inputs["input1"]
        input1.components.add().identifiers.add(value="input1", type="SMILES")
        input2 = reaction.inputs["input2"]
        input2.components.add().identifiers.add(value="input2a", type="SMILES")
        input2.components.add().identifiers.add(value="input2b", type="SMILES")
        outcome = reaction.outcomes.add()
        product = outcome.products.add()
        product.measurements.add(type="YIELD", percentage={"value": 2.5})
        product.identifiers.add(value="product", type="SMILES")
        reaction.provenance.doi = "10.0000/test.foo"
        dataset = dataset_pb2.Dataset(dataset_id="test_dataset", reactions=[reaction])
        dataset_filename = (tmp_path / "test.pb").as_posix()
        message_helpers.write_message(dataset, dataset_filename)
        yield dataset_filename
    finally:
        # Remove the test database, including when building or writing the
        # dataset above failed after the database had been created.
        connection = connect(ord_interface.client.POSTGRES_DB)
        try:
            connection.set_session(autocommit=True)
            with connection.cursor() as cursor:
                cursor.execute("DROP DATABASE test;")
        finally:
            connection.close()
示例#11
0
def main(argv):
    """Loads the input Datasets and optionally validates, updates and merges them.

    Does nothing when no input files are found. With `FLAGS.validate` the
    loaded datasets are validated. Without `FLAGS.update` processing stops
    there; otherwise every reaction is updated, the datasets are combined,
    and the result is written to `FLAGS.output` (or to a filename derived
    from the combined dataset ID).

    Args:
        argv: Unused; only present for app.run().
    """
    del argv  # Only used by app.run().
    filenames = sorted(_get_filenames())
    if not filenames:
        logging.info('nothing to do')
        return  # Nothing to do.
    datasets = {}
    for filename in filenames:
        datasets[filename] = message_helpers.load_message(
            filename, dataset_pb2.Dataset, FLAGS.input_format)
    if FLAGS.validate:
        validate(datasets)
    if not FLAGS.update:
        logging.info('nothing else to do; use --update for more')
        return  # Nothing else to do.
    for dataset in datasets.values():
        for reaction in dataset.reactions:
            update_reaction(reaction)
    combined = _combine_datasets(datasets)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        output_filename = _get_output_filename(combined.dataset_id)
    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    if FLAGS.cleanup:
        cleanup(filenames, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename,
                                  FLAGS.input_format)
示例#12
0
 def test_modify_dataset_with_validation_errors(self):
     """Writes a negative moles value and expects the run to raise ValueError."""
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     component = dataset.reactions[0].inputs['methylamine'].components[0]
     component.moles.value = -2
     message_helpers.write_message(dataset, self.dataset_filename)
     with self.assertRaisesRegex(ValueError, 'must be non-negative'):
         self._run_main()
示例#13
0
 def test_main_pass(self):
     """Writes the same dataset to both files and expects the run to succeed."""
     dataset = dataset_pb2.Dataset()
     component = dataset.reactions.add().inputs['test'].components.add()
     component.identifiers.add(value='c1ccccc1', type='SMILES')
     for filename in (self.pb_filename, self.pbtxt_filename):
         message_helpers.write_message(dataset, filename)
     self._run()
示例#14
0
 def test_round_trip(self, message_format):
     """Writes each message to a temporary file and expects an equal reload.

     Args:
         message_format: Object whose `.value` is used as the file suffix.
     """
     for original in self.messages:
         with tempfile.NamedTemporaryFile(
                 suffix=message_format.value) as handle:
             message_helpers.write_message(original, handle.name)
             handle.flush()
             reloaded = message_helpers.load_message(handle.name,
                                                     type(original))
             self.assertEqual(original, reloaded)
示例#15
0
 def test_modify_reaction_id(self):
     """Renames a reaction ID and expects it reported as one add and one remove."""
     new_id = 'test_rename'
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     dataset.reactions[0].reaction_id = new_id
     message_helpers.write_message(dataset, self.dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {new_id})
     self.assertEqual(removed, {'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'})
     self.assertEmpty(changed)
     self.assertCountEqual([self.dataset_filename], filenames)
示例#16
0
 def test_main_fail(self):
     """Writes two differing datasets and expects a 'Datasets differ' error."""
     dataset = dataset_pb2.Dataset()
     component = dataset.reactions.add().inputs['test'].components.add()
     component.identifiers.add(value='c1ccccc1', type='SMILES')
     message_helpers.write_message(dataset, self.pb_filename)
     # The text file gets one identifier more than the binary file.
     component.identifiers.add(value='benzene', type='NAME')
     message_helpers.write_message(dataset, self.pbtxt_filename)
     with self.assertRaisesRegex(ValueError, 'Datasets differ'):
         self._run()
示例#17
0
 def test_multiple_dois(self):
     """Runs list_dois.main on a dataset whose two reactions have different DOIs."""
     dataset = dataset_pb2.Dataset()
     dataset.dataset_id = 'ord_dataset-1'
     for doi in ('foo/bar', 'not/bar'):
         dataset.reactions.add().provenance.doi = doi
     tempdir = self.create_tempdir()
     output_path = os.path.join(tempdir, f'{dataset.dataset_id}.pb')
     message_helpers.write_message(dataset, output_path)
     input_pattern = os.path.join(tempdir, '*.pb')
     with flagsaver.flagsaver(input=input_pattern):
         list_dois.main(())
示例#18
0
 def test_bad_dataset_id(self):
     """Expects a 'malformed dataset ID' error for an invalid dataset ID."""
     bad_dataset = dataset_pb2.Dataset(dataset_id='not-a-real-dataset-id',
                                       reactions=[reaction_pb2.Reaction()])
     filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(bad_dataset, filename)
     saved_flags = flagsaver.flagsaver(root=self.test_subdirectory,
                                       input_pattern=filename,
                                       validate=False,
                                       update=True)
     with saved_flags:
         with self.assertRaisesRegex(ValueError, 'malformed dataset ID'):
             process_dataset.main(())
示例#19
0
 def test_add_dataset_with_validation_errors(self):
     """Submits the SMILES 'C#O' and expects a SMILES validation error."""
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='C#O')
     component.is_limiting = True
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     with self.assertRaisesRegex(ValueError, 'could not validate SMILES'):
         self._run_main()
示例#20
0
def _run_updates(inputs, datasets):
    """Updates the submission files.

    Each dataset is updated in place and has its large Data values
    extracted (new data files are staged with `git add`). The datasets are
    then combined, validated once more, and written to `FLAGS.output` or to
    an ID-derived path under `FLAGS.root`. With `FLAGS.write_binary`, a
    `.pb` copy is also written and staged unless the output is already `.pb`.

    Args:
        inputs: List of FileStatus objects.
        datasets: Dict mapping filenames to Dataset messages.
    """
    for dataset in datasets.values():
        # Set reaction_ids, resolve names, fix cross-references, etc.
        updates.update_dataset(dataset)
        # Offload large Data values.
        data_filenames = data_storage.extract_data(dataset,
                                                   FLAGS.root,
                                                   min_size=FLAGS.min_size,
                                                   max_size=FLAGS.max_size)
        if data_filenames:
            args = ['git', 'add'] + list(data_filenames)
            logging.info('Running command: %s', ' '.join(args))
            subprocess.run(args, check=True)
    combined = _combine_datasets(datasets)
    # Final validation to make sure we didn't break anything.
    options = validations.ValidationOptions(validate_ids=True,
                                            require_provenance=True)
    validations.validate_datasets({'_COMBINED': combined},
                                  FLAGS.write_errors,
                                  options=options)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        _, suffix = os.path.splitext(inputs[0].filename)
        output_filename = os.path.join(
            FLAGS.root,
            message_helpers.id_filename(f'{combined.dataset_id}{suffix}'))
    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    if FLAGS.cleanup:
        cleanup(inputs, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename)
    # Write a binary version for fast read/write.
    root, ext = os.path.splitext(output_filename)
    if FLAGS.write_binary and ext != '.pb':
        binary_filename = root + '.pb'
        logging.info('writing combined Dataset (binary) to %s',
                     binary_filename)
        message_helpers.write_message(combined, binary_filename)
        args = ['git', 'add', binary_filename]
        logging.info('Running command: %s', ' '.join(args))
        subprocess.run(args, check=True)
示例#21
0
def main(argv):
    """Generates a Dataset from a template file and a spreadsheet and writes it.

    The output goes to `FLAGS.output` when set; otherwise to the spreadsheet
    path with its extension replaced by `_dataset.pbtxt`.

    Args:
        argv: Unused; only present for app.run().
    """
    del argv  # Only used by app.run().
    with open(FLAGS.template) as template_file:
        template_string = template_file.read()
    df = templating.read_spreadsheet(FLAGS.spreadsheet)
    logging.info('generating new Dataset from %s and %s', FLAGS.template,
                 FLAGS.spreadsheet)
    dataset = templating.generate_dataset(template_string,
                                          df,
                                          validate=FLAGS.validate)
    output_filename = FLAGS.output
    if not output_filename:
        # Default: name the output after the spreadsheet.
        basename, _ = os.path.splitext(FLAGS.spreadsheet)
        output_filename = f'{basename}_dataset.pbtxt'
    logging.info('writing new Dataset to %s', output_filename)
    message_helpers.write_message(dataset, output_filename)
示例#22
0
 def test_add_dataset_with_too_large_data(self):
     """Expects a 'larger than max_size' error when max_size is 0.0."""
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     image = reaction.observations.add().image
     image.bytes_value = b'test data value'
     image.format = 'png'
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     with self.assertRaisesRegex(ValueError, 'larger than max_size'):
         self._run_main(min_size=0.0, max_size=0.0)
示例#23
0
 def setUp(self):
     """Builds a one-reaction dataset and writes it to `test.pbtxt`.

     The dataset is kept on `self.dataset`; the file is written into a new
     temporary directory stored on `self.test_subdirectory`.
     """
     super().setUp()
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     reaction = reaction_pb2.Reaction()
     reaction.reaction_id = 'test'
     reaction.identifiers.add(value='reaction', type='REACTION_SMILES')
     first_input = reaction.inputs['input1']
     first_input.components.add().identifiers.add(value='input1',
                                                  type='SMILES')
     second_input = reaction.inputs['input2']
     for smiles in ('input2a', 'input2b'):
         second_input.components.add().identifiers.add(value=smiles,
                                                       type='SMILES')
     product = reaction.outcomes.add().products.add()
     product.measurements.add(type='YIELD', percentage={'value': 2.5})
     product.identifiers.add(value='product', type='SMILES')
     self.dataset = dataset_pb2.Dataset(reactions=[reaction])
     output_path = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(self.dataset, output_path)
示例#24
0
def main(argv):
    """Builds one Dataset from the Reaction files matching `FLAGS.input`.

    Every matching file is loaded as a Reaction; the reactions are wrapped
    in a Dataset with `FLAGS.name` and `FLAGS.description`, optionally
    validated, and written to `FLAGS.output`.

    Args:
        argv: Unused; only present for app.run().
    """
    del argv  # Only used by app.run().
    # glob.glob() returns matches in arbitrary order; sort them so the order
    # of reactions in the output does not depend on the filesystem.
    filenames = sorted(glob.glob(FLAGS.input, recursive=True))
    logging.info('Found %d Reaction protos', len(filenames))
    reactions = [
        message_helpers.load_message(filename, reaction_pb2.Reaction)
        for filename in filenames
    ]
    if not FLAGS.name:
        logging.warning('Consider setting the dataset name with --name')
    if not FLAGS.description:
        logging.warning(
            'Consider setting the dataset description with --description')
    dataset = dataset_pb2.Dataset(name=FLAGS.name,
                                  description=FLAGS.description,
                                  reactions=reactions)
    if FLAGS.validate:
        validations.validate_datasets({'_COMBINED': dataset})
    message_helpers.write_message(dataset, FLAGS.output)
示例#25
0
def main(argv):
    """Loads the input Datasets and optionally validates, updates and merges them.

    Does nothing when there are no inputs. With `FLAGS.validate` the loaded
    datasets are validated. Without `FLAGS.update` processing stops there;
    otherwise each reaction is updated, large Data values are extracted
    (new data files are staged with `git add`), and the combined dataset is
    validated and written to `FLAGS.output` or to an ID-derived path under
    `FLAGS.root`.

    Args:
        argv: Unused; only present for app.run().
    """
    del argv  # Only used by app.run().
    inputs = sorted(_get_inputs())
    if not inputs:
        logging.info('nothing to do')
        return  # Nothing to do.
    datasets = {}
    for file_status in inputs:
        datasets[file_status.filename] = message_helpers.load_message(
            file_status.filename, dataset_pb2.Dataset)
    if FLAGS.validate:
        validations.validate_datasets(datasets, FLAGS.write_errors)
    if not FLAGS.update:
        logging.info('nothing else to do; use --update for more')
        return  # Nothing else to do.
    for dataset in datasets.values():
        for reaction in dataset.reactions:
            updates.update_reaction(reaction)
        # Offload large Data values.
        data_filenames = data_storage.extract_data(dataset,
                                                   FLAGS.root,
                                                   min_size=FLAGS.min_size,
                                                   max_size=FLAGS.max_size)
        if data_filenames:
            # list() keeps the concatenation valid for any iterable result.
            args = ['git', 'add'] + list(data_filenames)
            logging.info('Running command: %s', ' '.join(args))
            subprocess.run(args, check=True)
    combined = _combine_datasets(datasets)
    # Final validation to make sure we didn't break anything.
    validations.validate_datasets({'_COMBINED': combined}, FLAGS.write_errors)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        _, suffix = os.path.splitext(inputs[0].filename)
        output_filename = os.path.join(
            FLAGS.root,
            message_helpers.id_filename(f'{combined.dataset_id}{suffix}'))
    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    if FLAGS.cleanup:
        cleanup(inputs, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename)
示例#26
0
 def setUp(self):
     """Writes one populated and one empty Reaction file to a temp directory."""
     super().setUp()
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     populated = reaction_pb2.Reaction()
     dummy_component = populated.inputs['dummy_input'].components.add()
     identifier = dummy_component.identifiers.add(type='CUSTOM')
     identifier.details = 'custom_identifier'
     identifier.value = 'custom_value'
     dummy_component.is_limiting = reaction_pb2.Boolean.TRUE
     dummy_component.mass.value = 1
     dummy_component.mass.units = reaction_pb2.Mass.GRAM
     outcome = populated.outcomes.add()
     outcome.conversion.value = 75
     populated_path = os.path.join(self.test_subdirectory,
                                   'reaction-1.pbtxt')
     message_helpers.write_message(populated, populated_path)
     # The second reaction is left empty.
     empty = reaction_pb2.Reaction()
     empty_path = os.path.join(self.test_subdirectory, 'reaction-2.pbtxt')
     message_helpers.write_message(empty, empty_path)
示例#27
0
 def setUp(self):
     """Builds a sample reaction and writes it to `reaction.pbtxt`.

     The reaction is stored on `self._reaction` and the file path on
     `self._input`; a UnitResolver is kept on `self._resolver`.
     """
     super().setUp()
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     self._resolver = units.UnitResolver()
     reaction = reaction_pb2.Reaction()
     reaction.setup.is_automated = True
     compound_specs = (
         dict(name='n-hexane',
              smiles='CCCCCC',
              role='reactant',
              amount='1 milliliters'),
         dict(name='THF',
              smiles='C1OCCC1',
              role='solvent',
              amount='40 liters'),
     )
     for spec in compound_specs:
         reaction.inputs['dummy_input'].components.add().CopyFrom(
             message_helpers.build_compound(**spec))
     conditions = reaction.conditions
     conditions.pressure.atmosphere.type = (
         reaction_pb2.PressureConditions.Atmosphere.OXYGEN)
     conditions.stirring.rate.rpm = 100
     conditions.temperature.control.type = (
         reaction_pb2.TemperatureConditions.TemperatureControl.OIL_BATH)
     setpoint = reaction_pb2.Temperature(
         value=100, units=reaction_pb2.Temperature.CELSIUS)
     conditions.temperature.setpoint.CopyFrom(setpoint)
     outcome = reaction.outcomes.add()
     outcome.reaction_time.CopyFrom(self._resolver.resolve('40 minutes'))
     product_compound = message_helpers.build_compound(
         name='hexanone',
         smiles='CCCCC(=O)C',
     )
     outcome.products.add().identifiers.extend(product_compound.identifiers)
     reaction.reaction_id = 'dummy_reaction_id'
     self._reaction = reaction
     self._input = os.path.join(self.test_subdirectory, 'reaction.pbtxt')
     message_helpers.write_message(self._reaction, self._input)
示例#28
0
 def test_add_dataset_with_large_data(self):
     """Checks that image bytes end up in a data file referenced by URL.

     Runs with `min_size=0.0`, then expects the image's `url` field to match
     the expected GitHub URL and the raw bytes to be readable from the
     corresponding path under `self.test_subdirectory`.
     """
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     moles = component.amount.moles
     moles.value = 2
     moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     created = reaction.provenance.record_created
     created.time.value = '2020-01-01'
     created.person.username = '******'
     created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test'
     image = reaction.observations.add().image
     image.bytes_value = b'test data value'
     image.format = 'png'
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     added, removed, changed, filenames = self._run(min_size=0.0)
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     filenames.remove(self.dataset_filename)
     processed = message_helpers.load_message(filenames[0],
                                              dataset_pb2.Dataset)
     digest = (
         '36443a1839bf1160087422b7468a93c7b97dac7eea423bfac189208a15823139')
     relative_path = f'data/36/ord_data-{digest}.png'
     expected = ('https://github.com/Open-Reaction-Database/'
                 'ord-data/tree/' + relative_path)
     actual_url = processed.reactions[0].observations[0].image.url
     self.assertEqual(actual_url, expected)
     data_path = os.path.join(self.test_subdirectory, relative_path)
     with open(data_path, 'rb') as data_file:
         stored_bytes = data_file.read()
     self.assertEqual(b'test data value', stored_bytes)
示例#29
0
 def test_add_dataset(self):
     """Adds one new dataset file and checks that IDs are assigned.

     Expects the submitted file to be gone after the run and the output
     dataset to have a dataset ID and a reaction ID.
     """
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['ethylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 25
     submission = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(submission, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     # Check for assignment of dataset and reaction IDs; drop
     # self.dataset_filename first so only the other file remains.
     filenames.remove(self.dataset_filename)
     self.assertLen(filenames, 1)
     processed = message_helpers.load_message(filenames[0],
                                              dataset_pb2.Dataset)
     self.assertNotEmpty(processed.dataset_id)
     self.assertLen(processed.reactions, 1)
     self.assertNotEmpty(processed.reactions[0].reaction_id)
示例#30
0
 def setUp(self):
     """Writes two dataset files into a fresh temporary directory.

     `dataset1.pbtxt` holds one populated reaction; `dataset2.pbtxt` holds
     that reaction plus an empty one.
     """
     super().setUp()
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     populated = reaction_pb2.Reaction()
     dummy_component = populated.inputs['dummy_input'].components.add()
     identifier = dummy_component.identifiers.add(type='CUSTOM')
     identifier.details = 'custom_identifier'
     identifier.value = 'custom_value'
     dummy_component.is_limiting = True
     mass = dummy_component.amount.mass
     mass.value = 1
     mass.units = reaction_pb2.Mass.GRAM
     outcome = populated.outcomes.add()
     outcome.conversion.value = 75
     dataset1_filename = os.path.join(self.test_subdirectory,
                                      'dataset1.pbtxt')
     message_helpers.write_message(
         dataset_pb2.Dataset(reactions=[populated]), dataset1_filename)
     # The second reaction is left empty.
     empty = reaction_pb2.Reaction()
     dataset2_filename = os.path.join(self.test_subdirectory,
                                      'dataset2.pbtxt')
     message_helpers.write_message(
         dataset_pb2.Dataset(reactions=[populated, empty]),
         dataset2_filename)