Example #1
0
 def test_modify_dataset(self):
     """Modified and added reactions survive a pipeline run with IDs intact."""
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     # Modify the existing reaction...
     dataset.reactions[0].inputs['methylamine'].components[
         0].moles.value = 2
     # ...and add a new reaction.
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     dataset.reactions.add().CopyFrom(reaction)
     message_helpers.write_message(dataset, self.dataset_filename)
     filenames = self._run_main()
     self.assertCountEqual([self.dataset_filename], filenames)
     # Check for preservation of dataset and record IDs.
     updated_dataset = message_helpers.load_message(self.dataset_filename,
                                                    dataset_pb2.Dataset)
     self.assertLen(updated_dataset.reactions, 2)
     self.assertEqual(dataset.dataset_id, updated_dataset.dataset_id)
     self.assertEqual(dataset.reactions[0].reaction_id,
                      updated_dataset.reactions[0].reaction_id)
     # The added reaction should have been assigned a nonempty ID.
     self.assertNotEmpty(updated_dataset.reactions[1].reaction_id)
Example #2
0
 def test_bad_json(self):
     """Loading JSON into a mismatched message type raises ValueError."""
     source = test_pb2.RepeatedScalar(values=[1.2, 3.4])
     with tempfile.NamedTemporaryFile(mode='w+') as f:
         f.write(json_format.MessageToJson(source))
         f.flush()
         with self.assertRaisesRegex(ValueError, 'no field named "values"'):
             message_helpers.load_message(f.name, test_pb2.Nested, 'json')
Example #3
0
 def test_modify_dataset(self):
     """Modified and added reactions are reported and IDs are preserved."""
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     # Modify the existing reaction...
     reaction1 = dataset.reactions[0]
     reaction1.inputs['methylamine'].components[0].amount.moles.value = 2
     # ...and add a new reaction.
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     component.amount.moles.value = 2
     component.amount.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction.provenance.record_created.time.value = '2020-01-01'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test'
     dataset.reactions.add().CopyFrom(reaction)
     message_helpers.write_message(dataset, self.dataset_filename)
     added, removed, changed, filenames = self._run()
     # The new reaction is reported as added; the modified one as changed.
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEqual(changed, {'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'})
     self.assertCountEqual([self.dataset_filename], filenames)
     # Check for preservation of dataset and record IDs.
     updated_dataset = message_helpers.load_message(self.dataset_filename,
                                                    dataset_pb2.Dataset)
     self.assertLen(updated_dataset.reactions, 2)
     self.assertEqual(dataset.dataset_id, updated_dataset.dataset_id)
     self.assertEqual(dataset.reactions[0].reaction_id,
                      updated_dataset.reactions[0].reaction_id)
     # The added reaction should have been assigned a nonempty ID.
     self.assertNotEmpty(updated_dataset.reactions[1].reaction_id)
Example #4
0
 def test_bad_pbtxt(self):
     """Loading pbtxt into a mismatched message type raises ValueError."""
     source = test_pb2.RepeatedScalar(values=[1.2, 3.4])
     with tempfile.NamedTemporaryFile(mode='w+', suffix='.pbtxt') as f:
         f.write(text_format.MessageToString(source))
         f.flush()
         with self.assertRaisesRegex(ValueError, 'no field named "values"'):
             message_helpers.load_message(f.name, test_pb2.Nested)
Example #5
0
def test_download_dataset(client, filename, expected, tmp_path):
    """Downloads a dataset and checks the status code and parseability.

    Args:
        client: Flask test client fixture.
        filename: Dataset identifier to request.
        expected: Expected HTTP status code.
        tmp_path: pytest-provided temporary directory.
    """
    # BUG FIX: the original URL was f"/dataset/(unknown)/download" — an
    # f-string with no placeholder — so the `filename` parameter was unused
    # and every request hit the same (invalid) path.
    response = client.get(f"/dataset/{filename}/download",
                          follow_redirects=True)
    assert response.status_code == expected
    if response.status_code == 200:
        # Make sure the downloaded bytes parse as a Dataset proto.
        out_path = (tmp_path / "dataset.pb").as_posix()
        with open(out_path, "wb") as f:
            f.write(response.data)
        message_helpers.load_message(out_path, dataset_pb2.Dataset)
Example #6
0
 def test_download_dataset_with_kind(self, file_name, kind, expected):
     """Downloads a dataset in the requested serialization kind."""
     url = f'/dataset/{file_name}/download/{kind}'
     response = self.client.get(url, follow_redirects=True)
     self.assertEqual(response.status_code, expected)
     if response.status_code != 200:
         return
     # Make sure the payload parses as a Dataset proto.
     filename = os.path.join(self.test_directory, f'dataset.{kind}')
     with open(filename, 'wb') as f:
         f.write(response.data)
     message_helpers.load_message(filename, dataset_pb2.Dataset)
Example #7
0
 def test_bad_binary(self):
     """Parsing serialized bytes as the wrong message type raises ValueError."""
     payload = test_pb2.RepeatedScalar(values=[1.2, 3.4]).SerializeToString()
     with tempfile.NamedTemporaryFile(suffix='.pb') as f:
         f.write(payload)
         f.flush()
         # NOTE(kearnes): The decoder is not perfect; for example, it will
         # not be able to distinguish from a message with the same tags and
         # types (e.g. test_pb2.Scalar and test_pb2.RepeatedScalar).
         with self.assertRaisesRegex(ValueError, 'Error parsing message'):
             message_helpers.load_message(f.name, test_pb2.Nested)
Example #8
0
 def test_add_sharded_dataset(self):
     """Two new shard files are merged into one dataset; inputs are removed."""
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     component.amount.moles.value = 2
     component.amount.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction.provenance.record_created.time.value = '2020-01-02'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test1'
     dataset1 = dataset_pb2.Dataset(reactions=[reaction])
     dataset1_filename = os.path.join(self.test_subdirectory, 'test1.pbtxt')
     message_helpers.write_message(dataset1, dataset1_filename)
     # Reuse the same Reaction with a new ID/timestamp for the second shard;
     # the Dataset constructor above copied the message, so this is safe.
     reaction.provenance.record_created.time.value = '2020-01-03'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test2'
     dataset2 = dataset_pb2.Dataset(reactions=[reaction])
     dataset2_filename = os.path.join(self.test_subdirectory, 'test2.pbtxt')
     message_helpers.write_message(dataset2, dataset2_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test1', 'test2'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     # Drop the pre-existing dataset file; the remaining file is the merge.
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 2)
     # The shard inputs should have been consumed (deleted).
     self.assertFalse(os.path.exists(dataset1_filename))
     self.assertFalse(os.path.exists(dataset2_filename))
Example #9
0
def main(argv):
    """Prints a Markdown index mapping DOIs to the datasets citing them."""
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d datasets', len(filenames))
    dois = collections.defaultdict(list)
    # Some poorly-validated DOI entries start with 'doi:'; strip that prefix.
    doi_pattern = re.compile(r'(?:(?:doi)|(?:DOI))?:?\s*(.*)')
    for filename in filenames:
        logging.info('Checking %s', filename)
        dataset = message_helpers.load_message(filename, dataset_pb2.Dataset)
        dataset_id = os.path.splitext(os.path.basename(filename))[0]
        assert dataset.dataset_id == dataset_id
        unique_dois = {
            doi_pattern.fullmatch(reaction.provenance.doi).group(1)
            for reaction in dataset.reactions
        }
        for doi in unique_dois:
            dois[doi].append(dataset_id)
    for doi in sorted(dois):
        print(f'* [{doi}](https://doi.org/{doi})')
        for dataset in sorted(dois[doi]):
            url = urllib.parse.urljoin(
                _PREFIX, message_helpers.id_filename(dataset) + '.pbtxt')
            print(f'  * [{dataset}]({url})')
Example #10
0
def main(argv):
    """Loads, validates, updates, and combines datasets into one output file.

    Args:
        argv: Unused command-line arguments (required by app.run()).
    """
    del argv  # Only used by app.run().
    filenames = sorted(_get_filenames())
    if not filenames:
        logging.info('nothing to do')
        return  # Nothing to do.
    datasets = {}
    for filename in filenames:
        datasets[filename] = message_helpers.load_message(
            filename, dataset_pb2.Dataset, FLAGS.input_format)
    if FLAGS.validate:
        validate(datasets)
    if not FLAGS.update:
        logging.info('nothing else to do; use --update for more')
        return  # Nothing else to do.
    # Apply per-reaction updates before combining everything into one Dataset.
    for dataset in datasets.values():
        for reaction in dataset.reactions:
            update_reaction(reaction)
    combined = _combine_datasets(datasets)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        output_filename = _get_output_filename(combined.dataset_id)
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)
    if FLAGS.cleanup:
        # Remove the input files before writing the combined output.
        cleanup(filenames, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename,
                                  FLAGS.input_format)
Example #11
0
 def test_add_dataset(self):
     """A new dataset is ingested: IDs assigned, file moved, binary written."""
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     component.amount.moles.value = 2
     component.amount.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction.provenance.record_created.time.value = '2020-01-01'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test'
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     # The original input file should have been consumed.
     self.assertFalse(os.path.exists(dataset_filename))
     # Check for assignment of dataset and reaction IDs.
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     self.assertNotEmpty(dataset.dataset_id)
     self.assertLen(dataset.reactions, 1)
     self.assertNotEmpty(dataset.reactions[0].reaction_id)
     # Check for binary output.
     root, ext = os.path.splitext(filenames[0])
     self.assertEqual(ext, '.pbtxt')
     self.assertTrue(os.path.exists(root + '.pb'))
Example #12
0
 def test_add_dataset_with_large_data(self):
     """Raw observation data is offloaded to a file and replaced by a URL."""
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     image = reaction.observations.add().image
     image.bytes_value = b'test data value'
     image.format = 'png'
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     # min_size=0.0 forces offloading of data of any size.
     filenames = self._run_main(min_size=0.0)
     self.assertLen(filenames, 2)
     filenames.pop(filenames.index(self.dataset_filename))
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     # Offloaded files are named by the SHA-256 of their contents.
     relative_path = (
         'data/36/ord_data-'
         '36443a1839bf1160087422b7468a93c7b97dac7eea423bfac189208a15823139'
         '.png')
     expected = ('https://github.com/Open-Reaction-Database/'
                 'ord-submissions-test/tree/' + relative_path)
     self.assertEqual(dataset.reactions[0].observations[0].image.url,
                      expected)
     # The offloaded file should hold the original bytes.
     with open(os.path.join(self.test_subdirectory, relative_path),
               'rb') as f:
         self.assertEqual(b'test data value', f.read())
Example #13
0
 def test_resolver(self):
     """A NAME identifier gains a resolved SMILES and an RDKit binary."""
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='NAME', value='ethylamine')
     component.is_limiting = True
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     # The original input file should have been consumed.
     self.assertFalse(os.path.exists(dataset_filename))
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 1)
     # The NAME identifier should now be followed by the resolved SMILES
     # and an RDKIT_BINARY identifier.
     identifiers = (dataset.reactions[0].inputs['ethylamine'].components[0].
                    identifiers)
     self.assertLen(identifiers, 3)
     self.assertEqual(
         identifiers[1],
         reaction_pb2.CompoundIdentifier(
             type='SMILES', value='CCN',
             details='NAME resolved by PubChem'))
     self.assertEqual(identifiers[2].type,
                      reaction_pb2.CompoundIdentifier.RDKIT_BINARY)
Example #14
0
 def test_add_dataset_with_existing_reaction_ids(self):
     """Pre-set reaction IDs on added datasets are preserved, not reassigned."""
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     # Use an ID that already follows the 'ord-' convention.
     reaction_id = 'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'
     reaction.reaction_id = reaction_id
     reaction.provenance.record_created.time.value = '2020-01-01 11 am'
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     # Check that existing record IDs for added datasets are not overridden.
     self.assertEqual(dataset.reactions[0].reaction_id, reaction_id)
     self.assertLen(dataset.reactions[0].provenance.record_modified, 0)
Example #15
0
 def test_round_trip(self, message_format):
     """Writing then reloading a message preserves it for every format."""
     for original in self.messages:
         suffix = message_format.value
         with tempfile.NamedTemporaryFile(suffix=suffix) as f:
             message_helpers.write_message(original, f.name)
             f.flush()
             reloaded = message_helpers.load_message(f.name, type(original))
             self.assertEqual(original, reloaded)
Example #16
0
 def test_modify_dataset_with_validation_errors(self):
     """A negative moles value makes the pipeline fail validation."""
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     component = dataset.reactions[0].inputs['methylamine'].components[0]
     component.moles.value = -2
     message_helpers.write_message(dataset, self.dataset_filename)
     with self.assertRaisesRegex(ValueError, 'must be non-negative'):
         self._run_main()
Example #17
0
 def test_main_with_updates(self):
     """Running with --update writes an output dataset with 'ord-' IDs."""
     output = os.path.join(self.test_subdirectory, 'output.pbtxt')
     overrides = dict(input_pattern=self.dataset1_filename,
                      update=True,
                      output=output)
     with flagsaver.flagsaver(**overrides):
         process_dataset.main(())
     self.assertTrue(os.path.exists(output))
     dataset = message_helpers.load_message(output, dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 1)
     self.assertStartsWith(dataset.reactions[0].reaction_id, 'ord-')
Example #18
0
def main(argv):
    """Verifies that a binary Dataset matches its pbtxt counterpart."""
    del argv  # Only used by app.run().
    dataset = message_helpers.load_message(FLAGS.pb, dataset_pb2.Dataset)
    pb_data = text_format.MessageToString(dataset)
    with open(FLAGS.pbtxt) as f:
        pbtxt_data = f.read()
    if pb_data == pbtxt_data:
        return
    diff = difflib.context_diff(pb_data.splitlines(),
                                pbtxt_data.splitlines())
    raise ValueError(f'Datasets differ:\n{pprint.pformat(list(diff))}')
Example #19
0
def migrate_one(user_id, name, conn):
    """Slurp one named dataset from the db/ directory into Postgres.

    Args:
        user_id: String user ID; subdirectory of db/ to read from.
        name: Dataset filename within the user's directory.
        conn: psycopg2 database connection.
    """
    dataset = message_helpers.load_message(f'db/{user_id}/{name}',
                                           dataset_pb2.Dataset)
    # Store the serialized proto as a hex string.
    serialized = dataset.SerializeToString().hex()
    query = psycopg2.sql.SQL(
        'INSERT INTO datasets VALUES (%s, %s, %s) '
        'ON CONFLICT (user_id, name) DO UPDATE SET serialized=%s')
    with conn.cursor() as cursor:
        # name[:-6] presumably strips a '.pbtxt' extension — TODO confirm.
        cursor.execute(query, [user_id, name[:-6], serialized, serialized])
Example #20
0
def main(argv):
    """Validates every dataset matching the input pattern."""
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d datasets', len(filenames))

    def _load(path):
        # Log each file as it is loaded so failures are attributable.
        logging.info('Validating %s', path)
        return message_helpers.load_message(path, dataset_pb2.Dataset)

    validations.validate_datasets({path: _load(path) for path in filenames})
Example #21
0
 def test_modify_reaction_id(self):
     """Renaming a reaction ID is reported as an add plus a remove."""
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     dataset.reactions[0].reaction_id = 'test_rename'
     message_helpers.write_message(dataset, self.dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertCountEqual([self.dataset_filename], filenames)
     self.assertEmpty(changed)
     self.assertEqual(removed, {'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'})
     self.assertEqual(added, {'test_rename'})
Example #22
0
def migrate_one(user_id, name, conn):
    """Slurp one named dataset from the db/ directory into Postgres.

    Args:
        user_id: String user ID; subdirectory of db/ to read from.
        name: Dataset filename within the user's directory.
        conn: psycopg2 database connection.
    """
    dataset = message_helpers.load_message(
        os.path.join(os.path.dirname(__file__), "..", "db", user_id, name),
        dataset_pb2.Dataset)
    # Store the serialized proto as a hex string.
    serialized = dataset.SerializeToString().hex()
    query = psycopg2.sql.SQL(
        "INSERT INTO datasets VALUES (%s, %s, %s) "
        "ON CONFLICT (user_id, name) DO UPDATE SET serialized=%s")
    with conn.cursor() as cursor:
        # name[:-6] presumably strips a '.pbtxt' extension — TODO confirm.
        cursor.execute(query, [user_id, name[:-6], serialized, serialized])
Example #23
0
 def test_main(self):
     """End-to-end enumeration produces the expected 3-reaction dataset."""
     output_filename = os.path.join(self.test_subdirectory, 'dataset.pbtxt')
     overrides = dict(template=self.template,
                      spreadsheet=self.spreadsheet,
                      output=output_filename)
     with flagsaver.flagsaver(**overrides):
         enumerate_dataset.main(())
     self.assertTrue(os.path.exists(output_filename))
     dataset = message_helpers.load_message(output_filename,
                                            dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 3)
     validations.validate_message(dataset, raise_on_error=True)
     self.assertEqual(dataset, self.expected)
Example #24
0
def main(argv):
    """Renders a Reaction as HTML or plain text per --output_type."""
    del argv  # Only used by app.run().
    reaction = message_helpers.load_message(FLAGS.input, reaction_pb2.Reaction)
    generators = {
        'html': generate_text.generate_html,
        'text': generate_text.generate_text,
    }
    if FLAGS.output_type not in generators:
        raise ValueError(f'unsupported output_type: {FLAGS.output_type}')
    text = generators[FLAGS.output_type](reaction)
    if FLAGS.output:
        with open(FLAGS.output, 'w') as f:
            f.write(text)
    else:
        print(text)
Example #25
0
def main(argv):
    """Renders a Reaction proto as HTML or plain text per --type.

    Args:
        argv: Unused command-line arguments (required by app.run()).

    Raises:
        ValueError: If FLAGS.type is not one of 'html' or 'text'.
    """
    del argv  # Only used by app.run()
    reaction = message_helpers.load_message(FLAGS.input_file,
                                            reaction_pb2.Reaction)

    if FLAGS.type == 'html':
        text = generate_html(reaction)
    elif FLAGS.type == 'text':
        text = generate_text(reaction)
    else:
        # BUG FIX: without this branch, `text` was unbound and the writes
        # below raised NameError for any unrecognized --type value.
        raise ValueError(f'unsupported type: {FLAGS.type}')

    if FLAGS.output:
        with open(FLAGS.output, 'w') as fid:
            fid.write(text)
    else:
        print(text)
Example #26
0
 def test_simple(self):
     """build_dataset assembles one reaction file into a named dataset."""
     input_pattern = os.path.join(self.test_subdirectory,
                                  'reaction-1.pbtxt')
     output_filename = os.path.join(self.test_subdirectory, 'dataset.pbtxt')
     with flagsaver.flagsaver(input=input_pattern,
                              name='test dataset',
                              description='this is a test dataset',
                              output=output_filename):
         build_dataset.main(())
     self.assertTrue(os.path.exists(output_filename))
     dataset = message_helpers.load_message(output_filename,
                                            dataset_pb2.Dataset)
     # The flags should be reflected in the written dataset.
     self.assertEqual(dataset.name, 'test dataset')
     self.assertEqual(dataset.description, 'this is a test dataset')
     self.assertLen(dataset.reactions, 1)
Example #27
0
def main(argv):
    """Exports all matching reactions as JSON records, one per line."""
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input)
    logging.info('Found %d datasets', len(filenames))
    # Build all records first so the output file is only touched on success.
    records = []
    for filename in filenames:
        dataset = message_helpers.load_message(filename, dataset_pb2.Dataset)
        for reaction in dataset.reactions:
            record = get_database_json(reaction)
            record['_dataset_id'] = dataset.dataset_id
            record['_serialized'] = encode_bytes(reaction.SerializeToString())
            records.append(json.dumps(record))
    with open(FLAGS.output, 'w') as f:
        f.writelines(f'{record}\n' for record in records)
Example #28
0
def run():
    """Main function that returns added/removed reaction ID sets.

    This function should be called directly by tests to get access to the
    return values. If main() returns something other than None it will break
    shell error code logic downstream.

    Returns:
        added: Set of added reaction IDs.
        removed: Set of deleted reaction IDs.
        changed: Set of changed reaction IDs.
        Each is None when FLAGS.base is not set.
    """
    inputs = sorted(_get_inputs())
    if not inputs:
        logging.info('nothing to do')
        return set(), set(), set()  # Nothing to do.
    datasets = {}
    for file_status in inputs:
        if file_status.status == 'D':
            continue  # Nothing to do for deleted files.
        datasets[file_status.filename] = message_helpers.load_message(
            file_status.filename, dataset_pb2.Dataset)
    if FLAGS.validate:
        # Note: this does not check if IDs are malformed.
        validations.validate_datasets(datasets, FLAGS.write_errors)
    if FLAGS.base:
        # Compare against the base to compute ID-level change statistics.
        added, removed, changed = get_change_stats(datasets,
                                                   inputs,
                                                   base=FLAGS.base)
        logging.info('Summary: +%d -%d Δ%d reaction IDs', len(added),
                     len(removed), len(changed))
        if (added or removed or changed) and FLAGS.issue and FLAGS.token:
            # Post a summary comment on the associated GitHub issue.
            client = github.Github(FLAGS.token)
            repo = client.get_repo(os.environ['GITHUB_REPOSITORY'])
            issue = repo.get_issue(FLAGS.issue)
            issue.create_comment(
                f'Summary: +{len(added)} -{len(removed)} Δ{len(changed)} '
                'reaction IDs')
    else:
        added, removed, changed = None, None, None
    if FLAGS.update:
        _run_updates(inputs, datasets)
    else:
        logging.info('nothing else to do; use --update for more')
    return added, removed, changed
Example #29
0
def main(argv):
    """Combines Reaction protos matching --input into a single Dataset."""
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d Reaction protos', len(filenames))
    reactions = [
        message_helpers.load_message(filename, reaction_pb2.Reaction)
        for filename in filenames
    ]
    if not FLAGS.name:
        logging.warning('Consider setting the dataset name with --name')
    if not FLAGS.description:
        logging.warning(
            'Consider setting the dataset description with --description')
    dataset = dataset_pb2.Dataset(name=FLAGS.name,
                                  description=FLAGS.description,
                                  reactions=reactions)
    if FLAGS.validate:
        validations.validate_datasets({'_COMBINED': dataset})
    message_helpers.write_message(dataset, FLAGS.output)
Example #30
0
def main(argv):
    """Builds tables from all matching datasets; optionally loads Postgres.

    Args:
        argv: Unused command-line arguments (required by app.run()).
    """
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input)
    logging.info('Found %d datasets', len(filenames))
    with Tables() as tables:
        for filename in filenames:
            logging.info(filename)
            dataset = message_helpers.load_message(filename,
                                                   dataset_pb2.Dataset)
            for reaction in dataset.reactions:
                process_reaction(reaction, tables)
    if FLAGS.database:
        logging.info('Creating Postgres database')
        create_database()
    if FLAGS.cleanup:
        # Remove the intermediate CSVs written under FLAGS.output.
        logging.info('Removing intermediate CSV files')
        for filename in glob.glob(os.path.join(FLAGS.output, '*.csv')):
            logging.info(filename)
            os.remove(filename)