Пример #1
0
    def test_valid_templating(self):
        """Checks that placeholders are filled from each spreadsheet row.

        The SMILES value and the conversion value in the template are replaced
        with "$my_smiles$" and "$conversion$" placeholders. The generated
        dataset must equal one copy of the valid reaction per row, both when
        the column names include the "$" delimiters and when they omit them.
        """
        smiles_values = ['CCO', 'CCCO', 'CCCCO']
        conversion_values = [75, 50, 30]
        template = self.template_string.replace('value: "CCO"',
                                                'value: "$my_smiles$"')
        template = template.replace('value: 75', 'value: $conversion$')

        # One expected reaction per row: the valid reaction carrying that
        # row's SMILES and conversion.
        expected = dataset_pb2.Dataset()
        for smiles, conversion in zip(smiles_values, conversion_values):
            reaction = expected.reactions.add()
            reaction.CopyFrom(self.valid_reaction)
            reaction.inputs['in'].components[0].identifiers[0].value = smiles
            reaction.outcomes[0].conversion.value = conversion

        # Column names are accepted with or without the surrounding "$".
        column_sets = (
            {
                '$my_smiles$': smiles_values,
                '$conversion$': conversion_values,
            },
            {
                'my_smiles': smiles_values,
                'conversion': conversion_values,
            },
        )
        for columns in column_sets:
            df = pd.DataFrame.from_dict(columns)
            dataset = templating.generate_dataset(template, df)
            self.assertEqual(dataset, expected)
Пример #2
0
 def test_bad_placeholders(self):
     """Checks that a placeholder with no matching column is rejected.

     The template contains both "$my_smiles$" and "$conversion$", but the
     spreadsheet only provides the SMILES column, so generation must raise
     a ValueError naming the missing placeholder.
     """
     template = self.template_string.replace('value: "CCO"',
                                             'value: "$my_smiles$"')
     template = template.replace('value: 75', 'value: $conversion$')
     # Deliberately omit the "$conversion$" column.
     incomplete_df = pd.DataFrame.from_dict({
         '$my_smiles$': ['CCO', 'CCCO', 'CCCCO'],
     })
     with self.assertRaisesRegex(ValueError, r'\$conversion\$ not found'):
         templating.generate_dataset(template, incomplete_df)
Пример #3
0
def enumerate_dataset():
    """Creates a new dataset based on a template reaction and a spreadsheet.

    Three pieces of information are expected to be POSTed in a json object:
        spreadsheet_name: the original filename of the uploaded spreadsheet.
        spreadsheet_data: a base64-encoded string containing the contents of the
            spreadsheet, optionally prefixed with a "data:...;base64," data URL
            header.
        template_string: a string containing a text-formatted Reaction proto,
            i.e., the contents of a pbtxt file.
    A new dataset is created from the template and spreadsheet using
    ord_schema.templating.generate_dataset (with validation disabled) and
    stored with put_dataset under the name "<spreadsheet basename>_dataset".

    Returns:
        The string 'ok' on success.

    Any exception raised while handling the request aborts it with a 406
    response whose body is the error message.
    """
    try:
        data = flask.request.get_json(force=True)
        basename, suffix = os.path.splitext(data['spreadsheet_name'])
        spreadsheet_data = data['spreadsheet_data']
        if spreadsheet_data.startswith('data:'):
            # Remove the data URL prefix; see
            # https://developer.mozilla.org/en-US/docs/Web/API/FileReader/readAsDataURL.
            match = re.fullmatch('data:.*?;base64,(.*)', spreadsheet_data)
            if match is None:
                # Without this check the client would receive the opaque
                # "'NoneType' object has no attribute 'group'" message.
                raise ValueError(
                    'spreadsheet_data looks like a data URL but does not '
                    'contain a ";base64," payload')
            spreadsheet_data = match.group(1)
        spreadsheet_bytes = io.BytesIO(base64.b64decode(spreadsheet_data))
        dataframe = templating.read_spreadsheet(spreadsheet_bytes,
                                                suffix=suffix)
        dataset = templating.generate_dataset(data['template_string'],
                                              dataframe,
                                              validate=False)
        put_dataset(f'{basename}_dataset', dataset)
        return 'ok'
    except Exception as error:  # pylint: disable=broad-except
        # Top-level request boundary: report every failure to the client.
        flask.abort(flask.make_response(str(error), 406))
Пример #4
0
 def test_invalid_templating(self):
     """Checks that invalid enumerated reactions are rejected by default.

     The template's SMILES value and conversion precision are replaced with
     placeholders. With the default arguments, generation must raise a
     ValueError; with validate=False the dataset is produced as-is.
     """
     smiles_values = ['CCO', 'CCCO', 'CCCCO']
     # NOTE(review): presumably the negative precision in the last row is
     # what fails validation -- confirm against the validation rules.
     precision_values = [75, 50, -5]
     template = self.template_string.replace('value: "CCO"',
                                             'value: "$my_smiles$"')
     template = template.replace('precision: 99', 'precision: $precision$')
     df = pd.DataFrame.from_dict({
         '$my_smiles$': smiles_values,
         '$precision$': precision_values,
     })

     # One expected reaction per row, including the row that is not valid.
     expected = dataset_pb2.Dataset()
     for smiles, precision in zip(smiles_values, precision_values):
         reaction = expected.reactions.add()
         reaction.CopyFrom(self.valid_reaction)
         reaction.inputs['in'].components[0].identifiers[0].value = smiles
         reaction.outcomes[0].conversion.precision = precision

     # Default call: the enumerated reactions are checked and rejected.
     with self.assertRaisesRegex(ValueError,
                                 'Enumerated Reaction is not valid'):
         templating.generate_dataset(template, df)

     # With validation disabled the same input yields the full dataset.
     unvalidated = templating.generate_dataset(template, df, validate=False)
     self.assertEqual(unvalidated, expected)
Пример #5
0
def main(argv):
    """Generates a Dataset from a template and a spreadsheet and writes it.

    The template text is read from FLAGS.template and the spreadsheet from
    FLAGS.spreadsheet. The resulting dataset is written to FLAGS.output when
    that flag is set; otherwise to "<spreadsheet path without extension>_dataset.pbtxt".

    Args:
        argv: Unused; only consumed by app.run().
    """
    del argv  # Only used by app.run().
    template_path = FLAGS.template
    spreadsheet_path = FLAGS.spreadsheet
    with open(template_path) as template_file:
        template_string = template_file.read()
    dataframe = templating.read_spreadsheet(spreadsheet_path)
    logging.info('generating new Dataset from %s and %s', template_path,
                 spreadsheet_path)
    dataset = templating.generate_dataset(template_string,
                                          dataframe,
                                          validate=FLAGS.validate)
    # Fall back to a name derived from the spreadsheet when no output is given.
    output_filename = (
        FLAGS.output if FLAGS.output else
        f'{os.path.splitext(spreadsheet_path)[0]}_dataset.pbtxt')
    logging.info('writing new Dataset to %s', output_filename)
    message_helpers.write_message(dataset, output_filename)
Пример #6
0
 def test_missing_values(self):
     """Checks how empty spreadsheet cells prune the generated reactions.

     The template has two inputs; the first SMILES and the last mass are
     turned into "$smiles$" and "$mass$" placeholders. Each CSV row leaves
     one of the two cells empty, and the expected reactions drop the parts
     of the template that depended on the missing value.
     """
     # pylint: disable=too-many-locals
     # Build a template reaction.
     template_reaction = reaction_pb2.Reaction()
     first_input = template_reaction.inputs['one']
     first_a = first_input.components.add()
     first_a.identifiers.add(type='SMILES', value='CCO')
     first_a.amount.mass.value = 1.2
     first_a.amount.mass.units = reaction_pb2.Mass.GRAM
     first_b = first_input.components.add()
     first_b.is_limiting = True
     first_b.identifiers.add(type='SMILES', value='c1ccccc1')
     first_b.amount.volume.value = 3.4
     first_b.amount.volume.units = reaction_pb2.Volume.LITER
     second_input = template_reaction.inputs['two']
     second_a = second_input.components.add()
     second_a.identifiers.add(type='SMILES', value='COO')
     second_a.amount.mass.value = 5.6
     second_a.amount.mass.units = reaction_pb2.Mass.GRAM
     template_reaction.outcomes.add().conversion.value = 75

     # Turn the first SMILES and the last mass into placeholders.
     template = text_format.MessageToString(template_reaction)
     template = template.replace('value: "CCO"', 'value: "$smiles$"')
     template = template.replace('value: 5.6', 'value: $mass$')

     # Build a spreadsheet and test for proper edits.
     csv_path = os.path.join(self.test_subdirectory, 'missing.csv')
     with open(csv_path, 'w') as csv_file:
         csv_file.writelines([
             'smiles,mass\n',
             'CN,\n',  # Missing mass.
             ',1.5\n',  # Missing SMILES.
         ])
     dataset = templating.generate_dataset(template, pd.read_csv(csv_path))

     # Row 1: SMILES filled in, mass missing.
     missing_mass = reaction_pb2.Reaction()
     missing_mass.CopyFrom(template_reaction)
     missing_mass.inputs['one'].components[0].identifiers[0].value = 'CN'
     del missing_mass.inputs['two']  # No components after amount removal.

     # Row 2: mass filled in, SMILES missing.
     missing_smiles = reaction_pb2.Reaction()
     missing_smiles.CopyFrom(template_reaction)
     del missing_smiles.inputs['one'].components[0]  # No identifiers.
     missing_smiles.inputs['two'].components[0].amount.mass.value = 1.5

     expected = dataset_pb2.Dataset(
         reactions=[missing_mass, missing_smiles])
     self.assertEqual(dataset, expected)