Exemplo n.º 1
0
 def test_nested(self):
     """Checks lookup of a singular nested Child, returned by reference."""
     child_type = test_pb2.Nested.Child
     parent = test_pb2.Nested()
     # Before `child` is touched, the search is expected to come back empty.
     found = message_helpers.find_submessages(parent, child_type)
     self.assertEmpty(found)
     parent.child.value = 5.6
     found = message_helpers.find_submessages(parent, child_type)
     self.assertLen(found, 1)
     # Writing through the returned submessage must be visible on the parent.
     found[0].value = 7.8
     self.assertAlmostEqual(parent.child.value, 7.8, places=4)
Exemplo n.º 2
0
def resolve_names(message):
    """Tries to turn compound NAME identifiers into SMILES identifiers.

    Compounds that already carry one of the identifier types listed in
    `_COMPOUND_STRUCTURAL_IDENTIFIERS` are left untouched. For the remaining
    compounds, each NAME identifier is sent to `_pubchem_resolve` in turn; the
    first one that resolves yields a new SMILES identifier on that compound
    and processing moves on to the next compound. Lookups that fail with an
    HTTP error are logged and skipped.

    Args:
        message: Reaction proto.

    Returns:
        Boolean whether `message` was modified.
    """
    changed = False
    for compound in message_helpers.find_submessages(message,
                                                     reaction_pb2.Compound):
        has_structure = any(
            existing.type in _COMPOUND_STRUCTURAL_IDENTIFIERS
            for existing in compound.identifiers)
        if has_structure:
            continue  # Nothing to resolve for this compound.
        for identifier in compound.identifiers:
            if identifier.type != identifier.NAME:
                continue
            try:
                smiles = _pubchem_resolve('name', identifier.value)
            except urllib.error.HTTPError as error:
                logging.info('PubChem could not resolve NAME %s: %s',
                             identifier.value, error)
                continue  # Try the next NAME identifier, if any.
            resolved = compound.identifiers.add()
            resolved.type = resolved.SMILES
            resolved.value = smiles
            resolved.details = 'NAME resolved by PubChem'
            changed = True
            break  # One resolved name per compound is enough.
    return changed
Exemplo n.º 3
0
 def test_map_nested(self):
     """Checks that each keyed `children` entry is found as a submessage."""
     parent = test_pb2.MapNested()
     for key, value in (('one', 1.2), ('two', 3.4)):
         parent.children[key].value = value
     found = message_helpers.find_submessages(parent,
                                              test_pb2.MapNested.Child)
     self.assertLen(found, 2)
Exemplo n.º 4
0
 def test_repeated_nested(self):
     """Checks that each added `children` element is found as a submessage."""
     parent = test_pb2.RepeatedNested()
     for value in (1.2, 3.4):
         parent.children.add().value = value
     found = message_helpers.find_submessages(parent,
                                              test_pb2.RepeatedNested.Child)
     self.assertLen(found, 2)
Exemplo n.º 5
0
 def test_compounds(self):
     """Checks that a Compound added under a Reaction input is found."""
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['test'].components.add()
     component.identifiers.add(type='NAME', value='aspirin')
     found = message_helpers.find_submessages(reaction,
                                              reaction_pb2.Compound)
     self.assertLen(found, 1)
Exemplo n.º 6
0
 def test_find_data_messages(self):
     """Checks Data discovery in several container messages."""
     data_type = reaction_pb2.Data

     # A freshly constructed Reaction is expected to contain no Data.
     empty_reaction = reaction_pb2.Reaction()
     found = message_helpers.find_submessages(empty_reaction, data_type)
     self.assertEmpty(found)

     # One Data reachable via the observation's `image` field.
     observation = reaction_pb2.ReactionObservation()
     observation.image.value = 'not an image'
     found = message_helpers.find_submessages(observation, data_type)
     self.assertLen(found, 1)

     # Two Data entries stored under `automation_code` keys.
     setup = reaction_pb2.ReactionSetup()
     setup.automation_code['test1'].value = 'test data 1'
     setup.automation_code['test2'].bytes_value = b'test data 2'
     found = message_helpers.find_submessages(setup, data_type)
     self.assertLen(found, 2)

     # Both of the above combined inside a single Reaction.
     reaction = reaction_pb2.Reaction()
     reaction.observations.add().image.value = 'not an image'
     reaction.setup.automation_code['test1'].value = 'test data 1'
     reaction.setup.automation_code['test2'].bytes_value = b'test data 2'
     found = message_helpers.find_submessages(reaction, data_type)
     self.assertLen(found, 3)
Exemplo n.º 7
0
def extract_data(message, root, min_size=0.0, max_size=1.0):
    """Replaces large Data values with pointers to offloaded data.

    Each Data submessage of `message` is passed to `write_data`, which writes
    it into a scratch directory. When a file is produced, it is copied to its
    ID-derived location under `root` (unless a file already exists there) and
    the Data message's `url` is pointed at `DATA_URL_PREFIX` joined with that
    location.

    Git LFS (https://git-lfs.github.com/) is convenient because it lives in the
    same repo as the associated Reaction records. However, it is not possible to
    get a permanent URL for the uploaded data because it is only committed to
    the PR branch. We have (at least) these options:

        1. Modify the URL just before or after the PR is merged to point to the
           correct branch.
        2. Modify the URL to point to its eventual destination (in the `main`
           branch) and deal with broken links during submission review.
        3. Use relative paths (relative to the repository root). This means that
           users will have to traverse the repo manually to access referenced
           data instead of simply following a URL.
        4. Merge the data immediately in another repo so the URL is permanent.

    Option (2) is used here because it yields URLs that will eventually work
    and it is simpler than (1). Option (4) is rejected because it requires data
    to be committed and merged before review.

    Args:
        message: Protocol buffer message.
        root: Text root of the repository.
        min_size: Float minimum size of data before it will be written (in MB).
        max_size: Float maximum size of data to write (in MB).

    Returns:
        Set of text filenames; the Data files newly created under `root`.
        Targets that already existed are not included (a warning is issued
        for them instead).
    """
    data_messages = message_helpers.find_submessages(message,
                                                     reaction_pb2.Data)
    filenames = set()
    # The context manager removes the scratch directory even when writing or
    # copying raises, so failed runs do not leave temporary data behind.
    with tempfile.TemporaryDirectory() as dirname:
        for data_message in data_messages:
            data_filename, data_size = write_data(data_message,
                                                  dirname,
                                                  min_size=min_size,
                                                  max_size=max_size)
            if not data_filename:
                continue  # Nothing was written for this Data message.
            basename = os.path.basename(data_filename)
            output_filename = message_helpers.id_filename(basename)
            # NOTE(review): flask.safe_join was removed in Flask 2.1; confirm
            # the pinned Flask version or move to werkzeug.utils.safe_join.
            with_root = flask.safe_join(root, output_filename)
            if os.path.exists(with_root):
                warnings.warn(f'Target Data blob already exists: {with_root}')
            else:
                os.makedirs(os.path.dirname(with_root), exist_ok=True)
                shutil.copy2(data_filename, with_root)
                filenames.add(with_root)
            # The URL is updated whether or not the blob was newly copied.
            data_message.url = urllib.parse.urljoin(DATA_URL_PREFIX,
                                                    output_filename)
            logging.info('Created Data link (%g MB): %s', data_size, with_root)
    return filenames
Exemplo n.º 8
0
def add_binary_identifiers(message):
    """Adds RDKIT_BINARY identifiers for compounds with valid structures.

    For every Compound found in `message` that does not already have an
    RDKIT_BINARY identifier, the compound's SMILES, INCHI, and MOLBLOCK
    identifiers are tried in the order they are stored. The first one that
    RDKit parses is serialized with `ToBinary()` and appended as a new
    RDKIT_BINARY identifier.

    Note that the RDKIT_BINARY representations are mostly useful in the context
    of searching the database. Accordingly, this function is not included in the
    standard set of Reaction updates in update_reaction().

    Args:
        message: Reaction proto.

    Returns:
        Boolean whether `message` was modified.
    """
    modified = False
    compounds = message_helpers.find_submessages(message,
                                                 reaction_pb2.Compound)
    if not Chem:
        # Without RDKit no structure can be parsed; previously only the SMILES
        # branch was guarded and the other branches would raise.
        return modified
    for compound in compounds:
        # Check the compound's own identifiers (not those of the top-level
        # message) so that existing binaries are never duplicated.
        if any(identifier.type == identifier.RDKIT_BINARY
               for identifier in compound.identifiers):
            continue
        for identifier in compound.identifiers:
            mol = None
            if identifier.type == identifier.SMILES:
                mol = Chem.MolFromSmiles(identifier.value)
            elif identifier.type == identifier.INCHI:
                mol = Chem.MolFromInchi(identifier.value)
            elif identifier.type == identifier.MOLBLOCK:
                mol = Chem.MolFromMolBlock(identifier.value)
            if mol is not None:
                # Record which identifier type the binary was derived from.
                source = reaction_pb2.CompoundIdentifier.IdentifierType.Name(
                    identifier.type)
                compound.identifiers.add(bytes_value=mol.ToBinary(),
                                         type='RDKIT_BINARY',
                                         details=f'Generated from {source}')
                modified = True
                break  # Only add one RDKIT_BINARY per Compound.
    return modified
Exemplo n.º 9
0
 def test_scalar(self):
     """Checks the scalar-only case and rejection of a non-proto type."""
     scalar = test_pb2.Scalar(int32_value=5, float_value=6.7)
     # Searching for the message's own type is expected to find nothing.
     found = message_helpers.find_submessages(scalar, test_pb2.Scalar)
     self.assertEmpty(found)
     # A plain Python type is not an acceptable submessage type.
     with self.assertRaisesRegex(TypeError, 'must be a Protocol Buffer'):
         message_helpers.find_submessages(scalar, float)