示例#1
0
def make_protein_domain(include_anomalous_amino_acids=True,
                        include_bos=True,
                        include_eos=True,
                        include_pad=True,
                        include_mask=True,
                        length=1024):
    """Builds a variable-length discrete domain over a protein vocabulary.

    Every `include_*` flag is forwarded unchanged to `domains.ProteinVocab`,
    and `length` is forwarded unchanged to
    `domains.VariableLengthDiscreteDomain`.

    Args:
        include_anomalous_amino_acids: Passed to `ProteinVocab`.
        include_bos: Passed to `ProteinVocab`.
        include_eos: Passed to `ProteinVocab`.
        include_pad: Passed to `ProteinVocab`.
        include_mask: Passed to `ProteinVocab`.
        length: Passed to `VariableLengthDiscreteDomain` as `length`.

    Returns:
        The constructed `domains.VariableLengthDiscreteDomain`.
    """
    vocab_flags = dict(
        include_anomalous_amino_acids=include_anomalous_amino_acids,
        include_bos=include_bos,
        include_eos=include_eos,
        include_pad=include_pad,
        include_mask=include_mask,
    )
    protein_vocab = domains.ProteinVocab(**vocab_flags)
    return domains.VariableLengthDiscreteDomain(
        vocab=protein_vocab, length=length)
示例#2
0
  def setUp(self):
    """Prepares the domain, language model and input array for each test."""
    # Bind the shared config up front; domain and grad_clip are added below.
    build_model = functools.partial(models.FlaxBERT, **lm_cfg)

    # Length-3 domain over a protein vocab with every optional flag enabled.
    full_vocab = domains.ProteinVocab(
        include_anomalous_amino_acids=True,
        include_bos=True,
        include_eos=True,
        include_pad=True,
        include_mask=True)
    self._domain = domains.VariableLengthDiscreteDomain(
        vocab=full_vocab, length=3)

    self.lm = build_model(domain=self._domain, grad_clip=1.0)
    # One row of three integers; the row width equals the domain length above.
    self.xs = np.array([[1, 1, 0]])
    super().setUp()
示例#3
0
"""Dataset preprocessing and pipeline.

Built for the TrEMBL dataset.
"""
import os
import types
from absl import logging
import gin
import numpy as np
import tensorflow.compat.v1 as tf

from protein_lm import domains

# Module-level domain built with length=512. The vocab explicitly enables
# anomalous amino acids, BOS and EOS; all other ProteinVocab options keep
# their defaults.
protein_domain = domains.VariableLengthDiscreteDomain(
    vocab=domains.ProteinVocab(
        include_anomalous_amino_acids=True,
        include_bos=True,
        include_eos=True,
    ),
    length=512,
)


def dataset_from_tensors(tensors):
    """Builds a `tf.data.Dataset` by slicing (possibly nested) tensors.

    Generators and lists are first materialized into a tuple; any other
    input is handed to `tf.data.Dataset.from_tensor_slices` unchanged.

    Args:
        tensors: The value to slice; a generator, a list, or anything
            `from_tensor_slices` accepts directly.

    Returns:
        The result of `tf.data.Dataset.from_tensor_slices`.
    """
    needs_tuple = isinstance(tensors, (types.GeneratorType, list))
    slice_source = tuple(tensors) if needs_tuple else tensors
    return tf.data.Dataset.from_tensor_slices(slice_source)


def _parse_example(value):
    parsed = tf.parse_single_example(
        value, features={'sequence': tf.io.VarLenFeature(tf.int64)})
    sequence = tf.sparse.to_dense(parsed['sequence'])