Example #1
from typing import Dict, List, Type

import ray
import torch
from biotransformers.utils.compute_utils import Mutation, get_list_probs, mutation_score
from biotransformers.utils.constant import NATURAL_AAS_LIST
from biotransformers.utils.logger import logger  # noqa
from biotransformers.utils.tqdm_utils import ProgressBar
from biotransformers.utils.utils import init_model_sequences, load_fasta
from biotransformers.wrappers.language_model import LanguageModel
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger

from ..lightning_utils.data import BatchWithConstantNumberTokensDataModule
from ..lightning_utils.models import LightningModule

log = logger("transformers_wrapper")
PathMsaFolder = str
TokenProbsDict = Dict[int, Dict[str, float]]
SequenceProbsList = List[TokenProbsDict]


class TransformersWrapper:
    """
    Abstract class that uses a pretrained transformers model to evaluate
    a protein's likelihood as well as other insights.
    """
    def __init__(
        self,
        model_dir: str,
        language_model_cls: Type[LanguageModel],
        num_gpus: int = 0,
Example #2
import math
import os
from dataclasses import dataclass
from typing import List, Tuple

from Bio import SeqIO
from biotransformers.utils.logger import logger

log = logger("utils")


def convert_bytes_size(size_bytes: int) -> Tuple[str, bool]:
    """[summary]

    Args:
        size_bytes: size in bytes

    Returns:
        Tuple[str,bool]: return the size with correct units and a condition
        to display the warning message.
    """
    if size_bytes == 0:
        return "0B", False
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    i = int(math.floor(math.log(size_bytes, 1024)))
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    is_warning = i >= 3  # warn only when the size reaches the GB range or above

    return f"{s}{size_name[i]}", is_warning
Example #3
from typing import Tuple

import torch
from biotransformers.utils.logger import logger

log = logger("gpus_utils")


def set_device(device: str, multi_gpu: bool) -> Tuple[str, bool]:
    """Set the correct device CPU/GPU

    Args:
        device (str): one of "cpu", "cuda:0" or "cuda"
        multi_gpu (bool): whether to use multiple GPUs on the same node

    Returns:
        Tuple[str, bool]:
            * device: str
            * multi_gpu: bool
    """
    n_gpus = torch.cuda.device_count()
    if multi_gpu:
        if not torch.cuda.is_available():
            log.warning("No GPU available, use CPU device")
            return "cpu", False

        if not n_gpus > 1:
            log.warning(
                "Trying to use multi-gpu with only one device, use cuda:0")
            return "cuda:0", False
        else:
Example #4
from biotransformers.bio_transformers import BioTransformers  # noqa
from biotransformers.utils.logger import logger  # noqa

from .version import VERSION  # noqa

log = logger("biotransformers")

__version__ = VERSION
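
Since the package __init__ above re-exports BioTransformers and sets __version__, a client script can use them directly (a minimal sketch):

# Hypothetical client-side usage of the package __init__ shown above.
import biotransformers
from biotransformers import BioTransformers  # noqa: F401  re-exported by the __init__

# __version__ is populated from version.VERSION in the __init__ above
print(biotransformers.__version__)
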
Example #5
- ProtBert BFD: https://huggingface.co/Rostlab/prot_bert_bfd
"""
import copy
from typing import Dict, List, Tuple

import torch
from biotransformers.lightning_utils.data import AlphabetDataLoader
from biotransformers.utils.constant import DEFAULT_ROSTLAB_MODEL, ROSTLAB_LIST
from biotransformers.utils.logger import logger  # noqa
from biotransformers.utils.utils import _generate_chunks, _get_num_batch_iter
from biotransformers.wrappers.language_model import LanguageModel
from ray.actor import ActorHandle
from tqdm import tqdm
from transformers import BertForMaskedLM, BertTokenizer

log = logger("rostlab_wrapper")


class RostlabWrapper(LanguageModel):
    """
    Class that uses a Rostlab-type pretrained transformers model to evaluate
    a protein's likelihood as well as other insights.
    """
    def __init__(self, model_dir: str, device):
        if model_dir not in ROSTLAB_LIST:
            print(f"Model dir '{model_dir}' not recognized."
                  f" Using '{DEFAULT_ROSTLAB_MODEL}' as default")
            model_dir = DEFAULT_ROSTLAB_MODEL
        super().__init__(model_dir=model_dir, device=device)
        self.tokenizer = BertTokenizer.from_pretrained(model_dir,
                                                       do_lower_case=False,
Example #6
specific to the ESM model developed by FAIR (https://github.com/facebookresearch/esm).
"""

from typing import Dict, List, Tuple

import esm
import torch
from biotransformers.lightning_utils.data import AlphabetDataLoader
from biotransformers.utils.constant import DEFAULT_ESM_MODEL, ESM_LIST
from biotransformers.utils.logger import logger  # noqa
from biotransformers.utils.utils import _generate_chunks, _get_num_batch_iter
from biotransformers.wrappers.language_model import LanguageModel
from ray.actor import ActorHandle
from tqdm import tqdm

log = logger("esm_wrapper")
path_msa_folder = str


class ESMWrapper(LanguageModel):
    """
    Class that uses an ESM-type pretrained transformers model to evaluate
    a protein's likelihood as well as other insights.
    """
    def __init__(self, model_dir: str, device: str):
        if model_dir not in ESM_LIST:
            print(
                f"Model dir '{model_dir}' not recognized. Using '{DEFAULT_ESM_MODEL}' as default"
            )
            model_dir = DEFAULT_ESM_MODEL
        super().__init__(model_dir=model_dir, device=device)
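
A hedged instantiation sketch for ESMWrapper; the model name below is an assumption and must be a member of ESM_LIST, otherwise the fallback in __init__ above substitutes DEFAULT_ESM_MODEL (the rest of the constructor, not shown in this excerpt, presumably loads the pretrained ESM weights):

# Hypothetical usage; "esm1_t6_43M_UR50S" is the smallest fair-esm checkpoint
# and is assumed here to be an entry of ESM_LIST. If it is not, __init__
# prints a message and falls back to DEFAULT_ESM_MODEL.
wrapper = ESMWrapper("esm1_t6_43M_UR50S", device="cpu")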