Example 1
def suppress_warnings():
    """Suppress the ```Some weights of the model checkpoint...``` warnings from
    huggingface transformers.

    """
    from transformers import logging
    logging.set_verbosity_error()
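
A minimal usage sketch (the checkpoint name is only illustrative): call the helper before loading a model so the partial-weight warning is not printed.

from transformers import AutoModelForSequenceClassification

suppress_warnings()  # silence "Some weights of the model checkpoint..." messages
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")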
Example 2
    def test_integration(self):
        level_origin = logging.get_verbosity()

        logger = logging.get_logger(
            "transformers.models.bart.tokenization_bart")
        msg = "Testing 1, 2, 3"

        # should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
        if level_origin <= logging.WARNING:
            with CaptureLogger(logger) as cl:
                logger.warning(msg)
            self.assertEqual(cl.out, msg + "\n")

        # this is setting the level for all of `transformers.*` loggers
        logging.set_verbosity_error()

        # should not be able to log warnings
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, "")

        # should be able to log warnings again
        logging.set_verbosity_warning()
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, msg + "\n")

        # restore to the original level
        logging.set_verbosity(level_origin)
Example 3
def logging_setup(args=None):

    logger = logging.getLogger()
    logger.handlers = []  # remove the default handler to avoid duplicated log messages
    logger.setLevel(logging.ERROR)

    h = logging.StreamHandler(sys.stderr)
    if args is not None:
        h = logging.StreamHandler(args.logfile)

    h.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(h)

    #logger.setLevel(logging.INFO)

    if args is not None:
        if not args.quiet:
            logger.setLevel(logging.INFO)
        if args.debug:
            logger.setLevel(logging.DEBUG)

    logging_level = logging.getLogger().level
    if logging_level <= logging.WARNING and logging_level != logging.DEBUG:
        logging.getLogger("ToolWrapper").setLevel(logging.WARNING)

    if logging.getLogger().level != logging.DEBUG:
        from transformers import logging as hf_logging
        hf_logging.set_verbosity_error()

        import tensorflow as tf
        tf.get_logger().setLevel('ERROR')
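
A usage sketch for the function above, assuming an argparse-based CLI whose attribute names (logfile, quiet, debug) match the ones the function reads:

import argparse
import sys

parser = argparse.ArgumentParser()
parser.add_argument("--logfile", type=argparse.FileType("w"), default=sys.stderr)
parser.add_argument("--quiet", action="store_true")
parser.add_argument("--debug", action="store_true")
logging_setup(parser.parse_args())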
Example 4
def load_NLP_model(model_name, device):
    from transformers import AutoModelForSequenceClassification
    from transformers import logging

    # The BART model always complains that we aren't loading everything;
    # this is OK for this purpose and we can ignore it.
    logging.set_verbosity_error()

    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.eval()
    model.to(device)

    return model
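
A brief usage sketch; the checkpoint name below is only an assumption consistent with the BART comment above:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_NLP_model("facebook/bart-large-mnli", device)  # assumed checkpoint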
Example 5
def Load_Model(bert_classification_model_path):
    # Load Tokenizer and BERT Model
    logging.set_verbosity_error()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)

    model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                          num_labels=10,
                                                          output_attentions=False,
                                                          output_hidden_states=False)

    model.load_state_dict(torch.load(bert_classification_model_path, map_location=torch.device('cpu')))

    return model, tokenizer
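
A usage sketch for the loader above (the weights path is hypothetical):

import torch

model, tokenizer = Load_Model("bert_classification_model.pt")  # hypothetical path
model.eval()
inputs = tokenizer("An example sentence to classify.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
predicted_class = logits.argmax(dim=-1).item()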
Example 6
def main():
    from transformers import logging
    logging.set_verbosity_error()

    def augment(text,
                n=10,
                p=0.3,
                pretrained_model='bert-base-cased',
                seed=2021):

        random.seed(seed)
        ba = BERTAugment(pretrained=pretrained_model)

        print(text, '\n')
        for a in ba.augment(text, n=n, p=p):
            print(a)

    Fire(augment)
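
Since Fire(augment) exposes the inner function as a command-line interface, a typical invocation (the script name is hypothetical) would be: python augment.py "Some sentence to augment" --n=5 --p=0.3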
Example 7
def get_sentiment(arg):
    d = {0: "Positive", 1: "Negative", 2: "Neutral"}
    logging.set_verbosity_error()
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert").to(
        "cuda"
    )

    inputs = tokenizer(arg, return_tensors="pt", truncation=True, padding=True)
    inputs = inputs.to("cuda")
    ones = [1] * len(arg)
    labels = torch.tensor(ones).unsqueeze(0).to("cuda")  # Batch size
    outputs = model(**inputs, labels=labels)
    ret_list = []
    sentiment_list = []
    sentiment = []
    for o in outputs.logits:
        sentiment_list += [d[max(enumerate(o.tolist()), key=itemgetter(1))[0]]]
    sarr = numpy.array(sentiment_list).reshape(len(sentiment_list) // 5, 5)
    sarr = sarr.tolist()
    for s in sarr:
        c = 0
        for si in s:
            if si == "Positive":
                c += 1
            elif si == "Negative":
                c -= 1
        if c > 0:
            sentiment.append("Positive")
        elif c < 0:
            sentiment.append("Negative")
        else:
            sentiment.append("Neutral")
    for sent in sentiment:
        if sent == "Positive":
            buy = "Highly Recommended"
        elif sent == "Negative":
            buy = "Not Recommended"
        else:
            buy = "Moderately Recommended"
        ret_list.append([sent, buy])
    return ret_list
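
A usage sketch; get_sentiment expects a CUDA device and, because the results are reshaped into rows of five, a list whose length is a multiple of five. The headlines are illustrative:

headlines = [
    "Shares jump after record quarterly earnings",
    "Company misses revenue estimates",
    "Regulator opens probe into accounting practices",
    "New product launch exceeds expectations",
    "Outlook unchanged for the rest of the year",
]  # one group of five texts
for sentiment, recommendation in get_sentiment(headlines):
    print(sentiment, recommendation)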
Example 8
    def test_set_level(self):
        logger = logging.get_logger()

        # the current default level is logging.WARNING
        level_origin = logging.get_verbosity()

        logging.set_verbosity_error()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_warning()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_info()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_debug()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        # restore to the original level
        logging.set_verbosity(level_origin)
Example 9
from typing import List
import sys
import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import logging

logging.set_verbosity_error()

# this part of the code creates the BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
labels = torch.tensor([1]).unsqueeze(0)


def bert_vector(text: str):
    '''computes vector representation of text using BERT'''
    text = text[0:300]  # truncate long inputs to 300 characters
    inputs = tokenizer(text.lower(), return_tensors="pt")
    outputs = model(**inputs, labels=labels, output_hidden_states=True)
    q = outputs[2][11][0]  # hidden states: layer 11, first (only) sequence in the batch
    return q.mean(dim=0).cpu().detach().numpy()
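
A short usage sketch: the returned vectors can be compared with cosine similarity (np is already imported above); the input texts are illustrative.

v1 = bert_vector("the movie was wonderful")
v2 = bert_vector("an excellent film")
cosine = float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
print(cosine)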
Example 10
from fastai.data.block import DataBlock, ColReader, ItemGetter, ColSplitter, RandomSplitter
from fastai.data.core import DataLoaders
from fastai.imports import *
from fastai.learner import *
from fastai.torch_core import *
from fastai.torch_imports import *
from fastcore.all import *
from transformers import AutoModelForSeq2SeqLM, PreTrainedModel, logging as hf_logging

from ...data.seq2seq.core import Seq2SeqBatchTokenizeTransform, Seq2SeqTextBlock, default_text_gen_kwargs
from ..core import BaseModelCallback, BaseModelWrapper, Blearner
from .core import Seq2SeqMetricsCallback, blurr_seq2seq_splitter
from ...utils import get_hf_objects
from ....utils import PreCalculatedCrossEntropyLoss

hf_logging.set_verbosity_error()


# Cell
@patch
def blurr_translate(self: Learner, inp, **kwargs):
    preds = self.blurr_generate(inp, key="translation_texts", **kwargs)
    return preds


# Cell
@delegates(Blearner.__init__)
class BlearnerForTranslation(Blearner):
    def __init__(self, dls: DataLoaders, hf_model: PreTrainedModel, **kwargs):
        super().__init__(dls, hf_model, **kwargs)
Example 11
'''
References:
https://workhuman.atlassian.net/wiki/spaces/whiqnlp/pages/9327519450/Packaging+Models+For+Deployment
https://github.com/aws-samples/aws-sagemaker-pytorch-shop-by-style/blob/master/src/similarity/inference.py
https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html?highlight=model_fn#serve-a-pytorch-model
'''
import logging
import json
import torch
from transformers import AlbertTokenizer
from transformers import AlbertForSequenceClassification
from transformers import logging as transformers_logging
transformers_logging.set_verbosity_error()


CONTENT_TYPE = 'application/json'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger = logging.getLogger(__name__)

model_type = 'albert-base-v2'
tokenizer  = AlbertTokenizer.from_pretrained( model_type, do_lower_case=True )


def model_fn(model_path='model_path'):

    dout  = 0.1
    model = AlbertForSequenceClassification.from_pretrained( model_type,
                                                             num_labels=2,
                                                             output_attentions=False,
                                                             output_hidden_states=False,
                                                             attention_probs_dropout_prob=dout,
Example 12
# fine-tune HF pretrained model for IMDB

# zipped raw data at: https://ai.stanford.edu/~amaas/data/sentiment/

"""
from datetime import datetime
import numpy as np
from pathlib import Path
import os
import torch
from torch.utils.data import DataLoader
from transformers import DistilBertTokenizer
from transformers import AdamW, DistilBertForSequenceClassification
from transformers import logging

logging.set_verbosity_error()  # suppress wordy warnings

device = torch.device("cpu")


class IMDbDataset(torch.utils.data.Dataset):
    def __init__(self, reviews_lst, labels_lst):
        self.reviews_lst = reviews_lst  # list of token IDs
        self.labels_lst = labels_lst  # list of 0-1 ints

    def __getitem__(self, idx):
        item = {}  # [input_ids] [attention_mask] [labels]
        for key, val in self.reviews_lst.items():
            item[key] = torch.tensor(val[idx]).to(device)
        item["labels"] = torch.tensor(self.labels_lst[idx]).to(device)
        return item
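
A usage sketch for the dataset class (the reviews and labels are illustrative; DistilBertTokenizer is imported at the top of this example):

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
encodings = tokenizer(["A great movie.", "A terrible plot."], truncation=True, padding=True)
train_ds = IMDbDataset(encodings, [1, 0])
item = train_ds[0]  # dict with input_ids, attention_mask and labels tensors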