def suppress_warnings():
    """Suppress the ```Some weights of the model checkpoint...``` warnings
    from huggingface transformers.
    """
    from transformers import logging

    logging.set_verbosity_error()
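# Usage sketch (hedged): call suppress_warnings() before from_pretrained(),
# since the "Some weights..." message is emitted at load time. The
# checkpoint name below is only illustrative.
from transformers import BertForSequenceClassification

suppress_warnings()
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")  # loads quietly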
def test_integration(self):
    level_origin = logging.get_verbosity()

    logger = logging.get_logger("transformers.models.bart.tokenization_bart")
    msg = "Testing 1, 2, 3"

    # should be able to log warnings (if default settings weren't
    # overridden by `pytest --log-level-all`)
    if level_origin <= logging.WARNING:
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, msg + "\n")

    # this is setting the level for all of `transformers.*` loggers
    logging.set_verbosity_error()

    # should not be able to log warnings
    with CaptureLogger(logger) as cl:
        logger.warning(msg)
    self.assertEqual(cl.out, "")

    # should be able to log warnings again
    logging.set_verbosity_warning()
    with CaptureLogger(logger) as cl:
        logger.warning(msg)
    self.assertEqual(cl.out, msg + "\n")

    # restore to the original level
    logging.set_verbosity(level_origin)
import logging
import sys


def logging_setup(args=None):
    logger = logging.getLogger()
    logger.handlers = []  # remove the default handler to avoid duplicated log messages
    logger.setLevel(logging.ERROR)

    h = logging.StreamHandler(sys.stderr)
    if args is not None:
        h = logging.StreamHandler(args.logfile)

    h.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(h)

    # logger.setLevel(logging.INFO)
    if args is not None:
        if not args.quiet:
            logger.setLevel(logging.INFO)
        if args.debug:
            logger.setLevel(logging.DEBUG)

    logging_level = logging.getLogger().level
    if logging_level <= logging.WARNING and logging_level != logging.DEBUG:
        logging.getLogger("ToolWrapper").setLevel(logging.WARNING)

    if logging.getLogger().level != logging.DEBUG:
        from transformers import logging as hf_logging
        hf_logging.set_verbosity_error()

        import tensorflow as tf
        tf.get_logger().setLevel('ERROR')
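# Usage sketch: with no args the root logger stays at ERROR; passing a
# namespace that carries the logfile/quiet/debug attributes the function
# reads re-levels it. The argparse.Namespace below is hypothetical,
# standing in for a parsed command line.
import argparse

args = argparse.Namespace(logfile=sys.stderr, quiet=False, debug=False)
logging_setup(args)  # root logger now at INFO, formatted handler on stderr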
def load_NLP_model(model_name, device):
    from transformers import AutoModelForSequenceClassification
    from transformers import logging

    # the BART model always complains that we aren't loading everything;
    # this is OK for this purpose and we can ignore it
    logging.set_verbosity_error()

    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.eval()
    model.to(device)
    return model
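# Usage sketch; the checkpoint name and device choice are illustrative:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nli_model = load_NLP_model("facebook/bart-large-mnli", device)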
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import logging


def Load_Model(bert_classification_model_path):
    # Load Tokenizer and BERT Model; silence the "Some weights..." warning
    logging.set_verbosity_error()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=10,
        output_attentions=False,
        output_hidden_states=False,
    )
    model.load_state_dict(
        torch.load(bert_classification_model_path, map_location=torch.device('cpu'))
    )
    return model, tokenizer
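# Usage sketch; the state-dict path is hypothetical:
model, tokenizer = Load_Model("models/bert_classifier.pt")
encoding = tokenizer("example input text", return_tensors="pt")
with torch.no_grad():
    logits = model(**encoding).logits  # shape (1, 10), one score per label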
import random

from fire import Fire  # python-fire CLI wrapper (assumed from the Fire() call)


def main():
    from transformers import logging
    logging.set_verbosity_error()

    def augment(text, n=10, p=0.3, pretrained_model='bert-base-cased', seed=2021):
        random.seed(seed)
        # BERTAugment is assumed to be defined/imported elsewhere in this project
        ba = BERTAugment(pretrained=pretrained_model)
        print(text, '\n')
        for a in ba.augment(text, n=n, p=p):
            print(a)

    Fire(augment)
from operator import itemgetter

import numpy
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import logging


def get_sentiment(arg):
    d = {0: "Positive", 1: "Negative", 2: "Neutral"}
    logging.set_verbosity_error()
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert").to("cuda")
    inputs = tokenizer(arg, return_tensors="pt", truncation=True, padding=True)
    inputs = inputs.to("cuda")
    ones = [1] * len(arg)
    labels = torch.tensor(ones).unsqueeze(0).to("cuda")  # Batch size
    outputs = model(**inputs, labels=labels)
    ret_list = []
    sentiment_list = []
    sentiment = []
    # take the argmax label for each text
    for o in outputs.logits:
        sentiment_list += [d[max(enumerate(o.tolist()), key=itemgetter(1))[0]]]
    # group the per-text sentiments five at a time and majority-vote each group
    sarr = numpy.array(sentiment_list).reshape(len(sentiment_list) // 5, 5)
    sarr = sarr.tolist()
    for s in sarr:
        c = 0
        for si in s:
            if si == "Positive":
                c += 1
            elif si == "Negative":
                c -= 1
        if c > 0:
            sentiment.append("Positive")
        elif c < 0:
            sentiment.append("Negative")
        else:
            sentiment.append("Neutral")
    for sent in sentiment:
        if sent == "Positive":
            buy = "Highly Recommended"
        elif sent == "Negative":
            buy = "Not Recommended"
        else:
            buy = "Moderately Recommended"
        ret_list.append([sent, buy])
    return ret_list
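# Usage sketch: the function majority-votes sentiments in groups of five,
# so the input length should be a multiple of 5, and a CUDA device is
# required. The headline below is made up for illustration.
headlines = ["Company X beats quarterly earnings estimates"] * 5  # illustrative
print(get_sentiment(headlines))  # e.g. [["Positive", "Highly Recommended"]]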
def test_set_level(self):
    logger = logging.get_logger()

    # the current default level is logging.WARNING
    level_origin = logging.get_verbosity()

    logging.set_verbosity_error()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    logging.set_verbosity_warning()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    logging.set_verbosity_info()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    logging.set_verbosity_debug()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    # restore to the original level
    logging.set_verbosity(level_origin)
from typing import List
import sys

import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import logging

logging.set_verbosity_error()

# this part of the code creates the BERT model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
labels = torch.tensor([1]).unsqueeze(0)


def bert_vector(text: str):
    '''computes a vector representation of text using BERT'''
    text = text[0:300]
    inputs = tokenizer(text.lower(), return_tensors="pt")
    outputs = model(**inputs, labels=labels, output_hidden_states=True)
    q = outputs[2][11][0]  # token embeddings from a late hidden layer
    return q.mean(dim=0).cpu().detach().numpy()
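# Usage sketch: bert_vector returns a 768-dim numpy array (the mean of the
# token embeddings from that hidden layer). The sentence is illustrative.
vec = bert_vector("BERT turns text into a fixed-size vector")
print(vec.shape)  # (768,)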
from fastai.data.block import DataBlock, ColReader, ItemGetter, ColSplitter, RandomSplitter
from fastai.data.core import DataLoaders
from fastai.imports import *
from fastai.learner import *
from fastai.torch_core import *
from fastai.torch_imports import *
from fastcore.all import *
from transformers import AutoModelForSeq2SeqLM, PreTrainedModel, logging as hf_logging

from ...data.seq2seq.core import Seq2SeqBatchTokenizeTransform, Seq2SeqTextBlock, default_text_gen_kwargs
from ..core import BaseModelCallback, BaseModelWrapper, Blearner
from .core import Seq2SeqMetricsCallback, blurr_seq2seq_splitter
from ...utils import get_hf_objects
from ....utils import PreCalculatedCrossEntropyLoss

hf_logging.set_verbosity_error()

# Cell
@patch
def blurr_translate(self: Learner, inp, **kwargs):
    preds = self.blurr_generate(inp, key="translation_texts", **kwargs)
    return preds

# Cell
@delegates(Blearner.__init__)
class BlearnerForTranslation(Blearner):
    def __init__(self, dls: DataLoaders, hf_model: PreTrainedModel, **kwargs):
        super().__init__(dls, hf_model, **kwargs)
'''
References:
https://workhuman.atlassian.net/wiki/spaces/whiqnlp/pages/9327519450/Packaging+Models+For+Deployment
https://github.com/aws-samples/aws-sagemaker-pytorch-shop-by-style/blob/master/src/similarity/inference.py
https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html?highlight=model_fn#serve-a-pytorch-model
'''
import logging
import json

import torch
from transformers import AlbertTokenizer
from transformers import AlbertForSequenceClassification
from transformers import logging as transformers_logging

transformers_logging.set_verbosity_error()

CONTENT_TYPE = 'application/json'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

logger = logging.getLogger(__name__)

model_type = 'albert-base-v2'
tokenizer = AlbertTokenizer.from_pretrained(
    model_type,
    do_lower_case=True
)


def model_fn(model_path='model_path'):
    dout = 0.1
    model = AlbertForSequenceClassification.from_pretrained(
        model_type,
        num_labels=2,
        output_attentions=False,
        output_hidden_states=False,
        attention_probs_dropout_prob=dout,
# fine-tune HF pretrained model for IMDB
# zipped raw data at: https://ai.stanford.edu/~amaas/data/sentiment/

from datetime import datetime
import numpy as np
from pathlib import Path
import os

import torch
from torch.utils.data import DataLoader
from transformers import DistilBertTokenizer
from transformers import AdamW, DistilBertForSequenceClassification
from transformers import logging

logging.set_verbosity_error()  # suppress wordy warnings

device = torch.device("cpu")


class IMDbDataset(torch.utils.data.Dataset):
    def __init__(self, reviews_lst, labels_lst):
        self.reviews_lst = reviews_lst  # list of token IDs
        self.labels_lst = labels_lst    # list of 0-1 ints

    def __getitem__(self, idx):
        item = {}  # [input_ids] [attention_mask] [labels]
        for key, val in self.reviews_lst.items():
            item[key] = torch.tensor(val[idx]).to(device)
        item["labels"] = torch.tensor(self.labels_lst[idx]).to(device)
        return item
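# Sketch of instantiating the dataset (hedged: the tokenizer setup and the
# two tiny reviews are illustrative, not from the original script):
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
reviews = tokenizer(["a fine film", "a dull film"], truncation=True, padding=True)
train_ds = IMDbDataset(reviews, [1, 0])
item = train_ds[0]  # dict with input_ids, attention_mask, labels tensors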