import logging

import torch
from transformers import (
    BartConfig,
    BartForConditionalGeneration,
    BartTokenizer,
    PegasusConfig,
    PegasusForConditionalGeneration,
    PegasusTokenizer,
    T5Config,
    T5ForConditionalGeneration,
    T5Tokenizer,
)

log = logging.getLogger(__name__)


def __init__(self, model: str = None):
    log.info(model)
    torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info(torch_device)
    if model is None:
        model = "t5"
    self.modelName = model

    # Paths to all the files that will be used for inference
    self.path = f"./app/api/{model}/"
    self.model_path = self.path + "pytorch_model.bin"
    self.config_path = self.path + "config.json"

    # Select the correct model based on the passed `model` argument (default: t5)
    if model == "t5":
        self.config = T5Config.from_json_file(self.config_path)
        self.model = T5ForConditionalGeneration(self.config)
        self.tokenizer = T5Tokenizer.from_pretrained(self.path)
        self.model.eval()
        self.model.load_state_dict(torch.load(self.model_path, map_location=torch_device))
    elif model == "google/pegasus-newsroom":
        self.config = PegasusConfig.from_json_file(self.config_path)
        self.model = PegasusForConditionalGeneration.from_pretrained(model).to(torch_device)
        self.tokenizer = PegasusTokenizer.from_pretrained(model)
    elif model == "facebook/bart-large-cnn":
        self.config = BartConfig.from_json_file(self.config_path)
        self.model = BartForConditionalGeneration.from_pretrained(model).to(torch_device)
        self.tokenizer = BartTokenizer.from_pretrained(model)
    else:
        raise ValueError("This model is not supported")
    self.text = str()
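# A minimal usage sketch for the constructor above. The enclosing class is not shown
# in the excerpt, so `Summarizer` is a placeholder name, and the t5 branch assumes
# ./app/api/t5/ contains pytorch_model.bin, config.json, and the tokenizer files.
summarizer = Summarizer("t5")
inputs = summarizer.tokenizer(
    "summarize: " + "Some long article text ...",  # T5 expects a task prefix
    return_tensors="pt",
    truncation=True,
)
summary_ids = summarizer.model.generate(inputs["input_ids"], num_beams=4, max_length=60)
print(summarizer.tokenizer.decode(summary_ids[0], skip_special_tokens=True))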
# IGNORE_KEYS, rename_state_dict_key, and rename_layernorm_keys are defined
# elsewhere in this conversion script.
def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_json_path):
    """
    Copy/paste/tweak the ParlAI model's weights into our BART structure.
    """
    model = torch.load(checkpoint_path, map_location="cpu")
    sd = model["model"]
    cfg = BartConfig.from_json_file(config_json_path)
    m = BartForConditionalGeneration(cfg)
    valid_keys = m.model.state_dict().keys()
    failures = []
    mapping = {}
    for k, v in sd.items():
        if k in IGNORE_KEYS:
            continue
        new_k = rename_state_dict_key(k)
        if new_k not in valid_keys:
            failures.append([k, new_k])
        else:
            mapping[new_k] = v
    if cfg.normalize_before:
        # Blenderbot-3B checkpoints: rename layernorm_embedding -> layer_norm
        rename_layernorm_keys(sd)
    m.model.load_state_dict(mapping, strict=True)
    m.half()
    m.save_pretrained(pytorch_dump_folder_path)
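# Hypothetical invocation of the converter above; all three paths are placeholders.
# It expects a ParlAI checkpoint, an output folder, and a BART-style config.json.
convert_parlai_checkpoint(
    checkpoint_path="blenderbot/model",
    pytorch_dump_folder_path="converted/blenderbot",
    config_json_path="blenderbot/config.json",
)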
import pandas as pd
import torch
from transformers import (
    BartConfig,
    BartForConditionalGeneration,
    BartTokenizer,
    EncoderDecoderConfig,
    EncoderDecoderModel,
)

if __name__ == "__main__":
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

    # LOADING MODEL & DATA
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Device:", device)

    model_created = False
    if args.checkpoint is not None:
        model_created = True
        if args.bart:
            config = BartConfig.from_json_file(args.checkpoint + "/config.json")
            model = BartForConditionalGeneration.from_pretrained(
                args.checkpoint + "/pytorch_model.bin", config=config)
        else:
            config = EncoderDecoderConfig.from_json_file(args.checkpoint + "/config.json")
            model = EncoderDecoderModel.from_pretrained(
                args.checkpoint + "/pytorch_model.bin", config=config)

    if args.language == 'fr':
        if args.bart:
            model_name = "WikinewsSum/bart-large-multi-fr-wiki-news"
            tokenizer = BartTokenizer.from_pretrained(model_name)
            if not model_created:
                # The excerpt ends here; mirroring the checkpoint branch above, the
                # model is presumably loaded from the hub name (assumed completion):
                model = BartForConditionalGeneration.from_pretrained(model_name)
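# The block above reads args.checkpoint, args.bart, and args.language without showing
# their definition; a minimal argparse setup consistent with that usage (flag names
# inferred, not taken from the original script) would sit above the main block:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", type=str, default=None,
                    help="folder containing config.json and pytorch_model.bin")
parser.add_argument("--bart", action="store_true",
                    help="load a BART model instead of an EncoderDecoderModel")
parser.add_argument("--language", type=str, default="fr")
args = parser.parse_args()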
from flask import Flask, request, render_template
from regression import model1, tokenizer_new, tokenize_new
import numpy as np
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig
import torch

config = BartConfig.from_json_file('output_model/hate/config.json')
model = BartForConditionalGeneration.from_pretrained('output_model/hate/')
tok = BartTokenizer.from_pretrained('output_model/hate/')

app = Flask(__name__)
app.debug = True


@app.route("/", methods=['GET', 'POST'])
def index():
    if request.method == "POST":
        name = request.form["name"]
        hate = " "
        if len(name) > 0:
            # Only score once the input ends with a space, i.e. a completed word
            if name.split(" ")[-1] == '':
                a, b, c = tokenize_new([name], tokenizer_new)
                out = np.round(model1.predict([a, b])[0][0])
                if out <= 3:
                    hate = "No Hate detected"
                elif 3 < out <= 5:
                    hate = "LOW"
                elif 5 < out <= 7:
                    hate = "MEDIUM"
                else:
                    hate = "HIGH"
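# The view above is cut off before it returns a response; once it does (e.g. by
# rendering a template with the `hate` label), the app can be started with the
# standard Flask entry point. Host and port here are assumptions.
if __name__ == "__main__":
    app.run(host="127.0.0.1", port=5000)

# Example request from another shell (note the trailing space in the form value,
# which the view requires before scoring):
#   curl -X POST -d "name=some finished sentence " http://127.0.0.1:5000/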
import random
import warnings

import numpy as np
import torch
from transformers import BartConfig, BartForConditionalGeneration, BartTokenizer

# Seq2SeqArgs, sweep_config_to_sweep_values, and wandb_available come from the
# surrounding module.


def __init__(
    self,
    pretrained_model=None,
    additional_special_tokens_encoder=None,
    additional_special_tokens_decoder=None,
    model_config=None,
    vocab_file=None,
    args=None,
    use_cuda=True,
    cuda_device=-1,
    **kwargs,
):
    self.args = self._load_model_args()

    if isinstance(args, dict):
        self.args.update_from_dict(args)
    elif isinstance(args, Seq2SeqArgs):
        self.args = args

    if "sweep_config" in kwargs:
        self.is_sweeping = True
        sweep_config = kwargs.pop("sweep_config")
        sweep_values = sweep_config_to_sweep_values(sweep_config)
        self.args.update_from_dict(sweep_values)
    else:
        self.is_sweeping = False

    if self.args.manual_seed:
        random.seed(self.args.manual_seed)
        np.random.seed(self.args.manual_seed)
        torch.manual_seed(self.args.manual_seed)
        if self.args.n_gpu > 0:
            torch.cuda.manual_seed_all(self.args.manual_seed)

    if use_cuda:
        if torch.cuda.is_available():
            if cuda_device == -1:
                self.device = torch.device("cuda")
            else:
                self.device = torch.device(f"cuda:{cuda_device}")
        else:
            raise ValueError(
                "'use_cuda' set to True when cuda is unavailable. "
                "Make sure CUDA is available or set `use_cuda=False`.")
    else:
        self.device = "cpu"

    self.results = {}

    if not use_cuda:
        self.args.fp16 = False

    model_config = BartConfig.from_json_file(model_config)
    if pretrained_model is None:
        self.model = BartForConditionalGeneration(config=model_config)
    else:
        self.model = BartForConditionalGeneration.from_pretrained(pretrained_model)
    self.encoder_tokenizer = BartTokenizer.from_pretrained(vocab_file)
    self.decoder_tokenizer = self.encoder_tokenizer

    self.config = self.model.config
    # Special AST tokens (e.g. 'Assertion', 'RegExp', 'Repetition', 'Quantifier',
    # 'ClassRange', 'CharacterClass') can be registered via the two arguments below.
    if additional_special_tokens_encoder is not None:
        self.encoder_tokenizer.add_special_tokens(additional_special_tokens_encoder)
    if additional_special_tokens_decoder is not None:
        self.decoder_tokenizer.add_special_tokens(additional_special_tokens_decoder)

    if self.args.wandb_project and not wandb_available:
        warnings.warn(
            "wandb_project specified but wandb is not available. Wandb disabled.")
        self.args.wandb_project = None

    self.args.model_type = 'bart'
    self.args.model_name = 'ExplainREGEX'
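# Hypothetical construction of the wrapper above; the enclosing class is not shown,
# so `Seq2SeqModel` is a placeholder name, and both paths are illustrative only.
model = Seq2SeqModel(
    pretrained_model=None,               # build the model from the config instead
    model_config="outputs/config.json",  # BART-style config.json (placeholder path)
    vocab_file="facebook/bart-base",     # any source BartTokenizer.from_pretrained accepts
    args={"manual_seed": 42, "n_gpu": 0},
    use_cuda=False,
)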