def __init__(self):
    super().__init__()
    config = BertConfig()
    config.output_hidden_states = True
    self.bert = BertModel.from_pretrained('bert-base-uncased', config=config)
    self.bertTokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def __init__(self, *, pretrained_model_name=None, config_filename=None, vocab_size=None,
             hidden_size=768, num_hidden_layers=12, num_attention_heads=12,
             intermediate_size=3072, hidden_act="gelu", max_position_embeddings=512,
             random_init=False, **kwargs):
    TrainableNM.__init__(self, **kwargs)

    # Check that only one of pretrained_model_name, config_filename, and
    # vocab_size was passed in
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, " +
            "or config_filename should be passed into the " +
            "BERT constructor.")

    if vocab_size is not None:
        config = BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings)
        model = BertModel(config)
    elif pretrained_model_name is not None:
        model = BertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = BertConfig.from_json_file(config_filename)
        model = BertModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must " +
            "be passed into the BERT constructor")

    model.to(self._device)

    self.add_module("bert", model)
    self.config = model.config
    if random_init:
        self.apply(
            lambda module: transformer_weights_init(module, xavier=False))
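# A hedged usage sketch for the constructor above. The enclosing class name (BERT here)
# is an assumption, not given in the snippet; exactly one of the three options may be
# supplied, matching the mutual-exclusion check the constructor performs.
bert_pretrained = BERT(pretrained_model_name='bert-base-uncased')   # published weights
bert_scratch = BERT(vocab_size=30522, num_hidden_layers=6)          # new model from hyperparameters
bert_from_json = BERT(config_filename='bert_config.json')           # new model from a config file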
def load_model():
    model_dir = '../../model/model/'
    # BertConfig.from_pretrained is a classmethod that returns a new config; calling it
    # on an instance and discarding the result has no effect, so load the config directly
    # and pass the overrides in one call.
    config = BertConfig.from_pretrained('../../model/bert-cased/',
                                        num_labels=3, output_attentions=True)
    model = BertAttn(config, option='feed', dropout=0.1, gpu=False, seed=0,
                     do_lower_case=False)
    class_weights = [0.6058, 0.1161, 0.2781]
    model.set_focal_loss(alpha=class_weights, gamma=-1)
    model.load_model(True, model_dir)
    return model
def main(args):
    if args.dataset == 'sim-R':
        from BERTDST_utils.simR_data_utils import prepare_dataset, MultiWozDataset, make_turn_label, postprocessing, state_equal, SLOT, OP
    elif args.dataset == 'sim-M':
        from BERTDST_utils.simM_data_utils import prepare_dataset, MultiWozDataset, make_turn_label, postprocessing, state_equal, SLOT, OP
    elif args.dataset == 'DSTC2':
        from BERTDST_utils.DSTC2_data_utils import prepare_dataset, MultiWozDataset, make_turn_label, postprocessing, state_equal, SLOT, OP
    elif args.dataset == 'WOZ2.0':
        from BERTDST_utils.WOZ_data_utils import prepare_dataset, MultiWozDataset, make_turn_label, postprocessing, state_equal, SLOT, OP
    elif args.dataset == 'MultiWOZ2.1':
        from BERTDST_utils.MultiWOZ_data_utils import prepare_dataset, MultiWozDataset, make_turn_label, postprocessing, state_equal, OP, make_slot_meta
        ontology = json.load(open(args.ontology_data_path))
        SLOT, ontology = make_slot_meta(ontology)

    slot_meta = SLOT
    tokenizer = BertTokenizer(args.vocab_path, do_lower_case=True)
    data = prepare_dataset(1.0, args.test_data_path, tokenizer, slot_meta,
                           args.test_size_window, args.max_seq_length, args.test_MG)

    model_config = BertConfig.from_json_file(args.bert_config_path)
    model_config.dropout = 0.1
    op2id = OP
    model = MGDST(model_config, len(op2id), len(slot_meta))
    ckpt = torch.load(args.model_ckpt_path, map_location='cpu')
    model.load_state_dict(ckpt)
    model.eval()
    model.to(device)

    model_evaluation(make_turn_label, postprocessing, state_equal, OP, model, data,
                     tokenizer, slot_meta, 0, args.test_size_window, args.test_MG)
def __init__(self):
    super(Bert, self).__init__()
    self.tokenizer = BertTokenizer.from_pretrained(
        os.path.join(config.get('model_config')['language_model_path'],
                     'bert-base-uncased-vocab.txt'))
    modelConfig = BertConfig.from_pretrained(
        os.path.join(config.get('model_config')['language_model_path'],
                     'bert_config.json'))
    self.textExtractor = BertModel.from_pretrained(
        os.path.join(config.get('model_config')['language_model_path'],
                     'pytorch_model.bin'),
        config=modelConfig)
def prepare_config_and_inputs(self):
    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

    input_mask = None
    if self.use_input_mask:
        input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

    sequence_labels = None
    token_labels = None
    choice_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
        choice_labels = ids_tensor([self.batch_size], self.num_choices)

    config = BertConfig(
        vocab_size_or_config_json_file=self.vocab_size,
        hidden_size=self.hidden_size,
        num_hidden_layers=self.num_hidden_layers,
        num_attention_heads=self.num_attention_heads,
        intermediate_size=self.intermediate_size,
        hidden_act=self.hidden_act,
        hidden_dropout_prob=self.hidden_dropout_prob,
        attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        initializer_range=self.initializer_range)

    return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def __init__(self, args, dictionary, left_pad=False):
    super().__init__(dictionary)
    self.dropout = args.dropout

    from pytorch_transformers import RobertaModel, BertModel
    from pytorch_transformers.file_utils import PYTORCH_TRANSFORMERS_CACHE
    from pytorch_transformers import RobertaConfig, RobertaTokenizer, BertConfig, BertTokenizer

    if args.pretrained_bert_model.startswith('roberta'):
        self.embed = RobertaModel.from_pretrained(
            args.pretrained_bert_model,
            cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        # self.context = RobertaModel.from_pretrained(args.pretrained_bert_model,
        #     cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        self.config = RobertaConfig.from_pretrained(args.pretrained_bert_model)
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        self.embed = BertModel.from_pretrained(
            args.pretrained_bert_model,
            cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        # self.context = BertModel.from_pretrained(args.pretrained_bert_model,
        #     cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        self.config = BertConfig.from_pretrained(args.pretrained_bert_model)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    self.padding_idx = self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token)
def __init__(self, opt):
    self.opt = opt

    if 'bert' in opt.model_name:
        tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
        # bert = BertModel.from_pretrained(opt.pretrained_bert_name)
        config = BertConfig.from_pretrained(opt.pretrained_bert_name, output_attentions=True)
        bert = BertModel.from_pretrained(opt.pretrained_bert_name, config=config)
        self.pretrained_bert_state_dict = bert.state_dict()
        self.model = opt.model_class(bert, opt).to(opt.device)
    else:
        tokenizer = build_tokenizer(
            fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
            max_seq_len=opt.max_seq_len,
            dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
        embedding_matrix = build_embedding_matrix(
            word2idx=tokenizer.word2idx,
            embed_dim=opt.embed_dim,
            dat_fname='{0}_{1}_embedding_matrix.dat'.format(str(opt.embed_dim), opt.dataset))
        self.model = opt.model_class(embedding_matrix, opt).to(opt.device)

    self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
    self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)

    if opt.device.type == 'cuda':
        logger.info('cuda memory allocated: {}'.format(
            torch.cuda.memory_allocated(device=opt.device.index)))
    self._print_args()
def __init__(self, args, device, checkpoint=None, bert_from_extractive=None):
    super(AbsSummarizer, self).__init__()
    self.args = args
    self.device = device
    self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

    if bert_from_extractive is not None:
        self.bert.model.load_state_dict(
            dict([(n[11:], p) for n, p in bert_from_extractive.items()
                  if n.startswith('bert.model')]), strict=True)

    if (args.encoder == 'baseline'):
        bert_config = BertConfig(self.bert.model.config.vocab_size,
                                 hidden_size=args.enc_hidden_size,
                                 num_hidden_layers=args.enc_layers,
                                 num_attention_heads=8,
                                 intermediate_size=args.enc_ff_size,
                                 hidden_dropout_prob=args.enc_dropout,
                                 attention_probs_dropout_prob=args.enc_dropout)
        self.bert.model = BertModel(bert_config)

    if (args.max_pos > 512):
        my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
        my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
        my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None, :].repeat(args.max_pos - 512, 1)
        self.bert.model.embeddings.position_embeddings = my_pos_embeddings

    self.vocab_size = self.bert.model.config.vocab_size
    tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
    if (self.args.share_emb):
        tgt_embeddings = self.bert.model.embeddings.word_embeddings

    self.decoder = TransformerDecoder(
        self.args.dec_layers,
        self.args.dec_hidden_size, heads=self.args.dec_heads,
        d_ff=self.args.dec_ff_size, dropout=self.args.dec_dropout,
        embeddings=tgt_embeddings)

    self.generator = get_generator(self.vocab_size, self.args.dec_hidden_size, device)
    self.generator[0].weight = self.decoder.embeddings.weight

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        for module in self.decoder.modules():
            if isinstance(module, (nn.Linear, nn.Embedding)):
                module.weight.data.normal_(mean=0.0, std=0.02)
            elif isinstance(module, nn.LayerNorm):
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        for p in self.generator.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)
            else:
                p.data.zero_()
        if (args.use_bert_emb):
            tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
            tgt_embeddings.weight = copy.deepcopy(self.bert.model.embeddings.word_embeddings.weight)
            self.decoder.embeddings = tgt_embeddings
            self.generator[0].weight = self.decoder.embeddings.weight

    self.to(device)
def __init__(self, args, device, checkpoint):
    super(ExtSummarizer, self).__init__()
    self.args = args
    self.device = device
    self.bert = Bert(args, args.temp_dir, args.finetune_bert)

    self.ext_layer = ExtTransformerEncoder(self.bert.model.config.hidden_size,
                                           args.ext_ff_size, args.ext_heads,
                                           args.ext_dropout, args.ext_layers)
    if (args.encoder == 'baseline'):
        bert_config = BertConfig(self.bert.model.config.vocab_size,
                                 hidden_size=args.ext_hidden_size,
                                 num_hidden_layers=args.ext_layers,
                                 num_attention_heads=args.ext_heads,
                                 intermediate_size=args.ext_ff_size)
        self.bert.model = BertModel(bert_config)
        self.ext_layer = Classifier(self.bert.model.config.hidden_size)

    if (args.max_pos > 512):
        my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
        my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
        my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None, :].repeat(args.max_pos - 512, 1)
        self.bert.model.embeddings.position_embeddings = my_pos_embeddings

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        if args.param_init != 0.0:
            for p in self.ext_layer.parameters():
                p.data.uniform_(-args.param_init, args.param_init)
        if args.param_init_glorot:
            for p in self.ext_layer.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

    self.to(device)
def __init__(self, args, device, checkpoint):
    super(ExtSummarizer, self).__init__()
    self.args = args
    self.device = device
    self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

    self.ext_layer = ExtTransformerEncoder(
        self.bert.model.config.hidden_size, args.ext_ff_size, args.ext_heads,
        args.ext_dropout, args.ext_layers)
    if (args.encoder == 'baseline'):
        bert_config = BertConfig(self.bert.model.config.vocab_size,
                                 hidden_size=args.hidden_size,
                                 num_hidden_layers=6,
                                 num_attention_heads=8,
                                 intermediate_size=args.ff_size)
        self.bert.model = BertModel(bert_config)
        self.ext_layer = Classifier(self.bert.model.config.hidden_size)

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        if args.param_init != 0.0:
            for p in self.ext_layer.parameters():
                p.data.uniform_(-args.param_init, args.param_init)
        if args.param_init_glorot:
            for p in self.ext_layer.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

    self.to(device)
def __init__(self, vocab_size, tag_to_ix, hidden_dim, n_layers):
    super(BERT_BiLSTM_CRF, self).__init__()
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)

    config = BertConfig.from_pretrained('bert-base-multilingual-cased')
    # NOTE: BertModel(config) builds the architecture with randomly initialised weights;
    # BertModel.from_pretrained(...) would be needed to load the pretrained weights.
    self.model = BertModel(config)
    self.lstm = nn.LSTM(768, hidden_dim, num_layers=n_layers, bidirectional=True)

    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim * 2, self.tagset_size)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size, device=device))

    # These two statements enforce the constraint that we never transfer
    # to the start tag and we never transfer from the stop tag
    self.transitions.data[tag_to_ix[START_TAG], :] = -10000
    self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

    self.hidden = self.init_hidden()
def createCsvData():
    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel(config)

    with Cd("lemmadata"):
        with open("id_to_sent.json") as sent_id_dict_file:
            sent_id_dict = json.load(sent_id_dict_file)
        for dir_item in os.listdir():
            if os.path.isfile(dir_item):
                if dir_item.endswith(".json") and dir_item != "id_to_sent.json":
                    print(dir_item)
                    with open(dir_item, "r") as f:
                        lemma_data = json.load(f)
                    with Cd("vectors"):
                        with open(dir_item[:-5] + ".csv", "w") as vector_file:
                            writer = csv.writer(vector_file, delimiter=",")
                            for instance in lemma_data:
                                inst_sent_id = instance["sent_id"]
                                inst_sense = instance["sense"]
                                inst_sent = sent_id_dict[str(inst_sent_id)]
                                if (len(inst_sent) > 511):
                                    continue
                                vector = vectorizeWordInContext(inst_sent, instance["pos"], tokenizer, model)
                                vec_list = vector.detach().tolist()
                                row_data = [inst_sent_id, instance["pos"], inst_sense] + vec_list
                                writer.writerow(row_data)
def __init__(self, num_labels=2, model_type='bert-base-uncased',
             token_layer='token-cls', output_logits=True):
    super(BertForWSD, self).__init__()
    self.config = BertConfig()
    self.token_layer = token_layer
    self.num_labels = num_labels  # was hard-coded to 2, which silently ignored the argument
    self.bert = BertModel.from_pretrained(model_type)
    self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
    self.output_logits = output_logits

    # Define which token selection layer to use
    if token_layer == 'token-cls':
        self.tokenselectlayer = TokenClsLayer()
    elif token_layer in ['sent-cls', 'sent-cls-ws']:
        self.tokenselectlayer = SentClsLayer()
    else:
        raise ValueError("Unidentified parameter for token selection layer")

    self.classifier = nn.Linear(768, num_labels)
    if not output_logits:
        self.softmax = nn.Softmax(dim=1)  # to be checked!!!
    nn.init.xavier_normal_(self.classifier.weight)
def __init__(self):
    model_dir = '/var/model/bert'
    if not os.path.isdir(model_dir):
        model_dir = os.path.abspath(os.path.dirname(__file__) + '/../../var/model/bert')

    self.use_gpu: bool = torch.cuda.is_available()
    self.config: BertConfig = BertConfig.from_json_file(model_dir + '/config.json')
    self.tokenizer: BertTokenizer = BertTokenizer.from_pretrained(model_dir + '/vocab.txt', do_lower_case=False)
    self.model_masked: BertForMaskedLM = BertForMaskedLM.from_pretrained(model_dir + '/model.bin', config=self.config)
    self.model: BertModel = self.model_masked.bert

    # freeze bert encoder
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model_masked.parameters():
        param.requires_grad = False

    self.model.encoder.output_hidden_states = True

    self.model.eval()
    self.model_masked.eval()

    if self.use_gpu:
        self.model.cuda()
        self.model_masked.cuda()
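# A hedged sketch of filling a [MASK] token with the masked-LM loaded above. It reuses the
# same model_dir layout as the constructor and the standard BertForMaskedLM call pattern;
# the example sentence and variable names are illustrative, and encode(add_special_tokens=True)
# assumes a tokenizer version that supports that flag.
import torch

model_dir = '/var/model/bert'
config = BertConfig.from_json_file(model_dir + '/config.json')
tokenizer = BertTokenizer.from_pretrained(model_dir + '/vocab.txt', do_lower_case=False)
mlm = BertForMaskedLM.from_pretrained(model_dir + '/model.bin', config=config).eval()

tokens = tokenizer.encode("The capital of France is [MASK] .", add_special_tokens=True)
input_ids = torch.tensor([tokens])
with torch.no_grad():
    scores = mlm(input_ids)[0]                              # (1, seq_len, vocab_size)
mask_pos = tokens.index(tokenizer.convert_tokens_to_ids(tokenizer.mask_token))
print(tokenizer.convert_ids_to_tokens([int(scores[0, mask_pos].argmax())])[0])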
def __init__(self, name='bert-base-uncased', dropout=0.1, num_class=2):
    super(BertC, self).__init__()
    config = BertConfig.from_pretrained(name)
    self.bert = BertModel_attack(config)
    self.proj = nn.Linear(config.hidden_size, num_class)
    self.loss_f = nn.CrossEntropyLoss()
    self.drop = nn.Dropout(p=dropout)
def main():
    bert_base_config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)
    bert_base_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=bert_base_config)
    count = 0
    for name, param in bert_base_model.named_parameters():
        if param.requires_grad:
            size = 1
            for s in param.data.size():
                size = s * size
            count += size
    print('The total number of parameters in bert_base_uncased: ', count)

    roberta_config = RobertaConfig.from_pretrained('roberta-base', num_labels=2)
    roberta_model = RobertaForSequenceClassification.from_pretrained('roberta-base', config=roberta_config)
    count = 0
    for name, param in roberta_model.named_parameters():
        if param.requires_grad:
            size = 1
            for s in param.data.size():
                size = s * size
            count += size
    print('The total number of parameters in roberta: ', count)

    albert_config = AlbertConfig.from_pretrained('albert-base-v2', num_labels=2)
    albert_model = AlbertForSequenceClassification.from_pretrained('albert-base-v2', config=albert_config)
    count = 0
    for name, param in albert_model.named_parameters():
        if param.requires_grad:
            size = 1
            for s in param.data.size():
                size = s * size
            count += size
    print('The total number of parameters in albert: ', count)
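# A minimal sketch of the same parameter count written as a reusable helper. The name
# count_parameters is my own, not part of the original script; numel() returns the number
# of elements in a tensor, so this is equivalent to multiplying out param.data.size() above.
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# e.g. count_parameters(bert_base_model) gives the same total as the corresponding loop above.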
def __init__(self, hidden_dim, n_layers, tagset_size):
    super(BertLSTM, self).__init__()
    config = BertConfig.from_pretrained('bert-base-multilingual-cased')
    # NOTE: BertModel(config) builds the architecture with randomly initialised weights;
    # BertModel.from_pretrained(...) would be needed to load the pretrained weights.
    self.model = BertModel(config)
    self.decoder = nn.LSTM(768, hidden_dim, n_layers)
    self.hiddentotag = nn.Linear(hidden_dim, tagset_size)
def load_model(model_name: str, do_lower_case=False):
    config = BertConfig.from_pretrained(model_name)
    tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=do_lower_case)
    model = BertForQuestionAnswering.from_pretrained(model_name, from_tf=False, config=config)
    return model, tokenizer
def load_model(self, model_path: str, do_lower_case=False):
    config = BertConfig.from_pretrained(model_path + "/config.json")
    tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)
    model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)
    return model, tokenizer
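# A hedged usage sketch for the QA loaders above, using the module-level load_model(model_name)
# variant. The checkpoint name, the question/context strings, and the reliance on
# encode_plus(return_tensors='pt') are assumptions about a transformers-style API, not taken
# from the original code.
import torch

model, tokenizer = load_model('bert-large-uncased-whole-word-masking-finetuned-squad')
question = "Who introduced BERT?"
context = "BERT was introduced by researchers at Google."
inputs = tokenizer.encode_plus(question, context, return_tensors='pt')
with torch.no_grad():
    outputs = model(**inputs)
start_logits, end_logits = outputs[0], outputs[1]       # works for tuple or ModelOutput returns
start = int(start_logits.argmax(dim=1))
end = int(end_logits.argmax(dim=1))
answer = tokenizer.decode(inputs['input_ids'][0][start:end + 1].tolist())
print(answer)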
def main():
    torch.cuda.empty_cache()
    parser = setup_parser()
    args = parser.parse_args()
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory already exists and is not empty.")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.n_gpu = torch.cuda.device_count()
    args.device = device
    set_seed(args)

    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: {}".format(args.task_name))
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    ## Load models
    config = BertConfig.from_pretrained(args.config_name)
    tokenizer = BertTokenizer.from_pretrained(args.text_encoder_checkpoint,
                                              do_lower_case=args.do_lower_case)
    text_encoder = BertModel.from_pretrained(args.text_encoder_checkpoint, config=config)
    graph_encoder = GraphEncoder(args.n_hidden, args.min_score)
    if args.graph_encoder_checkpoint:
        graph_encoder.gcnnet.load_state_dict(torch.load(args.graph_encoder_checkpoint))
    medsts_classifier = PairClassifier(config.hidden_size + args.n_hidden, 1)
    medsts_c_classifier = PairClassifier(config.hidden_size + args.n_hidden, 5)
    medsts_type_classifier = PairClassifier(config.hidden_size + args.n_hidden, 4)
    model = MedstsNet(text_encoder, graph_encoder, medsts_classifier,
                      medsts_c_classifier, medsts_type_classifier)
    model.to(args.device)
    args.n_gpu = 1

    if args.do_train:
        train_dataset = load_and_cache_examples(args, args.task_name, tokenizer, evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info('global step = {}, average loss = {}'.format(global_step, tr_loss))

        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        logger.info("saving model checkpoint to {}".format(args.output_dir))
        model_to_save = model.module if hasattr(model, 'module') else model
        # model_to_save.save_pretrained(args.output_dir)
        torch.save(model_to_save.state_dict(), os.path.join(args.output_dir, 'saved_model.pth'))
        tokenizer.save_pretrained(args.output_dir)
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
def start_inference(data, dialogue_type, dest, batchsize, bert_model, cuda):
    assert torch.cuda.is_available(), 'PyTorch not running on GPU! #sadpanda'
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(100)

    dialogue_type_dict = {'DB': 'db_response_new', 'normal': 'response'}

    config = BertConfig.from_pretrained(bert_model)
    tokenizer = BertTokenizer.from_pretrained(bert_model)
    # NOTE: BertForNextSentencePrediction(config) builds a randomly initialised model;
    # from_pretrained(bert_model) would be needed to load the pretrained NSP weights.
    model = BertForNextSentencePrediction(config)
    model.cuda()
    model.eval()

    df = pd.read_csv(data, usecols=['id'])
    df.dropna(inplace=True)
    row_count = df.shape[0]
    del df
    chunk_count = math.ceil(row_count / batchsize)

    with open(dest, 'w+'):
        pass

    cols = ['context', dialogue_type_dict[dialogue_type]]
    for i, chunk in enumerate(
            tqdm(pd.read_csv(open(data, 'r'), usecols=cols, chunksize=batchsize),
                 desc='Batches', total=chunk_count)):
        samples = get_batch(chunk, dialogue_type_dict[dialogue_type])
        assert len(samples) == chunk.shape[0], 'Some samples went missing!'

        if batchsize == 1:
            results = convert_single_example_to_features(samples, tokenizer)
        else:
            results = convert_examples_to_features(samples, tokenizer)

        with torch.no_grad():
            input_ids = torch.tensor([x.input_ids for x in results]).cuda()
            token_type_ids = torch.tensor([x.input_type_ids for x in results]).cuda()
            attention_mask = torch.tensor([x.input_mask for x in results]).cuda()
            outputs = model(input_ids, token_type_ids=token_type_ids,
                            attention_mask=attention_mask)[0]
            outputs = torch.softmax(outputs, dim=1)
            db_probs = outputs[:, 1]

        with open(dest, 'a') as f:
            f.write('\n'.join([str(x) for x in db_probs.tolist()]) + '\n')
def main():
    torch.cuda.empty_cache()
    parser = setup_parser()
    args = parser.parse_args()
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory already exists and is not empty.")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.n_gpu = torch.cuda.device_count()
    args.device = device
    set_seed(args)

    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: {}".format(args.task_name))
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()

    ## Load models
    config = BertConfig.from_pretrained(args.config_name)
    tokenizer = BertTokenizer.from_pretrained(args.text_encoder_checkpoint,
                                              do_lower_case=args.do_lower_case)
    text_encoder = BertModel.from_pretrained(args.text_encoder_checkpoint, config=config)
    graph_encoder = GraphEncoder(args.n_hidden, args.min_score)

    medsts_classifier = PairClassifier(config.hidden_size + args.n_hidden, 1)
    medsts_c_classifier = PairClassifier(config.hidden_size + args.n_hidden, 5)
    medsts_c2_classifier = PairClassifier(config.hidden_size + args.n_hidden, 2)
    medsts_type_classifier = PairClassifier(config.hidden_size + args.n_hidden, 4)
    model = MedstsNet(text_encoder, graph_encoder, medsts_classifier,
                      medsts_c_classifier, medsts_c2_classifier, medsts_type_classifier)

    if args.text_only:
        medsts_classifier = PairClassifier(config.hidden_size, 1)
        medsts_c_classifier = PairClassifier(config.hidden_size, 5)
        medsts_c2_classifier = PairClassifier(config.hidden_size, 2)
        medsts_type_classifier = PairClassifier(config.hidden_size, 4)
        model = MedstsNet_Textonly(text_encoder, medsts_classifier, medsts_c_classifier,
                                   medsts_c2_classifier, medsts_type_classifier)

    model.to(args.device)
    args.n_gpu = 1

    if args.do_train:
        train_dataset = load_and_cache_examples(args, args.task_name, tokenizer,
                                                evaluate=False, reverse=True)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info('global step = {}, average loss = {}'.format(global_step, tr_loss))
def start(check_accr=False):
    bert_config = BertConfig.from_json_file(config.bert_config_root)
    model = BertCloze(bert_config, num_choices=10)
    load_model(model, config.pretrained_bert_root)
    generate_prob(model)
    generate_result(i_range=5)
    if check_accr:
        check_result()
    print("Program run complete")
def load_artifacts(model_path):
    """Loads pretrained model, tokenizer, and config."""
    model_class = BertForQuestionAnswering
    model = model_class.from_pretrained(model_path)
    tokenizer = BertTokenizer.from_pretrained(model_path)
    config = BertConfig.from_pretrained(model_path)
    model.to("cpu")
    model.eval()
    return model, tokenizer, config
def load_artifacts(model_path):
    """Loads pretrained model, tokenizer, and config."""
    model_class = BertForSequenceClassification
    model = model_class.from_pretrained(model_path)
    tokenizer = BertTokenizer.from_pretrained(model_path)
    config = BertConfig.from_pretrained(model_path)
    model.to("cpu")
    model.eval()
    return model, tokenizer, config
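# A hedged usage sketch for the sequence-classification load_artifacts directly above.
# The checkpoint path is a placeholder, and encode(add_special_tokens=True) assumes a
# tokenizer version that supports that flag; the rest follows the standard
# BertForSequenceClassification call pattern.
import torch

model, tokenizer, config = load_artifacts('path/to/finetuned-checkpoint')
input_ids = torch.tensor([tokenizer.encode("This movie was surprisingly good.",
                                           add_special_tokens=True)])
with torch.no_grad():
    logits = model(input_ids)[0]          # (1, num_labels)
print(int(logits.argmax(dim=-1)), 'of', config.num_labels, 'labels')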
def __init__(self, code_length):  # code_length is the output dimension of the fc projection
    super(TextNet, self).__init__()
    modelConfig = BertConfig.from_pretrained('./data/bert-base-uncased-config.json')
    self.textExtractor = BertModel.from_pretrained(
        './data/bert-base-uncased-pytorch_model.bin', config=modelConfig)
    # self.textExtractor.eval()
    embedding_dim = self.textExtractor.config.hidden_size
def __init__(self, code_length=1024):
    super(TextNet, self).__init__()
    modelConfig = BertConfig.from_pretrained('/home/hengyuli/cross-modal/model/bert_config.json')
    self.textExtractor = BertModel.from_pretrained(
        '/home/hengyuli/cross-modal/model/pytorch_model.bin', config=modelConfig)
    embedding_dim = self.textExtractor.config.hidden_size

    self.fc = nn.Linear(embedding_dim, code_length)
    self.tanh = torch.nn.Tanh()
def __init__(self, code_length):  # code_length is the output dimension of the fc projection
    super(TextNet, self).__init__()
    modelConfig = BertConfig.from_pretrained('bert-base-chinese')
    self.textExtractor = BertModel.from_pretrained('bert-base-chinese', config=modelConfig)
    embedding_dim = self.textExtractor.config.hidden_size  # dimensionality of the model output at the truncation point
    self.fc = nn.Linear(embedding_dim, code_length)
    self.tanh = torch.nn.Tanh()
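# A hedged sketch of how a TextNet like the one above is typically driven: tokenise the
# text, run it through the BERT extractor, take the [CLS] hidden state, and project it
# with fc/tanh. TextNet's actual forward() is not shown, so this mirrors the usual pattern
# rather than the original implementation; the example sentence is illustrative.
import torch

net = TextNet(code_length=128)
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
ids = torch.tensor([tokenizer.encode("今天天气很好", add_special_tokens=True)])
with torch.no_grad():
    hidden_states = net.textExtractor(ids)[0]     # (1, seq_len, hidden_size)
    cls_vec = hidden_states[:, 0, :]               # [CLS] token representation
    code = net.tanh(net.fc(cls_vec))               # (1, code_length) text code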
def __init__(self):
    super(Bert, self).__init__()
    self.tokenizer = BertTokenizer.from_pretrained(
        '../pretrained/bert-base-uncased/bert-base-uncased-vocab.txt')
    modelConfig = BertConfig.from_pretrained(
        '../pretrained/bert-base-uncased/bert_config.json')
    self.textExtractor = BertModel.from_pretrained(
        '../pretrained/bert-base-uncased/pytorch_model.bin', config=modelConfig)