def _construct_and_fill_model(self):
    """Create the network and optionally initialise it from stored weights.

    The model is built with ``max(class index) + 1`` output classes on
    ``self.device_ids``. When ``sly.TaskPaths.MODEL_DIR`` is empty the freshly
    created model is kept as is. Otherwise ``self.config['weights_init_type']``
    selects how the stored weights are loaded:

    * ``TRANSFER_LEARNING``: ``load_for_transfer_learning`` with
      ``ignore_matching_layers=['last_conv']``.
    * ``CONTINUE_TRAINING``: ``load_strictly``.

    Any other value loads nothing, but the final "Weights are loaded." message
    is still logged.
    """
    # TODO: Move it progress to base class
    build_progress = sly.Progress('Building model:', 1)
    build_progress.iter_done_report()

    n_classes = max(self.class_title_to_idx.values()) + 1
    self.model = create_model(n_cls=n_classes, device_ids=self.device_ids)

    # Guard clause: nothing to load when no model directory content is given.
    if sly.fs.dir_empty(sly.TaskPaths.MODEL_DIR):
        sly.logger.info('Weights will not be inited.')
        # @TODO: add random init (m.weight.data.normal_(0, math.sqrt(2. / n))
        return

    init_mode = self.config['weights_init_type']
    log_extra = {'weights_init_type': init_mode}
    sly.logger.info('Weights will be inited from given model.', extra=log_extra)

    loader = WeightsRW(sly.TaskPaths.MODEL_DIR)
    if init_mode == TRANSFER_LEARNING:
        self.model = loader.load_for_transfer_learning(
            self.model,
            ignore_matching_layers=['last_conv'],
            logger=logger,
        )
    elif init_mode == CONTINUE_TRAINING:
        self.model = loader.load_strictly(self.model)

    sly.logger.info('Weights are loaded.', extra=log_extra)
def restore_or_create_model(num_train_examples, num_labels, global_batch_size, options):
    """Return a model restored from a checkpoint, or a newly compiled one.

    Every file returned by ``get_checkpoint_files(options.checkpoint_dir)`` is
    tried in order; the first one that ``load_model`` can read is returned
    as loaded. A checkpoint that fails to load only produces a warning. If
    none can be loaded, a new model is created on top of the pretrained model,
    compiled with sparse categorical cross-entropy loss and accuracy, and
    returned.

    Args:
        num_train_examples: Passed to ``create_optimizer``.
        num_labels: Passed to ``create_model``.
        global_batch_size: Passed to ``create_optimizer``.
        options: Object providing ``checkpoint_dir``, ``max_seq_length`` and
            ``output_layer`` (and whatever ``load_pretrained`` and
            ``create_optimizer`` read from it).
    """
    def report(*parts):
        # All progress messages go to stderr, flushed immediately.
        print(*parts, file=sys.stderr, flush=True)

    checkpoints = get_checkpoint_files(options.checkpoint_dir)
    report('Found {} checkpoint files: {}'.format(len(checkpoints), checkpoints))

    # The original author notes that this list is sorted by ctime.
    for checkpoint in checkpoints:
        report('Restoring from checkpoint', checkpoint)
        try:
            restored = load_model(checkpoint)
        except Exception as e:
            # Best effort: fall through to the next checkpoint.
            warning('Failed to restore from checkpoint {}: {}'.format(checkpoint, e))
        else:
            return restored

    # No checkpoint could be loaded
    report('Creating new model')
    pretrained_model = load_pretrained(options)
    output_offset = int(options.max_seq_length / 2)
    model = create_model(
        pretrained_model,
        num_labels,
        output_offset,
        options.output_layer,
    )
    optimizer = create_optimizer(num_train_examples, global_batch_size, options)
    model.compile(
        optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    return model
def _construct_and_fill_model(self):
    """Build the network for inference and load its stored weights strictly.

    GPU ids come from ``self.config['gpu_devices']`` (remapped via
    ``sly.remap_gpu_devices``), the class count from ``self.train_classes``
    and the weights from ``self.helper.paths.model_dir``. The model is
    switched to eval mode afterwards.
    """
    self.device_ids = sly.remap_gpu_devices(self.config['gpu_devices'])

    class_count = len(self.train_classes)
    self.model = create_model(n_cls=class_count, device_ids=self.device_ids)

    weights_reader = WeightsRW(self.helper.paths.model_dir)
    self.model = weights_reader.load_strictly(self.model)
    self.model.eval()

    logger.info('Weights are loaded.')
def _construct_and_fill_model(self):
    """Build the network on a single GPU and load its stored weights strictly.

    The weights directory is taken from
    ``sly.TaskPaths(determine_in_project=False).model_dir`` and the device from
    ``self.source_gpu_device`` (remapped via ``sly.remap_gpu_devices``). The
    model is switched to eval mode afterwards.
    """
    task_paths = sly.TaskPaths(determine_in_project=False)
    weights_dir = task_paths.model_dir

    self.device_ids = sly.remap_gpu_devices([self.source_gpu_device])

    class_count = len(self.train_classes)
    self.model = create_model(n_cls=class_count, device_ids=self.device_ids)

    weights_reader = WeightsRW(weights_dir)
    self.model = weights_reader.load_strictly(self.model)
    self.model.eval()

    logger.info('Weights are loaded.')
def train(args, data, model):
    """Train the NER model for ``args.epochs`` epochs, keeping the best one.

    Each epoch shuffles ``data.train_Ids`` in place, runs one training step per
    batch of ``args.batch_size`` instances, evaluates, and logs the merged
    train/eval metrics. Whenever ``eval_f1`` beats the best value so far the
    model is saved to ``args.output_dir`` and the per-class scores are logged.

    Args:
        args: Run configuration (``epochs``, ``batch_size``, ``output_dir``,
            plus whatever ``config_model`` and ``evaluate`` read).
        data: Dataset holder; ``train_Ids`` is shuffled in place.
        model: Ignored on entry; it is rebound to the model created inside
            the session.
    """
    # limit GPU memory
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    config = config_model(args)

    with tf.Session(config=session_config) as sess:
        best_f1 = 0
        model = create_model(sess, NERModel, args.output_dir, config, data, logger)
        logger.info("start training")

        for epoch in range(1, 1 + args.epochs):
            epoch_losses = []
            random.shuffle(data.train_Ids)
            batch_size = args.batch_size
            train_num = len(data.train_Ids)
            total_batch = train_num // batch_size + 1

            for batch_id in range(total_batch):
                lo = batch_id * batch_size
                hi = min((batch_id + 1) * batch_size, train_num)
                instance = data.train_Ids[lo:hi]
                # The "+ 1" above yields an empty final slice when the data
                # size is an exact multiple of the batch size.
                if not instance:
                    continue

                # batchify_with_label returns (in the original author's words):
                # gazs, word_seq_tensor, word_seq_lengths, biword_seq_tensor, word_seq_lengths, label_seq_tensor, layer_gaz_tensor, gaz_count_tensor, gaz_mask_tensor, mask
                (_, words, biwords, word_lens, labels,
                 layer_gaz, gaz_count, gaz_mask, mask) = batchify_with_label(instance)
                batch = (words, biwords, word_lens, labels,
                         layer_gaz, gaz_count, gaz_mask, mask, True)
                step, batch_loss = model.run_step(sess, batch, True)
                epoch_losses.append(batch_loss)

            train_log = {'loss': np.mean(epoch_losses)}
            eval_log, class_info = evaluate(sess, args, model, data)
            logs = dict(train_log, **eval_log)

            metric_parts = [f' {key}: {value:.4f} ' for key, value in logs.items()]
            show_info = f'\nEpoch: {epoch} - ' + "-".join(metric_parts)
            logger.info(show_info)

            if not logs['eval_f1'] > best_f1:
                continue

            logger.info(
                f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}"
            )
            logger.info("save model to disk.")
            best_f1 = logs['eval_f1']
            save_model(sess, model, args.output_dir, logger)
            print("Eval Entity Score: ")
            for key, value in class_info.items():
                info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
                logger.info(info)
def train(args, NERModel, processor):
    """Train ``NERModel`` on the cached training set, keeping the best model.

    Batches come from a ``BatchManager`` built over the training examples.
    After every epoch the model is evaluated and the merged train/eval metrics
    are logged. Whenever ``eval_f1`` beats the best value so far the model is
    saved to ``args.output_dir`` and the per-class scores are logged.

    Args:
        args: Run configuration (``batch_size``, ``label2id``, ``epochs``,
            ``output_dir``, plus whatever the helpers read).
        NERModel: Model class handed to ``create_model``.
        processor: Data processor providing ``vocab``.
    """
    train_dataset = load_and_cache_examples(args, processor, data_type='train')
    train_manager = BatchManager(
        data=train_dataset,
        batch_size=args.batch_size,
        vocab=processor.vocab,
        label2id=args.label2id,
        shuffle=True,
    )

    # limit GPU memory
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    config = config_model(args)
    config['vocab_size'] = len(processor.vocab)

    with tf.Session(config=session_config) as sess:
        model = create_model(sess, NERModel, args.output_dir, config, logger)
        logger.info("start training")
        best_f1 = 0

        for epoch in range(1, 1 + args.epochs):
            # A fresh list per epoch is equivalent to the original
            # reset-after-use.
            epoch_losses = []
            train_manager.reset()
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                epoch_losses.append(batch_loss)

            train_log = {'loss': np.mean(epoch_losses)}
            eval_log, class_info = evaluate(sess, args, model, processor)
            logs = dict(train_log, **eval_log)

            metric_parts = [f' {key}: {value:.4f} ' for key, value in logs.items()]
            show_info = f'\nEpoch: {epoch} - ' + "-".join(metric_parts)
            logger.info(show_info)

            # scheduler.epoch_step(logs['eval_f1'], epoch)
            if not logs['eval_f1'] > best_f1:
                continue

            logger.info(
                f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}"
            )
            logger.info("save model to disk.")
            best_f1 = logs['eval_f1']
            save_model(sess, model, args.output_dir, logger)
            print("Eval Entity Score: ")
            for key, value in class_info.items():
                info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
                logger.info(info)
def predict(args, data, model, mode):
    """Evaluate the stored NER model on the dev or test split and log scores.

    Args:
        args: Run configuration (``output_dir``, ``markup``, plus whatever
            ``config_model`` reads).
        data: Dataset holder providing ``dev_Ids``, ``test_Ids`` and
            ``label_alphabet``.
        model: Ignored on entry; it is rebound to the model created inside
            the session.
        mode: ``'dev'`` or ``'test'``; selects which split is evaluated.

    Raises:
        ValueError: If ``mode`` is neither ``'dev'`` nor ``'test'``.
    """
    # Fail fast: previously an unknown mode left ``data_Ids`` unbound and only
    # crashed (UnboundLocalError) after the session and model had been built.
    if mode not in ('dev', 'test'):
        raise ValueError(
            f"mode must be 'dev' or 'test', got {mode!r}"
        )

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    config = config_model(args)

    data_Ids = data.dev_Ids if mode == 'dev' else data.test_Ids

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, args.output_dir, config, data, logger)
        metric = SeqEntityScore(data.label_alphabet, markup=args.markup)
        (test_info, class_info), _ = model.evaluate(sess, data_Ids, metric, batch_size=1)

        # Keys carry the "test_" prefix for both splits, as before.
        test_info = {f'test_{key}': value for key, value in test_info.items()}
        logger.info(test_info)
        for key, value in class_info.items():
            info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
            logger.info(info)
def _construct_and_fill_model(self):
    """Create the training network and optionally initialise it from weights.

    GPU ids come from ``self.config['gpu_devices']`` and the class count from
    ``self.out_classes``. When the helper reports an empty model directory the
    fresh model is kept as is. Otherwise ``self.config['weights_init_type']``
    selects the loading mode:

    * ``'transfer_learning'``: ``load_for_transfer_learning``.
    * ``'continue_training'``: the previous model config is checked first,
      then ``load_strictly`` is used.

    Any other value loads nothing, but the final "Weights are loaded." message
    is still logged.
    """
    self.device_ids = sly.remap_gpu_devices(self.config['gpu_devices'])

    class_count = len(self.out_classes)
    self.model = create_model(n_cls=class_count, device_ids=self.device_ids)

    # Guard clause: nothing to load from an empty model directory.
    if self.helper.model_dir_is_empty():
        logger.info('Weights will not be inited.')
        # @TODO: add random init (m.weight.data.normal_(0, math.sqrt(2. / n))
        return

    init_mode = self.config['weights_init_type']
    log_extra = {'weights_init_type': init_mode}
    logger.info('Weights will be inited from given model.', extra=log_extra)

    loader = WeightsRW(self.helper.paths.model_dir)
    if init_mode == 'transfer_learning':
        self.model = loader.load_for_transfer_learning(self.model)
    elif init_mode == 'continue_training':
        self._check_prev_model_config()
        self.model = loader.load_strictly(self.model)

    logger.info('Weights are loaded.', extra=log_extra)
# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import common import torch model = common.create_model() print(model) model.load_state_dict(torch.load('tut6-model.pt')) def translate_sentence(sentence, src_field, trg_field, model, device, max_len=50): model.eval() if isinstance(sentence, str): nlp = spacy.load('de')
def predict(args, processor):
    """Tag every line of ``test.json`` and write prediction/submission files.

    Steps, all inside one TF session:

    1. Each JSON line of ``args.data_dir / "test.json"`` is split into
       characters, tokenized one character at a time, wrapped in
       ``[CLS]``/``[SEP]`` and zero-padded to ``args.max_seq_len``.
    2. For each test item, up to ``args.aug_num`` training examples (indices
       taken from ``train_test.bin``) are gathered as the ``'aug'`` part of
       the batch, and the model tags the line.
    3. Raw results go to ``test_prediction.json`` (one JSON object per line);
       the grouped ``{tag: {word: [[start, end], ...]}}`` records are handed
       to ``json_to_text`` together with the ``cluener_submit.json`` path.

    Args:
        args: Run configuration (``vocab_file``, ``data_dir``, ``output_dir``,
            ``max_seq_len``, ``aug_num``, ``id2label``, ...).
        processor: Data processor providing ``get_train_examples()``.
    """
    # limit GPU memory
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    config = config_model(args)
    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file)

    with tf.Session(config=session_config) as sess:
        model = create_model(sess, NERModel, args.output_dir, config, logger)

        # ---- 1. read and featurize the test set ----
        test_data = []
        with open(str(args.data_dir / "test.json"), 'r') as f:
            for idx, raw_line in enumerate(f):
                record = json.loads(raw_line.strip())
                textlist = list(record['text'])

                tokens = []
                for word in textlist:
                    pieces = tokenizer.tokenize(word)
                    # One character must map to exactly one token so that tag
                    # positions line up with character positions.
                    assert len(pieces) == 1
                    tokens.extend(pieces)
                assert len(tokens) < args.max_seq_len

                # Set the CLS marker at the start of the sentence and SEP at
                # the end. (Original note: 'append("O") or append("[SEP]")
                # not sure!')
                ntokens = ["[CLS]"] + tokens + ["[SEP]"]
                segment_ids = [0] * len(ntokens)

                input_ids = tokenizer.convert_tokens_to_ids(ntokens)
                input_len = len(input_ids)
                input_mask = [1] * input_len

                # Zero-pad all three sequences up to max_seq_len.
                for _ in range(args.max_seq_len - input_len):
                    input_ids.append(0)
                    input_mask.append(0)
                    segment_ids.append(0)

                raw_text = ['[CLS]'] + textlist + ['[SEP]']

                assert len(raw_text) == len(ntokens)
                assert len(input_ids) == args.max_seq_len
                assert len(input_mask) == args.max_seq_len
                assert len(segment_ids) == args.max_seq_len

                test_data.append({
                    'id': idx,
                    'input_ids': input_ids,
                    'input_mask': input_mask,
                    'segment_ids': segment_ids,
                    'input_len': input_len,
                    'text': raw_text,
                })

        # ---- 2. tag each item, with auxiliary training sentences ----
        results = []
        train_data = processor.get_train_examples()
        test_train = load_pickle(args.data_dir / 'train_test.bin')
        for step, line in enumerate(test_data):
            aux_sentence = [
                train_data[i] for i in test_train[step][:args.aug_num]
            ]
            a_input_ids = [s['input_ids'] for s in aux_sentence]
            # Disabled in the original: a_label_ids.append(s['label_ids'])
            # Address-information augmentation: all label information is
            # replaced by the address label (all 1s), i.e. the input mask.
            a_label_ids = [s['input_mask'] for s in aux_sentence]
            a_input_mask = [s['input_mask'] for s in aux_sentence]
            a_input_lens = [s['input_len'] for s in aux_sentence]
            a_segment_ids = [s['segment_ids'] for s in aux_sentence]

            batch = {
                'ori': ([line['input_ids']], [line['input_mask']], [[]],
                        [line['input_len']], [line['segment_ids']]),
                'aug': ([a_input_ids], [a_input_mask], [a_label_ids],
                        [a_input_lens], [a_segment_ids]),
            }
            tags = model.evaluate_line(sess, batch)
            label_entities = get_entities(tags[0], args.id2label)
            tags[0] = [args.id2label[tag_id] for tag_id in tags[0]]
            results.append({
                'id': step,
                'tag_seq': " ".join(tags[0]),
                'entities': label_entities,
            })
        print(" ")

        # ---- 3. write outputs ----
        output_predic_file = str(args.output_dir / "test_prediction.json")
        output_submit_file = str(args.output_dir / "cluener_submit.json")
        with open(output_predic_file, "w") as writer:
            for record in results:
                writer.write(json.dumps(record) + '\n')

        test_submit = []
        for x, y in zip(test_data, results):
            submit = {'id': x['id'], 'label': {}}
            # x['text'] includes the [CLS]/[SEP] markers added above.
            words = x['text']
            for subject in y['entities']:
                tag, start, end = subject[0], subject[1], subject[2]
                word = "".join(words[start:end + 1])
                # Group spans by tag, then by surface word.
                submit['label'].setdefault(tag, {}).setdefault(word, []).append([start, end])
            test_submit.append(submit)
        json_to_text(output_submit_file, test_submit)
1. / 255, input_shape=(common.image_size[0], common.image_size[1])) training_data = training_data.map(lambda x, y: (norm_layer(x), y)) validation_data = validation_data.map(lambda x, y: (norm_layer(x), y)) #Image caching training_data = training_data.cache().prefetch( buffer_size=tf.data.experimental.AUTOTUNE) validation_data = validation_data.cache().prefetch( buffer_size=tf.data.experimental.AUTOTUNE) if os.path.isdir(common.checkpoint_dir): print('loading model from checkpoint') model = tf.keras.models.load_model(common.checkpoint_dir, compile=False) else: print('creating new model') model = common.create_model(num_classes) model.compile(optimizer='adam', loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=common.checkpoint_dir, save_best_only=True, monitor='val_loss', mode='min', verbose=1) common.save_class_names(class_names) model.fit(training_data,
def predict(args, model, processor):
    """Tag every line of ``test.json`` and write prediction/submission files.

    Inside one TF session, each JSON line of ``args.data_dir / "test.json"``
    is split into characters, mapped to vocabulary indices and tagged by the
    model. Raw results go to ``test_prediction.json`` (one JSON object per
    line). The file is then read a second time, and the grouped
    ``{tag: {word: [[start, end], ...]}}`` records are handed to
    ``json_to_text`` together with the ``cluener_submit.json`` path.

    Args:
        args: Run configuration (``data_dir``, ``output_dir``, ``id2label``,
            plus whatever ``config_model`` reads).
        model: Ignored on entry; it is rebound to the model created inside
            the session.
        processor: Data processor providing ``vocab`` (with ``to_index``).
    """
    # limit GPU memory
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    config = config_model(args)
    config['vocab_size'] = len(processor.vocab)
    config['keep_prob'] = 1.0

    with tf.Session(config=session_config) as sess:
        model = create_model(sess, NERModel, args.output_dir, config, logger)

        # ---- read the test set ----
        test_data = []
        with open(str(args.data_dir / "test.json"), 'r') as f:
            for idx, raw_line in enumerate(f):
                record = json.loads(raw_line.strip())
                words = list(record['text'])
                test_data.append({
                    'id': idx,
                    'context': " ".join(words),
                    'tag': " ".join(['O'] * len(words)),
                    'raw_context': "".join(words),
                })

        # ---- tag each item ----
        results = []
        for step, line in enumerate(test_data):
            # NOTE(review): if the text itself contains a space character, the
            # join/split round-trip yields extra empty tokens, so token
            # positions no longer match character positions — confirm this
            # matches how the training data is tokenized.
            token_a = line['context'].split(" ")
            input_ids = [processor.vocab.to_index(w) for w in token_a]
            input_mask = [1] * len(token_a)
            input_lens = [len(token_a)]
            tags = model.evaluate_line(
                sess, ([input_ids], [input_mask], [[]], input_lens))
            label_entities = get_entities(tags[0], args.id2label)
            tags[0] = [args.id2label[tag_id] for tag_id in tags[0]]
            results.append({
                'id': step,
                'tag_seq': " ".join(tags[0]),
                'entities': label_entities,
            })
        print(" ")

        # ---- write outputs ----
        output_predic_file = str(args.output_dir / "test_prediction.json")
        output_submit_file = str(args.output_dir / "cluener_submit.json")
        with open(output_predic_file, "w") as writer:
            for record in results:
                writer.write(json.dumps(record) + '\n')

        # Re-read the raw test file: the submission uses each line's own
        # 'id' and 'text' fields.
        with open(str(args.data_dir / 'test.json'), 'r') as fr:
            test_text = [json.loads(raw_line) for raw_line in fr]

        test_submit = []
        for x, y in zip(test_text, results):
            submit = {'id': x['id'], 'label': {}}
            words = list(x['text'])
            for subject in y['entities']:
                tag, start, end = subject[0], subject[1], subject[2]
                word = "".join(words[start:end + 1])
                # Group spans by tag, then by surface word.
                submit['label'].setdefault(tag, {}).setdefault(word, []).append([start, end])
            test_submit.append(submit)
        json_to_text(output_submit_file, test_submit)