def main(args):
    """Run FastBERT inference over the file named by ``args.infer_data``."""
    # Step 1: hyper-parameters come from a JSON config file.
    config = load_json_config(args.model_config_file)

    # Step 2: build the model graph and restore the trained weights.
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    # NOTE(review): relies on a module-level `device` — confirm it is defined
    # at import time in this script.
    model = model.to(device)
    print('Initialize model Done'.center(60, '*'))

    # Step 3: wrap the inference file in the project's dataset helper.
    infer_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                   max_seq_len=config.get("max_seq_len"),
                                   num_class=config.get("num_class"),
                                   data_file=args.infer_data)
    print("Load INFER Dataset Done, Total eval line: ", infer_dataset.__len__())

    # Step 4: run inference; results are dumped to args.dump_info_file.
    infer_model(model, infer_dataset,
                num_workers=args.data_load_num_workers,
                inference_speed=args.inference_speed,
                dump_info_file=args.dump_info_file)
def main(args):
    """FastBERT inference entry point (CPU or exactly one GPU).

    Loads hyper-parameters, restores the trained model, builds the inference
    dataset and hands everything to ``infer_model``.  ``args.gpu_ids == '-1'``
    selects CPU; a single GPU id selects that device; multiple ids are
    rejected because inference only supports one GPU.
    """
    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # ---------------------MODEL GRAPH INIT--------------------------#
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    # -----------GPU SETTING, INFER Only Support Max 1 GPU-----------#
    # BUG FIX: the previous code hard-coded master_gpu_id = 0 and moved the
    # model to the default CUDA device, ignoring the requested GPU id; the
    # intended handling was left behind as commented-out code.
    use_cuda = args.gpu_ids != '-1'
    if use_cuda and len(args.gpu_ids) == 1:
        master_gpu_id = int(args.gpu_ids)
        model = model.cuda(master_gpu_id)  # place on the *requested* GPU
    elif not use_cuda:
        master_gpu_id = None  # CPU inference
    else:
        raise RuntimeError(
            "GPU Mode not support, INFER Only Support Max 1 GPU: " + args.gpu_ids)

    # -----------------------Dataset Init---------------------------#
    infer_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                   max_seq_len=config.get("max_seq_len"),
                                   num_class=config.get("num_class"),
                                   data_file=args.infer_data)
    logging.info("Load INFER Dataset Done, Total eval line: %s",
                 len(infer_dataset))

    # ----------Running Mode Start, Batch Size Only Support 1--------#
    infer_model(master_gpu_id, model, infer_dataset,
                use_cuda=use_cuda,
                num_workers=args.data_load_num_workers,
                inference_speed=args.inference_speed,
                dump_info_file=args.dump_info_file)
import numpy as np
from pprint import pprint

sys.path.insert(0, "../")
from utils import load_json_config

# imports for displaying a video in an IPython cell
import io
import base64
from IPython.display import HTML

# In[3]:

# Load config file
config = load_json_config(config_file_path)

# ### Get predictions

# In[4]:

# Results live under <output_dir>/<model_name>/test_results.pkl relative to
# the repository root.
prediction_file_pickle_path = os.path.join('../', config['output_dir'],
                                           config['model_name'],
                                           'test_results.pkl')

# In[5]:

# The pickle holds a 5-tuple dumped by the evaluation script.
with open(prediction_file_pickle_path, 'rb') as fp:
    (logits_matrix, features_matrix, targets_list,
     item_id_list, class_to_idx) = pickle.load(fp)
# coding=utf-8 import sys from time import sleep from appium import webdriver from selenium.common.exceptions import NoSuchElementException, WebDriverException from utils import load_json_config reload(sys) sys.setdefaultencoding('utf-8') desired_caps = load_json_config("../config/config-tjj-al100.json") driver = webdriver.Remote('http://localhost:4723/wd/hub', desired_caps) sleep(2) # 1.首次启动权限申请 (autoAcceptAlerts 属性会自动获取权限) for perIndex in [0, 5]: try: driver.find_element_by_id('com.android.packageinstaller:id/permission_allow_button').click() sleep(2) except NoSuchElementException: break # 2.关闭首页推荐弹框 try: driver.find_element_by_id('com.huanshou.taojj:id/iv_close').click() sleep(1) except NoSuchElementException: print 'find_element_by_id: iv_close error!'
def main(args):
    """Train or evaluate a FastBERT model, dispatched on ``args.run_mode``.

    run_mode == 'train' with train_stage 0 trains the teacher classifier on
    top of a pretrained BERT; train_stage 1 distills the student (branch)
    classifiers from a previously saved teacher.  run_mode == 'eval'
    evaluates a saved model.

    BUG FIX: invalid mode/stage values used to be only printed, leaving
    ``model`` (and ``save_model_path_for_train``) undefined so the script
    crashed later with NameError; they now raise RuntimeError up front.
    """
    # 1. Load configuration files.
    config = load_json_config(args.model_config_file)
    bert_config = BertConfig.from_json_file(
        config.get('bert_config_path'))  # BERT graph hyper-parameters

    # 2. Build / restore the model according to run mode and train stage.
    if args.run_mode == 'train':
        if args.train_stage == 0:
            # Stage 0: teacher classifier on top of the pretrained BERT.
            model = FastBertModel.load_pretrained_bert_model(
                bert_config, config,
                pretrained_model_path=config.get('bert_pretrained_model_path'))
            save_model_path_for_train = args.save_model_path
        elif args.train_stage == 1:
            # Stage 1: distill the student (branch) classifiers.
            model = FastBertModel(bert_config, config)
            load_saved_model(model, args.save_model_path)
            save_model_path_for_train = args.save_model_path_distill
            # Freeze everything except the branch classifiers.
            for name, p in model.named_parameters():
                if 'branch_classifier' not in name:
                    p.requires_grad = False
            print('Teacher Classifier Freezed, Student Classifier will Distilling')
        else:
            raise RuntimeError('train_stage not legal, please choose 0 or 1')
    elif args.run_mode == 'eval':
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)
    else:
        raise RuntimeError('Operation mode not legal: ' + str(args.run_mode))
    print("initialize model Done".center(60, '*'))
    # NOTE(review): relies on a module-level `device` — confirm it is defined.
    model.to(device)

    # 3. Dataset initialization (each split is optional on the command line).
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                       max_seq_len=config.get('max_seq_len'),
                                       num_class=config.get('num_class'),
                                       data_file=args.train_data)
        print('load training dataset done. total training num: {}'.format(
            train_dataset.__len__()))
    if args.eval_data:
        eval_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                      max_seq_len=config.get('max_seq_len'),
                                      num_class=config.get('num_class'),
                                      data_file=args.eval_data)
        print('load eval dataset done. total eval num: {}'.format(
            eval_dataset.__len__()))

    # 4. Dispatch to training or evaluation.
    if args.run_mode == 'train':
        optimizer = init_bert_adam_optimizer(
            model, train_dataset.__len__(), args.epochs, args.batch_size,
            config.get('gradient_accumulation_steps'),
            config.get('init_lr'), config.get('warmup_proportion'))
        train_model(args.train_stage, save_model_path_for_train, model,
                    optimizer, args.epochs, train_dataset, eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        'gradient_accumulation_steps'),
                    num_workers=args.data_load_num_workers)
    else:
        # run_mode was validated above, so this branch is necessarily 'eval'.
        eval_model(args.train_stage, model, eval_dataset,
                   batch_size=args.batch_size,
                   num_workers=args.data_load_num_workers)
def main(args):
    """Train or evaluate FastBERT (single- or multi-GPU).

    run_mode == 'train': stage 0 trains the teacher classifier from a
    pretrained BERT, stage 1 distills the student branch classifiers from a
    saved teacher.  run_mode == 'eval': evaluates a saved model.
    ``args.gpu_ids`` is '-1' for CPU, a single id, or a comma list for
    DataParallel.
    """
    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # ---------------------MODEL GRAPH INIT--------------------------#
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    if args.run_mode == 'train':
        # Initial training of the teacher classifier.
        if args.train_stage == 0:
            model = FastBertModel.load_pretrained_bert_model(
                bert_config, config,
                pretrained_model_path=config.get("bert_pretrained_model_path"))
            save_model_path_for_train = args.save_model_path
        # Distillation of the student branch classifiers.
        elif args.train_stage == 1:
            model = FastBertModel(bert_config, config)
            load_saved_model(model, args.save_model_path)
            save_model_path_for_train = args.save_model_path_distill
            # Freeze everything except the student branch classifiers.
            for name, p in model.named_parameters():
                if "branch_classifier" not in name:
                    p.requires_grad = False
            logging.info(
                "Main Graph and Teacher Classifier Freezed, Student Classifier will Distilling"
            )
        else:
            raise RuntimeError('Operation Train Stage(0 or 1) not Legal')
    elif args.run_mode == 'eval':
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)
    else:
        raise RuntimeError('Operation Mode not Legal')
    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    # ---------------------GPU SETTING--------------------------#
    # '-1' => CPU; one id => that GPU; comma list => DataParallel replicas.
    use_cuda = args.gpu_ids != '-1'
    if len(args.gpu_ids) == 1 and use_cuda:
        master_gpu_id = int(args.gpu_ids)
        model = model.cuda(int(args.gpu_ids)) if use_cuda else model
    elif use_cuda:
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(gpu_ids[0])
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)
    else:
        master_gpu_id = None

    # -----------------------Dataset Init --------------------------------#
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                       max_seq_len=config.get("max_seq_len"),
                                       num_class=config.get("num_class"),
                                       data_file=args.train_data)
        logging.info("Load Training Dataset Done, Total training line: %s",
                     train_dataset.__len__())
    if args.eval_data:
        eval_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                      max_seq_len=config.get("max_seq_len"),
                                      num_class=config.get("num_class"),
                                      data_file=args.eval_data)
        logging.info("Load Eval Dataset Done, Total eval line: %s",
                     eval_dataset.__len__())

    # -----------------------Running Mode Start--------------------------------#
    if args.run_mode == "train":
        optimizer = init_bert_adam_optimizer(
            model, train_dataset.__len__(), args.epochs, args.batch_size,
            config.get("gradient_accumulation_steps"),
            config.get("init_lr"), config.get("warmup_proportion"))
        train_model(args.train_stage, save_model_path_for_train,
                    master_gpu_id, model, optimizer, args.epochs,
                    train_dataset, eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=use_cuda,
                    num_workers=args.data_load_num_workers)
    elif args.run_mode == "eval":
        eval_model(args.train_stage, master_gpu_id, model, eval_dataset,
                   batch_size=args.batch_size, use_cuda=use_cuda,
                   num_workers=args.data_load_num_workers)
    else:
        # BUG FIX: was `args.mode`, an attribute this parser never defines,
        # so the intended message was masked by an AttributeError.
        raise RuntimeError("Mode not support: " + args.run_mode)
def main(args):
    """Entry point for multi-label BERT classification: train/eval/predict.

    ``args.mode`` selects the branch; ``args.gpu_ids`` ("0" or "0,1,...")
    selects the master GPU and optional DataParallel replicas;
    ``args.saved_model`` optionally restores weights; ``args.input_file``
    overrides the testing split for eval/predict.
    """
    logging.info("Loading HyperParameters".center(60, "="))
    config = load_json_config(args.config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done".center(60, "="))
    logging.info("Loading Dataset".center(60, "="))
    # Dataset bundles vocab, label list, per-label loss weights, both splits.
    dataset = MultiLabelClassificationDataset(
        vocab_file=config.get("vocab_file"),
        label_file=config.get("label_file"),
        label_weight_file=config.get("label_weight_file"),
        max_seq_len=config.get("max_seq_len"),
        training_path=config.get("training_path"),
        testing_path=config.get("testing_path"))
    logging.info("Total training line: " + str(dataset.training_len) +
                 ", total testing line: " + str(dataset.testing_len))
    label_size = len(dataset.label2idx)
    logging.info('label size: %d' % label_size)
    logging.info("Load Dataset Done".center(60, "="))
    # Move the per-label loss weights to GPU when CUDA is enabled in config.
    label_weight = dataset.label_weight.to('cuda') if config.get(
        "use_cuda") else dataset.label_weight
    logging.info("Initializing SequenceClassification Model".center(60, "="))
    if config.get("pretrained_model_path"):
        # Warm-start from a pretrained BERT checkpoint.
        model = BertForMultiLabelClassification.load_pretrained_bert_model(
            bert_config_path=config.get("bert_config_path"),
            pretrained_model_path=config.get("pretrained_model_path"),
            num_labels=len(dataset.label2idx),
            label_weight=label_weight)
    else:
        # Fresh (randomly initialized) model from the BERT config alone.
        model = BertForMultiLabelClassification(BertConfig.from_json_file(
            config.get("bert_config_path")),
                                                len(dataset.label2idx),
                                                label_weight=label_weight)
    if config.get("num_tuning_layers") is not None:
        # Keep only the first num_tuning_layers encoder layers.
        model.bert.encoder.layer = torch.nn.ModuleList(
            model.bert.encoder.layer[:config.get("num_tuning_layers")])
    logging.info(model)
    logging.info("Initialize SequenceClassification Model Done".center(
        60, "="))
    if args.saved_model:
        logging.info("Loading Saved Model".center(60, "="))
        logging.info("Load saved model from: " + args.saved_model)
        load_saved_model(model, args.saved_model)
        logging.info("Load Saved Model Done".center(60, "="))
    master_gpu_id = None
    if len(args.gpu_ids) == 1:
        # Single GPU id, e.g. "0"; honors config["use_cuda"].
        master_gpu_id = int(args.gpu_ids)
        model = model.cuda(int(
            args.gpu_ids)) if config.get("use_cuda") else model
    else:
        # Comma-separated ids => DataParallel over all listed devices.
        # NOTE(review): this branch calls .cuda() unconditionally, ignoring
        # config["use_cuda"] — confirm multi-GPU runs are always CUDA.
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(gpu_ids[0])
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)
    if args.mode == "eval":
        if args.input_file:
            # Re-point the testing split at the user-supplied file.
            dataset = MultiLabelClassificationDataset(
                vocab_file=config.get("vocab_file"),
                label_file=config.get("label_file"),
                max_seq_len=config.get("max_seq_len"),
                label_weight_file=config.get("label_weight_file"),
                testing_path=args.input_file)
        eval_model(master_gpu_id, model, dataset, label_size,
                   config.get("eval_batch_size"), config.get("use_cuda"),
                   config.get("num_workers"))
    elif args.mode == "predict":
        if args.input_file:
            # Same override as eval: predict on the user-supplied file.
            dataset = MultiLabelClassificationDataset(
                vocab_file=config.get("vocab_file"),
                label_file=config.get("label_file"),
                max_seq_len=config.get("max_seq_len"),
                label_weight_file=config.get("label_weight_file"),
                testing_path=args.input_file)
        model_predict(master_gpu_id, model, dataset, config,
                      config.get("eval_batch_size"), config.get("use_cuda"),
                      config.get("num_workers"), args.output_file)
    elif args.mode == "train":
        optimizer = init_bert_adam_optimizer(
            model, dataset.training_len, config.get("epochs"),
            config.get("batch_size"),
            config.get("gradient_accumulation_steps"),
            config.get("init_lr"), config.get("warmup_proportion"))
        train_model(config.get("experiment_name"), master_gpu_id, model,
                    optimizer, config.get("epochs"), dataset, label_size,
                    batch_size=config.get("batch_size"),
                    eval_batch_size=config.get("eval_batch_size"),
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=config.get("use_cuda"),
                    num_workers=config.get("num_workers"))
    else:
        raise RuntimeError("Mode not support: " + args.mode)
def main(args):
    """Measure FastBERT accuracy and per-sample latency on a local TSV file.

    Reads './data/tcl/test.tsv' (one "<label> <text>" pair per line), runs
    adaptive inference at a fixed speed threshold, and writes per-sample
    results plus the overall accuracy to 'result.txt'.
    """
    # 1. Load configuration.
    config = load_json_config(args.model_config_file)

    # 2. Build the model and restore trained weights.
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    # NOTE(review): relies on a module-level `device` — confirm it is defined.
    model = model.to(device)
    print('Initialize model Done'.center(60, '*'))

    max_seq_len = 60
    inference_speed = 0.5

    # Read the evaluation file.
    labels = []
    texts = []
    with open('./data/tcl/test.tsv', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            label, text = line.split(' ')
            labels.append(int(label))
            texts.append(text)

    # PERF FIX: the tokenizer was rebuilt inside the loop for every sample;
    # it is loop-invariant, so build it once.
    tokenizer = tokenization.FullTokenizer(
        vocab_file=config.get("vocab_file"), do_lower_case=True)

    sum_num = len(labels)
    correct_num = 0
    result = []
    for l, t in zip(labels, texts):
        start_time = time.time()
        # 3. Encode the sample: [CLS] + tokens, truncated to max_seq_len.
        tokens = tokenizer.tokenize(t)
        tokens = tokens[:(max_seq_len - 1)]
        tokens = ["[CLS]"] + tokens
        tokens = tokenizer.convert_tokens_to_ids(tokens)

        # 4. Run adaptive inference on a batch of one.
        segment_ids = [0] * len(tokens)
        attn_masks = [1] * len(tokens)
        tokens = torch.LongTensor([tokens])
        segment_ids = torch.LongTensor([segment_ids])
        attn_masks = torch.LongTensor([attn_masks])
        l = torch.LongTensor([l])
        with torch.no_grad():
            probs, layer_idxes, uncertain_infos = model(
                tokens, token_type_ids=segment_ids, attention_mask=attn_masks,
                inference=True, inference_speed=inference_speed)
        _, top_index = probs.topk(1)
        spend_time = time.time() - start_time
        if top_index.view(-1) == l:
            correct_num += 1
        # BUG FIX: removed two debug prints and a stray exit() that aborted
        # the run after the first sample, so the accuracy summary and
        # result.txt were never produced.
        s = str(l[0]) + ' ' + str(
            top_index.view(-1)[0]) + ' ' + str(spend_time) + ' ' + t
        result.append(s)

    print('正确率:{}'.format(correct_num / sum_num))
    with open('result.txt', 'w') as f:
        f.write('\n'.join(result))
def init_config(self, config_file):
    """Load the JSON hyper-parameter file and store it on ``self.config``."""
    logging.info("Loading HyperParameters".center(60, "="))
    self.config = load_json_config(config_file)
    # Echo the full config so the run log records every hyper-parameter.
    dumped = json.dumps(self.config, indent=2, sort_keys=True)
    logging.info(dumped)
    logging.info("Load HyperParameters Done".center(60, "="))
import importlib import torch import torch.nn as nn import numpy as np import utils from callbacks import (PlotLearning, AverageMeter) from models.multi_column import MultiColumn import torchvision from transforms_video import * from torchsummary import summary as ts_summary # load configurations args = utils.load_args() config = utils.load_json_config(args.config) # set column model file_name = config['model_name'] model_def = importlib.import_module(f"models.{file_name}") # setup device - CPU or GPU device, device_ids = utils.setup_cuda_devices(args) print(" > Using device: {}".format(device.type)) print(" > Active GPU ids: {}".format(device_ids)) best_loss = float('Inf') if config["input_mode"] == "av": from data_loader_av import VideoFolder else: