示例#1
0
def main(args):
    """Run FastBERT inference over the file named by ``args.infer_data``.

    Reads the JSON model configuration, restores the saved weights into a
    ``FastBertModel``, builds the inference dataset and hands both to
    ``infer_model``.

    Args:
        args: Argument namespace. The attributes read here are
            ``model_config_file``, ``save_model_path``, ``infer_data``,
            ``data_load_num_workers``, ``inference_speed`` and
            ``dump_info_file``.
    """
    # Step 1: read the model configuration.
    config = load_json_config(args.model_config_file)

    # Step 2: build the network and restore its weights.
    bert_config_path = config.get("bert_config_path")
    model = FastBertModel(BertConfig.from_json_file(bert_config_path), config)
    load_saved_model(model, args.save_model_path)
    # NOTE(review): ``device`` is not defined in this function; presumably a
    # module-level global -- confirm.
    model = model.to(device)
    print('Initialize model Done'.center(60, '*'))

    # Step 3: prepare the inference dataset.
    dataset_kwargs = dict(
        vocab_file=config.get("vocab_file"),
        max_seq_len=config.get("max_seq_len"),
        num_class=config.get("num_class"),
        data_file=args.infer_data,
    )
    infer_dataset = PrepareDataset(**dataset_kwargs)
    print("Load INFER Dataset Done, Total eval line: ", len(infer_dataset))

    # Step 4: run inference.
    infer_model(
        model,
        infer_dataset,
        num_workers=args.data_load_num_workers,
        inference_speed=args.inference_speed,
        dump_info_file=args.dump_info_file,
    )
示例#2
0
def main(args):
    """Run FastBERT inference, logging progress through ``logging``.

    Loads the JSON configuration, restores the saved ``FastBertModel``,
    moves it to CUDA unless ``args.gpu_ids`` is ``'-1'``, builds the
    inference dataset and calls ``infer_model``.

    Args:
        args: Argument namespace. The attributes read here are
            ``model_config_file``, ``save_model_path``, ``gpu_ids``,
            ``infer_data``, ``data_load_num_workers``, ``inference_speed``
            and ``dump_info_file``.
    """
    # ---- Hyper-parameters ------------------------------------------------
    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # ---- Model graph -----------------------------------------------------
    bert_config_path = config.get("bert_config_path")
    model = FastBertModel(BertConfig.from_json_file(bert_config_path), config)
    load_saved_model(model, args.save_model_path)

    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    # ---- Device (inference supports at most one GPU) ---------------------
    # The string '-1' selects the CPU; any other value selects CUDA.
    use_cuda = args.gpu_ids != '-1'
    if use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model.to(device)
    # The master GPU id is fixed at 0 here, whatever ``args.gpu_ids`` holds.
    master_gpu_id = 0

    # ---- Dataset ---------------------------------------------------------
    dataset_kwargs = dict(
        vocab_file=config.get("vocab_file"),
        max_seq_len=config.get("max_seq_len"),
        num_class=config.get("num_class"),
        data_file=args.infer_data,
    )
    infer_dataset = PrepareDataset(**dataset_kwargs)
    logging.info("Load INFER Dataset Done, Total eval line: %s",
                 len(infer_dataset))

    # ---- Inference (batch size of 1 only) --------------------------------
    infer_model(
        master_gpu_id,
        model,
        infer_dataset,
        use_cuda=use_cuda,
        num_workers=args.data_load_num_workers,
        inference_speed=args.inference_speed,
        dump_info_file=args.dump_info_file,
    )
示例#3
0
import numpy as np
from pprint import pprint

sys.path.insert(0, "../")

from utils import load_json_config

# imports for displaying a video an IPython cell
import io
import base64
from IPython.display import HTML

# In[3]:

# Load config file
# NOTE(review): ``config_file_path`` is not assigned in the visible part of
# this notebook export -- presumably set in an earlier cell; confirm.
config = load_json_config(config_file_path)

# ### Get predictions

# In[4]:

# Pickled test results live at ../<output_dir>/<model_name>/test_results.pkl,
# with both directory components taken from the config.
prediction_file_pickle_path = os.path.join('../', config['output_dir'],
                                           config['model_name'],
                                           'test_results.pkl')

# In[5]:

# The pickle is unpacked into exactly five objects, in this order.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open result files produced by a trusted run.
with open(prediction_file_pickle_path, 'rb') as fp:
    logits_matrix, features_matrix, targets_list, item_id_list, class_to_idx = pickle.load(
        fp)
示例#4
0
# coding=utf-8
import sys
from time import sleep

from appium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException

from utils import load_json_config

# Python 2 idiom: reload ``sys`` so that ``setdefaultencoding`` can be called,
# then make UTF-8 the interpreter's default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')

# Open an Appium session on the local server using the desired capabilities
# stored in the JSON config file.
desired_caps = load_json_config("../config/config-tjj-al100.json")
driver = webdriver.Remote('http://localhost:4723/wd/hub', desired_caps)

sleep(2)
# 1. First-launch permission requests (the autoAcceptAlerts capability grants
#    permissions automatically).
# Accept up to five consecutive permission dialogs. The previous
# ``for perIndex in [0, 5]`` iterated over the two values 0 and 5, i.e. only
# twice, which looks like a mistyped ``range(0, 5)``. The loop still stops as
# soon as no "allow" button is present.
for _ in range(5):
    try:
        driver.find_element_by_id('com.android.packageinstaller:id/permission_allow_button').click()
    except NoSuchElementException:
        break
    else:
        # Give the next dialog (if any) time to appear.
        sleep(2)

# 2. Close the recommendation pop-up on the home page.
try:
    driver.find_element_by_id('com.huanshou.taojj:id/iv_close').click()
except NoSuchElementException:
    # Parenthesised form prints the same text under Python 2.
    print('find_element_by_id:  iv_close    error!')
else:
    sleep(1)
示例#5
0
def main(args):
    """Train or evaluate a FastBERT model as selected by ``args``.

    ``args.run_mode`` picks the operation:

    * ``'train'`` with ``args.train_stage == 0`` starts from the pretrained
      BERT weights named in the config (teacher classifier training) and
      saves to ``args.save_model_path``.
    * ``'train'`` with ``args.train_stage == 1`` reloads the model from
      ``args.save_model_path``, freezes every parameter whose name does not
      contain ``'branch_classifier'`` (student distillation) and saves to
      ``args.save_model_path_distill``.
    * ``'eval'`` reloads the model from ``args.save_model_path`` and
      evaluates it.

    Args:
        args: Argument namespace. The attributes read here are
            ``run_mode``, ``train_stage``, ``model_config_file``,
            ``save_model_path``, ``save_model_path_distill``,
            ``train_data``, ``eval_data``, ``epochs``, ``batch_size`` and
            ``data_load_num_workers``.

    Raises:
        RuntimeError: If ``run_mode`` is neither ``'train'`` nor ``'eval'``,
            if ``train_stage`` is not 0 or 1 in train mode, or if a dataset
            path required by the selected mode is empty.
    """
    # Reject unusable arguments before any config or model is loaded.
    # Previously these cases only printed a message (or nothing) and the
    # function crashed later on an unbound ``model`` / ``train_dataset`` /
    # ``eval_dataset``.
    run_mode = args.run_mode
    if run_mode not in ('train', 'eval'):
        raise RuntimeError('Operation mode not legal')
    if run_mode == 'train' and args.train_stage not in (0, 1):
        raise RuntimeError('error, please choose 0 or 1')
    if run_mode == 'train' and not args.train_data:
        raise RuntimeError('train mode requires train_data')
    if not args.eval_data:
        # Both training and evaluation consume the eval dataset.
        raise RuntimeError('{} mode requires eval_data'.format(run_mode))

    # 1. Load the predefined configuration files.
    config = load_json_config(args.model_config_file)
    bert_config = BertConfig.from_json_file(
        config.get('bert_config_path'))  # BERT model configuration

    # 2. Build or restore the model.
    if run_mode == 'train':
        if args.train_stage == 0:
            # Stage 0 trains the teacher classifier from pretrained BERT.
            model = FastBertModel.load_pretrained_bert_model(
                bert_config,
                config,
                pretrained_model_path=config.get('bert_pretrained_model_path'))
            save_model_path_for_train = args.save_model_path
        else:
            # Stage 1 distils the student classifiers: only parameters whose
            # name contains 'branch_classifier' stay trainable.
            model = FastBertModel(bert_config, config)
            load_saved_model(model, args.save_model_path)
            save_model_path_for_train = args.save_model_path_distill
            for name, p in model.named_parameters():
                if 'branch_classifier' not in name:
                    p.requires_grad = False
            print(
                'Teacher Classifier Freezed, Student Classifier will Distilling'
            )
    else:
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)

    print("initialize model Done".center(60, '*'))
    # NOTE(review): ``device`` is not defined in this function; presumably a
    # module-level global -- confirm.
    model.to(device)

    # 3. Initialise the datasets.
    train_dataset = None
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                       max_seq_len=config.get('max_seq_len'),
                                       num_class=config.get('num_class'),
                                       data_file=args.train_data)
        print('load training dataset done. total training num: {}'.format(
            len(train_dataset)))

    eval_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                  max_seq_len=config.get('max_seq_len'),
                                  num_class=config.get('num_class'),
                                  data_file=args.eval_data)
    print('load eval dataset done. total eval num: {}'.format(
        len(eval_dataset)))

    # 4. Run the selected operation (``run_mode`` was validated above).
    if run_mode == 'train':
        optimizer = init_bert_adam_optimizer(
            model, len(train_dataset), args.epochs, args.batch_size,
            config.get('gradient_accumulation_steps'), config.get('init_lr'),
            config.get('warmup_proportion'))

        train_model(args.train_stage,
                    save_model_path_for_train,
                    model,
                    optimizer,
                    args.epochs,
                    train_dataset,
                    eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        'gradient_accumulation_steps'),
                    num_workers=args.data_load_num_workers)
    else:
        eval_model(args.train_stage,
                   model,
                   eval_dataset,
                   batch_size=args.batch_size,
                   num_workers=args.data_load_num_workers)
示例#6
0
def main(args):
    """Train or evaluate a FastBERT model on CPU, one GPU or several GPUs.

    ``args.run_mode`` picks the operation:

    * ``'train'`` with ``args.train_stage == 0`` starts from the pretrained
      BERT weights named in the config and saves to
      ``args.save_model_path``.
    * ``'train'`` with ``args.train_stage == 1`` reloads the model from
      ``args.save_model_path``, freezes every parameter whose name does not
      contain ``"branch_classifier"`` and saves to
      ``args.save_model_path_distill``.
    * ``'eval'`` reloads the model from ``args.save_model_path`` and
      evaluates it.

    ``args.gpu_ids`` is a string: ``'-1'`` keeps the model on the CPU, a
    single character selects that GPU, anything else is split on commas and
    wrapped in ``torch.nn.DataParallel``.

    Args:
        args: Argument namespace. The attributes read here are
            ``run_mode``, ``train_stage``, ``model_config_file``,
            ``save_model_path``, ``save_model_path_distill``, ``gpu_ids``,
            ``train_data``, ``eval_data``, ``epochs``, ``batch_size`` and
            ``data_load_num_workers``.

    Raises:
        RuntimeError: If ``run_mode`` is neither ``'train'`` nor ``'eval'``,
            if ``train_stage`` is not 0 or 1 in train mode, or if a dataset
            path required by the selected mode is empty.
    """
    # Validate before loading anything. Previously empty dataset paths left
    # ``train_dataset`` / ``eval_dataset`` unbound, and the final fallback
    # read the non-existent ``args.mode`` instead of ``args.run_mode``.
    run_mode = args.run_mode
    if run_mode not in ('train', 'eval'):
        raise RuntimeError('Operation Mode not Legal')
    if run_mode == 'train' and args.train_stage not in (0, 1):
        raise RuntimeError('Operation Train Stage(0 or 1) not Legal')
    if run_mode == 'train' and not args.train_data:
        raise RuntimeError('Mode "train" requires train_data')
    if not args.eval_data:
        # Both training and evaluation consume the eval dataset.
        raise RuntimeError('Mode "%s" requires eval_data' % run_mode)

    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    #---------------------MODEL GRAPH INIT--------------------------#
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    if run_mode == 'train':
        if args.train_stage == 0:
            # Initial training from the pretrained BERT weights.
            model = FastBertModel.load_pretrained_bert_model(
                bert_config,
                config,
                pretrained_model_path=config.get("bert_pretrained_model_path"))
            save_model_path_for_train = args.save_model_path
        else:
            # Distillation training on top of the saved stage-0 model.
            model = FastBertModel(bert_config, config)
            load_saved_model(model, args.save_model_path)
            save_model_path_for_train = args.save_model_path_distill

            # Freeze everything except the branch classifiers.
            for name, p in model.named_parameters():
                if "branch_classifier" not in name:
                    p.requires_grad = False
            logging.info(
                "Main Graph and Teacher Classifier Freezed, Student Classifier will Distilling"
            )
    else:
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)

    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    #---------------------GPU SETTING--------------------------#
    use_cuda = args.gpu_ids != '-1'
    if not use_cuda:
        master_gpu_id = None
    elif len(args.gpu_ids) == 1:
        # Single GPU given as a one-character id.
        master_gpu_id = int(args.gpu_ids)
        model = model.cuda(master_gpu_id)
    else:
        # Comma-separated ids: the first is the master device.
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(gpu_ids[0])
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    #-----------------------Dataset Init --------------------------------#
    train_dataset = None
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                       max_seq_len=config.get("max_seq_len"),
                                       num_class=config.get("num_class"),
                                       data_file=args.train_data)
        logging.info("Load Training Dataset Done, Total training line: %s",
                     len(train_dataset))
    eval_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                  max_seq_len=config.get("max_seq_len"),
                                  num_class=config.get("num_class"),
                                  data_file=args.eval_data)
    logging.info("Load Eval Dataset Done, Total eval line: %s",
                 len(eval_dataset))

    #-----------------------Running Mode Start--------------------------------#
    # ``run_mode`` was validated above, so only two cases remain.
    if run_mode == "train":
        optimizer = init_bert_adam_optimizer(
            model, len(train_dataset), args.epochs, args.batch_size,
            config.get("gradient_accumulation_steps"), config.get("init_lr"),
            config.get("warmup_proportion"))
        train_model(args.train_stage,
                    save_model_path_for_train,
                    master_gpu_id,
                    model,
                    optimizer,
                    args.epochs,
                    train_dataset,
                    eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=use_cuda,
                    num_workers=args.data_load_num_workers)
    else:
        eval_model(args.train_stage,
                   master_gpu_id,
                   model,
                   eval_dataset,
                   batch_size=args.batch_size,
                   use_cuda=use_cuda,
                   num_workers=args.data_load_num_workers)
示例#7
0
def main(args):
    """Train, evaluate or predict with a BERT multi-label classifier.

    Loads the JSON configuration and the dataset it names, builds a
    ``BertForMultiLabelClassification`` model (optionally from pretrained
    weights, optionally truncated to ``num_tuning_layers`` encoder layers,
    optionally restored from ``args.saved_model``), places it on the GPU(s)
    named by ``args.gpu_ids`` and then dispatches on ``args.mode``.

    Args:
        args: Argument namespace. The attributes read here are
            ``config_file``, ``saved_model``, ``gpu_ids``, ``mode``,
            ``input_file`` and ``output_file``.

    Raises:
        RuntimeError: If ``args.mode`` is not ``"eval"``, ``"predict"`` or
            ``"train"``.
    """

    def _dataset_from_input_file():
        # Dataset built for ``args.input_file`` only (no training path).
        return MultiLabelClassificationDataset(
            vocab_file=config.get("vocab_file"),
            label_file=config.get("label_file"),
            max_seq_len=config.get("max_seq_len"),
            label_weight_file=config.get("label_weight_file"),
            testing_path=args.input_file)

    # ---- Hyper-parameters ------------------------------------------------
    logging.info("Loading HyperParameters".center(60, "="))
    config = load_json_config(args.config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done".center(60, "="))

    # ---- Dataset ---------------------------------------------------------
    logging.info("Loading Dataset".center(60, "="))
    dataset = MultiLabelClassificationDataset(
        vocab_file=config.get("vocab_file"),
        label_file=config.get("label_file"),
        label_weight_file=config.get("label_weight_file"),
        max_seq_len=config.get("max_seq_len"),
        training_path=config.get("training_path"),
        testing_path=config.get("testing_path"))

    logging.info("Total training line: " + str(dataset.training_len) +
                 ", total testing line: " + str(dataset.testing_len))
    label_size = len(dataset.label2idx)
    logging.info('label size: %d' % label_size)
    logging.info("Load Dataset Done".center(60, "="))
    if config.get("use_cuda"):
        label_weight = dataset.label_weight.to('cuda')
    else:
        label_weight = dataset.label_weight

    # ---- Model -----------------------------------------------------------
    logging.info("Initializing SequenceClassification Model".center(60, "="))
    if config.get("pretrained_model_path"):
        model = BertForMultiLabelClassification.load_pretrained_bert_model(
            bert_config_path=config.get("bert_config_path"),
            pretrained_model_path=config.get("pretrained_model_path"),
            num_labels=len(dataset.label2idx),
            label_weight=label_weight)
    else:
        bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
        model = BertForMultiLabelClassification(bert_config,
                                                len(dataset.label2idx),
                                                label_weight=label_weight)
    num_tuning_layers = config.get("num_tuning_layers")
    if num_tuning_layers is not None:
        # Keep only the first ``num_tuning_layers`` encoder layers.
        model.bert.encoder.layer = torch.nn.ModuleList(
            model.bert.encoder.layer[:num_tuning_layers])
    logging.info(model)
    logging.info("Initialize SequenceClassification Model Done".center(
        60, "="))

    # ---- Optional checkpoint restore ------------------------------------
    if args.saved_model:
        logging.info("Loading Saved Model".center(60, "="))
        logging.info("Load saved model from: " + args.saved_model)
        load_saved_model(model, args.saved_model)
        logging.info("Load Saved Model Done".center(60, "="))

    # ---- Device placement ------------------------------------------------
    if len(args.gpu_ids) == 1:
        # One-character id: a single device, used only when CUDA is enabled.
        master_gpu_id = int(args.gpu_ids)
        if config.get("use_cuda"):
            model = model.cuda(master_gpu_id)
    else:
        # Comma-separated ids: the first is the master device.
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(master_gpu_id)
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    # ---- Mode dispatch ---------------------------------------------------
    mode = args.mode
    if mode == "train":
        optimizer = init_bert_adam_optimizer(
            model, dataset.training_len, config.get("epochs"),
            config.get("batch_size"),
            config.get("gradient_accumulation_steps"), config.get("init_lr"),
            config.get("warmup_proportion"))
        train_model(config.get("experiment_name"),
                    master_gpu_id,
                    model,
                    optimizer,
                    config.get("epochs"),
                    dataset,
                    label_size,
                    batch_size=config.get("batch_size"),
                    eval_batch_size=config.get("eval_batch_size"),
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=config.get("use_cuda"),
                    num_workers=config.get("num_workers"))
    elif mode == "eval" or mode == "predict":
        # An explicit input file replaces the dataset from the config.
        if args.input_file:
            dataset = _dataset_from_input_file()
        if mode == "eval":
            eval_model(master_gpu_id, model, dataset, label_size,
                       config.get("eval_batch_size"), config.get("use_cuda"),
                       config.get("num_workers"))
        else:
            model_predict(master_gpu_id, model, dataset, config,
                          config.get("eval_batch_size"),
                          config.get("use_cuda"), config.get("num_workers"),
                          args.output_file)
    else:
        raise RuntimeError("Mode not support: " + args.mode)
示例#8
0
def main(args):
    """Evaluate a saved FastBERT model sample by sample on a TSV test file.

    Reads ``./data/tcl/test.tsv`` (one ``label<TAB>text`` sample per line),
    runs every sample through the model in inference mode, prints the
    accuracy and writes one line per sample to ``result.txt`` containing the
    gold label, the predicted label, the elapsed seconds and the text,
    separated by two spaces.

    Args:
        args: Argument namespace. The attributes read here are
            ``model_config_file`` and ``save_model_path``.
    """
    # 1. Load the configuration file.
    config = load_json_config(args.model_config_file)

    # 2. Load the model.
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    # NOTE(review): ``device`` is not defined in this function; presumably a
    # module-level global -- confirm.
    model = model.to(device)
    print('Initialize model Done'.center(60, '*'))

    max_seq_len = 60
    inference_speed = 0.5

    labels = []
    texts = []
    with open('./data/tcl/test.tsv', 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Split on the first tab only so the text may itself contain tabs.
            label, text = line.split('\t', 1)
            labels.append(int(label))
            texts.append(text)
    sum_num = len(labels)

    # 3. The tokenizer does not depend on the sample, so build it once
    #    instead of once per iteration (this also keeps its construction
    #    out of the per-sample timing).
    tokenizer = tokenization.FullTokenizer(vocab_file=config.get("vocab_file"),
                                           do_lower_case=True)

    correct_num = 0
    result = []
    for label, text in zip(labels, texts):
        start_time = time.time()

        # Truncate so that "[CLS]" plus the tokens fit into max_seq_len.
        tokens = ["[CLS]"] + tokenizer.tokenize(text)[:max_seq_len - 1]
        token_ids = tokenizer.convert_tokens_to_ids(tokens)

        # 4. Inference on a batch of one. The inputs are moved to the same
        #    device as the model; leaving them on the CPU fails as soon as
        #    ``device`` is a GPU.
        input_ids = torch.LongTensor([token_ids]).to(device)
        segment_ids = torch.LongTensor([[0] * len(token_ids)]).to(device)
        attn_masks = torch.LongTensor([[1] * len(token_ids)]).to(device)
        with torch.no_grad():
            probs, _, _ = model(input_ids,
                                token_type_ids=segment_ids,
                                attention_mask=attn_masks,
                                inference=True,
                                inference_speed=inference_speed)
        _, top_index = probs.topk(1)
        spend_time = time.time() - start_time

        # ``.item()`` gives a plain int regardless of the tensor's device.
        predicted = int(top_index.view(-1)[0].item())
        if predicted == label:
            correct_num += 1

        # The leftover debugging prints and ``exit()`` that used to sit here
        # terminated the run after the first sample, so the accuracy and
        # result file below were never produced.
        result.append('  '.join(
            [str(label), str(predicted), str(spend_time), text]))

    # Avoid ZeroDivisionError when the test file holds no samples.
    accuracy = correct_num / sum_num if sum_num else 0.0
    print('正确率:{}'.format(accuracy))
    with open('result.txt', 'w') as f:
        f.write('\n'.join(result))
示例#9
0
 def init_config(self, config_file):
     """Load the JSON hyper-parameter file into ``self.config`` and log it.

     Args:
         config_file: Value handed to ``load_json_config``.
     """

     def _banner(text):
         # Centre ``text`` in a 60-character line padded with "=".
         return text.center(60, "=")

     logging.info(_banner("Loading HyperParameters"))
     self.config = load_json_config(config_file)
     pretty_config = json.dumps(self.config, indent=2, sort_keys=True)
     logging.info(pretty_config)
     logging.info(_banner("Load HyperParameters Done"))
示例#10
0
import importlib

import torch
import torch.nn as nn
import numpy as np

import utils
from callbacks import (PlotLearning, AverageMeter)
from models.multi_column import MultiColumn
import torchvision
from transforms_video import *
from torchsummary import summary as ts_summary

# load configurations
# Arguments come from utils.load_args(); the JSON config is read from the
# path stored in args.config.
args = utils.load_args()
config = utils.load_json_config(args.config)

# set column model
# The model definition is imported dynamically as models.<model_name>, where
# <model_name> is taken from the config.
file_name = config['model_name']
model_def = importlib.import_module(f"models.{file_name}")

# setup device - CPU or GPU
# utils.setup_cuda_devices returns the device object and the GPU ids in use.
device, device_ids = utils.setup_cuda_devices(args)
print(" > Using device: {}".format(device.type))
print(" > Active GPU ids: {}".format(device_ids))

# Start at +infinity.
# NOTE(review): presumably lowered whenever a better loss is seen; the code
# that updates it is outside this view -- confirm.
best_loss = float('Inf')

if config["input_mode"] == "av":
    from data_loader_av import VideoFolder
else: