예제 #1
0
def main(args):
    """Restore a saved FastBERT model and run inference over ``args.infer_data``.

    Args:
        args: Parsed command-line namespace. Reads ``model_config_file``,
            ``save_model_path``, ``infer_data``, ``data_load_num_workers``,
            ``inference_speed`` and ``dump_info_file``.

    NOTE(review): ``device`` is not defined in this function; it is expected
    to exist in the enclosing module scope.
    """
    # Step 1: read the JSON model configuration.
    settings = load_json_config(args.model_config_file)

    # Step 2: build the network, restore its checkpoint, move it to `device`.
    net = FastBertModel(
        BertConfig.from_json_file(settings.get("bert_config_path")), settings)
    load_saved_model(net, args.save_model_path)
    net = net.to(device)
    print('Initialize model Done'.center(60, '*'))

    # Step 3: wrap the inference file in a dataset object.
    dataset_options = {
        "vocab_file": settings.get("vocab_file"),
        "max_seq_len": settings.get("max_seq_len"),
        "num_class": settings.get("num_class"),
        "data_file": args.infer_data,
    }
    samples = PrepareDataset(**dataset_options)
    print("Load INFER Dataset Done, Total eval line: ", len(samples))

    # Step 4: run inference.
    infer_model(
        net,
        samples,
        num_workers=args.data_load_num_workers,
        inference_speed=args.inference_speed,
        dump_info_file=args.dump_info_file,
    )
예제 #2
0
    def init_model(self):
        """Create the FastBERT model, restore its weights and put it in eval mode.

        Reads ``self.config``, ``self.save_model_path``, ``self.use_cuda`` and
        ``self.gpu_id``; the resulting model is stored on ``self.model``.
        """
        config_path = self.config.get("bert_config_path")
        self.model = FastBertModel(BertConfig.from_json_file(config_path),
                                   self.config)
        logging.info(self.model)
        logging.info("Initialize Model Done".center(60, "="))

        # Restore the saved weights into the freshly built model.
        checkpoint_path = self.save_model_path
        logging.info("Load saved model from: " + checkpoint_path)
        load_saved_model(self.model, checkpoint_path)
        logging.info("Load Saved Model Done".center(60, "="))

        # Move to the configured GPU only when CUDA use was requested.
        if self.use_cuda:
            self.model = self.model.cuda(self.gpu_id)

        self.model.eval()
    def predict(self, criterion):
        """Restore the saved meta-network and score it on the test images.

        Args:
            criterion: Loss object stored on ``self.loss_criterion``.

        Returns:
            The value returned by ``self.evaluate`` for the test images.
        """
        print("Predicting on test set...")
        if self.use_cuda:
            self.meta_net.cuda()
        self.loss_criterion = criterion

        # Restore step count, network, optimizer and state from the checkpoint.
        restored = utils.load_saved_model(
            self.model_path, self.meta_net,
            self.build_optimizers(self.meta_net))
        (self.prev_meta_step_count, self.meta_net, self.meta_optimizer,
         self.state) = restored
        print(
            f"Model has been loaded step:{self.prev_meta_step_count}, path:{self.model_path}"
        )

        # Only a square resize is applied; augmentation, ToTensor and
        # Normalize steps are currently disabled.
        resize = transforms.Resize(
            (Config.data.image_size, Config.data.image_size))
        transform_test = transforms.Compose([resize])

        # The first element of the reader's result holds the test images.
        test_dataset_imgs = read_dataset_test(Config.data.miniimagenet_path,
                                              transform_test)[0]
        evaluation = self.evaluate(test_dataset_imgs)
        print(f"Total score: {evaluation}")
        return evaluation
예제 #4
0
def test(root, binary, filename=""):
    """Score a saved model on the "test" split and log the metrics.

    Args:
        root: Passed to ``SSTContainer`` as ``root``.
        binary: Passed to ``SSTContainer`` as ``binary``.
        filename: Passed to ``load_saved_model`` to select the saved model.

    Logs accuracy, macro-averaged precision/recall/F1 and the confusion
    matrix; nothing is returned.
    """
    classifier = load_saved_model(filename=filename)

    test_X, test_Y = SSTContainer(root=root, binary=binary).data("test")

    # Turn per-class scores into class indices.
    pred_Y = np.argmax(classifier.predict(test_X), axis=1)

    acc = accuracy_score(test_Y, pred_Y)
    prec = precision_score(test_Y, pred_Y, average="macro")
    rec = recall_score(test_Y, pred_Y, average="macro")
    f1 = f1_score(test_Y, pred_Y, average="macro")

    # Confusion-matrix rows/columns follow the sorted distinct true labels.
    label_order = np.sort(np.unique(np.array(test_Y)))
    cm = confusion_matrix(test_Y, pred_Y, labels=label_order)

    logger.info(
        f"accuracy: {acc}, precision: {prec}, recall: {rec}, f1-score: {f1}"
    )
    logger.info(f"confusion matrix: \n {cm}")
예제 #5
0
def main(args):
    """Restore a saved FastBERT model and run inference over ``args.infer_data``.

    Args:
        args: Parsed command-line namespace. Reads ``model_config_file``,
            ``save_model_path``, ``gpu_ids``, ``infer_data``,
            ``data_load_num_workers``, ``inference_speed`` and
            ``dump_info_file``.
    """
    hyper_params = load_json_config(args.model_config_file)
    logging.info(json.dumps(hyper_params, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # ---- Model construction and checkpoint restore ----
    net = FastBertModel(
        BertConfig.from_json_file(hyper_params.get("bert_config_path")),
        hyper_params)
    load_saved_model(net, args.save_model_path)

    logging.info(net)
    logging.info("Initialize Model Done".center(60, "="))

    # ---- Device selection ----
    # A gpu_ids value of '-1' selects the CPU; anything else selects CUDA.
    # Per-id GPU selection is disabled, so the master GPU id is always 0.
    use_cuda = args.gpu_ids != '-1'
    target = 'cuda' if use_cuda else 'cpu'
    net.to(torch.device(target))
    master_gpu_id = 0

    # ---- Dataset ----
    samples = PrepareDataset(
        vocab_file=hyper_params.get("vocab_file"),
        max_seq_len=hyper_params.get("max_seq_len"),
        num_class=hyper_params.get("num_class"),
        data_file=args.infer_data,
    )
    logging.info("Load INFER Dataset Done, Total eval line: %s", len(samples))

    # ---- Inference ----
    infer_model(
        master_gpu_id,
        net,
        samples,
        use_cuda=use_cuda,
        num_workers=args.data_load_num_workers,
        inference_speed=args.inference_speed,
        dump_info_file=args.dump_info_file,
    )
예제 #6
0
def init_model():
    """Load every saved model and return them in a dict keyed by name.

    Each entry is produced by ``load_saved_model(name, path)``, where the
    path is resolved relative to ``ROOT_DIR``.
    """
    # (name, path relative to ROOT_DIR); order is the dict's insertion order.
    checkpoints = (
        ("sgd", "saved_models/sgd.h5"),
        ("adam", "saved_models/adam.h5"),
        ("adagrad", "saved_models/adagrad.h5"),
        ("adabound", "saved_models/adabound.h5"),
        ("amsbound", "saved_models/amsbound.h5"),
        ("adadelta", "saved_models/adadelta.h5"),
    )

    model_dict = {
        name: load_saved_model(name, os.path.join(ROOT_DIR, relative_path))
        for name, relative_path in checkpoints
    }

    print("Model loaded")

    return model_dict
예제 #7
0
def embeddings_run():
    """Fit an embedding model on the stored AST matrix and save its embeddings.

    NOTE(review): ``embed_dict_filename`` is not defined in this function; it
    is expected to exist in the enclosing module scope.
    """
    # Inputs and their output labels.
    embed_input = embeddings.load_asts_from_file(
        "processed/hoc18_ast_block_matrix.npy", raejoon=True)
    embed_output = embeddings.get_output_labels(embed_input)

    # Build and fit the model; the fit result is not used afterwards.
    embed_model = embeddings.create_model(embed_input)
    embeddings.fit_model(embed_model, embed_input, embed_output, epochs=2)

    # Round-trip the model through disk; the reloaded copy is used below.
    model_path = "tmp/my_embeddings.h5"
    utils.save_model(embed_model, model_path)
    embed_model = utils.load_saved_model(model_path)

    embed_matrix = embeddings.get_embeddings(
        embed_model, embed_input, "anonymizeddata/data/hoc18/asts/")

    matrix_shape = np.shape(embed_matrix)
    print("Embeddings matrix (including 1st row) size: ", matrix_shape)
    embeddings.save_embeddings(embed_matrix, embed_dict_filename)
예제 #8
0
def main(args):
    """Train or evaluate a FastBERT model as selected by ``args.run_mode``.

    In ``'train'`` mode, stage 0 starts from the pretrained BERT weights named
    in the config, while stage 1 reloads the checkpoint at
    ``args.save_model_path`` and freezes every parameter whose name does not
    contain ``'branch_classifier'``. In ``'eval'`` mode the checkpoint at
    ``args.save_model_path`` is evaluated.

    Args:
        args: Parsed command-line namespace. Reads ``model_config_file``,
            ``run_mode``, ``train_stage``, ``save_model_path``,
            ``save_model_path_distill``, ``train_data``, ``eval_data``,
            ``epochs``, ``batch_size`` and ``data_load_num_workers``.

    Raises:
        ValueError: If ``run_mode`` is neither ``'train'`` nor ``'eval'``, if
            ``train_stage`` is neither 0 nor 1 in train mode, or if a dataset
            path required by the selected mode is missing.

    NOTE(review): ``device`` is not defined in this function; it is expected
    to exist in the enclosing module scope.
    """
    # Fail fast on bad arguments. Previously these cases only printed a
    # message and later crashed with an unbound `model` / dataset variable.
    if args.run_mode not in ('train', 'eval'):
        raise ValueError(
            'Operation mode not legal: {!r}'.format(args.run_mode))
    is_train = args.run_mode == 'train'
    if is_train and args.train_stage not in (0, 1):
        raise ValueError(
            'error, please choose 0 or 1 for train_stage, got {!r}'.format(
                args.train_stage))
    if is_train and not args.train_data:
        raise ValueError('train mode requires args.train_data')
    if not args.eval_data:
        # Both train_model and eval_model receive the eval dataset.
        raise ValueError(
            '{} mode requires args.eval_data'.format(args.run_mode))

    # 1. Load the predefined configuration files.
    config = load_json_config(args.model_config_file)
    bert_config = BertConfig.from_json_file(
        config.get('bert_config_path'))  # BERT model configuration

    # 2. Build the model and load its weights.
    if is_train and args.train_stage == 0:
        # Stage 0 trains the teacher classifier from pretrained BERT weights.
        model = FastBertModel.load_pretrained_bert_model(
            bert_config,
            config,
            pretrained_model_path=config.get('bert_pretrained_model_path'))
        save_model_path_for_train = args.save_model_path
    elif is_train:
        # Stage 1 distills the student classifiers: reload the stage-0
        # checkpoint and freeze everything except the branch classifiers.
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)
        save_model_path_for_train = args.save_model_path_distill
        for name, p in model.named_parameters():
            if 'branch_classifier' not in name:
                p.requires_grad = False
        print('Teacher Classifier Freezed, Student Classifier will Distilling')
    else:
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)

    print("initialize model Done".center(60, '*'))
    model.to(device)

    # 3. Initialise the datasets (each one only when its path was given).
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                       max_seq_len=config.get('max_seq_len'),
                                       num_class=config.get('num_class'),
                                       data_file=args.train_data)
        print('load training dataset done. total training num: {}'.format(
            len(train_dataset)))

    eval_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                  max_seq_len=config.get('max_seq_len'),
                                  num_class=config.get('num_class'),
                                  data_file=args.eval_data)
    print('load eval dataset done. total eval num: {}'.format(
        len(eval_dataset)))

    # 4. Run the selected mode.
    if is_train:
        optimizer = init_bert_adam_optimizer(
            model, len(train_dataset), args.epochs, args.batch_size,
            config.get('gradient_accumulation_steps'), config.get('init_lr'),
            config.get('warmup_proportion'))

        train_model(args.train_stage,
                    save_model_path_for_train,
                    model,
                    optimizer,
                    args.epochs,
                    train_dataset,
                    eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        'gradient_accumulation_steps'),
                    num_workers=args.data_load_num_workers)
    else:
        eval_model(args.train_stage,
                   model,
                   eval_dataset,
                   batch_size=args.batch_size,
                   num_workers=args.data_load_num_workers)
예제 #9
0
def main(args):
    """Train or evaluate a FastBERT model, optionally on one or more GPUs.

    In ``'train'`` mode, stage 0 starts from the pretrained BERT weights named
    in the config, while stage 1 reloads the checkpoint at
    ``args.save_model_path`` and freezes every parameter whose name does not
    contain ``"branch_classifier"``. In ``'eval'`` mode the checkpoint at
    ``args.save_model_path`` is evaluated.

    Args:
        args: Parsed command-line namespace. Reads ``model_config_file``,
            ``run_mode``, ``train_stage``, ``save_model_path``,
            ``save_model_path_distill``, ``gpu_ids``, ``train_data``,
            ``eval_data``, ``epochs``, ``batch_size`` and
            ``data_load_num_workers``.

    Raises:
        RuntimeError: If ``run_mode`` or ``train_stage`` is not supported, or
            if a dataset path required by the selected mode is missing.
    """
    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # Validate up front so bad arguments fail before any model is built.
    if args.run_mode not in ('train', 'eval'):
        raise RuntimeError('Operation Mode not Legal')
    is_train = args.run_mode == 'train'
    if is_train and args.train_stage not in (0, 1):
        raise RuntimeError('Operation Train Stage(0 or 1) not Legal')
    # Previously a missing path surfaced later as an unbound local variable.
    if is_train and not args.train_data:
        raise RuntimeError('train mode requires args.train_data')
    if not args.eval_data:
        # Both train_model and eval_model receive the eval dataset.
        raise RuntimeError(args.run_mode + ' mode requires args.eval_data')

    #---------------------MODEL GRAPH INIT--------------------------#
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    if is_train and args.train_stage == 0:
        # Initial training: start from the pretrained BERT weights.
        model = FastBertModel.load_pretrained_bert_model(
            bert_config,
            config,
            pretrained_model_path=config.get("bert_pretrained_model_path"))
        save_model_path_for_train = args.save_model_path
    elif is_train:
        # Distillation training: reload the stage-0 checkpoint.
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)
        save_model_path_for_train = args.save_model_path_distill

        # Freeze everything except the branch classifiers.
        for name, p in model.named_parameters():
            if "branch_classifier" not in name:
                p.requires_grad = False
        logging.info(
            "Main Graph and Teacher Classifier Freezed, Student Classifier will Distilling"
        )
    else:
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)

    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    #---------------------GPU SETTING--------------------------#
    # gpu_ids is a string: '-1' means CPU, a single character selects one
    # GPU, anything else is parsed as a comma-separated id list.
    use_cuda = args.gpu_ids != '-1'
    if not use_cuda:
        master_gpu_id = None
    elif len(args.gpu_ids) == 1:
        master_gpu_id = int(args.gpu_ids)
        model = model.cuda(master_gpu_id)
    else:
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(master_gpu_id)
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    #-----------------------Dataset Init --------------------------------#
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                       max_seq_len=config.get("max_seq_len"),
                                       num_class=config.get("num_class"),
                                       data_file=args.train_data)
        logging.info("Load Training Dataset Done, Total training line: %s",
                     len(train_dataset))
    eval_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                  max_seq_len=config.get("max_seq_len"),
                                  num_class=config.get("num_class"),
                                  data_file=args.eval_data)
    logging.info("Load Eval Dataset Done, Total eval line: %s",
                 len(eval_dataset))

    #-----------------------Running Mode Start--------------------------------#
    if is_train:
        optimizer = init_bert_adam_optimizer(
            model, len(train_dataset), args.epochs, args.batch_size,
            config.get("gradient_accumulation_steps"), config.get("init_lr"),
            config.get("warmup_proportion"))
        train_model(args.train_stage,
                    save_model_path_for_train,
                    master_gpu_id,
                    model,
                    optimizer,
                    args.epochs,
                    train_dataset,
                    eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=use_cuda,
                    num_workers=args.data_load_num_workers)
    else:
        eval_model(args.train_stage,
                   master_gpu_id,
                   model,
                   eval_dataset,
                   batch_size=args.batch_size,
                   use_cuda=use_cuda,
                   num_workers=args.data_load_num_workers)
예제 #10
0
def main(args):
    """Evaluate, predict with, or train a BERT multi-label classifier.

    Args:
        args: Parsed command-line namespace. Reads ``config_file``,
            ``saved_model``, ``gpu_ids``, ``mode``, ``input_file`` and
            ``output_file``.

    Raises:
        RuntimeError: If ``args.mode`` is not "eval", "predict" or "train".
            This is only detected after the dataset and model were loaded.
    """
    logging.info("Loading HyperParameters".center(60, "="))
    config = load_json_config(args.config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done".center(60, "="))

    logging.info("Loading Dataset".center(60, "="))
    dataset = MultiLabelClassificationDataset(
        vocab_file=config.get("vocab_file"),
        label_file=config.get("label_file"),
        label_weight_file=config.get("label_weight_file"),
        max_seq_len=config.get("max_seq_len"),
        training_path=config.get("training_path"),
        testing_path=config.get("testing_path"),
    )

    logging.info("Total training line: " + str(dataset.training_len) +
                 ", total testing line: " + str(dataset.testing_len))
    label_size = len(dataset.label2idx)
    logging.info('label size: %d' % label_size)
    logging.info("Load Dataset Done".center(60, "="))

    use_cuda = config.get("use_cuda")
    if use_cuda:
        label_weight = dataset.label_weight.to('cuda')
    else:
        label_weight = dataset.label_weight

    logging.info("Initializing SequenceClassification Model".center(60, "="))
    bert_config_path = config.get("bert_config_path")
    if config.get("pretrained_model_path"):
        model = BertForMultiLabelClassification.load_pretrained_bert_model(
            bert_config_path=bert_config_path,
            pretrained_model_path=config.get("pretrained_model_path"),
            num_labels=label_size,
            label_weight=label_weight)
    else:
        model = BertForMultiLabelClassification(
            BertConfig.from_json_file(bert_config_path),
            label_size,
            label_weight=label_weight)

    # Optionally keep only the first `num_tuning_layers` encoder layers.
    num_tuning_layers = config.get("num_tuning_layers")
    if num_tuning_layers is not None:
        kept_layers = model.bert.encoder.layer[:num_tuning_layers]
        model.bert.encoder.layer = torch.nn.ModuleList(kept_layers)
    logging.info(model)
    logging.info("Initialize SequenceClassification Model Done".center(
        60, "="))

    if args.saved_model:
        logging.info("Loading Saved Model".center(60, "="))
        logging.info("Load saved model from: " + args.saved_model)
        load_saved_model(model, args.saved_model)
        logging.info("Load Saved Model Done".center(60, "="))

    # A single-character gpu_ids value is one device id; anything else is
    # parsed as a comma-separated id list and wrapped in DataParallel.
    if len(args.gpu_ids) == 1:
        master_gpu_id = int(args.gpu_ids)
        if use_cuda:
            model = model.cuda(master_gpu_id)
    else:
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(master_gpu_id)
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    # For eval/predict an explicit input file replaces the configured
    # dataset; label_size still comes from the dataset loaded above.
    if args.mode in ("eval", "predict") and args.input_file:
        dataset = MultiLabelClassificationDataset(
            vocab_file=config.get("vocab_file"),
            label_file=config.get("label_file"),
            max_seq_len=config.get("max_seq_len"),
            label_weight_file=config.get("label_weight_file"),
            testing_path=args.input_file)

    if args.mode == "eval":
        eval_model(master_gpu_id, model, dataset, label_size,
                   config.get("eval_batch_size"), use_cuda,
                   config.get("num_workers"))
        return

    if args.mode == "predict":
        model_predict(master_gpu_id, model, dataset, config,
                      config.get("eval_batch_size"), use_cuda,
                      config.get("num_workers"), args.output_file)
        return

    if args.mode != "train":
        raise RuntimeError("Mode not support: " + args.mode)

    optimizer = init_bert_adam_optimizer(
        model, dataset.training_len, config.get("epochs"),
        config.get("batch_size"),
        config.get("gradient_accumulation_steps"), config.get("init_lr"),
        config.get("warmup_proportion"))
    train_model(config.get("experiment_name"),
                master_gpu_id,
                model,
                optimizer,
                config.get("epochs"),
                dataset,
                label_size,
                batch_size=config.get("batch_size"),
                eval_batch_size=config.get("eval_batch_size"),
                gradient_accumulation_steps=config.get(
                    "gradient_accumulation_steps"),
                use_cuda=use_cuda,
                num_workers=config.get("num_workers"))
예제 #11
0
@app.route('/', methods=['GET'])
def Home():
    """Render the landing page with the fixed list of selectable entries.

    NOTE(review): the ``data`` entries are presumably the neighborhood
    options shown by the template; confirm against ``real_estate.html``.
    """
    options = [{'name': code} for code in ('JA', 'JP')]
    return render_template('real_estate.html', data=options)


@app.route('/predict_price', methods=['GET', 'POST'])
def predict_price():
    """Estimate a property price from the submitted form and render it.

    A GET request renders the empty form. A POST request reads ``area``,
    ``rooms``, ``suites``, ``bathrooms``, ``parkings`` and ``neighborhood``
    from the form and passes them to ``utils.get_estimated_price``. The
    result is shown in R$ and, converted with a fixed rate, in $.

    Returns:
        The rendered ``real_estate.html`` page. Missing or non-numeric
        fields yield the page with an error message and HTTP status 400.
    """
    page = 'real_estate.html'
    options = [{'name': 'JA'}, {'name': 'JP'}]

    if request.method != 'POST':
        return render_template(page, data=options)

    form = request.form
    try:
        area = float(form.get('area'))
        rooms = int(form.get('rooms'))
        suites = int(form.get('suites'))
        bathrooms = int(form.get('bathrooms'))
        parkings = int(form.get('parkings'))
    except (TypeError, ValueError):
        # TypeError: field missing (None); ValueError: not a number.
        # Previously either case escaped as an HTTP 500.
        message = "Invalid input"
        return render_template(page,
                               prediction_text=message,
                               prediction_text_dollar=message,
                               data=options), 400
    neighborhood = form.get('neighborhood')

    response = utils.get_estimated_price(
        neighborhood, area, rooms, suites, bathrooms, parkings)

    if response < 0:
        # The keyword used to be misspelled `prediction_texts`, so this
        # message never reached the field the success branch fills.
        message = "The price is below zero"
        return render_template(page,
                               prediction_text=message,
                               prediction_text_dollar=message,
                               data=options)

    # Hard-coded conversion rate from R$ to $.
    reais_per_dollar = 5.12
    return render_template(
        page,
        prediction_text="{:.2f}R$".format(response),
        prediction_text_dollar="{:.2f}$".format(response / reais_per_dollar),
        data=options)


if __name__ == "__main__":
    import os

    print('Starting python Flask Server for Real estate Prediction')
    # Called for its side effects before the server starts; the return value
    # is ignored.
    utils.load_saved_model()
    # Debug mode enables the interactive debugger, which must not be exposed
    # outside development. It stays on by default (as before) but can now be
    # switched off by setting FLASK_DEBUG=0 in the environment.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1") != "0"
    app.run(debug=debug_enabled)
예제 #12
0
def main(args):
    """Evaluate a saved FastBERT model one sample at a time.

    Reads tab-separated ``label<TAB>text`` lines from ``./data/tcl/test.tsv``,
    runs each text through the model in inference mode, prints the accuracy
    and writes one line per sample (label, prediction, elapsed seconds,
    text) to ``result.txt``.

    Args:
        args: Parsed command-line namespace. Reads ``model_config_file`` and
            ``save_model_path``.

    NOTE(review): ``device`` is not defined in this function; it is expected
    to exist in the enclosing module scope.
    """
    # 1. Load the configuration file.
    config = load_json_config(args.model_config_file)

    # 2. Load the model.
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    model = model.to(device)
    # Inference must not run with training-time layers such as dropout
    # active. Harmless if load_saved_model already switched the mode.
    model.eval()
    print('Initialize model Done'.center(60, '*'))

    max_seq_len = 60
    inference_speed = 0.5

    labels = []
    texts = []
    with open('./data/tcl/test.tsv', 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            label, text = line.split('\t')
            labels.append(int(label))
            texts.append(text)
    sum_num = len(labels)

    # 3. Build the tokenizer once; it does not depend on the sample.
    tokenizer = tokenization.FullTokenizer(vocab_file=config.get("vocab_file"),
                                           do_lower_case=True)

    correct_num = 0
    result = []
    for label, text in zip(labels, texts):
        start_time = time.time()

        # Prepend [CLS] and truncate so the sequence fits max_seq_len.
        pieces = ["[CLS]"] + tokenizer.tokenize(text)[:(max_seq_len - 1)]
        token_ids = tokenizer.convert_tokens_to_ids(pieces)

        # 4. Run inference on a batch of one, on the model's device.
        tokens = torch.LongTensor([token_ids]).to(device)
        segment_ids = torch.zeros_like(tokens)
        attn_masks = torch.ones_like(tokens)
        with torch.no_grad():
            probs, _layer_idxes, _uncertain_infos = model(
                tokens,
                token_type_ids=segment_ids,
                attention_mask=attn_masks,
                inference=True,
                inference_speed=inference_speed)
        _, top_index = probs.topk(1)
        spend_time = time.time() - start_time

        predicted = int(top_index.view(-1)[0])
        if predicted == label:
            correct_num += 1

        # A leftover debugging exit() used to stop the run here after the
        # first sample, so nothing below was ever reached.
        result.append('  '.join(
            [str(label), str(predicted), str(spend_time), text]))

    # Guard against an empty test file.
    accuracy = correct_num / sum_num if sum_num else 0.0
    print('正确率:{}'.format(accuracy))
    with open('result.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(result))
    def train_fn(self, criterion, optimizer, resume=True):
        """Set up optimizers, optional checkpoint resume and the training log.

        Args:
            criterion: Loss object stored on ``self.loss_criterion``.
            optimizer: Stored on ``self.fast_optimizer``.
            resume: When true, restore step count, network, meta optimizer
                and state from ``self.model_path``.

        Returns:
            ``self._train``.
        """
        self.loss_criterion = criterion
        self.fast_optimizer = optimizer

        # The meta optimizer is always plain SGD over the meta-network.
        meta_params = self.meta_net.parameters()
        self.meta_optimizer = torch.optim.SGD(meta_params,
                                              lr=Config.train.meta_lr)

        if resume:
            restored = utils.load_saved_model(self.model_path, self.meta_net,
                                              self.meta_optimizer)
            (self.prev_meta_step_count, self.meta_net, self.meta_optimizer,
             self.state) = restored
            print(
                f"Model has been loaded step:{self.prev_meta_step_count}, path:{self.model_path}"
            )

        log_path = os.path.join(self.c_path, 'log.txt')
        self.logger = Logger(log_path, title=self.title)
        column_names = ['step', 'Learning Rate', 'Train Acc.', 'Valid Acc.']
        self.logger.set_names(column_names)
        return self._train
    old_data_dictionary = json.loads(utils.get_HDF5(hf_file,
                                                    'data_dictionary'))
    best_parameters = json.loads(utils.get_HDF5(hf_file, 'best_parameters'))
    model_weights = list()
    weight_ctr = 0
    while True:
        try:
            d_key = "weight_" + str(weight_ctr)
            weights = utils.get_HDF5(hf_file, d_key)
            model_weights.append(weights)
            weight_ctr += 1
        except Exception as exception:
            break
    hf_file.close()

    loaded_model = utils.load_saved_model(model_config, model_weights)

    # Extract and process workflows
    connections = extract_workflow_connections.ExtractWorkflowConnections()
    workflow_paths, compatible_next_tools = connections.read_tabular_file(
        sys.argv[1])

    # Process the paths from workflows
    print("Dividing data...")
    data = prepare_data.PrepareData(maximum_path_length, test_share, retrain)
    train_data, train_labels, test_data, test_labels, data_dictionary, reverse_dictionary, inverse_class_weights = data.get_data_labels_matrices(
        workflow_paths, old_data_dictionary)

    # retrain the model on new data
    retrain_predict_tool = RetrainPredictTool()
    results = retrain_predict_tool.retrain_model(