def save(self, features=None):
    """Write the run outputs and, if configured, one representative model.

    ``save_output`` is always called with the config, the probability
    predictions, the true classes and the AUC scores.  When
    ``self.config.save_model`` is truthy, one model from ``self.models`` is
    additionally passed to ``save_model``:

    * ``evaluation_mode == "max"``: the model with the highest AUC;
    * otherwise, with an even ``config.runs``: the model whose AUC sits at
      position ``runs / 2`` of the ascending-sorted scores (the upper of the
      two middle values);
    * otherwise: the model whose AUC equals ``median(self.auc_scores)``.

    Args:
        features: Forwarded unchanged to ``save_model`` (defaults to None).
    """
    save_output(self.config, self.prob_predictions, self.true_classes,
                self.auc_scores)

    if not self.config.save_model:
        return

    scores = self.auc_scores
    if self.config.evaluation_mode == "max":
        chosen_auc = max(scores)
    elif self.config.runs % 2 == 0:
        # Pick an actual list element so the ``index`` lookup below succeeds.
        ranked = sorted(scores)
        chosen_auc = ranked[int(self.config.runs / 2)]
    else:
        # presumably ``median`` returns a member of the list for an odd
        # number of scores - verify against the imported implementation.
        chosen_auc = median(scores)

    # ``index`` returns the first run that achieved the chosen score.
    save_model(self.config, self.models[scores.index(chosen_auc)], features)
# Convert the collected test samples to numpy arrays.
X_test = np.array(X_test)
y_test = np.array(y_test)

# Reshape to a 3D tensor whose last axis has `no_of_feature` entries.
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], no_of_feature))

###############################################################################
# Train the model and persist it.
epochs = 120
model = train.training(X_train, y_train, no_of_feature, epochs)

path_name = "./model/single_attr_pred_open_from_open"
save_load.save_model(path_name, model)

###############################################################################
# Load the model back from the same location.
path_name = "./model/single_attr_pred_open_from_open"
model = save_load.load_model(path_name)

# Fit a scaler on the output column so predictions can be mapped back to the
# original value range.
# NOTE(review): `fit` is given `input_set[:, output_col]`; confirm that
# `output_col` selects a 2D slice, as scikit-learn scalers expect.
sc_output = MinMaxScaler(feature_range=(0, 1))
sc_output.fit(input_set[:, output_col])

# Predict on the training data, then undo the scaling of the predictions.
pred_train_scaled = model.predict(X_train)
train_predict = sc_output.inverse_transform(pred_train_scaled)
def main(args, lr):
    """Train a dynamic-graph model, then evaluate it on the test loader.

    Args:
        args: Object providing ``config_yaml`` (path passed to ``load_yaml``),
            ``abs_dir`` (model directory, also used for the VisualDL log
            path) and ``opt`` (optional iterable of ``"key=value"`` config
            overrides).
        lr: Learning rate.  When falsy, the optimizer is built by
            ``dy_model_class.create_optimizer`` from the config and ``lr`` is
            read from ``hyper_parameters.optimizer.learning_rate`` (default
            0.001) for the return value only; otherwise the optimizer comes
            from ``_create_optimizer(dy_model, lr)``.

    Returns:
        Tuple ``(infer_auc, lr, train_batch_size, model_save_path)`` where
        ``infer_auc`` is the result of ``infer_test``.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    dy_model_class = load_dy_model_class(args.abs_dir)
    config["config_abs_dir"] = args.abs_dir

    # modify config from command
    if args.opt:
        for parameter in args.opt:
            # Split on the first "=" only, so values may contain "=".
            key, value = parameter.strip().split("=", 1)
            # Command-line values arrive as strings; coerce them to the type
            # of the value being overridden so that e.g. "runner.epochs=5"
            # still works with range() below.  Exact type checks are used
            # because bool is a subclass of int.
            current_type = type(config.get(key))
            if current_type is bool:
                value = value.lower() == "true"
            elif current_type is int:
                value = int(value)
            elif current_type is float:
                value = float(value)
            config[key] = value

    # tools.vars
    use_gpu = config.get("runner.use_gpu", True)
    use_visual = config.get("runner.use_visual", False)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    train_batch_size = config.get("runner.train_batch_size", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    model_init_path = config.get("runner.model_init_path", None)
    save_checkpoint_interval = config.get("runner.save_checkpoint_interval",
                                          1)

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_visual: {}, train_batch_size: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}, save_checkpoint_interval: {}"
        .format(use_gpu, use_visual, train_batch_size, train_data_dir,
                epochs, print_interval, model_save_path,
                save_checkpoint_interval))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    dy_model = dy_model_class.create_model(config)

    # Create a log_visual object and store the data in the path
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/train")

    if model_init_path is not None:
        load_model(model_init_path, dy_model)

    if not lr:
        lr = config.get("hyper_parameters.optimizer.learning_rate", 0.001)
        optimizer = dy_model_class.create_optimizer(dy_model, config)
    else:
        optimizer = _create_optimizer(dy_model, lr)

    logger.info("read data")
    train_dataloader = create_data_loader(config=config, place=place)
    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    last_epoch_id = config.get("last_epoch", -1)
    step_num = 0
    # Keeps the final evaluation call valid even if no epoch is run.
    epoch_id = last_epoch_id

    for epoch_id in range(last_epoch_id + 1, epochs):
        # set train mode
        dy_model.train()
        metric_list, metric_list_name = dy_model_class.create_metrics()
        # Stays None when the loader yields no batch, so the epoch summary
        # below does not hit an unbound variable.
        tensor_print_dict = None

        epoch_begin = time.time()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, batch in enumerate(train_dataloader()):
            train_reader_cost += time.time() - reader_start
            optimizer.clear_grad()
            train_start = time.time()
            batch_size = len(batch[0])

            loss, metric_list, tensor_print_dict = dy_model_class.train_forward(
                dy_model, metric_list, batch, config)

            loss.backward()
            optimizer.step()
            train_run_cost += time.time() - train_start
            total_samples += batch_size

            if batch_id % print_interval == 0:
                metric_parts = []
                for metric_id, metric_name in enumerate(metric_list_name):
                    metric_value = metric_list[metric_id].accumulate()
                    metric_parts.append(
                        metric_name + ":{:.6f}, ".format(metric_value))
                    if use_visual:
                        log_visual.add_scalar(
                            tag="train/" + metric_name,
                            step=step_num,
                            value=metric_value)

                tensor_parts = []
                if tensor_print_dict is not None:
                    for var_name, var in tensor_print_dict.items():
                        var_value = var.numpy()
                        tensor_parts.append(
                            "{}:".format(var_name) + str(var_value) + ",")
                        if use_visual:
                            log_visual.add_scalar(
                                tag="train/" + var_name,
                                step=step_num,
                                value=var_value)

                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    "".join(metric_parts) + "".join(tensor_parts) +
                    " avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} ins/s, loss: {:.6f}"
                    .format(
                        train_reader_cost / print_interval,
                        (train_reader_cost + train_run_cost) / print_interval,
                        total_samples / print_interval,
                        total_samples / (train_reader_cost + train_run_cost),
                        loss.numpy()[0]))

                # Start a fresh measurement window after each report.
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            reader_start = time.time()
            step_num = step_num + 1

        # Epoch summary.
        metric_str = "".join(
            metric_name +
            ": {:.6f},".format(metric_list[metric_id].accumulate())
            for metric_id, metric_name in enumerate(metric_list_name))
        tensor_print_str = ""
        if tensor_print_dict is not None:
            tensor_print_str = "".join(
                "{}:".format(var_name) + str(var.numpy()) + ","
                for var_name, var in tensor_print_dict.items())

        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    tensor_print_str + " epoch time: {:.2f} s".format(
                        time.time() - epoch_begin))

        primary_metric = metric_list[0].accumulate()

        # Checkpoint only on the configured interval and only once the first
        # metric has passed 0.5.
        if epoch_id % save_checkpoint_interval == 0 and primary_metric > 0.5:
            save_model(
                dy_model, optimizer, model_save_path, epoch_id,
                prefix='rec')  # middle epochs

        # Early stop once the first training metric reaches 0.95.
        if primary_metric >= 0.95:
            print('Already over fitting, stop training!')
            break

    # NOTE(review): the original indentation was lost; the final evaluation is
    # placed after the epoch loop so it also runs after an early stop - confirm.
    infer_auc = infer_test(dy_model, test_dataloader, dy_model_class, config,
                           print_interval, epoch_id)
    return infer_auc, lr, train_batch_size, model_save_path
def main(args):
    """Train a dynamic-graph model, optionally with collective fleet training.

    Args:
        args: Object providing ``config_yaml`` (path passed to ``load_yaml``),
            ``abs_dir`` (model directory, also used for the VisualDL log
            path) and ``opt`` (optional iterable of ``"key=value"`` config
            overrides).

    A checkpoint is written with ``save_model`` after every epoch; when
    ``runner.use_fleet`` is set only the trainer with rank 0 saves.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    dy_model_class = load_dy_model_class(args.abs_dir)
    config["config_abs_dir"] = args.abs_dir

    # modify config from command
    if args.opt:
        for parameter in args.opt:
            # Split on the first "=" only, so values may contain "=".
            key, value = parameter.strip().split("=", 1)
            # Coerce the string to the type of the value being overridden.
            # Exact type checks are used because bool is a subclass of int.
            current_type = type(config.get(key))
            if current_type is int:
                value = int(value)
            elif current_type is float:
                value = float(value)
            elif current_type is bool:
                value = value.lower() == "true"
            config[key] = value

    # tools.vars
    use_gpu = config.get("runner.use_gpu", True)
    use_xpu = config.get("runner.use_xpu", False)
    use_visual = config.get("runner.use_visual", False)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    train_batch_size = config.get("runner.train_batch_size", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    model_init_path = config.get("runner.model_init_path", None)
    use_fleet = config.get("runner.use_fleet", False)

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_xpu: {}, use_visual: {}, train_batch_size: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
        .format(use_gpu, use_xpu, use_visual, train_batch_size,
                train_data_dir, epochs, print_interval, model_save_path))
    logger.info("**************common.configs**********")

    if use_xpu:
        xpu_device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
        place = paddle.set_device(xpu_device)
    else:
        place = paddle.set_device('gpu' if use_gpu else 'cpu')

    dy_model = dy_model_class.create_model(config)

    # Create a log_visual object and store the data in the path
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/train")

    if model_init_path is not None:
        load_model(model_init_path, dy_model)

    # to do : add optimizer function
    optimizer = dy_model_class.create_optimizer(dy_model, config)

    # use fleet run collective
    if use_fleet:
        from paddle.distributed import fleet
        strategy = fleet.DistributedStrategy()
        fleet.init(is_collective=True, strategy=strategy)
        optimizer = fleet.distributed_optimizer(optimizer)
        dy_model = fleet.distributed_model(dy_model)

    logger.info("read data")
    train_dataloader = create_data_loader(config=config, place=place)

    last_epoch_id = config.get("last_epoch", -1)
    step_num = 0

    for epoch_id in range(last_epoch_id + 1, epochs):
        # set train mode
        dy_model.train()
        metric_list, metric_list_name = dy_model_class.create_metrics()
        # Stays None when the loader yields no batch, so the epoch summary
        # below does not hit an unbound variable.
        tensor_print_dict = None

        epoch_begin = time.time()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, batch in enumerate(train_dataloader()):
            train_reader_cost += time.time() - reader_start
            optimizer.clear_grad()
            train_start = time.time()
            batch_size = len(batch[0])

            loss, metric_list, tensor_print_dict = dy_model_class.train_forward(
                dy_model, metric_list, batch, config)

            loss.backward()
            optimizer.step()
            train_run_cost += time.time() - train_start
            total_samples += batch_size

            if batch_id % print_interval == 0:
                metric_parts = []
                for metric_id, metric_name in enumerate(metric_list_name):
                    metric_value = metric_list[metric_id].accumulate()
                    metric_parts.append(
                        metric_name + ":{:.6f}, ".format(metric_value))
                    if use_visual:
                        log_visual.add_scalar(
                            tag="train/" + metric_name,
                            step=step_num,
                            value=metric_value)

                tensor_parts = []
                if tensor_print_dict is not None:
                    for var_name, var in tensor_print_dict.items():
                        var_value = var.numpy()
                        tensor_parts.append(
                            "{}:".format(var_name) + str(var_value) + ",")
                        if use_visual:
                            log_visual.add_scalar(
                                tag="train/" + var_name,
                                step=step_num,
                                value=var_value)

                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    "".join(metric_parts) + "".join(tensor_parts) +
                    " avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} ins/s"
                    .format(
                        train_reader_cost / print_interval,
                        (train_reader_cost + train_run_cost) / print_interval,
                        total_samples / print_interval,
                        total_samples / (train_reader_cost + train_run_cost)))

                # Start a fresh measurement window after each report.
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            reader_start = time.time()
            step_num = step_num + 1

        # Epoch summary.
        metric_str = "".join(
            metric_name +
            ": {:.6f},".format(metric_list[metric_id].accumulate())
            for metric_id, metric_name in enumerate(metric_list_name))
        tensor_print_str = ""
        if tensor_print_dict is not None:
            tensor_print_str = "".join(
                "{}:".format(var_name) + str(var.numpy()) + ","
                for var_name, var in tensor_print_dict.items())

        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    tensor_print_str + " epoch time: {:.2f} s".format(
                        time.time() - epoch_begin))

        if use_fleet:
            # Only the rank-0 trainer writes the checkpoint.
            trainer_id = paddle.distributed.get_rank()
            if trainer_id == 0:
                save_model(
                    dy_model,
                    optimizer,
                    model_save_path,
                    epoch_id,
                    prefix='rec')
        else:
            save_model(
                dy_model, optimizer, model_save_path, epoch_id, prefix='rec')
def main(args):
    """Run the basic dynamic-graph training loop and checkpoint every epoch.

    Args:
        args: Object providing ``config_yaml`` (path passed to ``load_yaml``)
            and ``abs_dir`` (passed to ``load_dy_model_class`` and stored in
            the config as ``config_abs_dir``).
    """

    def _metric_summary(names, metrics):
        # "<name>: <value>," for every metric, values taken from accumulate().
        return "".join(
            name + ": {:.6f},".format(metrics[idx].accumulate())
            for idx, name in enumerate(names))

    paddle.seed(12345)

    # Load the YAML config and the model class.
    config = load_yaml(args.config_yaml)
    dy_model_class = load_dy_model_class(args.abs_dir)
    config["config_abs_dir"] = args.abs_dir

    # Runner settings.
    use_gpu = config.get("runner.use_gpu", True)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    model_init_path = config.get("runner.model_init_path", None)

    banner = "**************common.configs**********"
    logger.info(banner)
    logger.info(
        "use_gpu: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
        .format(use_gpu, train_data_dir, epochs, print_interval,
                model_save_path))
    logger.info(banner)

    device_name = 'gpu' if use_gpu else 'cpu'
    place = paddle.set_device(device_name)
    dy_model = dy_model_class.create_model(config)

    # Optionally warm-start from saved parameters.
    if model_init_path is not None:
        load_model(model_init_path, dy_model)

    # to do : add optimizer function
    optimizer = dy_model_class.create_optimizer(dy_model, config)

    logger.info("read data")
    train_dataloader = create_data_loader(config=config, place=place)

    first_epoch = config.get("last_epoch", -1) + 1
    for epoch_id in range(first_epoch, epochs):
        # Switch the model to train mode and start with fresh metrics.
        dy_model.train()
        metric_list, metric_list_name = dy_model_class.create_metrics()

        epoch_begin = time.time()
        interval_begin = time.time()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, batch in enumerate(train_dataloader()):
            # Time spent waiting for the reader since the previous step.
            train_reader_cost += time.time() - reader_start
            optimizer.clear_grad()
            train_start = time.time()
            batch_size = len(batch[0])

            loss, metric_list, tensor_print_dict = dy_model_class.train_forward(
                dy_model, metric_list, batch, config)
            loss.backward()
            optimizer.step()

            train_run_cost += time.time() - train_start
            total_samples += batch_size

            if batch_id % print_interval == 0:
                metric_str = _metric_summary(metric_list_name, metric_list)

                tensor_print_str = ""
                if tensor_print_dict is not None:
                    tensor_print_str = "".join(
                        "{}:".format(var_name) + str(var.numpy()) + ","
                        for var_name, var in tensor_print_dict.items())

                header = "epoch: {}, batch_id: {}, ".format(epoch_id,
                                                            batch_id)
                elapsed = train_reader_cost + train_run_cost
                timing = (
                    " avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(train_reader_cost / print_interval,
                            elapsed / print_interval,
                            total_samples / print_interval,
                            total_samples / elapsed))
                logger.info(header + metric_str + tensor_print_str + timing)

                # Start a fresh measurement window after each report.
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            reader_start = time.time()

        epoch_metrics = _metric_summary(metric_list_name, metric_list)
        epoch_seconds = time.time() - epoch_begin
        logger.info("epoch: {} done, ".format(epoch_id) + epoch_metrics +
                    "epoch time: {:.2f} s".format(epoch_seconds))

        save_model(
            dy_model, optimizer, model_save_path, epoch_id, prefix='rec')
def train_net(args):
    """Train a TRADE model with periodic evaluation and early stopping.

    Reads from ``args``: ``batch_size``, ``hidden_size``, ``lr``, ``dropout``,
    ``device``, ``teacher_forcing_ratio``, ``clip``, ``eval_period`` and
    ``patience``.  Runs at most 200 epochs; every ``eval_period`` epochs the
    model is scored with ``evaluate`` on the dev set, the LR scheduler is
    stepped with that score, and the model is saved whenever the score is at
    least as good as the best seen so far.
    """
    # Best dev score so far, evaluations without improvement, latest score.
    best_score, stale_evals, score = 0.0, 0, 0.0

    (train_batches, dev_batches, _test, _test_special, lang, SLOTS_LIST,
     gating_dict, _max_word) = prepare_data_seq(
         True, False, batch_size=args.batch_size)

    # Build the model and move it to the requested device.
    model = TRADE(
        hidden_size=args.hidden_size,
        lang=lang,
        lr=args.lr,
        dropout=args.dropout,
        slots=SLOTS_LIST,
        gating_dict=gating_dict)
    model = model.to(args.device)

    # Losses, optimizer and scheduler.
    value_criterion = masked_cross_entropy_for_value
    gate_criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',
        factor=0.5,
        patience=1,
        min_lr=0.0001,
        verbose=True)

    for epoch in range(200):
        model.train()
        print("Epoch:{}".format(epoch))

        progress = tqdm(enumerate(train_batches), total=len(train_batches))
        for _step, batch in progress:
            optimizer.zero_grad()

            # Randomly decide whether this step uses teacher forcing.
            teacher_forced = random.random() < args.teacher_forcing_ratio
            point_outputs, gates, _words_point_out, _words_class_out = model(
                batch, teacher_forced, SLOTS_LIST[1])

            pointer_logits = point_outputs.transpose(0, 1).contiguous()
            loss_ptr = value_criterion(pointer_logits,
                                       batch["generate_y"].contiguous(),
                                       batch["y_lengths"])

            gate_logits = gates.transpose(0, 1).contiguous().view(
                -1, gates.size(-1))
            gate_targets = batch["gating_label"].contiguous().view(-1)
            loss_gate = gate_criterion(gate_logits, gate_targets)

            total_loss = loss_ptr + loss_gate
            total_loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            progress.set_description(print_loss(loss_ptr, loss_gate))

        # Only evaluate on every ``eval_period``-th epoch.
        if (epoch + 1) % int(args.eval_period) != 0:
            continue

        score = evaluate(model, dev_batches, best_score, SLOTS_LIST[2],
                         gating_dict)
        scheduler.step(score)

        if score < best_score:
            stale_evals += 1
        else:
            best_score = score
            stale_evals = 0
            save_model(model)

        # Stop when patience is exhausted or the dev score is perfect.
        if stale_evals == args.patience or score == 1.0:
            print("Ran out of patient, early stop...")
            break
def evaluate(model, dev, matric_best, slot_temp, gating_dict):
    """Score ``model`` on ``dev`` and save it when it matches or beats the best.

    For every dialogue turn, the predicted gate of each slot decides how the
    slot value is produced: the ``"none"`` gate skips the slot, the ``"ptr"``
    gate joins the generated words up to the first ``'EOS'`` (a value equal to
    ``"none"`` is skipped as well), and any other gate uses the gate's own
    name from ``gating_dict``.

    Args:
        model: Callable as ``model(batch, False, slot_temp)``; put into eval
            mode here.
        dev: Iterable of batches supporting ``len()``.
        matric_best: Best joint accuracy so far; the model is saved when the
            new joint accuracy is greater than or equal to it.
        slot_temp: Slot names, indexed by slot position.
        gating_dict: Mapping from gate name to gate id; must contain
            ``"none"`` and ``"ptr"``.

    Returns:
        The joint accuracy computed by ``_evaluate_metrics``.

    Reads the module-level ``args.gen_sample`` flag: when set, mismatching
    turns are printed and all predictions are dumped to
    ``sample/all_prediction_TRADE.json``.
    """
    # Set to not-training mode to disable dropout
    model.eval()
    print("STARTING EVALUATION")

    all_prediction = {}
    # Gate id -> gate name, for gates that are neither "none" nor "ptr".
    inverse_unpoint_slot = {v: k for k, v in gating_dict.items()}

    pbar = tqdm(enumerate(dev), total=len(dev))
    for j, data_dev in pbar:
        # Encode and Decode
        batch_size = len(data_dev['context_len'])
        _, gates, words, _ = model(data_dev, False, slot_temp)

        for bi in range(batch_size):
            dialog_id = data_dev["ID"][bi]
            turn_id = data_dev["turn_id"][bi]
            turn_belief = data_dev["turn_belief"][bi]

            turn_record = {"turn_belief": turn_belief}
            all_prediction.setdefault(dialog_id, {})[turn_id] = turn_record

            predict_belief_bsz_ptr = []
            gate = torch.argmax(gates.transpose(0, 1)[bi], dim=1)

            # pointer-generator results
            for si, sg in enumerate(gate):
                if sg == gating_dict["none"]:
                    continue

                if sg == gating_dict["ptr"]:
                    pred = np.transpose(words[si])[bi]
                    # Keep the generated words up to (not including) 'EOS'.
                    st = []
                    for e in pred:
                        if e == 'EOS':
                            break
                        st.append(e)
                    st = " ".join(st)
                    if st == "none":
                        continue
                    predict_belief_bsz_ptr.append(slot_temp[si] + "-" +
                                                  str(st))
                else:
                    predict_belief_bsz_ptr.append(
                        slot_temp[si] + "-" +
                        inverse_unpoint_slot[sg.item()])

            turn_record["pred_bs_ptr"] = predict_belief_bsz_ptr

            if set(turn_belief) != set(
                    predict_belief_bsz_ptr) and args.gen_sample:
                print("True", set(turn_belief))
                print("Pred", set(predict_belief_bsz_ptr), "\n")

    if args.gen_sample:
        os.makedirs('./sample', exist_ok=True)
        # Context manager guarantees the dump is flushed and the file closed.
        with open("sample/all_prediction_TRADE.json", 'w') as sample_file:
            json.dump(all_prediction, sample_file, indent=4)

    joint_acc_score_ptr, turn_acc_score_ptr = _evaluate_metrics(
        all_prediction, "pred_bs_ptr", slot_temp)

    evaluation_metrics = {
        "Joint Acc": joint_acc_score_ptr,
        "Turn Acc": turn_acc_score_ptr
    }
    print(evaluation_metrics)

    joint_acc_score = joint_acc_score_ptr

    if joint_acc_score >= matric_best:
        save_model(model, 'ACC-{:.4f}'.format(joint_acc_score))
        print("MODEL SAVED")

    return joint_acc_score
model.add(LSTM(units=40)) model.add(Dropout(0.2)) model.add(Dense(units=1, activation=func)) # compiling the rnn model.compile(optimizer=optim, loss='mean_squared_error') # fitting the rnn to the training set model.fit(X_train, y_train, epochs=epoch, batch_size=32) path_name = "./model/hyperParaModels" # Saving the model save_load.save_model(path_name + "/" + str(count), model) count = count + 1 # ============================================================================= path_name = "./model/hyperParaModels" results = pd.DataFrame( columns=['epoch', 'neuron', 'optim', 'activation', 'r2_score', 'MSE']) count = 0 for epoch in epochs: for neuron in neurons: # creating X_test, y_test X_test = [] y_test = []