def train(sess, graph, config):
    """Train the model, evaluate it on the validation data and save outputs.

    When ``config["validation_dataset"]`` is ``None`` the training and
    validation sets are split from ``config["dataset"]``; otherwise the two
    files are loaded separately and their ``info`` records are merged.

    Args:
        sess: Session object passed to ``CoreModel`` and to the graph export;
            its ``graph`` attribute is handed to the summary writer when
            profiling is enabled.
        graph: Graph object whose ``as_graph_def()`` is frozen when
            ``config["export_model"]`` is set.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_dataset``, ``validation_data_rate``, ``model.py``,
            ``profile``, ``save_info_valid``, ``export_model``,
            ``save_result_valid`` and ``make_plot``.

    Returns:
        None. Results are printed and optionally written to the paths named
        in ``config``.
    """
    if config["validation_dataset"] is None:
        _, train_data, valid_data, info = load_and_split_data(
            config,
            filename=config["dataset"],
            valid_data_rate=config["validation_data_rate"])
    else:
        print("[INFO] training")
        train_data, info = load_data(config, filename=config["dataset"])
        print("[INFO] validation")
        valid_data, valid_info = load_data(
            config, filename=config["validation_dataset"])
        # Merge the two dataset descriptions: larger node count, total graphs.
        info["graph_node_num"] = max(info["graph_node_num"],
                                     valid_info["graph_node_num"])
        info["graph_num"] = info["graph_num"] + valid_info["graph_num"]

    model = CoreModel(sess, config, info)
    model.build(importlib.import_module(config["model.py"]))
    if config["profile"]:
        vars_to_train = tf.trainable_variables()
        print(vars_to_train)
        # Kept in a local so the writer object lives until train() returns.
        writer = tf.summary.FileWriter('logs', sess.graph)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("traing time:{0}".format(train_time) + "[sec]")

    # prediction_data only exists when there is something to validate on;
    # the later sections check this flag instead of hitting an unbound name.
    has_validation = valid_data.num > 0
    prediction_data = None
    if has_validation:
        # Validation
        start_t = time.time()
        validation_cost, validation_metrics, prediction_data = \
            model.pred_and_eval(valid_data)
        infer_time = time.time() - start_t
        print("final cost =", validation_cost)
        print("accuracy =", validation_metrics["accuracy"])
        print("validation time:{0}".format(infer_time) + "[sec]")
        # Saving
        if config["save_info_valid"] is not None:
            result = {
                "validation_cost": validation_cost,
                "validation_accuracy": validation_metrics,
                "train_time": train_time,
                "infer_time": infer_time,
            }
            save_path = config["save_info_valid"]
            print("[SAVE] ", save_path)
            # Context manager guarantees the JSON is flushed and closed.
            with open(save_path, "w") as fp:
                json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["export_model"]:
        try:
            print("[SAVE]", config["export_model"])
            graph_def = graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), ['output'])
            tf.train.write_graph(graph_def, '.', config["export_model"],
                                 as_text=False)
        except Exception as err:
            # Export stays best-effort, but Ctrl-C / SystemExit are no longer
            # swallowed and the underlying cause is reported.
            print('[ERROR] output has been not found')
            print('[ERROR]', repr(err))

    if config["save_result_valid"] is not None:
        if has_validation:
            save_prediction(config["save_result_valid"], prediction_data)
        else:
            print("[WARN] no validation data: prediction result is not saved")

    if config["make_plot"]:
        plot_cost(config, valid_data, model)
        if has_validation:
            plot_auc(config, valid_data.labels, np.array(prediction_data))
        else:
            print("[WARN] no validation data: AUC plot is skipped")
def _train(layers):
    """Fit a ``GCN(layers)`` model and return its validation accuracy.

    The configuration comes from ``_get_config()``; the dataset named there
    is split into training and validation parts, a fresh ``tf.Session`` is
    opened for the run, and the ``"accuracy"`` entry of the validation
    metrics is returned.

    Args:
        layers: Forwarded unchanged to ``GCN``.

    Returns:
        The value stored under ``"accuracy"`` in the metrics returned by
        ``model.pred_and_eval`` for the validation data.
    """
    cfg = _get_config()
    split = load_and_split_data(
        cfg,
        filename=cfg["dataset"],
        valid_data_rate=cfg["validation_data_rate"],
    )
    _, training_set, validation_set, dataset_info = split
    target_metric = "accuracy"

    with tf.Session() as session:
        net = CoreModel(session, cfg, dataset_info)
        # Positional flags are passed exactly as before; their meaning is
        # defined by CoreModel.build (not visible here).
        net.build(GCN(layers), True, False, None)
        net.fit(training_set, validation_set)
        evaluation = net.pred_and_eval(validation_set)
        _, metrics, _ = evaluation

    return metrics[target_metric]
def train(sess, graph, config):
    """Train the model, evaluate it on the validation data and save outputs.

    When ``config["validation_dataset"]`` is ``None`` the training and
    validation sets are split from ``config["dataset"]``; otherwise the two
    files are loaded separately and their ``info`` records are merged.

    Args:
        sess: Session object passed to ``CoreModel`` and to the graph export.
        graph: Graph object whose ``as_graph_def()`` is frozen when
            ``config["export_model"]`` is set.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_dataset``, ``validation_data_rate``, ``model.py``,
            ``task``, ``profile``, ``save_info_valid``, ``export_model``,
            ``save_result_valid`` and ``make_plot``.

    Returns:
        None. Results are printed and optionally written to the paths named
        in ``config``.
    """
    if config["validation_dataset"] is None:
        _, train_data, valid_data, info = load_and_split_data(
            config,
            filename=config["dataset"],
            valid_data_rate=config["validation_data_rate"])
    else:
        print("[INFO] training")
        train_data, info = load_data(config, filename=config["dataset"])
        print("[INFO] validation")
        valid_data, valid_info = load_data(
            config, filename=config["validation_dataset"])
        # Merge the two dataset descriptions: larger node count, total graphs.
        info["graph_node_num"] = max(info["graph_node_num"],
                                     valid_info["graph_node_num"])
        info["graph_num"] = info["graph_num"] + valid_info["graph_num"]

    model = CoreModel(sess, config, info)
    load_model_py(model, config["model.py"])

    task = config["task"]
    # Headline metric printed after validation; anything that is not one of
    # the two regression tasks reports accuracy.
    metric_name = {"regression": "mse",
                   "regression_gmfe": "gmfe"}.get(task, "accuracy")

    if config["profile"]:
        vars_to_train = tf.trainable_variables()
        print(vars_to_train)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print(f"training time: {train_time}[sec]")

    # prediction_data only exists when there is something to validate on;
    # the later sections check this flag instead of hitting an unbound name.
    has_validation = valid_data.num > 0
    prediction_data = None
    if has_validation:
        # Validation
        start_t = time.time()
        valid_cost, valid_metrics, prediction_data = model.pred_and_eval(
            valid_data)
        infer_time = time.time() - start_t
        print(f"final cost = {valid_cost}\n"
              f"{metric_name} = {valid_metrics[metric_name]}\n"
              f"validation time: {infer_time}[sec]\n")
        # Saving
        if config["save_info_valid"] is not None:
            result = {
                "validation_cost": valid_cost,
                "validation_accuracy": valid_metrics,
                "train_time": train_time,
                "infer_time": infer_time,
            }
            if task != "link_prediction":
                result["valid_metrics"] = compute_metrics(
                    config, info, prediction_data, valid_data.labels)
            save_path = config["save_info_valid"]
            save_dir = os.path.dirname(save_path)
            # os.makedirs("") raises FileNotFoundError, so a bare filename
            # (no directory part) must skip directory creation.
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            print(f"[SAVE] {save_path}")
            with open(save_path, "w") as fp:
                json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["export_model"]:
        try:
            print(f"[SAVE] {config['export_model']}")
            graph_def = graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), ['output'])
            tf.train.write_graph(graph_def, '.', config["export_model"],
                                 as_text=False)
        except Exception as err:
            # Export stays best-effort, but Ctrl-C / SystemExit are no longer
            # swallowed and the underlying cause is reported.
            print('[ERROR] output has been not found')
            print(f"[ERROR] {err!r}")

    if config["save_result_valid"] is not None:
        if has_validation:
            save_prediction(config["save_result_valid"], prediction_data)
        else:
            print("[WARN] no validation data: prediction result is not saved")

    if config["make_plot"]:
        if task == "regression" or task == "regression_gmfe":
            # NOTE: the cost plot was already disabled (commented out) for
            # regression tasks; only the R2 plot is produced.
            if has_validation:
                plot_r2(config, valid_data.labels, np.array(prediction_data))
            else:
                print("[WARN] no validation data: R2 plot is skipped")
        elif task == "link_prediction":
            plot_cost(config, valid_data, model)
        else:
            plot_cost(config, valid_data, model)
            if has_validation:
                plot_auc(config, valid_data.labels,
                         np.array(prediction_data))
            else:
                print("[WARN] no validation data: AUC plot is skipped")
def train(sess, config):
    """Train the model, evaluate it on both splits and save the outputs.

    When ``config["validation_dataset"]`` is ``None`` the training and
    validation sets are split from ``config["dataset"]``; otherwise the two
    files are loaded separately and their ``info`` records are merged.
    After fitting, the model is evaluated on the validation data and on the
    training data; costs, accuracies and the returned prediction data are
    written to the paths named in ``config``.

    Args:
        sess: Session object passed to ``CoreModel``.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_dataset``, ``validation_data_rate``, ``model.py``,
            ``save_info_valid``, ``save_info_train`` and, only if present,
            ``reconstruction_valid`` / ``reconstruction_train``.

    Returns:
        None.
    """
    if config["validation_dataset"] is None:
        _, train_data, valid_data, info = load_and_split_data(
            config,
            filename=config["dataset"],
            valid_data_rate=config["validation_data_rate"])
    else:
        print("[INFO] training")
        train_data, info = load_data(config, filename=config["dataset"])
        print("[INFO] validation")
        valid_data, valid_info = load_data(
            config, filename=config["validation_dataset"])
        # Merge the two dataset descriptions: larger node count, total graphs.
        info["graph_node_num"] = max(info["graph_node_num"],
                                     valid_info["graph_node_num"])
        info["graph_num"] = info["graph_num"] + valid_info["graph_num"]

    # train model
    # One [i, i] pair per graph index.
    info.graph_index_list = [[i, i] for i in range(info["graph_num"])]
    info.pos_weight = get_pos_weight(train_data)
    info.norm = get_norm(train_data)
    print(f"pos_weight={info.pos_weight}")
    print(f"norm={info.norm}")

    model = CoreModel(sess, config, info,
                      construct_feed_callback=construct_feed)
    load_model_py(model, config["model.py"])
    for variable in tf.trainable_variables():
        print(variable)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print(f"training time:{train_time}[sec]")

    # Validation (the timer covers both evaluation passes)
    start_t = time.time()
    validation_cost, validation_accuracy, validation_prediction_data = \
        model.pred_and_eval(valid_data)
    training_cost, training_accuracy, training_prediction_data = \
        model.pred_and_eval(train_data)
    infer_time = time.time() - start_t
    print(f"final cost(training ) = {training_cost}\n"
          f"accuracy (training ) = {training_accuracy['accuracy']}\n"
          f"final cost(validation) = {validation_cost}\n"
          f"accuracy (validation) = {validation_accuracy['accuracy']}\n"
          f"infer time:{infer_time}[sec]\n")

    # Saving
    if config["save_info_valid"] is not None:
        result = {
            "validation_cost": validation_cost,
            "validation_accuracy": validation_accuracy["accuracy"],
            "train_time": train_time,
            "infer_time": infer_time,
        }
        save_path = config["save_info_valid"]
        save_dir = os.path.dirname(save_path)
        # os.makedirs("") raises FileNotFoundError, so a bare filename
        # (no directory part) must skip directory creation.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        print(f"[SAVE] {save_path}")
        with open(save_path, "w") as fp:
            # Same encoder as the training dump below, so both files accept
            # the same value types.
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["save_info_train"] is not None:
        # Key names ("test_*") are kept as-is for existing consumers.
        result = {
            "test_cost": training_cost,
            "test_accuracy": training_accuracy["accuracy"],
            "train_time": train_time,
        }
        save_path = config["save_info_train"]
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        print(f"[SAVE] {save_path}")
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if "reconstruction_valid" in config:
        filename = config["reconstruction_valid"]
        out_dir = os.path.dirname(filename)
        print(out_dir)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print(f"[SAVE] {filename}")
        joblib.dump(validation_prediction_data, filename)

    if "reconstruction_train" in config:
        filename = config["reconstruction_train"]
        out_dir = os.path.dirname(filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print(f"[SAVE] {filename}")
        joblib.dump(training_prediction_data, filename)
def train(sess, config):
    """Train the model, evaluate it on both splits and save the outputs.

    The dataset named by ``config["dataset"]`` is split into training and
    validation parts. After fitting, the model is evaluated on the
    validation data and on the training data; costs, accuracies and the
    returned prediction data are written to the paths named in ``config``.

    Args:
        sess: Session object passed to ``CoreModel``.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_data_rate``, ``model.py``, ``save_info_valid``,
            ``save_info_train`` and, only if present,
            ``reconstruction_valid`` / ``reconstruction_train``.

    Returns:
        None.
    """
    all_data, train_data, valid_data, info = load_and_split_data(
        config,
        filename=config["dataset"],
        valid_data_rate=config["validation_data_rate"])

    # train model
    # One [i, i] pair per graph index.
    info.graph_index_list = [[i, i] for i in range(all_data.num)]
    info.pos_weight = get_pos_weight(train_data)
    info.norm = get_norm(train_data)
    print("pos_weight=", info.pos_weight)
    # Previously printed info.pos_weight under the "norm=" label.
    print("norm=", info.norm)

    model = CoreModel(sess, config, info,
                      construct_feed_callback=construct_feed)
    model.build(importlib.import_module(config["model.py"]))
    for variable in tf.trainable_variables():
        print(variable)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("traing time:{0}".format(train_time) + "[sec]")

    # Validation (the timer covers both evaluation passes)
    start_t = time.time()
    validation_cost, validation_accuracy, validation_prediction_data = \
        model.pred_and_eval(valid_data)
    training_cost, training_accuracy, training_prediction_data = \
        model.pred_and_eval(train_data)
    infer_time = time.time() - start_t
    print("final cost(training ) =", training_cost)
    print("accuracy (training ) =", training_accuracy["accuracy"])
    print("final cost(validation) =", validation_cost)
    print("accuracy (validation) =", validation_accuracy["accuracy"])
    print("infer time:{0}".format(infer_time) + "[sec]")

    # Saving
    if config["save_info_valid"] is not None:
        result = {
            "validation_cost": validation_cost,
            "validation_accuracy": validation_accuracy["accuracy"],
            "train_time": train_time,
            "infer_time": infer_time,
        }
        save_path = config["save_info_valid"]
        save_dir = os.path.dirname(save_path)
        # os.makedirs("") raises FileNotFoundError, so a bare filename
        # (no directory part) must skip directory creation.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        print("[SAVE] ", save_path)
        # Context manager guarantees the JSON is flushed and closed.
        with open(save_path, "w") as fp:
            # Same encoder as the training dump below, so both files accept
            # the same value types.
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["save_info_train"] is not None:
        # Key names ("test_*") are kept as-is for existing consumers.
        result = {
            "test_cost": training_cost,
            "test_accuracy": training_accuracy["accuracy"],
            "train_time": train_time,
        }
        save_path = config["save_info_train"]
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        print("[SAVE] ", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if "reconstruction_valid" in config:
        filename = config["reconstruction_valid"]
        out_dir = os.path.dirname(filename)
        print(out_dir)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(validation_prediction_data, filename)

    if "reconstruction_train" in config:
        filename = config["reconstruction_train"]
        out_dir = os.path.dirname(filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(training_prediction_data, filename)
m.append_lazy(keras.layers.Conv1D, conv_args) m.append_lazy(keras.layers.MaxPooling1D, [ dict(pool_size=4), ]) lstm_args = [ dict(units=i, return_sequences=False, go_backwards=True) for i in [32, 48, 64] ] m.append_lazy(keras.layers.LSTM, lstm_args) return m if __name__ == '__main__': config = _get_config() _, train_data, valid_data, info = load_and_split_data( config, filename=config["dataset"], valid_data_rate=config["validation_data_rate"]) m1 = gcn_model() m2 = linear_model(info.sequence_symbol_num) m = m1 + m2 dense_args = [dict(units=i, activation='relu') for i in [32, 64, 128]] m.append_lazy(keras.layers.Dense, dense_args) m.append_lazy(keras.layers.Dense, [ dict(units=info.label_dim, activation='softmax'), ]) g = Generator(m, dump_nn_graph=True) num_nodes = 12 num_layer_type = 4 searcher = Searcher() searcher.register_trial('graph', g) n_trials = 30
def _compute_valid_metrics(config, info, prediction_data, labels):
    """Compute validation metrics and return those of the last label.

    Args:
        config: Mapping of run options; only ``config["task"]`` is read.
        info: Dataset description; only ``info.label_dim`` is read.
        prediction_data: Model predictions, converted with ``np.array``.
        labels: Ground-truth labels, converted with ``np.array``.

    Returns:
        dict: ``r2``/``mse`` for ``"regression"``, ``gmfe`` for
        ``"regression_gmfe"``, otherwise ``auc``, ``acc``, ``pre``, ``rec``,
        ``f`` and ``sup``. Empty when ``info.label_dim`` is 0.
    """
    # Imported locally so every metric used below is in scope regardless of
    # which sklearn names the surrounding module imports.
    from sklearn.metrics import (accuracy_score, auc, mean_squared_error,
                                 precision_recall_fscore_support, r2_score,
                                 roc_curve)

    pred_score = np.array(prediction_data)
    if pred_score.ndim == 3:  # multi-label-multi-task
        # #data x #task x #class
        # => this program supports only 2 labels
        pred_score = pred_score[:, :, 1]
    true_label = np.array(labels)
    # #data x #task x #class
    if pred_score.ndim == 1:
        pred_score = pred_score[:, np.newaxis]
    if true_label.ndim == 1:
        true_label = true_label[:, np.newaxis]

    task = config["task"]
    is_classification = task not in ("regression", "regression_gmfe")
    if is_classification:
        # Hard 0/1 decisions at a 0.5 threshold; identical for every label,
        # so computed once instead of once per loop iteration.
        pred = (pred_score > 0.5).astype(float)

    label_metrics = {}
    for i in range(info.label_dim):
        label_metrics = {}
        y_true = true_label[:, i]
        y_score = pred_score[:, i]
        if task == "regression":
            label_metrics["r2"] = r2_score(y_true, y_score)
            label_metrics["mse"] = mean_squared_error(y_true, y_score)
        elif task == "regression_gmfe":
            label_metrics["gmfe"] = np.exp(
                np.mean(np.log(y_true / y_score)))
        else:
            fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=1)
            scores = precision_recall_fscore_support(
                y_true, pred[:, i], average='binary')
            label_metrics["auc"] = auc(fpr, tpr)
            label_metrics["acc"] = accuracy_score(y_true, pred[:, i])
            label_metrics["pre"] = scores[0]
            label_metrics["rec"] = scores[1]
            label_metrics["f"] = scores[2]
            label_metrics["sup"] = scores[3]
    # NOTE(review): only the last label's metrics are returned, matching the
    # JSON schema written so far (a per-label list used to be built but was
    # never stored). Confirm whether all labels should be reported.
    return label_metrics


def train(sess, graph, config):
    """Train the model, evaluate it on the validation data and save outputs.

    When ``config["validation_dataset"]`` is ``None`` the training and
    validation sets are split from ``config["dataset"]``; otherwise the two
    files are loaded separately and their ``info`` records are merged.

    Args:
        sess: Session object passed to ``CoreModel`` and to the graph export;
            its ``graph`` attribute is handed to the summary writer when
            profiling is enabled.
        graph: Graph object whose ``as_graph_def()`` is frozen when
            ``config["export_model"]`` is set.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_dataset``, ``validation_data_rate``, ``model.py``,
            ``task``, ``profile``, ``save_info_valid``, ``export_model``,
            ``save_result_valid`` and ``make_plot``.

    Returns:
        None. Results are printed and optionally written to the paths named
        in ``config``.
    """
    if config["validation_dataset"] is None:
        _, train_data, valid_data, info = load_and_split_data(
            config,
            filename=config["dataset"],
            valid_data_rate=config["validation_data_rate"])
    else:
        print("[INFO] training")
        train_data, info = load_data(config, filename=config["dataset"])
        print("[INFO] validation")
        valid_data, valid_info = load_data(
            config, filename=config["validation_dataset"])
        # Merge the two dataset descriptions: larger node count, total graphs.
        info["graph_node_num"] = max(info["graph_node_num"],
                                     valid_info["graph_node_num"])
        info["graph_num"] = info["graph_num"] + valid_info["graph_num"]

    model = CoreModel(sess, config, info)
    load_model_py(model, config["model.py"])
    if config["profile"]:
        vars_to_train = tf.trainable_variables()
        print(vars_to_train)
        # Kept in a local so the writer object lives until train() returns.
        writer = tf.summary.FileWriter('logs', sess.graph)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("traing time:{0}".format(train_time) + "[sec]")

    # prediction_data only exists when there is something to validate on;
    # the later sections check this flag instead of hitting an unbound name.
    has_validation = valid_data.num > 0
    prediction_data = None
    if has_validation:
        # Validation
        start_t = time.time()
        validation_cost, validation_metrics, prediction_data = \
            model.pred_and_eval(valid_data)
        infer_time = time.time() - start_t
        print("final cost =", validation_cost)
        print("accuracy =", validation_metrics["accuracy"])
        print("validation time:{0}".format(infer_time) + "[sec]")
        # Saving
        if config["save_info_valid"] is not None:
            result = {
                "validation_cost": validation_cost,
                "validation_accuracy": validation_metrics,
                "train_time": train_time,
                "infer_time": infer_time,
                "valid_metrics": _compute_valid_metrics(
                    config, info, prediction_data, valid_data.labels),
            }
            save_path = config["save_info_valid"]
            save_dir = os.path.dirname(save_path)
            # os.makedirs("") raises FileNotFoundError, so a bare filename
            # (no directory part) must skip directory creation.
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            print("[SAVE] ", save_path)
            # Context manager guarantees the JSON is flushed and closed.
            with open(save_path, "w") as fp:
                json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["export_model"]:
        try:
            print("[SAVE]", config["export_model"])
            graph_def = graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), ['output'])
            tf.train.write_graph(graph_def, '.', config["export_model"],
                                 as_text=False)
        except Exception as err:
            # Export stays best-effort, but Ctrl-C / SystemExit are no longer
            # swallowed and the underlying cause is reported.
            print('[ERROR] output has been not found')
            print('[ERROR]', repr(err))

    if config["save_result_valid"] is not None:
        if has_validation:
            save_prediction(config["save_result_valid"], prediction_data)
        else:
            print("[WARN] no validation data: prediction result is not saved")

    if config["make_plot"]:
        plot_cost(config, valid_data, model)
        if has_validation:
            plot_auc(config, valid_data.labels, np.array(prediction_data))
        else:
            print("[WARN] no validation data: AUC plot is skipped")