def build_model(training_data, training_adj, training_labels, eval_data, eval_adj, eval_labels,
                test_data, test_adj, test_labels, learning_hyperparams, class_weights, graph_params,
                dumping_name, is_nni=False, device=1):
    # One activation per layer: all hidden layers plus the output layer.
    activations = [learning_hyperparams.activation] * (len(learning_hyperparams.hidden_layers) + 1)
    conf = {"model": learning_hyperparams.model,
            "hidden_layers": learning_hyperparams.hidden_layers,
            "dropout": learning_hyperparams.dropout,
            "lr": learning_hyperparams.learning_rate,
            "weight_decay": learning_hyperparams.l2_regularization,
            "training_mat": training_data, "training_adj": training_adj, "training_labels": training_labels,
            "eval_mat": eval_data, "eval_adj": eval_adj, "eval_labels": eval_labels,
            "test_mat": test_data, "test_adj": test_adj, "test_labels": test_labels,
            "optimizer": learning_hyperparams.optimizer,
            "epochs": learning_hyperparams.epochs,
            "activations": activations,
            "loss_coeffs": learning_hyperparams.loss_coefficients,
            "unary": learning_hyperparams.unary_loss_type,
            "edge_normalization": learning_hyperparams.edge_normalization}
    products_path = os.path.join(os.getcwd(), "logs", *dumping_name, datetime.now().strftime("%Y%m%d_%H%M%S_%f"))
    check_make_dir(products_path)
    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_" + dumping_name[1], path=products_path, level=logging.INFO)], name=None)
    runner = ModelRunner(conf, logger=logger, weights=class_weights, graph_params=graph_params,
                         early_stop=learning_hyperparams.early_stop, is_nni=is_nni,
                         tmp_path=products_path, device=device)
    return runner
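# --- Hedged usage sketch (not from the original source) ----------------------
# A minimal example of how this builder might be invoked. The hyperparameter
# container below is an assumption: the original only shows which attributes
# `learning_hyperparams` must expose, not its concrete type. The toy matrices
# stand in for real feature/adjacency/label tensors, and the values are
# illustrative only.
def _example_build_model():
    import numpy as np
    from types import SimpleNamespace

    hyperparams = SimpleNamespace(
        model="gcn",                      # assumed model identifier
        hidden_layers=[64, 32],
        dropout=0.4,
        learning_rate=0.01,
        l2_regularization=5e-4,
        optimizer="Adam",
        epochs=200,
        activation="relu",                # replicated once per layer by build_model
        loss_coefficients=[1., 0., 0.],
        unary_loss_type="bce",
        edge_normalization="correct",
        early_stop=True,
    )
    x = np.zeros((10, 5))                 # toy feature matrix
    adj = np.eye(10)                      # toy adjacency matrix
    y = np.zeros(10)                      # toy labels
    # dumping_name must be a sequence: build_model unpacks it with *dumping_name
    # and indexes it with dumping_name[1].
    return build_model(x, adj, y, x, adj, y, x, adj, y, hyperparams,
                       class_weights={0: 1., 1: 5.}, graph_params={},
                       dumping_name=("experiment", "run1"))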
def build_model(rand_test_indices, train_indices, traint, testt, labels, X, adj_tr, adj_te,
                in_features, hid_features, out_features, ds_name, activation, optimizer, epochs,
                dropout, lr, l2_pen, beta, gamma, dumping_name, GS, is_nni=False):
    optim_name = "SGD"
    if optimizer == optim.Adam:
        optim_name = "Adam"
    conf = {"in_features": in_features, "hid_features": hid_features, "out_features": out_features,
            "ds_name": ds_name, "dropout": dropout, "lr": lr, "weight_decay": l2_pen,
            "beta": beta, "gamma": gamma,
            # "training_mat": training_data, "training_labels": training_labels,
            # "test_mat": test_data, "test_labels": test_labels,
            "train_ind": train_indices, "test_ind": rand_test_indices,
            "traint": traint, "testt": testt, "labels": labels, "X": X,
            "adj_tr": adj_tr, "adj_te": adj_te,
            "optimizer": optimizer, "epochs": epochs, "activation": activation,
            "optim_name": optim_name}
    products_path = os.path.join(os.getcwd(), "logs", dumping_name, time.strftime("%Y%m%d_%H%M%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_%s" % dumping_name, path=products_path, level=logging.INFO)], name=None)
    data_logger = CSVLogger("results_%s" % dumping_name, path=products_path)
    data_logger.info("model_name", "loss", "acc")
    runner = ModelRunner(conf, GS, logger=logger, data_logger=data_logger, is_nni=is_nni)
    return runner
def build_model(training_data, training_labels, test_data, test_labels, adjacency_matrices,
                hid_features, activation, optimizer, epochs, dropout, lr, l2_pen, temporal_pen,
                dumping_name, feature_matrices, is_nni=False):
    optim_name = "SGD"
    if optimizer == optim.Adam:
        optim_name = "Adam"
    conf = {"hid_features": hid_features, "dropout": dropout, "lr": lr, "weight_decay": l2_pen,
            "temporal_pen": temporal_pen,
            "training_mat": training_data, "training_labels": training_labels,
            "test_mat": test_data, "test_labels": test_labels,
            "adj_matrices": adjacency_matrices,
            "optimizer": optimizer, "epochs": epochs,
            "feature_matrices": feature_matrices,
            "activation": activation, "optim_name": optim_name}
    products_path = os.path.join(os.getcwd(), "logs", dumping_name, time.strftime("%Y%m%d_%H%M%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_%s" % dumping_name, path=products_path, level=logging.INFO)], name=None)
    data_logger = CSVLogger("results_%s" % dumping_name, path=products_path)
    data_logger.info("model_name", "loss", "acc")
    logger.debug("STARTING with lr={:.4f}, dropout={:.4f}, regularization_l2_pen={:.4f}, "
                 "temporal_pen={:.10f}, optimizer={}".format(lr, dropout, l2_pen, temporal_pen, optim_name))
    runner = ModelRunner(conf, logger=logger, data_logger=data_logger, is_nni=is_nni)
    return runner
def main_clean():
    args = parse_args()
    dataset = "citeseer"
    seed = random.randint(1, 1000000000)
    # "feat_type": "neighbors",
    conf = {"kipf": {"hidden": args.hidden, "dropout": args.dropout, "lr": args.lr,
                     "weight_decay": args.weight_decay},
            "hidden_layers": [16],
            "multi_hidden_layers": [100, 35],
            "dropout": 0.6, "lr": 0.01, "weight_decay": 0.001,
            "dataset": dataset, "epochs": args.epochs,
            "cuda": args.cuda, "fastmode": args.fastmode, "seed": seed}
    init_seed(conf['seed'], conf['cuda'])
    dataset_path = os.path.join(PROJ_DIR, "data", dataset)
    products_path = os.path.join(CUR_DIR, "logs", args.prefix + dataset, time.strftime("%Y_%m_%d_%H_%M_%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("IdansLogger", level=logging.DEBUG),
        FileLogger("results_%s" % conf["dataset"], path=products_path, level=logging.INFO),
        FileLogger("results_%s_all" % conf["dataset"], path=products_path, level=logging.DEBUG),
    ], name=None)
    data_logger = CSVLogger("results_%s" % conf["dataset"], path=products_path)
    data_logger.info("model_name", "loss", "acc", "train_p")
    runner = ModelRunner(dataset_path, conf, logger=logger, data_logger=data_logger)
    # execute_runner(runner, logger, 5, num_iter=30)
    for train_p in range(5, 90, 10):
        execute_runner(runner, logger, train_p, num_iter=10)
    logger.info("Finished")
def __init__(self, edge_path, dir_path, features, acc=True, directed=False, gpu=False, device=2,
             verbose=True, params=None):
    """
    A class used to calculate features for a given graph, input as a text-like file.

    :param edge_path: str
        Path to the graph edges file (a text-like file, e.g. txt or csv), from which the graph is
        built using networkx. The graph must be unweighted. If its vertices are not [0, 1, ..., n-1],
        they are mapped to [0, 1, ..., n-1] and the mapping is saved.
        Every row in the edges file should read "source_id,destination_id", with no header row.
    :param dir_path: str
        Path to the directory in which the feature calculations will be (or already are) located.
    :param features: list of strings
        Names of the features to calculate. Any name from features_meta.py or
        "additional_features" is accepted.
    :param acc: bool
        Whether to run the accelerated features, assuming it is possible to do so.
    :param directed: bool
        Whether the built graph is directed.
    :param gpu: bool
        Whether to use GPUs, assuming it is possible to do so (i.e. a GPU exists and the CUDA
        version matches).
    :param device: int
        If gpu is True, the index of the GPU device on which to calculate. An error is raised if
        the index does not match an available GPU.
    :param verbose: bool
        Whether to print progress messages for the calculation phases.
    :param params: dict, or None
        For clique-detection uses, a dictionary of the graph settings (size, directed, clique
        size, edge probability). Ignored for any other use.
    """
    self._dir_path = dir_path
    self._features = features  # Feature names as they appear in accelerated_features_meta
    self._gpu = gpu
    self._device = device
    self._verbose = verbose
    self._logger = multi_logger([PrintLogger("Logger", level=logging.DEBUG),
                                 FileLogger("FLogger", path=dir_path, level=logging.INFO)],
                                name=None) if verbose else None
    self._params = params
    self._load_graph(edge_path, directed)
    self._get_feature_meta(features, acc)  # acc determines whether to use the accelerated features
    self._adj_matrix = None
    self._raw_features = None
    self._other_features = None
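# --- Hedged usage sketch (not from the original source) ----------------------
# A minimal instantiation example for the constructor documented above. The
# class name `FeatureCalculator` is an assumption (the class statement is not
# shown here), and the paths and feature names are illustrative only.
def _example_feature_calculator():
    calc = FeatureCalculator(
        edge_path="data/my_graph/edges.csv",   # rows of "source_id,destination_id", no header
        dir_path="data/my_graph/features",     # where the feature calculations are stored
        features=["degree", "betweenness_centrality"],  # names from features_meta.py
        acc=True,         # prefer the accelerated implementations where available
        directed=False,
        gpu=False,        # set True (with a valid `device` index) to compute on a GPU
        verbose=True,
    )
    return calc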
def fix_logger(self, dumping_name):
    # os.getcwd() returns the current working directory of the process.
    products_path = os.path.join(os.getcwd(), 'dataset', Dataset_name, "logs", dumping_name,
                                 time.strftime("%Y%m%d_%H%M%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_%s" % dumping_name, path=products_path, level=logging.INFO)], name=None)
    return logger
def build_model(training_data, training_adj, training_labels, test_data, test_adj, test_labels,
                optimizer, epochs, lr, l2_pen, class_weights, graph_params, dumping_name,
                iterations, is_nni=False):
    conf = {"lr": lr, "weight_decay": l2_pen,
            "training_mat": training_data, "training_adj": training_adj, "training_labels": training_labels,
            "test_mat": test_data, "test_adj": test_adj, "test_labels": test_labels,
            "optimizer": optimizer, "epochs": epochs, "iterations": iterations}
    products_path = os.path.join(os.getcwd(), "logs", dumping_name, time.strftime("%Y%m%d_%H%M%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_" + dumping_name, path=products_path, level=logging.INFO)], name=None)
    data_logger = CSVLogger("results_" + dumping_name, path=products_path)
    data_logger.info("model_name", "loss", "acc", "auc")
    runner = ModelRunner(conf, logger=logger, data_logger=data_logger, weights=class_weights,
                         graph_params=graph_params, is_nni=is_nni)
    return runner
def get_loggers(name, products_path, is_debug=True, set_titles=True):
    logger = multi_logger([
        PrintLogger("IdansLogger", level=logging.DEBUG if is_debug else logging.INFO),
        FileLogger("results_%s" % name, path=products_path, level=logging.INFO),
        FileLogger("results_%s_all" % name, path=products_path, level=logging.DEBUG),
    ], name=None)
    data_logger = CSVLogger("results_%s" % name, path=products_path)
    if set_titles:
        data_logger.set_titles("feat_type", "year", "loss_val", "loss_test", "acc", "auc_test",
                               "train_p", "norm_adj", "epoch")
    logger.dump_location()
    data_logger.dump_location()
    return logger, data_logger
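# --- Hedged usage sketch (not from the original source) ----------------------
# How the two loggers might be created for a run named "cora"; the products
# directory below is an assumed example path.
def _example_get_loggers():
    products_path = os.path.join(os.getcwd(), "logs", "cora", time.strftime("%Y_%m_%d_%H_%M_%S"))
    os.makedirs(products_path, exist_ok=True)
    logger, data_logger = get_loggers("cora", products_path)
    logger.info("Loggers ready")  # written to stdout and to the results_cora* files
    return logger, data_logger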
def main(product_params, args):
    train_p = 50
    num_samples = 3
    config = {"hidden_layers": [70, 35],
              "dropout": KIPF_BASE["dropout"],
              "learning_rate": KIPF_BASE["lr"],
              "weight_decay": KIPF_BASE["weight_decay"],
              "epochs": args.epochs,
              "train_p": 0,
              "feat_type": "neighbors",
              "dataset": "firms",
              "seed": 12345678}
    products_path = os.path.join(PROJ_DIR, "logs", config["dataset"], time.strftime("%Y_%m_%d_%H_%M_%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("IdansLogger", level=logging.INFO),
        FileLogger("results_%s" % config["dataset"], path=products_path, level=logging.INFO),
        FileLogger("results_%s_all" % config["dataset"], path=products_path, level=logging.DEBUG),
    ], name=None)
    # data_logger = CSVLogger("results_%s" % config["dataset"], path=products_path)
    # all_args = set(config).union(map(at(0), product_params))
    # data_logger.info("name", "loss", "accuracy", *sorted(all_args))
    runner = ModelRunner(DATA_PATH, args.cuda, logger, None)  # data_logger
    train_p /= 100.
    config["test_p"] = 1 - train_p
    config["train_p"] = train_p
    # for train_p in [5]:  # + list(range(5, 100, 10)):
    for pars in product(*map(at(1), product_params)):
        current_params = list(zip(map(at(0), product_params), pars))
        # cur_seed = 214899513
        cur_seed = random.randint(1, 1000000000)
        current_params.append(("seed", cur_seed))
        config.update(current_params)
        if "seed" in config:
            np.random.seed(config["seed"])
            torch.manual_seed(config["seed"])
            if args.cuda is not None:
                torch.cuda.manual_seed(config["seed"])
        config_args = sorted(config.items(), key=at(0))
        logger.info("Arguments: (train %1.2f) " + ", ".join("%s: %s" % (name, val)
                                                            for name, val in current_params), train_p)
        res = []
        for _ in range(num_samples):
            res.append(runner.run(config_args))
        # res = [runner.run(config) for _ in range(num_samples)]
        pickle.dump({"params": current_params, "res": res},
                    open(os.path.join(products_path, "quant_res.pkl"), "ab"))
def build_model(training_data, training_adj, training_labels, eval_data, eval_adj, eval_labels,
                test_data, test_adj, test_labels, hidden_layers, activations, optimizer, epochs,
                dropout, lr, l2_pen, coeffs, unary, class_weights, graph_params, dumping_name,
                edge_normalization="correct", early_stop=True, is_nni=False, device=1):
    if coeffs is None:
        coeffs = [1., 0., 0.]
    conf = {"hidden_layers": hidden_layers, "dropout": dropout, "lr": lr, "weight_decay": l2_pen,
            "training_mat": training_data, "training_adj": training_adj, "training_labels": training_labels,
            "eval_mat": eval_data, "eval_adj": eval_adj, "eval_labels": eval_labels,
            "test_mat": test_data, "test_adj": test_adj, "test_labels": test_labels,
            "optimizer": optimizer, "epochs": epochs, "activations": activations,
            "loss_coeffs": coeffs, "unary": unary, "edge_normalization": edge_normalization}
    products_path = os.path.join(os.getcwd(), "logs", *dumping_name, time.strftime("%Y%m%d_%H%M%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("MyLogger", level=logging.DEBUG),
        FileLogger("results_" + dumping_name[1], path=products_path, level=logging.INFO)], name=None)
    runner = ModelRunner(conf, logger=logger, weights=class_weights, graph_params=graph_params,
                         early_stop=early_stop, is_nni=is_nni, tmp_path=products_path, device=device)
    return runner
def main():
    args = parse_args()
    dataset = "cora"  # args.dataset
    seed = random.randint(1, 1000000000)
    conf = {"kipf": {"hidden": 16, "dropout": 0.5, "lr": 0.01, "weight_decay": 5e-4},
            "hidden_layers": [16],
            "multi_hidden_layers": [100, 20],
            "dropout": 0.6, "lr": 0.01, "weight_decay": 0.001,
            "dataset": dataset, "epochs": args.epochs,
            "cuda": args.cuda, "fastmode": args.fastmode, "seed": seed}
    init_seed(conf['seed'], conf['cuda'])
    dataset_path = os.path.join(PROJ_DIR, "data", dataset)
    products_path = os.path.join(CUR_DIR, "logs", dataset, time.strftime("%Y_%m_%d_%H_%M_%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)
    logger = multi_logger([
        PrintLogger("IdansLogger", level=logging.INFO),
        FileLogger("results_%s" % conf["dataset"], path=products_path, level=logging.INFO),
        FileLogger("results_%s_all" % conf["dataset"], path=products_path, level=logging.DEBUG),
    ], name=None)
    data_logger = CSVLogger("results_%s" % conf["dataset"], path=products_path)
    data_logger.set_titles("model_name", "loss", "acc", "train_p", "norm_adj", "feat_type")
    num_iter = 5
    for norm_adj in [True, False]:
        conf["norm_adj"] = norm_adj
        runner = ModelRunner(products_path, dataset_path, conf, logger=logger, data_logger=data_logger)
        for train_p in chain([1], range(5, 90, 10)):
            conf["train_p"] = train_p
            train_p /= 100
            val_p = test_p = (1 - train_p) / 2.
            train_p /= (val_p + train_p)
            runner.loader.split_test(test_p)
            for ft, feat_type in enumerate(["combined", "neighbors", "features"]):
                conf["feat_type"] = feat_type
                results = [runner.run(train_p, feat_type) for _ in range(num_iter)]
                conf_path = os.path.join(runner.products_path,
                                         "t%d_n%d_ft%d.pkl" % (conf["train_p"], norm_adj, ft))
                pickle.dump({"res": results, "conf": conf}, open(conf_path, "wb"))
    logger.info("Finished")