def direct_neighbors(
    adj_matrix_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    metadata,
):
    """Run the DirectNeighbors model and log its predictions to MLflow.

    All work happens inside a single MLflow run (named ``RUN_NAME``) of the
    "Predict" experiment.

    Args:
        adj_matrix_file: Adjacency-matrix file; handed to
            ``data_loaders.load_graph`` as a one-element list.
        train_node_labels_file: File passed to ``data_loaders.load_labels``
            for the training labels.
        test_node_labels_file: File passed to ``data_loaders.load_labels``
            for the test labels.
        use_cuda: Forwarded to the data loaders and recorded as the
            ``use_cuda`` tag of the run.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("model", MODEL_NAME)

        # Load the training labels first, then the test labels.
        train_part, test_part = (
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        )
        # Element-wise OR of both label sets, converted back to long.
        merged = train_part.byte() | test_part.byte()
        labels = merged.long()
        n_nodes = labels.size(0)

        graph_options = {
            "self_loop": False,
            "normalization": None,
            "use_cuda": use_cuda,
        }
        graph = data_loaders.load_graph([adj_matrix_file], n_nodes, **graph_options)

        print(RUN_NAME)
        ranks_df = predict.run(
            labels=labels,
            model_class=DirectNeighbors,
            graph=graph,
        )
        u_mlflow.log_dataframe(ranks_df, "predictions", "results")
def save_ranks(ranks_df, n_nodes, run_name, params=None):
    """Log ranks, their cumulative distribution and summary metrics to MLflow.

    Logged items: the ranks dataframe, the cumulative distribution (as an
    array and as a stepped ``cum_dist`` metric), ``auc``, ``auc_ratio``,
    ``median_rank`` and a cumulative-distribution figure.

    Args:
        ranks_df: Dataframe with a ``"rank"`` column.
        n_nodes: Number of nodes; the AUC x-axis is ``1..n_nodes`` and the
            AUC ratio is ``auc / (n_nodes - 1)``.
        run_name: Forwarded to ``plot_cumulative_distribution``.
        params: Forwarded to ``plot_cumulative_distribution``.
    """
    u_mlflow.log_dataframe(ranks_df, "ranks", "results")

    cum_dist = cumulative_distribution_function(
        ranks=ranks_df["rank"].values, n_nodes=n_nodes
    )
    u_mlflow.log_ndarray(cum_dist, "cum_dist", "results")
    # One metric point per entry so MLflow can display the curve.
    for index, n in enumerate(cum_dist):
        mlflow.log_metric("cum_dist", n, step=index)

    # AUC
    x = np.arange(1, n_nodes + 1)
    auc = metrics.auc(x=x, y=cum_dist)
    mlflow.log_metric("auc", auc)

    # AUC ratio
    mlflow.log_metric("auc_ratio", auc / (n_nodes - 1))

    # Median rank
    median = np.median(ranks_df["rank"].values)
    mlflow.log_metric("median_rank", median)

    # Plot
    fig = plot_cumulative_distribution(x, cum_dist, auc, run_name, params)
    try:
        u_mlflow.log_fig(fig, "cumulative_distribution", "figures")
    finally:
        # Close the figure even when logging fails; otherwise pyplot keeps
        # it registered and repeated calls accumulate open figures.
        plt.close(fig)
def bagging_mlp(
    embeddings_file,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run a Bagging model over MLPs and log its predictions to MLflow.

    The model input is built from the embeddings, the node features, or both
    concatenated along dimension 1 (embeddings first). All work happens
    inside a single MLflow run (named ``RUN_NAME``) of the "Predict"
    experiment.

    Args:
        embeddings_file: Embeddings file, or ``None`` to skip embeddings.
            When given, it is also logged as an "inputs" artifact.
        train_node_labels_file: File passed to ``data_loaders.load_labels``
            for the training labels.
        test_node_labels_file: File passed to ``data_loaders.load_labels``
            for the test labels.
        node_features_file: Node-features file, or ``None`` to skip node
            features. When given, it is also logged as an "inputs" artifact.
        use_cuda: Forwarded to the data loaders and recorded as the
            ``use_cuda`` tag of the run.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Raises:
        ValueError: If both ``embeddings_file`` and ``node_features_file``
            are ``None``, since the model would have no input features.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        train_labels = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        test_labels = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # Element-wise OR of both label sets, converted back to long.
        labels = (train_labels.byte() | test_labels.byte()).long()

        embeddings = None
        node_features = None

        if embeddings_file is not None:
            mlflow.log_param("embeddings", True)
            mlflow.log_artifact(embeddings_file, "inputs")
            embeddings = data_loaders.load_embeddings(
                embeddings_file, use_cuda=use_cuda
            )
        else:
            mlflow.log_param("embeddings", False)

        if node_features_file is not None:
            mlflow.log_param("node_features", True)
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            mlflow.log_param("node_features", False)

        if embeddings is not None and node_features is not None:
            in_features = torch.cat((embeddings, node_features), dim=1)
        elif embeddings is not None:
            in_features = embeddings
        elif node_features is not None:
            in_features = node_features
        else:
            # Previously this path left in_features unbound and failed later
            # with an UnboundLocalError; fail with an actionable message.
            raise ValueError(
                "bagging_mlp requires at least one of embeddings_file or "
                "node_features_file"
            )

        print(RUN_NAME)
        ranks_df = predict.run(
            labels=labels,
            model_class=Bagging,
            bagging_model=MLP,
            features=in_features,
            **params
        )
        u_mlflow.log_dataframe(ranks_df, "predictions", "results")
def bagging_rgcn(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run a Bagging model over RGCNs and log its predictions to MLflow.

    All work happens inside a single MLflow run (named ``RUN_NAME``) of the
    "Predict" experiment.

    Args:
        adj_matrix_files: Adjacency-matrix files passed to
            ``data_loaders.load_graph``; their count is given to the model
            as ``n_rels``.
        train_node_labels_file: File passed to ``data_loaders.load_labels``
            for the training labels.
        test_node_labels_file: File passed to ``data_loaders.load_labels``
            for the test labels.
        node_features_file: Node-features file, or ``None`` to run with
            ``features=None``. When given, it is also logged as an "inputs"
            artifact.
        use_cuda: Forwarded to the data loaders and recorded as the
            ``use_cuda`` tag of the run.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        # Load the training labels first, then the test labels.
        train_part, test_part = (
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        )
        # Element-wise OR of both label sets, converted back to long.
        merged = train_part.byte() | test_part.byte()
        labels = merged.long()
        n_nodes = labels.size(0)

        # Record whether node features are used, then load them if present.
        has_node_features = node_features_file is not None
        mlflow.log_param("node_features", has_node_features)
        features = None
        if has_node_features:
            mlflow.log_artifact(node_features_file, "inputs")
            features = data_loaders.load_node_features(node_features_file, use_cuda)

        graph_options = {
            "add_edge_type": True,
            "add_node_ids": True,
            "normalization": NORMALIZATION,
            "use_cuda": use_cuda,
        }
        graph = data_loaders.load_graph(adj_matrix_files, n_nodes, **graph_options)

        print(RUN_NAME)
        ranks_df = predict.run(
            labels=labels,
            model_class=Bagging,
            bagging_model=RGCN,
            features=features,
            graph=graph,
            n_rels=len(adj_matrix_files),
            **params
        )
        u_mlflow.log_dataframe(ranks_df, "predictions", "results")
def net_prop_with_restart(
    run_name,
    model_name,
    normalization,
    adj_matrix_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run the NetPropWithRestart model and log its predictions to MLflow.

    All work happens inside a single MLflow run of the "Predict" experiment.

    Args:
        run_name: Name of the MLflow run; also printed before prediction.
        model_name: Value logged as the ``model`` parameter.
        normalization: Forwarded to ``data_loaders.load_adj_matrices``.
        adj_matrix_file: Adjacency-matrix file; loaded as a one-element
            list, of which the first result is used.
        train_node_labels_file: File passed to ``data_loaders.load_labels``
            for the training labels.
        test_node_labels_file: File passed to ``data_loaders.load_labels``
            for the test labels.
        use_cuda: Forwarded to the data loaders and recorded as the
            ``use_cuda`` tag of the run.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=run_name):
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.log_param("model", model_name)
        mlflow.set_tag("use_cuda", use_cuda)

        # Load the training labels first, then the test labels.
        train_part, test_part = (
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        )
        # Element-wise OR of both label sets, converted back to long.
        merged = train_part.byte() | test_part.byte()
        labels = merged.long()

        # The loader works on lists; only one file is supplied here.
        loaded_matrices = data_loaders.load_adj_matrices(
            [adj_matrix_file],
            normalization=normalization,
            use_cuda=use_cuda,
        )
        adjacency_matrix = loaded_matrices[0]

        print(run_name)
        ranks_df = predict.run(
            labels=labels,
            model_class=NetPropWithRestart,
            adjacency_matrix=adjacency_matrix,
            **params
        )
        u_mlflow.log_dataframe(ranks_df, "predictions", "results")
def rwr_m(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run the RwrM model and log its predictions to MLflow.

    All work happens inside a single MLflow run (named ``RUN_NAME``) of the
    "Predict" experiment.

    Args:
        adj_matrix_files: Adjacency-matrix files; loaded without
            normalization and stacked with ``torch.stack``.
        train_node_labels_file: File passed to ``data_loaders.load_labels``
            for the training labels.
        test_node_labels_file: File passed to ``data_loaders.load_labels``
            for the test labels.
        use_cuda: Forwarded to the data loaders and recorded as the
            ``use_cuda`` tag of the run.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        # Load the training labels first, then the test labels.
        train_part, test_part = (
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        )
        # Element-wise OR of both label sets, converted back to long.
        merged = train_part.byte() | test_part.byte()
        labels = merged.long()

        print("Loading adjacency matrices")
        matrix_list = data_loaders.load_adj_matrices(
            adj_matrix_files,
            normalization=None,
            use_cuda=use_cuda,
        )
        adjacency_matrices = torch.stack(matrix_list)

        print(RUN_NAME)
        ranks_df = predict.run(
            labels=labels,
            model_class=RwrM,
            adjacency_matrices=adjacency_matrices,
            **params
        )
        u_mlflow.log_dataframe(ranks_df, "predictions", "results")