def bagging_rgcn_embeddings(adj_matrix_files, node_labels_file,
                            embeddings_file, use_cuda, params, metadata):
    """Run ``loocv.run`` with a Bagging/RGCN model on loaded embeddings.

    Everything happens inside one MLflow run named ``RUN_NAME`` under the
    "LOOCV" experiment: the configuration is logged, the labels,
    embeddings and graph are loaded, ``loocv.run`` is called and the
    resulting ranks are handed to ``data_savers.save_ranks``.

    Args:
        adj_matrix_files: Collection of adjacency-matrix inputs passed to
            ``data_loaders.load_graph``; its length is forwarded as
            ``n_rels``.
        node_labels_file: Input passed to ``data_loaders.load_labels``.
        embeddings_file: Input passed to ``data_loaders.load_embeddings``;
            the result is used as the model features.
        use_cuda: Forwarded to every loader and stored as an MLflow tag.
        params: Mapping of keyword arguments. It is logged through
            ``u_mlflow.add_params``, expanded into ``loocv.run`` and also
            passed to ``data_savers.save_ranks``.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=RUN_NAME):
        # Log the run configuration before touching any input data.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        node_labels = data_loaders.load_labels(
            node_labels_file, use_cuda=use_cuda
        )
        # The first dimension of the labels fixes the node count.
        node_count = node_labels.size(0)

        node_embeddings = data_loaders.load_embeddings(
            embeddings_file, use_cuda=use_cuda
        )

        graph_options = {
            "add_edge_type": True,
            "add_node_ids": True,
            "normalization": NORMALIZATION,
            "use_cuda": use_cuda,
        }
        relational_graph = data_loaders.load_graph(
            adj_matrix_files, node_count, **graph_options
        )

        print(RUN_NAME)
        relation_count = len(adj_matrix_files)
        rankings = loocv.run(
            labels=node_labels,
            model_class=Bagging,
            bagging_model=RGCN,
            graph=relational_graph,
            features=node_embeddings,
            n_rels=relation_count,
            **params
        )

        data_savers.save_ranks(rankings, node_count, RUN_NAME, params)
示例#2
0
def direct_neighbors(adj_matrix_file, node_labels_file, use_cuda, metadata):
    """Run ``loocv.run`` with the ``DirectNeighbors`` model on one graph.

    Inside an MLflow run named ``RUN_NAME`` (experiment "LOOCV") the
    labels and a single-matrix graph are loaded, ``loocv.run`` is called
    and its result is given to ``data_savers.save_ranks``.

    Args:
        adj_matrix_file: Single adjacency-matrix input; it is wrapped in a
            one-element list for ``data_loaders.load_graph``.
        node_labels_file: Input passed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the loaders and stored as an MLflow tag.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=RUN_NAME):
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("model", MODEL_NAME)
        mlflow.log_param("merged_layers", True)

        node_labels = data_loaders.load_labels(
            node_labels_file, use_cuda=use_cuda
        )
        node_count = node_labels.size(0)

        # The graph is loaded without self loops and without normalization.
        adjacency_inputs = [adj_matrix_file]
        neighbor_graph = data_loaders.load_graph(
            adjacency_inputs,
            node_count,
            self_loop=False,
            normalization=None,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        rankings = loocv.run(
            labels=node_labels,
            model_class=DirectNeighbors,
            graph=neighbor_graph,
        )

        data_savers.save_ranks(rankings, node_count, RUN_NAME)
示例#3
0
def direct_neighbors(adj_matrix_file, train_node_labels_file,
                     test_node_labels_file, use_cuda, metadata):
    """Run ``test.run`` with the ``DirectNeighbors`` model on one graph.

    Inside an MLflow run named ``RUN_NAME`` (experiment "Test") the train
    and test labels are loaded and OR-ed into a single label tensor, a
    training mask is derived from the test labels, the graph is loaded,
    ``test.run`` is called and its result is given to
    ``data_savers.save_ranks``.

    Args:
        adj_matrix_file: Single adjacency-matrix input; it is wrapped in a
            one-element list for ``data_loaders.load_graph``.
        train_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the training labels.
        test_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the test labels.
        use_cuda: Forwarded to the loaders and stored as an MLflow tag.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("Test")

    with mlflow.start_run(run_name=RUN_NAME):
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("model", MODEL_NAME)

        train_labels = data_loaders.load_labels(train_node_labels_file,
                                                use_cuda=use_cuda)
        test_labels = data_loaders.load_labels(test_node_labels_file,
                                               use_cuda=use_cuda)
        # A node is labelled if it is labelled in either split.
        labels = (train_labels.byte() | test_labels.byte()).long()
        # Logical negation of the test labels. ``~`` is avoided on purpose:
        # on PyTorch >= 1.2 it is a bitwise NOT for uint8 tensors
        # (0 -> 255, 1 -> 254), so every entry would be non-zero and the
        # mask would no longer exclude test nodes. Comparing with zero gives
        # a 0/1 (or bool) mask on old and new PyTorch alike.
        # NOTE(review): assumes load_labels returns a torch tensor; on
        # PyTorch >= 1.2 the mask dtype is bool instead of uint8 -- confirm
        # test.run accepts that.
        train_mask = test_labels.byte() == 0
        n_nodes = labels.size(0)

        graph = data_loaders.load_graph(
            [adj_matrix_file],
            n_nodes,
            self_loop=False,
            normalization=None,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks_df = test.run(
            labels=labels,
            train_mask=train_mask,
            model_class=DirectNeighbors,
            graph=graph,
        )

        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME)
示例#4
0
def bagging_rgcn(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``test.run`` with a Bagging/RGCN model on a multi-matrix graph.

    Inside an MLflow run named ``RUN_NAME`` (experiment "Test") the train
    and test labels are loaded and OR-ed into a single label tensor, a
    training mask is derived from the test labels, optional node features
    and the graph are loaded, ``test.run`` is called and its result is
    given to ``data_savers.save_ranks``.

    Args:
        adj_matrix_files: Collection of adjacency-matrix inputs passed to
            ``data_loaders.load_graph``; its length is forwarded as
            ``n_rels``.
        train_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the training labels.
        test_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the test labels.
        node_features_file: Optional node-features input. When not
            ``None`` it is logged as an MLflow artifact under "inputs" and
            loaded with ``data_loaders.load_node_features``; otherwise the
            model receives ``features=None``.
        use_cuda: Forwarded to the loaders and stored as an MLflow tag.
        params: Mapping of keyword arguments. It is logged through
            ``u_mlflow.add_params``, expanded into ``test.run`` and also
            passed to ``data_savers.save_ranks``.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("Test")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        train_labels = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        test_labels = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either split.
        labels = (train_labels.byte() | test_labels.byte()).long()
        # Logical negation of the test labels. ``~`` is avoided on purpose:
        # on PyTorch >= 1.2 it is a bitwise NOT for uint8 tensors
        # (0 -> 255, 1 -> 254), so every entry would be non-zero and the
        # mask would no longer exclude test nodes. Comparing with zero gives
        # a 0/1 (or bool) mask on old and new PyTorch alike.
        # NOTE(review): assumes load_labels returns a torch tensor; on
        # PyTorch >= 1.2 the mask dtype is bool instead of uint8 -- confirm
        # test.run accepts that.
        train_mask = test_labels.byte() == 0
        n_nodes = labels.size(0)

        if node_features_file is not None:
            mlflow.log_param("node_features", True)
            mlflow.log_artifact(node_features_file, "inputs")
            features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            mlflow.log_param("node_features", False)
            features = None

        graph = data_loaders.load_graph(
            adj_matrix_files,
            n_nodes,
            add_edge_type=True,
            add_node_ids=True,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks_df = test.run(
            labels=labels,
            train_mask=train_mask,
            model_class=Bagging,
            bagging_model=RGCN,
            features=features,
            graph=graph,
            n_rels=len(adj_matrix_files),
            **params
        )

        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME, params)
示例#5
0
def bagging_gcn(
    adj_matrix_file,
    node_features_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with a Bagging/GCN model and log the result.

    Inside an MLflow run named ``RUN_NAME`` (experiment "Predict") the
    train and test labels are loaded and OR-ed into one label tensor,
    optional node features and a single-matrix graph are loaded,
    ``predict.run`` is called and the returned dataframe is logged through
    ``u_mlflow.log_dataframe``.

    Args:
        adj_matrix_file: Single adjacency-matrix input; it is wrapped in a
            one-element list for ``data_loaders.load_graph``.
        node_features_file: Optional node-features input. When not
            ``None`` it is logged as an MLflow artifact under "inputs" and
            loaded with ``data_loaders.load_node_features``; otherwise the
            model receives ``features=None``.
        train_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the training labels.
        test_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the test labels.
        use_cuda: Forwarded to the loaders and stored as an MLflow tag.
        params: Mapping of keyword arguments, logged through
            ``u_mlflow.add_params`` and expanded into ``predict.run``.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        # Log the run configuration before touching any input data.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        known_train = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        known_test = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # Both label sets are merged: every known label is used here.
        merged_labels = known_train.byte() | known_test.byte()
        all_labels = merged_labels.long()

        node_count = all_labels.size(0)

        has_node_features = node_features_file is not None
        mlflow.log_param("node_features", has_node_features)
        if has_node_features:
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            node_features = None

        adjacency_inputs = [adj_matrix_file]
        gcn_graph = data_loaders.load_graph(
            adjacency_inputs,
            node_count,
            self_loop=SELF_LOOP,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        predictions = predict.run(
            labels=all_labels,
            model_class=Bagging,
            bagging_model=GCN,
            graph=gcn_graph,
            features=node_features,
            **params
        )

        u_mlflow.log_dataframe(predictions, "predictions", "results")
def bagging_rgcn_embeddings(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    embeddings_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with a Bagging/RGCN model on loaded embeddings.

    Inside an MLflow run named ``RUN_NAME`` (experiment "Predict") the
    train and test labels are loaded and OR-ed into one label tensor, the
    embeddings and the multi-matrix graph are loaded, ``predict.run`` is
    called and the returned dataframe is logged through
    ``u_mlflow.log_dataframe``.

    Args:
        adj_matrix_files: Collection of adjacency-matrix inputs passed to
            ``data_loaders.load_graph``; its length is forwarded as
            ``n_rels``.
        train_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the training labels.
        test_node_labels_file: Input passed to
            ``data_loaders.load_labels`` for the test labels.
        embeddings_file: Input passed to ``data_loaders.load_embeddings``;
            the result is used as the model features.
        use_cuda: Forwarded to the loaders and stored as an MLflow tag.
        params: Mapping of keyword arguments, logged through
            ``u_mlflow.add_params`` and expanded into ``predict.run``.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        # Log the run configuration before touching any input data.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        known_train = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        known_test = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # Both label sets are merged: every known label is used here.
        merged_labels = known_train.byte() | known_test.byte()
        all_labels = merged_labels.long()
        node_count = all_labels.size(0)

        node_embeddings = data_loaders.load_embeddings(
            embeddings_file, use_cuda=use_cuda
        )

        graph_options = {
            "add_edge_type": True,
            "add_node_ids": True,
            "normalization": NORMALIZATION,
            "use_cuda": use_cuda,
        }
        relational_graph = data_loaders.load_graph(
            adj_matrix_files, node_count, **graph_options
        )

        print(RUN_NAME)
        relation_count = len(adj_matrix_files)
        predictions = predict.run(
            labels=all_labels,
            model_class=Bagging,
            bagging_model=RGCN,
            features=node_embeddings,
            graph=relational_graph,
            n_rels=relation_count,
            **params
        )

        u_mlflow.log_dataframe(predictions, "predictions", "results")
示例#7
0
def bagging_gcn(adj_matrix_file, node_features_file, node_labels_file,
                use_cuda, params, metadata):
    """Run ``loocv.run`` with a Bagging/GCN model on a single graph.

    Inside an MLflow run named ``RUN_NAME`` (experiment "LOOCV") the
    labels, optional node features and a single-matrix graph are loaded,
    ``loocv.run`` is called and its result is given to
    ``data_savers.save_ranks``.

    Args:
        adj_matrix_file: Single adjacency-matrix input; it is wrapped in a
            one-element list for ``data_loaders.load_graph``.
        node_features_file: Optional node-features input. When not
            ``None`` it is logged as an MLflow artifact under "inputs" and
            loaded with ``data_loaders.load_node_features``; otherwise the
            model receives ``features=None``.
        node_labels_file: Input passed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the loaders and stored as an MLflow tag.
        params: Mapping of keyword arguments. It is logged through
            ``u_mlflow.add_params``, expanded into ``loocv.run`` and also
            passed to ``data_savers.save_ranks``.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Returns:
        None.
    """
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=RUN_NAME):
        # Log the run configuration before touching any input data.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("merged_layers", True)

        node_labels = data_loaders.load_labels(
            node_labels_file, use_cuda=use_cuda
        )
        node_count = node_labels.size(0)

        has_node_features = node_features_file is not None
        mlflow.log_param("node_features", has_node_features)
        if has_node_features:
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            node_features = None

        adjacency_inputs = [adj_matrix_file]
        gcn_graph = data_loaders.load_graph(
            adjacency_inputs,
            node_count,
            self_loop=SELF_LOOP,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        rankings = loocv.run(
            labels=node_labels,
            model_class=Bagging,
            bagging_model=GCN,
            graph=gcn_graph,
            features=node_features,
            **params
        )

        data_savers.save_ranks(rankings, node_count, RUN_NAME, params)