Пример #1
0
    def __init__(self, app: str, model: str, **kwargs):
        """Prepare the recommendation data, build the model and cache its embeddings.

        Args:
            app: Forwarded unchanged to the parent constructor.
            model: Model name; forwarded to the parent constructor and to
                ``get_default_args``.
            **kwargs: Extra options, forwarded to the parent constructor and to
                ``get_default_args``. Keys read directly here:

                * ``data`` -- custom data; the slice ``data[-100:, :]`` is used
                  as both the validation and the test split.
                * ``dataset`` -- dataset name, used only when ``data`` is absent
                  (it is popped from ``kwargs``).
                * ``data_path`` -- where the custom data is saved
                  (default ``"tmp_data.pt"``).
                * ``batch_size`` -- default ``2048``.

        When neither ``data`` nor ``dataset`` is supplied, a message is printed
        and the interpreter exits via ``exit(0)``.
        """
        super(RecommendationPipepline, self).__init__(app, model=model, **kwargs)

        if "data" in kwargs:
            data = kwargs["data"]
            # The same trailing slice doubles as validation and test data.
            val_data = test_data = data[-100:, :]
            data = build_recommendation_data("custom", data, val_data, test_data)
            self.data_path = kwargs.get("data_path", "tmp_data.pt")
            # batch_size is assigned once, after this if/elif/else, for both
            # branches; a branch-local assignment would be overwritten there.
            torch.save(data, self.data_path)
            self.dataset = NodeDataset(path=self.data_path, scale_feat=False)
        elif "dataset" in kwargs:
            dataset = kwargs.pop("dataset")
            self.dataset = build_dataset_from_name(dataset)
        else:
            print("Please provide recommendation data!")
            exit(0)

        self.batch_size = kwargs.get("batch_size", 2048)
        self.n_items = self.dataset[0].n_params["n_items"]

        # The default arguments are always requested for the "ali" dataset,
        # regardless of which data source was chosen above.
        args = get_default_args(task="recommendation", dataset="ali", model=model, **kwargs)
        args.model = args.model[0]

        # task = build_task(args, dataset=self.dataset)
        # task.train()

        # self.model = task.model
        self.model = build_model(args)
        self.model.eval()

        self.user_emb, self.item_emb = self.model.generate()
Пример #2
0
def get_default_args_kg(dataset,
                        model,
                        dw="gnn_kg_link_prediction_dw",
                        mw="gnn_kg_link_prediction_mw"):
    """Return default arguments overlaid with every entry of ``default_dict_kg``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw``.
        mw: Passed to ``get_default_args`` as ``mw``.

    Returns:
        The object produced by ``get_default_args`` after each key of
        ``default_dict_kg`` has been set on it as an attribute.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, override in default_dict_kg.items():
        setattr(defaults, name, override)
    return defaults
Пример #3
0
def get_default_args_graph_clf(dataset,
                               model,
                               dw="graph_classification_dw",
                               mw="graph_classification_mw"):
    """Return default arguments overlaid with every entry of ``default_dict``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw``.
        mw: Passed to ``get_default_args`` as ``mw``.

    Returns:
        The object produced by ``get_default_args`` after each key of
        ``default_dict`` has been set on it as an attribute.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, override in default_dict.items():
        setattr(defaults, name, override)
    return defaults
Пример #4
0
    def __init__(self, app: str, model: str, **kwargs):
        """Build an embedding-generation model (and its trainer, if it has one).

        The lists of supported models are read from ``match.yml`` located next
        to this source file, under the ``unsupervised_node_classification``
        key: models of an entry whose ``dataset`` field contains
        ``"blogcatalog"`` are treated as ``"emb"`` methods, models of an entry
        whose ``dataset`` field contains ``"cora"`` as ``"gnn"`` methods.

        Args:
            app: Forwarded unchanged to the parent constructor.
            model: Model name; must appear in one of the two lists above.
            **kwargs: Extra options, forwarded to the parent constructor and to
                ``get_default_args``. Keys read directly here:

                * ``data_path`` -- default ``"tmp_data.pt"``.
                * ``num_features`` -- copied onto the arguments when given;
                  mandatory for ``"gnn"`` methods.

        If the model is unknown, or a ``"gnn"`` model is requested without
        ``num_features``, a message is printed and the interpreter exits via
        ``exit(0)``.
        """
        super(GenerateEmbeddingPipeline, self).__init__(app,
                                                        model=model,
                                                        **kwargs)

        match_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "match.yml")
        with open(match_path, "r", encoding="utf8") as f:
            match = yaml.load(f, Loader=yaml.FullLoader)

        # Start from empty lists so that a match.yml lacking the expected
        # entries ends in the "Please choose a model" message below rather
        # than in a TypeError (iterating None) or an unbound local name.
        emb_models = []
        gnn_models = []
        objective = match.get("unsupervised_node_classification") or []
        for pair_dict in objective:
            if "blogcatalog" in pair_dict["dataset"]:
                emb_models = pair_dict["model"]
            elif "cora" in pair_dict["dataset"]:
                gnn_models = pair_dict["model"]

        if model in emb_models:
            self.method_type = "emb"
            args = get_default_args(task="unsupervised_node_classification",
                                    dataset="blogcatalog",
                                    model=model,
                                    **kwargs)
        elif model in gnn_models:
            self.method_type = "gnn"
            args = get_default_args(task="unsupervised_node_classification",
                                    dataset="cora",
                                    model=model,
                                    **kwargs)
        else:
            print("Please choose a model from ", emb_models, "or", gnn_models)
            exit(0)

        self.data_path = kwargs.get("data_path", "tmp_data.pt")
        self.num_features = kwargs.get("num_features", None)
        if self.num_features is not None:
            args.num_features = self.num_features
        elif self.method_type == "gnn":
            print("Please provide num_features for gnn model!")
            exit(0)

        args.model = args.model[0]
        self.model = build_model(args)

        # get_trainer may return None; otherwise its result is called with
        # args to obtain the trainer that is stored on the pipeline.
        self.trainer = self.model.get_trainer(self.model, args)
        if self.trainer is not None:
            self.trainer = self.trainer(args)
Пример #5
0
def get_default_args_ne(dataset,
                        model,
                        dw="network_embedding_dw",
                        mw="network_embedding_mw"):
    """Return default arguments overlaid with every entry of ``default_dict``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw``.
        mw: Passed to ``get_default_args`` as ``mw``.

    Returns:
        The object produced by ``get_default_args`` after each key of
        ``default_dict`` has been set on it as an attribute.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, override in default_dict.items():
        setattr(defaults, name, override)
    return defaults
Пример #6
0
def get_default_args_for_unsup_nn(dataset,
                                  model,
                                  dw="node_classification_dw",
                                  mw="self_auxiliary_mw"):
    """Return default arguments overlaid with every entry of ``default_dict``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw``.
        mw: Passed to ``get_default_args`` as ``mw``.

    Returns:
        The object produced by ``get_default_args`` after each key of
        ``default_dict`` has been set on it as an attribute.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, override in default_dict.items():
        setattr(defaults, name, override)
    return defaults
Пример #7
0
def get_default_args_emb(dataset,
                         model,
                         dw="heterogeneous_embedding_dw",
                         mw="heterogeneous_embedding_mw"):
    """Return default arguments overlaid with every entry of ``default_dict_emb``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw``.
        mw: Passed to ``get_default_args`` as ``mw``.

    Returns:
        The object produced by ``get_default_args`` after each key of
        ``default_dict_emb`` has been set on it as an attribute.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, override in default_dict_emb.items():
        setattr(defaults, name, override)
    return defaults
Пример #8
0
def test_train():
    """Smoke test: ``train`` on cora/gcn for 10 epochs on CPU reports a positive ``test_acc``."""
    args = get_default_args(dataset="cora", model="gcn", epochs=10, cpu=True)
    # dataset, model and seed are indexable here (presumably lists -- confirm
    # against get_default_args); train() receives the first element of each.
    for field in ("dataset", "model", "seed"):
        setattr(args, field, getattr(args, field)[0])
    metrics = train(args)

    assert "test_acc" in metrics
    assert metrics["test_acc"] > 0
Пример #9
0
def test_get_default_args():
    """Check that ``options.get_default_args`` echoes its inputs and sets positive defaults."""
    args = options.get_default_args(task="node_classification",
                                    dataset="cora",
                                    model="gcn")

    # Values passed in must come back unchanged on the result.
    echoed = (
        ("task", "node_classification"),
        ("model", "gcn"),
        ("dataset", "cora"),
    )
    for field, expected in echoed:
        assert getattr(args, field) == expected

    # Numeric defaults only need to be strictly positive.
    for field in ("hidden_size", "lr"):
        assert getattr(args, field) > 0
Пример #10
0
def test_get_default_args():
    """Check that list-valued ``dataset``/``model`` and an override survive ``get_default_args``."""
    args = options.get_default_args(dataset=["cora", "citeseer"],
                                    model=["gcn", "gat"],
                                    hidden_size=128)

    # Element order of the list arguments must be kept.
    for position, expected in enumerate(("gcn", "gat")):
        assert args.model[position] == expected
    for position, expected in enumerate(("cora", "citeseer")):
        assert args.dataset[position] == expected
    assert args.hidden_size == 128
Пример #11
0
def get_default_args_generative(dataset,
                                model,
                                dw="node_classification_dw",
                                mw="self_auxiliary_mw",
                                **kwargs):
    """Return default arguments overlaid with ``default_dict`` and then ``kwargs``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw``.
        mw: Passed to ``get_default_args`` as ``mw``.
        **kwargs: Additional attributes to set; applied last, so they win over
            ``default_dict`` entries of the same name.

    Returns:
        The object produced by ``get_default_args`` with both layers of
        overrides set on it as attributes.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    # Apply the layers in order: module defaults first, caller overrides second.
    for layer in (default_dict, kwargs):
        for name, override in layer.items():
            setattr(defaults, name, override)
    return defaults
Пример #12
0
def test_set_best_config():
    """Check the values ``set_best_config`` yields for gat on citeseer."""
    args = get_default_args(task="node_classification",
                            dataset="citeseer",
                            model="gat")
    # model and dataset are reduced to their first element before the lookup.
    args.model = args.model[0]
    args.dataset = args.dataset[0]
    tuned = set_best_config(args)

    expected_values = (
        ("lr", 0.005),
        ("max_epoch", 1000),
        ("weight_decay", 0.001),
    )
    for field, expected in expected_values:
        assert getattr(tuned, field) == expected
Пример #13
0
    def __init__(self, app: str, model: str, **kwargs):
        """Resolve the method type for ``model`` and store its default arguments.

        Args:
            app: Forwarded unchanged to the parent constructor.
            model: Model name; must be one of the embedding or GNN models
                listed in the body.
            **kwargs: Extra options, kept on ``self.kwargs`` and forwarded to
                the parent constructor and to ``get_default_args``. Keys read
                directly here:

                * ``data_path`` -- default ``"tmp_data.pt"``.
                * ``num_features`` -- copied onto the arguments when given;
                  mandatory for ``"gnn"`` methods.

        If the model is unknown, or a ``"gnn"`` model is requested without
        ``num_features``, a message is printed and the interpreter exits via
        ``exit(0)``.
        """
        super(GenerateEmbeddingPipeline, self).__init__(app, model=model, **kwargs)

        self.kwargs = kwargs

        emb_models = [
            "prone", "netmf", "netsmf", "deepwalk", "line", "node2vec",
            "hope", "sdne", "grarep", "dngr", "spectral",
        ]
        gnn_models = ["dgi", "mvgrl", "grace", "unsup_graphsage"]

        # Each method type comes with the dataset name its defaults are
        # requested for.
        if model in emb_models:
            self.method_type, default_dataset = "emb", "blogcatalog"
        elif model in gnn_models:
            self.method_type, default_dataset = "gnn", "cora"
        else:
            print("Please choose a model from ", emb_models, "or", gnn_models)
            exit(0)
        args = get_default_args(dataset=default_dataset, model=model, **kwargs)

        self.data_path = kwargs.get("data_path", "tmp_data.pt")
        self.num_features = kwargs.get("num_features", None)
        if self.num_features is None:
            if self.method_type == "gnn":
                print("Please provide num_features for gnn model!")
                exit(0)
        else:
            args.num_features = self.num_features

        args.model = args.model[0]
        self.args = args
Пример #14
0
def test_train():
    """Smoke test: ``train`` on cora/gcn for at most 10 epochs on CPU reports a positive ``Acc``."""
    args = get_default_args(task="node_classification",
                            dataset="cora",
                            model="gcn",
                            max_epoch=10,
                            cpu=True)
    # dataset, model and seed are indexable here (presumably lists -- confirm
    # against get_default_args); train() receives the first element of each.
    for field in ("dataset", "model", "seed"):
        setattr(args, field, getattr(args, field)[0])
    metrics = train(args)

    assert "Acc" in metrics
    assert metrics["Acc"] > 0
Пример #15
0
def raw_experiment(task: str, dataset, model, **kwargs):
    """Train every generated variant and report the grouped results.

    Args:
        task: Task name; used for default arguments and for filtering variants
            through ``check_task_dataset_model_match``.
        dataset: Passed to ``get_default_args`` when no ``args`` is supplied.
        model: Passed to ``get_default_args`` when no ``args`` is supplied.
        **kwargs: Optional ``args`` (a ready-made argument object, used as is)
            and ``tablefmt`` (default ``"github"``). Without ``args``, all
            keyword arguments are forwarded to ``get_default_args``.

    Returns:
        A ``defaultdict(list)`` mapping ``variant[:-1]`` (each variant without
        its last element) to the list of ``train`` results for that key.
    """
    if "args" in kwargs:
        args = kwargs["args"]
    else:
        args = get_default_args(task=task, dataset=dataset, model=model, **kwargs)

    candidates = list(gen_variants(dataset=args.dataset, model=args.model, seed=args.seed))
    variants = check_task_dataset_model_match(task, candidates)

    # All variants are trained before any result is grouped.
    results = [train(variant_args) for variant_args in variant_args_generator(args, variants)]

    results_dict = defaultdict(list)
    for variant, result in zip(variants, results):
        group_key = variant[:-1]
        results_dict[group_key].append(result)

    output_results(results_dict, kwargs.get("tablefmt", "github"))

    return results_dict
Пример #16
0
def experiment(dataset, model=None, **kwargs):
    """Run an experiment for the given dataset(s) and model(s).

    Args:
        dataset: A dataset name, a ``Dataset`` instance, or a sequence of
            either. A single value is wrapped into a one-element list.
        model: A model name, an ``nn.Module`` instance, or a sequence of
            either. ``None`` selects ``"autognn"``. A single value is wrapped
            into a one-element list.
        **kwargs: Either forwarded to ``get_default_args`` or, when an ``args``
            object is supplied under the ``args`` key, set on that object as
            attributes (every key except ``args`` itself).

    Returns:
        The result of ``auto_experiment(args)`` when the arguments carry a
        ``search_space`` attribute, otherwise the result of
        ``raw_experiment(args)``.
    """
    if model is None:
        model = "autognn"
    if isinstance(dataset, (str, Dataset)):
        dataset = [dataset]
    if isinstance(model, (str, nn.Module)):
        model = [model]
    if "args" not in kwargs:
        args = get_default_args(dataset=[str(x) for x in dataset],
                                model=[str(x) for x in model],
                                **kwargs)
    else:
        args = kwargs["args"]
        for key, value in kwargs.items():
            if key != "args":
                setattr(args, key, value)
    # For module instances, show the model names in the printed arguments;
    # the actual objects are stored on args right after printing.
    if isinstance(model[0], nn.Module):
        args.model = [x.model_name for x in model]
    print(args)
    args.dataset = dataset
    args.model = model

    # A caller-supplied args object may not define max_epoch at all, so read
    # it defensively, in line with the hasattr checks used further below.
    if getattr(args, "max_epoch", None) is not None:
        warnings.warn(
            "The max_epoch is deprecated and will be removed in the future, please use epochs instead!"
        )
        args.epochs = args.max_epoch

    if len(model) == 1 and isinstance(model[0], str) and model[0] == "autognn":
        # Fill in the search settings only where the caller did not set them.
        if not hasattr(args, "search_space"):
            args.search_space = default_search_space
        if not hasattr(args, "seed"):
            args.seed = [1, 2]
        if not hasattr(args, "n_trials"):
            args.n_trials = 20

    if hasattr(args, "search_space"):
        return auto_experiment(args)

    return raw_experiment(args)
Пример #17
0
        .format(acc))

# %%
# Experiment API
# --------------
# CogDL provides an easier-to-use API for training, namely Experiment.
from cogdl import experiment
experiment(model="gcn", dataset="cora")
# Alternatively, you can create each component separately and run the process manually using build_dataset and build_model from CogDL.

from cogdl import experiment
from cogdl.datasets import build_dataset
from cogdl.models import build_model
from cogdl.options import get_default_args

args = get_default_args(model="gcn", dataset="cora")
dataset = build_dataset(args)
model = build_model(args)
experiment(model=model, dataset=dataset)

# %%
# How to save the trained model?
# ------------------------------
experiment(model="gcn", dataset="cora", checkpoint_path="gcn_cora.pt")
# When training stops, the model is saved to gcn_cora.pt. If you want to resume training from a previous checkpoint with different hyperparameters (such as learning rate or weight decay) while keeping the same model parameters (such as hidden size or number of layers), you can do it as follows:
experiment(model="gcn",
           dataset="cora",
           checkpoint_path="gcn_cora.pt",
           resume_training=True)

# %%
Пример #18
0
def get_default_args_agc(dataset, model, dw=None, mw=None):
    """Return default arguments overlaid with every entry of ``default_dict``.

    Args:
        dataset: Passed to ``get_default_args`` as ``dataset``.
        model: Passed to ``get_default_args`` as ``model``.
        dw: Passed to ``get_default_args`` as ``dw`` (``None`` by default).
        mw: Passed to ``get_default_args`` as ``mw`` (``None`` by default).

    Returns:
        The object produced by ``get_default_args`` after each key of
        ``default_dict`` has been set on it as an attribute.
    """
    defaults = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for name, override in default_dict.items():
        setattr(defaults, name, override)
    return defaults
Пример #19
0
from cogdl.tasks import build_task
from cogdl.options import get_default_args

# Request the default arguments for the "node_classification" task with the
# "gcn" model on the "cora" dataset.
args = get_default_args(task="node_classification",
                        dataset="cora",
                        model="gcn")
# Build the task from those arguments and call its train() method; whatever
# train() returns is kept in `ret`.
task = build_task(args)
ret = task.train()