Пример #1
0
def main():
    """
    Command line entry point for the ClusterGCN pipeline.

    Parses and prints the parameters, seeds torch, reads the graph, its
    adjacency, the features and the target, lets a ClusteringMachine
    decompose the graph, then trains and tests a ClusterGCNTrainer.
    """
    config = parameter_parser()
    torch.manual_seed(config.seed)
    tab_printer(config)

    # Inputs are read in a fixed order: graph, adjacency, features, target.
    network = graph_reader(config.edge_path)
    adjacency = adj_reader(network)
    node_features = feature_reader(config.features_path)
    node_targets = target_reader(config.target_path)

    partitioner = ClusteringMachine(
        config, network, node_features, node_targets, adjacency
    )
    partitioner.decompose()

    runner = ClusterGCNTrainer(config, partitioner)
    runner.train()
    runner.test()
Пример #2
0
def main():
    """
    Command line entry point for the GWNN pipeline.

    Parses and prints the parameters, reads the graph, features and target,
    builds a WaveletSparsifier and calculates all wavelets, then fits and
    scores a GWNNTrainer and saves the trainer's logs.
    """
    config = parameter_parser()
    tab_printer(config)

    network = graph_reader(config.edge_path)
    node_features = feature_reader(config.features_path)
    node_targets = target_reader(config.target_path)

    wavelet_sparsifier = WaveletSparsifier(
        network,
        config.scale,
        config.approximation_order,
        config.tolerance,
    )
    wavelet_sparsifier.calculate_all_wavelets()

    gwnn = GWNNTrainer(config, wavelet_sparsifier, node_features, node_targets)
    gwnn.fit()
    gwnn.score()
    save_logs(config, gwnn.logs)
Пример #3
0
def main():
    """
    Command line entry point for fitting an NGCN / MixHop model.

    Parses and prints the parameters, seeds torch, reads the graph, features
    and target, and fits a first Trainer. When the selected model is
    "mixhop", the first trainer's architecture is evaluated and reset, and a
    second Trainer is fitted with the arguments returned by the reset.
    """
    config = parameter_parser()
    torch.manual_seed(config.seed)
    tab_printer(config)

    network = graph_reader(config.edge_path)
    node_features = feature_reader(config.features_path)
    node_targets = target_reader(config.target_path)

    first_pass = Trainer(config, network, node_features, node_targets, True)
    first_pass.fit()

    # Only the "mixhop" model goes through the second fitting round.
    if config.model != "mixhop":
        return

    first_pass.evaluate_architecture()
    config = first_pass.reset_architecture()
    second_pass = Trainer(config, network, node_features, node_targets, False)
    second_pass.fit()
Пример #4
0
    def load_data(self):
        """Load features, labels, index splits and edges, then build a graph.

        Populates ``features``, ``labels``, ``idx_train``, ``idx_val`` and
        ``idx_test`` from ``feature_reader``; derives ``n_features`` and
        ``n_classes``; stores the result of ``graph_reader`` in ``edges`` and
        turns it into a NetworkX graph ``G`` with node count ``n`` and edge
        count ``E``. Progress and the two counts are printed.
        """
        print('loading data...')
        reader_options = dict(
            dataset=self.dataset,
            scale=self.args.scale,
            train_ratio=self.args.train_ratio,
            feature_size=self.args.feature_size,
        )
        (self.features, self.labels,
         self.idx_train, self.idx_val, self.idx_test) = feature_reader(**reader_options)

        self.n_features = self.features.shape[1]
        # Class count is taken as the largest label value plus one.
        self.n_classes = 1 + self.labels.max().item()

        self.edges = graph_reader(dataset=self.dataset)

        # Build the NetworkX graph from the edge list.
        self.G = nx.Graph()
        self.G.add_edges_from(self.edges)
        self.n, self.E = self.G.number_of_nodes(), self.G.number_of_edges()

        print('dataset load finish')
        for caption, count in (('number of nodes:', self.n),
                               ('number of edges:', self.E)):
            print(caption, count)
Пример #5
0
    def load_data(self):
        """Load features, labels, index splits and edges; build the adjacency.

        Populates ``features``, ``labels``, ``idx_train``, ``idx_val`` and
        ``idx_test`` from ``feature_reader``; derives ``n_nodes``,
        ``n_features`` and ``n_classes``; stores the result of
        ``graph_reader`` in ``edges`` and the result of
        ``self.build_adj_mat()`` in ``adj``. When CUDA is available the
        features, adjacency and labels (and ``prj`` if the instance has one)
        are moved to the GPU.
        """
        reader_options = dict(
            dataset=self.dataset,
            scale=self.args.scale,
            train_ratio=self.args.train_ratio,
            feature_size=self.args.feature_size,
        )
        (self.features, self.labels,
         self.idx_train, self.idx_val, self.idx_test) = feature_reader(**reader_options)

        self.n_nodes = len(self.labels)
        self.n_features = self.features.shape[1]
        # Class count is taken as the largest label value plus one.
        self.n_classes = 1 + self.labels.max().item()

        self.edges = graph_reader(dataset=self.dataset)
        self.adj = self.build_adj_mat()

        if not torch.cuda.is_available():
            return

        # Same transfer order as before: features, adjacency, labels.
        for attr_name in ('features', 'adj', 'labels'):
            setattr(self, attr_name, getattr(self, attr_name).cuda())
        # `prj` is optional; it is moved only when it already exists.
        if hasattr(self, 'prj'):
            self.prj = self.prj.cuda()
Пример #6
0
    def load_data(self):
        """Load features, labels and graph structure for ``self.dataset``.

        The method runs in two phases, each dispatching on the dataset name.

        Phase 1 (features / labels, via ``feature_reader``):
          * 'reddit', 'flickr', 'ppi', 'ppi-large', 'cora', 'citeseer',
            'pubmed': features, training features, labels and the
            train/val/test indices all come from the reader.
          * names starting with 'twitch-train': a single graph whose nodes
            are split 80/20 into a random training set and the complementary
            validation set; features are standardized with a scaler fitted
            on the training rows.
          * other names starting with 'twitch': two graphs (``*_1`` and
            ``*_2``); both feature matrices are standardized with a scaler
            fitted on the first.
          * names starting with 'deezer': only the labels of two graphs are
            read here; their features are returned by ``graph_reader`` in
            phase 2.

        Phase 2 (adjacency, via ``graph_reader``) is skipped entirely when
        ``self.args.mode`` is 'mlp' or 'lr'; otherwise it ends with a call to
        ``self.prepare_data()``.

        Tensors are moved to the GPU whenever ``torch.cuda.is_available()``.

        Raises:
            NotImplementedError: if the dataset name matches no branch.
        """
        if self.dataset in ( 'reddit', 'flickr', 'ppi', 'ppi-large', 'cora', 'citeseer', 'pubmed' ):
            self.features, self.features_train, self.labels, self.idx_train, self.idx_val, self.idx_test \
                = feature_reader(dataset=self.dataset, scale=self.args.scale,
                                train_ratio=self.args.train_ratio, feature_size=self.args.feature_size)

            if torch.cuda.is_available():
                self.features = self.features.cuda()
                self.features_train = self.features_train.cuda()
                self.labels = self.labels.cuda()

            self.n_nodes = len(self.labels)
            self.n_features = self.features.shape[1]
            # The second label dimension decides single- vs multi-label:
            # width 1 -> classes are label values, otherwise one class per column.
            self.multi_label = self.labels.shape[1]
            if self.multi_label == 1:
                self.n_classes = self.labels.max().item() + 1
            else:
                self.n_classes = self.multi_label

        elif self.dataset.startswith( 'twitch-train' ):
            # 'twitch-train/<name>' is read from 'twitch/<name>'.
            p = self.dataset.find('/')
            self.features, self.labels = feature_reader(dataset=f'twitch/{self.dataset[p+1:]}')
            self.n_nodes = len(self.labels)
            self.n_nodes_1 = int(0.8 * self.n_nodes)
            self.n_nodes_2 = self.n_nodes - self.n_nodes_1
            self.idx_train = np.random.choice(self.n_nodes, self.n_nodes_1, replace=False)
            # Validation nodes are exactly the nodes NOT drawn for training.
            # (Previously this was the last 20% of indices regardless of the
            # random draw, which overlapped with idx_train and leaked
            # training nodes into validation.)
            self.idx_val = np.setdiff1d(np.arange(self.n_nodes), self.idx_train)

            # Raw training rows, used only to fit the scaler below.
            self.features_train = self.features[self.idx_train]

            scaler = StandardScaler()
            scaler.fit(self.features_train)
            self.features = scaler.transform(self.features)
            self.features = torch.FloatTensor(self.features)
            # Re-slice so features_train holds the standardized tensor rows.
            self.features_train = self.features[self.idx_train]

            if torch.cuda.is_available():
                self.features = self.features.cuda()
                self.features_train = self.features_train.cuda()
                self.labels = self.labels.cuda()

            # Hard-coded sizes for this dataset family; presumably they match
            # the files on disk -- TODO confirm.
            self.n_features = 3170
            self.multi_label = 1
            self.n_classes = 2


        elif self.dataset.startswith( 'twitch' ):
            # Split '<folder>/<a>/<b>' into '<folder>/<a>' and '<folder>/<b>'.
            p_0 = self.dataset.find('/')
            data_folder = self.dataset[:p_0]

            p = self.dataset.rfind('/')+1
            self.dataset1 = self.dataset[:p-1]
            self.dataset2 = f'{data_folder}/{self.dataset[p:]}'

            self.features_1, self.labels_1 = feature_reader(dataset=self.dataset1)
            self.features_2, self.labels_2 = feature_reader(dataset=self.dataset2)

            # The scaler is fitted on the first graph only and then applied to both.
            scaler = StandardScaler()
            scaler.fit(self.features_1)
            self.features_1 = torch.FloatTensor(scaler.transform(self.features_1))
            self.features_2 = torch.FloatTensor(scaler.transform(self.features_2))

            if torch.cuda.is_available():
                self.features_1 = self.features_1.cuda()
                self.features_2 = self.features_2.cuda()
                self.labels_1 = self.labels_1.cuda()
                self.labels_2 = self.labels_2.cuda()

            self.n_nodes_1 = len(self.labels_1)
            self.n_nodes_2 = len(self.labels_2)
            # Hard-coded sizes for this dataset family -- TODO confirm.
            self.n_features = 3170
            self.multi_label = 1
            self.n_classes = 2

        elif self.dataset.startswith( 'deezer' ):
            # Split '<folder>/<a>/<b>' into '<folder>/<a>' and '<folder>/<b>'.
            p_0 = self.dataset.find('/')
            data_folder = self.dataset[:p_0]

            p = self.dataset.rfind('/')+1
            self.dataset1 = self.dataset[:p-1]
            self.dataset2 = f'{data_folder}/{self.dataset[p:]}'

            # For deezer the reader returns labels only; features are loaded
            # together with the adjacency further down.
            self.labels_1 = feature_reader(dataset=self.dataset1)
            self.labels_2 = feature_reader(dataset=self.dataset2)

            if torch.cuda.is_available():
                self.labels_1 = self.labels_1.cuda()
                self.labels_2 = self.labels_2.cuda()

            self.n_nodes_1 = len(self.labels_1)
            self.n_nodes_2 = len(self.labels_2)
            # Hard-coded label width for this dataset family -- TODO confirm.
            self.n_classes = self.multi_label = 84

        else:
            raise NotImplementedError(f'dataset = {self.dataset} not implemented!')

        print(f'loading {self.dataset} features done!')

        # print('feature_size', self.features.shape)

        # print('====================================')
        # print('||   n_nodes =', self.n_nodes)
        # print('||   n_features =', self.n_features)
        # print('||   n_classes =', self.n_classes, '(', self.multi_label, ')')
        # print('====================================')

        # Graph-free modes stop here: no adjacency is loaded and
        # prepare_data() is not called.
        if self.args.mode in ( 'mlp', 'lr' ): return

        if self.dataset in ( 'reddit', 'flickr', 'ppi', 'ppi-large', 'cora', 'citeseer', 'pubmed' ):
            self.adj_full = graph_reader(args=self.args, dataset=self.dataset, n_nodes=self.n_nodes)

            # construct training data
            if self.dataset in ( 'cora', 'citeseer', 'pubmed' ):
                # These three train on a copy of the full adjacency.
                self.adj_train = sp.csr_matrix.copy(self.adj_full)
                self.adj_ori = sp.csr_matrix.copy(self.adj_full)
            else:
                # The others train on the subgraph induced by the training nodes.
                self.adj_train = self.adj_full[self.idx_train, :][:, self.idx_train]
                self.adj_ori = sp.csr_matrix.copy(self.adj_full)

        elif self.dataset.startswith( 'twitch-train' ):
            p = self.dataset.find('/')
            self.adj_full = graph_reader(args=self.args, dataset=f'twitch/{self.dataset[p+1:]}', n_nodes=self.n_nodes)
            self.adj_train = self.adj_full[self.idx_train, :][:, self.idx_train]
            self.adj_ori = sp.csr_matrix.copy(self.adj_full)

        elif self.dataset.startswith( 'twitch' ):
            self.adj_1 = graph_reader(args=self.args, dataset=self.dataset1, n_nodes=self.n_nodes_1)
            self.adj_2 = graph_reader(args=self.args, dataset=self.dataset2, n_nodes=self.n_nodes_2)
            self.adj_ori = sp.csr_matrix.copy(self.adj_2)

        elif self.dataset.startswith( 'deezer' ):
            # For deezer, graph_reader returns the adjacency and the features.
            self.adj_1, self.features_1 = graph_reader(args=self.args, dataset=self.dataset1, n_nodes=self.n_nodes_1)
            self.adj_2, self.features_2 = graph_reader(args=self.args, dataset=self.dataset2, n_nodes=self.n_nodes_2)
            self.adj_ori = sp.csr_matrix.copy(self.adj_2)
            self.n_features = self.features_1.shape[-1]

            if torch.cuda.is_available():
                self.features_1 = self.features_1.cuda()
                self.features_2 = self.features_2.cuda()

        else:
            # Not reachable as written: any dataset name that would land here
            # already raised NotImplementedError in the first dispatch above.
            self.edges = graph_reader(args=self.args, dataset=self.dataset)

        # self.construct_hop_dict()

        # self.exist_edges = random.sample(self.edges.tolist(), self.n_test)
        # self.nonexist_edges = random.sample(self.one_hop_edges, self.n_test)

        # self.nonexist_edges = random.sample(self.two_hop_edges, self.n_test)
        # self.nonexist_edges = random.sample(self.two_hop_edges+self.one_hop_edges, self.n_test)
        # self.nonexist_edges = []
        # cnt_nonexist = 0
        # while 1:
        #     u = np.random.choice(self.n_nodes)
        #     v = np.random.choice(self.n_nodes)
        #     if u != v and v not in self.edge_dict[u]:
        #         self.nonexist_edges.append((u, v))
        #         cnt_nonexist += 1
        #     if cnt_nonexist == self.n_test: break

        # self.labeler = Labeler(self.features, self.labels, self.n_classes,
        #                         self.idx_train, self.idx_val, self.idx_test)

        self.prepare_data()
Пример #7
0
"""Model runner."""

import os
from asne import ASNE
from utils import graph_reader, feature_reader, parse_args, tab_printer

# Set TF_CPP_MIN_LOG_LEVEL to "3" for this process; presumably meant to quiet
# TensorFlow's native logging -- TODO confirm.
# NOTE(review): this assignment runs after `from asne import ASNE` above; if that
# import already loads TensorFlow, the setting may take effect too late -- verify.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


def run_asne(args, graph, features):
    """
    Print the arguments, train an ASNE model and save its embedding.
    :param args: Arguments object.
    :param graph: NetworkX graph.
    :param features: Features in a dictionary.
    """
    tab_printer(args)
    asne_model = ASNE(args, graph, features)
    asne_model.train()
    asne_model.save_embedding()


if __name__ == "__main__":
    # Script entry: parse the arguments, read the graph and then the features
    # from the paths they name, and hand everything to run_asne.
    args = parse_args()
    graph, features = (
        graph_reader(args.edge_path),
        feature_reader(args.features_path),
    )
    run_asne(args, graph, features)