def train(opt):

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train_dataloader, val_dataloader = create_dataloader(opt)
    net = Classification()  # instantiate the network to train
    net.to(device)
    net.train()
    loss_function = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.Adam(net.parameters(), lr=0.001)  # Adam over all trainable parameters

    for epoch in range(opt.num_epochs):  # one epoch = one full pass over the training set
        running_loss = 0.0
        correct = 0
        total = 0
        time_start = time.perf_counter()

        for data in train_dataloader:  # iterate over the training set
            inputs, labels = data  # a batch of images and their labels
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # clear gradients from the previous step

            # forward + backward + optimize
            outputs = net(inputs)  # forward pass
            loss = loss_function(outputs, labels)  # compute the loss
            loss.backward()  # backward pass
            optimizer.step()  # update parameters
            predict_y = torch.max(outputs, dim=1)[1]  # predicted class indices
            total += labels.size(0)
            correct += (predict_y == labels).sum().item()
            running_loss += loss.item()
        acc = correct / total
        epoch_time = time.perf_counter() - time_start
        print('Train epoch {}: loss: {:.4f}, acc: {:.2f}%, time: {:.1f}s'.format(
            epoch + 1, running_loss / len(train_dataloader), 100 * acc, epoch_time))
        # save the trained weights
        if opt.model == 'basic':
            prefix = 'Basic'
        elif opt.model == 'plus':
            prefix = 'Plus'
        else:
            raise ValueError('unknown model: {}'.format(opt.model))
        save_weight_name = os.path.join(
            opt.save_path,
            '{}_Epoch_{}_Accuracy_{:.2f}.pth'.format(prefix, epoch + 1, acc))
        torch.save(net.state_dict(), save_weight_name)
    print('Finished Training')
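
# Hedged sketch: one way to build the `opt` namespace that train() expects.
# Only num_epochs, model, and save_path are read by train() itself; the name
# get_train_opt and any dataset-specific flags that create_dataloader(opt)
# needs are assumptions, not part of the original code.
def get_train_opt():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_epochs', type=int, default=20)
    parser.add_argument('--model', type=str, choices=['basic', 'plus'], default='basic')
    parser.add_argument('--save_path', type=str, default='./output/train/weights/exp_1')
    return parser.parse_args()
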
def predict(opt):
    """Evaluate on the test set.

    Note: is_train in basic_option must be set to False so that
    create_dataloader(opt) returns the test loader.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    correct = 0
    total = 0
    test_dataloader = create_dataloader(opt)
    net = Classification()
    net.load_state_dict(
        torch.load('./output/train/weights/exp_1/Basic_Epoch_20_Accuracy_0.99.pth',
                   map_location=device))
    net = net.to(device)
    net.eval()
    with torch.no_grad():
        for index, data in enumerate(test_dataloader, start=1):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, dim=1)
            # `classes` maps a label index to its name (defined at module scope)
            print(f'number {index} predicted as: {classes[predicted[0]]}')
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: {:.2f}%'.format(100 * correct / total))
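
# Hedged end-to-end sketch using the hypothetical get_train_opt() above:
# train first, then flip is_train so create_dataloader(opt) yields the test
# loader. The `classes` names are placeholders; match them to your dataset.
if __name__ == '__main__':
    opt = get_train_opt()
    train(opt)
    opt.is_train = False
    classes = [str(i) for i in range(10)]  # placeholder label names
    predict(opt)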
class GNN(object):
    """Graph Neural Networks that can be easily called and used.
    Authors of this code package:
    Tong Zhao, [email protected]
    Tianwen Jiang, [email protected]
    Last updated: 11/25/2019
    Parameters
    ----------
    adj_matrix: scipy.sparse.csr_matrix
        The adjacency matrix of the graph, where a nonzero entry indicates an
        edge and its value gives the number of edges between the two nodes.
    features: numpy.ndarray, optional
        2-dimensional array storing the raw feature of each node, where the
        i-th row is the raw feature vector of node i.
        When raw features are not given, one-hot degree features will be used.
    labels: list or 1-D numpy.ndarray, optional
        The class label of each node. Used for supervised learning.
    supervised: bool, optional, default False
        Whether to use supervised learning.
    model: {'gat', 'graphsage'}, default 'gat'
        The GNN model to be used.
        - 'graphsage' is GraphSAGE: https://cs.stanford.edu/people/jure/pubs/graphsage-nips17.pdf
        - 'gat' is graph attention network: https://arxiv.org/pdf/1710.10903.pdf
    n_layer: int, optional, default 2
        Number of layers in the GNN
    emb_size: int, optional, default 128
        Size of the node embeddings to be learnt
    random_state: int, optional, default 1234
        Random seed
    device: {'cpu', 'cuda', 'auto'}, default 'auto'
        The device to use.
    epochs: int, optional, default 5
        Number of epochs for training
    batch_size: int, optional, default 20
        Number of nodes per batch for training
    lr: float, optional, default 0.7
        Learning rate
    unsup_loss_type: {'margin', 'normal'}, default 'margin'
        Loss function to be used for unsupervised learning
        - 'margin' is a hinge loss with margin of 3
        - 'normal' is the unsupervised loss function described in the paper of GraphSAGE
    print_progress: bool, optional, default True
        Whether to print the training progress
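
    A usage sketch follows the class definition.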
    """
    def __init__(self,
                 adj_matrix,
                 features=None,
                 labels=None,
                 supervised=False,
                 model='gat',
                 n_layer=2,
                 emb_size=128,
                 random_state=1234,
                 device='auto',
                 epochs=5,
                 batch_size=20,
                 lr=0.7,
                 unsup_loss_type='margin',
                 print_progress=True):
        super(GNN, self).__init__()
        # fix random seeds
        random.seed(random_state)
        np.random.seed(random_state)
        torch.manual_seed(random_state)
        torch.cuda.manual_seed_all(random_state)
        # set parameters
        self.supervised = supervised
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.unsup_loss_type = unsup_loss_type
        self.print_progress = print_progress
        self.gat = (model == 'gat')
        # set device
        if device == 'auto':
            self.device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = device

        # load data
        self.dl = DataLoader(adj_matrix, features, labels, supervised,
                             self.device)

        self.gnn = GraphSage(n_layer,
                             emb_size,
                             self.dl,
                             self.device,
                             gat=self.gat)
        self.gnn.to(self.device)

        if supervised:
            n_classes = len(set(labels))
            self.classification = Classification(emb_size, n_classes)
            self.classification.to(self.device)

    def fit(self):
        train_nodes = copy.deepcopy(self.dl.nodes_train)

        if self.supervised:
            labels = self.dl.labels
            models = [self.gnn, self.classification]
        else:
            unsup_loss = Unsup_Loss(self.dl, self.device)
            models = [self.gnn]
            if self.unsup_loss_type == 'margin':
                num_neg = 6
            elif self.unsup_loss_type == 'normal':
                num_neg = 100

        for epoch in range(self.epochs):
            np.random.shuffle(train_nodes)

            # rebuild the optimizer each epoch over all trainable parameters
            params = [
                param for model in models for param in model.parameters()
                if param.requires_grad
            ]
            optimizer = torch.optim.SGD(params, lr=self.lr)
            optimizer.zero_grad()
            for model in models:
                model.zero_grad()

            batches = math.ceil(len(train_nodes) / self.batch_size)
            visited_nodes = set()
            for index in range(batches):
                if not self.supervised and len(visited_nodes) == len(
                        train_nodes):
                    # finish this epoch if all nodes are visited
                    break
                nodes_batch = train_nodes[index * self.batch_size:(index + 1) *
                                          self.batch_size]
                # extend the node batch for unsupervised learning
                if not self.supervised:
                    nodes_batch = np.asarray(
                        list(
                            unsup_loss.extend_nodes(nodes_batch,
                                                    num_neg=num_neg)))
                visited_nodes |= set(nodes_batch)
                # feed the node batch to the GNN and get the node embeddings
                embs_batch = self.gnn(nodes_batch)
                # calculate loss
                if self.supervised:
                    # supervised learning
                    logists = self.classification(embs_batch)
                    labels_batch = labels[nodes_batch]
                    # negative log-likelihood over the log-softmax outputs
                    loss_sup = -torch.sum(
                        logists[range(logists.size(0)), labels_batch], 0)
                    loss_sup /= len(nodes_batch)
                    loss = loss_sup
                else:
                    # unsupervised learning
                    if self.unsup_loss_type == 'margin':
                        loss_net = unsup_loss.get_loss_margin(
                            embs_batch, nodes_batch)
                    elif self.unsup_loss_type == 'normal':
                        loss_net = unsup_loss.get_loss_sage(
                            embs_batch, nodes_batch)
                    loss = loss_net

                if self.print_progress:
                    logging.info(
                        'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Processed nodes [{}/{}]'
                        .format(epoch + 1, self.epochs, index + 1, batches,
                                loss.item(), len(visited_nodes),
                                len(train_nodes)))

                loss.backward()
                for model in models:
                    nn.utils.clip_grad_norm_(model.parameters(), 5)
                optimizer.step()
                optimizer.zero_grad()
                for model in models:
                    model.zero_grad()

    def generate_embeddings(self):
        nodes = self.dl.nodes_train
        b_sz = 500
        batches = math.ceil(len(nodes) / b_sz)
        embs = []
        for index in range(batches):
            nodes_batch = nodes[index * b_sz:(index + 1) * b_sz]
            with torch.no_grad():
                embs_batch = self.gnn(nodes_batch)
            assert len(embs_batch) == len(nodes_batch)
            embs.append(embs_batch)
        assert len(embs) == batches
        embs = torch.cat(embs, 0)
        assert len(embs) == len(nodes)
        return embs.cpu().numpy()

    def predict(self):
        if not self.supervised:
            raise RuntimeError(
                'GNN.predict() is only supported for supervised learning.')
        nodes = self.dl.nodes_train
        b_sz = 500
        batches = math.ceil(len(nodes) / b_sz)
        preds = []
        for index in range(batches):
            nodes_batch = nodes[index * b_sz:(index + 1) * b_sz]
            with torch.no_grad():
                embs_batch = self.gnn(nodes_batch)
                logists = self.classification(embs_batch)
                _, predicts = torch.max(logists, 1)
                preds.append(predicts)
        assert len(preds) == batches
        preds = torch.cat(preds, 0)
        assert len(preds) == len(nodes)
        return preds.cpu().numpy()
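
# Hedged usage sketch for the GNN wrapper above. The 4-node path graph and the
# toy labels are placeholders; real inputs follow the shapes described in the
# class docstring.
import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([
    [0, 1, 0, 0],
    [1, 0, 1, 0],
    [0, 1, 0, 1],
    [0, 0, 1, 0],
]))

# Unsupervised: learn embeddings from structure alone (one-hot degree features).
gnn = GNN(adj, supervised=False, model='graphsage', epochs=2, batch_size=2)
gnn.fit()
embs = gnn.generate_embeddings()  # shape: (n_nodes, emb_size)

# Supervised: provide per-node labels, then read back class predictions.
gnn_sup = GNN(adj, labels=[0, 0, 1, 1], supervised=True, epochs=2, batch_size=2)
gnn_sup.fit()
preds = gnn_sup.predict()  # array of length n_nodes with class ids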