Example #1
def run(args, seed):
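    # Joint training: node classification plus a self-supervised head whose
    # targets are graph-partition IDs produced by partition() below.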

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    early_stopping = 10

    adj_raw = load_adj_raw(args['dataset']).tocoo()
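    # partition the raw graph into partitioning_num clusters; cluster IDs serve as SS labels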
    ss_labels = partition(adj_raw, args['partitioning_num']).cuda()
    net_gcn = net.net_gcn_multitask(embedding_dim=args['embedding_dim'],
                                    ss_dim=args['partitioning_num'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    loss_val = []
    for epoch in range(1000):

        optimizer.zero_grad()
        output, output_ss = net_gcn(features, adj)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func(output_ss, ss_labels)
        loss = loss_target * args['loss_weight'] + loss_ss * (
            1 - args['loss_weight'])
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features, adj, val_test=True)
            loss_val.append(
                loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(
                loss_val[-(early_stopping + 1):-1]):
            break

    # test
    with torch.no_grad():
        output, _ = net_gcn(features, adj, val_test=True)
        acc_val = f1_score(labels[idx_val].cpu().numpy(),
                           output[idx_val].cpu().numpy().argmax(axis=1),
                           average='micro')
        acc_test = f1_score(labels[idx_test].cpu().numpy(),
                            output[idx_test].cpu().numpy().argmax(axis=1),
                            average='micro')

    return acc_val, acc_test
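
A minimal driver sketch for this variant. The key names mirror the args[...] lookups above; every concrete value (and the assumed layout of embedding_dim) is a placeholder for illustration, not taken from the source.

# Hypothetical invocation; all values below are placeholders.
args = {
    'dataset': 'cora',
    'embedding_dim': [1433, 512, 7],   # assumed [input, hidden, output] layout
    'partitioning_num': 128,           # number of graph partitions / SS classes
    'lr': 0.01,
    'weight_decay': 5e-4,
    'loss_weight': 0.9,                # weight on the supervised loss
}
acc_val, acc_test = run(args, seed=42)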
Example #2
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = load_adj_raw(args['dataset'])
    
    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)
    features = features.cuda()
    
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    early_stopping = 10

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'])
    else:
        net_gcn = GATNet(args['embedding_dim'])
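        # add self-loop edges so every node attends to itself under GAT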
        g.add_edges(list(range(node_num)), list(range(node_num)))
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    loss_val = []
    for epoch in range(1000):

        net_gcn.train()  # switch back to train mode (eval() is set during validation below)
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        # print('epoch', epoch, 'loss', loss.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping+1):-1]):
            break

    # test
    with torch.no_grad():
        net_gcn.eval()
        output = net_gcn(g, features, 0, 0)
        acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
        acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')

    return acc_val, acc_test
Example #3
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    loss_func_ss = nn.L1Loss()

    adj_raw = load_adj_raw(args['dataset']).tocsr()
    idx_mask = list(range(node_num))
    adj_mask = adj_raw.copy()  # copy so the raw adjacency stays untouched
    adj_mask[idx_mask, idx_mask] = 0  # zero the diagonal (remove self-connections) before normalizing
    adj_mask = sparse_mx_to_torch_sparse_tensor(normalize_adj(adj_mask)).cuda()

    reduced_dim = args['reduced_dimension']
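    # the top reduced_dim left singular vectors of the feature matrix act as
    # regression targets for the self-supervised head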
    ss_labels, _, _ = features.svd()
    ss_labels = ss_labels[:, :reduced_dim].cuda()

    net_gcn = net.net_gcn_multitask(embedding_dim=args['embedding_dim'], ss_dim=args['reduced_dimension'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    best_val = 0
    best_val_test = 0
    for epoch in range(500):

        optimizer.zero_grad()
        output, _ = net_gcn(features, adj)
        _, output_ss = net_gcn(features, adj_mask)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func_ss(output_ss, ss_labels) * 1e2
        loss = loss_target + loss_ss * args['loss_weight']
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features, adj, val_test=True)
            # loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

            acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
            if acc_val > best_val:
                best_val = acc_val
                best_val_test = acc_test

    return best_val, best_val_test
Example #4
def run_fix_mask(args, imp_num, adj_percent, wei_percent):

    pruning.setup_seed(args['seed'])
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'], g)
        pruning_gin.add_mask(net_gcn)
        pruning_gin.random_pruning(net_gcn, adj_percent, wei_percent)
        adj_spar, wei_spar = pruning_gin.print_sparsity(net_gcn)

    elif args['net'] == 'gat':
        net_gcn = GATNet(args['embedding_dim'], g)
        g.add_edges(list(range(node_num)), list(range(node_num)))
        pruning_gat.add_mask(net_gcn)
        pruning_gat.random_pruning(net_gcn, adj_percent, wei_percent)
        adj_spar, wei_spar = pruning_gat.print_sparsity(net_gcn)

    else:
        raise ValueError('unsupported net: {}'.format(args['net']))

    net_gcn = net_gcn.cuda()
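    # freeze the pruning masks: only the surviving weights are updated in this phase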
    for name, param in net_gcn.named_parameters():
        if 'mask' in name:
            param.requires_grad = False

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}

    for epoch in range(args['fix_epoch']):

        net_gcn.train()  # switch back to train mode (eval() is set during validation below)
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

        print(
            "RP[{}] (Fix Mask) Epoch:[{}/{}] LOSS:[{:.4f}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
            .format(imp_num, epoch, args['fix_epoch'], loss, acc_val * 100,
                    acc_test * 100, best_val_acc['val_acc'] * 100,
                    best_val_acc['test_acc'] * 100, best_val_acc['epoch']))

    print(
        "syd final: [{},{}] RP[{}] (Fix Mask) Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}] | Adj:[{:.2f}%] Wei:[{:.2f}%]"
        .format(args['dataset'], args['net'], imp_num,
                best_val_acc['val_acc'] * 100, best_val_acc['test_acc'] * 100,
                best_val_acc['epoch'], adj_spar, wei_spar))
Example #5
def run_get_mask(args, imp_num, rewind_weight_mask=None):

    pruning.setup_seed(args['seed'])
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()

    g.add_edges(adj.row, adj.col)
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'], g)
        pruning_gin.add_mask(net_gcn)
    elif args['net'] == 'gat':
        net_gcn = GATNet(args['embedding_dim'], g)
        g.add_edges(list(range(node_num)), list(range(node_num)))
        pruning_gat.add_mask(net_gcn)
    else:
        raise ValueError('unsupported net: {}'.format(args['net']))

    net_gcn = net_gcn.cuda()

    if rewind_weight_mask is not None:
        net_gcn.load_state_dict(rewind_weight_mask)

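    # perturb the trainable masks with small noise before mask learning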
    if args['net'] == 'gin':
        pruning_gin.add_trainable_mask_noise(net_gcn, c=1e-5)
        adj_spar, wei_spar = pruning_gin.print_sparsity(net_gcn)
    else:
        pruning_gat.add_trainable_mask_noise(net_gcn, c=1e-5)
        adj_spar, wei_spar = pruning_gat.print_sparsity(net_gcn)

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}

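    # snapshot the initial weights so the found ticket can be rewound to them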
    rewind_weight = copy.deepcopy(net_gcn.state_dict())

    for epoch in range(args['mask_epoch']):

        net_gcn.train()  # switch back to train mode (eval() is set during validation below)
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        if args['net'] == 'gin':
            pruning_gin.subgradient_update_mask(net_gcn, args)  # l1 norm
        else:
            pruning_gat.subgradient_update_mask(net_gcn, args)  # l1 norm

        optimizer.step()
        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

                if args['net'] == 'gin':
                    rewind_weight, adj_spar, wei_spar = pruning_gin.get_final_mask_epoch(
                        net_gcn, rewind_weight, args)
                else:
                    rewind_weight, adj_spar, wei_spar = pruning_gat.get_final_mask_epoch(
                        net_gcn, rewind_weight, args)

        print(
            "IMP[{}] (Get Mask) Epoch:[{}/{}] LOSS:[{:.4f}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}] | Adj:[{:.2f}%] Wei:[{:.2f}%]"
            .format(imp_num, epoch, args['mask_epoch'], loss, acc_val * 100,
                    acc_test * 100, best_val_acc['val_acc'] * 100,
                    best_val_acc['test_acc'] * 100, best_val_acc['epoch'],
                    adj_spar, wei_spar))

    return rewind_weight
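
The rewind_weight_mask parameter suggests run_get_mask is called once per pruning round, feeding each round's returned mask dict into the next. A sketch of that outer loop, with the round count as an assumed placeholder:

# Hypothetical IMP driver; the number of rounds is a placeholder.
rewind_weight = None
for imp_num in range(20):
    rewind_weight = run_get_mask(args, imp_num, rewind_weight)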
Example #6
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph().to('cuda:%s' % args['cuda'])
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)

    # adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    loss_func_ss = nn.L1Loss()

    if args['net'] == 'gin':
        net_gcn = GINNet_ss(args['embedding_dim'], args['reduced_dimension'])
    else:
        net_gcn = GATNet_ss(args['embedding_dim'], args['reduced_dimension'])
        g.add_edges(list(range(node_num)), list(range(node_num)))

    adj_raw = load_adj_raw(args['dataset']).tocsr()
    idx_mask = list(range(node_num))
    adj_mask = adj_raw.copy()  # copy so the raw adjacency stays untouched
    adj_mask[idx_mask, idx_mask] = 0  # zero the diagonal before normalizing
    adj_mask = sparse_mx_to_torch_sparse_tensor(normalize_adj(adj_mask)).cuda()

    reduced_dim = args['reduced_dimension']
    ss_labels, _, _ = features.svd()
    ss_labels = ss_labels[:, :reduced_dim].cuda()

    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    best_val = 0
    best_val_test = 0
    for epoch in range(400):

        net_gcn.train()  # switch back to train mode (eval() is set during validation below)
        optimizer.zero_grad()
        output, output_ss = net_gcn(g, features, 0, 0)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func_ss(output_ss, ss_labels) * 1e2
        loss = loss_target + loss_ss * args['loss_weight']
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            net_gcn.eval()
            output, _ = net_gcn(g, features, 0, 0)
            # loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))
            acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
            wandb.log({'val_acc': acc_val})
            if acc_val > best_val:
                best_val = acc_val
                best_val_test = acc_test

    return best_val, best_val_test
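
Note that wandb.log presumes the caller has already initialized a run via wandb.init; this snippet only logs the per-epoch validation micro-F1.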
Example #7
def run(args):

    pruning.setup_seed(args['seed'])
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'])
    else:
        net_gcn = GATNet(args['embedding_dim'])
        g.add_edges(list(range(node_num)), list(range(node_num)))
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}
    for epoch in range(args['total_epoch']):

        net_gcn.train()  # switch back to train mode (eval() is set during validation below)
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

        print(
            "(Baseline) Epoch:[{}] LOSS:[{:.2f}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
            .format(epoch, loss, acc_val * 100, acc_test * 100,
                    best_val_acc['val_acc'] * 100,
                    best_val_acc['test_acc'] * 100, best_val_acc['epoch']))

    print(
        "syd final: [{},{}] (Baseline) Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
        .format(args['dataset'], args['net'], best_val_acc['val_acc'] * 100,
                best_val_acc['test_acc'] * 100, best_val_acc['epoch']))