Пример #1
0
def main(args):
    # validation
    validation = args.validation
    # data
    dl = Data.LoadData(args.dataset, validation=validation)
    n_users = dl.n_users
    n_items = dl.n_items
    u_feats = dl.u_feats
    v_feats = dl.v_feats
    features_u = dl.features_u
    features_v = dl.features_v
    features_e = dl.e_feats
    t = dl.t
    label = dl.label
    # head & tail
    if validation:
        head, tail = dl.train
        head_v, tail_v = dl.val
        head_t, tail_t = dl.test
        n_train = dl.n_train
        n_val = dl.n_val
        n_test = dl.n_test
    else:
        head, tail = dl.train
        head_t, tail_t = dl.test
        n_train = dl.n_train
        n_test = dl.n_test
    # graph
    g = dgl.bipartite(list(zip(head, tail)),
                      'user',
                      'edit',
                      'item',
                      num_nodes=(n_users, n_items))
    if validation:
        g_v = dgl.bipartite(list(
            zip(np.concatenate([head, head_v]), np.concatenate([tail,
                                                                tail_v]))),
                            'user',
                            'edit',
                            'item',
                            num_nodes=(n_users, n_items))
        g_t = dgl.bipartite(list(
            zip(np.concatenate([head, head_v, head_t]),
                np.concatenate([tail, tail_v, tail_t]))),
                            'user',
                            'edit',
                            'item',
                            num_nodes=(n_users, n_items))
    else:
        g_t = dgl.bipartite(list(
            zip(np.concatenate([head, head_t]), np.concatenate([tail,
                                                                tail_t]))),
                            'user',
                            'edit',
                            'item',
                            num_nodes=(n_users, n_items))
    # cuda
    gpu = args.gpu
    use_cuda = gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        features_u = features_u.cuda()
        features_v = features_v.cuda()
        features_e = features_e.cuda()
        t = t.cuda()
        label = label.cuda()
        g.to(torch.device('cuda:{}'.format(gpu)))
        if validation:
            g_v.to(torch.device('cuda:{}'.format(gpu)))
        g_t.to(torch.device('cuda:{}'.format(gpu)))

    # sampler
    sampler = utils.Sample(g, num_negs=args.num_neg, num_nei=args.num_nei)

    # batch, train
    n_edges = g.number_of_edges()
    learn = args.learn
    batch_size = args.batch_size
    batch_size_test = args.batch_size_test
    num_heads = args.num_heads
    in_feats_u = features_u.shape[1]
    in_feats_v = features_v.shape[1]
    in_feats_t = args.in_feats_t
    in_feats_e = features_e.shape[1]
    in_feats_s = args.in_feats_s
    in_feats_m = in_feats_s * 2 + in_feats_t + in_feats_e
    out_feats = args.out_feats
    # model, loss function, optimizer
    if args.advanced:
        model = Model.AdvancedTGN(in_feats_u,
                                  in_feats_v,
                                  in_feats_m,
                                  in_feats_t,
                                  in_feats_e,
                                  in_feats_s,
                                  out_feats,
                                  num_heads,
                                  activation=torch.tanh,
                                  method=args.message,
                                  dropout=args.dropout,
                                  use_cuda=use_cuda)
    else:
        model = Model.TGNBasic(in_feats_m,
                               in_feats_u,
                               in_feats_v,
                               in_feats_t,
                               in_feats_e,
                               in_feats_s,
                               out_feats,
                               num_heads,
                               activation=torch.tanh,
                               method=args.message,
                               dropout=args.dropout,
                               use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    nc = utils.GraphNC(out_feats * 2, 80, 10, args.dropout)
    if use_cuda:
        nc.cuda()
    nc.train()
    opt_nc = torch.optim.Adam(nc.parameters(),
                              lr=args.lr_p,
                              weight_decay=args.decay)
    loss_nc = nn.BCELoss()
    # training loop
    n_epochs = args.n_epochs
    log_every = args.log_every
    eval_every = args.eval_every
    # load params
    if args.advanced:
        model.load_state_dict(
            torch.load('best_params_advanced_{}.pth'.format(args.dataset)))
        model.eval()
    else:
        model.load_state_dict(
            torch.load('best_params_{}.pth'.format(args.dataset)))
        model.eval()
    if validation:
        future_task(n_epochs, batch_size, batch_size_test, use_cuda, gpu,
                    model, nc, loss_nc, opt_nc, sampler, args.num_neg, n_users,
                    n_items, in_feats_s, out_feats, n_train, n_val, g_v, g_t,
                    head, tail, head_v, tail_v, head_t, tail_t, features_u,
                    features_v, features_e, t, label, log_every, eval_every,
                    validation, args.advanced)
    else:
        future_task(n_epochs, batch_size, batch_size_test, use_cuda, gpu,
                    model, nc, loss_nc, opt_nc, sampler, args.num_neg, n_users,
                    n_items, in_feats_s, out_feats, n_train, n_train, g_t, g_t,
                    head, tail, head_t, tail_t, head_t, tail_t, features_u,
                    features_v, features_e, t, label, log_every, eval_every,
                    validation, args.advanced)
Пример #2
0
def main(args):
    # validation
    validation = args.validation
    # data
    dl = Data.LoadData(args.dataset, validation=validation)
    n_users = dl.n_users
    n_items = dl.n_items
    u_feats = dl.u_feats
    v_feats = dl.v_feats
    features_u = dl.features_u
    features_v = dl.features_v
    features_e = dl.e_feats
    t = dl.t
    label = dl.label
    # head & tail
    if validation:
        head, tail = dl.train
        head_v, tail_v = dl.val
        head_t, tail_t = dl.test
        n_train = dl.n_train
        n_val = dl.n_val
        n_test = dl.n_test
        new_val = dl.new_val
        new_test = dl.new_test
    else:
        head, tail = dl.train
        head_t, tail_t = dl.test
        n_train = dl.n_train
        n_test = dl.n_test
        new_test = dl.new_test
    # graph
    g = dgl.bipartite(list(zip(head, tail)),
                      'user',
                      'edit',
                      'item',
                      num_nodes=(n_users, n_items))
    if validation:
        g_v = dgl.bipartite(list(
            zip(np.concatenate([head, head_v]), np.concatenate([tail,
                                                                tail_v]))),
                            'user',
                            'edit',
                            'item',
                            num_nodes=(n_users, n_items))
        g_t = dgl.bipartite(list(
            zip(np.concatenate([head, head_v, head_t]),
                np.concatenate([tail, tail_v, tail_t]))),
                            'user',
                            'edit',
                            'item',
                            num_nodes=(n_users, n_items))
    else:
        g_t = dgl.bipartite(list(
            zip(np.concatenate([head, head_t]), np.concatenate([tail,
                                                                tail_t]))),
                            'user',
                            'edit',
                            'item',
                            num_nodes=(n_users, n_items))
    # cuda
    gpu = args.gpu
    use_cuda = gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        features_u = features_u.cuda()
        features_v = features_v.cuda()
        features_e = features_e.cuda()
        t = t.cuda()
        label = label.cuda()
        g.to(torch.device('cuda:{}'.format(gpu)))
        if validation:
            g_v.to(torch.device('cuda:{}'.format(gpu)))
        g_t.to(torch.device('cuda:{}'.format(gpu)))

    # sampler
    sampler = utils.Sample(g, num_negs=args.num_neg, num_nei=args.num_nei)

    # batch, train
    n_edges = g.number_of_edges()
    learn = args.learn
    batch_size = args.batch_size
    batch_size_test = args.batch_size_test
    num_heads = args.num_heads
    in_feats_u = features_u.shape[1]
    in_feats_v = features_v.shape[1]
    in_feats_t = args.in_feats_t
    in_feats_e = features_e.shape[1]
    in_feats_s = args.in_feats_s
    in_feats_m = in_feats_s * 2 + in_feats_t + in_feats_e
    out_feats = args.out_feats

    # model, loss function, optimizer
    model = Model.AdvancedTGN(in_feats_u,
                              in_feats_v,
                              in_feats_m,
                              in_feats_t,
                              in_feats_e,
                              in_feats_s,
                              out_feats,
                              num_heads,
                              activation=torch.tanh,
                              method=args.message,
                              dropout=args.dropout,
                              use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    loss_func = utils.Unsuper_Cross_Entropy()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.decay)
    model.train()
    # edge or learn
    if learn == 'node':
        nc = utils.GraphNC(out_feats * 2, 80, 10, args.dropout)
        if use_cuda:
            nc.cuda()
        nc.train()
        opt_nc = torch.optim.Adam(nc.parameters(),
                                  lr=args.lr_p,
                                  weight_decay=args.decay)
        loss_nc = nn.BCELoss()
    # training loop
    n_epochs = args.n_epochs
    log_every = args.log_every
    eval_every = args.eval_every
    iter_pos = []
    iter_neg = []
    iter_d = []
    iter_t = []
    eval_res = {
        'eval_ap': [],
        'eval_auc': [],
        'best_eval_ap': 0,
        'best_eval_auc': 0,
        'test_ap': [],
        'test_auc': [],
        'best_test_ap': 0,
        'best_test_auc': 0
    }
    m_raw = torch.zeros(features_e.shape[0], in_feats_m)
    m_raw_r = torch.zeros(features_e.shape[0], in_feats_m)
    if use_cuda:
        m_raw = m_raw.cuda()
        m_raw_r = m_raw_r.cuda()
    print('Training Embedding...')
    for epoch in range(n_epochs):
        time_epoch_0 = time.time()
        # each epoch
        si, sj = torch.zeros(n_users,
                             in_feats_s), torch.zeros(n_items, in_feats_s)
        zi, zj = torch.zeros(n_users,
                             out_feats), torch.zeros(n_items, out_feats)
        ## cuda
        if use_cuda:
            si, sj = si.cuda(), sj.cuda()
            zi, zj = zi.cuda(), zj.cuda()
        time_step = time.time()
        for start in range(0, head.shape[0], batch_size):
            step = start // batch_size + 1
            end = start + batch_size
            if end > head.shape[0]:
                end = head.shape[0]
            head_b = head[start:end]
            tail_b = tail[start:end]
            # sample
            pos_graph, pos_graph_r, neg_graph, \
            pos_graph_v, neg_graph_v, \
            extra_v_u_id, extra_u_v_id, extra_neg_id = sampler.obtain_Bs(head_b, tail_b, start)
            ## cuda
            if use_cuda:
                pos_graph.to(torch.device('cuda:{}'.format(gpu)))
                pos_graph_r.to(torch.device('cuda:{}'.format(gpu)))
                neg_graph.to(torch.device('cuda:{}'.format(gpu)))
                pos_graph_v.to(torch.device('cuda:{}'.format(gpu)))
                neg_graph_v.to(torch.device('cuda:{}'.format(gpu)))
            # id
            head_id = pos_graph.srcdata[dgl.NID]
            tail_id = pos_graph.dstdata[dgl.NID]
            head_id_r = pos_graph_r.srcdata[dgl.NID]
            tail_id_r = pos_graph_r.dstdata[dgl.NID]
            head_id_neg = neg_graph.srcdata[dgl.NID]
            tail_id_neg = neg_graph.dstdata[dgl.NID]
            head_id_out = pos_graph_v.srcdata[dgl.NID]
            tail_id_out = pos_graph_v.dstdata[dgl.NID]
            # input
            si_b, sj_b = si[head_id], sj[tail_id]
            si_b_r, sj_b_r = sj[head_id_r], si[tail_id_r]
            si_b_n, sj_b_n = si[head_id_neg], sj[tail_id_neg]
            vi_b, vj_b = features_u[head_id], features_v[tail_id]
            vi_b_r, vj_b_r = features_v[head_id_r], features_u[tail_id_r]
            vi_b_n, vj_b_n = features_u[head_id_neg], features_v[tail_id_neg]
            e_b = torch.cat([features_e[extra_u_v_id], features_e[start:end]],
                            dim=0)
            e_b_r = torch.cat(
                [features_e[extra_v_u_id], features_e[start:end]], dim=0)
            e_b_n = torch.cat(
                [features_e[extra_neg_id], features_e[start:end]], dim=0)
            t_b = torch.cat([t[extra_u_v_id], t[start:end]])
            t_b_r = torch.cat([t[extra_v_u_id], t[start:end]])
            t_b_n = torch.cat([t[extra_neg_id], t[start:end]])
            m_raw_b = torch.cat([m_raw[extra_u_v_id], m_raw[start:end]])
            m_raw_r_b = torch.cat([m_raw_r[extra_v_u_id], m_raw_r[start:end]])
            m_raw_n_b = torch.cat([m_raw[extra_neg_id], m_raw[start:end]])
            time_load = time.time()
            # forward
            zi_b, zj_b, zn_b, si_b2, sj_b2, m_raw_i_b, m_raw_j_b = model.forward(
                pos_graph, pos_graph_r, neg_graph, si_b, sj_b, si_b_r, sj_b_r,
                si_b_n, sj_b_n, m_raw_b, m_raw_r_b, m_raw_n_b, e_b, e_b_r,
                e_b_n, t_b, t_b_r, t_b_n, vi_b, vj_b, vi_b_r, vj_b_r, vi_b_n,
                vj_b_n)
            # loss / backward
            loss = loss_func(zi_b, zj_b, zn_b, pos_graph_v, neg_graph_v,
                             use_cuda)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # log time
            time_train = time.time()
            # overwrite
            model.eval()
            with torch.no_grad():
                si[head_id_out], sj[tail_id_out] = si_b2, sj_b2
                zi[head_id_out], zj[tail_id_out] = zi_b, zj_b
                m_raw[extra_u_v_id + list(range(start, end))] = m_raw_i_b
                m_raw_r[extra_v_u_id + list(range(start, end))] = m_raw_j_b
            model.train()
            # log
            edge_pos = pos_graph.number_of_edges()
            edge_neg = neg_graph.number_of_edges()
            iter_pos.append(edge_pos / (time_train - time_step))
            iter_neg.append(edge_neg / (time_train - time_step))
            iter_d.append(time_load - time_step)
            iter_t.append(time_train - time_load)
            if step % log_every == 0:
                if step // 3 == 0:
                    print(
                        'Epoch {:05d} | Step {:05d} | Loss {:.4f} | '
                        'Speed (samples/sec) {:.4f} & {:.4f} | Load Time(sec) {:.4f} | Train Time(sec) {:.4f}'
                        .format(epoch + 1, step, loss.item(),
                                np.mean(iter_pos), np.mean(iter_neg),
                                np.mean(iter_d), np.mean(iter_t)))
                else:
                    print(
                        'Epoch {:05d} | Step {:05d} | Loss {:.4f} | '
                        'Speed (samples/sec) {:.4f} & {:.4f} | Load Time(sec) {:.4f} | Train Time(sec) {:.4f}'
                        .format(epoch + 1, step, loss.item(),
                                np.mean(iter_pos[3:]), np.mean(iter_neg[3:]),
                                np.mean(iter_d[3:]), np.mean(iter_t[3:])))
            time_step = time.time()
        print('\n')
        print('Embedding Has Been Trained!')
        if epoch % eval_every == 0:
            print('Start Evaling...')
            time_ev_0 = time.time()
            if validation:
                link_pre_res = utils.link_pre(model,
                                              utils.Sample(
                                                  g_v,
                                                  num_negs=args.num_neg,
                                                  num_nei=args.num_nei),
                                              n_train,
                                              head_v,
                                              tail_v,
                                              batch_size_test,
                                              features_u[:n_train + n_val],
                                              features_v[:n_train + n_val],
                                              features_e[:n_train + n_val],
                                              t[:n_train + n_val],
                                              n_users,
                                              n_items,
                                              in_feats_s,
                                              out_feats,
                                              args.inductive,
                                              new_val,
                                              use_cuda,
                                              gpu,
                                              advanced=True)
                if epoch > 0:
                    eval_res['eval_ap'].append(link_pre_res['AP'])
                if epoch >= 20 and link_pre_res['AP'] < min(
                        eval_res['eval_ap'][-10:]):
                    break
                if epoch > 0 and link_pre_res['AP'] > eval_res['best_eval_ap']:
                    if args.inductive:
                        torch.save(
                            model.state_dict(),
                            'best_params_advanced_inductive_{}.pth'.format(
                                args.dataset))
                    elif args.message != 'last':
                        torch.save(
                            model.state_dict(),
                            'best_params_advanced_mean_{}.pth'.format(
                                args.dataset))
                    else:
                        torch.save(
                            model.state_dict(),
                            'best_params_advanced_{}.pth'.format(args.dataset))
                    eval_res['best_eval_ap'] = link_pre_res['AP']
                    eval_res['best_eval_auc'] = link_pre_res['AUC']
                    print('Testing...')
                    test_res = utils.link_pre(model,
                                              utils.Sample(
                                                  g_t,
                                                  num_negs=args.num_neg,
                                                  num_nei=args.num_nei),
                                              n_train + n_val,
                                              head_t,
                                              tail_t,
                                              batch_size_test,
                                              features_u,
                                              features_v,
                                              features_e,
                                              t,
                                              n_users,
                                              n_items,
                                              in_feats_s,
                                              out_feats,
                                              args.inductive,
                                              new_test,
                                              use_cuda,
                                              gpu,
                                              advanced=True)
                    eval_res['best_test_ap'] = test_res['AP']
                    eval_res['best_test_auc'] = test_res['AUC']
                time_ev_1 = time.time()
                print(
                    'Eval AP {:.4f} | Eval AUC {:.4f} | Eval Time(s): {:.4f}'.
                    format(link_pre_res['AP'], link_pre_res['AUC'],
                           time_ev_1 - time_ev_0))
                print('Best Eval AP {:.4f} | Best Eval AUC {:.4f} | '
                      'Best Test AP {:.4f} | Best Test AUC {:.4f}'.format(
                          eval_res['best_eval_ap'], eval_res['best_eval_auc'],
                          eval_res['best_test_ap'], eval_res['best_test_auc']))
            else:
                link_pre_res = utils.link_pre(model,
                                              utils.Sample(
                                                  g_t,
                                                  num_negs=args.num_neg,
                                                  num_nei=args.num_nei),
                                              n_train,
                                              head_t,
                                              tail_t,
                                              batch_size_test,
                                              features_u,
                                              features_v,
                                              features_e,
                                              t,
                                              n_users,
                                              n_items,
                                              in_feats_s,
                                              out_feats,
                                              args.inductive,
                                              new_test,
                                              use_cuda,
                                              gpu,
                                              advanced=True)
                if epoch > 0:
                    eval_res['test_ap'].append(link_pre_res['AP'])
                if epoch >= 20 and link_pre_res['AP'] < min(
                        eval_res['test_ap'][-10:]):
                    break
                if epoch > 0 and link_pre_res['AP'] > eval_res['best_test_ap']:
                    if args.inductive:
                        torch.save(
                            model.state_dict(),
                            'best_params_advanced_inductive_{}.pth'.format(
                                args.dataset))
                    elif args.message != 'last':
                        torch.save(
                            model.state_dict(),
                            'best_params_advanced_mean_{}.pth'.format(
                                args.dataset))
                    else:
                        torch.save(
                            model.state_dict(),
                            'best_params_advanced_{}.pth'.format(args.dataset))
                    eval_res['best_test_ap'] = link_pre_res['AP']
                    eval_res['best_test_auc'] = link_pre_res['AUC']
                time_ev_1 = time.time()
                print(
                    'Test AP {:.4f} | Test AUC {:.4f} | Eval Time(s): {:.4f}'.
                    format(link_pre_res['AP'], link_pre_res['AUC'],
                           time_ev_1 - time_ev_0))
                print('Best Test AP {:.4f} | Best Test AUC {:.4f}'.format(
                    eval_res['best_test_ap'], eval_res['best_test_auc']))
            print('\n')

        time_epoch_1 = time.time()
        print('Epoch Time(s): {:.4f}'.format(time_epoch_1 - time_epoch_0))

    # Future Dynamic Node Classification
    model.load_state_dict(
        torch.load('best_params_advanced_{}.pth'.format(args.dataset)))
    model.eval()
    if learn == 'node':
        if validation:
            NodeCL.future_task(n_epochs,
                               batch_size,
                               batch_size_test,
                               use_cuda,
                               gpu,
                               model,
                               nc,
                               loss_nc,
                               opt_nc,
                               sampler,
                               args.num_neg,
                               n_users,
                               n_items,
                               in_feats_s,
                               out_feats,
                               n_train,
                               n_val,
                               g_v,
                               g_t,
                               head,
                               tail,
                               head_v,
                               tail_v,
                               head_t,
                               tail_t,
                               features_u,
                               features_v,
                               features_e,
                               t,
                               label,
                               log_every,
                               eval_every,
                               validation,
                               advanced=True)
        else:
            NodeCL.future_task(n_epochs,
                               batch_size,
                               batch_size_test,
                               use_cuda,
                               gpu,
                               model,
                               nc,
                               loss_nc,
                               opt_nc,
                               sampler,
                               args.num_neg,
                               n_users,
                               n_items,
                               in_feats_s,
                               out_feats,
                               n_train,
                               n_train,
                               g_t,
                               g_t,
                               head,
                               tail,
                               head_t,
                               tail_t,
                               head_t,
                               tail_t,
                               features_u,
                               features_v,
                               features_e,
                               t,
                               label,
                               log_every,
                               eval_every,
                               validation,
                               advanced=True)

    print('\n')
    print('Finish!!')