示例#1
0
文件: main.py 项目: lfywork/SRGNN_PyG
def main():
    """Train and evaluate the session-graph GNN configured by the global `opt`.

    Builds the train/test loaders, sets up TensorBoard logging under
    ../log/<dataset>/, then runs `opt.epoch` epochs of training each
    followed by an evaluation pass.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cur_dir = os.getcwd()
    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='test')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)

    # NOTE(review): the timestamp contains ':' characters, which are invalid
    # in Windows paths — confirm this only ever runs on POSIX systems.
    log_dir = cur_dir + '/../log/' + str(opt.dataset) + '/' + str(opt) + time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime())
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary sizes for the supported benchmark datasets; the
    # fallback (309) matches the bundled sample dataset.
    if opt.dataset == 'diginetica':
        n_node = 43097
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37483
    else:
        n_node = 309

    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)

    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        forward(model, train_loader, device, writer, epoch, top_k=opt.top_k, optimizer=optimizer, train_flag=True)
        with torch.no_grad():
            forward(model, test_loader, device, writer, epoch, top_k=opt.top_k, train_flag=False)
        # Since PyTorch 1.1, scheduler.step() must follow the epoch's
        # optimizer steps; stepping at epoch start skipped the initial lr.
        scheduler.step()
示例#2
0
def main():
    """Train the session GNN, or run prediction when `opt.predict` is set.

    Predict mode loads `opt.model_path`, writes a timestamped CSV to
    ../result/, and returns. Train mode logs to TensorBoard under
    ../log/<dataset>/ and saves the trained model to ../model/<dataset>/.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()
    if opt.predict:
        save_dir = cur_dir + '/../result/'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        save_path = save_dir + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M") + ".csv"
        test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='predict')
        test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)
        predict(opt.model_path, test_loader, save_path, device)
        return

    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='test')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)

    # Build a compact run name from the hyper-parameters worth distinguishing.
    need_field = {"lr", "epoch", "batch_size"}  # fixed typo: was "need_feild"
    log_name = "".join(k + "_" + str(v) for k, v in opt.__dict__.items() if k in need_field)
    log_dir = cur_dir + '/../log/' + str(opt.dataset) + '/' + log_name
    model_dir = cur_dir + '/../model/' + str(opt.dataset)
    model_path = cur_dir + '/../model/' + str(opt.dataset) + '/' + log_name + '.pth'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Bug fix: this message previously reported log_dir instead of the
    # actual model save path.
    logging.warning('model save to {}'.format(model_path))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary size per dataset; 309 matches the bundled sample data.
    node_d = {'diginetica': 43097, 'yoochoose1_64': 37483, 'yoochoose1_4': 37483, 'debias': 117538}
    n_node = node_d.get(opt.dataset, 309)
    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)

    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        # The scheduler is handed to forward(), which presumably steps it
        # after optimizer.step() — TODO confirm in forward's definition.
        forward(model, train_loader, device, writer, epoch, scheduler, top_k=opt.top_k, optimizer=optimizer, train_flag=True)
        with torch.no_grad():
            forward(model, test_loader, device, writer, epoch, top_k=opt.top_k, train_flag=False)
    torch.save(model, model_path)
示例#3
0
def entropy_on_new(cur_dir, now, opt, model, device, current_win, win_size):
    """Score the new window with model entropy and sample from it.

    Builds a temporary dataset named 'new<now>' from `current_win`, runs an
    entropy-scoring forward pass, deletes the temporary processed file, and
    delegates the actual sampling to `random_on_new` with the scores as
    probabilities.
    """
    phrase = 'new' + now
    dataset_root = cur_dir + '/../datasets/' + opt.dataset
    loader = DataLoader(
        MultiSessionsGraph(dataset_root, phrase=phrase, sampled_data=current_win),
        batch_size=opt.batch_size,
        shuffle=False)

    # Largest item id present in the window's sessions and labels.
    max_item = max(max(max(current_win[0])), max(current_win[1]))
    with torch.no_grad():
        probabilities = forward_entropy(model, loader, device, max_item)

    # Remove the temporary processed file created for this call.
    os.remove('../datasets/' + opt.dataset + '/processed/' + phrase + '.pt')

    return random_on_new(current_win, win_size, p=probabilities)
示例#4
0
def entropy_on_union(cur_dir,
                     now,
                     opt,
                     model,
                     device,
                     current_res,
                     current_win,
                     win_size,
                     ent='entropy'):
    """Sample from the union of the reservoir and the new window.

    The union R' = R U R^{new} is scored with the selected uncertainty
    measure, and the resulting probabilities weight the sampling done by
    `random_on_union`.

    Args:
        cur_dir: directory the dataset paths are resolved relative to.
        now: timestamp suffix keeping the temporary dataset file unique.
        opt: parsed options (dataset name, batch size, ...).
        model: model used to score sessions.
        device: torch device for the forward pass.
        current_res: reservoir triple (sessions, labels, users).
        current_win: new-window triple (sessions, labels, users).
        win_size: reservoir capacity, passed through to the sampler.
        ent: scoring strategy: 'entropy', 'cross', or 'wass'.

    Raises:
        ValueError: if `ent` is not a recognized strategy. (Previously an
            unknown value left `pro` unbound and crashed later with
            UnboundLocalError.)
    """
    # R' = R U R^{new}
    uni_x = current_res[0] + current_win[0]
    uni_y = current_res[1] + current_win[1]
    uni_user = current_res[2] + current_win[2]
    uni_data = (uni_x, uni_y, uni_user)

    uni_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                     phrase='uni' + now,
                                     sampled_data=uni_data)
    uni_loader = DataLoader(uni_dataset,
                            batch_size=opt.batch_size,
                            shuffle=False)

    # Dispatch table replaces the if/elif chain; unknown keys fail fast.
    scorers = {
        'entropy': forward_entropy,
        'cross': forward_cross_entropy,
        'wass': forward_wass,
    }
    if ent not in scorers:
        raise ValueError('unknown entropy type: {!r}'.format(ent))

    # Largest item id in the new window; hoisted out of the branches
    # (it was computed identically in each one).
    max_item = max(max(max(current_win[0])), max(current_win[1]))
    with torch.no_grad():
        pro = scorers[ent](model, uni_loader, device, max_item)

    # Remove the temporary processed file created for this call.
    os.remove('../datasets/' + opt.dataset + '/processed/uni' + now + '.pt')

    return random_on_union(current_res, current_win, win_size, p=pro)
示例#5
0
def main():
    """Train the session-graph model and print hit/MRR metrics per epoch.

    Uses a fixed 'model_log' TensorBoard directory, so reruns on the same
    dataset overwrite earlier logs.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cur_dir = os.getcwd()
    # custom dataset
    train_dataset = MultiSessionsGraph(cur_dir + '/datasets/' + opt.dataset,
                                       phrase='train')
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/datasets/' + opt.dataset,
                                      phrase='test')
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             shuffle=False)

    log_dir = cur_dir + '/log/' + str(opt.dataset) + '/' + 'model_log'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary sizes for the known benchmark datasets; the fallback
    # (309) matches the bundled sample dataset.
    if opt.dataset == 'diginetica':
        n_node = 43097
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37483
    else:
        n_node = 309

    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt.lr_dc_step,
                                                gamma=opt.lr_dc)

    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        _, _ = forward(model,
                       train_loader,
                       device,
                       writer,
                       epoch,
                       top_k=opt.top_k,
                       optimizer=optimizer,
                       train_flag=True)
        with torch.no_grad():
            h, m = forward(model,
                           test_loader,
                           device,
                           writer,
                           epoch,
                           top_k=opt.top_k,
                           train_flag=False)
            print(h, m)
        # Since PyTorch 1.1, scheduler.step() must follow the epoch's
        # optimizer steps; stepping at epoch start skipped the initial lr.
        scheduler.step()
示例#6
0
def main():
    """Build DGL graph loaders, train the GNN, and log metrics each epoch."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cur_dir = os.getcwd()
    train_filename, test_filename = 'train.txt', 'test.txt'
    dataset_name = opt.dataset
    if 'yoochoose' in opt.dataset:
        # All yoochoose variants share one raw directory; the split files
        # are prefixed with the variant name.
        dataset_name = 'yoochoose'
        train_filename = opt.dataset + '-' + train_filename
        test_filename = opt.dataset + '-' + test_filename

    data_root = cur_dir + '/../../../_data/' + dataset_name

    def build_loader(filename, shuffle):
        """Load one split and wrap it in a GraphDataLoader."""
        dataset = MultiSessionsGraph(
            name=filename,
            raw_dir=data_root + '/processed/',
            save_dir=data_root + '/saved/',
            force_reload=True)
        # Sampler is constructed but currently unused (kept for reference).
        sampler = SubsetRandomSampler(torch.arange(len(dataset)))
        return GraphDataLoader(
            dataset,
            batch_size=opt.batch_size,
            # sampler=sampler,
            shuffle=shuffle,
            drop_last=False)

    train_loader = build_loader(train_filename, shuffle=True)
    test_loader = build_loader(test_filename, shuffle=False)

    log_dir = os.path.join(cur_dir, 'log', str(opt.dataset), str(opt))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary size: substring match covers the dataset name variants;
    # 309 is the fallback for the sample dataset.
    n_node = 309
    for key, size in (('diginetica', 43097), ('yoochoose', 37483)):
        if key in opt.dataset:
            n_node = size
            break

    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt.lr_dc_step,
                                                gamma=opt.lr_dc)

    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        forward(model,
                train_loader,
                device,
                writer,
                epoch,
                top_k=opt.top_k,
                optimizer=optimizer,
                train_flag=True)
        with torch.no_grad():
            forward(model,
                    test_loader,
                    device,
                    writer,
                    epoch,
                    top_k=opt.top_k,
                    train_flag=False)
        # Correct ordering: lr decay is applied after the epoch's updates.
        scheduler.step()