Python MovieLens примеры, torch_geometric.datasets.MovieLens Python примеры использования

Пример #1

0

Показать файл

Файл: pgat_recsys.py Проект: lizhedm/MADemo

    def __init__(self, num_recs, dataset_args, model_args, train_args):
        """
        Load the MovieLens graph data and build the PGATNetEx model.
        :param num_recs: stored unchanged on the instance
        :param dataset_args: keyword arguments for MovieLens; its 'debug'
            entry decides whether weights are loaded, dict
        :param model_args: extra keyword arguments for PGATNetEx, dict
        :param train_args: must contain 'device', dict
        """
        self.num_recs, self.train_args = num_recs, train_args

        graph = MovieLens(**dataset_args).data
        target_device = train_args['device']
        self.data = graph.to(target_device)

        net = PGATNetEx(
            self.data.num_nodes[0],
            self.data.num_relations[0],
            **model_args
        )
        if not dataset_args['debug']:
            # NOTE(review): torch.load receives the whole train_args dict
            # here rather than a file path or file object -- this looks like
            # a bug, but the intended weights location is not visible in
            # this file. Confirm and fix.
            state = torch.load(train_args)
            net.load_state_dict(state)
        self.model = net.to(target_device)

Пример #2

0

Показать файл

Файл: pgat_recsys.py Проект: 356255531/ZheMADemo

    def __init__(self, num_recs, dataset_args, model_args, device_args):
        """
        Load the MovieLens data, build a PAGATNet and try to restore weights.
        :param num_recs: stored unchanged on the instance
        :param dataset_args: keyword arguments for MovieLens, dict
        :param model_args: keyword arguments for PAGATNet plus a 'model_path'
            entry (weights directory). 'model_path' is deleted from the
            caller's dict in place before the rest is forwarded, dict
        :param device_args: must contain 'device', dict
        """
        self.num_recs = num_recs
        self.device_args = device_args

        self.dataset = MovieLens(**dataset_args)
        self.data = self.dataset.data.to(device_args['device'])

        # The weights file name depends on the dataset configuration suffix.
        model_path = model_args['model_path']
        model_path = os.path.join(
            model_path, 'weights{}.pkl'.format(self.dataset.build_suffix()))
        # 'model_path' is not forwarded to PAGATNet, so it is removed first.
        del model_args['model_path']
        self.model = PAGATNet(num_nodes=self.data.num_nodes[0],
                              **model_args).to(device_args['device'])

        self.model.eval()
        try:
            state_dict = torch.load(
                model_path, map_location=torch.device('cpu'))
            self.model.load_state_dict(state_dict)
        except Exception:
            # Best effort: any loading failure falls back to the randomly
            # initialised model. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            print("No weights found in {}! Try use random initialized model.".
                  format(model_path))
        else:
            print("Model from {} successfully loaded!".format(model_path))
        self.recommended = []

Пример #3

0

Показать файл

Файл: pgat_recsys.py Проект: 356255531/ZheMADemo

class PGATRecSys(object):
    """
    Recommender that scores popular MovieLens items for a newly built user
    with a PAGATNet model and attaches a path-based explanation to each item.

    ``build_user`` has to be called before ``get_recommendations`` or
    ``get_explanation``; both read attributes that ``build_user`` sets.
    """

    # Explanation types in the order in which their quotas fill the result.
    _EXPL_TYPES = ('IUI', 'UIU', 'IUDD', 'UICC')

    def __init__(self, num_recs, dataset_args, model_args, device_args):
        """
        Load the MovieLens data, build a PAGATNet and try to restore weights.
        :param num_recs: stored unchanged on the instance
        :param dataset_args: keyword arguments for MovieLens, dict
        :param model_args: keyword arguments for PAGATNet plus a 'model_path'
            entry (weights directory). 'model_path' is deleted from the
            caller's dict in place before the rest is forwarded, dict
        :param device_args: must contain 'device', dict
        """
        self.num_recs = num_recs
        self.device_args = device_args

        self.dataset = MovieLens(**dataset_args)
        self.data = self.dataset.data.to(device_args['device'])

        # The weights file name depends on the dataset configuration suffix.
        model_path = model_args['model_path']
        model_path = os.path.join(
            model_path, 'weights{}.pkl'.format(self.dataset.build_suffix()))
        # 'model_path' is not forwarded to PAGATNet, so it is removed first.
        del model_args['model_path']
        self.model = PAGATNet(num_nodes=self.data.num_nodes[0],
                              **model_args).to(device_args['device'])

        self.model.eval()
        try:
            state_dict = torch.load(
                model_path, map_location=torch.device('cpu'))
            self.model.load_state_dict(state_dict)
        except Exception:
            # Best effort: any loading failure falls back to the randomly
            # initialised model. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            print("No weights found in {}! Try use random initialized model.".
                  format(model_path))
        else:
            print("Model from {} successfully loaded!".format(model_path))
        # Item ids handed out by earlier get_recommendations calls.
        self.recommended = []

    def get_top_n_popular_items(self, n=10):
        """
        Get the n items with the highest 'movie_count' in self.data.ratings.
        The ratings are sorted by 'movie_count' (descending) and reduced to
        one row per 'iid'; the first n item ids are then looked up in
        self.data.items.
        :param n: the number of items, int
        :return: df: rows of the item dataframe for the popular items, in
            the item dataframe's own row order (not popularity order), df
        """
        ratings_df = self.data.ratings[0][['iid', 'movie_count']]
        ratings_df = ratings_df.sort_values(by='movie_count', ascending=False)
        ratings_df = ratings_df.drop_duplicates(subset=['iid'])
        popular_iids = ratings_df.iid.tolist()[:n]

        item_df = self.data.items[0]
        return item_df[item_df.iid.isin(popular_iids)]

    def build_user(self, iids, demographic_info):
        """
        Build a new user node from the selected items and demographics.
        Sets base_iids, demographic_info, new_user_nid, new_edge_index,
        new_path, new_user_emb and propagated_new_user_emb on the instance.
        :param iids: user selected item ids, list
        :param demographic_info: (gender, occupation), tuple
        :return: None
        """
        device = self.device_args['device']
        self.base_iids = iids
        self.demographic_info = demographic_info

        # The new user gets the first node id after the existing embeddings.
        self.new_user_nid = self.model.node_emb.weight.shape[0]

        # Edges from every selected item and both demographic nodes to the
        # new user node.
        e2nid = self.data.e2nid[0]
        new_user_gender_nid = e2nid['gender'][demographic_info[0]]
        new_user_occ_nid = e2nid['occ'][int(demographic_info[1])]
        i_nids = [e2nid['iid'][iid] for iid in iids]
        row = i_nids + [new_user_gender_nid, new_user_occ_nid]
        col = [self.new_user_nid] * len(row)
        self.new_edge_index = torch.from_numpy(
            np.array([row, col])).long().to(device)

        # Join the existing edges with the new user's edges into paths.
        new_path_np = utils.path.join(self.data.edge_index,
                                      self.new_edge_index)
        self.new_path = torch.from_numpy(new_path_np).long().to(device)

        # Get new user embedding by applying message passing. The embedding
        # is moved to the model's device; otherwise torch.cat below fails
        # whenever the model does not live on the CPU.
        self.new_user_emb = torch.nn.Embedding(
            1, self.model.node_emb.weight.shape[1], max_norm=1,
            norm_type=2.0).to(device)
        new_node_emb = torch.cat(
            (self.model.node_emb.weight, self.new_user_emb.weight), dim=0)
        self.propagated_new_user_emb = self.model(new_node_emb,
                                                  self.new_path)[0][-1, :]
        print('user building done...')

    @staticmethod
    def _select_with_quotas(rec_iids, exp, expl_types, rs_proportion,
                            limit=10):
        """
        Pick items per explanation-type quota, then pad with the rest.
        :param rec_iids: ranked candidate item ids, list
        :param exp: explanation for each entry of rec_iids, list
        :param expl_types: explanation type for each entry of rec_iids, list
        :param rs_proportion: quota per explanation type; needs the keys
            'IUI', 'UIU', 'IUDD' and 'UICC', dict
        :param limit: maximum number of returned items, int
        :return: (item ids, explanations), index-aligned lists
        """
        chosen = []
        for wanted in PGATRecSys._EXPL_TYPES:
            matches = [
                idx for idx, expl_type in enumerate(expl_types)
                if expl_type == wanted
            ]
            chosen.extend(matches[:rs_proportion[wanted]])

        # Pad with the candidates not chosen above. Working on indices keeps
        # every padded item paired with its own explanation.
        chosen_iids = {rec_iids[idx] for idx in chosen}
        padding = [
            idx for idx, iid in enumerate(rec_iids) if iid not in chosen_iids
        ]

        order = (chosen + padding)[:limit]
        return [rec_iids[idx] for idx in order], [exp[idx] for idx in order]

    def get_recommendations(self, rs_proportion):
        """
        Recommend up to 10 popular items for the user built by build_user.
        Items already recommended or selected by the user are excluded. The
        returned item ids are appended to self.recommended.
        :param rs_proportion: number of items wanted per explanation type;
            needs the keys 'IUI', 'UIU', 'IUDD' and 'UICC', dict
        :return: (rec_item_df, final_exp): dataframe rows of the recommended
            items and the list of explanation strings, tuple
        """
        device = self.device_args['device']

        candidates = self.get_top_n_popular_items(200).iid
        excluded = set(self.recommended) | set(self.base_iids)
        rec_iids = [iid for iid in candidates if iid not in excluded]
        iid2nid = self.data.e2nid[0]['iid']
        rec_nids = [iid2nid[iid] for iid in rec_iids]

        # Keep only the paths whose last node is one of the candidates.
        path_np = self.data.path_np[0]
        mask = np.isin(path_np[-1, :], rec_nids)
        full_path_index = torch.from_numpy(path_np[:, mask]).to(device)
        propagated_node_emb = self.model(self.model.node_emb.weight,
                                         full_path_index)[0]
        rec_item_emb = propagated_node_emb[rec_nids, :]
        # Score = dot product of the user embedding and each item embedding.
        est_feedback = torch.sum(self.propagated_new_user_emb * rec_item_emb,
                                 dim=1).reshape(-1).cpu().detach().numpy()
        # NOTE(review): np.argsort is ascending, so the candidates with the
        # lowest estimated feedback come first -- confirm this is intended.
        rec_iids = [rec_iids[idx] for idx in np.argsort(est_feedback)]

        exp_tuple = [self.get_explanation(iid) for iid in rec_iids]
        exp = [expl for expl, _ in exp_tuple]
        expl_types = [expl_type for _, expl_type in exp_tuple]

        # NOTE(review): the result size is fixed at 10; self.num_recs is not
        # consulted anywhere in this class.
        final_rec_iids, final_exp = self._select_with_quotas(
            rec_iids, exp, expl_types, rs_proportion)

        self.recommended += final_rec_iids

        # NOTE(review): isin keeps the item dataframe's row order, which may
        # differ from the order of final_exp -- verify against the caller.
        item_df = self.data.items[0]
        rec_item_df = item_df[item_df.iid.isin(final_rec_iids)]

        return rec_item_df, final_exp

    def get_explanation(self, iid):
        """
        Explain a candidate item by the path with the highest attention.
        :param iid: item id of the candidate, key of e2nid['iid']
        :return: (expl, expl_type): explanation string and one of
            'UIU', 'IUI', 'IUDD', 'UICC', tuple
        """
        device = self.device_args['device']
        movie_nid = self.data.e2nid[0]['iid'][iid]

        # Hypothetical edges between the candidate item and the new user,
        # in both directions.
        row = [movie_nid, self.new_user_nid]
        col = [self.new_user_nid, movie_nid]
        expl_edge_index = torch.from_numpy(
            np.array([row, col])).long().to(device)
        exist_edge_index = torch.cat(
            (self.data.edge_index, self.new_edge_index), dim=1)
        new_path_np = utils.path.join(exist_edge_index, expl_edge_index)
        new_path = torch.from_numpy(new_path_np).long().to(device)
        new_node_emb = torch.cat(
            (self.model.node_emb.weight, self.new_user_emb.weight), dim=0)
        att = self.model.forward(new_node_emb, new_path)[1]
        # .cpu() is required before .numpy() when the paths live on a GPU.
        opt_path = new_path[:, torch.argmax(att)].cpu().numpy()

        # The entity at the start of the best path decides the explanation.
        e = self.data.nid2e[0][opt_path[0]]

        if e[0] == 'uid':
            expl = 'Uid0--Iid{}--Uid{}'.format(iid, e[1])
            expl_type = 'UIU'
        elif e[0] == 'iid':
            expl = 'Iid{}--Uid0--Iid{}'.format(iid, e[1])
            expl_type = 'IUI'
        elif e[0] in ('gender', 'occ'):
            expl = 'Iid{}--Uid0--DFType{}--DFValue{}'.format(iid, e[0], e[1])
            expl_type = 'IUDD'
        else:
            expl = 'Uid0--Iid{}--CFType{}--CFValue{}'.format(iid, e[0], e[1])
            expl_type = 'UICC'

        return expl, expl_type

Пример #4

0

Показать файл

    'batch_size': args.batch_size,
    'weight_decay': args.weight_decay,
    'lr': args.lr,
    'device': device,
    'weights_folder': weights_folder,
    'logger_folder': logger_folder
}
rec_args = {'num_recs': args.num_recs}
# Dump every parameter group, one line each, before the run starts.
print(
    'dataset params: {}'.format(dataset_args),
    'task params: {}'.format(model_args),
    'train params: {}'.format(train_args),
    'rec params: {}'.format(rec_args),
    sep='\n'
)

# Script entry point: load the data, build the model and optimizer, and
# start the wall-clock timer.
if __name__ == '__main__':
    # Dedicated random generator with a fixed seed (2019).
    randomizer = random.Random(2019)
    dataset = MovieLens(**dataset_args)
    # Move the dataset's data object to the configured device.
    dataset.data = dataset.data.to(train_args['device'])
    data = dataset.data
    # Each map is stored wrapped in a sequence on the data object; take the
    # first element of each.
    train_pos_unid_inid_map, test_pos_unid_inid_map, neg_unid_inid_map = \
        data.train_pos_unid_inid_map[0], data.test_pos_unid_inid_map[0], data.neg_unid_inid_map[0]

    model = PAGATNet(num_nodes=dataset.data.num_nodes[0],
                     **model_args).to(train_args['device'])

    optimizer = Adam(model.parameters(),
                     lr=train_args['lr'],
                     weight_decay=train_args['weight_decay'])
    # Wait for pending CUDA work so it is not counted in the timing below.
    if torch.cuda.is_available():
        torch.cuda.synchronize()

    t_start = time.perf_counter()

Пример #5

0

Показать файл

from torch.nn import Linear

import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.nn import SAGEConv, to_hetero

# Command-line options: a single boolean flag, off by default.
parser = argparse.ArgumentParser()
parser.add_argument('--use_weighted_loss',
                    action='store_true',
                    help='Whether to use weighted MSE loss.')
args = parser.parse_args()

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The dataset directory is resolved relative to this script's location.
path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/MovieLens')
dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
# Take the first data object of the dataset and move it to the device.
data = dataset[0].to(device)

# Add user node features for message passing:
# (an identity matrix, i.e. one one-hot row per user node)
data['user'].x = torch.eye(data['user'].num_nodes, device=device)
# NOTE(review): presumably the explicit num_nodes is dropped because it is
# now implied by the shape of x -- confirm against torch_geometric docs.
del data['user'].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Remove "reverse" label.

# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,

Пример #6

0

Показать файл

import json

import os

import pandas as pd
from torch_geometric.datasets import MovieLens

# NOTE(review): these API keys are hard-coded in the source; consider
# loading them from the environment or a config file instead.
key1, key2, key3, key4 = 'e760129c', 'e44e5305', '8403a97b', '192c6b0e'
# Key currently in use; chosen per movie index in the loop below.
apikey = ''

root = osp.join('.', 'tmp', 'ml')
data = MovieLens(root=root, name='1m', num_core=10).data
movies = data.items[0]

director_list, actor_list = [], []

for i, (title, year) in enumerate(zip(movies.title, movies.year)):
    # Switch to the next key every 1000 movies. From index 4000 onwards no
    # branch matches, so the previously assigned key stays in effect.
    if 0 <= i < 1000:
        apikey = key1
    elif 1000 <= i < 2000:
        apikey = key2
    elif 2000 <= i < 3000:
        apikey = key3
    elif 3000 <= i < 4000:
        apikey = key4

Python MovieLens примеры использования