Exemplo n.º 1
0
    def __init__(self):
        """Create the empty rating containers, then load and summarise the data.

        Construction is eager: after the attributes are initialised it calls,
        in order, ``generate_data_set``, ``get_data_statistics``,
        ``get_cold_start_users`` and ``get_full_users``.
        """
        super(RatingGetter, self).__init__()
        self.config = ConfigX()

        # Lookup tables, all empty at this point.
        self.user, self.item = {}, {}
        self.all_User, self.all_Item = {}, {}
        self.id2user, self.id2item = {}, {}

        # Train split as nested dicts.
        self.trainSet_u, self.trainSet_i = defaultdict(dict), defaultdict(dict)
        # Test split, stored by hierarchy user:[item,rating] and
        # item:[user,rating] respectively.
        self.testSet_u, self.testSet_i = defaultdict(dict), defaultdict(dict)
        # Test-set rating information of cold-start users.
        self.testColdUserSet_u = defaultdict(dict)
        # Users with sufficient training data in the train set.
        self.trainHotUserSet = []
        self.trainSetLength = self.testSetLength = 0

        # Mean rating per user, per item, and overall.
        self.userMeans, self.itemMeans = {}, {}
        self.globalMean = 0

        # Build the train/test sets.
        self.generate_data_set()
        # Compute user, item and global rating means.
        self.get_data_statistics()
        # Collect the cold-start users.
        self.get_cold_start_users()
        # Collect the users with sufficient training data.
        self.get_full_users()
Exemplo n.º 2
0
    def __init__(self, k):
        """Create the empty rating containers for run ``k`` and load the data.

        ``k`` is stored unchanged as ``self.k_current`` (presumably the
        cross-validation fold index -- confirm against the caller). After the
        attributes are initialised the constructor calls, in order,
        ``generate_data_set``, ``getDataSet``, ``get_data_statistics`` and
        ``get_cold_start_users``.
        """
        super(RatingGetter, self).__init__()
        self.config = ConfigX()
        self.k_current = k

        # Lookup tables, all empty at this point.
        self.user, self.item = {}, {}
        self.all_User, self.all_Item = {}, {}
        self.id2user, self.id2item = {}, {}

        self.dataSet_u = defaultdict(dict)
        self.trainSet_u, self.trainSet_i = defaultdict(dict), defaultdict(dict)
        # Test set, stored by hierarchy user:[item,rating] and
        # item:[user,rating] respectively.
        self.testSet_u, self.testSet_i = defaultdict(dict), defaultdict(dict)
        # Cold start users in test set.
        self.testColdUserSet_u = defaultdict(dict)
        # Hot users in train set.
        self.trainHotUserSet = []
        self.trainSetLength = self.testSetLength = 0

        # Mean values of users' and items' ratings, plus the global mean.
        self.userMeans, self.itemMeans = {}, {}
        self.globalMean = 0

        # Generate train and test set, then derive the dependent structures.
        self.generate_data_set()
        self.getDataSet()
        self.get_data_statistics()
        self.get_cold_start_users()
Exemplo n.º 3
0
 def __init__(self):
     """Create empty data/network containers and derive the network file paths.

     The two file paths are built from ``self.config.dataset_name``.
     """
     # Nested dicts keyed two levels deep (user side and item side).
     self.dataSet_u, self.dataSet_i = defaultdict(dict), defaultdict(dict)
     # Adjacency-style containers: each key maps to a list.
     self.CUNet, self.CINet = defaultdict(list), defaultdict(list)

     self.config = ConfigX()
     dataset = self.config.dataset_name
     self.CU_file = "../data/net/%s_CUnet.txt" % dataset
     self.CI_file = "../data/net/%s_CInet.txt" % dataset
Exemplo n.º 4
0
    def __init__(self):
        """Create the configuration, two similarity matrices and neighbour maps."""
        super(SimBase, self).__init__()

        self.config = ConfigX()
        # One similarity matrix for users, one for items.
        self.user_sim, self.item_sim = SimMatrix(), SimMatrix()
        # Nested dicts for the k-neighbour results of users and items.
        self.user_k_neibor, self.item_k_neibor = (
            defaultdict(dict),
            defaultdict(dict),
        )
 def __init__(self):
     """Create the configuration, load the rating data and reset the metrics.

     Instantiating ``RatingGetter`` here means construction loads the rating
     data (per the original comment).
     """
     super(MF, self).__init__()
     self.config = ConfigX()
     # Loading rating data.
     self.rg = RatingGetter()
     # NOTE: a call to self.init_model() is disabled (commented out) here.
     # Per-iteration error histories, empty until training appends to them.
     self.iter_rmse, self.iter_mae = [], []
Exemplo n.º 6
0
    def __init__(self):
        """Create the configuration, print it, and reset the metric histories."""
        super(MF, self).__init__()
        self.config = ConfigX()
        # Print the configuration.
        cpprint(self.config.__dict__)

        # NOTE: creating self.rg = RatingGetter() and calling
        # self.init_model() are both disabled (commented out) here.
        # Per-iteration error histories, empty at construction time.
        self.iter_rmse, self.iter_mae = [], []
 def __init__(self):
     """Create the rating/trust getters and zero the six statistics counters."""
     super(DataStatis, self).__init__()
     self.config = ConfigX()
     # Loading rating data.
     self.rg = RatingGetter()
     self.tg = TrustGetter()

     # Counters for the individual cold conditions.
     self.cold_rating = self.cold_social = 0
     # Counters for the combined rating/social conditions.
     self.cold_rating_social = self.cold_rating_warm_social = 0
     self.warm_rating_cold_social = self.warm_rating_warm_social = 0
Exemplo n.º 8
0
    def __init__(self):
        """Create the trust containers and build the data set.

        ``self.get_relations()`` is invoked before ``followees``, ``followers``
        and the two matrices exist, so the statement order below matters and
        is kept as in the original. ``generate_data_set`` runs last.
        """
        super(TrustGetter, self).__init__()
        self.config = ConfigX()

        # Used to store the order of users.
        self.user = {}
        self.relations = self.get_relations()

        self.followees = defaultdict(dict)
        self.followers = {}
        self.matrix_User, self.matrix_Item = {}, {}

        self.generate_data_set()
Exemplo n.º 9
0
    def __init__(self):
        """Create the configuration, override walk settings, and set up containers.

        Five attributes are written onto the fresh ``ConfigX`` instance
        (``walkCount``, ``walkLength``, ``walkDim``, ``winSize``, ``topK``)
        before the similarity matrices and neighbour maps are created.
        """
        super(SimGe, self).__init__()
        self.config = ConfigX()

        # Settings applied on top of the ConfigX defaults, in this order.
        overrides = (
            ("walkCount", 30),
            ("walkLength", 20),
            ("walkDim", 20),
            ("winSize", 5),
            ("topK", 50),
        )
        for option, value in overrides:
            setattr(self.config, option, value)

        # One similarity matrix for users, one for items.
        self.user_sim, self.item_sim = SimMatrix(), SimMatrix()
        # Nested dicts for the k-neighbour results of users and items.
        self.user_k_neibor, self.item_k_neibor = (
            defaultdict(dict),
            defaultdict(dict),
        )
Exemplo n.º 10
0
    def __init__(self, fixseed=True):
        """Create both configurations, print diagnostics and optionally seed NumPy.

        Args:
            fixseed: when truthy, seed NumPy's global random generator with
                ``self.config.random_state``.
        """
        super(MF, self).__init__()
        self.config = ConfigX()
        self.configc = ConfigCUC()
        # Print the configuration.
        cpprint(self.config.__dict__)
        # Print data statistics for the rating file, then the trust file.
        print_data_file_stats(self.config.rating_path)
        print_data_file_stats(self.config.trust_path)

        if fixseed:
            # Fix the random seed.
            np.random.seed(self.config.random_state)

        # NOTE: creating self.rg = RatingGetter() and calling
        # self.init_model() are both disabled (commented out) here.
        # Per-iteration error histories, empty at construction time.
        self.iter_rmse, self.iter_mae = [], []
Exemplo n.º 11
0
    def __init__(self, k):
        """Create the meta/item containers for fold ``k`` and load its data.

        After the attributes are initialised the constructor calls, in order,
        ``generate_data_set``, ``getDataSet``, ``get_data_statistics`` and
        ``get_cold_start_metas``, then logs the sizes of the main structures
        at DEBUG level.

        Args:
            k: fold number, i.e. which fold to run; stored as
                ``self.k_current``.
        """
        super(MetaGetter, self).__init__()
        self.log = logging.getLogger('reader.MetaGetter')
        self.config = ConfigX()
        self.configc = ConfigCUC()
        self.k_current = k
        self.meta = {}  # key: id, value: serial number
        self.item = {}  # key: id, value: serial number
        self.all_Meta = {}  # equivalent to meta
        self.all_Item = {}  # equivalent to item
        self.id2meta = {}  # key: serial number, value: id
        self.id2item = {}  # key: serial number, value: id
        self.dataSet_m = defaultdict(dict)
        self.trainSet_m = defaultdict(dict)
        self.trainSet_i = defaultdict(dict)
        # used to store the test set by hierarchy meta:[item,rating]
        self.testSet_m = defaultdict(dict)
        # used to store the test set by hierarchy item:[meta,rating]
        self.testSet_i = defaultdict(dict)
        # cold start metas in test set
        self.testColdMetaSet_m = defaultdict(dict)
        self.trainHotMetaSet = []  # hot metas in train set
        self.trainSetLength = 0
        self.testSetLength = 0

        self.metaMeans = {}  # used to store the mean values per meta
        self.itemMeans = {}  # used to store the mean values per item
        self.globalMean = 0

        self.generate_data_set()  # generate train and test set
        self.getDataSet()
        self.get_data_statistics()
        self.get_cold_start_metas()

        # Pass values as logger arguments so the message is only formatted
        # when DEBUG is actually enabled; the emitted text is unchanged.
        log_debug = self.log.debug
        log_debug(" 准备好各种结构,便于使用 ")
        log_debug("all_Meta len: %s ", len(self.all_Meta))
        log_debug("all_Item len: %s ", len(self.all_Item))
        log_debug("meta len: %s ", len(self.meta))
        log_debug("item len: %s ", len(self.item))
        log_debug("id2meta len: %s ", len(self.id2meta))
        log_debug("id2item len: %s ", len(self.id2item))
        log_debug("嵌套 dict trainSet_m len: %s ", len(self.trainSet_m))
        log_debug("嵌套 dict trainSet_i len: %s ", len(self.trainSet_i))
Exemplo n.º 12
0
            self.loss += 0.5 * self.config.lambdaP * (self.P * self.P).sum(
            ) + 0.5 * self.config.lambdaQ * (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break


if __name__ == '__main__':
    # Script entry point: split the ratings into folds, then train and
    # evaluate a SocialReg model once per fold.

    # Disabled single-run variant (trains fold 0 and reports cold-start RMSE):
    # srg = SocialReg()
    # srg.train_model(0)
    # coldrmse = srg.predict_model_cold_users()
    # print('cold start user rmse is :' + str(coldrmse))
    # srg.show_rmse()

    # Local configuration used only for the fold split below.
    configx = ConfigX()
    configx.k_fold_num = 5
    configx.rating_path = "../data/ft_ratings.txt"
    configx.rating_cv_path = "../data/cv/"

    split_5_folds(configx)

    # Per-fold metrics returned by predict_model().
    rmses = []
    maes = []
    tcsr = SocialReg()
    # print(bmf.rg.trainSet_u[1])
    # NOTE(review): the loop bound comes from tcsr.config, not from the local
    # configx edited above -- confirm both carry the same k_fold_num.
    for i in range(tcsr.config.k_fold_num):
        print('the %dth cross validation training' % i)
        tcsr.train_model(i)
        rmse, mae = tcsr.predict_model()
        # NOTE(review): only rmse is recorded in the visible lines; maes is
        # not appended to here -- the snippet may be truncated.
        rmses.append(rmse)
Exemplo n.º 13
0
            cols[k][from_ind:to_ind] = items[k_index_list]
            vals[k][from_ind:to_ind] = rating_vals[k_index_list]
            nonzeros[k] += sum(k_index_list)

    if not os.path.exists('../data/cv'):
        os.makedirs('../data/cv')
        print('../data/cv folder has been established.')

    for k, (row, col, val,
            nonzero) in enumerate(zip(rows, cols, vals, nonzeros)):
        bucket_df = pd.DataFrame(
            {
                'user': row[:nonzero],
                'item': col[:nonzero],
                'rating': val[:nonzero]
            },
            columns=['user', 'item', 'rating'])
        bucket_df.to_csv("../data/cv/%s-%d.csv" % (configx.dataset_name, k),
                         sep=configx.sep,
                         header=False,
                         index=False)
        print("%s -fold%d data generated finished!" %
              (configx.dataset_name, k))

    print("All Data Generated Done!")


if __name__ == "__main__":
    # Script entry point: build a default configuration and pass it to
    # split_5_folds.
    configx = ConfigX()
    split_5_folds(configx)
Exemplo n.º 14
0
# encoding:utf-8
import sys

sys.path.append("..")

import numpy as np
from numpy.linalg import norm
from configx.configx import ConfigX

config = ConfigX()


def l1(x):
    """Return ``numpy.linalg.norm`` of ``x`` with ``ord=1``."""
    return norm(x, 1)


def l2(x):
    """Return ``numpy.linalg.norm`` of ``x`` using its default order."""
    return norm(x, ord=None)


def normalize(rating, minVal=config.min_val, maxVal=config.max_val):
    """Get the normalized value using min-max normalization.

    The default bounds are read from the module-level ``config`` once, when
    the function is defined.

    Args:
        rating: value to normalize.
        minVal: lower bound of the rating scale.
        maxVal: upper bound of the rating scale.

    Returns:
        ``(rating - minVal) / (maxVal - minVal) + 0.01`` when
        ``maxVal > minVal``; ``rating / maxVal`` when the bounds are equal.

    Raises:
        ArithmeticError: if neither ``maxVal > minVal`` nor
            ``maxVal == minVal`` holds (a message is printed first).
    """
    if maxVal > minVal:
        span = maxVal - minVal
        return float(rating - minVal) / span + 0.01
    if maxVal == minVal:
        return rating / maxVal
    print('error... maximum value is less than minimum value.')
    raise ArithmeticError


def roulette(_datas, _ps):
    """Draw one element of ``_datas`` at random, using ``_ps`` as probabilities.

    Thin wrapper around ``np.random.choice(_datas, p=_ps)``; it uses NumPy's
    global random state.
    """
    picked = np.random.choice(_datas, p=_ps)
    return picked


def save_walks(walks, result_path):
    """Write walks to a text file, one walk per line.

    Each walk is written as its node ids joined by single spaces and
    terminated by a newline. An existing file at ``result_path`` is
    overwritten.

    Args:
        walks: iterable of walks; each walk is an iterable of node ids. Ids
            are converted with ``str()``, so integer ids work as well as
            strings.
        result_path: path of the output file.
    """
    with open(result_path, "w") as f:
        # Hand writelines() an iterable of complete lines. The original
        # passed it a single str, which is iterated character by character,
        # and joined the ids without converting them to str first.
        f.writelines(' '.join(map(str, walk)) + '\n' for walk in walks)


if __name__ == '__main__':
    g = Graph()
    c = ConfigX()
    number_walks = 5
    path_length = 5

    G = load_edgelist(c.trust_path, undirected=True)
    walks = build_deepwalk_corpus(G, number_walks, path_length)
    # cpprint(walks)
    save_walks(walks, "../data/social_corpus.txt")

    print("Number of nodes: {}".format(len(G.nodes())))

    num_walks = len(G.nodes()) * number_walks

    print("Number of walks: {}".format(num_walks))

    data_size = num_walks * path_length