Example #1
    def __init__(self, dim, alpha, lambda_, num_user_cluster, num_woi_cluster):
        self.users = []
        self.items = []
        self.item_cluster = []
        self.latent = dim * 3
        self.d = dim

        for i in range(num_user_cluster):
            self.users.append(OriginalUCBUserStruct(self.latent, lambda_, i))

        for i in range(1000):
            self.items.append(OriginalUCBItemStruct(dim))

        for i in range(25):
            self.item_cluster.append(OriginalUCBItemClusterStruct(dim))

        self.dim = dim

        self.alpha = alpha
        self.dic = {}
        self.item_dic = {}
        self.now_vec = None
        self.item_count = 0
        self.sim_dic = self.clac_itemsim()
        self.sim_tuple = self.choice_itemsim()
        self.first_flg = False

        self.item_model = UserCluster(25).model_load('item50_25.pkl')
Example #2
def user_clustering():
    users = []
    with open(FILE_DIR + file_path[0]) as f:
        for line in f:
            _, _, _, user_data, _ = InputData.split_data(line)
            if user_data not in users: users.append(user_data)
            if len(users) % 10000 == 0: print(len(users))
    print(len(users))

    kmeans = UserCluster(N_CLUSTERING)
    # NOTE: 'features' is not defined in this snippet; it is presumably built
    # from the collected user vectors elsewhere in the module
    kmeans.fit(features)
    joblib.dump(kmeans, 'kmeans.pkl')
    return kmeans
Example #3
def user_clustering():
    users = []
    with open('../analytics/stdev/usercluster200.csv') as f:
        for line in f:
            _, _, _, user_data, _ = InputData.split_data(line)
            if user_data not in users: users.append(user_data)
            if len(users) % 10000 == 0: print(len(users))
    print(len(users))

    kmeans = UserCluster(N_CLUSTERING)
    kmeans.fit(features)
    joblib.dump(kmeans, 'kmeans.pkl')
    return kmeans
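
Examples #2 and #3 only call into the UserCluster helper; its definition is not part of this page. For reference, a minimal sketch that is consistent with the calls seen in these examples (fit, model_load, predict_cluster and the joblib.dump above) might look like the following; this is an assumption, and the real class in user_clustering.py may differ.

import joblib
import numpy as np
from sklearn.cluster import KMeans


class UserCluster:
    # Hypothetical reconstruction of the helper used throughout these examples.
    def __init__(self, n_clusters):
        self.model = KMeans(n_clusters=n_clusters)

    def fit(self, features):
        # features: iterable of user feature vectors
        self.model.fit(np.array(features, dtype=float))
        return self

    def model_load(self, path):
        # the examples dump the whole UserCluster with joblib,
        # so loading simply returns the saved instance
        return joblib.load(path)

    def predict_cluster(self, user_data):
        # returns an array of cluster IDs; callers take element [0]
        return self.model.predict(np.array(user_data, dtype=float).reshape(1, -1))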
Example #4
if __name__ == "__main__":

    dimension = 6
    alpha = 0.3
    lambda_ = 0.1

    global reward
    global count

    # Instantiate the algorithms
    algorithms = {}
    # algorithms['Random'] = Random(dimension)
    algorithms['LinearedUCB'] = LinUCBAlgorithm(dimension, alpha, lambda_,
                                                N_CLUSTERING)
    algorithms['OriginalUCB'] = OriginalUCBAlgorithm(dimension, alpha, lambda_,
                                                     N_CLUSTERING)
    for name in algorithms.keys():
        algorithms[name].load_weight(name + '_weight_' + str(N_CLUSTERING) +
                                     '_3.csv')
        reward[name] = 0
        count[name] = 1

    # cluster_model = user_clustering()
    print("=====Enviroment Start=====")
    run_enviroment(
        algorithms,
        cluster_model=UserCluster(N_CLUSTERING).model_load('model' +
                                                           str(N_CLUSTERING) +
                                                           '_3.pkl'))
Example #5
    alpha = 1.0
    lambda_ = 0.1
    n_stdev_cluster = 50
    # model_file = 'model'+str(N_CLUSTERING)+'_3000000.pkl'
    model_file = 'model' + str(N_CLUSTERING) + '_3.pkl'
    global reward
    global count

    # Instantiate the algorithms
    algorithms = {}
    # algorithms['Random'] = Random(dimension)
    # algorithms['LinearedUCB'] = LinUCBAlgorithm(dimension, alpha, lambda_, N_CLUSTERING)
    algorithms['Hybrid_LinUCB'] = Hybrid_LinUCBAlgorithm(
        dimension, alpha, lambda_)
    # algorithms['CLUB'] = CLUBAlgorithm(dimension, alpha, lambda_, N_CLUSTERING, alpha)
    # algorithms['GOBLin'] = GOBLinAlgorithm(dimension, alpha, lambda_, N_CLUSTERING)
    # algorithms['CoLinUCB'] = CoLinUCBAlgorithm(dimension, alpha, lambda_, N_CLUSTERING)
    # algorithms['OriginalUCB'] = OriginalUCBAlgorithm(dimension, alpha, lambda_, N_CLUSTERING, n_stdev_cluster)
    for name in algorithms.keys():
        reward[name] = 1
        count[name] = 1

    # cluster_model = user_clustering()
    print("=====Enviroment Start=====")
    run_enviroment(
        algorithms,
        cluster_model=UserCluster(N_CLUSTERING).model_load(model_file))

    elapsed_time = time.time() - start
    print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
Example #6
class OriginalUCBAlgorithm:
    def __init__(self, dim, alpha, lambda_, num_user_cluster, num_woi_cluster):
        self.users = []
        self.items = []
        self.item_cluster = []
        self.latent = dim * 3
        self.d = dim

        for i in range(num_user_cluster):
            self.users.append(OriginalUCBUserStruct(self.latent, lambda_, i))

        for i in range(1000):
            self.items.append(OriginalUCBItemStruct(dim))

        for i in range(25):
            self.item_cluster.append(OriginalUCBItemClusterStruct(dim))

        self.dim = dim

        self.alpha = alpha
        self.dic = {}
        self.item_dic = {}
        self.now_vec = None
        self.item_count = 0
        self.sim_dic = self.clac_itemsim()
        self.sim_tuple = self.choice_itemsim()
        self.first_flg = False

        self.item_model = UserCluster(25).model_load('item50_25.pkl')

    def decide(self, userID, user_data, pool_articles):
        maxprob = -10000
        maxid = None
        for id_, article in pool_articles.items():
            id_ = int(id_)
            article = np.array(article)
            article_vec = copy.deepcopy(article)
            itemID = self.get_itemID(id_)
            # For a brand-new item, copy the extra context from its most similar item
            if self.items[itemID].time == 0:
                self.copy_vecs(id_, self.items[itemID])
            itemClusterID = self.get_itemClusterID(article)
            article = np.append(
                self.items[itemID].get_itemvec(article),
                self.item_cluster[itemClusterID].item_cluster_theta)
            prob_lin = self.users[userID].get_prob(self.alpha, article)
            prob = prob_lin
            if maxprob < prob:
                maxprob = prob
                maxid = id_
        return maxid

    def decide_try(self, userID, user_data, pool_articles):
        maxprob = -10000
        maxid = None

        for id_, article in pool_articles.items():
            id_ = int(id_)
            self.now_vec = article
            article = np.array(article)
            article_vec = copy.deepcopy(article)
            itemID = self.regist_itemID(id_, article)
            if self.items[itemID].time == 0:
                self.copy_vecs(id_, self.items[itemID])
            itemClusterID = self.get_itemClusterID(article)
            article = np.append(
                self.items[itemID].get_itemvec(article),
                self.item_cluster[itemClusterID].item_cluster_theta)
            prob_lin = self.users[userID].get_prob(
                self.alpha, article, self.item_cluster[itemClusterID])
            simIDs = self.get_itemsimID(self.sim_tuple[id_])
            prob_sim_item = self.items[itemID].get_CBF([
                self.items[simIDs[0]], self.items[simIDs[1]],
                self.items[simIDs[2]], self.items[simIDs[3]],
                self.items[simIDs[4]]
            ], self.users[userID].user_theta[:6], self.sim_dic)
            # prob = prob_lin  # extra vectors: clustervec, stdevvec; best so far as of 8/31 (final 1.253, best 1.253)
            prob = prob_lin + 0.8 * prob_sim_item  # extra vectors: clustervec, stdevvec + CBF-style score (top 5 similar items) + vector copy for new items; best so far as of 9/1 (final 1.272, best 1.272, 2nd run: 1.261) # right/top/bottom
            if maxprob < prob:
                maxprob = prob
                maxid = id_
        return maxid

    def copy_vecs(self, itemID, item):
        for i in range(5):
            if self.items[self.get_itemID(
                    self.sim_tuple[itemID][i])].high_n > 10:
                top_sim_item = self.items[self.get_itemID(
                    int(self.sim_tuple[itemID][i]))]
                item.high_n = top_sim_item.high_n
                item.high_average_context = copy.deepcopy(
                    top_sim_item.high_average_context)
                item.stdev_context = copy.deepcopy(top_sim_item.stdev_context)
                item.ave_distance_click = top_sim_item.ave_distance_click

    def get_itemID(self, article):
        if article not in self.dic:
            self.dic[article] = self.item_count
            self.item_count += 1
        return self.dic[article]

    def regist_itemID(self, article, article_vec):
        if article not in self.dic:
            self.dic[article] = self.item_count
            self.item_count += 1
            if len(self.dic) > 50:
                self.item_dic[article] = article_vec
                self.sim_dic = self.sim_calc()
                self.sim_tuple = self.choice_itemsim()
        return self.dic[article]

    def get_itemClusterID(self, article):
        return self.item_model.predict_cluster(article)[0]

    def get_itemsimID(self, sim_tuples):
        get_sim = lambda x: self.get_itemID(x)
        return list(map(get_sim, sim_tuples))

    def choice_itemsim(self):
        import copy
        sim_tuple = {}
        for id_, dics in self.sim_dic.items():
            keys = []
            values = []
            for id2_, sim in dics.items():
                keys.append(id2_)
                values.append(sim)
            values2 = copy.deepcopy(values)
            values.sort(reverse=True)
            # take the keys of the five largest similarities
            sim_tuple[id_] = (keys[values2.index(values[0])],
                              keys[values2.index(values[1])],
                              keys[values2.index(values[2])],
                              keys[values2.index(values[3])],
                              keys[values2.index(values[4])])
        return sim_tuple

    def update(self, userID, user_data, article_feature, click, article_id):
        user_data = np.array(user_data)
        article_feature = np.array(article_feature)
        itemClusterID = self.get_itemClusterID(article_feature)
        article_feature = self.items[self.get_itemID(article_id)].get_itemvec(
            article_feature)
        article_feature = np.append(
            article_feature,
            self.item_cluster[itemClusterID].get_itemcluster_vec())

        self.users[userID].update_parameters(article_feature, click, user_data)
        self.item_cluster[itemClusterID].update(article_feature, click,
                                                self.users[userID])
        if int(click) == 1:
            # self.users[userID].update_stdev(article_feature)
            self.items[self.get_itemID(article_id)].update_stdev(
                article_feature, self.users[userID])

    def clac_itemsim(self):
        with open('../data/itemdata.csv') as f:
            for line in f:
                line = line.split(',')
                self.item_dic[line[0]] = []
                for val in line[1:]:
                    self.item_dic[line[0]].append(float(val))

                if int(line[0]) not in self.dic:
                    self.dic[int(line[0])] = self.item_count
                    self.item_count += 1
        return self.sim_calc()

    def sim_calc(self):
        sim_dic = {}
        cos = lambda v1, v2: np.dot(v1, v2) / (np.linalg.norm(v1) *
                                               np.linalg.norm(v2))
        for id_, val in self.item_dic.items():
            sim_dic[int(id_)] = {}
            id_ = int(id_)
            for id2_, val2 in self.item_dic.items():
                id2_ = int(id2_)
                if id_ == id2_: continue
                sim_dic[id_][id2_] = cos(val, val2)
                if math.isnan(sim_dic[id_][id2_]): sim_dic[id_][id2_] = 0.2
        return sim_dic

    def save_weight(self, filename):
        with open('./weight/' + filename, 'w') as f:
            for userID in range(len(self.users)):
                for idx, weight in enumerate(list(self.users[userID].w)):
                    f.write(str(float(weight)))
                    if idx != 5:
                        f.write(',')
                    else:
                        f.write('\n')
                for idx, weight in enumerate(
                        list(self.users[userID].sigma.reshape(1, 36)[0])):
                    f.write(str(float(weight)))
                    if idx != 35:
                        f.write(',')
                    else:
                        f.write('\n')

    def load_weight(self, filename):
        count = 1
        userID = 0
        with open('./weight/' + filename) as f:
            for line in f:
                line = line.split(',')
                if (count - 1) % 2 == 0:
                    vec = []
                    for val in line:
                        vec.append(float(val))
                    self.users[userID].w = np.array(vec)
                elif count % 2 == 0:
                    matrix = []
                    for val in line:
                        matrix.append(float(val))
                    self.users[userID].sigma = np.array(matrix).reshape(6, 6)
                    userID += 1
                count += 1

    def memory_item_num(self):
        with open('../data/num_item_click.csv', 'w') as f:
            for id_, item in enumerate(self.items):
                f.write(str(id_) + ',' + str(item.high_n) + '\n')
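
The class above is exercised through run_enviroment in Examples #4 and #5, whose body is not shown on this page. Assuming the same InputData helper and log format as Example #7 and the global reward/count dictionaries from the main blocks, a replay-style evaluation loop along these lines would be a plausible caller; LOG_PATH is a placeholder, and this is a sketch rather than the repository's actual implementation.

def run_enviroment(algorithms, cluster_model):
    # Hypothetical replay loop (an assumption, not the repo's code): only log
    # lines whose displayed article matches the algorithm's choice contribute
    # an unbiased reward sample.
    with open(LOG_PATH) as f:  # LOG_PATH is a placeholder path to the click log
        for line in f:
            _, shown_id, click, user_data, pool = InputData.split_data(line)
            userID = cluster_model.predict_cluster(user_data)[0]
            for name, algo in algorithms.items():
                chosen = algo.decide(userID, user_data, pool)
                if chosen == int(shown_id):
                    count[name] += 1
                    reward[name] += int(click)
                    algo.update(userID, user_data, pool[str(chosen)],
                                click, chosen)
    for name in algorithms:
        print(name, reward[name] / count[name])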
Example #7
File: svd.py Project: chan-p/ReserchBandit
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import os
import sys

path = os.path.join(os.path.dirname(__file__), '../system/')
sys.path.append(path)

from input_data import InputData
from user_clustering import UserCluster

clicks = {}

model = UserCluster(40).model_load('model40.pkl')
with open('/Users/chan-p/Desktop/R6/ydata-fp-td-clicks-v1_0.20090501') as f:
    for line in f:
        _, click_article_id, click, user_data, article_pool = InputData.split_data(
            line)
        userID = model.predict_cluster(user_data)[0]
        if click == 1:
            if userID not in clicks: clicks[userID] = []
            clicks[userID].append(article_pool[str(click_article_id)])
            if len(clicks[userID]) == 5000:
                break
            if len(clicks[userID]) % 100 == 0:
                print(userID)
                print(len(clicks[userID]))

for id_, val in clicks.items():
    print(id_)
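
The excerpt ends before the imported PCA and matplotlib are used. A plausible continuation, offered only as a guess at the author's intent, would project each cluster's clicked-article vectors to two dimensions and plot them:

# Hypothetical continuation (not shown in the original file): visualize the
# clicked-article vectors of each user cluster in 2-D with PCA.
pca = PCA(n_components=2)
for id_, vecs in clicks.items():
    points = pca.fit_transform(np.array(vecs, dtype=float))
    plt.scatter(points[:, 0], points[:, 1], s=5, label='cluster {}'.format(id_))
plt.legend()
plt.show()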