Example #1
    def exploitation(self):

        # do a hypothesis test for the previous item
        self.hypothesis_test()

        # compute recommendable items from V to the current user
        activeitems = np.multiply(self.Xi_u == 0, np.array(self.V) == 1)

        # if some items are recommendable, recommend the least-recommended item in V
        if sum(activeitems) > 0:

            # add self.M to elements that are not in V or not in activeitems so they are never selected
            cnt_modified = np.array(self.item_cnt) + self.M * (
                np.array(activeitems) == 0) + self.M * (np.array(self.V) == 0)
            self.item_selected = rand_argmin(cnt_modified)

        # if no item in V is recommendable, recommend an item that has not been recommended before and add it to V
        else:
            cnt_modified = np.array(
                self.item_cnt) + self.M * (np.array(self.Xi_u) == 1)
            self.item_selected = rand_argmin(cnt_modified)
            self.V[self.item_selected] = 1

        #update the counter for the selected item
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1
Example #2
    def exploitation(self):

        k = self.compute_UCB_ind()

        if k == 1 or k == 2:
            # compute recommendable items in I_k for the current user
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0,
                np.array(self.I) == k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)

        else:
            # make recommendations in a round-robin manner
            if self.k_prev[self.u_current] == 1:
                # recommend from I_2
                k = 2
                self.k_prev[self.u_current] = 2
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)
                    #print('random sampling (exploi)')
            else:
                # recommend from I_1
                k = 1
                self.k_prev[self.u_current] = 1
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)
                    #print('random sampling (exploi)')

        #update the counter
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1

        assert self.Xi_current[self.item_selected, self.u_current] == 0
Example #3
    def choose_item(self, state):
        (u_current, Xi_current, _) = state
        Xi_u = Xi_current[:, u_current]
        assert len(Xi_u) == self.N

        # choose an item that has not been recommended to this user, uniformly at random
        i = rand_argmin(Xi_u)
        assert Xi_current[i, u_current] == 0
        return i
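All of the examples rely on tie-breaking helpers rand_argmin and rand_argmax that are not shown here. A minimal sketch of what such helpers could look like, assuming they return the index of a minimal (respectively maximal) entry with ties broken uniformly at random:

import numpy as np

def rand_argmin(x):
    # index of a minimal entry, ties broken uniformly at random (assumed behavior)
    x = np.asarray(x)
    return np.random.choice(np.flatnonzero(x == x.min()))

def rand_argmax(x):
    # index of a maximal entry, ties broken uniformly at random (assumed behavior)
    x = np.asarray(x)
    return np.random.choice(np.flatnonzero(x == x.max()))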
Example #4
    def explorations(self):

        # remove items previously recommended to the current user from the candidates
        # item_cnt_temp: entries for items already recommended or not in I_0 are set to M
        item_cnt_temp = np.array(self.Xi_u == 1) * self.M + np.array(
            np.array(self.I_0) == 0) * self.M + self.item_cnt
        item_cnt_temp = np.minimum(item_cnt_temp, self.M)

        # if some entry of item_cnt_temp is below M, i.e., there is an item in I_0_minus that can be recommended to the current user
        if sum(item_cnt_temp) < self.M * self.N:

            # select item from argmin item_cnt_temp
            self.item_selected = rand_argmin(item_cnt_temp)
            assert self.Xi_current[self.item_selected, self.u_current] == 0

            #update the counter
            self.item_cnt[
                self.item_selected] = self.item_cnt[self.item_selected] + 1

            #update I_0_minus
            self.I_0_minus = np.multiply(self.I_0,
                                         (np.array(self.item_cnt) < self.T_0))

            # indicate to record the reward
            self.reward_rec_flag = 1

        else:
            # fall back to a random item that has not been recommended to the current user
            self.item_selected = rand_argmin(self.Xi_u)

            assert self.Xi_current[self.item_selected, self.u_current] == 0
            print('random sampling')

            # do not update the counter

            # indicate not to record the reward
            self.reward_rec_flag = 0
Example #5
    def explorations(self):
        # remove items previously recommended (Xi_u = 1) to the current user from the candidates
        # item_cnt_temp: entries for items already recommended or not in S (S = 0) are set to M
        item_cnt_temp = np.array(self.Xi_u == 1) * self.M + np.array(
            np.array(self.S) == 0) * self.M + self.item_cnt
        item_cnt_temp = np.minimum(item_cnt_temp, self.M)

        # if some entry of item_cnt_temp is below M, i.e., there is an item in S that can be recommended to the current user
        if sum(item_cnt_temp) < self.M * self.N:

            # select the item in S with the smallest recommendation count
            self.item_selected = rand_argmin(item_cnt_temp)
            assert self.Xi_current[self.item_selected, self.u_current] == 0

        else:
            # select a recommendable item from outside S
            self.item_selected = rand_argmin(self.Xi_u)

            assert self.Xi_current[self.item_selected, self.u_current] == 0
            print('random sampling')

        #update the counter for the selected item
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1
Example #6
    def exploitation(self):

        # estimated affinities hatA for recommendable items; already-recommended items get a large negative penalty
        A_u = np.multiply(
            np.array(self.Xi_u) == 0,
            np.array(self.hatA[:, self.u_current])
        ) - self.LARGE_CONSTANT * self.maxhatA * (np.array(self.Xi_u) == 1)
        self.item_selected = rand_argmax(A_u)

        # if we could not select a recommendable item, fall back to random sampling
        if self.Xi_current[self.item_selected, self.u_current] == 1:
            self.item_selected = rand_argmin(self.Xi_u)

        #update the counter
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1

        assert self.Xi_current[self.item_selected, self.u_current] == 0
Example #7
    def choose_item(self, state):
        (self.u_current, self.Xi_current, self.prev_reward) = state
        Xi_u = self.Xi_current[:, self.u_current]
        assert len(Xi_u) == self.N

        if self.t == 1:
            #select T/m log T items
            self.random_I_0_selection()

        #update the reward sum
        self.update_reward_sum()

        # update the empirical average
        self.update_emprical_average()

        #update KLUCB index
        self.update_KLUCB_index()
        KLUCB_index_u = self.KLUCB_indexes

        # zero out the KLUCB index of items that are not recommendable to the user
        for i in range(self.N):
            if Xi_u[i] == 1:
                KLUCB_index_u[i] = 0

        # choose item based on modified KLUCB indexes
        if sum(KLUCB_index_u) > 0:

            # select item in I_0 with largest KLUCB index
            self.item_selected = rand_argmax(KLUCB_index_u)
        else:

            # random item selection
            self.item_selected = rand_argmin(Xi_u)

        #update the counter
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1

        # record previously used item
        self.item_prev = self.item_selected

        self.t = self.t + 1

        return self.item_selected
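Example #7 calls self.update_KLUCB_index(), which is not included in the snippet. For Bernoulli rewards the KL-UCB index of an item is usually defined as the largest q with N_i * kl(p_hat_i, q) <= log(t) and computed by bisection; the sketch below illustrates that standard computation under this assumption and is not necessarily the exact index used by update_KLUCB_index.

import numpy as np

def bernoulli_kl(p, q, eps=1e-12):
    # KL divergence between Bernoulli(p) and Bernoulli(q)
    p = min(max(p, eps), 1 - eps)
    q = min(max(q, eps), 1 - eps)
    return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))

def klucb_index(p_hat, n_pulls, t, iters=32):
    # largest q in [p_hat, 1] such that n_pulls * kl(p_hat, q) <= log(t), found by bisection
    if n_pulls == 0:
        return 1.0
    budget = np.log(max(t, 2)) / n_pulls
    lo, hi = p_hat, 1.0
    for _ in range(iters):
        mid = (lo + hi) / 2
        if bernoulli_kl(p_hat, mid) <= budget:
            lo = mid
        else:
            hi = mid
    return lo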
Example #8
    def exploitation(self):

        # update V (I_1 in the paper's notation)
        self.update_V()

        # compute recommendable items from V to current user
        activeitems = np.multiply(self.Xi_u == 0, np.array(self.V) == 1)

        if sum(activeitems) > 0:
            # compute empirical averages
            averages = self.emprical_average()

            # keep averages only for active items (others are set to -1)
            for i in range(self.N):
                if activeitems[i] == 0:
                    averages[i] = -1

            # choose the best (empirical average) item in V
            self.item_selected = rand_argmax(averages)
            assert self.Xi_current[self.item_selected, self.u_current] == 0

        else:
            # random sampling from V_0^c when no item in V can be recommended to the current user

            # recompute activeitems: items recommendable to the current user and in V_0^c
            activeitems = np.multiply(self.Xi_u == 0, np.array(self.V_0) == 0)
            if sum(activeitems) == 0:
                self.item_selected = rand_argmin(self.Xi_u)
            else:
                # select an item uniformly at random from activeitems
                self.item_selected = rand_argmax(activeitems)

            # add the item to V and V_0
            self.V[self.item_selected] = 1
            self.V_0[self.item_selected] = 1
            if self.Xi_current[self.item_selected, self.u_current] == 1:
                from IPython.core.debugger import Pdb
                Pdb().set_trace()

        # update the counter
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1
Example #9
    def explorations(self):

        # remove items previously recommended to the current user from the candidates
        # candidates: entries for items already recommended or not in S are 0, otherwise 1 (recommendable)
        candidates = np.multiply(np.array(self.Xi_u == 0),
                                 np.array(np.array(self.S) == 1))

        # if there are recommendable items in S
        if sum(candidates) > 0:

            # select an item from the candidates
            self.item_selected = rand_argmax(candidates)
            assert self.Xi_current[self.item_selected, self.u_current] == 0

        else:
            # fall back to a random item not yet recommended to the current user
            self.item_selected = rand_argmin(self.Xi_u)

            assert self.Xi_current[self.item_selected, self.u_current] == 0
            print('random sampling')

        #update the counter
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1
Example #10
    def do_clustering(self):
        A = self.generate_A()  # adjacency matrix (s x s)
        p_tilde = 2 * np.sum(A) / self.s / (self.s - 1)
        A_low = lowrank_approx(A, self.K)  # rank-K approximation

        r_t = [0 for ind in range(int(np.floor(np.log(self.s))))]
        for ind in range(int(np.floor(np.log(self.s)))):

            # find neighborhoods
            Q = [0 for i in range(self.s)]
            for i in range(self.s):
                Q[i] = set()
                for j in range(self.s):
                    if np.linalg.norm(A_low[i] - A_low[j])**2 <= (
                            ind + 1) * p_tilde * self.epsilon_alg:
                        Q[i].add(j)

            T = [0 for i in range(self.K)]
            xi = np.zeros((self.K, self.s))
            Qprev = set()
            for k in range(self.K):
                cardinalities = [0 for i in range(self.s)]
                for i in range(self.s):
                    cardinalities[i] = len(Q[i] - Qprev)

                # compute the index v_k^\star
                v_k = rand_argmax(np.array(cardinalities))
                T[k] = Q[v_k] - Qprev
                Qprev = Qprev.union(Q[v_k])
                for i in range(self.s):
                    if i in T[k]:
                        xi[k] = xi[k] + A_low[i] / len(T[k])

            # remaining items assignment
            if len(Qprev) != self.s:
                for v in set(range(self.s)) - Qprev:
                    distances = np.zeros(self.K)
                    for k in range(self.K):
                        distances[k] = np.linalg.norm(A_low[v] - xi[k])**2

                    k_star = rand_argmax(distances)
                    T[k_star].add(v)

            # compute r_t
            for k in range(self.K):
                for i in range(self.s):
                    if i in T[k]:
                        r_t[ind] = r_t[ind] + np.linalg.norm(A_low[i] -
                                                             xi[k])**2

        #end for ind...
        minind = rand_argmin(np.array(r_t))
        ind = minind  # redo the clustering with the smallest error

        # do a clustering again with minind
        # find neighborhoods
        Q = [0 for i in range(self.s)]
        for i in range(self.s):
            Q[i] = set()
            for j in range(self.s):
                if np.linalg.norm(A_low[i] - A_low[j])**2 <= (
                        ind + 1) * p_tilde * self.epsilon_alg:
                    Q[i].add(j)

        T = [0 for i in range(self.K)]
        xi = np.zeros((self.K, self.s))
        Qprev = set()
        for k in range(self.K):
            cardinalities = [0 for i in range(self.s)]
            for i in range(self.s):
                cardinalities[i] = len(Q[i] - Qprev)

            # compute the index v_k^\star
            v_k = rand_argmax(np.array(cardinalities))
            T[k] = Q[v_k] - Qprev
            Qprev = Qprev.union(Q[v_k])
            for i in range(self.s):
                if i in T[k]:
                    xi[k] = xi[k] + A_low[i] / len(T[k])

        # remaining items assignment
        if len(Qprev) != self.s:
            #from IPython.core.debugger import Pdb; Pdb().set_trace()
            for v in set(range(self.s)) - Qprev:
                distances = np.zeros(self.K)
                for k in range(self.K):
                    distances[k] = np.linalg.norm(A_low[v] - xi[k])**2

                k_star = rand_argmin(distances)
                T[k_star].add(v)

        for k in range(self.K):
            for i in T[k]:
                self.I_S[i] = k + 1

        for i in range(self.N):
            if self.S[i] == 1:
                self.I[i] = self.I_S[self.N_to_S[i]]

        # for debugging
        err_num = 0
        for i in range(self.N):
            if i <= int(self.N / 2 - 1):
                if self.I[i] == 2:
                    err_num = err_num + 1

            if i > int(self.N / 2 - 1):
                if self.I[i] == 1:
                    err_num = err_num + 1

        err_rate = min(err_num / self.s, 1 - err_num / self.s)
        print('err_rate after SC=', end="")
        print(err_rate)

        #estimation of \hat{p}(i, j)
        for i in range(self.K):
            for j in range(self.K):
                numerator = 0
                for v in T[i]:
                    for u in T[j]:
                        numerator = numerator + A[v, u]
                denominator = len(T[i]) * self.s
                self.P_kl[i, j] = numerator / denominator

        # local improvement
        S = [0 for i in range(self.K)]
        Sprev = [0 for i in range(self.K)]
        for k in range(self.K):
            Sprev[k] = T[k]

        for ind in range(int(np.floor(np.log(self.s)))):
            for k in range(self.K):
                S[k] = set()

            for v in range(self.s):

                # compute the log-likelihood of assigning v to each cluster
                likelihoods = np.zeros(self.K)
                for i in range(self.K):
                    # sum up over all k
                    weight_sum = 0
                    psum = 0
                    for k in range(self.K):

                        weight_by_Avw = 0

                        for w in Sprev[i]:
                            weight_by_Avw = weight_by_Avw + A[v, w]
                        weight_sum = weight_sum + weight_by_Avw
                        psum = psum + self.P_kl[i, k]
                        likelihoods[i] = likelihoods[
                            i] + weight_by_Avw * np.log(self.P_kl[i, k])

                    # add the term for k = 0 (in the paper's notation)
                    likelihoods[i] = likelihoods[i] + (self.s - weight_sum) * (1 - psum)

                # maximum likelihood
                i_star = rand_argmax(likelihoods)
                S[i_star].add(v)

            #update Sprev
            for k in range(self.K):
                Sprev[k] = S[k]

        # (end for ind loop)

        for k in range(self.K):
            for i in S[k]:
                self.I_S[i] = k + 1

        for i in range(self.N):
            if self.S[i] == 1:
                self.I[i] = self.I_S[self.N_to_S[i]]

        # for debugging (computation of err rate)
        err_num = 0
        for i in range(self.N):
            if i <= int(self.N / 2 - 1):
                if self.I[i] == 2:
                    err_num = err_num + 1

            if i > int(self.N / 2 - 1):
                if self.I[i] == 1:
                    err_num = err_num + 1

        err_rate2 = min(err_num / self.s, 1 - err_num / self.s)
        print('err_rate after SP=', end="")
        print(err_rate2)
        print('err_rate improvement = ', end="")
        print(err_rate - err_rate2)
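Example #10 depends on a lowrank_approx(A, K) helper that is not shown. A minimal sketch, assuming it is the standard rank-K truncated-SVD approximation of the adjacency matrix:

import numpy as np

def lowrank_approx(A, K):
    # rank-K approximation of A via truncated SVD
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    return U[:, :K] @ np.diag(s[:K]) @ Vt[:K, :]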
Example #11
    def exploitation(self):
        # round-robin recommendations
        if self.U_0[self.u_current] == 1 and self.t <= self.T_1:
            # make recommendations in a round-robin manner
            if self.k_prev[self.u_current] == 1:
                # recommend from I_2
                k = 2
                self.k_prev[self.u_current] = 2
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)
                    #print('random sampling (exploi)')
            else:
                # recommend from I_1
                k = 1
                self.k_prev[self.u_current] = 1
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)

        # end round-robin
        else:  # exploitation using L
            x_kl = np.zeros((self.K, 2))
            for k in range(self.K):
                for l in range(2):
                    x_kl[k, l] = np.max([
                        np.abs(self.P_kl_user[k, l] -
                               self.rho_users[self.u_current, k]) -
                        self.eps_users, 0
                    ])
            L_ind = set()
            for l in range(2):
                term = 0
                for k in range(self.K):
                    cnt_k = np.sum(
                        np.multiply(np.array(self.Xi_u),
                                    np.array(self.I) == k + 1))
                    term = term + cnt_k * x_kl[k, l]**2
                cnt_user = np.sum(np.array(self.Xi_u))
                if term < 0.01 * np.log(cnt_user):
                    L_ind.add(l)

            recom_k = 0

            if len(L_ind) != 0:
                setbestk = set()
                for l in L_ind:
                    setbestk.add(self.argmax_k[l])

                # random.sample needs a sequence, so convert the set to a list
                recom_k = random.sample(list(setbestk), 1)
            else:
                recom_k = random.sample(range(self.K), 1)

            # increment k so that it aligns with the actual cluster index
            recom_k = recom_k[0] + 1

            if recom_k == 1 or recom_k == 2:
                # compute recommendable items in I_{recom_k} for the current user
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == recom_k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)

            else:
                # make recommendations in a round-robin manner
                if self.k_prev[self.u_current] == 1:
                    # recommend from I_2
                    recom_k = 2
                    self.k_prev[self.u_current] = 2
                    recommendable_from_I_k = np.multiply(
                        np.array(self.Xi_u) == 0,
                        np.array(self.I) == recom_k)
                    if sum(recommendable_from_I_k) > 0:
                        self.item_selected = rand_argmax(
                            recommendable_from_I_k)
                    else:
                        self.item_selected = rand_argmin(self.Xi_u)

                else:
                    # recommend from I_1
                    recom_k = 1
                    self.k_prev[self.u_current] = 1
                    recommendable_from_I_k = np.multiply(
                        np.array(self.Xi_u) == 0,
                        np.array(self.I) == recom_k)
                    if sum(recommendable_from_I_k) > 0:
                        self.item_selected = rand_argmax(
                            recommendable_from_I_k)
                    else:
                        self.item_selected = rand_argmin(self.Xi_u)

        #update the counter
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1

        assert self.Xi_current[self.item_selected, self.u_current] == 0
Example #12
    def explorations(self):

        # random exploration
        self.item_selected = rand_argmin(self.Xi_u)
        self.item_cnt[
            self.item_selected] = self.item_cnt[self.item_selected] + 1
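Examples #3 and #7 unpack state as (u_current, Xi_current, prev_reward). The driver loop below is a hypothetical illustration of how such an agent might be exercised; the environment, the user-arrival process, and the Bernoulli feedback are all assumptions, not part of the original code.

import numpy as np

class ToyEnv:
    # hypothetical environment: N items, M users, i.i.d. Bernoulli feedback with mean p
    def __init__(self, N, M, p=0.3, seed=0):
        self.rng = np.random.default_rng(seed)
        self.N, self.M, self.p = N, M, p
        self.Xi = np.zeros((N, M), dtype=int)  # Xi[i, u] = 1 once item i was shown to user u

def run(agent, env, T):
    prev_reward = 0
    for t in range(T):
        u = int(env.rng.integers(env.M))             # a user arrives (assumed uniform)
        state = (u, env.Xi, prev_reward)             # state layout as in Examples #3 and #7
        i = agent.choose_item(state)
        prev_reward = int(env.rng.random() < env.p)  # illustrative Bernoulli feedback
        env.Xi[i, u] = 1                             # item i can no longer be shown to user u
    return env.Xi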