Example No. 1
    def do_user_clustering(self):

        self.compute_user_averages()

        self.select_U_star()
        self.eps_users = self.compute_eps_users()

        # find neighborhoods: Q[u] = users within eps_users of user u
        Q = [set() for _ in range(self.M)]
        for u in range(self.M):
            for v in range(self.M):
                if np.linalg.norm(self.rho_users[u, :] -
                                  self.rho_users[v, :]) <= self.eps_users:
                    Q[u].add(v)
        Qprev = set()
        for l in range(2):
            cardinalities = [0 for y in range(self.M)]
            for u in range(self.M):
                cardinalities[u] = len(Q[u] - Qprev)

            u_l = rand_argmax(np.array(cardinalities))
            self.P_kl_user[:, l] = np.transpose(self.rho_users[u_l, :])
            Qprev = Qprev.union(Q[u_l])

        for l in range(2):
            self.argmax_k[l] = rand_argmax(self.P_kl_user[:, l])
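All of these snippets rely on rand_argmax / rand_argmin helpers that are not shown on this page. Below is a minimal sketch consistent with how they are used (argmax / argmin with ties broken uniformly at random); the actual implementation in the source repository may differ:

import numpy as np

def rand_argmax(values):
    # index of a maximal entry, with ties broken uniformly at random
    values = np.asarray(values)
    return np.random.choice(np.flatnonzero(values == values.max()))

def rand_argmin(values):
    # index of a minimal entry, with ties broken uniformly at random
    values = np.asarray(values)
    return np.random.choice(np.flatnonzero(values == values.min()))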
Example No. 2
    def choose_item(self, state):
        (self.u_current, self.Xi_current, self.prev_reward) = state
        Xi_u = self.Xi_current[:, self.u_current]
        assert len(Xi_u) == self.N

        # update the reward sum
        self.update_reward_sum()

        # update the empirical average
        self.update_emprical_average()

        # update the KL-UCB index
        self.update_KLUCB_index()
        KLUCB_index_u = self.KLUCB_indexes

        # zero out items that are not recommendable to the current user
        for i in range(self.N):
            if Xi_u[i] == 1:
                KLUCB_index_u[i] = 0

        # choose item based on modified KLUCB indexes
        self.item_selected = rand_argmax(KLUCB_index_u)

        # update the counter
        self.item_cnt[self.item_selected] += 1

        # record the previously used item
        self.item_prev = self.item_selected

        # increment time
        self.t += 1

        return self.item_selected
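update_KLUCB_index is not shown in this snippet. For Bernoulli rewards, a KL-UCB index is typically obtained by bisection; the sketch below is one plausible version under that assumption (the exploration level log(t) is an assumption; common variants add a log log t term):

import numpy as np

def kl_bernoulli(p, q, eps=1e-12):
    # KL divergence between Bernoulli(p) and Bernoulli(q)
    p = min(max(p, eps), 1 - eps)
    q = min(max(q, eps), 1 - eps)
    return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))

def klucb_index(mean, count, t, precision=1e-6):
    # largest q in [mean, 1] with count * KL(mean, q) <= log(t), by bisection
    if count == 0:
        return 1.0
    bound = np.log(t) / count
    lo, hi = mean, 1.0
    while hi - lo > precision:
        mid = (lo + hi) / 2
        if kl_bernoulli(mean, mid) > bound:
            hi = mid
        else:
            lo = mid
    return lo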
Example No. 3
    def exploitation(self):

        k = self.compute_UCB_ind()

        if k == 1 or k == 2:
            # compute recommendable items from S for the current user
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0,
                np.array(self.I) == k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)

        else:
            # recommend in a round-robin manner, alternating between clusters 1 and 2
            k = 2 if self.k_prev[self.u_current] == 1 else 1
            self.k_prev[self.u_current] = k
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0,
                np.array(self.I) == k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)

        # update the counter
        self.item_cnt[self.item_selected] += 1

        assert self.Xi_current[self.item_selected, self.u_current] == 0
Example No. 4
    def do_clustering(self):
        averages = self.emprical_average()
        averages = averages + (self.I_0 == 0) * self.LARGE_CONST

        # find neighborhoods: Q[i] = active items within epsilon_alg of item i
        Q = [set() for _ in range(self.N)]
        for i in range(self.N):
            if self.I_0[i] == 1:
                for j in range(self.N):
                    if self.I_0[j] == 1:
                        if abs(averages[i] - averages[j]) <= self.epsilon_alg:
                            Q[i].add(j)

        M = set()
        Qprev = set()

        # find the cluster centers
        for k in range(self.K):
            cardinalities = [0 for i in range(self.N)]
            for i in range(self.N):
                if self.I_0[i] == 1:
                    cardinalities[i] = len(Q[i] - Qprev)

            i = rand_argmax(np.array(cardinalities))
            M.add(i)
            Qprev = Qprev.union(Q[i])

        # initialize with a large negative value
        average_cluster_tmp = [-self.LARGE_CONST for i in range(self.N)]

        # keep the empirical averages of the elements of M
        for m in M:
            average_cluster_tmp[m] = averages[m]
        average_cluster_tmp = np.array(average_cluster_tmp)

        # reorder cluster_average (in decreasing order)
        for k in range(self.K):
            i = rand_argmax(average_cluster_tmp)
            assert i in M

            self.cluster_average[k] = average_cluster_tmp[i]
            M = M - {i}
            average_cluster_tmp[i] = -self.LARGE_CONST
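The center-selection loop above (also used in Examples No. 1 and No. 12) greedily picks, K times, the point whose epsilon-neighborhood covers the most not-yet-covered points. Here is a standalone sketch of that pattern on a 1-D array of averages, reusing the rand_argmax helper sketched under Example No. 1:

import numpy as np

def greedy_centers(values, eps, K):
    # Q[i]: indices whose value lies within eps of values[i]
    n = len(values)
    Q = [{j for j in range(n) if abs(values[i] - values[j]) <= eps}
         for i in range(n)]
    centers, covered = [], set()
    for _ in range(K):
        # pick the point whose neighborhood adds the most uncovered points
        gains = np.array([len(Q[i] - covered) for i in range(n)])
        c = rand_argmax(gains)
        centers.append(c)
        covered |= Q[c]
    return centers

# e.g. two tight groups and an outlier: one center per selected cluster
print(greedy_centers(np.array([0.10, 0.12, 0.50, 0.52, 0.90]), eps=0.05, K=2))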
Example No. 5
    def exploitation(self):

        # update the active set V
        self.update_V()

        # compute recommendable items from V for the current user
        activeitems = np.multiply(self.Xi_u == 0, np.array(self.V) == 1)

        if sum(activeitems) > 0:
            # compute empirical averages
            averages = self.emprical_average()

            # keep only the active items (inactive elements are set to -1)
            for i in range(self.N):
                if activeitems[i] == 0:
                    averages[i] = -1

            # choose the best (empirical average) item in V
            self.item_selected = rand_argmax(averages)
            assert self.Xi_current[self.item_selected, self.u_current] == 0

        else:
            # random sampling from V_0^c when no item in V can be recommended
            # to the current user

            # recompute activeitems: recommendable and in V_0^c
            activeitems = np.multiply(self.Xi_u == 0, np.array(self.V_0) == 0)
            if sum(activeitems) == 0:
                self.item_selected = rand_argmin(self.Xi_u)
            else:
                # select an item uniformly at random from activeitems
                self.item_selected = rand_argmax(activeitems)

            # add the item to V and V_0
            self.V[self.item_selected] = 1
            self.V_0[self.item_selected] = 1
            assert self.Xi_current[self.item_selected, self.u_current] == 0

        # update the counter
        self.item_cnt[self.item_selected] += 1
Example No. 6
    def select_U_star(self):
        U_0_tmp = self.U_0.copy()
        for ind in range(self.s_u_star):
            cnt_users_star = np.zeros(self.M)

            # recommendation count for users still in U_0_tmp; -1 once selected
            for u in range(self.M):
                cnt_users_star[u] = (np.sum(np.array(self.Xi_current[:, u]))
                                     * U_0_tmp[u]
                                     - (U_0_tmp[u] == 0))

            new_u = rand_argmax(cnt_users_star)
            self.U_star[new_u] = 1
            U_0_tmp[new_u] = 0
Example No. 7
    def additional_sampling(self):
        S_0 = np.ceil(1 / self.Delta_0**2)

        item_cnt_tmp = self.compute_cnt_for_active_items()

        # if the item changed, put the previous item into I_0
        if self.item_prev != self.item_selected:
            self.I_0[self.item_prev] = 1

        imax = rand_argmax(item_cnt_tmp)
        assert self.Xi_current[imax, self.u_current] == 0
        while item_cnt_tmp[imax] == self.M:
            print('M reached. change item')
            self.I_0[imax] = 1
            item_cnt_tmp = self.item_cnt - 2 * self.M * self.I_0
            imax = rand_argmax(item_cnt_tmp)
        averages = self.emprical_average()

        if item_cnt_tmp[imax] >= S_0 and abs(
                averages[imax] - max(self.cluster_average)) >= self.Delta_0:
            self.I_0[imax] = 1
            item_cnt_tmp = self.compute_cnt_for_active_items()
            imax = rand_argmax(item_cnt_tmp)
        assert self.Xi_current[imax, self.u_current] == 0

        self.item_selected = imax

        # record the number of observations
        self.item_cnt[imax] += 1

        # flag that the reward should be recorded
        self.reward_rec_flag = 1
Example No. 8
    def exploitation(self):

        # user's estimated score for each recommendable item; items already
        # shown (Xi_u == 1) get a large negative penalty
        A_u = (np.multiply(np.array(self.Xi_u) == 0,
                           np.array(self.hatA[:, self.u_current]))
               - self.LARGE_CONSTANT * self.maxhatA
               * (np.array(self.Xi_u) == 1))
        self.item_selected = rand_argmax(A_u)

        # if we could not select a good item, fall back to random sampling
        if self.Xi_current[self.item_selected, self.u_current] == 1:
            self.item_selected = rand_argmin(self.Xi_u)

        # update the counter
        self.item_cnt[self.item_selected] += 1

        assert self.Xi_current[self.item_selected, self.u_current] == 0
Example No. 9
    def choose_item(self, state):
        (self.u_current, self.Xi_current, self.prev_reward) = state
        Xi_u = self.Xi_current[:, self.u_current]
        assert len(Xi_u) == self.N

        if self.t == 1:
            # select T/m log T items
            self.random_I_0_selection()

        # update the reward sum
        self.update_reward_sum()

        # update the empirical average
        self.update_emprical_average()

        # update the KL-UCB index
        self.update_KLUCB_index()
        KLUCB_index_u = self.KLUCB_indexes

        # zero out items that are not recommendable to the current user
        for i in range(self.N):
            if Xi_u[i] == 1:
                KLUCB_index_u[i] = 0

        # choose an item based on the masked KL-UCB indexes
        if sum(KLUCB_index_u) > 0:
            # select the item in I_0 with the largest KL-UCB index
            self.item_selected = rand_argmax(KLUCB_index_u)
        else:
            # random item selection
            self.item_selected = rand_argmin(Xi_u)

        # update the counter
        self.item_cnt[self.item_selected] += 1

        # record the previously used item
        self.item_prev = self.item_selected

        self.t += 1

        return self.item_selected
Example No. 10
    def explorations(self):

        # remove items previously recommended to the current user from the
        # candidates. candidates: 0 if an item was previously recommended or
        # is not in S, 1 otherwise (recommendable)
        candidates = np.multiply(np.array(self.Xi_u == 0),
                                 np.array(np.array(self.S) == 1))

        # if there are recommendable items in S,
        if sum(candidates) > 0:

            # select an item from the candidates
            self.item_selected = rand_argmax(candidates)
            assert self.Xi_current[self.item_selected, self.u_current] == 0

        else:
            # fall back to random sampling of a new item
            self.item_selected = rand_argmin(self.Xi_u)

            assert self.Xi_current[self.item_selected, self.u_current] == 0
            print('random sampling')

        # update the counter
        self.item_cnt[self.item_selected] += 1
Example No. 11
from copy import deepcopy

import numpy as np


def Tabular_q(env,
              episodes,
              num_act,
              episode_length=np.inf,
              epsilon=0.05,
              alpha=lambda v, t: 0.1,
              gamma=0.99,
              eval_interval=np.inf,
              Qs=None,
              init=0,
              soft_end=False,
              Q_trafo=lambda x: x):
    """Tabular Q-learning. Returns the Q-values as a dict.

    alpha maps the visit count and the elapsed time to the learning rate.
    eval_interval determines after how many episodes the greedy policy is
    evaluated and its return printed. Qs allows initializing the Q-values
    from a dictionary; if Qs is None, init gives a constant initial value.
    soft_end determines how terminal states are treated: if True,
    transitions into terminal states still bootstrap on the Q-value of the
    next state. Q_trafo is the scalarization function that determines
    action selection in the multi-objective case.
    """
    vs = {}
    if Qs is None:
        Qs = {}
    else:
        Qs = deepcopy(Qs)
    for i in range(episodes):
        obs_new = env.reset()
        if obs_new not in Qs.keys():
            Qs[obs_new] = [init for i in range(num_act)]
        if obs_new not in vs.keys():
            vs[obs_new] = [init for i in range(num_act)]
        done = False
        t = 0
        while done is False and t < episode_length:
            if obs_new not in Qs.keys():
                Qs[obs_new] = [init for i in range(num_act)]
            if obs_new not in vs.keys():
                vs[obs_new] = [init for i in range(num_act)]

            # epsilon-greedy action selection
            if np.random.uniform() > epsilon:
                act_new = rand_argmax(Q_trafo(Qs[obs_new]))
            else:
                act_new = np.random.choice(np.arange(num_act))

            if t > 0:
                # one-step Q-learning update for the previous transition
                error = (rew +
                         gamma * Qs[obs_new][np.argmax(Q_trafo(Qs[obs_new]))] -
                         Qs[obs][act])
                Qs[obs][act] = Qs[obs][act] + alpha(vs[obs][act], t) * error
                vs[obs][act] = vs[obs][act] + 1

            obs = obs_new
            act = act_new

            if hasattr(env, 'dynamic') and env.dynamic is True:
                obs_new, rew, done, _ = env.step(act, Qs)
            else:
                obs_new, rew, done, _ = env.step(act)

            if done is True:
                if soft_end is False:
                    error = (rew - Qs[obs][act])
                    Qs[obs][act] = Qs[obs][act] + alpha(vs[obs][act],
                                                        t) * error
                    vs[obs][act] = vs[obs][act] + 1
                else:
                    if obs_new not in Qs.keys():
                        Qs[obs_new] = [init for i in range(num_act)]
                    error = (
                        rew +
                        gamma * Qs[obs_new][np.argmax(Q_trafo(Qs[obs_new]))] -
                        Qs[obs][act])
                    Qs[obs][act] = Qs[obs][act] + alpha(vs[obs][act],
                                                        t) * error
                    vs[obs][act] = vs[obs][act] + 1

            t = t + 1

        # every eval_interval episodes, evaluate the greedy policy
        if i % eval_interval == (-1) % eval_interval:
            obs = env.reset()
            total = 0
            if obs not in Qs.keys():
                Qs[obs] = [init for i in range(num_act)]
            done = False
            s = 0
            while done is False and s < episode_length:
                act = rand_argmax(Q_trafo(Qs[obs]))
                obs_new, rew, done, _ = env.step(act)
                if obs_new not in Qs.keys():
                    Qs[obs_new] = [init for i in range(num_act)]
                obs = obs_new
                total = total + rew
                s = s + 1
            print(i, total)
    return Qs
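A minimal usage sketch for Tabular_q, assuming a gym-style environment whose reset() returns a hashable observation and whose step(act) returns (obs, reward, done, info). ChainEnv is a hypothetical toy environment, not part of the original code; rand_argmax (sketched under Example No. 1) must also be in scope:

import numpy as np

class ChainEnv:
    # toy 5-state chain: action 1 moves right, action 0 moves left;
    # reward 1.0 on reaching the terminal right end
    def __init__(self, n=5):
        self.n = n

    def reset(self):
        self.s = 0
        return self.s

    def step(self, act):
        self.s = min(self.s + 1, self.n - 1) if act == 1 else max(self.s - 1, 0)
        done = (self.s == self.n - 1)
        return self.s, float(done), done, {}

Qs = Tabular_q(ChainEnv(), episodes=200, num_act=2,
               episode_length=50, eval_interval=50)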
Example No. 12
    def do_clustering(self):
        A = self.generate_A()  # adjacency matrix (s x s)
        p_tilde = 2 * np.sum(A) / self.s / (self.s - 1)
        A_low = lowrank_approx(A, self.K)  # rank-K approximation

        r_t = [0 for ind in range(int(np.floor(np.log(self.s))))]
        for ind in range(int(np.floor(np.log(self.s)))):

            # find neighborhoods: Q[i] = rows of A_low within the threshold of row i
            Q = [set() for _ in range(self.s)]
            for i in range(self.s):
                for j in range(self.s):
                    if np.linalg.norm(A_low[i] - A_low[j])**2 <= (
                            ind + 1) * p_tilde * self.epsilon_alg:
                        Q[i].add(j)

            T = [0 for i in range(self.K)]
            xi = np.zeros((self.K, self.s))
            Qprev = set()
            for k in range(self.K):
                cardinalities = [0 for i in range(self.s)]
                for i in range(self.s):
                    cardinalities[i] = len(Q[i] - Qprev)

                # compute the index v_k^\star
                v_k = rand_argmax(np.array(cardinalities))
                T[k] = Q[v_k] - Qprev
                Qprev = Qprev.union(Q[v_k])
                for i in range(self.s):
                    if i in T[k]:
                        xi[k] = xi[k] + A_low[i] / len(T[k])

            # remaining items assignment
            if len(Qprev) != self.s:
                for v in set(range(self.s)) - Qprev:
                    distances = np.zeros(self.K)
                    for k in range(self.K):
                        distances[k] = np.linalg.norm(A_low[v] - xi[k])**2

                    # assign to the nearest center
                    k_star = rand_argmin(distances)
                    T[k_star].add(v)

            # compute r_t: the within-cluster squared error of this clustering
            for k in range(self.K):
                for i in range(self.s):
                    if i in T[k]:
                        r_t[ind] = r_t[ind] + np.linalg.norm(A_low[i] -
                                                             xi[k])**2

        # end for ind
        minind = rand_argmin(np.array(r_t))
        ind = minind  # redo the clustering with the smallest error

        # do the clustering again with minind
        # find neighborhoods
        Q = [set() for _ in range(self.s)]
        for i in range(self.s):
            for j in range(self.s):
                if np.linalg.norm(A_low[i] - A_low[j])**2 <= (
                        ind + 1) * p_tilde * self.epsilon_alg:
                    Q[i].add(j)

        T = [0 for i in range(self.K)]
        xi = np.zeros((self.K, self.s))
        Qprev = set()
        for k in range(self.K):
            cardinalities = [0 for i in range(self.s)]
            for i in range(self.s):
                cardinalities[i] = len(Q[i] - Qprev)

            # compute the index v_k^\star
            v_k = rand_argmax(np.array(cardinalities))
            T[k] = Q[v_k] - Qprev
            Qprev = Qprev.union(Q[v_k])
            for i in range(self.s):
                if i in T[k]:
                    xi[k] = xi[k] + A_low[i] / len(T[k])

        # remaining items assignment
        if len(Qprev) != self.s:
            for v in set(range(self.s)) - Qprev:
                distances = np.zeros(self.K)
                for k in range(self.K):
                    distances[k] = np.linalg.norm(A_low[v] - xi[k])**2

                k_star = rand_argmin(distances)
                T[k_star].add(v)

        for k in range(self.K):
            for i in T[k]:
                self.I_S[i] = k + 1

        for i in range(self.N):
            if self.S[i] == 1:
                self.I[i] = self.I_S[self.N_to_S[i]]

        # for debugging: compute the clustering error rate
        err_num = 0
        for i in range(self.N):
            if i <= int(self.N / 2 - 1):
                if self.I[i] == 2:
                    err_num = err_num + 1

            if i > int(self.N / 2 - 1):
                if self.I[i] == 1:
                    err_num = err_num + 1

        err_rate = min(err_num / self.s, 1 - err_num / self.s)
        print('err_rate after SC =', err_rate)

        # estimation of \hat{p}(i, j)
        for i in range(self.K):
            for j in range(self.K):
                numerator = 0
                for v in T[i]:
                    for u in T[j]:
                        numerator = numerator + A[v, u]
                denominator = len(T[i]) * self.s
                self.P_kl[i, j] = numerator / denominator

        # local improvement
        S = [0 for i in range(self.K)]
        Sprev = [0 for i in range(self.K)]
        for k in range(self.K):
            Sprev[k] = T[k]

        for ind in range(int(np.floor(np.log(self.s)))):
            for k in range(self.K):
                S[k] = set()

            for v in range(self.s):

                # computation of the likelihood
                likelihoods = np.zeros(self.K)
                for i in range(self.K):
                    # sum up over all k
                    weightsum = 0
                    psum = 0
                    for k in range(self.K):

                        weight_by_Avw = 0

                        for w in Sprev[i]:
                            weight_by_Avw = weight_by_Avw + A[v, w]
                        weightsum = weightsum + weight_by_Avw
                        psum = psum + self.P_kl[i, k]
                        likelihoods[i] = likelihoods[
                            i] + weight_by_Avw * np.log(self.P_kl[i, k])

                    # add the case of k = 0 (in the paper's notation)
                    likelihoods[i] = likelihoods[i] + (self.s -
                                                       weightsum) * (1 - psum)

                # maximum likelihood
                i_star = rand_argmax(likelihoods)
                S[i_star].add(v)

            # update Sprev
            for k in range(self.K):
                Sprev[k] = S[k]

        # (end for ind loop)

        for k in range(self.K):
            for i in S[k]:
                self.I_S[i] = k + 1

        for i in range(self.N):
            if self.S[i] == 1:
                self.I[i] = self.I_S[self.N_to_S[i]]

        # for debugging (computation of the error rate)
        err_num = 0
        for i in range(self.N):
            if i <= int(self.N / 2 - 1):
                if self.I[i] == 2:
                    err_num = err_num + 1

            if i > int(self.N / 2 - 1):
                if self.I[i] == 1:
                    err_num = err_num + 1

        err_rate2 = min(err_num / self.s, 1 - err_num / self.s)
        print('err_rate after SP =', err_rate2)
        print('err_rate improvement =', err_rate - err_rate2)
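lowrank_approx is not shown on this page. Here is a plausible implementation, assuming it computes the best rank-K approximation in the Frobenius norm via truncated SVD:

import numpy as np

def lowrank_approx(A, K):
    # best rank-K approximation of A (Frobenius norm) via truncated SVD
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    return (U[:, :K] * s[:K]) @ Vt[:K, :]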
Example No. 13
    def exploitation(self):
        # round-robin recommendations
        if self.U_0[self.u_current] == 1 and self.t <= self.T_1:
            # recommend in a round-robin manner, alternating between clusters 1 and 2
            k = 2 if self.k_prev[self.u_current] == 1 else 1
            self.k_prev[self.u_current] = k
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0,
                np.array(self.I) == k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)

        # end round-robin
        else:  # exploitation using L
            x_kl = np.zeros((self.K, 2))
            for k in range(self.K):
                for l in range(2):
                    x_kl[k, l] = np.max([
                        np.abs(self.P_kl_user[k, l] -
                               self.rho_users[self.u_current, k]) -
                        self.eps_users, 0
                    ])
            L_ind = set()
            for l in range(2):
                term = 0
                for k in range(self.K):
                    cnt_k = np.sum(
                        np.multiply(np.array(self.Xi_u),
                                    np.array(self.I) == k + 1))
                    term = term + cnt_k * x_kl[k, l]**2
                cnt_user = np.sum(np.array(self.Xi_u))
                if term < 0.01 * np.log(cnt_user):
                    L_ind.add(l)

            if len(L_ind) != 0:
                setbestk = {self.argmax_k[l] for l in L_ind}
                recom_k = random.choice(list(setbestk))
            else:
                recom_k = random.choice(range(self.K))

            # shift by one so that recom_k aligns with the actual cluster index
            recom_k = recom_k + 1

            if recom_k == 1 or recom_k == 2:
                # compute recommendable items from S for the current user
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == recom_k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)

            else:
                # recommend in a round-robin manner, alternating between clusters 1 and 2
                recom_k = 2 if self.k_prev[self.u_current] == 1 else 1
                self.k_prev[self.u_current] = recom_k
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0,
                    np.array(self.I) == recom_k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)

        # update the counter
        self.item_cnt[self.item_selected] += 1

        assert self.Xi_current[self.item_selected, self.u_current] == 0