示例#1
0
    def _compute_logp_point(test, comp_a, comp_b=None, pi=None):
        """
        Builds the score matrix (single component, or a weighted mix of two components) and evaluates it
        on the test data with the 'p_logp' objective.

         INPUT:
        -------
            1. test:          <(I, L) csr_mat>    sparse counts matrix. Rows are individuals, columns are locations.
            2. comp_a:        <(I, L) ndarray>    each row is a score for the i'th individual.
            3. comp_b:        <(I, L) ndarray>    second component; only read when pi is given.
            4. pi:            <ndarray or None>   mixing weights. A 1-d array is used as global weights
                                                  (pi[0], pi[1]); otherwise column 0 / column 1 are used as
                                                  per-row weights (presumably shape (I, 2) -- TODO confirm).

         OUTPUT:
        --------
            1. whatever obj_func['p_logp'](scores, test) returns (presumably the avg. logP across all
               points -- verify against obj_func).
        """
        if pi is None:
            # Single component evaluation: comp_b is ignored.
            mixed = comp_a
        else:
            if len(pi.shape) == 1:
                # Global weights, shared by every individual.
                weight_a, weight_b = pi[0], pi[1]
            else:
                # One weight pair per individual, reshaped by col_vector to scale each row.
                weight_a, weight_b = col_vector(pi[:, 0]), col_vector(pi[:, 1])
            mixed = weight_a * comp_a + weight_b * comp_b

        return obj_func['p_logp'](mixed, test)
示例#2
0
    def _compute_logp(test, comp_a, comp_b=None, pi=None):
        """
        Builds the score matrix (single component, or a weighted mix of two components) and evaluates it
        on the test data with both the 'ind_logp' and the 'p_logp' objectives.

         INPUT:
        -------
            1. test:          <(I, L) csr_mat>    sparse counts matrix. Rows are individuals, columns are locations.
            2. comp_a:        <(I, L) ndarray>    each row is a score for the i'th individual.
            3. comp_b:        <(I, L) ndarray>    second component; only read when pi is given.
            4. pi:            <ndarray or None>   mixing weights. A 1-d array is used as global weights
                                                  (pi[0], pi[1]); otherwise column 0 / column 1 are used as
                                                  per-row weights (presumably shape (I, 2) -- TODO confirm).

         OUTPUT:
        --------
            1. logp:     <(2, ) list>   [obj_func['ind_logp'] result, obj_func['p_logp'] result]
                                        (presumably avg. per individual and avg. across all points).
        """
        mixing = pi is not None
        if mixing and len(pi.shape) == 1:
            # Global weights, shared by every individual.
            combined = pi[0] * comp_a + pi[1] * comp_b
        elif mixing:
            # One weight pair per individual, reshaped by col_vector to scale each row.
            first_weight = col_vector(pi[:, 0])
            second_weight = col_vector(pi[:, 1])
            combined = first_weight * comp_a + second_weight * comp_b
        else:
            # Single component evaluation: comp_b is ignored.
            combined = comp_a

        per_individual = obj_func['ind_logp'](combined, test)
        per_point = obj_func['p_logp'](combined, test)
        return [per_individual, per_point]
示例#3
0
def _learn_mix_mult(alpha, mem_mult, mf_mult, val_data, num_em_iter=100, tol=0.00001):
    """
    Learning the mixing weights for mixture of two multinomials. Each observation is considered as a data point
    and the mixing weights (\pi) are learned using all the points.

    NOTE: In order for the algorithm to work, there can be no location that can get 0 probability by both the mem_mult
    and the mf_mult. In my runs, I use MPE to estimate the mf_mult while using MLE for the mum_mul. That way the mf_mult
    has no 0 values.


     INPUT:
    -------
        1. alpha:       <float / (2, ) ndarray>   Dirichlet prior for the pi learning. If <float> is given it is treated
                                                  as a flat prior. Has to be bigger than 1.
        2. mem_mult:    <(I, L) ndarray>    each row is the multinomial parameter according to the "self" data
        3. mf_mult:     <(I, L) ndarray>    each row is the multinomial parameter according to the matrix factorization
        4. val_data:    <(N, 3) ndarray>    each row is [ind_id, loc_id, counts]
        5. num_em_iter: <int>               number of em iterations
        6. tol:         <float>             convergence threshold

     OUTPUT:
    --------
        1. pi:  <(2, ) ndarray>     mixing weights.

     RAISE:
    -------
        1. ValueError:
                a. alphas are not bigger than 1
                b. the multinomial's rows don't sum to 1
                c. There is a location with both mults 0 (see NOTE)

    """
    if np.any(alpha <= 1):
        raise ValueError('alpha values have to be bigger than 1')

    if np.any(np.abs(np.sum(mem_mult, axis=1) - 1) > 0.001):
        raise ValueError('mem_mult param is not a multinomial -- all rows must sum to 1')

    if np.any(np.abs(np.sum(mf_mult, axis=1) - 1) > 0.001):
        raise ValueError('mf_mult param is not a multinomial -- all rows must sum to 1')

    if type(alpha) == float or type(alpha) == int:
        alpha = np.array([alpha, alpha])

    # Creating responsibility matrix and initializing it hard assignment on random
    log_like_tracker = [-np.inf]
    pi = np.array([0.5, 0.5])
    start = time.time()
    for em_iter in range(1, num_em_iter + 1):
        # Evey 5 iteration we will compute the posterior log probability to see if we converged.
        if em_iter % 5 == 0:
            data_log_like = pi[0] * mem_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)] + \
                            pi[1] * mf_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)]

            # The data likelihood was computed for each location, but it should be in the power of the number
            # of observations there, or a product in the log space.
            data_likelihood = np.log(data_log_like) * val_data[:, 2]

            prior_probability = dirch.logpdf(pi, alpha=alpha)
            log_likelihood = np.mean(data_likelihood + prior_probability)

            if np.abs(log_likelihood - log_like_tracker[-1]) < tol:
                break


            log_like_tracker.append(log_likelihood)

        # E-Step
        resp = [pi[0] * mem_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)],
                pi[1] * mf_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)]]

        if np.all(resp == 0):
            raise ValueError('0 mix probability')

        resp = np.array(resp).T
        resp = normalize_mat_row(resp)

        # M-Step. Only on the \pi with Dirichlet prior alpha > 1
        pi = np.sum(resp * col_vector(val_data[:, 2]), axis=0)
        pi += alpha - 1
        pi /= np.sum(pi)

    total_time = time.time() - start
    log.debug('Finished EM. Total time = %d secs -- %.3f per iteration' % (total_time, total_time / em_iter))

    return pi
示例#4
0
    def evaluate(self, train, val, test, dim, area):
        """
        Fits six smoothing / mixture models on the training data, tunes their hyper-parameters on the
        validation data and evaluates them on the test data.

        The models are: EM with global mixing weights, EM with per-individual mixing weights, smoothed
        memory ('S_mem'), Dirichlet prior smoothing, and a mutual-information based translation model
        with either Jelinek-Mercer ('Translation_JM') or Dirichlet ('Translation_Dirichlet') smoothing.

         INPUT:
        -------
            1. train:   <(I, L) sparse mat>   training counts. Rows are individuals, columns are locations.
            2. val:     validation data, forwarded as-is to the scoring / EM helpers.
            3. test:    <(I, L) sparse mat>   test counts, same layout as train.
            4. dim:     forwarded to self._train_mfs.
            5. area:    forwarded to self._train_mfs.

         OUTPUT:
        --------
            1. logP_p:      <(#test points, 6) DataFrame>   log probability of every test observation, per model.
            2. logP_indiv:  <(I, 6) DataFrame>              avg. log probability per individual, per model.
            3. mix_alpha:   <(I, 6) DataFrame>              per individual weight recorded for each model.

        Side effects: prints the selected alpha of the grid searches and calls self.pretty_print with the
        results returned by self._compute_erank_logp.
        """
        def logP(score_mat, test):
            """
            Returns (logp_p, logp_indiv): the log probability of every single test observation, and the
            average log probability of each individual's test observations.
            """
            logp_p = np.zeros(int(test.sum()))
            logp_indiv = np.zeros(test.shape[0])
            test_data = coo_matrix(test)

            # Point-wise logP: the scores are normalized over the whole matrix, and every entry is
            # repeated once for each observation of that (individual, location) pair.
            temp = score_mat / np.sum(score_mat)
            idx = 0
            for i, j, v in zip(test_data.row, test_data.col, test_data.data):
                logp_p[int(idx):int(idx + v)] = np.log(temp[i, j])
                idx += v

            # Per-individual logP: every row is normalized on its own.
            temp = normalize_mat_row(score_mat)
            for i, j, v in zip(test_data.row, test_data.col, test_data.data):
                logp_indiv[i] += v * np.log(temp[i, j])

            # Number of test observations per individual, computed with a single row sum.
            # Individuals without test observations end up with nan (0 / 0).
            n_test = np.asarray(test.sum(axis=1)).reshape(-1).astype(int)
            logp_indiv /= n_test

            return logp_p, logp_indiv

        ALPHA = np.arange(0.1, 1.1, 0.1)

        mem_scores = self._train_mfs(['memory'], train, dim, area)[0]
        # The small constant keeps every popularity score strictly positive.
        popularity_scores = self._train_mfs(['popularity'], train, dim,
                                            area)[0] + 0.0001

        mem_mult = normalize_mat_row(mem_scores)
        popularity_mult = normalize_mat_row(popularity_scores)

        N = int(np.sum(mem_scores))
        I, L = train.shape
        # Number of training observations per individual (single row sum instead of one per individual).
        n_train = np.asarray(train.sum(axis=1)).reshape(-1).astype(int)

        # NOTE(review): `results` uses the key 'S_Mem' below while the DataFrame column is 'S_mem'.
        results = dict()
        headers = [
            'EM global', 'EM indiv', 'S_mem', 'Dirichlet', 'Translation_JM',
            'Translation_Dirichlet'
        ]
        logP_p = DataFrame(np.zeros((int(test.sum()), 6)), columns=headers)
        logP_indiv = DataFrame(np.zeros((I, 6)), columns=headers)
        mix_alpha = DataFrame(np.zeros((I, 6)), columns=headers)

        log.info('#####learning statistical translation model#######')
        log.info('computing sparse mutual information')

        # Positive point-wise mutual information between locations, based on which individuals
        # visited them at least once.
        binary = (train > 0) * 1  #I*L
        count_1d = binary.sum(axis=0)  #1*L
        count_2d = np.dot(binary.T, binary)  #L*L
        P_1d = count_1d / I  # exists zeros
        P_2d = count_2d / I
        temp = P_2d / np.outer(P_1d, P_1d)
        temp[~np.isfinite(temp)] = 1  # division by zero -> ratio 1, i.e. log2 of 0
        temp[temp == 0] = 1  # avoid log_zero
        PPMI = np.log2(temp)
        PPMI[PPMI < 0] = 0

        # For every location keep (at most) the k locations with the highest positive PPMI, plus the
        # location itself. The rows have different lengths, so this has to be a plain list of lists:
        # np.array() rejects ragged input on recent NumPy versions, and turns equal-length rows into a
        # 2-d array whose rows cannot be appended to.
        k = 50
        neighbors = [[
            j for j in np.asarray(PPMI[i].argsort().T).reshape(-1)[-k:][::-1]
            if PPMI[i, j] > 0
        ] for i in range(L)]
        for u in range(L):
            if u not in neighbors[u]:
                neighbors[u].append(u)

        # Mutual information is only computed for the selected pairs, and mirrored to keep MI symmetric.
        binary = (np.array(train.toarray()) > 0) * 1  #I*L
        MI = np.zeros((L, L))
        from sklearn import metrics
        for u in range(L):
            for w in neighbors[u]:
                if MI[u, w] == 0:
                    MI[u, w] = metrics.mutual_info_score(
                        None,
                        None,
                        contingency=np.histogram2d(binary[:, u], binary[:,
                                                                        w])[0])
                    MI[w, u] = MI[u, w]
        MI = normalize_mat_row(MI)
        MI[~np.isfinite(MI)] = 1 / L
        ##########and self transition probability########
        log.info(
            'gridsearching on validation set (can be optimized) with JM smoothing'
        )
        val_result = dict()
        for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            # trans and pref only depend on alpha, so they are computed once per alpha.
            trans = MI * (1 - alpha) + np.identity(L) * alpha
            pref = np.dot(
                mem_mult,
                trans)  # consider each trans[i] as a  base vector
            for mu in [0, 0.1, 0.2, 0.3, 0.4, 0.5]:
                temp = pref * mu + popularity_mult * (1 - mu)
                val_result[(alpha, mu)] = self._compute_logp_point(val, temp)
        #####choose alpha and mu that achieves best avg. point logP
        alpha, mu = max(val_result, key=val_result.get)
        trans = MI * (1 - alpha) + np.identity(L) * alpha
        pref = np.dot(mem_mult, trans)
        stm_scores = pref * mu + popularity_mult * (1 - mu)
        log.info('Evaluating MI based translation model with JM smoothing')
        stm_result = self._compute_erank_logp(test, stm_scores)
        results['Translation_JM'] = stm_result
        log.info("self transition weight and popularity weight: %f, %f" %
                 (alpha, 1 - mu))
        #####record results and mixture parameters########
        logP_p['Translation_JM'], logP_indiv['Translation_JM'] = logP(
            stm_scores, test)
        mix_alpha['Translation_JM'] = np.zeros(I) + mu * alpha

        ##########and self transition probability########
        log.info(
            'gridsearching on validation set (can be optimized) with Dirichlet prior'
        )
        val_result = dict()
        for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            # trans and pref only depend on alpha, so they are computed once per alpha.
            trans = MI * (1 - alpha) + np.identity(L) * alpha
            pref = np.dot(
                mem_scores,
                trans)  # consider each trans[i] as a  base vector
            for mu in [0, 0.1, 0.2, 0.3, 0.4, 0.5]:
                temp = pref + popularity_mult * mu * N / I
                val_result[(alpha, mu)] = self._compute_logp_point(val, temp)
        #####choose alpha and mu that achieves best avg. point logP
        alpha, mu = max(val_result, key=val_result.get)
        trans = MI * (1 - alpha) + np.identity(L) * alpha
        pref = np.dot(mem_scores, trans)
        stm_scores = pref + popularity_mult * mu * N / I
        log.info('Evaluating MI based translation model with Dirichlet prior')
        stm_result = self._compute_erank_logp(test, stm_scores)
        results['Translation_Dirichlet'] = stm_result
        log.info("self transition weight and prior strength: %f, %f" %
                 (alpha, mu * N / I))
        #####record results and mixture parameters########
        logP_p['Translation_Dirichlet'], logP_indiv[
            'Translation_Dirichlet'] = logP(stm_scores, test)
        mix_alpha['Translation_Dirichlet'] = n_train * alpha / (n_train +
                                                                mu * N / I)

        log.info('#############learning EM global#################')
        pi_mem_pop = learn_mix_mult_global(1.1, mem_mult, popularity_mult, val)
        log.info('Global mixing weight is %f and %f' %
                 (pi_mem_pop[0], pi_mem_pop[1]))
        log.info('Evaluating EM global')

        em_global_scores = pi_mem_pop[0] * mem_mult + pi_mem_pop[
            1] * popularity_mult
        EM_global_result = self._compute_erank_logp(test, em_global_scores)
        results['EM global'] = EM_global_result
        logP_p['EM global'], logP_indiv['EM global'] = logP(
            em_global_scores, test)
        mix_alpha['EM global'] = pi_mem_pop[0] + np.zeros(I)

        log.info('#############learning EM individual##############')
        pi_mem_pop = learn_mix_mult_on_individual(1.1, mem_mult,
                                                  popularity_mult, val)
        log.info('Evaluating EM indiv')

        # The mixed scores are only needed for logP(); _compute_erank_logp receives the two
        # components together with the per-individual weights.
        em_indiv_scores = col_vector(pi_mem_pop[:, 0]) * mem_mult + col_vector(
            pi_mem_pop[:, 1]) * popularity_mult
        EM_indiv_result = self._compute_erank_logp(test, mem_mult,
                                                   popularity_mult, pi_mem_pop)
        results['EM indiv'] = EM_indiv_result
        logP_p['EM indiv'], logP_indiv['EM indiv'] = logP(
            em_indiv_scores, test)
        mix_alpha['EM indiv'] = pi_mem_pop[:, 0]

        log.info('#############learning S_memory###################')
        log.info('gridsearching on validation set')
        val_result = dict()
        for alpha in ALPHA:
            temp = mem_scores * alpha + popularity_scores * (1 - alpha)
            val_result[alpha] = self._compute_logp_point(val, temp)
        #####choose alpha that achieves best avg. point logP
        alpha = max(val_result, key=val_result.get)
        print('alpha:', alpha)
        s_mem_scores = mem_scores * alpha + popularity_scores * (1 - alpha)
        log.info('Evaluating smoothed memory')
        s_mem_result = self._compute_erank_logp(test, s_mem_scores)
        results['S_Mem'] = s_mem_result

        # n_train (computed once above) is reused here; temp is the avg. number of training
        # observations per individual.
        temp = n_train.mean()
        logP_p['S_mem'], logP_indiv['S_mem'] = logP(s_mem_scores, test)
        mix_alpha['S_mem'] = alpha * n_train / (alpha * n_train +
                                                (1 - alpha) * temp)

        log.info('############learning with Dirichlet prior#############')
        log.info('gridsearching on validation set')
        val_result = dict()
        for alpha in ALPHA:
            temp = mem_scores + popularity_mult * alpha * N / I
            val_result[alpha] = self._compute_logp_point(val, temp)
        #####choose alpha that achieves best avg. point logP
        alpha = max(val_result, key=val_result.get)
        print('alpha:', alpha)
        dirichlet_scores = mem_scores + popularity_mult * alpha * N / I
        log.info('Evaluating with Dirichlet prior')
        dirichlet_result = self._compute_erank_logp(test, dirichlet_scores)
        results['Dirichlet'] = dirichlet_result

        logP_p['Dirichlet'], logP_indiv['Dirichlet'] = logP(
            dirichlet_scores, test)
        mix_alpha['Dirichlet'] = n_train / (n_train + alpha * N / I)

        self.pretty_print(results)
        return logP_p, logP_indiv, mix_alpha