Пример #1
0
def tag_values(estimator, tags_array, items_array, tag_to_item, seeker_profile, 
               tag_pop, outfile):
    '''
    Saves the value of each individual tag: one whitespace-separated line per
    tag written to `outfile`, with the columns named in the header row below.

    Parameters (shapes inferred from usage — confirm against callers):
      estimator      - provides vect_prob_tag(tags) -> P(t) per tag and
                       vect_prob_tag_given_item(items, tag) -> P(t|i) per item
      tags_array     - iterable of tag ids to evaluate
      items_array    - array of item ids passed to the estimator
      tag_to_item    - maps a tag id to the indices of items annotated with it
      seeker_profile - array of per-item probabilities for the seeker
                       (presumably aligned with items_array — TODO confirm)
      tag_pop        - maps a tag id to its popularity value
      outfile        - open writable file object for the result rows
    '''
    #Value for each tag
    print('#tag_id', 'rho', 'surprisal', 'dkl', 'dkl*rho', 'dkl/surprisal',
          'n_items', 'prob_tag', 'pop_tag', 'mean_pti', file=outfile)
    #P(t) for every tag, computed once up front (vectorized)
    prob_tags = estimator.vect_prob_tag(tags_array)
    for i, tag_id in enumerate(tags_array):
        
        #Probabilities
        #P(t|i) for every item in items_array, for this tag
        prob_tag_items = estimator.vect_prob_tag_given_item(items_array, 
                                                            tag_id)
        prob_tag = prob_tags[i]
        #Bayes-style reweighting of the seeker profile by P(t|i)/P(t),
        #then renormalized so it sums to 1 (a posterior over items)
        prob_item_seeker_tag = (prob_tag_items / prob_tag) * seeker_profile
        prob_item_seeker_tag /= prob_item_seeker_tag.sum() #Renormalize
        #Seeker profile restricted to the items annotated with this tag
        prob_items_tagged = seeker_profile[tag_to_item[tag_id]]
        
        #Metrics
        #KL divergence between the tag-conditioned posterior and the profile
        dkl = entropy.kullback_leiber_divergence(prob_item_seeker_tag, 
                                                 seeker_profile)
        rho = np.mean(prob_items_tagged)
        #NOTE(review): this is the mean of the INVERSE surprisal 1/(-log2 p),
        #so printing dkl * surprisal below matches the 'dkl/surprisal' header
        surprisal = np.mean(1.0 / -np.log2(prob_items_tagged))
        #Mean P(t|i) over the items actually tagged with tag_id
        mean_pti = np.mean(prob_tag_items[tag_to_item[tag_id]])
        pop_tag = tag_pop[tag_id]
        print(tag_id, rho, surprisal, dkl, rho * dkl, dkl * surprisal,
              len(prob_items_tagged), prob_tag, pop_tag, mean_pti, file=outfile)
Пример #2
0
 def test_kl3(self):
     # KL(P || Q) must be infinite when P assigns mass to an outcome
     # that Q gives zero probability (index 1: P = 0.20, Q = 0).
     p = np.array([0.25, 0.20, 0, 0.55])
     q = np.array([0.20, 0, 0.25, 0.55])
     result = entropy.kullback_leiber_divergence(p, q)
     self.assertAlmostEqual(result, float('inf'))
Пример #3
0
 def test_kl2(self):
     # KL divergence for two distributions with common support; the
     # trailing zero entry is excluded from the reference sum (the
     # 0 * log(0/0) term is undefined and contributes nothing).
     p = np.array([0.04, 0.16] * 5 + [0])
     q = np.array([0.02, 0.18] * 5 + [0])
     
     expected = sum(pi * math.log(pi / qi, 2)
                    for pi, qi in zip(p[:-1], q[:-1]))
     
     result = entropy.kullback_leiber_divergence(p, q)
     self.assertAlmostEqual(result, expected)
Пример #4
0
def tag_values(estimator, tags_array, items_array, tag_to_item, seeker_profile,
               tag_pop, outfile):
    '''
    Saves the value of each individual tag.

    Writes a header row followed by one whitespace-separated line of
    metrics per tag in `tags_array` to `outfile`.
    '''
    # Column names for the rows emitted below
    header = ('#tag_id', 'rho', 'surprisal', 'dkl', 'dkl*rho',
              'dkl/surprisal', 'n_items', 'prob_tag', 'pop_tag', 'mean_pti')
    print(*header, file=outfile)

    # P(t) for every tag, computed once (vectorized)
    tag_probs = estimator.vect_prob_tag(tags_array)
    for idx, tag in enumerate(tags_array):
        # P(t|i) for every item, for this tag
        p_tag_given_items = estimator.vect_prob_tag_given_item(
            items_array, tag)
        p_tag = tag_probs[idx]

        # Bayes-style reweighting of the seeker profile by P(t|i)/P(t),
        # renormalized into a proper distribution over items
        posterior = (p_tag_given_items / p_tag) * seeker_profile
        posterior /= posterior.sum()

        # Seeker profile restricted to items annotated with this tag
        tagged_probs = seeker_profile[tag_to_item[tag]]

        # Metrics; note `surprisal` is a mean of the inverse surprisal
        # 1/(-log2 p), so dkl * surprisal maps to the 'dkl/surprisal' column
        dkl = entropy.kullback_leiber_divergence(posterior, seeker_profile)
        rho = np.mean(tagged_probs)
        surprisal = np.mean(1.0 / -np.log2(tagged_probs))
        mean_pti = np.mean(p_tag_given_items[tag_to_item[tag]])

        print(tag, rho, surprisal, dkl, rho * dkl, dkl * surprisal,
              len(tagged_probs), p_tag, tag_pop[tag], mean_pti,
              file=outfile)