def main(database, table, smooth_func, lambda_, min_tag_freq):
    '''
    Prints, one dict per line, the renormalized P(item | tag) for every
    item and every tag whose popularity is at least `min_tag_freq`
    (a negative `min_tag_freq` means: consider all tags).

    Parameters
    ----------
    database, table : annotation database/table read through `AnnotReader`
    smooth_func, lambda_ : smoothing parameters forwarded to `SmoothEstimator`
    min_tag_freq : int
        minimum tag popularity; negative disables the filter
    '''
    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Builds value calculator
        estimator = SmoothEstimator(smooth_func, lambda_, reader.iterate())
        calculator = ValueCalculator(estimator)

        #Determine tags which will be considered
        if min_tag_freq < 0:
            #All tags
            tags_to_consider = range(estimator.num_tags())
        else:
            counter = Counter(annot['tag'] for annot in reader.iterate())
            tags_to_consider = [tag for tag, pop in counter.iteritems()
                                if pop >= min_tag_freq]

        #Dumps probabilities.
        #(Removed dead code: a `connection` that was never created but
        # "disconnected" in a finally clause, and a rebinding of the
        # `database` parameter to None that only shadowed the argument.)
        items = np.arange(estimator.num_items())
        for tag in tags_to_consider:
            v_prob_it = calculator.rnorm_prob_items_given_tag(tag, items)
            for item in xrange(len(v_prob_it)):
                prob = float(v_prob_it[item])
                print({'tag':tag, 'item':item, 'prob_it':prob})
def test_prob_user_given_item(self):
    self.__init_test(test.SMALL_DEL_FILE)
    #Jelinek-Mercer smoothing with lambda 0.5, full profile fraction
    estimator = SmoothEstimator('JM', 0.5, self.annots, 1)
    user_prob = estimator.prob_user_given_item(0, 0)
    self.assertTrue(user_prob > 0)
def test_prob_user_given_item_profsize(self):
    self.__init_test(test.SMALL_DEL_FILE)
    #With an empty user profile fraction the probability must vanish
    estimator = SmoothEstimator('JM', 0.5, self.annots,
                                user_profile_fract_size = 0)
    self.assertEquals(estimator.prob_user_given_item(0, 0), 0.0)
def compute_for_user(database, table, user, relevant, annotated, smooth_func,
                     lambda_, user_profile_size, out_folder):
    '''
    Computes tag values for a single user and dumps the results under
    ``out_folder/user_<user>``: an ``info`` header file, per-tag values
    (via `compute_tag_values`) and the relevant item/tag pairs.

    Parameters
    ----------
    database, table : annotation database/table read through `AnnotReader`
    user : int
        id of the target user
    relevant : sequence of int
        item ids held out as relevant for this user (their annotations by
        `user` are excluded from the training query)
    annotated : sequence of int
        item ids already annotated by the user; removed from gamma items
    smooth_func, lambda_, user_profile_size :
        parameters forwarded to `SmoothEstimator`
    out_folder : str
        existing directory in which the per-user folder is created
        (os.mkdir raises if it already exists)
    '''
    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Relevant items by user are left out with this query
        query = {'$or' : [
                          { 'user':{'$ne' : user} },
                          { 'item':{'$nin' : relevant} }
                         ]
                }

        #Probability estimator, trained without the held-out annotations
        est = SmoothEstimator(smooth_func, lambda_,
                              reader.iterate(query = query),
                              user_profile_size = user_profile_size)
        value_calc = value_calculator.ValueCalculator(est)

        fname = 'user_%d' % user
        user_folder = os.path.join(out_folder, fname)
        os.mkdir(user_folder)

        #Initial information
        with io.open(os.path.join(user_folder, 'info'), 'w') as info:
            info.write(u'#UID: %d\n' %user)

            relevant_str = ' '.join([str(i) for i in relevant])
            annotated_str = ' '.join([str(i) for i in annotated])

            info.write(u'# %d relevant items: %s\n' %(len(relevant),
                                                      str(relevant_str)))
            info.write(u'# %d annotated items: %s\n' %(len(annotated),
                                                       str(annotated_str)))

        #Create Graph: occurrence indices tag->items and item->tags
        iterator = reader.iterate(query = query)
        tag_to_item, item_to_tag = \
            index_creator.create_double_occurrence_index(iterator,
                                                         'tag', 'item')

        #Items to consider <-> Gamma items: every item except those the
        #user already annotated
        items_to_consider = set(xrange(est.num_items()))
        annotated_set = set(annotated)
        items_to_consider.difference_update(annotated_set)

        compute_tag_values(est, value_calc, tag_to_item, user,
                           user_folder,
                           np.array([i for i in items_to_consider]))

        #Dump the tags attached to each relevant item
        relevant_tags_fpath = os.path.join(user_folder, 'relevant_item.tags')
        with io.open(relevant_tags_fpath, 'w') as rel:
            rel.write(u'#ITEM TAG\n')
            for item in relevant:
                for tag in item_to_tag[item]:
                    rel.write(u'%d %d\n' %(item, tag))
def test_prob_user_given_item_profsize(self):
    self.__init_test(test.SMALL_DEL_FILE)
    smooth = 'JM'
    jm_lambda = 0.5
    est = SmoothEstimator(smooth, jm_lambda, self.annots,
                          user_profile_fract_size=0)
    #Zero profile fraction forces a zero user-given-item probability
    prob = est.prob_user_given_item(0, 0)
    self.assertEquals(prob, 0.0)
def test_prob_item(self):
    self.__init_test(test.SMALL_DEL_FILE)
    est = SmoothEstimator('JM', 0.5, self.annots, 1)

    #Item probabilities: per-item annotation counts over the 10 total
    expected = [5 / 10, 1 / 10, 2 / 10, 1 / 10, 1 / 10]
    for item, item_prob in enumerate(expected):
        self.assertAlmostEquals(est.prob_item(item), item_prob)
def test_prob_item_given_tag(self):
    self.__init_test(test.SMALL_DEL_FILE)
    est = SmoothEstimator('Bayes', 0.3, self.annots, 1)
    gamma_items = np.array([0, 1, 2, 3, 4])

    for tag in [0, 1, 2, 3, 4, 5]:
        #Manually build P(i) and P(t|i)P(i) over the gamma items
        priors = []
        joints = []
        for item in [0, 1, 2, 3, 4]:
            item_prob = est.prob_item(item)
            tag_prob = est.prob_tag_given_item(item, tag)
            priors.append(item_prob)
            joints.append(tag_prob * item_prob)

        #Renormalize both vectors and compare against the estimator
        priors = np.array(priors)
        priors /= priors.sum()
        joints = np.array(joints)
        joints /= joints.sum()

        assert_array_almost_equal(priors, est.prob_items(gamma_items))
        assert_array_almost_equal(
            joints, est.prob_items_given_tag(tag, gamma_items))

        self.assertAlmostEqual(1, sum(est.prob_items(gamma_items)))
        self.assertAlmostEqual(
            1, sum(est.prob_items_given_tag(tag, gamma_items)))
def test_gamma_items_prob_items(self):
    self.__init_test(test.SMALL_DEL_FILE)
    est = SmoothEstimator('Bayes', 0.3, self.annots, 1)

    #Restricting to gamma items {1, 2} must renormalize over those two
    p1 = est.prob_item(1)
    p2 = est.prob_item(2)
    gamma_probs = est.prob_items(np.array([1, 2]))

    self.assertEqual(gamma_probs[0], p1 / (p1 + p2))
    self.assertEqual(gamma_probs[1], p2 / (p1 + p2))
def test_prob_item_given_tag(self):
    self.__init_test(test.SMALL_DEL_FILE)
    prob_est = SmoothEstimator('Bayes', 0.3, self.annots, 1)
    candidate_items = [0, 1, 2, 3, 4]
    gamma_items = np.array(candidate_items)

    for tag in xrange(6):
        #Expected renormalized P(i) and P(i|t) over the gamma items
        prior = np.array([prob_est.prob_item(i) for i in candidate_items])
        joint = np.array([prob_est.prob_tag_given_item(i, tag) *
                          prob_est.prob_item(i) for i in candidate_items])
        prior /= prior.sum()
        joint /= joint.sum()

        assert_array_almost_equal(prior, prob_est.prob_items(gamma_items))
        assert_array_almost_equal(
            joint, prob_est.prob_items_given_tag(tag, gamma_items))
        self.assertAlmostEqual(1, sum(prob_est.prob_items(gamma_items)))
        self.assertAlmostEqual(
            1, sum(prob_est.prob_items_given_tag(tag, gamma_items)))
def test_tag_given_item(self):
    self.__init_test(test.SMALL_DEL_FILE)
    lamb = 0.5
    est = SmoothEstimator('JM', lamb, self.annots, 1)

    #Expected P(t=0 | i) per item, computed straight from the counts
    expected = [jelinek_mercer(2, 5, 3, 10, lamb),
                jelinek_mercer(0, 5, 3, 10, lamb),
                jelinek_mercer(1, 2, 3, 10, lamb),
                jelinek_mercer(0, 5, 3, 10, lamb),
                jelinek_mercer(0, 5, 3, 10, lamb)]

    for item, exp_prob in enumerate(expected):
        self.assertEquals(est.prob_tag_given_item(item, 0), exp_prob)
def real_main(database, table, smooth_func, lambda_, user):
    '''
    Prints every (item, value) pair of the item value estimates
    computed for `user` from the given annotation table.
    '''
    with AnnotReader(database) as reader:
        reader.change_table(table)
        estimator = SmoothEstimator(smooth_func, lambda_, reader.iterate())
        calculator = value_calculator.ValueCalculator(estimator)

        item_values = calculator.item_value(user)
        for item, value in item_values.iteritems():
            print(item, value)
def compute_for_user(database, table, user, relevant, annotated, smooth_func,
                     lambda_, user_profile_size, out_folder):
    '''
    Computes, for one user and their held-out relevant items, the
    renormalized P(i|u), personalized tag values and P(i|u,t) curves, and
    writes them as point files under ``out_folder/user_<user>``.

    Parameters
    ----------
    database, table : annotation database/table read through `AnnotReader`
    user : int
        id of the target user
    relevant : sequence of int
        held-out relevant item ids; these form the gamma items
    annotated : sequence of int
        item ids the user annotated (only reported in the info file here)
    smooth_func, lambda_, user_profile_size :
        parameters forwarded to `SmoothEstimator`
    out_folder : str
        existing directory in which the per-user folder is created
    '''
    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Relevant items by user are left out with this query
        query = {'$or' : [
                          { 'user':{'$ne' : user} },
                          { 'item':{'$nin' : relevant} }
                         ]
                }

        #Probability estimator, trained without the held-out annotations
        est = SmoothEstimator(smooth_func, lambda_,
                              reader.iterate(query=query),
                              user_profile_size = user_profile_size)
        value_calc = value_calculator.ValueCalculator(est)

        fname = 'user_%d' % user
        user_folder = os.path.join(out_folder, fname)
        os.mkdir(user_folder)

        #Initial information
        with open(os.path.join(user_folder, 'info'), 'w') as info:
            print('#UID: %d' %user, file=info)

            relevant_str = ' '.join([str(i) for i in relevant])
            annotated_str = ' '.join([str(i) for i in annotated])

            print('#%d relevant: %s' %(len(relevant), str(relevant_str)),
                  file=info)
            print('#%d annotated: %s' %(len(annotated), str(annotated_str)),
                  file=info)

        #Gamma items = the held-out relevant items ('l' -> C long dtype)
        items = np.array(relevant, dtype='l')

        #P(i|u) renormalized over the gamma items
        v_piu = value_calc.rnorm_prob_items_given_user(user, items)
        #Personalized tag value over the same gamma items
        v_dkl = value_calc.tag_value_personalized(user, gamma_items=items)

        v_dkl_argsort = v_dkl.argsort()
        #NOTE(review): argsort is ascending, so `top_5_tags` holds the five
        #LOWEST-valued tags and `bottom_5_tags` the five highest — confirm
        #this naming matches the intended semantics
        top_5_tags = v_dkl_argsort[:5]
        bottom_5_tags = v_dkl_argsort[len(v_dkl) - 5:]

        write_points_file(v_piu, os.path.join(user_folder, 'v_piu.dat'))
        write_points_file(v_dkl, os.path.join(user_folder, 'v_dkl.dat'))

        #P(i|u,t) curves for each selected tag
        for i, tag in enumerate(top_5_tags):
            v_pitu = value_calc.rnorm_prob_items_given_user_tag(user, tag,
                                                                items)
            write_points_file(v_pitu,
                    os.path.join(user_folder,
                                 'v_pitu_tag_%d_top_%d.dat' % (tag, i + 1)))

        for i, tag in enumerate(bottom_5_tags):
            v_pitu = value_calc.rnorm_prob_items_given_user_tag(user, tag,
                                                                items)
            write_points_file(v_pitu,
                    os.path.join(user_folder,
                                 'v_pitu_tag_%d_bottom_%d.dat' % (tag, 5 - i)))
def test_bayes(self):
    self.__init_test(test.SMALL_DEL_FILE)
    lamb = 0.3
    est = SmoothEstimator('Bayes', lamb, self.annots, 1)

    #Expected P(t | i=0) for every tag, straight from the raw counts
    expected = [bayes(2, 5, 3, 10, lamb),
                bayes(1, 5, 3, 10, lamb),
                bayes(0, 5, 1, 10, lamb),
                bayes(1, 5, 1, 10, lamb),
                bayes(1, 5, 1, 10, lamb),
                bayes(0, 5, 1, 10, lamb)]

    for tag, exp_prob in enumerate(expected):
        self.assertAlmostEquals(est.prob_tag_given_item(0, tag), exp_prob)
def compute_tag_values(smooth_func, lambda_, annotation_it, tag_to_item,
                       tag_pops, out_folder):
    '''
    Writes one line per tag to ``out_folder/tag.values`` containing:
    tag id, tag popularity, raw tag value, mean renormalized item
    probability over the tag's items, and their product.
    '''
    estimator = SmoothEstimator(smooth_func, lambda_, annotation_it)
    calculator = value_calculator.ValueCalculator(estimator)
    tag_value = calculator.tag_value_item_search()

    out_fpath = os.path.join(out_folder, 'tag.values')
    with io.open(out_fpath, 'w') as values:
        for tag, tag_val in enumerate(tag_value):
            tagged_items = np.array([item for item in tag_to_item[tag]])
            mean_prob = calculator.rnorm_prob_items(tagged_items).mean()
            values.write(u'%d %d %.15f %.15f %.15f\n' %
                         (tag, tag_pops[tag], tag_val, mean_prob,
                          tag_val * mean_prob))
def create_bayes_estimator(annotations, lambda_, user_profile_fract_size=.4):
    '''
    Creates smooth estimator with the best Bayes parameter described in [1]_

    References
    ----------
    [1]_ Personalization of Tagging Systems, Wang, Jun, Clements Maarten,
    Yang J., de Vries Arjen P., and Reinders Marcel J. T.,
    Information Processing and Management, Volume 46, Issue 1, p.58-70, (2010)
    '''
    return SmoothEstimator('Bayes', lambda_, annotations,
                           user_profile_fract_size)
def main(database, table, smooth_func, lambda_, alpha, output_folder,
         min_tag_freq=1):
    '''
    Builds a synthetic seeker profile (averaged zipf samples) over all
    items and dumps three files in ``output_folder``: ``tag.values`` (tag
    value estimates), ``item_tag.pairs`` (tag/item co-occurrences) and
    ``item.probs`` (the seeker profile probabilities).

    Parameters
    ----------
    database, table : annotation database/table read through `AnnotReader`
    smooth_func, lambda_ : smoothing parameters for `SmoothEstimator`
    alpha : float
        zipf exponent used to sample the synthetic seeker profile
    output_folder : str
        existing directory to write the three output files to
    min_tag_freq : int
        minimum tag popularity forwarded to `fetch_tags_and_items`
    '''
    assert os.path.isdir(
        output_folder), '%s is not a directory' % output_folder

    tag_value_fpath = os.path.join(output_folder, 'tag.values')
    item_tag_fpath = os.path.join(output_folder, 'item_tag.pairs')
    item_probs_fpath = os.path.join(output_folder, 'item.probs')

    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Determine the items annotated by each tag and array of all items
        items_array, tags_array, tag_to_item, tag_pop = \
            fetch_tags_and_items(reader, min_tag_freq)

        #Generates user profile based on zipf and computes value
        n_items = items_array.shape[0]
        seeker_profile = np.zeros(n_items, dtype='float64')
        n_dists = 10
        for i in xrange(n_dists):
            seeker_profile += np.random.zipf(alpha, n_items)

        #Average it out and transform to probabilities
        seeker_profile /= n_dists
        seeker_profile /= seeker_profile.sum()

        #Tag Value
        estimator = SmoothEstimator(smooth_func, lambda_, reader.iterate())
        with open(tag_value_fpath, 'w') as tag_value_file:
            tag_values(estimator, tags_array, items_array, tag_to_item,
                       seeker_profile, tag_pop, tag_value_file)

        #Item tag pairs
        with open(item_tag_fpath, 'w') as item_tag_file:
            print('#tag_id', 'item_id', file=item_tag_file)
            for tag_id in tag_to_item:
                for item_id in tag_to_item[tag_id]:
                    print(tag_id, item_id, file=item_tag_file)

        #Seeker profile probabilities, one item per line
        with open(item_probs_fpath, 'w') as item_probs_file:
            print('#item_id', 'prob', file=item_probs_file)
            for item_id, prob in enumerate(seeker_profile):
                print(item_id, prob, file=item_probs_file)
def test_prob_items_given_user_and_tag(self):
    self.__init_test(test.SMALL_DEL_FILE)
    est = SmoothEstimator('Bayes', 0.3, self.annots, 1)
    gamma_items = np.array([0, 1, 2, 3, 4])

    for user in xrange(3):
        for tag in xrange(6):
            #Manually compute P(i|u) and P(i|u,t) over the gamma items
            pius = []
            pitus = []
            for item in [0, 1, 2, 3, 4]:
                pi = est.prob_item(item)
                pti = est.prob_tag_given_item(item, tag)
                pui = est.prob_user_given_item(item, user)
                pius.append(pui * pi)
                pitus.append(pti * pui * pi)

            sum_pius = sum(pius)
            sum_pitus = sum(pitus)
            norm_pius = [p / sum_pius for p in pius]
            norm_pitus = [p / sum_pitus for p in pitus]

            #Assert
            assert_array_almost_equal(
                norm_pius, est.prob_items_given_user(user, gamma_items))
            assert_array_almost_equal(
                norm_pitus,
                est.prob_items_given_user_tag(user, tag, gamma_items))

            self.assertAlmostEqual(
                1, sum(est.prob_items_given_user(user, gamma_items)))
            self.assertAlmostEqual(
                1, sum(est.prob_items_given_user_tag(user, tag,
                                                     gamma_items)))
def test_prob_items_given_user_and_tag(self):
    self.__init_test(test.SMALL_DEL_FILE)
    smooth = 'Bayes'
    bayes_lambda = 0.3
    estimator = SmoothEstimator(smooth, bayes_lambda, self.annots, 1)

    items = [0, 1, 2, 3, 4]
    gamma_items = np.array(items)

    for user in [0, 1, 2]:
        for tag in [0, 1, 2, 3, 4, 5]:
            #Joint probabilities before renormalization
            joint_u = []
            joint_ut = []
            for item in items:
                p_item = estimator.prob_item(item)
                p_tag = estimator.prob_tag_given_item(item, tag)
                p_user = estimator.prob_user_given_item(item, user)
                joint_u.append(p_user * p_item)
                joint_ut.append(p_tag * p_user * p_item)

            total_u = sum(joint_u)
            total_ut = sum(joint_ut)
            expect_piu = [p / total_u for p in joint_u]
            expect_pitu = [p / total_ut for p in joint_ut]

            #Assert against the estimator's renormalized vectors
            assert_array_almost_equal(
                expect_piu,
                estimator.prob_items_given_user(user, gamma_items))
            assert_array_almost_equal(
                expect_pitu,
                estimator.prob_items_given_user_tag(user, tag, gamma_items))
            self.assertAlmostEqual(
                1, sum(estimator.prob_items_given_user(user, gamma_items)))
            self.assertAlmostEqual(
                1, sum(estimator.prob_items_given_user_tag(user, tag,
                                                           gamma_items)))
def compute_for_user(database, table, user, relevant, annotated, smooth_func,
                     lambda_, user_profile_size, out_folder):
    '''
    Computes tag values for a single user, restricted to ten candidate
    gamma items picked from the user's item value estimates, and dumps
    the results under ``out_folder/user_<user>`` (info file, tag/item
    graph, tag values and relevant item/tag pairs).

    Parameters
    ----------
    database, table : annotation database/table read through `AnnotReader`
    user : int
        id of the target user
    relevant : sequence of int
        item ids held out as relevant for this user (their annotations by
        `user` are excluded from the training query)
    annotated : sequence of int
        item ids already annotated by the user; skipped when picking the
        ten gamma items
    smooth_func, lambda_, user_profile_size :
        parameters forwarded to `SmoothEstimator`
    out_folder : str
        existing directory in which the per-user folder is created
    '''
    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Relevant items by user are left out with this query
        query = {
            '$or': [{
                'user': {
                    '$ne': user
                }
            }, {
                'item': {
                    '$nin': relevant
                }
            }]
        }

        #Probability estimator, trained without the held-out annotations
        est = SmoothEstimator(smooth_func,
                              lambda_,
                              reader.iterate(query=query),
                              user_profile_size=user_profile_size)
        value_calc = value_calculator.ValueCalculator(est)

        fname = 'user_%d' % user
        user_folder = os.path.join(out_folder, fname)
        os.mkdir(user_folder)

        #Initial information
        with io.open(os.path.join(user_folder, 'info'), 'w') as info:
            info.write(u'#UID: %d\n' % user)

            relevant_str = ' '.join([str(i) for i in relevant])
            annotated_str = ' '.join([str(i) for i in annotated])

            info.write(u'# %d relevant items: %s\n' %
                       (len(relevant), str(relevant_str)))
            info.write(u'# %d annotated items: %s\n' %
                       (len(annotated), str(annotated_str)))

        #Create Graph: occurrence indices and the tag/item graph file
        tag_to_item, item_to_tag = \
            index_creator.create_double_occurrence_index(
                reader.iterate(query = query), 'tag', 'item')
        create_graph(tag_to_item, item_to_tag, user_folder)

        #Items to consider <-> Gamma items
        annotated_set = set(annotated)
        iestimates = value_calc.item_value(user)

        #Filter top 10
        #NOTE(review): argsort is ascending, so this walks items from the
        #LOWEST estimated value upwards — confirm "top" is the intent
        top_vals = iestimates.argsort()
        items_to_consider = set()
        for item in top_vals:
            if item in annotated_set:
                continue

            items_to_consider.add(item)
            if len(items_to_consider) == 10:
                break

        compute_tag_values(est, value_calc, tag_to_item, user, user_folder,
                           np.array([i for i in items_to_consider]))

        #Dump the tags attached to each relevant item
        with io.open(os.path.join(user_folder, 'relevant_item.tags'),
                     'w') as rel:
            rel.write(u'#ITEM TAG\n')
            for item in relevant:
                for tag in item_to_tag[item]:
                    rel.write(u'%d %d\n' % (item, tag))
def build_value_calculator(self, annots, smooth_func, lambda_):
    '''Returns an estimator/value-calculator pair built from `annots`.'''
    estimator = SmoothEstimator(smooth_func, lambda_, annots, 1)
    calculator = value_calculator.ValueCalculator(estimator, annots)
    return estimator, calculator