def main(db_fpath, db_name, cross_val_folder, probs_folder):
    '''
    Runs the experiment using probabilities which were pre-computed and
    stored under `probs_folder`.
    '''
    #Load the cross validation dictionaries from disk
    cross_val = load_train_test_validation(cross_val_folder)
    user_items_to_filter, user_validation_tags, user_test_tags = cross_val

    with AnnotReader(db_fpath) as reader:
        reader.change_table(db_name)

        #Drop the held-out (user, item) pairs from the trace
        filtered = FilteredUserItemAnnotations(user_items_to_filter)
        filtered_annots = filtered.annotations(reader.iterate())

        estimator = PrecomputedEstimator(probs_folder)
        calculator = ValueCalculator(estimator, filtered_annots)
        run_exp(user_validation_tags, user_test_tags, estimator, calculator)
def sanity_check(reader, user_items_to_filter):
    '''
    A simple sanity check to verify that we did not delete any user, item
    or tag from the trace.
    '''
    #Collect every user, item and tag present in the unfiltered trace
    all_users = set()
    all_items = set()
    all_tags = set()
    for annot in reader.iterate():
        all_users.add(annot['user'])
        all_items.add(annot['item'])
        all_tags.add(annot['tag'])

    #Replay the trace through the filter and collect the same three sets
    annot_filter = FilteredUserItemAnnotations(user_items_to_filter)
    seen_users = set()
    seen_items = set()
    seen_tags = set()
    for annot in annot_filter.annotations(reader.iterate()):
        user = annot['user']
        item = annot['item']
        tag = annot['tag']

        assert user in all_users
        assert item in all_items
        assert tag in all_tags
        #Filtered (user, item) pairs must never come through
        if user in user_items_to_filter:
            assert item not in user_items_to_filter[user]

        seen_users.add(user)
        seen_items.add(item)
        seen_tags.add(tag)

    #The filter may only drop (user, item) pairs, never whole entities.
    #Set equality is equivalent to the original len + symmetric_difference
    #checks taken together.
    assert seen_users == all_users
    assert seen_items == all_items
    assert seen_tags == all_tags
def main(db_fpath, db_name, cross_val_folder, probs_folder):
    '''
    Runs the experiment using pre-computed probabilities, passing the raw
    reader and the annotation filter down to `run_exp`.
    '''
    #Load the cross validation dictionaries from disk
    cross_val = load_train_test_validation(cross_val_folder)
    user_items_to_filter, user_validation_tags, user_test_tags, \
            user_test_items = cross_val

    with AnnotReader(db_fpath) as reader:
        reader.change_table(db_name)

        filtered = FilteredUserItemAnnotations(user_items_to_filter)
        estimator = PrecomputedEstimator(probs_folder)
        run_exp(user_validation_tags, user_test_tags, user_test_items,
                estimator, filtered, reader)
def main(db_fpath, db_name, cross_val_folder, param_value, est_name,
         rand_seed=None, num_cores=-1):
    '''Dispatches jobs in multiple cores'''
    #NOTE(review): `num_cores` is not used in this body — kept for interface
    #compatibility with callers; confirm whether it should feed a pool here.
    seed(rand_seed)

    #get cross validation dicts
    user_items_to_filter, user_validation_tags, user_test_tags = \
            load_train_test_validation(cross_val_folder)

    #all tags used by all users. Used to create a random set of tags
    #excluding these ones
    used_tags = set()
    for user in user_items_to_filter:
        used_tags.update(user_validation_tags[user])
        used_tags.update(user_test_tags[user])

    with AnnotReader(db_fpath) as reader:
        reader.change_table(db_name)
        annot_filter = FilteredUserItemAnnotations(user_items_to_filter)

        #Generate 50 random tags not used by any user the test set
        #Also creates some indexes used to define gamma items
        annotations = annot_filter.annotations(reader.iterate())
        user_to_item = defaultdict(set)
        items = set()
        tags = set()
        random_tags = []
        #Auxiliary set for O(1) membership tests; the list keeps the
        #first-seen order (testing membership on the list was O(n^2)).
        random_tags_seen = set()
        for annotation in annotations:
            user = annotation['user']
            item = annotation['item']
            tag = annotation['tag']

            user_to_item[user].add(item)
            items.add(item)
            tags.add(tag)
            if tag not in used_tags and tag not in random_tags_seen:
                random_tags_seen.add(tag)
                random_tags.append(tag)
        shuffle(random_tags)
        random_tags = random_tags[:NUM_RANDOM_TAGS]

        #Gets number of tags and items
        num_items = len(items)
        num_tags = len(tags)

        #Create estimator from a fresh pass over the filtered trace
        annotations = annot_filter.annotations(reader.iterate())
        if est_name == 'lda':
            est = create_lda_estimator(annotations, param_value, num_items,
                                       num_tags)
        else:
            est = create_bayes_estimator(annotations, param_value)

        annotations = annot_filter.annotations(reader.iterate())
        value_calc = ValueCalculator(est, annotations)
        run_exp(user_items_to_filter, user_test_tags, user_to_item,
                num_items, random_tags, value_calc)
def run_one(args):
    """
    This method will be run by parallel processes. Basically, it is the
    main method for each possible parameter being tested. It will work
    as follows:

    1. Loads train, validation and test separation from files
    2. Values of p(i|u) are computed for the gamma items set for each user
       based on the train set. Gamma items is just every item excluding
       the user items.
    3. Computes p(i|t,u) for a set of tags gamma items for each user.
       The set of tags is composed of the previous user tags (those on the
       test set), the tags which were used on the validation set, the tags
       used on the train set and 50 random tags not previously used by
       the user.
    4. Saves p(i|u) and p(i|t,u) for items and tags considered above on the
       output folder. This provides sufficient information for choosing the
       best estimator (on the validation set) and performing further
       experiments (actually computing tag values) on the test set.
    """
    # unbox arguments
    db_fpath, db_name, output_folder, cross_val_folder, est_name, \
            param_one, value_one, param_two, value_two = args

    # get cross validation dicts
    user_items_to_filter, user_validation_tags, user_test_tags = \
            load_train_test_validation(cross_val_folder)

    # all tags used by all users. Used to create a random set of tags
    # excluding these ones
    used_tags = set()
    for user in user_items_to_filter:
        used_tags.update(user_validation_tags[user])
        used_tags.update(user_test_tags[user])

    with AnnotReader(db_fpath) as reader:
        reader.change_table(db_name)
        annot_filter = FilteredUserItemAnnotations(user_items_to_filter)

        # Generate 50 random tags not used by any user in validation or test
        # Also creates some indexes used to define gamma items
        annotations = annot_filter.annotations(reader.iterate())
        user_to_item = defaultdict(set)
        items = set()
        tags = set()
        random_tags = []
        # Auxiliary set for O(1) membership tests; the list keeps the
        # first-seen order (testing membership on the list was O(n^2)).
        random_tags_seen = set()
        for annotation in annotations:
            user = annotation["user"]
            item = annotation["item"]
            tag = annotation["tag"]

            user_to_item[user].add(item)
            items.add(item)
            tags.add(tag)
            if tag not in used_tags and tag not in random_tags_seen:
                random_tags_seen.add(tag)
                random_tags.append(tag)
        shuffle(random_tags)
        random_tags = random_tags[:NUM_RANDOM_TAGS]

        # Gets number of tags and items
        num_items = len(items)
        num_tags = len(tags)

        # Create estimator from a fresh pass over the filtered trace
        annotations = annot_filter.annotations(reader.iterate())
        save_lhood = False
        if est_name == "lda":
            est = create_lda_estimator(annotations, value_one, num_items,
                                       num_tags, value_two)
            save_lhood = True
        else:
            est = create_bayes_estimator(annotations, value_one, value_two)

        # One output sub-folder per (param, value) combination being tested
        param_out_folder = os.path.join(
            output_folder,
            "params-%s-%f_%s-%f" % (param_one, value_one, param_two, value_two),
        )
        os.mkdir(param_out_folder)

        run_exp(
            user_items_to_filter,
            user_validation_tags,
            user_test_tags,
            user_to_item,
            num_items,
            random_tags,
            est,
            param_out_folder,
            save_lhood,
        )
def run_one(args):
    '''
    This method will be run by parallel processes. Basically, it is the
    main method for each possible parameter being tested. It will work
    as follows:

    1. Loads train, validation and test separation from files
    2. Values of p(i|u) are computed for the gamma items set for each user
       based on the train set. Gamma items is just every item excluding
       the user items.
    3. Computes p(i|t,u) for a set of tags gamma items for each user.
       The set of tags is composed of the previous user tags (those on the
       test set), the tags which were used on the validation set, the tags
       used on the train set and 50 random tags not previously used by
       the user.
    4. Saves p(i|u) and p(i|t,u) for items and tags considered above on the
       output folder. This provides sufficient information for choosing the
       best estimator (on the validation set) and performing further
       experiments (actually computing tag values) on the test set.
    '''
    #unbox arguments
    db_fpath, db_name, output_folder, cross_val_folder, est_name, \
            param_one, value_one, param_two, value_two = args

    #get cross validation dicts
    user_items_to_filter, user_validation_tags, user_test_tags = \
            load_train_test_validation(cross_val_folder)

    #all tags used by all users. Used to create a random set of tags
    #excluding these ones
    used_tags = set()
    for user in user_items_to_filter:
        used_tags.update(user_validation_tags[user])
        used_tags.update(user_test_tags[user])

    with AnnotReader(db_fpath) as reader:
        reader.change_table(db_name)
        annot_filter = FilteredUserItemAnnotations(user_items_to_filter)

        #Generate 50 random tags not used by any user in validation or test
        #Also creates some indexes used to define gamma items
        annotations = annot_filter.annotations(reader.iterate())
        user_to_item = defaultdict(set)
        items = set()
        tags = set()
        random_tags = []
        #Auxiliary set for O(1) membership tests; the list keeps the
        #first-seen order (testing membership on the list was O(n^2)).
        random_tags_seen = set()
        for annotation in annotations:
            user = annotation['user']
            item = annotation['item']
            tag = annotation['tag']

            user_to_item[user].add(item)
            items.add(item)
            tags.add(tag)
            if tag not in used_tags and tag not in random_tags_seen:
                random_tags_seen.add(tag)
                random_tags.append(tag)
        shuffle(random_tags)
        random_tags = random_tags[:NUM_RANDOM_TAGS]

        #Gets number of tags and items
        num_items = len(items)
        num_tags = len(tags)

        #Create estimator from a fresh pass over the filtered trace
        annotations = annot_filter.annotations(reader.iterate())
        save_lhood = False
        if est_name == 'lda':
            est = create_lda_estimator(annotations, value_one, num_items,
                                       num_tags, value_two)
            save_lhood = True
        else:
            est = create_bayes_estimator(annotations, value_one, value_two)

        #One output sub-folder per (param, value) combination being tested
        param_out_folder = os.path.join(output_folder, \
                'params-%s-%f_%s-%f' % \
                (param_one, value_one, param_two, value_two))
        os.mkdir(param_out_folder)

        run_exp(user_items_to_filter, user_validation_tags, user_test_tags,
                user_to_item, num_items, random_tags, est, param_out_folder,
                save_lhood)