Exemplo n.º 1
0
def read_user_features(about, user_ids, user_id_mapping):
    """Assemble a ``Features`` object describing users.

    Args:
        about: When truthy, every id in ``user_id_mapping`` gets an
            ``'intercept'`` feature, and each record produced by
            ``read_user_data()`` contributes one ``'about:<token>'``
            feature per token of its processed "about me" text.
        user_ids: When truthy, every id in ``user_id_mapping`` gets a
            ``'user_id:<id>'`` indicator feature.
        user_id_mapping: Iterable of user ids; it is also stored as
            ``item_ids`` on the returned object. The ids are concatenated
            to a string prefix, so they are presumably strings -- confirm
            against the caller.

    Returns:
        The populated ``Features`` instance, after ``set_shape()`` has
        been called on it.
    """
    user_features = Features()
    user_features.item_ids = user_id_mapping

    if user_ids:
        for mapped_id in user_id_mapping:
            user_features.add_feature(mapped_id, 'user_id:' + mapped_id)

    if about:
        # Intercepts are registered first, before any text-derived feature.
        for mapped_id in user_id_mapping:
            user_features.add_feature(mapped_id, 'intercept')

        for user_id, about_me in read_user_data():
            # Strip markup, flatten newlines and lowercase before the
            # text is handed to the post-body processor.
            flattened = strip_tags(about_me).replace('\n', ' ')
            tokens = _process_post_body(flattened.lower())

            for token in tokens:
                user_features.add_feature(user_id, 'about:' + token)

    user_features.set_shape()

    return user_features
Exemplo n.º 2
0
def read_user_features(about, user_ids, user_id_mapping):
    """Create the user-side ``Features`` container.

    Two independent switches control which features are registered:

    * ``user_ids`` -- add a ``'user_id:<id>'`` feature for each id found
      by iterating ``user_id_mapping``.
    * ``about`` -- add an ``'intercept'`` feature for each of those ids,
      then an ``'about:<token>'`` feature for every token obtained from
      the "about me" text of each ``(user_id, about_me)`` pair yielded by
      ``read_user_data()``.

    ``user_id_mapping`` is additionally assigned to the container's
    ``item_ids`` attribute. ``set_shape()`` is invoked once all features
    have been added, and the container is returned.
    """

    def _normalise(raw_text):
        # Remove tags, turn newlines into spaces, then lowercase.
        without_tags = strip_tags(raw_text)
        single_line = without_tags.replace('\n', ' ')
        return single_line.lower()

    result = Features()
    result.item_ids = user_id_mapping

    if user_ids:
        for key in user_id_mapping:
            result.add_feature(key, 'user_id:' + key)

    if about:
        # Every mapped user receives an intercept before text features.
        for key in user_id_mapping:
            result.add_feature(key, 'intercept')

        for user_id, about_me in read_user_data():
            for word in _process_post_body(_normalise(about_me)):
                result.add_feature(user_id, 'about:' + word)

    result.set_shape()

    return result