示例#1
0
def get_features_of_styles(arows,note_feature_list=None,user_feature_list=None,note_feature_names=None,user_feature_names=None):
    # makes a nice table of statistics ~~
    arows = [a for a in arows if a.get('emax',None) is not None] # fiter out insub
    if note_feature_list is None: note_feature_list = default_note_features
    if user_feature_list is None: user_feature_list = default_user_features
    if note_feature_names is None: note_feature_names = default_note_feature_names
    if user_feature_names is None: user_feature_names = default_user_feature_names    
    owners = get_userids_with_cat(arows)
    owners = filter(lambda oid: intent.owner_orm(oid) not in cal.RESEARCHERS,owners)
    notes_of_owners = reduce(lambda x,y:x+y,[[n for n in intent.owner_orm(o).note_owner.all().values()] for o in owners])

    print "-----------ALL [", len(notes_of_owners)," from ",len(owners),"]"
    group_note_features = intent._gfanalyze(notes_of_owners,note_feature_list,note_feature_names)
    group_user_features = intent._gfanalyze(owners,user_feature_list,user_feature_names)

    # now partition by cats?
    cat_test = {}
    for cat in cats:
        users_with_cat = get_userids_with_cat(arows,cat)
        cat_notes =  reduce(lambda x,y: x+y,[[x for x in User.objects.filter(id=o)[0].note_owner.values()] for o in users_with_cat ])
        print "--------------:",cat,":--------------- (u=",len(users_with_cat),"n =", len(cat_notes),")"
        cat_note_features = intent._gfanalyze(cat_notes, note_feature_list)
        cat_user_features = intent._gfanalyze(users_with_cat, user_feature_list)        
        cat_test[cat] = intent._gfcompare(cat_note_features,group_note_features,note_feature_names)
        cat_test[cat].update( intent._gfcompare(cat_user_features,group_user_features,user_feature_names)  )
        
    return cat_test
示例#2
0
        cat_test[cat] = intent._gfcompare(cat_note_features,group_note_features,note_feature_names)
        cat_test[cat].update( intent._gfcompare(cat_user_features,group_user_features,user_feature_names)  )
        
    return cat_test

########################################################################################
## new delicious anova code for CHI 2011 paper
## Figures 3 and 7 are generated from these fns
## 
## krows = kl.read()
## kl.print_aov_bits(krows,'note_words')
## kl.print_aov_bits(krows,'note_lines')
## kl.print_aov_bits(krows,'note_deleted')
## kl.print_aov_bits(krows,'note_lifetime_none')

def _filter_user_ids_for_researchers(users):
    """Return a list of the ids in `users` whose intent.owner_orm(id) is not in cal.RESEARCHERS."""
    return [oid for oid in users if intent.owner_orm(oid) not in cal.RESEARCHERS]


def _filter_missing_keeper_labels(arows, labeler):
    """Return a list of the rows in `arows` whose `labeler` key is present and not None."""
    return [arow for arow in arows if arow.get(labeler, None) is not None]


def _get_all_notes_owned(dude_ids):
    """
    Return one flat list of the note rows owned by every user id in `dude_ids`.

    Each user's rows come from intent.owner_orm(id).note_owner.all().values()
    (presumably Django values() dicts -- confirm against the ORM model).
    An empty `dude_ids` gives []; the previous reduce()-based version raised
    TypeError in that case because reduce() had no initial value.
    """
    return [n for o in dude_ids for n in intent.owner_orm(o).note_owner.all().values()]


def _get_owners_of_rows(arows):
    """Return the set of distinct "userid" values found in `arows`."""
    return set(x["userid"] for x in arows)

def anova_by_note_feature(krows,feature_name):
    owners = _get_owners_of_rows(krows)
    owners = _filter_user_ids_for_researchers(owners)

    owner_types = {}
    # populate owner -> cat mapping 
    for cat in cats: [ owner_types.update({long(owner) : cat}) for owner in get_userids_with_cat(krows,cat) ]

    # gets rid of dudes who have no category
    owners = list(owner_types.keys())
    print "SOURCE OWNER SET : ", len(owners)