def get_features_of_styles(arows,note_feature_list=None,user_feature_list=None,note_feature_names=None,user_feature_names=None): # makes a nice table of statistics ~~ arows = [a for a in arows if a.get('emax',None) is not None] # fiter out insub if note_feature_list is None: note_feature_list = default_note_features if user_feature_list is None: user_feature_list = default_user_features if note_feature_names is None: note_feature_names = default_note_feature_names if user_feature_names is None: user_feature_names = default_user_feature_names owners = get_userids_with_cat(arows) owners = filter(lambda oid: intent.owner_orm(oid) not in cal.RESEARCHERS,owners) notes_of_owners = reduce(lambda x,y:x+y,[[n for n in intent.owner_orm(o).note_owner.all().values()] for o in owners]) print "-----------ALL [", len(notes_of_owners)," from ",len(owners),"]" group_note_features = intent._gfanalyze(notes_of_owners,note_feature_list,note_feature_names) group_user_features = intent._gfanalyze(owners,user_feature_list,user_feature_names) # now partition by cats? cat_test = {} for cat in cats: users_with_cat = get_userids_with_cat(arows,cat) cat_notes = reduce(lambda x,y: x+y,[[x for x in User.objects.filter(id=o)[0].note_owner.values()] for o in users_with_cat ]) print "--------------:",cat,":--------------- (u=",len(users_with_cat),"n =", len(cat_notes),")" cat_note_features = intent._gfanalyze(cat_notes, note_feature_list) cat_user_features = intent._gfanalyze(users_with_cat, user_feature_list) cat_test[cat] = intent._gfcompare(cat_note_features,group_note_features,note_feature_names) cat_test[cat].update( intent._gfcompare(cat_user_features,group_user_features,user_feature_names) ) return cat_test
cat_test[cat] = intent._gfcompare(cat_note_features,group_note_features,note_feature_names) cat_test[cat].update( intent._gfcompare(cat_user_features,group_user_features,user_feature_names) ) return cat_test ######################################################################################## ## new delicious anova code for CHI 2011 paper ## Figures 3 and 7 are generated from these fns ## ## krows = kl.read() ## kl.print_aov_bits(krows,'note_words') ## kl.print_aov_bits(krows,'note_lines') ## kl.print_aov_bits(krows,'note_deleted') ## kl.print_aov_bits(krows,'note_lifetime_none') _filter_user_ids_for_researchers = lambda users: filter(lambda oid: intent.owner_orm(oid) not in cal.RESEARCHERS, users) _filter_missing_keeper_labels = lambda arows,labeler: filter(lambda arow: arow.get(labeler,None) is not None, arows) _get_all_notes_owned = lambda dude_ids: reduce(lambda x,y:x+y,[[n for n in intent.owner_orm(o).note_owner.all().values()] for o in dude_ids]) _get_owners_of_rows = lambda arows: set([x["userid"] for x in arows]) def anova_by_note_feature(krows,feature_name): owners = _get_owners_of_rows(krows) owners = _filter_user_ids_for_researchers(owners) owner_types = {} # populate owner -> cat mapping for cat in cats: [ owner_types.update({long(owner) : cat}) for owner in get_userids_with_cat(krows,cat) ] # gets rid of dudes who have no category owners = list(owner_types.keys()) print "SOURCE OWNER SET : ", len(owners)