def anova_by_user_feature(krows, feature_name):
    """Run an ANOVA of a per-user feature against the owner's category.

    Rows without a category are discarded, the remaining rows' owners are
    passed through the researcher filter, and the R model
    ``<feature_name> ~ owner_type`` is fitted with ``aov`` and followed by
    ``TukeyHSD``.

    Args:
        krows: iterable of rows accepted by ``get_category``,
            ``_get_owners_of_rows`` and ``get_userids_with_cat``.
        feature_name: name of a callable attribute on ``wuw``; it is called
            with an owner id and must return an object whose ``.values()``
            is indexable (the first value is used as the feature).

    Returns:
        A 4-tuple ``(fmla, aovres, tsd, summary_text)``: the formula built
        by ``aov.make_fmla``, the ``aov`` result, the ``TukeyHSD`` result,
        and ``str(r.summary(tsd))`` followed by a newline.

    Note:
        Relies on Python 2 semantics (``long``, list-returning
        ``dict.values()``).
    """
    # Drop rows that have no keeping category before deriving the owners.
    krows = [row for row in krows if get_category(row) is not None]
    owners = _filter_user_ids_for_researchers(_get_owners_of_rows(krows))

    # owner id -> category.  If an owner is reported for more than one
    # category, the category that comes last in `cats` wins.
    owner_types = {}
    for cat in cats:
        for owner in get_userids_with_cat(krows, cat):
            owner_types[long(owner)] = cat

    def compute_feature(ownid):
        # Only the first value of the returned collection is used.
        return getattr(wuw, feature_name)(ownid).values()[0]

    # Keyed by the variable names that appear in the model formula.
    feature_functions = {
        "owner_type": lambda ownid: owner_types.get(long(ownid)),
        feature_name: compute_feature,
    }

    fmla = aov.make_fmla(
        '%s ~ owner_type' % feature_name,
        owners,
        feature_functions,
        ['owner_type'],
    )
    aov.compute_averages(owners, feature_functions, feature_name, 'owner_type')

    aovres = r('aov')(fmla)
    tsd = r('TukeyHSD')(aovres)

    # Same text the print statement used to capture: str() of the summary
    # object plus the trailing newline that print appends.
    summary_text = str(r.summary(tsd)) + "\n"
    return fmla, aovres, tsd, summary_text
def anova_by_note_feature(krows, feature_name):
    """Run an ANOVA of a per-note feature against owner category and owner.

    Every owner that ``get_userids_with_cat`` reports for some category in
    ``cats`` is collected, all notes owned by those owners are fetched, and
    the R model ``<feature_name> ~ owner_type + owner_id`` is fitted with
    ``aov`` and followed by ``TukeyHSD``.  Two progress lines (owner count
    and note count) are printed to standard output.

    Args:
        krows: iterable of rows accepted by ``_get_owners_of_rows`` and
            ``get_userids_with_cat``.
        feature_name: feature identifier passed to
            ``intent.get_feature_for_note`` together with each note's
            ``"id"``.

    Returns:
        A 4-tuple ``(fmla, aovres, tsd, summary_text)``: the formula built
        by ``aov.make_fmla``, the ``aov`` result, the ``TukeyHSD`` result,
        and ``str(r.summary(tsd))`` followed by a newline.

    Note:
        Relies on Python 2 semantics (``long``).
    """
    # NOTE(review): this researcher-filtered owner list is never used -- the
    # owner set below is rebuilt from the category mapping alone, so the
    # researcher filter has no effect on the analysis.  The calls are kept
    # only so that any side effects or errors they produce are unchanged;
    # confirm whether the filter was meant to apply here.
    researcher_filtered_owners = _filter_user_ids_for_researchers(
        _get_owners_of_rows(krows))

    # owner id -> category.  If an owner is reported for more than one
    # category, the category that comes last in `cats` wins.
    owner_types = {}
    for cat in cats:
        for owner in get_userids_with_cat(krows, cat):
            owner_types[long(owner)] = cat

    # Only owners that ended up with a category take part in the analysis.
    owners = list(owner_types.keys())
    # Single-argument print(...) emits the same bytes as the former
    # two-item print statement (literal, separator space, number).
    print("SOURCE OWNER SET :  %d" % len(owners))

    notes_of_owners = _get_all_notes_owned(owners)

    def note_feature(n):
        return intent.get_feature_for_note(n["id"], feature_name)

    # Keyed by the variable names that appear in the model formula.
    feature_functions = {
        "owner_type": lambda n: owner_types.get(n["owner_id"]),
        "owner_id": lambda n: n["owner_id"],
        feature_name: note_feature,
    }

    print("notes of owners  %d" % len(notes_of_owners))

    fmla = aov.make_fmla(
        '%s ~ owner_type + owner_id' % feature_name,
        notes_of_owners,
        feature_functions,
        ['owner_id', 'owner_type'],
    )
    aov.compute_averages(
        notes_of_owners, feature_functions, feature_name, 'owner_type')

    aovres = r('aov')(fmla)
    tsd = r('TukeyHSD')(aovres)

    # Same text the print statement used to capture: str() of the summary
    # object plus the trailing newline that print appends.
    summary_text = str(r.summary(tsd)) + "\n"
    return fmla, aovres, tsd, summary_text