def user_search(user, days_ago=None, nosmoothing=False):
    """Extract the user's search activity from their activity logs.

    Scans the (repeat-reduced) activity logs for 'search' and
    'clear-search' events, de-bounces them with a 10-second smoothing
    window (unless nosmoothing is True), and caches the results in the
    module-level search_cache / search_query_cache keyed by user id.

    :param user: user object (must have an ``id`` attribute)
    :param days_ago: optional horizon passed through to
        activity_logs_for_user — presumably limits how far back to look;
        TODO confirm against that helper
    :param nosmoothing: if True, skip the 10-second de-bounce entirely
    :returns: (searches, nltk.FreqDist of the query strings), where
        searches is the list of accepted log dicts annotated with
        'query' and 'hits'
    """
    from jv3.study.content_analysis import activity_logs_for_user
    global search_cache
    global search_query_cache
    alogs = wuw.reduceRepeatLogsValues(
        activity_logs_for_user(user, None, days_ago))
    searches = []
    queries = []
    last_time = 0
    for al in alogs:
        if al["action"] == 'search':
            try:
                query = JSONDecoder().decode(al["search"])
            except (ValueError, TypeError):
                # malformed or missing JSON payload — skip this log entry
                continue
            if not isinstance(query, dict):
                continue
            key = None
            if 'search' in query:
                key = 'search'
            if 'query' in query:
                key = 'query'
            if key is None:
                continue
            # BUG FIX: the original wrote
            #   a and nosmoothing or <time test>
            # which groups as (a and nosmoothing) or <time test>, letting
            # EMPTY queries through whenever 10s had elapsed.  The comment
            # "no empty searches pls" shows the intent was a and (b or c).
            if len(query[key].strip()) > 0 and (
                    nosmoothing or
                    long(al['when']) - long(last_time) > 10 * 1000):
                queries.append(query[key])
                al['query'] = query[key]
                al['hits'] = query.get('hits', [])
                searches.append(al)
                last_time = al['when']
        elif al["action"] == 'clear-search' and (
                nosmoothing or
                long(al['when']) - long(last_time) > 10 * 1000):
            al['query'] = ''
            searches.append(al)
            last_time = al["when"]
    search_cache[user.id] = searches
    search_query_cache[user.id] = queries
    return searches, nltk.FreqDist(queries)
def user_search(user, days_ago=None, nosmoothing=False):
    """Extract the user's search activity from their activity logs.

    Walks the repeat-reduced activity logs, collecting 'search' and
    'clear-search' events.  Unless ``nosmoothing`` is set, consecutive
    events within 10 seconds of each other are collapsed (de-bounced).
    Results are cached in the module-level search_cache and
    search_query_cache, keyed by user id.

    :param user: user object exposing an ``id`` attribute
    :param days_ago: passed through to activity_logs_for_user as the
        look-back horizon — TODO confirm semantics against that helper
    :param nosmoothing: if True, disable the 10-second de-bounce
    :returns: (searches, nltk.FreqDist(queries)) — the accepted log
        dicts (annotated with 'query' and 'hits') and a frequency
        distribution of the query strings
    """
    from jv3.study.content_analysis import activity_logs_for_user
    global search_cache
    global search_query_cache
    alogs = wuw.reduceRepeatLogsValues(
        activity_logs_for_user(user, None, days_ago))
    searches = []
    queries = []
    last_time = 0
    smoothing_ms = 10 * 1000  # 10 second smoothing window
    for al in alogs:
        if al["action"] == 'search':
            try:
                query = JSONDecoder().decode(al["search"])
            except (ValueError, TypeError):
                # unparseable / missing JSON — ignore this entry
                continue
            if not isinstance(query, dict):
                continue
            key = None
            if 'search' in query:
                key = 'search'
            if 'query' in query:
                key = 'query'
            if key is None:
                continue
            not_debounced = (nosmoothing or
                             long(al['when']) - long(last_time) > smoothing_ms)
            # BUG FIX: original precedence was (nonempty and nosmoothing)
            # or time-test, which admitted EMPTY queries once 10s passed;
            # "no empty searches pls" shows nonempty must always hold.
            if len(query[key].strip()) > 0 and not_debounced:
                queries.append(query[key])
                al['query'] = query[key]
                al['hits'] = query.get('hits', [])
                searches.append(al)
                last_time = al['when']
        elif al["action"] == 'clear-search' and (
                nosmoothing or
                long(al['when']) - long(last_time) > smoothing_ms):
            al['query'] = ''
            searches.append(al)
            last_time = al["when"]
    search_cache[user.id] = searches
    search_query_cache[user.id] = queries
    return searches, nltk.FreqDist(queries)
def note_ss(note,filter_top=False): from jv3.study.content_analysis import activity_logs_for_user from jv3.study.ca_load import jid2nidforuser global __sigscroll_startend_cache_flat SSCF = __sigscroll_startend_cache_flat def compute_duration(note): def dur(send): if type(send) == tuple: return send[1]-send[0] return send xd = SSCF.get(note["id"],[]) if len(xd) > 1: return reduce(lambda x,y: dur(x)+dur(y),xd) elif len(xd) == 1: return dur(xd[0]) return -1 if note["id"] in SSCF : return {'sigscroll_counts': len(SSCF.get(note["id"],[])), 'sigscroll_duration': compute_duration(note) } ## populate for this uer alogs = activity_logs_for_user(note["owner_id"],None) # if len(alogs) == 0: # ## means we have no activitylogs for that user # from jv3.study.content_analysis import _notes_to_features # SSCF.update( [ (n["id"],[]) for n in [_notes_to_values(x) for x in Note.objects.filter(owner=n["owner_id"])] ] ) # return debug__all_nids = [] next_is_top = True toplist_jids = [] # things to block alogs.sort(key=lambda x: x["when"]) print "activity logs", len(alogs) for al_i in range(len(alogs)): print al_i al = alogs[al_i] if al["action"] == 'sidebar-open': next_is_top = True continue if not al["action"] == "significant-scroll": continue if al["search"] is None: print "skipping" continue al["search"] = json.loads(al["search"]) if next_is_top: toplist_jids = [long(nv["id"]) for nv in al["search"]["note_visibilities"]] ##print "TOPLIST :: %s " % repr(toplist_jids) next_is_top = False for nv in al["search"]["note_visibilities"]: try : jid = int(nv["id"]) ## this returns the _jid_ not id! 
debug__all_nids.append(jid) ## omit nots that are at the top of the list if filter_top and jid in toplist_jids: print "filter top and jid in toplist continuing" continue nid = jid2nidforuser(al["owner"],jid) ## convert to NID (guaranteed unique) if nv.has_key("exitTime") and nv.has_key("entryTime"): ap = SSCF.get(nid,[]) if nv["entryTime"] == nv["exitTime"]: print " case 1 ",jid ## this is to get around the bug in 0.4.5-7 which ## results in (start,start) for no-scroll open-close, and search/idle ap.append( (nv["entryTime"],long(al["when"])) ) else: print "case 2 " ap.append( (nv["entryTime"],nv["exitTime"]) ) SSCF[nid] = ap except: print "noncritical warn %s " % repr(sys.exc_info()) pass ## filter all the newdudes print SSCF SSCF.update( dict([ (nid,adjacent_filtered(views)) for nid,views in SSCF.iteritems() ]) ) # if (nid in new_dudes) ] ) ) print "all debug__all_nids",len(set(debug__all_nids)) return {'sigscroll_counts': len(SSCF.get(note["id"],[])),'sigscroll_duration': compute_duration(note) }
def note_ss(note, filter_top=False): from jv3.study.content_analysis import activity_logs_for_user from jv3.study.ca_load import jid2nidforuser global __sigscroll_startend_cache_flat SSCF = __sigscroll_startend_cache_flat def compute_duration(note): def dur(send): if type(send) == tuple: return send[1] - send[0] return send xd = SSCF.get(note["id"], []) if len(xd) > 1: return reduce(lambda x, y: dur(x) + dur(y), xd) elif len(xd) == 1: return dur(xd[0]) return -1 if note["id"] in SSCF: return { 'sigscroll_counts': len(SSCF.get(note["id"], [])), 'sigscroll_duration': compute_duration(note) } ## populate for this uer alogs = activity_logs_for_user(note["owner_id"], None) # if len(alogs) == 0: # ## means we have no activitylogs for that user # from jv3.study.content_analysis import _notes_to_features # SSCF.update( [ (n["id"],[]) for n in [_notes_to_values(x) for x in Note.objects.filter(owner=n["owner_id"])] ] ) # return debug__all_nids = [] next_is_top = True toplist_jids = [] # things to block alogs.sort(key=lambda x: x["when"]) print "activity logs", len(alogs) for al_i in range(len(alogs)): print al_i al = alogs[al_i] if al["action"] == 'sidebar-open': next_is_top = True continue if not al["action"] == "significant-scroll": continue if al["search"] is None: print "skipping" continue al["search"] = json.loads(al["search"]) if next_is_top: toplist_jids = [ long(nv["id"]) for nv in al["search"]["note_visibilities"] ] ##print "TOPLIST :: %s " % repr(toplist_jids) next_is_top = False for nv in al["search"]["note_visibilities"]: try: jid = int(nv["id"]) ## this returns the _jid_ not id! 
debug__all_nids.append(jid) ## omit nots that are at the top of the list if filter_top and jid in toplist_jids: print "filter top and jid in toplist continuing" continue nid = jid2nidforuser( al["owner"], jid) ## convert to NID (guaranteed unique) if nv.has_key("exitTime") and nv.has_key("entryTime"): ap = SSCF.get(nid, []) if nv["entryTime"] == nv["exitTime"]: print " case 1 ", jid ## this is to get around the bug in 0.4.5-7 which ## results in (start,start) for no-scroll open-close, and search/idle ap.append((nv["entryTime"], long(al["when"]))) else: print "case 2 " ap.append((nv["entryTime"], nv["exitTime"])) SSCF[nid] = ap except: print "noncritical warn %s " % repr(sys.exc_info()) pass ## filter all the newdudes print SSCF SSCF.update( dict([ (nid, adjacent_filtered(views)) for nid, views in SSCF.iteritems() ])) # if (nid in new_dudes) ] ) ) print "all debug__all_nids", len(set(debug__all_nids)) return { 'sigscroll_counts': len(SSCF.get(note["id"], [])), 'sigscroll_duration': compute_duration(note) }