def __regenerate_models():
    """Reset every persisted user model, then replay all ideas to rebuild them.

    Destructive maintenance helper: clears the stored model state for every
    row in ``user_model`` and then feeds each idea's tags back through
    ``user_models.UserModel.update`` so the models reflect the current data.
    """
    # Wipe the persisted model state for every user_model record.
    for record in db(db.user_model.id > 0).select():
        record.last_cat = None
        record.count_pair = 0
        record.num_ideas = 0
        record.count_transition_pairs = 0
        record.transition_graph = '[]'
        record.category_matrix = '{}'
        record.update_record()

    # Replay every idea through its owner's model.
    for idea in db(db.idea.id > 0).select():
        # Tags attached to this idea (joined through tag_idea).
        # NOTE(review): groupby on tag_idea.id while selecting tag.tag looks
        # odd — presumably intended to dedupe join rows; confirm against DAL usage.
        tag_rows = db((db.tag_idea.idea == idea.id) &
                      (db.tag.id == db.tag_idea.tag)).select(
                          db.tag.tag, groupby=db.tag_idea.id)
        tag_names = [row.tag for row in tag_rows]
        model = user_models.UserModel(idea.userId, idea.problem)
        model.update(tag_names)
def download_ideas():
    """Export every idea of the requested problem as a CSV download.

    Expects ``request.vars['problem']`` to hold the problem id; requires an
    authenticated session (enforced by ``__check_auth``).
    """
    __check_auth()
    problem_id = long(request.vars['problem'])  # py2 `long`: matches rest of file
    fields = [
        'user', 'condition', 'id', 'idea', 'timestamp', 'origin', 'sources',
        'tags'
    ]
    records = []
    # One CSV record per idea, enriched with the author's condition and tags.
    for row in db(db.idea.problem == problem_id).select():
        model = user_models.UserModel(row.userId, problem_id)
        tag_rows = db((db.tag_idea.idea == row.id) &
                      (db.tag.id == db.tag_idea.tag)).select()
        tag_names = [t.tag.tag for t in tag_rows]
        records.append([
            row.userId,
            model.user_condition,  # condition
            row.id,
            row.idea,
            row.dateAdded,
            row.origin,
            row.sources,
            '|'.join(tag_names)  # tags
        ])
    filename = 'ideas_%d_%s.csv' % (problem_id, datetime.date.today())
    return __prepare_csv_response(fields, records, filename, problem_id)
def get_inferred_categories(user_id, problem_id, db):
    '''
    Infer categories for a user from the 5 nearest-neighbor user models.

    I intended this module to not have to access DB at all, but right now
    I can't see a better way to do this.

    :param user_id: id of the user whose categories are being inferred
    :param problem_id: problem the inference is scoped to
    :param db: DAL connection (passed in so this module stays importable
        without the web2py globals)
    :returns: whatever ``infer_categories`` produces for the user model and
        its nearest neighbors
    '''
    user_model = user_models.UserModel(user_id, problem_id)
    # All OTHER users that have pooled ideas on this problem.
    # Fix: the original used the `<>` inequality operator, which was removed
    # in Python 3 (and long deprecated in Python 2); `!=` is equivalent.
    all_users = db((db.idea.pool == True) &
                   (db.idea.problem == problem_id) &
                   (db.idea.userId == db.user_info.id) &
                   (db.idea.userId != user_id)).select(
                       db.idea.userId, db.user_info.userId, distinct=True)
    models = [user_models.UserModel(u.idea.userId, problem_id)
              for u in all_users]
    nearest_neighbors = find_n_nearest(user_model, models, 5, db)
    # Infer categories from nearest neighbors
    inferred = infer_categories(user_model, nearest_neighbors, True)
    return inferred
def usermodel():
    """Stats page for a single user's model on a single problem.

    Reads ``user`` and ``problem`` from ``request.vars``; redirects back to
    the stats index when either is missing or does not resolve to a record.
    Returns the template context: the model, its nearest neighbors, the
    inferred categories, the user's action log, and several pre-joined
    display strings.
    """
    __check_auth()
    user_id = request.vars['user']
    problem_id = request.vars['problem']
    if not user_id or not problem_id:
        redirect(URL('stats', 'index'))

    # Resolve contextual records; bail out if either lookup fails.
    user = db(db.user_info.id == user_id).select().first()
    problem = db(db.problem.id == problem_id).select().first()
    if not user or not problem:
        redirect(URL('stats', 'index'))

    # The model under inspection plus every other user's model in the problem.
    user_model = user_models.UserModel(user.id, problem.id)
    others = __get_users_in_problem(problem_id, [user_id])
    models = [user_models.UserModel(u.idea.userId, problem_id)
              for u in others]
    nearest_neighbors = collab_filter.find_n_nearest(user_model, models, 5, db)
    # Infer categories from nearest neighbors
    inferred = collab_filter.infer_categories(user_model, nearest_neighbors,
                                              True)

    # Full action log for this user on this problem.
    logs = db((db.action_log.problem == problem_id) &
              (db.action_log.userId == user_id)).select()

    # Pre-join the display strings the template shows verbatim.
    adjacent_str = ', '.join(
        __handle_unicode(
            user_model.transition_graph.get_adjacent(user_model.last_cat)))
    inspiration_str = ', '.join(
        __handle_unicode(user_model.get_inspiration_categories(3)))
    ordered_tags_str = ', '.join(
        __handle_unicode(user_model.get_ordered_tags()))

    return dict(user=user,
                problem=problem,
                model=user_model,
                nearest_neighbors=nearest_neighbors,
                inferred=inferred,
                inferred_json=json.dumps(inferred),
                logs=logs,
                adjacent=adjacent_str,
                inspiration_cats=inspiration_str,
                ordered_tags=ordered_tags_str)
def __get_data(problem_id):
    """Build the per-user stats export for one problem.

    Returns a ``(fields, records)`` pair: the CSV header names and one
    record (list of values, same order as ``fields``) per user who has
    ideas in the problem.
    """
    users = __get_users_in_problem(problem_id)
    # Define fields
    fields = [
        'problem_id',
        'user_id',
        'public_id',
        'initial_login',
        'condition',
        'num_ideas',
        'num_refined',
        'num_combined',
        'num_inspirations',
        'num_clicks_sp',
        'breadth',
        'depth_avg',
        'depth_max',
        'breadth_1',
        'breadth_2',
        'avg_click_i',
        'sp_fulfill',
        'insp_fulfill',
        'category_switch_ratio',
        'model_url',
    ]
    # TODO get data
    records = []
    for u in users:
        # Resolve the user_info row and the persisted model for this user.
        user = db(db.user_info.id == u.idea.userId).select().first()
        public_id = user.userId
        initial_login = user.initialLogin
        model = user_models.UserModel(u.idea.userId, problem_id)
        # Get the number of inspiration requests
        num_inspirations = db(
            (db.action_log.problem == problem_id) &
            (db.action_log.userId == u.idea.userId) &
            (db.action_log.actionName == 'get_available_tasks')).count()
        # Get the number of clicks on a solution space cell
        # NOTE(review): this selects ALL 'get_ideas' log rows and filters in
        # Python on extraInfo containing 'tags'; the record stores len() of it.
        num_clicks_sp = db((db.action_log.problem == problem_id) &
                           (db.action_log.userId == u.idea.userId) &
                           (db.action_log.actionName == 'get_ideas')).select(
                           ).find(lambda r: 'tags' in r.extraInfo)
        # Get the number of refined and combined ideas
        refined = db((db.idea.problem == problem_id) &
                     (db.idea.userId == u.idea.userId) &
                     (db.idea.origin == 'refinement')).count()
        combined = db((db.idea.problem == problem_id) &
                      (db.idea.userId == u.idea.userId) &
                      (db.idea.origin == 'combine')).count()
        # First/second-half breadth metrics (computed by a sibling helper).
        breadth_1, breadth_2 = __get_breadth_per_half(user, problem_id)
        # Get fulfillment and related metrics
        fulfill = __get_fulfillment_metrics(user, problem_id)
        # Create record (order must match `fields` above)
        user_record = [
            problem_id,
            u.idea.userId,
            public_id,
            initial_login,
            model.user_condition,
            model.get_num_ideas(),
            refined,
            combined,
            num_inspirations,
            len(num_clicks_sp),
            model.get_breadth(),
            model.get_depth_avg(),
            model.get_depth_max(),
            breadth_1,
            breadth_2,
            fulfill['avg_click_i'],
            fulfill['sp_fulfill'],
            fulfill['insp_fulfill'],
            model.category_switch_ratio,
            # Absolute link back to the usermodel stats page for this user.
            'http://' + request.env.http_host +
            URL('stats', 'usermodel?problem=%d&user=%d' % (problem_id,
                                                           u.idea.userId)),
        ]
        # Add to array
        records.append(user_record)
    return fields, records
def __update_user_model(user_id, problem_id, tags, tag_names):
    """Resolve tag ids to names via *tag_names* and feed them to the model."""
    model = user_models.UserModel(user_id, problem_id)
    names = [tag_names[t] for t in tags]
    model.update(names)
def __get_tasks(user_id, problem_id):
    """Pick up to NUM_TASKS rating tasks for the user, biased toward the
    model's inspiration categories, skipping ideas the user already rated.

    Returns a list of joined task rows, each annotated with an
    ``idea.favorite`` flag.
    """
    # Get tags
    model = user_models.UserModel(user_id, problem_id)
    inspiration_categories, all_inferred = model.get_inspiration_categories(
        NUM_TASKS)
    # Remove duplicates
    inspiration_categories = list(set(inspiration_categories))
    inspiration_categories.extend(all_inferred)
    # Get idea ids that the user already has used as inspiration
    completed_tasks = db((db.task.completed_by == user_id) &
                         (db.task.task_type == 'RatingTask') &
                         (db.task.problem == problem_id)).select(
                             db.task.idea)
    completed_tasks = [row.idea for row in completed_tasks]
    if len(inspiration_categories) < NUM_TASKS:
        # There were not enough inspiration categories. Randomly add more
        # tags from the problem ('<random>' is DAL's random ordering).
        tags = [
            t.tag for t in db(db.tag.problem == problem_id).select(
                orderby='<random>')
        ]
        inspiration_categories.extend(tags)
    tasks = []
    for t in inspiration_categories:
        # Any failure in the chain below (e.g. .first() returning None)
        # is deliberately swallowed: just move on to the next category.
        try:
            # Get tag id
            t_id = db((db.tag.problem == problem_id) &
                      (db.tag.tag == t)).select(db.tag.id).first().id
            # Get an idea id (random pooled idea with this tag that the
            # user did not author and has not already rated)
            if t_id:
                idea_id = db((db.tag_idea.tag == t_id) &
                             (~db.tag_idea.idea.belongs(completed_tasks)) &
                             (db.tag_idea.idea == db.idea.id) &
                             (db.idea.userId != user_id) &
                             (db.idea.pool == True)).select(
                                 db.tag_idea.idea,
                                 orderby='<random>').first().idea
                if idea_id:
                    # Get a task for this idea and add it to list
                    task = db((db.task.idea == idea_id) &
                              (db.task.idea == db.idea.id) &
                              (db.task.idea == db.tag_idea.idea) &
                              (db.tag_idea.tag == db.tag.id)).select().first()
                    if task:
                        tasks.append(task)
        except Exception:
            pass
        if len(tasks) == NUM_TASKS:
            break
    # Add favorites
    favorites = __get_favorites(user_id)
    for t in tasks:
        if t.idea.id in favorites:
            t.idea.favorite = True
        else:
            t.idea.favorite = False
    # Filter to max number of tasks and return
    return tasks
def get_versioning_structure():
    '''
    Return the tree structure for the versioning panel as a JSON string.

    Structure:
    [
        {
            id: 31, type: 'combination', tags: ['tag1', 'tag2'],
            children: [
                {id: 01, ..., children: ...},
                {id: 91, ..., children: ...}
            ]
        }
    ]
    '''
    problem_id = util.get_problem_id(request)
    user_id = session.user_id
    cache_type = 'versioning'
    # Defaults used when there is no cache: process everything from scratch.
    timestamp = datetime.datetime.min
    complete_idea_map = dict()   # idea id (as str) -> Node, ALL ideas
    filtered_idea_list = []      # only latest versions (not replaced)
    ids = []                     # ids of the nodes in filtered_idea_list
    # Retrieve latest cache
    '''
    TODO enable cache again
    Cache is temporarily disabled due to a bug where refinements (and
    probably mergers) wouldn't be accurately reflected in the updated model.
    E.g. if an idea was refined, the verisoning view would still show the
    pre-refined idea as the latest version, alongside with the refined idea.
    The solution should involve updating the cached model when you detect a
    new idea that's the result of of merge or refinement
    '''
    cache = None  # db((db.visualization_cache.problem == problem_id) & (db.visualization_cache.type == cache_type)).select().first()
    if cache:
        # Resume from the cached structures; only ideas newer than the
        # cache timestamp are processed below.
        json_cache = json.loads(cache.cache, cls=ClassDecoder)
        timestamp = cache.timestamp
        complete_idea_map = json_cache['complete_idea_map']
        filtered_idea_list = json_cache['filtered_idea_list']
        ids = json_cache['ids']
    # get all ideas (newer than the cache timestamp) with their tags
    query = ((db.idea.id == db.tag_idea.idea) & (db.tag.id == db.tag_idea.tag)
             & (db.idea.problem == problem_id) &
             (db.idea.dateAdded > timestamp))
    results = db(query).select(
        orderby=db.idea.dateAdded,
        groupby=db.idea.id)  # Ordered from older to newer
    # Get user model and ordered tag sequence
    model = user_models.UserModel(user_id, problem_id)
    ordered_tags = model.get_ordered_tags()
    # build idea map; older-first ordering guarantees that an idea's
    # sources are already present in complete_idea_map when reached
    for i, r in enumerate(results):
        tags = [t.tag.tag for t in r.idea.tag_idea.select()]
        complete_idea_map[str(r.idea.id)] = Node(tags=tags,
                                                 type=r.idea.origin,
                                                 id=r.idea.id,
                                                 i=i)
        children = []
        if r.idea.sources:
            children = [complete_idea_map[str(c)] for c in r.idea.sources]
        # TODO update previous ideas to reflect their replacement
        complete_idea_map[str(r.idea.id)].children = children
        if not r.idea.replacedBy:
            # Not superseded: this is a latest version, show it at top level.
            filtered_idea_list.append(complete_idea_map[str(r.idea.id)])
            ids.append(r.idea.id)
    # Trim nodes to avoid redundancy
    for i in filtered_idea_list:
        __trim_node(i, ids)
    # Update cache
    key = (db.visualization_cache.problem == problem_id) & (
        db.visualization_cache.type == cache_type)
    db.visualization_cache.update_or_insert(
        key,
        problem=problem_id,
        type=cache_type,
        cache=json.dumps(dict(complete_idea_map=complete_idea_map,
                              filtered_idea_list=filtered_idea_list,
                              ids=ids),
                         cls=ClassEncoder),
        timestamp=datetime.datetime.now())
    # sort ideas according to user model (best-ranked tag first, then by
    # insertion index as a stable tiebreaker)
    filtered_idea_list.sort(key=lambda idea: (min(
        [ordered_tags.index(t) for t in idea.tags]), idea.i))
    # Log
    log_action(user_id, problem_id, 'get_versioning_structure',
               {'cache': (cache != None)})
    # return
    response.headers['Content-Type'] = 'application/json'
    return json.dumps(filtered_idea_list, cls=ClassEncoder)
def get_solution_space():
    """Return the solution-space visualization data as a JSON string.

    Combines the cached tag/connection structure with the current ideas in
    the problem, ordered by the requesting user's model.
    """
    user_id = session.user_id
    problem_id = util.get_problem_id(request)
    user_model = user_models.UserModel(user_id, problem_id)
    connections = dict()  # '|'-joined sorted tags -> dict(tags=[...], n=count)
    timestamp = datetime.datetime.min
    max_n = 0
    # Retrieve latest cache
    # TODO Cache is temporarily disabled since I change the handling of an idea's pool flag to run the studies. That means each user will have their own cache.
    cache = __get_cache(problem_id)
    if not cache:
        # If no cache, build it
        cache = __build_cache(problem_id, user_model)
    # Get info from cache
    json_cache = json.loads(cache.cache)
    connections = json_cache['connections']
    all_tags = json_cache['tags']
    timestamp = cache.timestamp
    max_n = json_cache['max_n']
    # Get tags ordered by the user model
    tags = user_model.get_ordered_tags()
    # get user's ideas with respective tags
    ideas = db((db.idea.id == db.tag_idea.idea)
               & (db.tag.id == db.tag_idea.tag)
               & (db.idea.problem == problem_id)).select(orderby=~db.idea.id,
                                                         groupby=db.idea.id)
    # extract tags and bump the connection counts per tag combination
    for idea in ideas:
        idea_tags = list()
        for tag in idea.idea.tag_idea.select():
            tag = tag.tag.tag.lower()
            idea_tags.append(tag)
            all_tags.append(tag)
        idea_tags.sort()  # this contains a sorted array of tags for idea
        # insert into data structure
        key = '|'.join(idea_tags)
        if key not in connections.keys():
            connections[key] = dict(tags=idea_tags, n=0)
        n = connections[key]['n'] + 1
        connections[key]['n'] = n
        if n > max_n:
            max_n = n
    # tags = tags[:SOLUTION_SPACE_MAX_TAGS]
    # since another user may have added another tag, and since "tags" holds ALL tags, we need to remove those that are not in "ideas"
    final_tags = tags  # start with ordered array of tags (NB: aliases `tags`)
    for t in all_tags:  # Go over all tags
        if t not in final_tags:  # If not already in the array, add it
            final_tags.append(t)
    # Create minimap overview and generate outcome dict
    overview = __generate_birdseye_solutionspace(final_tags,
                                                 connections,
                                                 max_n=max_n)
    outcome = json.dumps(
        dict(tags=final_tags,
             connections=connections,
             max_n=max_n,
             overview=overview))
    # Log
    log_action(user_id, problem_id, 'get_solution_space', {
        'cache': (cache != None),
        'tags': final_tags
    })
    return outcome
def __update_user_model(user_id, problem_id, tags):
    """Feed *tags* (already names) into the user's model for this problem."""
    user_models.UserModel(user_id, problem_id).update(tags)