def warm_app(id, short_name, featured=False):
    """Prime the cached values of one app, at most once per id.

    Does nothing when ``id`` is already listed in ``apps_cached``.
    Otherwise it calls the cached app getters and, for apps that are
    featured or have at least 1000 task runs, also computes the stats.
    The id is then appended to ``apps_cached``.

    :param id: app identifier (name kept for caller compatibility even
        though it shadows the builtin).
    :param short_name: app short name, used to fetch the app itself.
    :param featured: when true, stats are computed regardless of the
        number of task runs.
    """
    if id in apps_cached:
        return
    cached_apps.get_app(short_name)
    cached_apps.n_tasks(id)
    n_task_runs = cached_apps.n_task_runs(id)
    cached_apps.overall_progress(id)
    cached_apps.last_activity(id)
    cached_apps.n_completed_tasks(id)
    cached_apps.n_volunteers(id)
    if n_task_runs >= 1000 or featured:
        # Parenthesised single-argument form: prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("Getting stats for %s as it has %s task runs"
              % (short_name, n_task_runs))
        stats.get_stats(id, app.config.get('GEO'))
    apps_cached.append(id)
def get_project_stats(_id, short_name):  # pragma: no cover
    """Call every cached project getter and the stats for one project.

    :param _id: project identifier passed to the per-project getters.
    :param short_name: project short name, used to fetch the project.
    """
    from flask import current_app
    import pybossa.cache.project_stats as stats
    import pybossa.cache.projects as cached_projects

    cached_projects.get_project(short_name)
    # Same getters, same order as before; return values are not used here.
    per_project_getters = (
        cached_projects.n_tasks,
        cached_projects.n_task_runs,
        cached_projects.overall_progress,
        cached_projects.last_activity,
        cached_projects.n_completed_tasks,
        cached_projects.n_volunteers,
    )
    for getter in per_project_getters:
        getter(_id)
    geo = current_app.config.get('GEO')
    stats.get_stats(_id, geo)
def warm_project(_id, short_name, featured=False):
    """Prime the cached values of one project, at most once per id.

    Projects already listed in ``projects_cached`` are skipped. Stats are
    only computed for featured projects or those with at least 1000 task
    runs. The id is appended to ``projects_cached`` afterwards.
    """
    if _id in projects_cached:
        return
    cached_projects.get_project(short_name)
    cached_projects.n_tasks(_id)
    task_run_total = cached_projects.n_task_runs(_id)
    cached_projects.overall_progress(_id)
    cached_projects.last_activity(_id)
    cached_projects.n_completed_tasks(_id)
    cached_projects.n_volunteers(_id)
    needs_stats = featured or task_run_total >= 1000
    if needs_stats:
        stats.get_stats(_id, app.config.get('GEO'))
    projects_cached.append(_id)
def get_project_report_userdata(project_id):
    """Build the per-user contribution report for a project.

    :param project_id: project to report on; ``None`` yields ``None``.
    :returns: a list with one list per contributing user holding, in
        order: user id, name, full name, completed tasks, percent of
        completed tasks, first and last submission dates, and the average
        time per task in minutes rounded to two decimals.
    """
    if project_id is None:
        return None

    query_params = dict(project_id=project_id,
                        total_tasks=n_tasks(project_id))
    sql = text(
        '''
        SELECT id as u_id, name, fullname,
        (SELECT count(id) FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS completed_tasks,
        ((SELECT count(id) FROM task_run WHERE user_id = u.id AND project_id =:project_id) * 100 / :total_tasks) AS percent_completed_tasks,
        (SELECT min(finish_time) FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS first_submission_date,
        (SELECT max(finish_time) FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS last_submission_date,
        (SELECT coalesce(AVG(to_timestamp(finish_time, 'YYYY-MM-DD"T"HH24-MI-SS.US') -
        to_timestamp(created, 'YYYY-MM-DD"T"HH24-MI-SS.US')), interval '0s')
        FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS avg_time_per_task
        FROM public.user u WHERE id IN
        (SELECT DISTINCT user_id FROM task_run tr GROUP BY project_id, user_id HAVING project_id=:project_id);
        ''')
    users_report = []
    for record in session.execute(sql, query_params):
        # The average is an interval; report it in minutes.
        avg_minutes = round(record.avg_time_per_task.total_seconds() / 60, 2)
        users_report.append([
            record.u_id,
            record.name,
            record.fullname,
            record.completed_tasks,
            record.percent_completed_tasks,
            record.first_submission_date,
            record.last_submission_date,
            avg_minutes,
        ])
    return users_report
def projects_contributed(user_id):
    """List the projects in which ``user_id`` has at least one task run.

    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``), ordered by project name,
        descending.
    """
    sql = text(
        """
        WITH apps_contributed as
        (SELECT DISTINCT(project_id) FROM task_run
        WHERE user_id=:user_id)
        SELECT project.id, project.name, project.short_name, project.owner_id,
        project.description, project.info FROM project, apps_contributed
        WHERE project.id=apps_contributed.project_id ORDER BY project.name DESC;
        """
    )
    rows = session.execute(sql, dict(user_id=user_id))
    return [
        {
            'id': row.id,
            'name': row.name,
            'short_name': row.short_name,
            'owner_id': row.owner_id,
            'description': row.description,
            'overall_progress': overall_progress(row.id),
            'n_tasks': n_tasks(row.id),
            'n_volunteers': n_volunteers(row.id),
            'info': row.info,
        }
        for row in rows
    ]
def hidden_projects(user_id):
    """List the hidden projects owned by ``user_id``.

    Only projects that have at least one task and a ``task_presenter``
    entry in their info are selected by the query.

    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``).
    """
    sql = text(
        """
        SELECT project.id, project.name, project.short_name, project.description,
        project.owner_id,
        project.info
        FROM project, task
        WHERE project.id=task.project_id AND project.owner_id=:user_id AND
        project.hidden=1 AND (project.info->>'task_presenter') IS NOT NULL
        GROUP BY project.id, project.name, project.short_name,
        project.description;"""
    )
    rows = session.execute(sql, dict(user_id=user_id))
    return [
        {
            'id': row.id,
            'name': row.name,
            'short_name': row.short_name,
            'owner_id': row.owner_id,
            'description': row.description,
            'overall_progress': overall_progress(row.id),
            'n_tasks': n_tasks(row.id),
            'n_volunteers': n_volunteers(row.id),
            'info': row.info,
        }
        for row in rows
    ]
def user_progress(project_id=None, short_name=None):
    """API endpoint reporting the current user's progress in a project.

    The JSON body has two fields, e.g. ``{'done': 10, 'total': 100}``:
    the task runs counted for the user and the number of tasks of the
    project. Anonymous users are matched by IP address, authenticated
    users by id. Aborts with 404 when no identifier is given or the
    project cannot be found.
    """
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    # short_name wins when both identifiers are supplied.
    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is used
    # here for task_runs too
    filters = {'project_id': project.id}
    if current_user.is_anonymous():
        filters['user_ip'] = request.remote_addr or '127.0.0.1'
    else:
        filters['user_id'] = current_user.id
    done = task_repo.count_task_runs_with(**filters)
    payload = dict(done=done, total=n_tasks(project.id))
    return Response(json.dumps(payload), mimetype="application/json")
def n_tasks(project_id):
    """Return the number of tasks of a project.

    Thin wrapper that delegates to ``pybossa.cache.projects.n_tasks``;
    the import is done at call time.
    """
    from pybossa.cache import projects as cached_projects

    task_total = cached_projects.n_tasks(project_id)
    return task_total
def update_stats(project_id, period='2 week'):
    """Recompute and persist the stats of a given project.

    Gathers the hour, user and date statistics for ``period``, formats
    them, and stores them together with the cached project counters in
    the project's ``ProjectStats`` row (created when missing).

    :param project_id: project whose stats are refreshed.
    :param period: period string forwarded to the ``stats_*`` helpers.
    :returns: tuple ``(dates_stats, hours_stats, users_stats)``.
    """
    hours, hours_anon, hours_auth, max_hours, \
        max_hours_anon, max_hours_auth = stats_hours(project_id, period)
    users, anon_users, auth_users = stats_users(project_id, period)
    dates, dates_anon, dates_auth = stats_dates(project_id, period)

    # The former `sum(dates.values())` and `sorted(dates.iteritems(), ...)`
    # statements discarded their results (and `iteritems` does not exist on
    # Python 3 dicts), so they were removed.

    dates_stats = stats_format_dates(project_id, dates,
                                     dates_anon, dates_auth)
    hours_stats = stats_format_hours(project_id, hours, hours_anon,
                                     hours_auth, max_hours,
                                     max_hours_anon, max_hours_auth)
    users_stats = stats_format_users(project_id, users,
                                     anon_users, auth_users)

    data = dict(dates_stats=dates_stats,
                hours_stats=hours_stats,
                users_stats=users_stats)

    ps = session.query(ProjectStats).filter_by(project_id=project_id).first()

    # One field list shared by the "create" and "update" branches so the
    # two cannot drift apart.
    fields = {
        'info': data,
        'n_tasks': cached_projects.n_tasks(project_id),
        'n_task_runs': cached_projects.n_task_runs(project_id),
        'n_results': cached_projects.n_results(project_id),
        'overall_progress': cached_projects.overall_progress(project_id),
        'last_activity': cached_projects.last_activity(project_id),
        'n_volunteers': cached_projects.n_volunteers(project_id),
        'n_completed_tasks': cached_projects.n_completed_tasks(project_id),
        'average_time': cached_projects.average_contribution_time(project_id),
        'n_blogposts': cached_projects.n_blogposts(project_id),
    }

    if ps is None:
        ps = ProjectStats(project_id=project_id, **fields)
        db.session.add(ps)
    else:
        for attr, value in fields.items():
            setattr(ps, attr, value)
    db.session.commit()
    return dates_stats, hours_stats, users_stats
def user_progress(project_id=None, short_name=None):
    """API endpoint reporting the current user's progress in a project.

    The JSON body contains ``done``, ``total``, ``completed``,
    ``remaining``, ``locked``, ``remaining_for_user``, ``quiz`` and
    ``guidelines_updated``. ``available_gold_tasks`` is added for admins
    and for subadmins listed in the project's owners.

    Aborts with 401 for anonymous users and with 404 when no identifier
    is given or the project cannot be found.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    # short_name wins when both identifiers are supplied.
    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    user_id = current_user.id
    guidelines_updated = _guidelines_updated(project.id, user_id)
    done = task_repo.count_task_runs_with(project_id=project.id,
                                          user_id=user_id)
    remaining = n_available_tasks(project.id, include_gold_task=True)
    remaining_for_user = n_available_tasks_for_user(project, current_user.id)

    response = {
        'done': done,
        'total': n_tasks(project.id),
        'completed': n_completed_tasks(project.id),
        'remaining': remaining,
        # Count distinct task ids among the locked tasks.
        'locked': len({
            locked["task_id"] for locked in get_locked_tasks(project)
        }),
        'remaining_for_user': remaining_for_user,
        'quiz': current_user.get_quiz_for_project(project),
        'guidelines_updated': guidelines_updated,
    }

    is_owner_subadmin = (current_user.subadmin
                         and current_user.id in project.owners_ids)
    if current_user.admin or is_owner_subadmin:
        response['available_gold_tasks'] = n_unexpired_gold_tasks(project.id)

    return Response(json.dumps(response), mimetype="application/json")
def get_stats(project_id, period='2 week', full=False):
    """Return the stored stats of a project, computing them if missing.

    :param project_id: project to look up.
    :param period: forwarded to ``update_stats`` when no row exists yet.
    :param full: when true return the ``ProjectStats`` object itself,
        otherwise the ``(dates_stats, hours_stats, users_stats)`` tuple
        taken from its ``info``.
    """
    def _load():
        return session.query(ProjectStats).filter_by(
            project_id=project_id).first()

    ps = _load()
    if not ps:
        update_stats(project_id, period)
        ps = _load()

    # stuff we want real-time
    ps.overall_progress = cached_projects.overall_progress(project_id)
    ps.n_tasks = cached_projects.n_tasks(project_id)
    # end

    if full:
        return ps
    return (ps.info['dates_stats'],
            ps.info['hours_stats'],
            ps.info['users_stats'])
def draft_projects(user_id):
    """List the unpublished projects that have ``user_id`` among their owners.

    :returns: a list of dicts, one per project row, each extended with
        ``n_tasks``, ``n_volunteers`` and ``overall_progress``.
    """
    sql = text('''
               SELECT *
               FROM project
               WHERE project.published=false
               AND :user_id = ANY (project.owners_ids::int[]);
               ''')
    rows = session.execute(sql, dict(user_id=user_id))

    def _with_counters(row):
        entry = dict(row)
        entry['n_tasks'] = n_tasks(row.id)
        entry['n_volunteers'] = n_volunteers(row.id)
        entry['overall_progress'] = overall_progress(row.id)
        return entry

    return [_with_counters(row) for row in rows]
def projects_contributed(user_id, order_by='name'):
    """Return projects that user_id has contributed to.

    :param user_id: user whose task runs select the projects.
    :param order_by: column used for the descending ``ORDER BY``. It may be
        a bare or table-qualified column name, e.g. ``name``,
        ``project.name`` or ``last_contribution``.
    :returns: a list of dicts, one per project row, each extended with
        ``n_tasks``, ``n_volunteers`` and ``overall_progress``.
    :raises ValueError: if ``order_by`` is not a plain column identifier.
    """
    import re  # function-scope import keeps this block self-contained

    # order_by is interpolated into the SQL text (column names cannot be
    # bound parameters), so reject anything that is not an identifier
    # before building the query.
    identifier = r'[A-Za-z_][A-Za-z0-9_]*'
    if not re.match(r'%s(\.%s)?\Z' % (identifier, identifier), order_by):
        raise ValueError('Invalid order_by column: %r' % (order_by,))

    sql = text('''
               WITH projects_contributed as
                    (SELECT project_id, MAX(finish_time) as last_contribution
                     FROM task_run
                     WHERE user_id=:user_id GROUP BY project_id)
               SELECT * FROM project, projects_contributed
               WHERE project.id=projects_contributed.project_id
               ORDER BY {} DESC;
               '''.format(order_by))
    results = session.execute(sql, dict(user_id=user_id))
    contributed = []
    for row in results:
        project = dict(row)
        project['n_tasks'] = n_tasks(row.id)
        project['n_volunteers'] = n_volunteers(row.id)
        project['overall_progress'] = overall_progress(row.id)
        contributed.append(project)
    return contributed
def projects_contributed(user_id):
    """Return projects that user_id has contributed to.

    :param user_id: user whose task runs select the projects.
    :returns: a list of dicts, one per project row (ordered by project
        name, descending), each extended with ``n_tasks``,
        ``n_volunteers`` and ``overall_progress``.
    """
    sql = text('''
               WITH projects_contributed as
                    (SELECT DISTINCT(project_id) FROM task_run
                     WHERE user_id=:user_id)
               SELECT * FROM project, projects_contributed
               WHERE project.id=projects_contributed.project_id
               ORDER BY project.name DESC;
               ''')
    results = session.execute(sql, dict(user_id=user_id))
    contributed = []
    for row in results:
        project = dict(row)
        project['n_tasks'] = n_tasks(row.id)
        project['n_volunteers'] = n_volunteers(row.id)
        # No trailing comma here: it used to wrap the value in a 1-tuple.
        project['overall_progress'] = overall_progress(row.id)
        contributed.append(project)
    return contributed
def user_progress(project_id=None, short_name=None):
    """API endpoint reporting the current user's progress in a project.

    The JSON body contains ``done``, ``total``, ``remaining``,
    ``remaining_for_user`` and ``quiz``.

    Aborts with 401 for anonymous users and with 404 when no identifier
    is given or the project cannot be found.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    # short_name wins when both identifiers are supplied.
    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    filters = {'project_id': project.id, 'user_id': current_user.id}
    done = task_repo.count_task_runs_with(**filters)
    remaining = n_available_tasks(project.id, current_user.id)
    remaining_for_user = n_available_tasks_for_user(project, current_user.id)
    response = {
        'done': done,
        'total': n_tasks(project.id),
        'remaining': remaining,
        'remaining_for_user': remaining_for_user,
        'quiz': current_user.get_quiz_for_project(project),
    }
    return Response(json.dumps(response), mimetype="application/json")
def get_project_report_userdata(project_id):
    """Build the per-user contribution report for a project.

    :param project_id: project to report on; ``None`` yields ``None``.
    :returns: a list with one list per contributing user. Most values are
        converted with ``str``; name, full name, email, user type,
        comments and the two submission dates are passed through as
        returned by the query. The last item is the average time per task
        in minutes, rounded to two decimals and stringified.
    """
    if project_id is None:
        return None

    query_params = dict(project_id=project_id,
                        total_tasks=n_tasks(project_id))
    sql = text('''
            SELECT id as u_id, name, fullname, email_addr, admin, subadmin, enabled,
            user_pref->'languages' AS languages, user_pref->'locations' AS locations,
            info->'metadata'->'start_time' AS start_time, info->'metadata'->'end_time' AS end_time,
            info->'metadata'->'timezone' AS timezone, info->'metadata'->'user_type' AS type_of_user,
            info->'metadata'->'review' AS additional_comments,
            (SELECT count(id) FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS completed_tasks,
            ((SELECT count(id) FROM task_run WHERE user_id = u.id AND project_id =:project_id) * 100 / :total_tasks) AS percent_completed_tasks,
            (SELECT min(finish_time) FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS first_submission_date,
            (SELECT max(finish_time) FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS last_submission_date,
            (SELECT coalesce(AVG(to_timestamp(finish_time, 'YYYY-MM-DD"T"HH24-MI-SS.US') -
            to_timestamp(created, 'YYYY-MM-DD"T"HH24-MI-SS.US')), interval '0s')
            FROM task_run WHERE user_id = u.id AND project_id=:project_id) AS avg_time_per_task
            FROM public.user u WHERE id IN
            (SELECT DISTINCT user_id FROM task_run tr GROUP BY project_id, user_id HAVING project_id=:project_id);
            ''')

    def _report_line(record):
        # The average is an interval; report it in minutes.
        avg_minutes = round(record.avg_time_per_task.total_seconds() / 60, 2)
        return [
            str(record.u_id),
            record.name,
            record.fullname,
            record.email_addr,
            str(record.admin),
            str(record.subadmin),
            str(record.enabled),
            str(record.languages),
            str(record.locations),
            str(record.start_time),
            str(record.end_time),
            str(record.timezone),
            record.type_of_user,
            record.additional_comments,
            str(record.completed_tasks),
            str(record.percent_completed_tasks),
            record.first_submission_date,
            record.last_submission_date,
            str(avg_minutes),
        ]

    users_report = []
    for record in session.execute(sql, query_params):
        users_report.append(_report_line(record))
    return users_report
def get_project_report_userdata(project_id, start_date, end_date):
    """Build the per-user contribution report for a project and date range.

    The date restriction (SQL fragment plus its parameters) is obtained
    from ``get_taskrun_date_range_sql_clause_params`` and appended to
    every ``task_run`` sub-select.

    :returns: a list with one list per contributing user; the last item
        is the average time per task in minutes rounded to two decimals.
    """
    date_clause, sql_params = get_taskrun_date_range_sql_clause_params(
        start_date, end_date)
    sql_params["project_id"] = project_id
    sql_params["total_tasks"] = n_tasks(project_id)

    # Same statement as plain concatenation would produce: the fragment is
    # dropped in right after each `project_id` filter.
    statement = '''
            SELECT id as u_id, name, fullname, email_addr, admin, subadmin, enabled,
            user_pref->'languages' AS languages, user_pref->'locations' AS locations,
            info->'metadata'->'work_hours_from' AS work_hours_from, info->'metadata'->'work_hours_to' AS work_hours_to,
            info->'metadata'->'timezone' AS timezone, info->'metadata'->'user_type' AS type_of_user,
            info->'metadata'->'review' AS additional_comments,
            (SELECT count(id) FROM task_run WHERE user_id = u.id AND project_id=:project_id{date_clause}) AS completed_tasks,
            ((SELECT count(id) FROM task_run WHERE user_id = u.id AND project_id =:project_id{date_clause}) * 100 / :total_tasks) AS percent_completed_tasks,
            (SELECT min(finish_time) FROM task_run WHERE user_id = u.id AND project_id=:project_id{date_clause}) AS first_submission_date,
            (SELECT max(finish_time) FROM task_run WHERE user_id = u.id AND project_id=:project_id{date_clause}) AS last_submission_date,
            (SELECT coalesce(AVG(to_timestamp(finish_time, 'YYYY-MM-DD"T"HH24-MI-SS.US') -
            to_timestamp(created, 'YYYY-MM-DD"T"HH24-MI-SS.US')), interval '0s')
            FROM task_run WHERE user_id = u.id AND project_id=:project_id{date_clause}) AS avg_time_per_task
            FROM "user" u WHERE id IN
            (SELECT DISTINCT user_id FROM task_run GROUP BY project_id, user_id HAVING project_id=:project_id);
            '''.format(date_clause=date_clause)
    sql = text(statement)

    users_report = []
    for record in session.execute(sql, sql_params):
        # The average is an interval; report it in minutes.
        avg_minutes = round(record.avg_time_per_task.total_seconds() / 60, 2)
        users_report.append([
            record.u_id,
            record.name,
            record.fullname,
            record.email_addr,
            record.admin,
            record.subadmin,
            record.enabled,
            record.languages,
            record.locations,
            record.work_hours_from,
            record.work_hours_to,
            record.timezone,
            record.type_of_user,
            record.additional_comments,
            record.completed_tasks,
            record.percent_completed_tasks,
            record.first_submission_date,
            record.last_submission_date,
            avg_minutes,
        ])
    return users_report
def draft_projects(user_id):
    """List the unpublished projects owned by ``user_id``.

    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``).
    """
    sql = text('''
               SELECT project.id, project.name, project.short_name, project.description,
               project.owner_id,
               project.info
               FROM project
               WHERE project.owner_id=:user_id
               AND project.published=false;
               ''')
    rows = session.execute(sql, dict(user_id=user_id))
    return [
        {
            'id': row.id,
            'name': row.name,
            'short_name': row.short_name,
            'owner_id': row.owner_id,
            'description': row.description,
            'overall_progress': overall_progress(row.id),
            'n_tasks': n_tasks(row.id),
            'n_volunteers': n_volunteers(row.id),
            'info': row.info,
        }
        for row in rows
    ]
def projects_contributed(user_id):
    """List the projects in which ``user_id`` has at least one task run.

    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``), ordered by project name,
        descending.
    """
    sql = text('''
               WITH projects_contributed as
                    (SELECT DISTINCT(project_id) FROM task_run
                     WHERE user_id=:user_id)
               SELECT project.id, project.name, project.short_name, project.owner_id,
               project.description, project.info FROM project, projects_contributed
               WHERE project.id=projects_contributed.project_id ORDER BY project.name DESC;
               ''')
    rows = session.execute(sql, dict(user_id=user_id))
    return [
        {
            'id': row.id,
            'name': row.name,
            'short_name': row.short_name,
            'owner_id': row.owner_id,
            'description': row.description,
            'overall_progress': overall_progress(row.id),
            'n_tasks': n_tasks(row.id),
            'n_volunteers': n_volunteers(row.id),
            'info': row.info,
        }
        for row in rows
    ]
def user_progress(project_id=None, short_name=None):
    """API endpoint reporting the current user's progress in a project.

    The JSON body has two fields, e.g. ``{'done': 10, 'total': 100}``.
    Authenticated users are matched by id. Anonymous users are matched
    by the ``external_uid`` query argument when present, otherwise by
    their anonymized IP address. Aborts with 404 when no identifier is
    given or the project cannot be found.
    """
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    # short_name wins when both identifiers are supplied.
    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    external_uid = request.args.get('external_uid')
    filters = {'project_id': project.id}
    if not current_user.is_anonymous:
        filters['user_id'] = current_user.id
    elif external_uid is not None:
        filters['external_uid'] = external_uid
    else:
        remote_ip = request.remote_addr or '127.0.0.1'
        filters['user_ip'] = anonymizer.ip(remote_ip)

    done = task_repo.count_task_runs_with(**filters)
    payload = dict(done=done, total=n_tasks(project.id))
    return Response(json.dumps(payload), mimetype="application/json")
def published_projects(user_id):
    """List the non-hidden projects owned by ``user_id``.

    Only projects that have at least one task and whose info text
    contains ``task_presenter`` are selected. The ``info`` column is
    decoded with ``json.loads``.

    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``).
    """
    sql = text('''
               SELECT project.id, project.name, project.short_name, project.description,
               project.owner_id,
               project.info
               FROM project, task
               WHERE project.id=task.project_id AND project.owner_id=:user_id AND
               project.hidden=0 AND project.info LIKE('%task_presenter%')
               GROUP BY project.id, project.name, project.short_name,
               project.description,
               project.info;''')
    rows = session.execute(sql, dict(user_id=user_id))
    return [
        {
            'id': row.id,
            'name': row.name,
            'short_name': row.short_name,
            'owner_id': row.owner_id,
            'description': row.description,
            'overall_progress': overall_progress(row.id),
            'n_tasks': n_tasks(row.id),
            'n_volunteers': n_volunteers(row.id),
            'info': json.loads(row.info),
        }
        for row in rows
    ]
def published_projects(user_id):
    """List the published projects that have ``user_id`` among their owners.

    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``).
    """
    sql = text('''
               SELECT project.id, project.name, project.short_name, project.description,
               project.owner_id,
               project.info
               FROM project
               WHERE project.published=true
               AND :user_id = ANY (project.owners_ids::int[]);
               ''')
    rows = session.execute(sql, dict(user_id=user_id))
    return [
        {
            'id': row.id,
            'name': row.name,
            'short_name': row.short_name,
            'owner_id': row.owner_id,
            'description': row.description,
            'overall_progress': overall_progress(row.id),
            'n_tasks': n_tasks(row.id),
            'n_volunteers': n_volunteers(row.id),
            'info': row.info,
        }
        for row in rows
    ]
def projects_contributed(user_id, order_by='name'):
    """Return projects that user_id has contributed to.

    :param user_id: user whose task runs select the projects.
    :param order_by: column used for the descending ``ORDER BY``. It may be
        a bare or table-qualified column name, e.g. ``name``,
        ``project.name`` or ``last_contribution``.
    :returns: a list of dicts (project columns plus ``overall_progress``,
        ``n_tasks`` and ``n_volunteers``).
    :raises ValueError: if ``order_by`` is not a plain column identifier.
    """
    import re  # function-scope import keeps this block self-contained

    # order_by is interpolated into the SQL text (column names cannot be
    # bound parameters), so reject anything that is not an identifier
    # before building the query.
    identifier = r'[A-Za-z_][A-Za-z0-9_]*'
    if not re.match(r'%s(\.%s)?\Z' % (identifier, identifier), order_by):
        raise ValueError('Invalid order_by column: %r' % (order_by,))

    sql = text('''
               WITH projects_contributed as
                    (SELECT project_id, MAX(finish_time) as last_contribution
                     FROM task_run
                     WHERE user_id=:user_id GROUP BY project_id)
               SELECT project.id, project.name as name, project.short_name, project.owner_id,
               project.description, project.info, project.owners_ids
               FROM project, projects_contributed
               WHERE project.id=projects_contributed.project_id
               ORDER BY {} DESC;
               '''.format(order_by))
    results = session.execute(sql, dict(user_id=user_id))
    contributed = []
    for row in results:
        project = dict(id=row.id,
                       name=row.name,
                       short_name=row.short_name,
                       owner_id=row.owner_id,
                       owners_ids=row.owners_ids,
                       description=row.description,
                       overall_progress=overall_progress(row.id),
                       n_tasks=n_tasks(row.id),
                       n_volunteers=n_volunteers(row.id),
                       info=row.info)
        contributed.append(project)
    return contributed
def test_n_tasks_returns_number_of_total_tasks(self):
    """n_tasks counts completed and ongoing tasks together."""
    project = self.create_project_with_tasks(ongoing_tasks=1,
                                             completed_tasks=1)

    total = cached_projects.n_tasks(project.id)

    assert total == 2, total