def run(self):
    """Computes student progress statistics.

    Returns:
        The aggregated progress data collected by ProgressAggregator.
    """
    aggregator = self.ProgressAggregator(self._course)
    # Visit every StudentPropertyEntity in moderate batches.
    query_mapper = models_utils.QueryMapper(
        StudentPropertyEntity.all(), batch_size=500, report_every=1000)
    query_mapper.run(aggregator.visit)
    return aggregator.progress_data
def run(self):
    """Computes peer review statistics.

    Returns:
        Dict with single key 'counts_by_completed_reviews' mapping each
        unit_id to a dense list where index i holds the number of
        reviewers who completed exactly i reviews (0 where no reviewer
        completed that many).
    """
    stats = ReviewStatsAggregator()
    mapper = utils.QueryMapper(
        peer.ReviewSummary.all(), batch_size=500, report_every=1000)
    mapper.run(stats.visit)

    completed_arrays_by_unit = {}
    for unit_id, counts in stats.counts_by_completed_reviews.items():
        # counts is sparse (only observed review-counts present); densify
        # it into a list indexed 0..max, filling gaps with 0.
        max_completed_reviews = max(counts)
        completed_arrays_by_unit[unit_id] = [
            counts.get(i, 0) for i in range(max_completed_reviews + 1)]

    return {'counts_by_completed_reviews': completed_arrays_by_unit}
def run(self):
    """Computes student statistics.

    Returns:
        Dict with enrollment counts, per-assessment scores, and ids.
    """
    enrollment_agg = EnrollmentAggregator()
    score_agg = ScoresAggregator()
    name_agg = NamesAggregator()
    query_mapper = utils.QueryMapper(
        Student.all(), batch_size=500, report_every=1000)

    def visit(student):
        # Feed the same entity to every aggregator in a single pass.
        for aggregator in (enrollment_agg, score_agg, name_agg):
            aggregator.visit(student)

    query_mapper.run(visit)
    return {
        'enrollment': {
            'enrolled': enrollment_agg.enrolled,
            'unenrolled': enrollment_agg.unenrolled
        },
        'scores': score_agg.name_to_tuple,
        'id': name_agg.st_id
    }
def run(self):
    """Computes submitted question answers statistics.

    Returns:
        2-tuple of (id -> questions dict, id -> assessments dict)
        accumulated by the MultipleChoiceQuestionAggregator.
    """
    aggregator = self.MultipleChoiceQuestionAggregator(self._course)
    # Walk every EventEntity; the aggregator extracts answers as it goes.
    event_mapper = models_utils.QueryMapper(
        EventEntity.all(), batch_size=500, report_every=1000)
    event_mapper.run(aggregator.visit)
    return (aggregator.id_to_questions_dict,
            aggregator.id_to_assessments_dict)
def test_run_processes_one_entity(self):
    """Tests that we can process < batch_size results."""
    Model().put()
    processed_count = utils.QueryMapper(Model.all()).run(
        process, 1, string='foo')
    # The mapper should have visited (and mutated) the single entity.
    entity = Model.all().get()
    self.assertEqual(1, processed_count)
    self.assertEqual(1, entity.number)
    self.assertEqual('foo', entity.string)
def test_run_process_more_than_1000_entities(self):
    """Tests we can process more entities than the old limit of 1k."""
    perf_counter = counters.PerfCounter(
        'test-run-process-more-than-1000-entities-counter',
        'counter for testing increment by QueryMapper')
    db.put([Model() for _ in xrange(1001)])
    # Exercise the custom ctor args (batch_size, counter, report_every).
    processed_count = utils.QueryMapper(
        Model.all(), batch_size=50, counter=perf_counter,
        report_every=0).run(process, 1, string='foo')
    newest = Model.all().order('-create_date').get()
    self.assertEqual(1001, perf_counter.value)
    self.assertEqual(1001, processed_count)
    self.assertEqual(1, newest.number)
    self.assertEqual('foo', newest.string)
def query(cls, to, intent):
    """Gets the Status of notifications queued previously via send_async().

    Serially performs one datastore query per user in the to list.

    Args:
        to: list of string. The recipients of the notification.
        intent: string. Short string identifier of the intent of the
            notification (for example, 'invitation' or 'reminder').

    Returns:
        Dict of to string -> [Status, sorted by descending enqueue date].
    """
    results = {}
    # One query per recipient; _accumulate_statuses fills in results.
    for recipient in to:
        utils.QueryMapper(
            cls._get_query_query(recipient, intent)
        ).run(_accumulate_statuses, results)
    return results
def expire_old_reviews_for_unit(cls, review_window_mins, unit_id):
    """Finds and expires all old review steps for a single unit.

    Args:
        review_window_mins: int. Number of minutes before we expire
            reviews assigned by domain.ASSIGNER_KIND_AUTO.
        unit_id: string. Id of the unit to restrict the query to.

    Returns:
        2-tuple of list of db.Key of peer.ReviewStep. 0th element is keys
        that were written successfully; 1st element is keys that we failed
        to update.
    """
    expired_keys = []
    exception_keys = []
    expiry_query = cls.get_expiry_query(review_window_mins, unit_id)
    mapper = utils.QueryMapper(
        expiry_query, counter=COUNTER_EXPIRY_QUERY_KEYS_RETURNED,
        report_every=100)

    def visit(review_step_key):
        try:
            expired_keys.append(cls.expire_review(review_step_key))
        except:  # All errors are the same. pylint: disable=bare-except
            # Skip. Either the entity was updated between the query and
            # the update, meaning we don't need to expire it; or we ran
            # into a transient datastore error, meaning we'll expire it
            # next time.
            COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_SKIP.inc()
            exception_keys.append(review_step_key)

    COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_START.inc()
    mapper.run(visit)
    COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_EXPIRE.inc(
        increment=len(expired_keys))
    COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_SUCCESS.inc()
    return expired_keys, exception_keys
def get_activity_scores(cls, student_user_ids, course, force_refresh=True):
    """Retrieve activity data for students using EventEntity.

    For each student, launch a Query of EventEntities to retrieve student
    scores. The Query is launched as a map-reduce background process that
    will return up to 1000 results per batch, reporting back by calling
    the map callback, which in turn calls parse_activity_scores. After
    the queries complete, build_missing_scores() fills in entries for
    questions the student never attempted.

    Events properties include a user_id, a source (e.g. tag-assessment),
    a recorded_on timestamp and a data dictionary that includes the
    question id, score, answer and the unit/lesson ids as part of the
    location URL.

    Args:
        student_user_ids: list of string. User ids to fetch scores for.
        force_refresh: bool. If True, always query the datastore and
            refresh the memcache entries; otherwise serve from memcache
            where possible and query only for uncached students.

    Returns:
        Dict with keys 'date' (when the data was computed/cached),
        'scores' and 'attempts'.
    """
    cached_date = datetime.datetime.now()
    activity_parser = ActivityScoreParser()

    def run_queries(user_ids):
        # Launch one (expensive) datastore query per student and feed
        # every returned event into the parser, then fill in entries for
        # unattempted questions.
        for user_id in user_ids:
            mapper = models_utils.QueryMapper(
                EventEntity.all().filter('user_id in', [user_id]).filter(
                    'recorded_on >= ', cls.CUTOFF_DATE),
                batch_size=1000, report_every=1000)
            mapper.run(activity_parser.parse_activity_scores)
        activity_parser.build_missing_scores()

    def cache_results(user_ids):
        # Cache per-student results under the student's email key.
        # Includes 'attempts' (previously omitted on the non-refresh
        # path, which left stale cache entries without attempt data).
        for user_id in user_ids:
            student = Student.get_by_user_id(user_id)
            cached_student_data = {
                'date': cached_date,
                'scores': activity_parser.activity_scores.get(
                    student.email, {}),
                'attempts': activity_parser.num_attempts_dict.get(
                    student.email, {}),
            }
            MemcacheManager.set(
                cls._memcache_key_for_student(student.email),
                cached_student_data)

    if force_refresh:
        activity_parser.params = (
            activity_parser.build_additional_mapper_params(
                course.app_context))
        run_queries(student_user_ids)
        cache_results(student_user_ids)
    else:
        uncached_students = []
        for student_id in student_user_ids:
            if student_id == '':
                continue
            student = Student.get_by_user_id(student_id)
            scores_for_student = MemcacheManager.get(
                cls._memcache_key_for_student(student.email))
            if scores_for_student:
                cached_date = scores_for_student['date']
                activity_parser.activity_scores[student_id] = (
                    scores_for_student['scores'])
                # Bug fix: this previously copied the 'scores' payload
                # into num_attempts_dict. Use .get() because entries
                # cached before this fix may lack 'attempts'.
                activity_parser.num_attempts_dict[student_id] = (
                    scores_for_student.get('attempts', {}))
            else:
                uncached_students.append(student_id)
        if uncached_students:
            if cached_date is None or datetime.datetime.now() < cached_date:
                cached_date = datetime.datetime.now()
            activity_parser.params = (
                activity_parser.build_additional_mapper_params(
                    course.app_context))
            run_queries(uncached_students)
            cache_results(uncached_students)

    score_data = {
        'date': cached_date,
        'scores': activity_parser.activity_scores,
        'attempts': activity_parser.num_attempts_dict,
    }
    if GLOBAL_DEBUG:
        logging.debug('***RAM*** get_activity_scores returning scores: ' +
                      str(score_data['scores']))
    return score_data
def _process_records(self, namespace, now, stats):
    """Maps process_notification over in-process notifications."""
    # The query must be built inside the namespace context so it binds to
    # the right namespace.
    with common_utils.Namespace(namespace):
        # Treating as module-protected. pylint: disable-msg=protected-access
        in_process_query = (
            notifications.Manager._get_in_process_notifications_query())
        model_utils.QueryMapper(in_process_query).run(
            process_notification, now, stats)
def test_run_processes_empty_result_set(self):
    """Tests run() returns 0 when the query matches nothing."""
    processed_count = utils.QueryMapper(Model.all()).run(
        process, 1, string='foo')
    self.assertEqual(0, processed_count)
def test_raising_stop_mapping_stops_execution(self):
    """Tests that StopMapping halts the mapper mid-run."""
    db.put([Model(number=value) for value in xrange(11)])
    ordered_query = Model.all().order('number')
    # stop_mapping_at_5 raises once it sees number == 5, so only the
    # first five entities are counted.
    num_processed = utils.QueryMapper(ordered_query).run(stop_mapping_at_5)
    self.assertEqual(5, num_processed)