def resolve_all_courses(self, args, info):
    try:
        return PrivateCourse.get_all_courses()
    except:  # pylint: disable=bare-except
        common_utils.log_exception_origin()
        raise
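# For reference: a minimal sketch of what a helper like
# common_utils.log_exception_origin() could look like, assuming its job is
# simply to record the traceback of the in-flight exception before the bare
# "raise" re-raises it. The real implementation may differ.
import logging
import traceback

def log_exception_origin():
    # Log the full traceback of the exception currently being handled, so
    # its origin survives even if a caller later swallows the re-raise.
    logging.error('Exception origin:\n%s', traceback.format_exc())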
def delete_course(cls):
    """Called back repeatedly from deferred queue dispatcher."""
    try:
        kind_name = cls.get_any_undeleted_kind_name()
        if not kind_name:
            # No entity types remain to be deleted from the Datastore for
            # this course (i.e. namespace), so call (in no particular
            # order) callbacks waiting to be informed of course deletion.
            ns_name = namespace_manager.get_namespace()
            common_utils.run_hooks(
                cls.COURSE_DELETED_HOOKS.itervalues(), ns_name)
            logging.info(
                'CourseDeleteHandler found no entity types to delete for '
                'namespace %s; deletion complete.', ns_name)
            return
        model = Model(kind_name)
        keys = list(db.Query(model, keys_only=True).run(
            batch_size=cls.DELETE_BATCH_SIZE))
        entities.delete(keys)
        logging.info(
            'CourseDeleteHandler deleted %d entities of type %s from '
            'namespace %s', len(keys), kind_name,
            namespace_manager.get_namespace())
        deferred.defer(cls.delete_course)
    except Exception:
        logging.critical(
            'Failed when attempting to delete course for namespace %s',
            namespace_manager.get_namespace())
        common_utils.log_exception_origin()
        raise
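# get_any_undeleted_kind_name() is not shown in this excerpt; a plausible
# sketch, inferred from the metadata.Kind variant below (the real helper may
# additionally skip kinds that must not be deleted):
from google.appengine.ext.db import metadata

def get_any_undeleted_kind_name(cls):  # a classmethod on the handler class
    # Return the kind name of any entity type still present in the current
    # namespace, or None once everything has been deleted.
    kind = metadata.Kind.all().get()
    return kind.kind_name if kind else None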
def delete_course(cls):
    """Called back repeatedly from deferred queue dispatcher."""
    try:
        kind = metadata.Kind.all().get()
        if not kind:
            logging.info(
                'CourseDeleteHandler found no entity types to delete for '
                'namespace %s; deletion complete.',
                namespace_manager.get_namespace())
            return
        kind_name = kind.kind_name
        model = Model(kind_name)
        keys = list(db.Query(model, keys_only=True).run(
            batch_size=cls.DELETE_BATCH_SIZE))
        entities.delete(keys)
        logging.info(
            'CourseDeleteHandler deleted %d entities of type %s from '
            'namespace %s', len(keys), kind_name,
            namespace_manager.get_namespace())
        deferred.defer(cls.delete_course)
    except Exception:
        logging.critical(
            'Failed when attempting to delete course for namespace %s',
            namespace_manager.get_namespace())
        common_utils.log_exception_origin()
        raise
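# How the deletion chain is started is not shown here. One plausible
# kick-off (the class name CourseDeleteHandler comes from the log messages
# above; the driver function itself is an assumption): defer the first call
# from inside the course's namespace, and each task then deletes one batch
# and re-defers itself until nothing remains.
from google.appengine.api import namespace_manager
from google.appengine.ext import deferred

def start_course_deletion(ns_name):
    old_ns = namespace_manager.get_namespace()
    try:
        namespace_manager.set_namespace(ns_name)
        deferred.defer(CourseDeleteHandler.delete_course)
    finally:
        namespace_manager.set_namespace(old_ns)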
def run(self, job_name, sequence_num, namespace, output, complete_fn,
        mapreduce_pipeline_args):
    results = []
    try:
        iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
        for file_reader in iterator:
            for item in file_reader:
                # Map/reduce puts reducer output into blobstore files as a
                # string obtained via "str(result)". Use AST as a safe
                # alternative to eval() to get the Python object back.
                results.append(ast.literal_eval(item))
        if complete_fn:
            util.for_name(complete_fn)(mapreduce_pipeline_args, results)
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._complete_job, job_name, sequence_num,
                MapReduceJob.build_output(self.root_pipeline_id, results))
    # Don't know what exceptions are currently, or will be in future,
    # thrown from Map/Reduce or Pipeline libraries; these are under
    # active development.
    #
    # pylint: disable=broad-except
    except Exception, ex:
        logging.critical('Failed running map/reduce job %s: %s', job_name,
                         str(ex))
        common_utils.log_exception_origin()
        time_completed = time.time()
        with Namespace(namespace):
            db.run_in_transaction(
                DurableJobEntity._fail_job, job_name, sequence_num,
                MapReduceJob.build_output(self.root_pipeline_id, results,
                                          str(ex)))
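# A quick round-trip shows why ast.literal_eval() is used above: reducer
# output is persisted via str(result), and literal_eval parses Python
# literals only, so a corrupted or hostile file cannot execute code the way
# eval() could.
import ast

result = {'count': 3, 'scores': [1, 2, 3]}
serialized = str(result)      # "{'count': 3, 'scores': [1, 2, 3]}"
assert ast.literal_eval(serialized) == result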
def resolve_course(self, args, info):
    try:
        course_id = _resolve_id(Course, args['id'])
        return Course.get_course(course_id)
    except:  # pylint: disable=bare-except
        common_utils.log_exception_origin()
        logging.exception('Error resolving course')
        return None
def _remove_indexed_items(cls, indexed_value, removers):
    for remover in removers:
        try:
            remover(indexed_value)
        except Exception, ex:
            logging.critical('Failed to wipe out user data via %s',
                             str(remover))
            common_utils.log_exception_origin()
            raise  # Propagate exception so POST returns 500 status code.
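# A remover is just a callable taking one indexed value (a user ID or an
# email address) and deleting whatever it keys. A hypothetical example,
# with an invented ExampleEntity type:
from google.appengine.ext import db

class ExampleEntity(db.Model):
    """Invented entity type, purely for illustration."""
    user_id = db.StringProperty(indexed=True)

def remove_example_entities(indexed_value):
    # Delete every ExampleEntity row indexed by this value.
    keys = ExampleEntity.all(keys_only=True).filter(
        'user_id =', indexed_value).fetch(limit=1000)
    db.delete(keys)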
def send_message(cls, the_dict):
    message = transforms.dumps(the_dict)
    try:
        # One attempt to get the message out synchronously.
        cls._emit_message(message)
    except Exception, ex:  # pylint: disable=broad-except
        # Anything goes wrong, it goes on the deferred queue for retries.
        logging.critical('Problem trying to report statistics: %s', ex)
        common_utils.log_exception_origin()
        options = taskqueue.TaskRetryOptions(
            task_retry_limit=cls._RETRY_OPT_NUM_TRIES,
            task_age_limit=cls._RETRY_OPT_AGE_LIMIT_SECONDS,
            min_backoff_seconds=cls._RETRY_OPT_MIN_BACKOFF_SECONDS,
            max_backoff_seconds=cls._RETRY_OPT_MAX_BACKOFF_SECONDS,
            max_doublings=cls._RETRY_OPT_MAX_DOUBLINGS)
        deferred.defer(cls._emit_message, message, _retry_options=options)
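# The retry constants are not part of this excerpt. Plausible values
# (assumptions, not the project's actual settings) would keep retrying with
# capped exponential backoff for up to a day:
_RETRY_OPT_NUM_TRIES = 10                    # stop after 10 attempts...
_RETRY_OPT_AGE_LIMIT_SECONDS = 24 * 60 * 60  # ...or after one day
_RETRY_OPT_MIN_BACKOFF_SECONDS = 10          # first retry after 10 seconds
_RETRY_OPT_MAX_BACKOFF_SECONDS = 60 * 60     # wait at most an hour between
_RETRY_OPT_MAX_DOUBLINGS = 5                 # stop doubling the wait early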
def map(event):
    for component in (StudentAggregateComponentRegistry
                      .get_components_for_event_source(event.source)):
        component_name = component.get_name()
        params = context.get().mapreduce_spec.mapper.params
        static_data = params.get(component_name)
        value = None
        try:
            value = component.process_event(event, static_data)
        # pylint: disable=broad-except
        except Exception, ex:
            common_utils.log_exception_origin()
            logging.critical(
                'Student aggregation map function '
                'component handler %s failed: %s', component_name, str(ex))
        if value:
            value_str = '%s:%s' % (component_name, transforms.dumps(value))
            yield event.user_id, value_str
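# The 'name:json' value encoding above pairs with the value.split(':', 1)
# in the reducer below. A minimal round-trip, using the standard json
# module as a stand-in for transforms.dumps()/loads():
import json

component_name = 'location'              # assumed component name
value = {'country': 'US'}
value_str = '%s:%s' % (component_name, json.dumps(value))

name, payload = value_str.split(':', 1)  # split on the first colon only
assert name == component_name
assert json.loads(payload) == value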
def _remove_per_course_indexed_items(cls, user_id):
    # We expect that there are comparatively few items indexed by user_id
    # or email address. Further, since we're running from a task queue,
    # we have 10 minutes to get this done. We could do these deletions in
    # parallel via async callback/follow-up, but the benefit isn't worth
    # the additional complexity.

    # Try to look up student to do removals by email address. This may
    # not work, in that the Student may already be gone. If that's the
    # case, though, we would have started the removers that delete by
    # user_id, and have finished with the by-email deletions, so we can
    # trust that if we can't load the Student, we will have already done
    # the by-email deletions on some earlier attempt.
    student = None
    try:
        student = models.Student.get_by_user_id(user_id)
    except Exception, ex:  # pylint: disable=broad-except
        logging.error('Failed looking up student by user ID %s', user_id)
        common_utils.log_exception_origin()
def cron_action(self, app_context, global_state):
    pending_work = removal_models.BatchRemovalState.get_all_work()
    logging.info(
        'Data removal cron handler for namespace %s: %d items to do',
        app_context.get_namespace_name(), len(pending_work))

    # Handle users with no remaining batch deletions to do separately.
    if None in pending_work:
        removal_policy = _get_removal_policy(app_context)
        final_removal_user_ids = pending_work[None]
        for user_id in final_removal_user_ids:
            logging.info(
                'Data removal cron handler: final removal for %s', user_id)
            try:
                removal_policy.on_all_data_removed(user_id)
            except Exception, ex:  # pylint: disable=broad-except
                logging.warning(
                    'Error trying to do final cleanup for user %s: %s',
                    user_id, str(ex))
                common_utils.log_exception_origin()
        del pending_work[None]
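# BatchRemovalState.get_all_work() is not shown. Judging from its use
# above, it returns a dict mapping remaining work to lists of user IDs,
# with a None key marking users whose batch deletions have all finished.
# An assumed example shape:
pending_work = {
    'EventEntity': ['user-1', 'user-2'],  # batch deletion still pending
    None: ['user-3'],                     # ready for final cleanup
}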
def delete_course(self, namespace, login=True):
    if login:
        self.login(self.LOGIN, admin=True)

    # Best effort, but don't block test if course removal fails. Removing
    # courses is a cleanup step that helps reduce flakes. Don't add to
    # flakiness by being fragile about cleanup failures.
    patience = 5
    while patience:
        patience -= 1
        page = self.load_courses_list()
        try:
            element = page.find_element_by_css_selector(
                '[data-course-namespace={}] [delete_course] button'.format(
                    namespace))
            element.click()
            page.switch_to_alert().accept()
        except exceptions.TimeoutException:
            logging.info('Could not find course; assuming deleted.')
            common_utils.log_exception_origin()
            break
        except exceptions.UnexpectedAlertPresentException, ex1:
            logging.warning('Unexpected alert: %s', str(ex1))
            common_utils.log_exception_origin()
            page.switch_to_alert().accept()  # Previous alert? Not ours?
            continue
        except exceptions.WebDriverException, ex2:
            logging.warning('WebDriverException: %s', str(ex2))
            common_utils.log_exception_origin()
            continue
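# A possible tightening of the lookup above using an explicit wait from
# stock selenium; the selector comes from the test code, everything else
# (function name, timeout, access to a raw driver) is an assumption about
# the page-object framework.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def find_delete_button(driver, namespace, timeout=5):
    selector = '[data-course-namespace={}] [delete_course] button'.format(
        namespace)
    return WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))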
def _internal_get(self):
    """Separate function from get() to permit simple calling by tests."""
    if self.is_globally_enabled():
        global_state = self.global_setup()
        for app_context in sites.get_all_courses():
            if self.is_enabled_for_course(app_context):
                namespace = app_context.get_namespace_name()
                with common_utils.Namespace(namespace):
                    try:
                        self.cron_action(app_context, global_state)
                    except Exception, ex:  # pylint: disable=broad-except
                        logging.critical(
                            'Cron handler %s for course %s: %s',
                            self.__class__.__name__,
                            app_context.get_slug(), str(ex))
                        common_utils.log_exception_origin()
            else:
                logging.info(
                    'Skipping cron handler %s for course %s',
                    self.__class__.__name__, app_context.get_slug())
    self.response.write('OK.')
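# A minimal sketch of the contract _internal_get() expects from subclasses;
# the base class name is an assumption.
import logging

class ExampleCronHandler(AbstractCronHandler):

    @classmethod
    def is_globally_enabled(cls):
        return True

    @classmethod
    def is_enabled_for_course(cls, app_context):
        return True

    def global_setup(self):
        # Computed once per cron run; passed to every cron_action() call.
        return {}

    def cron_action(self, app_context, global_state):
        # Runs inside the course's namespace; an exception here is logged
        # and does not stop iteration over the remaining courses.
        logging.info('Visiting course %s', app_context.get_slug())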
def reduce(user_id, values):
    # Convenience for collections: Pre-load Student and Course objects.
    student = None
    try:
        student = models.Student.get_by_user_id(user_id)
    # pylint: disable=broad-except
    except Exception:
        common_utils.log_exception_origin()
    if not student:
        logging.warning(
            'Student for student aggregation with user ID %s '
            'was not loaded. Ignoring records for this student.', user_id)
        return

    params = context.get().mapreduce_spec.mapper.params
    ns = params['course_namespace']
    app_context = sites.get_course_index().get_app_context_for_namespace(ns)
    course = courses.Course(None, app_context=app_context)

    # Bundle items together into lists by collection name
    event_items = collections.defaultdict(list)
    for value in values:
        component_name, payload = value.split(':', 1)
        event_items[component_name].append(transforms.loads(payload))

    # Build up per-Student aggregate by calling each component. Note that
    # we call each component whether or not its mapper produced any
    # output.
    aggregate = {}
    for component in StudentAggregateComponentRegistry.get_components():
        component_name = component.get_name()
        static_value = params.get(component_name)
        value = {}
        try:
            value = component.produce_aggregate(
                course, student, static_value,
                event_items.get(component_name, []))
            if not value:
                continue
        # pylint: disable=broad-except
        except Exception, ex:
            common_utils.log_exception_origin()
            logging.critical(
                'Student aggregation reduce function '
                'component handler %s failed: %s', component_name, str(ex))
            continue
        schema_name = params['schema_names'][component_name]
        if schema_name not in value:
            logging.critical(
                'Student aggregation reduce handler %s produced '
                'a dict which does not contain the top-level '
                'name (%s) from its registered schema.',
                component_name, schema_name)
            continue
        variances = transforms.validate_object_matches_json_schema(
            value[schema_name], params['schemas'][component_name])
        if variances:
            logging.critical(
                'Student aggregation reduce handler %s produced '
                'a value which does not match its schema: %s',
                component_name, ' '.join(variances))
            continue
        aggregate.update(value)
def resolve_all_courses(self, args, info):
    try:
        return Course.get_all_courses()
    except:  # pylint: disable=bare-except
        common_utils.log_exception_origin()
        raise
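# A hypothetical minimal component, illustrating the contract the map and
# reduce functions above rely on; how components get registered is not
# shown in this excerpt.
class ExampleComponent(object):

    @classmethod
    def get_name(cls):
        return 'example'

    @classmethod
    def process_event(cls, event, static_data):
        # Map side: return a JSON-serializable value, or None to emit
        # nothing for this event.
        return {'source': event.source}

    @classmethod
    def produce_aggregate(cls, course, student, static_value, event_items):
        # Reduce side: must return a dict whose top-level key matches the
        # component's registered schema name.
        return {'example': {'num_events': len(event_items)}}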
def resolve_current_user(self, args, info):
    try:
        return CurrentUser(users.get_current_user())
    except:  # pylint: disable=bare-except
        common_utils.log_exception_origin()
        raise
        if student and student.email:
            for remover in models_data_removal.Registry.get_email_removers():
                try:
                    remover(student.email)
                except Exception, ex:
                    logging.critical('Failed to wipe out user data via %s',
                                     str(remover))
                    common_utils.log_exception_origin()
                    raise  # Propagate exception so POST returns 500 status.

        # Do removals which depend only on user_id. Do these last, so that
        # we're not removing stuff that earlier steps might depend on.
        for remover in models_data_removal.Registry.get_user_id_removers():
            try:
                remover(user_id)
            except Exception, ex:
                logging.critical('Failed to wipe out user data via %s',
                                 str(remover))
                common_utils.log_exception_origin()
                raise  # Propagate exception so POST returns 500 status code.

    @classmethod
    def _initiate_unindexed_deletion(cls, user_id):
        # Make a DB entry that will tell the cron job that there is work to
        # do to clean up un-indexed entities for this user.
        class_names = models_data_removal.Registry.get_unindexed_class_names()
        removal_models.BatchRemovalState.create(user_id, class_names)

    @classmethod
    def on_all_data_removed(cls, user_id):
        """Called back from DataRemovalJob started by cron handler when done."""
        removal_models.ImmediateRemovalState.delete_by_user_id(user_id)
class ImmediateRemovalPolicy(AbstractDataRemovalPolicy):

    @classmethod
    def get_name(cls):
        return 'immediate_removal'

    @classmethod
    def get_description(cls):
        return 'Immediate removal of most data; batch removal of event data'

    @classmethod
    def prevent_registration(cls, app_context, user_id):
        if removal_models.ImmediateRemovalState.is_deletion_pending(user_id):
            return [
                safe_dom.Element('p').add_text(
                    # I18N: Shown when a student is attempting to re-enroll
                    # in a course soon after un-enrolling. It takes up to
                    # several hours to remove their data, and they are
                    # prevented from re-enrolling during that time to
                    # prevent problems.
                    app_context.gettext(
                        'You cannot re-register for this course at the '
                        'current time, because deletion of your previous '
                        'data is still in progress. Please try again '
                        'in a few hours.'))]
        return []

    @classmethod
    def on_user_add(cls, user_id):
        # NOTE: A sufficiently motivated Student attacker could re-register
        # himself by just POST-ing directly to the student-creation form
        # handler while deletion was still pending. However, to do that, he
        # would have had to manually construct the registration form - when
        # the form is painted, it calls prevent_registration(), above, and
        # that should suppress the form for well-intentioned Students.
        #
        # If the POST is done maliciously, there is a very real possibility
        # of a race: the batch cleanup of Event data would run up to several
        # hours later, possibly after the re-registered student had
        # completed some assessments. Having EventEntity items removed
        # would probably have a negligible effect on course-wide
        # statistics, but would definitely show up as missing items on the
        # Gradebook analytics page. Further, the student's scores would
        # have been recorded separately in the new Student record, so that
        # and the event record would be inconsistent. This situation is
        # hard to achieve for well-behaved users, and only of minor
        # consequence to system correctness, so we accept it.
        removal_models.ImmediateRemovalState.create(user_id)

    @classmethod
    def on_user_removal_intent(cls, user_id):
        # It doesn't look like this is enough to get the work done, and
        # it's not. Here, we are just recording the user's intention to
        # have their stuff removed. In theory, we will be called back Very
        # Soon from the user lifecycle notification queue, at which point
        # we will check their deletion-desire status, and start clobbering.
        # That callback is delivered at on_user_unenroll().
        removal_models.ImmediateRemovalState.set_deletion_pending(user_id)

    @classmethod
    def on_user_unenroll(cls, user_id):
        if removal_models.ImmediateRemovalState.is_deletion_pending(user_id):
            # Allow exceptions to propagate out, which will cause the
            # lifecycle queue to do retries.
            cls._remove_indexed_items(user_id)
            cls._initiate_unindexed_deletion(user_id)

    @classmethod
    def _remove_indexed_items(cls, user_id):
        # We expect that there are comparatively few items indexed by
        # user_id or email address. Further, since we're running from a
        # task queue, we have 10 minutes to get this done. We could do
        # these deletions in parallel via async callback/follow-up, but
        # the benefit isn't worth the additional complexity.

        # Try to look up student to do removals by email address. This may
        # not work, in that the Student may already be gone. If that's the
        # case, though, we would have started the removers that delete by
        # user_id, and have finished with the by-email deletions, so we
        # can trust that if we can't load the Student, we will have
        # already done the by-email deletions on some earlier attempt.
        student = None
        try:
            student = models.Student.get_by_user_id(user_id)
        except Exception, ex:  # pylint: disable=broad-except
            logging.error('Failed looking up student by user ID %s', user_id)
            common_utils.log_exception_origin()
            # But don't return -- we still need to do removals based on
            # user_id even though we cannot remove by email address.

        if student and student.email:
            for remover in models_data_removal.Registry.get_email_removers():
                try:
                    remover(student.email)
                except Exception, ex:
                    logging.critical('Failed to wipe out user data via %s',
                                     str(remover))
                    common_utils.log_exception_origin()
                    raise  # Propagate exception so POST returns 500 status.
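# Pulling the pieces together, the implied removal lifecycle reads roughly
# as below. Hypothetical driver code: the real call sites are the
# registration handler, the student lifecycle queue, and the data removal
# cron job.
user_id = '117101011101'  # made-up user ID

ImmediateRemovalPolicy.on_user_add(user_id)             # at registration
ImmediateRemovalPolicy.on_user_removal_intent(user_id)  # user asks out
ImmediateRemovalPolicy.on_user_unenroll(user_id)        # indexed removals,
                                                        # then batch record
# ... the cron handler later drains the batches, then calls:
ImmediateRemovalPolicy.on_all_data_removed(user_id)     # final cleanup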