def _queue_subtasks(self, create_subtask_fcn, items_per_query, items_per_task, initial_count, extra_count):
    """
    Queue subtasks for a course whose enrollment grows while queueing is under way.

    Enrolls `initial_count` students, then calls `queue_subtasks_for_query` over the
    course's enrollments with `instructor_task.subtasks.initialize_subtask_info`
    patched out.  Whenever the patched function is called it enrolls `extra_count`
    additional students and returns an empty dict instead of doing its real work.

    Arguments:
        create_subtask_fcn: callable handed through to `queue_subtasks_for_query`.
        items_per_query: handed through to `queue_subtasks_for_query`.
        items_per_task: handed through to `queue_subtasks_for_query`.
        initial_count: number of students enrolled before queueing starts.
        extra_count: number of students enrolled by the patched initializer.
    """
    course_id = self.course.id
    entry = InstructorTaskFactory.create(
        course_id=course_id,
        task_id=str(uuid4()),
        task_key='dummy_task_key',
        task_type='bulk_course_email',
    )
    self._enroll_students_in_course(course_id, initial_count)
    enrollments = CourseEnrollment.objects.filter(course_id=course_id)

    def enroll_extra_students(*_args):
        """Stand-in for initialize_subtask_info: enroll more students, report nothing."""
        self._enroll_students_in_course(self.course.id, extra_count)
        return {}

    # The replacement is installed as the mock's side effect, so the mock
    # returns whatever enroll_extra_students returns.
    with patch('instructor_task.subtasks.initialize_subtask_info', side_effect=enroll_extra_students):
        queue_subtasks_for_query(
            entry=entry,
            action_name='action_name',
            create_subtask_fcn=create_subtask_fcn,
            item_queryset=enrollments,
            item_fields=[],
            items_per_query=items_per_query,
            items_per_task=items_per_task,
        )
def _queue_subtasks(self, create_subtask_fcn, items_per_task, initial_count, extra_count):
    """
    Queue subtasks for a course whose enrollment grows while queueing is under way.

    Enrolls `initial_count` students, then calls `queue_subtasks_for_query` with a
    one-element list holding the course's enrollment queryset and with
    `total_num_items` set to `initial_count`.  During that call
    `instructor_task.subtasks.initialize_subtask_info` is patched so that, when it
    is called, it enrolls `extra_count` additional students and returns an empty
    dict instead of doing its real work.

    Arguments:
        create_subtask_fcn: callable handed through to `queue_subtasks_for_query`.
        items_per_task: handed through to `queue_subtasks_for_query`.
        initial_count: number of students enrolled before queueing starts; also
            reported as the total number of items.
        extra_count: number of students enrolled by the patched initializer.
    """
    course_id = self.course.id
    new_task_id = str(uuid4())
    entry = InstructorTaskFactory.create(
        course_id=course_id,
        task_id=new_task_id,
        task_key='dummy_task_key',
        task_type='bulk_course_email',
    )
    self._enroll_students_in_course(course_id, initial_count)
    enrollment_querysets = [CourseEnrollment.objects.filter(course_id=course_id)]

    def enroll_extra_students(*_args):
        """Stand-in for initialize_subtask_info: enroll more students, report nothing."""
        self._enroll_students_in_course(self.course.id, extra_count)
        return {}

    with patch('instructor_task.subtasks.initialize_subtask_info') as patched_initializer:
        patched_initializer.side_effect = enroll_extra_students
        queue_subtasks_for_query(
            entry=entry,
            action_name='action_name',
            create_subtask_fcn=create_subtask_fcn,
            item_querysets=enrollment_querysets,
            item_fields=[],
            items_per_task=items_per_task,
            total_num_items=initial_count,
        )
def perform_delegate_email_batches(entry_id, course_id, task_input, action_name):
    """
    Split a bulk course email into batches and queue one subtask per batch.

    Looks up the recipients for the email, chops them into batches of no more than
    settings.BULK_EMAIL_EMAILS_PER_TASK, and queues a `send_course_email` subtask
    for each batch via `queue_subtasks_for_query`.

    Arguments:
        entry_id: primary key of the InstructorTask tracking this work.
        course_id: course the email belongs to; must equal both the task's and
            the email's course id.
        task_input: mapping that provides 'email_id', the id of the CourseEmail.
        action_name: passed through to `queue_subtasks_for_query`.

    Returns:
        The progress value from `queue_subtasks_for_query`, or, when the task
        already has subtasks and output recorded, the JSON-decoded
        `entry.task_output`.

    Raises:
        ValueError: if `course_id` disagrees with the task or the email, or if
            the course cannot be found.
        CourseEmail.DoesNotExist: if no CourseEmail has the given id.
    """
    entry = InstructorTask.objects.get(pk=entry_id)

    # Pull the values this task needs off the entry.
    user_id = entry.requester.id
    task_id = entry.task_id

    # Perfunctory check: course_id is passed explicitly only as a convenience
    # for other task code that doesn't need the entry_id.
    task_course_id = entry.course_id
    if course_id != task_course_id:
        conflict_template = u"Course id conflict: explicit value %r does not match task value %r"
        log.warning(u"Task %s: " + conflict_template, task_id, course_id, entry.course_id)
        raise ValueError(conflict_template % (course_id, entry.course_id))

    # The CourseEmail should have been committed by the view function before
    # the task was submitted, so a miss here is unexpected.
    email_id = task_input['email_id']
    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.warning(u"Task %s: Failed to get CourseEmail with id %s", task_id, email_id)
        raise

    # A dropped connection while the task is being queued can cause the same
    # task to be invoked again, which would queue a whole new raft of subtasks.
    # If subtasks were already defined, return the recorded progress rather
    # than raising, so the task keeps whatever status it already had.
    already_processed = len(entry.subtasks) > 0 and len(entry.task_output) > 0
    if already_processed:
        log.warning(
            u"Task %s has already been processed for email %s! InstructorTask = %s",
            task_id,
            email_id,
            entry,
        )
        return json.loads(entry.task_output)

    # The email must belong to the same course as the task that references it.
    if course_id != course_email.course_id:
        conflict_template = u"Course id conflict: explicit value %r does not match email value %r"
        log.warning(u"Task %s: " + conflict_template, task_id, course_id, course_email.course_id)
        raise ValueError(conflict_template % (course_id, course_email.course_id))

    course = get_course(course_id)
    if course is None:
        not_found_template = u"Task %s: course not found: %s"
        log.error(not_found_template, task_id, course_id)
        raise ValueError(not_found_template % (task_id, course_id))

    # Values shared by every subtask.
    to_option = course_email.to_option
    global_email_context = _get_course_email_context(course)
    recipient_querysets = _get_recipient_querysets(user_id, to_option, course_id)
    recipient_fields = ['profile__name', 'email']

    log.info(
        u"Task %s: Preparing to queue subtasks for sending emails for course %s, email %s, to_option %s",
        task_id,
        course_id,
        email_id,
        to_option,
    )

    total_recipients = sum(queryset.count() for queryset in recipient_querysets)

    # Small jobs go through their own queue so that large courses cannot block
    # emails to self and staff.
    routing_key = settings.BULK_EMAIL_ROUTING_KEY
    if total_recipients <= settings.BULK_EMAIL_JOB_SIZE_THRESHOLD:
        routing_key = settings.BULK_EMAIL_ROUTING_KEY_SMALL_JOBS

    def _create_send_email_subtask(to_list, initial_subtask_status):
        """Build the subtask that sends the email to one batch of recipients."""
        subtask_args = (
            entry_id,
            email_id,
            to_list,
            global_email_context,
            initial_subtask_status.to_dict(),
        )
        return send_course_email.subtask(
            subtask_args,
            task_id=initial_subtask_status.task_id,
            routing_key=routing_key,
        )

    # The returned progress is what gets stored in the parent task's
    # AsyncResult as its "result", and that AsyncResult will be marked
    # SUCCEEDED.  That's okay: the InstructorTask holds the "real" status, and
    # monitoring code should be using that instead.
    return queue_subtasks_for_query(
        entry,
        action_name,
        _create_send_email_subtask,
        recipient_querysets,
        recipient_fields,
        settings.BULK_EMAIL_EMAILS_PER_TASK,
        total_recipients,
    )
def perform_delegate_email_batches(entry_id, course_id, task_input, action_name):
    """
    Split a bulk course email into batches and queue one subtask per batch.

    Looks up the recipients for the email, chops them into batches of no more than
    settings.BULK_EMAIL_EMAILS_PER_TASK, and queues a `send_course_email` subtask
    for each batch via `queue_subtasks_for_query`.

    Arguments:
        entry_id: primary key of the InstructorTask tracking this work.
        course_id: course the email belongs to; must equal both the task's and
            the email's course id.
        task_input: mapping that provides 'email_id', the id of the CourseEmail.
        action_name: passed through to `queue_subtasks_for_query`.

    Returns:
        The progress value from `queue_subtasks_for_query`, or, when the task
        already has subtasks and output recorded, the JSON-decoded
        `entry.task_output`.

    Raises:
        ValueError: if `course_id` disagrees with the task or the email, or if
            the course cannot be found.
        CourseEmail.DoesNotExist: if no CourseEmail has the given id.
    """
    entry = InstructorTask.objects.get(pk=entry_id)

    # Pull the values this task needs off the entry.
    user_id = entry.requester.id
    task_id = entry.task_id

    # Perfunctory check: course_id is passed explicitly only as a convenience
    # for other task code that doesn't need the entry_id.
    task_course_id = entry.course_id
    if course_id != task_course_id:
        conflict_template = u"Course id conflict: explicit value %r does not match task value %r"
        log.warning(u"Task %s: " + conflict_template, task_id, course_id, entry.course_id)
        raise ValueError(conflict_template % (course_id, entry.course_id))

    # The CourseEmail should have been committed by the view function before
    # the task was submitted, so a miss here is unexpected.
    email_id = task_input['email_id']
    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.warning(u"Task %s: Failed to get CourseEmail with id %s", task_id, email_id)
        raise

    # A dropped connection while the task is being queued can cause the same
    # task to be invoked again, which would queue a whole new raft of subtasks.
    # If subtasks were already defined, return the recorded progress rather
    # than raising, so the task keeps whatever status it already had.
    already_processed = len(entry.subtasks) > 0 and len(entry.task_output) > 0
    if already_processed:
        log.warning(
            u"Task %s has already been processed for email %s! InstructorTask = %s",
            task_id,
            email_id,
            entry,
        )
        return json.loads(entry.task_output)

    # The email must belong to the same course as the task that references it.
    if course_id != course_email.course_id:
        conflict_template = u"Course id conflict: explicit value %r does not match email value %r"
        log.warning(u"Task %s: " + conflict_template, task_id, course_id, course_email.course_id)
        raise ValueError(conflict_template % (course_id, course_email.course_id))

    course = get_course(course_id)
    if course is None:
        not_found_template = u"Task %s: course not found: %s"
        log.error(not_found_template, task_id, course_id)
        raise ValueError(not_found_template % (task_id, course_id))

    # Values shared by every subtask.
    to_option = course_email.to_option
    global_email_context = _get_course_email_context(course)
    recipient_querysets = _get_recipient_querysets(user_id, to_option, course_id)
    recipient_fields = ['profile__name', 'email']

    log.info(
        u"Task %s: Preparing to queue subtasks for sending emails for course %s, email %s, to_option %s",
        task_id,
        course_id,
        email_id,
        to_option,
    )

    total_recipients = sum(queryset.count() for queryset in recipient_querysets)

    # Small jobs go through their own queue so that large courses cannot block
    # emails to self and staff.
    routing_key = settings.BULK_EMAIL_ROUTING_KEY
    if total_recipients <= settings.BULK_EMAIL_JOB_SIZE_THRESHOLD:
        routing_key = settings.BULK_EMAIL_ROUTING_KEY_SMALL_JOBS

    def _create_send_email_subtask(to_list, initial_subtask_status):
        """Build the subtask that sends the email to one batch of recipients."""
        subtask_args = (
            entry_id,
            email_id,
            to_list,
            global_email_context,
            initial_subtask_status.to_dict(),
        )
        return send_course_email.subtask(
            subtask_args,
            task_id=initial_subtask_status.task_id,
            routing_key=routing_key,
        )

    # The returned progress is what gets stored in the parent task's
    # AsyncResult as its "result", and that AsyncResult will be marked
    # SUCCEEDED.  That's okay: the InstructorTask holds the "real" status, and
    # monitoring code should be using that instead.
    return queue_subtasks_for_query(
        entry,
        action_name,
        _create_send_email_subtask,
        recipient_querysets,
        recipient_fields,
        settings.BULK_EMAIL_EMAILS_PER_TASK,
        total_recipients,
    )