Exemplo n.º 1
0
    def iter(self, course, students):
        """
        Yield a GradeResult for each of the given students in the course.

        GradeResult is a named tuple of:

            (student, course_grade, err_msg)

        Arguments:
            course: the course to grade; its ``id`` is used for the stats tag
                and in log messages.
            students: an iterable of students (User).

        On success, course_grade is the value returned by
        ``CourseGradeFactory().create`` and err_msg is an empty string.
        If grading a student raises, the exception is logged, course_grade is
        None and err_msg is a non-empty description of the exception; the loop
        then moves on to the next student.
        """
        # The tag depends only on the course, so build it once for all students.
        stats_tags = [u'action:{}'.format(course.id)]
        for student in students:
            # NOTE(review): the yields happen inside the timer context, so the
            # timed span presumably includes the consumer's work between
            # items -- confirm that is intended.
            with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=stats_tags):

                try:
                    course_grade = CourseGradeFactory().create(student, course)
                    yield self.GradeResult(student, course_grade, "")

                except Exception as exc:  # pylint: disable=broad-except
                    # Keep marching on even if this student couldn't be graded for
                    # some reason, but log it for future reference.
                    #
                    # ``exc.message`` is deprecated (PEP 352) and is empty when the
                    # exception was raised with zero or several arguments.  An
                    # empty err_msg means "no error" to callers, so fall back to
                    # the exception's repr to keep failures distinguishable.
                    err_msg = getattr(exc, 'message', None) or repr(exc)
                    log.exception(
                        'Cannot grade student %s (%s) in course %s because of exception: %s',
                        student.username, student.id, course.id, err_msg)
                    yield self.GradeResult(student, None, err_msg)
Exemplo n.º 2
0
    def iter(self, course, students):
        """
        Yield a GradeResult for each of the given students in the course.

        GradeResult is a named tuple of:

            (student, course_grade, err_msg)

        Arguments:
            course: the course to grade; its ``id`` is used for the stats tag
                and in log messages.
            students: an iterable of students (User).

        On success, course_grade is the value returned by
        ``CourseGradeFactory().create`` and err_msg is an empty string.
        If grading a student raises, the exception is logged, course_grade is
        None and err_msg is a non-empty description of the exception; the loop
        then moves on to the next student.
        """
        # The tag depends only on the course, so build it once for all students.
        stats_tags = [u"action:{}".format(course.id)]
        for student in students:
            # NOTE(review): the yields happen inside the timer context, so the
            # timed span presumably includes the consumer's work between
            # items -- confirm that is intended.
            with dog_stats_api.timer("lms.grades.CourseGradeFactory.iter", tags=stats_tags):

                try:
                    course_grade = CourseGradeFactory().create(student, course)
                    yield self.GradeResult(student, course_grade, "")

                except Exception as exc:  # pylint: disable=broad-except
                    # Keep marching on even if this student couldn't be graded for
                    # some reason, but log it for future reference.
                    #
                    # ``exc.message`` is deprecated (PEP 352) and is empty when the
                    # exception was raised with zero or several arguments.  An
                    # empty err_msg means "no error" to callers, so fall back to
                    # the exception's repr to keep failures distinguishable.
                    err_msg = getattr(exc, "message", None) or repr(exc)
                    log.exception(
                        "Cannot grade student %s (%s) in course %s because of exception: %s",
                        student.username,
                        student.id,
                        course.id,
                        err_msg,
                    )
                    yield self.GradeResult(student, None, err_msg)
Exemplo n.º 3
0
def run_main_task(entry_id, task_fcn, action_name):
    """
    Run `task_fcn` on behalf of the InstructorTask identified by `entry_id`.

    The InstructorTask is marked as in progress, its stored inputs are decoded,
    and `task_fcn` is then called with:

     `entry_id` : the primary key for the InstructorTask entry representing the task.
     `course_id` : the course id read from the InstructorTask entry.
     `task_input` : dict of task-specific arguments, JSON-decoded from the entry's task_input.
     `action_name` : past-tense verb to use for constructing status messages.

    Whatever `task_fcn` returns is logged and returned unchanged.  When no
    exception is raised, it is expected to be a dict with these keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible subtasks to attempt
          'action_name': user-visible verb to use in status messages.
              Should be past-tense.  Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Raises ValueError if the task id stored on the InstructorTask differs from
    the id of the task that is currently running.
    """

    # Look up the InstructorTask and flag it as in progress.  A failed lookup
    # is deliberately left to propagate to Celery; nothing useful can be done here.
    instructor_task = InstructorTask.objects.get(pk=entry_id)
    instructor_task.task_state = PROGRESS
    instructor_task.save_now()

    # Pull this run's inputs off the entry.
    task_id = instructor_task.task_id
    course_id = instructor_task.course_id
    task_input = json.loads(instructor_task.task_input)

    # Common prefix shared by every log line emitted below.
    task_info_string = (
        u'Task: {task_id}, InstructorTask ID: {entry_id}, Course: {course_id}, Input: {task_input}'
    ).format(task_id=task_id, entry_id=entry_id, course_id=course_id, task_input=task_input)
    TASK_LOG.info(u'%s, Starting update (nothing %s yet)', task_info_string, action_name)

    # The task id recorded in the InstructorTask must be the id of the task
    # that is actually executing right now.
    request_task_id = _get_current_task().request.id
    if task_id != request_task_id:
        message = u'{task_info}, Requested task did not match actual task "{actual_id}"'.format(
            task_info=task_info_string,
            actual_id=request_task_id,
        )
        TASK_LOG.error(message)
        raise ValueError(message)

    # Do the actual work, timing the whole call.
    timer_tags = [u'action:{name}'.format(name=action_name)]
    with dog_stats_api.timer('instructor_tasks.time.overall', tags=timer_tags):
        task_progress = task_fcn(entry_id, course_id, task_input, action_name)

    # Release any queries that the connection has been hanging onto.
    reset_queries()

    # Report the outcome and hand the progress info back as the task result.
    TASK_LOG.info(u'%s, Task type: %s, Finishing task: %s', task_info_string, action_name, task_progress)
    return task_progress
Exemplo n.º 4
0
    def iter(
            self,
            users,
            course=None,
            collected_block_structure=None,
            course_key=None,
            force_update=False,
    ):
        """
        Yield one grading result per user, in the order the users are given.

        Each yielded value is what ``self._iter_grade_result`` returns for
        that user.  Per the original documentation of this method, that is a
        GradeResult named tuple of:

            (student, course_grade, err_msg)

        where, if an error occurred, course_grade is None and err_msg is an
        exception message, and otherwise err_msg is an empty string.

        ``course``, ``collected_block_structure`` and ``course_key`` are
        passed to CourseData; ``force_update`` is forwarded unchanged to
        ``self._iter_grade_result``.
        """
        # A single CourseData is built up front and reused for every user so that:
        # 1. Correctness: the same version of the course is used to
        #    compute the grade for all students.
        # 2. Optimization: the collected course_structure is not
        #    retrieved from the data store multiple times.
        shared_course_data = CourseData(
            user=None,
            course=course,
            collected_block_structure=collected_block_structure,
            course_key=course_key,
        )
        timer_tags = [u'action:{}'.format(shared_course_data.course_key)]
        for user in users:
            with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=timer_tags):
                grade_result = self._iter_grade_result(user, shared_course_data, force_update)
                yield grade_result
Exemplo n.º 5
0
    def iter(self, course, students):
        """
        Yield a GradeResult for each of the given students in the course.

        GradeResult is a named tuple of:

            (student, course_grade, err_msg)

        Arguments:
            course: the course to grade; its ``id`` is used to fetch the
                collected block structure, for the stats tag and in log
                messages.
            students: an iterable of students (User).

        On success, course_grade is the value returned by
        ``CourseGradeFactory().create`` and err_msg is an empty string.
        If grading a student raises, the exception is logged, course_grade is
        None and err_msg is a non-empty description of the exception; the loop
        then moves on to the next student.
        """
        # Pre-fetch the collected course_structure so:
        # 1. Correctness: the same version of the course is used to
        #    compute the grade for all students.
        # 2. Optimization: the collected course_structure is not
        #    retrieved from the data store multiple times.
        collected_block_structure = get_block_structure_manager(course.id).get_collected()

        # The tag depends only on the course, so build it once for all students.
        stats_tags = [u'action:{}'.format(course.id)]
        for student in students:
            # NOTE(review): the yields happen inside the timer context, so the
            # timed span presumably includes the consumer's work between
            # items -- confirm that is intended.
            with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=stats_tags):
                try:
                    course_grade = CourseGradeFactory().create(student, course, collected_block_structure)
                    yield self.GradeResult(student, course_grade, "")

                except Exception as exc:  # pylint: disable=broad-except
                    # Keep marching on even if this student couldn't be graded for
                    # some reason, but log it for future reference.
                    #
                    # ``exc.message`` is deprecated (PEP 352) and is empty when the
                    # exception was raised with zero or several arguments.  An
                    # empty err_msg means "no error" to callers, so fall back to
                    # the exception's repr to keep failures distinguishable.
                    err_msg = getattr(exc, 'message', None) or repr(exc)
                    log.exception(
                        'Cannot grade student %s (%s) in course %s because of exception: %s',
                        student.username,
                        student.id,
                        course.id,
                        err_msg
                    )
                    yield self.GradeResult(student, None, err_msg)
Exemplo n.º 6
0
def run_main_task(entry_id, task_fcn, action_name):
    """
    Run `task_fcn` on behalf of the InstructorTask identified by `entry_id`.

    The InstructorTask is marked as in progress, its stored inputs are decoded,
    and `task_fcn` is then called with:

     `entry_id` : the primary key for the InstructorTask entry representing the task.
     `course_id` : the course id read from the InstructorTask entry.
     `task_input` : dict of task-specific arguments, JSON-decoded from the entry's task_input.
     `action_name` : past-tense verb to use for constructing status messages.

    Whatever `task_fcn` returns is logged and returned unchanged.  When no
    exception is raised, it is expected to be a dict with these keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible subtasks to attempt
          'action_name': user-visible verb to use in status messages.
              Should be past-tense.  Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Raises ValueError if the task id stored on the InstructorTask differs from
    the id of the task that is currently running.
    """

    # Look up the InstructorTask and flag it as in progress.  A failed lookup
    # is deliberately left to propagate to Celery; nothing useful can be done here.
    instructor_task = InstructorTask.objects.get(pk=entry_id)
    instructor_task.task_state = PROGRESS
    instructor_task.save_now()

    # Pull this run's inputs off the entry.
    task_id = instructor_task.task_id
    course_id = instructor_task.course_id
    task_input = json.loads(instructor_task.task_input)

    # Common prefix shared by every log line emitted below.
    task_info_string = (
        u'Task: {task_id}, InstructorTask ID: {entry_id}, Course: {course_id}, Input: {task_input}'
    ).format(task_id=task_id, entry_id=entry_id, course_id=course_id, task_input=task_input)
    TASK_LOG.info(u'%s, Starting update (nothing %s yet)', task_info_string, action_name)

    # The task id recorded in the InstructorTask must be the id of the task
    # that is actually executing right now.
    request_task_id = _get_current_task().request.id
    if task_id != request_task_id:
        message = u'{task_info}, Requested task did not match actual task "{actual_id}"'.format(
            task_info=task_info_string,
            actual_id=request_task_id,
        )
        TASK_LOG.error(message)
        raise ValueError(message)

    # Do the actual work, timing the whole call.
    timer_tags = [u'action:{name}'.format(name=action_name)]
    with dog_stats_api.timer('instructor_tasks.time.overall', tags=timer_tags):
        task_progress = task_fcn(entry_id, course_id, task_input, action_name)

    # Release any queries that the connection has been hanging onto.
    reset_queries()

    # Report the outcome and hand the progress info back as the task result.
    TASK_LOG.info(u'%s, Task type: %s, Finishing task: %s', task_info_string, action_name, task_progress)
    return task_progress
    def iter(self, course, students, force_update=False):
        """
        Yield a GradeResult for each of the given students in the course.

        GradeResult is a named tuple of:

            (student, course_grade, err_msg)

        Arguments:
            course: the course to grade; its ``id`` is used to fetch the
                collected block structure, for the stats tag and in log
                messages.
            students: an iterable of students (User).
            force_update: when true, each grade is obtained through
                ``CourseGradeFactory().update``; otherwise through
                ``CourseGradeFactory().create``.

        On success, course_grade is the value returned by that call and
        err_msg is an empty string.  If grading a student raises, the
        exception is logged, course_grade is None and err_msg is a non-empty
        description of the exception; the loop then moves on to the next
        student.
        """
        # Pre-fetch the collected course_structure so:
        # 1. Correctness: the same version of the course is used to
        #    compute the grade for all students.
        # 2. Optimization: the collected course_structure is not
        #    retrieved from the data store multiple times.
        collected_block_structure = get_block_structure_manager(
            course.id).get_collected()

        # The tag depends only on the course, so build it once for all students.
        stats_tags = [u'action:{}'.format(course.id)]
        for student in students:
            # NOTE(review): the yields happen inside the timer context, so the
            # timed span presumably includes the consumer's work between
            # items -- confirm that is intended.
            with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter',
                                     tags=stats_tags):
                try:
                    # A fresh factory per student, as before; only the method
                    # that is called depends on force_update.
                    factory = CourseGradeFactory()
                    operation = factory.update if force_update else factory.create
                    course_grade = operation(student, course,
                                             collected_block_structure)
                    yield self.GradeResult(student, course_grade, "")

                except Exception as exc:  # pylint: disable=broad-except
                    # Keep marching on even if this student couldn't be graded for
                    # some reason, but log it for future reference.
                    #
                    # ``exc.message`` is deprecated (PEP 352) and is empty when the
                    # exception was raised with zero or several arguments.  An
                    # empty err_msg means "no error" to callers, so fall back to
                    # the exception's repr to keep failures distinguishable.
                    err_msg = getattr(exc, 'message', None) or repr(exc)
                    log.exception(
                        'Cannot grade student %s (%s) in course %s because of exception: %s',
                        student.username, student.id, course.id, err_msg)
                    yield self.GradeResult(student, None, err_msg)
    def iter(
        self,
        users,
        course=None,
        collected_block_structure=None,
        course_key=None,
        force_update=False,
    ):
        """
        Yield one grading result per user, in the order the users are given.

        Each yielded value is what ``self._iter_grade_result`` returns for
        that user.  Per the original documentation of this method, that is a
        GradeResult named tuple of:

            (student, course_grade, err_msg)

        where, if an error occurred, course_grade is None and err_msg is an
        exception message, and otherwise err_msg is an empty string.

        ``course``, ``collected_block_structure`` and ``course_key`` are
        passed to CourseData; ``force_update`` is forwarded unchanged to
        ``self._iter_grade_result``.
        """
        # A single CourseData is built up front and reused for every user so that:
        # 1. Correctness: the same version of the course is used to
        #    compute the grade for all students.
        # 2. Optimization: the collected course_structure is not
        #    retrieved from the data store multiple times.
        course_data_kwargs = {
            'user': None,
            'course': course,
            'collected_block_structure': collected_block_structure,
            'course_key': course_key,
        }
        shared_course_data = CourseData(**course_data_kwargs)
        timer_tags = [u'action:{}'.format(shared_course_data.course_key)]
        for user in users:
            with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=timer_tags):
                grade_result = self._iter_grade_result(user, shared_course_data, force_update)
                yield grade_result
Exemplo n.º 9
0
def request_timer(request_id, method, url, tags=None):
    """
    Time whatever runs while this generator is suspended at its ``yield``.

    The span is reported through the "comment_client.request.time" stats
    timer (with the given ``tags``) and, once the generator is resumed
    normally, the wall-clock duration is written to the log together with
    ``request_id``, ``method`` and ``url``.

    NOTE(review): this looks like the body of a context manager (presumably
    wrapped by ``contextlib.contextmanager`` outside this view).  If an
    exception is thrown in at the ``yield``, the log line is skipped.
    """
    started_at = time()
    with dog_stats_api.timer("comment_client.request.time", tags=tags):
        yield
    elapsed = time() - started_at

    message = (
        u"comment_client_request_log: request_id={request_id}, method={method}, "
        u"url={url}, duration={duration}"
    ).format(request_id=request_id, method=method, url=url, duration=elapsed)
    log.info(message)
Exemplo n.º 10
0
def request_timer(request_id, method, url, tags=None):
    """
    Time whatever runs while this generator is suspended at its ``yield``.

    The span is reported through the 'comment_client.request.time' stats
    timer (with the given ``tags``) and, once the generator is resumed
    normally, the wall-clock duration is written to the log together with
    ``request_id``, ``method`` and ``url``.

    NOTE(review): this looks like the body of a context manager (presumably
    wrapped by ``contextlib.contextmanager`` outside this view).  If an
    exception is thrown in at the ``yield``, the log line is skipped.
    """
    started_at = time()
    with dog_stats_api.timer('comment_client.request.time', tags=tags):
        yield
    elapsed = time() - started_at

    log_fields = {
        'request_id': request_id,
        'method': method,
        'url': url,
        'duration': elapsed,
    }
    message = (
        u"comment_client_request_log: request_id={request_id}, method={method}, "
        u"url={url}, duration={duration}"
    ).format(**log_fields)
    log.info(message)
Exemplo n.º 11
0
def iterate_grades_for(course_or_id, students, keep_raw_scores=False):
    """Given a course_id and an iterable of students (User), yield a tuple of:

    (student, gradeset, err_msg) for every student in `students`.

    Arguments:
        course_or_id: either a course object, or a course id (a string or a
            CourseKey), in which case the course is looked up by id.
        students: an iterable of students (User).
        keep_raw_scores: passed through unchanged to `grade`.

    If an error occurred, gradeset will be an empty dict and err_msg will be a
    non-empty description of the exception. If there was no error, err_msg is
    an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    if isinstance(course_or_id, (basestring, CourseKey)):
        course = courses.get_course_by_id(course_or_id)
    else:
        course = course_or_id

    # We make a fake request because grading code expects to be able to look at
    # the request. We have to attach the correct user to the request before
    # grading that student.
    request = RequestFactory().get('/')

    # The tag depends only on the course, so build it once for all students.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        # NOTE(review): the yields happen inside the timer context, so the
        # timed span presumably includes the consumer's work between items --
        # confirm that is intended.
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=stats_tags):
            try:
                request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                request.session = {}
                gradeset = grade(student, request, course, keep_raw_scores)
                yield student, gradeset, ""
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                #
                # ``exc.message`` is deprecated (PEP 352) and is empty when the
                # exception was raised with zero or several arguments.  An
                # empty err_msg means "no error" to callers, so fall back to
                # the exception's repr to keep failures distinguishable.
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                yield student, {}, err_msg
Exemplo n.º 12
0
def send_all_users_email(entry_id, email_id, to_list, global_email_context,
                         subtask_status_dict):
    """
    Run one email-sending subtask and return its final status as a dict.

    The subtask status is rebuilt from `subtask_status_dict`, validated with
    `check_all_users_email_subtask_is_valid`, and the sending itself is
    delegated to `_send_all_users_email`, which returns a
    (new status, exception-or-None) pair.

    Outcomes:
      * no exception returned: the new status is stored and returned via
        `to_dict()`.
      * a RetryTaskError returned: it is raised without storing the status.
      * any other exception returned: the new status is stored, then the
        exception is raised.
      * `_send_all_users_email` itself raises: all `len(to_list)` recipients
        are counted as failed, the status is stored with state FAILURE, and
        the exception is re-raised.
    """
    subtask_status = SubtaskStatus.from_dict(subtask_status_dict)
    subtask_id = subtask_status.task_id
    recipient_count = len(to_list)
    log.info(
        (u"Preparing to send email %s to %d recipients as subtask %s "
         u"for email for all users task %d: context = %s, status=%s"),
        email_id, recipient_count, subtask_id, entry_id, global_email_context, subtask_status,
    )

    check_all_users_email_subtask_is_valid(entry_id, subtask_id, subtask_status)

    new_subtask_status = None
    send_exception = None
    try:
        # TODO: understand the timer's parameters (no tags are passed here).
        with dog_stats_api.timer('course_email.single_task.time.overall'):
            new_subtask_status, send_exception = _send_all_users_email(
                entry_id, email_id, to_list, global_email_context, subtask_status,
            )
    except Exception:
        # Unexpected failure: count every recipient as failed, record that,
        # and let the exception propagate.
        log.exception("Send-email task %s for email %s: failed unexpectedly!", subtask_id, email_id)
        subtask_status.increment(failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, subtask_id, subtask_status)
        raise

    if send_exception is not None:
        if isinstance(send_exception, RetryTaskError):
            # A retry is pending, so the status is not stored here.
            log.warning("Send-email task %s for email %s: being retried", subtask_id, email_id)
            raise send_exception  # pylint: disable=raising-bad-type

        log.error("Send-email task %s for email %s: failed: %s", subtask_id, email_id, send_exception)
        update_subtask_status(entry_id, subtask_id, new_subtask_status)
        raise send_exception  # pylint: disable=raising-bad-type

    log.info("Send-email task %s for email %s: succeeded", subtask_id, email_id)
    update_subtask_status(entry_id, subtask_id, new_subtask_status)

    log.info("Send-email task %s for email %s: returning status %s", subtask_id, email_id, new_subtask_status)
    return new_subtask_status.to_dict()
Exemplo n.º 13
0
def iterate_grades_for(course_or_id, students):
    """
    Given a course_id and an iterable of students (User), yield a GradeResult
    for every student in `students`.  GradeResult is a named tuple of:

    (student, gradeset, err_msg)

    Arguments:
        course_or_id: either a course object, or a course id (a string or a
            CourseKey), in which case the course is looked up by id.
        students: an iterable of students (User).

    If an error occurred, gradeset will be an empty dict and err_msg will be a
    non-empty description of the exception. If there was no error, err_msg is
    an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    if isinstance(course_or_id, (basestring, CourseKey)):
        course = get_course_by_id(course_or_id)
    else:
        course = course_or_id

    # The tag depends only on the course, so build it once for all students.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        # NOTE(review): the yields happen inside the timer context, so the
        # timed span presumably includes the consumer's work between items --
        # confirm that is intended.
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=stats_tags):
            try:
                gradeset = summary(student, course)
                yield GradeResult(student, gradeset, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                #
                # ``exc.message`` is deprecated (PEP 352) and is empty when the
                # exception was raised with zero or several arguments.  An
                # empty err_msg means "no error" to callers, so fall back to
                # the exception's repr to keep failures distinguishable.
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                yield GradeResult(student, {}, err_msg)
Exemplo n.º 14
0
def iterate_grades_for(course_or_id, students):
    """
    Given a course_id and an iterable of students (User), yield a GradeResult
    for every student in `students`.  GradeResult is a named tuple of:

    (student, gradeset, err_msg)

    Arguments:
        course_or_id: either a course object, or a course id (a string or a
            CourseKey), in which case the course is looked up by id.
        students: an iterable of students (User).

    If an error occurred, gradeset will be an empty dict and err_msg will be a
    non-empty description of the exception. If there was no error, err_msg is
    an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    if isinstance(course_or_id, (basestring, CourseKey)):
        course = get_course_by_id(course_or_id)
    else:
        course = course_or_id

    # The tag depends only on the course, so build it once for all students.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        # NOTE(review): the yields happen inside the timer context, so the
        # timed span presumably includes the consumer's work between items --
        # confirm that is intended.
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=stats_tags):
            try:
                gradeset = summary(student, course)
                yield GradeResult(student, gradeset, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                #
                # ``exc.message`` is deprecated (PEP 352) and is empty when the
                # exception was raised with zero or several arguments.  An
                # empty err_msg means "no error" to callers, so fall back to
                # the exception's repr to keep failures distinguishable.
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                yield GradeResult(student, {}, err_msg)
Exemplo n.º 15
0
def _send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
    """
    Performs the email sending task.

    Sends an email to a list of recipients.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It does not include 'name' and 'email', which will be provided by the to_list.
      * `subtask_status` : object of class SubtaskStatus representing current status.

    Sends to all addresses contained in to_list that are not also in the Optout table.
    Emails are sent multi-part, in both plain text and html.

    Returns a tuple of two values:
      * First value is a SubtaskStatus object which represents current progress at the end of this call.

      * Second value is an exception returned by the innards of the method, indicating a fatal error.
        In this case, the number of recipients that were not sent have already been added to the
        'failed' count above.
    """
    # Get information from current task's request:
    parent_task_id = InstructorTask.objects.get(pk=entry_id).task_id
    task_id = subtask_status.task_id
    total_recipients = len(to_list)
    recipient_num = 0
    total_recipients_successful = 0
    total_recipients_failed = 0
    recipients_info = Counter()

    log.info(
        "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, TotalRecipients: %s",
        parent_task_id,
        task_id,
        email_id,
        total_recipients
    )

    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist as exc:
        log.exception(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Could not find email to send.",
            parent_task_id,
            task_id,
            email_id
        )
        raise

    # Exclude optouts (if not a retry):
    # Note that we don't have to do the optout logic at all if this is a retry,
    # because we have presumably already performed the optout logic on the first
    # attempt.  Anyone on the to_list on a retry has already passed the filter
    # that existed at that time, and we don't need to keep checking for changes
    # in the Optout list.
    if subtask_status.get_retry_count() == 0:
        to_list, num_optout = _filter_optouts_from_recipients(to_list, course_email.course_id)
        subtask_status.increment(skipped=num_optout)

    course_title = global_email_context['course_title']

    # use the email from address in the CourseEmail, if it is present, otherwise compute it
    from_addr = course_email.from_addr if course_email.from_addr else \
        _get_source_address(course_email.course_id, course_title)

    # use the CourseEmailTemplate that was associated with the CourseEmail
    course_email_template = course_email.get_template()
    try:
        connection = get_connection()
        connection.open()

        # Define context values to use in all course emails:
        email_context = {'name': '', 'email': ''}
        email_context.update(global_email_context)

        while to_list:
            # Update context with user-specific values from the user at the end of the list.
            # At the end of processing this user, they will be popped off of the to_list.
            # That way, the to_list will always contain the recipients remaining to be emailed.
            # This is convenient for retries, which will need to send to those who haven't
            # yet been emailed, but not send to those who have already been sent to.
            recipient_num += 1
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']
            email_context['user_id'] = current_recipient['pk']
            email_context['course_id'] = course_email.course_id

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(course_email.text_message, email_context)
            html_msg = course_email_template.render_htmltext(course_email.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(
                course_email.subject,
                plaintext_msg,
                from_addr,
                [email],
                connection=connection
            )
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we have gotten the rate limiter.  This is not very high-tech,
            # but if a task has been retried for rate-limiting reasons, then we sleep
            # for a period of time between all emails within this task.  Choice of
            # the value depends on the number of workers that might be sending email in
            # parallel, and what the SES throttle rate is.
            if subtask_status.retried_nomax > 0:
                sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS)

            try:
                log.info(
                    "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, \
                    Recipient name: %s, Email address: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    current_recipient['profile__name'],
                    email
                )
                with dog_stats_api.timer('course_email.single_send.time.overall', tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SMTPDataError), Task: %s, SubTask: %s, EmailId: %s, \
                    Recipient num: %s/%s, Email address: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email
                )
                if exc.smtp_code >= 400 and exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task.
                    raise exc
                else:
                    # This will fall through and not retry the message.
                    log.warning(
                        'BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, \
                        Email not delivered to %s due to error %s',
                        parent_task_id,
                        task_id,
                        email_id,
                        recipient_num,
                        total_recipients,
                        email,
                        exc.smtp_error
                    )
                    dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                    subtask_status.increment(failed=1)

            except SINGLE_EMAIL_FAILURE_ERRORS as exc:
                # This will fall through and not retry the message.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SINGLE_EMAIL_FAILURE_ERRORS), Task: %s, SubTask: %s, \
                    EmailId: %s, Recipient num: %s/%s, Email address: %s, Exception: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email,
                    exc
                )
                dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                subtask_status.increment(failed=1)

            else:
                total_recipients_successful += 1
                log.info(
                    "BulkEmail ==> Status: Success, Task: %s, SubTask: %s, EmailId: %s, \
                    Recipient num: %s/%s, Email address: %s,",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email
                )
                dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)])
                if settings.BULK_EMAIL_LOG_SENT_EMAILS:
                    log.info('Email with id %s sent to %s', email_id, email)
                else:
                    log.debug('Email with id %s sent to %s', email_id, email)
                subtask_status.increment(succeeded=1)

            # Pop the user that was emailed off the end of the list only once they have
            # successfully been processed.  (That way, if there were a failure that
            # needed to be retried, the user is still on the list.)
            recipients_info[email] += 1
            to_list.pop()

        log.info(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Successful Recipients: %s/%s, \
            Failed Recipients: %s/%s",
            parent_task_id,
            task_id,
            email_id,
            total_recipients_successful,
            total_recipients,
            total_recipients_failed,
            total_recipients
        )
        duplicate_recipients = ["{0} ({1})".format(email, repetition)
                                for email, repetition in recipients_info.most_common() if repetition > 1]
        if duplicate_recipients:
            log.info(
                "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Duplicate Recipients [%s]: [%s]",
                parent_task_id,
                task_id,
                email_id,
                len(duplicate_recipients),
                ', '.join(duplicate_recipients)
            )

    except INFINITE_RETRY_ERRORS as exc:
        dog_stats_api.increment('course_email.infinite_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_nomax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_nomax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=True
        )

    except LIMITED_RETRY_ERRORS as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # Errors caught are those that indicate a temporary condition that might succeed on retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=False
        )

    except BULK_EMAIL_FAILURE_ERRORS as exc:
        dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
        num_pending = len(to_list)
        log.exception(('Task %s: email with id %d caused send_course_email task to fail '
                       'with "fatal" exception.  %d emails unsent.'),
                      task_id, email_id, num_pending)
        # Update counters with progress to date, counting unsent emails as failures,
        # and set the state to FAILURE:
        subtask_status.increment(failed=num_pending, state=FAILURE)
        return subtask_status, exc

    except Exception as exc:  # pylint: disable=broad-except
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # These are unexpected errors.  Since they might be due to a temporary condition that might
        # succeed on retry, we give them a retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        log.exception(('Task %s: email with id %d caused send_course_email task to fail '
                       'with unexpected exception.  Generating retry.'),
                      task_id, email_id)
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=False
        )

    else:
        # All went well.  Update counters with progress to date,
        # and set the state to SUCCESS:
        subtask_status.increment(state=SUCCESS)
        # Successful completion is marked by an exception value of None.
        return subtask_status, None
    finally:
        # Clean up at the end.
        connection.close()
Exemplo n.º 16
0
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Run `update_fcn` over every StudentModule selected for this task and report progress.

    The problems to visit are taken from `task_input`:

      * 'problem_url': a single problem; its usage key is mapped into `course_id` and its
        descriptor is loaded from the modulestore.
      * 'entrance_exam_url': every problem returned by `get_problems_in_section`.  When present,
        this replaces anything gathered from 'problem_url'.
      * 'student': optional student identifier, handed to `_get_modules_to_update` together with
        `filter_fcn` to narrow the set of modules.

    For each selected StudentModule, `update_fcn` is called with three arguments: the module
    descriptor for the module's `module_state_key`, the StudentModule itself, and `task_input`.
    It must return one of UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED;
    anything else raises UpdateProblemModuleStateError.
    A raised exception indicates a fatal condition -- no other student modules are considered.

    The return value is whatever `TaskProgress.update_task_state()` returns: a dict containing
    the task's results, with the following keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible updates to attempt
          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Because this runs inside a task, exceptions are deliberately not caught here.  They propagate
    to the next level, which records the failure and the error trace in the InstructorTask and
    the result object.

    """
    started_at = time()
    target_keys = []
    descriptors_by_key = {}

    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')
    is_score_override = action_name == ugettext_noop('overridden')

    # A single problem was named: resolve its usage key and load its descriptor.
    if problem_url:
        single_key = UsageKey.from_string(problem_url).map_into_course(course_id)
        target_keys.append(single_key)
        descriptors_by_key[unicode(single_key)] = modulestore().get_item(single_key)

    # An entrance exam was named: visit every problem in that section instead.
    if entrance_exam_url:
        descriptors_by_key = get_problems_in_section(entrance_exam_url)
        target_keys = [UsageKey.from_string(location) for location in descriptors_by_key]

    selected_modules = _get_modules_to_update(
        course_id, target_keys, student_identifier, filter_fcn, is_score_override
    )

    progress = TaskProgress(action_name, len(selected_modules), started_at)
    progress.update_task_state()

    for student_module in selected_modules:
        progress.attempted += 1
        descriptor = descriptors_by_key[unicode(student_module.module_state_key)]
        # No try/except on purpose: an error here propagates, and the task is
        # marked as FAILED with a stack trace.
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=[u'action:{name}'.format(name=action_name)]):
            outcome = update_fcn(descriptor, student_module, task_input)
            # Logging of individual failures is left to the update_fcn itself.
            if outcome == UPDATE_STATUS_SUCCEEDED:
                progress.succeeded += 1
                continue
            if outcome == UPDATE_STATUS_FAILED:
                progress.failed += 1
                continue
            if outcome == UPDATE_STATUS_SKIPPED:
                progress.skipped += 1
                continue
            raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(outcome))

    return progress.update_task_state()
Exemplo n.º 17
0
def send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status_dict):
    """
    Email one batch of recipients as a subtask and record the outcome on the InstructorTask.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict of values that are unique to this email but shared by all
        of its recipients; used to fill in slots in the email template.  It must contain
        'course_title' (read here for the stats tag).  It does not include 'name' and 'email',
        which come from `to_list`.
      * `subtask_status_dict` : dict representing the current status, converted with
        `SubtaskStatus.from_dict`.  Keys are:

        'task_id' : id of subtask.  This is used to pass task information across retries.
        'attempted' : number of attempts -- should equal succeeded plus failed
        'succeeded' : number that succeeded in processing
        'skipped' : number that were not processed.
        'failed' : number that failed during processing
        'retried_nomax' : number of times the subtask has been retried for conditions that
            should not have a maximum count applied
        'retried_withmax' : number of times the subtask has been retried for conditions that
            should have a maximum count applied
        'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS)

        Most values will be zero on the initial call, but may differ when the task is
        invoked as part of a retry.

    The actual sending is delegated to `_send_course_email`, which sends to all addresses in
    `to_list` that are not also in the Optout table, as multi-part (plain text and html) mail.

    Returns the new subtask status as a dict (via `to_dict()`) on success.  If the send reports
    an exception, that exception is re-raised: a RetryTaskError is raised without touching the
    stored status; any other exception is raised after the stored status has been updated.
    An exception escaping `_send_course_email` marks every recipient as failed, stores that
    status, and propagates.
    """
    subtask_status = SubtaskStatus.from_dict(subtask_status_dict)
    subtask_id = subtask_status.task_id
    recipient_count = len(to_list)
    log.info((u"Preparing to send email %s to %d recipients as subtask %s "
              u"for instructor task %d: context = %s, status=%s"),
             email_id, recipient_count, subtask_id, entry_id, global_email_context, subtask_status)

    # Confirm that this subtask is actually known to the current InstructorTask entry and
    # has not already been completed; otherwise an exception is thrown, which should fail
    # this subtask immediately.  Duplicate subtasks can appear for the same entry when the
    # parent task is run twice, e.g. when Celery loses its connection to its broker and
    # requeues current tasks.  perform_delegate_email_batches() is expected to catch a
    # resubmitted parent task, but the check is repeated here in case that fails, and
    # because Celery may also run this very subtask twice for the same reason.
    check_subtask_is_valid(entry_id, subtask_id, subtask_status)

    updated_status = send_error = None
    try:
        title = global_email_context['course_title']
        with dog_stats_api.timer('course_email.single_task.time.overall', tags=[_statsd_tag(title)]):
            updated_status, send_error = _send_course_email(
                entry_id, email_id, to_list, global_email_context, subtask_status
            )
    except Exception:
        # Truly unexpected failure: try to write it to the entry before failing.
        log.exception("Send-email task %s for email %s: failed unexpectedly!", subtask_id, email_id)
        # There is no way to know how far the emailing got, so every recipient is
        # counted as failed.  That at least keeps the counts consistent.
        subtask_status.increment(failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, subtask_id, subtask_status)
        raise

    if send_error is not None:
        if isinstance(send_error, RetryTaskError):
            # A retry must be signalled to Celery by raising the RetryTaskError.
            # Progress made before the retry condition is assumed to have been
            # stored before the retry call was made, so this only logs.
            log.warning("Send-email task %s for email %s: being retried", subtask_id, email_id)
            raise send_error  # pylint: disable=raising-bad-type

        log.error("Send-email task %s for email %s: failed: %s", subtask_id, email_id, send_error)
        update_subtask_status(entry_id, subtask_id, updated_status)
        raise send_error  # pylint: disable=raising-bad-type

    # Success: store the progress on the InstructorTask object.
    log.info("Send-email task %s for email %s: succeeded", subtask_id, email_id)
    update_subtask_status(entry_id, subtask_id, updated_status)

    # Hand the status back in a form that Celery can serialize into JSON.
    log.info("Send-email task %s for email %s: returning status %s", subtask_id, email_id, updated_status)
    return updated_status.to_dict()
Exemplo n.º 18
0
    def post(self, request, course_key_string):
        """
        Receive one chunk of a course/library ``.tar.gz`` upload and, once the
        last chunk has arrived, extract the archive and import it.

        Arguments:
            request: the upload request.  The archive is read from
                ``request.FILES['course-data']``; chunked uploads are
                recognised through the ``HTTP_CONTENT_RANGE`` header.
            course_key_string: serialized key of the target course or library.

        Progress is recorded through ``self._save_request_status`` as a stage
        number: 0 upload started, 1 upload complete, 2 archive extracted,
        3 root XML file located, 4 import successful.  Failures are recorded
        as negative stage numbers.

        Returns a JsonResponse:
            * 415 if the file name does not end in ``.tar.gz`` or the root XML
              file is missing from the package;
            * 409 if a chunk starts beyond the data received so far;
            * ``{'ImportStatus': 1}`` if the final chunk was delivered twice;
            * a ``files`` listing while more chunks are expected;
            * 400 with ``error_message`` and ``stage`` on any other error;
            * ``{'status': 'OK'}`` once the import has completed.
        """
        # Imported locally so the EEXIST check below does not rely on a
        # module-level import that may not exist in this file.
        import errno

        courselike_key = CourseKey.from_string(course_key_string)
        library = isinstance(courselike_key, LibraryLocator)

        if library:
            root_name = LIBRARY_ROOT
            import_func = import_library_from_xml
        else:
            root_name = COURSE_ROOT
            import_func = import_course_from_xml

        filename = request.FILES['course-data'].name
        courselike_string = unicode(courselike_key) + filename
        data_root = path(settings.GITHUB_REPO_ROOT)
        subdir = base64.urlsafe_b64encode(repr(courselike_key))
        course_dir = data_root / subdir

        status_key = "import_export.import.status:{}|{}".format(
            request.user.username, courselike_string)

        # Do everything in a try-except block to make sure everything is
        # properly cleaned up.
        try:
            # Cache the import progress
            self._save_request_status(request, courselike_string, 0)
            if not filename.endswith('.tar.gz'):
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message':
                        _('We only support uploading a .tar.gz file.'),
                        'stage':
                        -1
                    },
                    status=415)

            temp_filepath = course_dir / filename

            # Only handle exceptions caused by the directory already existing,
            # to avoid a potential race condition caused by the "check and go"
            # method.  The EEXIST constant lives in the errno module, not on
            # the exception instance.
            try:
                os.makedirs(course_dir)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise

            logging.debug('importing course to %s', temp_filepath)

            # Get upload chunks byte ranges
            try:
                matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                content_range = matches.groupdict()
            except KeyError:  # Single chunk
                # no Content-Range header, so make one that will work
                content_range = {'start': 0, 'stop': 1, 'end': 2}

            # stream out the uploaded files in chunks to disk
            if int(content_range['start']) == 0:
                mode = "wb+"
            else:
                mode = "ab+"
                size = os.path.getsize(temp_filepath)
                # Check to make sure we haven't missed a chunk
                # This shouldn't happen, even if different instances are
                # handling the same session, but it's always better to catch
                # errors earlier.
                if size < int(content_range['start']):
                    self._save_request_status(request, courselike_string, -1)
                    log.warning(
                        "Reported range %s does not match size downloaded so "
                        "far %s", content_range['start'], size)
                    return JsonResponse(
                        {
                            'error_message':
                            _('File upload corrupted. Please try again'),
                            'stage':
                            -1
                        },
                        status=409)
                # The last request sometimes comes twice. This happens because
                # nginx sends a 499 error code when the response takes too long.
                elif size > int(content_range['stop']) \
                        and size == int(content_range['end']):
                    return JsonResponse({'ImportStatus': 1})

            with open(temp_filepath, mode) as temp_file:
                for chunk in request.FILES['course-data'].chunks():
                    temp_file.write(chunk)

            size = os.path.getsize(temp_filepath)

            if int(content_range['stop']) != int(content_range['end']) - 1:
                # More chunks coming
                return JsonResponse({
                    "files": [{
                        "name": filename,
                        "size": size,
                        "delete_url": "",
                        "delete_type": "",
                        "thumbnail_url": ""
                    }]
                })
        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            self._save_request_status(request, courselike_string, -1)
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info("Course import %s: Temp data cleared", courselike_key)

            log.exception("error importing course")
            return JsonResponse({
                'error_message': str(exception),
                'stage': -1
            },
                                status=400)

        # try-finally block for proper clean up after receiving last chunk.
        try:
            # This was the last chunk.
            log.info("Course import %s: Upload complete", courselike_key)
            self._save_request_status(request, courselike_string, 1)

            tar_file = tarfile.open(temp_filepath)
            try:
                safetar_extractall(tar_file,
                                   (course_dir + '/').encode('utf-8'))
            except SuspiciousOperation as exc:
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message': 'Unsafe tar file. Aborting import.',
                        'suspicious_operation_message': exc.args[0],
                        'stage': -1
                    },
                    status=400)
            finally:
                tar_file.close()

            log.info("Course import %s: Uploaded file extracted",
                     courselike_key)
            self._save_request_status(request, courselike_string, 2)

            # Locate the directory that holds the root XML file (`root_name`).
            def get_all_files(directory):
                """
                For each file in the directory, yield a 2-tuple of (file-name,
                directory-path)
                """
                for dirpath, _dirnames, filenames in os.walk(directory):
                    for filename in filenames:
                        yield (filename, dirpath)

            def get_dir_for_fname(directory, filename):
                """
                Returns the dirpath for the first file found in the directory
                with the given name.  If there is no file in the directory with
                the specified name, return None.
                """
                for fname, dirpath in get_all_files(directory):
                    if fname == filename:
                        return dirpath
                return None

            dirpath = get_dir_for_fname(course_dir, root_name)
            if not dirpath:
                self._save_request_status(request, courselike_string, -2)
                return JsonResponse(
                    {
                        'error_message':
                        _('Could not find the {root_xml_file} file in the package.'
                          ).format(root_xml_file=root_name),
                        'stage':
                        -2
                    },
                    status=415)

            # The import function is given paths relative to the data root.
            dirpath = os.path.relpath(dirpath, data_root)
            logging.debug('found %s at %s', root_name, dirpath)

            log.info("Course import %s: Extracted file verified",
                     courselike_key)
            self._save_request_status(request, courselike_string, 3)

            with dog_stats_api.timer(
                    'courselike_import.time',
                    tags=[u"courselike:{}".format(courselike_key)]):
                courselike_items = import_func(
                    modulestore(),
                    request.user.id,
                    settings.GITHUB_REPO_ROOT,
                    [dirpath],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_id=courselike_key,
                )

            new_location = courselike_items[0].location
            logging.debug('new course at %s', new_location)

            log.info("Course import %s: Course import successful",
                     courselike_key)
            self._save_request_status(request, courselike_string, 4)

        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            log.exception("error importing course")
            # NOTE(review): assumes the value cached under status_key is an
            # integer stage; a missing entry (None) would raise TypeError
            # here -- confirm against _save_request_status.
            return JsonResponse(
                {
                    'error_message': str(exception),
                    'stage': -cache.get(status_key)
                },
                status=400)

        finally:
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info(
                    "Course import %s: Temp data cleared",
                    courselike_key  # pylint: disable=no-value-for-parameter
                )
            # set failed stage number with negative sign in case of an
            # unsuccessful import
            if cache.get(status_key) != 4:
                self._save_request_status(request, courselike_string,
                                          -abs(cache.get(status_key)))

        return JsonResponse({'status': 'OK'})
Exemplo n.º 19
0
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Visit the StudentModule rows selected by `task_input` and apply `update_fcn` to each.

    Selection works as follows:

      * The problems come from `task_input['problem_url']` (a single problem, resolved with
        `course_id.make_usage_key_from_deprecated_string`) or from
        `task_input['entrance_exam_url']` (every problem returned by `get_problems_in_section`;
        when present, this replaces anything gathered from 'problem_url').
      * StudentModule rows are those matching `course_id` and one of those usage keys.
      * If `task_input['student']` is not None, the rows are limited to that student.  An
        identifier containing "@" is looked up as an email address, anything else as a
        username; the lookup is allowed to throw if no such user exists.  If it is None, the
        modules of all students on the selected problems are updated.
      * If `filter_fcn` is not None, it is applied to the query that has been constructed.  It
        takes the query as its one argument and returns the filtered version of the query.

    `update_fcn` is called on each resulting StudentModule with two arguments: the module
    descriptor for the module's `module_state_key`, and the StudentModule itself.  It must return
    one of UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; any other
    value raises UpdateProblemModuleStateError.
    A raised exception indicates a fatal condition -- no other student modules are considered.

    The return value is whatever `TaskProgress.update_task_state()` returns: a dict containing
    the task's results, with the following keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible updates to attempt
          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Because this runs inside a task, exceptions are deliberately not caught here.  They propagate
    to the next level, which records the failure and the error trace in the InstructorTask and
    the result object.

    """
    began = time()
    wanted_keys = []
    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')
    descriptor_map = {}

    # A single problem was named: build its usage key and load its descriptor.
    if problem_url:
        problem_key = course_id.make_usage_key_from_deprecated_string(problem_url)
        wanted_keys.append(problem_key)
        descriptor_map[unicode(problem_key)] = modulestore().get_item(problem_key)

    # An entrance exam was named: use every problem found in that section.
    if entrance_exam_url:
        descriptor_map = get_problems_in_section(entrance_exam_url)
        wanted_keys = [UsageKey.from_string(location) for location in descriptor_map]

    # Start from every module in the course that belongs to one of the problems.
    queryset = StudentModule.objects.filter(course_id=course_id, module_state_key__in=wanted_keys)

    # Optionally narrow to one student.  Without an identifier, every student who
    # has responded to a problem so far is updated.
    student = None
    if student_identifier is not None:
        # Let the lookup throw if no matching user is found.
        lookup_field = 'email' if "@" in student_identifier else 'username'
        student = User.objects.get(**{lookup_field: student_identifier})

    if student is not None:
        queryset = queryset.filter(student_id=student.id)

    if filter_fcn is not None:
        queryset = filter_fcn(queryset)

    progress = TaskProgress(action_name, queryset.count(), began)
    progress.update_task_state()

    for student_module in queryset:
        progress.attempted += 1
        descriptor = descriptor_map[unicode(student_module.module_state_key)]
        # No try/except on purpose: an error here propagates, and the task is
        # marked as FAILED with a stack trace.
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=[u'action:{name}'.format(name=action_name)]):
            outcome = update_fcn(descriptor, student_module)
            # Logging of individual failures is left to the update_fcn itself.
            if outcome == UPDATE_STATUS_SUCCEEDED:
                progress.succeeded += 1
                continue
            if outcome == UPDATE_STATUS_FAILED:
                progress.failed += 1
                continue
            if outcome == UPDATE_STATUS_SKIPPED:
                progress.skipped += 1
                continue
            raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(outcome))

    return progress.update_task_state()
Exemplo n.º 20
0
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id,
                                task_input, action_name):
    """
    Performs generic update by visiting StudentModule instances with the update_fcn provided.

    The set of StudentModule instances to visit is derived from `task_input`, a dict that is
    read with `.get()` for the following keys:

          'problem_url': deprecated-string location of a single problem in `course_id`.
          'entrance_exam_url': passed to `get_problems_in_section`; when present, the problems
              returned for it REPLACE any selection made through 'problem_url'.
          'student': optional identifier of a single student.  If it contains "@" it is looked
              up as an email address, otherwise as a username.  When it is None (or absent) the
              update is performed on modules for all students on the selected problem(s).

    If a `filter_fcn` is not None, it is applied to the query that has been constructed.  It takes one
    argument, which is the query being filtered, and returns the filtered version of the query.

    The `update_fcn` is called on each StudentModule that passes the resulting filtering.
    It is passed three arguments:  the module_descriptor for the module pointed to by the
    module_state_key, the particular StudentModule to update, and the task_input being passed
    through.  It must return one of UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or
    UPDATE_STATUS_SKIPPED; the matching counter on the task progress is incremented.
    Any other return value raises UpdateProblemModuleStateError.
    A raised exception indicates a fatal condition -- that no other student modules should be considered.

    `_entry_id` is accepted for signature compatibility and is not used here.

    The return value is the result of `TaskProgress.update_task_state()`, described by the
    original authors as a dict containing the task's results, with the following keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible updates to attempt
          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Because this is run internal to a task, it does not catch exceptions.  These are allowed to pass up to the
    next level, so that it can set the failure modes and capture the error trace in the InstructorTask and the
    result object.  In particular, `User.objects.get` is allowed to raise when the requested
    student cannot be found.

    """
    start_time = time()
    usage_keys = []
    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')
    # Maps unicode(usage_key) -> problem descriptor for every problem being updated.
    problems = {}

    # if problem_url is present make a usage key from it
    if problem_url:
        usage_key = course_id.make_usage_key_from_deprecated_string(
            problem_url)
        usage_keys.append(usage_key)

        # find the problem descriptor:
        problem_descriptor = modulestore().get_item(usage_key)
        problems[unicode(usage_key)] = problem_descriptor

    # if entrance_exam is present grab all problems in it
    # NOTE: this intentionally overwrites (rather than extends) whatever
    # `problem_url` selected above; the two inputs are not combined.
    if entrance_exam_url:
        problems = get_problems_in_section(entrance_exam_url)
        usage_keys = [
            UsageKey.from_string(location) for location in problems.keys()
        ]

    # find the modules in question
    modules_to_update = StudentModule.objects.filter(
        course_id=course_id, module_state_key__in=usage_keys)

    # give the option of updating an individual student. If not specified,
    # then updates all students who have responded to a problem so far
    if student_identifier is not None:
        # if an identifier is supplied, then look for the student,
        # and let it throw an exception if none is found.
        if "@" in student_identifier:
            student = User.objects.get(email=student_identifier)
        else:
            student = User.objects.get(username=student_identifier)
        modules_to_update = modules_to_update.filter(student_id=student.id)

    if filter_fcn is not None:
        modules_to_update = filter_fcn(modules_to_update)

    task_progress = TaskProgress(action_name, modules_to_update.count(),
                                 start_time)
    # Publish the initial (all-zero) progress before any work is attempted.
    task_progress.update_task_state()

    for module_to_update in modules_to_update:
        task_progress.attempted += 1
        module_descriptor = problems[unicode(
            module_to_update.module_state_key)]
        # There is no try here:  if there's an error, we let it throw, and the task will
        # be marked as FAILED, with a stack trace.
        with dog_stats_api.timer(
                'instructor_tasks.module.time.step',
                tags=[u'action:{name}'.format(name=action_name)]):
            update_status = update_fcn(module_descriptor, module_to_update,
                                       task_input)
            if update_status == UPDATE_STATUS_SUCCEEDED:
                # The update_fcn reported that it performed some kind of work.
                # Logging of failures is left to the update_fcn itself.
                task_progress.succeeded += 1
            elif update_status == UPDATE_STATUS_FAILED:
                task_progress.failed += 1
            elif update_status == UPDATE_STATUS_SKIPPED:
                task_progress.skipped += 1
            else:
                raise UpdateProblemModuleStateError(
                    "Unexpected update_status returned: {}".format(
                        update_status))

    return task_progress.update_task_state()
Exemplo n.º 21
0
def _import_handler(request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.

    For JSON requests (the default when no Accept header is sent) this receives one chunk
    of an uploaded ``.tar.gz`` archive per non-GET request, appends it to a temporary file,
    and, once the final chunk has arrived, extracts the archive and runs `import_func` on it.
    Progress is recorded through `_save_request_status` under the key
    ``unicode(courselike_key) + filename`` using these stage numbers:

        0: upload started, 1: upload complete, 2: archive extracted,
        3: extracted content verified, 4: import successful.
        A negative value marks the stage at which the import failed.

    For non-JSON GET requests the ``import.html`` page is rendered instead.

    Arguments:
        request: the Django request; `request.FILES['course-data']` holds the uploaded chunk.
        courselike_key: key of the course or library being imported into.
        root_name: file name searched for inside the extracted archive; when equal to
            COURSE_ROOT the entrance-exam milestone handling is applied.
        successful_url: exposed to the template as 'successful_import_redirect_url'.
        context_name: template context key under which `courselike_module` is exposed.
        courselike_module: the existing course/library module.
        import_func: callable performing the actual import; called with the modulestore,
            the user id, the data root, the list of directories to import and keyword options.

    Returns:
        A JsonResponse describing progress, success or failure for JSON requests, the rendered
        import page for HTML GET requests, or HttpResponseNotFound otherwise.

    Raises:
        PermissionDenied: the user lacks author access to `courselike_key`.
        NotImplementedError: a JSON GET request was made.
    """
    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Pre-bind the names the error handler below relies on.  Without this, a failure
            # before they are assigned (e.g. no 'course-data' file in the request) would raise
            # UnboundLocalError inside the handler and hide the real error behind a 500.
            course_dir = None
            courselike_string = None

            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                subdir = base64.urlsafe_b64encode(repr(courselike_key))
                course_dir = data_root / subdir
                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                # If the course has an entrance exam then remove it and its corresponding milestone.
                # current course state before import.
                if root_name == COURSE_ROOT:
                    if courselike_module.entrance_exam_enabled:
                        remove_entrance_exam_milestone_reference(request, courselike_key)
                        log.info(
                            "entrance exam milestone content reference for course %s has been removed",
                            courselike_module.id
                        )

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415
                    )

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to {0}'.format(temp_filepath))

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:    # Single chunk
                    # no Content-Range header, so make one that will work
                    # (stop == end - 1, so the chunk is treated as the last one below)
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    # First chunk: start a fresh file.
                    mode = "wb+"
                else:
                    # Later chunk: append to what has been received so far.
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'],
                            size
                        )
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409
                        )
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', courselike_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                # Only touch the status / temp directory if we got far enough to know them.
                if courselike_string is not None:
                    _save_request_status(request, courselike_string, -1)
                if course_dir is not None and course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)

                log.exception(
                    "error importing course"
                )
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400
                )

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400
                    )
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted", courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the {0} file in the package.').format(root_name),
                            'Stage': -2
                        },
                        status=415
                    )

                # import_func is given the data root plus directories relative to it.
                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified", courselike_key)
                _save_request_status(request, courselike_string, 3)

                with dog_stats_api.timer(
                    'courselike_import.time',
                    tags=[u"courselike:{}".format(courselike_key)]
                ):
                    courselike_items = import_func(
                        modulestore(), request.user.id,
                        settings.GITHUB_REPO_ROOT, [dirpath],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_id=courselike_key
                    )

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful", courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=broad-except
                log.exception(
                    "error importing course"
                )
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400
                )

            finally:
                # Runs on every exit path of the block above, including the early returns.
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(request, courselike_string, -abs(session_status[courselike_string]))

                # status == 4 represents that course has been imported successfully.
                if session_status[courselike_string] == 4 and root_name == COURSE_ROOT:
                    # Reload the course so we have the latest state
                    course = modulestore().get_course(courselike_key)
                    if course.entrance_exam_enabled:
                        entrance_exam_chapter = modulestore().get_items(
                            course.id,
                            qualifiers={'category': 'chapter'},
                            settings={'is_entrance_exam': True}
                        )[0]

                        metadata = {'entrance_exam_id': unicode(entrance_exam_chapter.location)}
                        CourseMetadata.update_from_dict(metadata, course, request.user)
                        add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                        log.info("Course %s Entrance exam imported", course.id)

            return JsonResponse({'Status': 'OK'})
    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url(
            "import_status_handler", courselike_key, kwargs={'filename': "fillerName"}
        )
        return render_to_response('import.html', {
            context_name: courselike_module,
            'successful_import_redirect_url': successful_url,
            'import_status_url': status_url,
            'library': isinstance(courselike_key, LibraryLocator)
        })
    else:
        return HttpResponseNotFound()
Exemplo n.º 22
0
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
    """
    Import a course or library from a provided OLX .tar.gz archive.

    The archive is copied from `course_import_export_storage` into a temporary working
    directory under ``settings.GITHUB_REPO_ROOT``, extracted, checked for the expected root
    file, and handed to the course or library import function.  Progress is reported through
    ``self.status`` (states u'Unpacking', u'Verifying', u'Updating'); every failure is
    reported with ``self.status.fail(...)`` and the function then returns without raising.

    Arguments:
        user_id: primary key of the user performing the import.
        course_key_string: serialized key of the target course or library.
        archive_path: location of the uploaded archive inside `course_import_export_storage`.
            The stored archive is deleted once it has been copied locally.
        archive_name: file name of the archive; must end with ``.tar.gz``.
        language: passed to `respect_language` so failure messages are translated.

    Returns:
        None.
    """
    courselike_key = CourseKey.from_string(course_key_string)
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with respect_language(language):
            self.status.fail(_(u'Unknown User ID: {0}').format(user_id))
        return
    if not has_course_author_access(user, courselike_key):
        with respect_language(language):
            self.status.fail(_(u'Permission denied'))
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir
    try:
        self.status.set_state(u'Unpacking')

        if not archive_name.endswith(u'.tar.gz'):
            with respect_language(language):
                self.status.fail(_(u'We only support uploading a .tar.gz file.'))
                return

        # Make sure there is something to download BEFORE creating the working directory,
        # so that this early exit does not leave a stray directory behind.
        if not course_import_export_storage.exists(archive_path):
            LOGGER.info(u'Course import %s: Uploaded file %s not found', courselike_key, archive_path)
            with respect_language(language):
                self.status.fail(_(u'Tar file not found'))
            return

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():  # pylint: disable=no-value-for-parameter
            os.mkdir(course_dir)

        LOGGER.debug(u'importing course to {0}'.format(temp_filepath))

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:
                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)
                # Two-argument iter(): keep calling read_chunk until it returns b'' (EOF).
                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)
        LOGGER.info(u'Course import %s: Download from storage complete', courselike_key)
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam then remove it and its corresponding milestone.
        # current course state before import.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                # The helper expects a request object, so build a minimal one for this user.
                fake_request = RequestFactory().get(u'/')
                fake_request.user = user
                from contentstore.views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(fake_request, courselike_key)
                LOGGER.info(
                    u'entrance exam milestone content reference for course %s has been removed',
                    courselike_module.id
                )
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        LOGGER.exception(u'Error importing course %s', courselike_key, exc_info=True)
        self.status.fail(text_type(exception))
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + u'/').encode(u'utf-8'))
        except SuspiciousOperation as exc:
            LOGGER.info(u'Course import %s: Unsafe tar file - %s', courselike_key, exc.args[0])
            with respect_language(language):
                self.status.fail(_(u'Unsafe tar file. Aborting import.'))
            return
        finally:
            tar_file.close()

        LOGGER.info(u'Course import %s: Uploaded file extracted', courselike_key)
        self.status.set_state(u'Verifying')
        self.status.increment_completed_steps()

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            with respect_language(language):
                self.status.fail(_(u'Could not find the {0} file in the package.').format(root_name))
                return

        # import_func is given the data root plus directories relative to it.
        dirpath = os.path.relpath(dirpath, data_root)
        LOGGER.debug(u'found %s at %s', root_name, dirpath)

        LOGGER.info(u'Course import %s: Extracted file verified', courselike_key)
        self.status.set_state(u'Updating')
        self.status.increment_completed_steps()

        with dog_stats_api.timer(
            u'courselike_import.time',
            tags=[u"courselike:{}".format(courselike_key)]
        ):
            courselike_items = import_func(
                modulestore(), user.id,
                settings.GITHUB_REPO_ROOT, [dirpath],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key
            )

        new_location = courselike_items[0].location
        LOGGER.debug(u'new course at %s', new_location)

        LOGGER.info(u'Course import %s: Course import successful', courselike_key)
    except Exception as exception:   # pylint: disable=broad-except
        LOGGER.exception(u'error importing course', exc_info=True)
        self.status.fail(text_type(exception))
    finally:
        # Runs on every exit path of the block above, including the early returns.
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        # NOTE(review): presumably `fail()` moves the status off u'Updating', so this
        # only runs after a successful import -- confirm against the status class.
        if self.status.state == u'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={u'category': u'chapter'},
                    settings={u'is_entrance_exam': True}
                )[0]

                metadata = {u'entrance_exam_id': text_type(entrance_exam_chapter.location)}
                CourseMetadata.update_from_dict(metadata, course, user)
                from contentstore.views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(u'Course %s Entrance exam imported', course.id)
Exemplo n.º 23
0
def import_olx(self, user_id, course_key_string, archive_path, archive_name,
               language):
    """
    Import a course or library from a provided OLX .tar.gz archive.

    Steps, in order: validate the user and their author access; copy the archive out of
    `course_import_export_storage` into a working directory below
    ``settings.GITHUB_REPO_ROOT``; extract it; locate the root file (COURSE_ROOT or
    LIBRARY_ROOT); run the matching import function; finally remove the working directory.
    ``self.status`` is moved through u'Unpacking', u'Verifying' and u'Updating'.  Failures
    are reported via ``self.status.fail(...)`` rather than raised.

    Arguments:
        user_id: primary key of the user performing the import.
        course_key_string: serialized key of the target course or library.
        archive_path: location of the uploaded archive inside `course_import_export_storage`;
            the stored copy is deleted after it has been downloaded.
        archive_name: file name of the archive; must end with ``.tar.gz``.
        language: passed to `respect_language` so failure messages are translated.

    Returns:
        None.
    """
    courselike_key = CourseKey.from_string(course_key_string)
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with respect_language(language):
            self.status.fail(_(u'Unknown User ID: {0}').format(user_id))
        return
    if not has_course_author_access(user, courselike_key):
        with respect_language(language):
            self.status.fail(_(u'Permission denied'))
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir
    try:
        self.status.set_state(u'Unpacking')

        if not archive_name.endswith(u'.tar.gz'):
            with respect_language(language):
                self.status.fail(
                    _(u'We only support uploading a .tar.gz file.'))
                return

        # Verify the archive is available before the working directory is created;
        # otherwise the early return below would leave that directory on disk.
        if not course_import_export_storage.exists(archive_path):
            LOGGER.info(u'Course import %s: Uploaded file %s not found',
                        courselike_key, archive_path)
            with respect_language(language):
                self.status.fail(_(u'Tar file not found'))
            return

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():  # pylint: disable=no-value-for-parameter
            os.mkdir(course_dir)

        LOGGER.debug(u'importing course to {0}'.format(temp_filepath))

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:

                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)

                # Sentinel form of iter(): stops when read_chunk returns b'' (EOF).
                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)
        LOGGER.info(u'Course import %s: Download from storage complete',
                    courselike_key)
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam then remove it and its corresponding milestone.
        # current course state before import.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                # The helper takes a request, so synthesize one carrying this user.
                fake_request = RequestFactory().get(u'/')
                fake_request.user = user
                from contentstore.views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(
                    fake_request, courselike_key)
                LOGGER.info(
                    u'entrance exam milestone content reference for course %s has been removed',
                    courselike_module.id)
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        LOGGER.exception(u'Error importing course %s',
                         courselike_key,
                         exc_info=True)
        self.status.fail(text_type(exception))
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + u'/').encode(u'utf-8'))
        except SuspiciousOperation as exc:
            LOGGER.info(u'Course import %s: Unsafe tar file - %s',
                        courselike_key, exc.args[0])
            with respect_language(language):
                self.status.fail(_(u'Unsafe tar file. Aborting import.'))
            return
        finally:
            tar_file.close()

        LOGGER.info(u'Course import %s: Uploaded file extracted',
                    courselike_key)
        self.status.set_state(u'Verifying')
        self.status.increment_completed_steps()

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            with respect_language(language):
                self.status.fail(
                    _(u'Could not find the {0} file in the package.').format(
                        root_name))
                return

        # import_func receives the data root plus directories relative to it.
        dirpath = os.path.relpath(dirpath, data_root)
        LOGGER.debug(u'found %s at %s', root_name, dirpath)

        LOGGER.info(u'Course import %s: Extracted file verified',
                    courselike_key)
        self.status.set_state(u'Updating')
        self.status.increment_completed_steps()

        with dog_stats_api.timer(
                u'courselike_import.time',
                tags=[u"courselike:{}".format(courselike_key)]):
            courselike_items = import_func(modulestore(),
                                           user.id,
                                           settings.GITHUB_REPO_ROOT,
                                           [dirpath],
                                           load_error_modules=False,
                                           static_content_store=contentstore(),
                                           target_id=courselike_key)

        new_location = courselike_items[0].location
        LOGGER.debug(u'new course at %s', new_location)

        LOGGER.info(u'Course import %s: Course import successful',
                    courselike_key)
    except Exception as exception:  # pylint: disable=broad-except
        LOGGER.exception(u'error importing course', exc_info=True)
        self.status.fail(text_type(exception))
    finally:
        # Executed on every exit path of the block above, early returns included.
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        # NOTE(review): presumably `fail()` changes the state away from u'Updating', so
        # this branch is only taken after a successful import -- confirm.
        if self.status.state == u'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={u'category': u'chapter'},
                    settings={u'is_entrance_exam': True})[0]

                metadata = {
                    u'entrance_exam_id':
                    text_type(entrance_exam_chapter.location)
                }
                CourseMetadata.update_from_dict(metadata, course, user)
                from contentstore.views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(u'Course %s Entrance exam imported', course.id)
Exemplo n.º 24
0
def _import_handler(request, courselike_key, root_name, successful_url,
                    context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.

    For JSON requests, receives a (possibly chunked) ``.tar.gz`` upload from
    ``request.FILES['course-data']``, appends it to a temporary file and, once
    the final chunk has arrived, extracts the archive and imports it with
    ``import_func``.  Progress is recorded through ``_save_request_status``
    using these stage numbers (a negative value marks a failure):

        0 - upload started
        1 - upload complete
        2 - uploaded file extracted
        3 - extracted file verified
        4 - import successful

    Arguments:
        request: the incoming HTTP request.
        courselike_key: key of the course or library being imported into.
        root_name: name of the root XML file expected inside the archive.
            When it equals ``COURSE_ROOT`` the entrance exam milestone is
            removed before, and re-created after, the import.
        successful_url: exposed to the HTML template as
            ``successful_import_redirect_url``.
        context_name: template context key under which
            ``courselike_module`` is exposed.
        courselike_module: the existing course or library module.
        import_func: callable performing the actual XML import; it is
            called with the modulestore, the user id, the repository root
            and the directory that contains ``root_name``.

    Returns:
        A ``JsonResponse`` for JSON requests, the rendered ``import.html``
        page for HTML GET requests, and ``HttpResponseNotFound`` otherwise.

    Raises:
        PermissionDenied: if the user has no author access to the course.
        NotImplementedError: for JSON GET requests.
    """
    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT',
                                              'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Bind these before the ``try`` so that the error handler below
            # can always reference them, even when the failure happens before
            # they are computed (e.g. no 'course-data' entry in request.FILES).
            courselike_string = None
            course_dir = None

            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                subdir = base64.urlsafe_b64encode(repr(courselike_key))
                course_dir = data_root / subdir
                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault(
                    "import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                # If the course currently has an entrance exam, remove its
                # milestone content reference before importing; it is added
                # back after a successful import (see the ``finally`` below).
                if root_name == COURSE_ROOT:
                    if courselike_module.entrance_exam_enabled:
                        remove_entrance_exam_milestone_reference(
                            request, courselike_key)
                        log.info(
                            "entrance exam milestone content reference for course %s has been removed",
                            courselike_module.id)

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg':
                            _('We only support uploading a .tar.gz file.'),
                            'Stage':
                            -1
                        },
                        status=415)

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to %s', temp_filepath)

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(
                        request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:  # Single chunk
                    # no Content-Range header, so make one that will work:
                    # stop == end - 1 makes it count as the final chunk below.
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'], size)
                        return JsonResponse(
                            {
                                'ErrMsg':
                                _('File upload corrupted. Please try again'),
                                'Stage':
                                -1
                            },
                            status=409)
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(
                            content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name":
                            filename,
                            "size":
                            size,
                            "deleteUrl":
                            "",
                            "deleteType":
                            "",
                            "url":
                            reverse_course_url('import_handler',
                                               courselike_key),
                            "thumbnailUrl":
                            ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                # Only touch the status / temp directory if we got far enough
                # to know them; otherwise this handler would itself crash.
                if courselike_string is not None:
                    _save_request_status(request, courselike_string, -1)
                if course_dir is not None and course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared",
                             courselike_key)

                log.exception("error importing course")
                return JsonResponse({
                    'ErrMsg': str(exception),
                    'Stage': -1
                },
                                    status=400)

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file,
                                       (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400)
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted",
                         courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg':
                            _('Could not find the {0} file in the package.').
                            format(root_name),
                            'Stage':
                            -2
                        },
                        status=415)

                # import_func is given paths relative to the repository root.
                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified",
                         courselike_key)
                _save_request_status(request, courselike_string, 3)

                with dog_stats_api.timer(
                        'courselike_import.time',
                        tags=[u"courselike:{}".format(courselike_key)]):
                    courselike_items = import_func(
                        modulestore(),
                        request.user.id,
                        settings.GITHUB_REPO_ROOT, [dirpath],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_id=courselike_key)

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful",
                         courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400)

            finally:
                # Runs on every exit path above (success, early return, error).
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared",
                             courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(
                        request, courselike_string,
                        -abs(session_status[courselike_string]))

                # status == 4 represents that course has been imported successfully.
                if session_status[
                        courselike_string] == 4 and root_name == COURSE_ROOT:
                    # Reload the course so we have the latest state
                    course = modulestore().get_course(courselike_key)
                    if course.entrance_exam_enabled:
                        entrance_exam_chapter = modulestore().get_items(
                            course.id,
                            qualifiers={'category': 'chapter'},
                            settings={'is_entrance_exam': True})[0]

                        metadata = {
                            'entrance_exam_id':
                            unicode(entrance_exam_chapter.location)
                        }
                        CourseMetadata.update_from_dict(
                            metadata, course, request.user)
                        add_entrance_exam_milestone(course.id,
                                                    entrance_exam_chapter)
                        log.info("Course %s Entrance exam imported", course.id)

            return JsonResponse({'Status': 'OK'})
    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url("import_status_handler",
                                        courselike_key,
                                        kwargs={'filename': "fillerName"})
        return render_to_response(
            'import.html', {
                context_name: courselike_module,
                'successful_import_redirect_url': successful_url,
                'import_status_url': status_url,
                'library': isinstance(courselike_key, LibraryLocator)
            })
    else:
        return HttpResponseNotFound()
Exemplo n.º 25
0
    def post(self, request, course_key_string):
        """
        The restful handler for importing a course or library.

        Receives a (possibly chunked) ``.tar.gz`` upload from
        ``request.FILES['course-data']``, appends it to a temporary file and,
        once the final chunk has arrived, extracts the archive and imports
        it.  Progress is recorded through ``self._save_request_status`` using
        these stage numbers (a negative value marks a failure):

            0 - upload started
            1 - upload complete
            2 - uploaded file extracted
            3 - extracted file verified
            4 - import successful

        Arguments:
            request: the incoming HTTP request.
            course_key_string: serialized key of the target course or
                library; a ``LibraryLocator`` selects the library import.

        Returns:
            A ``JsonResponse``: ``{'status': 'OK'}`` after a successful
            import, a ``files`` listing while more chunks are expected, or an
            ``error_message``/``stage`` payload with a 4xx status on failure.
        """
        # Function-scope import: needed for the EEXIST comparison below.
        import errno

        courselike_key = CourseKey.from_string(course_key_string)
        library = isinstance(courselike_key, LibraryLocator)

        if library:
            root_name = LIBRARY_ROOT
            import_func = import_library_from_xml
        else:
            root_name = COURSE_ROOT
            import_func = import_course_from_xml

        filename = request.FILES['course-data'].name
        courselike_string = unicode(courselike_key) + filename
        data_root = path(settings.GITHUB_REPO_ROOT)
        subdir = base64.urlsafe_b64encode(repr(courselike_key))
        course_dir = data_root / subdir

        # NOTE(review): presumably the key _save_request_status writes to;
        # confirm against that helper.
        status_key = "import_export.import.status:{}|{}".format(
            request.user.username,
            courselike_string
        )

        # Do everything in a try-except block to make sure everything is
        # properly cleaned up.
        try:
            # Cache the import progress
            self._save_request_status(request, courselike_string, 0)
            if not filename.endswith('.tar.gz'):
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message': _(
                            'We only support uploading a .tar.gz file.'
                        ),
                        'stage': -1
                    },
                    status=415
                )

            temp_filepath = course_dir / filename

            # Only handle exceptions caused by the directory already existing,
            # to avoid a potential race condition caused by the "check and go"
            # method.
            try:
                os.makedirs(course_dir)
            except OSError as exc:
                # The EEXIST constant lives in the ``errno`` module, not on
                # the exception instance.
                if exc.errno != errno.EEXIST:
                    raise

            logging.debug('importing course to %s', temp_filepath)

            # Get upload chunks byte ranges
            try:
                matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                content_range = matches.groupdict()
            except KeyError:    # Single chunk
                # no Content-Range header, so make one that will work:
                # stop == end - 1 makes it count as the final chunk below.
                content_range = {'start': 0, 'stop': 1, 'end': 2}

            # stream out the uploaded files in chunks to disk
            if int(content_range['start']) == 0:
                mode = "wb+"
            else:
                mode = "ab+"
                size = os.path.getsize(temp_filepath)
                # Check to make sure we haven't missed a chunk
                # This shouldn't happen, even if different instances are
                # handling the same session, but it's always better to catch
                # errors earlier.
                if size < int(content_range['start']):
                    self._save_request_status(request, courselike_string, -1)
                    log.warning(
                        "Reported range %s does not match size downloaded so "
                        "far %s",
                        content_range['start'],
                        size
                    )
                    return JsonResponse(
                        {
                            'error_message': _(
                                'File upload corrupted. Please try again'
                            ),
                            'stage': -1
                        },
                        status=409
                    )
                # The last request sometimes comes twice. This happens because
                # nginx sends a 499 error code when the response takes too long.
                elif size > int(content_range['stop']) \
                        and size == int(content_range['end']):
                    return JsonResponse({'ImportStatus': 1})

            with open(temp_filepath, mode) as temp_file:
                for chunk in request.FILES['course-data'].chunks():
                    temp_file.write(chunk)

            size = os.path.getsize(temp_filepath)

            if int(content_range['stop']) != int(content_range['end']) - 1:
                # More chunks coming
                return JsonResponse({
                    "files": [{
                        "name": filename,
                        "size": size,
                        "delete_url": "",
                        "delete_type": "",
                        "thumbnail_url": ""
                    }]
                })
        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            self._save_request_status(request, courselike_string, -1)
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info(
                    "Course import %s: Temp data cleared", courselike_key
                )

            log.exception("error importing course")
            return JsonResponse(
                {
                    'error_message': str(exception),
                    'stage': -1
                },
                status=400
            )

        # try-finally block for proper clean up after receiving last chunk.
        try:
            # This was the last chunk.
            log.info("Course import %s: Upload complete", courselike_key)
            self._save_request_status(request, courselike_string, 1)

            tar_file = tarfile.open(temp_filepath)
            try:
                safetar_extractall(
                    tar_file,
                    (course_dir + '/').encode('utf-8'))
            except SuspiciousOperation as exc:
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message': 'Unsafe tar file. Aborting import.',
                        'suspicious_operation_message': exc.args[0],
                        'stage': -1
                    },
                    status=400
                )
            finally:
                tar_file.close()

            log.info(
                "Course import %s: Uploaded file extracted", courselike_key
            )
            self._save_request_status(request, courselike_string, 2)

            # find the 'course.xml' file
            def get_all_files(directory):
                """
                For each file in the directory, yield a 2-tuple of (file-name,
                directory-path)
                """
                for dirpath, _dirnames, filenames in os.walk(directory):
                    for filename in filenames:
                        yield (filename, dirpath)

            def get_dir_for_fname(directory, filename):
                """
                Returns the dirpath for the first file found in the directory
                with the given name.  If there is no file in the directory with
                the specified name, return None.
                """
                for fname, dirpath in get_all_files(directory):
                    if fname == filename:
                        return dirpath
                return None

            dirpath = get_dir_for_fname(course_dir, root_name)
            if not dirpath:
                self._save_request_status(request, courselike_string, -2)
                return JsonResponse(
                    {
                        'error_message': _(
                            'Could not find the {root_xml_file} file in the package.'
                        ).format(root_xml_file=root_name),
                        'stage': -2
                    },
                    status=415
                )

            # import_func is given paths relative to the repository root.
            dirpath = os.path.relpath(dirpath, data_root)
            logging.debug('found %s at %s', root_name, dirpath)

            log.info(
                "Course import %s: Extracted file verified",
                courselike_key
            )
            self._save_request_status(request, courselike_string, 3)

            with dog_stats_api.timer(
                'courselike_import.time',
                tags=[u"courselike:{}".format(courselike_key)]
            ):
                courselike_items = import_func(
                    modulestore(),
                    request.user.id,
                    settings.GITHUB_REPO_ROOT,
                    [dirpath],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_id=courselike_key,
                )

            new_location = courselike_items[0].location
            logging.debug('new course at %s', new_location)

            log.info(
                "Course import %s: Course import successful", courselike_key
            )
            self._save_request_status(request, courselike_string, 4)

        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            log.exception(
                "error importing course"
            )
            # NOTE(review): cache.get() returns None for a missing/evicted
            # key, which would make the negation below raise - confirm the
            # status entry cannot expire mid-import.
            return JsonResponse(
                {
                    'error_message': str(exception),
                    'stage': -cache.get(status_key)
                },
                status=400
            )

        finally:
            # Runs on every exit path above (success, early return, error).
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info(
                    "Course import %s: Temp data cleared", courselike_key  # pylint: disable=no-value-for-parameter
                )
            # set failed stage number with negative sign in case of an
            # unsuccessful import
            if cache.get(status_key) != 4:
                self._save_request_status(
                    request,
                    courselike_string,
                    -abs(cache.get(status_key))
                )

        return JsonResponse({'status': 'OK'})
Exemplo n.º 26
0
def send_course_email(entry_id, email_id, to_list, global_email_context,
                      subtask_status_dict):
    """
    Send one course email to a batch of recipients as a subtask.

    Arguments:
      * `entry_id`: id of the InstructorTask object on which progress is recorded.
      * `email_id`: id of the CourseEmail model to send.
      * `to_list`: list of recipients, each a dict with the keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict of values specific to this email but shared by
        every recipient; used to fill slots in the email template.  'name' and
        'email' are not part of it, they come from `to_list`.
      * `subtask_status_dict`: dict describing the current status, with the keys:

        'task_id' : id of subtask, used to carry task information across retries.
        'attempted' : number of attempts -- should equal succeeded plus failed
        'succeeded' : number that succeeded in processing
        'skipped' : number that were not processed.
        'failed' : number that failed during processing
        'retried_nomax' : number of retries for conditions that should not have
            a maximum count applied
        'retried_withmax' : number of retries for conditions that should have
            a maximum count applied
        'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS)

        On the initial call most values are zero; they may differ when the task
        runs as part of a retry.

    Mail goes to every address in `to_list` that is not in the Optout table, as a
    multi-part message (plain text and html).  The InstructorTask object is updated
    with status information (sends, failures, skips) and with the number of
    subtasks completed.

    Returns the resulting subtask status as a dict on success; re-raises the
    underlying exception when sending failed or must be retried.
    """
    status = SubtaskStatus.from_dict(subtask_status_dict)
    subtask_id = status.task_id
    recipient_count = len(to_list)

    preparing_msg = (
        u"Preparing to send email %s to %d recipients as subtask %s "
        u"for instructor task %d: context = %s, status=%s"
    )
    log.info(
        preparing_msg,
        email_id,
        recipient_count,
        subtask_id,
        entry_id,
        global_email_context,
        status,
    )

    # Make sure this subtask really belongs to the InstructorTask entry and has
    # not already been completed.  Duplicate subtasks can appear when the parent
    # task runs twice (e.g. Celery lost its broker connection and requeued the
    # running tasks), and Celery may also run this very task twice for the same
    # reason.  perform_delegate_email_batches() is expected to catch a resubmitted
    # parent, but the check is repeated here as a safety net.  A failed check
    # raises, which fails this subtask immediately.
    check_subtask_is_valid(entry_id, subtask_id, status)

    try:
        title = global_email_context['course_title']
        timer = dog_stats_api.timer(
            'course_email.single_task.time.overall',
            tags=[_statsd_tag(title)],
        )
        with timer:
            updated_status, send_error = _send_course_email(
                entry_id,
                email_id,
                to_list,
                global_email_context,
                status,
            )
    except Exception:
        # Something completely unexpected happened; record the failure on the
        # entry before propagating it.
        log.exception("Send-email task %s for email %s: failed unexpectedly!",
                      subtask_id, email_id)
        # There is no way to tell how far the mailing got, so every recipient
        # is counted as failed -- that at least keeps the counts consistent.
        status.increment(failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, subtask_id, status)
        raise

    if send_error is not None:
        if isinstance(send_error, RetryTaskError):
            # Celery needs the RetryTaskError to be raised in order to retry.
            # Progress made before the retry condition is assumed to have been
            # saved before the retry call, so logging is all that is left.
            log.warning("Send-email task %s for email %s: being retried",
                        subtask_id, email_id)
            raise send_error  # pylint: disable=raising-bad-type

        log.error("Send-email task %s for email %s: failed: %s",
                  subtask_id, email_id, send_error)
        update_subtask_status(entry_id, subtask_id, updated_status)
        raise send_error  # pylint: disable=raising-bad-type

    # No error: store the progress on the InstructorTask object.
    log.info("Send-email task %s for email %s: succeeded", subtask_id,
             email_id)
    update_subtask_status(entry_id, subtask_id, updated_status)

    # Hand the status back in a form Celery can serialize into JSON.
    log.info("Send-email task %s for email %s: returning status %s",
             subtask_id, email_id, updated_status)
    return updated_status.to_dict()
Exemplo n.º 27
0
def _send_course_email(entry_id, email_id, to_list, global_email_context,
                       subtask_status):
    """
    Performs the email sending task.

    Sends an email to a list of recipients, one message per recipient, over a
    single mail connection.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It must contain 'course_title'.  It does not include 'name' and 'email',
        which are filled in per recipient from the to_list.
      * `subtask_status` : object of class SubtaskStatus representing current status.  It is
        updated in place (succeeded/failed/skipped/retry counters and state) as sending proceeds.

    On the first attempt (retry count of zero) recipients found in the Optout table are
    filtered out of to_list and counted as skipped.  Emails are sent multi-part, in both
    plain text and html.

    Recipients are processed from the end of the list and popped once handled, so at any
    point the list holds exactly the recipients that still need to be emailed.  That is
    what gets handed to `_submit_for_retry` when a retryable error occurs.

    Returns a tuple of two values:
      * First value is a SubtaskStatus object which represents current progress at the end of this call.

      * Second value is None on success, or the exception that caused a fatal failure.  In the
        fatal case, the recipients that were not sent to have already been added to the
        'failed' count.

    For retryable errors the return value is whatever `_submit_for_retry` returns
    (presumably a tuple of the same shape -- confirm against that helper).

    Raises CourseEmail.DoesNotExist if no CourseEmail with id `email_id` exists.
    """
    # Get information from current task's request:
    parent_task_id = InstructorTask.objects.get(pk=entry_id).task_id
    task_id = subtask_status.task_id
    total_recipients = len(to_list)
    recipient_num = 0
    total_recipients_successful = 0
    total_recipients_failed = 0
    # Counts how many times each address was processed, to report duplicates at the end.
    recipients_info = Counter()

    log.info(
        "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, TotalRecipients: %s",
        parent_task_id, task_id, email_id, total_recipients)

    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Could not find email to send.",
            parent_task_id, task_id, email_id)
        raise

    # Exclude optouts (if not a retry):
    # Note that we don't have to do the optout logic at all if this is a retry,
    # because we have presumably already performed the optout logic on the first
    # attempt.  Anyone on the to_list on a retry has already passed the filter
    # that existed at that time, and we don't need to keep checking for changes
    # in the Optout list.
    if subtask_status.get_retry_count() == 0:
        to_list, num_optout = _filter_optouts_from_recipients(
            to_list, course_email.course_id)
        subtask_status.increment(skipped=num_optout)

    course_title = global_email_context['course_title']

    # use the email from address in the CourseEmail, if it is present, otherwise compute it
    from_addr = course_email.from_addr or _get_source_address(
        course_email.course_id, course_title)

    # use the CourseEmailTemplate that was associated with the CourseEmail
    course_email_template = course_email.get_template()

    # Bind the name before entering the try block: if get_connection() itself raises,
    # the `finally` clause below must not touch an unbound local.  Doing so would raise
    # UnboundLocalError, mask the real error and discard the retry that the handlers
    # below have just scheduled.
    connection = None
    try:
        connection = get_connection()
        connection.open()

        # Define context values to use in all course emails:
        email_context = {'name': '', 'email': ''}
        email_context.update(global_email_context)

        while to_list:
            # Update context with user-specific values from the user at the end of the list.
            # At the end of processing this user, they will be popped off of the to_list.
            # That way, the to_list will always contain the recipients remaining to be emailed.
            # This is convenient for retries, which will need to send to those who haven't
            # yet been emailed, but not send to those who have already been sent to.
            recipient_num += 1
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']
            email_context['user_id'] = current_recipient['pk']
            email_context['course_id'] = course_email.course_id

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(
                course_email.text_message, email_context)
            html_msg = course_email_template.render_htmltext(
                course_email.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(course_email.subject,
                                               plaintext_msg,
                                               from_addr, [email],
                                               connection=connection)
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we have gotten the rate limiter.  This is not very high-tech,
            # but if a task has been retried for rate-limiting reasons, then we sleep
            # for a period of time between all emails within this task.  Choice of
            # the value depends on the number of workers that might be sending email in
            # parallel, and what the SES throttle rate is.
            if subtask_status.retried_nomax > 0:
                sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS)

            try:
                log.info(
                    "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, \
                    Recipient name: %s, Email address: %s", parent_task_id,
                    task_id, email_id, recipient_num, total_recipients,
                    current_recipient['profile__name'], email)
                with dog_stats_api.timer(
                        'course_email.single_send.time.overall',
                        tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SMTPDataError), Task: %s, SubTask: %s, EmailId: %s, \
                    Recipient num: %s/%s, Email address: %s", parent_task_id,
                    task_id, email_id, recipient_num, total_recipients, email)
                if 400 <= exc.smtp_code < 500:
                    # Re-raise so that one of the outer handlers deals with it; the current
                    # recipient has not been popped, so a retry will include them again.
                    raise
                else:
                    # This will fall through and not retry the message.
                    log.warning(
                        'BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, \
                        Email not delivered to %s due to error %s',
                        parent_task_id, task_id, email_id, recipient_num,
                        total_recipients, email, exc.smtp_error)
                    dog_stats_api.increment('course_email.error',
                                            tags=[_statsd_tag(course_title)])
                    subtask_status.increment(failed=1)

            except SINGLE_EMAIL_FAILURE_ERRORS as exc:
                # This will fall through and not retry the message.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SINGLE_EMAIL_FAILURE_ERRORS), Task: %s, SubTask: %s, \
                    EmailId: %s, Recipient num: %s/%s, Email address: %s, Exception: %s",
                    parent_task_id, task_id, email_id, recipient_num,
                    total_recipients, email, exc)
                dog_stats_api.increment('course_email.error',
                                        tags=[_statsd_tag(course_title)])
                subtask_status.increment(failed=1)

            else:
                total_recipients_successful += 1
                log.info(
                    "BulkEmail ==> Status: Success, Task: %s, SubTask: %s, EmailId: %s, \
                    Recipient num: %s/%s, Email address: %s,", parent_task_id,
                    task_id, email_id, recipient_num, total_recipients, email)
                dog_stats_api.increment('course_email.sent',
                                        tags=[_statsd_tag(course_title)])
                if settings.BULK_EMAIL_LOG_SENT_EMAILS:
                    log.info('Email with id %s sent to %s', email_id, email)
                else:
                    log.debug('Email with id %s sent to %s', email_id, email)
                subtask_status.increment(succeeded=1)

            # Pop the user that was emailed off the end of the list only once they have
            # been processed (sent, or failed in a way that is not retried).  That way, if
            # there were a failure that needed to be retried, the user is still on the list.
            recipients_info[email] += 1
            to_list.pop()

        log.info(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Successful Recipients: %s/%s, \
            Failed Recipients: %s/%s", parent_task_id, task_id, email_id,
            total_recipients_successful, total_recipients,
            total_recipients_failed, total_recipients)
        # Report any address that appeared more than once in this subtask's recipient list.
        duplicate_recipients = [
            "{0} ({1})".format(email, repetition)
            for email, repetition in recipients_info.most_common()
            if repetition > 1
        ]
        if duplicate_recipients:
            log.info(
                "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Duplicate Recipients [%s]: [%s]",
                parent_task_id, task_id, email_id, len(duplicate_recipients),
                ', '.join(duplicate_recipients))

    except INFINITE_RETRY_ERRORS as exc:
        dog_stats_api.increment('course_email.infinite_retry',
                                tags=[_statsd_tag(course_title)])
        # Increment the "retried_nomax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_nomax=1, state=RETRY)
        return _submit_for_retry(entry_id,
                                 email_id,
                                 to_list,
                                 global_email_context,
                                 exc,
                                 subtask_status,
                                 skip_retry_max=True)

    except LIMITED_RETRY_ERRORS as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # Errors caught are those that indicate a temporary condition that might succeed on retry.
        dog_stats_api.increment('course_email.limited_retry',
                                tags=[_statsd_tag(course_title)])
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(entry_id,
                                 email_id,
                                 to_list,
                                 global_email_context,
                                 exc,
                                 subtask_status,
                                 skip_retry_max=False)

    except BULK_EMAIL_FAILURE_ERRORS as exc:
        dog_stats_api.increment('course_email.error',
                                tags=[_statsd_tag(course_title)])
        num_pending = len(to_list)
        log.exception(
            ('Task %s: email with id %d caused send_course_email task to fail '
             'with "fatal" exception.  %d emails unsent.'), task_id, email_id,
            num_pending)
        # Update counters with progress to date, counting unsent emails as failures,
        # and set the state to FAILURE:
        subtask_status.increment(failed=num_pending, state=FAILURE)
        return subtask_status, exc

    except Exception as exc:  # pylint: disable=broad-except
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # These are unexpected errors.  Since they might be due to a temporary condition that might
        # succeed on retry, we give them a retry.
        dog_stats_api.increment('course_email.limited_retry',
                                tags=[_statsd_tag(course_title)])
        log.exception(
            ('Task %s: email with id %d caused send_course_email task to fail '
             'with unexpected exception.  Generating retry.'), task_id,
            email_id)
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(entry_id,
                                 email_id,
                                 to_list,
                                 global_email_context,
                                 exc,
                                 subtask_status,
                                 skip_retry_max=False)

    else:
        # All went well.  Update counters with progress to date,
        # and set the state to SUCCESS:
        subtask_status.increment(state=SUCCESS)
        # Successful completion is marked by an exception value of None.
        return subtask_status, None
    finally:
        # Clean up at the end, but only if a connection object was actually obtained.
        if connection is not None:
            connection.close()
Exemplo n.º 28
0
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id,
                                task_input, action_name):
    """
    Visit StudentModule instances and apply `update_fcn` to each of them.

    The set of problems to operate on is taken from `task_input`:

      * 'problem_url': a single problem, mapped into `course_id`.
      * 'entrance_exam_url': every problem returned by `get_problems_in_section`
        for that section.  When present, these replace anything collected from
        'problem_url'.
      * 'student': identifier handed to `_get_modules_to_update` together with
        `filter_fcn` to select the StudentModules to visit.

    For each selected StudentModule, `update_fcn` is called with three arguments:
    the descriptor of the problem named by the module's `module_state_key`, the
    StudentModule itself, and `task_input`.  Its return value must be one of
    UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; the
    matching counter on the task progress is incremented.  Any other value raises
    UpdateProblemModuleStateError.

    `_entry_id` is accepted for interface compatibility and is not used here.

    Returns the result of `TaskProgress.update_task_state()`.  As documented for
    this task, that is a dict of the task's results with the following keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible updates to attempt
          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Because this is run internal to a task, it does not catch exceptions.  These are
    allowed to pass up to the next level, so that it can set the failure modes and
    capture the error trace in the InstructorTask and the result object.

    """
    started_at = time()

    # Pull the task parameters out of the input dict.
    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')
    is_override_task = action_name == ugettext_noop('overridden')

    target_keys = []
    # Maps the string form of a problem location to its descriptor.
    descriptors_by_location = {}

    # A single problem: build its usage key and look up its descriptor.
    if problem_url:
        single_key = UsageKey.from_string(problem_url).map_into_course(
            course_id)
        target_keys.append(single_key)
        descriptors_by_location[unicode(single_key)] = modulestore().get_item(
            single_key)

    # An entrance exam: take every problem in the section instead.
    if entrance_exam_url:
        descriptors_by_location = get_problems_in_section(entrance_exam_url)
        target_keys = [
            UsageKey.from_string(location)
            for location in descriptors_by_location
        ]

    student_modules = _get_modules_to_update(
        course_id, target_keys, student_identifier, filter_fcn,
        is_override_task)

    progress = TaskProgress(action_name, len(student_modules), started_at)
    progress.update_task_state()

    for student_module in student_modules:
        progress.attempted += 1
        descriptor = descriptors_by_location[unicode(
            student_module.module_state_key)]
        # Deliberately no try/except: an error propagates so that the task is
        # marked as FAILED, with a stack trace.
        with dog_stats_api.timer(
                'instructor_tasks.module.time.step',
                tags=[u'action:{name}'.format(name=action_name)]):
            outcome = update_fcn(descriptor, student_module, task_input)
            if outcome == UPDATE_STATUS_SUCCEEDED:
                # The update_fcn performed some kind of work.  Logging of
                # failures is left to the update_fcn itself.
                progress.succeeded += 1
            elif outcome == UPDATE_STATUS_FAILED:
                progress.failed += 1
            elif outcome == UPDATE_STATUS_SKIPPED:
                progress.skipped += 1
            else:
                raise UpdateProblemModuleStateError(
                    "Unexpected update_status returned: {}".format(outcome))

    return progress.update_task_state()