class ValidateEnrollmentForEvents(object):
    """
    Collects and validates events for a given user in a given course.
    """
    def __init__(self, course_id, user_id, interval, events, **kwargs):
        self.course_id = course_id
        self.user_id = user_id
        self.interval = interval
        self.creation_timestamp = None
        self.tuple_output = kwargs.get('tuple_output')
        self.include_nonstate_changes = kwargs.get('include_nonstate_changes')
        self.generate_before = kwargs.get('generate_before')
        self.lower_bound_date_string = kwargs.get('lower_bound_date_string')
        self.earliest_timestamp = kwargs.get('earliest_timestamp')
        self.expected_validation = kwargs.get('expected_validation')

        if self.tuple_output:
            self.generate_output = self._create_tuple
        else:
            self.factory = SyntheticEventFactory(
                event_source='server',
                synthesizer='enrollment_validation',
            )
            self.generate_output = self._synthetic_event

        # Create list of events in reverse order, as processing goes backwards
        # from validation states.
        self.sorted_events = [
            EnrollmentEvent(timestamp, event_type, mode, validation_info)
            for timestamp, event_type, mode, validation_info in sorted(events, reverse=True)
        ]

        self._reorder_within_dumps()

        if self.expected_validation:
            self._check_for_missing_validation()

        # Add a marker event to signal the beginning of the interval.
        initial_state = EnrollmentEvent(None, SENTINEL, mode='honor', validation_info=None)
        self.sorted_events.append(initial_state)

    def _reorder_within_dumps(self):
        """
        Fix the timestamp of a validation event if an enrollment event occurs during the dump.
        """
        num_events = len(self.sorted_events) - 1
        for index in range(num_events):
            event = self.sorted_events[index]
            prev_event = self.sorted_events[index + 1]
            is_nonvalidate_during_validate = (
                event.event_type == VALIDATED and
                prev_event.event_type != VALIDATED and
                event.is_during_dump(prev_event.timestamp)
            )
            if is_nonvalidate_during_validate:
                is_active_is_inconsistent = (
                    (event.is_active and prev_event.event_type == DEACTIVATED) or
                    (not event.is_active and prev_event.event_type == ACTIVATED)
                )
                mode_is_inconsistent = (event.mode != prev_event.mode and prev_event.event_type == MODE_CHANGED)

                if is_active_is_inconsistent or mode_is_inconsistent:
                    # Change the timestamp of the validation event to precede
                    # the other event, and swap them.
                    event.timestamp = add_microseconds(prev_event.timestamp, -1)
                    self.sorted_events[index] = prev_event
                    self.sorted_events[index + 1] = event

    def _check_for_missing_validation(self):
        """Check last event to see if it is missing an expected validation event."""
        if len(self.sorted_events) > 0 and self.sorted_events[0].timestamp < self.expected_validation:
            validation_info = {
                'is_active': False,
                'created': None,
                'dump_start': None,
                'dump_end': None,
            }
            missing_event = [EnrollmentEvent(
                self.expected_validation, MISSING, mode=None, validation_info=validation_info
            )]
            missing_event.extend(self.sorted_events)
            self.sorted_events = missing_event

    def missing_enrolled(self):
        """
        A synthetic event is yielded for each transition in user's events for which a real event is missing.

        Yields:
            json-encoded string representing a synthetic event, or a tuple.
        """
        # The last element of the list is a placeholder indicating the beginning of the interval.
        # Don't process it.
        num_events = len(self.sorted_events) - 1

        self._initialize_state(self.sorted_events[0])
        all_missing_events = []
        for index in range(num_events):
            event = self.sorted_events[index + 1]
            missing_events = self._check_event(event)
            if missing_events:
                all_missing_events.extend(missing_events)

        return all_missing_events

    def _create_tuple(self, timestamp, event_type, mode, reason, after=None, before=None):
        """Returns a tuple representation of the output, for TSV-based debugging."""
        datestamp = eventlog.timestamp_to_datestamp(timestamp)
        return datestamp, (self.course_id, self.user_id, timestamp, event_type, mode, reason, after, before)

    def _synthetic_event(self, timestamp, event_type, mode, reason, after=None, before=None):
        """Create a synthetic event."""
        # data specific to course enrollment events:
        event_data = {
            'course_id': self.course_id,
            'user_id': self.user_id,
            'mode': mode,
        }

        event_properties = {
            # main properties:
            'time': timestamp,
            'event_type': event_type,
            # stuff for context:
            'user_id': self.user_id,
            'course_id': self.course_id,
            'org_id': opaque_key_util.get_org_id_for_course(self.course_id),
            # stuff for synthesized:
            'reason': reason,
        }

        event = self.factory.create_event_dict(event_data, **event_properties)
        synthesized = event['synthesized']
        if after:
            synthesized['after_time'] = after
        if before:
            synthesized['before_time'] = before

        datestamp = eventlog.timestamp_to_datestamp(timestamp)
        return datestamp, json.dumps(event)

    def _truncate_timestamp(self, timestamp):
        """Makes sure that timestamp is no earlier than limit specified, if any."""
        if self.earliest_timestamp and timestamp < self.earliest_timestamp:
            return self.earliest_timestamp
        else:
            return timestamp

    def _get_fake_timestamp(self, after, before):
        """
        Pick a time in an interval.

        Picks a microsecond after `after`, else a microsecond before `before`.

        Input and output values are ISO format strings.
        """
        # Just pick the time at the beginning of the interval.
        if after:
            # Add a microsecond to 'after'
            return add_microseconds(after, 1)
        else:
            # Subtract a microsecond from 'before'
            return add_microseconds(before, -1)

    def _get_reason_string(self, prev_event, curr_event_string, curr_mode=None):
        """Provide a readable string giving the reason for generating the synthetic event."""
        if curr_mode:
            return "{prev} => {curr} ({prev_mode}=>{curr_mode})".format(
                prev=prev_event.get_state_string(),
                curr=curr_event_string,
                prev_mode=prev_event.mode,
                curr_mode=curr_mode,
            )
        else:
            return "{prev} => {curr}".format(
                prev=prev_event.get_state_string(),
                curr=curr_event_string,
            )

    def _initialize_state(self, event):
        """Define initial values for validation state machine."""
        self._update_state(event)
        # If the most-recent event is a mode-change event, then we don't
        # know from it what the activation is.
        if event.event_type == MODE_CHANGED:
            self.activation_type = None

    def _update_state(self, event):
        """Define current values for validation state machine."""
        # Some events define an activation state.
        if event.event_type != MODE_CHANGED:
            self.activation_type = event.event_type
            self.activation_state = event.get_state_string()
            self.activation_timestamp = event.timestamp

        # Most events set mode.
        self.mode_changed = (event.event_type == MODE_CHANGED)
        if event.event_type != MISSING:
            self.current_mode = event.mode
            self.mode_type = event.event_type
            self.mode_state = event.get_state_string()
            self.mode_timestamp = event.timestamp
        else:
            self.current_mode = None

        # Only validation events define a created timestamp and activation state.
        if event.event_type == VALIDATED:
            self.currently_active = event.is_active

            if self.creation_timestamp:
                # compare with previously-viewed (i.e. later-in-time) validation:
                if event.created != self.creation_timestamp:
                    log.error("Encountered validation with different creation timestamp: %s => %s",
                              event.created, self.creation_timestamp)
            # Use the earliest validation:
            self.creation_timestamp = event.created

    def _check_for_mode_change(self, prev_event, last_timestamp):
        """Check if a mode-change event should be synthesized."""
        # There are several reasons to suppress a mode-change.
        # The most obvious is if there is no change in mode.
        # Other cases include:
        #  * No "current" mode (e.g. current event is "MISSING").
        #  * Current event is known to reset the mode, so no comparison
        #    is needed (or possible).  This includes MODE_CHANGED and ACTIVATE
        #    events.
        #  * No previous event (i.e. previous event is "SENTINEL").
        suppress_mode_change = (
            prev_event.mode == self.current_mode or
            not self.current_mode or
            self.mode_changed or
            self.mode_type == ACTIVATED or
            prev_event.event_type == SENTINEL
        )

        if suppress_mode_change:
            return []
        else:
            curr = self.mode_timestamp
            timestamp = self._get_fake_timestamp(last_timestamp, curr)
            reason = self._get_reason_string(prev_event, self.mode_state, self.current_mode)
            return [self.generate_output(timestamp, MODE_CHANGED, self.current_mode, reason, last_timestamp, curr)]

    def _check_on_activated(self, generate_output_for_event):
        """Check if a deactivation event should be synthesized after an activation event."""
        if self.activation_type == ACTIVATED and self.include_nonstate_changes:
            # Duplicate activate event (a/a).
            return [generate_output_for_event(DEACTIVATED)]
        elif self.activation_type == DEACTIVATED:
            pass  # normal case
        elif self.activation_type == VALIDATED and self.currently_active:
            pass  # normal case
        elif self.activation_type == VALIDATED and not self.currently_active:
            # Missing deactivate event (a/vi)
            return [generate_output_for_event(DEACTIVATED)]
        elif self.activation_type == MISSING:
            # Missing deactivate event (a/m)
            return [generate_output_for_event(DEACTIVATED)]
        return []

    def _check_on_deactivated(self, generate_output_for_event):
        """Check if an activation event should be synthesized after a deactivation event."""
        if self.activation_type == ACTIVATED:
            pass  # normal case
        elif self.activation_type == DEACTIVATED and self.include_nonstate_changes:
            # Duplicate deactivate event (d/d).
            return [generate_output_for_event(ACTIVATED)]
        elif self.activation_type == VALIDATED and not self.currently_active:
            pass  # normal case
        elif self.activation_type == VALIDATED and self.currently_active:
            # Missing activate event (d/va)
            return [generate_output_for_event(ACTIVATED)]
        elif self.activation_type == MISSING and self.include_nonstate_changes:
            # Missing validation event (d/m) -- generate a second deactivate event
            # just to mark the inconsistency.
            return [generate_output_for_event(DEACTIVATED)]
        return []

    def _check_on_validation(self, prev_event, generate_output_for_event):
        """Check if an event should be synthesized after a validation event."""
        if self.activation_type == ACTIVATED:
            if prev_event.is_active and self.include_nonstate_changes:
                # Missing deactivate (va/a)
                return [generate_output_for_event(DEACTIVATED)]
        elif self.activation_type == DEACTIVATED:
            if not prev_event.is_active and self.include_nonstate_changes:
                # Missing activate (vi/d)
                return [generate_output_for_event(ACTIVATED)]
        elif self.activation_type == VALIDATED:
            if prev_event.is_active and not self.currently_active:
                # Missing deactivate (va/vi)
                return [generate_output_for_event(DEACTIVATED)]
            elif not prev_event.is_active and self.currently_active:
                # Missing activate (vi/va)
                return [generate_output_for_event(ACTIVATED)]
        return []

    def _check_earliest_event(self, reason):
        """Check if an event should be synthesized before the first real event."""
        curr = self.activation_timestamp

        if self.activation_type == ACTIVATED:
            pass  # normal case
        elif self.activation_type == DEACTIVATED:
            # If we had a validation after the deactivation,
            # and it provided a creation_timestamp within the interval,
            # then there should be an activate within the interval.
            if self.creation_timestamp and (
                    self.generate_before or
                    self.creation_timestamp >= self.lower_bound_date_string):
                timestamp = self._truncate_timestamp(self.creation_timestamp)
                return [(self.generate_output(
                    timestamp, ACTIVATED, self.current_mode, reason, self.creation_timestamp, curr
                ))]
            elif self.generate_before:
                # For now, hack the timestamp by making it a little before the deactivate,
                # so that it at least has a value.
                timestamp = self._get_fake_timestamp(None, curr)
                return [(self.generate_output(timestamp, ACTIVATED, self.current_mode, reason, None, curr))]

        elif self.activation_type == VALIDATED:
            creation_timestamp = self._truncate_timestamp(self.creation_timestamp)

            if not self.generate_before and self.creation_timestamp < self.lower_bound_date_string:
                # If we are validating only within an interval and the create_timestamp
                # is outside this interval, we can't know whether the events are really
                # missing or just not included.
                pass
            elif self.currently_active:
                return [(self.generate_output(
                    creation_timestamp, ACTIVATED, self.current_mode, reason, self.creation_timestamp, curr
                ))]
            elif self.include_nonstate_changes:
                # There may be missing Activate and Deactivate events, or there may
                # just be an inactive table row that was created as part of an enrollment
                # flow, but no enrollment was completed.
                event1 = self.generate_output(
                    creation_timestamp, ACTIVATED, self.current_mode, reason, self.creation_timestamp, curr
                )
                timestamp = self._get_fake_timestamp(creation_timestamp, curr)
                event2 = self.generate_output(
                    timestamp, DEACTIVATED, self.current_mode, reason, self.creation_timestamp, curr
                )
                return [event1, event2]
        return []

    def _check_event(self, prev_event):
        """Compare a previous event with current state generated from later events. """
        prev_timestamp_for_mode_change = prev_event.timestamp

        missing = []
        if self.activation_type is not None:
            reason = self._get_reason_string(prev_event, self.activation_state)
            timestamp = self._get_fake_timestamp(prev_event.timestamp, self.activation_timestamp)

            def generate_output_for_event(event_type):
                """Wrapper to generate a synthetic event with common values."""
                return self.generate_output(
                    timestamp, event_type, prev_event.mode, reason, prev_event.timestamp, self.activation_timestamp
                )

            prev_type = prev_event.event_type
            if prev_type == ACTIVATED:
                missing.extend(self._check_on_activated(generate_output_for_event))
            elif prev_type == DEACTIVATED:
                missing.extend(self._check_on_deactivated(generate_output_for_event))
            elif prev_type == VALIDATED:
                missing.extend(self._check_on_validation(prev_event, generate_output_for_event))
            elif prev_type == SENTINEL:
                missing.extend(self._check_earliest_event(reason))

            if missing:
                prev_timestamp_for_mode_change = timestamp

        # Check for mode change for all events:
        missing.extend(self._check_for_mode_change(prev_event, prev_timestamp_for_mode_change))

        # Finally, set state for the next one.
        self._update_state(prev_event)

        return missing
Exemplo n.º 2
0
class CourseEnrollmentValidationTaskMapTest(InitializeOpaqueKeysMixin,
                                            MapperTestMixin,
                                            unittest.TestCase):
    """
    Tests to verify that event log parsing by mapper works correctly.
    """
    def setUp(self):
        self.task_class = CourseEnrollmentValidationTask
        super(CourseEnrollmentValidationTaskMapTest, self).setUp()

        self.initialize_ids()

        self.user_id = 21
        self.timestamp = "2013-12-17T15:38:32.805444"
        self.mode = 'honor'

        self.factory = SyntheticEventFactory(
            timestamp=self.timestamp,
            event_source='server',
            event_type=ACTIVATED,
            synthesizer='enrollment_from_db',
            reason='db entry',
            user_id=self.user_id,
            course_id=self.course_id,
            org_id=self.org_id,
        )

        self.expected_key = (self.course_id, self.user_id)

    def _create_event_log_line(self, **kwargs):
        """Create an event log with test values, as a JSON string."""
        return json.dumps(self._create_event_dict(**kwargs))

    def _create_event_dict(self, **kwargs):
        """Create an event log with test values, as a dict."""
        event_data = {
            'course_id': self.course_id,
            'user_id': self.user_id,
            'mode': self.mode,
        }
        event = self.factory.create_event_dict(event_data, **kwargs)
        return event

    def test_non_enrollment_event(self):
        line = 'this is garbage'
        self.assert_no_map_output_for(line)

    def test_unparseable_enrollment_event(self):
        line = 'this is garbage but contains edx.course.enrollment'
        self.assert_no_map_output_for(line)

    def test_missing_event_type(self):
        event_dict = self._create_event_dict()
        event_dict['old_event_type'] = event_dict['event_type']
        del event_dict['event_type']
        line = json.dumps(event_dict)
        self.assert_no_map_output_for(line)

    def test_nonenroll_event_type(self):
        line = self._create_event_log_line(
            event_type='edx.course.enrollment.unknown')
        self.assert_no_map_output_for(line)

    def test_bad_datetime(self):
        line = self._create_event_log_line(time='this is a bogus time')
        self.assert_no_map_output_for(line)

    def test_bad_event_data(self):
        line = self._create_event_log_line(event=["not an event"])
        self.assert_no_map_output_for(line)

    def test_illegal_course_id(self):
        line = self._create_event_log_line(event={
            "course_id": ";;;;bad/id/val",
            "user_id": self.user_id
        })
        self.assert_no_map_output_for(line)

    def test_missing_user_id(self):
        line = self._create_event_log_line(event={"course_id": self.course_id})
        self.assert_no_map_output_for(line)

    def test_good_enroll_event(self):
        line = self._create_event_log_line()
        expected_value = (self.timestamp, ACTIVATED, self.mode, None)
        self.assert_single_map_output(line, self.expected_key, expected_value)

    def test_good_unenroll_event(self):
        line = self._create_event_log_line(event_type=DEACTIVATED)
        expected_value = (self.timestamp, DEACTIVATED, self.mode, None)
        self.assert_single_map_output(line, self.expected_key, expected_value)

    def test_good_mode_change_event(self):
        line = self._create_event_log_line(event_type=MODE_CHANGED)
        expected_value = (self.timestamp, MODE_CHANGED, self.mode, None)
        self.assert_single_map_output(line, self.expected_key, expected_value)

    def test_good_validation_event(self):
        validation_info = {
            'is_active': True,
            'created': '2012-07-24T12:37:32.000000',
            'dump_start': '2014-10-08T04:52:48.154228',
            'dump_end': '2014-10-08T04:57:38.145282',
        }
        event_info = {
            "course_id": self.course_id,
            "user_id": self.user_id,
            "mode": self.mode,
        }
        event_info.update(validation_info)
        line = self._create_event_log_line(event_type=VALIDATED,
                                           event=event_info)
        expected_value = (self.timestamp, VALIDATED, self.mode,
                          validation_info)
        self.assert_single_map_output(line, self.expected_key, expected_value)
class CourseEnrollmentValidationTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, unittest.TestCase):
    """
    Tests to verify that event log parsing by mapper works correctly.
    """
    def setUp(self):
        self.task_class = CourseEnrollmentValidationTask
        super(CourseEnrollmentValidationTaskMapTest, self).setUp()

        self.initialize_ids()

        self.user_id = 21
        self.timestamp = "2013-12-17T15:38:32.805444"
        self.mode = 'honor'

        self.factory = SyntheticEventFactory(
            timestamp=self.timestamp,
            event_source='server',
            event_type=ACTIVATED,
            synthesizer='enrollment_from_db',
            reason='db entry',
            user_id=self.user_id,
            course_id=self.course_id,
            org_id=self.org_id,
        )

        self.expected_key = (self.course_id, self.user_id)

    def _create_event_log_line(self, **kwargs):
        """Create an event log with test values, as a JSON string."""
        return json.dumps(self._create_event_dict(**kwargs))

    def _create_event_dict(self, **kwargs):
        """Create an event log with test values, as a dict."""
        event_data = {
            'course_id': self.course_id,
            'user_id': self.user_id,
            'mode': self.mode,
        }
        event = self.factory.create_event_dict(event_data, **kwargs)
        return event

    def test_non_enrollment_event(self):
        line = 'this is garbage'
        self.assert_no_map_output_for(line)

    def test_unparseable_enrollment_event(self):
        line = 'this is garbage but contains edx.course.enrollment'
        self.assert_no_map_output_for(line)

    def test_missing_event_type(self):
        event_dict = self._create_event_dict()
        event_dict['old_event_type'] = event_dict['event_type']
        del event_dict['event_type']
        line = json.dumps(event_dict)
        self.assert_no_map_output_for(line)

    def test_nonenroll_event_type(self):
        line = self._create_event_log_line(event_type='edx.course.enrollment.unknown')
        self.assert_no_map_output_for(line)

    def test_bad_datetime(self):
        line = self._create_event_log_line(time='this is a bogus time')
        self.assert_no_map_output_for(line)

    def test_bad_event_data(self):
        line = self._create_event_log_line(event=["not an event"])
        self.assert_no_map_output_for(line)

    def test_illegal_course_id(self):
        line = self._create_event_log_line(event={"course_id": ";;;;bad/id/val", "user_id": self.user_id})
        self.assert_no_map_output_for(line)

    def test_missing_user_id(self):
        line = self._create_event_log_line(event={"course_id": self.course_id})
        self.assert_no_map_output_for(line)

    def test_good_enroll_event(self):
        line = self._create_event_log_line()
        expected_value = (self.timestamp, ACTIVATED, self.mode, None)
        self.assert_single_map_output(line, self.expected_key, expected_value)

    def test_good_unenroll_event(self):
        line = self._create_event_log_line(event_type=DEACTIVATED)
        expected_value = (self.timestamp, DEACTIVATED, self.mode, None)
        self.assert_single_map_output(line, self.expected_key, expected_value)

    def test_good_mode_change_event(self):
        line = self._create_event_log_line(event_type=MODE_CHANGED)
        expected_value = (self.timestamp, MODE_CHANGED, self.mode, None)
        self.assert_single_map_output(line, self.expected_key, expected_value)

    def test_good_validation_event(self):
        validation_info = {
            'is_active': True,
            'created': '2012-07-24T12:37:32.000000',
            'dump_start': '2014-10-08T04:52:48.154228',
            'dump_end': '2014-10-08T04:57:38.145282',
        }
        event_info = {
            "course_id": self.course_id,
            "user_id": self.user_id,
            "mode": self.mode,
        }
        event_info.update(validation_info)
        line = self._create_event_log_line(event_type=VALIDATED, event=event_info)
        expected_value = (self.timestamp, VALIDATED, self.mode, validation_info)
        self.assert_single_map_output(line, self.expected_key, expected_value)