def mapper(self, line):
    """
    Emit the timestamp and IP address of one event, keyed by username.

    Args:
        line: one input line; it is handed to
            `self.get_event_and_date_string`, which is expected to return
            either None or an `(event, date_string)` pair.

    Yields:
        (username, (timestamp, ip_address)) when the event has a
        non-blank username, a time that `eventlog.get_event_time` accepts,
        and a non-empty 'ip' value.  Nothing is yielded otherwise.
    """
    # Get events prefiltered by interval:
    value = self.get_event_and_date_string(line)
    if value is None:
        return
    event, _date_string = value

    username = event.get('username')
    if not username:
        return
    # Re-check after stripping: a username made only of whitespace would
    # otherwise slip past the guard above and be emitted as an empty key.
    username = username.strip()
    if not username:
        return

    # Get timestamp instead of date string, so we get the latest ip
    # address for events on the same day.
    # TODO: simplify the round-trip conversion, so that we don't have to wait for the slow parse.
    # The parse provides error checking, allowing bad dates to be skipped.
    # But we may now have enough confidence in the data that this is rare (if ever).
    # Or we could implement a faster check, e.g. reject any that are "greater" than now.
    timestamp_as_datetime = eventlog.get_event_time(event)
    if timestamp_as_datetime is None:
        return
    timestamp = eventlog.datetime_to_timestamp(timestamp_as_datetime)

    ip_address = event.get('ip')
    if not ip_address:
        log.warning("No ip_address found for user '%s' on '%s'.", username, timestamp)
        return

    yield username, (timestamp, ip_address)
def mapper(self, line):
    """
    Emit the timestamp and IP address of one event, keyed by username.

    Args:
        line: one input line; it is parsed with
            `eventlog.parse_json_event`.

    Yields:
        (username, (timestamp, ip_address)) when the line parses, the
        event has a non-blank username, its time is accepted by
        `eventlog.get_event_time` and is earlier than `self.end_datetime`,
        and it carries a non-empty 'ip' value.  Nothing is yielded
        otherwise.
    """
    event = eventlog.parse_json_event(line)
    if event is None:
        return

    username = event.get('username')
    if not username:
        return

    stripped_username = username.strip()
    if username != stripped_username:
        log.error("User '%s' has extra whitespace, which is being stripped. Event: %s", username, event)
        username = stripped_username
    # Re-check after stripping: a username made only of whitespace would
    # otherwise slip past the guard above and be emitted as an empty key.
    if not username:
        return

    timestamp_as_datetime = eventlog.get_event_time(event)
    if timestamp_as_datetime is None:
        return

    # Events at or after the end of the interval are skipped.
    if timestamp_as_datetime >= self.end_datetime:
        return

    timestamp = eventlog.datetime_to_timestamp(timestamp_as_datetime)

    ip_address = event.get('ip')
    if not ip_address:
        log.warning("No ip_address found for user '%s' on '%s'.", username, timestamp)
        return

    yield username, (timestamp, ip_address)
def test_good_datetime_with_no_microseconds_or_timezone(self):
    """A time string without microseconds or timezone is still converted."""
    item = {"time": "2013-12-17T15:38:32"}
    dt_value = eventlog.get_event_time(item)
    self.assertIsNotNone(dt_value)
    # Use assertEqual: the assertEquals alias is deprecated and was removed
    # from unittest in Python 3.12.
    self.assertEqual(eventlog.datetime_to_timestamp(dt_value), "2013-12-17T15:38:32")
    self.assertEqual(eventlog.datetime_to_datestamp(dt_value), "2013-12-17")
def get_explicit_enrollment_output(line):
    """
    Generates output values for explicit enrollment events.

    Args:
        line: text line from a tracking event log.

    Returns:
        (course_id, user_id), (timestamp, action_value)

        where action_value = 1 (enrolled) or -1 (unenrolled)
        and timestamp is the ISO-format string returned by
        `eventlog.datetime_to_timestamp` (the example below shows
        fractional seconds).

        or None if there is no valid enrollment event on the line.

    Example:
        (edX/DemoX/Demo_Course, dummy_userid), (2013-09-10T00:01:05.123456, 1)

    """
    # Before parsing, check that the line contains something that
    # suggests it's an enrollment event.
    if 'edx.course.enrollment' not in line:
        return None

    # try to parse the line into a dict:
    event = eventlog.parse_json_event(line)
    if event is None:
        # The line didn't parse.  For this specific purpose,
        # we can assume that all enrollment-related lines would parse,
        # and these non-parsing lines would get skipped anyway.
        return None

    # get event type, and check that it exists:
    event_type = event.get('event_type')
    if event_type is None:
        log.error("encountered event with no event_type: %s", event)
        return None

    # convert the type to a value:
    if event_type == 'edx.course.enrollment.activated':
        action_value = ENROLLED
    elif event_type == 'edx.course.enrollment.deactivated':
        action_value = UNENROLLED
    else:
        # not an enrollment event...
        return None

    # get the timestamp (local name avoids shadowing the `datetime` module):
    event_time = eventlog.get_event_time(event)
    if event_time is None:
        log.error("encountered event with bad datetime: %s", event)
        return None
    timestamp = eventlog.datetime_to_timestamp(event_time)

    # Use the `user_id` from the event `data` field, since the
    # `user_id` in the `context` field is the user who made the
    # request but not necessarily the one who got enrolled.  (The
    # `course_id` should be the same in `context` as in `data`.)

    # Get the event data:
    event_data = eventlog.get_event_data(event)
    if event_data is None:
        # Assume it's already logged (and with more specifics).
        return None

    # Get the course_id from the data, and validate.  A missing key is
    # logged and skipped like any other malformed event instead of
    # raising KeyError.
    course_id = event_data.get('course_id')
    if course_id is None:
        log.error("encountered explicit enrollment event with no course_id: %s", event)
        return None
    if not opaque_key_util.is_valid_course_id(course_id):
        log.error("encountered explicit enrollment event with bogus course_id: %s", event)
        return None

    # Get the user_id from the data:
    user_id = event_data.get('user_id')
    if user_id is None:
        log.error("encountered explicit enrollment event with no user_id: %s", event)
        return None

    # For now, ignore the enrollment 'mode' (e.g. 'honor').

    return (course_id, user_id), (timestamp, action_value)
def get_explicit_enrollment_output(line):
    """
    Generates output values for explicit enrollment events.

    Args:
        line: text line from a tracking event log.

    Returns:
        (course_id, user_id), (timestamp, action_value)

        where action_value = 1 (enrolled) or -1 (unenrolled)
        and timestamp is the ISO-format string returned by
        `eventlog.datetime_to_timestamp` (the example below shows
        fractional seconds).

        or None if there is no valid enrollment event on the line.

    Example:
        (edX/DemoX/Demo_Course, dummy_userid), (2013-09-10T00:01:05.123456, 1)

    """
    # Before parsing, check that the line contains something that
    # suggests it's an enrollment event.
    if 'edx.course.enrollment' not in line:
        return None

    # try to parse the line into a dict:
    event = eventlog.parse_json_event(line)
    if event is None:
        # The line didn't parse.  For this specific purpose,
        # we can assume that all enrollment-related lines would parse,
        # and these non-parsing lines would get skipped anyway.
        return None

    # get event type, and check that it exists:
    event_type = event.get('event_type')
    if event_type is None:
        log.error("encountered event with no event_type: %s", event)
        return None

    # convert the type to a value:
    if event_type == 'edx.course.enrollment.activated':
        action_value = ENROLLED
    elif event_type == 'edx.course.enrollment.deactivated':
        action_value = UNENROLLED
    else:
        # not an enrollment event...
        return None

    # get the timestamp (local name avoids shadowing the `datetime` module):
    event_time = eventlog.get_event_time(event)
    if event_time is None:
        log.error("encountered event with bad datetime: %s", event)
        return None
    timestamp = eventlog.datetime_to_timestamp(event_time)

    # Use the `user_id` from the event `data` field, since the
    # `user_id` in the `context` field is the user who made the
    # request but not necessarily the one who got enrolled.  (The
    # `course_id` should be the same in `context` as in `data`.)

    # Get the event data:
    event_data = eventlog.get_event_data(event)
    if event_data is None:
        # Assume it's already logged (and with more specifics).
        return None

    # Get the course_id from the data, and validate.  A missing key is
    # logged and skipped like any other malformed event instead of
    # raising KeyError.
    course_id = event_data.get('course_id')
    if course_id is None:
        log.error("encountered explicit enrollment event with no course_id: %s", event)
        return None
    if not opaque_key_util.is_valid_course_id(course_id):
        log.error(
            "encountered explicit enrollment event with bogus course_id: %s",
            event)
        return None

    # Get the user_id from the data:
    user_id = event_data.get('user_id')
    if user_id is None:
        log.error("encountered explicit enrollment event with no user_id: %s", event)
        return None

    # For now, ignore the enrollment 'mode' (e.g. 'honor').

    return (course_id, user_id), (timestamp, action_value)