def compare_event_lines(self, expected_line, reducer_output_line):
    """
    Assert that two JSON-encoded event lines decode to equal dicts.

    The comparison is made on the decoded dicts rather than on the raw
    strings, so that differences in key ordering within the JSON text do
    not cause a spurious failure.

    Args:
        expected_line: JSON-encoded event that the test expects.
        reducer_output_line: JSON-encoded event actually produced.
    """
    # Decode the produced line first, then the expected one.
    actual = eventlog.decode_json(reducer_output_line)
    expected = eventlog.decode_json(expected_line)
    self.assertDictEqual(expected, actual)
def obfuscate_event_entry(self, line):
    """
    Obfuscate the 'event' payload of a single event-log entry.

    Args:
        line: one raw event-log entry, expected to be a JSON-encoded event.

    Returns:
        * `line` itself, unchanged, when it cannot be parsed as an event or
          when no course_id can be determined for it (both are logged as errors).
        * None when the payload contains 'POST' and the 'skip_post' parameter
          is set, i.e. the entry is to be dropped.
        * Otherwise the event re-serialized with json.dumps(); its 'event'
          payload is replaced only if the obfuscator returned a non-None value.
    """
    event = eventlog.parse_json_event(line)
    if event is None:
        # Unexpected here...
        log.error(u"Encountered event entry which failed to parse: %r", line)
        return line

    course_id = eventlog.get_course_id(event, from_url=True)
    if course_id is None:
        # Unexpected here...
        log.error(u"Encountered event entry with no course_id: %r", line)
        return line

    # We cannot use eventlog.get_event_data(event) as-is, since we need to know
    # what was done to the payload, so that it can be transformed back to its
    # original form once cleaned.
    event_json_decoded = False
    event_data = event.get('event')

    if event_data is None:
        log.error(u"Encountered event entry with no 'event' payload: %r", line)
    elif event_data == '':
        # Note that this happens with some browser events.  Instead of
        # failing to parse it as a JSON string, just leave as-is.
        pass
    elif isinstance(event_data, basestring):
        # Cjson produces str, while json produces unicode.  Hmm.
        if len(event_data) == 512 and 'POST' in event_data:
            # It's a truncated JSON string.  But we're going to throw it out anyway, so no worries.
            pass
        elif '{' not in event_data and '=' in event_data:
            # It's a key-value pair from a browser event.  Just process as-is,
            # rather than parsing and reassembling.
            pass
        else:
            try:
                event_data = eventlog.decode_json(event_data)
                event_json_decoded = True
            except Exception:
                # Best effort: keep the undecoded string and carry on.
                log.error(u"Encountered event entry with unparseable 'event' payload: %r", line)

    # TODO: update the comment!  This is where we traverse the event in search of values that should be "cleansed".
    # Much along the model of what we already do for 'state' in CWSM.  Except that we need to be more
    # flexible in determining the level of backslash encoding -- decode and re-encode as many levels as needed
    # to get to strings that can be properly interpreted.
    event_user_info = self.get_userinfo_from_event(event, event_data)

    # A missing payload (None) or a payload that decoded to a JSON scalar
    # (number, boolean) does not support the `in` operator.  Treat those as
    # "no POST" rather than letting a TypeError abort processing of the line.
    try:
        contains_post = 'POST' in event_data
    except TypeError:
        contains_post = False
    if contains_post and self.parameters['skip_post']:
        return None

    updated_event_data = self.obfuscator.obfuscate_structure(event_data, u"event", event_user_info)

    # A None result is treated as "leave the payload untouched".
    if updated_event_data is not None:
        event_source = event.get('event_source')
        event_type = event.get('event_type')
        log.info(u"Obfuscated %s event with event_type = '%s'", event_source, event_type)

        if event_json_decoded:
            # The payload arrived as a JSON string, so put it back in that form.
            # TODO: should really use cjson, if that were originally used for decoding the json.
            updated_event_data = json.dumps(updated_event_data)

        event['event'] = updated_event_data

    # TODO: should really use cjson, if that were originally used for decoding the json.
    return json.dumps(event)
def obfuscate_event_entry(self, line):
    """
    Obfuscate the 'event' payload of a single event-log entry.

    Args:
        line: one raw event-log entry, expected to be a JSON-encoded event.

    Returns:
        * `line` itself, unchanged, when it cannot be parsed as an event or
          when no course_id can be determined for it (both are logged as errors).
        * None when the payload contains 'POST' and the 'skip_post' parameter
          is set, i.e. the entry is to be dropped.
        * Otherwise the event re-serialized with json.dumps(); its 'event'
          payload is replaced only if the obfuscator returned a non-None value.
    """
    event = eventlog.parse_json_event(line)
    if event is None:
        # Unexpected here...
        log.error(u"Encountered event entry which failed to parse: %r", line)
        return line

    course_id = eventlog.get_course_id(event, from_url=True)
    if course_id is None:
        # Unexpected here...
        log.error(u"Encountered event entry with no course_id: %r", line)
        return line

    # We cannot use eventlog.get_event_data(event) as-is, since we need to know
    # what was done to the payload, so that it can be transformed back to its
    # original form once cleaned.
    event_json_decoded = False
    event_data = event.get('event')

    if event_data is None:
        log.error(u"Encountered event entry with no 'event' payload: %r", line)
    elif event_data == '':
        # Note that this happens with some browser events.  Instead of
        # failing to parse it as a JSON string, just leave as-is.
        pass
    elif isinstance(event_data, basestring):
        # Cjson produces str, while json produces unicode.  Hmm.
        if len(event_data) == 512 and 'POST' in event_data:
            # It's a truncated JSON string.  But we're going to throw it out anyway, so no worries.
            pass
        elif '{' not in event_data and '=' in event_data:
            # It's a key-value pair from a browser event.  Just process as-is,
            # rather than parsing and reassembling.
            pass
        else:
            try:
                event_data = eventlog.decode_json(event_data)
                event_json_decoded = True
            except Exception:
                # Best effort: keep the undecoded string and carry on.
                log.error(u"Encountered event entry with unparseable 'event' payload: %r", line)

    # TODO: update the comment!  This is where we traverse the event in search of values that should be "cleansed".
    # Much along the model of what we already do for 'state' in CWSM.  Except that we need to be more
    # flexible in determining the level of backslash encoding -- decode and re-encode as many levels as needed
    # to get to strings that can be properly interpreted.
    event_user_info = self.get_userinfo_from_event(event, event_data)

    # A missing payload (None) or a payload that decoded to a JSON scalar
    # (number, boolean) does not support the `in` operator.  Treat those as
    # "no POST" rather than letting a TypeError abort processing of the line.
    try:
        contains_post = 'POST' in event_data
    except TypeError:
        contains_post = False
    if contains_post and self.parameters['skip_post']:
        return None

    updated_event_data = self.obfuscator.obfuscate_structure(event_data, u"event", event_user_info)

    # A None result is treated as "leave the payload untouched".
    if updated_event_data is not None:
        event_source = event.get('event_source')
        event_type = event.get('event_type')
        log.info(u"Obfuscated %s event with event_type = '%s'", event_source, event_type)

        if event_json_decoded:
            # The payload arrived as a JSON string, so put it back in that form.
            # TODO: should really use cjson, if that were originally used for decoding the json.
            updated_event_data = json.dumps(updated_event_data)

        event['event'] = updated_event_data

    # TODO: should really use cjson, if that were originally used for decoding the json.
    return json.dumps(event)