def obfuscate_wiki_entry(self, line, user_profile):
    """
    Obfuscate the free-text columns of one tab-separated wiki revision line.

    The line is stripped of trailing CR/LF, decoded as UTF-8 and split on tabs.
    The revision's `content` is obfuscated and written to column 12, and its
    `user_message` is obfuscated and written to column 2.

    Arguments:
        line: one encoded, tab-separated revision line.
        user_profile: mapping looked up with the revision's user_id to find a
            profile entry exposing `.name`, or None to skip the lookup.

    Returns:
        The tab-joined columns, encoded as UTF-8 (no trailing newline is re-added).
    """
    columns = line.rstrip('\r\n').decode('utf8').split('\t')
    revision = ArticleRevisionRecord(*columns)

    # Collect whatever we know about the author so the obfuscator can look for it.
    user_info = {}
    if user_profile is not None:
        author_id = revision.user_id
        if author_id != 'NULL':
            profile = user_profile.get(author_id)
            if profile is not None:
                user_info['name'] = [profile.name]
            else:
                log.error("Missing profile entry for user_id %s", author_id)

    # These columns are only reported, never rewritten.  A value equal to the
    # column's own name is tolerated (presumably a header row -- TODO confirm).
    if revision.ip_address not in ('NULL', 'ip_address'):
        log.warning("Found non-NULL IP address")
    if revision.automatic_log not in ('', 'automatic_log'):
        log.warning(u"Found non-zero-length automatic_log: %s", revision.automatic_log)

    def scrub(encoded_text):
        """Backslash-decode the text, obfuscate it, and backslash-encode the result."""
        plain_text = backslash_decode_value(encoded_text)
        return backslash_encode_value(self.obfuscator.obfuscate_text(plain_text, user_info))

    # The record's values are not reset; the cleaned text goes into the column list instead.
    columns[12] = scrub(revision.content)
    columns[2] = scrub(revision.user_message)

    return u"\t".join(columns).encode('utf-8')
def filter_row(self, row):
    """
    Scrub one wiki revision row in place and return it.

    Columns 2, 3 and 4 (user_message, automatic_log, ip_address) are blanked,
    column 5 (user_id) is remapped through `self.remap_id` unless it is 'NULL',
    and column 12 is UTF-8 decoded, backslash-decoded, obfuscated, and then
    backslash-encoded and UTF-8 encoded again.

    Arguments:
        row: mutable, index-addressable sequence of column values.

    Returns:
        The same `row` object, after modification.
    """
    raw_user_id = row[5]
    has_user = raw_user_id != 'NULL'

    user_info = {}
    numeric_id = None
    if has_user:
        numeric_id = int(raw_user_id)
        user_info['user_id'] = [numeric_id]
        try:
            known_user = self.user_by_id[numeric_id]
            # Copy over whichever identifying fields the entry provides.
            for key in ('username', 'name'):
                if key in known_user:
                    user_info[key] = [known_user[key]]
        except KeyError:
            log.error("Unable to find wiki user_id: %s in the user_by_id map", numeric_id)

    # Blank user_message (2), automatic_log (3) and ip_address (4).
    for index in (2, 3, 4):
        row[index] = ''

    # A 'NULL' user_id is left untouched; anything else is remapped.
    if has_user:
        row[5] = self.remap_id(numeric_id)

    decoded_content = backslash_decode_value(row[12].decode('utf8'))
    obfuscated_content = self.obfuscator.obfuscate_text(decoded_content, user_info)
    row[12] = backslash_encode_value(obfuscated_content).encode('utf8')

    return row
def filter_row(self, row):
    """
    Remove or obfuscate user-identifying values in a wiki revision row.

    The row is modified in place: columns 2-4 are emptied, column 5 is passed
    through `self.remap_id` when it holds a user id rather than 'NULL', and the
    text in column 12 is run through `self.obfuscator.obfuscate_text`.

    Arguments:
        row: mutable sequence of column values; column 12 is decoded as UTF-8.

    Returns:
        The modified `row`.
    """
    user_info = {}
    original_id = row[5]
    id_present = original_id != 'NULL'
    user_key = None

    if id_present:
        user_key = int(original_id)
        user_info['user_id'] = [user_key]
        try:
            details = self.user_by_id[user_key]
            if 'username' in details:
                user_info['username'] = [details['username']]
            if 'name' in details:
                user_info['name'] = [details['name']]
        except KeyError:
            log.error("Unable to find wiki user_id: %s in the user_by_id map", user_key)

    row[2] = ''  # user_message
    row[3] = ''  # automatic_log
    row[4] = ''  # ip_address

    # Only a real user id is remapped; the 'NULL' marker is preserved as-is.
    if id_present:
        row[5] = self.remap_id(user_key)

    # Work on decoded text, then restore the backslash/UTF-8 encoding.
    text = backslash_decode_value(row[12].decode('utf8'))
    row[12] = backslash_encode_value(self.obfuscator.obfuscate_text(text, user_info)).encode('utf8')
    return row
def obfuscate_wiki_entry(self, line, user_profile):
    """
    Return a wiki revision line with its content and user_message obfuscated.

    Arguments:
        line: encoded, tab-separated revision line; trailing CR/LF is dropped
            before it is decoded as UTF-8.
        user_profile: optional mapping queried with the revision's user_id; a
            matching entry's `.name` is handed to the obfuscator.  None
            disables the lookup.

    Returns:
        The columns joined with tabs and encoded as UTF-8, with column 12
        (content) and column 2 (user_message) replaced by obfuscated text.
    """
    parts = line.rstrip('\r\n').decode('utf8').split('\t')
    entry = ArticleRevisionRecord(*parts)

    user_info = {}
    if user_profile is not None:
        uid = entry.user_id
        if uid != 'NULL':
            matched_profile = user_profile.get(uid)
            if matched_profile is None:
                log.error("Missing profile entry for user_id %s", uid)
            else:
                user_info['name'] = [matched_profile.name]

    # Unexpected values in these columns are logged but otherwise left alone.
    ip_address = entry.ip_address
    if ip_address != 'NULL' and ip_address != 'ip_address':
        log.warning("Found non-NULL IP address")
    automatic_log = entry.automatic_log
    if automatic_log != '' and automatic_log != 'automatic_log':
        log.warning(u"Found non-zero-length automatic_log: %s", automatic_log)

    # The record's values are not reset, so the results are stored in the split column list.
    # Content is processed before user_message.
    content_text = backslash_decode_value(entry.content)
    clean_content = self.obfuscator.obfuscate_text(content_text, user_info)
    parts[12] = backslash_encode_value(clean_content)

    message_text = backslash_decode_value(entry.user_message)
    clean_message = self.obfuscator.obfuscate_text(message_text, user_info)
    parts[2] = backslash_encode_value(clean_message)

    joined = u"\t".join(parts)
    return joined.encode('utf-8')
def load_user_info(userinfo_path):
    """
    Load a tab-separated user-info file from the local filesystem into a lookup dict.

    The first line of the file is skipped.  Every other line is UTF-8 decoded
    and split on tabs; each field is backslash-decoded and stripped of
    surrounding whitespace before being used to build a UserInfoRecord.

    Arguments:
        userinfo_path: path of the file to read.

    Returns:
        A dict in which every record is reachable under two keys: its user_id
        as an int and its username.  Each value maps the record's attribute
        names to one-element lists holding the attribute's value.
    """
    users = {}
    with open(userinfo_path, 'r') as userinfo_file:
        # Drop the first line (presumably a header row); the None default
        # keeps an empty file from raising StopIteration.
        next(userinfo_file, None)

        for raw_line in userinfo_file:
            raw_fields = raw_line.rstrip('\r\n').decode('utf8').split('\t')
            # Splitting happens first so embedded (escaped) tabs and newlines
            # are only interpreted per field; stripping removes stray
            # whitespace around usernames and full names.
            cleaned = [backslash_decode_value(raw).strip() for raw in raw_fields]
            record = UserInfoRecord(*cleaned)

            entry = {}
            for attr, value in record.__dict__.iteritems():
                entry[attr] = [value]

            # One int key and one string key per record, so the two kinds of
            # key should not collide.
            users[int(record.user_id)] = entry
            users[record.username] = entry
    return users
def load_user_info(userinfo_path):
    """
    Read a custom user-info file and index its records by user id and username.

    Arguments:
        userinfo_path: local path of a tab-separated file whose first line is
            skipped and whose remaining lines each describe one user.

    Returns:
        A dict keyed both by int(user_id) and by username.  Both keys of a
        record share the same entry, which maps each attribute of the record
        to a single-item list containing its value.
    """
    lookup = {}
    with open(userinfo_path, 'r') as source:
        next(source, None)  # skip the first line; tolerate an empty file
        for text_line in source:
            decoded_line = text_line.rstrip('\r\n').decode('utf8')
            # Split on tabs before decoding backslash escapes, then trim the
            # extra whitespace found on usernames and full names.
            values = [backslash_decode_value(item).strip() for item in decoded_line.split('\t')]
            user_record = UserInfoRecord(*values)
            info = dict(
                (name, [value])
                for name, value in user_record.__dict__.iteritems()
            )
            # Stored twice: under an int key and under a string key, which
            # should therefore not collide.
            lookup[int(user_record.user_id)] = info
            lookup[user_record.username] = info
    return lookup
def test_decoding(self, text, expected_result):
    """
    Check that backslash-decoding `text` produces `expected_result`.

    Arguments:
        text: encoded input passed to obfuscate_util.backslash_decode_value.
        expected_result: value the decoded text must equal.

    Both arguments are supplied by the caller (presumably a data-driven test
    decorator that is outside this view).
    """
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(obfuscate_util.backslash_decode_value(text), expected_result)
def test_encoding_round_trip(self, text):
    """
    Check that backslash-encoding and then decoding `text` returns it unchanged.

    Arguments:
        text: value to round-trip through obfuscate_util.backslash_encode_value
            and obfuscate_util.backslash_decode_value (supplied by the caller,
            presumably a data-driven test decorator outside this view).
    """
    encoded = obfuscate_util.backslash_encode_value(text)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(text, obfuscate_util.backslash_decode_value(encoded))
def test_decoding(self, text, expected_result):
    """
    Verify obfuscate_util.backslash_decode_value against a known result.

    Arguments:
        text: encoded string to decode.
        expected_result: the value decoding is expected to return.

    The argument values come from the caller (presumably a data-driven test
    decorator not visible here).
    """
    decoded = obfuscate_util.backslash_decode_value(text)
    # Use assertEqual: the assertEquals alias is deprecated and no longer
    # exists in unittest as of Python 3.12.
    self.assertEqual(decoded, expected_result)
def test_encoding_round_trip(self, text):
    """
    Verify that decoding the backslash-encoded form of `text` yields `text` again.

    Arguments:
        text: input for the encode/decode round trip (provided by the caller,
            presumably a data-driven test decorator not visible here).
    """
    round_tripped = obfuscate_util.backslash_decode_value(
        obfuscate_util.backslash_encode_value(text)
    )
    # Use assertEqual: the assertEquals alias is deprecated and no longer
    # exists in unittest as of Python 3.12.
    self.assertEqual(text, round_tripped)