def log_file_update(filename):
    """Update the log file."""

    #---------------------------------------------------------------------
    # Read input file
    #

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    #---------------------------------------------------------------------
    # Print information about the file
    #
    log_util.print_log_index_summary(raw_log_index, "Log Index Summary:")

    #---------------------------------------------------------------------
    # Write output file
    #

    # Write the log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_update" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
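#-----------------------------------------------------------------------------
# Illustrative driver -- a minimal sketch, not part of the original script:
# one plausible way to run log_file_update() over log files named on the
# command line. The usage text and argument handling here are assumptions
# made for this example only.
#-----------------------------------------------------------------------------
if __name__ == '__main__':
    import sys

    if (len(sys.argv) < 2):
        print("Usage: log_file_update.py LOGFILE [LOGFILE ...]")
        sys.exit(1)

    for infile in sys.argv[1:]:
        log_file_update(infile)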
else:
    print("Reading log file '{0}' ({1:5.1f} MB)\n".format(
        LOGFILE, (os.path.getsize(LOGFILE) / 2**20)))

#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------

# Get the log_data from the file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)

# Get the raw_log_index from the file
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)

# Describe the raw_log_index
log_util.print_log_index_summary(raw_log_index, "Log Index Contents:")

# Filter log index to include all Rx entries and all Tx entries
log_index = log_util.filter_log_index(
    raw_log_index,
    include_only=['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX_HIGH', 'TX_LOW'],
    merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
           'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
           'TX_LOW':  ['TX_LOW',  'TX_LOW_LTG']})

log_util.print_log_index_summary(log_index, "Filtered Log Index:")

# Unpack the log into numpy structured arrays
#
# log_data_to_np_arrays returns a dictionary with one key-value pair per
# entry type included in the log_index argument. The log_index keys are
# reused as the output dictionary keys. Each output dictionary value is a
# numpy record array; refer to wlan_exp_log.log_entries.py for the
# definition of each record array datatype.
log_np = log_util.log_data_to_np_arrays(log_data, log_index)
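#-----------------------------------------------------------------------------
# Example (an illustrative sketch added here, not part of the original
# script): once the log is unpacked, the per-type record arrays in log_np can
# be inspected directly. This relies only on the dictionary structure
# described above; no entry field names are assumed.
#-----------------------------------------------------------------------------
for entry_type in sorted(log_np.keys()):
    print("  {0:10s} : {1:10d} entries".format(entry_type, len(log_np[entry_type])))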
def log_anonymize(filename):
    """Anonymize the log."""
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    # Merge the Rx / Tx subtypes that can be processed together
    log_index = log_util.filter_log_index(raw_log_index,
                                          merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
                                                 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
                                                 'TX_LOW':  ['TX_LOW',  'TX_LOW_LTG']})

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index, "Log Index Summary (merged):")

    #---------------------------------------------------------------------
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #         map each address to a unique anonymous address
    #         Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    #
    print("Anonymizing file step 1 ...")
    start_time = time.time()

    #----------------------------------
    # Rx DSSS entries
    #
    try:
        print("    Anonymizing {0} RX_DSSS entries".format(len(log_index['RX_DSSS'])))

        pyld_start = struct.calcsize(''.join(
            entry_types.entry_rx_dsss.get_field_struct_formats()[:-1]))

        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                                idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Rx OFDM entries
    #
    try:
        print("    Anonymizing {0} RX_OFDM entries".format(len(log_index['RX_OFDM'])))

        pyld_start = struct.calcsize(''.join(
            entry_types.entry_rx_ofdm.get_field_struct_formats()[:-1]))

        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                                idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Tx entries
    #
    try:
        print("    Anonymizing {0} TX_HIGH entries".format(len(log_index['TX_HIGH'])))

        pyld_start = struct.calcsize(''.join(
            entry_types.entry_tx_high.get_field_struct_formats()[:-1]))

        for idx in log_index['TX_HIGH']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                                idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Tx Low entries
    #
    try:
        print("    Anonymizing {0} TX_LOW entries".format(len(log_index['TX_LOW'])))

        pyld_start = struct.calcsize(''.join(
            entry_types.entry_tx_low.get_field_struct_formats()[:-1]))

        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                                idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    #
    print("Anonymizing file step 2 ...")
    print("    Enumerate MAC addresses and their anonymous replacements")

    addr_map = dict()

    for ii, addr in enumerate(all_addrs):
        # The first octet must not be odd, since an odd first octet indicates
        # a multicast address. Hence, use 0xFE as the first octet.
        #
        # Due to FCS errors, the number of addresses in a log file is
        # potentially large. Therefore, the anonymizer supports 2^24 unique
        # addresses.
        #
        anon_addr = (0xFE, 0xFF, 0xFF, (ii // (256**2)), ((ii // 256) % 256), (ii % 256))
        addr_map[addr] = anon_addr

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 3: Replace all MAC addresses in the log
    #
    print("Anonymizing file step 3 ...")
    print("    Replace all MAC addresses in the log")

    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx + 6] = new_addr

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 4: Other anonymization steps
    #
    print("Anonymizing file step 4 ...")
    print("    Remove all payloads")

    # Overwrite all payloads with zeros
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Write output files
    #

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
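#-----------------------------------------------------------------------------
# Note: addr_to_replace() and the globals all_addrs / print_time are defined
# elsewhere in the original script and are not shown in this excerpt. The
# sketch below is an assumption, included only to illustrate the bookkeeping
# the anonymizer relies on: remember each distinct 6-byte address and every
# byte offset at which it appears, so step 3 can overwrite it in place. It is
# not the original helper.
#-----------------------------------------------------------------------------
all_addrs  = []       # running list of unique addresses seen across files
print_time = True     # print per-step timing information

def addr_to_replace(addr, byte_idx, addr_idx_map):
    """Record one occurrence of the 6-byte address 'addr' at 'byte_idx'."""
    if addr not in addr_idx_map:
        addr_idx_map[addr] = list()
    if addr not in all_addrs:
        all_addrs.append(addr)
    addr_idx_map[addr].append(byte_idx)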
else:
    print("Reading log file '{0}' ({1:5.1f} MB)\n".format(
        os.path.split(LOGFILE)[1], (os.path.getsize(LOGFILE)/1E6)))

#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------

# Get the log_data from the file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)

# Get the raw_log_index from the file
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)

# Describe the raw_log_index
log_util.print_log_index_summary(raw_log_index, "Raw Log Index Contents:")

# Filter log index to include only the NODE_INFO, TIME_INFO, RX_OFDM and TX entries
log_index = log_util.filter_log_index(raw_log_index,
                                      include_only=['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX'])

log_util.print_log_index_summary(log_index, "Filtered Log Index:")

# Unpack the log into numpy structured arrays
#
# log_data_to_np_arrays returns a dictionary with one key-value pair per
# entry type included in the log_index argument. The log_index keys are
# reused as the output dictionary keys. Each output dictionary value is a
# numpy record array; refer to wlan_exp_log.log_entries.py for the
# definition of each record array datatype.
log_np = log_util.log_data_to_np_arrays(log_data, log_index)

###############################################################################
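#-----------------------------------------------------------------------------
# Example (an illustrative sketch added here, not part of the original
# script): each value in log_np is a numpy record array, so the fields it
# provides can be listed straight from its dtype without assuming any
# particular field names.
#-----------------------------------------------------------------------------
for entry_type in sorted(log_np.keys()):
    print("{0}: fields = {1}".format(entry_type, log_np[entry_type].dtype.names))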
def log_anonymize(filename):
    """Anonymize the log."""
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    log_index = log_util.filter_log_index(raw_log_index)

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index)

    #####################
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #         map each address to a unique anonymous address
    #         Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    print("Anonymizing file step 1 ...")
    start_time = time.time()

    # Station Info entries
    print("    Anonymizing STATION_INFO entries")
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte address at offset 8
            o = 8
            addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    # Tx/Rx Statistics entries
    print("    Anonymizing TXRX_STATS entries")
    try:
        for idx in log_index['TXRX_STATS']:
            # 6-byte address at offset 16
            o = 16
            addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    # Rx DSSS entries
    print("    Anonymizing RX_DSSS entries")
    try:
        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 28, 34, 40
            for o in (28, 34, 40):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    # Rx OFDM entries
    print("    Anonymizing RX_OFDM entries")
    try:
        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 284, 290, 296
            for o in (284, 290, 296):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    # Tx entries
    print("    Anonymizing TX entries")
    try:
        for idx in log_index['TX']:
            # 6-byte addresses at offsets 44, 50, 56
            for o in (44, 50, 56):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    # Tx Low entries
    print("    Anonymizing TX_LOW entries")
    try:
        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 40, 46, 52
            for o in (40, 46, 52):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    print("Anonymizing file step 2 ...")
    print("    Enumerate MAC addresses and their anonymous replacements")

    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        anon_addr = (0xFF, 0xFF, 0xFF, 0xFF, (ii//256), (ii%256))
        addr_map[addr] = anon_addr

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 3: Replace all MAC addresses in the log
    print("Anonymizing file step 3 ...")
    print("    Replace all MAC addresses in the log")

    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx+6] = new_addr

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 4: Other anonymization steps
    print("Anonymizing file step 4 ...")

    print("    Replace STATION_INFO hostnames")
    # Station info entries contain "hostname", the DHCP client hostname field
    # Replace these with a string version of the new anonymous MAC addr
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte MAC addr (already anonymized) at offset 8
            # 20-character ASCII string at offset 16
            addr_o = 8
            name_o = 16
            addr = log_bytes[idx+addr_o : idx+addr_o+6]
            new_name = "AnonNode {0:02x}_{1:02x}".format(addr[4], addr[5])
            new_name = new_name + '\x00' * (20 - len(new_name))
            log_bytes[idx+name_o : idx+name_o+20] = bytearray(new_name.encode("UTF-8"))
    except KeyError:
        pass

    print("    Remove all WN_CMD_INFO entries")
    # WARPNet Command info entries contain command arguments that could possibly
    # contain sensitive information. Replace with NULL entries.
    try:
        log_util.overwrite_entries_with_null_entry(log_bytes, log_index['WN_CMD_INFO'])
    except:
        pass

    print("    Remove all payloads")
    # Overwrite all payloads with zeros
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print("        Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Write output files

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
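#####################
# Worked example (added for illustration; not part of the original script):
# the step-2 mapping above packs the enumeration index into the last two
# octets, so this version of the anonymizer supports 256 * 256 = 65536
# distinct anonymous addresses.
ii_example   = 299              # the 300th distinct address seen in the logs
anon_example = (0xFF, 0xFF, 0xFF, 0xFF, (ii_example // 256), (ii_example % 256))
print(anon_example)             # (255, 255, 255, 255, 1, 43)

# The matching STATION_INFO hostname written in step 4 would be "AnonNode 01_2b"
print("AnonNode {0:02x}_{1:02x}".format(anon_example[4], anon_example[5]))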