def print_data_from_log(print_time, log_tail_pad=0):
    """Continuously poll both station nodes for new RX_OFDM log entries and
    stream their timestamps and channel estimates to (HOST, PORT) over UDP.

    Args:
        print_time:   kept for interface compatibility with the other log
                      helpers in this project (not used in the loop body).
        log_tail_pad: number of bytes at the tail of the node log to ignore
                      when polling; forwarded to node.log_get_all_new().
                      (The original code ignored this parameter and always
                      passed 0.)

    Never returns; runs until interrupted.
    """
    global log_container

    def _poll_and_send(node, label):
        """Fetch new RX_OFDM entries from one node and forward them via UDP."""
        buffer = node.log_get_all_new(log_tail_pad=log_tail_pad)
        data_new = buffer.get_bytes()
        raw_log_indexes = log_util.gen_raw_log_index(data_new)
        # Merge LTG receptions into plain RX_OFDM so both are processed together
        filtered_indexes = log_util.filter_log_index(
            raw_log_indexes,
            include_only=['RX_OFDM'],
            merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG']})
        data = log_util.log_data_to_np_arrays(data_new, filtered_indexes)
        log_rx_ofdm = data['RX_OFDM']
        tstamp = log_rx_ofdm['timestamp']
        channel_ests = log_rx_ofdm['chan_est']
        if len(log_rx_ofdm) >= 1:
            print('Current time = ')
            print(tstamp)
            print(log_rx_ofdm)
            # Entries [1:20] only, to bound the datagram size
            s.sendto(tstamp[1:20].tobytes(), (HOST, PORT))
            # The label prefix must be bytes: 'Station1' + <bytes> raises
            # TypeError on Python 3 (this was a bug in the original)
            s.sendto(label.encode('ascii') + channel_ests[1:20].tobytes(),
                     (HOST, PORT))

    while True:
        # Original station-2 branch reused data1's channel estimates and
        # gated on station-1's entry count; both fixed by polling per-node.
        _poll_and_send(n_sta1, 'Station1')
        _poll_and_send(n_sta2, 'Station2')
def log_anonymize(filename):
    """Anonymize the log.

    Reads the log data, index and user attributes from the HDF5 file
    *filename*, replaces every MAC address found in the Rx/Tx entry MAC
    payloads with a generated anonymous address, zeroes all packet payloads,
    and writes the result to "<base>_anon<ext>" next to the input file.

    Relies on module-level state:
      all_addrs  - running list of MAC addresses seen across files
      print_time - flag enabling per-step timing output
                   (presumably a bool set by the caller — TODO confirm)
    """
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    # Merge the Rx / Tx subtypes that can be processed together
    log_index = log_util.filter_log_index(raw_log_index,
                                          merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
                                                 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
                                                 'TX_LOW': ['TX_LOW', 'TX_LOW_LTG']})

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index, "Log Index Summary (merged):")

    #---------------------------------------------------------------------
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #         map each addresses to a unique anonymous address
    #
    # Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    #
    print("Anonmyizing file step 1 ...")
    start_time = time.time()

    #----------------------------------
    # Rx DSSS entries
    #
    try:
        print(" Anonmyizing {0} RX_DSSS entries".format(
            len(log_index['RX_DSSS'])))
        # Byte offset of the mac_payload field: total size of all the
        # fixed-width fields that precede it in the entry struct
        pyld_start = struct.calcsize(''.join(
            entry_types.entry_rx_dsss.get_field_struct_formats()[:-1]))
        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(
                    tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                    idx + pyld_start + o, addr_idx_map)
    except KeyError:
        # No RX_DSSS entries in this log
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Rx OFDM entries
    #
    try:
        print(" Anonmyizing {0} RX_OFDM entries".format(
            len(log_index['RX_OFDM'])))
        pyld_start = struct.calcsize(''.join(
            entry_types.entry_rx_ofdm.get_field_struct_formats()[:-1]))
        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(
                    tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                    idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Tx entries
    #
    try:
        print(" Anonmyizing {0} TX_HIGH entries".format(
            len(log_index['TX_HIGH'])))
        pyld_start = struct.calcsize(''.join(
            entry_types.entry_tx_high.get_field_struct_formats()[:-1]))
        for idx in log_index['TX_HIGH']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(
                    tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                    idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Tx Low entries
    #
    try:
        print(" Anonmyizing {0} TX_LOW entries".format(
            len(log_index['TX_LOW'])))
        pyld_start = struct.calcsize(''.join(
            entry_types.entry_tx_low.get_field_struct_formats()[:-1]))
        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            # (previous comment said 40/46/52 — that was stale; the loop
            # below uses payload-relative offsets like the other sections)
            for o in (4, 10, 16):
                addr_to_replace(
                    tuple(log_bytes[idx + pyld_start + o:idx + pyld_start + o + 6]),
                    idx + pyld_start + o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    #
    print("Anonmyizing file step 2 ...")
    print(" Enumerate MAC addresses and their anonymous replacements")

    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # Address should not have a first octet that is odd, as this indicates
        # the address is multicast. Hence, use 0xFE as the first octet.
        #
        # Due to FCS errors, the number of addresses in a log file is
        # potentially large. Therefore, the anonymizer supports 2^24 unique
        # addresses.
        #
        anon_addr = (0xFE, 0xFF, 0xFF, (ii // (256**2)), ((ii // 256) % 256),
                     (ii % 256))
        addr_map[addr] = anon_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 3: Replace all MAC addresses in the log
    #
    print("Anonmyizing file step 3 ...")
    print(" Replace all MAC addresses in the log")

    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        # Overwrite each recorded 6-byte location in place
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx + 6] = new_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 4: Other annonymization steps
    #
    print("Anonmyizing file step 4 ...")
    print(" Remove all payloads")

    # Overwrite all payloads with zeros (best effort: errors are ignored)
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Write output files
    #

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
#----------------------------------------------------------------------------- # Main script #----------------------------------------------------------------------------- # Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Describe the raw_log_index log_util.print_log_index_summary(raw_log_index, "Raw Log Index Contents:") # Filter log index to include all Rx entries, merged into RX_ALL, and all Tx entries log_index = log_util.filter_log_index(raw_log_index, include_only=['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX']) log_util.print_log_index_summary(log_index, "Filtered Log Index:") # Unpack the log into numpy structured arrays # log_data_to_np_arrays returns a dictionary with one key-value pair per # entry type included in the log_index argument. The log_index keys are reused # as the output dictionary keys. Each output dictionary value is a numpy record array # Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype log_np = log_util.log_data_to_np_arrays(log_data, log_index) ############################################################################### # Example 0: Print node info / Time info log_node_info = log_np['NODE_INFO'][0] print("Node Info:")
# NOTE(review): fragment — the first line below is the tail of a print()
# call whose opening lines precede this chunk
              LOGFILE, (os.path.getsize(LOGFILE) / 2**20)))

# Output file: second command-line argument, or a default name
if len(sys.argv) == 3:
    HDF5_FILE_OUT = str(sys.argv[2])
else:
    HDF5_FILE_OUT = 'np_rx_ofdm_entries.hdf5'

print("WLAN Exp Log Example: OFDM Rx Entry Exporter")

# Extract the raw log data and log index from the HDF5 file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)

# Generate indexes with only Rx_OFDM events (LTG receptions merged in)
log_index_rx = log_util.filter_log_index(
    raw_log_index,
    include_only=['RX_OFDM'],
    merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG']})

# Generate numpy array of all OFDM Rx entries
log_np = log_util.log_data_to_np_arrays(log_data, log_index_rx)
log_rx_ofdm = log_np['RX_OFDM']

#################################################################
# Filter the OFDM Rx Entries
# Find the source address for which we have the most receptions

# Extract unique values for address 2 (transmitting address in received MAC headers)
uniq_addrs = np.unique(log_rx_ofdm['addr2'])

# Count the number of receptions per source address
num_rx = list()
exit_script = False # Extract the log data and index from the log files log_data_ap = hdf_util.hdf5_to_log_data(filename=LOGFILE_AP) raw_log_index_ap = hdf_util.hdf5_to_log_index(filename=LOGFILE_AP) log_data_sta = hdf_util.hdf5_to_log_data(filename=LOGFILE_STA) raw_log_index_sta = hdf_util.hdf5_to_log_index(filename=LOGFILE_STA) # Generate indexes with just Tx and Rx events entries_filt = ['NODE_INFO', 'RX_OFDM', 'TX_HIGH', 'TX_LOW'] entries_merge = {'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'], 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'], 'TX_LOW' : ['TX_LOW', 'TX_LOW_LTG']} log_index_txrx_ap = log_util.filter_log_index(raw_log_index_ap, include_only=entries_filt, merge=entries_merge) log_index_txrx_sta = log_util.filter_log_index(raw_log_index_sta, include_only=entries_filt, merge=entries_merge) # Generate numpy arrays log_np_ap = log_util.log_data_to_np_arrays(log_data_ap, log_index_txrx_ap) log_np_sta = log_util.log_data_to_np_arrays(log_data_sta, log_index_txrx_sta) # Extract tne NODE_INFO's and determine each node's MAC address try: addr_ap = log_np_ap['NODE_INFO']['wlan_mac_addr'] except: print("ERROR: Log for AP did not contain a NODE_INFO. Cannot determine MAC Address of AP.\n") exit_script = True try: addr_sta = log_np_sta['NODE_INFO']['wlan_mac_addr']
# Process every HDF5 log file in the current directory
# (note: loop variable 'file' shadows the Python 2 builtin of the same name)
for file in os.listdir("."):
    if file.endswith(".hdf5"):
        LOGFILE = file

        # Get the log_data from the file
        log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)

        # Get the raw_log_index from the file
        raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)

        # Describe the raw_log_index
        log_util.print_log_index_summary(raw_log_index, "Raw Log Index Contents:")

        # Filter log index to include all Rx entries (LTG receptions merged in)
        log_index = log_util.filter_log_index(
            raw_log_index,
            include_only=['RX_OFDM'],
            merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG']})

        log_util.print_log_index_summary(log_index, "Filtered Log Index:")

        # Unpack the log into numpy structured arrays
        # log_data_to_np_arrays returns a dictionary with one key-value pair per
        #   entry type included in the log_index argument. The log_index keys are reused
        #   as the output dictionary keys. Each output dictionary value is a numpy record array
        #   Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype
        log_np = log_util.log_data_to_np_arrays(log_data, log_index)

        exp_info = log_np['RX_OFDM']

        # NOTE(review): fragment — the loop body continues past this chunk
        for info in exp_info:
import numpy as np
import pandas as pd
from matplotlib.pyplot import *

import wlan_exp.log.util as log_util

# Read the full binary log into memory
with open('big_logs/sta_log_stats_2014_03_06.bin', 'rb') as fh:
    print("Reading log file...")
    log_b = fh.read()

print("Generating log index...")
log_index_raw = log_util.gen_log_index_raw(log_b)

# Extract just OFDM Rx events
log_idx_rx_ofdm = log_util.filter_log_index(log_index_raw,
                                            include_only=['RX_OFDM'])

# Generate numpy array
print("Generating numpy arrays...")
log_nd = log_util.gen_log_np_arrays(log_b, log_idx_rx_ofdm)
rx = log_nd['RX_OFDM']

# Extract length and timestamp fields
l = rx['length']     # per-packet length field
t = rx['timestamp']  # presumably microseconds — 'us' unit used below

# Convert to pandas dataset, indexed by microsecond timestamp
print("Calculating throughput...")
t_pd = pd.to_datetime(t, unit='us')
len_pd = pd.Series(l, index=t_pd)
# Ensure the log file actually exists - quit immediately if not if(not os.path.isfile(LOGFILE)): print("ERROR: Logfile {0} not found".format(LOGFILE)) sys.exit() else: print("Reading log file '{0}' ({1:5.1f} MB)\n".format(LOGFILE, (os.path.getsize(LOGFILE)/1E6))) # Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Extract just OFDM Tx events tx_log_index = log_util.filter_log_index(raw_log_index, include_only=['TX']) # Generate numpy array tx_recs = log_np = log_util.log_data_to_np_arrays(log_data, tx_log_index) # Define the fields to group by group_fields = ('addr1',) # Define the aggregation functions stat_calc = ( ('retry_count', np.mean, 'avg_num_tx'), ('length', len, 'num_pkts'), ('length', np.mean, 'avg_len'), ('length', sum, 'tot_len'), ('time_to_done', np.mean, 'avg_time'))
# NOTE(review): fragment — the first line below is the tail of a print()
# inside a try block whose beginning precedes this chunk
        os.path.getsize(LOGFILE)/1E6))
except:
    print("ERROR: Logfile {0} not found".format(LOGFILE_IN))
    sys.exit()
else:
    LOGFILE = LOGFILE_IN
    print("Reading log file '{0}' ({1:5.1f} MB)\n".format(
        os.path.split(LOGFILE)[1], os.path.getsize(LOGFILE)/1E6))

# Extract the raw log data and log index from the HDF5 file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)

# Generate indexes with only Rx_OFDM events
log_index_rx = log_util.filter_log_index(raw_log_index,
                                         include_only=['RX_OFDM'])

# Generate numpy array of all OFDM Rx entries
log_np = log_util.log_data_to_np_arrays(log_data, log_index_rx)
log_rx_ofdm = log_np['RX_OFDM']

#################################################################
# Filter the OFDM Rx Entries
# Find the source address for which we have the most receptions

# Extract unique values for address 2 (transmitting address in received MAC headers)
uniq_addrs = np.unique(log_rx_ofdm['addr2'])

# Count the number of receptions per source address
num_rx = list()

# NOTE(review): fragment — the loop body continues past this chunk
for ii, ua in enumerate(uniq_addrs):
# Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Describe the raw_log_index log_util.print_log_index_summary(raw_log_index, "Raw Log Index Contents:") # Filter log index to include all Rx entries and all Tx entries # Merge LTG events into the non-LTG log entry types, so we can # count all Tx/Rx events together log_index = log_util.filter_log_index( raw_log_index, include_only=['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX', 'EXP_INFO'], merge={ 'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'], 'TX': ['TX', 'TX_LTG'] }) log_util.print_log_index_summary(log_index, "Filtered Log Index:") # Unpack the log into numpy structured arrays # log_data_to_np_arrays returns a dictionary with one key-value pair per # entry type included in the log_index argument. The log_index keys are reused # as the output dictionary keys. Each output dictionary value is a numpy record array # Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype log_np = log_util.log_data_to_np_arrays(log_data, log_index) exp_info = log_np['EXP_INFO']
# Main script #----------------------------------------------------------------------------- # Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Describe the raw_log_index log_util.print_log_index_summary(raw_log_index, "Log Index Contents:") # Filter log index to include all Rx entries and all Tx entries log_index = log_util.filter_log_index(raw_log_index, include_only=['NODE_INFO', 'RX_OFDM', 'TX', 'TX_LOW'], merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'], 'TX' : ['TX', 'TX_LTG'], 'TX_LOW' : ['TX_LOW', 'TX_LOW_LTG']}) log_util.print_log_index_summary(log_index, "Filtered Log Index:") # Unpack the log into numpy structured arrays # log_data_to_np_arrays returns a dictionary with one key-value pair per # entry type included in the log_index argument. The log_index keys are reused # as the output dictionary keys. Each output dictionary value is a numpy record array # Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype log_np = log_util.log_data_to_np_arrays(log_data, log_index) ############################################################################### # Example 1: Gather some Tx information from the log
#----------------------------------------------------------------------------- # Main script #----------------------------------------------------------------------------- # Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Describe the raw_log_index log_util.print_log_index_summary(raw_log_index, "Log Index Contents:") # Filter log index to include all Rx entries, merged into RX_ALL, and all Tx entries log_index = log_util.filter_log_index( raw_log_index, include_only=['NODE_INFO', 'RX_OFDM', 'TX', 'TX_LOW']) log_util.print_log_index_summary(log_index, "Filtered Log Index:") # Unpack the log into numpy structured arrays # log_data_to_np_arrays returns a dictionary with one key-value pair per # entry type included in the log_index argument. The log_index keys are reused # as the output dictionary keys. Each output dictionary value is a numpy record array # Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype log_np = log_util.log_data_to_np_arrays(log_data, log_index) ############################################################################### # Example 1: Gather some Tx information from the log # NOTE: Since there are only loops, this example can deal with TX / TX_LOW # being an empty list and does not need a try / except. #
def log_anonymize(filename):
    """Anonymize the log.

    Replaces every MAC address found in the log entries of the HDF5 file
    *filename* with a generated anonymous address, rewrites STATION_INFO
    hostnames, nulls out WN_CMD_INFO entries, zeroes packet payloads, and
    writes the result to "<base>_anon<ext>" next to the input file.

    Relies on module-level state:
      all_addrs  - running list of MAC addresses seen across files
      print_time - flag enabling per-step timing output
                   (presumably a bool set by the caller — TODO confirm)
    """
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    log_index = log_util.filter_log_index(raw_log_index)

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index)

    #####################
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    # map each addresses to a unique anonymous address
    # Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    print("Anonmyizing file step 1 ...")
    start_time = time.time()

    # Station Info entries
    print(" Anonmyizing STATION_INFO entries")
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte address at offsets 8
            o = 8
            addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                            addr_idx_map)
    except KeyError:
        # Entry type absent from this log
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx/Rx Statistics entries
    print(" Anonmyizing TXRX_STATS entries")
    try:
        for idx in log_index['TXRX_STATS']:
            # 6-byte addresses at offsets 16
            o = 16
            addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                            addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Rx DSSS entries
    print(" Anonmyizing RX_DSSS entries")
    try:
        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 28, 34, 40
            for o in (28, 34, 40):
                addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                                addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Rx OFDM entries
    print(" Anonmyizing RX_OFDM entries")
    try:
        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 284, 290, 296
            for o in (284, 290, 296):
                addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                                addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx entries
    print(" Anonmyizing TX entries")
    try:
        for idx in log_index['TX']:
            # 6-byte addresses at offsets 44, 50, 56
            for o in (44, 50, 56):
                addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                                addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx Low entries
    print(" Anonmyizing TX_LOW entries")
    try:
        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 40, 46, 52
            for o in (40, 46, 52):
                addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                                addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    print("Anonmyizing file step 2 ...")
    print(" Enumerate MAC addresses and their anonymous replacements")
    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # ii is split across the last two octets, so up to 2^16 unique
        # addresses are supported per run
        anon_addr = (0xFF, 0xFF, 0xFF, 0xFF, (ii // 256), (ii % 256))
        addr_map[addr] = anon_addr
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 3: Replace all MAC addresses in the log
    print("Anonmyizing file step 3 ...")
    print(" Replace all MAC addresses in the log")
    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        # Overwrite each recorded 6-byte location in place
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx + 6] = new_addr
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 4: Other annonymization steps
    print("Anonmyizing file step 4 ...")
    print(" Replace STATION_INFO hostnames")

    # Station info entries contain "hostname", the DHCP client hostname field
    # Replace these with a string version of the new anonymous MAC addr
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte MAC addr (already anonymized) at offset 8
            # 20 character ASCII string at offset 16
            addr_o = 8
            name_o = 16
            addr = log_bytes[idx + addr_o:idx + addr_o + 6]
            new_name = "AnonNode {0:02x}_{1:02x}".format(addr[4], addr[5])
            # NUL-pad to the fixed 20-byte field width
            new_name = new_name + '\x00' * (20 - len(new_name))
            log_bytes[idx + name_o:idx + name_o + 20] = bytearray(
                new_name.encode("UTF-8"))
    except KeyError:
        pass

    print(" Remove all WN_CMD_INFO entries")
    # WARPNet Command info entries contain command arguments that could possibly
    # contain sensitive information. Replace with NULL entries.
    # (best effort: any failure is ignored)
    try:
        log_util.overwrite_entries_with_null_entry(log_bytes,
                                                   log_index['WN_CMD_INFO'])
    except:
        pass

    print(" Remove all payloads")
    # Overwrite all payloads with zeros (best effort: errors are ignored)
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Write output files

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
# NOTE(review): fragment — the first line below is the tail of a print()
# call whose opening lines precede this chunk
    os.path.split(STA_LOGFILE)[1], (os.path.getsize(STA_LOGFILE) / 1E6)))

#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------

# Extract the log data and index from the log files
log_data_ap = hdf_util.hdf5_to_log_data(filename=AP_LOGFILE)
raw_log_index_ap = hdf_util.hdf5_to_log_index(filename=AP_LOGFILE)

log_data_sta = hdf_util.hdf5_to_log_data(filename=STA_LOGFILE)
raw_log_index_sta = hdf_util.hdf5_to_log_index(filename=STA_LOGFILE)

# Generate indexes with just Tx and Rx events
entries_filt = ['NODE_INFO', 'RX_OFDM', 'TX', 'TX_LOW']

log_index_txrx_ap = log_util.filter_log_index(raw_log_index_ap,
                                              include_only=entries_filt)
log_index_txrx_sta = log_util.filter_log_index(raw_log_index_sta,
                                               include_only=entries_filt)

# Generate numpy arrays
log_np_ap = log_util.log_data_to_np_arrays(log_data_ap, log_index_txrx_ap)
log_np_sta = log_util.log_data_to_np_arrays(log_data_sta, log_index_txrx_sta)

# Extract the NODE_INFO's and determine each node's MAC address
addr_ap = log_np_ap['NODE_INFO']['wlan_mac_addr']
addr_sta = log_np_sta['NODE_INFO']['wlan_mac_addr']

# Extract Tx entry arrays
tx_ap = log_np_ap['TX']
tx_sta = log_np_sta['TX']
def log_anonymize(filename):
    """Anonymize the log.

    Replaces every MAC address found in the log entries of the HDF5 file
    *filename* with a generated anonymous address, rewrites STATION_INFO
    hostnames, nulls out WN_CMD_INFO entries, zeroes packet payloads, and
    writes the result to "<base>_anon<ext>" next to the input file.

    Relies on module-level state:
      all_addrs  - running list of MAC addresses seen across files
      print_time - flag enabling per-step timing output
                   (presumably a bool set by the caller — TODO confirm)
    """
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    log_index = log_util.filter_log_index(raw_log_index)

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index)

    #####################
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    # map each addresses to a unique anonymous address
    # Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    print("Anonmyizing file step 1 ...")
    start_time = time.time()

    # Station Info entries
    print(" Anonmyizing STATION_INFO entries")
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte address at offsets 8
            o = 8
            addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        # Entry type absent from this log
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx/Rx Statistics entries
    print(" Anonmyizing TXRX_STATS entries")
    try:
        for idx in log_index['TXRX_STATS']:
            # 6-byte addresses at offsets 16
            o = 16
            addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Rx DSSS entries
    print(" Anonmyizing RX_DSSS entries")
    try:
        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 28, 34, 40
            for o in (28, 34, 40):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Rx OFDM entries
    print(" Anonmyizing RX_OFDM entries")
    try:
        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 284, 290, 296
            for o in (284, 290, 296):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx entries
    print(" Anonmyizing TX entries")
    try:
        for idx in log_index['TX']:
            # 6-byte addresses at offsets 44, 50, 56
            for o in (44, 50, 56):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx Low entries
    print(" Anonmyizing TX_LOW entries")
    try:
        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 40, 46, 52
            for o in (40, 46, 52):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    print("Anonmyizing file step 2 ...")
    print(" Enumerate MAC addresses and their anonymous replacements")
    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # ii is split across the last two octets, so up to 2^16 unique
        # addresses are supported per run
        anon_addr = (0xFF, 0xFF, 0xFF, 0xFF, (ii//256), (ii%256))
        addr_map[addr] = anon_addr
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 3: Replace all MAC addresses in the log
    print("Anonmyizing file step 3 ...")
    print(" Replace all MAC addresses in the log")
    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        # Overwrite each recorded 6-byte location in place
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx+6] = new_addr
    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 4: Other annonymization steps
    print("Anonmyizing file step 4 ...")
    print(" Replace STATION_INFO hostnames")

    # Station info entries contain "hostname", the DHCP client hostname field
    # Replace these with a string version of the new anonymous MAC addr
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte MAC addr (already anonymized) at offset 8
            # 20 character ASCII string at offset 16
            addr_o = 8
            name_o = 16
            addr = log_bytes[idx+addr_o : idx+addr_o+6]
            new_name = "AnonNode {0:02x}_{1:02x}".format(addr[4], addr[5])
            # NUL-pad to the fixed 20-byte field width
            new_name = new_name + '\x00' * (20 - len(new_name))
            log_bytes[idx+name_o : idx+name_o+20] = bytearray(new_name.encode("UTF-8"))
    except KeyError:
        pass

    print(" Remove all WN_CMD_INFO entries")
    # WARPNet Command info entries contain command arguments that could possibly
    # contain sensitive information. Replace with NULL entries.
    # (best effort: any failure is ignored)
    try:
        log_util.overwrite_entries_with_null_entry(log_bytes, log_index['WN_CMD_INFO'])
    except:
        pass

    print(" Remove all payloads")
    # Overwrite all payloads with zeros (best effort: errors are ignored)
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Write output files

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
# NOTE(review): fragment — the matching 'if' branch precedes this chunk
else:
    print("Reading log file '{0}' ({1:5.1f} MB)\n".format(
        LOGFILE, (os.path.getsize(LOGFILE)/2**20)))

#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------

# Get the log_data from the file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)

# Get the raw_log_index from the file
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)

# Extract just OFDM Tx events (LTG transmissions merged in)
tx_log_index = log_util.filter_log_index(
    raw_log_index,
    include_only=['TX_HIGH'],
    merge={'TX_HIGH' : ['TX_HIGH', 'TX_HIGH_LTG']})

# Generate numpy array
log_np = log_util.log_data_to_np_arrays(log_data, tx_log_index)
log_tx = log_np['TX_HIGH']

# Define the fields to group by
group_fields = ('addr1',)

# Define the aggregation functions: (input field, function, output name)
stat_calc = (
    ('num_tx',       np.mean, 'avg_num_tx'),
    ('length',       len,     'num_pkts'),
    ('length',       np.mean, 'avg_len'),
    ('length',       sum,     'tot_len'),
    ('time_to_done', np.mean, 'avg_time'))
# Main script #----------------------------------------------------------------------------- # Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Describe the raw_log_index log_util.print_log_index_summary(raw_log_index, "Raw Log Index Contents:") # Filter log index to include all Rx entries and all Tx entries # Merge LTG events into the non-LTG log entry types, to count all Tx/Rx events together log_index = log_util.filter_log_index(raw_log_index, include_only=['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX_HIGH', 'EXP_INFO'], merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'], 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG']}) log_util.print_log_index_summary(log_index, "Filtered Log Index:") # Unpack the log into numpy structured arrays # log_data_to_np_arrays returns a dictionary with one key-value pair per # entry type included in the log_index argument. The log_index keys are reused # as the output dictionary keys. Each output dictionary value is a numpy record array # Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype log_np = log_util.log_data_to_np_arrays(log_data, log_index) exp_info = log_np['EXP_INFO'] for info in exp_info: print("Timestamp = {0}".format(info['timestamp']))
#----------------------------------------------------------------------------- # Get the log_data from the file log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE) # Get the raw_log_index from the file raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE) # Describe the raw_log_index log_util.print_log_index_summary(raw_log_index, "Log Index Contents:") # Filter log index to include all Rx entries and all Tx entries log_index = log_util.filter_log_index( raw_log_index, include_only=['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX_HIGH', 'TX_LOW'], merge={ 'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'], 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'], 'TX_LOW': ['TX_LOW', 'TX_LOW_LTG'] }) log_util.print_log_index_summary(log_index, "Filtered Log Index:") # Unpack the log into numpy structured arrays # log_data_to_np_arrays returns a dictionary with one key-value pair per # entry type included in the log_index argument. The log_index keys are reused # as the output dictionary keys. Each output dictionary value is a numpy record array # Refer to wlan_exp_log.log_entries.py for the definition of each record array datatype log_np = log_util.log_data_to_np_arrays(log_data, log_index) ############################################################################### # Example 0: Print node info / Time info
def _anonymize_entry_addrs(log_bytes, log_index, entry_key, entry_type, addr_idx_map):
    """Record the byte locations of the MAC addresses in one entry type.

    For every log entry of type ``entry_key``, registers the three 6-byte
    MAC address fields (at offsets 4, 10 and 16 of the entry's mac_payload)
    with ``addr_to_replace`` so they can be rewritten later.

    Args:
        log_bytes:    bytearray of the whole log (mutated later, not here)
        log_index:    dict mapping entry-type name -> list of entry offsets
        entry_key:    entry-type name to process (e.g. 'RX_OFDM')
        entry_type:   wlan_exp entry-type object for that key
        addr_idx_map: dict mapping address tuple -> list of byte indexes

    Entry types absent from ``log_index`` are silently skipped, matching the
    original per-section ``except KeyError: pass`` behavior.
    """
    try:
        entry_offsets = log_index[entry_key]
    except KeyError:
        return

    print(" Anonmyizing {0} {1} entries".format(len(entry_offsets), entry_key))

    # mac_payload starts right after the fixed-size fields; the last struct
    # format element is the variable-length payload, hence the [:-1] slice.
    pyld_start = struct.calcsize(''.join(
        entry_type.get_field_struct_formats()[:-1]))

    for idx in entry_offsets:
        # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
        # (the old TX_LOW comment claiming offsets 40/46/52 was stale --
        # the code has always used 4/10/16 for every entry type)
        for o in (4, 10, 16):
            addr_to_replace(tuple(log_bytes[idx+pyld_start+o:idx+pyld_start+o+6]),
                            idx+pyld_start+o,
                            addr_idx_map)


def log_anonymize(filename):
    """Anonymize the log.

    Replaces every MAC address found in Rx/Tx log entries with a unique
    anonymous unicast address, zeroes out all packet payloads, and writes
    the result (with the original user attributes) to a sibling file named
    ``<base>_anon<ext>``.

    Args:
        filename: path of the HDF5 log file to anonymize.
    """
    global all_addrs

    # NOTE(review): ``print_time`` is read below as a module-level flag --
    # confirm the enclosing script defines it before calling this function.

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file (copied to the output file below)
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type;
    # merge the Rx / Tx subtypes that can be processed together
    log_index = log_util.filter_log_index(raw_log_index,
                                          merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
                                                 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
                                                 'TX_LOW' : ['TX_LOW', 'TX_LOW_LTG']})

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index, "Log Index Summary (merged):")

    #---------------------------------------------------------------------
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #         map each address to a unique anonymous address.
    #         Uses tuple(bytearray slice) since bytearray isn't hashable.
    #
    print("Anonmyizing file step 1 ...")
    start_time = time.time()

    # The four entry types differ only in key and entry-type object, so the
    # formerly copy-pasted sections are driven by one table + helper.
    # Timing prints are cumulative since start_time, as in the original.
    for entry_key, entry_type in (('RX_DSSS', entry_types.entry_rx_dsss),
                                  ('RX_OFDM', entry_types.entry_rx_ofdm),
                                  ('TX_HIGH', entry_types.entry_tx_high),
                                  ('TX_LOW',  entry_types.entry_tx_low)):
        _anonymize_entry_addrs(log_bytes, log_index, entry_key, entry_type,
                               addr_idx_map)
        if print_time:
            print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    #
    print("Anonmyizing file step 2 ...")
    print(" Enumerate MAC addresses and their anonymous replacements")

    addr_map = dict()

    for ii, addr in enumerate(all_addrs):
        # The replacement must not have an odd first octet (that would mark
        # it as multicast), so 0xFE is used. Due to FCS errors the number of
        # distinct addresses in a log is potentially large; the low three
        # octets give 2^24 unique anonymous addresses.
        anon_addr = (0xFE, 0xFF, 0xFF, (ii//(256**2)), ((ii//256)%256), (ii%256))
        addr_map[addr] = anon_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 3: Replace all MAC addresses in the log
    #
    print("Anonmyizing file step 3 ...")
    print(" Replace all MAC addresses in the log")

    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx+6] = new_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 4: Other anonymization steps
    #
    print("Anonmyizing file step 4 ...")
    print(" Remove all payloads")

    # Overwrite all payloads with zeros. Best-effort on purpose, but the
    # former bare ``except:`` also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to Exception so those still propagate.
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except Exception:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Write output files
    #

    # Write the modified log to a new HDF5 file next to the original
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after
    # that as the <ext> (kept as-is; os.path.splitext treats dotfiles
    # differently, so the original split logic is preserved)
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return