def write_dict_to_csv(self, dict_fp, out_file):
    """Convert a gzipped JSON-lines file into a gzipped CSV.

    Args:
        dict_fp: path to the gzipped newline-delimited JSON input.
        out_file: path for the gzipped CSV output.

    The column set comes from ``PCAPToCSV.get_csv_header``; read errors are
    logged rather than raised (best-effort conversion).
    """
    fieldnames = PCAPToCSV.get_csv_header(dict_fp)
    with gzip_writer(out_file) as csv_out:
        dict_writer = csv.DictWriter(csv_out, fieldnames=fieldnames)
        dict_writer.writeheader()
        try:
            with gzip_reader(dict_fp) as json_in:
                for record in json_in:
                    dict_writer.writerow(json.loads(record.strip()))
        except Exception as e:  # pragma: no cover
            self.logger.error(f'Failed to write to CSV because: {e}')
def get_tshark_packet_data(self, pcap_file, dict_fp):
    """Dump per-packet JSON from *pcap_file* via tshark into *dict_fp*.

    Runs ``tshark -r <pcap_file> -n -V -Tjson``, flattens each decoded
    packet record with ``self.flatten_json`` and writes one JSON object per
    line to the gzipped file *dict_fp*. Errors are logged, not raised.
    """
    # Build argv as a list directly: the old join-then-shlex.split round
    # trip split a pcap path containing spaces into multiple arguments.
    cmd = ['tshark', '-r', pcap_file, '-n', '-V', '-Tjson']
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        try:
            with gzip_writer(dict_fp) as f_out:
                for item in self.json_packet_records(process):
                    f_out.write(json.dumps(self.flatten_json(item)) + '\n')
        finally:
            # Close the pipe and reap the child so we never leak a zombie
            # process or file descriptor, even if writing fails midway.
            process.stdout.close()
            process.wait()
    except Exception as e:  # pragma: no cover
        self.logger.error(f'{e}')
def combine_csvs(out_paths, combined_path):
    """Merge several gzipped CSVs into one gzipped CSV at *combined_path*.

    Adds a 'filename' column identifying each row's source file and deletes
    every input file after it has been copied.

    Args:
        out_paths: iterable of gzipped CSV file paths to merge.
        combined_path: destination path for the merged gzipped CSV.
    """
    # First determine the field names from the top line of each input file.
    # Use an order-preserving list (not a set) so the combined CSV has a
    # deterministic column layout across runs.
    fieldnames = ['filename']
    seen = set(fieldnames)
    for filename in out_paths:
        with gzip_reader(filename) as f_in:
            reader = csv.reader(f_in)
            # next(reader, []) tolerates an empty (header-less) input file
            for header in next(reader, []):
                if header not in seen:
                    seen.add(header)
                    fieldnames.append(header)
    # Then copy the data
    with gzip_writer(combined_path) as f_out:
        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
        writer.writeheader()
        for filename in out_paths:
            with gzip_reader(filename) as f_in:
                reader = csv.DictReader(f_in)
                for line in reader:
                    # Tag the row with its source: basename minus the
                    # 'csv.gz' suffix (e.g. '/tmp/foo.csv.gz' -> 'foo.').
                    line['filename'] = filename.split('/')[-1].split(
                        'csv.gz')[0]
                    writer.writerow(line)
            PCAPToCSV.cleanup_files([filename])
def get_tshark_conv_data(self, pcap_file, dict_fp):
    """Extract TCP/UDP conversation statistics from *pcap_file* via tshark.

    Runs ``tshark -n -q -z conv,tcp -z conv,udp``, parses the printed
    statistics tables, and writes one JSON object per conversation
    (newline-delimited) into the gzipped file *dict_fp*.
    tshark failures are logged and result in an empty output file.
    """
    # TODO (add a summary of other packets with protocols?)
    output = ''
    try:
        # TODO perhaps more than just tcp/udp in the future
        options = '-n -q -z conv,tcp -z conv,udp'
        output = subprocess.check_output(
            shlex.split(' '.join(['tshark', '-r', pcap_file, options])))
        output = output.decode('utf-8')
    except Exception as e:  # pragma: no cover
        self.logger.error(f'{e}')
    # tshark prints each statistics table delimited by '===' rules. Scan the
    # text line by line and collect each table's body under its title line
    # (e.g. 'TCP Conversations'), skipping 'Filter:' and blank lines.
    in_block = False
    name = None
    results = {}
    for line in output.split('\n'):
        if line.startswith('==='):
            if in_block:
                # closing rule: current table is finished
                in_block = False
                name = None
                continue
            else:
                # opening rule: the next line carries the table title
                in_block = True
                continue
        if in_block:
            if not name:
                # first line inside a block is the title; strip any ':'
                name = ''.join(line.split(':')).strip()
                results[name] = ''
                continue
            elif not line.startswith('Filter:') and line != '':
                results[name] += line + '\n'
    with gzip_writer(dict_fp) as f_out:
        for result in results.keys():
            if 'Conversations' in result:
                # title looks like '<PROTO> Conversations'; take the protocol
                transport_proto = result.split()[0]
                # handle conversation parsing
                for line in results[result].split('\n'):
                    if line == '' or line.startswith(' '):
                        # header or padding, discard
                        continue
                    else:
                        # TODO perhaps additional features can be extracted for flows from tshark
                        # Each data row has 11 whitespace-separated columns:
                        # src <-> dst, then frame/byte counts each way and
                        # totals, relative start time, and duration.
                        src, _, dst, frames_l, bytes_l, frames_r, bytes_r, frames_total, bytes_total, rel_start, duration = line.split(
                        )
                        # rsplit on the last ':' so IPv6 addresses (which
                        # contain ':') keep the port separation correct
                        conv = {
                            'Source': src.rsplit(':', 1)[0],
                            'Source Port': src.rsplit(':', 1)[1],
                            'Destination': dst.rsplit(':', 1)[0],
                            'Destination Port': dst.rsplit(':', 1)[1],
                            'Transport Protocol': transport_proto,
                            'Frames to Source': frames_l,
                            'Bytes to Source': bytes_l,
                            'Frames to Destination': frames_r,
                            'Bytes to Destination': bytes_r,
                            'Total Frames': frames_total,
                            'Total Bytes': bytes_total,
                            'Relative Start': rel_start,
                            'Duration': duration
                        }
                        f_out.write(json.dumps(conv) + '\n')
def get_pyshark_packet_data(self, pcap_file, dict_fp):
    """Parse *pcap_file* with pyshark and write one flat JSON dict per packet.

    For every packet, frame metadata plus the fields of each recognized
    layer (those listed in ``self.PROTOCOLS``) are flattened into a single
    dict and written newline-delimited to the gzipped file *dict_fp*.
    Layers seen but not in ``self.PROTOCOLS`` are reported with a warning.
    """
    all_protocols = set()
    pcap_file_short = ntpath.basename(pcap_file)
    with gzip_writer(dict_fp) as f_out:
        # keep_packets=False keeps memory bounded on large captures;
        # TCP desegmentation is disabled so records map 1:1 to frames.
        with pyshark.FileCapture(pcap_file,
                                 use_json=True,
                                 include_raw=True,
                                 keep_packets=False,
                                 custom_parameters=[
                                     '-o',
                                     'tcp.desegment_tcp_streams:false',
                                     '-n'
                                 ]) as cap:
            for packet in cap:
                packet_dict = {}
                packet_dict['filename'] = pcap_file_short
                frame_info = packet.frame_info._all_fields
                for key in frame_info:
                    packet_dict[key] = frame_info[key]
                # can overflow the field size for csv
                #packet_dict['raw_packet'] = packet.get_raw_packet()
                # layers stringifies like '[<ETH Layer>, <IP Layer>, ...]';
                # split it back into individual '<XXX Layer>' tokens
                layers = str(packet.layers)
                packet_dict['layers'] = layers
                str_layers = layers[1:-1].split(', ')
                for str_layer in str_layers:
                    # ignore raw layers
                    if 'RAW' not in str_layer:
                        all_protocols.add(str_layer)
                        # only include specified protocols due to unknown parsing for some layers
                        if str_layer in self.PROTOCOLS:
                            # '<TCP Layer>' -> attribute name 'tcp'
                            layer_info = getattr(
                                packet,
                                str_layer.split()[0][1:].lower())._all_fields
                            # check for nested dicts, one level deep
                            for key in layer_info:
                                # DNS doesn't parse well
                                if isinstance(
                                        layer_info[key],
                                        dict) and str_layer != '<DNS Layer>':
                                    for inner_key in layer_info[key]:
                                        packet_dict[inner_key] = layer_info[
                                            key][inner_key]
                                else:
                                    packet_dict[key] = layer_info[key]
                # clean up records: drop keys that don't start with a letter
                # (tshark artifacts) and raw/huge TCP payload fields; iterate
                # a copy since we delete from packet_dict while looping
                packet_dict_copy = deepcopy(packet_dict)
                keys = packet_dict_copy.keys()
                for key in keys:
                    if not key[0].isalpha(
                    ) or key == 'tcp.payload_raw' or key == 'tcp.payload':
                        del packet_dict[key]
                f_out.write(json.dumps(packet_dict) + '\n')
    # report any layers encountered that were not captured in the output
    for protocol in self.PROTOCOLS:
        if protocol in all_protocols:
            all_protocols.remove(protocol)
    if all_protocols:
        self.logger.warning(
            f'Found the following other layers in {pcap_file_short} that were not added to the CSV: {all_protocols}'
        )
def get_writer(out_file, use_gzip):
    """Return a text-mode writable handle for *out_file*.

    Args:
        out_file: destination path.
        use_gzip: when truthy, return a gzip-backed writer; otherwise a
            plain text file opened for writing.
    """
    return gzip_writer(out_file) if use_gzip else open(out_file, 'w')