def main_MAWI():
    """Extract per-IP sub-pcaps from the MAWI WIDE_2020 capture.

    Optionally (``is_stat``) computes and caches per-IP packet statistics for
    the raw pcap and prints the heavy hitters; then, for a hand-picked list
    of IPs, writes one filtered pcap per IP according to ``direction``.
    """
    # Toggle to recompute / inspect per-IP packet statistics first.
    is_stat = False
    if is_stat:
        import os
        # file_name = 'samplepoint-F_202007011400-src_dst_185.8.54.240.pcap'
        file_name = 'samplepoint-F_202007011400.pcap'
        pcap_file = get_file_path(ipt_dir='original_data/reprst',
                                  dataset_name='MAWI/WIDE_2020',
                                  file_name=file_name)
        stat_file = pcap_file + '-stat.dat'
        print(stat_file)
        # Cache the expensive statistics pass on disk.
        if not os.path.exists(stat_file):
            stat = get_stat(pcap_file, out_file=stat_file)
        else:
            stat = load_data(stat_file)
        # Sort by packet count (descending) and show only heavy hitters.
        stat = dict(sorted(stat.items(), key=lambda kv: kv[1], reverse=True))
        for i, (k, v) in enumerate(stat.items()):
            if v > 1000:
                print(f'i={i}, {k}: {v}\n')
        # print(stat)

    # Heavy-hitter IPs picked from the statistics pass above.
    ips = [
        '23.222.78.164', '203.78.4.32', '203.78.8.151', '23.223.19.175',
        '114.234.20.197', '114.234.12.139'
    ]
    # ['202.119.210.242', '23.99.220.247', '203.78.23.227', '51.95.212.72', '163.98.16.76',
    #  '92.206.43.252', '202.66.205.237', '202.75.33.206', '202.75.33.114', '167.50.204.117']
    direction = 'src_dst'
    # Fixed: was an f-string with no placeholders.
    file_name = 'samplepoint-F_202007011400.pcap'
    for v in ips:
        if direction == 'src_dst':
            # filter pcap: tshark -r samplepoint-F_202007011400.pcap -w samplepoint-F_202007011400-src_dst_202.119.210.242.pcap ip.addr=='202.119.210.242'
            pcap_file = get_file_path(ipt_dir='original_data/reprst',
                                      dataset_name='MAWI/WIDE_2020',
                                      file_name=file_name)
        else:
            # Start from the already src_dst-filtered per-IP pcap.
            pcap_file = get_file_path(ipt_dir='original_data/reprst',
                                      dataset_name='MAWI/WIDE_2020',
                                      file_name=file_name + f'-src_dst_{v}.pcap')
        out_file = get_file_path(ipt_dir='original_data/reprst',
                                 dataset_name='MAWI/WIDE_2020',
                                 file_name=file_name + f'-{direction}_{v}.pcap')
        print(pcap_file, out_file)
        filter_ip(pcap_file, out_file, ips=[v], direction=direction, verbose=20)
def get_scam2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019', out_dir='',
                       dataset_name='scam_192.168.143.42', direction='src'):
    """Build normal/abnormal flow lists for the UCHI 2019 'scam' device.

    Filters both raw pcaps down to packets involving the device IP, extracts
    flows, then augments them using a max-interval cutoff taken from the 0.9
    quantile of the normal flow durations.

    Returns:
        dict with keys 'normal_flows', 'abnormal_flows', 'normal_pcaps',
        'abnormal_pcaps', 'direction', 'in_dir'.
    """
    IP = '192.168.143.42'

    # --- normal traffic ---
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir, dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(
        normal_pcap, verbose=10)  # ~1000 normal flows, it will generate > 1000 subflows
    # Cutoff used to split long flows during augmentation.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows, step=10, max_interval=max_interval)
    lg.debug(f'normal_flows: {len(normal_flows)}')

    # --- abnormal traffic ---
    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    # Fixed: original re-validated normal_pcap here instead of the abnormal
    # output path (copy-paste bug).
    check_path(abnormal_pcap)
    # file_name = 'samsung_camera-2daysactiv-src_192.168.143.42-anomaly.pca'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_abnormal.pcap'
    # NOTE(review): keyword is `ipt_dir` here but `in_dir` above — confirm
    # which one get_file_path() actually accepts.
    pcap_file = get_file_path(ipt_dir=in_dir, dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)
    # Finer step (1 vs 10) yields more abnormal subflows.
    abnormal_flows = augment_flows(abnormal_flows, step=1, max_interval=max_interval)
    lg.debug(f'after augmenting abnormal_flows: {len(abnormal_flows)}')

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
def get_ctu_flows(self, in_dir='../Datatsets', direction='src'):
    """ https://www.stratosphereips.org/datasets-iot
        Malware on IoT Dataset

    Filters the CTU IoT captures down to one benign host (192.168.1.196)
    and one infected host (192.168.1.195), extracts flows from each, and
    returns both flow lists plus the filtered pcap paths.
    """
    # --- normal traffic (CoinMiner capture, host .196) ---
    self.normal_pcap = os.path.join(self.out_dir, f'pc_192.168.1.196.pcap')
    check_path(self.normal_pcap)
    # filter pcap
    # file_name = '2019-01-09-22-46-52-src_192.168.1.196_CTU_IoT_CoinMiner_anomaly.pcap'
    file_name = 'CTU-IoT-Malware-Capture-41-1_2019-01-09-22-46-52-192.168.1.196.pcap'
    pcap_file = get_file_path(in_dir=in_dir, dataset_name='CTU/IOT_2017',
                              file_name=file_name)
    filter_ip(pcap_file, self.normal_pcap, ips=['192.168.1.196'],
              direction=direction)
    normal_flows = _pcap2flows(self.normal_pcap, verbose=10)  # normal flows

    # --- abnormal traffic (Mirai capture, host .195) ---
    self.abnormal_pcap = os.path.join(self.out_dir,
                                      f'pc_192.168.1.195_abnormal.pcap')
    # Fixed: original re-validated self.normal_pcap here instead of the
    # abnormal output path (copy-paste bug).
    check_path(self.abnormal_pcap)
    # file_name = '2018-12-21-15-50-14-src_192.168.1.195-CTU_IoT_Mirai_normal.pcap'
    file_name = 'CTU-IoT-Malware-Capture-34-1_2018-12-21-15-50-14-192.168.1.195.pcap'
    # NOTE(review): keyword is `ipt_dir` here but `in_dir` above — confirm
    # which one get_file_path() actually accepts.
    pcap_file = get_file_path(ipt_dir=in_dir, dataset_name='CTU/IOT_2017',
                              file_name=file_name)
    filter_ip(pcap_file, self.abnormal_pcap, ips=['192.168.1.195'],
              direction=direction)
    abnormal_flows = _pcap2flows(self.abnormal_pcap, verbose=10)  # abnormal flows

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcap': self.normal_pcap,
        'abnormal_pcap': self.abnormal_pcap,
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
def get_smtv2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019', out_dir='',
                       dataset_name='smtv_10.42.0.1', direction='src'):
    """Build normal/abnormal flow lists for the UCHI 2019 smart-TV device.

    Normal traffic is filtered to 10.42.0.1 and abnormal traffic to
    10.42.0.119; both flow lists are augmented using a max-interval cutoff
    from the 0.9 quantile of the normal flow durations.

    Returns:
        dict with keys 'normal_flows', 'abnormal_flows', 'normal_pcaps',
        'abnormal_pcaps', 'direction', 'in_dir'.
    """
    IP = '10.42.0.1'

    # --- normal traffic ---
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'pc_10.42.0.1_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir, dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal flows
    # Cutoff used to split long flows during augmentation.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows, step=10, max_interval=max_interval)

    # --- abnormal traffic (different host) ---
    abnormal_pcap = os.path.join(out_dir, f'pc_10.42.0.119_abnormal.pcap')
    # Fixed: original re-validated normal_pcap here instead of the abnormal
    # output path (copy-paste bug).
    check_path(abnormal_pcap)
    file_name = 'pc_10.42.0.119_anomaly.pcap'
    pcap_file = get_file_path(in_dir=in_dir, dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=['10.42.0.119'], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal flows
    abnormal_flows = augment_flows(abnormal_flows, step=10, max_interval=max_interval)

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
def get_bstch2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019', out_dir='',
                        dataset_name='scam_192.168.143.48', direction='src'):
    """Build normal/abnormal flow lists for the UCHI 2019 Bose SoundTouch.

    Filters both raw pcaps down to packets involving the device IP, extracts
    flows, and augments them using a max-interval cutoff from the 0.9
    quantile of the normal flow durations.

    Returns:
        dict with keys 'normal_flows', 'abnormal_flows', 'normal_pcaps',
        'abnormal_pcaps', 'direction', 'in_dir'.
    """
    IP = '192.168.143.48'

    # --- normal traffic ---
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    # file_name = 'bose_soundtouch-2daysactiv-src_192.168.143.48-normal.pcap'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir, dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal flows
    # Cutoff used to split long flows during augmentation.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows, step=10, max_interval=max_interval)

    # --- abnormal traffic ---
    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    # Fixed: original re-validated normal_pcap here instead of the abnormal
    # output path (copy-paste bug).
    check_path(abnormal_pcap)
    # file_name = 'bose_soundtouch-2daysactiv-src_192.168.143.48-anomaly.pcap'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_abnormal.pcap'
    # NOTE(review): keyword is `ipt_dir` here but `in_dir` above — confirm
    # which one get_file_path() actually accepts.
    pcap_file = get_file_path(ipt_dir=in_dir, dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal flows
    # abnormal_flows = augment_flows(abnormal_flows, starts=50, max_len=max_len)
    abnormal_flows = augment_flows(abnormal_flows, step=10, max_interval=max_interval)

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
def get_mawi_flows(self, in_dir='../Datatsets', direction='src'):
    """Build normal/abnormal flow lists from the MAWI WIDE_2019 capture.

    Normal traffic is filtered to host 202.171.168.50 and abnormal traffic
    to host 203.113.113.16; flows are extracted from each filtered pcap.

    Returns:
        dict with keys 'normal_flows', 'abnormal_flows', 'normal_pcap',
        'abnormal_pcap', 'direction', 'in_dir'.
    """
    # --- normal traffic ---
    self.normal_pcap = os.path.join(self.out_dir, f'pc_202.171.168.50.pcap')
    check_path(self.normal_pcap)
    file_name = 'samplepoint-F_201912071400-src_dst_202.171.168.50.pcap'
    pcap_file = get_file_path(in_dir=in_dir, dataset_name='MAWI/WIDE_2019',
                              file_name=file_name)
    filter_ip(pcap_file, self.normal_pcap, ips=['202.171.168.50'],
              direction=direction)
    normal_flows = _pcap2flows(self.normal_pcap, verbose=10)  # normal flows

    # --- abnormal traffic ---
    self.abnormal_pcap = os.path.join(self.out_dir,
                                      f'pc_203.113.113.16_abnormal.pcap')
    # Fixed: original re-validated self.normal_pcap here instead of the
    # abnormal output path (copy-paste bug).
    check_path(self.abnormal_pcap)
    # file_name = 'samplepoint-F_201912071400-src_dst_202.4.27.109.pcap'  # ~5000
    file_name = 'samplepoint-F_201912071400-src_203.113.113.16.pcap'  # ~1500
    # NOTE(review): keyword is `ipt_dir` here but `in_dir` above — confirm
    # which one get_file_path() actually accepts.
    pcap_file = get_file_path(ipt_dir=in_dir, dataset_name='MAWI/WIDE_2019',
                              file_name=file_name)
    filter_ip(pcap_file, self.abnormal_pcap, ips=['203.113.113.16'],
              direction=direction)
    abnormal_flows = _pcap2flows(self.abnormal_pcap, verbose=10)  # abnormal flows

    meta = {'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows,
            'normal_pcap': self.normal_pcap,
            'abnormal_pcap': self.abnormal_pcap,
            'direction': direction,
            'in_dir': in_dir}
    return meta
def get_unb_flows(self, in_dir='../Datatsets', direction='src'):
    """Build normal/abnormal flow lists from UNB CICIDS_2017 Friday traffic.

    If the per-IP pcap/label pair does not exist yet, filters the original
    Friday pcap to ``self.IP``, merges the three Friday label CSVs, and
    filters the merged labels to the same IP. Flows are then extracted and
    split into normal/abnormal by label; abnormal flows are augmented with
    a max-interval cutoff from the 0.9 quantile of normal flow durations.

    NOTE(review): default `in_dir` spells 'Datatsets' (matches the other
    methods' defaults) — confirm the on-disk directory name before fixing.
    """
    # preprocessed the pcap and label on original pcap and label
    self.pcap_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.pcap')
    self.label_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.csv')
    remove_file(self.pcap_file, self.overwrite)
    remove_file(self.label_file, self.overwrite)
    check_path(self.pcap_file)
    check_path(self.label_file)

    if not os.path.exists(self.pcap_file) or not os.path.exists(
            self.label_file):
        # 1. original pcap (typo fixed: was `friday_pacp_orig`)
        friday_pcap_orig = get_file_path(
            ipt_dir=in_dir,
            dataset_name='UNB/CICIDS_2017/',
            data_cat='pcaps/Friday',
            file_name='Friday-WorkingHours.pcap')
        # filter pcap to the device IP.
        # NOTE(review): uses self.direction rather than the `direction`
        # parameter — confirm which is intended.
        filter_ip(friday_pcap_orig, out_file=self.pcap_file, ips=[self.IP],
                  direction=self.direction, keep_original=True)

        # 2. merge original labels
        # NOTE(review): this path is rooted at self.out_dir (not in_dir) —
        # it is only used as the base name for the merged CSV below.
        friday_label = get_file_path(
            ipt_dir=self.out_dir,
            dataset_name='UNB/CICIDS_2017/',
            data_cat='labels/Friday',
            file_name='Friday-WorkingHours-Morning.pcap_ISCX.csv')
        friday_label_orig1 = get_file_path(
            ipt_dir=in_dir,
            dataset_name='UNB/CICIDS_2017/',
            data_cat='labels/Friday',
            file_name='Friday-WorkingHours-Morning.pcap_ISCX.csv')
        friday_label_orig2 = get_file_path(
            ipt_dir=in_dir,
            dataset_name='UNB/CICIDS_2017/',
            data_cat='labels/Friday',
            file_name='Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')
        friday_label_orig3 = get_file_path(
            ipt_dir=in_dir,
            dataset_name='UNB/CICIDS_2017/',
            data_cat='labels/Friday',
            file_name='Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv'
        )
        friday_label_tmp = friday_label + '-all.csv'
        check_path(friday_label_tmp)
        merge_labels(
            [friday_label_orig1, friday_label_orig2, friday_label_orig3],
            mrg_label_path=friday_label_tmp)
        filter_csv_ip(friday_label_tmp, out_file=self.label_file,
                      ips=[self.IP], direction=self.direction)

    ##############################################################################################
    # step 2.1 extract flows
    flows = _pcap2flows(self.pcap_file, verbose=10)  # normal and abnormal flows
    # step 2.2 split normal flow and abnormal flow
    labels = pd.read_csv(self.label_file).values
    normal_flows, abnormal_flows = split_normal_abnormal(flows, labels)
    # augment abnormal flows
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    abnormal_flows = augment_flows(abnormal_flows, step=1,
                                   max_interval=max_interval)
    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcap': self.pcap_file,
        # NOTE(review): 'abnormal_pcap' is set to the label CSV, not a pcap —
        # confirm this is what downstream consumers expect.
        'abnormal_pcap': self.label_file,
        'direction': direction,
        'in_dir': in_dir
    }
    return meta