Example #1
import os

from sklearn.model_selection import train_test_split

# load, dump, generate_model, MODEL, and RANDOM_STATE are project-specific
# helpers (load/dump appear to live in odet.utils.tool; the rest are assumed
# to be defined elsewhere in the repo).

def main():
    # load data
    data_file = 'out/data/demo_IAT.dat'
    X, y = load(data_file)
    # split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=RANDOM_STATE)
    print(
        f'X_train.shape: {X_train.shape}, X_test.shape: {X_test.shape}, y_train.shape: {y_train.shape}, '
        f'y_test.shape: {y_test.shape}')

    # model_name in ['OCSVM', 'KDE', 'IF', 'AE', 'GMM', 'PCA']
    model_name = 'OCSVM'
    print(f'model_name: {model_name}')
    # create detection model
    model = generate_model(model_name)

    ndm = MODEL(model,
                score_metric='auc',
                verbose=10,
                random_state=RANDOM_STATE)

    # learn the model from the train set
    ndm.train(X_train)

    # evaluate the learned model
    ndm.test(X_test, y_test)

    # dump data to disk
    out_dir = os.path.dirname(data_file)
    dump((model, ndm.history),
         out_file=f'{out_dir}/{ndm.model_name}-results.dat')

    print(ndm.train.tot_time, ndm.test.tot_time, ndm.score)
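generate_model is not shown here; below is a minimal sketch of what such a factory could look like, built on scikit-learn estimators. This is an assumption for illustration, not the repo's actual implementation, and the 'AE' (autoencoder) case is omitted because it needs a neural-network model.

from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.mixture import GaussianMixture
from sklearn.neighbors import KernelDensity
from sklearn.svm import OneClassSVM

def generate_model(model_name):
    # map each supported name to an off-the-shelf outlier/novelty detector
    models = {
        'OCSVM': OneClassSVM(kernel='rbf', nu=0.5),
        'KDE': KernelDensity(kernel='gaussian', bandwidth=1.0),
        'IF': IsolationForest(random_state=42),
        'GMM': GaussianMixture(n_components=1, random_state=42),
        'PCA': PCA(n_components=2),
    }
    if model_name not in models:
        raise NotImplementedError(model_name)
    return models[model_name]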
Example #2
def get_correlation(in_dir='',
                    datasets='',
                    feature='SIZE',
                    header=True,
                    out_dir='',
                    out_file='.dat'):
    corr_results = {}
    for i, dataset in enumerate(datasets):
        in_file = os.path.join(in_dir, dataset, feature, f"header_{header}",
                               'Xy.dat')
        lg.debug(in_file)
        data = load(in_file)
        X_train, y_train, X_val, y_val, X_test, y_test = split_train_val_test(
            data['X'], data['y'], shuffle=True, random_state=RANDOM_STATE)
        # normalization
        ss, X_train, y_train, X_val, y_val, X_test, y_test = normalize(
            X_train, y_train, X_val, y_val, X_test, y_test)
        # step 2: compute the correlation between each header feature and the labels
        dim = X_test.shape[1]
        if feature == 'IAT':
            # iat_dim + header_dim = dim, where header_dim = 8 + ttl_dim and
            # ttl_dim = size_dim = iat_dim + 1 (n packets give n sizes/TTLs but n-1 IATs)
            # => iat_dim + 8 + (iat_dim + 1) = dim  =>  iat_dim = (dim - 9) // 2
            start_idx = (dim - 8 - 1) // 2
        elif feature == 'SIZE':
            # size_dim + header_dim = dim, where header_dim = 8 + size_dim
            # => size_dim = (dim - 8) // 2
            # header features: 8 TCP flags + TTLs; this only works for 'SIZE'
            start_idx = (dim - 8) // 2
        else:
            msg = f'unsupported feature: {feature}'
            raise NotImplementedError(msg)
        corrs = []
        lg.debug(f'header_feature_start_idx: {start_idx}')
        for j in range(9):  # header features: 8 TCP flags + the first TTL
            _corr = _get_each_correlation(X_test[:, start_idx + j], y_test)
            corrs.append(_corr)
        corr_results[(in_file, dataset, feature, X_test.shape)] = corrs

        _out_file = os.path.join(out_dir, dataset, 'correlation.dat')
        check_path(_out_file)
        dump(corrs, _out_file)
        print(_out_file)
    # save all results
    check_path(out_file)
    dump(corr_results, out_file)

    return out_file
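_get_each_correlation is not shown above; here is a plausible sketch, assuming it computes the Pearson correlation between one feature column and the labels:

import numpy as np

def _get_each_correlation(x, y):
    # Pearson correlation between one feature column and the labels;
    # np.corrcoef returns the full 2x2 matrix, so take the off-diagonal entry.
    return np.corrcoef(np.asarray(x, dtype=float), np.asarray(y, dtype=float))[0, 1]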
Example #3
    def generate(self):
        remove_file(self.Xy_file, self.overwrite)
        if os.path.exists(self.Xy_file):
            Xy_meta = load(self.Xy_file)
        else:
            if self.dataset_name in ['CTU']:
                self._generate_pcap()  # generate data
                flows_meta = self._generate_flows()  # normal_abnormal.data
                # Xy (fixed feature data)
                Xy_meta = self._generate_features(flows_meta['normal_flows'],
                                                  flows_meta['abnormal_flows'])
            else:
                msg = f'{self.dataset_name}'
                raise NotImplementedError(msg)
        self.X, self.y = Xy_meta['X'], Xy_meta['y']

        return Xy_meta
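remove_file is called with the overwrite flag right before the existence check; a minimal sketch, assuming it deletes the cached file only when overwriting is requested (so a fresh Xy file is regenerated below):

import os

def remove_file(path, overwrite=False):
    # delete the cached file only when the caller asked to overwrite it
    if overwrite and os.path.exists(path):
        os.remove(path)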
Example #4
def main():
    root_dir = 'examples/representation'
    in_dir = f'{root_dir}/report/out/'
    corr_file = os.path.join(in_dir, 'correlation', 'correlation.dat')
    DATASETS = ['UNB(PC1)', 'CTU', 'MAWI', 'UCHI(SFRIG_2021)']
    feature = 'SIZE'  # the correlation code only works for SIZE
    header = True
    out_dir = f'{root_dir}/report/out/correlation'
    get_correlation(in_dir=f'{root_dir}/out/src',
                    datasets=DATASETS,
                    feature=feature,
                    header=header,
                    out_dir=out_dir,
                    out_file=corr_file)
    data = load(corr_file)
    out_file = os.path.join(out_dir, feature, f"header_{header}",
                            'correlation.pdf')
    plot_correlation_multi(data, out_file=out_file, show=True)
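If this driver lives in a script, the usual entry-point guard runs it:

if __name__ == '__main__':
    main()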
Example #5
def report(in_file='gather.dat', delimiter=','):
    res = load(in_file)
    out_file = os.path.join(os.path.dirname(in_file), 'report.csv')
    check_path(out_file)
    with open(out_file, 'w') as f:
        for header in HEADER:
            for tuning in TUNING:
                for feature in FEATURES:
                    for dataset in DATASETS:
                        for model in MODELS:
                            data = get_one_res(res, f'header_{header}',
                                               f'tuning_{tuning}', feature,
                                               dataset, model)
                            line = delimiter.join(data) + '\n'
                            lg.debug(line)
                            f.write(line)

    lg.info(f'report: {out_file}')
    return out_file
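check_path is called before every write in these snippets; a minimal sketch, assuming it simply creates the parent directory of the target path:

import os

def check_path(path):
    # make sure the parent directory exists before writing to `path`
    dir_path = os.path.dirname(path)
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)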
Example #6
    def generate(self):
        remove_file(self.Xy_file, self.overwrite)

        if os.path.exists(self.Xy_file):
            Xy_meta = load(self.Xy_file)
        else:
            if self.dataset_name in [
                    'UCHI(SFRIG_2021)', 'UCHI(SMTV_2019)', 'UCHI(GHOME_2019)',
                    'UCHI(SCAM_2019)', 'UCHI(BSTCH_2019)'
            ]:
                self._generate_pcap()  # generate data
                flows_meta = self._generate_flows()  # normal_abnormal.data
                # Xy (fixed feature data)
                Xy_meta = self._generate_features(flows_meta['normal_flows'],
                                                  flows_meta['abnormal_flows'])
            else:
                msg = f'{self.dataset_name}'
                raise NotImplementedError(msg)
        self.X, self.y = Xy_meta['X'], Xy_meta['y']
        self.Xy_meta = Xy_meta
        return self.Xy_meta
Example #7
    def generate(self):
        if os.path.exists(self.Xy_file):
            self.X, self.y = load(self.Xy_file)
        else:
            q_interval = 0.9
            # pcap to flows
            flows = self.pcap2flows(self.pcap_file)

            # flows to subflows
            labels = [1] * len(flows)
            durations = [_get_flow_duration(pkts) for fid, pkts in flows]
            interval = _get_split_interval(durations, q_interval=q_interval)
            subflows, labels = self.flow2subflows(flows,
                                                  interval=interval,
                                                  labels=labels)

            # get dimension
            normal_flows = subflows
            num_pkts = [len(pkts) for fid, pkts in normal_flows]  # only on normal flows
            # use the same q_interval to get the dimension
            dim = int(np.floor(np.quantile(num_pkts, q_interval)))
            lg.info(f'dim={dim}')

            # flows to features
            features, fids = self.flow2features(subflows,
                                                name=self.feature_name)

            # fix each feature vector to a constant size (dim)
            features = self.fix_feature(features, dim=dim)

            self.X = features
            self.y = np.asarray([0] * len(features))

            # save data to disk
            check_path(os.path.dirname(self.Xy_file))
            dump((self.X, self.y), out_file=self.Xy_file)

        return self.X, self.y
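_get_flow_duration is assumed to measure a flow's time span; a minimal sketch, assuming scapy-style packets with a .time attribute:

def _get_flow_duration(pkts):
    # duration = arrival time of the last packet minus that of the first
    return float(pkts[-1].time) - float(pkts[0].time)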
Example #8
    def _generate_flows(self):
        self.subflows_file = os.path.join(self.out_dir,
                                          'normal_abnormal_subflows.dat')
        remove_file(self.subflows_file, self.overwrite)
        if os.path.exists(self.subflows_file):
            return load(self.subflows_file)

        # step 2: extract flows from pcap
        ##############################################################################################
        meta = load(self.orig_flows)
        normal_flows, abnormal_flows = meta['normal_flows'], meta[
            'abnormal_flows']
        lg.debug(
            f'original normal flows: {len(normal_flows)} and abnormal flows: {len(abnormal_flows)}'
        )
        qs = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1]
        len_stat = np.quantile([len(pkts) for f, pkts in normal_flows], q=qs)
        lg.debug(
            f'flows: {len(normal_flows)}, length statistic: {len_stat}, when q = {qs}'
        )
        meta = {
            'flows': normal_flows,
            'len_stat': (len_stat, qs),
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows
        }
        dump(meta,
             out_file=os.path.join(self.out_dir, 'normal_abnormal_flows.dat'))

        # step 2.2: get durations of normal flows only
        self.flows_durations = [
            _get_flow_duration(pkts) for (fids, pkts) in normal_flows
        ]
        normal_durations_stat = np.quantile(self.flows_durations, q=qs)
        lg.debug(f'normal_durations_stat: {normal_durations_stat}')
        self.subflow_interval = np.quantile(
            self.flows_durations,
            q=self.q_flow_dur)  # the q_flow_dur quantile of flow durations
        lg.debug(
            f'---subflow_interval: {self.subflow_interval}, q_flow_dur: {self.q_flow_dur}'
        )
        # step 2.3: split flows into subflows
        normal_flows, _ = _flows2subflows(normal_flows,
                                          interval=self.subflow_interval,
                                          labels=['0'] * len(normal_flows))
        abnormal_flows, _ = _flows2subflows(abnormal_flows,
                                            interval=self.subflow_interval,
                                            labels=['1'] * len(abnormal_flows))
        lg.debug(
            f'normal_flows: {len(normal_flows)}, and abnormal_flows: {len(abnormal_flows)} '
            f'with interval: {self.subflow_interval} and q: {self.q_flow_dur}')
        meta = {
            'normal_flows_durations': self.flows_durations,
            'normal_durations_stat': (normal_durations_stat, qs),
            'subflow_interval': self.subflow_interval,
            'q_flow_dur': self.q_flow_dur,
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows
        }
        dump(meta, out_file=self.subflows_file)

        # only return subflows
        return meta
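_flows2subflows is not shown; below is a sketch under the assumption that a flow is cut whenever the gap between consecutive packets exceeds interval (the repo's actual splitting rule may differ):

def _flows2subflows(flows, interval, labels):
    # split each (fid, pkts) flow at inter-packet gaps larger than `interval`
    subflows, sub_labels = [], []
    for (fid, pkts), label in zip(flows, labels):
        if not pkts:
            continue
        current = [pkts[0]]
        for prev, cur in zip(pkts, pkts[1:]):
            if float(cur.time) - float(prev.time) > interval:
                subflows.append((fid, current))
                sub_labels.append(label)
                current = []
            current.append(cur)
        subflows.append((fid, current))
        sub_labels.append(label)
    return subflows, sub_labels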
Example #9
print(dir())  # names defined in the current scope

a = 10
print(dir(a))  # attributes and methods of an int instance

class DEMO:
	def __init__(self):
		self.val = 10    # public attribute
		self._val = 10   # "private" by convention only
		self.__val = 10  # name-mangled to _DEMO__val

a = DEMO()
print(dir(a))
# print(a.__val)  # would raise AttributeError: __val is name-mangled outside the class
print(a._DEMO__val)  # the mangled name is still accessible: prints 10

# a separate snippet: load a previously saved dataset with odet
from odet.utils.tool import load

in_file = 'examples/data/pc_192.168.10.5_AGMT.pcap-subflow_interval=None_q_flow_duration=0.9-all.dat'
data = load(in_file)
# print(data)
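load (and the dump used throughout these examples) appear to be thin serialization wrappers; a minimal sketch, assuming they are pickle-based, which may differ from the real odet.utils.tool implementation:

import pickle

def load(in_file):
    # read a pickled object back from disk
    with open(in_file, 'rb') as f:
        return pickle.load(f)

def dump(data, out_file):
    # write any picklable object to disk
    with open(out_file, 'wb') as f:
        pickle.dump(data, f)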