Example #1
def merging_flows(directory):
    form = MergingFlowsForm()

    if request.method == 'POST' and form.validate_on_submit():
        path = (session['paths_hist']['root'] +
                session['paths_hist'][directory])
        logger.info(f'path: {path}')
        logger.info(f'name: {form.name.data}')

        # gathering and merging the flows of every selected file.
        size = 0
        header = []
        dataset = []
        for file in session['files']:
            header, flows = gatherer.open_csv(path, file)
            logger.info(f'merged flow: {flows[0]}')
            size += len(flows)
            dataset.extend(flows)

        # exporting the merged dataset.
        exporter.flows_csv(
            header, dataset, f'{util.paths["csv"]}datasets/',
            f'{form.name.data}_w{form.window.data}_'
            f't{form.threshold.data}_s{size}.csv')

        return redirect(
            url_for('creation.content',
                    function='merging_flows',
                    directory=directory))
    return render_template('creation/merging_flows.html',
                           form=form,
                           directory=directory)
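
A hedged sketch of gatherer.open_csv may help here, since its implementation is not part of these examples: every call site passes a directory and a file name, sometimes a third sample argument, and unpacks a (header, flows) pair. The sampling semantics below are an assumption.

import csv
import os


def open_csv(path, file, sample=-1):
    """Hypothetical sketch of gatherer.open_csv, inferred from its call
    sites: returns the header row and the data rows of path/file,
    optionally capped at `sample` rows (assumed semantics)."""
    with open(os.path.join(path, file)) as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)       # first row holds the column names
        flows = list(reader)
    if sample != -1:
        flows = flows[:sample]      # assumption: sample caps the row count
    return header, flows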
Example #2
    def gathering(self, nfcapd_files):
        # converting the nfcapd binaries to CSV in the temporary folder.
        gatherer.convert_nfcapd_csv(util.paths['nfcapd'], nfcapd_files,
                                    f'{util.paths["csv"]}tmp/', 'realtime')
        csv_files = util.directory_content(f'{util.paths["csv"]}tmp/')[1]
        logger.info(f'csv file: {csv_files[0]}')
        _, flows = gatherer.open_csv(f'{util.paths["csv"]}tmp/', csv_files[0])

        return flows
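
This method also leans on util.directory_content, which is not shown either. Every use in these examples indexes [1] to get the file names, so a minimal sketch, assuming index [0] holds the file count, could be:

import os


def directory_content(path):
    """Hypothetical sketch of util.directory_content: only index [1],
    the sorted list of file names, is used by these examples; index [0]
    (here, the file count) is an assumption."""
    files = sorted(os.listdir(path))
    return len(files), files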
Example #3
    @classmethod
    def setUpClass(cls):
        """Initializes the parameters to feed the test functions."""

        # gathering flows
        raw_csv_file = util.directory_content(formatter_path)[1][0]
        header, flows = gatherer.open_csv(formatter_path, raw_csv_file)

        # preprocessing flows
        formatter = Formatter()
        cls.header = formatter.format_header(header)
        cls.flows = formatter.format_flows(flows)
Example #4
    def test_extract_specific_features(self):
        """Tests if specific features and labels were correctly extracted
        from the flows."""

        # gathering features
        expt_csv = util.directory_content(extractor_path)[1][-1]
        expt_features = gatherer.open_csv(extractor_path, expt_csv)[1]

        extractor = Extractor([feature + 7 for feature in [1, 3]])
        features, labels = extractor.extract_features_labels(self.flows)

        self.assertListEqual(features, expt_features,
                             'features extracted incorrectly')
Example #5
    def test_extract_features_labels(self):
        """Tests if the features and labels were correctly extracted from
        the flows."""

        # gathering features
        expt_csv = util.directory_content(extractor_path)[1][0]
        expt_features = gatherer.open_csv(extractor_path, expt_csv)[1]

        extractor = Extractor([feature + 7 for feature in range(1, 10)])
        features, labels = extractor.extract_features_labels(self.flows)

        self.assertListEqual(features, expt_features,
                             'features extracted incorrectly')
        self.assertEqual(labels[0], '0', 'labels extracted incorrectly')
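
The two tests above pin down the Extractor interface without showing its body. A plausible sketch, assuming each flow is a list whose selected indices are the features and whose last element is the label:

class Extractor:
    """Hypothetical sketch of Extractor, inferred from the tests above:
    keeps only the columns listed in selected_features and treats the
    last column of each flow as the label (assumption)."""

    def __init__(self, selected_features):
        self.selected_features = selected_features

    def extract_features_labels(self, flows):
        features, labels = [], []
        for flow in flows:
            # the indices already carry the +7 offset used by the callers
            # to skip the leading metadata columns.
            features.append([flow[idx] for idx in self.selected_features])
            labels.append(flow[-1])  # assumption: label is the last column
        return features, labels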
Example #6
    @classmethod
    def setUpClass(cls):
        """Initializes the parameters to feed the test functions and runs
        the preliminary functions to generate the necessary files."""

        gatherer.convert_pcap_nfcapd(pcap_path, pcap_file, nfcapd_path, 60)

        nfcapd_files = util.directory_content(nfcapd_path)[1]

        gatherer.convert_nfcapd_csv(nfcapd_path, nfcapd_files, csv_path,
                                    'test')

        csv_file = util.directory_content(csv_path)[1][0]

        cls.header, cls.flows = gatherer.open_csv(csv_path, csv_file, 30)
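
The convert_* helpers used in this setUpClass wrap the nfdump toolchain, but their bodies are not part of these examples. A rough sketch of convert_nfcapd_csv, assuming it shells out to nfdump's CSV output mode (the exact flags used by the real gatherer are an assumption):

import subprocess


def convert_nfcapd_csv(nfcapd_path, nfcapd_files, csv_path, name):
    """Hypothetical sketch: converts binary nfcapd captures to CSV files
    by invoking nfdump -o csv on each capture."""
    for nfcapd_file in nfcapd_files:
        with open(f'{csv_path}{name}_{nfcapd_file}.csv', 'w') as output:
            subprocess.run(['nfdump', '-r', f'{nfcapd_path}{nfcapd_file}',
                            '-o', 'csv'],
                           stdout=output, check=True)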
Example #7
def model():
    if request.method == 'POST':
        # creating an absolute path of a temporary directory
        tmp_directory = mkdtemp()
        model = Model.query.get(request.form['model_pk'])
        dataset = Dataset.query.get(model.dataset_id)
        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)
        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))
        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # gathering flows.
        header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                          dataset.file)
        session['last_models'].remove(model.id)
        logger.info(f'raw flow: {flows[0]}')

        # removing unselected models.
        for model_pk in session['last_models']:
            db.session.delete(Model.query.get(model_pk))
        db.session.commit()

        # preprocessing flows.
        formatter = Formatter(gather=False, train=True)
        flows = formatter.format_flows(flows)
        logger.info(f'final flow: {flows[0]}')

        # extracting features.
        # adding an offset to skip the first unused features.
        extractor = Extractor([feature.id + 7 for feature in model.features])
        features, labels = extractor.extract_features_labels(flows)
        logger.info(f'feature: {features[0]}, label: {labels[0]}')

        # tuning and retraining.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, tmp_directory)
        detector.retrain(features, labels)

        # model persistence.
        with open(f'{util.paths["models"]}{model.file}', 'wb') as model_file:
            pickle.dump(detector, model_file)
        logger.info(f'model file: {model.file}')
        # removing the temporary directory used by the Pipeline object.
        rmtree(tmp_directory)

    return redirect(url_for('setting.load'))
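
The classifiers_obj and preprocessing_obj dictionaries consulted above are defined elsewhere; only their key format is visible here, since prep_key and clf_key are built with '_'.join(name.lower().split(' ')). The concrete scikit-learn entries below are illustrative assumptions.

from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# hypothetical lookup tables; only the key format is grounded in the code.
classifiers_obj = {
    'decision_tree': DecisionTreeClassifier(),
    'random_forest': RandomForestClassifier(),
}
preprocessing_obj = {
    'standard_scaler': StandardScaler(),
}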
Example #8
    def test_aggregate_flows(self):
        """Tests if the features were correctly aggregated."""

        # gathering flows
        expt_csv = util.directory_content(modifier_path)[1][0]
        expt_header, expt_flows = gatherer.open_csv(modifier_path, expt_csv)

        # preprocessing flows
        formatter = Formatter(gather=False, train=True)
        expt_flows = formatter.format_flows(expt_flows)

        self.assertListEqual(self.header, expt_header,
                             'aggregation performed incorrectly in header')
        self.assertListEqual(self.flows, expt_flows,
                             'aggregation performed incorrectly in flows')
Example #9
    @classmethod
    def setUpClass(cls):
        """Initializes the parameters to feed the test functions."""

        # gathering flows
        raw_csv_file = util.directory_content(modifier_path)[1][-1]
        header, flows = gatherer.open_csv(modifier_path, raw_csv_file)

        # preprocessing flows
        formatter = Formatter()
        header = formatter.format_header(header)
        flows = formatter.format_flows(flows)

        # threshold defined according to the expected result in test dataset
        modifier = Modifier(label=0, threshold=5)
        cls.header = modifier.extend_header(header)
        cls.flows = modifier.aggregate_flows(flows)
Example #10
def preprocessing_flows(directory):
    form = PreprocessingFlowsForm()

    if request.method == 'POST' and form.validate_on_submit():
        path = (session['paths_hist']['root'] +
                session['paths_hist'][directory])
        logger.info(f'path: {path}')
        logger.info(f'sample: {form.sample.data}, '
                    f'threshold: {form.threshold.data}, '
                    f'label: {form.label.data}')

        for file in session['files']:
            # gathering flows.
            header, flows = gatherer.open_csv(path, file, form.sample.data)
            logger.info(f'flow: {flows[0]}')

            # preprocessing flows.
            formatter = Formatter()
            header = formatter.format_header(header)
            flows = formatter.format_flows(flows)
            logger.info(f'formatted flow: {flows[0]}')

            modifier = Modifier(label=form.label.data,
                                threshold=form.threshold.data)
            header = modifier.extend_header(header)
            flows = modifier.aggregate_flows(flows)
            logger.info(f'modified flow: {flows[0]}')

            # exporting flows.
            name = file.split('.csv')[0]
            exporter.flows_csv(header, flows, f'{util.paths["csv"]}flows/',
                               f'{name}_s{len(flows)}.csv')

        return redirect(
            url_for('creation.content',
                    function='preprocessing_flows',
                    directory=directory))
    return render_template('creation/preprocessing_flows.html',
                           form=form,
                           directory=directory)
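
Both this view and Example #1 finish by handing the processed flows to exporter.flows_csv. Judging by the call sites (header, rows, destination directory, file name), a minimal sketch could be:

import csv
import os


def flows_csv(header, flows, dst_path, file_name):
    """Hypothetical sketch of exporter.flows_csv, inferred from its call
    sites: writes the header followed by the flows to dst_path/file_name."""
    os.makedirs(dst_path, exist_ok=True)
    with open(os.path.join(dst_path, file_name), 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        writer.writerows(flows)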
Example #11
def result():
    models = [Model.query.get(model_pk) for model_pk in session['last_models']]
    dataset = Dataset.query.get(models[-1].dataset_id)

    # gathering flows.
    header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                      dataset.file)
    logger.info(f'raw flow: {flows[0]}')

    # preprocessing flows.
    formatter = Formatter(gather=False, train=True)
    flows = formatter.format_flows(flows)
    logger.info(f'final flow: {flows[0]}')

    # extracting features.
    # adding an offset to skip the first unused features.
    extractor = Extractor([feature.id + 7 for feature in models[-1].features])
    features, labels = extractor.extract_features_labels(flows)
    logger.info(f'feature: {features[0]}, label: {labels[0]}')

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=dataset.split / 100, stratify=labels)
    logger.info(f'x_train: {len(x_train)}')
    logger.info(f'x_test: {len(x_test)}')
    logger.info(f'y_train: {len(y_train)}')
    logger.info(f'y_test: {len(y_test)}')

    for model in models:
        # creating an absolute path of a temporary directory.
        cachedir = mkdtemp()
        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)
        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))
        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # tuning, training, and testing.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, cachedir)

        hparam, train_date, train_dur = detector.train(x_train, y_train)
        pred, test_date, test_dur = detector.test(x_test)

        # results.
        outcome = evaluator.metrics(y_test, pred)
        result = Result(train_date=train_date,
                        test_date=test_date,
                        train_duration=train_dur,
                        test_duration=test_dur,
                        accuracy=outcome['accuracy'],
                        precision=outcome['precision'],
                        recall=outcome['recall'],
                        f1_score=outcome['f1_score'],
                        true_negative=outcome['tn'],
                        false_positive=outcome['fp'],
                        false_negative=outcome['fn'],
                        true_positive=outcome['tp'],
                        hyperparameters=str(hparam),
                        model_id=model.id)
        db.session.add(result)
        db.session.commit()
        # removing the temporary directory used by the Pipeline object.
        rmtree(cachedir)

    columns = Model.__table__.columns

    return render_template('setting/result.html',
                           columns=columns,
                           models=models)
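
The outcome dictionary consumed by the Result constructor fixes the interface of evaluator.metrics. Below is a sketch built on scikit-learn that yields exactly those keys; the binary-classification details (string labels, pos_label='1') are assumptions:

from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             precision_score, recall_score)


def metrics(y_test, pred):
    """Hypothetical sketch of evaluator.metrics: returns the keys read
    by the Result model above (accuracy, precision, recall, f1_score,
    tn, fp, fn, tp)."""
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    return {'accuracy': accuracy_score(y_test, pred),
            'precision': precision_score(y_test, pred, pos_label='1'),
            'recall': recall_score(y_test, pred, pos_label='1'),
            'f1_score': f1_score(y_test, pred, pos_label='1'),
            'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}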
Example #12
    @classmethod
    def setUpClass(cls):
        """Initializes the parameters to feed the test functions."""

        # gathering flows
        modified_csv_file = util.directory_content(extractor_path)[1][1]
        _, cls.flows = gatherer.open_csv(extractor_path, modified_csv_file)