def merging_flows(directory):
    form = MergingFlowsForm()

    if request.method == 'POST' and form.validate_on_submit():
        path = (session['paths_hist']['root']
                + session['paths_hist'][directory])

        logger.info(f'path: {path}')
        logger.info(f'name: {form.name.data}')

        size = 0
        header = []
        dataset = []
        for file in session['files']:
            # gathering flows.
            header, flows = gatherer.open_csv(path, file)
            logger.info(f'merged flow: {flows[0]}')

            size += len(flows)
            dataset.extend(flows)

        # exporting the merged dataset.
        exporter.flows_csv(header, dataset,
                           f'{util.paths["csv"]}/datasets/',
                           f'{form.name.data}_w{form.window.data}_'
                           f't{form.threshold.data}_s{size}.csv')

        return redirect(url_for('creation.content',
                                function='merging_flows',
                                directory=directory))

    return render_template('creation/merging_flows.html',
                           form=form,
                           directory=directory)

def gathering(self, nfcapd_files):
    # converting the nfcapd files into a temporary csv file.
    gatherer.convert_nfcapd_csv(util.paths['nfcapd'], nfcapd_files,
                                f'{util.paths["csv"]}tmp/', 'realtime')

    csv_file = util.directory_content(f'{util.paths["csv"]}tmp/')[1]
    logger.info(f'csv files: {csv_file[0]}')

    # gathering flows from the generated csv file.
    _, flows = gatherer.open_csv(f'{util.paths["csv"]}tmp/', csv_file[0])

    return flows

def setUpClass(cls): """Initiates the parameters to feed the test functions.""" # gathering flows raw_csv_file = util.directory_content(formatter_path)[1][0] header, flows = gatherer.open_csv(formatter_path, raw_csv_file) # preprocessing flows formatter = Formatter() cls.header = formatter.format_header(header) cls.flows = formatter.format_flows(flows)
def test_extract_specific_features(self):
    """Tests if specific features and labels were correctly extracted
    from the flows."""

    # gathering features
    expt_csv = util.directory_content(extractor_path)[1][-1]
    expt_features = gatherer.open_csv(extractor_path, expt_csv)[1]

    extractor = Extractor([feature + 7 for feature in [1, 3]])
    features, labels = extractor.extract_features_labels(self.flows)

    self.assertListEqual(features, expt_features,
                         'features extracted incorrectly')

def test_extract_features_labels(self):
    """Tests if the features and labels were correctly extracted from
    the flows."""

    # gathering features
    expt_csv = util.directory_content(extractor_path)[1][0]
    expt_features = gatherer.open_csv(extractor_path, expt_csv)[1]

    extractor = Extractor([feature + 7 for feature in range(1, 10)])
    features, labels = extractor.extract_features_labels(self.flows)

    self.assertListEqual(features, expt_features,
                         'features extracted incorrectly')
    self.assertEqual(labels[0], '0', 'labels extracted incorrectly')

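# Note on the "feature + 7" offset passed to Extractor in the tests above: per
# the comments elsewhere in this codebase ("adding extra value to skip first
# unused features"), the selected feature ids are shifted past the leading flow
# columns that are never used as features. A minimal illustrative sketch of
# that mapping (hypothetical helper, not part of the test suite):
def _selected_columns(feature_ids, offset=7):
    """Maps user-selected feature ids to the flow columns actually read."""
    return [fid + offset for fid in feature_ids]

# e.g. _selected_columns([1, 3]) -> [8, 10], matching Extractor([1 + 7, 3 + 7])
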
def setUpClass(cls): """Initiates the parameters to feed the test functions and previous functions to generated the necessary files.""" gatherer.convert_pcap_nfcapd(pcap_path, pcap_file, nfcapd_path, 60) nfcapd_files = util.directory_content(nfcapd_path)[1] gatherer.convert_nfcapd_csv(nfcapd_path, nfcapd_files, csv_path, 'test') csv_file = util.directory_content(csv_path)[1][0] cls.header, cls.flows = gatherer.open_csv(csv_path, csv_file, 30)
def model():
    if request.method == 'POST':
        # creating an absolute path of a temporary directory.
        tmp_directory = mkdtemp()

        model = Model.query.get(request.form['model_pk'])
        dataset = Dataset.query.get(model.dataset_id)
        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)

        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))

        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # gathering flows.
        header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                          dataset.file)
        session['last_models'].remove(model.id)
        logger.info(f'raw flow: {flows[0]}')

        # removing unselected models.
        for model_pk in session['last_models']:
            db.session.delete(Model.query.get(model_pk))
        db.session.commit()

        # preprocessing flows.
        formatter = Formatter(gather=False, train=True)
        flows = formatter.format_flows(flows)
        logger.info(f'final flow: {flows[0]}')

        # extracting features.
        # adding extra value to skip first unused features.
        extractor = Extractor([feature.id + 7 for feature in model.features])
        features, labels = extractor.extract_features_labels(flows)
        logger.info(f'feature: {features[0]}, label: {labels[0]}')

        # tuning and retraining.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, tmp_directory)
        detector.retrain(features, labels)

        # model persistence.
        with open(f'{util.paths["models"]}{model.file}', 'wb') as model_file:
            pickle.dump(detector, model_file)
        logger.info(f'model file: {model.file}')

        # removing the temporary directory used by the Pipeline object.
        rmtree(tmp_directory)

    return redirect(url_for('setting.load'))

def test_aggregate_flows(self):
    """Tests if the flows were correctly aggregated."""

    # gathering flows
    expt_csv = util.directory_content(modifier_path)[1][0]
    expt_header, expt_flows = gatherer.open_csv(modifier_path, expt_csv)

    # preprocessing flows
    formatter = Formatter(gather=False, train=True)
    expt_flows = formatter.format_flows(expt_flows)

    self.assertListEqual(self.header, expt_header,
                         'aggregation performed incorrectly in header')
    self.assertListEqual(self.flows, expt_flows,
                         'aggregation performed incorrectly in flows')

def setUpClass(cls): """Initiates the parameters to feed the test functions.""" # gathering flows raw_csv_file = util.directory_content(modifier_path)[1][-1] header, flows = gatherer.open_csv(modifier_path, raw_csv_file) # preprocessing flows formatter = Formatter() header = formatter.format_header(header) flows = formatter.format_flows(flows) # threshold defined according to the expected result in test dataset modifier = Modifier(label=0, threshold=5) cls.header = modifier.extend_header(header) cls.flows = modifier.aggregate_flows(flows)
def preprocessing_flows(directory):
    form = PreprocessingFlowsForm()

    if request.method == 'POST' and form.validate_on_submit():
        path = (session['paths_hist']['root']
                + session['paths_hist'][directory])

        logger.info(f'path: {path}')
        logger.info(f'sample: {form.sample.data}, '
                    f'threshold: {form.threshold.data}, '
                    f'label: {form.label.data}')

        for file in session['files']:
            # gathering flows.
            header, flows = gatherer.open_csv(path, file, form.sample.data)
            logger.info(f'flow: {flows[0]}')

            # preprocessing flows.
            formatter = Formatter()
            header = formatter.format_header(header)
            flows = formatter.format_flows(flows)
            logger.info(f'formatted flow: {flows[0]}')

            modifier = Modifier(label=form.label.data,
                                threshold=form.threshold.data)
            header = modifier.extend_header(header)
            flows = modifier.aggregate_flows(flows)
            logger.info(f'modified flow: {flows[0]}')

            # exporting flows.
            name = file.split('.csv')[0]
            exporter.flows_csv(header, flows,
                               f'{util.paths["csv"]}/flows/',
                               f'{name}_s{len(flows)}.csv')

        return redirect(url_for('creation.content',
                                function='preprocessing_flows',
                                directory=directory))

    return render_template('creation/preprocessing_flows.html',
                           form=form,
                           directory=directory)

def result():
    models = [Model.query.get(model_pk)
              for model_pk in session['last_models']]
    dataset = Dataset.query.get(models[-1].dataset_id)

    # gathering flows.
    header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                      dataset.file)
    logger.info(f'raw flow: {flows[0]}')

    # preprocessing flows.
    formatter = Formatter(gather=False, train=True)
    flows = formatter.format_flows(flows)
    logger.info(f'final flow: {flows[0]}')

    # extracting features.
    # adding extra value to skip first unused features.
    extractor = Extractor([feature.id + 7
                           for feature in models[-1].features])
    features, labels = extractor.extract_features_labels(flows)
    logger.info(f'feature: {features[0]}, label: {labels[0]}')

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels,
        test_size=dataset.split / 100,
        stratify=labels)

    logger.info(f'x_train: {len(x_train)}')
    logger.info(f'x_test: {len(x_test)}')
    logger.info(f'y_train: {len(y_train)}')
    logger.info(f'y_test: {len(y_test)}')

    for model in models:
        # creating an absolute path of a temporary directory.
        cachedir = mkdtemp()

        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)

        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))

        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # tuning, training and testing.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, cachedir)
        hparam, train_date, train_dur = detector.train(x_train, y_train)
        pred, test_date, test_dur = detector.test(x_test)

        # results.
        outcome = evaluator.metrics(y_test, pred)
        result = Result(train_date=train_date,
                        test_date=test_date,
                        train_duration=train_dur,
                        test_duration=test_dur,
                        accuracy=outcome['accuracy'],
                        precision=outcome['precision'],
                        recall=outcome['recall'],
                        f1_score=outcome['f1_score'],
                        true_negative=outcome['tn'],
                        false_positive=outcome['fp'],
                        false_negative=outcome['fn'],
                        true_positive=outcome['tp'],
                        hyperparameters=str(hparam),
                        model_id=model.id)

        db.session.add(result)
        db.session.commit()

        # removing the temporary directory used by the Pipeline object.
        rmtree(cachedir)

    columns = Model.__table__.columns

    return render_template('setting/result.html',
                           columns=columns,
                           models=models)

def setUpClass(cls): """Initiates the parameters to feed the test functions.""" # gathering flows modified_csv_file = util.directory_content(extractor_path)[1][1] _, cls.flows = gatherer.open_csv(extractor_path, modified_csv_file)