Example #1
    def test_calculate_metric_hex_encoded_length(self):
        result = MetricsAnalyzer.calculate_metric("hex_encoded_length",
                                                  "12c322adc020 12322029620")
        expected_observation = {
            'max_hex_encoded_length': 12,
            'max_hex_encoded_word': '12c322adc020'
        }
        self.assertEqual(result, (12, expected_observation))
Example #2
    def test_calculate_metric_url_length(self):
        result = MetricsAnalyzer.calculate_metric(
            "url_length", "why don't we go http://www.nviso.com")
        expected_observation = {
            'extracted_urls_length': 20,
            'extracted_urls': 'http://www.nviso.com'
        }

        self.assertEqual(result, (20, expected_observation))
Example #3
    def test_calculate_metric_base64_encoded_length(self):
        result = MetricsAnalyzer.calculate_metric(
            "base64_encoded_length", "houston we have a cHJvYmxlbQ==")
        expected_observation = {
            'max_base64_decoded_length': 7,
            'max_base64_decoded_word': 'problem'
        }

        self.assertEqual(result, (7, expected_observation))
Example #4
    # TODO: fix with the new whitelist system
    def _test_whitelist_batch_document_not_process_all(self):
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_with_whitelist.conf")
        analyzer = MetricsAnalyzer("metrics_length_dummy_test")

        # Whitelisted (ignored)
        doc1_without_outlier = copy.deepcopy(
            doc_without_outliers_test_whitelist_01_test_file)
        self.test_es.add_doc(doc1_without_outlier)
        # Not whitelisted (add)
        doc2_without_outlier = copy.deepcopy(
            doc_without_outliers_test_whitelist_02_test_file)
        self.test_es.add_doc(doc2_without_outlier)
        # Not whitelisted
        doc3_without_outlier = copy.deepcopy(
            doc_without_outliers_test_whitelist_03_test_file)
        self.test_es.add_doc(doc3_without_outlier)

        analyzer.evaluate_model()

        self.assertEqual(len(analyzer.outliers), 2)
Example #5
    def test_remove_metric_from_batch_simple_value(self):
        eval_metrics_array = defaultdict()
        aggregator_value = "agg"
        target_value = "dummy_target"
        metrics_value = "dummy_metric"
        observations = {}
        dummy_doc_gen = DummyDocumentsGenerate()
        doc = dummy_doc_gen.generate_document()

        batch = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array,
                                                    aggregator_value,
                                                    target_value,
                                                    metrics_value,
                                                    observations, doc)
        result = MetricsAnalyzer.remove_metric_from_batch(
            batch[aggregator_value], 0)

        expected_aggregator_value = defaultdict(list)
        expected_aggregator_value["metrics"] = []
        expected_aggregator_value["observations"] = []
        expected_aggregator_value["raw_docs"] = []

        self.assertEqual(result, expected_aggregator_value)
Example #6
    def test_evaluate_batch_for_outliers_fetch_remain_metrics(self):
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test.conf"
        )

        eval_metrics_array, aggregator_value, target_value, metrics_value, observations = \
            self._preperate_data_terms_with_doc()
        doc = DummyDocumentsGenerate().generate_document()
        metrics = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array,
                                                      aggregator_value,
                                                      target_value,
                                                      metrics_value,
                                                      observations, doc)

        result = analyzer._evaluate_batch_for_outliers(metrics, False)
        # Expect no outliers and the full batch handed back for later re-evaluation
        self.assertEqual(result, ([], metrics))
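
The `_preperate_data_terms_with_doc` helper is referenced here and in Example #9 but is not shown. A minimal sketch of what it plausibly returns, inferred only from how the tests unpack it; the concrete values and the default for `metrics_value` are assumptions for illustration:

    def _preperate_data_terms_with_doc(self, metrics_value="dummy_metric"):
        # Hypothetical fixture helper (not from the source): builds the five
        # values the tests unpack. Only the shape (five return values with an
        # overridable metrics_value) is implied by the calling tests.
        eval_metrics_array = defaultdict()
        aggregator_value = "agg"
        target_value = "dummy_target"
        observations = {}
        return eval_metrics_array, aggregator_value, target_value, metrics_value, observations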
Example #7
    def test_add_metric_to_batch_no_modification(self):
        eval_metrics_array, aggregator_value, target_value, metrics_value, observations, doc = \
            self._preperate_dummy_data_terms()

        # Create expected result
        observations["target"] = [target_value]
        observations["aggregator"] = [aggregator_value]
        expected_eval_terms = defaultdict()
        expected_eval_terms[aggregator_value] = defaultdict(list)
        expected_eval_terms[aggregator_value]["metrics"] = [metrics_value]
        expected_eval_terms[aggregator_value]["observations"] = [observations]
        expected_eval_terms[aggregator_value]["raw_docs"] = [doc]

        result = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array,
                                                     aggregator_value,
                                                     target_value,
                                                     metrics_value,
                                                     observations, doc)
        self.assertEqual(result, expected_eval_terms)
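
Likewise, `_preperate_dummy_data_terms` is not shown. A plausible sketch that mirrors the inline setup from Example #5; all names and values are assumptions, only the six-value return shape is implied by the unpacking above:

    def _preperate_dummy_data_terms(self):
        # Hypothetical fixture helper (not from the source): the same values
        # built inline in Example #5, returned as a single tuple.
        eval_metrics_array = defaultdict()
        aggregator_value = "agg"
        target_value = "dummy_target"
        metrics_value = "dummy_metric"
        observations = {}
        doc = DummyDocumentsGenerate().generate_document()
        return eval_metrics_array, aggregator_value, target_value, metrics_value, observations, doc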
Example #8
def perform_analysis():
    """ The entrypoint for analysis """
    analyzers = list()

    for config_section_name in settings.config.sections():
        try:
            if config_section_name.startswith("simplequery_"):
                simplequery_analyzer = SimplequeryAnalyzer(config_section_name=config_section_name)
                analyzers.append(simplequery_analyzer)

            if config_section_name.startswith("metrics_"):
                metrics_analyzer = MetricsAnalyzer(config_section_name=config_section_name)
                analyzers.append(metrics_analyzer)

            if config_section_name.startswith("terms_"):
                terms_analyzer = TermsAnalyzer(config_section_name=config_section_name)
                analyzers.append(terms_analyzer)

            if config_section_name.startswith("beaconing_"):
                beaconing_analyzer = BeaconingAnalyzer(config_section_name=config_section_name)
                analyzers.append(beaconing_analyzer)

            if config_section_name.startswith("word2vec_"):
                word2vec_analyzer = Word2VecAnalyzer(config_section_name=config_section_name)
                analyzers.append(word2vec_analyzer)
        except Exception:
            logging.logger.error(traceback.format_exc())

    analyzers_to_evaluate = list()

    for analyzer in analyzers:
        if analyzer.should_run_model or analyzer.should_test_model:
            analyzers_to_evaluate.append(analyzer)

    random.shuffle(analyzers_to_evaluate)
    analyzed_models = 0
    for analyzer in analyzers_to_evaluate:
        try:
            analyzer.evaluate_model()
            analyzed_models += 1
            logging.logger.info("finished processing use case - " + str(analyzed_models) + "/" +
                                str(len(analyzers_to_evaluate)) + " [" +
                                '{:.2f}'.format(round(float(analyzed_models) / float(len(analyzers_to_evaluate)) * 100, 2)) +
                                "% done" + "]")
        except Exception:
            logging.logger.error(traceback.format_exc())
Example #9
    def test_evaluate_batch_for_outliers_add_outlier(self):
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_2.conf"
        )

        eval_metrics_array, aggregator_value, target_value, metrics_value, observations = \
            self._preperate_data_terms_with_doc(metrics_value=12)
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_without_outlier)
        metrics = MetricsAnalyzer.add_metric_to_batch(
            eval_metrics_array, aggregator_value, target_value, metrics_value,
            observations, doc_without_outlier)

        outliers, remaining_metrics = analyzer._evaluate_batch_for_outliers(
            metrics, True)
        analyzer.process_outlier(outliers[0])
        result = [elem for elem in es._scan()][0]
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        self.maxDiff = None
        self.assertEqual(result, doc_with_outlier)
Example #10
    def test_add_metric_to_batch_empty(self):
        eval_metrics_array = defaultdict()
        aggregator_value = ""
        target_value = ""
        metrics_value = ""
        observations = {}
        doc = {}
        # Create expected result
        observations["target"] = [target_value]
        observations["aggregator"] = [aggregator_value]
        expected_eval_terms = defaultdict()
        expected_eval_terms[aggregator_value] = defaultdict(list)
        expected_eval_terms[aggregator_value]["metrics"] = [metrics_value]
        expected_eval_terms[aggregator_value]["observations"] = [observations]
        expected_eval_terms[aggregator_value]["raw_docs"] = [doc]

        result = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array,
                                                     aggregator_value,
                                                     target_value,
                                                     metrics_value,
                                                     observations, doc)
        self.assertEqual(result, expected_eval_terms)
Example #11
def perform_analysis():
    """ The entrypoint for analysis """
    analyzers = list()

    for config_section_name in settings.config.sections():
        _analyzer = None
        try:
            if config_section_name.startswith("simplequery_"):
                _analyzer = SimplequeryAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)

            elif config_section_name.startswith("metrics_"):
                _analyzer = MetricsAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)

            elif config_section_name.startswith("terms_"):
                _analyzer = TermsAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)

            elif config_section_name.startswith("beaconing_"):
                logging.logger.error(
                    "use of the beaconing model is deprecated, please use the terms model "
                    "with the coeff_of_variation trigger method to convert use case " +
                    config_section_name)

            elif config_section_name.startswith("word2vec_"):
                _analyzer = Word2VecAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)
        except Exception:
            logging.logger.error("error while initializing analyzer " +
                                 config_section_name,
                                 exc_info=True)

    analyzers_to_evaluate = list()

    for analyzer in analyzers:
        if analyzer.should_run_model or analyzer.should_test_model:
            analyzers_to_evaluate.append(analyzer)

    random.shuffle(analyzers_to_evaluate)

    for index, analyzer in enumerate(analyzers_to_evaluate):
        if analyzer.configuration_parsing_error:
            continue

        try:
            analyzer.analysis_start_time = datetime.today().timestamp()
            analyzer.evaluate_model()
            analyzer.analysis_end_time = datetime.today().timestamp()
            analyzer.completed_analysis = True

            logging.logger.info("finished processing use case - " +
                                str(index + 1) + "/" +
                                str(len(analyzers_to_evaluate)) + " [" +
                                '{:.2f}'.format(
                                    round((index + 1) /
                                          float(len(analyzers_to_evaluate)) *
                                          100, 2)) + "% done" + "]")
        except elasticsearch.exceptions.NotFoundError:
            analyzer.index_not_found_analysis = True
            logging.logger.warning(
                "index %s does not exist, skipping use case" %
                analyzer.es_index)
        except Exception:
            analyzer.unknown_error_analysis = True
            logging.logger.error("error while analyzing use case",
                                 exc_info=True)
        finally:
            es.flush_bulk_actions(refresh=True)

    return analyzers_to_evaluate
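
A minimal sketch of how `perform_analysis()` might be driven from an entrypoint; the `__main__` wiring and the summary line are assumptions, only the `completed_analysis` flag and the returned list come from the code above:

if __name__ == "__main__":
    # Hypothetical entrypoint (not from the source): run every configured
    # analyzer once and log how many completed successfully.
    evaluated = perform_analysis()
    completed = sum(1 for analyzer in evaluated if analyzer.completed_analysis)
    logging.logger.info("completed " + str(completed) + "/" + str(len(evaluated)) + " use cases")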
Example #12
    def test_calculate_metric_unexist_operation(self):
        self.assertEqual(
            MetricsAnalyzer.calculate_metric("dummy operation", ""),
            (None, dict()))
Example #13
    def test_calculate_metric_entropy(self):
        self.assertEqual(MetricsAnalyzer.calculate_metric("entropy", "test"),
                         (helpers.utils.shannon_entropy("test"), dict()))
Example #14
    def test_calculate_metric_length(self):
        self.assertEqual(MetricsAnalyzer.calculate_metric("length", "test"),
                         (len("test"), dict()))
Example #15
    def test_calculate_metric_numerical_value(self):
        self.assertEqual(
            MetricsAnalyzer.calculate_metric("numerical_value", "12"),
            (float(12), dict()))