Пример #1
0
    def test_simple_duplicates_match(self):
        """Check that duplicated raw documents end up as unique regular pairs.

        The scenario stores 50 client/producer pairs (100 unique raw
        documents), each document five times in total, runs the Corrector
        once and expects: all 500 raw documents processed, 50 clean
        documents, 100 raw documents left, and every clean document
        reported as ``regular_pair``.
        """
        # Clean database state
        settings = Settings()
        settings.logger.info('[test] test_simple_duplicates_match')
        mongodb_h = cl_db_handler.MongoDBHandler(settings.MONGODB_USER,
                                                 settings.MONGODB_PWD,
                                                 settings.MONGODB_SERVER)
        mongodb_h.remove_all()
        mongodb_h.create_indexes()

        # Configure test scenario: the limit exceeds the number of documents,
        # so a single run is expected to cover all of them
        settings.CORRECTOR_DOCUMENTS_LIMIT = 10000

        # 100 unique documents, each stored 5 times (1 original + 4 copies)
        total_unique_documents = 100
        total_duplicate_documents = 400
        total_raw_documents = total_unique_documents + total_duplicate_documents
        total_unique_pairs = total_unique_documents // 2

        unique_raw_documents = []
        for _ in range(total_unique_pairs):
            client, producer = ci_helper.create_raw_document_pair()
            unique_raw_documents.append(client.copy())
            unique_raw_documents.append(producer.copy())

        # Cycle through the unique documents until all copies are generated
        raw_documents = [
            unique_raw_documents[i % total_unique_documents].copy()
            for i in range(total_raw_documents)
        ]
        mongodb_h.add_raw_documents(raw_documents)

        # Run Corrector; -1 is a sentinel that the run must overwrite
        c_batch = CorrectorBatch(settings)
        process_dict = {'doc_len': -1}
        c_batch.run(process_dict)

        # Total raw documents
        self.assertEqual(process_dict['doc_len'], total_raw_documents)

        # Check total Pair documents
        clean_docs = mongodb_h.get_clean_documents()
        self.assertEqual(len(clean_docs), total_unique_pairs)

        # Check total raw documents after duplicate removal
        raw_docs = mongodb_h.get_raw_documents()
        self.assertEqual(len(raw_docs), total_unique_documents)

        # Every clean document must be a regular pair; comparing the whole
        # set (not only its size) rejects a uniform but wrong matching type
        match_type_set = {x['matchingType'] for x in clean_docs}
        self.assertEqual(match_type_set, {'regular_pair'})

        # Clean before exit
        mongodb_h.remove_all()
Пример #2
0
    def test_pair_match_with_duplicate_messageId(self):
        """Check pair matching when many pairs share the same messageId.

        Stores 100 pairs left exactly as generated plus 100 pairs whose
        ``messageId`` is overwritten with one shared value, runs the
        Corrector once and expects one clean document per pair, all with
        the same ``matchingType``.
        """
        # Start from an empty, indexed database
        conf = Settings()
        conf.logger.info('[test] test_pair_match_with_duplicate_messageId')
        db = cl_db_handler.MongoDBHandler(conf.MONGODB_USER,
                                          conf.MONGODB_PWD,
                                          conf.MONGODB_SERVER)
        db.remove_all()
        db.create_indexes()

        # Scenario sizes
        conf.CORRECTOR_DOCUMENTS_LIMIT = 10000
        unique_id_pairs = 100
        shared_id_pairs = 100
        expected_pairs = unique_id_pairs + shared_id_pairs
        expected_raw_total = 2 * expected_pairs

        docs = []

        # Pairs that keep the messageId given by the helper
        for _ in range(unique_id_pairs):
            client, producer = ci_helper.create_raw_document_pair()
            docs.extend((client.copy(), producer.copy()))

        # Pairs that are all forced onto one common messageId
        shared_id = "abcde"
        for _ in range(shared_id_pairs):
            client, producer = ci_helper.create_raw_document_pair()
            client['messageId'] = shared_id
            producer['messageId'] = shared_id
            docs.extend((client.copy(), producer.copy()))

        # Store everything in MongoDB
        db.add_raw_documents(docs)

        # Single Corrector run; -1 is a sentinel the run has to replace
        batch = CorrectorBatch(conf)
        run_info = {'doc_len': -1}
        batch.run(run_info)

        # Every raw document was processed
        self.assertEqual(run_info['doc_len'], expected_raw_total)

        # One clean document per pair
        cleaned = db.get_clean_documents()
        self.assertEqual(len(cleaned), expected_pairs)

        # Only one distinct matchingType may be present
        seen_types = {entry['matchingType'] for entry in cleaned}
        self.assertEqual(len(seen_types), 1)

        # Leave the database empty
        db.remove_all()
Пример #3
0
    def setUp(self):
        """Reset the MongoDB and PostgreSQL test stores before each test."""
        # MongoDB: connect, wipe the data and create the indexes again
        mongo_conf = settings.mongo_db
        self._mongodb_h = cl_db_handler.MongoDBHandler(
            mongo_conf['user'],
            mongo_conf['password'],
            mongo_conf['host_address'],
        )
        self._mongodb_h.remove_all()
        self._mongodb_h.create_indexes()

        # PostgreSQL: work on a copy of the settings and drop the two
        # entries that are not passed to the manager constructor
        manager_kwargs = settings.postgres.copy()
        for unused_key in ('buffer_size', 'readonly_users'):
            del manager_kwargs[unused_key]
        self._postgres_manager = PostgreSQL_Manager(**manager_kwargs)
        self._postgres_manager.remove_all()
    def __init__(self, *args, **kwargs):
        """Build the database helpers, mocked configs and manager under test.

        All positional and keyword arguments are forwarded unchanged to the
        parent class initializer.
        """
        super().__init__(*args, **kwargs)

        # Read the test database credentials once
        test_settings = Settings()
        user = test_settings.MONGODB_USER
        pwd = test_settings.MONGODB_PWD
        server = test_settings.MONGODB_SERVER

        # Helpers for direct operations on the test database
        self.mongodb_h = cl_db_handler.MongoDBHandler(user, pwd, server)
        self.mongodb_analyzer_h = ci_analyzer_db_handler.MongoAnalyzerDBHandler(
            user, pwd, server)

        # Mocked database configuration
        mocked_db_conf = Mock()
        mocked_db_conf.MDB_USER = user
        mocked_db_conf.MDB_PWD = pwd
        mocked_db_conf.MDB_SERVER = server
        uri_template = "mongodb://{0}:{1}@{2}/auth_db"
        mocked_db_conf.MONGODB_URI = uri_template.format(user, pwd, server)
        mocked_db_conf.MONGODB_QD = "CI_query_db"
        mocked_db_conf.MONGODB_AD = "CI_analyzer_database"
        self._db_config = mocked_db_conf

        # Mocked analyzer configuration
        mocked_conf = Mock()
        mocked_conf.timestamp_field = "timestamp"
        mocked_conf.service_call_fields = ["service_call"]
        mocked_conf.failed_request_ratio_threshold = 0.7
        mocked_conf.historic_averages_thresholds = {'request_count': 0.95}
        mocked_conf.relevant_cols_nested = [
            "service_call",
            "succeeded",
            "messageId",
            "timestamp",
        ]
        mocked_conf.relevant_cols_general_alternative = [
            ('requestSize', 'clientRequestSize', 'producerRequestSize'),
            ('responseSize', 'clientResponseSize', 'producerResponseSize'),
        ]
        mocked_conf.relevant_cols_general = [
            "_id",
            'totalDuration',
            'producerDurationProducerView',
            'requestNwDuration',
            'responseNwDuration',
            'correctorStatus',
        ]
        mocked_conf.incident_expiration_time = 14400  # minutes
        mocked_conf.training_period_time = 3  # months
        mocked_conf.corrector_buffer_time = 14400
        self._config = mocked_conf

        # The Analyzer database manager that the tests exercise
        self.db_manager = AnalyzerDatabaseManager(mocked_db_conf, mocked_conf)
Пример #5
0
    def test_multiple_orphan_pair_match(self):
        """Check orphan-pair matching when processing spans several runs.

        Stores 50 pairs created with ``orphan_match=True`` (100 raw
        documents), runs the Corrector 5 times with a limit of 20 documents
        and expects 100 documents processed in total, 50 clean documents
        and every clean document reported as ``orphan_pair``.
        """
        # Clean database state
        settings = Settings()
        settings.logger.info('[test] test_multiple_orphan_pair_match')
        mongodb_h = cl_db_handler.MongoDBHandler(settings.MONGODB_USER,
                                                 settings.MONGODB_PWD,
                                                 settings.MONGODB_SERVER)
        mongodb_h.remove_all()
        mongodb_h.create_indexes()

        # Configure test scenario: 5 steps x 20 documents = 100 documents
        settings.CORRECTOR_DOCUMENTS_LIMIT = 20
        total_raw_documents = 100
        total_pairs = total_raw_documents // 2
        total_steps = 5

        # Add pairs
        raw_documents = []
        for _ in range(total_pairs):
            client, producer = ci_helper.create_raw_document_pair(
                orphan_match=True)
            raw_documents.append(client.copy())
            raw_documents.append(producer.copy())
        mongodb_h.add_raw_documents(raw_documents)

        # Run Corrector Multiple Steps
        c_batch = CorrectorBatch(settings)
        docs_processed = 0
        for _ in range(total_steps):
            # -1 is a sentinel: every run must report its own document count
            process_dict = {'doc_len': -1}
            c_batch.run(process_dict)
            self.assertNotEqual(process_dict['doc_len'], -1)
            docs_processed += process_dict['doc_len']

        # Total raw documents
        self.assertEqual(docs_processed, total_raw_documents)
        clean_docs = mongodb_h.get_clean_documents()

        # One clean document per pair
        self.assertEqual(len(clean_docs), total_pairs)

        # Pairs were created with orphan_match=True, so every clean document
        # must be an orphan pair; comparing the whole set (not only its
        # size) rejects a uniform but wrong matching type
        match_type_set = {x['matchingType'] for x in clean_docs}
        self.assertEqual(match_type_set, {'orphan_pair'})

        # Clean before exit
        mongodb_h.remove_all()
Пример #6
0
    def test_simple_orphan_pair_match(self):
        """Check that one orphan-match pair becomes one ``orphan_pair``.

        Stores a single client/producer pair created with
        ``orphan_match=True``, runs the Corrector once and expects both raw
        documents processed and exactly one clean document whose
        ``matchingType`` is ``orphan_pair``.
        """
        # Start from an empty, indexed database
        conf = Settings()
        conf.logger.info('[test] test_simple_orphan_pair_match')
        db = cl_db_handler.MongoDBHandler(conf.MONGODB_USER,
                                          conf.MONGODB_PWD,
                                          conf.MONGODB_SERVER)
        db.remove_all()
        db.create_indexes()

        # Scenario sizes: two raw documents forming one pair
        conf.CORRECTOR_DOCUMENTS_LIMIT = 10
        total_raw_documents = 2
        total_pairs = total_raw_documents // 2

        # Build and store the pair
        docs = []
        for _ in range(total_pairs):
            client, producer = ci_helper.create_raw_document_pair(
                orphan_match=True)
            docs.extend((client.copy(), producer.copy()))
        db.add_raw_documents(docs)

        # Single Corrector run; -1 is a sentinel the run has to replace
        batch = CorrectorBatch(conf)
        run_info = {'doc_len': -1}
        batch.run(run_info)

        # Both raw documents were processed
        self.assertEqual(run_info['doc_len'], total_raw_documents)

        # They were merged into a single clean document
        cleaned = db.get_clean_documents()
        self.assertEqual(len(cleaned), total_pairs)

        # The pair was matched as an orphan pair
        only_doc = cleaned[0]
        self.assertEqual(only_doc['matchingType'], 'orphan_pair')

        # Leave the database empty
        db.remove_all()
Пример #7
0
    def test_client_orphan_matching_with_orphan_producer(self):
        """Check that raw clients get attached to stored orphan producers.

        Creates 30 clean documents, strips the client side from each one
        (marking it ``orphan``) and stores the removed clients as raw
        documents. After 6 Corrector runs with a limit of 5 documents the
        test expects 30 clean documents, each with both client and
        producer set, and all of them counted as ``orphan_pair``.
        """
        # Start from an empty, indexed database
        conf = Settings()
        conf.logger.info(
            '[test] test_client_orphan_matching_with_orphan_producer')
        db = cl_db_handler.MongoDBHandler(conf.MONGODB_USER,
                                          conf.MONGODB_PWD,
                                          conf.MONGODB_SERVER)
        db.remove_all()
        db.create_indexes()

        # Scenario sizes: 6 steps x 5 documents = 30 raw clients
        conf.CORRECTOR_DOCUMENTS_LIMIT = 5
        total_clean_documents = 30
        total_steps = 6

        raw_documents = []
        clean_documents = []

        # Move the client of every clean document into the raw collection,
        # leaving a producer-only orphan behind in the clean data
        for _ in range(total_clean_documents):
            clean_doc = ci_helper.create_clean_document(orphan_match=True)
            raw_documents.append(clean_doc['client'])
            clean_doc['client'] = None
            clean_doc['clientHash'] = None
            clean_doc['matchingType'] = 'orphan'
            clean_documents.append(clean_doc)

        db.add_raw_documents(raw_documents)
        db.add_clean_documents(clean_documents)

        # Run the Corrector step by step, summing the reported counts
        batch = CorrectorBatch(conf)
        processed_total = 0
        for _ in range(total_steps):
            # -1 is a sentinel: every run must report its own count
            run_info = {'doc_len': -1}
            batch.run(run_info)
            self.assertNotEqual(run_info['doc_len'], -1)
            processed_total += run_info['doc_len']

        # Every raw client was processed
        self.assertEqual(processed_total, len(raw_documents))

        # The number of clean documents is unchanged
        cleaned = db.get_clean_documents()
        self.assertEqual(len(cleaned), total_clean_documents)

        # Both sides of every clean document are now filled in
        for entry in cleaned:
            self.assertNotEqual(entry['client'], None)
            self.assertNotEqual(entry['producer'], None)

        # Count clean documents per matchingType
        type_counts = {}
        for entry in cleaned:
            kind = entry['matchingType']
            type_counts[kind] = type_counts.get(kind, 0) + 1

        # Only orphan pairs may remain
        self.assertEqual(type_counts.get('orphan', None), None)
        self.assertEqual(type_counts.get('orphan_pair', None),
                         total_clean_documents)
        self.assertEqual(type_counts.get('regular_pair', None), None)

        # Leave the database empty
        db.remove_all()
Пример #8
0
    def test_multiple_many_matchingType_match(self):
        """Check a shuffled mix of orphan pairs, regular pairs and orphans.

        Scenario (25 raw documents, 15 expected clean documents):
          * 5 orphan pairs (10 docs), created with ``orphan_match=True``
          * 5 regular pairs (10 docs)
          * 5 orphans (5 docs), client documents stored without a producer

        The documents are shuffled, processed in 5 Corrector runs with a
        limit of 5 documents, and the test expects exactly 5 clean
        documents of each matching type.
        """
        # Clean database state
        settings = Settings()
        # Log this test's own name so the log can be matched to the test
        settings.logger.info('[test] test_multiple_many_matchingType_match')
        mongodb_h = cl_db_handler.MongoDBHandler(settings.MONGODB_USER,
                                                 settings.MONGODB_PWD,
                                                 settings.MONGODB_SERVER)
        mongodb_h.remove_all()
        mongodb_h.create_indexes()

        # Configure test scenario: 5 steps x 5 documents = 25 documents
        settings.CORRECTOR_DOCUMENTS_LIMIT = 5
        total_raw_documents = 25
        total_clean_documents = 15
        total_steps = 5

        raw_documents = []
        # Orphan pairs
        for _ in range(5):
            client, producer = ci_helper.create_raw_document_pair(
                orphan_match=True)
            raw_documents.append(client.copy())
            raw_documents.append(producer.copy())
        # Regular pairs
        for _ in range(5):
            client, producer = ci_helper.create_raw_document_pair()
            raw_documents.append(client.copy())
            raw_documents.append(producer.copy())
        # Orphans: keep only the client document of each generated pair
        for _ in range(5):
            client, _producer = ci_helper.create_raw_document_pair()
            raw_documents.append(client.copy())

        # Mix the documents before storing them
        random.shuffle(raw_documents)
        mongodb_h.add_raw_documents(raw_documents)

        # Run Corrector Multiple Steps
        c_batch = CorrectorBatch(settings)
        docs_processed = 0
        for _ in range(total_steps):
            # -1 is a sentinel: every run must report its own document count
            process_dict = {'doc_len': -1}
            c_batch.run(process_dict)
            self.assertNotEqual(process_dict['doc_len'], -1)
            docs_processed += process_dict['doc_len']

        # Total raw documents
        self.assertEqual(docs_processed, total_raw_documents)
        clean_docs = mongodb_h.get_clean_documents()

        # One clean document per pair plus one per orphan
        self.assertEqual(len(clean_docs), total_clean_documents)

        # Count clean documents per matchingType
        matching_type_freq = {}
        for x in clean_docs:
            k = x['matchingType']
            matching_type_freq[k] = matching_type_freq.get(k, 0) + 1

        self.assertEqual(matching_type_freq.get('orphan', None), 5)
        self.assertEqual(matching_type_freq.get('regular_pair', None), 5)
        self.assertEqual(matching_type_freq.get('orphan_pair', None), 5)

        # Clean before exit
        mongodb_h.remove_all()
Пример #9
0
    def test_multiple_all_producer_duplicates_match(self):
        """Check that 100 copies of one producer yield a single orphan.

        Stores one producer document 100 times (1 original + 99 copies),
        processes it in 10 Corrector runs with a limit of 10 documents and
        expects 100 documents processed and exactly one clean document
        whose ``matchingType`` is ``orphan``.
        """
        # Start from an empty, indexed database
        conf = Settings()
        conf.logger.info(
            '[test] test_multiple_all_producer_duplicates_match')
        db = cl_db_handler.MongoDBHandler(conf.MONGODB_USER,
                                          conf.MONGODB_PWD,
                                          conf.MONGODB_SERVER)
        db.remove_all()
        db.create_indexes()

        # Scenario sizes: 10 steps x 10 documents = 100 documents
        conf.CORRECTOR_DOCUMENTS_LIMIT = 10
        total_steps = 10

        # 1 unique document plus 99 copies of it
        total_unique_documents = 1
        total_duplicate_documents = 99
        total_raw_documents = total_unique_documents + total_duplicate_documents

        total_unique_pairs = 1

        # Keep only the producer side of the generated pair
        unique_raw_documents = []
        for _ in range(total_unique_pairs):
            _client, producer = ci_helper.create_raw_document_pair()
            unique_raw_documents.append(producer.copy())

        # Every stored raw document is a copy of that single producer
        docs = [
            unique_raw_documents[0].copy()
            for _ in range(total_raw_documents)
        ]
        db.add_raw_documents(docs)

        # Run the Corrector step by step, summing the reported counts
        batch = CorrectorBatch(conf)
        processed_total = 0
        for _ in range(total_steps):
            # -1 is a sentinel: every run must report its own count
            run_info = {'doc_len': -1}
            batch.run(run_info)
            self.assertNotEqual(run_info['doc_len'], -1)
            processed_total += run_info['doc_len']

        # Every raw document was processed
        self.assertEqual(processed_total, total_raw_documents)

        # Only one clean document may exist
        cleaned = db.get_clean_documents()
        self.assertEqual(len(cleaned), total_unique_pairs)

        # Only one distinct matchingType may be present
        seen_types = {entry['matchingType'] for entry in cleaned}
        self.assertEqual(len(seen_types), 1)

        # That single clean document is an orphan
        self.assertEqual(cleaned[0]['matchingType'], 'orphan')

        # Leave the database empty
        db.remove_all()