Пример #1
0
    def test_python_hashing_gives_correct_db(self):
        """Rudimentary check that Python sharding matches SQL sharding.

        Creates a batch of forms, asks ``ShardAccessor.get_database_for_docs``
        which database alias each form ID maps to, and then fetches every form
        through that alias. The lookup is expected to raise if the form is not
        stored in the database the Python hash selected, failing the test.
        """
        num_forms = 100
        form_ids = [create_form_for_test(DOMAIN).form_id for _ in range(num_forms)]

        dbs_for_docs = ShardAccessor.get_database_for_docs(form_ids)

        # Without this check an empty or partial mapping would shorten (or
        # skip) the loop below and the test would pass without verifying
        # every form that was created.
        self.assertEqual(set(dbs_for_docs), set(form_ids))

        for form_id, db_alias in dbs_for_docs.items():
            # No explicit assertion: a failed lookup on the chosen alias is
            # what signals a mismatch between Python and SQL sharding.
            XFormInstanceSQL.objects.using(db_alias).get(form_id=form_id)
Пример #2
0
    def test_python_hashing_gives_correct_db(self):
        """Rudimentary check that Python sharding matches SQL sharding.

        Creates a batch of forms, asks ``ShardAccessor.get_database_for_docs``
        which database alias each form ID maps to, and then fetches every form
        through that alias. The lookup is expected to raise if the form is not
        stored in the database the Python hash selected, failing the test.
        """
        num_forms = 100
        form_ids = [create_form_for_test(DOMAIN).form_id for _ in range(num_forms)]

        dbs_for_docs = ShardAccessor.get_database_for_docs(form_ids)

        # Without this check an empty or partial mapping would shorten (or
        # skip) the loop below and the test would pass without verifying
        # every form that was created.
        self.assertEqual(set(dbs_for_docs), set(form_ids))

        for form_id, db_alias in dbs_for_docs.items():
            # No explicit assertion: a failed lookup on the chosen alias is
            # what signals a mismatch between Python and SQL sharding.
            XFormInstanceSQL.objects.using(db_alias).get(form_id=form_id)
Пример #3
0
 def test_get_docs_by_database(self):
     """Check that ``get_docs_by_database`` agrees with ``get_database_for_docs``.

     Hash correctness itself is covered by test_python_hashing_gives_correct_db;
     this test only verifies that the two accessors give consistent answers
     for the same set of random document IDs.
     """
     form_ids = [str(uuid4()) for _ in range(100)]
     alias_by_doc = ShardAccessor.get_database_for_docs(form_ids)
     docs_by_alias = ShardAccessor.get_docs_by_database(form_ids)

     # Flatten the grouped result into (alias, doc_id) pairs, preserving order.
     grouped_pairs = [
         (alias, doc_id)
         for alias, grouped_ids in docs_by_alias.items()
         for doc_id in grouped_ids
     ]
     for alias, doc_id in grouped_pairs:
         self.assertEqual(alias, alias_by_doc[doc_id])
Пример #4
0
    def test_get_database_for_docs(self):
        """Check that sharding N doc IDs spreads them roughly evenly.

        This is a coarse sanity check: the document count of every form
        processing database must differ from a perfectly even split by no
        more than 5% of N.
        """
        N = 1000
        doc_ids = [str(i) for i in range(N)]
        doc_db_map = ShardAccessor.get_database_for_docs(doc_ids)

        doc_count_per_db = defaultdict(int)
        for db_alias in doc_db_map.values():
            doc_count_per_db[db_alias] += 1

        num_dbs = len(partition_config.get_form_processing_dbs())
        even_split = N // num_dbs
        tolerance = N * 0.05  # 5% tolerance

        diffs = [abs(even_split - count) for count in doc_count_per_db.values()]
        # A database that received no documents never appears in
        # doc_count_per_db, so it would otherwise escape the check entirely.
        # Count each such database as a full even_split away from the target.
        unassigned_dbs = num_dbs - len(doc_count_per_db)
        diffs.extend([even_split] * unassigned_dbs)

        outliers = [diff for diff in diffs if diff > tolerance]
        message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs)
        self.assertEqual(len(outliers), 0, message)
Пример #5
0
    def test_get_database_for_docs(self):
        """Check that sharding N doc IDs spreads them roughly evenly.

        This is a coarse sanity check: the document count of every form
        processing database must differ from a perfectly even split by no
        more than 5% of N.
        """
        N = 1000
        doc_ids = [str(i) for i in range(N)]
        doc_db_map = ShardAccessor.get_database_for_docs(doc_ids)

        doc_count_per_db = defaultdict(int)
        for db_alias in doc_db_map.values():
            doc_count_per_db[db_alias] += 1

        num_dbs = len(partition_config.get_form_processing_dbs())
        # Floor division already yields an int; no extra int() needed.
        even_split = N // num_dbs
        tolerance = N * 0.05  # 5% tolerance

        diffs = [abs(even_split - count) for count in doc_count_per_db.values()]
        # A database that received no documents never appears in
        # doc_count_per_db, so it would otherwise escape the check entirely.
        # Count each such database as a full even_split away from the target.
        unassigned_dbs = num_dbs - len(doc_count_per_db)
        diffs.extend([even_split] * unassigned_dbs)

        outliers = [diff for diff in diffs if diff > tolerance]
        message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs)
        self.assertEqual(len(outliers), 0, message)