def test_python_hashing_gives_correct_db(self): # Rudimentary test to ensure that python sharding matches SQL sharding num_forms = 100 form_ids = [create_form_for_test(DOMAIN).form_id for i in range(num_forms)] dbs_for_docs = ShardAccessor.get_database_for_docs(form_ids) for form_id, db_alias in dbs_for_docs.items(): XFormInstanceSQL.objects.using(db_alias).get(form_id=form_id)
def test_get_docs_by_database(self): # test_python_hashing_gives_correct_db ensures the hashing works correctly so this just tests # that get_docs_by_database is consistent with get_database_for_docs form_ids = [str(uuid4()) for i in range(100)] dbs_for_docs = ShardAccessor.get_database_for_docs(form_ids) docs_for_dbs = ShardAccessor.get_docs_by_database(form_ids) for db, doc_ids in docs_for_dbs.items(): for doc_id in doc_ids: self.assertEqual(db, dbs_for_docs[doc_id])
def test_get_database_for_docs(self): # test that sharding 1000 docs gives a distribution withing some tolerance # (bit of a vague test) N = 1000 doc_ids = [str(i) for i in range(N)] doc_db_map = ShardAccessor.get_database_for_docs(doc_ids) doc_count_per_db = defaultdict(int) for db_alias in doc_db_map.values(): doc_count_per_db[db_alias] += 1 num_dbs = len(partition_config.get_form_processing_dbs()) even_split = int(N / num_dbs) tolerance = N * 0.05 # 5% tollerance diffs = [abs(even_split - count) for count in doc_count_per_db.values()] outliers = [diff for diff in diffs if diff > tolerance] message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs) self.assertEqual(len(outliers), 0, message)
def test_get_database_for_docs(self): # test that sharding 1000 docs gives a distribution withing some tolerance # (bit of a vague test) N = 1000 doc_ids = [str(i) for i in range(N)] doc_db_map = ShardAccessor.get_database_for_docs(doc_ids) doc_count_per_db = defaultdict(int) for db_alias in doc_db_map.values(): doc_count_per_db[db_alias] += 1 num_dbs = len(partition_config.get_form_processing_dbs()) even_split = int(N // num_dbs) tolerance = N * 0.05 # 5% tollerance diffs = [abs(even_split - count) for count in doc_count_per_db.values()] outliers = [diff for diff in diffs if diff > tolerance] message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs) self.assertEqual(len(outliers), 0, message)