Пример #1
0
    def test_hash_in_python(self):
        """Check that Python hashing assigns doc ids to the same shards as SQL hashing.

        Both hash maps are reduced to shard numbers with the same bit mask and
        the resulting doc id -> shard mappings must be equal.
        """
        # ``numbers.Integral`` matches ``int`` on Python 3 and both ``int`` and
        # ``long`` on Python 2, so the Python-2-only ``long`` name is not needed.
        from numbers import Integral

        N = 2048
        doc_ids = [str(i) for i in range(N)]

        sql_hashes = ShardAccessor.hash_doc_ids_sql(doc_ids)

        csiphash_hashes = ShardAccessor.hash_doc_ids_python(doc_ids)
        self.assertEqual(len(csiphash_hashes), N)
        self.assertTrue(
            all(
                isinstance(hash_, Integral)
                for hash_ in csiphash_hashes.values()))

        N_shards = 1024
        # N_shards is a power of two, so N_shards - 1 keeps only the low bits
        part_mask = N_shards - 1

        sql_shards = {
            doc_id: hash_ & part_mask
            for doc_id, hash_ in sql_hashes.items()
        }
        # Must be derived from the Python hashes; deriving it from sql_hashes
        # would make the comparison below trivially true.
        python_shards = {
            doc_id: hash_ & part_mask
            for doc_id, hash_ in csiphash_hashes.items()
        }

        self.assertEqual(python_shards, sql_shards)
Пример #2
0
 def handle(self, domain, **options):
     """Print the shard id and database of every form and case in ``domain``.

     Forms are listed first, then cases, one tab-separated
     ``id / shard / database`` row per document in sorted id order. The
     per-shard and per-database tallies are then handed to ``_print``.
     """
     def report(doc_ids, shard_totals, db_totals):
         # Emit one row per document and tally it against its shard and db.
         for doc_id in sorted(doc_ids):
             shard_id, dbname = ShardAccessor.get_shard_id_and_database_for_doc(doc_id)
             shard_totals[shard_id] += 1
             db_totals[dbname] += 1
             print('{}\t{}\t{}'.format(doc_id, shard_id, dbname))

     column_header = 'id\t\t\t\t\tshard\tdatabase'
     forms_by_shard, forms_by_db = Counter(), Counter()
     cases_by_shard, cases_by_db = Counter(), Counter()

     print('======================== forms ========================')
     print(column_header)
     report(
         FormAccessors(domain=domain).get_all_form_ids_in_domain(),
         forms_by_shard,
         forms_by_db,
     )

     print('\n======================== cases ========================')
     print(column_header)
     report(
         CaseAccessors(domain=domain).get_case_ids_in_domain(),
         cases_by_shard,
         cases_by_db,
     )

     summaries = (
         (forms_by_shard, 'forms by shard'),
         (forms_by_db, 'forms by db'),
         (cases_by_shard, 'cases by shard'),
         (cases_by_db, 'cases by db'),
     )
     for totals, title in summaries:
         _print(totals, title)
Пример #3
0
 def test_get_docs_by_database(self):
     """``get_docs_by_database`` must agree with ``get_database_for_docs``.

     The hashing itself is covered by test_python_hashing_gives_correct_db;
     this test only checks that the two lookups are consistent.
     """
     form_ids = [str(uuid4()) for _ in range(100)]
     expected_db_by_doc = ShardAccessor.get_database_for_docs(form_ids)
     grouped_by_db = ShardAccessor.get_docs_by_database(form_ids)
     for db_alias, ids_in_db in grouped_by_db.items():
         for doc_id in ids_in_db:
             self.assertEqual(db_alias, expected_db_by_doc[doc_id])
Пример #4
0
def get_db_alias_for_partitioned_doc(partition_value):
    """Return the database alias for the doc identified by ``partition_value``.

    When ``settings.USE_PARTITIONED_DATABASE`` is falsy the alias is always
    ``'default'``; otherwise it is looked up through
    ``ShardAccessor.get_database_for_doc``.
    """
    if not settings.USE_PARTITIONED_DATABASE:
        return 'default'

    # Imported only on the partitioned path (presumably to avoid a circular
    # import at module load time -- confirm before hoisting).
    from corehq.form_processor.backends.sql.dbaccessors import ShardAccessor
    return ShardAccessor.get_database_for_doc(partition_value)
Пример #5
0
def get_db_alias_for_partitioned_doc(partition_value):
    """Resolve which database alias holds the doc keyed by ``partition_value``.

    Returns ``'default'`` unless ``settings.USE_PARTITIONED_DATABASE`` is
    truthy, in which case ``ShardAccessor.get_database_for_doc`` decides.
    """
    if settings.USE_PARTITIONED_DATABASE:
        # Function-scope import, executed only when partitioning is enabled.
        from corehq.form_processor.backends.sql.dbaccessors import ShardAccessor
        return ShardAccessor.get_database_for_doc(partition_value)
    return 'default'
Пример #6
0
 def test_hash_doc_ids(self):
     """SQL hashing must return one entry per doc id, each with an ``int`` hash."""
     N = 1001
     doc_ids = [str(i) for i in range(N)]
     hashes = ShardAccessor.hash_doc_ids_sql(doc_ids)
     # assertEqual rather than the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(len(hashes), N)
     self.assertTrue(
         all(isinstance(hash_, int) for hash_ in hashes.values()))
Пример #7
0
    def test_python_hashing_gives_correct_db(self):
        """Rudimentary check that Python sharding matches SQL sharding.

        Every form created here is fetched from the database alias that
        ``get_database_for_docs`` reports for it.
        """
        num_forms = 100
        form_ids = [create_form_for_test(DOMAIN).form_id for _ in range(num_forms)]

        reported_dbs = ShardAccessor.get_database_for_docs(form_ids)
        for form_id, db_alias in reported_dbs.items():
            # No explicit assertion: a failed lookup in the reported database
            # is what fails the test.
            XFormInstanceSQL.objects.using(db_alias).get(form_id=form_id)
Пример #8
0
    def test_python_hashing_gives_correct_db(self):
        """Rudimentary test that Python sharding agrees with SQL sharding.

        Creates a batch of forms and looks each one up in the database alias
        returned for it by ``ShardAccessor.get_database_for_docs``.
        """
        num_forms = 100
        form_ids = []
        for _ in range(num_forms):
            form_ids.append(create_form_for_test(DOMAIN).form_id)

        for form_id, db_alias in ShardAccessor.get_database_for_docs(form_ids).items():
            # The lookup itself is the check; there is no separate assertion.
            XFormInstanceSQL.objects.using(db_alias).get(form_id=form_id)
Пример #9
0
    def test_hash_in_python(self):
        """Check that Python hashing assigns doc ids to the same shards as SQL hashing.

        Both hash maps are reduced to shard numbers with the same bit mask and
        the resulting doc id -> shard mappings must be equal.
        """
        # ``numbers.Integral`` matches ``int`` on Python 3 and both ``int`` and
        # ``long`` on Python 2, so the Python-2-only ``long`` name is not needed.
        from numbers import Integral

        N = 2048
        doc_ids = [str(i) for i in range(N)]

        sql_hashes = ShardAccessor.hash_doc_ids_sql(doc_ids)

        csiphash_hashes = ShardAccessor.hash_doc_ids_python(doc_ids)
        self.assertEqual(len(csiphash_hashes), N)
        self.assertTrue(all(isinstance(hash_, Integral) for hash_ in csiphash_hashes.values()))

        N_shards = 1024
        # N_shards is a power of two, so N_shards - 1 keeps only the low bits
        part_mask = N_shards - 1

        sql_shards = {doc_id: hash_ & part_mask for doc_id, hash_ in sql_hashes.items()}
        # Must be derived from the Python hashes; deriving it from sql_hashes
        # would make the comparison below trivially true.
        python_shards = {doc_id: hash_ & part_mask for doc_id, hash_ in csiphash_hashes.items()}

        self.assertEqual(python_shards, sql_shards)
 def test_settings(self):
     """
     Validate the partitioned setup that the tests in this class assume:
     exactly two shard databases, ``db1`` holding shards [0, 1] and ``db2``
     holding shards [2, 3], both used for form processing, and the two
     reference uuids resolving to their respective databases.
     """
     shard_config = settings.PARTITION_DATABASE_CONFIG['shards']
     self.assertEqual(len(shard_config), 2)
     self.assertIn(self.db1, shard_config)
     self.assertIn(self.db2, shard_config)
     self.assertEqual(shard_config[self.db1], [0, 1])
     self.assertEqual(shard_config[self.db2], [2, 3])

     form_processing_dbs = set(partition_config.get_form_processing_dbs())
     self.assertEqual(form_processing_dbs, {self.db1, self.db2})

     db_for = ShardAccessor.get_database_for_doc
     self.assertEqual(db_for(self.p1_uuid), self.db1)
     self.assertEqual(db_for(self.p2_uuid), self.db2)
 def test_uuids_used(self):
     """The ``p1_*`` uuids must resolve to ``db1`` and the ``p2_*`` uuids to ``db2``."""
     db_for = ShardAccessor.get_database_for_doc

     # uuids expected on the first database
     self.assertEqual(db_for(self.p1_uuid1), self.db1)
     self.assertEqual(db_for(self.p1_uuid2), self.db1)
     self.assertEqual(db_for(self.p1_uuid3), self.db1)

     # uuids expected on the second database
     self.assertEqual(db_for(self.p2_uuid1), self.db2)
     self.assertEqual(db_for(self.p2_uuid2), self.db2)
     self.assertEqual(db_for(self.p2_uuid3), self.db2)
Пример #12
0
 def test_uuids_used(self):
     """Verify the fixture uuids land where the tests expect: p1 -> db1, p2 -> db2."""
     resolve = ShardAccessor.get_database_for_doc
     check = self.assertEqual

     check(resolve(self.p1_uuid1), self.db1)
     check(resolve(self.p1_uuid2), self.db1)
     check(resolve(self.p1_uuid3), self.db1)
     check(resolve(self.p2_uuid1), self.db2)
     check(resolve(self.p2_uuid2), self.db2)
     check(resolve(self.p2_uuid3), self.db2)
Пример #13
0
 def handle(self, domain, **options):
     """List the shard id and database for each form and case in ``domain``.

     Output is one tab-separated ``id / shard / database`` row per document
     (forms first, then cases, each in sorted id order). The accumulated
     per-shard and per-database counters are then passed to ``_print``.
     """
     def tally(doc_ids, by_shard, by_db):
         # Print a row for each document and count it per shard and per db.
         for doc_id in sorted(doc_ids):
             shard_id, dbname = ShardAccessor.get_shard_id_and_database_for_doc(doc_id)
             by_shard[shard_id] += 1
             by_db[dbname] += 1
             print('{}\t{}\t{}'.format(doc_id, shard_id, dbname))

     header_row = 'id\t\t\t\t\tshard\tdatabase'
     forms_by_shard, forms_by_db = Counter(), Counter()
     cases_by_shard, cases_by_db = Counter(), Counter()

     print('======================== forms ========================')
     print(header_row)
     tally(FormAccessors(domain=domain).get_all_form_ids_in_domain(), forms_by_shard, forms_by_db)

     print('\n======================== cases ========================')
     print(header_row)
     tally(CaseAccessors(domain=domain).get_case_ids_in_domain(), cases_by_shard, cases_by_db)

     for counter, title in (
         (forms_by_shard, 'forms by shard'),
         (forms_by_db, 'forms by db'),
         (cases_by_shard, 'cases by shard'),
         (cases_by_db, 'cases by db'),
     ):
         _print(counter, title)
Пример #14
0
def _group_objects_by_db(objects):
    """
    Bucket deserialized objects by the database alias they should be written to.

    :param objects: Deserialized object dictionaries
    :return: List of tuples of (db_alias, [object,...])
    """
    grouped = defaultdict(list)
    for obj in objects:
        model_label = obj['model']
        target_db = router.db_for_write(apps.get_model(model_label))
        routed_to_proxy = (
            settings.USE_PARTITIONED_DATABASE
            and target_db == partition_config.get_proxy_db()
        )
        if routed_to_proxy:
            # Objects routed to the proxy db are re-routed to the database
            # that ShardAccessor reports for their doc id.
            target_db = ShardAccessor.get_database_for_doc(_get_doc_id(model_label, obj))

        grouped[target_db].append(obj)
    return list(grouped.items())
Пример #15
0
def delete_object_from_partitioned_database(obj, partition_value):
    """
    Delete a partitioned model object from the database that holds it.

    :param obj: A Django model object

    :param partition_value: The value that is used to partition the model; this
    value will be used to select the database (``'default'`` is used when
    partitioning is disabled)
    """
    target_db = (
        ShardAccessor.get_database_for_doc(partition_value)
        if settings.USE_PARTITIONED_DATABASE
        else 'default'
    )
    obj.delete(using=target_db)
Пример #16
0
def _group_objects_by_db(objects):
    """
    Group deserialized objects under the database alias each should be written to.

    :param objects: Deserialized object dictionaries
    :return: List of tuples of (db_alias, [object,...])
    """
    by_db = defaultdict(list)
    for obj in objects:
        model_label = obj['model']
        write_db = router.db_for_write(apps.get_model(model_label))
        goes_via_proxy = (
            settings.USE_PARTITIONED_DATABASE
            and write_db == partition_config.proxy_db
        )
        if goes_via_proxy:
            # Swap the proxy alias for the database ShardAccessor reports for
            # this object's doc id.
            write_db = ShardAccessor.get_database_for_doc(_get_doc_id(model_label, obj))

        by_db[write_db].append(obj)
    return list(by_db.items())
Пример #17
0
    def test_get_database_for_docs(self):
        """Sharding 1000 docs should give a distribution within some tolerance.

        (bit of a vague test)
        """
        N = 1000
        doc_ids = [str(i) for i in range(N)]
        doc_db_map = ShardAccessor.get_database_for_docs(doc_ids)
        doc_count_per_db = defaultdict(int)
        for db_alias in doc_db_map.values():
            doc_count_per_db[db_alias] += 1

        form_processing_dbs = partition_config.get_form_processing_dbs()
        # A database that received no docs at all would be absent from the
        # counts and silently skipped below; register it with a zero count so
        # it shows up as an outlier.
        for db_alias in form_processing_dbs:
            doc_count_per_db[db_alias] += 0

        num_dbs = len(form_processing_dbs)
        even_split = N // num_dbs
        tolerance = N * 0.05  # 5% tolerance
        diffs = [abs(even_split - count) for count in doc_count_per_db.values()]
        outliers = [diff for diff in diffs if diff > tolerance]
        message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs)
        self.assertEqual(len(outliers), 0, message)
Пример #18
0
    def test_get_database_for_docs(self):
        """Sharding 1000 docs should give a distribution within some tolerance.

        (bit of a vague test)
        """
        N = 1000
        doc_ids = [str(i) for i in range(N)]
        doc_db_map = ShardAccessor.get_database_for_docs(doc_ids)
        doc_count_per_db = defaultdict(int)
        for db_alias in doc_db_map.values():
            doc_count_per_db[db_alias] += 1

        form_processing_dbs = partition_config.get_form_processing_dbs()
        # A database that received no docs at all would be absent from the
        # counts and silently skipped below; register it with a zero count so
        # it shows up as an outlier.
        for db_alias in form_processing_dbs:
            doc_count_per_db[db_alias] += 0

        num_dbs = len(form_processing_dbs)
        # Floor division gives the same integer on Python 2 and Python 3.
        even_split = N // num_dbs
        tolerance = N * 0.05  # 5% tolerance
        diffs = [abs(even_split - count) for count in doc_count_per_db.values()]
        outliers = [diff for diff in diffs if diff > tolerance]
        message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs)
        self.assertEqual(len(outliers), 0, message)
Пример #19
0
def get_object_from_partitioned_database(model_class, partition_value, partitioned_field_name):
    """
    Fetch a partitioned model object from the database that holds it.

    :param model_class: A Django model class

    :param partition_value: The value that is used to partition the model; this
    value will be used to select the database (``'default'`` is used when
    partitioning is disabled)

    :param partitioned_field_name: The model field on which the object is partitioned; the
    object whose partitioned_field_name attribute equals partition_value is returned

    :return: The model object
    """
    db_name = 'default'
    if settings.USE_PARTITIONED_DATABASE:
        db_name = ShardAccessor.get_database_for_doc(partition_value)

    # The field name is only known at runtime, so build the lookup dynamically.
    lookup = {partitioned_field_name: partition_value}
    return model_class.objects.using(db_name).get(**lookup)
Пример #20
0
def _publish_cases_for_sql(domain, case_records):
    """Send a ``topics.CASE_SQL`` change for each record in ``case_records``.

    Records with a truthy ``doc_subtype`` are published with that value as the
    case type. For the rest, the type is read from ``CommCareCaseSQL`` in the
    database that ``ShardAccessor.get_docs_by_database`` reports for each id.
    """
    typed_records = [rec for rec in case_records if rec.doc_subtype]
    untyped_records = [rec for rec in case_records if not rec.doc_subtype]

    # Records that already carry a type are published as-is.
    for rec in typed_records:
        producer.send_change(
            topics.CASE_SQL,
            _change_meta_for_sql_case(domain, rec.doc_id, rec.doc_subtype),
        )

    # Otherwise look the type up in the database, in chunks of 10000 records.
    for batch in chunked(untyped_records, 10000):
        batch_ids = [rec.doc_id for rec in batch]
        # maps each shard database to the case ids from this batch stored in it
        ids_by_db = ShardAccessor.get_docs_by_database(batch_ids)
        for db_alias, ids_in_db in ids_by_db.items():
            type_rows = (
                CommCareCaseSQL.objects.using(db_alias)
                .filter(case_id__in=ids_in_db)
                .values_list('case_id', 'type')
            )
            # make sure we found the same number of IDs
            assert len(type_rows) == len(ids_in_db)
            for case_id, case_type in type_rows:
                producer.send_change(
                    topics.CASE_SQL,
                    _change_meta_for_sql_case(domain, case_id, case_type),
                )
Пример #21
0
 def test_hash_uuid(self):
     """Python and SQL hashing of a fixed UUID must both give the pinned value."""
     doc_uuid = UUID('403724ef9fe141f2908363918c62c2ff')
     expected_hash = 1415444857
     self.assertEqual(ShardAccessor.hash_doc_id_python(doc_uuid), expected_hash)
     self.assertEqual(ShardAccessor.hash_doc_uuid_sql_for_testing(doc_uuid), expected_hash)
Пример #22
0
 def test_hash_doc_ids(self):
     """SQL hashing must return one entry per doc id, each with an ``int`` hash."""
     N = 1001
     doc_ids = [str(i) for i in range(N)]
     hashes = ShardAccessor.hash_doc_ids_sql(doc_ids)
     # assertEqual rather than the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(len(hashes), N)
     self.assertTrue(all(isinstance(hash_, int) for hash_ in hashes.values()))
Пример #23
0
def get_db_alias_for_partitioned_doc(partition_value):
    """Return the database alias for the doc identified by ``partition_value``.

    ``'default'`` is returned unless ``settings.USE_PARTITIONED_DATABASE`` is
    truthy, in which case ``ShardAccessor.get_database_for_doc`` is consulted.
    """
    if not settings.USE_PARTITIONED_DATABASE:
        return 'default'
    return ShardAccessor.get_database_for_doc(partition_value)
Пример #24
0
 def test_hash_uuid(self):
     """Hash a known UUID with both implementations and compare to the pinned value."""
     known_uuid = UUID('403724ef9fe141f2908363918c62c2ff')
     pinned_hash = 1415444857
     python_hash = ShardAccessor.hash_doc_id_python(known_uuid)
     self.assertEqual(python_hash, pinned_hash)
     sql_hash = ShardAccessor.hash_doc_uuid_sql_for_testing(known_uuid)
     self.assertEqual(sql_hash, pinned_hash)