def test_split_input(self):
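        """split_input yields one reader per batch, up to SHARD_COUNT."""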
        SHARD_COUNT = 10
        BATCH_SIZE = 2
        mapper_spec = model.MapperSpec(
            "FooHandler",
            "mapreduce_utils.DatastoreQueryInputReader",
            {
                "input_reader": {
                    "entity_kind": self.TEST_ENTITY_IMPORT_PATH,
                    "batch_size": BATCH_SIZE,
                }
            },
            SHARD_COUNT)

        def num_expected():
            # Number of readers we expect: one per batch, capped at the
            # shard count.  Reads the current value of BATCH_SIZE.
            batch_size = min(len(self.dataSet), BATCH_SIZE)
            num_batches = int(math.ceil(len(self.dataSet) / float(batch_size)))
            return min(num_batches, SHARD_COUNT)

        ds_input_readers = DatastoreQueryInputReader.split_input(mapper_spec)
        self.assertEqual(3, num_expected())  # 1-3, 3-5, 5-None
        self.assertEqual(3, len(ds_input_readers))

        # batch_size = the larger half of the data set
        BATCH_SIZE = int(math.ceil(len(self.dataSet) / 2.0))
        mapper_spec = model.MapperSpec(
            "FooHandler",
            "mapreduce_utils.DatastoreQueryInputReader",
            {
                "input_reader": {
                    "entity_kind": self.TEST_ENTITY_IMPORT_PATH,
                    "batch_size": BATCH_SIZE,
                }
            },
            SHARD_COUNT)
        ds_input_readers = DatastoreQueryInputReader.split_input(mapper_spec)
        self.assertEqual(2, num_expected())  # 1-4, 4-None
        self.assertEqual(2, len(ds_input_readers))

        # batch_size larger than the whole data set
        BATCH_SIZE = len(self.dataSet) * 2
        mapper_spec = model.MapperSpec(
            "FooHandler",
            "mapreduce_utils.DatastoreQueryInputReader",
            {
                "input_reader": {
                    "entity_kind": self.TEST_ENTITY_IMPORT_PATH,
                    "batch_size": BATCH_SIZE,
                }
            },
            SHARD_COUNT)
        ds_input_readers = DatastoreQueryInputReader.split_input(mapper_spec)
        self.assertEqual(1, num_expected())  # 1-None
        self.assertEqual(1, len(ds_input_readers))

    def test_with_filter_factory(self):
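        """A filter_factory_spec restricts readers to matching entities."""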
        SHARD_COUNT = 10
        FF_PATH = ("test_mapreduce_utils.DatastoreQueryInputReaderTest."
                   "simple_parametrized_filter_factory")

        params = {
            "input_reader": {
                "entity_kind": self.TEST_ENTITY_IMPORT_PATH,
                "filter_factory_spec": {
                    "name": FF_PATH,
                    "args": ["B"]
                }
            }
        }
        mapper_spec = model.MapperSpec(
            "FooHandler",
            "mapreduce_utils.DatastoreQueryInputReader",
            params,
            SHARD_COUNT)

        ds_input_readers = DatastoreQueryInputReader.split_input(mapper_spec)
        got = reduce(operator.add,
            (list(reader) for reader in ds_input_readers))
        self.assertEqual(2, len(got))
        data1, data2 = filter(lambda i: i['type'] == "B", self.dataSet)
        got.sort(key=lambda i: i.name)
        self.assertDictEqual(data1, db.to_dict(got.pop(0)))
        self.assertDictEqual(data2, db.to_dict(got.pop(0)))

    def test_world(self):
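        """With no filters, the readers together cover the whole data set."""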
        SHARD_COUNT = 10

        mapper_spec = model.MapperSpec(
            "FooHandler",
            "mapreduce_utils.DatastoreQueryInputReader",
            {
                "input_reader": {
                    "entity_kind": self.TEST_ENTITY_IMPORT_PATH,
                }
            },
            SHARD_COUNT)

        ds_input_readers = DatastoreQueryInputReader.split_input(mapper_spec)
        got = reduce(operator.add,
            (list(reader) for reader in ds_input_readers))
        self.assertEqual(len(self.dataSet), len(got))

    def test_with_query_filters(self):
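        """Plain datastore query filters are applied by the readers."""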
        SHARD_COUNT = 10
        mapper_spec = model.MapperSpec(
            "FooHandler",
            "mapreduce_utils.DatastoreQueryInputReader",
            {
                "input_reader": {
                    "entity_kind": self.TEST_ENTITY_IMPORT_PATH,
                    "filters": [("type", "=", "C")],
                }
            },
            SHARD_COUNT)

        ds_input_readers = DatastoreQueryInputReader.split_input(mapper_spec)
        got = reduce(operator.add,
            (list(reader) for reader in ds_input_readers))
        self.assertEqual(3, len(got))
        data1, data2, data3 = filter(lambda i: i['type'] == "C", self.dataSet)
        got.sort(key=lambda i: i.name)
        self.assertDictEqual(data1, db.to_dict(got.pop(0)))
        self.assertDictEqual(data2, db.to_dict(got.pop(0)))
        self.assertDictEqual(data3, db.to_dict(got.pop(0)))