def split_input(cls, job_config): """Inherit docs.""" params = job_config.input_reader_params shard_count = job_config.shard_count query_spec = cls._get_query_spec(params) if not property_range.should_shard_by_property_range( query_spec.filters): return super(ModelDatastoreInputReader, cls).split_input(job_config) p_range = property_range.PropertyRange(query_spec.filters, query_spec.model_class_path) p_ranges = p_range.split(shard_count) if query_spec.ns: ns_range = namespace_range.NamespaceRange( namespace_start=query_spec.ns, namespace_end=query_spec.ns, _app=query_spec.app) ns_ranges = [copy.copy(ns_range) for _ in p_ranges] else: ns_keys = namespace_range.get_namespace_keys( query_spec.app, cls.MAX_NAMESPACES_FOR_KEY_SHARD + 1) if not ns_keys: return if len(ns_keys) <= cls.MAX_NAMESPACES_FOR_KEY_SHARD: ns_ranges = [ namespace_range.NamespaceRange(_app=query_spec.app) for _ in p_ranges ] else: ns_ranges = namespace_range.NamespaceRange.split( n=shard_count, contiguous=False, can_query=lambda: True, _app=query_spec.app) p_ranges = [copy.copy(p_range) for _ in ns_ranges] assert len(p_ranges) == len(ns_ranges) iters = [ db_iters.RangeIteratorFactory.create_property_range_iterator( p, ns, query_spec) for p, ns in zip(p_ranges, ns_ranges) ] return [cls(i) for i in iters]
def split_input(cls, job_config): """Inherit docs.""" params = job_config.input_reader_params shard_count = job_config.shard_count query_spec = cls._get_query_spec(params) if not property_range.should_shard_by_property_range(query_spec.filters): return super(ModelDatastoreInputReader, cls).split_input(job_config) p_range = property_range.PropertyRange(query_spec.filters, query_spec.model_class_path) p_ranges = p_range.split(shard_count) if query_spec.ns: ns_range = namespace_range.NamespaceRange( namespace_start=query_spec.ns, namespace_end=query_spec.ns, _app=query_spec.app) ns_ranges = [copy.copy(ns_range) for _ in p_ranges] else: ns_keys = namespace_range.get_namespace_keys( query_spec.app, cls.MAX_NAMESPACES_FOR_KEY_SHARD+1) if not ns_keys: return if len(ns_keys) <= cls.MAX_NAMESPACES_FOR_KEY_SHARD: ns_ranges = [namespace_range.NamespaceRange(_app=query_spec.app) for _ in p_ranges] else: ns_ranges = namespace_range.NamespaceRange.split(n=shard_count, contiguous=False, can_query=lambda: True, _app=query_spec.app) p_ranges = [copy.copy(p_range) for _ in ns_ranges] assert len(p_ranges) == len(ns_ranges) iters = [ db_iters.RangeIteratorFactory.create_property_range_iterator( p, ns, query_spec) for p, ns in zip(p_ranges, ns_ranges)] return [cls(i) for i in iters]
def split_input(cls, job_config): """Inherit doc.""" shard_count = job_config.shard_count params = job_config.input_reader_params query_spec = cls._get_query_spec(params) namespaces = None if query_spec.ns is not None: k_ranges = cls._to_key_ranges_by_shard(query_spec.app, [query_spec.ns], shard_count, query_spec) else: ns_keys = namespace_range.get_namespace_keys( query_spec.app, cls.MAX_NAMESPACES_FOR_KEY_SHARD + 1) if not ns_keys: return elif len(ns_keys) <= cls.MAX_NAMESPACES_FOR_KEY_SHARD: namespaces = [ns_key.name() or "" for ns_key in ns_keys] k_ranges = cls._to_key_ranges_by_shard(query_spec.app, namespaces, shard_count, query_spec) else: ns_ranges = namespace_range.NamespaceRange.split( n=shard_count, contiguous=False, can_query=lambda: True, _app=query_spec.app) k_ranges = [ key_ranges.KeyRangesFactory.create_from_ns_range(ns_range) for ns_range in ns_ranges ] iters = [ db_iters.RangeIteratorFactory.create_key_ranges_iterator( r, query_spec, cls._KEY_RANGE_ITER_CLS) for r in k_ranges ] return [cls(i) for i in iters]
def split_input(cls, job_config): """Inherit doc.""" shard_count = job_config.shard_count params = job_config.input_reader_params query_spec = cls._get_query_spec(params) namespaces = None if query_spec.ns is not None: k_ranges = cls._to_key_ranges_by_shard( query_spec.app, [query_spec.ns], shard_count, query_spec) else: ns_keys = namespace_range.get_namespace_keys( query_spec.app, cls.MAX_NAMESPACES_FOR_KEY_SHARD+1) if not ns_keys: return elif len(ns_keys) <= cls.MAX_NAMESPACES_FOR_KEY_SHARD: namespaces = [ns_key.name() or "" for ns_key in ns_keys] k_ranges = cls._to_key_ranges_by_shard( query_spec.app, namespaces, shard_count, query_spec) else: ns_ranges = namespace_range.NamespaceRange.split(n=shard_count, contiguous=False, can_query=lambda: True, _app=query_spec.app) k_ranges = [key_ranges.KeyRangesFactory.create_from_ns_range(ns_range) for ns_range in ns_ranges] iters = [db_iters.RangeIteratorFactory.create_key_ranges_iterator( r, query_spec, cls._KEY_RANGE_ITER_CLS) for r in k_ranges] return [cls(i) for i in iters]