def get_query_plan(
    self, quals: Optional[Quals], columns: Sequence[str], use_cache: bool = True
) -> QueryPlan:
    """
    Start planning a query (preliminary steps before object downloading,
    like qualifier filtering).

    :param quals: Qualifiers in CNF form
    :param columns: List of columns
    :param use_cache: If True, will fetch the plan from the cache for the same
        qualifiers and columns.
    :return: QueryPlan
    """
    key = _get_plan_cache_key(quals, columns)

    if use_cache and key in self._query_plans:
        plan = self._query_plans[key]
        # Reset the tracer in the plan: if this instance persisted,
        # the resolve/filter times in it will be way in the past.
        plan.tracer = Tracer()
        plan.tracer.log("resolve_objects")
        plan.tracer.log("filter_objects")
        plan.tracer.log("group_fragments")
        plan.tracer.log("generate_singleton_queries")
        return plan

    plan = QueryPlan(self, quals, columns)
    self._query_plans[key] = plan
    return plan
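# _get_plan_cache_key is not shown in this snippet. A minimal sketch of what such a key
# could look like, assuming the plan depends only on the qualifiers and the requested
# columns; the name and scheme below are illustrative assumptions, not the real helper.
def _plan_cache_key_sketch(quals, columns):
    # The key must be hashable and deterministic so that repeated queries with the same
    # CNF qualifiers and column list reuse the cached QueryPlan instead of re-filtering.
    return (str(quals), tuple(columns))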
from datetime import datetime as dt
from unittest.mock import patch

from splitgraph.core.common import Tracer


def test_tracer():
    with patch("splitgraph.core.common.datetime") as datetime:
        datetime.now.return_value = dt(2019, 1, 1)
        tracer = Tracer()

        datetime.now.return_value = dt(2019, 1, 1, 0, 0, 1)
        tracer.log("event_1")
        datetime.now.return_value = dt(2019, 1, 1, 0, 0, 30)
        tracer.log("event_2")

    assert tracer.get_total_time() == 30
    assert tracer.get_durations() == [("event_1", 1.0), ("event_2", 29.0)]
    assert (
        str(tracer)
        == """event_1: 1.000
event_2: 29.000
Total: 30.000"""
    )
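# A minimal sketch of a Tracer that would satisfy the test above, assuming it simply
# records a timestamp per logged event. This is an illustrative reimplementation, not
# necessarily how splitgraph.core.common.Tracer is actually written.
from datetime import datetime
from typing import List, Tuple


class TracerSketch:
    """Records named events and reports per-event and total durations in seconds."""

    def __init__(self) -> None:
        self.start_time = datetime.now()
        self.events: List[Tuple[datetime, str]] = []

    def log(self, event: str) -> None:
        # Each event's duration is measured from the previous event (or from creation).
        self.events.append((datetime.now(), event))

    def get_durations(self) -> List[Tuple[str, float]]:
        result = []
        previous = self.start_time
        for timestamp, event in self.events:
            result.append((event, (timestamp - previous).total_seconds()))
            previous = timestamp
        return result

    def get_total_time(self) -> float:
        return (self.events[-1][0] - self.start_time).total_seconds()

    def __str__(self) -> str:
        lines = ["%s: %.3f" % (event, duration) for event, duration in self.get_durations()]
        lines.append("Total: %.3f" % self.get_total_time())
        return "\n".join(lines)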
class QueryPlan:
    """
    Represents the initial query plan (fragments to query) for given columns and qualifiers.
    """

    def __init__(self, table: "Table", quals: Optional[Quals], columns: Sequence[str]) -> None:
        self.table = table
        self.quals = quals
        self.columns = columns
        self.tracer = Tracer()

        self.object_manager = table.repository.objects
        self.required_objects = table.objects
        self.tracer.log("resolve_objects")

        self.filtered_objects = self.object_manager.filter_fragments(
            self.required_objects, table, quals
        )
        # Estimate the number of rows in the filtered objects
        self.estimated_rows = sum(
            o.rows_inserted - o.rows_deleted
            for o in self.object_manager.get_object_meta(self.filtered_objects).values()
        )
        self.tracer.log("filter_objects")

        # Prepare a list of objects to query.
        # Special fast case: single-chunk groups can all be batched together
        # and queried directly without having to copy them to a staging table.
        # We also grab all fragments from multiple-fragment groups and batch them together
        # for future application.
        #
        # Technically, we could do multiple batches of application for these groups
        # (apply the first batch to the staging table, extract the result, clean the table,
        # apply the next batch etc.): in the middle of it we could also talk back to the object
        # manager and release the objects that we don't need so that they can be garbage
        # collected. The tradeoff is that we perform more calls to apply_fragments (hence
        # more roundtrips).
        self.non_singletons, self.singletons = self._extract_singleton_fragments()
        logging.info(
            "Fragment grouping: %d singletons, %d non-singletons",
            len(self.singletons),
            len(self.non_singletons),
        )
        self.tracer.log("group_fragments")

        self.sql_quals, self.sql_qual_vals = quals_to_sql(
            quals, column_types={c.name: c.pg_type for c in self.table.table_schema}
        )

        if self.singletons:
            self.singleton_queries = _generate_table_names(
                self.object_manager.object_engine, SPLITGRAPH_META_SCHEMA, self.singletons
            )
        else:
            self.singleton_queries = []
        self.tracer.log("generate_singleton_queries")

    def _extract_singleton_fragments(self) -> Tuple[List[str], List[str]]:
        # Get fragment boundaries (min-max PKs of every fragment).
        table_pk = [(t[1], t[2]) for t in self.table.table_schema if t[3]]
        if not table_pk:
            table_pk = [(t[1], t[2]) for t in self.table.table_schema]

        object_pks = self.object_manager.get_min_max_pks(self.filtered_objects, table_pk)

        # Group fragments into non-overlapping groups: those can be applied
        # independently of each other.
        object_groups = get_chunk_groups(
            [
                (object_id, min_max[0], min_max[1])
                for object_id, min_max in zip(self.filtered_objects, object_pks)
            ]
        )

        singletons: List[str] = []
        non_singletons: List[str] = []
        for group in object_groups:
            if len(group) == 1:
                singletons.append(group[0][0])
            else:
                non_singletons.extend(object_id for object_id, _, _ in group)

        return non_singletons, singletons
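# Hedged illustration of the grouping idea behind get_chunk_groups, used by
# _extract_singleton_fragments above: fragments whose [min_pk, max_pk] ranges overlap
# end up in the same group, and groups of exactly one fragment are the "singletons"
# that can be queried directly. The function name and the exact overlap semantics below
# are assumptions for illustration, not the real splitgraph implementation.
def group_overlapping_fragments(chunks):
    # chunks: list of (object_id, min_pk, max_pk) tuples,
    # e.g. [("o1", 1, 5), ("o2", 3, 8), ("o3", 10, 12)] -> [[o1, o2], [o3]]
    groups = []
    current_group, current_max = [], None
    for object_id, min_pk, max_pk in sorted(chunks, key=lambda c: c[1]):
        if current_group and min_pk > current_max:
            # No overlap with the running group: flush it and start a new one.
            groups.append(current_group)
            current_group, current_max = [], None
        current_group.append((object_id, min_pk, max_pk))
        current_max = max_pk if current_max is None else max(current_max, max_pk)
    if current_group:
        groups.append(current_group)
    return groups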