def evaluate(self, profile, plan_node_id):
    """Determine if the join exploded the number of rows.

    Compares the rows fed into the probe side of the join with the rows the
    join returned; when the output is larger than the probe input, the share
    of the probe time attributable to the extra rows is reported as impact.

    The return is a json string with this format:
    { "impact": the amount of slow down (in ns),
      "message": the displayed "explanation" string }
    :return:
    """
    # Only the metrics actually queried below. The previous list
    # (["Hosts", "Broadcast", "BuildRows", "ProbeRows"]) was copied from
    # the join-order rule and did not match this rule's queries; the
    # unused "Hosts" lookup has been removed as well.
    self.metric_names = ["ProbeRows", "ProbeTime", "RowsReturned"]
    probe_rows = models.query_node_by_id(
        profile, plan_node_id, "ProbeRows", True)[0][0]
    probe_time = models.query_node_by_id(
        profile, plan_node_id, "ProbeTime", True)[0][0]
    rows_returned = models.query_node_by_id(
        profile, plan_node_id, "RowsReturned", True)[0][0]

    impact = 0
    if rows_returned > 0:
        # Portion of the probe time spent on the rows beyond the probe
        # input, i.e. probe_time scaled by the expansion fraction.
        impact = probe_time * (rows_returned - probe_rows) / rows_returned
    return {
        "impact": impact,
        "message": "Exploding join: %d input rows are exploded to "
                   "%d output rows" % (probe_rows, rows_returned)
    }
def evaluate(self, profile, plan_node_id):
    """Determine if the join order/strategy is correct and evaluate the
    impact of this cause to the query.

    The return is a json string with this format:
    { "impact": the amount of slow down (in ns),
      "message": the displayed "explanation" string }
    :return:
    """
    self.metric_names = ["Hosts", "Broadcast", "BuildRows", "ProbeRows"]

    def fetch(metric):
        # Single aggregated value of the metric for this plan node.
        return models.query_node_by_id(profile, plan_node_id, metric, True)[0][0]

    num_hosts = fetch("Hosts")
    is_broadcast = fetch("Broadcast")
    build_rows = fetch("BuildRows")
    probe_rows = fetch("ProbeRows")

    # Total rows handled on each side across all hosts, plus the rows
    # shipped over the network for the chosen distribution strategy.
    if is_broadcast == 1:
        network_cost = build_rows * num_hosts
        rhs_rows = build_rows
        lhs_rows = probe_rows * num_hosts
    else:
        network_cost = (build_rows + probe_rows) * num_hosts
        rhs_rows = build_rows * num_hosts
        lhs_rows = probe_rows * num_hosts

    # A build side substantially larger than the probe side suggests the
    # join order is inverted.
    order_impact = (rhs_rows - lhs_rows * 1.5) / num_hosts / 0.01
    if order_impact > 0:
        return {
            "impact": order_impact,
            "message": "Wrong join order - RHS %d; LHS %d" % (rhs_rows, lhs_rows)
        }

    # Otherwise compare the actual network cost against the cheaper of the
    # two possible strategies (broadcast vs. shuffle).
    broadcast_cost = rhs_rows * num_hosts
    shuffle_cost = lhs_rows + rhs_rows
    strategy_impact = (network_cost - min(broadcast_cost, shuffle_cost) - 1) / num_hosts / 0.01
    return {
        "impact": strategy_impact,
        "message": "Wrong join strategy - RHS %d; LHS %d" % (rhs_rows, lhs_rows)
    }
def evaluate(self, profile, plan_node_id):
    """Determine the impact of NN RPC latency.

    The return is a json string with this format:
    { "impact": the amount of slow down (in ns),
      "message": the displayed "explanation" string }
    :return:
    """
    storage_wait = models.query_avg_fragment_metric_by_node_nid(
        profile, plan_node_id, "TotalStorageWaitTime")
    raw_read_time = models.query_node_by_id(
        profile, plan_node_id, "TotalRawHdfsReadTime(*)", True)[0][0]
    read_threads = models.query_node_by_id(
        profile, plan_node_id, "AverageHdfsReadThreadConcurrency", True)[0][0]
    # Avoid dividing by a concurrency below one.
    read_threads = max(1, to_double(read_threads))
    # Storage wait beyond the raw HDFS read time, amortized over the
    # concurrent read threads; clamped so it never goes negative.
    impact = max(0, (storage_wait - raw_read_time) / read_threads)
    return {
        "impact": impact,
        "message": "This is the time waiting for HDFS NN RPC."
    }
def evaluate(self, profile, plan_node_id):
    """ Evaluate the impact of this cause to the query. The return is a json
    string with this format:
    { "impact": the amount of slow down (in ns),
      "message" : the displayed "explanation" string }
    :return:
    """
    # NOTE(review): impact starts at -1, so the "impact is None" checks
    # below never fire; they look vestigial — confirm before removing.
    impact = -1
    expr_data = ''
    if len(self.exprs):
        # Expression-based rules operate on a single metric whose rows are
        # grouped per fragment id (x.fid) before being aggregated.
        assert len(self.metric_names) == 1
        # metric_names can have multiple values create a dict for all of
        # them
        db_result = models.query_node_by_id(profile, plan_node_id,
                                            self.metric_names[0])
        # groupby assumes db_result is already ordered by fid — TODO confirm
        # the query returns rows sorted that way.
        for k, g in groupby(db_result, lambda x: x.fid):
            grouped = list(g)
            # A list of pairs, with aggregated value and index at value for
            # max / min like exprs
            converted_exprs = self.check_exprs(grouped)
            expr_vars = {
                "vars": dict(zip(self.exprs,
                                 map(lambda x: x[0], converted_exprs))),
                "idxs": dict(zip(self.exprs,
                                 map(lambda x: x[1], converted_exprs))),
            }
            expr_val = exprs.Expr.evaluate(self.rule["expr"], expr_vars)
            # Keep the worst (largest) impact across all fragment groups.
            if (impact is None or impact < expr_val):
                impact = expr_val
    else:
        # For each of the metrics get the result
        with Timer() as t:
            # Get the metric values from the db grouped by metric name
            db_result = [
                models.query_node_by_id(profile, plan_node_id, m)
                for m in self.metric_names
            ]
            # Assuming that for all metric names the same number of rows
            # have been returned transpose the array
            all_metrics = zip(*db_result)
            for row in all_metrics:
                # Convert to double values if unit is 6(double)
                metric_values = map(
                    lambda x: x.value if x.unit != 6 else to_double(x.value),
                    row)
                # All entries of a row belong to the same node, so any of
                # them can stand in for it.
                surrogate_node = row[0].node
                local_vars = {
                    "vars": dict(zip(self.metric_names, metric_values))
                }
                # Derived pseudo-metrics available to rule expressions.
                local_vars["vars"]["IOBound"] = self.isStorageBound(
                    surrogate_node)
                local_vars["vars"]['InputRows'] = self.getNumInputRows(
                    surrogate_node)
                # An optional "condition" expression gates whether the
                # rule's impact expression runs for this row.
                condition = True
                if ("condition" in self.rule):
                    condition = exprs.Expr.evaluate(self.rule["condition"],
                                                    local_vars)
                if (condition):
                    expr_val = exprs.Expr.evaluate(self.rule["expr"],
                                                   local_vars)
                    # Keep the worst (largest) impact across all rows.
                    if (impact is None or impact < expr_val):
                        impact = expr_val
            if self.kwargs.get('info_names'):
                # Augment local_vars with info-string values and evaluate the
                # fix's "data" expression. NOTE(review): this reuses the
                # local_vars left over from the last row of the loop above —
                # it would be unbound if all_metrics were empty; confirm the
                # caller guarantees at least one row.
                db_result = [
                    models.query_element_by_info(profile, plan_node_id, m)
                    for m in self.kwargs['info_names']
                ]
                all_metrics = zip(*db_result)
                for row in all_metrics:
                    metric_values = map(lambda x: x.value, row)
                    local_vars['vars'].update(
                        dict(zip(self.kwargs['info_names'], metric_values)))
                expr_data = exprs.Expr.evaluate(self.kwargs['fix']['data'],
                                                local_vars)
    msg = self.rule["label"] + ": " + self.rule["message"]
    return {"impact": impact, "message": msg, "data": expr_data}