Example #1
0
def calculate_time_stats(grouped):
  """
  Add statistics to the nested dictionary.

  Each query name is supplemented with the average, standard deviation, number of clients,
  iterations, and a sorted list of the time taken to complete each run.
  """

  def remove_first_run(result_list):
    """Remove the first result because the performance is much worse on the first run.

    Returns True if a result was removed, False otherwise.
    """
    if len(result_list) > 1:
      # We want to remove the first result only if there is more than one result;
      # otherwise there would be nothing left to aggregate.
      result_list.remove(min(result_list, key=lambda result: result['start_time']))
      return True
    return False

  for workload_scale, workload in grouped.items():
    for file_format, queries in workload.items():
      for query_name, results in queries.items():
        result_list = results[RESULT_LIST]
        removed = remove_first_run(result_list)
        # Build the list of runtimes once instead of once per statistic.
        times = [query_results[TIME_TAKEN] for query_results in result_list]
        avg = calculate_avg(times)
        dev = calculate_stddev(times)
        num_clients = max(
            int(query_results[CLIENT_NAME]) for query_results in result_list)

        # Count the removed first run (if any) so iterations reflects the number of
        # runs actually executed. The original unconditionally added 1, which
        # over-counted iterations when nothing had been removed (single-result case).
        iterations = (len(result_list) + (1 if removed else 0)) // num_clients
        results[AVG] = avg
        results[STDDEV] = dev
        results[NUM_CLIENTS] = num_clients
        results[ITERATIONS] = iterations
        results[SORTED] = sorted(times)
Example #2
0
def calculate_time_stats(grouped):
  """Adds statistics to the nested dictionary. We are calculating the average runtime
     and Standard Deviation for each query type.
  """

  def remove_first_run(result_list):
    """Remove the first result because the performance is much worse on the first run.

    Returns True if a result was removed, False otherwise.
    """
    if len(result_list) > 1:
      # We want to remove the first result only if there is more than one result;
      # otherwise there would be nothing left to aggregate.
      result_list.remove(min(result_list, key=lambda result: result['start_time']))
      return True
    return False

  for workload_scale, workload in grouped.items():
    for file_format, queries in workload.items():
      for query_name, results in queries.items():
        result_list = results[RESULT_LIST]
        removed = remove_first_run(result_list)
        # Build the list of runtimes once instead of once per statistic.
        times = [query_results[TIME_TAKEN] for query_results in result_list]
        avg = calculate_avg(times)
        dev = calculate_stddev(times)
        num_clients = max(
            int(query_results[CLIENT_NAME]) for query_results in result_list)

        # Count the removed first run (if any) so iterations reflects the number of
        # runs actually executed. The original unconditionally added 1, which
        # over-counted iterations when nothing had been removed (single-result case).
        iterations = (len(result_list) + (1 if removed else 0)) // num_clients
        results[AVG] = avg
        results[STDDEV] = dev
        results[NUM_CLIENTS] = num_clients
        results[ITERATIONS] = iterations
Example #3
0
def create_exec_result(execution_times, iterations, result_data):
  """Build a QueryExecResult from the collected execution times.

  The result is marked successful only when every expected iteration produced a
  timing; standard deviation is computed only when there was more than one
  iteration.
  """
  exec_result = QueryExecResult()
  exec_result.success = False

  if result_data:
    # Just print the first result returned. There may be additional results if
    # there were multiple iterations executed.
    first_result = result_data[0]
    LOG.debug('Data:\n%s\n' % first_result)
    exec_result.data = first_result.split('\n')

  all_iterations_completed = len(execution_times) == iterations
  if all_iterations_completed:
    exec_result.avg_time = calculate_avg(execution_times)
    if iterations > 1:
      exec_result.std_dev = calculate_stddev(execution_times)
    exec_result.success = True
  return exec_result
Example #4
0
def calculate_time_stats(grouped):
  """Adds statistics to the nested dictionary. We are calculating the average runtime
     and Standard Deviation for each query type.
  """

  # The keys at each nesting level are not needed here, only the leaf dicts.
  for workload in grouped.values():
    for queries in workload.values():
      for results in queries.values():
        result_list = results[RESULT_LIST]
        runtimes = [entry[TIME_TAKEN] for entry in result_list]
        results[AVG] = calculate_avg(runtimes)
        results[STDDEV] = calculate_stddev(runtimes)
        results[NUM_CLIENTS] = max(int(entry[CLIENT_NAME]) for entry in result_list)
        results[ITERATIONS] = len(result_list)
def calculate_time_stats(grouped):
  """Adds statistics to the nested dictionary. We are calculating the average runtime
     and Standard Deviation for each query type.
  """

  for workload_scale, by_query in grouped.items():
    for query_name, by_format in by_query.items():
      for file_format, results in by_format.items():
        # Iterating .items() and hoisting the innermost dict avoids repeating the
        # triple-nested grouped[...][...][...] lookup on every read and write, and
        # matches how the sibling calculate_time_stats variants are written.
        result_list = results[RESULT_LIST]
        # Build the list of runtimes once instead of once per statistic.
        times = [query_results[TIME_TAKEN] for query_results in result_list]
        results[AVG] = calculate_avg(times)
        results[STDDEV] = calculate_stddev(times)
        results[NUM_CLIENTS] = max(
            int(query_results[CLIENT_NAME]) for query_results in result_list)
        results[ITERATIONS] = len(result_list)
Example #6
0
  def __build_rows(self, exec_summaries):
    """Combine per-run exec summaries row by row and append them to self.rows.

    Fixed per-row fields are copied from the first summary; timing and memory
    fields are aggregated across all summaries.
    """
    first_exec_summary = exec_summaries[0]

    for row_num, row in enumerate(first_exec_summary):
      # Copy fixed values from the first exec summary.
      combined_row = {key: row[key]
                      for key in [PREFIX, OPERATOR, NUM_HOSTS, NUM_ROWS,
                                  EST_NUM_ROWS, DETAIL]}

      # Collect this row position from every summary, then aggregate.
      rows_at_position = [summary[row_num] for summary in exec_summaries]
      avg_times = [entry[AVG_TIME] for entry in rows_at_position]

      # Set the calculated values.
      combined_row[AVG_TIME] = calculate_avg(avg_times)
      combined_row[STDDEV_TIME] = calculate_stddev(avg_times)
      combined_row[MAX_TIME] = max(entry[MAX_TIME] for entry in rows_at_position)
      combined_row[PEAK_MEM] = max(entry[PEAK_MEM] for entry in rows_at_position)
      combined_row[EST_PEAK_MEM] = max(
          entry[EST_PEAK_MEM] for entry in rows_at_position)
      self.rows.append(combined_row)
Example #7
0
def calculate_time_stats(grouped):
    """Adds statistics to the nested dictionary. We are calculating the average runtime
    and Standard Deviation for each query type.
    """
    for workload_scale, workload in grouped.items():
        for file_format, queries in workload.items():
            for query_name, results in queries.items():
                result_list = results[RESULT_LIST]
                runtimes = [entry[TIME_TAKEN] for entry in result_list]
                results[AVG] = calculate_avg(runtimes)
                results[STDDEV] = calculate_stddev(runtimes)
                results[NUM_CLIENTS] = max(
                    int(entry[CLIENT_NAME]) for entry in result_list)
                results[ITERATIONS] = len(result_list)
Example #8
0
def construct_exec_result(iterations, query, results):
  """
  Calculate average running time and standard deviation.

  The summary of the first result is used as the summary for the entire execution.
  """
  first = results[0]

  # Use the output from the first result.
  exec_result = QueryExecResult()
  exec_result.query = query
  exec_result.data = first.data
  exec_result.beeswax_result = first
  exec_result.set_result_note(first.summary)
  exec_result.runtime_profile = first.runtime_profile

  # If running more than 2 iterations, throw the first result out. Don't throw away
  # the first result if iterations = 2 to preserve the stddev calculation.
  timed_results = results[1:] if iterations > 2 else results

  runtimes = [result.time_taken for result in timed_results]
  exec_result.success = True
  exec_result.avg_time = calculate_avg(runtimes)
  if iterations > 1:
    exec_result.std_dev = calculate_stddev(runtimes)
  return exec_result