Пример #1
0
def get_dynamic_k_for_experiment(name):
    conn = db_connection.get_conn()
    df = pd.read_sql_query(
        "select k_value from data_science.experiments_dynamic_ratios where name = '{name}'"
        .format(name=name), conn)
    db_connection.put_conn(conn)
    return df.k_value[0]
Пример #2
0
def get_experiment_agg_data(experiment_id, start_date = '2020-01-11', end_date = None, filters = {}, ordering = None):
  '''
    Filters the input table for a specific experiment_id
  '''
  conn = db_connection.get_conn()
  print('get_experiment_agg_data experiment_id: ' + str(experiment_id) + ' start_date: ' + str(start_date) + ' end_date: ' + str(end_date))
  start_date = clean_date(start_date)
  end_date = clean_date(end_date)

  filters_clause = ""
  filters_array = []
  input_table = 'data_science.experiments_aggregate_base'
  if 'city' in filters:
    filters_array.append("city = '{selected_city}'".format(selected_city=filters['city']))
    # filters_clause = " and city = '{selected_city}'".format(selected_city=filters['city'])
    input_table = 'data_science.experiments_aggregate_base_per_city'
  if 'distinct_dates_7' in filters:
    filters_array.append("distinct_dates_7 >= 7")
  if 'distinct_dates_14' in filters:
    filters_array.append("distinct_dates_7 >= 14")
  if 'alternative_ratio' in filters:
    filters_array.append("alternative_ratio > 0")

  if filters_array != []:
    filters_clause = ' AND ' + ' and '.join(filters_array)


  order_clause = ''
  if not ordering is None:
    order_clause = 'order by {ordering}'.format(ordering=ordering)

  query = """
  select
    *
  from
    {input_table}
  where
    fee_experiment_id = {selected_experiment}
    and date >= '{start_date}'
    {end_date_clause}
    {filters_clause}
  {order_clause}
  """.format(selected_experiment=experiment_id,input_table=input_table,start_date=start_date,end_date_clause=_build_end_date_clause(end_date),filters_clause=filters_clause,order_clause=order_clause)

  # print(query)

  df_agg = pd.read_sql_query(query, conn)
  df_agg = adjust_zero_ratios(adjust_dataframe_fields(df_agg))
  db_connection.put_conn(conn)
  return df_agg
Пример #3
0
def get_alternatives_for_experiment(experiment_id, start_date = '2020-01-11', end_date = None):
  conn = db_connection.get_conn()
  start_date = clean_date(start_date)
  end_date = clean_date(end_date)
  df = pd.read_sql_query("""
  select
    max(alternative_ratio) as alternative_ratio,
    alternative,
    alternative_id
  from
    data_science.experiments_aggregate_base_per_city
  where
    fee_experiment_id = {selected_experiment}
    and date >= '{start_date}'
    {end_date_clause}
  group by alternative, alternative_id
  """.format(selected_experiment=experiment_id,start_date=start_date,end_date_clause=_build_end_date_clause(end_date)), conn)
  db_connection.put_conn(conn)
  return df
Пример #4
0
def get_alternative_agg_data(alternative_id, start_date, end_date, filters = {}):
  conn = db_connection.get_conn()
  baseline_alternative = sample_baseline_for_alternative(alternative_id, conn)

  df_dates = sample_valid_dates(alternative_id, baseline_alternative, conn, start_date, end_date)
  dates_str = ','.join(map(lambda x: "'{x}'".format(x=x.strftime('%Y-%m-%d')), df_dates['date'].values[:-1]))

  alternative_ids = [str(alternative_id), str(baseline_alternative)]

  location_filter = ""
  if 'city' in filters:
    location_filter = " AND city = '{city}'".format(city=filters['city'])

  base_selection = get_consolidated_orders_data_sql() + """
    and created_day in ({dates}) and alternative_id in ({alternatives}) {location_filter}
  """.format(alternatives=','.join(alternative_ids),dates=dates_str,location_filter=location_filter)

  df_cum_results = pd.read_sql_query(get_cumulative_agg_results_sql(base_selection, postgres=True, initial_date=df_dates['date'].values[0], final_date=df_dates['date'].values[-2]), conn)
  df_cum_results = adjust_dataframe_fields(df_cum_results)
  db_connection.put_conn(conn)
  return df_cum_results
Пример #5
0
def get_experiment_agg_data_per_city(experiment_id, start_date, end_date=None):
  conn = db_connection.get_conn()
  start_date = clean_date(start_date)
  end_date = clean_date(end_date)

  aux = pd.read_sql_query("""
  select
    sum(orders)::integer as order_id,
    city,
    city_id
  from
    data_science.experiments_aggregate_base_per_city
  where
    fee_experiment_id = {selected_experiment}
    and date >= '{start_date}'
    {end_date_clause}
  group by
   city,
   city_id
  order by 1 desc
  """.format(selected_experiment=experiment_id,start_date=start_date,end_date_clause=_build_end_date_clause(end_date)), conn)
  db_connection.put_conn(conn)
  return aux