Пример #1
0
def generate_report(
    client: bigquery.client.Client, flags: Mapping[str, Any]) -> None:
  """Generates the final BigQuery table with channel-level attribution and ROAS.

  Renders the generate_report.sql template with `flags`, submits the rendered
  SQL through `client`, and blocks on the job's result(). Nothing is returned.

  Args:
    client: BigQuery client used to issue the query.
    flags: Mapping of flag names to flag values, handed to the template
      renderer as-is.
  """
  report_template = env.get_template('generate_report.sql')
  report_sql = report_template.render(flags)
  report_job = client.query(report_sql)
  # result() is called only to wait for completion; its value is discarded.
  report_job.result()
Пример #2
0
def generate_report(client: bigquery.client.Client,
                    params: Mapping[str, Any]) -> None:
    """Generates the final BigQuery table with channel-level attribution and ROAS.

    Renders the generate_report.sql template with `params`, submits the
    rendered SQL through `client`, and blocks on the job's result(). Nothing
    is returned.

    Args:
        client: BigQuery client used to issue the query.
        params: Mapping of all template parameter names to values.
    """
    report_template = jinja_env.get_template('generate_report.sql')
    report_sql = report_template.render(params)
    report_job = client.query(report_sql)
    # result() is called only to wait for completion; its value is discarded.
    report_job.result()
Пример #3
0
def extract_fractribution_input_data(
    client: bigquery.client.Client, flags: Mapping[str, Any]) -> None:
  """Extracts the input data for fractribution into BigQuery.

  Renders the extract_data.sql template with `flags`, passes the rendered text
  through `_strip_sql`, and runs the resulting query.

  Args:
    client: BigQuery client used to issue the query.
    flags: Mapping of flag names to flag values, handed to the template
      renderer as-is.
  """
  rendered_sql = env.get_template('extract_data.sql').render(flags)
  query_job = client.query(_strip_sql(rendered_sql))
  # Block until the job finishes. The rows are not needed here because all
  # output is stored on BigQuery.
  query_job.result()
def _run_sql(client: bigquery.client.Client, template_sql: str,
             params: Dict[str, Any],
             poll_interval_seconds: float = 1.0) -> bigquery.table.RowIterator:
    """Renders a SQL template, runs it on BigQuery and waits for completion.

    Args:
        client: BigQuery client.
        template_sql: Name of the template to load through
            `params['jinja_env'].get_template`.
        params: Template parameters. Must also contain the keys 'jinja_env'
            (the template environment) and 'verbose' (when truthy, the
            rendered SQL is logged).
        poll_interval_seconds: Seconds to sleep between job status checks.
            Defaults to 1.0, the previously hard-coded interval.

    Returns:
        The object returned by the finished query job's result().
    """
    sql = params['jinja_env'].get_template(template_sql).render(params)
    if params['verbose']:
        # Log the rendered SQL for easier debugging and to show pipeline
        # progress.
        logging.info(sql)

    query_job = client.query(sql)
    # Local fallback reference point: the job's own `started` timestamp is
    # not available until the server reports that the job has started.
    submitted_at = time.time()
    while not query_job.done():
        started = query_job.started
        reference = started.timestamp() if started is not None else submitted_at
        elapsed_seconds = time.time() - reference
        logging.info('BigQuery job is [%s]. %s seconds elapsed... ',
                     str(query_job.state), '%.2f' % elapsed_seconds)
        # Sleep between polls as a safeguard to avoid floods of requests.
        time.sleep(poll_interval_seconds)
    logging.info('BigQuery job is [%s].', query_job.state)
    return query_job.result()
def _run_sql(client: bigquery.client.Client, template_sql: str,
             params: Dict[str, Any]) -> bigquery.table.RowIterator:
    """Renders a SQL template and runs it on BigQuery.

    Args:
        client: BigQuery client.
        template_sql: Name of the template to load from `_jinja_env`.
        params: Template parameters. Must contain the key 'verbose'; when it
            is truthy the rendered SQL is printed.

    Returns:
        The object returned by the query job's result().
    """
    template = _jinja_env.get_template(template_sql)
    rendered_query = template.render(params)
    if params['verbose']:
        # Print the rendered SQL for easier debugging and to show pipeline
        # progress.
        print(rendered_query)
    query_job = client.query(rendered_query)
    return query_job.result()
Пример #6
0
def get_bigquery_bike_list(client: bigquery.client.Client) -> list:
    """Fetches every row of the Canyon outlet bike sale table.

    Args:
        client: BigQuery client used to run the query.

    Returns:
        A list of all rows produced by the query, in the order the query
        result yields them.
    """
    get_bikes_query = (
        'SELECT * FROM `CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable` ')
    query_job = client.query(get_bikes_query)  # API request
    # result() waits for the query to finish. Materialize it so the return
    # value really is a list, as annotated, and can be indexed, measured
    # with len() and iterated more than once.
    return list(query_job.result())
def query_from_file(client: bigquery.client.Client=None, query_path: str=""):
    """Reads a query from a file and executes it through the client.

    Progress and the result object are reported with print(); nothing is
    returned.

    Args:
        client: BigQuery client. The default of None is not usable: the call
            to `client.query` fails on it.
        query_path: Path of the UTF-8 text file holding the query. The empty
            default is passed to open() unchanged.
    """
    print(f"{query_path} execution started.")
    with open(query_path, 'r', encoding='utf-8') as query_file:
        sql_text = query_file.read()
    # query() makes the API request; result() is then called on the job.
    result = client.query(sql_text).result()
    print(f"Query result is ... \n{result}")
Пример #8
0
def run_fractribution(
    client: bigquery.client.Client, flags: Mapping[str, Any]) -> None:
  """Runs fractribution on the extract_fractribution_input_data BigQuery tables.

  Args:
    client: BigQuery client.
    flags: Mapping of flag names to flag values. The 'path_summary_table'
      entry is read directly; the whole mapping is also passed to the
      create_path_summary_results_table.sql template.
  """

  # Step 1: Query the paths from the path_summary_table and hand the query
  # job to Fractribution.
  select_paths_sql = env.get_template('select_path_summary_query.sql').render(
      path_summary_table=flags['path_summary_table'])
  select_paths_job = client.query(select_paths_sql)
  frac = fractribution.Fractribution(select_paths_job)

  # Step 2: Run Fractribution.
  frac.run_fractribution()
  frac.normalize_channel_to_attribution_names()

  # Step 3: Create the path_summary_table and upload the results.
  results_table_template = env.get_template(
      'create_path_summary_results_table.sql')
  create_table_job = client.query(results_table_template.render(flags))
  create_table_job.result()
  frac.upload_path_summary(client, flags['path_summary_table'])
Пример #9
0
def run_fractribution(client: bigquery.client.Client,
                      params: Mapping[str, Any]) -> None:
    """Runs fractribution on the extract_fractribution_input_data BigQuery tables.

    Args:
        client: BigQuery client.
        params: Mapping of all template parameter names to values. The
            'path_summary_table' and 'attribution_model' entries are read
            directly; the whole mapping is also passed to the
            create_path_summary_results_table.sql template.
    """

    # Step 1: Query the paths from the path_summary_table and hand the query
    # job to Fractribution.
    select_paths_sql = jinja_env.get_template(
        'select_path_summary_query.sql').render(
            path_summary_table=params['path_summary_table'])
    select_paths_job = client.query(select_paths_sql)
    frac = fractribution.Fractribution(select_paths_job)

    # Step 2: Run Fractribution with the configured attribution model.
    frac.run_fractribution(params['attribution_model'])
    frac.normalize_channel_to_attribution_names()

    # Step 3: Create the path_summary_table and upload the results.
    results_table_template = jinja_env.get_template(
        'create_path_summary_results_table.sql')
    create_table_job = client.query(results_table_template.render(params))
    create_table_job.result()
    frac.upload_path_summary(client, params['path_summary_table'])
Пример #10
0
def _extract_channels(client: bigquery.client.Client,
                      flags: Dict[str, Any]) -> List[str]:
    """Returns the channel names produced by the extract_channels.sql query.

    Args:
        client: BigQuery client.
        flags: Dictionary of all flag names to flag values, used to render
            the template.

    Returns:
        List with the `channel` attribute of every row in the query result.
    """
    channels_template = env.get_template('extract_channels.sql')
    query_job = client.query(channels_template.render(flags))
    channel_names = []
    for row in query_job.result():
        channel_names.append(row.channel)
    return channel_names
def checkBikeIsntLoadedAlready(bikeData: list, client: bigquery.client.Client) -> list:
    """Returns only the scraped bikes whose UID is not yet in the database.

    Args:
        bikeData: Newly scraped bike records. Element 0 of each record is
            read as its UID and must be hashable.
        client: BigQuery client used to fetch the stored UIDs.

    Returns:
        The records from `bikeData`, in their original order, whose UID does
        not appear in the table.
    """
    QUERY = (
        'SELECT UID FROM `CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable` ')
    query_job = client.query(QUERY)  # API request

    # result() waits for the query to finish. Collect the UIDs into a set so
    # each membership test below is O(1) instead of a scan over every row.
    existing_uids = {row.UID for row in query_job.result()}

    return [bike for bike in bikeData if bike[0] not in existing_uids]
Пример #12
0
def _extract_channels(client: bigquery.client.Client,
                      params: Mapping[str, Any]) -> List[str]:
    """Returns the list of channel names by running extract_channels.sql.

    fractribution.UNMATCHED_CHANNEL is appended when the query result does
    not already contain it, and every name is then validated with
    `_is_valid_column_name`.

    Args:
        client: BigQuery client.
        params: Mapping of template parameter names to values.

    Returns:
        List of channel names.

    Raises:
        ValueError: User-formatted error if a channel is not a valid BigQuery
            column name.
    """
    extract_channels_sql = jinja_env.get_template(
        'extract_channels.sql').render(params)
    channels = [
        row.channel for row in client.query(extract_channels_sql).result()
    ]
    if fractribution.UNMATCHED_CHANNEL not in channels:
        channels.append(fractribution.UNMATCHED_CHANNEL)
    for channel in channels:
        if not _is_valid_column_name(channel):
            # Build one message string: passing the name as a second
            # positional argument makes str(error) render as a tuple.
            raise ValueError(
                f'Channel is not a legal BigQuery column name: {channel}')
    return channels