Пример #1
0
def generate_report(
    client: bigquery.client.Client, flags: Mapping[str, Any]) -> None:
  """Builds the final BigQuery table with channel-level attribution and ROAS.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  """
  report_sql = env.get_template('generate_report.sql').render(flags)
  # result() blocks until the job finishes; all output stays in BigQuery.
  client.query(report_sql).result()
Пример #2
0
def generate_report(client: bigquery.client.Client,
                    params: Mapping[str, Any]) -> None:
    """Creates the final BigQuery table with channel-level attribution and ROAS.

  Args:
    client: BigQuery client.
    params: Mapping of all template parameter names to values.
  """
    report_sql = jinja_env.get_template('generate_report.sql').render(params)
    # No rows come back; result() only waits for the job to complete.
    client.query(report_sql).result()
Пример #3
0
def extract_fractribution_input_data(
    client: bigquery.client.Client, flags: Mapping[str, Any]) -> None:
  """Extracts the input data for fractribution into BigQuery.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  """
  rendered_sql = env.get_template('extract_data.sql').render(flags)
  # Issue the query and block on result() until it finishes. Nothing is
  # returned because all output is stored on BigQuery.
  client.query(_strip_sql(rendered_sql)).result()
Пример #4
0
def stream_insert_rows_to_bq(client: bigquery.client.Client, table: str,
                             rows: list) -> list:
    """Streams rows into a BigQuery table.

    Parameters
    ----------
    client : google.cloud.bigquery.client.Client
        BigQuery client used to issue the insert.

    table : str
        Id of the table, {dataset}.{tablename} e.g. stocks.quotes.

    rows : list of dicts
        List of dictionaries to insert into table.

    Returns
    -------
    list
        List of insert errors; empty when every row was accepted.
    """
    errors = client.insert_rows(table, rows)

    # insert_rows returns one error entry per failed row, so a non-empty
    # list means at least one row was rejected.
    if errors:
        print('Error: Not all rows inserted...')

    return errors
Пример #5
0
def InsertintoDB(Bikelist: list, client: bigquery.client.Client) -> bool:
    """Writes a list of bike sale rows into the outlet sales table.

    Fetches the table handle, streams the rows in, and reports whether
    every row was accepted.
    """

    target_table = client.get_table(  # Make an API request.
        "CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable")

    insert_errors = client.insert_rows(target_table, Bikelist)  # Make an API request.
    if insert_errors == []:
        print("rows inserted = " + str(len(Bikelist)))
        return True
    print("ERROR: New rows have not been added, errors = " + str(insert_errors))
    return False
Пример #6
0
    def upload_path_summary(self, client: bigquery.client.Client,
                            path_summary_table: str) -> None:
        """Loads the path summary rows into the given BigQuery table.

    Args:
      client: BigQuery Client
      path_summary_table: Name of the table to write the path summaries.
    """
        load_config = bigquery.LoadJobConfig()
        load_config.autodetect = False
        load_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
        destination = client.get_table(path_summary_table)
        load_job = client.load_table_from_file(
            self._path_summary_to_json_stringio(),
            destination,
            job_config=load_config)
        # Block here until the table load has completed.
        load_job.result()
def update_fact_params(client: bigquery.client.Client, params: Dict[str, Any]):
    """Populates params with the full, numeric, and categorical fact lists."""
    schema_table = client.get_table(params['sessions_table'])
    all_facts = fact.Fact.extract_facts(schema_table)
    params['facts'] = all_facts
    params['numeric_facts'] = fact.Fact.get_numeric_facts(all_facts)
    params['categorical_facts'] = fact.Fact.get_categorical_facts(all_facts)
def _run_sql(client: bigquery.client.Client, template_sql: str,
             params: Dict[str, Any]) -> bigquery.table.RowIterator:
    """Runs a SQL query.

  Args:
    client: BigQuery client.
    template_sql: The SQL query statement.
    params: SQL query parameters.

  Returns:
    RowIterator query object.
  """
    sql = params['jinja_env'].get_template(template_sql).render(params)
    if params['verbose']:
        # Including a print here for easier debugging and to show pipeline progress.
        logging.info(sql)

    query_job = client.query(sql)
    while not query_job.done():
        elapsed_seconds = time.time() - query_job.started.timestamp()
        logging.info('BigQuery job is [%s]. %s seconds elapsed... ',
                     str(query_job.state), '%.2f' % elapsed_seconds)
        # Adds a sleep as a safeguard to avoid floods of requests.
        time.sleep(1)
    logging.info('BigQuery job is [%s].', query_job.state)
    return query_job.result()
def _run_sql(client: bigquery.client.Client, template_sql: str,
             params: Dict[str, Any]) -> bigquery.table.RowIterator:
    """Renders template_sql with params, runs it, and returns the row iterator."""
    rendered = _jinja_env.get_template(template_sql).render(params)
    if params['verbose']:
        # Echo the rendered SQL so debugging and pipeline progress are visible.
        print(rendered)
    return client.query(rendered).result()
Пример #10
0
def get_bigquery_bike_list(client: bigquery.client.Client) -> list:
    """Fetches every row from the outlet bike sale table."""
    query_text = (
        'SELECT * FROM `CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable` ')
    job = client.query(query_text)  # API request
    # result() waits for the query to finish before returning the rows.
    return job.result()
def query_from_file(client: bigquery.client.Client=None, query_path: str=""):
    """Runs the SQL statement stored at query_path via the BigQuery client."""
    print(f"{query_path} execution started.")
    with open(query_path, 'r', encoding='utf-8') as sql_file:
        sql_text = sql_file.read()
    # Make an API request and wait for the job to finish.
    query_result = client.query(sql_text).result()
    print(f"Query result is ... \n{query_result}")
Пример #12
0
def run_fractribution(
    client: bigquery.client.Client, flags: Mapping[str, Any]) -> None:
  """Runs fractribution on the extract_fractribution_input_data BigQuery tables.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  """

  # Step 1: Pull the path summaries out of the path_summary_table.
  select_sql = env.get_template('select_path_summary_query.sql').render(
      path_summary_table=flags['path_summary_table'])
  frac = fractribution.Fractribution(client.query(select_sql))
  # Step 2: Run fractribution over the selected paths.
  frac.run_fractribution()
  frac.normalize_channel_to_attribution_names()
  # Step 3: Recreate the path_summary_table and upload the results.
  results_sql = env.get_template(
      'create_path_summary_results_table.sql').render(flags)
  client.query(results_sql).result()
  frac.upload_path_summary(client, flags['path_summary_table'])
Пример #13
0
def run_fractribution(client: bigquery.client.Client,
                      params: Mapping[str, Any]) -> None:
    """Runs fractribution on the extract_fractribution_input_data BigQuery tables.

  Args:
    client: BigQuery client.
    params: Mapping of all template parameter names to values.
  """

    # Step 1: Pull the path summaries out of the path_summary_table.
    select_sql = jinja_env.get_template(
        'select_path_summary_query.sql').render(
            path_summary_table=params['path_summary_table'])
    frac = fractribution.Fractribution(client.query(select_sql))
    # Step 2: Run the configured attribution model over the paths.
    frac.run_fractribution(params['attribution_model'])
    frac.normalize_channel_to_attribution_names()
    # Step 3: Create the path_summary_table and upload the results.
    results_sql = jinja_env.get_template(
        'create_path_summary_results_table.sql').render(params)
    client.query(results_sql).result()
    frac.upload_path_summary(client, params['path_summary_table'])
Пример #14
0
def _extract_channels(client: bigquery.client.Client,
                      flags: Dict[str, Any]) -> List[str]:
    """Updates the channel definitions from the channel flags.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  Returns:
    List of channel names.
  """
    channels_sql = env.get_template('extract_channels.sql').render(flags)
    result_rows = client.query(channels_sql).result()
    return [row.channel for row in result_rows]
def checkBikeIsntLoadedAlready(bikeData: list, client: bigquery.client.Client) -> list:
    """Returns only the scraped bikes whose UID is not already in the database."""

    QUERY = (
        'SELECT UID FROM `CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable` ')
    stored_rows = client.query(QUERY).result()  # API request; waits for finish

    # Collect every UID already present, then keep only the unseen bikes.
    known_uids = [stored_row.UID for stored_row in stored_rows]
    return [bike for bike in bikeData if bike[0] not in known_uids]
Пример #16
0
def _extract_channels(client: bigquery.client.Client,
                      params: Mapping[str, Any]) -> List[str]:
    """Returns the list of names by running extract_channels.sql.

  Args:
    client: BigQuery client.
    params: Mapping of template parameter names to values.
  Returns:
    List of channel names.
  Raises:
    ValueError: User-formatted error if channel is not a valid BigQuery column.
  """
    channels_sql = jinja_env.get_template('extract_channels.sql').render(params)
    channel_names = [
        row.channel for row in client.query(channels_sql).result()
    ]
    # Always keep the catch-all channel so unmatched paths have a bucket.
    if fractribution.UNMATCHED_CHANNEL not in channel_names:
        channel_names.append(fractribution.UNMATCHED_CHANNEL)
    # Channel names become BigQuery columns downstream, so validate them now.
    for name in channel_names:
        if not _is_valid_column_name(name):
            raise ValueError('Channel is not a legal BigQuery column name: ',
                             name)
    return channel_names