Example #1
def log_get(job_id=None, timezone='America/Los_Angeles'):
  try:
    project.initialize(_service=UI_SERVICE)
    if job_id:
      query = JOB_STATUS_QUERY % job_id
      row = next(query_to_rows('service', UI_PROJECT, UI_LOG_DATASET, query, row_max=1, legacy=False), None)
      return {
        'id': row[0],
        'status': row[1],
        'time_stamp': time_local(row[2], timezone),
        'time_ago': time_ago(row[3]),
      } if row else {}
    else:
      query = JOBS_STATUS_QUERY
      rows = query_to_rows('service', UI_PROJECT, UI_LOG_DATASET, query, legacy=False)
      return {
        row[0]: {
          'id': row[0],
          'status': row[1],
          'time_stamp': time_local(row[2], timezone),
          'time_ago': time_ago(row[3]),
        } for row in rows
      }
  except Exception as e:
    print(str(e))
    return {}
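
These examples all consume query_to_rows as a generator. A minimal sketch of that call pattern, assuming the StarThinker-style signature and import path used above; the dataset and query here are hypothetical, shown only to illustrate the shape of the call:

from starthinker.util.bigquery import query_to_rows  # assumed import path

def first_status(project_id):
  # Hypothetical dataset and query, for illustration only.
  query = 'SELECT status FROM `Dashboard.Jobs` LIMIT 1'
  rows = query_to_rows('service', project_id, 'Dashboard', query, row_max=1, legacy=False)
  # next(generator, default) avoids StopIteration on an empty result set.
  return next(rows, None)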
Example #2
def bigquery():

  # check schema if given ( check independent of values )
  if 'schema' in project.task['bigquery']:
    schema = table_to_schema(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      project.task['bigquery']['table']
    )
    object_compare(schema['fields'], project.task['bigquery']['schema'])

  # if query given check it
  if 'query' in project.task['bigquery']:
    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      project.task['bigquery']['query']
    )

    object_compare(sorted(rows), sorted(project.task['bigquery']['values']))

  # simple table check ( unless query given )
  elif 'values' in project.task['bigquery']:
    rows = table_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      project.task['bigquery']['table']
    )

    object_compare(sorted(rows), sorted(project.task['bigquery']['values']))
Example #3
File: run.py Project: google/starthinker
def dcm_log(config, task):
    if config.verbose:
        print('DCM LOG')

    accounts = list(get_rows(config, 'user', task['accounts']))

    # determine start log date
    if table_exists(config, task['out']['auth'], task['out']['project'],
                    task['out']['dataset'], CHANGELOGS_TABLE):
        start = next(
            query_to_rows(
                config, task['out']['auth'], task['out']['project'],
                task['out']['dataset'],
                'SELECT FORMAT_TIMESTAMP("%%Y-%%m-%%dT%%H:%%M:%%S-00:00", MAX(changeTime), "UTC") FROM `%s`'
                % CHANGELOGS_TABLE, 1, False))[0]
        disposition = 'WRITE_APPEND'

    else:
        start = (datetime.utcnow() - timedelta(days=int(task['days']))
                 ).strftime('%Y-%m-%dT%H:%M:%S-00:00')
        disposition = 'WRITE_TRUNCATE'

    # load new logs
    rows = get_changelogs(config, task, accounts, start)
    if rows:
        rows_to_table(config, task['out']['auth'], task['out']['project'],
                      task['out']['dataset'], CHANGELOGS_TABLE, rows,
                      CHANGELOGS_SCHEMA, 0, disposition)
Example #4
def anonymize_query():
    if project.verbose:
        print('ANONYMIZE QUERY', project.task['bigquery']['from']['query'])

    schema = query_to_schema(
        project.task['auth'],
        project.task['bigquery']['from']['project'],
        project.task['bigquery']['from']['dataset'],
        project.task['bigquery']['from']['query'],
        legacy=project.task['bigquery']['from'].get('legacy', False),
    )

    rows = query_to_rows(project.task['auth'],
                         project.task['bigquery']['from']['project'],
                         project.task['bigquery']['from']['dataset'],
                         project.task['bigquery']['from']['query'],
                         legacy=project.task['bigquery']['from'].get(
                             'legacy', False),
                         as_object=True)

    rows = anonymize_rows(rows,
                          project.task['bigquery']['to'].get('columns', []))

    json_to_table(project.task['auth'],
                  project.task['bigquery']['to']['project'],
                  project.task['bigquery']['to']['dataset'],
                  project.task['bigquery']['to']['table'],
                  rows,
                  schema,
                  disposition='WRITE_TRUNCATE')
Example #5
def _process_filters(partners,
                     advertisers,
                     filters,
                     project_id,
                     dataset_id,
                     auth='user'):
    structures = []

    for p in (partners or []):
        structures.append({'type': 'FILTER_PARTNER', 'value': int(p)})

    for a in (advertisers or []):
        structures.append({'type': 'FILTER_ADVERTISER', 'value': int(a)})

    for f in (filters or []):
        if isinstance(f['value'], str) and f['value'].startswith('SELECT '):

            items = query_to_rows(auth, project_id, dataset_id, f['value'])

            filtered = False
            for item in items:
                if item and len(item) != 0:
                    filtered = True
                    structures.append({'type': f['type'], 'value': item[0]})
                else:
                    break

            if not filtered:
                raise Exception('Select filter did not return any values: %s' %
                                f['value'])
        else:
            structures.append({'type': f['type'], 'value': f['value']})

    return structures
Example #6
def get_solutions():
    if project.verbose: print('GETTING SCORES')

    for solution in project.task['solutions']:
        scores = []

        if 'sheet' in solution:
            scores = sheets_read(project.task['auth'],
                                 solution['sheet']['url'],
                                 solution['sheet']['tab'],
                                 solution['sheet']['range'])
        elif 'bigquery' in solution:
            scores = query_to_rows(project.task['auth'], project.id,
                                   solution['bigquery']['dataset'],
                                   solution['bigquery']['query'])

        # for easy lookup use dictionary
        solution['scores'] = {}
        for score in scores:
            solution['scores'].setdefault(str(score[0]), [])
            solution['scores'][str(score[0])].append({
                'variant_id': str(score[1]),
                'variant': score[2],
                'score': float(score[3])
            })

        if project.verbose: print('GOT SCORES:', len(solution['scores']))

    return project.task['solutions']
Example #7
def create_cm_site_segmentation(config, task):
  # Read sheet to bq table
  sheet_rows = sheets_read(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A:C', retries=10)
  if not sheet_rows:
    sheet_rows = []

  schema = [
    { "type": "STRING", "name": "Site_Dcm", "mode": "NULLABLE" },
    { "type": "INTEGER", "name": "Impressions", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Site_Type", "mode": "NULLABLE" }
  ]

  rows_to_table(
    config,
    auth=task['auth_bq'],
    project_id=config.project,
    dataset_id=task['dataset'],
    table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE'
  )

  # Get Site_Type from the sheet
  run_query_from_file(config, task, Queries.cm_site_segmentation.replace('{{dataset}}', task['dataset']), CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + config.project + '.' + task['dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
  rows = query_to_rows(config, task['auth_bq'], config.project, task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  rows = rows_to_type(rows)
  sheets_clear(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A2:C', rows)
Example #8
File: run.py Project: quan/starthinker
def conversions_download():
  if project.verbose:
    print('CONVERSION DOWNLOAD')

  # pull from bigquery if specified
  if 'bigquery' in project.task:
    if project.verbose:
      print('READING BIGQUERY')
    rows = query_to_rows(
        project.task['auth'],
        project.id,
        project.task['bigquery']['dataset'],
        'SELECT * FROM %s' % project.task['bigquery']['table'],
        legacy=project.task['bigquery'].get('legacy', True))
    for row in rows:
      yield row

  # pull from sheets if specified
  if 'sheets' in project.task:
    if project.verbose:
      print('READING SHEET')
    rows = sheets_read(project.task['auth'], project.task['sheets']['url'],
                       project.task['sheets']['tab'],
                       project.task['sheets']['range'])
    for row in rows:
      yield row

  # pull from csv if specified
  if 'csv' in project.task:
    if project.verbose:
      print('READING CSV FILE')
    with io.open(project.task['csv']['file']) as f:
      for row in csv_to_rows(f):
        if row[0] not in CSV_HEADERS:
          yield row
Example #9
def get_impacts():
  if project.verbose: print('GETTING IMPACTS')

  impacts = []

  if 'sheet' in project.task['impacts']:
    impacts = sheets_read(
      project.task['auth'],
      project.task['impacts']['sheet']['url'],
      project.task['impacts']['sheet']['tab'],
      project.task['impacts']['sheet']['range']
    )
  elif 'bigquery' in project.task['impacts']:
    impacts = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['impacts']['bigquery']['dataset'],
      project.task['impacts']['bigquery']['query']
    )

  # for easy lookup use dictionary
  impacts = dict([(str(i[0]), float(i[1])) for i in impacts])

  if project.verbose: print('GOT IMPACTS:', len(impacts))

  return impacts
Example #10
def create_cm_site_segmentation(project):
    # Read sheet to bq table
    sheet_rows = sheets_read('user',
                             project.task['sheet'],
                             'CM_Site_Segments',
                             'A:C',
                             retries=10)
    if not sheet_rows:
        sheet_rows = []

    schema = [{
        "type": "STRING",
        "name": "Site_Dcm",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Impressions",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Site_Type",
        "mode": "NULLABLE"
    }]

    rows_to_table(auth='service',
                  project_id=project.id,
                  dataset_id=project.task['dataset'],
                  table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
                  rows=sheet_rows,
                  schema=schema,
                  skip_rows=1,
                  disposition='WRITE_TRUNCATE')

    # Get Site_Type from the sheet
    run_query_from_file(
        os.path.join(os.path.dirname(__file__),
                     SQL_DIRECTORY + CM_SITE_SEGMENTATION_FILENAME),
        project.id, project.task['dataset'], CM_SITE_SEGMENTATION_TABLE)

    # Move Table back to sheets
    query = 'SELECT * from `' + project.id + '.' + project.task[
        'dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
    rows = query_to_rows('service',
                         project.id,
                         project.task['dataset'],
                         query,
                         legacy=False)

    # makes sure types are correct in sheet
    rows = rows_to_type(rows)
    sheets_clear('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C')
    sheets_write('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C',
                 rows)
Example #11
def create_dv360_segments(config, task):
  a1_notation = 'A:N'
  schema = [
      { "type": "STRING", "name": "Advertiser", "mode": "NULLABLE" },
      { "type": "INTEGER", "name": "Advertiser_Id", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Campaign", "mode": "NULLABLE" },
      { "type": "INTEGER", "name": "Campaign_Id", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Insertion_Order", "mode": "NULLABLE" },
      { "type": "INTEGER", "name": "Insertion_Order_Id", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Line_Item", "mode": "NULLABLE" },
      { "type": "INTEGER", "name": "Line_Item_Id", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Line_Item_Type", "mode": "NULLABLE" },
      { "type": "INTEGER", "name": "Impressions", "mode": "NULLABLE" },
      { "type": "STRING", "name": "SegmentAutoGen", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Segment1", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Segment2", "mode": "NULLABLE" },
      { "type": "STRING", "name": "Segment3", "mode": "NULLABLE" }
    ]

  sheet_rows = sheets_read(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation, retries=10)

  if not sheet_rows:
    sheet_rows = []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
    config,
    auth=task['auth_bq'],
    project_id=config.project,
    dataset_id=task['dataset'],
    table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE'
  )

  # Run Query
  if config.verbose:
    print('RUN DV360 Custom Segments Query')
  run_query_from_file(config, task, Queries.dv360_custom_segments.replace('{{dataset}}', task['dataset']), DV360_CUSTOM_SEGMENTS_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + config.project + '.' + task['dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'
  rows = query_to_rows(config, task['auth_bq'], config.project, task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
  rows = rows_to_type(rows)
  sheets_clear(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation, rows)
Example #12
def get_owners():
    if project.verbose: print('GETTING OWNERS')

    owners = []

    if 'sheet' in project.task['owners']:
        owners = sheets_read(project.task['auth'],
                             project.task['owners']['sheet']['url'],
                             project.task['owners']['sheet']['tab'],
                             project.task['owners']['sheet']['range'])
    elif 'bigquery' in project.task['owners']:
        owners = query_to_rows(project.task['auth'], project.id,
                               project.task['owners']['bigquery']['dataset'],
                               project.task['owners']['bigquery']['query'])

    # group account owners by email, create easy lookup sets for ids
    owners_grouped = {}
    for owner in owners:
        try:

            owners_grouped.setdefault(
                owner[2], {
                    'Account Name': owner[0],
                    'Account Owner': owner[1],
                    'Account Email': owner[2],
                    'DCM Network ID': [],
                    'DBM Partner ID': [],
                    'DS Account ID': [],
                    'Studio Account ID': [],
                })

            if len(owner) > 3 and owner[3]:
                owners_grouped[owner[2]]['DCM Network ID'].append(str(owner[3]))
            if len(owner) > 4 and owner[4]:
                owners_grouped[owner[2]]['DBM Partner ID'].append(str(owner[4]))
            if len(owner) > 5 and owner[5]:
                owners_grouped[owner[2]]['DS Account ID'].append(str(owner[5]))
            if len(owner) > 6 and owner[6]:
                owners_grouped[owner[2]]['Studio Account ID'].append(str(owner[6]))

        except IndexError:
            print('ERROR:', owner)

    if project.verbose: print('GOT OWNERS:', len(owners))

    return owners_grouped.values()
Example #13
def bigquery():
    print('TEST: bigquery')

    # check schema if given ( check independent of values )
    if 'schema' in project.task['bigquery']:
        schema = table_to_schema(project.task['auth'], project.id,
                                 project.task['bigquery']['dataset'],
                                 project.task['bigquery']['table'])
        deltas = schema_compare(project.task['bigquery']['schema'],
                                schema,
                                path='')

        if deltas:
            print(
                '\nFAILED *******************************************************\n'
            )
            for delta in deltas.values():
                print('%(path)s: %(error)s ( %(expected)s - %(actual)s)' %
                      delta)
            print(
                '\n**************************************************************\n'
            )
            test_failed()
        else:
            test_passed()

    # if query given check it
    if 'query' in project.task['bigquery']:
        rows = query_to_rows(
            project.task['auth'],
            project.id,
            project.task['bigquery']['dataset'],
            query_parameters(project.task['bigquery']['query'],
                             project.task['bigquery'].get('parameters')),
            legacy=project.task['bigquery'].get('legacy', True))

        object_compare(sorted(rows),
                       sorted(project.task['bigquery']['values']))

    # simple table check ( unless query given )
    elif 'values' in project.task['bigquery']:
        rows = table_to_rows(project.task['auth'], project.id,
                             project.task['bigquery']['dataset'],
                             project.task['bigquery']['table'])

        object_compare(sorted(rows),
                       sorted(project.task['bigquery']['values']))
Example #14
def create_cm_site_segmentation(project):
  # Read sheet to bq table
  sheet_rows = sheets_read(
      'user', project.task['sheet'], 'CM_Site_Segments', 'A:C', retries=10)
  if not sheet_rows:
    sheet_rows = []

  schema = [{
      'type': 'STRING',
      'name': 'Site_Dcm',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Impressions',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Site_Type',
      'mode': 'NULLABLE'
  }]

  rows_to_table(
      auth='service',
      project_id=project.id,
      dataset_id=project.task['dataset'],
      table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
      rows=sheet_rows,
      schema=schema,
      skip_rows=1,
      disposition='WRITE_TRUNCATE')

  # Get Site_Type from the sheet
  run_query_from_file(Queries.cm_site_segmentation, CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task[
      'dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
  rows = query_to_rows(
      'service', project.id, project.task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  rows = rows_to_type(rows)
  sheets_clear('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C', rows)
Example #15
def create_dv360_segments(project):
    a1_notation = 'A:M'
    schema = [{
        "type": "STRING",
        "name": "Advertiser",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Advertiser_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Campaign",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Campaign_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Insertion_Order",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Insertion_Order_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Line_Item",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Line_Item_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Line_Item_Type",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Impressions",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Segment1",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Segment2",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Segment3",
        "mode": "NULLABLE"
    }]

    sheet_rows = sheets_read('user',
                             project.task['sheet'],
                             'DV3 Segments',
                             a1_notation,
                             retries=10)

    if not sheet_rows:
        sheet_rows = []

    print('DV360 SEGMENT SHEET TABLE WRITE')

    rows_to_table(auth='service',
                  project_id=project.id,
                  dataset_id=project.task['dataset'],
                  table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
                  rows=sheet_rows,
                  schema=schema,
                  skip_rows=1,
                  disposition='WRITE_TRUNCATE')

    # Run Query
    path = os.path.join(os.path.dirname(__file__),
                        SQL_DIRECTORY + DV360_CUSTOM_SEGMENTS_FILENAME)
    query = ''
    with open(path, 'r') as file:
        data = file.read().replace('\n', ' ')
        query = data.replace("{{project_id}}",
                             project.id).replace("{{dataset}}",
                                                 project.task['dataset'])

    print('DV360 CUSTOM SEGMENT TABLE')

    query_to_table('service',
                   project.id,
                   project.task['dataset'],
                   DV360_CUSTOM_SEGMENTS_TABLE,
                   query,
                   legacy=False)

    # Move Table back to sheets
    query = 'SELECT * from `' + project.id + '.' + project.task[
        'dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'
    rows = query_to_rows('service',
                         project.id,
                         project.task['dataset'],
                         query,
                         legacy=False)

    # makes sure types are correct in sheet
    a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
    rows = rows_to_type(rows)
    sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
    sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation,
                 rows)
Example #16
    def handle(self, *args, **kwargs):

        # { 'day': DATE, 'deployment': INT, 'account': INT, 'product': STRING, 'recipe': STRING, 'user': STRING }
        impact = []
        missing = {}
        id_max = 0

        project.initialize(_service=settings.UI_SERVICE, _verbose=True)

        if table_exists('service', 'google.com:starthinker', 'dashboard',
                        'ST_Scripts'):
            id_max = next(
                query_to_rows('service',
                              'google.com:starthinker',
                              'dashboard',
                              'SELECT MAX(Deployment) FROM ST_Scripts',
                              legacy=False))[0]

        for recipe in Recipe.objects.filter(
                id__gt=id_max).order_by('id')[:kwargs['recipes']]:

            project.initialize(_user=recipe.account.get_credentials_path(),
                               _service=settings.UI_SERVICE,
                               _verbose=True)

            values = recipe.get_values()
            for v in values:
                if v['tag'] in ('dcm_to_bigquery', 'dcm_to_sheets',
                                'dcm_to_storage', 'dcm_run',
                                'conversion_upload_from_bigquery',
                                'conversion_upload_from_sheets'):
                    impact.append({
                        'day': recipe.birthday,
                        'deployment': recipe.id,
                        'account': v['values'].get('account'),
                        'script': v['tag'],
                        'product': 'dcm',
                        'user': recipe.account.email.replace('@google.com', '')
                    })
                elif v['tag'] in ('dbm_to_bigquery', 'dbm_to_sheets',
                                  'dbm_to_storage'):
                    for partner in account_from_dbm_report(
                            v['values'].get('dbm_report_id'),
                            v['values'].get('dbm_report_name')):
                        impact.append({
                            'day': recipe.birthday,
                            'deployment': recipe.id,
                            'account': partner,
                            'script': v['tag'],
                            'product': 'dbm',
                            'user': recipe.account.email.replace('@google.com', '')
                        })
                elif v['tag'] in ('dt',):
                    impact.append({
                        'day': recipe.birthday,
                        'deployment': recipe.id,
                        'account': account_from_dt(v['values']),
                        'script': v['tag'],
                        'product': 'dcm',
                        'user': recipe.account.email.replace('@google.com', '')
                    })
                elif v['tag'] == 'barnacle':
                    for account in v['values']['accounts']:
                        impact.append({
                            'day': recipe.birthday,
                            'deployment': recipe.id,
                            'account': account,
                            'script': v['tag'],
                            'product': 'dcm',
                            'user': recipe.account.email.replace('@google.com', '')
                        })
                elif v['tag'] in ('entity',):
                    for partner in v['values']['partners']:
                        impact.append({
                            'day': recipe.birthday,
                            'deployment': recipe.id,
                            'account': partner,
                            'script': v['tag'],
                            'product': 'dbm',
                            'user': recipe.account.email.replace('@google.com', '')
                        })
                elif v['tag'] == 'itp':
                    impact.append({
                        'day': recipe.birthday,
                        'deployment': recipe.id,
                        'account': v['values']['dcm_account'],
                        'script': v['tag'],
                        'product': 'dcm',
                        'user': recipe.account.email.replace('@google.com', '')
                    })
                    impact.append({
                        'day': recipe.birthday,
                        'deployment': recipe.id,
                        'account': v['values']['dbm_partner'],
                        'script': v['tag'],
                        'product': 'dbm',
                        'user': recipe.account.email.replace('@google.com', '')
                    })
                elif v['tag'] == 'itp_audit':
                    impact.append({
                        'day': recipe.birthday,
                        'deployment': recipe.id,
                        'account': v['values']['cm_account_id'],
                        'script': v['tag'],
                        'product': 'dcm',
                        'user': recipe.account.email.replace('@google.com', '')
                    })
                    for partner in account_from_dbm_report(
                            None, v['values'].get('dv360_report_name')):
                        impact.append({
                            'day': recipe.birthday,
                            'deployment': recipe.id,
                            'account': partner,
                            'script': v['tag'],
                            'product': 'dbm',
                            'user': recipe.account.email.replace('@google.com', '')
                        })
                else:
                    impact.append({
                        'day': recipe.birthday,
                        'deployment': recipe.id,
                        'account': None,
                        'script': v['tag'],
                        'product': None,
                        'user': recipe.account.email.replace('@google.com', '')
                    })
                    missing.setdefault(v['tag'], 0)
                    missing[v['tag']] += 1

        if impact:
            if kwargs['test']:
                print(impact)
            else:

                print('WRITING TO ST_Scripts')
                rows_to_table('service',
                              'google.com:starthinker',
                              'dashboard',
                              'ST_Scripts',
                              [(i['day'], i['deployment'], i['user'],
                                i['product'], i['script'], i['account'])
                               for i in impact],
                              schema=[
                                  {'mode': 'REQUIRED', 'name': 'Day', 'type': 'DATE'},
                                  {'mode': 'REQUIRED', 'name': 'Deployment', 'type': 'INTEGER'},
                                  {'mode': 'REQUIRED', 'name': 'User', 'type': 'STRING'},
                                  {'mode': 'NULLABLE', 'name': 'Product', 'type': 'STRING'},
                                  {'mode': 'NULLABLE', 'name': 'Recipe', 'type': 'STRING'},
                                  {'mode': 'NULLABLE', 'name': 'Account', 'type': 'INTEGER'},
                              ],
                              skip_rows=0,
                              disposition='WRITE_TRUNCATE' if id_max == 0 else 'WRITE_APPEND',
                              wait=True)

            print('MISSING', missing)
            print('Coverage:',
                  (len(impact) * 100) / (len(missing) + len(impact)))
        else:
            print('No recipes newer than:', id_max)
Example #17
def videos_from_bigquery(config, task):
    rows = query_to_rows(config, task['auth'],
                         task['bigquery'].get('project', config.project),
                         task['bigquery']['dataset'],
                         'SELECT * FROM %s;' % task['bigquery']['table'])
    return rows_to_videos(rows)
Example #18
def create_dv360_segments(project):
  a1_notation = 'A:M'
  schema = [{
      'type': 'STRING',
      'name': 'Advertiser',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Advertiser_Id',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Campaign',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Campaign_Id',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Insertion_Order',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Insertion_Order_Id',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Line_Item',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Line_Item_Id',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Line_Item_Type',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Impressions',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Segment1',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Segment2',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Segment3',
      'mode': 'NULLABLE'
  }]

  sheet_rows = sheets_read(
      'user', project.task['sheet'], 'DV3 Segments', a1_notation, retries=10)

  if not sheet_rows:
    sheet_rows = []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
      auth='service',
      project_id=project.id,
      dataset_id=project.task['dataset'],
      table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
      rows=sheet_rows,
      schema=schema,
      skip_rows=1,
      disposition='WRITE_TRUNCATE')

  # Run Query
  if project.verbose:
    print('RUN DV360 Custom Segments Query')
  run_query_from_file(Queries.dv360_custom_segments, DV360_CUSTOM_SEGMENTS_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task[
      'dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'
  rows = query_to_rows(
      'service', project.id, project.task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
  rows = rows_to_type(rows)
  sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation, rows)
Example #19
def bigquery():

    if 'function' in project.task:
        query = None

        if project.task['function'] == 'pearson_significance_test':
            query = pearson_significance_test()

        if query:
            run_query(project.task['auth'], project.id, query, False,
                      project.task['to']['dataset'])

    elif 'run' in project.task and 'query' in project.task.get('run', {}):
        if project.verbose:
            print('QUERY', project.task['run']['query'])
        run_query(
            project.task['auth'],
            project.id,
            query_parameters(project.task['run']['query'],
                             project.task['run'].get('parameters')),
            project.task['run'].get('legacy', True),
        )

    elif 'values' in project.task['from']:
        rows = get_rows(project.task['auth'], project.task['from'])

        rows_to_table(project.task['to'].get('auth', project.task['auth']),
                      project.id, project.task['to']['dataset'],
                      project.task['to']['table'], rows,
                      project.task.get('schema', []), 0)

    elif 'query' in project.task['from']:

        if 'table' in project.task['to']:
            if project.verbose:
                print('QUERY TO TABLE', project.task['to']['table'])

            query_to_table(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['table'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                disposition=project.task['write_disposition']
                if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
                legacy=project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql,
                target_project_id=project.task['to'].get(
                    'project_id', project.id))

        elif 'sheet' in project.task['to']:
            if project.verbose:
                print('QUERY TO SHEET', project.task['to']['sheet'])
            rows = query_to_rows(
                project.task['auth'],
                project.id,
                project.task['from']['dataset'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                legacy=project.task['from'].get('legacy', True))

            # makes sure types are correct in sheet
            rows = rows_to_type(rows)

            sheets_clear(project.task['to'].get('auth', project.task['auth']),
                         project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'))
            sheets_write(project.task['to'].get('auth', project.task['auth']),
                         project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'), rows)

        elif 'sftp' in project.task['to']:
            rows = query_to_rows(
                project.task['auth'],
                project.id,
                project.task['from']['dataset'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                legacy=project.task['from'].get('use_legacy_sql', True))

            if rows:
                if project.verbose:
                    print('QUERY TO SFTP')
                put_rows(project.task['auth'], project.task['to'], rows)

        else:
            if project.verbose:
                print('QUERY TO VIEW', project.task['to']['view'])
            query_to_view(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['view'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql
                project.task['to'].get('replace', False))

    else:
        if project.verbose:
            print('STORAGE TO TABLE', project.task['to']['table'])
        storage_to_table(
            project.task['auth'], project.id, project.task['to']['dataset'],
            project.task['to']['table'], project.task['from']['bucket'] + ':' +
            project.task['from']['path'], project.task.get('schema', []),
            project.task.get('skip_rows', 1),
            project.task.get('structure', 'CSV'),
            project.task.get('disposition', 'WRITE_TRUNCATE'))
Example #20
def bigquery():

    if 'run' in project.task and 'query' in project.task.get('run', {}):
        if project.verbose: print("QUERY", project.task['run']['query'])
        run_query(
            project.task['auth'],
            project.id,
            project.task['run']['query'],
            project.task['run'].get('legacy', True),
            #project.task['run'].get('billing_project_id', None)
        )

    elif 'values' in project.task['from']:
        rows = get_rows(project.task['auth'], project.task['from'])

        rows_to_table(project.task['to'].get('auth', project.task['auth']),
                      project.id, project.task['to']['dataset'],
                      project.task['to']['table'], rows,
                      project.task.get('schema', []), 0)

    elif 'query' in project.task['from']:
        if 'table' in project.task['to']:
            if project.verbose:
                print("QUERY TO TABLE", project.task['to']['table'])

            if 'pre_process_query' in project.task['to']:
                print('executing statement')
                execute_statement(project.task['auth'],
                                  project.id,
                                  project.task['to']['dataset'],
                                  project.task['to']['pre_process_query'],
                                  use_legacy_sql=project.task['from'].get(
                                      'legacy', project.task['from'].get(
                                          'useLegacySql', True)))
            query_to_table(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['table'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                disposition=project.task['write_disposition']
                if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
                legacy=project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql,
                target_project_id=project.task['to'].get(
                    'project_id', project.id))
        # NOT USED SO RIPPING IT OUT
        # Mauriciod: Yes, it is used, look at project/mauriciod/target_winrate.json
        elif 'storage' in project.task['to']:
            if project.verbose:
                print("QUERY TO STORAGE", project.task['to']['storage'])
            local_file_name = '/tmp/%s' % str(uuid.uuid1())
            rows = query_to_rows(project.task['auth'], project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'])

            # csv.writer needs text mode in Python 3
            with open(local_file_name, 'w', newline='') as f:
                writer = csv.writer(f)
                writer.writerows(rows)

            with open(local_file_name, 'rb') as f:
                object_put(project.task['auth'], project.task['to']['storage'], f)

            os.remove(local_file_name)
        elif 'sheet' in project.task['to']:
            if project.verbose:
                print("QUERY TO SHEET", project.task['to']['sheet'])
            rows = query_to_rows(project.task['auth'],
                                 project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'],
                                 legacy=project.task['from'].get(
                                     'legacy', True))

            # makes sure types are correct in sheet
            rows = rows_to_type(rows)

            sheets_clear(project.task['auth'], project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'))
            sheets_write(project.task['auth'], project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'), rows)
        elif 'sftp' in project.task['to']:
            rows = query_to_rows(project.task['auth'],
                                 project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'],
                                 legacy=project.task['from'].get(
                                     'use_legacy_sql', True))

            if rows:
                if project.verbose: print("QUERY TO SFTP")
                put_rows(project.task['auth'], project.task['to'], rows)
        else:
            if project.verbose:
                print("QUERY TO VIEW", project.task['to']['view'])
            query_to_view(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['view'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql
                project.task['to'].get('replace', False))
    else:
        if project.verbose:
            print("STORAGE TO TABLE", project.task['to']['table'])
        storage_to_table(
            project.task['auth'], project.id, project.task['to']['dataset'],
            project.task['to']['table'], project.task['from']['bucket'] + ':' +
            project.task['from']['path'], project.task.get('schema', []),
            project.task.get('skip_rows', 1),
            project.task.get('structure', 'CSV'),
            project.task.get('disposition', 'WRITE_TRUNCATE'))
Example #21
def get_rows(auth, source):
    """Processes standard read JSON block for dynamic loading of data.

  Allows us to quickly pull a column or columns of data from a source and use
  it as an input into a script. For example, pull a list of ids from bigquery
  and act on each one.

  - When pulling a single column specify single_cell = True. Returns list AKA
    values.
  - When pulling multiple columns specify single_cell = False. Returns list of
    lists AKA rows.
  - Values are always given as a list ( single_cell will trigger necessary
    wrapping ).
  - Values, bigquery, sheet are optional; if multiple are given the result is
    one continuous iterator.
  - Extensible, add a handler to define a new source ( be kind, update the
    documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import get_rows

    var_json = {
      "in":{
        "single_cell":[ boolean ],
        "values": [ integer list ],
        "bigquery":{
          "dataset": [ string ],
          "query": [ string ],
          "legacy":[ boolean ]
        },
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
        },
        "sheet":{
          "sheet":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ]
        }
      }
    }

    values = get_rows('user', var_json)

  Or you can use it directly with project singleton.

    from util.project import project
    from utils.data import get_rows

    @project.from_parameters
    def something():
      values = get_rows(project.task['auth'], project.task['in'])

    if __name__ == "__main__":
      something()

  Args:
    auth: (string) The type of authentication to use, user or service.
    source: (json) A json block resembling var_json described above.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
"""

    # if handler points to list, concatenate all the values from various sources into one list
    if isinstance(source, list):
        for s in source:
            for r in get_rows(auth, s):
                yield r

    # if handler is an endpoint, fetch data
    else:
        if 'values' in source:
            if isinstance(source['values'], list):
                for value in source['values']:
                    yield value
            else:
                yield source['values']

        if 'sheet' in source:
            rows = sheets_read(
                auth,
                source['sheet']['sheet'],
                source['sheet']['tab'],
                source['sheet']['range'],
            )

            for row in rows:
                yield row[0] if source.get('single_cell', False) else row

        if 'bigquery' in source:

            rows = []

            if 'table' in source['bigquery']:
                rows = table_to_rows(source['bigquery'].get('auth', auth),
                                     project.id,
                                     source['bigquery']['dataset'],
                                     source['bigquery']['table'],
                                     as_object=source['bigquery'].get(
                                         'as_object', False))

            else:
                rows = query_to_rows(
                    source['bigquery'].get('auth', auth),
                    project.id,
                    source['bigquery']['dataset'],
                    query_parameters(source['bigquery']['query'],
                                     source['bigquery'].get('parameters', {})),
                    legacy=source['bigquery'].get('legacy', False),
                    as_object=source['bigquery'].get('as_object', False))

            for row in rows:
                yield row[0] if source.get('single_cell', False) else row
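
A small usage sketch of the contract described in the docstring above. The 'values' source needs no credentials, so this runs as written; the sheet and bigquery branches would require real auth and sources:

source = {
    'single_cell': True,
    'values': [101, 102, 103],
}

# Yields 101, 102, 103; single_cell matters only for sheet/bigquery sources.
for value in get_rows('user', source):
    print(value)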
Example #22
def bigquery_query():
    """Execute a query and write results to table.

  TODO: Replace with get_rows and put_rows combination.

  See: scripts/bigquery_query.json
       scripts/bigquery_storage.json
       scripts/bigquery_to_sheet.json
       scripts/bigquery_view.json
  """

    if 'table' in project.task['to']:
        if project.verbose:
            print('QUERY TO TABLE', project.task['to']['table'])

        query_to_table(
            project.task['auth'],
            project.id,
            project.task['to']['dataset'],
            project.task['to']['table'],
            query_parameters(project.task['from']['query'],
                             project.task['from'].get('parameters')),
            disposition=project.task['write_disposition']
            if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
            legacy=project.task['from'].get(
                'legacy',
                project.task['from'].get('useLegacySql',
                                         True)),  # DEPRECATED: useLegacySql,
            target_project_id=project.task['to'].get('project_id', project.id))

    elif 'sheet' in project.task['to']:
        if project.verbose:
            print('QUERY TO SHEET', project.task['to']['sheet'])

        rows = query_to_rows(project.task['auth'],
                             project.id,
                             project.task['from']['dataset'],
                             query_parameters(
                                 project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                             legacy=project.task['from'].get('legacy', True))

        # makes sure types are correct in sheet
        rows = rows_to_type(rows)

        sheets_clear(project.task['to'].get('auth', project.task['auth']),
                     project.task['to']['sheet'], project.task['to']['tab'],
                     project.task['to'].get('range', 'A2'))
        sheets_write(project.task['to'].get('auth', project.task['auth']),
                     project.task['to']['sheet'], project.task['to']['tab'],
                     project.task['to'].get('range', 'A2'), rows)

    elif 'sftp' in project.task['to']:
        if project.verbose:
            print('QUERY TO SFTP')

        rows = query_to_rows(
            project.task['auth'],
            project.id,
            project.task['from']['dataset'],
            query_parameters(project.task['from']['query'],
                             project.task['from'].get('parameters')),
            legacy=project.task['from'].get('use_legacy_sql', True))

        if rows:
            put_rows(project.task['auth'], project.task['to'], rows)

    else:
        if project.verbose:
            print('QUERY TO VIEW', project.task['to']['view'])

        query_to_view(
            project.task['auth'],
            project.id,
            project.task['to']['dataset'],
            project.task['to']['view'],
            query_parameters(project.task['from']['query'],
                             project.task['from'].get('parameters')),
            project.task['from'].get(
                'legacy',
                project.task['from'].get('useLegacySql',
                                         True)),  # DEPRECATED: useLegacySql
            project.task['to'].get('replace', False))
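
The TODO in the docstring suggests collapsing these branches into a get_rows / put_rows pair. A speculative sketch of that shape, reusing this example's 'from' and 'to' task blocks and assuming put_rows dispatches on the destination keys ( table, sheet, sftp ):

def bigquery_query():
    # Speculative sketch, not the library's implementation: relies on
    # get_rows / put_rows accepting the same JSON blocks as the code above.
    rows = get_rows(project.task['auth'], project.task['from'])
    put_rows(project.task['auth'], project.task['to'], rows)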
Example #23
def videos_from_bigquery(bigquery):
    rows = query_to_rows(project.task['auth'],
                         bigquery.get('project', project.id),
                         bigquery['dataset'],
                         'SELECT * FROM %s;' % bigquery['table'])
    return rows_to_videos(rows)