Exemplo n.º 1
0
def get_solutions():
  if project.verbose: print 'GETTING SCORES'

  for solution in project.task['solutions']:
    scores = []

    if 'sheet' in solution:
      scores = sheets_read(
        project.task['auth'],
        solution['sheet']['url'],
        solution['sheet']['tab'],
        solution['sheet']['range']
      )
    elif 'bigquery' in solution:
      scores = query_to_rows(
        project.task['auth'],
        project.id,
        solution['bigquery']['dataset'],
        solution['bigquery']['query']
      )

    # for easy lookup use dictionary
    solution['scores'] = {}
    for score in scores:
      solution['scores'].setdefault(str(score[0]), [])
      solution['scores'][str(score[0])].append({ 'variant_id':str(score[1]), 'variant':score[2], 'score':float(score[3]) })

    if project.verbose: print 'GOT SCORES:', len(solution['scores'])

  return project.task['solutions']
Exemplo n.º 2
0
def bigquery():

    # check schema if given ( check independent of values )
    if 'schema' in project.task['bigquery']:
        schema = table_to_schema(project.task['auth'], project.id,
                                 project.task['bigquery']['dataset'],
                                 project.task['bigquery']['table'])
        object_compare(schema['fields'], project.task['bigquery']['schema'])

    # if query given check it
    if 'query' in project.task['bigquery']:
        rows = query_to_rows(project.task['auth'], project.id,
                             project.task['bigquery']['dataset'],
                             project.task['bigquery']['query'])

        object_compare(sorted(rows),
                       sorted(project.task['bigquery']['values']))

    # simple table check ( unless query given )
    elif 'values' in project.task['bigquery']:
        rows = table_to_rows(project.task['auth'], project.id,
                             project.task['bigquery']['dataset'],
                             project.task['bigquery']['table'])

        object_compare(sorted(rows),
                       sorted(project.task['bigquery']['values']))
Exemplo n.º 3
0
def get_impacts():
  if project.verbose: print 'GETTING IMPACTS'

  impacts = []

  if 'sheet' in project.task['impacts']:
    impacts = sheets_read(
      project.task['auth'],
      project.task['impacts']['sheet']['url'],
      project.task['impacts']['sheet']['tab'],
      project.task['impacts']['sheet']['range']
    )
  elif 'bigquery' in project.task['impacts']:
    impacts = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['impacts']['bigquery']['dataset'],
      project.task['impacts']['bigquery']['query']
    )

  # for easy lookup use dictionary
  impacts = dict([(str(i[0]), float(i[1])) for i in impacts])

  if project.verbose: print 'GOT IMPACTS:', len(impacts)

  return impacts
Exemplo n.º 4
0
def conversions_download():
  if project.verbose: print 'CONVERSION DOWNLOAD'

  # pull from bigquery if specified
  if 'bigquery' in project.task:
    if project.verbose: print 'READING BIGQUERY'
    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      'SELECT * FROM %s' % project.task['bigquery']['table'],
      legacy=project.task['bigquery'].get('legacy', True)
    )
    for row in rows: yield row

  # pull from sheets if specified
  if 'sheets' in project.task:
    if project.verbose: print 'READING SHEET'
    rows = sheets_read(
      project.task['auth'], 
      project.task['sheets']['url'], 
      project.task['sheets']['tab'], 
      project.task['sheets']['range']
    )
    for row in rows: yield row

  # pull from csv if specified
  if 'csv' in project.task:
    if project.verbose: print 'READING CSV FILE'
    with io.open(project.task['csv']['file']) as f:
      for row in csv_to_rows(f):
        if row[0] not in CSV_HEADERS: yield row
Exemplo n.º 5
0
def get_owners():
  if project.verbose: print 'GETTING OWNERS'

  owners = []

  if 'sheet' in project.task['owners']:
    owners = sheets_read(
      project.task['auth'],
      project.task['owners']['sheet']['url'],
      project.task['owners']['sheet']['tab'],
      project.task['owners']['sheet']['range']
    )
  elif 'bigquery' in project.task['owners']:
    owners = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['owners']['bigquery']['dataset'],
      project.task['owners']['bigquery']['query']
    )

  # group account owners by email, create easy lookup sets for ids
  owners_grouped = {}
  for owner in owners:
    try:

      owners_grouped.setdefault(owner[2], { 
        'Account Name':owner[0],
        'Account Owner':owner[1], 
        'Account Email':owner[2],
        'DCM Network ID':[],
        'DBM Partner ID':[],
        'DS Account ID':[],
        'Studio Account ID':[],
      })

      if len(owner) > 3 and owner[3]: owners_grouped[owner[2]]['DCM Network ID'].append(str(owner[3]))
      if len(owner) > 4 and owner[4]: owners_grouped[owner[2]]['DBM Partner ID'].append(str(owner[4]))
      if len(owner) > 5 and owner[5]: owners_grouped[owner[2]]['DS Account ID'].append(str(owner[5]))
      if len(owner) > 6 and owner[6]: owners_grouped[owner[2]]['Studio Account ID'].append(str(owner[6]))
      
    except IndexError: 
      print 'ERROR:', owner
      pass

  if project.verbose: print 'GOT OWNERS:', len(owners)

  return owners_grouped.values()
Exemplo n.º 6
0
def _process_filters(partners, advertisers, filters, project_id, dataset_id, auth='user'):
  structures = []

  for p in (partners or []):
    structures.append({
      'type':'FILTER_PARTNER',
      'value':int(p)
    })

  for a in (advertisers or []):
    structures.append({
      'type':'FILTER_ADVERTISER',
      'value':int(a)
    })

  for f in (filters or []):
    if isinstance(f['value'], basestring) and f['value'].startswith('SELECT '):

      items = query_to_rows(auth, project_id, dataset_id, f['value'])

      filtered = False
      for item in items:
        if item and len(item) != 0:
          filtered = True
          structures.append({
            'type':f['type'],
            'value':item[0]
          })
        else: break

      if not filtered:
        raise Exception('Select filter did not return any values: %s' % f['value'])
    else:
      structures.append({
        'type':f['type'],
        'value':f['value']
      })

  return structures
Exemplo n.º 7
0
def bigquery():

    if 'query' in project.task['from']:
        if 'table' in project.task['to']:
            if project.verbose:
                print "QUERY TO TABLE", project.task['to']['table']

            if 'pre_process_query' in project.task['to']:
                print 'executing statement'
                execute_statement(project.task['auth'],
                                  project.id,
                                  project.task['to']['dataset'],
                                  project.task['to']['pre_process_query'],
                                  use_legacy_sql=project.task['from'].get(
                                      'legacy', project.task['from'].get(
                                          'useLegacySql', True)))
            query_to_table(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['table'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                disposition=project.task['write_disposition']
                if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
                legacy=project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql,
                target_project_id=project.task['to'].get(
                    'project_id', project.id))
        # NOT USED SO RIPPING IT OUT
        # Mauriciod: Yes, it is used, look at project/mauriciod/target_winrate.json
        elif 'storage' in project.task['to']:
            if project.verbose:
                print "QUERY TO STORAGE", project.task['to']['storage']
            local_file_name = '/tmp/%s' % str(uuid.uuid1())
            rows = query_to_rows(project.task['auth'], project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'])

            f = open(local_file_name, 'wb')
            writer = csv.writer(f)
            writer.writerows(rows)
            f.close()

            f = open(local_file_name, 'rb')
            object_put(project.task['auth'], project.task['to']['storage'], f)
            f.close()

            os.remove(local_file_name)
        elif 'trix' in project.task['to']:
            if project.verbose:
                print "QUERY TO SHEET", project.task['to']['trix']
            rows = query_to_rows(project.task['auth'],
                                 project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'],
                                 legacy=project.task['from'].get(
                                     'legacy', True))

            # makes sure types are correct in sheet
            rows = rows_to_type(rows)

            sheets_clear(project.task['auth'], project.task['to']['trix'],
                         project.task['to']['range'].split('!')[0],
                         project.task['to']['range'].split('!')[1])
            sheets_write(project.task['auth'], project.task['to']['trix'],
                         project.task['to']['range'].split('!')[0],
                         project.task['to']['range'].split('!')[1], rows)
        elif 'sftp' in project.task['to']:
            rows = query_to_rows(project.task['auth'],
                                 project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'],
                                 legacy=project.task['from'].get(
                                     'use_legacy_sql', True))

            if rows:
                if project.verbose: print "QUERY TO SFTP"
                put_rows(project.task['auth'], project.task['to'], '', rows)
        else:
            if project.verbose:
                print "QUERY TO VIEW", project.task['to']['view']
            query_to_view(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['view'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                project.task['from'].get('legacy', project.task['from'].get(
                    'useLegacySql', True))  # DEPRECATED: useLegacySql
            )
    else:
        if project.verbose:
            print "STORAGE TO TABLE", project.task['to']['table']
        storage_to_table(
            project.task['auth'], project.id, project.task['to']['dataset'],
            project.task['to']['table'], project.task['from']['bucket'] + ':' +
            project.task['from']['path'], project.task.get('schema', []),
            project.task.get('skip_rows', 1),
            project.task.get('structure', 'CSV'),
            project.task.get('disposition', 'WRITE_TRUNCATE'))
Exemplo n.º 8
0
def get_rows(auth, source):
  """Processes standard read JSON block for dynamic loading of data.
  
  Allows us to quickly pull a column or columns of data from and use it as an input 
  into a script. For example pull a list of ids from bigquery and act on each one.

  - When pulling a single column specify single_cell = True. Returns list AKA values. 
  - When pulling a multiple columns specify single_cell = False. Returns list of lists AKA rows.
  - Values are always given as a list ( single_cell will trigger necessary wrapping ).
  - Values, bigquery, sheet are optional, if multiple given result is one continous iterator.
  - Extensible, add a handler to define a new source ( be kind update the documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.
  
    from utils.data import get_rows
  
    var_json = {
      "in":{
        "single_cell":[ boolean ],
        "values": [ integer list ],
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
          "columns":[ integer list ],
          "legacy":[ boolean ]
        },
        "sheet":{
          "url":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ]
        }
      } 
    } 
  
    values = get_rows('user', var_json)
  
  Or you can use it directly with project singleton.
  
    from util.project import project
    from utils.data import get_rows
  
    def something():
      values = get_rows(project.task['auth'], project.task['in'])
  
    if __name__ == "__main__":
      project.load('something')
      something()
  
  Args:
    auth: (string) The type of authentication to use, user or service.
    source: (json) A json block resembling var_json described above.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
"""

  # if handler points to list, concatenate all the values from various sources into one list
  if isinstance(source, list):  
    for s in source:
      for r in get_rows(auth, s):
        yield r

  # if handler is an endpoint, fetch data
  else:
    if 'values' in source:
      for value in source['values']:
        yield value if source.get('single_cell', False) else [value]

    if 'sheet' in source:
      rows = sheets_read(project.task['auth'], source['sheet']['url'], source['sheet']['tab'], source['sheet']['range'])

      for row in rows:
        yield row[0] if source.get('single_cell', False) else row

    if 'bigquery' in source:
      rows = query_to_rows(
        source['bigquery'].get('auth', auth),
        project.id,
        source['bigquery']['dataset'],
        source['bigquery']['query'],
        legacy=source['bigquery'].get('legacy', False)
      )
      for row in rows:
        yield row[0] if source.get('single_cell', False) else row