def main():
    """Command line entry point: list workbook sheets or dump one sheet.

    Parses the command line, opens the workbook in binary mode, and either
    prints every value yielded by excel_to_sheets() (--list) or prints the
    rows_to_csv() rendering of each item yielded by excel_to_rows() for the
    requested sheet (--sheet). When neither option is given nothing is
    printed.
    """
    usage = textwrap.dedent("""\
    Command line to transform excel sheets into csv files.

    Prints to STDOUT, user is expected to pipe output into file.
    Typically used for BigQuery data imports.

    Examples:
      List sheets in workbook: python helper.py [EXCEL FILE] --list
      Convert excel to CSV: python helper.py [EXCEL FILE] --sheet [SHEET NAME] > results.csv

    """)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage,
    )
    parser.add_argument('workbook', help='name of file to pull the rows.')
    parser.add_argument('--sheet', help='Sheet to pull the rows.', default=None)
    parser.add_argument('--list', help='List reports.', action='store_true')

    # initialize project: let the shared helper extend the parser (-v).
    parser = commandline_parser(parser, arguments=('-v'))
    options = parser.parse_args()

    # The configuration object is built but not otherwise used in this function.
    config = Configuration(verbose=options.verbose)

    with open(options.workbook, 'rb') as workbook_file:
        if options.list:
            for sheet_name in excel_to_sheets(workbook_file):
                print(sheet_name)
        elif options.sheet:
            for _sheet_name, sheet_rows in excel_to_rows(workbook_file, options.sheet):
                csv_buffer = rows_to_csv(sheet_rows)
                print(csv_buffer.read())
def lineitem_write(auth, rows, dry_run=True):
    """Upload a list of line item configurations to DBM as CSV.

    Bulletproofing:
    https://developers.google.com/bid-manager/v1/lineitems/uploadlineitems

    Args:
      * auth: (string) Either user or service.
      * rows: (iterator) List of lineitems:
        https://developers.google.com/bid-manager/guides/entity-write/format
      * dry_run: (boolean) If set to True no write will occur, only a test of
        the upload for errors.

    Returns:
      * Results of upload, as returned by API_Retry.
    """
    service = get_service('doubleclickbidmanager', API_VERSION, auth)

    # The upload expects a header row followed by the CSV encoded line items.
    column_names = ','.join(field['name'] for field in LineItem_Write_Schema)
    csv_rows = rows_to_csv(rows).read()

    request_body = {
        'lineItems': '%s\n%s' % (column_names, csv_rows),
        'format': 'CSV',
        'dryRun': dry_run,
    }

    upload_job = service.lineitems().uploadlineitems(body=request_body)
    return API_Retry(upload_job)
def lineitem_write(auth, rows, dry_run=True):
    """Upload a list of line item configurations to DBM as CSV.

    Bulletproofing:
    https://developers.google.com/bid-manager/v1/lineitems/uploadlineitems

    Args:
      * auth: (string) Either user or service.
      * rows: (iterator) List of lineitems:
        https://developers.google.com/bid-manager/guides/entity-write/format
      * dry_run: (boolean) If set to True no write will occur, only a test of
        the upload for errors.

    Returns:
      * Results of upload, as returned by the request's execute() call.
    """
    # The upload expects a header row followed by the CSV encoded line items.
    column_names = ','.join(field['name'] for field in LineItem_Write_Schema)
    csv_rows = rows_to_csv(rows).read()

    request_body = {
        'lineItems': '%s\n%s' % (column_names, csv_rows),
        'format': 'CSV',
        'dryRun': dry_run,
    }

    upload_request = API_DBM(auth).lineitems().uploadlineitems(body=request_body)
    return upload_request.execute()
def send_email(auth, email_to, email_from, email_cc, subject, text, html=None, attachment_filename=None, attachment_rows=None):
    """Assemble a multipart/alternative message and send it via the Gmail API.

    Args:
      auth: (string) Passed to get_service to pick the credentials.
      email_to: Value of the 'to' header.
      email_from: Value of the 'from' header.
      email_cc: Value of the 'cc' header.
      subject: Value of the 'subject' header.
      text: Plain text body, always attached.
      html: Optional HTML body, attached only when truthy.
      attachment_filename: File name advertised for the CSV attachment.
      attachment_rows: Rows serialized with rows_to_csv for the attachment;
        the attachment is added only when both this and attachment_filename
        are truthy.

    Returns:
      None. The message is sent as user 'me' through API_Retry.
    """
    if project.verbose:
        print('SENDING EMAIL', email_to)

    service = get_service('gmail', 'v1', auth)

    message = MIMEMultipart('alternative')
    message.set_charset('utf8')

    # Headers are written in this fixed order: to, cc, from, subject.
    for header_name, header_value in (
        ('to', email_to),
        ('cc', email_cc),
        ('from', email_from),
        ('subject', subject),
    ):
        message[header_name] = header_value

    message.attach(MIMEText(text, 'plain', 'UTF-8'))
    if html:
        message.attach(MIMEText(html, 'html', 'UTF-8'))

    if attachment_filename and attachment_rows:
        csv_part = MIMEBase('text', 'csv')
        csv_part.set_payload(rows_to_csv(attachment_rows).read())
        csv_part.add_header(
            'Content-Disposition', 'attachment', filename=attachment_filename)
        encode_base64(csv_part)
        message.attach(csv_part)

    # The API takes the whole message as a URL-safe base64 string.
    raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode()
    send_request = service.users().messages().send(
        userId='me', body={'raw': raw_message})
    API_Retry(send_request)
def send_email(config, auth, email_to, email_from, email_cc, subject, text, html=None, attachment_filename=None, attachment_rows=None):
    """Assemble a multipart/alternative message and send it via the Gmail API.

    Args:
      config: Object exposing a 'verbose' flag; also passed to API_Gmail.
      auth: (string) Passed to API_Gmail to pick the credentials.
      email_to: Value of the 'to' header.
      email_from: Value of the 'from' header.
      email_cc: Value of the 'cc' header.
      subject: Value of the 'subject' header.
      text: Plain text body, always attached.
      html: Optional HTML body, attached only when truthy.
      attachment_filename: File name advertised for the CSV attachment.
      attachment_rows: Rows serialized with rows_to_csv for the attachment;
        the attachment is added only when both this and attachment_filename
        are truthy.

    Returns:
      None. The message is sent as user 'me'.
    """
    if config.verbose:
        print('SENDING EMAIL', email_to)

    message = MIMEMultipart('alternative')
    message.set_charset('utf8')

    # Headers are written in this fixed order: to, cc, from, subject.
    for header_name, header_value in (
        ('to', email_to),
        ('cc', email_cc),
        ('from', email_from),
        ('subject', subject),
    ):
        message[header_name] = header_value

    plain_part = MIMEText(text, 'plain', 'UTF-8')
    message.attach(plain_part)

    if html:
        html_part = MIMEText(html, 'html', 'UTF-8')
        message.attach(html_part)

    if attachment_filename and attachment_rows:
        csv_part = MIMEBase('text', 'csv')
        csv_part.set_payload(rows_to_csv(attachment_rows).read())
        csv_part.add_header(
            'Content-Disposition', 'attachment', filename=attachment_filename)
        encode_base64(csv_part)
        message.attach(csv_part)

    # The API takes the whole message as a URL-safe base64 string.
    raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode()
    send_request = API_Gmail(config, auth).users().messages().send(
        userId='me', body={'raw': raw_message})
    send_request.execute()
def put_rows(auth, destination, rows, variant=''):
    """Processes standard write JSON block for dynamic export of data.

    Allows us to quickly write the results of a script to a destination. For
    example write the results of a DCM report into BigQuery.

    - Will write to multiple destinations if specified. The same `rows`
      object is handed to every destination in turn, so a one-shot iterator
      will already be consumed when the second destination is reached.
    - Extensible, add a handler to define a new destination ( be kind update
      the documentation json ).

    Include the following JSON in a recipe, then in the run.py handler when
    encountering that block pass it to this function.

      from utils.data import put_rows

      var_json = {
        "out":{
          "bigquery":{
            "auth": [ string - optional, overrides auth argument ],
            "project_id": [ string - optional, defaults to project.id ],
            "dataset": [ string ],
            "table": [ string ],
            "schema": [ json - standard bigquery schema json ],
            "skip_rows": [ integer - for removing header, defaults to 1 ],
            "disposition": [ string - same as BigQuery documentation ],
            "format": [ string - CSV ( default ) or JSON ],
            "is_incremental_load": [ boolean ]
          },
          "sheets":{
            "sheet":[ string - full URL, suggest using share link ],
            "tab":[ string ],
            "range":[ string - A1:A notation ],
            "delete": [ boolean - if sheet range should be cleared before writing ]
          },
          "storage":{
            "bucket": [ string ],
            "path": [ string ]
          },
          "file":[ string - full path to place to write file ],
          "sftp":{
            "host": [ string ],
            "port": [ integer ],
            "username": [ string ],
            "password": [ string ],
            "file": [ string - remote path, may include directories ]
          }
        }
      }

      put_rows('user', var_json['out'], rows)

    Args:
      auth: (string) The type of authentication to use, user or service.
      destination: (json) A json block resembling var_json['out'] above.
      rows: (list) The data being written as a list object.
      variant: (string) Appended to the destination name to differentiate
        multiple objects. For file style destinations it is inserted before
        the file extension.

    Returns:
      None. Errors raised while writing to SFTP are printed together with
      their traceback instead of being propagated; errors from all other
      destinations propagate to the caller.
    """
    # Local import: only needed to split file names from their extensions.
    import os

    if 'bigquery' in destination:
        bigquery = destination['bigquery']

        if bigquery.get('format', 'CSV') == 'JSON':
            json_to_table(
                bigquery.get('auth', auth),
                bigquery.get('project_id', project.id),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                bigquery.get('schema', []),
                bigquery.get('disposition', 'WRITE_TRUNCATE'),
            )

        # Equality (not truthiness) is kept on purpose so only True / 1 opt in.
        elif bigquery.get('is_incremental_load', False) == True:
            incremental_rows_to_table(
                bigquery.get('auth', auth),
                bigquery.get('project_id', project.id),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                bigquery.get('schema', []),
                bigquery.get('skip_rows', 1),
                bigquery.get('disposition', 'WRITE_APPEND'),
                billing_project_id=project.id)

        else:
            rows_to_table(
                bigquery.get('auth', auth),
                bigquery.get('project_id', project.id),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                bigquery.get('schema', []),
                bigquery.get('skip_rows', 1),
                bigquery.get('disposition', 'WRITE_TRUNCATE'),
            )

    if 'sheets' in destination:
        sheets = destination['sheets']

        if sheets.get('delete', False):
            sheets_clear(
                auth,
                sheets['sheet'],
                sheets['tab'] + variant,
                sheets['range'],
            )

        sheets_write(
            auth,
            sheets['sheet'],
            sheets['tab'] + variant,
            sheets['range'],
            rows)

    if 'file' in destination:
        # splitext only looks at the last path component and tolerates a
        # missing extension, unlike a bare rsplit('.', 1).
        path_out, file_ext = os.path.splitext(destination['file'])
        file_out = path_out + variant + file_ext
        if project.verbose:
            print('SAVING', file_out)
        makedirs_safe(parse_path(file_out))
        with open(file_out, 'w') as save_file:
            save_file.write(rows_to_csv(rows).read())

    if 'storage' in destination and destination['storage'].get(
            'bucket') and destination['storage'].get('path'):
        storage = destination['storage']

        # create the bucket
        bucket_create(auth, project.id, storage['bucket'])

        # put the file
        file_out = storage['bucket'] + ':' + storage['path'] + variant
        if project.verbose:
            print('SAVING', file_out)
        object_put(auth, file_out, rows_to_csv(rows))

    if 'sftp' in destination:
        # Best effort: any failure is reported and swallowed.
        try:
            sftp_config = destination['sftp']

            # Keep the '.' between name and extension ( report.csv must not
            # become reportcsv ).
            path_out, file_ext = os.path.splitext(sftp_config['file'])
            file_out = path_out + variant + file_ext

            cnopts = pysftp.CnOpts()
            # NOTE(security): this disables host key verification, the
            # server identity is not checked. Left as is because callers
            # may rely on connecting to hosts missing from known_hosts.
            cnopts.hostkeys = None

            # The context manager closes the connection even on failure.
            with pysftp.Connection(
                    host=sftp_config['host'],
                    username=sftp_config['username'],
                    password=sftp_config['password'],
                    port=sftp_config['port'],
                    cnopts=cnopts) as sftp:

                if '/' in file_out:
                    dir_out, file_out = file_out.rsplit('/', 1)
                    # A path such as '/report.csv' has an empty directory
                    # part, which means the root directory.
                    sftp.cwd(dir_out or '/')

                sftp.putfo(rows_to_csv(rows), file_out)

        except Exception as e:
            print(str(e))
            traceback.print_exc()
def handle(self, *args, **kwargs):
    """Print the rows from self.get_scripts(), serialized by rows_to_csv.

    Positional and keyword arguments are accepted but not used.
    """
    csv_buffer = rows_to_csv(self.get_scripts())
    print(csv_buffer.read())
def put_rows(auth, destination, filename, rows, variant=''):
    """Processes standard write JSON block for dynamic export of data.

    Allows us to quickly write the results of a script to a destination. For
    example write the results of a DCM report into BigQuery.

    - Will write to multiple destinations if specified. The same `rows`
      object is handed to every destination in turn, so a one-shot iterator
      will already be consumed when the second destination is reached.
    - Extensible, add a handler to define a new destination ( be kind update
      the documentation json ).

    Include the following JSON in a recipe, then in the run.py handler when
    encountering that block pass it to this function.

      from utils.data import put_rows

      var_json = {
        "out":{
          "bigquery":{
            "auth": [ string - optional, overrides auth argument ],
            "project_id": [ string - optional, defaults to project.id ],
            "dataset": [ string ],
            "table": [ string ],
            "schema": [ json - standard bigquery schema json ],
            "skip_rows": [ integer - for removing header, defaults to 1 ],
            "disposition": [ string - same as BigQuery documentation ],
            "format": [ string - CSV ( default ) or JSON ],
            "is_incremental_load": [ boolean ]
          },
          "sheets":{
            "sheet":[ string - full URL, suggest using share link ],
            "tab":[ string ],
            "range":[ string - A1:A notation ],
            "delete": [ boolean - if sheet range should be cleared before writing ]
          },
          "storage":{
            "bucket": [ string ],
            "path": [ string ]
          },
          "directory":[ string - path prefix the file name is appended to ],
          "sftp":{
            "host": [ string ],
            "port": [ integer ],
            "username": [ string ],
            "password": [ string ],
            "directory": [ string - optional remote directory ],
            "file_prefix": [ string - optional, defaults to report ]
          }
        }
      }

      put_rows('user', var_json['out'], 'report.csv', rows)

    Args:
      auth: (string) The type of authentication to use, user or service.
      destination: (json) A json block resembling var_json['out'] above. It
        is not modified.
      filename: (string) A unique filename if writing to medium requiring
        one, usually generated by script.
      rows: (list) The data being written as a list object.
      variant: (string) Appends this to the destination name to create a
        variant ( for example when downloading multiple tabs in a sheet ).

    Returns:
      None. Errors raised while writing to SFTP are printed together with
      their traceback instead of being propagated; errors from all other
      destinations propagate to the caller.
    """
    if 'bigquery' in destination:
        bigquery = destination['bigquery']

        if bigquery.get('format', 'CSV') == 'JSON':
            json_to_table(
                bigquery.get('auth', auth),
                bigquery.get('project_id', project.id),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                bigquery.get('schema', []),
                bigquery.get('disposition', 'WRITE_TRUNCATE'),
            )

        # Equality (not truthiness) is kept on purpose so only True / 1 opt in.
        elif bigquery.get('is_incremental_load', False) == True:
            incremental_rows_to_table(
                bigquery.get('auth', auth),
                bigquery.get('project_id', project.id),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                bigquery.get('schema', []),
                bigquery.get('skip_rows', 1),
                bigquery.get('disposition', 'WRITE_APPEND'),
                billing_project_id=project.id)

        else:
            rows_to_table(
                bigquery.get('auth', auth),
                bigquery.get('project_id', project.id),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                bigquery.get('schema', []),
                bigquery.get('skip_rows', 1),
                bigquery.get('disposition', 'WRITE_TRUNCATE'),
            )

    if 'sheets' in destination:
        sheets = destination['sheets']

        if sheets.get('delete', False):
            sheets_clear(
                auth,
                sheets['sheet'],
                sheets['tab'] + variant,
                sheets['range'])

        sheets_write(
            auth,
            sheets['sheet'],
            sheets['tab'] + variant,
            sheets['range'],
            rows)

    if 'directory' in destination:
        file_out = destination['directory'] + variant + filename
        if project.verbose:
            # Single formatted string prints the same under Python 2 and 3.
            print('SAVING %s' % file_out)
        makedirs_safe(parse_path(file_out))
        with open(file_out, 'wb') as save_file:
            save_file.write(rows_to_csv(rows).read())

    if 'storage' in destination and destination['storage'].get(
            'bucket') and destination['storage'].get('path'):
        storage = destination['storage']

        # create the bucket
        bucket_create(auth, project.id, storage['bucket'])

        # put the file
        file_out = storage['bucket'] + ':' + storage['path'] + variant + filename
        if project.verbose:
            print('SAVING %s' % file_out)
        object_put(auth, file_out, rows_to_csv(rows))

    # deprecated do not use
    if 'trix' in destination:
        trix = destination['trix']
        trix_update(
            auth,
            trix['sheet_id'],
            trix['sheet_range'],
            rows_to_csv(rows),
            trix['clear'])

    if 'email' in destination:
        # Recognized key, but no email writer is implemented here.
        pass

    if 'sftp' in destination:
        # Best effort: any failure is reported and swallowed.
        try:
            sftp_config = destination['sftp']

            # Read the prefix without deleting it, so the caller's dict is
            # left intact and repeated calls keep using the same prefix.
            file_prefix = sftp_config.get('file_prefix', 'report')

            # sftp.put() is given only the local path, so this name is
            # presumably also the remote file name - confirm against pysftp.
            tmp_file_name = '/tmp/%s_%s.csv' % (
                file_prefix,
                datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S'))

            # stderr is silenced only while talking to the server and is
            # always restored, so the traceback below stays visible and the
            # rest of the process keeps its stderr.
            saved_stderr = sys.stderr
            sys.stderr = StringIO()
            try:
                cnopts = pysftp.CnOpts()
                # NOTE(security): this disables host key verification, the
                # server identity is not checked. Left as is because callers
                # may rely on connecting to hosts missing from known_hosts.
                cnopts.hostkeys = None

                # The context manager closes the connection even on failure.
                with pysftp.Connection(
                        host=sftp_config['host'],
                        username=sftp_config['username'],
                        password=sftp_config['password'],
                        port=sftp_config['port'],
                        cnopts=cnopts) as sftp:

                    if 'directory' in sftp_config:
                        sftp.cwd(sftp_config['directory'])

                    try:
                        with open(tmp_file_name, 'wb') as tmp_file:
                            tmp_file.write(rows_to_csv(rows).read())
                        sftp.put(tmp_file_name)
                    finally:
                        # Never leave the staging file behind.
                        if os.path.exists(tmp_file_name):
                            os.remove(tmp_file_name)
            finally:
                sys.stderr = saved_stderr

        except Exception as e:
            print(str(e))
            traceback.print_exc()
def put_rows(config, auth, destination, rows, schema=None, variant=''):
    """Processes standard write JSON block for dynamic export of data.

    Allows us to quickly write the results of a script to a destination. For
    example write the results of a DCM report into BigQuery.

    - Will write to multiple destinations if specified. The same `rows`
      object is handed to every destination in turn, so a one-shot iterator
      will already be consumed when the second destination is reached.
    - Extensible, add a handler to define a new destination ( be kind update
      the documentation json ).

    Include the following JSON in a recipe, then in the run.py handler when
    encountering that block pass it to this function.

      from utils.data import put_rows

      var_json = {
        "out":{
          "bigquery":{
            "auth":"[ user or service ]",
            "project_id": [ string - optional, defaults to config.project ],
            "dataset": [ string ],
            "table": [ string ],
            "schema": [ json - standard bigquery schema json ],
            "header": [ boolean - true if header exists in rows ],
            "skip_rows": [ integer - optional, overrides header ],
            "disposition": [ string - same as BigQuery documentation ],
            "format": [ string - CSV ( default ) or JSON ],
            "is_incremental_load": [ boolean ]
          },
          "sheets":{
            "auth":"[ user or service ]",
            "sheet":[ string - full URL, suggest using share link ],
            "tab":[ string ],
            "range":[ string - A1:A notation ],
            "append": [ boolean - if sheet range should be appended to ],
            "delete": [ boolean - if sheet range should be cleared before writing ]
          },
          "storage":{
            "auth":"[ user or service ]",
            "bucket": [ string ],
            "path": [ string ]
          },
          "file":[ string - full path to place to write file ],
          "sftp":{
            "host": [ string ],
            "port": [ integer ],
            "username": [ string ],
            "password": [ string ],
            "file": [ string - remote path, may include directories ]
          }
        }
      }

      put_rows(config, 'user', var_json['out'], rows)

    Args:
      config: Object exposing 'verbose' and 'project'; also passed to every
        writer.
      auth: (string) The type of authentication to use, user or service.
      destination: (json) A json block resembling var_json['out'] above.
      rows: (iterator) The rows to be written, if None no action is
        performed.
      schema: (json) A bigquery schema definition, falls back to the schema
        in the bigquery destination block.
      variant: (string) Appended to the destination name to differentiate
        multiple objects. For file style destinations it is inserted before
        the file extension.

    Returns:
      None. Errors raised while writing to SFTP are printed together with
      their traceback instead of being propagated; errors from all other
      destinations propagate to the caller.
    """
    # Local import: only needed to split file names from their extensions.
    import os

    if rows is None:
        if config.verbose:
            print('PUT ROWS: Rows is None, ignoring write.')
        return

    if 'bigquery' in destination:
        bigquery = destination['bigquery']

        if not schema:
            schema = bigquery.get('schema')

        # A header row is only skipped when a schema describes the columns.
        skip_rows = 1 if bigquery.get('header') and schema else 0

        if bigquery.get('format', 'CSV') == 'JSON':
            json_to_table(
                config,
                bigquery.get('auth', auth),
                bigquery.get('project_id', config.project),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                schema,
                bigquery.get('disposition', 'WRITE_TRUNCATE'),
            )

        # Equality (not truthiness) is kept on purpose so only True / 1 opt in.
        elif bigquery.get('is_incremental_load', False) == True:
            incremental_rows_to_table(
                config,
                bigquery.get('auth', auth),
                bigquery.get('project_id', config.project),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                schema,
                bigquery.get('skip_rows', skip_rows),
                bigquery.get('disposition', 'WRITE_APPEND'),
                billing_project_id=config.project)

        else:
            rows_to_table(
                config,
                bigquery.get('auth', auth),
                bigquery.get('project_id', config.project),
                bigquery['dataset'],
                bigquery['table'] + variant,
                rows,
                schema,
                bigquery.get('skip_rows', skip_rows),
                bigquery.get('disposition', 'WRITE_TRUNCATE'),
            )

    if 'sheets' in destination:
        sheets = destination['sheets']
        sheets_auth = sheets.get('auth', auth)

        if sheets.get('delete', False):
            sheets_clear(
                config,
                sheets_auth,
                sheets['sheet'],
                sheets['tab'] + variant,
                sheets['range'],
            )

        sheets_write(
            config,
            sheets_auth,
            sheets['sheet'],
            sheets['tab'] + variant,
            sheets['range'],
            rows_to_type(rows),
            sheets.get('append', False),
        )

    if 'file' in destination:
        # splitext only looks at the last path component and tolerates a
        # missing extension, unlike a bare rsplit('.', 1).
        path_out, file_ext = os.path.splitext(destination['file'])
        file_out = path_out + variant + file_ext
        if config.verbose:
            print('SAVING', file_out)
        makedirs_safe(parse_path(file_out))
        with open(file_out, 'w') as save_file:
            save_file.write(rows_to_csv(rows).read())

    if 'storage' in destination and destination['storage'].get(
            'bucket') and destination['storage'].get('path'):
        storage = destination['storage']

        # Use the same credentials to create the bucket and to write into
        # it; previously the object was written with the generic auth even
        # when the storage block specified its own.
        storage_auth = storage.get('auth', auth)

        # create the bucket
        bucket_create(config, storage_auth, config.project, storage['bucket'])

        # put the file
        file_out = storage['bucket'] + ':' + storage['path'] + variant
        if config.verbose:
            print('SAVING', file_out)
        object_put(config, storage_auth, file_out, rows_to_csv(rows))

    if 'sftp' in destination:
        # Best effort: any failure is reported and swallowed.
        try:
            sftp_config = destination['sftp']

            # Keep the '.' between name and extension ( report.csv must not
            # become reportcsv ).
            path_out, file_ext = os.path.splitext(sftp_config['file'])
            file_out = path_out + variant + file_ext

            cnopts = pysftp.CnOpts()
            # NOTE(security): this disables host key verification, the
            # server identity is not checked. Left as is because callers
            # may rely on connecting to hosts missing from known_hosts.
            cnopts.hostkeys = None

            # The context manager closes the connection even on failure.
            with pysftp.Connection(
                    host=sftp_config['host'],
                    username=sftp_config['username'],
                    password=sftp_config['password'],
                    port=sftp_config['port'],
                    cnopts=cnopts) as sftp:

                if '/' in file_out:
                    dir_out, file_out = file_out.rsplit('/', 1)
                    # A path such as '/report.csv' has an empty directory
                    # part, which means the root directory.
                    sftp.cwd(dir_out or '/')

                sftp.putfo(rows_to_csv(rows), file_out)

        except Exception as e:
            print(str(e))
            traceback.print_exc()