Example #1
0
def main():
  """Command line entry point: transform excel sheets into CSV on STDOUT.

  Parses a workbook path plus --list / --sheet flags, then either prints the
  workbook's sheet names (one per line) or streams a single sheet's rows as
  CSV to STDOUT, ready to be piped into a file.
  """

  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=textwrap.dedent("""\
    Command line to transform excel sheets into csv files.

    Prints to STDOUT, user is expected to pipe output into file.
    Typically used for BigQuery data imports.

    Examples:
      List sheets in workbook: python helper.py [EXCEL FILE] --list
      Convert excel to CSV: python helper.py [EXCEL FILE] --sheet [SHEET NAME] > results.csv

  """))

  parser.add_argument('workbook', help='name of file to pull the rows.')
  parser.add_argument('--sheet', help='Sheet to pull the rows.', default=None)
  parser.add_argument('--list', help='List reports.', action='store_true')

  # initialize project
  # BUG FIX: ('-v') is just the string '-v' — a one-element tuple needs a
  # trailing comma, otherwise commandline_parser would iterate the characters
  # '-' and 'v' instead of the single flag (compare the multi-flag tuple used
  # by the BigQuery helper's main()).
  parser = commandline_parser(parser, arguments=('-v',))
  args = parser.parse_args()
  config = Configuration(
    verbose=args.verbose
  )

  # Excel workbooks are binary files, hence 'rb'.
  with open(args.workbook, 'rb') as excel_file:
    if args.list:
      # List mode: print each sheet name on its own line.
      for sheet in excel_to_sheets(excel_file):
        print(sheet)
    elif args.sheet:
      # Convert mode: stream the chosen sheet's rows to STDOUT as CSV.
      for sheet, row in excel_to_rows(excel_file, args.sheet):
        print(rows_to_csv(row).read())
Example #2
0
def email_read():
  """Read the most recent matching email and load its files into storage.

  Fetches messages matching the task's from/to/subject filters, takes only
  the newest one, then writes each attached or linked file (xlsx sheets or
  csv) to the configured output via put_rows. Exits early when no email
  matches. Unsupported file types are skipped with a verbose notice.
  """

  # process only most recent message; get_email_messages is lazy (generator).
  email = get_email_messages(
    project.task['auth'],
    project.task['read']['from'],
    project.task['read']['to'],
    project.task['read'].get('subject', None),
    project.task['read'].get('link', None),
    project.task['read'].get('attachment', None),
    download=True
  )

  # only take the most recent email
  # BUG FIX: the original bare `except:` swallowed every exception (including
  # SystemExit / KeyboardInterrupt and real failures inside the fetch) and
  # printed a spurious traceback. Only an exhausted generator means "no
  # emails", so catch StopIteration specifically and let real errors surface.
  try:
    email = next(email)
  except StopIteration:
    if project.verbose: print('NO EMAILS FOUND')
    exit()

  if project.verbose: print('EMAIL:', email['subject'])

  # loop all attached files
  for filename, data in email_files(email):

    if project.verbose: print('EMAIL FILENAME:', filename)

    # decompress if necessary; strip the '.gz' suffix so the inner
    # extension drives the format checks below.
    if filename.endswith('.gz'):
      data = gzip.GzipFile(fileobj=data, mode='rb')
      filename = filename[:-3]

    # if excel file, save each sheet individually
    if filename.endswith('.xlsx'):

      for sheet, rows in excel_to_rows(data):
        rows = rows_trim(rows)
        rows = rows_header_sanitize(rows)

        if project.verbose: print('EMAIL WRITE', filename)
        put_rows(project.task['auth'], project.task['read']['out'], rows, column_header_sanitize(sheet))

    # if csv, save directly
    elif filename.endswith('.csv'):
      rows = csv_to_rows(data.read().decode())
      rows = rows_header_sanitize(rows)

      if project.verbose: print('EMAIL WRITE', filename)
      put_rows(project.task['auth'], project.task['read']['out'], rows)

    else:
      if project.verbose: print('UNSUPPORTED FILE:', filename)
Example #3
0
def email_read():
    """Read the most recent matching email and load its files into storage.

    Fetches messages matching the task's from/to/subject filters, takes only
    the newest one, collects its attachments and/or linked files, then routes
    each file to put_rows: xlsx per sheet, CM / DV360 report senders through
    their dedicated cleaners, plain csv directly. Exits early when no email
    matches; unsupported files are skipped with a verbose notice.
    """

    # process only most recent message
    # FIX: dropped the unused `as e` binding on the StopIteration handler.
    try:
        message = next(
            get_email_messages(project.task['auth'],
                               project.task['read']['from'],
                               project.task['read']['to'],
                               project.task['read'].get('subject', None)))
    except StopIteration:
        if project.verbose: print('NO EMAILS FOUND')
        exit()

    if project.verbose: print('EMAIL:', get_subject(message))

    files = []

    # Attachments and links are both optional; either (or both) may
    # contribute (filename, data) pairs.
    if project.task['read'].get('attachment'):
        files.extend(
            get_email_attachments(project.task['auth'], message,
                                  project.task['read']['attachment']))

    if project.task['read'].get('link'):
        files.extend(
            get_email_links(project.task['auth'],
                            message,
                            project.task['read']['link'],
                            download=True))

    for filename, data in files:

        if project.verbose: print('EMAIL FILENAME:', filename)

        # decompress if necessary; strip '.gz' so the inner extension
        # drives the format checks below
        if filename.endswith('.gz'):
            data = gzip.GzipFile(fileobj=data, mode='rb')
            filename = filename[:-3]

        # if excel file, save each sheet individually
        if filename.endswith('.xlsx'):

            for sheet, rows in excel_to_rows(data):
                rows = rows_trim(rows)
                rows = rows_header_sanitize(rows)
                put_rows(project.task['auth'], project.task['read']['out'],
                         rows, sheet)

        # if CM report (matched by sender address)
        elif project.task['read']['from'] == '*****@*****.**':
            rows = cm_report_to_rows(data.read().decode())
            rows = cm_report_clean(rows)

            # if bigquery, remove header and determine schema
            # NOTE(review): next(rows) consumes the header row to build the
            # schema AND skip_rows=1 is set — presumably put_rows re-reads the
            # source rather than this generator; verify no double-skip.
            schema = None
            if 'bigquery' in project.task['read']['out']:
                schema = cm_report_schema(next(rows))
                project.task['read']['out']['bigquery']['schema'] = schema
                project.task['read']['out']['bigquery']['skip_rows'] = 1

            put_rows(project.task['auth'], project.task['read']['out'], rows)

        # if dv360 report (matched by sender address)
        # NOTE(review): getvalue() only exists on BytesIO-like objects; a
        # gzip-decompressed stream (GzipFile, above) has no getvalue() and
        # would raise here — confirm dv360 reports never arrive gzipped.
        elif project.task['read']['from'] == '*****@*****.**':
            rows = dv360_report_to_rows(data.getvalue().decode())
            rows = dv360_report_clean(rows)
            put_rows(project.task['auth'], project.task['read']['out'], rows)

        # if csv
        elif filename.endswith('.csv'):
            rows = csv_to_rows(data.read().decode())
            rows = rows_header_sanitize(rows)
            put_rows(project.task['auth'], project.task['read']['out'], rows)

        else:
            if project.verbose: print('UNSUPPORTED FILE:', filename)
Example #4
0
def main():
    """Command line entry point for BigQuery table schema/upload helpers.

    Depending on the flags given, uploads a CSV file or one Excel sheet to a
    BigQuery table (detecting and printing the schema first if none was
    provided), or prints the existing table's schema as JSON.
    """
    # get parameters
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""\
    Command line to get table schema from BigQuery.

    Helps developers upload data to BigQuery and pull schemas.  These are the
    most common BigQuery tasks when developing solutions.

    Examples:
      Display table schema: `python helper.py --project [id] --dataset [name] --table [name] -s [credentials]`
      Upload csv table: `python helper.py --project [id] --dataset [name] --table [name] --csv [file] --schema [file] -s [credentials]`
      Upload excel sheet: `python helper.py --project [id] --dataset [name] --table [name] --excel_file [file] --excel_sheet [name] --schema [file] -s [credentials]`

  """))

    parser.add_argument('--dataset',
                        help='name of BigQuery dataset',
                        default=None)
    parser.add_argument('--table', help='name of BigQuery table', default=None)
    parser.add_argument('--csv', help='CSV file path', default=None)
    parser.add_argument('--schema', help='SCHEMA file path', default=None)
    parser.add_argument('--excel_workbook',
                        help='Excel file path',
                        default=None)
    parser.add_argument('--excel_sheet', help='Excel sheet name', default=None)

    # initialize project
    parser = commandline_parser(parser,
                                arguments=('-u', '-c', '-s', '-v', '-p'))
    args = parser.parse_args()
    config = Configuration(user=args.user,
                           client=args.client,
                           service=args.service,
                           verbose=args.verbose,
                           project=args.project)

    # service credentials take precedence over user credentials
    auth = 'service' if args.service else 'user'

    # --schema is passed as inline JSON, not a file path
    schema = json.loads(args.schema) if args.schema else None

    if args.csv:

        with open(args.csv, 'r') as csv_file:
            rows = csv_to_rows(csv_file.read())

            # without a schema, detect one, show it, and stop so the user
            # can re-run with it confirmed
            if not schema:
                rows, schema = get_schema(rows)
                # FIX: corrected typo in user-facing message ('DETECETED')
                print('DETECTED SCHEMA', json.dumps(schema))
                print('Please run again with the above schema provided.')
                exit()

            rows_to_table(config, auth, config.project, args.dataset,
                          args.table, rows, schema)

    elif args.excel_workbook and args.excel_sheet:
        # BUG FIX: .xlsx workbooks are binary; text mode 'r' would fail to
        # decode them (the CSV-converter helper correctly uses 'rb').
        with open(args.excel_workbook, 'rb') as excel_file:
            rows = excel_to_rows(excel_file, args.excel_sheet)

            if not schema:
                rows, schema = get_schema(rows)
                # FIX: corrected typo in user-facing message ('DETECETED')
                print('DETECTED SCHEMA', json.dumps(schema))
                print('Please run again with the above schema provided.')
                exit()

            rows_to_table(config, auth, config.project, args.dataset,
                          args.table, rows, schema)

    else:
        # no upload requested: print the existing table's schema
        print(
            json.dumps(table_to_schema(config, auth, config.project,
                                       args.dataset, args.table),
                       indent=2))