示例#1
0
def main(argv=None):
    """Run the parquet command-line utility.

    Args:
        argv: Command-line arguments, excluding the program name. When
            ``None`` (the default) ``sys.argv[1:]`` is used. A list that is
            passed explicitly -- even an empty one -- is parsed as given.

    The parsed options are handed to ``setup_logging``. Afterwards
    ``parquet.dump_metadata`` runs if ``--metadata`` was given, and
    ``parquet.dump`` runs unless ``--no-data`` was given.

    Raises:
        SystemExit: raised by ``argparse`` on ``--help`` or on invalid or
            missing arguments.
    """
    # Test against None instead of truthiness: an explicit empty argument
    # list must not silently fall back to the process's own arguments.
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser('parquet',
                                     description='Read parquet files')
    parser.add_argument('--metadata',
                        action='store_true',
                        help='show metadata on file')
    parser.add_argument('--row-group-metadata',
                        action='store_true',
                        help="show per row group metadata")
    parser.add_argument('--no-data',
                        action='store_true',
                        help="don't dump any data from the file")
    parser.add_argument('--limit',
                        action='store',
                        type=int,
                        default=-1,
                        help='max records to output')
    parser.add_argument('--col',
                        action='append',
                        type=str,
                        help='only include this column (can be '
                        'specified multiple times)')
    parser.add_argument('--no-headers',
                        action='store_true',
                        help='skip headers in output (only applies if '
                        'format=csv)')
    parser.add_argument('--format',
                        action='store',
                        type=str,
                        default='csv',
                        help='format for the output data. can be csv or json.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='log debug info to stderr')
    parser.add_argument('file', help='path to the file to parse')

    args = parser.parse_args(argv)

    setup_logging(args)

    # Imported only after setup_logging() has run -- presumably so logging
    # is configured before the package is loaded (TODO confirm).
    # pylint: disable=import-outside-toplevel
    import parquet

    # The two actions are independent: both, either, or neither may run.
    if args.metadata:
        parquet.dump_metadata(args.file, args.row_group_metadata)
    if not args.no_data:
        # The whole namespace is passed on, so dump() sees every option.
        parquet.dump(args.file, args)
示例#2
0
文件: __main__.py 项目: cloudera/hue
def main(argv=None):
    """Entry point of the parquet utility application.

    Args:
        argv: Argument list without the program name. ``None`` (the default)
            means "use ``sys.argv[1:]``"; any list passed explicitly,
            including an empty one, is parsed unchanged.

    After parsing, ``setup_logging`` receives the parsed options, then
    ``parquet.dump_metadata`` is called when ``--metadata`` is set and
    ``parquet.dump`` is called unless ``--no-data`` is set.

    Raises:
        SystemExit: raised by ``argparse`` for ``--help`` and for invalid or
            missing arguments.
    """
    # ``argv or sys.argv[1:]`` would discard a deliberately empty list, so
    # only substitute the process arguments when nothing was passed at all.
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser('parquet',
                                     description='Read parquet files')
    parser.add_argument('--metadata', action='store_true',
                        help='show metadata on file')
    parser.add_argument('--row-group-metadata', action='store_true',
                        help="show per row group metadata")
    parser.add_argument('--no-data', action='store_true',
                        help="don't dump any data from the file")
    parser.add_argument('--limit', action='store', type=int, default=-1,
                        help='max records to output')
    parser.add_argument('--col', action='append', type=str,
                        help='only include this column (can be '
                             'specified multiple times)')
    parser.add_argument('--no-headers', action='store_true',
                        help='skip headers in output (only applies if '
                             'format=csv)')
    parser.add_argument('--format', action='store', type=str, default='csv',
                        help='format for the output data. can be csv or json.')
    parser.add_argument('--debug', action='store_true',
                        help='log debug info to stderr')
    parser.add_argument('file',
                        help='path to the file to parse')

    args = parser.parse_args(argv)

    setup_logging(args)

    # Deferred import: kept after setup_logging(), as in the original order.
    import parquet

    # Metadata and data dumps are selected independently of each other.
    if args.metadata:
        parquet.dump_metadata(args.file, args.row_group_metadata)
    if not args.no_data:
        # dump() is given the full namespace of parsed options.
        parquet.dump(args.file, args)
示例#3
0
 def test_dump_metadata(self):
     """Call ``parquet.dump_metadata`` on ``self.f`` with a string buffer.

     No assertions are made; the test passes if the call does not raise.
     """
     sink = StringIO.StringIO()
     parquet.dump_metadata(self.f, sink)
示例#4
0
 def test_dump_metadata(self):
     """Smoke-test: ``parquet.dump_metadata`` must run without raising."""
     # In-memory buffer handed to dump_metadata as its second argument.
     out_buffer = StringIO.StringIO()
     parquet.dump_metadata(self.f, out_buffer)
示例#5
0
 def test_dump_metadata(self):
     """Check that dumping the metadata of ``TEST_FILE`` does not raise."""
     buffer = io.StringIO()
     parquet.dump_metadata(TEST_FILE, buffer)
示例#6
0
 def test_dump_metadata(self):
     """Invoke ``parquet.dump_metadata`` for ``TEST_FILE`` with a text buffer.

     Nothing is asserted; success means the call completed without an error.
     """
     text_sink = io.StringIO()
     parquet.dump_metadata(TEST_FILE, text_sink)
示例#7
0
 def test_dump_metadata(self):
     """Exercise the metadata dump on ``TEST_FILE`` (no assertions made)."""
     # A fresh in-memory text stream is passed as the second argument.
     stream = io.StringIO()
     parquet.dump_metadata(TEST_FILE, stream)