import pyarrow as pa
from json2parquet import convert_json


def Json2Parq(args=None):
    # pa.string() must be called: pa.field expects a DataType instance, not the factory function
    schema = pa.schema([
        pa.field('QueryID', pa.string()),
        pa.field('QueryText', pa.string()),
    ])
    date_format = "%Y-%m-%dT%H:%M:%S.%fZ"  # timestamp format used in the query logs
    input_filename = "/data/query_logs.json"
    output_filename = "/Users/ka/2020fa-final-project-kumar-anish/data/query_logs.parquet"
    # pass the schema explicitly so both columns are typed as strings
    convert_json(input_filename, output_filename, schema)
    print("done...")
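To sanity-check the conversion, the output can be read straight back with pandas. A minimal sketch, assuming pandas with a Parquet engine (pyarrow) is installed; the path matches output_filename above:

import pandas as pd

# read the converted file back and confirm both string columns survived the round trip
df = pd.read_parquet("/Users/ka/2020fa-final-project-kumar-anish/data/query_logs.parquet")
print(df.dtypes)   # QueryID and QueryText should both come back as string/object columns
print(df.head())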
import json

from json2parquet import convert_json


def invertedIndex(self, keyColumn, valueColumn, fileName, parquet=True):
    if self.df is None:
        print("No Data Available")
    else:
        # map each key-column value to the list of value-column entries it appears with
        inverted_index = dict()
        for index, row in self.df.iterrows():
            if row[keyColumn] in inverted_index:
                inverted_index[row[keyColumn]].append(row[valueColumn])
            else:
                inverted_index[row[keyColumn]] = [row[valueColumn]]
        JSONfileName = fileName + ".json"
        with open(JSONfileName, 'w') as outfile:
            json.dump(inverted_index, outfile)
        if parquet:
            convert_json(JSONfileName, fileName + ".parquet")
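The method only touches self.df, so any object holding a pandas DataFrame in a df attribute can drive it. A usage sketch with hypothetical sample data (the SimpleNamespace stand-in and the word/doc_id columns are illustrative, not from the original project):

from types import SimpleNamespace

import pandas as pd

df = pd.DataFrame({
    "word": ["data", "query", "data"],
    "doc_id": ["d1", "d2", "d3"],
})
# a namespace with a .df attribute is enough to act as `self` here
holder = SimpleNamespace(df=df)
invertedIndex(holder, "word", "doc_id", "word_index", parquet=False)
# word_index.json now maps "data" -> ["d1", "d3"] and "query" -> ["d2"]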
from json2parquet import convert_json

convert_json("../files/worldcities.json", "../files/worldcities.parquet")
from json2parquet import convert_json


def JSONtoParquet(jsonFile, fileName):
    # fileName is the output path without an extension; ".parquet" is appended here
    convert_json(jsonFile, fileName + ".parquet")
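A minimal usage sketch, reusing the worldcities file from the earlier snippet as an example input:

# writes ../files/worldcities.parquet
JSONtoParquet("../files/worldcities.json", "../files/worldcities")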
import gzip
import os
import shutil

from json2parquet import convert_json

# this script converts zipped ndjson files from path ndjson_dir_name
# to parquet files stored in '../../parquet'
script_dir = os.path.dirname(__file__)
ndjson_dir_name = "../../../zq-sample-data/zeek-ndjson"
unzipped_dir = '../../unzipped_ndj'
parquet_dir = '../../parquet'

for root, dirs, files in os.walk(ndjson_dir_name, topdown=False):
    for name in files:
        src_filename = os.path.join(script_dir, unzipped_dir, name.split('.')[0] + '.ndjson')
        dest_filename = os.path.join(script_dir, parquet_dir, name.split('.')[0] + '.parquet')
        os.makedirs(os.path.join(script_dir, unzipped_dir), exist_ok=True)
        os.makedirs(os.path.join(script_dir, parquet_dir), exist_ok=True)
        zipped_ndjson_file = os.path.join(root, name)
        print("processing " + zipped_ndjson_file)
        # decompress the gzipped ndjson file, streaming rather than reading it all into memory
        with gzip.open(zipped_ndjson_file, 'rb') as f_in:
            with open(src_filename, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        try:
            convert_json(src_filename, dest_filename)
        except Exception as e:
            print("Failed to process the file: " + name)
            print(e)
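After a run, any one of the converted files can be spot-checked with pyarrow. A sketch; 'conn.parquet' is a hypothetical example name, not necessarily a file the sample data produces:

import pyarrow.parquet as pq

# inspect a single converted file: row count plus the inferred schema
table = pq.read_table('../../parquet/conn.parquet')
print(table.num_rows, "rows")
print(table.schema)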
# use Go IEX's pcap2json examples below

# pcap2csv parses out just open, high, low, close, volume by symbol with ns timestamps
pcap2csv < data%2Ffeeds%2F20180913%2F20180913_IEXTP1_DEEP1.0.pcap > 20180913_IEXTP1_DEEP1.0.csv

# pcap2json parses out the TCP headers and leaves all of the other message data
pcap2json < data%2Ffeeds%2F20180913%2F20180913_IEXTP1_DEEP1.0.pcap > 20180913_IEXTP1_DEEP1.0.json

# the json2parquet Python library converts the JSON to Parquet, which pandas and pyarrow work better with
from json2parquet import convert_json

# Infer schema (requires reading the dataset for column names)
convert_json('20180913_IEXTP1_DEEP1.0.json', '20180913_IEXTP1_DEEP1.0.parquet')

# -*- coding: utf-8 -*-
import click
import logging
from pathlib import Path
from dotenv import find_dotenv, load_dotenv


@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')


if __name__ == '__main__':
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_fmt)

    # load environment variables from a .env file if one exists
    load_dotenv(find_dotenv())

    main()
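The click stub above logs a message but does no work yet. One plausible way to wire the conversion into it, as a sketch; this body is an assumption, not the project's actual pipeline:

import logging

import click
from json2parquet import convert_json


@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
    """Hypothetical body for the stub above: convert a raw JSON file to Parquet."""
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')
    convert_json(input_filepath, output_filepath)  # schema inferred from the data
    logger.info('wrote %s', output_filepath)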
from json2parquet import convert_json

columns = [
    "method", "path", "format", "controller", "action", "status",
    "duration", "view", "db", "ip", "route", "request_id",
    "req_params", "user_id", "realname", "nickname", "email",
    "source", "tags", "@timestamp", "@version"
]

convert_json('logstasher.log', 'logstasher_current.log', columns)
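Since the output name keeps a .log extension, a quick read-back confirms the file is really Parquet. A sketch using pandas:

import pandas as pd

# the output is a Parquet file despite its .log extension
df = pd.read_parquet('logstasher_current.log')
print(sorted(df.columns))  # expected to match the columns list above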