示例#1
0
def main():
    """Fetch paged data, dump it to a JSON file, and load it into Elasticsearch.

    Command-line arguments (read from ``argv``):
        argv[1]: path to a text file whose settings line is
            ``"<page_size> <num_page>"`` (two whitespace-separated integers).
        argv[2]: optional output path; defaults to ``"output.json"``.

    Raises:
        IndexError: if the input-file argument is missing.
        ValueError: if the input file contains no settings line, or a
            non-blank line does not hold exactly two integers.
    """
    # input file
    input_fn = argv[1]

    # output file (optional second argument)
    try:
        output_file = argv[2]
    except IndexError:
        output_file = "output.json"

    # read input argument; blank lines are skipped and, when several
    # settings lines are present, the last one wins
    page_size = num_page = None
    with open(input_fn, "r") as fh:
        for line in fh:
            fields = line.split()
            if not fields:
                continue
            page_size, num_page = fields
    if page_size is None:
        raise ValueError(f"no page settings found in {input_fn!r}")
    page_size = int(page_size)
    num_page = int(num_page)

    # fetch every page through the api module and write the combined
    # records to the output file
    output_data = []
    for i in range(num_page):
        output_data.extend(api.get_data(page_size, i))
    with open(output_file, 'w') as outfile:
        json.dump(output_data, outfile)

    # load data into elasticsearch
    index_name = "nyc_index2"
    load_elasticsearch(output_file, index_name)
示例#2
0
from sys import argv
from src.api import get_data


if __name__ == '__main__':
	# Usage: <script> page_size [num_pages] [output]
	page_size = int(argv[1])
	num_pages = None
	output = None

	if len(argv) == 4:
		num_pages = int(argv[2])
		output = argv[3]

	elif len(argv) == 3:
		# With a single optional argument, treat it as num_pages when it
		# parses as an integer and as the output target otherwise.
		try:
			num_pages = int(argv[2])
		except ValueError:
			# Only a failed integer conversion means "this is the output";
			# anything else (e.g. KeyboardInterrupt) must propagate.
			output = argv[2]

	get_data(page_size, num_pages, output)
	

示例#3
0
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            longopts=['page_size=', 'num_pages=', 'output='],
            shortopts='')
    except getopt.GetoptError:
        raise Exception('Parameter parsing error')

    for opt, arg in opts:
        if opt == '--page_size':
            page_size = arg
        elif opt == '--num_pages':
            page = arg
        elif opt == '--output':
            output = arg

    if not page_size:
        raise Exception('The required parameter page_size is missing')

    data = api.get_data(app_key, page_size=page_size, page=page)

    for item in data:
        print(item)
    if output:
        api.file_storage(data, output)

    data = api.format_data(data)

    # elasticsearch
    esearch.es_storage(data)
示例#4
0
import os
import sys

from src.api import get_data

if __name__ == "__main__":
    # The application key is read from the APP_KEY environment variable
    # (None when it is not set).
    app_key = os.getenv("APP_KEY")

    # Positional command-line arguments, passed on as raw strings:
    # page size, number of pages, output file path.
    cli_args = sys.argv[1:]
    page_size = cli_args[0]
    num_pages = cli_args[1]
    output = cli_args[2]

    data = get_data(app_key, page_size, num_pages)
    print(data)

    # Write one item per line. Each element of `data` is itself iterated,
    # presumably one page of records -- confirm against get_data.
    with open(output, "w") as out_fh:
        for page in data:
            out_fh.writelines(f"{record}\n" for record in page)
示例#5
0
import os
import argparse
from src.api import get_data

def str_to_bool(value):
    """Convert a command-line string such as ``"true"`` or ``"0"`` to a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    ``--elastic False`` would evaluate to ``True``. This converter reads the
    text instead.

    Args:
        value: the raw option string (an actual ``bool`` is returned as is).

    Returns:
        ``True`` for true/t/yes/y/1, ``False`` for false/f/no/n/0 or an
        empty string (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    if normalized in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value, got {value!r}")


if __name__ == "__main__":

    # The application key is read from the APP_KEY environment variable
    # (None when it is not set).
    app_key = os.getenv('APP_KEY')

    parser = argparse.ArgumentParser()
    parser.add_argument("--page_size", type=int)
    parser.add_argument("--num_pages", default=None, type=int)
    parser.add_argument("--output", default=None)
    parser.add_argument("--elastic", default=False, type=str_to_bool)
    args = parser.parse_args()

    data = get_data(app_key, args.page_size, args.num_pages, args.elastic)

    # --output is optional; open(None, "w") would raise TypeError, so the
    # file is only written when a path was given.
    if args.output is not None:
        with open(args.output, "w") as file:
            for i in data:
                for dic in i:
                    file.write(f"{dic}\n")
示例#6
0
import requests
import sys
import argparse
import os
from src.api import get_data
from src.output import show_data

# Command-line options. The optional ones default to -1, presumably a
# "not provided" marker interpreted by show_data -- confirm there.
# --output's default stays the integer -1: argparse applies `type` only to
# string defaults.
variables = argparse.ArgumentParser()
variables.add_argument("--page_size", required=True, type=int)
variables.add_argument("--num_pages", default=-1, type=int)
variables.add_argument("--output", default=-1, type=str)
args = variables.parse_args()

# Raises KeyError when APP_KEY is not set in the environment.
app_token = os.environ["APP_KEY"]

page_size, num_pages, output = args.page_size, args.num_pages, args.output

# get_data returns the object that show_data uses together with the
# parsed options.
client = get_data(app_token)
show_data(client, page_size, num_pages, output)
示例#7
0
import argparse
from src.api import get_data
from time import sleep

if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    cli.add_argument("--page_size", type=int)
    cli.add_argument("--num_pages", type=int, default=None)
    cli.add_argument("--output", default=None)
    args = cli.parse_args()

    # Run forever: call get_data with a 1-based round counter, report the
    # round, then pause three seconds before the next one.
    j = 1
    while True:
        get_data(args.page_size, args.num_pages, args.output, j)

        print(f"DONE LOADING {j}, SLEEPING...")
        sleep(3)
        j += 1