def main(args):
    print(common.jsonpretty(args))

    cmd_options = {}
    cmd_options['preview_mode'] = bool(args.get('--preview'))
    cmd_options['group_target'] = bool(args.get('--group'))
    cmd_options['config'] = bool(args.get('--config'))
    cmd_options['list'] = bool(args.get('ls') or args.get('<command_target>') == 'ls')

    warp_home_dir = os.getcwd()  # default is the current directory
    warp_initfile = 'warp.ini'  # TODO: use a default constant instead of a magic string

    warp_config = None
    with open(warp_initfile) as f:
        warp_config = yaml.safe_load(f)
    if not warp_config:
        raise Exception('empty or unparseable warp.ini config file.')

    warp_home_dir = load_config_var(warp_config['globals']['warp_home'])
    warpfiles_dir = os.path.join(warp_home_dir, 'warpfiles')
    extensions_dir = os.path.join(warp_home_dir, 'extensions')

    loader = CommandLoader(warpfiles_dir)
    extension_mgr = ExtensionManager(warp_config)
    extension_mgr.bind_methods_to_class(WarpCLI)

    cli = WarpCLI(loader, extension_mgr)
    cli.cmdloop()
def write(self, records, **kwargs):
    for raw_record in records:
        record = json.loads(raw_record)
        customer_id = record['customerid']
        order_month = record['month']
        order_year = record['year']
        order_amount = record['order_amount']

        self.active_customers.add(customer_id)

        # update monthly customer orders
        key = (customer_id, order_month, order_year)
        if customer_id == 'cus_1683':  # debug tracing for a single customer
            print('recording order for %s in month %s and year %s. Amount: %s'
                  % (customer_id, order_month, order_year, order_amount))

        self.customer_orders.setdefault(key, []).append(order_amount)

        # calculate total revenue (note: keyed by month only, across all years)
        if order_month not in self.total_monthly_revenue_table:
            self.total_monthly_revenue_table[order_month] = order_amount
        else:
            self.total_monthly_revenue_table[order_month] += order_amount

    print(common.jsonpretty(self.total_monthly_revenue_table))
    print(self.customer_orders_before_month('cus_1683', 4, 2020, self.customer_orders))
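# Illustrative sketch (an assumption, not the source implementation) of the
# customer_orders_before_month() helper invoked above, assuming customer_orders
# maps (customer_id, month, year) tuples to lists of order amounts:
def customer_orders_before_month(customer_id, month, year, customer_orders):
    # gather every order amount recorded for this customer strictly before
    # the given (month, year)
    amounts = []
    for (cust_id, order_month, order_year), orders in customer_orders.items():
        if cust_id == customer_id and (order_year, order_month) < (year, month):
            amounts.extend(orders)
    return amounts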
def _send(self, msg_header, kafka_message, **kwargs):
    log.debug('writing kafka log message to db...')
    log.debug('### kafka_message keys: %s' % '\n'.join(kafka_message.keys()))

    outbound_record = {}
    fact_data = self._schema_mapping_context.get_fact_values(
        kafka_message.get('body'), persistence_manager=self._pmgr)

    print('### OLAP fact data:')
    print(common.jsonpretty(fact_data))

    insert_query_template = '''
    INSERT INTO {fact_table}
    ({field_names})
    VALUES ({data_placeholders});
    '''

    data_placeholder_segment = ', '.join(':%s' % name for name in fact_data.keys())

    print('### initial rendering of insert statement: ')
    iqtemplate_render = insert_query_template.format(
        fact_table=self._schema_mapping_context.fact.table_name,
        field_names=','.join(fact_data.keys()),
        data_placeholders=data_placeholder_segment)
    print(iqtemplate_render)

    insert_statement = text(iqtemplate_render)
    insert_statement = insert_statement.bindparams(**fact_data)

    #dbconnection = self._pmgr.database.engine.connect()
    result = self._dbconnection.execute(insert_statement)
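# Self-contained demo of the named-placeholder technique used in _send() above:
# render an INSERT with :name placeholders, then bind values via
# text().bindparams(). A sketch only; the table and column names are
# hypothetical, and it assumes SQLAlchemy with an in-memory SQLite engine.
from sqlalchemy import create_engine, text

engine = create_engine('sqlite:///:memory:')
with engine.connect() as conn:
    conn.execute(text('CREATE TABLE order_fact (customer_id TEXT, amount REAL)'))
    fact_data = {'customer_id': 'cus_0001', 'amount': 42.5}
    placeholders = ', '.join(':%s' % name for name in fact_data)
    stmt = text('INSERT INTO order_fact (%s) VALUES (%s)'
                % (', '.join(fact_data), placeholders))
    conn.execute(stmt.bindparams(**fact_data))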
def write(self, records, **kwargs):
    for rec in records:
        print('### ready to write record to Kinesis:')
        print('------------------------\n')
        print(common.jsonpretty(rec))
        status = self.kinesis_svc.write(rec, 'apollo_test_stream')
        print('Done with status: %s' % status)
def msg_handler(message, receipt_handle, service_registry):
    s3_svc = service_registry.lookup('s3')
    print('### Inside SQS message handler function.')
    print('### message follows:')
    print(common.jsonpretty(message))

    # unpack the SQS message to get the notification about the S3 file upload
    message_body_raw = message['Body']
    message_body = json.loads(message_body_raw)

    for record in message_body['Records']:
        s3_data = record.get('s3')
        if not s3_data:
            continue
        bucket_name = s3_data['bucket']['name']
        object_key = s3_data['object']['key']
        # TODO: set a limit on file size?
        print('#--- received object upload notification [ bucket: %s, key: %s ]'
              % (bucket_name, object_key))
        s3key = S3Key(bucket_name, object_key)
        jsondata = None
        try:
            jsondata = s3_svc.download_json(bucket_name, object_key)
            print('### JSON payload data:')
            print(common.jsonpretty(jsondata))

            # we use the name of the top-level S3 "folder" to select the action
            # to perform, by keying into the dispatch table
            channel_id = object_key.split('/')[0]
            handler = S3_EVENT_DISPATCH_TABLE.get(channel_id)
            if not handler:
                raise Exception('no handler registered for S3 upload events to bucket %s with key %s'
                                % (bucket_name, object_key))
            handler(service_registry, **jsondata)
        except Exception as err:
            print('Error handling JSON job data from URI %s.' % s3key.uri)
            print(err)
            traceback.print_exc(file=sys.stdout)
            return
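# Hypothetical wiring for the dispatch table msg_handler() keys into: the
# top-level S3 "folder" name is the channel ID, and each channel maps to a
# handler function. The channel name and handler below are illustrative only.
def handle_job_upload(service_registry, **job_data):
    print('handling uploaded job data: %s' % job_data)

S3_EVENT_DISPATCH_TABLE = {
    'jobs': handle_job_upload   # matches object keys like 'jobs/2020/05/job_0001.json'
}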
def helloathena_func(input_data, service_registry, **kwargs):
    athenasvc = service_registry.lookup('athena')
    db_service = service_registry.lookup('postgres')

    encoded_input_query = input_data['input_query']
    input_query = base64.b64decode(encoded_input_query).decode('utf-8')

    s3_output_filename = athenasvc.athena_to_s3(input_query, 8)
    if not s3_output_filename:
        return 'No result from query.'

    s3_svc = service_registry.lookup('s3')
    querydata = s3_svc.download_data(s3_output_filename)

    # this is CSV data, so the first line will be the header
    if querydata.find('\n') > -1:
        query_output_header = querydata.split('\n')[0]
    else:
        query_output_header = querydata

    query_response_fields = [
        token.strip('"') for token in query_output_header.split(',')
    ]

    # query the knowledgebase to get the fields in the test definition
    definition_fields = {}
    obs_def_id = input_data['observation_def_id']
    with db_service.txn_scope() as session:
        print(f'######## OBSERVATION DEF ID: {obs_def_id}')
        ObservationVerification = db_service.Base.classes.issue_mgmt_observationverification
        verification_query = session.query(ObservationVerification).filter(
            ObservationVerification.observation_definition_id == obs_def_id)
        results = verification_query.all()
        for record in results:
            definition_fields[record.key_name] = record.data_type

    print(common.jsonpretty(definition_fields))

    errors = []
    for fieldname in query_response_fields:
        if fieldname not in definition_fields:
            errors.append({
                'error_type': 'undefined_field',
                'error_key': 'field_name',
                'error_value': fieldname
            })

    response = {'ok': True, 'input_query': input_query}
    if len(errors):
        response['ok'] = False
        response['errors'] = errors
    return response
def do_update(self, cmd_args):
    '''Usage:
        update (map | project)
        update map <map_name>
        update project (globals | datasources)
    '''
    print(common.jsonpretty(cmd_args))
def award_job(self, bid_window_id, bidder_array, **kwargs):
    payload = {'window_id': bid_window_id, 'bids': bidder_array}
    print('PAYLOAD for calling /award endpoint:')
    print(common.jsonpretty(payload))
    response = self._call_endpoint(self.award, payload, **kwargs)
    return response
def main(args):
    src_file = args.get('<datafile>')
    null_mode = args.get('--null')
    readable_dict_mode = args.get('--readable_dict')
    readable_line_mode = args.get('--readable_line')

    with open(src_file) as f:
        first_line = f.readline()
    fields = first_line.split('|')

    nb_reporter = dmap.NullByteFilter(delimiter='|', field_names=fields)

    if null_mode:
        null_pairs = nb_reporter.filter_with_null_output(src_file)
        for null_pair in null_pairs:
            print(common.jsonpretty({
                'line_number': null_pair[0],
                'field': null_pair[1]
            }))
    elif readable_dict_mode:
        readable_lines = nb_reporter.filter_with_readable_output(src_file)
        for line in readable_lines:
            if line == first_line:
                continue
            record_dict = {}
            value_array = line.split('|')
            for r_index, field in enumerate(fields):
                record_dict[field] = value_array[r_index]
            print(common.jsonpretty(record_dict))
    elif readable_line_mode:
        proc = Dictionary2CSVProcessor(fields, '|', dmap.WhitespaceCleanupProcessor())
        readable_lines = nb_reporter.filter_with_readable_output(src_file)
        for line in readable_lines:
            if line == first_line:
                continue
            record_dict = {}
            value_array = line.split('|')
            for r_index, field in enumerate(fields):
                record_dict[field] = value_array[r_index]
            proc.process(record_dict)
    else:
        print('Choose an option flag for record info output')
def trigger_arbitration(service_registry, **kwargs):
    current_time = datetime.datetime.now()

    # scan ALL open bidding windows
    api_service = service_registry.lookup('job_mgr_api')
    response = api_service.get_open_bid_windows()
    bid_windows = response.json()['data']['bidding_windows']
    print('###----- Retrieved open bid windows from API endpoint:')
    print(bid_windows)

    # for each open window, see who has bid
    for bwindow in bid_windows:
        job_tag = bwindow['job_tag']
        window_id = bwindow['bidding_window_id']

        if bwindow['policy']['limit_type'] == 'num_bids':
            print('++ Policy limit is %d bids.' % int(bwindow['policy']['limit']))
            json_bidder_data = api_service.get_active_job_bids(job_tag)
            bidding_users = json_bidder_data.json()['data']['bidders']
            num_bids = len(bidding_users)
            policy_limit_bids = int(bwindow['policy']['limit'])

            if num_bids >= policy_limit_bids:
                winners = arbitrate(bidding_users, service_registry)
                if len(winners):
                    print('!!!!!!!!!!! WE HAVE A WINNER !!!!!!!!!!!!!!!!!!')
                    print(common.jsonpretty(winners))
                    api_service.award_job(window_id, winners)
                else:
                    print('### No winner determined in the arbitration round ending %s.'
                          % current_time.isoformat())

        elif bwindow['policy']['limit_type'] == 'time_seconds':
            # see how long the window has been open
            window_opened_at = dateutil.parser.parse(bwindow['open_ts'])
            window_open_duration = (current_time - window_opened_at).seconds
            policy_limit_seconds = int(bwindow['policy']['limit'])

            if window_open_duration >= policy_limit_seconds:
                json_bidder_data = api_service.get_active_job_bids(job_tag)
                bid_data = json_bidder_data.json()['data']['bidders']
                if len(bid_data):
                    winners = arbitrate(bid_data, service_registry)
                    if len(winners):
                        print('!!!!!!!!!!! WE HAVE A WINNER !!!!!!!!!!!!!!!!!!')
                        api_service.award_job(window_id, winners)
                    else:
                        print('### No winner determined in the arbitration round ending %s.'
                              % current_time.isoformat())
                else:
                    print('### No more bidders in this round.')
        else:
            # raise hell; we don't support that
            raise Exception('Unrecognized bidding window policy limit_type: %s'
                            % bwindow['policy']['limit_type'])
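# For reference, the bidding-window record shape implied by the loop above;
# the field values here are illustrative, not taken from a live system:
#
# {
#     "job_tag": "job_0001",
#     "bidding_window_id": 17,
#     "open_ts": "2020-05-01T12:00:00",
#     "policy": {
#         "limit_type": "num_bids",    # or "time_seconds"
#         "limit": "5"
#     }
# }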
def read_msg(self, input_data, **kwargs):
    self.log.info(common.jsonpretty(input_data))
    if input_data.get('Records'):
        self.log.info('### S3 bucket name: %s'
                      % input_data['Records'][0]['s3']['bucket']['name'])
        self.log.info('### new S3 object: %s'
                      % input_data['Records'][0]['s3']['object']['key'])
        return dict(input_data['Records'][0]['s3'])
    return {}
def arbitrate(bidder_list, service_registry):
    # Decide which bidder gets assigned a job, using a simple random selector.
    # This is only for the proof of concept; we will upgrade to smarter (and
    # user-pluggable) arbitration methods once we shake the system out.
    print('#####------- Arbitrating bid data:')
    print(common.jsonpretty(bidder_list))
    random.seed(time.time())
    index = random.randrange(0, len(bidder_list))
    return [bidder_list[index]]
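# Sketch of the "user-pluggable" upgrade path mentioned in the comments above:
# arbitration methods registered by name and selected at call time. The
# registry, the strategy names, and the function below are hypothetical.
import random

ARBITRATION_STRATEGIES = {
    'random': lambda bidders: [random.choice(bidders)],
    'first_come': lambda bidders: [bidders[0]]
}

def arbitrate_with_strategy(bidder_list, strategy_name='random'):
    # look up the registered strategy and let it pick the winner(s)
    return ARBITRATION_STRATEGIES[strategy_name](bidder_list)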
def profile(self, record_generator, service_registry, **kwargs):
    time_log = dmap.TimeLog()
    result_tuple = None
    operation_name = kwargs.get('op_name') or 'profile dataset "%s"' % self.table_name
    with jrnl.stopwatch(operation_name, time_log):
        result_tuple = self._profile(record_generator, service_registry, **kwargs)

    print(common.jsonpretty(time_log.readout))
    return result_tuple
def scrape_olr_condo_listings(soup_parse_tree, html_data):
    data = {
        'street_address': '',
        'neighborhood': '',
        'type': '',
        'tags': [],
        'price': '',
        'monthly_chg': '',
        'beds': None,
        'bathrooms': None,
        'square_footage': None,
        'size_description': ''
    }
    #print(html_data)

    detail_div = soup_parse_tree.find('div', {'class': 'apt_details_left'})

    # this one contains the address
    addr_span = detail_div.find_all('span', {'class': 'txt_gray'})[0]
    address = addr_span.find_all('a')[0].get_text().strip()
    data['street_address'] = unicodedata.normalize('NFKD', address)

    # these spans contain the other features of the listing
    feature_spans = detail_div.find_all('span', {'class': 'txt_black_normal'})

    raw_neighborhood_string = unicodedata.normalize('NFKD',
                                                    feature_spans[0].get_text().strip())
    print('### Raw neighborhood string: %s' % raw_neighborhood_string)
    tokens = [t.strip() for t in raw_neighborhood_string.split('\n')]
    print(tokens)
    data['neighborhood'] = tokens[0]
    data['tags'].extend([t.strip() for t in tokens[1].split('|')])

    raw_pricing_string = unicodedata.normalize('NFKD',
                                               feature_spans[1].get_text().strip())
    print('### Raw pricing string: %s' % raw_pricing_string)
    price_size_fields = [token.strip() for token in raw_pricing_string.split('\n')]
    print(price_size_fields)
    data.update(decode_olr_condo_coop_price_size_fields(price_size_fields))
    '''
    price_fields = [token.strip() for token in price_size_fields[0].split('|')]
    data['price'] = price_fields[0].split('$')[0]
    # skip over the price drop field for now
    size_fields = [token.strip() for token in price_size_fields[2].split('|')]
    data['size_description'] = size_fields[1]
    data['square_footage'] = size_fields[2]
    '''
    print(common.jsonpretty(data))
def main(args):
    if args['--list-codes']:
        print('Supported site codes:')
        print(common.jsonpretty(RE_SITE_CODES))
        print('\n')
        print('Supported neighborhood codes:')
        print(common.jsonpretty(BK_NEIGHBORHOOD_CODES))
        return

    site_code = args['<site-code>']
    if not RE_SITE_CODES.get(site_code):
        print('unrecognized site code "%s".' % site_code)
        print('valid codes:')
        print(common.jsonpretty(RE_SITE_CODES))
        return

    neighborhood_code = args['<neighborhood_code>']
    if not BK_NEIGHBORHOOD_CODES.get(neighborhood_code):
        print('unrecognized neighborhood code "%s".' % neighborhood_code)
        print('valid codes:')
        print(common.jsonpretty(BK_NEIGHBORHOOD_CODES))
        return

    url = RE_SITE_CODES[site_code]

    # create a new Firefox session
    #options = webdriver.FirefoxOptions()
    #options.add_argument('-headless')
    driver = webdriver.Firefox()
    driver.implicitly_wait(30)
    driver.get(url)

    # print('### Issuing search against %s for neighborhood %s...'
    #       % (url, BK_NEIGHBORHOOD_CODES[neighborhood_code]))
    if site_code == 'trulia':
        search_trulia(neighborhood_code, driver)
    elif site_code == 'olr':
        scrape_olr(neighborhood_code, driver)
def __init__(self, warp_yaml_cfg, **kwargs):
    self.registry = {}
    print(common.jsonpretty(warp_yaml_cfg))

    warp_home_dir = load_config_var(warp_yaml_cfg['globals']['warp_home'])
    extensions_dir = os.path.join(warp_home_dir, 'extensions')
    sys.path.append(extensions_dir)

    module_names = []
    should_load_all = False

    # load context from the specified modules
    #module_names = warp_yaml_cfg.get('extensions') or []
    module_names = []

    # otherwise load all modules
    '''
    else:
        should_load_all = True
        module_names = [f[0:-3] for f in os.listdir(extensions_dir) if f.endswith('.py')]
    '''

    for module_name in module_names:
        extensions = {}
        dirmod = __import__('extensions.%s' % module_name)
        extmod = getattr(dirmod, module_name)
        context_loader_function = getattr(extmod, '__load__')

        extension_args = {}
        for param in warp_yaml_cfg['extensions'][module_name]['init_params']:
            extension_args[param['name']] = param['value']

        print('### Extension params: %s' % extension_args)
        extension_context = context_loader_function(warp_home_dir, logger, **extension_args)

        function_names = [
            f[0] for f in getmembers(extmod)
            if isfunction(f[1]) and is_valid_extension_name(f[0])
        ]
        for raw_function_name in function_names:
            function_name = raw_function_name.lstrip('_')
            bound_method_name = '_'.join([CMD_METHOD_PREFIX, module_name, function_name])
            function_obj = getattr(extmod, raw_function_name)
            mx = MethodExtension(function_name, bound_method_name, function_obj)
            extensions[function_name] = mx

        self.registry[module_name] = extensions
def main(args):
    queue_url = args['<queue_url>']
    sendargs = {
        'QueueUrl': queue_url,
        'DelaySeconds': int(args.get('<delay>') or 0),  # docopt stores a missing arg as None
        'MessageAttributes': parse_attributes(args['--attrs'][0]),
        'MessageBody': args['<body>']
    }
    print(common.jsonpretty(sendargs))
    client = boto3.client('sqs')
    response = client.send_message(**sendargs)
    print(response)
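# parse_attributes() is not shown here, but whatever it does, boto3's
# send_message() expects MessageAttributes in this shape (the attribute name
# and values below are illustrative):
#
# {
#     'trace_id': {
#         'DataType': 'String',
#         'StringValue': 'abc-123'
#     }
# }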
def explore(parse_tree):
    print(dir(parse_tree))
    expression_buffer = []
    symbol_table = {}

    for node in parse_tree.iter_subtrees_topdown():
        if node.data == 'form':
            print('### start-of-form')
        if node.data == 'variable':
            symbol_table.update(resolve_var(node))
            print(f'### Symbol table updated: {resolve_var(node)}')
            #print(node.scan_values())
        if node.data == 'expression':
            print(f'### found an expression with {len(node.children)} child nodes.')
            for child in node.children:
                if child.data == 'name':
                    expression_buffer.append(child.children[0])
                if child.data == 'operator':
                    expression_buffer.append(child.children[0])
                if child.data == 'number':
                    expression_buffer.append(child.children[0])
                if child.data == 'expression':
                    continue

    print('### symbols:')
    print(common.jsonpretty(symbol_table))

    pylines = []
    local_lines = []  # renamed from "locals" to avoid shadowing the builtin
    for key, value in symbol_table.items():
        local_lines.append(f'{key} = {value}')

    #pylines.append(f'{" ".join(expression_buffer)})')
    pystmt = ' '.join(expression_buffer)
    print(pystmt)
    print('### can I get a witness?')
    print(eval(pystmt, {}, symbol_table))
def main(args):
    print(args)
    local_env = common.LocalEnvironment('PGSQL_USER', 'PGSQL_PASSWORD')
    local_env.init()
    pgsql_user = local_env.get_variable('PGSQL_USER')
    pgsql_password = local_env.get_variable('PGSQL_PASSWORD')

    yaml_config = common.read_config_file(args['<initfile>'])
    print(common.jsonpretty(yaml_config))

    db_host = yaml_config['globals']['database_host']
    db_name = yaml_config['globals']['database_name']
    pubsub = pgpubsub.connect(host=db_host,
                              user=pgsql_user,
                              password=pgsql_password,
                              database=db_name)

    channel_id = args['<channel>']
    if not yaml_config['channels'].get(channel_id):
        raise NoSuchEventChannel(channel_id)

    handler_module_name = yaml_config['globals']['handler_module']
    project_dir = common.load_config_var(yaml_config['globals']['project_dir'])
    sys.path.append(project_dir)
    handlers = __import__(handler_module_name)

    handler_function_name = yaml_config['channels'][channel_id]['handler_function']
    if not hasattr(handlers, handler_function_name):
        raise NoSuchEventHandler(handler_function_name, handler_module_name)

    handler_function = getattr(handlers, handler_function_name)
    service_objects = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config, logger))

    pubsub.listen(channel_id)
    print('listening on channel "%s"...' % channel_id)
    for event in pubsub.events():
        print(event.payload)
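# To exercise this listener, issue a NOTIFY from any Postgres session on the
# same channel, e.g. from psql (the channel name and payload are illustrative):
#
#   NOTIFY my_channel, '{"table": "instructors", "op": "insert"}';
#
# pgpubsub then yields the notification via pubsub.events(), and the payload
# string arrives as event.payload in the loop above.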
def sns_receive_func(input_data, service_objects, **kwargs):
    log.info(input_data)
    sns_message_raw = input_data['Message']
    log.info(sns_message_raw)
    sns_message = json.loads(sns_message_raw)
    print(common.jsonpretty(sns_message))

    s3_segment = sns_message['Records'][0]['s3']
    keyname = s3_segment['object']['key']

    s3_svc = service_objects.lookup('s3')
    file_loc = s3_svc.download_object('datalab.mercury', keyname)
    log.info('### Downloaded S3 object to %s.' % file_loc)
    return core.TransformStatus(json.dumps({}))
def write(self, records, **kwargs):
    db_svc = self.service_object_registry.lookup('redshift_svc')
    Listing = db_svc.Base.classes.grailed_listings

    for record in records:
        print('>>> placeholder Redshift data write operation:')
        db_record = json.loads(record)
        with db_svc.txn_scope() as session:
            listing = Listing()
            for key, value in db_record.items():
                # coerce stringified booleans back into real booleans
                if value == 'True':
                    setattr(listing, key, True)
                elif value == 'False':
                    setattr(listing, key, False)
                else:
                    setattr(listing, key, value)
            session.add(listing)
            session.commit()
        print(common.jsonpretty(json.loads(record)))
def _process(self, record):
    print(common.jsonpretty(record))
    return record
def default_event_handler(event, svc_registry):
    print(common.jsonpretty(json.loads(event.payload)))
def handle_instructors_insert(json_obj, svc_object_registry):
    print(common.jsonpretty(json_obj))
def handle_default_error(self, exception, source_record):
    print('Error of type "%s" transforming record: %s'
          % (exception.__class__.__name__, exception), file=sys.stderr)
    print('Offending record:', file=sys.stderr)
    print(common.jsonpretty(source_record), file=sys.stderr)
def main(args):
    print(common.jsonpretty(args))
def main(args):
    #print(common.jsonpretty(args))
    config_filename = args['<configfile>']
    yaml_config = common.read_config_file(config_filename)
    service_object_registry = common.ServiceObjectRegistry(
        snap.initialize_services(yaml_config))
    datastore_registry = DatastoreRegistry(
        initialize_datastores(yaml_config, service_object_registry))

    preview_mode = False
    if args['--preview']:
        preview_mode = True

    limit = -1
    if args.get('--limit') is not None:
        limit = int(args['--limit'])

    list_mode = False
    stream_input_mode = False
    file_input_mode = False

    available_ingest_targets = load_ingest_targets(yaml_config, datastore_registry)

    if args['--target'] and args['<datafile>'] is None:
        stream_input_mode = True
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)
        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            while True:
                if record_count == limit:
                    break
                raw_line = sys.stdin.readline()
                line = raw_line.strip()
                if not len(line):
                    break
                if not preview_mode:
                    buffer.write(line)
                else:
                    print(line)
                record_count += 1

    elif args['<datafile>']:
        file_input_mode = True
        input_file = args['<datafile>']
        ingest_target_name = args['<ingest_target>']
        ingest_target = lookup_ingest_target_by_name(ingest_target_name,
                                                     available_ingest_targets)
        buffer = initialize_record_buffer(ingest_target, datastore_registry)
        record_count = 0
        with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
            with open(input_file) as f:
                for line in f:
                    if record_count == limit:
                        break
                    if not preview_mode:
                        buffer.write(line)
                    else:
                        print(line)
                    record_count += 1

    elif args['--list']:
        if args['targets']:
            for target in yaml_config['ingest_targets']:
                print('::: Ingest target "%s": ' % target)
                print(common.jsonpretty(yaml_config['ingest_targets'][target]))
        if args['datastores']:
            for dstore in yaml_config['datastores']:
                print('::: Datastore alias "%s": ' % dstore)
                print(common.jsonpretty(yaml_config['datastores'][dstore]))
        if args['globals']:
            print('::: Global settings:')
            print(common.jsonpretty(yaml_config['globals']))
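# Sketch of a checkpoint() context manager consistent with how main() uses it
# above; this is an assumption about its contract (flush the buffer on exit),
# not the source implementation, and the flush() method is hypothetical.
from contextlib import contextmanager

@contextmanager
def checkpoint(record_buffer, interval):
    record_buffer.checkpoint_interval = interval  # hypothetical attribute
    try:
        yield record_buffer
    finally:
        record_buffer.flush()  # assumed flush-on-exit behavior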