Пример #1
0
def main():
    """Read a list of files with the selected readers and pickle the results.

    Arguments come from ``make_parser().parse_args()``. The file named by
    ``args.input_file`` is loaded as a list of entries, one per line. The
    list is optionally down-sampled to ``args.n_samp`` random entries and
    then sliced by ``args.range_str`` (formatted ``start:end``). Every class
    returned by ``get_reader_classes()`` whose lower-cased ``name`` appears
    in ``args.readers`` is instantiated and handed to ``read_files``.

    Two pickle files are written:

    * ``<args.name>_readings.pkl`` -- ``output.make_tuple(None)`` for each
      output returned by ``read_files``.
    * ``<args.name>_stmts.pkl`` -- the flat list of everything returned by
      ``get_statements()`` on each output.
    """
    # Load arguments.
    parser = make_parser()
    args = parser.parse_args()
    if args.debug and not args.quiet:
        logger.setLevel(logging.DEBUG)

    # Load the input file. Line terminators are stripped (and blank lines
    # dropped) so the extension check below can match and so the entries
    # passed on do not end in a newline.
    with open(args.input_file, 'r') as f:
        input_lines = [line.strip() for line in f if line.strip()]
    logger.info("Found %d files.", len(input_lines))
    for ftype in ['nxml', 'txt']:
        n_of_type = sum(1 for line in input_lines if line.endswith(ftype))
        logger.debug('%d are %s', n_of_type, ftype)

    # Select only a sample of the lines, if sample is chosen.
    if args.n_samp is not None:
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at what is available.
        n_samp = args.n_samp
        if n_samp > len(input_lines):
            logger.warning("Requested sample of %d but only %d files are "
                           "available; using all of them.",
                           n_samp, len(input_lines))
            n_samp = len(input_lines)
        input_lines = random.sample(input_lines, n_samp)

    # If a range is specified, only use that range.
    if args.range_str is not None:
        start_idx, end_idx = [int(n) for n in args.range_str.split(':')]
        input_lines = input_lines[start_idx:end_idx]

    # Create a single base directory
    base_dir = _get_dir('run_%s' % ('_and_'.join(args.readers)))

    # Set the verbosity. The quiet argument overrides the verbose argument.
    verbose = args.verbose and not args.quiet

    # Get the readers objects.
    readers = [
        reader_class(base_dir=base_dir, n_proc=args.n_proc)
        for reader_class in get_reader_classes()
        if reader_class.name.lower() in args.readers
    ]

    # Read the files.
    # NOTE(review): the keyword was previously misspelled `verboes`;
    # presumably read_files expects `verbose` -- confirm against its signature.
    outputs = read_files(input_lines, readers, verbose=verbose)
    reading_out_path = args.name + '_readings.pkl'
    with open(reading_out_path, 'wb') as f:
        pickle.dump([output.make_tuple(None) for output in outputs], f)
    print("Reading outputs stored in %s." % reading_out_path)

    stmts = [s for rd in outputs for s in rd.get_statements()]
    stmts_pkl_path = args.name + '_stmts.pkl'
    with open(stmts_pkl_path, 'wb') as f:
        pickle.dump(stmts, f)
    # Report only after the file has been closed (and therefore flushed).
    print("Statements pickled in %s." % stmts_pkl_path)
Пример #2
0
def main():
    """Command-line entry point: run readers over input files, pickle output.

    Steps, in order:

    1. Parse arguments via ``make_parser()``; enable DEBUG logging when
       ``args.debug`` is set and ``args.quiet`` is not.
    2. Load ``args.input_file`` as a list of entries, one per line.
    3. Optionally take a random sample of ``args.n_samp`` entries, then
       optionally slice with ``args.range_str`` (``start:end``).
    4. Instantiate each class from ``get_reader_classes()`` whose
       lower-cased ``name`` is listed in ``args.readers``.
    5. Call ``read_files`` and pickle ``output.make_tuple(None)`` for each
       output to ``<args.name>_readings.pkl``.
    6. Pickle the statements from ``get_statements()`` on every output to
       ``<args.name>_stmts.pkl``.
    """
    # Load arguments.
    parser = make_parser()
    args = parser.parse_args()
    if args.debug and not args.quiet:
        logger.setLevel(logging.DEBUG)

    # Load the input file. Each line is stripped of surrounding whitespace
    # (including the newline) and empty lines are skipped; otherwise the
    # endswith() check below fails for every newline-terminated entry.
    with open(args.input_file, 'r') as f:
        input_lines = [line.strip() for line in f if line.strip()]
    logger.info("Found %d files.", len(input_lines))
    for ftype in ['nxml', 'txt']:
        n_of_type = sum(1 for line in input_lines if line.endswith(ftype))
        logger.debug('%d are %s', n_of_type, ftype)

    # Select only a sample of the lines, if sample is chosen.
    if args.n_samp is not None:
        # Cap the sample size: random.sample raises ValueError if the
        # requested size exceeds the number of available entries.
        n_samp = args.n_samp
        if n_samp > len(input_lines):
            logger.warning("Requested sample of %d but only %d files are "
                           "available; using all of them.",
                           n_samp, len(input_lines))
            n_samp = len(input_lines)
        input_lines = random.sample(input_lines, n_samp)

    # If a range is specified, only use that range.
    if args.range_str is not None:
        start_idx, end_idx = [int(n) for n in args.range_str.split(':')]
        input_lines = input_lines[start_idx:end_idx]

    # Create a single base directory
    base_dir = _get_dir('run_%s' % ('_and_'.join(args.readers)))

    # Set the verbosity. The quiet argument overrides the verbose argument.
    verbose = args.verbose and not args.quiet

    # Get the readers objects.
    readers = [reader_class(base_dir=base_dir, n_proc=args.n_proc)
               for reader_class in get_reader_classes()
               if reader_class.name.lower() in args.readers]

    # Read the files.
    # NOTE(review): the keyword used to be spelled `verboes`, which looks
    # like a typo for `verbose` -- confirm against read_files' signature.
    outputs = read_files(input_lines, readers, verbose=verbose)
    reading_out_path = args.name + '_readings.pkl'
    with open(reading_out_path, 'wb') as f:
        pickle.dump([output.make_tuple(None) for output in outputs], f)
    print("Reading outputs stored in %s." % reading_out_path)

    stmts = [s for rd in outputs for s in rd.get_statements()]
    stmts_pkl_path = args.name + '_stmts.pkl'
    with open(stmts_pkl_path, 'wb') as f:
        pickle.dump(stmts, f)
    # Announce success once the pickle file has been closed.
    print("Statements pickled in %s." % stmts_pkl_path)
Пример #3
0
    for ftype in ['nxml', 'txt']:
        logger.debug('%d are %s' %
                     (len([f
                           for f in input_lines if f.endswith(ftype)]), ftype))

    # Select only a sample of the lines, if sample is chosen.
    if args.n_samp is not None:
        input_lines = random.sample(input_lines, args.n_samp)

    # If a range is specified, only use that range.
    if args.range_str is not None:
        start_idx, end_idx = [int(n) for n in args.range_str.split(':')]
        input_lines = input_lines[start_idx:end_idx]

    # Create a single base directory
    base_dir = _get_dir('run_%s' % ('_and_'.join(args.readers)))

    # Set the verbosity. The quiet argument overrides the verbose argument.
    verbose = args.verbose and not args.quiet

    # Get the readers objects.
    readers = [
        reader_class(base_dir=base_dir, n_proc=args.n_proc)
        for reader_class in get_readers()
        if reader_class.name.lower() in args.readers
    ]

    # Read the files.
    outputs = read_files(input_lines, readers, verboes=verbose)
    reading_out_path = args.name + '_readings.pkl'
    with open(reading_out_path, 'wb') as f: