def parse_args():
    """Parse command-line arguments and attach per-provider squid files.

    Builds a ``SquidArgumentParser``, registers the positional ``providers``
    list and the ``--name_format`` option, and overrides the parser's
    ``datadir`` default.  After parsing, ``SquidArgumentParser.get_files`` is
    called once per provider with ``args.basename`` set to that provider.

    Returns:
        The parsed namespace with an extra ``squid_files`` attribute: a dict
        mapping each provider identifier to the value returned by
        ``SquidArgumentParser.get_files`` for it.  ``args.basename`` is left
        set to the last provider processed (if any).
    """
    parser = SquidArgumentParser(
        description="Process a collection of squid logs and write certain extracted metrics to file"
    )
    parser.add_argument(
        "providers",
        metavar="PROVIDER_IDENTIFIER",
        nargs="*",
        default=DEFAULT_PROVIDERS,
        help="list of provider identifiers used in squid log file names",
    )
    # NOTE(review): the second placeholder in the default is "%." rather than
    # "%s"; it is left untouched because the code that consumes name_format
    # is not visible here -- confirm it is intentional.
    parser.add_argument(
        "--name_format",
        dest="name_format",
        type=str,
        default="%s.log-%.gz.counts",
        help="a printf style format string which is formatted with the tuple: "
        "(provider_name, date_representation)",
    )
    parser.set_defaults(datadir="/home/erosen/src/dashboard/mobile/zero_counts")
    args = parser.parse_args()

    # custom logic for which files to grab
    prov_files = {}
    for prov in args.providers:
        # get_files reads the basename from the namespace, so it has to be
        # swapped in before each call.
        args.basename = prov
        logging.info("args prior to get_files: %s", pprint.pformat(vars(args)))
        prov_files[prov] = SquidArgumentParser.get_files(args)
    args.squid_files = prov_files

    logging.info(pprint.pformat(vars(args)))
    return args
def parse_args():
    """Parse command-line arguments and attach per-provider squid files.

    Builds a ``SquidArgumentParser`` (with argparse's defaults-showing help
    formatter), registers the positional ``providers`` list and the
    ``--name_format`` option, and overrides the parser's ``datadir`` default.
    After parsing, ``get_files`` is called once per provider with the
    basename ``"zero-<provider>"``.

    Returns:
        The parsed namespace with an extra ``squid_files`` attribute: a dict
        mapping each provider identifier to the value returned by
        ``get_files(args.start, args.end, args.datadir, basename)``.
    """
    parser = SquidArgumentParser(
        description="Process a collection of squid logs and write certain extracted metrics to file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "providers",
        metavar="PROVIDER_IDENTIFIER",
        nargs="*",
        default=DEFAULT_PROVIDERS,
        help="list of provider identifiers used in squid log file names",
    )
    # NOTE(review): name_format is parsed but not passed to get_files below,
    # and the second placeholder in its default is "%." rather than "%s" --
    # confirm both are intentional.
    parser.add_argument(
        "--name_format",
        dest="name_format",
        type=str,
        default="%s.tab.log-%.gz",
        help="a printf style format string which is formatted with the tuple: "
        "(provider_name, date_representation)",
    )
    parser.set_defaults(datadir="/a/squid/archive/zero")
    args = parser.parse_args()

    # custom logic for which files to grab
    prov_files = {}
    for prov in args.providers:
        basename = "zero-%s" % prov
        logger.debug("basename: %s", basename)
        prov_files[prov] = get_files(args.start, args.end, args.datadir, basename)
    args.squid_files = prov_files

    logger.info(pprint.pformat(vars(args)))
    return args
def main():
    """Count matching squid log records and write the totals to CSV.

    Parses arguments with ``SquidArgumentParser``, then calls ``count_files``
    on ``args.squid_files`` with the ``keepers`` field names and a list of
    row predicates.  A row is counted only if ``old_init_request()`` is
    truthy, ``site()`` equals ``'M'``, and ``datetime()`` lies strictly
    between ``args.start`` and ``args.end``.  ``count_files`` is given
    ``'country_counts_incremental.csv'`` as ``fname``; the returned counts
    are passed to ``write_counts`` with ``'country_counts.csv'``.
    """
    parser = SquidArgumentParser()
    # --nprocs used to be parsed (as a string, default 10) and then ignored:
    # count_files was always called with nproc=15.  It is now an int and is
    # actually forwarded; the default is 15 so a run without the flag behaves
    # exactly as before.
    parser.add_argument('--nprocs', type=int, default=15)
    args = parser.parse_args()
    logger.info(pprint.pformat(vars(args)))

    keepers = ['date', 'language', 'project', 'site', 'country', 'na']

    # Each predicate receives one parsed log row.
    criteria = [
        lambda r: r.old_init_request(),
        lambda r: r.site() == 'M',
        lambda r: r.datetime() > args.start,
        lambda r: r.datetime() < args.end,
    ]

    counts = count_files(
        args.squid_files,
        keepers,
        criteria,
        count_event=1000,
        limit=args.max_lines,
        nproc=args.nprocs,
        fname='country_counts_incremental.csv',
    )
    write_counts(counts, 'country_counts.csv')