def get_argument_parser(description):
    """Get the argument parser for the shared glean usage queries.

    Args:
        description: Text passed to ``ArgumentParser`` as its description.

    Returns:
        An ``ArgumentParser`` with ``--project-id``, ``--date`` (required),
        ``--output-dir`` and ``--output-only`` registered, followed by the
        shared options added by the ``standard_args`` helpers called below
        (those helpers are defined outside this block).
    """
    # Local import: only ArgumentParser is known to be in module scope here.
    from argparse import ArgumentTypeError

    def parse_date(value):
        """Convert a ``YYYY-MM-DD`` string to a ``date`` for argparse."""
        try:
            return datetime.strptime(value, "%Y-%m-%d").date()
        except ValueError:
            # A bare ValueError makes argparse report the converter's
            # __name__ ("invalid <lambda> value"); raise ArgumentTypeError
            # so the user sees which format is expected instead.
            raise ArgumentTypeError(
                f"invalid date {value!r}, expected format YYYY-MM-DD"
                " (e.g. 2019-01-01)"
            ) from None

    parser = ArgumentParser(description=description)
    parser.add_argument(
        "--project_id",
        "--project-id",
        default="moz-fx-data-shar-nonprod-efed",
        help="ID of the project in which to find tables",
    )
    parser.add_argument(
        "--date",
        required=True,
        type=parse_date,
        help="Date partition to process, in format 2019-01-01",
    )
    parser.add_argument(
        "--output_dir",
        "--output-dir",
        help="Also write the query text underneath the given sql dir",
    )
    parser.add_argument(
        "--output_only",
        "--output-only",
        "--views_only",  # Deprecated name
        "--views-only",  # Deprecated name
        action="store_true",
        help=(
            "If set, we only write out sql to --output-dir and we skip"
            " running the queries"
        ),
    )

    # Shared command-line options; registration order is kept as-is because
    # it determines the order in which options appear in --help.
    standard_args.add_parallelism(parser)
    standard_args.add_dry_run(parser, debug_log_queries=False)
    standard_args.add_log_level(parser)
    standard_args.add_priority(parser)
    standard_args.add_billing_projects(parser)
    standard_args.add_table_filter(parser)
    return parser
"--preceding_days", "--preceding-days", type=int, default=0, help= "Number of days preceding --date that should be used to filter out duplicates", ) parser.add_argument( "--num_retries", "--num-retries", type=int, default=2, help="Number of times to retry each slice in case of query error", ) standard_args.add_billing_projects(parser) standard_args.add_table_filter(parser) def _get_query_job_configs( client, live_table, date, dry_run, slices, priority, preceding_days, num_retries, temp_dataset, ): sql = QUERY_TEMPLATE.format(live_table=live_table) stable_table = f"{live_table.replace('_live.', '_stable.', 1)}${date:%Y%m%d}"