Exemplo n.º 1
0
def get_argument_parser(description):
    """Get the argument parser for the shared glean usage queries."""
    parser = ArgumentParser(description=description)
    parser.add_argument(
        "--project_id",
        "--project-id",
        default="moz-fx-data-shar-nonprod-efed",
        help="ID of the project in which to find tables",
    )
    parser.add_argument(
        "--date",
        required=True,
        type=lambda d: datetime.strptime(d, "%Y-%m-%d").date(),
        help="Date partition to process, in format 2019-01-01",
    )
    parser.add_argument(
        "--output_dir",
        "--output-dir",
        help="Also write the query text underneath the given sql dir",
    )
    parser.add_argument(
        "--output_only",
        "--output-only",
        "--views_only",  # Deprecated name
        "--views-only",  # Deprecated name
        action="store_true",
        help=(
            "If set, we only write out sql to --output-dir and we skip"
            " running the queries"
        ),
    )
    standard_args.add_parallelism(parser)
    standard_args.add_dry_run(parser, debug_log_queries=False)
    standard_args.add_log_level(parser)
    standard_args.add_priority(parser)
    standard_args.add_billing_projects(parser)
    standard_args.add_table_filter(parser)
    return parser
Exemplo n.º 2
0
    "--project-id",
    default="moz-fx-data-shar-nonprod-efed",
    help="ID of the project in which to find tables",
)
parser.add_argument(
    "--dates",
    "--date",
    nargs="+",
    required=True,
    type=lambda d: datetime.strptime(d, "%Y-%m-%d").date(),
    help="One or more days of data to copy, in format 2019-01-01",
)
standard_args.add_parallelism(parser)
standard_args.add_dry_run(parser, debug_log_queries=False)
standard_args.add_log_level(parser)
standard_args.add_priority(parser)
standard_args.add_temp_dataset(parser)
parser.add_argument(
    "--slices",
    type=int,
    default=1,
    help=
    ("Number of queries to split deduplicate over, each handling an equal-size time "
     "slice of the date; avoids memory overflow at the cost of less effective "
     "clustering; recommended only for tables failing due to memory overflow"),
)
parser.add_argument(
    "--hourly",
    action="store_const",
    dest="slices",
    const=24,