Python get_base_parser示例，sherlock.common.pipeline.get_base_parser Python示例

示例#1

0

显示文件

文件： s3_to_redshift.py 项目： wlstyy/mycroft

def parse_command_line(sys_argv):
    """
    Parse the command-line arguments that follow the script name.

    The parser comes from get_base_parser(), is passed through
    add_load_args(), and then gains a --date option (not marked required).

    Args:
    sys_argv -- the full argument list (e.g. sys.argv); element 0 is skipped

    Returns:
    the namespace returned by parser.parse_args
    """

    parser = get_base_parser()
    parser = add_load_args(parser)
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal: a continuation would embed the next source line's
    # indentation in the help text.
    parser.add_argument(
        "--date",
        help="either 'yesterday' or YYYY-MM-DD; "
             "if there is no date, s3_to_redshift checks back 5 days",
    )
    # skip the file name, parse everything after
    return parser.parse_args(sys_argv[1:])

示例#2

0

显示文件

文件： s3_to_psv.py 项目： wlstyy/mycroft

def parse_command_line(sys_argv):
    """
    Build the argument parser and parse everything after the script name.

    Starts from get_base_parser() and registers a mandatory --date option.

    Args:
    sys_argv -- full argument vector (e.g. sys.argv); index 0 is ignored

    Returns:
    the namespace returned by parse_args
    """
    arg_parser = get_base_parser()

    date_help = "'yesterday' or 'twodaysago' or YYYY-MM-DD"
    arg_parser.add_argument("--date", required=True, help=date_help)

    # drop the program name before handing the rest to the parser
    cli_args = sys_argv[1:]
    return arg_parser.parse_args(cli_args)

示例#3

0

显示文件

文件： s3_to_redshift.py 项目： Yelp/mycroft

def parse_command_line(sys_argv):
    """
    Parse the command-line arguments that follow the script name.

    The parser is created by get_base_parser(), extended by
    add_load_args(), and then given a --date option (not marked required).

    Args:
    sys_argv -- the full argument list (e.g. sys.argv); element 0 is skipped

    Returns:
    the namespace returned by parser.parse_args
    """

    parser = get_base_parser()
    parser = add_load_args(parser)
    # The help text is built from adjacent literals rather than a backslash
    # continuation inside the string, so no source indentation leaks into
    # the message shown to the user.
    parser.add_argument(
        "--date",
        help=(
            "either 'yesterday' or YYYY-MM-DD; "
            "if there is no date, s3_to_redshift checks back 5 days"
        )
    )
    # skip the file name, parse everything after
    return parser.parse_args(sys_argv[1:])

示例#4

0

显示文件

文件： s3_to_psv.py 项目： Yelp/mycroft

def parse_command_line(sys_argv):
    """
    Parse the arguments that come after the program name.

    The parser is taken from get_base_parser() and a required --date
    option is added to it before parsing.

    Args:
    sys_argv -- complete argument list (e.g. sys.argv); the first entry
                is not parsed

    Returns:
    the namespace returned by parse_args
    """
    parser = get_base_parser()

    date_options = {
        "help": "'yesterday' or 'twodaysago' or YYYY-MM-DD",
        "required": True,
    }
    parser.add_argument("--date", **date_options)

    # everything except the leading file name is handed to the parser
    remaining = sys_argv[1:]
    return parser.parse_args(remaining)

示例#5

0

显示文件

def parse_command_line(sys_argv):
    """
    Parse every argument in sys_argv after the first (the program name).

    The parser is obtained from get_base_parser() and passed through
    add_load_args(); this function then sets its description and registers:
        -s, --start_date          required, handled by DateAction
        -e, --end_date            required, handled by DateAction
        -p, --pool-size           int, default 1
        --exceed-max-processes    flag
        --dry-run                 flag
        --serial-stepper / --load-polling-interval   mutually exclusive
        --load-only / --et-only                      mutually exclusive

    Returns:
    the namespace returned by parser.parse_args
    """
    arg_parser = get_base_parser()
    arg_parser = add_load_args(arg_parser)
    arg_parser.description = """
        This program takes arguments that are the union of arguments for
        s3_to_psv and s3_to_redshift.  It processes each date using the yaml
        schema file specified in the pipeline_io.yaml file as
        'pipeline.yaml_schema_file'.

        The resulting tables will be found in the 'dev' database in the
        redshift cluster defined in the config override file in the
        'redshift_host' variable."""

    # the two date bounds are configured identically apart from their flags
    date_flags = (('-s', '--start_date'), ('-e', '--end_date'))
    for short_flag, long_flag in date_flags:
        arg_parser.add_argument(
            short_flag, long_flag,
            help='YYYY-MM-DD', required=True, action=DateAction)

    arg_parser.add_argument(
        "-p", "--pool-size",
        type=int, default=1, required=False,
        help="processes to run simultaneously an int in [1,num cpu's]")

    # plain on/off switches, registered in the original order
    switches = (
        ("--exceed-max-processes",
         "use this to run more processes than cpu's"),
        ("--dry-run",
         "use this to print out the steps we will take"),
    )
    for flag, flag_help in switches:
        arg_parser.add_argument(flag, action="store_true", help=flag_help)

    stepper_group = arg_parser.add_mutually_exclusive_group(required=False)
    stepper_group.add_argument(
        "--serial-stepper", action="store_true",
        help="use the serial stepper (alternates between et & load)")
    stepper_group.add_argument(
        "--load-polling-interval", type=int, default=0,
        help="interval in seconds between attempts to find data to load, "
             "default is 0 (meaning no polling)")

    step_group = arg_parser.add_mutually_exclusive_group(required=False)
    step_group.add_argument(
        "--load-only", action="store_true",
        help="use this to do only a load step")
    step_group.add_argument(
        "--et-only", action="store_true",
        help="use this to do only an et step")

    cli_args = sys_argv[1:]
    return arg_parser.parse_args(cli_args)

示例#6

0

显示文件

文件： ingest_multiple_dates.py 项目： Yelp/mycroft

def parse_command_line(sys_argv):
    """
    Parse the arguments in sys_argv, skipping the leading program name.

    Starting from the parser returned by get_base_parser() and then
    add_load_args(), this function sets the parser description and adds:
        -s, --start_date          required, handled by DateAction
        -e, --end_date            required, handled by DateAction
        -p, --pool-size           int, default 1
        --exceed-max-processes    flag
        --dry-run                 flag
        --serial-stepper / --load-polling-interval   mutually exclusive
        --load-only / --et-only                      mutually exclusive

    Returns:
    the namespace returned by parser.parse_args
    """
    parser = add_load_args(get_base_parser())
    parser.description = """
        This program takes arguments that are the union of arguments for
        s3_to_psv and s3_to_redshift.  It processes each date using the yaml
        schema file specified in the pipeline_io.yaml file as
        'pipeline.yaml_schema_file'.

        The resulting tables will be found in the 'dev' database in the
        redshift cluster defined in the config override file in the
        'redshift_host' variable."""

    # start and end dates take the same keyword settings
    date_settings = dict(help='YYYY-MM-DD', required=True, action=DateAction)
    parser.add_argument('-s', '--start_date', **date_settings)
    parser.add_argument('-e', '--end_date', **date_settings)

    parser.add_argument("-p", "--pool-size",
                        type=int,
                        help="processes to run simultaneously "
                             "an int in [1,num cpu's]",
                        required=False,
                        default=1)
    parser.add_argument("--exceed-max-processes",
                        action="store_true",
                        help="use this to run more processes than cpu's")
    parser.add_argument("--dry-run",
                        action="store_true",
                        help="use this to print out the steps we will take")

    # the stepper options cannot be combined with one another
    stepper_choice = parser.add_mutually_exclusive_group(required=False)
    stepper_choice.add_argument(
        "--serial-stepper",
        action="store_true",
        help="use the serial stepper (alternates between et & load)")
    polling_help = ("interval in seconds between attempts to find data "
                    "to load, default is 0 (meaning no polling)")
    stepper_choice.add_argument("--load-polling-interval",
                                type=int,
                                default=0,
                                help=polling_help)

    # likewise, at most one of the single-step switches may be given
    step_choice = parser.add_mutually_exclusive_group(required=False)
    step_choice.add_argument("--load-only",
                             action="store_true",
                             help="use this to do only a load step")
    step_choice.add_argument("--et-only",
                             action="store_true",
                             help="use this to do only an et step")

    remaining = sys_argv[1:]
    return parser.parse_args(remaining)