Example #1
    def get_cluster_create_clone_update(arguments, action):
        customer_ssh_key = util._read_file(arguments.customer_ssh_key_file)
        # This will set cluster info and monitoring settings
        cluster_info = ClusterInfoV2(arguments.label)
        cluster_info.set_cluster_info(
            disallow_cluster_termination=arguments.disallow_cluster_termination,
            enable_ganglia_monitoring=arguments.enable_ganglia_monitoring,
            datadog_api_token=arguments.datadog_api_token,
            datadog_app_token=arguments.datadog_app_token,
            node_bootstrap=arguments.node_bootstrap_file,
            master_instance_type=arguments.master_instance_type,
            slave_instance_type=arguments.slave_instance_type,
            min_nodes=arguments.initial_nodes,
            max_nodes=arguments.max_nodes,
            slave_request_type=arguments.slave_request_type,
            fallback_to_ondemand=arguments.fallback_to_ondemand,
            node_base_cooldown_period=arguments.node_base_cooldown_period,
            node_spot_cooldown_period=arguments.node_spot_cooldown_period,
            custom_tags=arguments.custom_tags,
            heterogeneous_config=arguments.heterogeneous_config,
            maximum_bid_price_percentage=arguments.maximum_bid_price_percentage,
            timeout_for_request=arguments.timeout_for_request,
            maximum_spot_instance_percentage=arguments.maximum_spot_instance_percentage,
            stable_maximum_bid_price_percentage=arguments.stable_maximum_bid_price_percentage,
            stable_timeout_for_request=arguments.stable_timeout_for_request,
            stable_spot_fallback=arguments.stable_spot_fallback,
            spot_block_duration=arguments.spot_block_duration,
            idle_cluster_timeout=arguments.idle_cluster_timeout,
            disk_count=arguments.count,
            disk_type=arguments.disk_type,
            disk_size=arguments.size,
            root_disk_size=arguments.root_disk_size,
            upscaling_config=arguments.upscaling_config,
            enable_encryption=arguments.encrypted_ephemerals,
            customer_ssh_key=customer_ssh_key,
            image_uri_overrides=arguments.image_uri_overrides,
            env_name=arguments.env_name,
            python_version=arguments.python_version,
            r_version=arguments.r_version)

        # This will set cloud config settings
        cloud_config = Qubole.get_cloud()
        cloud_config.set_cloud_config_from_arguments(arguments)

        # This will set engine settings
        engine_config = Engine(flavour=arguments.flavour)
        engine_config.set_engine_config_settings(arguments)

        cluster_request = ClusterCmdLine.get_cluster_request_parameters(
            cluster_info, cloud_config, engine_config)

        if action == "create":
            return arguments.func(cluster_request)
        else:
            return arguments.func(arguments.cluster_id_label, cluster_request)
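The create/update/clone split at the end hinges on argparse binding each subcommand's handler to arguments.func: for "create" the handler receives only the request, otherwise it also receives the cluster id/label. A minimal sketch of that wiring with plain argparse, using hypothetical stand-in handlers (the real qds-sdk handlers differ):

import argparse

def create_cluster(cluster_request):
    # stand-in for the SDK's create handler: called as func(cluster_request)
    print("create:", cluster_request)

def update_cluster(cluster_id_label, cluster_request):
    # stand-in for the update/clone handler: func(id_label, cluster_request)
    print("update", cluster_id_label, ":", cluster_request)

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="action")

create = subparsers.add_parser("create")
create.set_defaults(func=create_cluster)

update = subparsers.add_parser("update")
update.add_argument("cluster_id_label")
update.set_defaults(func=update_cluster)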
Example #2
    def create_update_clone_parser(subparser, action=None):
        # cloud config parser
        cloud = Qubole.get_cloud()
        cloud.create_parser(subparser)

        # cluster info parser
        ClusterInfoV2.cluster_info_parser(subparser, action)

        # engine config parser
        Engine.engine_parser(subparser)
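Example #2 shows the composition pattern used throughout: each component (cloud, cluster info, engine) registers its own options on the same subcommand parser. The same idea with plain argparse and hypothetical stand-in functions (the real parsers add far more options):

import argparse

def add_cloud_args(parser):
    # stand-in for cloud.create_parser(subparser)
    parser.add_argument("--aws-region", dest="aws_region")

def add_engine_args(parser):
    # stand-in for Engine.engine_parser(subparser)
    parser.add_argument("--flavour", dest="flavour")

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="action")
create = subparsers.add_parser("create")
add_cloud_args(create)
add_engine_args(create)

arguments = parser.parse_args(["create", "--flavour", "spark"])
print(arguments.flavour)  # "spark"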
Example #3
    def get_cluster_create_clone_update(arguments, action):
        customer_ssh_key = util._read_file(arguments.customer_ssh_key_file)
        # This will set cluster info and monitoring settings
        cluster_info = ClusterInfoV2(arguments.label)
        cluster_info.set_cluster_info(disallow_cluster_termination=arguments.disallow_cluster_termination,
                                      enable_ganglia_monitoring=arguments.enable_ganglia_monitoring,
                                      datadog_api_token=arguments.datadog_api_token,
                                      datadog_app_token=arguments.datadog_app_token,
                                      node_bootstrap=arguments.node_bootstrap_file,
                                      master_instance_type=arguments.master_instance_type,
                                      slave_instance_type=arguments.slave_instance_type,
                                      min_nodes=arguments.initial_nodes,
                                      max_nodes=arguments.max_nodes,
                                      slave_request_type=arguments.slave_request_type,
                                      fallback_to_ondemand=arguments.fallback_to_ondemand,
                                      custom_tags=arguments.custom_tags,
                                      heterogeneous_config=arguments.heterogeneous_config,
                                      maximum_bid_price_percentage=arguments.maximum_bid_price_percentage,
                                      timeout_for_request=arguments.timeout_for_request,
                                      maximum_spot_instance_percentage=arguments.maximum_spot_instance_percentage,
                                      stable_maximum_bid_price_percentage=arguments.stable_maximum_bid_price_percentage,
                                      stable_timeout_for_request=arguments.stable_timeout_for_request,
                                      stable_spot_fallback=arguments.stable_spot_fallback,
                                      idle_cluster_timeout=arguments.idle_cluster_timeout,
                                      disk_count=arguments.count,
                                      disk_type=arguments.disk_type,
                                      disk_size=arguments.size,
                                      upscaling_config=arguments.upscaling_config,
                                      enable_encryption=arguments.encrypted_ephemerals,
                                      customer_ssh_key=customer_ssh_key,
                                      image_uri_overrides=arguments.image_uri_overrides)

        # This will set cloud config settings
        cloud_config = Qubole.get_cloud()
        cloud_config.set_cloud_config_from_arguments(arguments)

        # This will set engine settings
        engine_config = Engine(flavour=arguments.flavour)
        engine_config.set_engine_config_settings(arguments)

        cluster_request = ClusterCmdLine.get_cluster_request_parameters(cluster_info, cloud_config, engine_config)

        if action == "create":
            return arguments.func(cluster_request)
        else:
            return arguments.func(arguments.cluster_id_label, cluster_request)
Example #4
def _create_spark_cluster_info(config):
    cluster_info = ClusterInfoV2(config['spark_cluster_name'])
    cluster_info.set_cluster_info(
        master_instance_type=config['hadoop_master_instance_type'],
        slave_instance_type=config['hadoop_slave_instance_type'],
        min_nodes=1,
        max_nodes=config['hadoop_max_nodes_count'],
        slave_request_type='spot')

    cloud_config = Qubole.get_cloud(cloud_name='aws')
    cloud_config.set_cloud_config(aws_region=config['region_name'],
                                  aws_availability_zone='Any',
                                  vpc_id=config['cluster_vpc_id'],
                                  subnet_id=config['cluster_subnet_id'])

    engine_config = Engine(flavour='spark')
    engine_config.set_engine_config(spark_version='2.1.0')

    cluster_request = ClusterCmdLine.get_cluster_request_parameters(
        cluster_info, cloud_config, engine_config)
    return cluster_request
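Example #4 builds a request outside the CLI path, so the returned dict can be submitted directly. A usage sketch, assuming qds-sdk-py's usual entry points (ClusterV2.create posting the composed request; all config values below are placeholders):

from qds_sdk.qubole import Qubole
from qds_sdk.clusterv2 import ClusterV2  # assumed module path

Qubole.configure(api_token="YOUR_API_TOKEN")  # placeholder token

config = {
    'spark_cluster_name': 'spark-etl',
    'hadoop_master_instance_type': 'm4.xlarge',
    'hadoop_slave_instance_type': 'm4.xlarge',
    'hadoop_max_nodes_count': 10,
    'region_name': 'us-east-1',
    'cluster_vpc_id': 'vpc-00000000',        # placeholder
    'cluster_subnet_id': 'subnet-00000000',  # placeholder
}

cluster_request = _create_spark_cluster_info(config)
ClusterV2.create(cluster_request)  # submits the request to the Qubole API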
Example #5
    def get_cluster_create_clone_update(arguments, action):

        # This will set cluster info and monitoring settings
        cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls()
        cluster_info = cluster_info_cls(arguments.label)
        cluster_info.set_cluster_info_from_arguments(arguments)

        # This will set cloud config settings
        cloud_config = Qubole.get_cloud()
        cloud_config.set_cloud_config_from_arguments(arguments)

        # This will set engine settings
        engine_config = Engine(flavour=arguments.flavour)
        engine_config.set_engine_config_settings(arguments)
        cluster_request = ClusterCmdLine.get_cluster_request_parameters(cluster_info, cloud_config, engine_config)

        if action == "create":
            return arguments.func(cluster_request)
        else:
            return arguments.func(arguments.cluster_id_label, cluster_request)
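Example #5 differs from Example #1 mainly in resolving the cluster-info class through a factory instead of hardcoding ClusterInfoV2, which lets one code path serve multiple API versions. A hypothetical minimal factory keyed on the configured version (the module paths and the v2.2 class are assumptions about qds-sdk-py's layout):

from qds_sdk.qubole import Qubole
from qds_sdk.cluster_info_v2 import ClusterInfoV2    # assumed module path
from qds_sdk.cluster_info_v22 import ClusterInfoV22  # assumed module path

class ClusterInfoFactory:
    @staticmethod
    def get_cluster_info_cls(api_version=None):
        # Hypothetical selection logic: fall back to the configured version
        api_version = api_version or Qubole.version
        if api_version == "v2.2":
            return ClusterInfoV22
        return ClusterInfoV2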
Example #6
    def cluster_info_parser(argparser, action):
        create_required = False
        label_required = False
        if action == "create":
            create_required = True
        elif action == "update":
            argparser.add_argument("cluster_id_label",
                                   help="id/label of the cluster to update")
        elif action == "clone":
            argparser.add_argument("cluster_id_label",
                                   help="id/label of the cluster to update")
            label_required = True

        argparser.add_argument("--label",
                               dest="label",
                               nargs="+",
                               required=(create_required or label_required),
                               help="list of labels for the cluster" +
                               " (atleast one label is required)")
        cluster_info = argparser.add_argument_group("cluster_info")
        cluster_info.add_argument("--master-instance-type",
                                  dest="master_instance_type",
                                  help="instance type to use for the hadoop" +
                                  " master node")
        cluster_info.add_argument("--slave-instance-type",
                                  dest="slave_instance_type",
                                  help="instance type to use for the hadoop" +
                                  " slave nodes")
        cluster_info.add_argument(
            "--min-nodes",
            dest="initial_nodes",
            type=int,
            help="number of nodes to start the" + " cluster with",
        )
        cluster_info.add_argument("--max-nodes",
                                  dest="max_nodes",
                                  type=int,
                                  help="maximum number of nodes the cluster" +
                                  " may be auto-scaled up to")
        cluster_info.add_argument(
            "--idle-cluster-timeout",
            dest="idle_cluster_timeout",
            help="cluster termination timeout for idle cluster")
        cluster_info.add_argument(
            "--node-bootstrap-file",
            dest="node_bootstrap_file",
            help="""name of the node bootstrap file for this cluster. It
                                   should be in stored in S3 at
                                   <account-default-location>/scripts/hadoop/NODE_BOOTSTRAP_FILE
                                   """,
        )
        cluster_info.add_argument("--root-disk-size",
                                  dest="root_disk_size",
                                  type=int,
                                  help="size of the root volume in GB")
        cluster_info.add_argument(
            "--parent-cluster-id",
            dest="parent_cluster_id",
            type=int,
            help="Id of the parent cluster this hs2 cluster is attached to")
        cluster_info.add_argument("--image-version",
                                  dest="image_version",
                                  help="cluster image version")
        termination = cluster_info.add_mutually_exclusive_group()
        termination.add_argument(
            "--disallow-cluster-termination",
            dest="disallow_cluster_termination",
            action="store_true",
            default=None,
            help="don't auto-terminate idle clusters," +
            " use this with extreme caution",
        )
        termination.add_argument("--allow-cluster-termination",
                                 dest="disallow_cluster_termination",
                                 action="store_false",
                                 default=None,
                                 help="auto-terminate idle clusters,")

        node_cooldown_period_group = argparser.add_argument_group(
            "node cooldown period settings")
        node_cooldown_period_group.add_argument(
            "--node-base-cooldown-period",
            dest="node_base_cooldown_period",
            type=int,
            help="Cooldown period for on-demand nodes" + " unit: minutes")
        node_cooldown_period_group.add_argument(
            "--node-spot-cooldown-period",
            dest="node_spot_cooldown_period",
            type=int,
            help="Cooldown period for spot nodes" + " unit: minutes")
        cluster_info.add_argument("--customer-ssh-key",
                                  dest="customer_ssh_key_file",
                                  help="location for ssh key to use to" +
                                  " login to the instance")
        cluster_info.add_argument(
            "--custom-tags",
            dest="custom_tags",
            help="""Custom tags to be set on all instances
                                                 of the cluster. Specified as JSON object (key-value pairs)
                                                 e.g. --custom-ec2-tags '{"key1":"value1", "key2":"value2"}'
                                                 """,
        )

        # datadisk settings
        datadisk_group = argparser.add_argument_group("data disk settings")
        datadisk_group.add_argument(
            "--count",
            dest="count",
            type=int,
            help="Number of EBS volumes to attach to" +
            " each instance of the cluster",
        )
        datadisk_group.add_argument(
            "--disk-type",
            dest="disk_type",
            choices=["standard", "gp2"],
            help="Type of the volume attached to the instances. Valid values are 'standard' (magnetic) and 'gp2' (ssd).")
        datadisk_group.add_argument(
            "--size",
            dest="size",
            type=int,
            help="Size of each EBS volume, in GB",
        )
        datadisk_group.add_argument(
            "--upscaling-config",
            dest="upscaling_config",
            help="Upscaling config to be attached with the instances.",
        )
        ephemerals = datadisk_group.add_mutually_exclusive_group()
        ephemerals.add_argument(
            "--encrypted-ephemerals",
            dest="encrypted_ephemerals",
            action="store_true",
            default=None,
            help="encrypt the ephemeral drives on" + " the instance",
        )
        ephemerals.add_argument(
            "--no-encrypted-ephemerals",
            dest="encrypted_ephemerals",
            action="store_false",
            default=None,
            help="don't encrypt the ephemeral drives on" + " the instance",
        )

        cluster_info.add_argument("--heterogeneous-config",
                                  dest="heterogeneous_config",
                                  help="heterogeneous config for the cluster")

        composition_group = argparser.add_argument_group(
            "Cluster composition settings")
        Qubole.get_cloud().set_composition_arguments(composition_group)

        # monitoring settings
        monitoring_group = argparser.add_argument_group("monitoring settings")
        ganglia = monitoring_group.add_mutually_exclusive_group()
        ganglia.add_argument(
            "--enable-ganglia-monitoring",
            dest="enable_ganglia_monitoring",
            action="store_true",
            default=None,
            help="enable ganglia monitoring for the" + " cluster",
        )
        ganglia.add_argument(
            "--disable-ganglia-monitoring",
            dest="enable_ganglia_monitoring",
            action="store_false",
            default=None,
            help="disable ganglia monitoring for the" + " cluster",
        )

        datadog_group = argparser.add_argument_group("datadog settings")
        datadog_group.add_argument(
            "--datadog-api-token",
            dest="datadog_api_token",
            default=None,
            help="fernet key for airflow cluster",
        )
        datadog_group.add_argument(
            "--datadog-app-token",
            dest="datadog_app_token",
            default=None,
            help="overrides for airflow cluster",
        )

        internal_group = argparser.add_argument_group("internal settings")
        internal_group.add_argument(
            "--image-overrides",
            dest="image_uri_overrides",
            default=None,
            help="overrides for image",
        )

        env_group = argparser.add_argument_group("environment settings")
        env_group.add_argument("--env-name",
                               dest="env_name",
                               default=None,
                               help="name of Python and R environment")
        env_group.add_argument("--python-version",
                               dest="python_version",
                               default=None,
                               help="version of Python in environment")
        env_group.add_argument("--r-version",
                               dest="r_version",
                               default=None,
                               help="version of R in environment")

        start_stop_group = argparser.add_argument_group("start stop settings")
        start_stop_group.add_argument("--disable-cluster-pause",
                                      dest="disable_cluster_pause",
                                      action='store_true',
                                      default=None,
                                      help="disable cluster pause")
        start_stop_group.add_argument("--no-disable-cluster-pause",
                                      dest="disable_cluster_pause",
                                      action='store_false',
                                      default=None,
                                      help="disable cluster pause")
        start_stop_group.add_argument("--paused-cluster-timeout",
                                      dest="paused_cluster_timeout_mins",
                                      default=None,
                                      type=int,
                                      help="paused cluster timeout in min")
        start_stop_group.add_argument("--disable-autoscale-node-pause",
                                      dest="disable_autoscale_node_pause",
                                      action='store_true',
                                      default=None,
                                      help="disable autoscale node pause")
        start_stop_group.add_argument("--no-disable-autoscale-node-pause",
                                      dest="disable_autoscale_node_pause",
                                      action='store_false',
                                      default=None,
                                      help="disable autoscale node pause")
        start_stop_group.add_argument(
            "--paused-autoscale-node-timeout",
            dest="paused_autoscale_node_timeout_mins",
            default=None,
            type=int,
            help="paused autoscale node timeout in min")
Example #7
    def set_composition_for_cluster(self, **kwargs):
        cloud = Qubole.get_cloud()
        composition = cloud.get_composition(**kwargs)
        if composition is not None:
            self.cluster_info["composition"] = composition