def get_cluster_create_clone_update(arguments, action):
    """Build a cluster API request from parsed CLI arguments and dispatch it.

    Reads the optional customer SSH key file, assembles the cluster-info,
    cloud-config and engine-config sections, merges them into one request
    payload, and invokes ``arguments.func`` for the requested action.

    Parameters:
        arguments: argparse.Namespace produced by the cluster subparser.
        action: "create", "update" or "clone"; for anything other than
            "create" the target cluster id/label is forwarded as well.

    Returns:
        Whatever ``arguments.func`` returns (presumably the API response —
        the callable is bound by the argument parser elsewhere).
    """
    customer_ssh_key = util._read_file(arguments.customer_ssh_key_file)

    # Cluster info and monitoring settings.
    cluster_info = ClusterInfoV2(arguments.label)
    cluster_info.set_cluster_info(
        disallow_cluster_termination=arguments.disallow_cluster_termination,
        enable_ganglia_monitoring=arguments.enable_ganglia_monitoring,
        datadog_api_token=arguments.datadog_api_token,
        datadog_app_token=arguments.datadog_app_token,
        node_bootstrap=arguments.node_bootstrap_file,
        master_instance_type=arguments.master_instance_type,
        slave_instance_type=arguments.slave_instance_type,
        min_nodes=arguments.initial_nodes,
        max_nodes=arguments.max_nodes,
        slave_request_type=arguments.slave_request_type,
        fallback_to_ondemand=arguments.fallback_to_ondemand,
        node_base_cooldown_period=arguments.node_base_cooldown_period,
        node_spot_cooldown_period=arguments.node_spot_cooldown_period,
        custom_tags=arguments.custom_tags,
        heterogeneous_config=arguments.heterogeneous_config,
        maximum_bid_price_percentage=arguments.maximum_bid_price_percentage,
        timeout_for_request=arguments.timeout_for_request,
        maximum_spot_instance_percentage=arguments.maximum_spot_instance_percentage,
        stable_maximum_bid_price_percentage=arguments.stable_maximum_bid_price_percentage,
        stable_timeout_for_request=arguments.stable_timeout_for_request,
        stable_spot_fallback=arguments.stable_spot_fallback,
        spot_block_duration=arguments.spot_block_duration,
        idle_cluster_timeout=arguments.idle_cluster_timeout,
        disk_count=arguments.count,
        disk_type=arguments.disk_type,
        disk_size=arguments.size,
        root_disk_size=arguments.root_disk_size,
        upscaling_config=arguments.upscaling_config,
        enable_encryption=arguments.encrypted_ephemerals,
        customer_ssh_key=customer_ssh_key,
        image_uri_overrides=arguments.image_uri_overrides,
        env_name=arguments.env_name,
        python_version=arguments.python_version,
        r_version=arguments.r_version)

    # Cloud-provider specific settings.
    cloud_config = Qubole.get_cloud()
    cloud_config.set_cloud_config_from_arguments(arguments)

    # Engine settings (flavour selects the engine type).
    engine_config = Engine(flavour=arguments.flavour)
    engine_config.set_engine_config_settings(arguments)

    cluster_request = ClusterCmdLine.get_cluster_request_parameters(
        cluster_info, cloud_config, engine_config)

    # Fix: removed the pointless `action = action` self-assignment.
    if action == "create":
        return arguments.func(cluster_request)
    else:
        return arguments.func(arguments.cluster_id_label, cluster_request)
def create_update_clone_parser(subparser, action=None):
    """Attach cloud, cluster-info and engine argument groups to *subparser*.

    Each configuration section contributes its own CLI options; *action*
    ("create"/"update"/"clone") tunes which cluster-info arguments are
    required.
    """
    # Cloud-specific options for the currently configured cloud provider.
    Qubole.get_cloud().create_parser(subparser)
    # Core cluster options; requirements vary with the action.
    ClusterInfoV2.cluster_info_parser(subparser, action)
    # Engine-flavour options.
    Engine.engine_parser(subparser)
def get_cluster_create_clone_update(arguments, action):
    """Build a cluster API request from parsed CLI arguments and dispatch it.

    Older variant without cooldown-period, root-disk or environment options.
    Assembles cluster-info, cloud-config and engine-config sections, merges
    them into a single request payload, and calls ``arguments.func``.

    Parameters:
        arguments: argparse.Namespace produced by the cluster subparser.
        action: "create", "update" or "clone"; for anything other than
            "create" the target cluster id/label is forwarded as well.

    Returns:
        Whatever ``arguments.func`` returns.
    """
    customer_ssh_key = util._read_file(arguments.customer_ssh_key_file)

    # Cluster info and monitoring settings.
    cluster_info = ClusterInfoV2(arguments.label)
    cluster_info.set_cluster_info(
        disallow_cluster_termination=arguments.disallow_cluster_termination,
        enable_ganglia_monitoring=arguments.enable_ganglia_monitoring,
        datadog_api_token=arguments.datadog_api_token,
        datadog_app_token=arguments.datadog_app_token,
        node_bootstrap=arguments.node_bootstrap_file,
        master_instance_type=arguments.master_instance_type,
        slave_instance_type=arguments.slave_instance_type,
        min_nodes=arguments.initial_nodes,
        max_nodes=arguments.max_nodes,
        slave_request_type=arguments.slave_request_type,
        fallback_to_ondemand=arguments.fallback_to_ondemand,
        custom_tags=arguments.custom_tags,
        heterogeneous_config=arguments.heterogeneous_config,
        maximum_bid_price_percentage=arguments.maximum_bid_price_percentage,
        timeout_for_request=arguments.timeout_for_request,
        maximum_spot_instance_percentage=arguments.maximum_spot_instance_percentage,
        stable_maximum_bid_price_percentage=arguments.stable_maximum_bid_price_percentage,
        stable_timeout_for_request=arguments.stable_timeout_for_request,
        stable_spot_fallback=arguments.stable_spot_fallback,
        idle_cluster_timeout=arguments.idle_cluster_timeout,
        disk_count=arguments.count,
        disk_type=arguments.disk_type,
        disk_size=arguments.size,
        upscaling_config=arguments.upscaling_config,
        enable_encryption=arguments.encrypted_ephemerals,
        customer_ssh_key=customer_ssh_key,
        image_uri_overrides=arguments.image_uri_overrides)

    # Cloud-provider specific settings.
    cloud_config = Qubole.get_cloud()
    cloud_config.set_cloud_config_from_arguments(arguments)

    # Engine settings (flavour selects the engine type).
    engine_config = Engine(flavour=arguments.flavour)
    engine_config.set_engine_config_settings(arguments)

    cluster_request = ClusterCmdLine.get_cluster_request_parameters(
        cluster_info, cloud_config, engine_config)

    # Fix: removed the pointless `action = action` self-assignment.
    if action == "create":
        return arguments.func(cluster_request)
    else:
        return arguments.func(arguments.cluster_id_label, cluster_request)
def _create_spark_cluster_info(config, spark_version='2.1.0'):
    """Build a cluster-creation request for a Spark cluster on AWS.

    Parameters:
        config: mapping with the keys 'spark_cluster_name',
            'hadoop_master_instance_type', 'hadoop_slave_instance_type',
            'hadoop_max_nodes_count', 'region_name', 'cluster_vpc_id' and
            'cluster_subnet_id'.
        spark_version: Spark version to request. Generalized from the
            previously hard-coded '2.1.0'; the default preserves the old
            behavior.

    Returns:
        The merged cluster request produced by
        ``ClusterCmdLine.get_cluster_request_parameters``.
    """
    # Core cluster sizing; slaves are requested as spot instances.
    cluster_info = ClusterInfoV2(config['spark_cluster_name'])
    cluster_info.set_cluster_info(
        master_instance_type=config['hadoop_master_instance_type'],
        slave_instance_type=config['hadoop_slave_instance_type'],
        min_nodes=1,
        max_nodes=config['hadoop_max_nodes_count'],
        slave_request_type='spot')

    # AWS placement: any availability zone within the given VPC/subnet.
    cloud_config = Qubole.get_cloud(cloud_name='aws')
    cloud_config.set_cloud_config(aws_region=config['region_name'],
                                  aws_availability_zone='Any',
                                  vpc_id=config['cluster_vpc_id'],
                                  subnet_id=config['cluster_subnet_id'])

    engine_config = Engine(flavour='spark')
    engine_config.set_engine_config(spark_version=spark_version)

    cluster_request = ClusterCmdLine.get_cluster_request_parameters(
        cluster_info, cloud_config, engine_config)
    return cluster_request
def get_cluster_create_clone_update(arguments, action):
    """Build a cluster API request from parsed CLI arguments and dispatch it.

    Factory-based variant: the concrete cluster-info class is chosen by
    ``ClusterInfoFactory`` and populates itself directly from *arguments*.

    Parameters:
        arguments: argparse.Namespace produced by the cluster subparser.
        action: "create", "update" or "clone"; for anything other than
            "create" the target cluster id/label is forwarded as well.

    Returns:
        Whatever ``arguments.func`` returns.
    """
    # Cluster info and monitoring settings.
    cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls()
    cluster_info = cluster_info_cls(arguments.label)
    cluster_info.set_cluster_info_from_arguments(arguments)

    # Cloud-provider specific settings.
    cloud_config = Qubole.get_cloud()
    cloud_config.set_cloud_config_from_arguments(arguments)

    # Engine settings (flavour selects the engine type).
    engine_config = Engine(flavour=arguments.flavour)
    engine_config.set_engine_config_settings(arguments)

    cluster_request = ClusterCmdLine.get_cluster_request_parameters(
        cluster_info, cloud_config, engine_config)

    # Fix: removed the pointless `action = action` self-assignment.
    if action == "create":
        return arguments.func(cluster_request)
    else:
        return arguments.func(arguments.cluster_id_label, cluster_request)
def cluster_info_parser(argparser, action):
    """Register cluster-info CLI options on *argparser* for the given action.

    Parameters:
        argparser: the argparse parser (or subparser) to populate.
        action: "create", "update" or "clone". "create" makes --label
            required; "update" and "clone" add a positional
            ``cluster_id_label``; "clone" also requires --label.

    Fixes applied vs. the previous revision (help text only; no flag,
    dest, type or default changed):
      * clone's positional help said "update" instead of "clone";
      * --datadog-api-token/--datadog-app-token help strings were
        copy-paste leftovers from airflow options;
      * "atleast" typo.
    """
    create_required = False
    label_required = False
    if action == "create":
        create_required = True
    elif action == "update":
        argparser.add_argument("cluster_id_label",
                               help="id/label of the cluster to update")
    elif action == "clone":
        argparser.add_argument("cluster_id_label",
                               help="id/label of the cluster to clone")
        label_required = True

    argparser.add_argument("--label", dest="label",
                           nargs="+",
                           required=(create_required or label_required),
                           help="list of labels for the cluster" +
                                " (at least one label is required)")

    cluster_info = argparser.add_argument_group("cluster_info")
    cluster_info.add_argument("--master-instance-type",
                              dest="master_instance_type",
                              help="instance type to use for the hadoop" +
                                   " master node")
    cluster_info.add_argument("--slave-instance-type",
                              dest="slave_instance_type",
                              help="instance type to use for the hadoop" +
                                   " slave nodes")
    cluster_info.add_argument(
        "--min-nodes",
        dest="initial_nodes",
        type=int,
        help="number of nodes to start the" + " cluster with",
    )
    cluster_info.add_argument("--max-nodes",
                              dest="max_nodes",
                              type=int,
                              help="maximum number of nodes the cluster" +
                                   " may be auto-scaled up to")
    cluster_info.add_argument(
        "--idle-cluster-timeout",
        dest="idle_cluster_timeout",
        help="cluster termination timeout for idle cluster")
    cluster_info.add_argument(
        "--node-bootstrap-file",
        dest="node_bootstrap_file",
        help="""name of the node bootstrap file for this cluster. It
        should be in stored in S3 at
        <account-default-location>/scripts/hadoop/NODE_BOOTSTRAP_FILE
        """,
    )
    cluster_info.add_argument("--root-disk-size",
                              dest="root_disk_size",
                              type=int,
                              help="size of the root volume in GB")
    cluster_info.add_argument(
        "--parent-cluster-id",
        dest="parent_cluster_id",
        type=int,
        help="Id of the parent cluster this hs2 cluster is attached to")
    cluster_info.add_argument("--image-version",
                              dest="image_version",
                              help="cluster image version")

    # Auto-termination is a tri-state: True / False / None (unset).
    termination = cluster_info.add_mutually_exclusive_group()
    termination.add_argument(
        "--disallow-cluster-termination",
        dest="disallow_cluster_termination",
        action="store_true",
        default=None,
        help="don't auto-terminate idle clusters," +
             " use this with extreme caution",
    )
    termination.add_argument("--allow-cluster-termination",
                             dest="disallow_cluster_termination",
                             action="store_false",
                             default=None,
                             help="auto-terminate idle clusters,")

    node_cooldown_period_group = argparser.add_argument_group(
        "node cooldown period settings")
    node_cooldown_period_group.add_argument(
        "--node-base-cooldown-period",
        dest="node_base_cooldown_period",
        type=int,
        help="Cooldown period for on-demand nodes" + " unit: minutes")
    node_cooldown_period_group.add_argument(
        "--node-spot-cooldown-period",
        dest="node_spot_cooldown_period",
        type=int,
        help="Cooldown period for spot nodes" + " unit: minutes")

    cluster_info.add_argument("--customer-ssh-key",
                              dest="customer_ssh_key_file",
                              help="location for ssh key to use to" +
                                   " login to the instance")
    cluster_info.add_argument(
        "--custom-tags",
        dest="custom_tags",
        help="""Custom tags to be set on all instances
        of the cluster. Specified as JSON object (key-value pairs)
        e.g. --custom-ec2-tags '{"key1":"value1", "key2":"value2"}'
        """,
    )

    # Data disk settings.
    datadisk_group = argparser.add_argument_group("data disk settings")
    datadisk_group.add_argument(
        "--count",
        dest="count",
        type=int,
        help="Number of EBS volumes to attach to" +
             " each instance of the cluster",
    )
    datadisk_group.add_argument(
        "--disk-type",
        dest="disk_type",
        choices=["standard", "gp2"],
        help="Type of the volume attached to the instances. Valid values are "
             + "'standard' (magnetic) and 'gp2' (ssd).")
    datadisk_group.add_argument(
        "--size",
        dest="size",
        type=int,
        help="Size of each EBS volume, in GB",
    )
    datadisk_group.add_argument(
        "--upscaling-config",
        dest="upscaling_config",
        help="Upscaling config to be attached with the instances.",
    )
    # Ephemeral-drive encryption is a tri-state: True / False / None.
    ephemerals = datadisk_group.add_mutually_exclusive_group()
    ephemerals.add_argument(
        "--encrypted-ephemerals",
        dest="encrypted_ephemerals",
        action="store_true",
        default=None,
        help="encrypt the ephemeral drives on" + " the instance",
    )
    ephemerals.add_argument(
        "--no-encrypted-ephemerals",
        dest="encrypted_ephemerals",
        action="store_false",
        default=None,
        help="don't encrypt the ephemeral drives on" + " the instance",
    )

    cluster_info.add_argument("--heterogeneous-config",
                              dest="heterogeneous_config",
                              help="heterogeneous config for the cluster")

    composition_group = argparser.add_argument_group(
        "Cluster composition settings")
    Qubole.get_cloud().set_composition_arguments(composition_group)

    # Monitoring settings; ganglia is a tri-state: True / False / None.
    monitoring_group = argparser.add_argument_group("monitoring settings")
    ganglia = monitoring_group.add_mutually_exclusive_group()
    ganglia.add_argument(
        "--enable-ganglia-monitoring",
        dest="enable_ganglia_monitoring",
        action="store_true",
        default=None,
        help="enable ganglia monitoring for the" + " cluster",
    )
    ganglia.add_argument(
        "--disable-ganglia-monitoring",
        dest="enable_ganglia_monitoring",
        action="store_false",
        default=None,
        help="disable ganglia monitoring for the" + " cluster",
    )

    datadog_group = argparser.add_argument_group("datadog settings")
    datadog_group.add_argument(
        "--datadog-api-token",
        dest="datadog_api_token",
        default=None,
        help="datadog api token for cluster monitoring",
    )
    datadog_group.add_argument(
        "--datadog-app-token",
        dest="datadog_app_token",
        default=None,
        help="datadog app token for cluster monitoring",
    )

    internal_group = argparser.add_argument_group("internal settings")
    internal_group.add_argument(
        "--image-overrides",
        dest="image_uri_overrides",
        default=None,
        help="overrides for image",
    )

    env_group = argparser.add_argument_group("environment settings")
    env_group.add_argument("--env-name",
                           dest="env_name",
                           default=None,
                           help="name of Python and R environment")
    env_group.add_argument("--python-version",
                           dest="python_version",
                           default=None,
                           help="version of Python in environment")
    env_group.add_argument("--r-version",
                           dest="r_version",
                           default=None,
                           help="version of R in environment")

    # Start/stop (pause) settings; the paired flags form tri-states.
    start_stop_group = argparser.add_argument_group("start stop settings")
    start_stop_group.add_argument("--disable-cluster-pause",
                                  dest="disable_cluster_pause",
                                  action='store_true',
                                  default=None,
                                  help="disable cluster pause")
    start_stop_group.add_argument("--no-disable-cluster-pause",
                                  dest="disable_cluster_pause",
                                  action='store_false',
                                  default=None,
                                  help="disable cluster pause")
    start_stop_group.add_argument("--paused-cluster-timeout",
                                  dest="paused_cluster_timeout_mins",
                                  default=None,
                                  type=int,
                                  help="paused cluster timeout in min")
    start_stop_group.add_argument("--disable-autoscale-node-pause",
                                  dest="disable_autoscale_node_pause",
                                  action='store_true',
                                  default=None,
                                  help="disable autoscale node pause")
    start_stop_group.add_argument("--no-disable-autoscale-node-pause",
                                  dest="disable_autoscale_node_pause",
                                  action='store_false',
                                  default=None,
                                  help="disable autoscale node pause")
    start_stop_group.add_argument(
        "--paused-autoscale-node-timeout",
        dest="paused_autoscale_node_timeout_mins",
        default=None,
        type=int,
        help="paused autoscale node timeout in min")
def set_composition_for_cluster(self, **kwargs):
    """Store the cloud-specific cluster composition, if one was supplied.

    Asks the active cloud object to build a composition dict from *kwargs*;
    a ``None`` result leaves ``self.cluster_info`` untouched.
    """
    composition = Qubole.get_cloud().get_composition(**kwargs)
    if composition is not None:
        self.cluster_info["composition"] = composition