def stop_in_client_mode(cluster_name, opts): # check cluster status trickly if utils.check_cluster_status(cluster_name, ['Stopped']): print "Cluster %s has been `Stopped`, you can not stop it again." % cluster_name sys.exit(1) do_validity_check(opts) (masters, slaves) = utils.get_masters_and_slaves(opts.mode) if len(masters + slaves) <= 0: print "There is no master or slave running, check it first please." sys.exit(1) print "==> Stopping spark cluster..." utils.warning() msg = "Stopping Spark cluster will stop HDFS, spark-notebook and Hue at the same time. " \ "Stop %s? (Y/n): " % cluster_name to_stop = raw_input(msg) if to_stop == "Y": if opts.pwd == "": opts.pwd = getpass.getpass( "You need to provide the password for ECS instance:") spark.stop_spark_cluster(masters, slaves, opts) hdfs.stop_hdfs(masters, slaves, opts) hue.stop_hue(masters, opts) spark_notebook.stop_spark_notebook(masters, opts) utils.stop_nginx(opts, masters) # update cluster status os.system("echo Stopped > %s%s" % (GlobalVar.CLUSTER_STATUS, cluster_name)) else: print "Not `Y`, give up stopping cluster %s" % cluster_name
# NOTE(review): `stop_in_client_mode` is defined twice in this file; this second,
# near-identical definition shadows the earlier one. One of the two should be
# removed — confirm which is current before deleting.
def stop_in_client_mode(cluster_name, opts):
    """Stop a Spark cluster started in client mode, together with HDFS,
    spark-notebook and Hue, then mark the cluster `Stopped` locally."""
    # Refuse to stop a cluster that is already marked `Stopped`.
    if utils.check_cluster_status(cluster_name, ['Stopped']):
        print "Cluster %s has been `Stopped`, you can not stop it again." % cluster_name
        sys.exit(1)
    do_validity_check(opts)
    (masters, slaves) = utils.get_masters_and_slaves(opts.mode)
    # Nothing to stop if no instances are running.
    if len(masters + slaves) <= 0:
        print "There is no master or slave running, check it first please."
        sys.exit(1)
    print "==> Stopping spark cluster..."
    utils.warning()
    msg = "Stopping Spark cluster will stop HDFS, spark-notebook and Hue at the same time. " \
          "Stop %s? (Y/n): " % cluster_name
    to_stop = raw_input(msg)
    # Only a literal `Y` proceeds; anything else aborts.
    if to_stop == "Y":
        # Prompt for the ECS password only when it was not supplied up front.
        if opts.pwd == "":
            opts.pwd = getpass.getpass("You need to provide the password for ECS instance:")
        spark.stop_spark_cluster(masters, slaves, opts)
        hdfs.stop_hdfs(masters, slaves, opts)
        hue.stop_hue(masters, opts)
        spark_notebook.stop_spark_notebook(masters, opts)
        utils.stop_nginx(opts,masters)
        # Persist the new status so later commands see the cluster as down.
        os.system("echo Stopped > %s%s" % (GlobalVar.CLUSTER_STATUS, cluster_name))
    else:
        print "Not `Y`, give up stopping cluster %s" % cluster_name
def start_hdfs(master, slaves, opts): utils.warning() msg = "If this is the first time, you need to format HDFS, otherwise you should not format it! \n" \ "Format HDFS (Y/n): " confirm = raw_input(msg) if confirm == 'Y': msg = "Confirm to format HDFS? (Y/n): " confirm_again = raw_input(msg) if confirm_again == "Y": print "==> Formatting HDFS..." format_hdfs = "%s/bin/hdfs namenode -format -force 2> /dev/null" % GlobalVar.HADOOP_INSTALL_DIR utils.do_ssh(master, opts, str(format_hdfs)) else: print "==> Not `Y`, skipping formatting HDFS..." else: print "==> Not `Y`, skipping formatting HDFS..." print "==> Starting namenode..." start_namenode = "%s/sbin/hadoop-daemon.sh --config %s --script hdfs start namenode" \ % (GlobalVar.HADOOP_INSTALL_DIR, GlobalVar.HADOOP_CONF_DIR) utils.do_ssh(master, opts, start_namenode) print "==> Starting datanode..." for slave in slaves: start_datanode = "%s/sbin/hadoop-daemon.sh --config %s --script hdfs start datanode" \ % (GlobalVar.HADOOP_INSTALL_DIR, GlobalVar.HADOOP_CONF_DIR) utils.do_ssh(slave, opts, start_datanode)
if gateway in instances: instances.remove(gateway) to_release = [] for ins in instances: try: instance_info = ecs.get_instance_info(ins) to_release.append(ins) print "> %s" % (instance_info['HostName']) except Exception, e: if 'InvalidInstanceId.NotFound' in e.args: print "> %s, invalid `InstanceId` not found, skip it." % ins else: raise e utils.warning() msg = "All data on all nodes will be lost!!\nYou'd better stop it first. " \ "Destroy cluster %s (Y/n): " % cluster_name to_destroy = raw_input(msg) if to_destroy == "Y": try: ecs.release_ecs_instance(to_release) except Exception, e: print e, "\nReleasing ECS instances failed for some unknown reasons, " \ "you can do it through: https://console.aliyun.com/ecs/index.htm" raise e finally: utils.delete_file_safely(GlobalVar.CLUSTER_STATUS + cluster_name) utils.delete_file_safely(GlobalVar.CLUSTER_INSTANCES + cluster_name) utils.delete_file_safely(GlobalVar.SPARK_ECS_DIR + "/" +
# NOTE(review): fragment of a destroy-cluster routine — `gateway`, `instances`
# and `cluster_name` come from the enclosing scope, which is not visible here.
# The gateway node is managed separately; never release it with the cluster.
if gateway in instances:
    instances.remove(gateway)
to_release = []
# Resolve each instance id via ECS; ids the API no longer knows about are
# skipped rather than aborting the whole teardown.
for ins in instances:
    try:
        instance_info = ecs.get_instance_info(ins)
        to_release.append(ins)
        print "> %s" % (instance_info['HostName'])
    except Exception, e:
        # `InvalidInstanceId.NotFound` means the instance is already gone.
        if 'InvalidInstanceId.NotFound' in e.args:
            print "> %s, invalid `InstanceId` not found, skip it." % ins
        else:
            raise e
utils.warning()
msg = "All data on all nodes will be lost!!\nYou'd better stop it first. " \
      "Destroy cluster %s (Y/n): " % cluster_name
to_destroy = raw_input(msg)
# Only a literal `Y` releases the instances.
if to_destroy == "Y":
    try:
        ecs.release_ecs_instance(to_release)
    except Exception, e:
        print e, "\nReleasing ECS instances failed for some unknown reasons, " \
                 "you can do it through: https://console.aliyun.com/ecs/index.htm"
        raise e
    finally:
        # Always drop the local bookkeeping files, even when the API release
        # failed, so a half-destroyed cluster is not treated as live.
        utils.delete_file_safely(GlobalVar.CLUSTER_STATUS + cluster_name)
        utils.delete_file_safely(GlobalVar.CLUSTER_INSTANCES + cluster_name)
        utils.delete_file_safely(GlobalVar.SPARK_ECS_DIR + "/" + GlobalVar.CLUSTER_HOSTS)
        utils.delete_file_safely(GlobalVar.SPARK_ECS_DIR + "/" + GlobalVar.CLUSTER_HOSTS + "-public")