def comet_list_queue(request):
    cluster = "comet"
    output_format = "json"
    order = [
        "jobid",
        "user",
        "partition",
        "nodes",
        "st",
        "name",
        "nodelist",
        "time",
    ]
    provider = BatchProvider(cluster)
    data = json.loads(provider.queue(cluster, format=output_format))
    print(data)
    return dict_table(request, title="Comet Queue", data=data, order=order)
def comet_info(request):
    cluster = "comet"
    output_format = "json"
    order = [
        'partition',
        'nodes',
        'state',
        'avail',
        'timelimit',
        'cluster',
        'nodelist',
        # 'updated',
    ]
    provider = BatchProvider(cluster)
    data = json.loads(provider.info(cluster, format=output_format))
    print(data)
    return dict_table(request, title="Comet Info", data=data, order=order)
def hpc_queue(request, cluster=None):
    output_format = "json"
    order = [
        "jobid",
        "user",
        "partition",
        "nodes",
        "st",
        "name",
        "nodelist",
        "time",
    ]
    provider = BatchProvider(cluster)
    data = json.loads(provider.queue(cluster, format=output_format))
    print(data)
    return dict_table(request,
                      title="Queues for {}".format(cluster),
                      data=data,
                      order=order)
def hpc_info(request, cluster=None):
    output_format = "json"
    order = [
        'partition',
        'nodes',
        'state',
        'avail',
        'timelimit',
        'cluster',
        'nodelist',
        # 'updated',
    ]
    provider = BatchProvider(cluster)
    data = json.loads(provider.info(cluster, format=output_format))
    print(data)
    return dict_table(request,
                      title="Info for {}".format(cluster),
                      data=data,
                      order=order)
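# A minimal sketch (not from the original source) of how the view
# functions above might be routed, assuming a Django-style URLconf:
# the views take a `request` plus a `cluster` keyword, which matches
# Django's URL-kwarg convention. The module name `hpc_views` is
# hypothetical; the real project may route these differently.
from django.urls import path

import hpc_views  # assumption: the module defining hpc_queue/hpc_info

urlpatterns = [
    path("hpc/<str:cluster>/queue/", hpc_views.hpc_queue),
    path("hpc/<str:cluster>/info/", hpc_views.hpc_info),
]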
def do_hpc(self, args, arguments):
    # noinspection PyPep8
    """
    ::

        Usage:
            hpc queue [--job=NAME][--cluster=CLUSTER][--format=FORMAT]
            hpc info [--cluster=CLUSTER][--format=FORMAT]
            hpc run list [ID] [--cluster=CLUSTER]
            hpc run output [ID] [--cluster=CLUSTER]
            hpc run rm [ID] [--cluster=CLUSTER]
            hpc run SCRIPT [--queue=QUEUE] [--t=TIME] [--N=nodes] [--name=NAME]
                [--cluster=CLUSTER][--dir=DIR][--group=GROUP][--format=FORMAT]
            hpc delete --job=NAME [--cluster=CLUSTER][--group=GROUP]
            hpc delete all [--cluster=CLUSTER][--group=GROUP][--format=FORMAT]
            hpc status [--job=name] [--cluster=CLUSTER] [--group=GROUP]
            hpc test --cluster=CLUSTER [--time=SECONDS]

        Options:
            --format=FORMAT  the output format [default: table]

        Special notes:
            If the group is specified, only jobs from that group are
            considered. Otherwise the default group is used. If the group
            is set to None, all groups are used.

        Description:
            cm hpc queue
                lists the details of the queues of the hpc cluster

            cm hpc queue --job=NAME
                lists the details of the job in the queue of the hpc cluster

            cm hpc info
                lists the details of the hpc cluster

            cm hpc run SCRIPT
                submits the script to the cluster. The script will be
                copied prior to execution into the home directory on the
                remote machine. If a DIR is specified, it will be copied
                into that dir. The name of the script is either specified
                in the script itself, or, if not, the default naming scheme
                of cloudmesh is used, with the same incremented index as
                for vms in clouds: cloudmeshusername-index

            cm hpc delete all
                kills all jobs in the default hpc group

            cm hpc delete --job=NAME
                kills a job with a given name or job id

            cm default cluster=NAME
                sets the default hpc cluster

            cm hpc status
                returns the status of all jobs

            cm hpc status job=ID
                returns the status of the named job

            cm hpc test --cluster=CLUSTER --time=SECONDS
                submits a simple test job to the named cluster and reports
                whether the job could be executed successfully. This is a
                blocking call and may take a long time to complete,
                depending on whether the queuing system of that cluster is
                busy. It will only use one node/core and print the message

                    #CLOUDMESH: Test ok

                which is being looked for to identify whether the test is
                successful. If time is used, the job is terminated after
                the time has elapsed.

        Examples:
            cm hpc queue
            cm hpc queue --job=xxx
            cm hpc info
            cm hpc delete --job=6
            cm hpc delete all
            cm hpc status
            cm hpc status --job=6
            cm hpc run uname
            cm hpc run ~/test.sh --cluster=india
    """
    format = arguments['--format']
    cluster = arguments['--cluster'] or Default.get_cluster()
    arguments["CLUSTER"] = cluster
    if cluster is None:
        Console.error("Default cluster doesn't exist")
        return

    batch = BatchProvider(cluster)

    if arguments["queue"]:
        name = arguments['--job']
        result = batch.queue(cluster, format=format, job=name)
        Console.msg(result)

    elif arguments["info"]:
        Console.msg(batch.info(cluster, format))

    elif arguments['delete'] and arguments['all']:
        group = arguments['--group'] or Default.get('group')
        if group is None:
            Console.error('set default group using: default group=<value> --cloud=general')
            return
        Console.ok(batch.delete(cluster, None, group))

    elif arguments["delete"]:
        job = arguments['--job']
        Console.ok(batch.delete(cluster, job))

    elif arguments["status"]:
        name = arguments['--job']
        result = batch.queue(cluster, format=format, job=name)
        Console.msg(result)

    elif arguments["test"]:
        time_secs = arguments['--time']
        if time_secs:
            time = '00:00:' + time_secs
        else:
            time = '00:00:10'  # give a default time of 10 secs
        print(batch.test(cluster, time))

    elif arguments["run"] and arguments["list"]:
        # hpc run list [ID] [--cluster=CLUSTER]
        if arguments["ID"]:
            print("# List of experiment {ID} on Cluster {CLUSTER}".format(**arguments))
            result = Experiment.list(cluster, id=arguments["ID"], format="list")
            if result is not None:
                print("\n".join(result))
            else:
                Console.error("Could not find experiment {ID} on {CLUSTER}".format(**arguments))
        else:
            print("# List of experiments on Cluster {CLUSTER}".format(**arguments))
            ids = Experiment.list(cluster, id=None, format="list")
            if ids is not None:
                print(", ".join([str(i) for i in ids]))
            else:
                Console.error("Could not find any experiments on {CLUSTER}".format(**arguments))

    elif arguments["run"] and arguments["rm"]:
        # hpc run rm [ID] [--cluster=CLUSTER]
        if arguments["ID"]:
            force = yn_choice("Would you like to delete experiment {ID} on Cluster {CLUSTER}".format(**arguments))
            if force:
                try:
                    result = Experiment.rm(cluster, id=arguments["ID"])
                    Console.ok("Experiment {ID} on Cluster {CLUSTER} deleted".format(**arguments))
                except:
                    Console.error("Could not delete experiment {ID} on {CLUSTER}".format(**arguments))
        else:
            result = Experiment.list(cluster, id=None, format="list")
            if result is not None:
                arguments['experiments'] = ", ".join([str(i) for i in result])
            else:
                Console.error("Could not find any experiments on {CLUSTER}".format(**arguments))
                return ""
            force = yn_choice("Would you like to delete the experiments {experiments} on Cluster {CLUSTER}".format(
                **arguments))
            if force:
                try:
                    result = Experiment.rm(cluster, id=None)
                    Console.ok("Experiments {experiments} on Cluster {CLUSTER} deleted".format(**arguments))
                except:
                    Console.error("Could not delete the experiments on {CLUSTER}".format(**arguments))
        return ""

    elif arguments["run"] and arguments["output"]:
        # hpc run output [ID] [--cluster=CLUSTER]
        if arguments["ID"]:
            print("# Output of experiment {ID} on Cluster {CLUSTER}".format(**arguments))
            result = Experiment.output(cluster, id=arguments["ID"], format="list")
            if result is not None:
                print("\n".join(result))
            else:
                Console.error("Could not find experiment {ID} on {CLUSTER}".format(**arguments))
        else:
            print("# Output of experiments on Cluster {CLUSTER}".format(**arguments))
            ids = Experiment.output(cluster, id=None, format="list")
            if ids is not None:
                print(", ".join([str(i) for i in ids]))
            else:
                Console.error("Could not find any experiments on {CLUSTER}".format(**arguments))

    elif arguments["run"]:
        queue = arguments['--queue'] or Default.get('queue')
        # if not queue:
        #     Console.error('set default queue using: default queue=<value>')
        #     return
        group = arguments['--group'] or Default.get('group')
        if group is None:
            Console.error('set default group using: default group=<value> --cloud=general')
            return
        script = arguments['SCRIPT']
        arg_dict = {
            '-name': arguments['--name'],
            '-p': queue,
            '-t': arguments['--t'],
            '-N': arguments['--N']
        }
        result = batch.run(cluster, group, script, **arg_dict)
        if isinstance(result, dict):
            print(attribute_printer(result))
            Console.ok("Experiment {count}: Started batch job {job_id} on {cluster}".format(**result))
        else:
            Console.error(result)

    return ""
def do_hpc(self, args, arguments):
    # noinspection PyPep8
    """
    ::

        Usage:
            hpc queue [--job=NAME][--cluster=CLUSTER][--format=FORMAT]
            hpc info [--cluster=CLUSTER][--format=FORMAT]
            hpc run list [ID] [--cluster=CLUSTER]
            hpc run output [ID] [--cluster=CLUSTER]
            hpc run rm [ID] [--cluster=CLUSTER]
            hpc run SCRIPT [--queue=QUEUE] [--t=TIME] [--N=nodes] [--name=NAME]
                [--cluster=CLUSTER][--dir=DIR][--group=GROUP][--format=FORMAT]
            hpc delete --job=NAME [--cluster=CLUSTER][--group=GROUP]
            hpc delete all [--cluster=CLUSTER][--group=GROUP][--format=FORMAT]
            hpc status [--job=name] [--cluster=CLUSTER] [--group=GROUP]
            hpc test --cluster=CLUSTER [--time=SECONDS]

        Options:
            --format=FORMAT  the output format [default: table]

        Special notes:
            If the group is specified, only jobs from that group are
            considered. Otherwise the default group is used. If the group
            is set to None, all groups are used.

        Description:
            cm hpc queue
                lists the details of the queues of the hpc cluster

            cm hpc queue --job=NAME
                lists the details of the job in the queue of the hpc cluster

            cm hpc info
                lists the details of the hpc cluster

            cm hpc run SCRIPT
                submits the script to the cluster. The script will be
                copied prior to execution into the home directory on the
                remote machine. If a DIR is specified, it will be copied
                into that dir. The name of the script is either specified
                in the script itself, or, if not, the default naming scheme
                of cloudmesh is used, with the same incremented index as
                for vms in clouds: cloudmeshusername-index

            cm hpc delete all
                kills all jobs in the default hpc group

            cm hpc delete --job=NAME
                kills a job with a given name or job id

            cm default cluster=NAME
                sets the default hpc cluster

            cm hpc status
                returns the status of all jobs

            cm hpc status job=ID
                returns the status of the named job

            cm hpc test --cluster=CLUSTER --time=SECONDS
                submits a simple test job to the named cluster and reports
                whether the job could be executed successfully. This is a
                blocking call and may take a long time to complete,
                depending on whether the queuing system of that cluster is
                busy. It will only use one node/core and print the message

                    #CLOUDMESH: Test ok

                which is being looked for to identify whether the test is
                successful. If time is used, the job is terminated after
                the time has elapsed.

        Examples:
            cm hpc queue
            cm hpc queue --job=xxx
            cm hpc info
            cm hpc delete --job=6
            cm hpc delete all
            cm hpc status
            cm hpc status --job=6
            cm hpc run uname
            cm hpc run ~/test.sh --cluster=india
    """
    format = arguments['--format']
    cluster = arguments['--cluster'] or Default.cluster
    arguments["CLUSTER"] = cluster
    if cluster is None:
        Console.error("Default cluster doesn't exist")
        return

    batch = BatchProvider(cluster)

    if arguments["queue"]:
        name = arguments['--job']
        result = batch.queue(cluster, format=format, job=name)
        Console.msg(result)

    elif arguments["info"]:
        Console.msg(batch.info(cluster, format))

    elif arguments['delete'] and arguments['all']:
        group = arguments['--group'] or Default.get(name='group')
        if group is None:
            Console.error('set default group using: default group=<value> --cloud=general')
            return
        Console.ok(batch.delete(cluster, None, group))

    elif arguments["delete"]:
        job = arguments['--job']
        Console.ok(batch.delete(cluster, job))

    elif arguments["status"]:
        name = arguments['--job']
        result = batch.queue(cluster, format=format, job=name)
        Console.msg(result)

    elif arguments["test"]:
        time_secs = arguments['--time']
        if time_secs:
            time = '00:00:' + time_secs
        else:
            time = '00:00:10'  # give a default time of 10 secs
        print(batch.test(cluster, time))

    elif arguments["run"] and arguments["list"]:
        # hpc run list [ID] [--cluster=CLUSTER]
        if arguments["ID"]:
            print("# List of experiment {ID} on Cluster {CLUSTER}".format(**arguments))
            result = Experiment.list(cluster, id=arguments["ID"], format="list")
            if result is not None:
                print("\n".join(result))
            else:
                Console.error("Could not find experiment {ID} on {CLUSTER}".format(**arguments))
        else:
            print("# List of experiments on Cluster {CLUSTER}".format(**arguments))
            ids = Experiment.list(cluster, id=None, format="list")
            if ids is not None:
                print(", ".join([str(i) for i in ids]))
            else:
                Console.error("Could not find any experiments on {CLUSTER}".format(**arguments))

    elif arguments["run"] and arguments["rm"]:
        # hpc run rm [ID] [--cluster=CLUSTER]
        if arguments["ID"]:
            force = yn_choice("Would you like to delete experiment {ID} on Cluster {CLUSTER}".format(**arguments))
            if force:
                try:
                    result = Experiment.rm(cluster, id=arguments["ID"])
                    Console.ok("Experiment {ID} on Cluster {CLUSTER} deleted".format(**arguments))
                except:
                    Console.error("Could not delete experiment {ID} on {CLUSTER}".format(**arguments))
        else:
            result = Experiment.list(cluster, id=None, format="list")
            if result is not None:
                arguments['experiments'] = ", ".join([str(i) for i in result])
            else:
                Console.error("Could not find any experiments on {CLUSTER}".format(**arguments))
                return ""
            force = yn_choice("Would you like to delete the experiments {experiments} on Cluster {CLUSTER}".format(
                **arguments))
            if force:
                try:
                    result = Experiment.rm(cluster, id=None)
                    Console.ok("Experiments {experiments} on Cluster {CLUSTER} deleted".format(**arguments))
                except:
                    Console.error("Could not delete the experiments on {CLUSTER}".format(**arguments))
        return ""

    elif arguments["run"] and arguments["output"]:
        # hpc run output [ID] [--cluster=CLUSTER]
        if arguments["ID"]:
            print("# Output of experiment {ID} on Cluster {CLUSTER}".format(**arguments))
            result = Experiment.output(cluster, id=arguments["ID"], format="list")
            if result is not None:
                print("\n".join(result))
            else:
                Console.error("Could not find experiment {ID} on {CLUSTER}".format(**arguments))
        else:
            print("# Output of experiments on Cluster {CLUSTER}".format(**arguments))
            ids = Experiment.output(cluster, id=None, format="list")
            if ids is not None:
                print(", ".join([str(i) for i in ids]))
            else:
                Console.error("Could not find any experiments on {CLUSTER}".format(**arguments))

    elif arguments["run"]:
        queue = arguments['--queue'] or Default.get(name='queue')
        # if not queue:
        #     Console.error('set default queue using: default queue=<value>')
        #     return
        group = arguments['--group'] or Default.get(name='group')
        if group is None:
            Console.error('set default group using: default group=<value> --cloud=general')
            return
        script = arguments['SCRIPT']
        arg_dict = {
            '-name': arguments['--name'],
            '-p': queue,
            '-t': arguments['--t'],
            '-N': arguments['--N']
        }
        result = batch.run(cluster, group, script, **arg_dict)
        if isinstance(result, dict):
            print(Printer.attribute(result))
            Console.ok("Experiment {count}: Started batch job {job_id} on {cluster}".format(**result))
        else:
            Console.error(result)

    return ""
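# A minimal sketch, assuming the scheduler expects HH:MM:SS wall times:
# the test branch above builds the time string as '00:00:' + SECONDS,
# which yields an invalid value once SECONDS >= 60. A hypothetical
# helper (`hhmmss` is not part of the original code) that normalizes
# any seconds count:
def hhmmss(seconds):
    # split the total seconds into hours, minutes, and leftover seconds
    hours, rest = divmod(int(seconds), 3600)
    minutes, secs = divmod(rest, 60)
    return "{:02d}:{:02d}:{:02d}".format(hours, minutes, secs)

assert hhmmss("90") == "00:01:30"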
def refresh(self, kind, name, **kwargs):
    """
    This method refreshes the local database
    with the live cloud details

    :param kind:
    :param name:
    :param kwargs:
    :return:
    """
    try:
        # print(cloudname)
        # get the user
        # TODO: Confirm user
        user = self.user

        if kind in ["flavor", "image", "vm", "secgroup"]:
            # get provider for specific cloud
            provider = CloudProvider(name).provider

            # clear local db records for kind
            self.clear(kind, name)

            # for secgroup, clear rules as well
            if kind == "secgroup":
                self.clear("secgrouprule", name)

            if kind == "flavor":
                flavors = provider.list_flavor(name)
                for flavor in list(flavors.values()):
                    flavor["uuid"] = flavor['id']
                    flavor['type'] = 'string'
                    flavor["category"] = name
                    flavor["user"] = user
                    db_obj = {0: {kind: flavor}}
                    self.add_obj(db_obj)
                    self.save()
                return True

            elif kind == "image":
                images = provider.list_image(name)
                for image in list(images.values()):
                    image['uuid'] = image['id']
                    image['type'] = 'string'
                    image['category'] = name
                    image['user'] = user
                    db_obj = {0: {kind: image}}
                    self.add_obj(db_obj)
                    self.save()
                return True

            elif kind == "vm":
                vms = provider.list_vm(name)
                for vm in list(vms.values()):
                    vm['uuid'] = vm['id']
                    vm['type'] = 'string'
                    vm['category'] = name
                    vm['user'] = user
                    db_obj = {0: {kind: vm}}
                    self.add_obj(db_obj)
                    self.save()
                return True

            elif kind == "secgroup":
                secgroups = provider.list_secgroup(name)
                # pprint(secgroups)
                for secgroup in list(secgroups.values()):
                    secgroup_db_obj = self.db_obj_dict(
                        "secgroup",
                        name=secgroup['name'],
                        uuid=secgroup['id'],
                        category=name,
                        project=secgroup['tenant_id'],
                        user=user)

                    for rule in secgroup['rules']:
                        rule_db_obj = self.db_obj_dict(
                            "secgrouprule",
                            uuid=rule['id'],
                            name=secgroup['name'],
                            groupid=rule['parent_group_id'],
                            category=name,
                            user=user,
                            project=secgroup['tenant_id'],
                            fromPort=rule['from_port'],
                            toPort=rule['to_port'],
                            protocol=rule['ip_protocol'])

                        if rule['ip_range']:
                            rule_db_obj[0]['secgrouprule']['cidr'] = \
                                rule['ip_range']['cidr']

                        self.add_obj(rule_db_obj)
                        self.save()
                    # rule-for-loop ends

                    self.add_obj(secgroup_db_obj)
                    self.save()
                return True

        elif kind in ["batchjob"]:
            # provider = BatchProvider(name).provider
            # provider = BatchProvider(name)
            from cloudmesh_client.cloud.hpc.BatchProvider import BatchProvider

            provider = BatchProvider(name)
            vms = provider.list_job(name)
            for vm in list(vms.values()):
                vm['uuid'] = vm['id']
                vm['type'] = 'string'
                vm['category'] = name
                vm['user'] = user
                db_obj = {0: {kind: vm}}
                self.add_obj(db_obj)
                self.save()
            return True

        else:
            Console.error(
                "refresh not supported for this kind: {}".format(kind))

    except Exception as ex:
        # ex.message is Python 2 only; str(ex) works on both versions
        Console.error(str(ex))
        return False
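# A minimal usage sketch, assuming refresh() lives on a database facade
# class named CloudmeshDatabase (an assumption; substitute the real
# owner of the method). "chameleon" is just an example cloud name.
db = CloudmeshDatabase()
if db.refresh("vm", "chameleon"):
    print("vm records for chameleon refreshed")
else:
    print("refresh failed; see console errors above")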
def do_hpc(self, args, arguments):
    # noinspection PyPep8
    """
    ::

        Usage:
            hpc queue [--job=NAME][--cluster=CLUSTER][--format=FORMAT]
            hpc info [--cluster=CLUSTER][--format=FORMAT]
            hpc run SCRIPT [--queue=QUEUE] [--t=TIME] [--N=nodes] [--name=NAME]
                [--cluster=CLUSTER][--dir=DIR][--group=GROUP][--format=FORMAT]
            hpc delete --job=NAME [--cluster=CLUSTER][--group=GROUP]
            hpc delete all [--cluster=CLUSTER][--group=GROUP][--format=FORMAT]
            hpc status [--job=name] [--cluster=CLUSTER] [--group=GROUP]
            hpc test --cluster=CLUSTER [--time=SECONDS]

        Options:
            --format=FORMAT  the output format [default: table]

        Special notes:
            If the group is specified, only jobs from that group are
            considered. Otherwise the default group is used. If the group
            is set to None, all groups are used.

        Description:
            cm hpc queue
                lists the details of the queues of the hpc cluster

            cm hpc queue --job=NAME
                lists the details of the job in the queue of the hpc cluster

            cm hpc info
                lists the details of the hpc cluster

            cm hpc run SCRIPT
                submits the script to the cluster. The script will be
                copied prior to execution into the home directory on the
                remote machine. If a DIR is specified, it will be copied
                into that dir. The name of the script is either specified
                in the script itself, or, if not, the default naming scheme
                of cloudmesh is used, with the same incremented index as
                for vms in clouds: cloudmeshusername-index

            cm hpc delete all
                kills all jobs on the default hpc cluster

            cm hpc delete all --cluster=all
                kills all jobs on all clusters

            cm hpc delete --job=NAME
                kills a job with a given name or job id

            cm hpc default cluster=NAME
                sets the default hpc cluster

            cm hpc status
                returns the status of all jobs

            cm hpc status job=ID
                returns the status of the named job

            cm hpc test --cluster=CLUSTER --time=SECONDS
                submits a simple test job to the named cluster and reports
                whether the job could be executed successfully. This is a
                blocking call and may take a long time to complete,
                depending on whether the queuing system of that cluster is
                busy. It will only use one node/core and print the message

                    #CLOUDMESH: Test ok

                which is being looked for to identify whether the test is
                successful. If time is used, the job is terminated after
                the time has elapsed.

        Examples:
            cm hpc queue
            cm hpc queue --job=xxx
            cm hpc info
            cm hpc delete --job=6
            cm hpc run uname
    """
    format = arguments['--format']
    cluster = arguments['--cluster'] or Default.get_cluster()
    print("CCC", cluster)
    if cluster is None:
        Console.error("Default cluster doesn't exist")
        return

    batch = BatchProvider(cluster)

    if arguments["queue"]:
        name = arguments['--job']
        result = batch.queue(cluster, format=format, job=name)
        Console.msg(result)

    elif arguments["info"]:
        Console.msg(batch.info(cluster, format))

    elif arguments["delete"]:
        job = arguments['--job']
        Console.ok(batch.kill(cluster, job))

    elif arguments["status"]:
        name = arguments['--job']
        result = batch.queue(cluster, format=format, job=name)
        Console.msg(result)

    elif arguments["run"]:
        queue = arguments['--queue'] or Default.get('queue')
        # if not queue:
        #     Console.error('set default queue using: default queue=<value>')
        #     return
        script = arguments['SCRIPT']
        arg_dict = {
            '-name': arguments['--name'],
            '-p': queue,
            '-t': arguments['--t'],
            '-N': arguments['--N']
        }
        result = batch.run(cluster, script, **arg_dict)
        Console.ok("Started batch job {id} on {cluster}".format(**result))

    elif arguments["test"]:
        time_secs = arguments['--time']
        if time_secs:
            time = '00:00:' + time_secs
        else:
            time = '00:00:10'  # give a default time of 10 secs
        print(batch.test(cluster, time))

    return ""
def refresh_new(cls, kind, name, **kwargs):
    """
    This method refreshes the local database
    with the live cloud details

    :param kind:
    :param name:
    :param kwargs:
    :return:
    """
    try:
        # print(cloudname)
        # get the user
        # TODO: Confirm user
        # user = cls.user

        purge = kwargs.get("purge", True)

        if kind in ["flavor", "image", "vm"]:
            # get provider for specific cloud
            provider = CloudProvider(name).provider

            current_elements = cls.find_new(category=name,
                                            kind=kind,
                                            output='dict',
                                            key='name')
            # returns the following:
            # current_elements = {}
            # for element in elements:
            #     current_elements[element["name"]] = element

            # pprint(current_elements)

            if purge:
                cls.clear(kind=kind, category=name)

            elements = provider.list(kind, name)

            #
            # image, flavor, username, group, ...
            #
            for element in list(elements.values()):
                element["uuid"] = element['id']
                element['type'] = 'string'
                element["category"] = name
                # element["user"] = user
                element["kind"] = kind
                element["provider"] = provider.cloud_type
                if current_elements is not None:
                    # carry selected attributes over from the cached records
                    for index in current_elements:
                        current = current_elements[index]
                        for attribute in ["username", "image", "flavor", "group"]:
                            if attribute in current and current[attribute] is not None:
                                element[attribute] = current[attribute]
                        print("CCC", index, element["name"], element["flavor"])
                cls.add(element)
            return True

        elif kind in ["batchjob"]:
            # provider = BatchProvider(name).provider
            # provider = BatchProvider(name)
            from cloudmesh_client.cloud.hpc.BatchProvider import BatchProvider

            provider = BatchProvider(name)
            vms = provider.list_job(name)
            for job in list(vms.values()):
                job[u'uuid'] = job['id']
                job[u'type'] = 'string'
                job[u'category'] = name
                # job[u'user'] = user
                cls.add(job)
            cls.save()
            return True

        elif kind not in ["secgroup"]:
            Console.error(
                "refresh not supported for this kind: {}".format(kind))

    except Exception as ex:
        Console.error("Problem refreshing {}: {}".format(kind, ex))

    return False
def read_squeue():
    # read squeue from comet
    # TODO: check this function
    name = None
    provider = BatchProvider(name)
    return provider.read_squeue(format="json")
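# A minimal sketch of driving the BatchProvider directly, using only the
# queue()/info() calls that appear in the functions above; "comet" is an
# example cluster name, and the assumption is that both calls return a
# JSON string when format="json" (as the views above rely on).
import json

provider = BatchProvider("comet")
jobs = json.loads(provider.queue("comet", format="json"))
partitions = json.loads(provider.info("comet", format="json"))
print(len(jobs), "jobs;", len(partitions), "partitions")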