def shutdown(self, request, computeset_id, format=None):
    """Shutdown the nodes in the identified ComputeSet."""
    cset = ComputeSet.objects.get(pk=computeset_id)
    if cset.cluster.project not in request.user.groups.all():
        raise PermissionDenied()

    computes = []
    for compute in cset.computes.all():
        computes.append(compute.rocks_name)
        if compute.cluster.name != request.data["cluster"]:
            cset.delete()
            return Response(
                "The node %s does not belong to the cluster %s, belongs to %s"
                % (compute.name, request.data["cluster"], compute.cluster.name),
                status=status.HTTP_400_BAD_REQUEST,
            )
        cset.computes.add(compute)

    submit_computeset.delay(FullComputeSetSerializer(cset).data)

    # We should only poweron computes after entering jobscript and
    # finishing the PROLOG on all allocated nodes. At that point the
    # nodelist will be returned and we can call poweron_nodeset()
    # poweron_nodeset.delay(nodes, hosts)

    location = "/nucleus/v1/computeset/%s" % cset.id
    serializer = ComputeSetSerializer(cset)
    response = Response(
        serializer.data,
        status=201,
        headers={"Location": request.build_absolute_uri(location)},
    )
    return response
def poweron(self, request, format=None):
    """Power on a set of computes, creating a ComputeSet."""
    clust = get_object_or_404(Cluster, name=request.data["cluster"])
    if clust.project not in request.user.groups.all():
        raise PermissionDenied()

    walltime_mins = request.data.get("walltime_mins")
    if not walltime_mins:
        walltime_mins = 2880

    nodes = []
    hosts = []
    if isinstance(request.data["computes"], list):
        for obj in request.data["computes"]:
            nodes.append(obj["name"])
            hosts.append(obj["host"])
    else:
        nodes = hostlist.expand_hostlist("%s" % request.data["computes"])
        if request.data.get("hosts"):
            hosts = hostlist.expand_hostlist("%s" % request.data["hosts"])

    if hosts and len(nodes) != len(hosts):
        return Response(
            "The length of hosts must equal the length of nodes",
            status=status.HTTP_400_BAD_REQUEST,
        )

    cset = ComputeSet()
    cset.cluster = clust
    cset.user = self.request.user.username
    cset.account = clust.project
    cset.walltime_mins = walltime_mins
    cset.jobid = None
    cset.name = None
    cset.nodelist = ""
    cset.state = ComputeSet.CSET_STATE_CREATED
    cset.node_count = len(nodes)
    cset.save()

    for node in nodes:
        compute = Compute.objects.get(name=node, cluster=clust)
        other_cs_query = ComputeSet.objects.filter(
            computes__id__exact=compute.id
        ).exclude(state__exact=ComputeSet.CSET_STATE_COMPLETED)
        if other_cs_query.exists():
            cset.delete()
            err_cs = other_cs_query.get()
            return Response(
                "The compute %s belongs to computeset %s which is in %s state"
                % (node, err_cs.id, err_cs.state),
                status=status.HTTP_400_BAD_REQUEST,
            )
        if compute.cluster.name != request.data["cluster"]:
            cset.delete()
            return Response(
                "The node %s does not belong to the cluster %s, belongs to %s"
                % (node, request.data["cluster"], compute.cluster.name),
                status=status.HTTP_400_BAD_REQUEST,
            )
        cset.computes.add(compute)

    submit_computeset.delay(FullComputeSetSerializer(cset).data)

    # We should only poweron computes after entering jobscript and
    # finishing the PROLOG on all allocated nodes. At that point the
    # nodelist will be returned and we can call poweron_nodeset()
    # poweron_nodeset.delay(nodes, hosts)

    location = "/nucleus/v1/computeset/%s" % cset.id
    serializer = ComputeSetSerializer(cset)
    response = Response(
        serializer.data,
        status=201,
        headers={"Location": location},
    )
    return response
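# For reference, a minimal sketch of the hostlist expansion the poweron()
# methods rely on, assuming the imported `hostlist` module is the
# python-hostlist package; the node names below are only illustrative.
import hostlist

example_nodes = hostlist.expand_hostlist("compute-[0-2]")
# ['compute-0', 'compute-1', 'compute-2']
example_hosts = hostlist.expand_hostlist("vm-vc1-[05-06]")
# ['vm-vc1-05', 'vm-vc1-06']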
def poweron(self, request, format=None):
    """Power on a set of computes, creating a ComputeSet."""
    clust = get_object_or_404(Cluster, name=request.data["cluster"])
    if clust.project not in request.user.groups.all():
        raise PermissionDenied()

    walltime_mins = request.data.get("walltime_mins")
    if not walltime_mins:
        walltime_mins = 2880

    nodes = []
    hosts = []
    if request.data.get("computes"):
        if isinstance(request.data["computes"], list):
            for obj in request.data["computes"]:
                nodes.append(obj["name"])
                hosts.append(obj["host"])
        else:
            nodes = hostlist.expand_hostlist("%s" % request.data["computes"])
            if request.data.get("hosts"):
                hosts = hostlist.expand_hostlist("%s" % request.data["hosts"])
    elif request.data.get("count"):
        # Pick idle, unmapped, unlocked computes from the cluster, up to the
        # requested count.
        computes_selected = (
            Compute.objects.filter(cluster=clust)
            .exclude(
                computeset__state__in=[
                    ComputeSet.CSET_STATE_CREATED,
                    ComputeSet.CSET_STATE_SUBMITTED,
                    ComputeSet.CSET_STATE_RUNNING,
                    ComputeSet.CSET_STATE_ENDING,
                ]
            )
            .exclude(state="active")
            .filter(Q(image_state="unmapped") | Q(image_state__isnull=True))
            .exclude(image_locked=True)[: int(request.data["count"])]
        )
        nodes.extend([comp.name for comp in computes_selected])

        if len(nodes) < int(request.data["count"]) or int(request.data["count"]) == 0:
            return Response(
                "There are %i nodes available for starting. The requested count must be greater than zero and must not exceed the number available."
                % len(nodes),
                status=status.HTTP_400_BAD_REQUEST,
            )

    if hosts and len(nodes) != len(hosts):
        return Response(
            "The length of hosts must equal the length of nodes",
            status=status.HTTP_400_BAD_REQUEST,
        )

    cset = ComputeSet()
    cset.cluster = clust
    cset.user = clust.username

    if request.data.get("allocation"):
        cset.account = request.data["allocation"]
    elif clust.allocations.count() == 1:
        cset.account = clust.allocations.get().allocation
    else:
        return Response(
            "Please specify the allocation",
            status=status.HTTP_400_BAD_REQUEST,
        )

    if not clust.allocations.filter(allocation=cset.account).exists():
        return Response(
            "Allocation %s does not belong to the cluster." % cset.account,
            status=status.HTTP_400_BAD_REQUEST,
        )

    cset.walltime_mins = walltime_mins
    cset.jobid = None
    cset.name = None
    cset.nodelist = ""
    cset.state = ComputeSet.CSET_STATE_CREATED
    cset.node_count = len(nodes)
    cset.save()

    for node in nodes:
        compute = Compute.objects.get(name=node, cluster=clust)
        other_cs_query = ComputeSet.objects.filter(
            computes__id__exact=compute.id
        ).exclude(
            state__in=[
                ComputeSet.CSET_STATE_COMPLETED,
                ComputeSet.CSET_STATE_FAILED,
                ComputeSet.CSET_STATE_CANCELLED,
            ]
        )
        if other_cs_query.exists():
            cset.delete()
            err_cs = other_cs_query.get()
            return Response(
                "The compute %s belongs to computeset %s which is in %s state"
                % (node, err_cs.id, err_cs.state),
                status=status.HTTP_400_BAD_REQUEST,
            )
        if (compute.image_state not in ["unmapped", None]) or compute.image_locked:
            cset.delete()
            return Response(
                "The node %s's image is in %s state and its image locked status is %s. Please contact user support if the VM is not running."
                % (node, compute.image_state, compute.image_locked),
                status=status.HTTP_400_BAD_REQUEST,
            )
        if compute.cluster.name != request.data["cluster"]:
            cset.delete()
            return Response(
                "The node %s does not belong to the cluster %s, belongs to %s"
                % (node, request.data["cluster"], compute.cluster.name),
                status=status.HTTP_400_BAD_REQUEST,
            )
        cset.computes.add(compute)

    submit_computeset.delay(FullComputeSetSerializer(cset).data)

    # We should only poweron computes after entering jobscript and
    # finishing the PROLOG on all allocated nodes. At that point the
    # nodelist will be returned and we can call poweron_nodeset()
    # poweron_nodeset.delay(nodes, hosts)

    location = "/nucleus/v1/computeset/%s" % cset.id
    serializer = ComputeSetSerializer(cset)
    response = Response(
        serializer.data,
        status=201,
        headers={"Location": request.build_absolute_uri(location)},
    )
    return response
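# A hypothetical client call against this endpoint; the route below is only
# illustrative (the URL configuration is not shown in this snippet), but the
# payload fields match the keys read from request.data above.
import requests

payload = {
    "cluster": "vc1",             # name of an existing Cluster (illustrative)
    "computes": "compute-[0-3]",  # hostlist expression, or a list of {"name": ..., "host": ...} dicts
    "walltime_mins": 120,         # defaults to 2880 when omitted
    # Alternatively, omit "computes" and pass "count": 4 to let the server
    # pick idle, unmapped, unlocked nodes; "allocation" is required unless
    # the cluster has exactly one allocation.
}
resp = requests.post(
    "https://nucleus.example.org/nucleus/v1/computeset/poweron",  # illustrative URL
    json=payload,
    auth=("username", "password"),
)
print(resp.status_code, resp.headers.get("Location"))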