示例#1
0
    def shutdown(self, request, computeset_id, format=None):
        """Shutdown the nodes in the identified ComputeSet.

        Loads the ComputeSet, checks that the caller is a member of the
        owning cluster's project and that every compute in the set belongs
        to the cluster named in ``request.data["cluster"]``, then queues
        ``submit_computeset`` with the fully serialized set.

        NOTE(review): despite the method name, the body queues
        ``submit_computeset`` and answers 201 exactly like a power-on
        request; no power-off task is invoked here -- confirm this is
        intended.

        Args:
            request: The incoming request; ``request.data["cluster"]`` is
                read as the expected cluster name.
            computeset_id: Primary key of the ComputeSet to act on.
            format: Not used by this method.

        Returns:
            A 201 ``Response`` carrying the serialized ComputeSet and an
            absolute ``Location`` header, or a 400 ``Response`` (after
            deleting the ComputeSet) when one of its computes belongs to a
            different cluster.

        Raises:
            Http404: No ComputeSet has the given primary key.
            PermissionDenied: The cluster's project is not among the
                requesting user's groups.
        """
        cset = get_object_or_404(ComputeSet, pk=computeset_id)
        if cset.cluster.project not in request.user.groups.all():
            raise PermissionDenied()

        for compute in cset.computes.all():
            if compute.cluster.name != request.data["cluster"]:
                cset.delete()
                # Report the compute by its name; the loop has no separate
                # ``node`` variable to format into the message.
                return Response(
                    "The node %s does not belong to the cluster %s, belongs to %s"
                    % (compute.name, request.data["cluster"], compute.cluster.name),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            # The compute already comes from cset.computes, so this re-adds
            # an existing member (kept to preserve the original behaviour).
            cset.computes.add(compute)

        submit_computeset.delay(FullComputeSetSerializer(cset).data)

        # We should only poweron computes after entering jobscript and
        # finishing the PROLOG on all allocated nodes. At that point the
        # nodelist will be returned and we can call poweron_nodeset()
        # poweron_nodeset.delay(nodes, hosts)

        location = "/nucleus/v1/computeset/%s" % (cset.id)

        serializer = ComputeSetSerializer(cset)
        return Response(
            serializer.data,
            status=201,
            headers={"Location": request.build_absolute_uri(location)},
        )
示例#2
0
    def poweron(self, request, format=None):
        """Power on a set of computes by creating and submitting a ComputeSet.

        Reads the following keys from ``request.data``:

        * ``cluster`` -- name of the cluster the computes belong to.
        * ``walltime_mins`` -- optional; 2880 is used when missing or falsy.
        * ``computes`` -- either a list of ``{"name": ..., "host": ...}``
          objects, or a value that is stringified and expanded with
          ``hostlist.expand_hostlist``.
        * ``hosts`` -- optional hostlist expression, only read when
          ``computes`` is not a list.

        A ComputeSet is saved in the CREATED state, every requested compute
        is validated and attached to it, and ``submit_computeset`` is queued
        with the fully serialized set. If any compute fails validation the
        freshly created ComputeSet is deleted again before responding.

        Args:
            request: The incoming request carrying the keys listed above.
            format: Not used by this method.

        Returns:
            A 201 ``Response`` with the serialized ComputeSet and a
            ``Location`` header, or a 400 ``Response`` with a plain-text
            reason when the request cannot be satisfied.

        Raises:
            Http404: No cluster with the given name exists.
            PermissionDenied: The cluster's project is not among the
                requesting user's groups.
        """
        clust = get_object_or_404(Cluster, name=request.data["cluster"])
        if clust.project not in request.user.groups.all():
            raise PermissionDenied()

        # Fall back to the default walltime when none (or a falsy one) is given.
        walltime_mins = request.data.get("walltime_mins") or 2880

        nodes = []
        hosts = []

        requested = request.data["computes"]
        # ``isinstance`` is required here: ``value is list`` compares against
        # the type object itself and is never true for an actual list.
        if isinstance(requested, list):
            nodes = [obj["name"] for obj in requested]
            hosts = [obj["host"] for obj in requested]
        else:
            nodes = hostlist.expand_hostlist("%s" % requested)
            if request.data.get("hosts"):
                hosts = hostlist.expand_hostlist("%s" % request.data["hosts"])

        if hosts and len(nodes) != len(hosts):
            return Response("The length of hosts should be equal to length of nodes",
                            status=status.HTTP_400_BAD_REQUEST)

        cset = ComputeSet()
        cset.cluster = clust
        cset.user = self.request.user.username
        cset.account = clust.project
        cset.walltime_mins = walltime_mins
        cset.jobid = None
        cset.name = None
        cset.nodelist = ""
        cset.state = ComputeSet.CSET_STATE_CREATED
        cset.node_count = len(nodes)
        cset.save()

        for node in nodes:
            try:
                compute = Compute.objects.get(name=node, cluster=clust)
            except Compute.DoesNotExist:
                # Do not leave the just-saved ComputeSet behind when the
                # caller names a compute that is not in this cluster.
                cset.delete()
                return Response(
                    "The compute %s does not exist in the cluster %s" % (node, request.data["cluster"]),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            # Fetch the conflicting set *before* deleting ours: a duplicate
            # node name makes our own set the match, and several matches
            # would make a ``get()`` raise instead of reporting the conflict.
            err_cs = (
                ComputeSet.objects.filter(computes__id__exact=compute.id)
                .exclude(state__exact=ComputeSet.CSET_STATE_COMPLETED)
                .first()
            )
            if err_cs is not None:
                cset.delete()
                return Response(
                    "The compute %s belongs to computeset %s which is in %s state" % (node, err_cs.id, err_cs.state),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            # Defensive check kept from the original; the lookup above is
            # already restricted to ``clust``.
            if compute.cluster.name != request.data["cluster"]:
                cset.delete()
                return Response(
                    "The node %s does not belong to the cluster %s, belongs to %s" % (node, request.data["cluster"], compute.cluster.name),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            cset.computes.add(compute)

        submit_computeset.delay(FullComputeSetSerializer(cset).data)

        # We should only poweron computes after entering jobscript and
        # finishing the PROLOG on all allocated nodes. At that point the
        # nodelist will be returned and we can call poweron_nodeset()
        #poweron_nodeset.delay(nodes, hosts)

        location = "/nucleus/v1/computeset/%s" % (cset.id)

        serializer = ComputeSetSerializer(cset)
        return Response(
            serializer.data,
            status=201,
            headers={'Location': location})
示例#3
0
    def poweron(self, request, format=None):
        """Power on a set of computes by creating and submitting a ComputeSet.

        Reads the following keys from ``request.data``:

        * ``cluster`` -- name of the cluster the computes belong to.
        * ``walltime_mins`` -- optional; 2880 is used when missing or falsy.
        * ``computes`` -- optional; either a list of
          ``{"name": ..., "host": ...}`` objects, or a value that is
          stringified and expanded with ``hostlist.expand_hostlist``.
        * ``hosts`` -- optional hostlist expression, only read when
          ``computes`` is given and is not a list.
        * ``count`` -- only read when ``computes`` is absent/falsy; that many
          computes of the cluster are selected automatically, skipping ones
          that are in a live ComputeSet, are ``active``, have a mapped image
          or have a locked image.
        * ``allocation`` -- optional; when missing, the cluster's single
          allocation is used, and the request is rejected if the cluster
          does not have exactly one.

        A ComputeSet is saved in the CREATED state, every selected compute is
        validated and attached to it, and ``submit_computeset`` is queued
        with the fully serialized set. If any compute fails validation the
        freshly created ComputeSet is deleted again before responding.

        NOTE(review): when neither ``computes`` nor ``count`` is truthy the
        node list stays empty and an empty ComputeSet is still created and
        submitted -- confirm whether that should be rejected instead.

        Args:
            request: The incoming request carrying the keys listed above.
            format: Not used by this method.

        Returns:
            A 201 ``Response`` with the serialized ComputeSet and an
            absolute ``Location`` header, or a 400 ``Response`` with a
            plain-text reason when the request cannot be satisfied.

        Raises:
            Http404: No cluster with the given name exists.
            PermissionDenied: The cluster's project is not among the
                requesting user's groups.
        """
        clust = get_object_or_404(Cluster, name=request.data["cluster"])
        if clust.project not in request.user.groups.all():
            raise PermissionDenied()

        # Fall back to the default walltime when none (or a falsy one) is given.
        walltime_mins = request.data.get("walltime_mins") or 2880

        nodes = []
        hosts = []

        requested = request.data.get("computes")
        if requested:
            # ``isinstance`` is required here: ``value is list`` compares
            # against the type object itself and is never true for a list.
            if isinstance(requested, list):
                nodes = [obj["name"] for obj in requested]
                hosts = [obj["host"] for obj in requested]
            else:
                nodes = hostlist.expand_hostlist("%s" % requested)
                if request.data.get("hosts"):
                    hosts = hostlist.expand_hostlist("%s" % request.data["hosts"])
        elif request.data.get("count"):
            # Parse the count once; reject garbage instead of failing with a
            # server error.
            try:
                count = int(request.data["count"])
            except (TypeError, ValueError):
                return Response("The count should be an integer", status=status.HTTP_400_BAD_REQUEST)

            # Only slice the queryset with a positive bound; a negative
            # bound is not a valid queryset slice.
            if count > 0:
                computes_selected = (
                    Compute.objects.filter(cluster=clust)
                    .exclude(
                        computeset__state__in=[
                            ComputeSet.CSET_STATE_CREATED,
                            ComputeSet.CSET_STATE_SUBMITTED,
                            ComputeSet.CSET_STATE_RUNNING,
                            ComputeSet.CSET_STATE_ENDING,
                        ]
                    )
                    .exclude(state="active")
                    .filter(Q(image_state="unmapped") | Q(image_state__isnull=True))
                    .exclude(image_locked=True)[:count]
                )
                nodes.extend(comp.name for comp in computes_selected)

            if count <= 0 or len(nodes) < count:
                return Response(
                    "There are %i nodes available for starting. Requested number should be greater than zero."
                    % len(nodes),
                    status=status.HTTP_400_BAD_REQUEST,
                )

        if hosts and len(nodes) != len(hosts):
            return Response(
                "The length of hosts should be equal to length of nodes", status=status.HTTP_400_BAD_REQUEST
            )

        cset = ComputeSet()
        cset.cluster = clust
        cset.user = clust.username
        if request.data.get("allocation"):
            cset.account = request.data["allocation"]
        elif clust.allocations.count() == 1:
            cset.account = clust.allocations.get().allocation
        else:
            return Response("Please specify the allocation", status=status.HTTP_400_BAD_REQUEST)

        if not clust.allocations.filter(allocation=cset.account).exists():
            return Response(
                "Allocation %s does not belong to the cluster." % cset.account, status=status.HTTP_400_BAD_REQUEST
            )
        cset.walltime_mins = walltime_mins
        cset.jobid = None
        cset.name = None
        cset.nodelist = ""
        cset.state = ComputeSet.CSET_STATE_CREATED
        cset.node_count = len(nodes)
        cset.save()

        for node in nodes:
            try:
                compute = Compute.objects.get(name=node, cluster=clust)
            except Compute.DoesNotExist:
                # Do not leave the just-saved ComputeSet behind when the
                # caller names a compute that is not in this cluster.
                cset.delete()
                return Response(
                    "The compute %s does not exist in the cluster %s" % (node, request.data["cluster"]),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            # Fetch the conflicting set *before* deleting ours: a duplicate
            # node name makes our own set the match, and several matches
            # would make a ``get()`` raise instead of reporting the conflict.
            err_cs = (
                ComputeSet.objects.filter(computes__id__exact=compute.id)
                .exclude(
                    state__in=[
                        ComputeSet.CSET_STATE_COMPLETED,
                        ComputeSet.CSET_STATE_FAILED,
                        ComputeSet.CSET_STATE_CANCELLED,
                    ]
                )
                .first()
            )
            if err_cs is not None:
                cset.delete()
                return Response(
                    "The compute %s belongs to computeset %s which is in %s state" % (node, err_cs.id, err_cs.state),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            if (compute.image_state not in ["unmapped", None]) or compute.image_locked:
                cset.delete()
                return Response(
                    "The node %s's image is in %s state and image locked status is %s. Please contact the user support if the VM is not running."
                    % (node, compute.image_state, compute.image_locked),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            # Defensive check kept from the original; the lookup above is
            # already restricted to ``clust``.
            if compute.cluster.name != request.data["cluster"]:
                cset.delete()
                return Response(
                    "The node %s does not belong to the cluster %s, belongs to %s"
                    % (node, request.data["cluster"], compute.cluster.name),
                    status=status.HTTP_400_BAD_REQUEST,
                )

            cset.computes.add(compute)

        submit_computeset.delay(FullComputeSetSerializer(cset).data)

        # We should only poweron computes after entering jobscript and
        # finishing the PROLOG on all allocated nodes. At that point the
        # nodelist will be returned and we can call poweron_nodeset()
        # poweron_nodeset.delay(nodes, hosts)

        location = "/nucleus/v1/computeset/%s" % (cset.id)

        serializer = ComputeSetSerializer(cset)
        return Response(
            serializer.data,
            status=201,
            headers={"Location": request.build_absolute_uri(location)},
        )