示例#1
0
    def run(self, args):
        """Run 'connect' subcommand."""
        optparser = ConnectOptions(go_args=args,
                                   envvar_prefix=self.envvar_prefix,
                                   usage=self.usage_txt)
        try:
            if len(optparser.args) > 1:
                label = optparser.args[1]
            else:
                self.report_error("No label provided.")

            print "Connecting to HOD cluster with label '%s'..." % label

            try:
                jobid = cluster_jobid(label)
                env_script = cluster_env_file(label)
            except ValueError as err:
                self.report_error(err)

            print "Job ID found: %s" % jobid

            pbs = rm_pbs.Pbs(optparser)
            jobs = pbs.state()
            pbsjobs = [job for job in jobs if job.jobid == jobid]

            if len(pbsjobs) == 0:
                self.report_error("Job with job ID '%s' not found by pbs.",
                                  jobid)
            elif len(pbsjobs) > 1:
                self.report_error("Multiple jobs found with job ID '%s': %s",
                                  jobid, pbsjobs)

            pbsjob = pbsjobs[0]
            if pbsjob.state == ['Q', 'H']:
                # This should never happen since the hod.d/<jobid>/env file is
                # written on cluster startup. Maybe someone hacked the dirs.
                self.report_error(
                    "Cannot connect to cluster with job ID '%s' yet. It is still queued.",
                    jobid)
            else:
                print "HOD cluster '%s' @ job ID %s appears to be running..." % (
                    label, jobid)

            print "Setting up SSH connection to %s..." % pbsjob.hosts

            # -i: interactive non-login shell
            cmd = [
                'ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile',
                env_script, '-i'
            ]
            self.log.info("Logging in using command: %s", ' '.join(cmd))
            os.execvp('/usr/bin/ssh', cmd)

        except StandardError as err:
            self._log_and_raise(err)

        return 0
示例#2
0
    def run(self, args):
        """Run 'connect' subcommand."""
        optparser = ConnectOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt)
        try:
            if len(optparser.args) > 1:
                label = optparser.args[1]
            else:
                _log.error("No label provided.")
                sys.exit(1)

            print "Connecting to HOD cluster with label '%s'..." % label

            try:
                jobid = cluster_jobid(label)
                env_script = cluster_env_file(label)
            except ValueError as err:
                _log.error(err)
                sys.exit(1)

            print "Job ID found: %s" % jobid

            pbs = rm_pbs.Pbs(optparser)
            jobs = pbs.state()
            pbsjobs = [job for job in jobs if job.jobid == jobid]

            if len(pbsjobs) == 0:
                _log.error("Job with job ID '%s' not found by pbs.", jobid)
                sys.exit(1)
            elif len(pbsjobs) > 1:
                _log.error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs)
                sys.exit(1)

            pbsjob = pbsjobs[0]
            if pbsjob.state == ['Q', 'H']:
                # This should never happen since the hod.d/<jobid>/env file is
                # written on cluster startup. Maybe someone hacked the dirs.
                _log.error("Cannot connect to cluster with job ID '%s' yet. It is still queued.", jobid)
                sys.exit(1)
            else:
                print "HOD cluster '%s' @ job ID %s appears to be running..." % (label, jobid)

            print "Setting up SSH connection to %s..." % pbsjob.hosts

            # -i: interactive non-login shell
            cmd = ['ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile', env_script, '-i']
            _log.info("Logging in using command: %s", ' '.join(cmd))
            os.execvp('/usr/bin/ssh', cmd)
            return 0 # pragma: no cover

        except StandardError as err:
            fancylogger.setLogFormat(fancylogger.TEST_LOGGING_FORMAT)
            fancylogger.logToScreen(enable=True)
            _log.raiseException(err)
示例#3
0
    def run(self, args):
        """Run 'destroy' subcommand."""
        optparser = DestroyOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt)
        try:
            label, jobid = None, None

            if len(optparser.args) > 1:
                label = optparser.args[1]
                print "Destroying HOD cluster with label '%s'..." % label
            else:
                _log.error("No label provided.")
                sys.exit(1)

            try:
                jobid = cluster_jobid(label)
                print "Job ID: %s" % jobid
            except ValueError as err:
                _log.error(err)
                sys.exit(1)

            # try to figure out job state
            job_state = None

            pbs = rm_pbs.Pbs(optparser)
            jobs = pbs.state()
            pbsjobs = [job for job in jobs if job.jobid == jobid]
            _log.debug("Matching jobs for job ID '%s': %s", jobid, pbsjobs)

            if len(pbsjobs) == 1:
                job_state = pbsjobs[0].state 
                print "Job status: %s" % job_state

            elif len(pbsjobs) == 0:
                print "(job no longer found)"

            else:
                _log.error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs)
                sys.exit(1)

            # request confirmation is case the job is currently running
            if job_state == 'R':
                resp = raw_input("Confirm destroying the *running* HOD cluster with label '%s'? [y/n]: " % label)
                if resp != 'y':
                    print "(destruction aborted)"
                    return

            elif job_state in ['C', 'E']:
                print "(job has already ended/completed)"
                job_state = None

            print "\nStarting actual destruction of HOD cluster with label '%s'...\n" % label

            # actually destroy HOD cluster by deleting job and removing cluster info dir and local work dir
            if job_state is not None:
                # if job was not successfully deleted, pbs.remove will print an error message
                if pbs.remove(jobid):
                    print "Job with ID %s deleted." % jobid

            rm_cluster_localworkdir(label)

            if cluster_info_exists(label):
                rm_cluster_info(label)

            print "\nHOD cluster with label '%s' (job ID: %s) destroyed." % (label, jobid)

        except StandardError as err:
            fancylogger.setLogFormat(fancylogger.TEST_LOGGING_FORMAT)
            fancylogger.logToScreen(enable=True)
            _log.raiseException(err)
示例#4
0
    def run(self, args):
        """Run 'destroy' subcommand."""
        optparser = DestroyOptions(go_args=args,
                                   envvar_prefix=self.envvar_prefix,
                                   usage=self.usage_txt)
        try:
            label, jobid = None, None

            if len(optparser.args) > 1:
                label = optparser.args[1]
                print "Destroying HOD cluster with label '%s'..." % label
            else:
                self.report_error("No label provided.")

            try:
                jobid = cluster_jobid(label)
                print "Job ID: %s" % jobid
            except ValueError as err:
                self.report_error(err)

            # try to figure out job state
            job_state = None

            pbs = rm_pbs.Pbs(optparser)
            jobs = pbs.state()
            pbsjobs = [job for job in jobs if job.jobid == jobid]
            self.log.debug("Matching jobs for job ID '%s': %s", jobid, pbsjobs)

            if len(pbsjobs) == 1:
                job_state = pbsjobs[0].state
                print "Job status: %s" % job_state

            elif len(pbsjobs) == 0:
                print "(job no longer found)"

            else:
                self.report_error("Multiple jobs found with job ID '%s': %s",
                                  jobid, pbsjobs)

            # request confirmation is case the job is currently running
            if job_state == 'R':
                resp = raw_input(
                    "Confirm destroying the *running* HOD cluster with label '%s'? [y/n]: "
                    % label)
                if resp != 'y':
                    print "(destruction aborted)"
                    return

            elif job_state in ['C', 'E']:
                print "(job has already ended/completed)"
                job_state = None

            print "\nStarting actual destruction of HOD cluster with label '%s'...\n" % label

            # actually destroy HOD cluster by deleting job and removing cluster info dir and local work dir
            if job_state is not None:
                # if job was not successfully deleted, pbs.remove will print an error message
                if pbs.remove(jobid):
                    print "Job with ID %s deleted." % jobid

            rm_cluster_localworkdir(label)

            if cluster_info_exists(label):
                rm_cluster_info(label)

            print "\nHOD cluster with label '%s' (job ID: %s) destroyed." % (
                label, jobid)

        except StandardError as err:
            self._log_and_raise(err)

        return 0