Exemplo n.º 1
0
class DirectConnection(Connection):
    '''Pseudo-connection that works directly on the local machine.

    Files are copied with shutil.copy2 and programs are started as local
    subprocesses, so no remote host is involved.
    '''
    charset = typed_property("charset", str)
    '''charset used to decode stdout/stderr of local programs, usually utf-8'''

    def __init__(self):
        super().__init__()
        self.charset = "utf-8"

    def put_files(self, files, remote_path, timeout=None):
        '''Copy every entry of files to remote_path on the local machine.

        timeout is accepted for interface compatibility only and is not used.
        Always returns True; errors surface as exceptions of shutil.copy2.
        '''
        for source in files:
            shutil.copy2(source, remote_path)
        return True

    def get_files(self, files, local_path=None, timeout=None):
        '''Copy every entry of files to local_path.

        An empty or missing local_path means the current directory. timeout
        is accepted for interface compatibility only and is not used.
        Always returns True; errors surface as exceptions of shutil.copy2.
        '''
        destination = local_path or "."
        for source in files:
            shutil.copy2(source, destination)
        return True

    def syscall(self, program, args, timeout=None):
        '''Run program with the list args as a local subprocess.

        Returns the tuple (stdout, stderr, returncode) with both streams
        decoded using self.charset. On interpreters without subprocess.run
        stderr is not captured and reported as an empty string.
        '''
        if not sys.version_info > (3, 5, 0):
            # legacy path: check_output signals a non-zero exit by exception
            try:
                raw_out = subprocess.check_output([program] + args,
                                                  timeout=timeout)
            except subprocess.CalledProcessError as failure:
                return (failure.output.decode(self.charset), '',
                        failure.returncode)
            return (raw_out.decode(self.charset), '', 0)

        finished = subprocess.run([program] + args,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  timeout=timeout)
        out_text = finished.stdout.decode(self.charset)
        err_text = finished.stderr.decode(self.charset)
        return (out_text, err_text, finished.returncode)
Exemplo n.º 2
0
class SLURMQJob(QJob):
    '''Queue-job handle that only carries a job id.

    NOTE(review): judging by the class name the id is presumably the one
    assigned by the SLURM scheduler -- confirm against the code creating it.
    '''
    # identifier of the job, kept as a string
    jobid = typed_property("jobid", str)

    def __init__(self, jobid):
        '''Initialise the base class and store jobid in the jobid attribute.'''
        super().__init__()
        self.jobid = jobid
Exemplo n.º 3
0
class SSHConnection(Connection):
    '''connection via ssh

    Files are transferred with the scp command and remote programs are
    started with the ssh command; both are run as local subprocesses.
    Besides the main options username, machine and port, the user can set
    special attributes to change the commands and the options in use.
    '''

    username = typed_property("username", str)
    '''name of the user on the remote machine, None possible'''
    machine = typed_property("machine", str)
    '''name or IP-address of the remote machine'''
    remote_charset = typed_property("remote_charset", str)
    '''charset of stdout of the remote machine, usually utf-8'''
    port = typed_property("port", int)
    '''port to connect on the remote machine, None possible'''
    ssh_command = typed_property("ssh_command", str)
    '''command to use for ssh-connections, usually just 'ssh' for the ssh command in the PATH'''
    scp_command = typed_property("scp_command", str)
    '''command to use for scp-connections, usually just 'scp' for the scp command in the PATH'''
    ssh_options = typed_property("ssh_options", list)
    '''additional options to add to ssh'''
    scp_options = typed_property("scp_options", list)
    '''additional options to add to scp'''

    def __init__(self, username=None, machine="localhost", port=None):
        '''Store the connection parameters; nothing is contacted here.

        :param username: user on the remote machine, None to let ssh decide
        :param machine: name or IP-address of the remote machine
        :param port: port of the remote ssh server, None for the ssh default
        '''
        super().__init__()
        self.username = username
        self.machine = machine
        self.remote_charset = "utf-8"
        self.port = port
        self.ssh_command = "ssh"
        self.scp_command = "scp"
        # NOTE(review): StrictHostKeyChecking=no accepts unknown host keys
        # without asking; confirm that this is intended for all deployments.
        self.scp_options = ["-o", "ConnectTimeout=20", "-o", "Batchmode=yes",
                            "-o", "StrictHostKeyChecking=no",
                            "-q", "-p"]
        self.ssh_options = ["-o", "ConnectTimeout=20", "-o", "Batchmode=yes",
                            "-o", "StrictHostKeyChecking=no"]

    def _build_scp_args(self):
        '''Return a new list: scp command, scp options and, if set, the port.'''
        args = [self.scp_command]
        args.extend(self.scp_options)
        if self.port is not None:
            # scp takes the port with a capital -P
            args.extend(["-P", str(self.port)])
        return args

    def _build_ssh_args(self):
        '''Return a new list: ssh command, options, port, user and machine.'''
        args = [self.ssh_command]
        args.extend(self.ssh_options)
        if self.port is not None:
            args.extend(["-p", str(self.port)])
        if self.username is not None:
            args.extend(["-l", self.username])
        args.append(self.machine)
        return args

    def _remote_location(self, path):
        '''Return path in scp notation, i.e. [user@]machine:path.'''
        user = "" if self.username is None else self.username + '@'
        return "{user}{machine}:{path}".format(user=user,
                                               machine=self.machine,
                                               path=path)

    def _run_checked(self, args, timeout):
        '''Run args as subprocess, dropping its output.

        Raises subprocess.CalledProcessError on a non-zero exit status and
        subprocess.TimeoutExpired when timeout (seconds) is exceeded.
        '''
        if sys.version_info >= (3, 5):
            subprocess.run(args,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE,
                           timeout=timeout,
                           check=True)
        else:
            # subprocess.run does not exist before python 3.5
            subprocess.check_output(args, timeout=timeout)

    def put_files(self, files, remote_path, timeout=None):
        '''Copy the local files to remote_path on the remote machine via scp.

        :param files: iterable of local file names
        :param remote_path: destination path on the remote machine
        :param timeout: seconds to wait for scp, None to wait forever
        :return: True; failures are reported by exception
        :raises subprocess.CalledProcessError: scp exited with non-zero status
        :raises subprocess.TimeoutExpired: scp did not finish within timeout
        '''
        args = self._build_scp_args()
        args.extend(files)
        args.append(self._remote_location(remote_path))
        self._run_checked(args, timeout)
        return True

    def get_files(self, files, local_path=None, timeout=None):
        '''Copy files from the remote machine to local_path via scp.

        :param files: iterable of paths on the remote machine
        :param local_path: local destination, None for the current directory
        :param timeout: seconds to wait for scp, None to wait forever
        :return: True; failures are reported by exception
        :raises subprocess.CalledProcessError: scp exited with non-zero status
        :raises subprocess.TimeoutExpired: scp did not finish within timeout
        '''
        args = self._build_scp_args()
        args.extend(self._remote_location(path) for path in files)
        args.append("." if local_path is None else local_path)
        self._run_checked(args, timeout)
        return True

    def syscall(self, program, args, timeout=None):
        '''Run program with args on the remote machine through ssh.

        Program and arguments are shell-quoted one by one and joined to the
        single command string handed over to ssh. The args sequence of the
        caller is left unmodified.

        :param program: name of the remote program
        :param args: sequence of string arguments to the program
        :param timeout: seconds to wait for ssh, None to wait forever
        :return: tuple (stdout, stderr, returncode), the streams decoded with
            remote_charset; stderr is '' on interpreters without
            subprocess.run
        :raises subprocess.TimeoutExpired: ssh did not finish within timeout
        '''
        ssh_args = self._build_ssh_args()
        # build a fresh list, inserting into args would change the caller's list
        command = [program] + list(args)
        ssh_args.append(" ".join(shlex.quote(word) for word in command))

        if sys.version_info >= (3, 5):
            proc = subprocess.run(ssh_args,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  timeout=timeout)
            return (proc.stdout.decode(self.remote_charset),
                    proc.stderr.decode(self.remote_charset),
                    proc.returncode)

        # subprocess.run does not exist before python 3.5
        try:
            output = subprocess.check_output(ssh_args, timeout=timeout)
        except subprocess.CalledProcessError as cpe:
            return (cpe.output.decode(self.remote_charset),
                    '',
                    cpe.returncode)
        return (output.decode(self.remote_charset), '', 0)
Exemplo n.º 4
0
class SnapTask():
    '''One uploaded zip-file that has to be turned into a queued model run.

    The task knows where the upload lives (topdir), where an optional backup
    should go (backupdir), and the identifier and model name taken from the
    name of the zip-file.
    '''
    topdir = typed_property('topdir', str)
    backupdir = typed_property('backupdir', str)
    zipfile = typed_property('zipfile', str)
    model = typed_property('model', str)
    id = typed_property('id', str)
    scpdestination = typed_property('scpdestination', str)
    timestamp = typed_property('timestamp', datetime.datetime)
    rundir = typed_property('rundir', str)

    def __init__(self, topdir, backupdir, zip_file, model, ident,
                 scpdestination, scpoptions):
        '''Store the parameters and stamp the task with the current time.

        rundir is not set here, it is assigned when the task is handled.
        '''
        self.topdir = topdir
        self.backupdir = backupdir
        self.zipfile = zip_file
        self.model = model
        self.id = ident
        self.scpdestination = scpdestination
        self.scpoptions = scpoptions
        self.timestamp = datetime.datetime.now()

    def status_filename(self):
        '''Return the name of the status file, "<id>_<model>_status".'''
        return "{ident}_{model}_status".format(ident=self.id, model=self.model)

    def is_complete(self, reldir):
        '''Return True if the zip-file in topdir/reldir is readable and intact.

        Any error while opening or testing the archive (missing file,
        truncated upload, corrupt member) counts as "not complete".
        '''
        infile = os.path.join(self.topdir, reldir, self.zipfile)
        try:
            with zipfile.ZipFile(infile, 'r') as zf:
                # testzip returns the name of the first bad member, or None
                return zf.testzip() is None
        except Exception:
            # deliberately best-effort, but let KeyboardInterrupt/SystemExit pass
            return False

    def handle(self, hpc):
        ''' Handle the job on the hpc. HPC directories must be writable locally. Return True if job is submitted

        Errors are printed as traceback and reported as False instead of
        being raised.
        '''
        try:
            return self._handle(hpc)
        except Exception:
            traceback.print_exc()
        return False

    def _handle(self, hpc):
        '''Unpack the uploaded zip-file into a new run directory and submit it.

        Creates <topdir>/<RUN_DIR>/<timestamp>_<id>, moves the zip-file there
        from the upload directory, extracts it, writes the job script to
        'snap.job' and submits that file with hpc.submit_job.

        :return: True if hpc.submit_job returned a job, False otherwise
        '''
        top_rundir = os.path.join(self.topdir, SnapRemoteRunner.RUN_DIR)
        # exist_ok avoids the race between checking for and creating the dir
        os.makedirs(top_rundir, exist_ok=True)
        self.rundir = os.path.join(
            top_rundir, "{dt}_{ident}".format(
                dt=self.timestamp.strftime('%Y-%m-%dT%H%M%S'), ident=self.id))
        # plain mkdir on purpose: an already existing run directory is an error
        os.mkdir(self.rundir)
        infile = os.path.join(self.topdir, SnapRemoteRunner.UPLOAD_DIR,
                              self.zipfile)
        workfile = os.path.join(self.rundir, self.zipfile)
        os.rename(infile, workfile)

        with zipfile.ZipFile(workfile, 'r') as zf:
            zf.extractall(path=self.rundir)

        # start a remote detached qsub job
        snapJob = SnapJobEC(self, hpc)
        jobscript = snapJob.job_script()
        jobfile = os.path.join(self.rundir, 'snap.job')
        with open(jobfile, 'w') as jh:
            jh.write(jobscript)
            if self.backupdir:
                # let the job itself copy the run directory to the backup
                back_rundir = os.path.join(self.backupdir,
                                           SnapRemoteRunner.RUN_DIR)
                jh.write('''
# create files in backup directory
mkdir {back_rundir}
rsync -av {rundir} {back_rundir}
'''.format(back_rundir=back_rundir, rundir=self.rundir))

        # push the job into the queue, no feedback
        qjob = hpc.submit_job(jobfile, args=[])
        return qjob is not None
Exemplo n.º 5
0
class SnapRemoteRunner():
    '''Fetch uploaded zip-files from a remote machine and queue model runs.

    Constructing an instance already does the work: it selects a writable
    working directory, guards against parallel instances with a status file
    and then downloads, checks and submits the uploaded files.
    '''
    UPLOAD_DIR = 'upload'
    RUN_DIR = 'runs'
    REJECTED_DIR = 'rejected'
    WORK_DIR = 'work'

    hpc = typed_property("hpc", HPC)
    ssh = typed_property("ssh", SSHConnection)
    directory = typed_property("directory", str)
    directory2 = typed_property("directory2", str)
    dryrun = typed_property("dryrun", bool)
    remote = typed_property("remote", str)
    remote_dir = typed_property("remote_dir", str)
    remote_user = typed_property("remote_user", str)
    statusfile = typed_property("statusfile", str)

    def __init__(self,
                 directory,
                 hpc,
                 directory2,
                 remote,
                 remoteUser,
                 remoteDir,
                 dryrun=False):
        '''Set up the runner and process the currently uploaded files.

        :param directory: preferred local working directory
        :param hpc: name of the hpc, resolved with HPC.by_name
        :param directory2: second local directory; used as backup directory,
            or as working directory if directory is not writable
        :param remote: name of the remote machine
        :param remoteUser: user on the remote machine, may be empty
        :param remoteDir: directory on the remote machine
        :param dryrun: if True, no status file is written, no job is
            submitted and no remote file is deleted
        :raises Exception: neither directory nor directory2 is writable

        If the status file of another instance exists and this is no dryrun,
        the constructor returns without processing any file.
        '''
        self.dryrun = dryrun
        self.hpc = HPC.by_name(hpc)
        self.remote = remote
        self.remote_user = remoteUser
        self.remote_dir = remoteDir
        self.ssh = SSHConnection(remoteUser, remote)
        self.scpdestination = "{remote}:{remoteDir}".format(
            remote=self.remote, remoteDir=self.remote_dir)
        if self.remote_user:
            self.scpdestination = self.remote_user + '@' + self.scpdestination

        if dirIsWritable(directory):
            self.directory = directory
            if dirIsWritable(directory2):
                self.directory2 = directory2
            else:
                if self.dryrun:
                    print("directory2: '{}' not writable and disabled".format(
                        directory2),
                          file=sys.stderr)
                self.directory2 = ""
        elif dirIsWritable(directory2):
            if self.dryrun:
                print(
                    "directory: '{}' not writable and disabled, using '{}' as default "
                    .format(directory, directory2),
                    file=sys.stderr)
            self.directory = directory2
            self.directory2 = ""
        else:
            raise Exception("{dir1} and {dir2} not writable".format(
                dir1=directory, dir2=directory2))

        # exist_ok avoids the race between checking for and creating the dir
        os.makedirs(os.path.join(self.directory, self.WORK_DIR),
                    exist_ok=True)

        self.statusfile = os.path.join(self.directory,
                                       "snapRemoteRunner_working")
        # make sure only one instance is running, not failsafe (no flock on lustre, eventually in different directories, but good enough)
        if os.path.exists(self.statusfile):
            if self.dryrun:
                # a dryrun only reports the other instance and continues
                with open(self.statusfile, 'rt') as fh:
                    msg = fh.read()
                print("status-file exists at '{}' with:".format(
                    self.statusfile),
                      file=sys.stderr)
                print(msg, file=sys.stderr)
            else:
                file_modified = datetime.datetime.fromtimestamp(
                    os.lstat(self.statusfile).st_mtime)
                age = datetime.datetime.now() - file_modified
                if age > datetime.timedelta(hours=3):
                    # release a status file hanging for more than 3 hours,
                    # so that the next run can start; this run still stops
                    print("cleaning up {} after 3 hours".format(
                        self.statusfile),
                          file=sys.stderr)
                    _cleanupFileCallable(self.statusfile)()
                return
        elif not self.dryrun:
            with open(self.statusfile, 'wt') as fh:
                # registered before writing, so the file is cleaned up at
                # interpreter exit even if a later step fails
                atexit.register(_cleanupFileCallable(self.statusfile))
                working = "working pid: {} on node: {}\n".format(
                    os.getpid(),
                    os.uname().nodename)
                fh.write(working)
                if DEBUG:
                    print(working)

        self._check_and_unpack_new_files()

    def write_status(self, task, tag, msg=""):
        '''Write a status file to the remote host. All errors here are ignored

        Ignored means: the traceback is printed and None is returned.
        '''
        try:
            return self._write_status(task, tag, msg)
        except Exception:
            traceback.print_exc()

    def _write_status(self, task, tag, msg=""):
        '''Append a status line for task to its local status file and copy
        that file to remote_dir on the remote host.

        Known tags are 'downloading' (100), 'running' (101), 'success' (202),
        'error' (409) and 'internal' (500); any other tag is written verbatim
        together with msg.

        Old codes from perl:
        if ($status_number == 100) {$text = ":Getting ARGOS data from server";}
        if ($status_number == 200) {$text = ":Finished getting ARGOS-data from server";}
        if ($status_number == 201) {$text = ":Finished running ${model}";}
        if ($status_number == 202) {$text = ":Finished extracting ${model} data for ARGOS";}
        if ($status_number == 401) {$text = ":$run_ident" . "_${model}_input does not exist";}
        if ($status_number == 402) {$text = ":$run_ident" . "_${model}_iso does not exist";}
        if ($status_number == 403) {$text = ":$run_ident" . "_${model}_src does not exist";}
        if ($status_number == 404) {$text = ":Inconsistent isotope identification (isotop-navn)";}
        if ($status_number == 408) {$text = ":Initial time not covered by NWP database";}
        if ($status_number == 409) {$text = ":${model} output data do not exist";}
        my $message = "$status_number" . ":" . "$timestamp" . ":" . "$text";
        '''
        filename = task.status_filename()
        work_file = os.path.join(self.directory, self.WORK_DIR, filename)
        timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M')
        with open(work_file, 'a+') as fh:
            if tag == 'downloading':
                fh.write("{x}:{ts}::Getting ARGOS data from server\n".format(
                    x=100, ts=timestamp))
            elif tag == 'success':
                fh.write(
                    "{x}:{ts}::Finished extracting {model} data for ARGOS\n".
                    format(x=202, ts=timestamp, model=task.model))
            elif tag == 'error':
                fh.write("{x}:{ts}::{model} output data do not exist\n".format(
                    x=409, ts=timestamp, model=task.model))
            elif tag == 'running':
                fh.write("101:{ts}::running {model}\n".format(
                    ts=timestamp, model=task.model))
            elif tag == 'internal':
                # task.rundir is read in this branch only
                fh.write(
                    "{x}:{ts}::internal error, cannot start job in queue in dir '{rundir}'\n"
                    .format(x=500, ts=timestamp, rundir=task.rundir))
            else:
                fh.write("{tag}:{ts} {msg}\n".format(ts=timestamp,
                                                     tag=tag,
                                                     msg=msg))
        self.ssh.put_files([work_file], self.remote_dir, 30)

    def _check_and_unpack_new_files(self):
        '''Download new files from the remote machine to the upload directory.
        - Move invalid files to rejected. (Wrong name, not containing *ARGOS2*.zip)
        - Hand complete zip-files over to a SnapTask, which unpacks them and
          submits the model run; report 'running' or 'internal' as status
        - Leave incomplete zip-files in place and report 'downloading'
        - Remove complete and rejected files from the remote upload directory

        throws an exception when download / unpack failed unexpectedly
        '''
        remote_files = os.path.join(self.remote_dir, self.UPLOAD_DIR, '*')
        local_upload = os.path.join(self.directory, self.UPLOAD_DIR)
        local_rejected = os.path.join(self.directory, self.REJECTED_DIR)
        # exist_ok avoids the race between checking for and creating the dirs
        os.makedirs(local_upload, exist_ok=True)
        os.makedirs(local_rejected, exist_ok=True)
        try:
            self.ssh.get_files([remote_files], local_upload, 30)
        except subprocess.CalledProcessError as cpe:
            # code 1 is generic error, e.g. no files, 2 is connection error
            if cpe.returncode != 1:
                raise

        zip_pattern = re.compile(r'([\w\-\.:]*)_ARGOS2(.*)\.zip')
        delete_in_upload = []
        if DEBUG: print("checking files in uploaddir: {}".format(local_upload))
        for f in os.listdir(local_upload):
            if DEBUG: print("found file: {}".format(f))
            if not os.path.isfile(os.path.join(local_upload, f)):
                continue

            m = zip_pattern.match(f)
            if not m:
                # wrong name: move out of the way and drop the remote copy
                os.rename(os.path.join(local_upload, f),
                          os.path.join(local_rejected, f))
                delete_in_upload.append(f)
                continue

            if DEBUG: print("found zip-file: '{}'".format(f))
            task = SnapTask(topdir=self.directory,
                            backupdir=self.directory2,
                            zip_file=f,
                            ident=m.group(1),
                            model=m.group(2),
                            scpdestination=self.scpdestination,
                            scpoptions=" ".join(self.ssh.scp_options))
            if not task.is_complete(reldir=self.UPLOAD_DIR):
                # upload still in progress, try again in the next run
                self.write_status(task, tag='downloading')
                continue

            if DEBUG: print("handling zipfile: {}".format(f))
            if not self.dryrun:
                if task.handle(self.hpc):
                    self.write_status(task, tag='running')
                else:
                    self.write_status(task, tag='internal')
            delete_in_upload.append(f)

        # NOTE(review): these paths are relative (UPLOAD_DIR/<file>) while the
        # download above used remote_dir/UPLOAD_DIR/* -- presumably remote_dir
        # is the login directory of the remote user; confirm.
        delete_upload_files = [
            os.path.join(self.UPLOAD_DIR, f) for f in delete_in_upload
        ]
        if DEBUG: print("deleting remotely: " + ", ".join(delete_upload_files))
        # without files there is nothing to remove: skip the ssh round trip
        if delete_upload_files and not self.dryrun:
            self.ssh.syscall('rm', delete_upload_files, 30)