Exemplo n.º 1
0
 def stagedir_name(self, stagedir=None):
     """Return the staging directory to use for this job.

     :Arguments:

        stagedir
                 explicit staging directory; when None the canonical
                 location ``pathjoin('/scratch', $USER, self.jobdir_name)``
                 is used instead.

     :Returns: None when ``self.queuingsystem`` equals 'Local', otherwise
               the (given or canonical) staging directory.

     :Raises: KeyError if no stagedir is given and USER is not set in
              the environment.
     """
     # Compare by value: 'is' tests object identity and only matches when
     # both strings happen to be the very same (interned) object.
     if self.queuingsystem == 'Local':
         return None
     if stagedir is None:
         # use canonical setup
         stagedir = pathjoin('/scratch', os.environ['USER'],
                             self.jobdir_name)
     return stagedir
Exemplo n.º 2
0
 def stagedir_name(self, stagedir=None):
     """Pick the directory into which the job is staged.

     :Arguments:

        stagedir
                 staging directory chosen by the caller; None selects the
                 canonical ``pathjoin('/scratch', $USER, self.jobdir_name)``.

     :Returns: None if ``self.queuingsystem`` equals 'Local'; otherwise the
               caller-supplied or canonical staging directory.

     :Raises: KeyError if the canonical path is needed but USER is missing
              from the environment.
     """
     # '==' instead of 'is': string identity is an implementation detail
     # (interning), so an equal but distinct 'Local' string must still match.
     if self.queuingsystem == 'Local':
         return None
     if stagedir is None:
         # use canonical setup
         stagedir = pathjoin('/scratch', os.environ['USER'],
                             self.jobdir_name)
     return stagedir
Exemplo n.º 3
0
 def unstage(self):
     """Copy results back. Shell-style glob patterns are allowed.

     Sets ``self.__MODE__`` to "unstage" and returns immediately when
     ``self.queuingsystem`` equals 'Local'. Otherwise the directories
     needed for ``self.output`` are created under ``self.startdir`` and,
     for every output item, each file matching the staged pattern
     ``self.filenames[key]`` is copied to the corresponding directory.
     Existing destination files are overwritten without warning.
     """
     self.__MODE__ = "unstage"
     # Value comparison; 'is' would only match the identical string object.
     if self.queuingsystem == 'Local':
         return
     import glob
     # make result directories, may be absolute!
     self._make_all_dirs(self.startdir, self.output, sanitize=False)
     # NOTE: the messages below are formatted with locals(), so the local
     # names key, p, src, srcpath and destpath must not be renamed.
     for key, p in self.output.items():
         src = self.filenames[key]  # always relative to stagedir
         srcdir = os.path.dirname(p)
         # may be absolute
         destdir = pathjoin(self.startdir, srcdir, sanitize=False)
         self.msg("item=%(key)s: looking for %(p)s [=%(src)s]..." % locals())
         for srcpath in glob.glob(src):
             srcname = os.path.basename(srcpath)
             destpath = pathjoin(destdir, srcname, sanitize=False)
             self.msg("item=%(key)s: copying %(srcpath)s" % locals(),
                      newline=False)
             shutil.copyfile(srcpath, destpath)  # silently replaces files !
             self.msg(" --> %(destpath)s" % locals())
Exemplo n.º 4
0
 def _make_all_dirs(self, topdir, filedict, **kwargs):
     """Create directories under topdir, based on paths in filedict."""
     for key, p in filedict.items():
         srcdir = os.path.dirname(p)
         destdir = pathjoin(topdir, srcdir, **kwargs)
         try:
             os.makedirs(destdir)  # recursive
             self.msg("item=%(key)s: created dir %(destdir)s" % locals())
         except os.error, e:
             if e.errno == errno.EEXIST:
                 pass
             else:
                 raise
Exemplo n.º 5
0
 def _make_all_dirs(self,topdir,filedict,**kwargs):
     """Create directories under topdir, based on paths in filedict."""
     for key,p in filedict.items():
         srcdir = os.path.dirname(p)
         destdir = pathjoin(topdir,srcdir,**kwargs)
         try:
             os.makedirs(destdir)  # recursive
             self.msg("item=%(key)s: created dir %(destdir)s" % locals())
         except os.error,e:
             if e.errno == errno.EEXIST:
                 pass
             else:
                 raise
Exemplo n.º 6
0
 def unstage(self):
     """Copy results back. Shell-style glob patterns are allowed.

     The mode marker ``self.__MODE__`` is set to "unstage". Nothing else
     happens when ``self.queuingsystem`` equals 'Local'. Otherwise result
     directories are created below ``self.startdir`` and every file that
     matches a staged output pattern (``self.filenames[key]``) is copied
     into its result directory, replacing any file already there.
     """
     self.__MODE__ = "unstage"
     # Equality, not identity: 'is' depends on string interning and can
     # miss an equal 'Local' string that is a different object.
     if self.queuingsystem == 'Local':
         return
     import glob
     self._make_all_dirs(
         self.startdir, self.output,
         sanitize=False)  # make result directories, may be absolute!
     # The log messages use "% locals()"; keep the local variable names
     # (key, p, src, srcpath, destpath) in sync with the format strings.
     for key, p in self.output.items():
         src = self.filenames[key]  # always relative to stagedir
         srcdir = os.path.dirname(p)
         destdir = pathjoin(self.startdir, srcdir,
                            sanitize=False)  # may be absolute
         self.msg("item=%(key)s: looking for %(p)s [=%(src)s]..." %
                  locals())
         for srcpath in glob.glob(src):
             srcname = os.path.basename(srcpath)
             destpath = pathjoin(destdir, srcname, sanitize=False)
             self.msg("item=%(key)s: copying %(srcpath)s" % locals(),
                      newline=False)
             shutil.copyfile(srcpath, destpath)  # silently replaces files !
             self.msg(" --> %(destpath)s" % locals())
Exemplo n.º 7
0
    def __init__(self, *args, **kwargs):
        """Set up SGE job.

        All positional and keyword arguments are first handed to the
        parent class constructor; the keywords below are then read here.

        :Arguments:

           inputfiles
                            dict of input files, each a path relative to
                            startdir; glob patterns are not supported.
           outputfiles
                            dict of result files or glob patterns (relative
                            to stagedir == relative to startdir)
           variables
                            key/value pairs that the script can access as
                            Job.variables[key]
           startdir
                            directory in which the input is found (must be
                            nfs-mounted on the node)
           stagedir
                            local scratch directory on the node into which
                            all input files are copied. The default should
                            be ok.

           JOB_NAME
                            unique identifier (only set this if the job is
                            NOT submitted through the Gridengine queuing
                            system AND the files should still be copied to
                            a scratch disk, i.e. staging proceeds as it
                            would for a SGE-submitted job).
           SGE_TASK_ID
                            fake a task id (use with JOB_NAME)

        """
        self.__MODE__ = "init"  # current state, for self.msg
        super(Job, self).__init__(*args, **kwargs)

        # file dictionaries and user variables; each defaults to an empty dict
        self.input = kwargs.setdefault('inputfiles', {})
        self.output = kwargs.setdefault('outputfiles', {})
        self.variables = kwargs.setdefault('variables', {})

        # startdir: where input files are found and results are copied back
        requested_startdir = kwargs.setdefault('startdir', None)
        self.startdir = self.startdir_name(requested_startdir)

        # stagedir: local directory on the node
        requested_stagedir = kwargs.setdefault('stagedir', None)
        self.stagedir = self.stagedir_name(requested_stagedir)

        # normalized filenames (always under stagedir), for input and output
        staged = {}
        for name, relpath in joindicts(self.input, self.output).items():
            staged[name] = pathjoin(self.stagedir, relpath,
                                    refdir=self.startdir)
        self.filenames = staged

        self.statusmessage()
Exemplo n.º 8
0
    def __init__(self, *args, **kwargs):
        """Set up SGE job.

        The parent constructor receives every argument unchanged; the
        keyword arguments listed here are additionally evaluated.

        :Arguments:

           inputfiles
                            dict of input files (paths relative to startdir);
                            globs are not supported.
           outputfiles
                            dict of result files or glob patterns (relative
                            to stagedir == relative to startdir)
           variables
                            key/value pairs, available to the script as
                            Job.variables[key]
           startdir
                            path to the directory holding the input (must be
                            nfs-mounted on the node)
           stagedir
                            local scratch directory on the node; all input
                            files are copied there. The default should be ok.

           JOB_NAME
                            unique identifier (only set this if the job is
                            NOT submitted through the Gridengine queuing
                            system AND the files should be copied to a
                            scratch disk, i.e. staging proceeds as it would
                            for a SGE-submitted job).
           SGE_TASK_ID
                            fake a task id (use with JOB_NAME)

        """
        self.__MODE__ = "init"  # current state, for self.msg
        super(Job, self).__init__(*args, **kwargs)

        self.input = kwargs.setdefault('inputfiles', {})
        self.output = kwargs.setdefault('outputfiles', {})
        self.variables = kwargs.setdefault('variables', {})

        # directory with the input files; results are copied back to it
        self.startdir = self.startdir_name(
            kwargs.setdefault('startdir', None))
        # local directory on node
        self.stagedir = self.stagedir_name(
            kwargs.setdefault('stagedir', None))

        # map every input/output key to its normalized filename
        # (always under stagedir)
        all_files = joindicts(self.input, self.output)
        self.filenames = dict(
            (name, pathjoin(self.stagedir, relpath, refdir=self.startdir))
            for name, relpath in all_files.items())

        self.statusmessage()
Exemplo n.º 9
0
    def __init__(self, *args, **kwargs):
        """Set up the Job:

        job = Job(inputfiles=dict(...),outputfiles=dict(...),variables=dict(...),**kwargs)

        inputfiles and outputfiles are dictionaries with arbitrary keys.
        Every value is a file path relative to the startdir (by default
        the directory from which the SGE job starts --- use the #$ -cwd
        flag!). For files that are not relative to the start dir, new
        directories are constructed under the stage dir; in that case it
        is important that the user script ONLY uses the names stored in
        self.filenames, because these are the proper paths of the local
        (staged) files the script should operate on.

          job.stage()

        copies the inputfiles to the stagedir on the node's scratch dir,
        creating sub directories as necessary; directories that appear
        in the outputfiles paths are created as well.

          job.unstage()

        copies back every file listed in outputfiles (directories may
        again be part of the path) and creates the directories in the
        startdir if needed. Shell-style glob patterns are allowed for the
        outputfiles, e.g. outfiles =
        {'all_dcd': '*.dcd', 'last_data':'*[5-9].dat'}

        Sensible defaults are selected automatically for startdir (cwd)
        and stagedir (/scratch/USER/JOB_NAME.JOB_ID).

        When the script is not run through SGE (i.e. the environment
        variable JOB_NAME is not set) it runs without staging, which is
        pretty much equivalent to using

          from staging.Local import Job

        :Input:

        inputfiles       dict of input files (paths relative to startdir);
                         globs are not supported.
        outputfiles      dict of result files or glob patterns (relative to
                         stagedir == relative to startdir)
        variables        key/value pairs, available to the script as
                         Job.variables[key]
        startdir         path to the directory holding the input
                         (must be nfs-mounted on node)
        stagedir         local scratch directory on node; all input files
                         are copied there. The default should be ok.

        JOB_NAME         unique identifier (only set this if the job is NOT
                         submitted through the Gridengine queuing system AND
                         the files should be copied to a scratch disk, i.e.
                         staging proceeds as it would for a SGE-submitted
                         job).
        SGE_TASK_ID      fake a task id (use with JOB_NAME)

        :Attributes:

        input            inputfiles dict  (relative to startdir or absolute)
        output           outputfiles dict (relative to startdir or absolute,
                         can contain globs)
        filenames        merged dict of input and output, pointing to
                         *staged* files
        variables        variables dict

        :Methods:

        stage()          setup job on the nodes in stagedir
        unstage()        retrieve results to startdir
        cleanup()        remove all files on the node (rm -rf stagedir)
        """
        self.__MODE__ = "init"  # current state, for self.msg
        super(Job, self).__init__(*args, **kwargs)

        # user-supplied dictionaries; each falls back to an empty dict
        self.input = kwargs.setdefault('inputfiles', {})
        self.output = kwargs.setdefault('outputfiles', {})
        self.variables = kwargs.setdefault('variables', {})

        # where we find input files and copy back results
        startdir_arg = kwargs.setdefault('startdir', None)
        self.startdir = self.startdir_name(startdir_arg)

        # local directory on node
        stagedir_arg = kwargs.setdefault('stagedir', None)
        self.stagedir = self.stagedir_name(stagedir_arg)

        # normalized filenames (always under stagedir)
        normalized = {}
        for label, relpath in joindicts(self.input, self.output).items():
            normalized[label] = pathjoin(self.stagedir, relpath,
                                         refdir=self.startdir)
        self.filenames = normalized

        self.statusmessage()
Exemplo n.º 10
0
class Job(SGE_job):
    """The Job class encapsulates the SGE job and allows for clean staging and unstaging."""

    def __init__(self, *args, **kwargs):
        """Set up the Job:

        job = Job(inputfiles=dict(...),outputfiles=dict(...),variables=dict(...),**kwargs)

        inputfiles and outputfiles are dictionaries with arbitrary
        keys; each item is a path to a file relative to the startdir
        (which by default is the directory from which the SGE job
        starts --- use the #$ -cwd flag!). If the files are not
        relative to the start dir then new directories are constructed
        under the stage dir; in this instance it is important that
        the user script ONLY uses the filenames in self.filenames:
        These have the proper paths of the local (staged) files for
        the script to operate on.

        With

          job.stage()

        inputfiles are copied to the stagedir on the node's scratch
        dir and sub directories are created as necessary; directories
        mentioned as part of the outputfiles are created, too.

          job.unstage()

        copies back all files mentioned in output files (again, use
        directories as part of the path as necessary) and creates the
        directories in the startdir if needed. For the outputfiles one
        can also use shell-style glob patterns, e.g. outfiles =
        {'all_dcd': '*.dcd', 'last_data':'*[5-9].dat'}

        Sensible defaults are automatically selected for startdir
        (cwd) and stagedir (/scratch/USER/JOB_NAME.JOB_ID).

        If the script is not run through SGE (i.e. the environment
        variable JOB_NAME is not set) then the script is run without
        staging; this is pretty much equivalent to using

          from staging.Local import Job

        :Input:

        inputfiles       dict of input files (with relative path to startdir);
                         globs are not supported.
        outputfiles      dict of result files or glob patterns (relative to
                         stagedir == relative to startdir)
        variables        key/value pairs that can be used in the script as
                         Job.variables[key]
        startdir         path to the directory where the input can be found
                         (must be nfs-mounted on node)
        stagedir         local scratch directory on node; all input files are copied
                         there. The default should be ok.

        JOB_NAME         unique identifier (only set this if this is NOT submitted through
                         the Gridengine queuing system AND if the files should be copied
                         to a scratch disk (i.e. staging proceeds as it would for a
                         SGE-submitted job).)
        SGE_TASK_ID      fake a task id (use with JOB_NAME)

        :Attributes:

        input            inputfiles dict  (relative to startdir or absolute)
        output           outputfiles dict (relative to startdir or absolute, can contain globs)
        filenames        merged dict of input and output, pointing to *staged* files
        variables        variables dict

        :Methods:

        stage()          setup job on the nodes in stagedir
        unstage()        retrieve results to startdir
        cleanup()        remove all files on the node (rm -rf stagedir)
        """
        self.__MODE__ = "init"  # current state, for self.msg
        super(Job, self).__init__(*args, **kwargs)
        self.input = kwargs.setdefault('inputfiles', {})
        self.output = kwargs.setdefault('outputfiles', {})

        self.variables = kwargs.setdefault('variables', {})
        # where we find input files and copy back results
        self.startdir = self.startdir_name(kwargs.setdefault('startdir', None))
        # local directory on node
        self.stagedir = self.stagedir_name(kwargs.setdefault('stagedir', None))
        # normalized filenames (always under stagedir)
        self.filenames = dict(
            (k, pathjoin(self.stagedir, path, refdir=self.startdir))
            for k, path in joindicts(self.input, self.output).items())

        self.statusmessage()

    def statusmessage(self):
        """Emit the parent's status message, then report startdir and stagedir."""
        super(Job, self).statusmessage()
        self.msg("startdir:       %s" % self.startdir)
        self.msg("stagedir:       %s" % self.stagedir)

    def startdir_name(self, startdir=None):
        """Return startdir, or the resolved current directory if it is None."""
        if startdir is None:
            # use canonical setup (relies on -cwd SGE flag)
            startdir = os.path.realpath(os.path.curdir)
        return startdir

    def stagedir_name(self, stagedir=None):
        """Return the staging directory, or None for a 'Local' job.

        When no stagedir is given the canonical location
        ``pathjoin('/scratch', $USER, self.jobdir_name)`` is used; this
        raises KeyError if USER is not set in the environment.
        """
        # Compare by value: 'is' tests identity and only matches when both
        # strings happen to be the same interned object.
        if self.queuingsystem == 'Local':
            return None
        if stagedir is None:
            # use canonical setup
            stagedir = pathjoin('/scratch', os.environ['USER'],
                                self.jobdir_name)
        return stagedir

    def stage(self):
        """Copy all input files to the scratch directory.

        Does nothing beyond setting ``self.__MODE__`` when
        ``self.queuingsystem`` equals 'Local'. Otherwise the stage
        directory is created (a warning is logged if it already exists),
        the directory structure for input and output files is created
        below it, every input file is copied to its staged location and
        finally the current working directory is changed to the stage
        directory.

        :Raises: OSError if the stage directory cannot be created for a
                 reason other than already existing.
        """
        self.__MODE__ = "stage"
        if self.queuingsystem == 'Local':  # value comparison, not identity
            return
        # NOTE: messages below are formatted with locals(); the names
        # stagedir, key, srcpath and destpath must match the format strings.
        stagedir = self.stagedir
        try:
            os.makedirs(stagedir)
        except OSError as e:  # 'as' form is valid on Python 2.6+ and 3
            if e.errno != errno.EEXIST:
                raise
            self.msg("WARNING %(stagedir)s already exists." % locals())
        else:
            self.msg("Created stage dir %(stagedir)s." % locals())
        # copy input and preserve directory structure
        self._make_all_dirs(stagedir, self.input, refdir=self.startdir)
        # also create directories for the output files
        self._make_all_dirs(stagedir, self.output, refdir=self.startdir)
        for key, p in self.input.items():  # copy input files
            srcpath = pathjoin(
                self.startdir, p,
                sanitize=False)  # may be absolute (and ignores startdir!)
            destpath = self.filenames[key]  # ALWAYS under stagedir
            self.msg("item=%(key)s: copying %(srcpath)s" % locals(),
                     newline=False)
            shutil.copyfile(srcpath, destpath)
            self.msg(" --> %(destpath)s" % locals())
        # finally, change current directory to the stage dir: all further
        # commands can assume that staging has been completed
        os.chdir(stagedir)
        self.msg("chdir to %(stagedir)s successful." % locals())