Example #1
    def createNewJob(self):
        """Create any jobs required for this unit"""
        j = GPI.Job()
        j._impl.backend = self._getParent().backend.clone()
        j._impl.application = self._getParent().application.clone()
        if self.inputdata is not None:
            j.inputdata = self.inputdata.clone()

        trf = self._getParent()
        task = trf._getParent()

        # copy across the outputfiles
        for f in trf.outputfiles:
            j.outputfiles += [f.clone()]

        j.inputsandbox = trf.inputsandbox

        # Sort out the splitter
        if trf.splitter:
            j.splitter = trf.splitter.clone()

        # Postprocessors (deepcopy comes from the standard-library copy module)
        for pp in trf.postprocessors:
            j.postprocessors.append(deepcopy(pp))

        return j
Example #2
    def createNewJob(self):
        """Create any jobs required for this unit"""
        j = GPI.Job()

        j.backend = self._getParent().backend.clone()

        # copy from ourselves or the parent transform depending on what's
        # specified
        fields = [
            'application', 'splitter', 'inputfiles', 'inputdata',
            'inputsandbox', 'outputfiles', 'postprocessors'
        ]

        for f in fields:

            if (f == "postprocessors"
                    and len(getattr(self, f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(addProxy(self).postprocessors)
            elif (f != "postprocessors" and getattr(self, f)):
                setattr(j, f, copy.deepcopy(getattr(self, f)))
            elif (f == "postprocessors"
                  and len(getattr(self._getParent(), f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(
                    addProxy(self._getParent()).postprocessors)
            elif (f != "postprocessors" and getattr(self._getParent(), f)):
                setattr(j, f, copy.deepcopy(getattr(self._getParent(), f)))

        return j
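
Example #2 resolves each field by preferring the unit's own value and falling back to the parent transform only when the unit leaves it unset. A minimal standalone sketch of that precedence (hypothetical Unit/Transform/Job stand-ins, no Ganga proxies involved):

import copy

class Transform(object):
    application = "transform-app"
    splitter = None

class Unit(object):
    application = None          # unset: fall back to the transform
    splitter = "unit-splitter"  # set: the unit's own value wins
    def __init__(self, parent):
        self._parent = parent

class Job(object):
    pass

def create_new_job(unit):
    j = Job()
    for f in ('application', 'splitter'):
        # own value first, otherwise the parent transform's value
        value = getattr(unit, f) or getattr(unit._parent, f)
        if value:
            setattr(j, f, copy.deepcopy(value))
    return j

j = create_new_job(Unit(Transform()))
print(j.application, j.splitter)  # transform-app unit-splitter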
Example #3
   def createNewJob(self):
      """Create any jobs required for this unit"""      
      j = GPI.Job()
      j._impl.backend = self._getParent().backend.clone()
      j._impl.application = self._getParent().application.clone()
      j.inputdata = self.inputdata.clone()

      trf = self._getParent()
      task = trf._getParent()

      # copy across the outputfiles
      for f in trf.outputfiles:
         j.outputfiles += [f.clone()]

      j.inputsandbox = trf.inputsandbox

      if isinstance(self.eventswanted, str):
        subLines = self.eventswanted
      else:
        subLines = '\n'.join(self.eventswanted)
      # Base for the naming of each subjob's CSV file
      incsvfile = j._impl.application.csvfile
      tmpname = os.path.basename(incsvfile)
      if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1])+"_sub%d."+ tmpname.split('.')[-1]
      else:
        patterncsv = tmpname+"_sub%d"

      from Ganga.GPIDev.Lib.File import FileBuffer
      thiscsv = patterncsv % self.subpartid

      # Create the CSV file for this Unit
      j._impl.getInputWorkspace().writefile(FileBuffer(thiscsv,subLines),executable=0)
      j._impl.application.csvfile = j._impl.getInputWorkspace().getPath()+thiscsv
      j.inputsandbox.append(j._impl.getInputWorkspace().getPath()+thiscsv)

      # Base for the naming of each subjob's output file
      tmpname = os.path.basename(j._impl.application.outputfile)
      if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1])+"_sub%d."+ tmpname.split('.')[-1]
      else:
        patternout = tmpname+"_sub%d"
      j._impl.application.outputfile = patternout % self.subpartid

      # Sort out the splitter
      if trf.splitter:
         j.splitter = trf.splitter.clone()
         
      return j
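
The "_sub%d" filename handling in Example #3 is easier to follow in isolation. Below is a small standalone sketch (hypothetical filenames and helper name, not part of the Ganga code above) of the same pattern: the per-unit suffix is inserted before the final extension when one exists, otherwise appended.

import os

def subjob_pattern(filename):
    # Build a "%d"-style pattern that slots a unit id into the name,
    # keeping the original extension at the end if there is one.
    base = os.path.basename(filename)
    parts = base.split('.')
    if len(parts) > 1:
        return '.'.join(parts[:-1]) + "_sub%d." + parts[-1]
    return base + "_sub%d"

print(subjob_pattern("/data/events.csv") % 3)  # events_sub3.csv
print(subjob_pattern("rawoutput") % 3)         # rawoutput_sub3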
Example #4
 def createNewJob(self, partition):
     """ Returns a new job initialized with the transforms application, backend and name """
     task = self._getParent()  # this works because createNewJob is only called by a task
     id = task.transforms.index(self)
     j = GPI.Job()
     stripProxy(j).backend = self.backend.clone()
     stripProxy(j).application = self.application.clone()
     stripProxy(j).application.tasks_id = "%i:%i" % (task.id, id)
     stripProxy(j).application.id = self.getNewAppID(partition)
     j.inputdata = self.inputdata
     j.outputdata = self.outputdata
     j.inputsandbox = self.inputsandbox
     j.outputsandbox = self.outputsandbox
     j.name = "T%i:%i C%i" % (task.id, id, partition)
     return j
Example #5
 def createNewJob(self, partition):
     """ Returns a new job initialized with the transforms application, backend and name """
     j = GPI.Job()
     stripProxy(j).backend = self.backend.clone()
     stripProxy(j).application = self.application.clone()
     stripProxy(j).application.tasks_id = "%i:%i" % (self.task_id,
                                                     self.transform_id)
     stripProxy(j).application.id = self.getNewAppID(partition)
     if self.splitter is not None:
         stripProxy(j).splitter = LHCbTaskDummySplitter(self.splitter)
     # if self.merger is not None:
     #     stripProxy(j).merger = self.merger
     j.inputdata = self.toProcess_dataset
     j.outputdata = self.outputdata
     j.inputsandbox = self.inputsandbox
     j.outputsandbox = self.outputsandbox
     j.name = "T%i Tr%i P%i" % (self.task_id, self.transform_id, partition)
     j.do_auto_resubmit = True
     self.toProcess_dataset.files = []
     return j
Example #6
   def createNewJob(self):
      """Create any jobs required for this unit"""      
      j = GPI.Job()
      j._impl.backend = self._getParent().backend.clone()
      j._impl.application = self._getParent().application.clone()
      if self.inputdata:
         j.inputdata = self.inputdata.clone()

      trf = self._getParent()
      task = trf._getParent()
      if trf.outputdata:
         j.outputdata = trf.outputdata.clone()
      elif j.inputdata and j.inputdata._impl._name == "ATLASLocalDataset" and j.application._impl._name != "TagPrepare":
         j.outputdata = GPI.ATLASOutputDataset()
      elif j.application._impl._name != "TagPrepare":
         j.outputdata = GPI.DQ2OutputDataset()

      # check for ds name specified and length
      if j.outputdata and j.outputdata._impl._name == "DQ2OutputDataset":
         max_length = configDQ2['OUTPUTDATASET_NAMELENGTH'] - 11

         # merge names need to be shorter
         if (j.backend._impl._name == "Panda" or j.backend._impl._name == "Jedi"):
            if j.backend.requirements.enableMerge:
               max_length -= 12

            if j.backend._impl._name == "Jedi":
               # go over the outputdata and check for output names that Jedi appends to the outDS name
               tmp_len_chg = 8
               for o in j.outputdata.outputdata:
                  if (len(o)+1) > tmp_len_chg:
                     tmp_len_chg = len(o)+1

               max_length -= tmp_len_chg

            elif j.backend.individualOutDS:
               max_length -= 8

         if j.outputdata.datasetname != "":
            dsn = [j.outputdata.datasetname, "j%i.t%i.trf%i.u%i" %
                   (j.id, task.id, trf.getID(), self.getID())]

            if len(".".join(dsn)) > max_length:
               dsn = [j.outputdata.datasetname[: - (len(".".join(dsn)) - max_length)], "j%i.t%i.trf%i.u%i" %
                      (j.id, task.id, trf.getID(), self.getID())]
         else:
            dsn = [trf.getContainerName()[:-1], self.name, "j%i.t%i.trf%i.u%i" %
                   (j.id, task.id, trf.getID(), self.getID())]

            if len(".".join(dsn)) > max_length:
               dsn2 = [trf.getContainerName(2 * max_length / 3)[:-1], "", "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]
               dsn = [trf.getContainerName(2 * max_length / 3)[:-1], self.name[: - (len(".".join(dsn2)) - max_length)], "j%i.t%i.trf%i.u%i" %
                      (j.id, task.id, trf.getID(), self.getID())]
            
         j.outputdata.datasetname = '.'.join(dsn).replace(":", "_").replace(" ", "").replace(",","_")
                           
      j.inputsandbox = self._getParent().inputsandbox
      j.outputsandbox = self._getParent().outputsandbox

      # check for splitter - TagPrepare and Jedi don't use splitters
      if j.application._impl._name == "TagPrepare":
         return j
      
      if j.backend._impl._name == "Jedi":
         if trf.files_per_job > 0:
            j.backend.requirements.nFilesPerJob = trf.files_per_job
         elif trf.MB_per_job > 0:
            j.backend.requirements.nGBPerJob = trf.MB_per_job / 1000

         return j

      if not trf.splitter:
         # provide a default number of files if there's nothing else given
         nfiles = trf.files_per_job
         if nfiles < 1:
            nfiles = 5

         if j.inputdata._impl._name == "ATLASLocalDataset":
            j.splitter = AthenaSplitterJob()
            if trf.subjobs_per_unit > 0:
               j.splitter.numsubjobs = trf.subjobs_per_unit
            else:
               import math 
               j.splitter.numsubjobs = int( math.ceil( len(j.inputdata.names) / float(nfiles) ) )
         else:
            j.splitter = DQ2JobSplitter()
            if trf.MB_per_job > 0:
               j.splitter.filesize = trf.MB_per_job
            elif trf.subjobs_per_unit > 0:
               j.splitter.numsubjobs = trf.subjobs_per_unit
            else:
               j.splitter.numfiles = nfiles
      else:
         j.splitter = trf.splitter.clone()

      # postprocessors
      if len(self._getParent().postprocessors.process_objects) > 0:
         import copy
         j.postprocessors = copy.deepcopy( addProxy(self._getParent()).postprocessors )
         
      return j
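
The dataset-name length handling is the densest part of Example #6. A standalone sketch (hypothetical values, same truncation idea) shows how the leading component is shortened by exactly the overflow so the joined name fits within max_length:

max_length = 60
dsn = ["user.someone.mydataset.with.a.rather.long.descriptive.name",
       "j12.t3.trf1.u0"]
if len(".".join(dsn)) > max_length:
    # drop the overflow from the first component only
    overflow = len(".".join(dsn)) - max_length
    dsn = [dsn[0][:-overflow], dsn[1]]
name = ".".join(dsn).replace(":", "_").replace(" ", "").replace(",", "_")
print(name, len(name))  # length is now <= max_length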