示例#1
0
 def run(self, hadoop):
     """Upload already-present outputs, or run the command and fetch them.

     When ``self.output`` is non-empty and every path in it exists locally,
     each path is handed to ``hadoop.put`` together with its base name and
     nothing else happens.  Otherwise ``self.execute`` is passed to
     ``hadoop.syscall`` and every output is then retrieved by base name:
     with ``hadoop.getmerge`` when ``self.stage == 'training'``, with
     ``hadoop.get`` for any other stage.

     :param hadoop: object providing ``put``, ``get``, ``getmerge`` and
         ``syscall`` (presumably a Hadoop command wrapper -- confirm
         against the caller).
     :return: ``None``.
     """
     targets = self.output
     has_targets = len(targets) > 0

     # Each local path travels together with its base name, which is the
     # name used on the ``hadoop`` side of every transfer below.
     transfers = [(path, os.path.basename(path)) for path in targets]

     if has_targets and all(map(os.path.exists, targets)):
         for local_path, remote_name in transfers:
             hadoop.put(local_path, remote_name)
         return

     hadoop.syscall(self.execute)
     for local_path, remote_name in transfers:
         # The stage is looked up per output, so it is never read when
         # there is nothing to fetch.
         if self.stage == 'training':
             fetch = hadoop.getmerge
         else:
             fetch = hadoop.get
         fetch(remote_name, local_path)
示例#2
0
 def run(self, hadoop, outdir, tmpdir):
     """Upload already-present suffixed outputs, or run the command and fetch them.

     Every entry of ``self.output`` is extended with ``self.suffix``.  When
     there is at least one output and all suffixed paths exist locally,
     each is handed to ``hadoop.put`` with its suffixed base name and the
     method returns.  Otherwise ``self.realfile_`` is called, its result
     and the suffix are substituted into ``self.execute`` (as ``file`` and
     ``suffix``), the resulting command is passed to ``hadoop.syscall``,
     the outputs are retrieved (``hadoop.getmerge`` when
     ``self.stage == 'training'``, ``hadoop.get`` otherwise) and finally
     ``self.cleanfile_`` is called.

     :param hadoop: object providing ``put``, ``get``, ``getmerge`` and
         ``syscall`` (presumably a Hadoop command wrapper -- confirm
         against the caller).
     :param outdir: forwarded unchanged to ``realfile_`` / ``cleanfile_``.
     :param tmpdir: forwarded unchanged to ``realfile_`` / ``cleanfile_``.
     :return: ``None``.
     """
     # (suffixed local path, suffixed base name) for every output.
     # NOTE: the loop variables below must not be called ``os``: binding
     # that name anywhere in this function makes it local and breaks the
     # ``os.path`` calls with an UnboundLocalError/NameError.
     transfers = [
         (path + self.suffix, os.path.basename(path) + self.suffix)
         for path in self.output
     ]

     if transfers and all(os.path.exists(local) for local, _ in transfers):
         for local_path, remote_name in transfers:
             hadoop.put(local_path, remote_name)
         return

     real_file = self.realfile_(hadoop, outdir, tmpdir)
     command = PTemplate(self.execute).safe_substitute(
         {'file': real_file, 'suffix': self.suffix}
     )
     hadoop.syscall(command)
     for local_path, remote_name in transfers:
         if self.stage == 'training':
             hadoop.getmerge(remote_name, local_path)
         else:
             hadoop.get(remote_name, local_path)
     self.cleanfile_(real_file, hadoop, outdir, tmpdir)