def extract_features(self, bin_pid):
    """Run the MATLAB ``bin_features`` job for one bin and deposit its CSV output.

    Steps, as performed below:
      1. Return early if ``self.complete(bin_pid)`` is already true.
      2. Create two temporary directories under ``self.config.tmp_dir``:
         one for job output and one for the input zips.
      3. Write the bin zip via ``represent.binpid2zip`` and copy the blob zip
         from ``bin_pid + '_blob.zip'`` next to it.
      4. Run ``bin_features`` through ``Matlab``; every output line is logged
         and periodically triggers a "completed by another worker?" check.
      5. Deposit the features CSV (and the multiblob CSV if one was written)
         unless the bin was completed in the meantime.
      6. Always remove both temporary directories.

    Reads ``self.last_check`` (a ``time.time()`` timestamp) and updates it.

    :param bin_pid: identifier of the bin to process.
    :returns: ``SKIP`` if the bin was already complete on entry, ``DIE`` if a
        ``KeyboardInterrupt`` arrived while MATLAB was running, otherwise
        ``None`` (including when the job stopped early via ``JobExit``).
    """
    jobid = gen_id()[:5]

    def selflog(line):
        self.log('[%s] %s' % (jobid, line))

    def self_check_log(line, bin_pid):
        # MATLAB output callback: log the line and, at most once per
        # CHECK_EVERY seconds, stop the job if another worker finished it.
        selflog(line)
        now = time.time()
        if now - self.last_check > CHECK_EVERY:
            # Restart the timer only when a check is actually performed.
            # Resetting it on every output line would mean a chatty job
            # never accumulates enough elapsed time to trigger the check.
            self.last_check = now
            if self.complete(bin_pid):
                msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                selflog(msg)
                raise JobExit(msg, SKIP)

    def make_tmp_dir(path):
        # Best effort: a failure is only logged; later steps will surface it.
        try:
            os.makedirs(path)
            selflog('CREATED temporary directory %s for %s' % (path, bin_pid))
        except Exception:
            selflog('WARNING cannot create temporary directory %s for %s' % (path, bin_pid))

    def remove_tmp_dir(path):
        # Best effort, but KeyboardInterrupt/SystemExit are not swallowed.
        try:
            shutil.rmtree(path)
            selflog('DELETED temporary directory %s for %s' % (path, bin_pid))
        except Exception:
            selflog('WARNING cannot remove temporary directory %s for %s' % (path, bin_pid))

    if self.complete(bin_pid):
        selflog('SKIPPING %s - already completed' % bin_pid)
        return SKIP

    job_dir = os.path.join(self.config.tmp_dir, gen_id())
    zip_dir = os.path.join(self.config.tmp_dir, gen_id())
    bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
    make_tmp_dir(job_dir)
    make_tmp_dir(zip_dir)

    # Everything that touches the temporary directories runs under this
    # try/finally so they are removed even if loading the inputs fails.
    try:
        selflog('LOADING and STITCHING %s' % bin_pid)
        with open(bin_zip_path, 'wb') as binzip:
            represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
        blobzipurl = bin_pid + '_blob.zip'
        # Rewrite only the file name; a '.zip' occurring in the directory
        # part of the path must not be touched.
        blobzipfile = os.path.join(
            os.path.dirname(bin_zip_path),
            re.sub(r'\.zip', '_blob.zip', os.path.basename(bin_zip_path)))
        selflog('LOADING blob zip from %s -> %s' % (blobzipurl, blobzipfile))
        drain(UrlSource(blobzipurl), LocalFileSink(blobzipfile))

        feature_csv = os.path.join(job_dir, csvname(bin_pid))
        multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
        matlab = Matlab(self.config.matlab_exec_path, self.config.matlab_path,
                        output_callback=lambda l: self_check_log(l, bin_pid))
        namespace = os.path.dirname(bin_zip_path) + '/'
        lid = os.path.basename(bin_zip_path)
        cmd = 'bin_features(\'%s\',\'%s\',\'%s\',\'chatty\')' % (namespace, lid, job_dir + '/')
        selflog('RUNNING %s' % cmd)
        try:
            self.output_check = CHECK_EVERY
            matlab.run(cmd)
            if not os.path.exists(feature_csv):
                msg = 'WARNING bin_features succeeded but no output file found at %s' % feature_csv
                selflog(msg)
                raise JobExit(msg, FAIL)
            if not self.complete(bin_pid):  # check to make sure another worker hasn't finished it in the meantime
                selflog('DEPOSITING features csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
                self.deposit.deposit(bin_pid, feature_csv)
                selflog('DEPOSITED features csv for %s ' % bin_pid)
                if os.path.exists(multiblob_csv):
                    selflog('DEPOSITING multiblob csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
                    self.multiblob_deposit.deposit(bin_pid, multiblob_csv)
                    selflog('DEPOSITED multiblob csv for %s ' % bin_pid)
            else:
                selflog('NOT SAVING - features for %s already present at output destination' % bin_pid)
        except KeyboardInterrupt:
            selflog('KeyboardInterrupt, exiting')
            return DIE
        except JobExit:
            # The reason was already logged where JobExit was raised.
            pass
    finally:
        remove_tmp_dir(job_dir)
        remove_tmp_dir(zip_dir)
    selflog('DONE - no more actions for %s' % bin_pid)
def extract_blobs(self, bin_pid):
    """Run the MATLAB ``bin_blobs`` job for one bin and deposit the blob zip.

    Steps, as performed below:
      1. Return early if ``self.exists(bin_pid)`` is already true.
      2. Create two temporary directories under ``self.config.tmp_dir``:
         one for job output and one for the input bin zip.
      3. Write the bin zip via ``represent.binpid2zip``; return early if it
         reports no targets.
      4. Run ``bin_blobs`` through ``Matlab``; every output line is logged
         and periodically triggers a "completed by another worker?" check.
      5. Deposit the resulting zip unless the output file is missing or the
         blobs already exist at the destination.
      6. Always remove both temporary directories.

    Log lines are prefixed with ``self.config.task_id`` when that attribute
    is available, otherwise with a short generated id. Reads
    ``self.last_check`` (a ``time.time()`` timestamp) and updates it.

    :param bin_pid: identifier of the bin to process.
    :returns: ``SKIP`` if the blobs already existed on entry or the bin has
        no targets, ``DIE`` on ``KeyboardInterrupt`` during the job,
        otherwise ``None`` (including when the job stopped early via
        ``JobExit``).
    """
    try:
        jobid = self.config.task_id
    except Exception:
        # No task id configured; fall back to a short generated id.
        jobid = gen_id()[:5]

    def selflog(line):
        self.log('[%s] %s' % (jobid, line))

    def self_check_log(line, bin_pid):
        # MATLAB output callback: log the line and, at most once per
        # CHECK_EVERY seconds, stop the job if another worker finished it.
        selflog(line)
        now = time.time()
        if now - self.last_check > CHECK_EVERY:
            # Restart the timer only when a check is actually performed.
            # Resetting it on every output line would mean a chatty job
            # never accumulates enough elapsed time to trigger the check.
            self.last_check = now
            if self.exists(bin_pid):
                msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                selflog(msg)
                raise JobExit(msg, SKIP)

    def make_tmp_dir(path):
        # Best effort: a failure is only logged; later steps will surface it.
        try:
            os.makedirs(path)
            selflog('CREATED temporary directory %s for %s' % (path, bin_pid))
        except Exception:
            selflog('WARNING cannot create temporary directory %s for %s' % (path, bin_pid))

    def remove_tmp_dir(path):
        # Best effort, but KeyboardInterrupt/SystemExit are not swallowed.
        try:
            shutil.rmtree(path)
            selflog('DELETED temporary directory %s for %s' % (path, bin_pid))
        except Exception:
            selflog('WARNING cannot remove temporary directory %s for %s' % (path, bin_pid))

    if self.exists(bin_pid):
        selflog('SKIPPING %s - already completed' % bin_pid)
        return SKIP

    job_dir = os.path.join(self.config.tmp_dir, gen_id())
    zip_dir = os.path.join(self.config.tmp_dir, gen_id())
    bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
    make_tmp_dir(job_dir)
    make_tmp_dir(zip_dir)

    try:
        selflog('LOADING and STITCHING %s' % bin_pid)
        with open(bin_zip_path, 'wb') as binzip:
            targets = represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
        if len(targets) == 0:
            selflog('SKIPPING %s - no targets in bin' % bin_pid)
            return SKIP  # the finally clause below still cleans up
        tmp_file = os.path.join(job_dir, zipname(bin_pid))
        matlab = Matlab(self.config.matlab_exec_path, self.config.matlab_path,
                        output_callback=lambda l: self_check_log(l, bin_pid))
        cmd = 'bin_blobs(\'%s\',\'%s\',\'%s\')' % (bin_pid, bin_zip_path, job_dir)
        self.output_check = CHECK_EVERY
        matlab.run(cmd)
        if not os.path.exists(tmp_file):
            selflog('WARNING bin_blobs succeeded but no output file found at %s' % tmp_file)
        elif not self.exists(bin_pid):  # check to make sure another worker hasn't finished it in the meantime
            selflog('DEPOSITING blob zip for %s to deposit service at %s' % (bin_pid, self.config.blob_deposit))
            self.deposit.deposit(bin_pid, tmp_file)
            selflog('DEPOSITED blob zip for %s ' % bin_pid)
        else:
            selflog('NOT SAVING - blobs for %s already present at output destination' % bin_pid)
    except KeyboardInterrupt:
        selflog('KeyboardInterrupt, exiting')
        return DIE
    except JobExit:
        # The reason was already logged where JobExit was raised.
        pass
    finally:
        remove_tmp_dir(job_dir)
        remove_tmp_dir(zip_dir)
    selflog('DONE - no more actions for %s' % bin_pid)