def reap(self):
    try:
        log.info('Inspecting %s' % self)
        self.pfile = nimsdata.nimsraw.NIMSPFile(self.path)
    except nimsdata.nimsraw.NIMSPFileError as e:
        self.needs_reaping = False
        log.warning('Skipping %s (%s)' % (self, str(e)))
        return
    else:
        self.pat_id = self.pfile.patient_id
    # stage files into a uniquely named directory; note '%s' (epoch seconds)
    # is a platform-specific strftime extension
    stage_dir = '%s_%s' % (self.reaper.id_, datetime.datetime.now().strftime('%s.%f'))
    reap_path = nimsutil.make_joined_path(self.reaper.reap_stage, stage_dir)
    if self.pat_id.strip('/').lower() in self.reaper.discard_ids:
        self.needs_reaping = False
        log.info('Discarding %s' % self)
        return
    if self.reaper.pat_id and not re.match(self.reaper.pat_id.replace('*', '.*'), self.pat_id):
        self.needs_reaping = False
        log.info('Ignoring %s' % self)
        return
    try:
        log.info('Reaping %s' % self)
        shutil.copy2(self.path, reap_path)
        # copy auxiliary files matching <path>_<series_uid>_*, prefixing each
        # staged copy with '_' so the compression step skips it
        for fp in glob.glob(self.path + '_' + self.pfile.series_uid + '_*'):
            aux_name = '_' + self.basename + '_' + fp.rsplit('_', 1)[-1]
            log.info('Reaping %s to %s' % (os.path.basename(fp), os.path.join(reap_path, aux_name)))
            shutil.copy2(fp, os.path.join(reap_path, aux_name))
    except KeyboardInterrupt:
        shutil.rmtree(reap_path)
        raise
    except (shutil.Error, IOError):
        log.warning('Error while reaping %s' % self)
    else:
        log.info('Compressing %s' % self)
        nimsutil.gzip_inplace(os.path.join(reap_path, self.basename), 0o644)
        # move into the sort stage under a hidden name, then rename, so the
        # sorter never sees a partially moved directory
        shutil.move(reap_path, os.path.join(self.reaper.sort_stage, '.' + stage_dir))
        os.rename(os.path.join(self.reaper.sort_stage, '.' + stage_dir),
                  os.path.join(self.reaper.sort_stage, stage_dir))
        self.needs_reaping = False
        log.info('Reaped %s' % self)
def reap(self):
    try:
        log.info('Inspecting %s' % self)
        self.pfile = nimsdata.nimsraw.NIMSPFile(self.path)
    except nimsdata.nimsraw.NIMSPFileError as e:
        self.needs_reaping = False
        log.warning('Skipping %s (%s)' % (self, str(e)))
        return
    else:
        self.pat_id = self.pfile.patient_id
        self.exam = self.pfile.exam_no
        self.series = self.pfile.series_no
        self.acq = self.pfile.acq_no
    stage_dir = '%s_%s' % (self.reaper.id_, datetime.datetime.now().strftime('%s.%f'))
    reap_path = nimsutil.make_joined_path(self.reaper.reap_stage, stage_dir)
    # collect auxiliary files up front, before any filtering decisions
    aux_reap_files = [arf for arf in glob.glob(self.path + '_*') if self.is_aux_file(arf)]
    if self.pat_id.strip('/').lower() in self.reaper.discard_ids:
        self.needs_reaping = False
        log.info('Discarding %s' % self)
        return
    if self.reaper.pat_id and not re.match(self.reaper.pat_id.replace('*', '.*'), self.pat_id):
        self.needs_reaping = False
        log.info('Ignoring %s' % self)
        return
    try:
        log.info('Reaping %s' % self)
        shutil.copy2(self.path, reap_path)
        for arf in aux_reap_files:
            # prefix staged auxiliary files with '_' so compression skips them
            shutil.copy2(arf, os.path.join(reap_path, '_' + os.path.basename(arf)))
            log.info('Reaping %s' % ('_' + os.path.basename(arf)))
    except KeyboardInterrupt:
        shutil.rmtree(reap_path)
        raise
    except (shutil.Error, IOError):
        log.warning('Error while reaping %s' % self)
    else:
        log.info('Compressing %s' % self)
        nimsutil.gzip_inplace(os.path.join(reap_path, self.basename), 0o644)
        shutil.move(reap_path, os.path.join(self.reaper.sort_stage, '.' + stage_dir))
        os.rename(os.path.join(self.reaper.sort_stage, '.' + stage_dir),
                  os.path.join(self.reaper.sort_stage, stage_dir))
        self.needs_reaping = False
        log.info('Reaped %s' % self)
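# is_aux_file() is referenced above but not defined in this section. A minimal
# sketch, assuming a file qualifies as auxiliary when its name embeds this
# pfile's series UID (mirroring the series_uid glob in the first reap()
# variant above); the real predicate may apply additional checks:
def is_aux_file(self, filepath):
    """Return True if filepath names an auxiliary file for this pfile."""
    return os.path.basename(filepath).startswith(
            os.path.basename(self.path) + '_' + self.pfile.series_uid + '_')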
def run(self):
    while self.alive:
        # relaunch jobs that need rerun
        for job in Job.query.filter((Job.status != u'running') & (Job.status != u'abandoned') & (Job.needs_rerun == True)).all():
            job.status = u'pending'
            job.activity = u'reset to pending'
            log.info(u'Reset %s to pending' % job)
            job.needs_rerun = False
        transaction.commit()
        # deal with dirty data containers, oldest first, skipping any whose
        # datasets were updated within the cool-off window
        dc = (DataContainer.query
                .filter(DataContainer.dirty == True)
                .filter(~DataContainer.datasets.any(Dataset.updatetime > (datetime.datetime.now() - self.cooltime)))
                .order_by(DataContainer.timestamp)
                .first())
        if dc:
            dc.dirty = False
            dc.scheduling = True
            transaction.commit()
            DBSession.add(dc)
            # compress data if needed
            for ds in [ds for ds in dc.original_datasets if not ds.compressed]:
                log.info(u'Compressing %s %s' % (dc, ds.filetype))
                dataset_path = os.path.join(self.nims_path, ds.relpath)
                if ds.filetype == nimsdata.nimsdicom.NIMSDicom.filetype:
                    arcdir = '%d_%d_%d_dicoms' % (dc.session.exam, dc.series, dc.acq)
                    arcdir_path = os.path.join(dataset_path, arcdir)
                    os.mkdir(arcdir_path)
                    for filename in [f for f in os.listdir(dataset_path) if not f.startswith(arcdir)]:
                        os.rename(os.path.join(dataset_path, filename), os.path.join(arcdir_path, filename))
                    with tarfile.open('%s.tgz' % arcdir_path, 'w:gz', compresslevel=6) as archive:
                        archive.add(arcdir_path, arcname=os.path.basename(arcdir_path))
                    shutil.rmtree(arcdir_path)
                    ds.filenames = os.listdir(dataset_path)
                    ds.compressed = True
                    transaction.commit()
                elif ds.filetype == nimsdata.nimsraw.NIMSPFile.filetype:
                    for pfilepath in [os.path.join(dataset_path, f) for f in os.listdir(dataset_path) if not f.startswith('_')]:
                        nimsutil.gzip_inplace(pfilepath, 0o644)
                    ds.filenames = os.listdir(dataset_path)
                    ds.compressed = True
                    transaction.commit()
                DBSession.add(dc)
            # schedule job
            log.info(u'Inspecting %s' % dc)
            new_digest = nimsutil.redigest(os.path.join(self.nims_path, dc.primary_dataset.relpath))
            if dc.primary_dataset.digest != new_digest:
                dc.primary_dataset.digest = new_digest
                job = Job.query.filter_by(data_container=dc).filter_by(task=u'find&proc').first()
                if not job:
                    job = Job(data_container=dc, task=u'find&proc', status=u'pending', activity=u'pending')
                    log.info(u'Created job %s' % job)
                elif job.status != u'pending' and not job.needs_rerun:
                    job.needs_rerun = True
                    log.info(u'Marked job %s for restart' % job)
            dc.scheduling = False
            log.info(u'Done %s' % dc)
            transaction.commit()
        else:
            time.sleep(self.sleeptime)
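# ---------------------------------------------------------------------------
# The nimsutil helpers called above (make_joined_path, gzip_inplace, redigest)
# are defined elsewhere. Minimal sketches follow, assuming the simplest
# behavior consistent with the call sites; the real implementations may
# differ. os and shutil are assumed imported as in the code above.
# ---------------------------------------------------------------------------
import errno
import gzip
import hashlib

def make_joined_path(*args):
    # join the path components and ensure the resulting directory exists
    path = os.path.join(*args)
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    return path

def gzip_inplace(path, mode=0o644):
    # compress path to path.gz, set the requested mode, remove the original
    gz_path = path + '.gz'
    with open(path, 'rb') as src, gzip.open(gz_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    os.chmod(gz_path, mode)
    os.remove(path)

def redigest(path):
    # recompute a digest over a dataset directory: file names plus contents,
    # in sorted order, so any change to the directory changes the digest
    digest = hashlib.sha1()
    for filename in sorted(os.listdir(path)):
        filepath = os.path.join(path, filename)
        digest.update(filename.encode('utf-8'))
        if os.path.isfile(filepath):
            with open(filepath, 'rb') as fd:
                for chunk in iter(lambda: fd.read(1048576), b''):
                    digest.update(chunk)
    return digest.hexdigest()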