def remove_empty_files(self):
    """Remove all empty (0-length) files in the directory, regardless of
    the type of file (but skip directories).
    """
    start = time.time()
    wstdout('> Removing empty files from dir "%s"\n' % self.dirpath)
    # Iterate a copy: self._remove()/self._done_with() mutate the
    # underlying file list while we walk it.
    for path in deepcopy(self._allfilepaths):
        try:
            if getsize(path) == 0:
                self._remove(path)
        except (IOError, OSError):
            # File disappeared or is inaccessible; stop tracking it.
            self._done_with(path)
    self.report()
    elapsed = timediffstamp(time.time() - start)
    wstdout('> Time to remove empty files: %s\n' % elapsed)
def deep_clean(self, n_procs=None):
    """Run the deep-cleaning process on groups of I3 files, where each
    group shares a common event.

    Parameters
    ----------
    n_procs : int or None
        Number of worker processes to use. If None, defaults to the
        smaller of the number of files and 8 * cpu_count(), but at
        least 1.
    """
    start_time = time.time()
    wstdout('> Deep-cleaning the I3 files in the directory...\n')
    if n_procs is None:
        # Fix: clamp to >= 1; Pool(processes=0) raises ValueError when
        # the directory contains no files.
        n_procs = max(1, min(len(self._allfilepaths), 8*cpu_count()))

    # Create a manager for objects synchronized across workers
    mgr = Manager()
    groups = [mgr.list(g) for g in self.group_by_event().values()]

    pool = Pool(processes=n_procs)
    try:
        ret = pool.map(DeepCleaner(), groups)
    finally:
        # Fix: the original never closed the pool, leaking worker
        # processes for the lifetime of the program.
        pool.close()
        pool.join()

    removed = []
    renamed = []
    failed_to_remove = []
    failed_to_rename = []
    for d in ret:
        removed.extend(d['removed'])
        renamed.extend(d['renamed'])
        failed_to_remove.extend(d['failed_to_remove'])
        failed_to_rename.extend(d['failed_to_rename'])

    wstderr(' '.join([str(len(g)) for g in groups]) + '\n')

    # TODO: remove files that -- while not redundant -- have fewer recos
    # (or, stretch goal: merge the non-redundant info together)

    self.report()
    wstdout('> Time to run deep cleaning: %s\n'
            % timediffstamp(time.time() - start_time))
def main():
    """Parse command-line arguments and run the requested cleanup steps."""
    start_time_sec = time.time()
    args = parse_args()
    cleaner = CleanupRecoFiles(args.dir)
    if args.remove_empty:
        cleaner.remove_empty_files()
    # Lock files are always cleaned up, independent of the other flags.
    cleaner.cleanup_lockfiles()
    if args.deep_clean:
        # NOTE(review): the --deep-clean flag invokes merge_and_rename(),
        # not deep_clean(); confirm this mapping is intentional.
        cleaner.merge_and_rename()
    wstdout('Script run time: %s\n'
            % timediffstamp(time.time() - start_time_sec))
def merge_and_rename(self, n_procs=None):
    """Merge and/or rename groups of I3 files, where each group shares a
    common event.

    Parameters
    ----------
    n_procs : int or None
        Number of worker processes to use. If None, defaults to the
        smaller of the number of files and 8 * cpu_count(), but at
        least 1.

    Returns
    -------
    failures : int
        Number of groups for which the worker returned None.
    successes : list
        The non-None results returned by the workers.
    """
    if n_procs is None:
        # Fix: clamp to >= 1; Pool(processes=0) raises ValueError when
        # the directory contains no files.
        n_procs = max(1, min(len(self._allfilepaths), 8*cpu_count()))

    start_time = time.time()
    wstdout('> Merging and/or renaming I3 files in the directory...\n')

    # Create a manager for objects synchronized across workers
    mgr = Manager()
    groups = [mgr.list(g) for g in self.group_by_event().values()]

    pool = Pool(processes=n_procs)
    try:
        # `merge_and_rename` passed to map() is the module-level worker
        # function, not this method.
        ret = pool.map(merge_and_rename, groups)
    finally:
        # Fix: the original never closed the pool, leaking worker
        # processes for the lifetime of the program.
        pool.close()
        pool.join()

    successes = []
    failures = 0
    for r in ret:
        if r is None:
            failures += 1
        else:
            successes.append(r)

    wstdout('> Failures: %5d; successes: %5d\n' % (failures, len(successes)))
    wstdout('> Time to run merge_and_rename: %s\n'
            % timediffstamp(time.time() - start_time, hms_always=True))

    return failures, successes
track_spline_prob=spline_tables['trk_timing'], bad_doms_name='BadDomsList', input_pulses=args.srt_pulse_name, wave_form_range='WaveformRange', use_ball_speedup=True, ball_radius=150.0, ) for reco_num in recos_to_run: reco_name = RECOS[reco_num]['name'] time_limit = RECOS[reco_num]['time_limit'] kwargs = RECOS[reco_num]['kwargs'] wstdout( '> Setting up tray to run reco #%3d, %s (time limit %s)\n' % (reco_num, reco_name, timediffstamp(time_limit))) tray.AddSegment(multinest_icetray.MultiNestReco, reco_name, input_pulses=args.srt_pulse_name, base_geometry=args.geometry, **kwargs) tray.AddModule( 'I3Writer', 'EventWriter', Filename=outfile_path, Streams=[icetray.I3Frame.Physics, icetray.I3Frame.DAQ], DropOrphanStreams=[icetray.I3Frame.DAQ]) tray.AddModule('TrashCan', 'Done')
continue continue except: continue else: # TODO: remove this clause once the buggy runs where lock files # were being overwritten has been cleaned up lock_info = read_lockfile(lockfilepath) if 'type' in lock_info and lock_info['type'] == 'outfile_lock': self._remove(lock_info['outfile']) lock_f.close() self._remove(lockfilepath) self.report() wstdout('> Time to clean up lock files: %s\n' % timediffstamp(time.time() - start_time)) # TODO def tabulate_unique_recos(self): pass # TODO def identify_duplicated_base_events(self): pass def group_by_event(self): """Group I3 files by common event (i.e., exclude whatever processing steps have been performed) Returns -------
track_spline_prob=spline_tables['trk_timing'], bad_doms_name='BadDomsList', input_pulses=args.srt_pulse_name, wave_form_range='WaveformRange', use_ball_speedup=True, ball_radius=150.0, ) for reco_num in recos_to_run: reco_name = RECOS[reco_num]['name'] time_limit = RECOS[reco_num]['time_limit'] kwargs = RECOS[reco_num]['kwargs'] wstdout( '> Setting up tray to run reco #%3d, %s (time limit %s)\n' % (reco_num, reco_name, timediffstamp(time_limit)) ) tray.AddSegment( multinest_icetray.MultiNestReco, reco_name, input_pulses=args.srt_pulse_name, base_geometry=args.geometry, **kwargs ) tray.AddModule( 'I3Writer', 'EventWriter', Filename=outfile_path, Streams=[icetray.I3Frame.Physics, icetray.I3Frame.DAQ], DropOrphanStreams=[icetray.I3Frame.DAQ] )
if PGEN_PRESENT: fileLoc = PGEN.FileLocations(run=RUN_NUM) i3_src_dir = fileLoc.source_i3_dir llhpbin_tgt_dir = fileLoc.llhp_binfiles_dir if TEST: fileLoc.llhp_binfiles_dir += '_test' fileLoc.createDirs(which='llhp_bin') else: i3_src_dir = I3_SRC_DIRS[RUN_NUM] llhpbin_tgt_dir = LLHPBIN_TGT_DIRS[RUN_NUM] print 'root dir:', i3_src_dir print 'llhpbin_tgt_dir:', llhpbin_tgt_dir tr1 = time.time() print 'tr1-tr0', timediffstamp(tr1 - tr0) count = 0 for filenum, (ffpath, basename, match) in enumerate(fiter): tl0 = time.time() #filenum = int(fnum_rex.findall(ffpath)[0][0]) # Print info particular to this file's LLHP extraction print 'ffpath ; fnum_rex finds...:', ffpath, ';', fnum_rex.findall( ffpath) print 'filenum:', filenum print 'extract_llhp_hsumm', EXTRACT_LLHP_HSUMM if EXTRACT_LLHP_HSUMM: print 'llh_minstepsize:', LLH_MINSTEPSIZE print 'extract_llhp_all:', EXTRACT_LLHP_ALL print 'llhp_top_thresh:', args.llhp_top_thresh