Example #1
    def remove_empty_files(self):
        """Remove all empty (0-length) files in the directory, regardless of
        the type of file (but skip directories).
        """
        start_time = time.time()
        wstdout('> Removing empty files from dir "%s"\n' % self.dirpath)
        for filepath in deepcopy(self._allfilepaths):
            try:
                if getsize(filepath) == 0:
                    self._remove(filepath)
            except (IOError, OSError):
                self._done_with(filepath)

        self.report()
        wstdout('>     Time to remove empty files: %s\n'
                % timediffstamp(time.time() - start_time))
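The helpers wstdout, wstderr, and timediffstamp are used throughout these examples but never defined in them. The sketch below is an assumption inferred purely from how they are called (wstdout/wstderr write and flush a message; timediffstamp formats a duration in seconds), not code from the original project:

import sys


def wstdout(msg):
    """Assumed behavior: write msg to stdout and flush immediately."""
    sys.stdout.write(msg)
    sys.stdout.flush()


def wstderr(msg):
    """Assumed behavior: write msg to stderr and flush immediately."""
    sys.stderr.write(msg)
    sys.stderr.flush()


def timediffstamp(dt_sec, hms_always=False):
    """Assumed behavior: format a duration in seconds as a short stamp.

    hms_always forces the H:MM:SS form; the keyword is kept only because
    Example #4 passes it.
    """
    dt_sec = int(round(dt_sec))
    hours, rem = divmod(dt_sec, 3600)
    minutes, seconds = divmod(rem, 60)
    if hours or hms_always:
        return '%d:%02d:%02d' % (hours, minutes, seconds)
    return '%d:%02d' % (minutes, seconds)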
Example #2
    def deep_clean(self, n_procs=None):
        """Run the cleaning process

        Parameters
        ----------
        n_procs : int, optional
            Number of worker processes to use. If None, defaults to
            min(number of files, 8 * cpu_count()).

        """
        start_time = time.time()
        wstdout('> Deep-cleaning the I3 files in the directory...\n')

        if n_procs is None:
            n_procs = min(len(self._allfilepaths), 8*cpu_count())

        # Create a manager for objects synchronized across workers
        mgr = Manager()
        groups = [mgr.list(g) for g in self.group_by_event().values()]

        pool = Pool(processes=n_procs)
        ret = pool.map(DeepCleaner(), groups)
        removed = []
        renamed = []
        failed_to_remove = []
        failed_to_rename = []
        for d in ret:
            removed.extend(d['removed'])
            renamed.extend(d['renamed'])
            failed_to_remove.extend(d['failed_to_remove'])
            failed_to_rename.extend(d['failed_to_rename'])

        wstderr(' '.join([str(len(g)) for g in groups]) + '\n')

        # TODO: remove files that -- while not redundant -- have fewer recos
        # (or, stretch goal: merge the non-redundant info together)

        self.report()
        wstdout('>     Time to run deep cleaning: %s\n'
                % timediffstamp(time.time() - start_time))
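DeepCleaner is not part of the excerpt. From the way its results are consumed above, it must be a picklable callable that accepts one group of file paths and returns a dict with 'removed', 'renamed', 'failed_to_remove', and 'failed_to_rename' lists. A hypothetical stand-in showing just that interface follows; the per-file policy inside it is invented for illustration and is not the original cleaning logic:

import os


class DeepCleaner(object):
    """Hypothetical stand-in with the interface Pool.map expects above."""

    def __call__(self, filepaths):
        result = {'removed': [], 'renamed': [],
                  'failed_to_remove': [], 'failed_to_rename': []}
        for filepath in filepaths:
            # Placeholder policy: drop zero-length files; the real cleaner
            # applies much more involved redundancy logic.
            try:
                if os.path.getsize(filepath) == 0:
                    os.remove(filepath)
                    result['removed'].append(filepath)
            except OSError:
                result['failed_to_remove'].append(filepath)
        return result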
Example #3
def main():
    """Main"""
    start_time_sec = time.time()
    args = parse_args()

    cleaner = CleanupRecoFiles(args.dir)
    if args.remove_empty:
        cleaner.remove_empty_files()

    cleaner.cleanup_lockfiles()

    if args.deep_clean:
        cleaner.merge_and_rename()

    #lock_count = 0
    #i3_count = 0
    #other_count = 0
    #wstdout('  -> %3d I3 files removed\n' % i3_count)
    #wstdout('  -> %3d lock files removed\n' % lock_count)
    #wstdout('  -> %3d other files removed\n' % other_count)

    wstdout('Script run time: %s\n'
            % timediffstamp(time.time() - start_time_sec))
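parse_args is not shown. Judging only from the attributes main() reads (args.dir, args.remove_empty, args.deep_clean), an argparse-based reconstruction might look like the following; the option names and help strings are guesses:

import argparse


def parse_args():
    """Hypothetical reconstruction of the command-line interface."""
    parser = argparse.ArgumentParser(
        description='Clean up reco output files in a directory.')
    parser.add_argument('--dir', required=True,
                        help='Directory containing the reco/I3 files')
    parser.add_argument('--remove-empty', action='store_true',
                        help='Remove zero-length files first')
    parser.add_argument('--deep-clean', action='store_true',
                        help='Merge and/or rename redundant reco files')
    return parser.parse_args()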
Example #4
    def merge_and_rename(self, n_procs=None):
        """Merge and/or rename I3 files, grouped by event, in parallel"""
        if n_procs is None:
            n_procs = min(len(self._allfilepaths), 8*cpu_count())
        start_time = time.time()
        wstdout('> Merging and/or renaming I3 files in the directory...\n')
        # Create a manager for objects synchronized across workers
        mgr = Manager()
        groups = [mgr.list(g) for g in self.group_by_event().values()]
        pool = Pool(processes=n_procs)
        ret = pool.map(merge_and_rename, groups)
        successes = []
        failures = 0
        for r in ret:
            if r is None:
                failures += 1
            else:
                successes.append(r)
        wstdout('> Failures: %5d; successes: %5d\n'
                % (failures, len(successes)))
        wstdout('>     Time to run merge_and_rename: %s\n'
                % timediffstamp(time.time() - start_time, hms_always=True))
        return failures, successes
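A usage sketch, assuming a CleanupRecoFiles instance as constructed in Example #3; the directory path is a placeholder:

cleaner = CleanupRecoFiles('/path/to/reco/output')  # hypothetical path
failures, successes = cleaner.merge_and_rename(n_procs=4)
wstdout('merged/renamed %d groups, %d failures\n'
        % (len(successes), failures))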
Example #5
                track_spline_prob=spline_tables['trk_timing'],
                bad_doms_name='BadDomsList',
                input_pulses=args.srt_pulse_name,
                wave_form_range='WaveformRange',
                use_ball_speedup=True,
                ball_radius=150.0,
            )

            for reco_num in recos_to_run:
                reco_name = RECOS[reco_num]['name']
                time_limit = RECOS[reco_num]['time_limit']
                kwargs = RECOS[reco_num]['kwargs']

                wstdout(
                    '> Setting up tray to run reco #%3d, %s (time limit %s)\n'
                    % (reco_num, reco_name, timediffstamp(time_limit)))

                tray.AddSegment(multinest_icetray.MultiNestReco,
                                reco_name,
                                input_pulses=args.srt_pulse_name,
                                base_geometry=args.geometry,
                                **kwargs)

            tray.AddModule(
                'I3Writer',
                'EventWriter',
                Filename=outfile_path,
                Streams=[icetray.I3Frame.Physics, icetray.I3Frame.DAQ],
                DropOrphanStreams=[icetray.I3Frame.DAQ])

            tray.AddModule('TrashCan', 'Done')
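RECOS is referenced but never defined in these excerpts. Each entry is looked up by number and must provide 'name', 'time_limit', and 'kwargs' (extra keyword arguments forwarded to the MultiNest segment). A single hypothetical entry, with every value invented purely for illustration:

RECOS = {
    1: {
        'name': 'MN_SpeFit',   # hypothetical reco label
        'time_limit': 3600,    # seconds; formatted via timediffstamp above
        'kwargs': {},          # extra keyword args forwarded to the segment
    },
}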
Example #6
                    continue
                continue
            except:
                continue
            else:
                # TODO: remove this clause once the buggy runs where lock files
                # were being overwritten have been cleaned up
                lock_info = read_lockfile(lockfilepath)
                if 'type' in lock_info and lock_info['type'] == 'outfile_lock':
                    self._remove(lock_info['outfile'])
                lock_f.close()
                self._remove(lockfilepath)

        self.report()
        wstdout('>     Time to clean up lock files: %s\n'
                % timediffstamp(time.time() - start_time))

    # TODO
    def tabulate_unique_recos(self):
        pass

    # TODO
    def identify_duplicated_base_events(self):
        pass

    def group_by_event(self):
        """Group I3 files by common event (i.e., exclude whatever processing
        steps have been performed)

        Returns
        -------
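read_lockfile is also undefined in the excerpt; the cleanup code above only relies on it returning a dict that may carry 'type' (e.g. 'outfile_lock') and 'outfile' keys. A minimal sketch under the assumption that a lock file stores one key=value pair per line (the real format is not given here):

def read_lockfile(lockfilepath):
    """Hypothetical parser assuming one key=value pair per line."""
    lock_info = {}
    with open(lockfilepath, 'r') as lock_f:
        for line in lock_f:
            line = line.strip()
            if '=' not in line:
                continue
            key, _, value = line.partition('=')
            lock_info[key.strip()] = value.strip()
    return lock_info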
Example #7
                track_spline_prob=spline_tables['trk_timing'],
                bad_doms_name='BadDomsList',
                input_pulses=args.srt_pulse_name,
                wave_form_range='WaveformRange',
                use_ball_speedup=True,
                ball_radius=150.0,
            )

            for reco_num in recos_to_run:
                reco_name = RECOS[reco_num]['name']
                time_limit = RECOS[reco_num]['time_limit']
                kwargs = RECOS[reco_num]['kwargs']

                wstdout(
                    '> Setting up tray to run reco #%3d, %s (time limit %s)\n'
                    % (reco_num, reco_name, timediffstamp(time_limit))
                )

                tray.AddSegment(
                    multinest_icetray.MultiNestReco, reco_name,
                    input_pulses=args.srt_pulse_name,
                    base_geometry=args.geometry,
                    **kwargs
                )

            tray.AddModule(
                'I3Writer', 'EventWriter',
                Filename=outfile_path,
                Streams=[icetray.I3Frame.Physics, icetray.I3Frame.DAQ],
                DropOrphanStreams=[icetray.I3Frame.DAQ]
            )
    if PGEN_PRESENT:
        fileLoc = PGEN.FileLocations(run=RUN_NUM)
        i3_src_dir = fileLoc.source_i3_dir
        llhpbin_tgt_dir = fileLoc.llhp_binfiles_dir
        if TEST:
            fileLoc.llhp_binfiles_dir += '_test'
        fileLoc.createDirs(which='llhp_bin')
    else:
        i3_src_dir = I3_SRC_DIRS[RUN_NUM]
        llhpbin_tgt_dir = LLHPBIN_TGT_DIRS[RUN_NUM]

    print 'root dir:', i3_src_dir
    print 'llhpbin_tgt_dir:', llhpbin_tgt_dir

    tr1 = time.time()
    print 'tr1-tr0', timediffstamp(tr1 - tr0)

    count = 0
    for filenum, (ffpath, basename, match) in enumerate(fiter):
        tl0 = time.time()
        #filenum = int(fnum_rex.findall(ffpath)[0][0])

        # Print info particular to this file's LLHP extraction
        print 'ffpath ; fnum_rex finds...:', ffpath, ';', fnum_rex.findall(
            ffpath)
        print 'filenum:', filenum
        print 'extract_llhp_hsumm', EXTRACT_LLHP_HSUMM
        if EXTRACT_LLHP_HSUMM:
            print 'llh_minstepsize:', LLH_MINSTEPSIZE
        print 'extract_llhp_all:', EXTRACT_LLHP_ALL
        print 'llhp_top_thresh:', args.llhp_top_thresh