def collect_events(dtype, icc=False):
    """Collect the selected physics events of one data type from its i3 files.

    Parameters
    ----------
    dtype : key of the module-level ``folder`` mapping
        Selects the list of files to read. A ``ProbMap`` is built and handed
        to ``append`` only when ``'genie' in dtype`` is true (presumably
        ``dtype`` is a string -- confirm against callers).
    icc : bool, optional
        Forwarded unchanged to ``passed`` and ``append``.

    Returns
    -------
    events : dict
        One ``numpy`` array per key of the module-level ``variables``.
    """
    ### files to be looped through
    filenames = folder[dtype]
    # kept as a float because it is handed to ``append`` in this form
    nfiles = float(len(filenames))
    print('nfiles: {0}'.format(nfiles))

    ### initialize an event holder
    events = {key: [] for key in variables.keys()}

    ### get prob map if genie
    ### also oscillate NC by default
    probmap = ProbMap(matter=True, params=params) if 'genie' in dtype else None

    ### loop through files
    for filename in filenames:
        print('+-- {0}'.format(filename))
        i3file = I3File(filename, 'r')
        try:
            ## loop through frames
            while i3file.more():
                frame = i3file.pop_physics()
                if not frame:
                    print('| this frame is empty ...')
                    continue
                # append only physics frames that pass the selection
                if passed(frame, icc=icc):
                    events = append(frame, dtype, events, nfiles,
                                    probmap=probmap, icc=icc)

            # running totals over all files processed so far
            print('| unweighted nevents: {0}'.format(len(events['weight'])))
            print('| weighted nevents: {0}'.format(np.sum(events['weight'])))
        finally:
            # release the file handle even if a frame could not be processed
            i3file.close()

    ### numpify
    events = {key: np.array(value) for key, value in events.items()}

    ### summary
    weight = events['weight']
    print('+-------------------------------------')
    print('| unweighted before L6cuts : {0}'.format(len(weight)))
    # NOTE(review): '{0:4f}' is a minimum field width of 4, not 4 decimals;
    # '{0:.4f}' may have been intended -- left unchanged.
    print('| weighted before L6cuts : {0:4f} mHZ'.format(
        np.sum(weight) * 1000.))

    ### return output
    return events
def CmpI3File(rep, fileNameCurrent, fileNameReference,
              restrictFrames=None, restrictKeys=None, ignoreKeys=None,
              strictOnLayout=False, keyBasedCmp=None):
    """
    Compare contents of two I3Files.

    Files are compared frame by frame. The set of keys to compare can be
    restricted (restrictKeys) or selected frame keys can be ignored
    (ignoreKeys). If strictOnLayout is False, curFrame may contain more keys
    than refFrame; otherwise the key sets have to be identical. Key-based
    comparison functions can be passed (keyBasedCmp) and will overwrite
    type-based comparison.

    If restrictFrames is given (and non-empty), only frames whose zero-based
    index is contained in it are compared.

    The comparison stops as soon as the reference file yields no further
    frame; a current file that is shorter than the reference is reported via
    ``rep.failure``, while surplus frames in the current file are not
    reported. Both files are closed before returning.
    """
    from icecube.dataio import I3File

    with rep.section("[File] %s <-> %s" % (fileNameCurrent, fileNameReference)):
        fileCur = I3File(fileNameCurrent)
        try:
            fileRef = I3File(fileNameReference)
            try:
                iFrame = -1
                while True:
                    iFrame += 1
                    # Always pop from both files so they stay in lock-step,
                    # even for frames that are skipped below.
                    frameCur = fileCur.pop_frame()
                    frameRef = fileRef.pop_frame()
                    if frameRef is None:
                        break
                    if restrictFrames and iFrame not in restrictFrames:
                        continue
                    with rep.section("[Frame] %i" % iFrame):
                        if frameCur is None:
                            rep.failure("Frame missing LEFT")
                        else:
                            CmpI3Frame(rep, frameCur, frameRef, restrictKeys,
                                       ignoreKeys, strictOnLayout, keyBasedCmp)
            finally:
                fileRef.close()
        finally:
            fileCur.close()
def setUp(self):
    """Record the stop and the key set of every frame in ``seekable_file``.

    The result is stored in ``self.ref_frame_info`` as a list of dicts with
    the entries ``"stop"`` and ``"keys"``, in file order, and a one-line
    summary per frame is printed.
    """
    ref = I3File(seekable_file)
    try:
        self.ref_frame_info = []
        while ref.more():
            frame = ref.pop_frame()
            self.ref_frame_info.append({
                "stop": frame.Stop,
                "keys": set(frame.keys()),
            })
    finally:
        # the reference handle is only needed while scanning
        ref.close()

    for info in self.ref_frame_info:
        # %-formatting prints the same text as the former Python 2 print
        # statement and is valid on both Python 2 and Python 3
        print("%s %s" % (info["stop"], len(info["keys"])))
def extract_gcd_files(gcd_files, retro_gcd_dir, verbosity=0):
    """Extract the G, C, and D frames of one or more GCD files via
    `extract_gcd_frames`.

    Parameters
    ----------
    gcd_files : string or iterable thereof
    retro_gcd_dir : string
        Path to communal Retro-extracted GCD dir
    verbosity : int in [0, 1]
        If non-zero, write "<md5 hex> <path>" to stdout for each file

    Returns
    -------
    gcd_md5_hexs : len(gcd_files)-list of strings

    Raises
    ------
    ValueError
        If a file contains more than one G, C, or D frame, or lacks any of
        them

    """
    # Import here so module can be read without access to IceCube software
    from icecube.dataio import I3File  # pylint: disable=no-name-in-module, import-outside-toplevel
    from icecube.icetray import I3Frame  # pylint: disable=no-name-in-module, import-outside-toplevel

    if isinstance(gcd_files, string_types):
        gcd_files = [gcd_files]

    # (frame stop, keyword expected by `extract_gcd_frames`, label used in
    # the "multiple frames" error message)
    gcd_stops = (
        (I3Frame.Geometry, "g_frame", "G"),
        (I3Frame.Calibration, "c_frame", "C"),
        (I3Frame.DetectorStatus, "d_frame", "D"),
    )

    gcd_md5_hexs = []
    for gcd_fpath in gcd_files:
        gcd_fpath = expand(gcd_fpath)
        gcd_frames = OrderedDict()

        i3f = I3File(gcd_fpath)
        try:
            while i3f.more():
                frame = i3f.pop_frame()
                for stop, key, label in gcd_stops:
                    if frame.Stop == stop:
                        if key in gcd_frames:
                            raise ValueError(
                                'GCD file "{}" contains multiple {} frames'.format(
                                    gcd_fpath, label))
                        gcd_frames[key] = frame
                        break
        finally:
            # Frames already popped are kept in `gcd_frames`; the file
            # handle itself is no longer needed
            i3f.close()

        for frame_type in ("g", "c", "d"):
            if "{}_frame".format(frame_type) not in gcd_frames:
                raise ValueError('No {} frame found in GCD file "{}"'.format(
                    frame_type, gcd_fpath))

        metadata = OrderedDict()
        metadata["extracted_on_fqdn"] = socket.getfqdn()
        metadata["path_to_sourcefile"] = abspath(gcd_fpath)
        metadata["sourcefile_md5sum"] = get_file_md5(gcd_fpath)

        try:
            gcd_md5_hex = extract_gcd_frames(retro_gcd_dir=retro_gcd_dir,
                                             metadata=metadata,
                                             **gcd_frames)
        except Exception:
            # Say which file failed, then let the original error propagate
            sys.stderr.write(
                'failed to extract GCD file "{}"\n'.format(gcd_fpath))
            raise

        gcd_md5_hexs.append(gcd_md5_hex)
        if verbosity:
            sys.stdout.write("{} {}\n".format(gcd_md5_hex, gcd_fpath))

    return gcd_md5_hexs
def extract_gcd_frames(g_frame, c_frame, d_frame, retro_gcd_dir, metadata=None):
    """Extract GCD info to Python/Numpy-readable objects stored to a central
    GCD directory, subdirs of which are named by the hex md5sum of each
    extracted GCD file.

    Parameters
    ----------
    g_frame : icecube.icetray.I3Frame with stop I3Frame.Geometry
    c_frame : icecube.icetray.I3Frame with stop I3Frame.Calibration
    d_frame : icecube.icetray.I3Frame with stop I3Frame.DetectorStatus
    retro_gcd_dir : string
    metadata : None or mapping, optional
        If non-empty mapping (e.g., OrderedDict) is provided, the contents are
        written to the gcd file's subdirectory inside retro_gcd_dir as
        "metadata.json"

    Returns
    -------
    gcd_md5_hex : len-32 string of chars 0-9 and/or a-f
        MD5 sum of _only_ the G, C, and D frames (in that order) dumped to an
        uncompressed i3 file. Note that this can result in a hash value
        different from hashing the original GCD file if other frames were
        present besides the GCD frames (such as an I frame, or Q/P/etc. if the
        GCD is embedded in a data i3 file)

    """
    from icecube.dataio import I3File  # pylint: disable=import-outside-toplevel

    retro_gcd_dir = expand(retro_gcd_dir)

    # Create root dir for gcd subdirs if necessary
    if not isdir(retro_gcd_dir):
        try:
            mkdir(retro_gcd_dir)
        except OSError as err:
            # Another process may have created the directory between the
            # check and the call; that is fine (same policy as for
            # `copytree` further down)
            if err.errno != errno.EEXIST:
                raise

    # Add a vaguely useful README to gcd root dir
    readme_fpath = join(retro_gcd_dir, "README")
    if not isfile(readme_fpath):
        with io.open(readme_fpath, "w", encoding="utf-8") as fhandle:
            fhandle.write(GCD_README.strip() + "\n")

    # Find md5sum of an uncompressed GCD file created by these G, C, & D frames
    tempdir_path = mkdtemp(suffix="gcd")
    try:
        gcd_i3file_path = join(tempdir_path, "gcd.i3")
        gcd_i3file = I3File(gcd_i3file_path, "w")
        try:
            gcd_i3file.push(g_frame)
            gcd_i3file.push(c_frame)
            gcd_i3file.push(d_frame)
        finally:
            # Close even if a push fails so the handle is not leaked
            gcd_i3file.close()
        gcd_md5_hex = get_file_md5(gcd_i3file_path)
    finally:
        # Best-effort cleanup; failing to remove the temp dir is not fatal
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    this_gcd_dir_path = join(retro_gcd_dir, gcd_md5_hex)
    if isdir(this_gcd_dir_path):
        # already extracted this GCD
        sys.stderr.write(
            "Already extracted GCD with md5sum {}\n".format(gcd_md5_hex))
        return gcd_md5_hex

    # Everything is written to a temp dir first and only then copied to its
    # final location
    tempdir_path = mkdtemp(suffix="." + gcd_md5_hex)
    try:
        # Extract GCD info into Python/Numpy-readable things
        gcd_info = OrderedDict()
        gcd_info["I3Geometry"] = extract_i3_geometry(g_frame)
        gcd_info["I3Calibration"] = extract_i3_calibration(c_frame)
        gcd_info["I3DetectorStatus"] = extract_i3_detector_status(d_frame)
        gcd_info.update(extract_bad_doms_lists(d_frame))

        # Write info to files. Preferable to write a single array to a .npy file;
        # second most preferable is to write multiple arrays to (compressed) .npz
        # file (faster to load than pkl files); finally, I3DetectorStatus _has_ to
        # be stored as pickle to preserve varying-length items.
        for key, val in gcd_info.items():
            if isinstance(val, Mapping):
                if key == "I3DetectorStatus":
                    key_fpath = join(tempdir_path, key + ".pkl")
                    with io.open(key_fpath, "wb") as fhandle:
                        pickle.dump(val, fhandle,
                                    protocol=pickle.HIGHEST_PROTOCOL)
                else:
                    np.savez_compressed(join(tempdir_path, key + ".npz"), **val)
            else:
                assert isinstance(val, np.ndarray)
                np.save(join(tempdir_path, key + ".npy"), val)

        if metadata:
            metadata_fpath = join(tempdir_path, "metadata.json")
            with open(metadata_fpath, "w") as fhandle:
                json.dump(metadata, fhandle, sort_keys=False, indent=4)

        try:
            copytree(tempdir_path, this_gcd_dir_path)
        except OSError as err:
            # The destination having appeared in the meantime is not an error
            if err.errno != errno.EEXIST:
                raise

    finally:
        # Best-effort cleanup; failing to remove the temp dir is not fatal
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    return gcd_md5_hex
def retro_recos_to_i3files(
    eventsdir,
    point_estimator,
    recos=None,
    reco_suffix=None,
    i3dir=None,
    overwrite=False,
    replace_existing_frame_items=False,
):
    """Take retro recos found in .npy files / retro directory structure and
    corresponding i3 files and generate new i3 files like the original but
    populated with the retro reco information.

    Parameters
    ----------
    eventsdir : str

    point_estimator : str in {"mean", "median", "max"}
        Forwarded to `populate_pframe`

    recos : str or iterable thereof, optional
        If not specified, all "retro_*" recos found will be populated

    reco_suffix : str or None, optional

    i3dir : str, optional
        If None or not specified, defaults to `eventsdir`

    overwrite : bool, optional
        If False, an output file that already exists is skipped

    replace_existing_frame_items : bool, optional
        Forwarded to `populate_pframe`

    Raises
    ------
    ValueError
        If a requested reco name does not start with "retro_", if a reco
        array and the events array differ in length, or if the event header
        of a physics frame does not match the corresponding entry of
        "events.npy"
    IOError
        If no i3 file matching an events directory is found

    Notes
    -----
    A partially written output file is removed if processing fails. No check
    is made that every entry of "events.npy" was matched to a physics frame.

    """
    eventsdir = abspath(expanduser(expandvars(eventsdir)))

    # If the leaf reco/events/truth dir "recos" was specified, must go one up
    # to find events/truth
    if basename(eventsdir) == "recos":
        eventsdir = dirname(eventsdir)

    if recos is None:
        recos = [
            splitext(basename(n))[0]
            for n in glob(join(eventsdir, "recos", "retro_*.npy"))
        ]

    if isinstance(recos, string_types):
        recos = [recos]
    else:
        recos = sorted(recos)

    for reco in recos:
        if not reco.startswith("retro_"):
            raise ValueError(
                'Can only populate "retro_*" recos; "{}" is invalid'.format(
                    reco))

    reco_suffix = reco_suffix if reco_suffix else ""

    if i3dir is None:
        i3dir = eventsdir
    else:
        i3dir = abspath(expanduser(expandvars(i3dir)))

    # -- Walk directories and match (events, recos) to i3 paths -- #

    for events_dirpath, dirs, filenames in walk(eventsdir):
        # In-place sort so `walk` descends in natural-sort order
        dirs.sort(key=nsort_key_func)
        if "events.npy" not in filenames:
            continue

        missing_recos = []
        reco_filepaths = {}
        for reco in recos:
            reco_filepath = join(events_dirpath, "recos", "{}.npy".format(reco))
            if isfile(reco_filepath):
                reco_filepaths[reco] = reco_filepath
            else:
                missing_recos.append(reco)

        if missing_recos:
            print('WARNING: Missing recos {} in dir "{}"'.format(
                missing_recos, events_dirpath))

        # Nothing to populate for this directory
        if set(missing_recos) == set(recos):
            continue

        eventsdir_basename = basename(events_dirpath)
        i3filedir = join(i3dir,
                         relpath(dirname(events_dirpath), start=eventsdir))
        i3filepaths = sorted(
            glob(join(i3filedir, "{}.i3*".format(eventsdir_basename))),
            key=nsort_key_func,
        )
        if not i3filepaths:
            raise IOError(
                'No matching i3 file "{}.i3*" in directory "{}"'.format(
                    eventsdir_basename, i3filedir))
        input_i3filepath = i3filepaths[0]
        if len(i3filepaths) > 1:
            print(
                'WARNING: found multiple i3 files in dir, picking first one "{}"'
                .format(i3filepaths))
        print("input_i3filepath:", input_i3filepath)

        suffix = "__" + "__".join(
            sorted((r + reco_suffix) for r in reco_filepaths.keys()))
        output_i3filepath = join(
            i3filedir,
            "{base}{suffix}{extensions}".format(
                base=basename(input_i3filepath)[:len(eventsdir_basename)],
                suffix=suffix,
                extensions=".i3.zst",
            ),
        )

        if not overwrite and isfile(output_i3filepath):
            print(
                'WARNING: skipping writing output path that already exists: "{}"'
                .format(output_i3filepath))
            continue

        print("output_i3filepath:", output_i3filepath)
        print("events_dirpath:", events_dirpath)

        events = np.load(join(events_dirpath, "events.npy"))
        recos_d = OrderedDict()
        for reco, reco_filepath in reco_filepaths.items():
            recos_d[reco] = np.load(reco_filepath)
            if len(recos_d[reco]) != len(events):
                raise ValueError("{} has len {}, events has len {}".format(
                    reco, len(recos_d[reco]), len(events)))

        # Collect frames into an event chain until we hit a DAQ frame, a
        # second Physics frame, or the end of the file.
        #
        # * If the finished chain contains a Physics frame, it counts as the
        #   next event: check its header against "events.npy" and populate
        #   the reco(s) to that frame.
        #
        # * Chains without a Physics frame are not counted as events.
        #
        # Then, regardless of why the chain ended or whether it held an
        # event, write all frames in the chain out to the new i3 file.

        input_i3file = I3File(input_i3filepath, "r")
        output_i3file = I3File(output_i3filepath, "w")

        id_fields = [
            "run_id", "sub_run_id", "event_id", "sub_event_id",
            "sub_event_stream"
        ]

        frame_buffer = []
        chain_has_daq_frame = False
        chain_has_physics_frame = False
        frame_counter = 0
        event_index = -1
        try:
            while True:
                if input_i3file.more():
                    try:
                        next_frame = input_i3file.pop_frame()
                    except Exception:
                        sys.stderr.write(
                            "Failed to pop frame #{}\n".format(frame_counter + 1))
                        raise
                    frame_counter += 1
                else:
                    next_frame = None

                # Current chain has ended and a new one will have to be started
                # (or we're at the end of the file).
                # Populate the reco to the current chain, push all of the
                # current chain's frames to the output file, and start a new
                # chain with the next frame (or quit if we're at the end of the
                # file).
                if (next_frame is None or next_frame.Stop == I3Frame.DAQ
                        or (chain_has_physics_frame
                            and next_frame.Stop == I3Frame.Physics)):
                    if frame_buffer:
                        # Events are identified as a chain with daq frame being
                        # present with no physics frame, physics frame present
                        # with no daq frame, or both being present (existence
                        # of other frames is considered to be irrelevant)

                        # TODO: oscNext v01.01 by L5, i3 file processing was
                        # messed up, there were Q frames followed by I frames
                        # and no associated P frame. Therefore we have to only
                        # count chains with P frames in them as events, or else
                        # the recos won't be put back in the right place /
                        # indices run out.
                        #if chain_has_daq_frame or chain_has_physics_frame:
                        if chain_has_physics_frame:
                            event_index += 1

                            # Make sure event headers match
                            pframe = None
                            for frame in frame_buffer[::-1]:
                                if frame.Stop == I3Frame.Physics:
                                    pframe = frame
                            assert pframe is not None
                            i3hdr = pframe["I3EventHeader"]
                            i3hdr_id = tuple(
                                getattr(i3hdr, field) for field in id_fields)
                            retro_event_id = tuple(
                                events[event_index][id_fields])
                            if retro_event_id != i3hdr_id:
                                raise ValueError(
                                    "retro event {} != frame event {}".format(
                                        retro_event_id, i3hdr_id))

                            populate_pframe(
                                event_index=event_index,
                                frame_buffer=frame_buffer,
                                recos_d=recos_d,
                                reco_suffix=reco_suffix,
                                point_estimator=point_estimator,
                                replace_existing_frame_items=
                                replace_existing_frame_items,
                            )

                        # Regardless if there was an event identified in the
                        # chain, push all frames to the output file
                        for frame in frame_buffer:
                            output_i3file.push(frame)

                    # No next frame indicates we hit the end of the file; quit
                    if next_frame is None:
                        break

                    # Create a new chain, starting with the next frame
                    frame_buffer = [next_frame]
                    chain_has_daq_frame = next_frame.Stop == I3Frame.DAQ
                    chain_has_physics_frame = next_frame.Stop == I3Frame.Physics

                # Otherwise, we have just another frame in the current chain;
                # append it and move on.
                else:
                    frame_buffer.append(next_frame)
                    chain_has_daq_frame |= next_frame.Stop == I3Frame.DAQ
                    chain_has_physics_frame |= next_frame.Stop == I3Frame.Physics

        except BaseException:
            # Deliberately catches everything (including KeyboardInterrupt)
            # so a partially written output file never survives; the
            # exception is re-raised afterwards.
            output_i3file.close()
            del output_i3file
            remove(output_i3filepath)
            sys.stderr.write('ERROR! file "{}", frame #{}\n'.format(
                input_i3filepath, frame_counter + 1))
            raise

        else:
            output_i3file.close()
            del output_i3file

        finally:
            # Release the input handle whether or not processing succeeded
            input_i3file.close()
import numpy as np
from numpy.linalg import norm
import glob
import os
from icecube.dataio import I3File
from icecube import icetray, dataclasses, recclasses, simclasses

# Directory holding the input data files
data_location = '/cr/data01/hagne/John_project/Donghwa/'
# Destination for the frames that pass the quality cuts
cut_location = "./data/i3files"

# Geometry: taken from the first frame popped from the GCD file
# (presumably the G frame -- confirm for this file)
geom_file = I3File("GeoCalibDetectorStatus_2012.56063_V1_OctSnow.i3.gz")
geom_frame = geom_file.pop_frame()
geometry = geom_frame['I3Geometry']
geom_file.close()

# Energy lists, one per primary type
iron_energy, proton_energy = [], []

# --------------------------------------------------------------------------
# Now get the data from the files ------------------------------------------

# All files in the data folder whose name starts with "Level2"
files = glob.glob(os.path.join(data_location, 'Level2*'))

# Running count of frames that passed, for printing
total_frames = 0

# loop through files
# ------------------------------------------------------------------------------ # MAIN ------------------------------------------------------------------------- # ------------------------------------------------------------------------------ #load the showers data_location = './data/' protondata = np.load(data_location + 'proton_showers.npy') #irondata = np.load(data_location + 'iron_showers.npy') #corsika file location corsika_location = '/cr/data01/hagne/John_project/CORSIKA/muonsPROPER/' # get geometry info geom_file = I3File("./data/GeoCalibDetectorStatus_2012.56063_V1_OctSnow.i3.gz") geom_frame = geom_file.pop_frame() geometry = geom_frame['I3Geometry'] geom_file.close() # list of tanks tanks = [] for i in range(1,82): # arrays are [station #, tank #, number of muons detected] # tank 1 contains DOMs 61,62; tank 2 contains DOMs 63,64 tanks.append([i,1]) tanks.append([i,2]) # tank coordinates ------------------- for tank in tanks: station = tank[0] if tank[1] == 1: