def _geofon_field(pattern, data, filename, label):
    """Return group 1 of the first match of ``pattern`` in ``data``.

    Raises ``InputError`` naming the missing field, so a malformed file
    does not surface as an ``AttributeError`` on ``None``.
    """
    match = re.search(pattern, data)
    if match is None:
        raise InputError(filename,
                         "Cannot find %s in GEOFON file" % label)
    return match.group(1)


def _read_geofon_picks(data, origin_time):
    """Parse the phase block of a GEOFON file into a pick DataFrame.

    The result is indexed by a 2-level (Station, Phase) MultiIndex and has
    the columns Time, Quality, Residual, Distance and Azimuth.
    """
    # The quality flag must be a lone ``M`` or ``A`` followed by a blank,
    # so picks with MX and AX quality (picks weighted to zero) never match.
    phase_pattern = re.compile(r'\s+([A-Z0-9]+)\s+([A-Z]+)\s+'
                               r'(\d+\.\d+)\s+(\d+)\s+([a-zA-Z]+)\s+'
                               r'(\d\d:\d\d:\d\d\.\d)\s+'
                               r'(-?\d+\.\d|N/A)\s(M|A)\s+'
                               r'\d\.\d\s+[A-Z0-9]+')
    # network codes that are replaced before building the station code
    network_aliases = {'AD': 'RM', 'EV': 'IO'}
    origin_date = origin_time.date()
    origin_clock = origin_time.time()

    station_codes = []
    rows = []
    for (sta, net, dist, azi, phase,
         clock, res, quality) in phase_pattern.findall(data):
        net = network_aliases.get(net, net)
        station_codes.append("_".join((net, sta)))

        # The phase block only carries a time of day. An arrival whose time
        # of day is earlier than the origin's is placed on the next day.
        arrival_clock = datetime.strptime(clock, "%H:%M:%S.%f").time()
        if arrival_clock < origin_clock:
            arrival_date = origin_date + timedelta(days=1)
        else:
            arrival_date = origin_date

        try:
            residual = float(res)
        except ValueError:
            # the residual column reads 'N/A'
            residual = np.nan

        rows.append((phase,
                     datetime.combine(arrival_date, arrival_clock),
                     quality, residual, deg2m(float(dist)), float(azi)))

    columns = ['Phase', 'Time', 'Quality', 'Residual', 'Distance', 'Azimuth']
    # Build the table in one go, labelled by station code.
    picks = pd.DataFrame(rows, index=station_codes, columns=columns)
    # Just to be on the safe side! (this also drops 'N/A' residuals)
    picks.dropna(inplace=True)

    # --- make a DataFrame with 2-level MultiIndex indexes
    tuples = list(zip(picks.index, picks['Phase']))
    picks.drop(labels='Phase', axis=1, inplace=True)
    picks.index = pd.MultiIndex.from_tuples(tuples,
                                            names=['Station', 'Phase'])

    # Convert columns to numeric and datetime types
    for column in ('Residual', 'Distance', 'Azimuth'):
        picks[column] = pd.to_numeric(picks[column])
    picks['Time'] = pd.to_datetime(picks['Time'])
    return picks


def read_geofon_hyp(filename, phase_data=False):
    """
    Read a GEOFON hypocenter-phase file.

    :param str filename:
        Full or relative path and name for hypocenter-phase file.
    :param bool phase_data:
        If True, the phase block in the file is read as well and returned.
    :returns:
        A ``GeofonEvent`` built from the public ID, origin time (as
        ``datetime`` and as string), latitude, longitude, depth, residual
        RMS, azimuthal gap, preferred magnitude and the pick table
        (``None`` unless ``phase_data`` is set).
    :raises InputError:
        If ``filename`` is not a string, does not exist, or if one of the
        expected origin/magnitude fields cannot be found in the file.
    """
    # error handling
    if not isinstance(filename, basestring):
        raise InputError(filename, "Need string or buffer")

    if not op.exists(filename):
        raise InputError(filename, "No such file or directory")

    with open(filename, "r") as f:
        data = f.read()

    # --- READ ORIGIN BLOCK ---
    public_ID = _geofon_field(r'\s+Public ID\s+(gfz\d\d\d\d[a-z]+)',
                              data, filename, "public ID")

    odate = _geofon_field(r'\s+Date\s+(\d\d\d\d-\d\d-\d\d)',
                          data, filename, "origin date")
    otime = _geofon_field(r'\s+Time\s+(\d\d:\d\d:\d\d\.\d+)',
                          data, filename, "origin time")
    origin_time_str = ' '.join((odate, otime))
    origin_time = datetime.strptime(origin_time_str, "%Y-%m-%d %H:%M:%S.%f")

    latitude = float(_geofon_field(
        r'\s+Latitude\s+(-?\d{1,2}\.\d\d)\sdeg', data, filename, "latitude"))
    longitude = float(_geofon_field(
        r'\s+Longitude\s+(-?\d{1,3}\.\d\d)\sdeg', data, filename,
        "longitude"))
    depth = float(_geofon_field(
        r'\s+Depth\s+(\d{1,3})\skm', data, filename, "depth"))
    residual_rms = float(_geofon_field(
        r'\s+Residual RMS\s+(\d+\.\d\d)\ss', data, filename, "residual RMS"))
    azimuthal_gap = int(_geofon_field(
        r'\s+Azimuthal gap\s+(\d+)\sdeg', data, filename, "azimuthal gap"))

    # --- READ MAGNITUDE BLOCK ---
    # Raw string with the decimal point escaped, so only a literal
    # ``d.dd`` value on the line marked "preferred" is accepted.
    mag = float(_geofon_field(
        r'\s+.(\d\.\d\d).+preferred', data, filename, "preferred magnitude"))

    # --- READ PHASE BLOCK ---
    used_picks = None
    if phase_data:
        used_picks = _read_geofon_picks(data, origin_time)

    return GeofonEvent(public_ID, origin_time, origin_time_str, latitude,
                       longitude, depth, residual_rms, azimuthal_gap, mag,
                       used_picks)
def _read_nlloc_picks(phase_lines):
    """Parse the lines of a NLLoc PHASE block into a pick DataFrame.

    Phases whose predicted travel time or weight is zero are skipped. The
    result is indexed by a 2-level (Station, Phase) MultiIndex and has the
    columns Residual, Distance, Azimuth and TimeCorr.
    """
    columns = ['Phase', 'Residual', 'Distance', 'Azimuth', 'TimeCorr']
    station_codes = []
    rows = []
    for line in phase_lines:
        items = line.split()
        ttpred, ttres, ttweight = [float(x) for x in items[15:18]]
        if ttpred == 0.0 or ttweight == 0.0:
            continue
        station_codes.append(items[0])
        rows.append((items[4],                  # phase name
                     ttres,
                     deg2m(float(items[21])),   # meters!
                     float(items[22]),          # station azimuth
                     float(items[26])))         # time correction

    # Build the table in one go, labelled by station code.
    picks = pd.DataFrame(rows, index=station_codes, columns=columns)
    picks.dropna(inplace=True)

    # --- make a DataFrame with 2-level MultiIndex indexes
    tuples = list(zip(picks.index, picks['Phase']))
    picks.drop(labels='Phase', axis=1, inplace=True)
    picks.index = pd.MultiIndex.from_tuples(tuples,
                                            names=['Station', 'Phase'])

    # convert objects to numeric types. possible for all columns.
    return picks.apply(pd.to_numeric)


def read_nlloc_hyp(filename, phase_data=False):
    """
    Read a NonLinLoc Hypocenter-phase file.

    :param str filename:
        Full or relative path and name of the hypocenter-phase file.
    :param bool phase_data:
        If true, the PHASE block is parsed as well and returned as a
        DataFrame; otherwise the picks of the event are ``None``.
    :returns:
        A ``NLLocEvent`` built from the location status, observation file,
        hypocenter coordinates, origin time, uncertainties, origin-quality
        measures and the pick table.
    :raises InputError:
        If ``filename`` is not a string, does not exist, or if the
        NLLOC/END_NLLOC and PHASE/END_PHASE markers are missing or
        out of order.
    """
    if not isinstance(filename, basestring):
        raise InputError(filename, "need string or buffer")

    if not os.path.exists(filename):
        raise InputError(filename, "no such file or directory")

    with open(filename, 'r') as f:
        # keep the non-empty lines only
        data = [line for line in f.read().splitlines() if line]

    # determine indices of block start/end of the NLLOC output file
    # (if a marker occurs more than once, its last occurrence wins)
    hypo_start = hypo_end = phase_start = phase_end = None
    for i, line in enumerate(data):
        if line.startswith("NLLOC "):
            hypo_start = i
        elif line.startswith("END_NLLOC"):
            hypo_end = i
        elif line.startswith("PHASE "):
            phase_start = i
        elif line.startswith("END_PHASE"):
            phase_end = i

    markers = (hypo_start, phase_start, phase_end, hypo_end)
    if (any(marker is None for marker in markers) or
            not hypo_start < phase_start < phase_end < hypo_end):
        msg = 'Input NLLOC Hypocenter-Phase file is corrupt'
        raise InputError(filename, msg)

    # To skip any other lines around NLLOC block. All four indices refer
    # to ``data``, so the slices are taken from ``data`` directly; slicing
    # a sub-list with them would be wrong whenever the NLLOC line is not
    # the first line.
    hypo_lines = data[hypo_start:phase_start] + data[phase_end + 1:hypo_end]
    phase_lines = data[phase_start + 1:phase_end]

    # --- HYPOCENTER INFORMATION ---
    # Map the first word of every line to the rest of the line; lines that
    # consist of a single word carry no value and are skipped.
    pairs = (line.split(None, 1) for line in hypo_lines)
    hypo_lines = dict(pair for pair in pairs if len(pair) == 2)

    # NLLOC line: Specifies the beginning of a NLLoc Hypocenter-Phase
    # description block and gives the location status.
    line = hypo_lines["NLLOC"]
    quoted = [part for part in line.split('"') if part]
    status = quoted[-1].strip('.')

    # SIGNATURE Line: Signature text and program run stamp.
    line = hypo_lines["SIGNATURE"]
    obsfile = re.search(r'obs:(.+)\s+NLLoc', line).group(1).strip()

    # TRANSFORM line:
    # Geographic to rectangular transformation parameters.
    trans = hypo_lines["TRANSFORM"].split()[0]

    # GEOGRAPHIC line:
    # Maximum likelihood hypocenter - Geographic coordinates.
    items = hypo_lines["GEOGRAPHIC"].split()
    d = '-'.join(items[1:4])
    t = ':'.join(items[4:7])
    origin_time_str = ' '.join((d, t))
    try:
        origin_time = dt.datetime.strptime(origin_time_str,
                                           "%Y-%m-%d %H:%M:%S.%f")
    except ValueError:
        # there is a bug (simplification) in NLLoc; negative
        # origin-time for those events occurred just before midnight!
        year, month, day = [int(x) for x in items[1:4]]
        microsec, sec = np.modf(float(items[6]))
        sec = abs(int(sec))
        microsec = abs(int(microsec * 1.0e+6))
        midnight = dt.datetime(year, month, day, 0, 0, 0)
        origin_time = midnight - dt.timedelta(days=0, seconds=sec,
                                              microseconds=microsec)

    latitude, longitude = [float(x) for x in items[-5:-2:2]]
    depth = float(items[-1]) * km2m

    if trans == 'GLOBAL':
        X, Y, Z = geodetic2ecef(latitude, longitude, (-1*depth))
    else:
        # HYPOCENTER line: Maximum likelihood hypocenter - xyz coordinates.
        items = hypo_lines["HYPOCENTER"].split()
        X, Y, Z = [float(q) * km2m for q in items[1:6:2]]  # meters!

    # STATISTICS line: "Traditional" Gaussian (normal) statistics
    # of PDF (evaluated for PROB_DENSITY grids only).
    items = hypo_lines["STATISTICS"].split()
    covXX = float(items[7])
    covYY = float(items[13])
    covZZ = float(items[17])
    # NOTE(review): covXX feeds the latitude uncertainty and covYY the
    # longitude uncertainty -- confirm this matches the X/Y axis
    # convention of the location grid.
    latUnc = np.sqrt(covXX)*km2m  # meters!
    longUnc = np.sqrt(covYY)*km2m  # meters!
    depthUnc = np.sqrt(covZZ) * km2m  # meters!

    # QML_OriginQuality line: Specifies QuakeML OriginQuality measures.
    items = hypo_lines["QML_OriginQuality"].split()
    assocPhCt, usedPhCt = [int(x) for x in items[1:4:2]]
    resRms, aziGap, secAziGap = [float(x) for x in items[11:16:2]]
    minDist, maxDist, medianDist = [
        float(x) * km2m for x in items[-5::2]]  # meters!

    # QML_OriginUncertainty line: Specifies QuakeML
    # OriginUncertainty measures.
    items = hypo_lines["QML_OriginUncertainty"].split()
    horUnc, minHorUnc, maxHorUnc = [
        float(x) * km2m for x in items[1:6:2]]  # meters!
    aziMaxHorUnc = float(items[-1])

    # --- PHASE PICK INFORMATION ---
    used_picks = None
    if phase_data:
        used_picks = _read_nlloc_picks(phase_lines)

    return NLLocEvent(status, obsfile, X, Y, Z, latitude, longitude, depth,
                      origin_time, origin_time_str, latUnc, longUnc,
                      depthUnc, assocPhCt, usedPhCt, resRms, aziGap,
                      secAziGap, minDist, maxDist, medianDist, horUnc,
                      minHorUnc, maxHorUnc, aziMaxHorUnc, used_picks)