def read_sds(window):
    """
    Read SDS database.

    :param dict window: Time window.
    :rtype: dict
    :return: Dict contains all traces within the time window,
        keyed by the two-character channel prefix (geophone type).
    """
    config = utils.get_config()
    client = sds.Client(sds_root=config['SDS_ROOT'])

    # NOTE(review): endtime is padded by 0.1 s — presumably so traces
    # touching the window boundary are included; confirm against callers.
    stream = client.get_waveforms(network="*",
                                  station=window['station'],
                                  location="*",
                                  channel="*",
                                  starttime=window['starttime'],
                                  endtime=window['endtime'] + 0.1)
    stream.sort(keys=['channel'], reverse=True)

    # Group traces by the first two characters of the channel code.
    grouped = collections.defaultdict(Stream)
    for trace in stream:
        grouped[trace.stats.channel[0:2]].append(trace)
    return grouped
def write_hyp_station(geom, save_file):
    """
    Write STATION0.HYP file from geometry.

    :param dict geom: Geometry dict (station -> latitude/longitude/elevation).
    :param str save_file: Name of .HYP file, written under GEOM_ROOT.
    """
    config = get_config()
    hyp = []
    for sta, loc in geom.items():
        latitude = loc['latitude']
        longitude = loc['longitude']
        # Bug fix: the hemisphere letter for longitude was derived from the
        # latitude sign (`if lat < 0: EW = 'W'`). Also, degrees/minutes must
        # be written unsigned — the hemisphere letter carries the sign —
        # so take abs() before splitting into degrees and minutes.
        NS = 'S' if latitude < 0 else 'N'
        EW = 'W' if longitude < 0 else 'E'
        lat = int(abs(latitude))
        lat_min = (abs(latitude) - lat) * 60
        lon = int(abs(longitude))
        lon_min = (abs(longitude) - lon) * 60
        elev = int(loc['elevation'])
        hyp.append(
            f' {sta: >5}{lat: >2d}{lat_min:>5.2f}{NS}{lon: >3d}{lon_min:>5.2f}{EW}{elev: >4d}\n'
        )
    hyp.sort()
    output = os.path.join(config['GEOM_ROOT'], save_file)
    with open(output, 'w') as f:
        f.writelines(hyp)
def read_kml_placemark(kml):
    """
    Returns geometry from Google Earth KML file.

    :param str kml: KML file name without directory.
    :rtype: dict
    :return: Geometry dict.
    """
    config = utils.get_config()
    kml_path = os.path.join(config['GEOM_ROOT'], kml)
    root = etree.parse(kml_path, etree.XMLParser()).getroot()
    nsmap = root.nsmap

    geom = {}
    for placemark in root.findall('.//Placemark', nsmap):
        sta = placemark.find('.//name', nsmap).text
        # KML coordinates are "lon,lat,elev".
        parts = placemark.find('.//coordinates', nsmap).text.split(",")
        geom[sta] = {
            'latitude': float(parts[1]),
            'longitude': float(parts[0]),
            'elevation': float(parts[2])
        }
    print(f'read {len(geom)} stations from {kml}')
    return geom
def read_sds(window):
    """
    Read SDS database.

    :param dict window: Time window (station/starttime/endtime keys).
    :rtype: dict
    :return: Dict contains all traces within the time window,
        keyed by the two-character channel prefix (geophone type).
    """
    config = get_config()
    station = window['station']
    starttime = window['starttime']
    # NOTE(review): endtime padded by 0.1 s — presumably to keep traces
    # touching the window boundary; confirm against callers.
    endtime = window['endtime'] + 0.1

    client = Client(sds_root=config['SDS_ROOT'])
    stream = client.get_waveforms(network="*", station=station,
                                  location="*", channel="*",
                                  starttime=starttime, endtime=endtime)
    stream.sort(keys=['channel'], reverse=True)

    stream_list = {}
    for trace in stream:
        geophone_type = trace.stats.channel[0:2]
        # Idiom fix: setdefault replaces the get()/overwrite-or-append
        # branching of the original (single lookup, same grouping result).
        stream_list.setdefault(geophone_type, Stream()).append(trace)
    return stream_list
def write_hyp_station(geom, save_file):
    """
    Write STATION0.HYP file from geometry.

    :param dict geom: Geometry dict.
    :param str save_file: Name of .HYP file.
    """
    config = utils.get_config()
    hyp = []
    for sta, loc in geom.items():
        latitude = loc['latitude']
        longitude = loc['longitude']
        # Bug fix: EW was previously derived from the latitude sign
        # (`if lat < 0: EW = 'W'`), mislabeling every western-hemisphere
        # station. Degrees/minutes are also written unsigned now — the
        # hemisphere letter carries the sign — via abs().
        NS = 'S' if latitude < 0 else 'N'
        EW = 'W' if longitude < 0 else 'E'
        lat = int(abs(latitude))
        lat_min = (abs(latitude) - lat) * 60
        lon = int(abs(longitude))
        lon_min = (abs(longitude) - lon) * 60
        elev = int(loc['elevation'])
        hyp.append(
            f' {sta: >5}{lat: >2d}{lat_min:>5.2f}{NS}{lon: >3d}{lon_min:>5.2f}{EW}{elev: >4d}\n'
        )
    hyp.sort()
    output = os.path.join(config['GEOM_ROOT'], save_file)
    with open(output, 'w') as f:
        f.writelines(hyp)
def generate_training_data(pick_list, dataset, database, chunk_size):
    """
    Generate TFrecords from database.

    :param pick_list: List of picks from Pick SQL query.
    :param str dataset: Output directory name.
    :param str database: SQL database.
    :param int chunk_size: Number of data stores in TFRecord.
    """
    config = utils.get_config()
    dataset_dir = os.path.join(config['DATASET_ROOT'], dataset)
    utils.make_dirs(dataset_dir)

    total_batch = int(len(pick_list) / chunk_size)
    flatten = itertools.chain.from_iterable
    for index, picks in enumerate(utils.batch(pick_list, size=chunk_size)):
        example_list = utils.parallel(picks,
                                      func=get_example_list,
                                      database=database)
        # example_list is doubly nested; flatten twice into one list.
        flat_list = list(flatten(flatten(example_list)))
        file_name = f'{index:0>5}.tfrecord'
        io.write_tfrecord(flat_list, os.path.join(dataset_dir, file_name))
        print(f'output {file_name} / {total_batch}')
def read_hyp(hyp):
    """
    Returns geometry from STATION0.HYP file.

    :param str hyp: STATION0.HYP name without directory.
    :rtype: dict
    :return: Geometry dict mapping station code to a dict with
        latitude, longitude and elevation.
    """
    config = utils.get_config()
    hyp_file = os.path.join(config['GEOM_ROOT'], hyp)
    geom = {}
    with open(hyp_file, 'r') as file:
        # Blank lines delimit sections of the .HYP file; station records
        # sit between the first and second blank line.
        blank_line = 0
        while True:
            line = file.readline().rstrip()
            if not len(line):
                blank_line += 1
                continue

            if blank_line > 1:
                # Past the station section - stop reading.
                break

            elif blank_line == 1:
                # Fixed-column record: columns 1-6 station, 6-14 latitude,
                # 14-23 longitude, 23+ elevation.
                lat = line[6:14]
                lon = line[14:23]
                elev = float(line[23:])
                sta = line[1:6].strip()

                # Sign from the trailing hemisphere letter (N/S, E/W).
                NS = 1
                if lat[-1] == 'S':
                    NS = -1
                EW = 1
                if lon[-1] == 'W':
                    EW = -1

                # Latitude: 2 degree digits, then minutes.
                lat_degree = int(lat[0:2])
                lat_minute = float(lat[2:-1]) / 60
                if '.' not in lat:  # high accuracy lat-lon
                    lat_minute /= 1000

                lat = (lat_degree + lat_minute) * NS
                lat = inventory.util.Latitude(lat)

                # Longitude: 3 degree digits, then minutes.
                lon_degree = int(lon[0:3])
                lon_minute = float(lon[3:-1]) / 60
                if '.' not in lon:  # high accuracy lat-lon
                    lon_minute /= 1000

                lon = (lon_degree + lon_minute) * EW
                lon = inventory.util.Longitude(lon)

                location = {
                    'latitude': lat,
                    'longitude': lon,
                    'elevation': elev
                }
                geom[sta] = location
    print(f'read {len(geom)} stations from {hyp}')
    return geom
def read_event_list(sfile):
    """
    Returns events parsed from a SEISAN sfile directory under CATALOG_ROOT.

    :param str sfile: Sfile directory name without the catalog root.
    :rtype: list
    :return: List of events.
    """
    config = get_config()
    sfile_dir = os.path.join(config['CATALOG_ROOT'], sfile)
    file_list = get_dir_list(sfile_dir)
    print(f'reading events from {sfile_dir}')
    events = parallel(par=get_event, file_list=file_list)
    print(f'read {len(events)} events from {sfile}')
    return events
def __init__(self, database, echo=False):
    """
    Open (or create) the SQLite database under SQL_ROOT and prepare ORM access.

    :param str database: Database file name.
    :param bool echo: Forwarded to SQLAlchemy engine logging.
    """
    config = utils.get_config()
    self.database = database
    path = os.path.join(config['SQL_ROOT'], self.database)
    url = f'sqlite:///{path}?check_same_thread=False'
    self.engine = sqlalchemy.create_engine(url, echo=echo)
    # Create all tables declared on Base, then bind a session factory.
    Base.metadata.create_all(bind=self.engine)
    self.session = sqlalchemy.orm.sessionmaker(bind=self.engine)
def get_model_dir(model_instance, remove=False):
    """
    Return the model directory and its history subdirectory, creating both.

    :param str model_instance: Model directory name under MODELS_ROOT.
    :param bool remove: If True, delete any existing model directory first.
    :rtype: tuple
    :return: (model directory path, history directory path).
    """
    config = utils.get_config()
    model_path = os.path.join(config['MODELS_ROOT'], model_instance)
    if remove:
        # Best-effort wipe: ignore a missing directory.
        shutil.rmtree(model_path, ignore_errors=True)
    utils.make_dirs(model_path)

    history_path = os.path.join(model_path, "history")
    utils.make_dirs(history_path)
    return model_path, history_path
def read_dataset(dataset):
    """
    Returns TFRecord Dataset from TFRecord directory.

    :param str dataset: Directory name (under DATASET_ROOT) containing TFRecords.
    :rtype: tf.data.Dataset
    :return: A Dataset.
    """
    config = utils.get_config()
    dataset_dir = os.path.join(config['DATASET_ROOT'], dataset)
    file_list = utils.get_dir_list(dataset_dir)
    # `dataset` is rebound here: directory name in, tf.data.Dataset out.
    dataset = tf.data.TFRecordDataset(file_list)
    dataset = dataset.map(example_proto.sequence_example_parser,
                          num_parallel_calls=mp.cpu_count())
    return dataset
def read_hyp(hyp):
    """
    Returns geometry from STATION0.HYP file.

    :param str hyp: STATION0.HYP name without directory.
    :rtype: dict
    :return: Geometry dict mapping station code to a dict with
        latitude, longitude and elevation.
    """
    config = get_config()
    hyp_file = os.path.join(config['GEOM_ROOT'], hyp)
    geom = {}
    with open(hyp_file, 'r') as file:
        # Blank lines delimit sections; station records sit between the
        # first and second blank line.
        blank_line = 0
        while True:
            line = file.readline().rstrip()
            if not len(line):
                blank_line += 1
                continue

            if blank_line > 1:
                # Past the station section - stop reading.
                break

            elif blank_line == 1:
                # Fixed-column record: columns 1-6 station, 6-14 latitude,
                # 14-23 longitude, 23+ elevation.
                lat = line[6:14]
                lon = line[14:23]
                elev = float(line[23:])
                sta = line[1:6].strip()

                # Sign from the trailing hemisphere letter (N/S, E/W).
                NS = 1
                if lat[-1] == 'S':
                    NS = -1
                EW = 1
                if lon[-1] == 'W':
                    EW = -1

                # Degrees + decimal minutes -> signed decimal degrees.
                # NOTE(review): no handling of the no-decimal-point
                # "high accuracy" minute format - confirm inputs always
                # contain a decimal point.
                lat = (int(lat[0:2]) + float(lat[2:-1]) / 60) * NS
                lat = Latitude(lat)

                lon = (int(lon[0:3]) + float(lon[3:-1]) / 60) * EW
                lon = Longitude(lon)

                location = {
                    'latitude': lat,
                    'longitude': lon,
                    'elevation': elev
                }
                geom[sta] = location
    print(f'read {len(geom)} stations from {hyp}')
    return geom
def read_event_list(sfile_dir):
    """
    Returns event list from sfile directory.

    :param str sfile_dir: Directory contains SEISAN sfile.
    :rtype: list
    :return: list of event.
    """
    config = utils.get_config()
    catalog_dir = os.path.join(config['CATALOG_ROOT'], sfile_dir)
    file_list = utils.get_dir_list(catalog_dir)
    print(f'Reading events from {catalog_dir}')

    # Parallel parse returns a doubly nested list; flatten twice.
    nested = utils.parallel(file_list, func=get_event)
    flatten = itertools.chain.from_iterable
    events = list(flatten(flatten(nested)))
    print(f'Read {len(events)} events\n')
    return events
def read_kml_placemark(kml):
    """
    Returns geometry from Google Earth KML file.

    :param str kml: KML file name without directory (looked up in GEOM_ROOT).
    :rtype: dict
    :return: Geometry dict.
    """
    config = get_config()
    kml_file = os.path.join(config['GEOM_ROOT'], kml)
    root = etree.parse(kml_file, etree.XMLParser()).getroot()

    geom = {}
    for node in root.findall('.//Placemark', root.nsmap):
        sta = node.find('.//name', root.nsmap).text
        # KML coordinate order is "lon,lat,elev".
        fields = node.find('.//coordinates', root.nsmap).text.split(",")
        geom[sta] = {
            'latitude': float(fields[1]),
            'longitude': float(fields[0]),
            'elevation': float(fields[2])
        }
    print(f'read {len(geom)} stations from {kml}')
    return geom
def write_training_dataset(pick_list, geom, dataset, pickset):
    """
    Write one station's picked examples into a TFRecord under DATASET_ROOT.

    :param list pick_list: Picks for a single station.
    :param dict geom: Geometry dict.
    :param str dataset: Output dataset directory name.
    :param str pickset: Pickset name forwarded to the worker.
    """
    config = get_config()
    dataset_dir = os.path.join(config['DATASET_ROOT'], dataset)
    make_dirs(dataset_dir)

    pick_time_key = [pick.time for pick in pick_list]
    worker = partial(_write_picked_stream,
                     pick_list=pick_list,
                     pick_time_key=pick_time_key,
                     geom=geom,
                     pickset=pickset)
    example_list = parallel(worker, pick_list)

    # One TFRecord per station, named after the station code.
    station = pick_list[0].waveform_id.station_code
    file_name = '{}.tfrecord'.format(station)
    save_file = os.path.join(dataset_dir, file_name)
    write_tfrecord(example_list, save_file)
# Script: build per-station training TFRecords from a SEISAN catalog,
# a STATION0.HYP geometry file, and a pickset.
# NOTE(review): `os` is used before any visible import - presumably
# imported earlier in the file; confirm.

# Silence TensorFlow C++ log output before TF is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import argparse
from tqdm import tqdm
from multiprocessing import cpu_count
import tensorflow as tf
from seisnn.io import read_event_list, write_training_dataset, read_geom
from seisnn.pick import get_pick_dict
from seisnn.utils import get_config

print(f'cpu counts: {cpu_count()} threads')

ap = argparse.ArgumentParser()
ap.add_argument('-c', '--catalog', required=True, help='catalog s-file dir', type=str)
ap.add_argument('-g', '--geometry', required=True, help='geometry STATION0.HYP', type=str)
ap.add_argument('-d', '--dataset', required=True, help='output dataset name', type=str)
ap.add_argument('-p', '--pickset', required=True, help='output pickset name', type=str)
args = ap.parse_args()

config = get_config()
geom = read_geom(args.geometry)
events = read_event_list(args.catalog)
# Picks are grouped per station; one output TFRecord per station.
pick_dict = get_pick_dict(events)
pick_dict_keys = pick_dict.keys()

for i, key in enumerate(pick_dict_keys):
    tqdm.write(f'station {key}, total: {i + 1}/{len(pick_dict_keys)}, pick counts: {len(pick_dict[key])}')
    # Pin dataset writing to CPU.
    with tf.device('/cpu:0'):
        write_training_dataset(pick_dict[key], geom, dataset=args.dataset, pickset=args.pickset)
def __init__(self, database, echo=False):
    """
    Open (or create) the SQLite database under DATABASE_ROOT and bind
    a session factory.

    :param str database: Database name (without the .db extension).
    :param bool echo: Forwarded to SQLAlchemy engine logging.
    """
    config = get_config()
    db_file = os.path.join(config['DATABASE_ROOT'], f'{database}.db')
    self.engine = create_engine(f'sqlite:///{db_file}', echo=echo)
    # Ensure all declared tables exist before handing out sessions.
    Base.metadata.create_all(bind=self.engine)
    self.session = sessionmaker(bind=self.engine)