def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder,
                                       cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_positives, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)

        #print '[tfrecords positives] Preprocessing stream'
        st_event = preprocess_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        cluster_id = 0  # We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " +
                  str(cluster_id) + " to event.")

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder,
                                       cfg.output_tfrecords_dir_negatives)):
        os.makedirs(os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_negatives))

    # Write noise waveforms and cluster_id=-1 in .tfrecords
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_negatives, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir,
                                   stream_file)
        #print "[tfrecords negatives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)

        #print '[tfrecords negatives] Preprocessing stream'
        # Filter the stream if a filter frequency is configured
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        cluster_id = -1  # Noise windows are labeled -1
        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords negatives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(
        writer._written))
    writer.close()
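# A minimal driver sketch for the two write() functions above. It is not part
# of the original scripts: the split ratio and the make_train_val_split helper
# are hypothetical, and output_dir, dataset_dir, cfg, and args are assumed to
# be set up at module level, exactly as the functions themselves expect.
import os
import random


def make_train_val_split(files, val_fraction=0.1, seed=42):
    """Hypothetical helper: shuffle file names and split them in two lists."""
    files = list(files)
    random.Random(seed).shuffle(files)
    n_val = int(len(files) * val_fraction)
    return files[n_val:], files[:n_val]

# Example usage, assuming the two write() variants above are renamed
# write_positives/write_negatives so they can coexist in one module:
#
# event_files = os.listdir(os.path.join(dataset_dir, cfg.mseed_event_dir))
# noise_files = os.listdir(os.path.join(dataset_dir, cfg.mseed_noise_dir))
# train_ev, val_ev = make_train_val_split(event_files)
# write_positives(train_ev, "train")
# write_positives(val_ev, "validation")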
def main(args):
    setproctitle.setproctitle('quakenet_predict_from_tfrecords')

    # Create dir to store tfrecords
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(args.output_dir, "metadata.json")

    # Csv of start and end times
    times_csv = {"start_time": [], "end_time": []}

    # Write event waveforms and cluster_id=-1 in .tfrecords
    output_name = stream_file.split(".mseed")[0] + ".tfrecords"
    output_path = os.path.join(args.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time = stream[-1].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - args.window_size) / args.window_step
        print "total time {}, win_size {}, win_step {}".format(
            total_time, args.window_size, args.window_step)
    else:
        max_windows = args.max_windows

    start_time = time.time()
    for idx, win in tqdm(enumerate(win_gen), total=int(max_windows),
                         unit="window", leave=False):
        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the traces are complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10  # placeholder so the completeness check below fails
        n_pts = win[0].stats.sampling_rate * args.window_size + 1
        # There is no catalog here: every complete window is written with label -1
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            writer.write(win, -1)
            # Write start and end times in csv
            times_csv["start_time"].append(win[0].stats.starttime)
            times_csv["end_time"].append(win[0].stats.endtime)
            # Plot windows
            if args.plot:
                trace = win[0]
                viz_dir = os.path.join(args.output_dir, "viz",
                                       stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                trace.plot(outfile=os.path.join(
                    viz_dir, "window_{}.png".format(idx)))

        # if idx % 1000 == 0 and idx != 0:
        #     print "{} windows created".format(idx)
        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)

    # Write start and end times
    df = pd.DataFrame.from_dict(times_csv)
    output_times = os.path.join(args.output_dir, "catalog_times.csv")
    df.to_csv(output_times)

    print "Last window analyzed ends on", win[0].stats.endtime
    print "Time to create tfrecords: {}s".format(time.time() - start_time)
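# A sketch of the argument parser main(args) above appears to expect. The flag
# names mirror the attributes accessed on args (stream_path, output_dir,
# window_size, window_step, max_windows, plot); the defaults here are
# assumptions, not values from the original script.
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--stream_path", type=str, required=True,
                        help="Path to the .mseed stream to window")
    parser.add_argument("--output_dir", type=str, required=True,
                        help="Directory for tfrecords, metadata and plots")
    parser.add_argument("--window_size", type=float, default=10.0,
                        help="Window length in seconds (assumed default)")
    parser.add_argument("--window_step", type=float, default=10.0,
                        help="Step between consecutive windows in seconds")
    parser.add_argument("--max_windows", type=int, default=None,
                        help="Stop after this many windows (None = whole stream)")
    parser.add_argument("--plot", action="store_true",
                        help="Save a PNG of each written window")
    return parser.parse_args()

# if __name__ == "__main__":
#     main(parse_args())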
def main(_):
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    print stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)
    #stream.resample(10.0)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print "starttime", UTCDateTime(starttime)
    print "endtime", UTCDateTime(endtime)
    cat = filter_catalog(cat, starttime, endtime)
    # Optionally restrict the catalog to the station of the stream:
    #cat = cat[(cat.stname == str(stream_file.split(".")[1]))]
    #cat = cat[(cat.stname == str(stream_file.split(".")[1])) or
    #          (cat.stname == str(stream_file.split(".")[1][:-1]))]
    print "First event in filtered catalog", cat.Date.values[0], cat.Time.values[0]
    print "Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1]
    cat_event_times = cat.utc_timestamp.values

    # Write noise waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    # Alternative (2017/12/4): materialize the windows and shuffle their order
    #win_gen = [tr for tr in stream.slide(window_length=FLAGS.window_size,
    #                                     step=FLAGS.window_step,
    #                                     include_partial_windows=False)]
    #random.shuffle(win_gen)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1
    for idx, win in enumerate(win_gen):
        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the traces are complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            # Get rid of windows with very small amplitudes (2017/12/6)
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
            #a = remove_repeat(win, n_samples)
            #if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
            #    continue
        else:
            n_samples = 10  # placeholder so the completeness check below fails
            ampl_e = max(abs(win[0].data))  # amplitude of the single trace
            if ampl_e < 1e-10:
                continue
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1

        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        # Extend the window on both sides so that more events are avoided (2017/12/07)
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print "avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx])
        except IndexError:
            # There is no event in the window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                #writer.write(win.normalize(), -1)
                writer.write(win.copy().normalize(), -1)
                #writer.write(win.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), -1)
                # Plot windows
                if FLAGS.plot:
                    import matplotlib
                    matplotlib.use('Agg')
                    #trace = win[0].filter('bandpass', freqmin=0.5, freqmax=20)
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    # Changed 2017/11/25: name plots by station and start time
                    # instead of cluster_id
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        "noise_{}_{}.png".format(
                            win[0].stats.station,
                            str(win[0].stats.starttime).replace(':', '_'))))

        if idx % 1000 == 0 and idx != 0:
            print "{} windows created".format(idx)
            # Save num windows created in metadata and rotate to a new file
            metadata[output_name] = writer._written
            print "creating a new tfrecords"
            writer.close()  # flush the finished file before starting a new one
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
def main(_):
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("starttime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write noise waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1
    for idx, win in enumerate(win_gen):
        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the traces are complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10  # placeholder so the completeness check below fails
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1

        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # There is no event in the window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(win, -1)
                # Plot windows
                if FLAGS.plot:
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}.png".format(idx)))

        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata and rotate to a new file
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            writer.close()  # flush the finished file before starting a new one
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
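# The catalog check in the two noise scripts above relies on a slightly opaque
# numpy trick: `after_start == before_end` is True only where both booleans
# are True, i.e. where an event time falls inside the window (for a
# positive-length window the both-False case is impossible). A self-contained
# sketch of the same test; the timestamps in the example are made up for
# illustration:
import numpy as np


def events_in_window(event_times, window_start, window_end):
    """Return indices of catalog events falling inside (start, end)."""
    event_times = np.asarray(event_times)
    after_start = event_times > window_start
    before_end = event_times < window_end
    # Equivalent to `after_start == before_end` when window_start < window_end
    return np.where(after_start & before_end)[0]

# Example: only the event at t=105 falls inside the window (100, 110)
print(events_in_window([50.0, 105.0, 200.0], 100.0, 110.0))  # -> [1]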
def main(_):
    stream_files = [file for file in os.listdir(FLAGS.stream_dir)
                    if fnmatch.fnmatch(file, '*')]
    print "List of streams to analyze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)

    for stream_file in stream_files:
        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))
        filtered_catalog = cat[(cat.utc_timestamp >= start_date)
                               & (cat.utc_timestamp < end_date)]

        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))

        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this event
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)
                # Save window and cluster_id as mseed
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id,
                        filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder,
                                       cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_positives, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)

        #print '[tfrecords positives] Preprocessing stream'
        # Filter the stream if a filter frequency is configured
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0  # We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            if c is not None:  # can be None in case of polygons-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1  # signals that the earthquake has to be discarded
            print("[tfrecords positives] Assigning cluster " + str(cluster_id) +
                  " to event (lat = " + str(lat) + ", lon = " + str(lon) + ").")

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        if cluster_id >= 0:  # no clustering or a valid cluster
            n_traces = len(st_event_select)
            if utils.check_stream(st_event_select, cfg):
                #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
                # DEBUG: STA/LTA
                #df = st_event_select[0].stats.sampling_rate
                #cft = classic_sta_lta(st_event_select[0], int(5 * df), int(10 * df))
                #for trig in cft:
                #    if trig != .0:
                #        print(trig)
                # Write tfrecords
                writer.write(st_event_select, cluster_id)
        else:
            print("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding "
                  "point as no cluster found for the given lat=" + str(lat) +
                  ", lon=" + str(lon) + ", depth=" + str(depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
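# The commented-out STA/LTA debug block above uses ObsPy's classic_sta_lta,
# which expects a plain numpy array rather than a Trace. If the block is ever
# re-enabled, a working form would pass the trace's data array:
#
# from obspy.signal.trigger import classic_sta_lta
# df = st_event_select[0].stats.sampling_rate
# cft = classic_sta_lta(st_event_select[0].data, int(5 * df), int(10 * df))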
def main(_):
    if FLAGS.stretch_data:
        print "ADD NOISE AND STRETCH DATA"
    if FLAGS.compress_data:
        print "ADD NOISE AND COMPRESS DATA"
    if FLAGS.shift_data:
        print "ADD NOISE AND SHIFT DATA"

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        if not os.path.exists(os.path.join(output_dir, "true_data")):
            os.makedirs(os.path.join(output_dir, "true_data"))
        if not os.path.exists(os.path.join(output_dir, "augmented_data")):
            os.makedirs(os.path.join(output_dir, "augmented_data"))

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1
    data_pipeline = DataPipeline(FLAGS.tfrecords, config=cfg,
                                 is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        output_tfrecords = FLAGS.output
        writer = DataWriter(output_tfrecords)
        n_examples = 0
        while True:
            try:
                sample, label = sess.run([samples, labels])
                sample = np.squeeze(sample, axis=0)
                label = label[0]

                noised_sample = add_noise_to_signal(np.copy(sample))
                if FLAGS.compress_data:
                    noised_sample = compress_signal(noised_sample)
                if FLAGS.stretch_data:
                    noised_sample = stretch_signal(noised_sample)
                if FLAGS.shift_data:
                    noised_sample = shift_signal(noised_sample)

                if FLAGS.plot:
                    plot_true_and_augmented_data(sample, noised_sample,
                                                 label, n_examples)

                stream = convert_np_to_stream(noised_sample)
                writer.write(stream, label)
                n_examples += 1

            except KeyboardInterrupt:
                print 'stopping data augmentation'
                break

            except tf.errors.OutOfRangeError:
                print 'Augmentation completed ({} epochs, {} examples seen).'\
                    .format(cfg.n_epochs, n_examples)
                break

        writer.close()
        coord.request_stop()
        coord.join(threads)
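# add_noise_to_signal, compress_signal, stretch_signal, and shift_signal are
# defined elsewhere in the repository and are not shown here. As an
# illustration only, a minimal additive-Gaussian-noise version of
# add_noise_to_signal could look like the sketch below; the noise model and
# the 0.05 scale factor are assumptions, not the original method.
import numpy as np


def add_noise_to_signal_sketch(sample, noise_level=0.05, seed=None):
    """Add zero-mean Gaussian noise scaled to the signal's std (sketch)."""
    rng = np.random.RandomState(seed)
    # sample is expected as a float array of shape (n_samples, n_channels)
    noise = rng.normal(0.0, noise_level * sample.std(), size=sample.shape)
    return sample + noise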
def main(_):
    stream_files = [file for file in os.listdir(FLAGS.stream_dir)
                    if fnmatch.fnmatch(file, '*.mseed')]
    print "List of streams to analyze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    for stream_file in stream_files:
        cat = load_catalog(FLAGS.catalog)
        #cat = filter_catalog(cat, stream_file.split(".mseed")[0])

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))
        filtered_catalog = cat[(cat.utc_timestamp >= start_date)
                               & (cat.utc_timestamp < end_date)]

        # Propagation time from source to station
        #travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))

        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            #event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            #cluster_id = 1
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Use filter_small_ampitude to get rid of windows with very
                # small amplitudes (2017/12/6)
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(st_event,
                                                               n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
                # Write tfrecords
                writer.write(st_event.copy().resample(10).normalize(),
                             cluster_id)
                #writer.write(st_event.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), cluster_id)

                # Save window and cluster_id as mseed
                if FLAGS.save_mseed:
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass', freqmin=0.5,
                                               freqmax=20)
                    #trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    # Changed 2017/11/25: name plots by station and start time
                    # instead of cluster_id
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        "event_{}_{}.png".format(
                            st_event[0].stats.station,
                            str(st_event[0].stats.starttime).replace(':', '_'))))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
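# filter_small_ampitude (sic -- the name is kept as spelled in the repository)
# is defined elsewhere. From its call sites above, it returns one score per
# E/N/Z component, and a window is discarded when any score exceeds 0.3. A
# plausible sketch, assuming the score is the fraction of near-zero samples;
# the 1e-8 threshold is an assumption, not the original value:
import numpy as np


def filter_small_ampitude_sketch(stream, n_samples, eps=1e-8):
    """Fraction of near-zero samples in each of the first three traces."""
    scores = []
    for trace in stream[:3]:
        data = np.asarray(trace.data[:n_samples], dtype=float)
        scores.append(np.mean(np.abs(data) < eps))
    return tuple(scores)  # (ampl_e, ampl_n, ampl_z)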