Example No. 1
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    if not os.path.exists(
            os.path.join(os.path.join(output_dir, subfolder),
                         cfg.output_tfrecords_dir_positives)):
        os.makedirs(
            os.path.join(os.path.join(output_dir, subfolder),
                         cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(
        os.path.join(os.path.join(output_dir, subfolder),
                     cfg.output_tfrecords_dir_positives), output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        stream_path = os.path.join(
            os.path.join(dataset_dir, cfg.mseed_event_dir), stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        st_event = preprocess_stream(st_event)

        #Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #LOCATION CLUSTERS
        cluster_id = 0  #We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " +
                  str(cluster_id) + " to event.")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
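
Note: the example above (and the ones below) builds nested output directories with repeated os.path.exists/os.makedirs checks and nested os.path.join calls. The originals appear to target Python 2; on Python 3.2+ the same effect can be had more compactly. The helper below is only a minimal sketch of that idea, and ensure_output_path is a hypothetical name, not part of the project.

import os

def ensure_output_path(output_dir, subfolder, tfrecords_dir, file_name):
    """Build <output_dir>/<subfolder>/<tfrecords_dir>/<file_name>, creating
    any missing intermediate directories (requires Python 3.2+)."""
    target_dir = os.path.join(output_dir, subfolder, tfrecords_dir)
    os.makedirs(target_dir, exist_ok=True)  # replaces the exists()/makedirs() pair
    return os.path.join(target_dir, file_name)

# Usage, mirroring the example above:
# output_path = ensure_output_path(output_dir, subfolder,
#                                  cfg.output_tfrecords_dir_positives,
#                                  args.file_name)
# writer = DataWriter(output_path)
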
Example No. 2
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    if not os.path.exists(
            os.path.join(os.path.join(output_dir, subfolder),
                         cfg.output_tfrecords_dir_negatives)):
        os.makedirs(
            os.path.join(os.path.join(output_dir, subfolder),
                         cfg.output_tfrecords_dir_negatives))

    # Write event waveforms and cluster_id in .tfrecords
    output_name = args.file_name
    output_path = os.path.join(
        os.path.join(os.path.join(output_dir, subfolder),
                     cfg.output_tfrecords_dir_negatives), output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        stream_path = os.path.join(
            os.path.join(dataset_dir, cfg.mseed_noise_dir), stream_file)
        #print "[tfrecords negatives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords negatives] Preprocessing stream'

        # Filter the stream if requested in the config
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        #Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        cluster_id = -1  # Noise (negative) windows are tagged with cluster_id = -1
        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords negatives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(
        writer._written))
    writer.close()
Example No. 3
def main(args):
    setproctitle.setproctitle('quakenet_predict_from_tfrecords')

    # Create dir to store tfrecords
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(args.output_dir, "metadata.json")

    # CSV of start and end times
    times_csv = {"start_time": [], "end_time": []}

    # Write event waveforms and cluster_id=-1 in .tfrecords
    output_name = stream_file.split(".mseed")[0] + ".tfrecords"
    output_path = os.path.join(args.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time = stream[-1].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - args.window_size) / args.window_step
        print "total time {}, wind_size {}, win_step {}".format(
            total_time, args.window_size, args.window_step)
    else:
        max_windows = args.max_windows

    start_time = time.time()
    for idx, win in tqdm(enumerate(win_gen),
                         total=int(max_windows),
                         unit="window",
                         leave=False):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10  # placeholder; only complete 3-trace windows are written
        n_pts = win[0].stats.sampling_rate * args.window_size + 1
        # Write the window only if all three traces are complete
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            writer.write(win, -1)
            # Write start and end times in csv
            times_csv["start_time"].append(win[0].stats.starttime)
            times_csv["end_time"].append(win[0].stats.endtime)
            # Plot events
            if args.plot:
                trace = win[0]
                viz_dir = os.path.join(args.output_dir, "viz",
                                       stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                trace.plot(
                    outfile=os.path.join(viz_dir, "window_{}.png".format(idx)))

        # if idx % 1000  ==0 and idx != 0:
        #     print "{} windows created".format(idx)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)

    # Write start and end times
    df = pd.DataFrame.from_dict(times_csv)
    output_times = os.path.join(args.output_dir, "catalog_times.csv")
    df.to_csv(output_times)

    print "Last window analyzed ends on", win[0].stats.endtime
    print "Time to create tfrecords: {}s".format(time.time() - start_time)
Example No. 4
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    print stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)
    #stream.resample(10.0)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir,"metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print "startime", UTCDateTime(starttime)
    print "endtime", UTCDateTime(endtime)
    #print stream[0].stats
    #m2 = re.search(cat.stname.values[:], stream_file.split(".")[1])
    #print m2.group()
    cat = filter_catalog(cat, starttime, endtime)
    #cat = cat[(cat.stname == str(stream_file.split(".")[1]))]


   # cat = cat[(cat.stname == str(stream_file.split(".")[1])) or
   #       (cat.stname == str(stream_file.split(".")[1][:-1]))]
    #print cat
    print "First event in filtered catalog", cat.Date.values[0], cat.Time.values[0]
    print "Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1]
    cat_event_times = cat.utc_timestamp.values

    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)

    # Create window generator and shuffle the order, 2017/12/4
    #win_gen = [tr for tr in stream.slide(window_length=FLAGS.window_size,
    #                       step=FLAGS.window_step,
    #                    include_partial_windows=False)]

    #random.shuffle(win_gen)

    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1


    n_tfrecords = 0

    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            # To get rid of super small amplitudes, 2017/12/6
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
            #a = remove_repeat(win, n_samples)
            #if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
            #    continue
        else:
            n_samples = 10
            # Skip windows whose first trace has essentially zero amplitude
            ampl_e = max(abs(win[0].data))
            if ampl_e < 1e-10:
                continue
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        # Extend the window so that more events can be avoided, 2017/12/07
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend

        #print window_start_extend,window_end_extend
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print "avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx])
        except IndexError:
            # there is no event
            is_event = False
            if (len(win)==3) and (n_pts == n_samples):
                # Write tfrecords
                #writer.write(win.normalize(), -1)
                writer.write(win.copy().normalize(), -1)
                #writer.write(win.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(),
                           #  -1)
                # Plot events
                if FLAGS.plot:
                    import matplotlib
                    matplotlib.use('Agg')
                    #trace = win[0].filter('bandpass', freqmin=0.5, freqmax=20)
                    trace = win[0]
                    viz_dir = os.path.join(
                        FLAGS.output_dir, "viz", stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                   # trace.resample(10).plot(outfile=os.path.join(viz_dir,
                   #                                 "noise_{}.png".format(str(window_start))))
                    # Changed 2017/11/25; previously named
                    # "event_{}_cluster_{}.png".format(idx, cluster_id)
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        "noise_{}_{}.png".format(
                            win[0].stats.station,
                            str(win[0].stats.starttime).replace(':', '_'))))
        if idx % 1000 == 0 and idx != 0:
            print "{} windows created".format(idx)
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print "creating a new tfrecords"
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
Example No. 5
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("startime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1

    n_tfrecords = 0
    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10  # placeholder; only complete 3-trace windows are written
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # there is no event
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(win, -1)
                # Plot events
                if FLAGS.plot:
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}.png".format(idx)))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
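
Note: Examples 4 and 5 decide whether a catalog event falls inside the current window by indexing np.where(after_start == before_end)[0][0] and treating the resulting IndexError as "no event". For a valid window (start < end) the two masks can never both be False, so the equality test is equivalent to a logical AND, which reads more directly. Below is a minimal sketch, assuming cat_event_times is a NumPy array of epoch timestamps; events_in_window is a hypothetical helper, not part of the project.

import numpy as np

def events_in_window(cat_event_times, window_start, window_end):
    """Return the catalog event times that fall strictly inside the window."""
    inside = (cat_event_times > window_start) & (cat_event_times < window_end)
    return cat_event_times[inside]

# Usage, mirroring Example 5:
# if events_in_window(cat_event_times, window_start, window_end).size == 0:
#     writer.write(win, -1)  # no catalog event -> label the window as noise
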
Example No. 6
def main(args):
    
    
    if not os.path.exists(args.outroot):
        os.makedirs(args.outroot)
        
    # copy some files
    copy(os.path.join(args.inroot, 'params.pkl'), args.outroot)
    copy(os.path.join(args.inroot, 'event_channel_dict.pkl'), args.outroot)
        

    for dataset in ['train', 'validate', 'test']:
        for datatype in ['events', 'noise']:
            inpath = os.path.join(args.inroot, dataset, datatype)
            outpath = os.path.join(args.outroot, dataset, datatype)
            if not os.path.exists(outpath):
                os.makedirs(outpath)
            mseedpath = os.path.join(outpath, 'mseed')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            mseedpath = os.path.join(outpath, 'mseed_raw')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            if datatype == 'events':
                xmlpath = os.path.join(outpath, 'xml')
                if not os.path.exists(xmlpath):
                    os.makedirs(xmlpath)

            # inroot example: output/MN/streams
            # inpath example: output/MN/streams/train/events
            for dirpath, dirnames, filenames in os.walk(inpath):
                for name in filenames:
                    if name.endswith(".tfrecords"):
                        filename_root = name.replace('.tfrecords', '')
                        print 'Processing:', name, os.path.join(outpath, filename_root + '.tfrecords')
                        
                        # copy some files
                        copy(os.path.join(inpath, 'mseed_raw', filename_root + '.mseed'), os.path.join(outpath, 'mseed_raw'))
                        if datatype == 'events':
                            copy(os.path.join(inpath, 'xml', filename_root + '.xml'), os.path.join(outpath, 'xml'))

                        # read raw mseed
                        stream = read(os.path.join(inpath, 'mseed_raw', filename_root + '.mseed'), format='MSEED')
                        # store absolute maximum
                        stream_max = np.absolute(stream.max()).max()
                        # normalize by absolute maximum
                        stream.normalize(global_max=True)
                        # write new processed miniseed
                        streamfile = os.path.join(outpath, 'mseed', filename_root + '.mseed')
                        stream.write(streamfile, format='MSEED', encoding='FLOAT32')

                        n_traces = 3
                        win_size = 10001

                        # read old tfrecords
                        # https://www.kaggle.com/mpekalski/reading-tfrecord
                        record_iterator = tf.python_io.tf_record_iterator(path=os.path.join(inpath, filename_root + '.tfrecords'))
                        for string_record in record_iterator:
                            example = tf.train.Example()
                            example.ParseFromString(string_record)
                            distance_id = int(example.features.feature['distance_id'].int64_list.value[0])                            
                            magnitude_id = int(example.features.feature['magnitude_id'].int64_list.value[0])                            
                            depth_id = int(example.features.feature['depth_id'].int64_list.value[0])                            
                            azimuth_id = int(example.features.feature['azimuth_id'].int64_list.value[0])                            
                            distance = float(example.features.feature['distance'].float_list.value[0])                            
                            magnitude = float(example.features.feature['magnitude'].float_list.value[0])                            
                            depth = float(example.features.feature['depth'].float_list.value[0])                            
                            azimuth = float(example.features.feature['azimuth'].float_list.value[0])                            
                            print 'id', distance_id, 'im', magnitude_id, 'ide', depth_id, 'iaz', azimuth_id, \
                                'd', distance, 'm', magnitude, 'de', depth, 'az', azimuth
                        
#                         filename_queue = tf.train.string_input_producer([os.path.join(inpath, filename_root + '.tfrecords')], shuffle=False)
#                         reader = tf.TFRecordReader()
#                         example_key, serialized_example = reader.read(filename_queue)
#                         # data_pipeline._parse_example()
#                         features = tf.parse_single_example(
#                             serialized_example,
#                             features={
#                             'window_size': tf.FixedLenFeature([], tf.int64),
#                             'n_traces': tf.FixedLenFeature([], tf.int64),
#                             'data': tf.FixedLenFeature([], tf.string),
#                             #'stream_max': tf.FixedLenFeature([], tf.float32),
#                             'distance_id': tf.FixedLenFeature([], tf.int64),
#                             'magnitude_id': tf.FixedLenFeature([], tf.int64),
#                             'depth_id': tf.FixedLenFeature([], tf.int64),
#                             'azimuth_id': tf.FixedLenFeature([], tf.int64),
#                             'distance': tf.FixedLenFeature([], tf.float32),
#                             'magnitude': tf.FixedLenFeature([], tf.float32),
#                             'depth': tf.FixedLenFeature([], tf.float32),
#                             'azimuth': tf.FixedLenFeature([], tf.float32),
#                             'start_time': tf.FixedLenFeature([],tf.int64),
#                             'end_time': tf.FixedLenFeature([], tf.int64)})
#                         features['name'] = example_key
#                         # END - data_pipeline._parse_example()
# 
#                         print "features['distance_id']", features['distance_id']
#                         print 'distance_id shape', tf.shape(features['distance_id'])
#                         with tf.Session() as sess:
#                             print sess.run(features['distance_id'])
#                         #print 'distance_id', distance_id
#                         #print 'distance_id shape', tf.shape(distance_id)
#                         magnitude_id = features['magnitude_id']
#                         depth_id = features['depth_id']
#                         azimuth_id = features['azimuth_id']
#                         distance = features['distance']
#                         magnitude = features['magnitude']
#                         depth = features['depth']
#                         azimuth = features['azimuth']                        

                        # write new tfrecords
                        writer = DataWriter(os.path.join(outpath, filename_root + '.tfrecords'))
                        writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id, distance, magnitude, depth, azimuth)
Example No. 7
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)

    for stream_file in stream_files:

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]

        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this window
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id, filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
Example No. 8
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    if not os.path.exists(os.path.join(os.path.join(output_dir, subfolder), cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(os.path.join(output_dir, subfolder), cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"  
    output_name = args.file_name
    output_path = os.path.join(os.path.join(os.path.join(output_dir, subfolder), cfg.output_tfrecords_dir_positives), output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        stream_path = os.path.join(os.path.join(dataset_dir, cfg.mseed_event_dir), stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        
        # Filter the stream if requested in the config
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)  

        #Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg) 

        #LOCATION CLUSTERS
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0 #We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time, stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
        if c is not None:  # can be None in case of polygon-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1 #signaling that the earthquake has to be discarded
            print("[tfrecords positives] Assigning cluster "+str(cluster_id)+" to event (lat =  "+str(lat)+", lon = "+str(lon)+").")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if cluster_id >= 0: #no clustering or a valid cluster
            n_traces = len(st_event_select)
            if utils.check_stream(st_event_select, cfg):
                #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
                # Write tfrecords

                #DEBUG: STA_LTA
                #df = st_event_select[0].stats.sampling_rate
                #cft = classic_sta_lta(st_event_select[0], int(5 * df), int(10 * df))
                #for trig in cft:
                #    if trig != .0:
                #        print(trig)


                writer.write(st_event_select, cluster_id) 
        else:
            print ("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding point as no cluster found for the given lat="+str(lat)+", lon="+str(lon)+", depth="+str(depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
Example No. 9
def main(_):

    if FLAGS.stretch_data:
        print "ADD NOISE AND STRETCH DATA"
    if FLAGS.compress_data:
        print "ADD NOISE AND COMPRESS DATA"
    if FLAGS.shift_data:
        print "ADD NOISE AND SHIFT DATA"

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        if not os.path.exists(os.path.join(output_dir, "true_data")):
            os.makedirs(os.path.join(output_dir, "true_data"))
        if not os.path.exists(os.path.join(output_dir, "augmented_data")):
            os.makedirs(os.path.join(output_dir, "augmented_data"))

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1

    data_pipeline = DataPipeline(FLAGS.tfrecords,
                                 config=cfg,
                                 is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        output_tfrecords = FLAGS.output
        writer = DataWriter(output_tfrecords)
        n_examples = 0
        while True:
            try:
                sample, label = sess.run([samples, labels])
                sample = np.squeeze(sample, axis=0)
                label = label[0]

                noised_sample = add_noise_to_signal(np.copy(sample))
                if FLAGS.compress_data:
                    noised_sample = compress_signal(noised_sample)
                if FLAGS.stretch_data:
                    noised_sample = stretch_signal(noised_sample)
                if FLAGS.shift_data:
                    noised_sample = shift_signal(noised_sample)

                if FLAGS.plot:
                    plot_true_and_augmented_data(sample, noised_sample, label,
                                                 n_examples)

                stream = convert_np_to_stream(noised_sample)
                writer.write(stream, label)

                n_examples += 1

            except KeyboardInterrupt:
                print 'stopping data augmentation'
                break

            except tf.errors.OutOfRangeError:
                print 'Augmentation completed ({} epochs, {} examples seen).'\
                    .format(cfg.n_epochs, n_examples - 1)
                break

        writer.close()
        coord.request_stop()
        coord.join(threads)
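
Note: Example 9 depends on helpers such as add_noise_to_signal, compress_signal, stretch_signal and shift_signal whose bodies are not shown here. Purely as an illustration of the kind of augmentation involved (these are assumptions, not the project's implementations), additive Gaussian noise and a random circular time shift on a (n_samples, n_channels) NumPy window could look like this:

import numpy as np

def add_gaussian_noise(sample, rel_std=0.05, rng=None):
    """Add zero-mean Gaussian noise scaled to a fraction of the window's std.

    Illustrative stand-in for add_noise_to_signal, not its actual source."""
    rng = np.random.default_rng() if rng is None else rng
    noise = rng.normal(0.0, rel_std * sample.std(), size=sample.shape)
    return sample + noise

def random_time_shift(sample, max_shift=100, rng=None):
    """Circularly shift the window along the time axis by a random offset."""
    rng = np.random.default_rng() if rng is None else rng
    offset = int(rng.integers(-max_shift, max_shift + 1))
    return np.roll(sample, offset, axis=0)

# Usage on one dequeued sample from the pipeline above:
# noised = random_time_shift(add_gaussian_noise(np.copy(sample)))
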
Example No. 10
def main(args):
    
    random.seed(datetime.now())
    
    if args.n_distances < 1:
        args.n_distances = None
    # print distance classifications
    if args.n_distances != None:
        print 'dist_class, dist_deg, dist_km'
        for dclass in range(0, args.n_distances, 1):
            dist_deg = util.classification2distance(dclass, args.n_distances)
            dist_km = geo.degrees2kilometers(dist_deg)
            print "{}   {:.2f}   {:.1f}".format(dclass, dist_deg, dist_km)
        print ''
     
    if args.n_magnitudes < 1:
        args.n_magnitudes = None
    # print magnitude classifications
    if args.n_magnitudes != None:
        print 'mag_class, mag'
        for mclass in range(0, args.n_magnitudes, 1):
            mag = util.classification2magnitude(mclass, args.n_magnitudes)
            print "{}   {:.2f}".format(mclass, mag)
        print ''
     
    if args.n_depths < 1:
        args.n_depths = None
    # print depth classifications
    if args.n_depths != None:
        print 'depth_class, depth'
        for dclass in range(0, args.n_depths, 1):
            depth = util.classification2depth(dclass, args.n_depths)
            print "{}   {:.1f}".format(dclass, depth)
        print ''
     
    if args.n_azimuths < 1:
        args.n_azimuths = None
    # print azimuth classifications
    if args.n_azimuths != None:
        print 'azimuth_class, azimuth'
        for aclass in range(0, args.n_azimuths, 1):
            azimuth = util.classification2azimuth(aclass, args.n_azimuths)
            print "{}   {:.1f}".format(aclass, azimuth)
        print ''
     
    
    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)
        
    # save arguments
    with open(os.path.join(args.outpath, 'params.pkl'), 'w') as file:
        file.write(pickle.dumps(args)) # use `pickle.loads` to do the reverse
        
    for dataset in ['train', 'validate', 'test']:
        for datatype in ['events', 'noise']:
            datapath = os.path.join(args.outpath, dataset, datatype)
            if not os.path.exists(datapath):
                os.makedirs(datapath)
            mseedpath = os.path.join(datapath, 'mseed')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            mseedpath = os.path.join(datapath, 'mseed_raw')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            if datatype == 'events':
                xmlpath = os.path.join(datapath, 'xml')
                if not os.path.exists(xmlpath):
                    os.makedirs(xmlpath)

        
    # read catalog of events
    #filenames = args.event_files_path + os.sep + '*.xml'
    catalog_dict = {}
    catalog_all = []
    for dirpath, dirnames, filenames in os.walk(args.event_files_path):
        for name in filenames:
            if name.endswith(".xml"):
                file = os.path.join(dirpath, name)
                catalog = read_events(file)
                target_count = int(args.event_fraction * float(catalog.count()))
                print catalog.count(), 'events:', 'read from:', file, 'will use:', target_count, 'since args.event_fraction=', args.event_fraction
                if (args.event_fraction < 1.0):
                    while catalog.count() > target_count:
                        del catalog[random.randint(0, catalog.count() - 1)]
                if not args.systematic:
                    tokens = name.split('_')
                    net_sta = tokens[0] + '_' + tokens[1]
                    if not net_sta in catalog_dict:
                        catalog_dict[net_sta] = catalog
                    else:
                        catalog_dict[net_sta] += catalog
                    # sort catalog by date
                    catalog_dict[net_sta] = Catalog(sorted(catalog_dict[net_sta], key=lambda e: e.origins[0].time))
                else:
                    catalog_all += catalog
    
    # read list of channels to use
    inventory_full = read_inventory(args.channel_file)
    inventory_full = inventory_full.select(channel=args.channel_prefix+'Z', sampling_rate=args.sampling_rate)
    #print(inventory)
    
    client = fdsn.Client(args.base_url)
    
    # get existing already processed event channel dictionary
    try:
        with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'r') as file:
            event_channel_dict = pickle.load(file)
    except IOError:
        event_channel_dict = {}
    print 'Existing event_channel_dict size:', len(event_channel_dict)

    n_noise = int(0.5 + float(args.n_streams) * args.noise_fraction)
    n_events = args.n_streams - n_noise
    n_validate = int(0.5 + float(n_events) * args.validation_fraction)
    n_test = int(0.5 + float(n_events) * args.test_fraction)
    n_train = n_events - n_validate - n_test
    n_count = 0
    n_streams = 0
    
    if args.systematic:
        event_ndx = 0
        net_ndx = 0
        sta_ndx = 0
        channel_ndx = -1


    
#     distance_id_count = {}
#     max_num_for_distance_id = {}
#     if args.n_distances != None:
#         # train
#         distance_id_count['train'] = [0] * args.n_distances
#         max_num_for_distance_id['train'] = 1 + int(2.0 * float(n_train) / float(args.n_distances))
#         print 'Maximum number events for each distance bin train:', max_num_for_distance_id['train']
#         # validate
#         distance_id_count['validate'] = [0] * args.n_distances
#         max_num_for_distance_id['validate'] = 1 + int(2.0 * float(n_validate) / float(args.n_distances))
#         print 'Maximum number events for each distance bin validate:', max_num_for_distance_id['validate']
#         # test
#         distance_id_count['test'] = [0] * args.n_distances
#         max_num_for_distance_id['test'] = 1 + int(2.0 * float(n_test) / float(args.n_distances))
#         print 'Maximum number events for each distance bin test:', max_num_for_distance_id['test']
        
    while args.systematic or n_streams < args.n_streams:
        
        try:
        
            # choose event or noise
            is_noise = n_streams >= n_events
            
            # reset validate/test counts if switching from events to noise
            if n_streams == n_events:
                n_validate = int(0.5 + float(n_noise) * args.validation_fraction)
                n_test = int(0.5 + float(n_noise) * args.test_fraction)
                n_train = n_noise - n_validate - n_test
                n_count = 0
                
            # set out paths
            if is_noise:
                datatype = 'noise'
            else:
                datatype = 'events'
            if n_count < n_train:
                dataset = 'train'
            elif n_count < n_train + n_validate:
                dataset = 'validate'
            else:
                dataset = 'test'
            datapath = os.path.join(args.outpath, dataset, datatype)

            # get random channel from Inventory
            #inventory = inventory_full.select(time=origin.time)
            inventory = inventory_full
            
            if args.systematic:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx \
                        = get_systematic_channel(inventory, catalog_all, is_noise, event_ndx, net_ndx, sta_ndx, channel_ndx)
                except ValueError:
                    break
            else:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx = get_random_channel(inventory, catalog_dict, is_noise)
                except ValueError:
                    continue
                                
            distance_id = 0
            distance = -999.0
            magnitude = -999.0
            depth = -999.0
            azimuth = -999.0
            if not is_noise:
                dist_meters, azim, bazim = geo.gps2dist_azimuth(channel.latitude, channel.longitude, origin.latitude, origin.longitude, a=geo.WGS84_A, f=geo.WGS84_F)
                distance = geo.kilometer2degrees(dist_meters / 1000.0, radius=6371)
                azimuth = azim
                magnitude = event.preferred_magnitude().mag
                depth = origin.depth / 1000.0
                if args.n_distances != None:
                    distance_id = util.distance2classification(distance, args.n_distances)
#                                 if distance_id_count[dataset][distance_id] >= max_num_for_distance_id[dataset]:
#                                     print 'Skipping event_channel: distance bin', distance_id, 'for', dataset, 'already full:', \
#                                         distance_id_count[dataset][distance_id], '/', max_num_for_distance_id[dataset]
#                                     continue

            print ''
            print 'Event:', origin.time.isoformat(), event.event_descriptions[0].text, \
            ', Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \
            ', Mag: {:.2f}'.format(magnitude), \
            ', Depth(km): {:.1f}'.format(depth), \
            ', Az(deg): {:.1f}'.format(azimuth)
            print 'Retrieving channels:', (n_streams + 1), '/ ', args.n_streams, (', NOISE, ' if  is_noise else ', EVENT, '), 'event', event_ndx, origin.time, \
                ', net', net_ndx, ', sta', sta_ndx, ', chan', channel_ndx, \
                ', ', net.code, sta.code, \
                channel.code, channel.location_code, \
                channel.sample_rate
            # check station was available at origin.time
            if not sta.is_active(time=origin.time):
                print 'Skipping event_channel: station not active at origin.time:'
                continue
            #key = str(event_ndx) + '_' + str(net_ndx) + '_' + str(sta_ndx) + '_' + str(channel_ndx) + '_' + str(is_noise)
            key = str(event_ndx) + '_' + net.code + '_' + sta.code + '_' + channel.code + '_' + str(is_noise)
            if key in event_channel_dict:
                print 'Skipping event_channel: already processed.'
                continue
            event_channel_dict[key] = 1
                
            # get start time for waveform request
            ttime = get_first_P_travel_time(origin, channel)
            arrival_time = origin.time + ttime
            if is_noise:
                # get start time of next event
                event2 = catalog[event_ndx + 1]
                origin2 = event2.preferred_origin()
                # check that origins are at least min time apart
                if origin2.time - origin.time < MIN_INTER_EVENT_TIME:
                    print 'Skipping noise event_channel: inter event time too small: ', str(origin2.time - origin.time), \
                        origin2.time, origin.time
                    continue
                ttime2 = get_first_P_travel_time(origin2, channel)
                arrival_time2 = origin2.time + ttime2
                arrival_time = (arrival_time + ((arrival_time2 - arrival_time) / 2.0)) - args.window_start
            
            start_time = arrival_time - args.window_start
                                    
            # request data for 3 channels
            
            #for orientation in ['Z', 'N', 'E', '1', '2']:
            #    req_chan = args.channel_prefix + orientation
            channel_name = net.code + '_' + sta.code + '_' + channel.location_code + '_' + args.channel_prefix
            padded_start_time = start_time - WINDOW_PADDING_FDSN
            padded_end_time = start_time + args.window_length + 2.0 * WINDOW_PADDING_FDSN
            chan_param = args.channel_prefix + '?'
            # kluge to get url used for data request
            kwargs = {'network': net.code, 'station': sta.code, 'location': channel.location_code, 'channel': chan_param,
                      'starttime': padded_start_time, 'endtime': padded_end_time}                      
            #url = client._create_url_from_parameters('dataselect', DEFAULT_PARAMETERS['dataselect'],  **kwargs)
            url = fdsn.client.build_url(client.base_url, 'dataselect', client.major_versions['dataselect'], "query", parameters=kwargs)
            print '  java net.alomax.seisgram2k.SeisGram2K', '\"', url, '\"'
            try:
                stream = client.get_waveforms(  \
                                               net.code, sta.code, channel.location_code, chan_param, \
                                               padded_start_time, padded_end_time, \
                                               attach_response=True)
                
            except fdsn.header.FDSNException as ex:
                print 'Skipping channel:', channel_name, 'FDSNException:', ex, 
                continue
                                    
            print stream
            # TEST
#                         for trace in stream:
#                             print '==========> trace.stats', trace.stats
                
            # check some things
            if (len(stream) != 3):
                print 'Skipping channel: len(stream) != 3:', channel_name
                continue
            ntrace = 0
            for trace in stream:
                if (len(trace) < 1):
                    print 'Skipping trace: len(trace) < 1:', channel_name
                    continue
                if (trace.stats.starttime > start_time or trace.stats.endtime < start_time + args.window_length):
                    print 'Skipping trace: does not contain required time window:', channel_name
                    continue
                ntrace += 1
            if (ntrace != 3):
                print 'Skipping channel: ntrace != 3:', channel_name
                continue
            
            # pre-process streams
            # sort so that channels will be ingested in NN always in same order ENZ
            stream.sort(['channel'])
            # detrend - this is meant to be equivalent to detrend or a long period low-pass (e.g. at 100sec) applied to real-time data
            stream.detrend(type='linear')
            for trace in stream:
                # correct for required sampling rate
                if abs(trace.stats.sampling_rate - args.sampling_rate) / args.sampling_rate > 0.01:
                    trace.resample(args.sampling_rate)
                    
            # apply high-pass filter if requested
            if args.hp_filter_freq > 0.0:
                stream.filter('highpass', freq=args.hp_filter_freq, corners=args.hp_filter_corners)
            
            # check signal-to-noise ratio; if it fails, repeat on 1 s high-pass data to capture local/regional events in longer-period microseismic noise
            sn_type = 'BRB'
            first_pass = True
            while True:
                if is_noise:
                    snrOK = True
                else:
                    snrOK = False
                for trace in stream:
                    # slice with 1sec margin of error for arrival time to: 1) avoid increasing noise amplitude with signal, 2) avoid missing first P in signal
                    if (first_pass):
                        signal_slice = trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = trace.slice(endtime=arrival_time - 1.0) 
                    else:
                        # highpass at 1sec
                        filt_trace = trace.copy()
                        filt_trace.filter('highpass', freq=1.0, corners=4)
                        signal_slice = filt_trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = filt_trace.slice(endtime=arrival_time - 1.0) 
                        sn_type = '1HzHP'
                    # check signal to noise around arrival_time
                    # ratio of std
                    asignal = signal_slice.std()
                    anoise = noise_slice.std()
                    snr = asignal / anoise
                    print trace.id, sn_type, 'snr:', snr, 'std_signal:', asignal, 'std_noise:', anoise
                    # ratio of peak amplitudes (DO NOT USE, GIVE UNSTABLE RESULTS!)
#                                 asignal = signal_slice.max()
#                                 anoise = noise_slice.max()
#                                 snr = np.absolute(asignal / anoise)
#                                 print trace.id, sn_type, 'snr:', snr, 'amax_signal:', asignal, 'amax_noise:', anoise
                    if is_noise:
                        snrOK = snrOK and snr <= MAX_SNR_NOISE
                        if not snrOK:
                            break
                    else:
                        snrOK = snrOK or snr >= args.snr_accept
                if (first_pass and not snrOK and args.hp_filter_freq < 0.0):
                    first_pass = False
                    continue
                else:
                    break

            if (not snrOK):
                if is_noise:
                    print 'Skipping channel:', sn_type, 'snr >', MAX_SNR_NOISE,  'on one or more traces:', channel_name
                else:
                    print 'Skipping channel:', sn_type, 'snr < args.snr_accept:', args.snr_accept, 'on all traces:', channel_name
                continue
               
            # trim data to required window
            # try to make sure samples and start/end times align as closely as possible to first trace
            trace = stream.traces[0]
            trace = trace.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True)
            start_time = trace.stats.starttime
            stream = stream.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True)
            
            cstart_time = '%04d.%02d.%02d.%02d.%02d.%02d.%03d' % \
                (start_time.year, start_time.month, start_time.day, start_time.hour, start_time.minute, \
                 start_time.second, start_time.microsecond // 1000)

            # process each trace
            try:
                for trace in stream:
                    # correct for overall sensitivity or gain
                    trace.normalize(trace.stats.response.instrument_sensitivity.value)
                    trace.data = trace.data.astype(np.float32)
                    # write miniseed
                    #tracefile = os.path.join(datapath, 'mseed', trace.id + '.' + cstart_time + '.mseed')
                    #trace.write(tracefile, format='MSEED', encoding='FLOAT32')
                    #print 'Channel written:', tracefile, trace.count(), 'samples'
            except AttributeError as err:
                print 'Skipping channel:', channel_name,  ': Error applying trace.normalize():' , err
                
            filename_root =  channel_name + '.' + cstart_time

            # write raw miniseed
            streamfile = os.path.join(datapath, 'mseed_raw', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile
                
            # store absolute maximum
            stream_max = np.absolute(stream.max()).max()
            # normalize by absolute maximum
            stream.normalize(global_max=True)
            
            # 20180521 AJL
            # spherical coordinates
            # raw data always in same order ENZ
            # tensor indexing is [traces, datapoints, comps]
            if args.spherical:
                rad2deg = 180.0 / math.pi
                # calculate modulus
                temp_square = np.add(np.square(stream.traces[0].data), np.add(np.square(stream.traces[1].data), np.square(stream.traces[2].data)))
                temp_modulus = np.sqrt(temp_square)
                # calculate azimuth
                temp_azimuth = np.add( np.multiply(np.arctan2(stream.traces[0].data, stream.traces[1].data), rad2deg), 180.0)
                # calculate inclination
                temp_inclination = np.multiply(np.arcsin(np.divide(stream.traces[2].data, temp_modulus)), rad2deg)
                # reset stream data to spherical coordinates
                stream.traces[0].data = temp_inclination
                stream.traces[1].data = temp_azimuth
                temp_modulus = np.multiply(temp_modulus, 100.0)  # increase scale for plotting purposes
                stream.traces[2].data = temp_modulus
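                # the stream now holds [inclination (deg), azimuth (deg, 0-360), modulus * 100]
                # in place of the original E, N, Z components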


            # put absolute maximum normalization in first element of data array, to seed NN magnitude estimation
            # 20180816 AJL - do not mix max with data
            # for trace in stream:
            #    trace.data[0] = stream_max
            print 'stream_max', stream_max
            

            # write processed miniseed
            streamfile = os.path.join(datapath, 'mseed', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile
                
            # write event waveforms and distance_id in .tfrecords
            magnitude_id = 0
            depth_id = 0
            azimuth_id = 0
            if not is_noise:
#                             if args.n_distances != None:
#                                 distance_id_count[dataset][distance_id] += 1
                if args.n_magnitudes is not None:
                    magnitude_id = util.magntiude2classification(magnitude, args.n_magnitudes)
                if args.n_depths is not None:
                    depth_id = util.depth2classification(depth, args.n_depths)
                if args.n_azimuths is not None:
                    azimuth_id = util.azimuth2classification(azimuth, args.n_azimuths)
            else:
                distance_id = -1
                distance = 0.0
            output_name = filename_root + '.tfrecords'
            output_path = os.path.join(datapath, output_name)
            writer = DataWriter(output_path)
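            # the DataWriter call below packs the normalized 3-component window together with
            # the scalar labels (distance, magnitude, depth, azimuth), their class ids and
            # stream_max into a single tfrecords example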
            writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id, distance, magnitude, depth, azimuth)
            if not is_noise:
                print '==== Event stream tfrecords written:', output_name, \
                'Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \
                ', Mag: {:.2f} ID: {}'.format(magnitude, magnitude_id), \
                ', Depth(km): {:.1f} ID: {}'.format(depth, depth_id), \
                ', Az(deg): {:.1f} ID: {}'.format(azimuth, azimuth_id)
            else:
                print '==== Noise stream tfrecords written:', output_name, 'ID: Dist {}, Mag {}, Depth {}, Az {}'.format(distance_id, magnitude_id, depth_id, azimuth_id)
                
            # write event data
            if not is_noise:
                filename = os.path.join(datapath, 'xml', filename_root + '.xml')
                event.write(filename, 'QUAKEML')
           
            n_streams += 1
            n_count += 1
                    
        except KeyboardInterrupt:
            print 'Stopping: KeyboardInterrupt'
            break

        except Exception as ex:
            print 'Skipping stream: Exception:', ex
            traceback.print_exc()
            continue

    print n_streams, 'streams written to:', args.outpath

    # save event_channel_dict
    with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'wb') as f:
        pickle.dump(event_channel_dict, f)
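    # to read the dictionary back later (sketch, assuming the same Python/pickle version):
    #   with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'rb') as f:
    #       event_channel_dict = pickle.load(f)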
Example no. 11
0
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*.mseed')
    ]
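    # keep only the MiniSEED files found in the stream directory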
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"

    for stream_file in stream_files:
        cat = load_catalog(FLAGS.catalog)
        #cat = filter_catalog(cat,stream_file.split(".mseed")[0])

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        #print(1111, cat)
        # Propagation time from source to station
        #travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            # event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
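            # st_event is a FLAGS.window_size-second window cut from the continuous stream,
            # starting at the catalogued event time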
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            #cluster_id =1
            n_traces = len(st_event)
            # If there is no trace skip this waveform
            if n_traces == 0:
                continue

            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
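            # n_pts is the expected sample count for a complete window
            # (sampling_rate * window_size + 1); only full 3-component windows are written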
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                # use filter_small_ampitude to discard windows dominated by very small amplitudes (added 2017/12/6)
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(
                    st_event, n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
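                # the two checks above presumably return, per component, the fraction of
                # near-zero (filter_small_ampitude) or repeated (remove_repeat) samples;
                # windows exceeding the 0.3 threshold on any component are skipped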
                writer.write(st_event.copy().resample(10).normalize(),
                             cluster_id)
                #writer.write(st_event.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), cluster_id)
                #print (len(st_event[0]))
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))

                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass',
                                               freqmin=0.5,
                                               freqmax=20)
                    #trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        ####changed at 2017/11/25,use max cluster_prob instead of cluster_id
                        #                "event_{}_cluster_{}.png".format(idx,cluster_id)))
                        "event_{}_{}.png".format(
                            st_event[0].stats.station,
                            str(st_event[0].stats.starttime).replace(':', '_')
                        )))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
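        # metadata.json maps each stream name to the number of windows written for it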