Example #1
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    positives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(positives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        st_event = preprocess_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        cluster_id = 0  # We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " +
                  str(cluster_id) + " to event.")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
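For context, a minimal sketch of how this write helper might be driven. It assumes the script's module-level globals (output_dir, dataset_dir, cfg, args, cat, clusters) are configured elsewhere; the "train" subfolder name is only an illustration.

# Hypothetical driver; assumes the module-level globals used by write()
# (output_dir, dataset_dir, cfg, args, cat, clusters) are set up elsewhere.
import os

event_dir = os.path.join(dataset_dir, cfg.mseed_event_dir)
stream_files = [f for f in os.listdir(event_dir) if f.endswith(".mseed")]
write(stream_files, "train")  # "train" is an illustrative subfolder name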
Example #2
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    negatives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_negatives)
    if not os.path.exists(negatives_dir):
        os.makedirs(negatives_dir)

    # Write noise waveforms and cluster_id in .tfrecords
    output_name = args.file_name
    output_path = os.path.join(negatives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir,
                                   stream_file)
        #print "[tfrecords negatives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords negatives] Preprocessing stream'

        # Filter by frequency if enabled
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        cluster_id = -1  # Noise windows are labeled with cluster_id = -1
        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords negatives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(
        writer._written))
    writer.close()
Example #3
def main(args):
    setproctitle.setproctitle('quakenet_predict_from_tfrecords')

    # Create dir to store tfrecords
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(args.output_dir, "metadata.json")

    # CSV of window start and end times
    times_csv = {"start_time": [], "end_time": []}

    # Write event waveforms and cluster_id=-1 in .tfrecords
    output_name = stream_file.split(".mseed")[0] + ".tfrecords"
    output_path = os.path.join(args.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time = stream[-1].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - args.window_size) / args.window_step
        print "total time {}, wind_size {}, win_step {}".format(
            total_time, args.window_size, args.window_step)
    else:
        max_windows = args.max_windows

    start_time = time.time()
    for idx, win in tqdm(enumerate(win_gen),
                         total=int(max_windows),
                         unit="window",
                         leave=False):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the traces are complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10
        n_pts = win[0].stats.sampling_rate * args.window_size + 1
        # Write every complete 3-channel window (there is no catalog check here)
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            writer.write(win, -1)
            # Write start and end times in csv
            times_csv["start_time"].append(win[0].stats.starttime)
            times_csv["end_time"].append(win[0].stats.endtime)
            # Plot events
            if args.plot:
                trace = win[0]
                viz_dir = os.path.join(args.output_dir, "viz",
                                       stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                trace.plot(
                    outfile=os.path.join(viz_dir, "window_{}.png".format(idx)))

        # if idx % 1000  ==0 and idx != 0:
        #     print "{} windows created".format(idx)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)

    # Write start and end times
    df = pd.DataFrame.from_dict(times_csv)
    output_times = os.path.join(args.output_dir, "catalog_times.csv")
    df.to_csv(output_times)

    print "Last window analyzed ends on", win[0].stats.endtime
    print "Time to create tfrecords: {}s".format(time.time() - start_time)
Example #4
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    print(stream_path)
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)
    #stream.resample(10.0)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir,"metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("starttime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    #print stream[0].stats
    #m2 = re.search(cat.stname.values[:], stream_file.split(".")[1])
    #print m2.group()
    cat = filter_catalog(cat, starttime, endtime)
    #cat = cat[(cat.stname == str(stream_file.split(".")[1]))]
    #cat = cat[(cat.stname == str(stream_file.split(".")[1])) or
    #          (cat.stname == str(stream_file.split(".")[1][:-1]))]
    #print cat
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)

    # Create window generator and shuffle the order, 2017/12/4
    #win_gen = [tr for tr in stream.slide(window_length=FLAGS.window_size,
    #                                     step=FLAGS.window_step,
    #                                     include_partial_windows=False)]
    #random.shuffle(win_gen)

    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1


    n_tfrecords = 0

    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the traces are complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            # Get rid of windows with very small amplitudes, 2017/12/6
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
            #a = remove_repeat(win, n_samples)
            #if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
            #    continue
        else:
            n_samples = 10
            ampl_e = max(abs(win[0].data))
            if ampl_e < 1e-10:
                continue
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        # Extend the window so that more nearby events can be avoided, 2017/12/07
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend

        #print window_start_extend, window_end_extend
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # There is no event in the window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                #writer.write(win.normalize(), -1)
                writer.write(win.copy().normalize(), -1)
                #writer.write(win.copy().resample(10).filter(
                #    'bandpass', freqmin=0.5, freqmax=20).normalize(), -1)
                # Plot windows
                if FLAGS.plot:
                    import matplotlib
                    matplotlib.use('Agg')
                    #trace = win[0].filter('bandpass', freqmin=0.5, freqmax=20)
                    trace = win[0]
                    viz_dir = os.path.join(
                        FLAGS.output_dir, "viz", stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    #trace.resample(10).plot(outfile=os.path.join(
                    #    viz_dir, "noise_{}.png".format(str(window_start))))
                    # Changed 2017/11/25: name files by station and start time
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        "noise_{}_{}.png".format(
                            win[0].stats.station,
                            str(win[0].stats.starttime).replace(':', '_'))))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
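A note on the event check: an event timestamp can never make both masks False at once (that would require it to lie before the extended start and after the extended end simultaneously), so elementwise equality of after_start and before_end is True exactly for events inside the extended window. A self-contained illustration with made-up timestamps:

# Minimal demonstration of the boolean-mask event check used above;
# the timestamps are made up for illustration.
import numpy as np

cat_event_times = np.array([100.0, 250.0, 900.0])
window_start_extend, window_end_extend = 200.0, 300.0
after_start = cat_event_times > window_start_extend
before_end = cat_event_times < window_end_extend
inside = np.where(after_start == before_end)[0]
print(inside)  # [1] -> only the event at t=250.0 falls inside the window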
Example #5
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("startime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1

    n_tfrecords = 0
    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the traces are complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # There is no event in the window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(win, -1)
                # Plot events
                if FLAGS.plot:
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}.png".format(idx)))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
Example #6
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)

    for stream_file in stream_files:

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print("+ Loading Stream {}".format(stream_file))
        stream = read(stream_path)
        print('+ Preprocessing stream')
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]

        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id, filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
Example #7
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    positives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(positives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        
        # Filter by frequency if enabled
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0  # We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            if c is not None:  # Can be None in case of polygon-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1  # Signals that the earthquake has to be discarded
            print("[tfrecords positives] Assigning cluster {} to event "
                  "(lat = {}, lon = {}).".format(cluster_id, lat, lon))
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if cluster_id >= 0:  # No clustering, or a valid cluster
            n_traces = len(st_event_select)
            if utils.check_stream(st_event_select, cfg):
                #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
                # Write tfrecords

                #DEBUG: STA_LTA
                #df = st_event_select[0].stats.sampling_rate
                #cft = classic_sta_lta(st_event_select[0], int(5 * df), int(10 * df))
                #for trig in cft:
                #    if trig != .0:
                #        print(trig)

                writer.write(st_event_select, cluster_id)
        else:
            print("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding "
                  "point as no cluster found for the given lat={}, lon={}, "
                  "depth={}".format(lat, lon, depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
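The commented-out DEBUG block above hints at an STA/LTA sanity check. A minimal runnable sketch, assuming ObsPy is available: note that classic_sta_lta expects a plain NumPy array, so the trace's .data attribute is passed rather than the Trace object itself.

# Minimal STA/LTA sketch using ObsPy's bundled example stream; the 5 s / 10 s
# window lengths mirror the commented-out DEBUG block above.
from obspy import read
from obspy.signal.trigger import classic_sta_lta

st = read()  # ObsPy ships a small demo stream when called without arguments
trace = st[0]
df = trace.stats.sampling_rate
cft = classic_sta_lta(trace.data, int(5 * df), int(10 * df))
print("max STA/LTA ratio:", cft.max())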
Example #8
def main(_):

    if FLAGS.stretch_data:
        print("ADD NOISE AND STRETCH DATA")
    if FLAGS.compress_data:
        print("ADD NOISE AND COMPRESS DATA")
    if FLAGS.shift_data:
        print("ADD NOISE AND SHIFT DATA")

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        if not os.path.exists(os.path.join(output_dir, "true_data")):
            os.makedirs(os.path.join(output_dir, "true_data"))
        if not os.path.exists(os.path.join(output_dir, "augmented_data")):
            os.makedirs(os.path.join(output_dir, "augmented_data"))

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1

    data_pipeline = DataPipeline(FLAGS.tfrecords,
                                 config=cfg,
                                 is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        output_tfrecords = FLAGS.output
        writer = DataWriter(output_tfrecords)
        n_examples = 0
        while True:
            try:
                sample, label = sess.run([samples, labels])
                sample = np.squeeze(sample, axis=0)
                label = label[0]

                noised_sample = add_noise_to_signal(np.copy(sample))
                if FLAGS.compress_data:
                    noised_sample = compress_signal(noised_sample)
                if FLAGS.stretch_data:
                    noised_sample = stretch_signal(noised_sample)
                if FLAGS.shift_data:
                    noised_sample = shift_signal(noised_sample)

                if FLAGS.plot:
                    plot_true_and_augmented_data(sample, noised_sample, label,
                                                 n_examples)

                stream = convert_np_to_stream(noised_sample)
                writer.write(stream, label)

                n_examples += 1

            except KeyboardInterrupt:
                print('stopping data augmentation')
                break

            except tf.errors.OutOfRangeError:
                print('Augmentation completed ({} epochs, {} examples seen).'
                      .format(cfg.n_epochs, n_examples - 1))
                break

        writer.close()
        coord.request_stop()
        coord.join(threads)
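The helpers add_noise_to_signal, compress_signal, stretch_signal, and shift_signal are defined elsewhere in the repo. For a sense of their shape, here is a hypothetical sketch of the noise helper only; the Gaussian-noise approach and the 5% level are assumptions, not the repo's actual implementation.

# Hypothetical sketch of a noise helper; the real add_noise_to_signal in the
# repo may differ. Adds zero-mean Gaussian noise scaled per channel.
import numpy as np

def add_noise_to_signal(sample, noise_level=0.05):
    """sample: array of shape (n_samples, n_channels)."""
    std = sample.std(axis=0, keepdims=True)
    noise = np.random.randn(*sample.shape) * std * noise_level
    return sample + noise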
Example #9
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*.mseed')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"

    for stream_file in stream_files:
        cat = load_catalog(FLAGS.catalog)
        #cat = filter_catalog(cat,stream_file.split(".mseed")[0])

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print("+ Loading Stream {}".format(stream_file))
        stream = read(stream_path)
        print('+ Preprocessing stream')
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        #print(1111, cat)
        # Propagation time from source to station
        #travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            # event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            #cluster_id =1
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue

            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                # Use filter_small_ampitude to discard windows with very small
                # amplitude, 2017/12/6
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(
                    st_event, n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
                writer.write(st_event.copy().resample(10).normalize(),
                             cluster_id)
                #writer.write(st_event.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), cluster_id)
                #print (len(st_event[0]))
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))

                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass',
                                               freqmin=0.5,
                                               freqmax=20)
                    #trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        # Changed 2017/11/25: name files by station and start
                        # time instead of "event_{}_cluster_{}.png"
                        "event_{}_{}.png".format(
                            st_event[0].stats.station,
                            str(st_event[0].stats.starttime).replace(':', '_')
                        )))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
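For reference, the fixed-length event slicing used above can be exercised on ObsPy's bundled example stream; the 10 s window stands in for FLAGS.window_size and the pick offset is arbitrary.

# Self-contained illustration of the event slicing used above, run against
# ObsPy's demo stream; window_size and the pick offset are arbitrary choices.
from obspy import read

window_size = 10.0                       # stands in for FLAGS.window_size
stream = read()                          # ObsPy's bundled example stream
event_time = stream[0].stats.starttime + 5.0
st_event = stream.slice(event_time, event_time + window_size).copy()
print(st_event)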