Example #1
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    positives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(positives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        st_event = preprocess_stream(st_event)

        #Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #LOCATION CLUSTERS
        cluster_id = 0  #We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " +
                  str(cluster_id) + " to event.")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
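A note on the exists-then-create pattern used throughout these examples: there is a race window between the os.path.exists check and the os.makedirs call. On Python 3.2+ a single call covers both steps; a minimal sketch with illustrative paths:

import os

# No error if the directory already exists, and no race between the
# check and the creation.
os.makedirs(os.path.join("output", "train", "positives"), exist_ok=True)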
Example #2
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    negatives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_negatives)
    if not os.path.exists(negatives_dir):
        os.makedirs(negatives_dir)

    # Write event waveforms and cluster_id in .tfrecords
    output_name = args.file_name
    output_path = os.path.join(negatives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir,
                                   stream_file)
        #print "[tfrecords negatives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords negatives] Preprocessing stream'

        # Filter the stream if configured
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        #Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        cluster_id = -1  # Noise windows are written with label -1
        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords negatives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(
        writer._written))
    writer.close()
Example #3
def main(args):
    setproctitle.setproctitle('quakenet_predict_from_tfrecords')

    # Create dir to store tfrecords
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(args.output_dir, "metadata.json")

    # Csv of start and end times
    times_csv = {"start_time": [], "end_time": []}

    # Write event waveforms and cluster_id=-1 in .tfrecords
    output_name = stream_file.split(".mseed")[0] + ".tfrecords"
    output_path = os.path.join(args.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time = stream[-1].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - args.window_size) / args.window_step
        print "total time {}, wind_size {}, win_step {}".format(
            total_time, args.window_size, args.window_step)
    else:
        max_windows = args.max_windows

    start_time = time.time()
    for idx, win in tqdm(enumerate(win_gen),
                         total=int(max_windows),
                         unit="window",
                         leave=False):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10  # placeholder; windows without 3 traces are not written
        n_pts = win[0].stats.sampling_rate * args.window_size + 1
        # Window is complete: write it with label -1
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            writer.write(win, -1)
            # Write start and end times in csv
            times_csv["start_time"].append(win[0].stats.starttime)
            times_csv["end_time"].append(win[0].stats.endtime)
            # Plot events
            if args.plot:
                trace = win[0]
                viz_dir = os.path.join(args.output_dir, "viz",
                                       stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                trace.plot(
                    outfile=os.path.join(viz_dir, "window_{}.png".format(idx)))

        # if idx % 1000  ==0 and idx != 0:
        #     print "{} windows created".format(idx)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)

    # Write start and end times
    df = pd.DataFrame.from_dict(times_csv)
    output_times = os.path.join(args.output_dir, "catalog_times.csv")
    df.to_csv(output_times)

    print "Last window analyzed ends on", win[0].stats.endtime
    print "Time to create tfrecords: {}s".format(time.time() - start_time)
Example #4
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    print(stream_path)
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)
    #stream.resample(10.0)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir,"metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print "startime", UTCDateTime(starttime)
    print "endtime", UTCDateTime(endtime)
    #print stream[0].stats
    #m2 = re.search(cat.stname.values[:], stream_file.split(".")[1])
    #print m2.group()
    cat = filter_catalog(cat, starttime, endtime)
    #cat = cat[(cat.stname == str(stream_file.split(".")[1]))]


    # cat = cat[(cat.stname == str(stream_file.split(".")[1])) or
    #           (cat.stname == str(stream_file.split(".")[1][:-1]))]
    # print cat
    print "First event in filtered catalog", cat.Date.values[0], cat.Time.values[0]
    print "Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1]
    cat_event_times = cat.utc_timestamp.values

    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)

    # Create window generator and shuffle the order, 2017/12/4
    # win_gen = [tr for tr in stream.slide(window_length=FLAGS.window_size,
    #                                      step=FLAGS.window_step,
    #                                      include_partial_windows=False)]
    # random.shuffle(win_gen)

    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1


    n_tfrecords = 0

    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            # Drop windows with very small amplitudes, 2017/12/6
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
            #a = remove_repeat(win, n_samples)
            #if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
            #    continue
        else:
            n_samples = 10  # placeholder; windows without 3 traces are not written
            ampl_e = max(abs(win[0].data))  # amplitude of the first trace
            if ampl_e < 1e-10:
                continue
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        # Extend the window so that more nearby events are avoided, 2017/12/07
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend

        #print window_start_extend,window_end_extend
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print "avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx])
        except IndexError:
            # there is no event
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                #writer.write(win.normalize(), -1)
                writer.write(win.copy().normalize(), -1)
                #writer.write(win.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), -1)
                # Plot events
                if FLAGS.plot:
                    import matplotlib
                    matplotlib.use('Agg')
                    #trace = win[0].filter('bandpass', freqmin=0.5, freqmax=20)
                    trace = win[0]
                    viz_dir = os.path.join(
                        FLAGS.output_dir, "viz", stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    # trace.resample(10).plot(outfile=os.path.join(viz_dir,
                    #     "noise_{}.png".format(str(window_start))))
                    # Changed 2017/11/25: use station and start time in the
                    # file name instead of cluster_id.
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}_{}.png".format(
                            win[0].stats.station,
                            str(win[0].stats.starttime).replace(':', '_'))))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords file")
            # Close the current writer so buffered records are flushed
            # before rotating to a new file.
            writer.close()
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
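The event test above relies on a compact trick: after_start == before_end can only be True where both arrays are True, since a catalog time cannot be both before the extended start and after the extended end. A standalone sketch with synthetic timestamps:

import numpy as np

cat_event_times = np.array([100.0, 250.0, 900.0])  # synthetic catalog times
window_start_extend, window_end_extend = 200.0, 300.0

after_start = cat_event_times > window_start_extend  # [False, True,  True]
before_end = cat_event_times < window_end_extend     # [True,  True,  False]

# Equality holds only where both are True, i.e. the event lies inside the
# window; indexing an empty result with [0] raises the IndexError that the
# try/except above treats as "no event in this window".
inside = np.where(after_start == before_end)[0]
print(inside)  # [1] -> the event at t=250 falls inside the window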
Example #5
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("startime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1

    n_tfrecords = 0
    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10  # placeholder; windows without 3 traces are not written
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # there is no event
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(win, -1)
                # Plot events
                if FLAGS.plot:
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}.png".format(idx)))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords file")
            # Close the current writer so buffered records are flushed
            # before rotating to a new file.
            writer.close()
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
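Both noise scripts rotate to a fresh tfrecords file every 1000 windows, closing the finished writer so its buffered records reach disk. The same logic can live in a small helper; this is a sketch assuming only the DataWriter interface visible in these examples (path constructor, write, close, _written), with an illustrative helper name:

import os

def rotate_writer(writer, output_name, metadata, output_dir, base, n_tfrecords):
    # Record the window count of the finished file, close it so buffered
    # records are flushed, then open the next file in the sequence.
    # DataWriter is the project's writer class used throughout these examples.
    metadata[output_name] = writer._written
    writer.close()
    new_name = "noise_{}_{}.tfrecords".format(base, n_tfrecords)
    return DataWriter(os.path.join(output_dir, new_name)), new_name

In the loop body, after incrementing n_tfrecords, the rotation block reduces to: writer, output_name = rotate_writer(writer, output_name, metadata, FLAGS.output_dir, stream_file.split(".mseed")[0], n_tfrecords).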
Example #6
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)

    for stream_file in stream_files:

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]

        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this window
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id, filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
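These event scripts expect sampling_rate * window_size + 1 samples per trace because ObsPy's slice keeps both endpoint samples by default. A self-contained sketch of the same check on a synthetic three-component stream (values are illustrative):

import numpy as np
from obspy import Stream, Trace, UTCDateTime

t0 = UTCDateTime(2017, 1, 1)
window_size = 10.0
stream = Stream([
    Trace(data=np.random.randn(72001).astype(np.float32),
          header={"sampling_rate": 100.0, "starttime": t0, "channel": ch})
    for ch in ("HHE", "HHN", "HHZ")])

# Slice a 10 s event window; both endpoints are included, hence the "+ 1".
event_time = t0 + 120.0  # synthetic event timestamp
st_event = stream.slice(event_time, event_time + window_size).copy()
n_pts = st_event[0].stats.sampling_rate * window_size + 1
print(len(st_event) == 3 and len(st_event[0].data) == n_pts)  # True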
Example #7
def write(stream_files, subfolder):

    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))

    positives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"  
    output_name = args.file_name
    output_path = os.path.join(positives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        
        # Filter the stream if configured
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0  # We work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            if c is not None:  # can be None in case of polygon-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1  # signals that the earthquake has to be discarded
            print("[tfrecords positives] Assigning cluster {} to event "
                  "(lat = {}, lon = {}).".format(cluster_id, lat, lon))
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if cluster_id >= 0:  # no clustering, or a valid cluster
            n_traces = len(st_event_select)
            if utils.check_stream(st_event_select, cfg):
                #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
                # Write tfrecords

                # DEBUG: STA_LTA
                #df = st_event_select[0].stats.sampling_rate
                #cft = classic_sta_lta(st_event_select[0], int(5 * df), int(10 * df))
                #for trig in cft:
                #    if trig != .0:
                #        print(trig)

                writer.write(st_event_select, cluster_id)
        else:
            print("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding "
                  "point as no cluster found for the given lat={}, lon={}, "
                  "depth={}".format(lat, lon, depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
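The commented DEBUG: STA_LTA block above refers to ObsPy's classic_sta_lta, which takes a NumPy array plus short- and long-window lengths in samples. A minimal sketch of such a check on synthetic data (the windows and interpretation are illustrative):

import numpy as np
from obspy.signal.trigger import classic_sta_lta

df = 100.0  # sampling rate in Hz
data = np.random.randn(int(60 * df))  # one minute of synthetic noise

# Ratio of a 5 s short-term average to a 10 s long-term average; values
# well above 1 indicate a transient such as an event onset.
cft = classic_sta_lta(data, int(5 * df), int(10 * df))
print(cft.max())  # stays near 1 for pure noise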
Example #8
def main(_):

    if FLAGS.stretch_data:
        print("ADD NOISE AND STRETCH DATA")
    if FLAGS.compress_data:
        print("ADD NOISE AND COMPRESS DATA")
    if FLAGS.shift_data:
        print("ADD NOISE AND SHIFT DATA")

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        if not os.path.exists(os.path.join(output_dir, "true_data")):
            os.makedirs(os.path.join(output_dir, "true_data"))
        if not os.path.exists(os.path.join(output_dir, "augmented_data")):
            os.makedirs(os.path.join(output_dir, "augmented_data"))

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1

    data_pipeline = DataPipeline(FLAGS.tfrecords,
                                 config=cfg,
                                 is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        output_tfrecords = FLAGS.output
        writer = DataWriter(output_tfrecords)
        n_examples = 0
        while True:
            try:
                sample, label = sess.run([samples, labels])
                sample = np.squeeze(sample, axis=0)
                label = label[0]

                noised_sample = add_noise_to_signal(np.copy(sample))
                if FLAGS.compress_data:
                    noised_sample = compress_signal(noised_sample)
                if FLAGS.stretch_data:
                    noised_sample = stretch_signal(noised_sample)
                if FLAGS.shift_data:
                    noised_sample = shift_signal(noised_sample)

                if FLAGS.plot:
                    plot_true_and_augmented_data(sample, noised_sample, label,
                                                 n_examples)

                stream = convert_np_to_stream(noised_sample)
                writer.write(stream, label)

                n_examples += 1

            except KeyboardInterrupt:
                print('stopping data augmentation')
                break

            except tf.errors.OutOfRangeError:
                print('Augmentation completed ({} epochs, {} examples seen).'
                      .format(cfg.n_epochs, n_examples - 1))
                break

        writer.close()
        coord.request_stop()
        coord.join(threads)
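add_noise_to_signal, compress_signal, stretch_signal, and shift_signal are project helpers whose implementations are not shown here. Purely as an illustration of the kind of transform involved (not the project's code), noise injection and shifting on a (n_samples, n_channels) array might look like:

import numpy as np

def add_noise_to_signal(sample, snr=10.0):
    # Add white Gaussian noise scaled to a target signal-to-noise ratio.
    noise_power = np.mean(sample ** 2) / snr
    return sample + np.random.randn(*sample.shape) * np.sqrt(noise_power)

def shift_signal(sample, max_shift=100):
    # Circularly shift the whole window by a random number of samples.
    return np.roll(sample, np.random.randint(-max_shift, max_shift + 1), axis=0)

augmented = shift_signal(add_noise_to_signal(np.random.randn(1001, 3)))
print(augmented.shape)  # (1001, 3)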
Example #9
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*.mseed')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"

    for stream_file in stream_files:
        cat = load_catalog(FLAGS.catalog)
        #cat = filter_catalog(cat,stream_file.split(".mseed")[0])

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        #print(1111, cat)
        # Propagation time from source to station
        #travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            # event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            #cluster_id =1
            n_traces = len(st_event)
            # If there is no trace skip this waveform
            if n_traces == 0:
                continue

            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                # Use filter_small_ampitude to drop windows with very small amplitudes, 2017/12/6
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(
                    st_event, n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
                writer.write(st_event.copy().resample(10).normalize(),
                             cluster_id)
                #writer.write(st_event.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), cluster_id)
                #print (len(st_event[0]))
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))

                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass',
                                               freqmin=0.5,
                                               freqmax=20)
                    #trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    # Changed 2017/11/25: use station and start time in the
                    # file name instead of cluster_id.
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        "event_{}_{}.png".format(
                            st_event[0].stats.station,
                            str(st_event[0].stats.starttime).replace(':', '_'))))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
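filter_small_ampitude and remove_repeat are project helpers used to drop windows dominated by near-zero or repeated samples; only their 0.3 thresholds are visible in these examples. One plausible flat-signal test, written as an assumption rather than a reconstruction of the project's code:

import numpy as np

def flat_fraction(trace_data, eps=1e-10):
    # Fraction of consecutive sample pairs that are numerically equal;
    # a value near 1.0 suggests a dead, clipped, or stuck channel.
    diffs = np.abs(np.diff(trace_data))
    return np.mean(diffs < eps)

dead = np.zeros(1001)
live = np.random.randn(1001)
print(flat_fraction(dead) > 0.3, flat_fraction(live) > 0.3)  # True False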