Exemplo n.º 1
0
def write(stream_files, subfolder):
    """Write the positive (event) windows into one .tfrecords file.

    Every name in ``stream_files`` is read from
    ``<dataset_dir>/<cfg.mseed_event_dir>``, passed through
    ``preprocess_stream`` and ``utils.select_components`` and, when
    ``utils.check_stream`` accepts it, appended together with its cluster id
    to ``<output_dir>/<subfolder>/<cfg.output_tfrecords_dir_positives>/<args.file_name>``.

    Uses the module-level names ``output_dir``, ``dataset_dir``, ``cfg``,
    ``args``, ``cat`` and ``clusters``.

    Args:
        stream_files: iterable of mseed file names inside the event directory.
        subfolder: sub-directory of ``output_dir`` that receives the output.
    """
    target_dir = os.path.join(output_dir, subfolder,
                              cfg.output_tfrecords_dir_positives)
    # makedirs also creates the intermediate <output_dir>/<subfolder>.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Write event waveforms and cluster_id in .tfrecords
    writer = DataWriter(os.path.join(target_dir, args.file_name))
    event_dir = os.path.join(dataset_dir, cfg.mseed_event_dir)
    try:
        for stream_file in stream_files:
            st_event = read(os.path.join(event_dir, stream_file))
            st_event = preprocess_stream(st_event)

            # Select only the specified channels
            st_event_select = utils.select_components(st_event, cfg)

            # Location clusters: without a catalog every event goes to
            # cluster 0 (single-location setup).
            cluster_id = 0
            if cat is not None:
                stream_start_time = st_event[0].stats.starttime
                stream_end_time = st_event[-1].stats.endtime
                station = st_event[0].stats.station
                lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                      stream_end_time, station)
                cluster_id = clusters.nearest_cluster(lat, lon, depth).id
                print("[tfrecords positives] Assigning cluster " +
                      str(cluster_id) + " to event.")

            if utils.check_stream(st_event_select, cfg):
                writer.write(st_event_select, cluster_id)

        print("[tfrecords positives] Number of windows written={}".format(
            writer._written))
    finally:
        # Close the writer even when reading/processing a stream fails, so
        # the records written so far are not left in an unclosed file.
        writer.close()
Exemplo n.º 2
0
def write(stream_files, subfolder):
    """Write the negative (noise) windows into one .tfrecords file.

    Every name in ``stream_files`` is read from
    ``<dataset_dir>/<cfg.mseed_noise_dir>``, optionally filtered with
    ``utils.filter_stream`` (when ``cfg.filterfreq`` is truthy), reduced with
    ``utils.select_components`` and, when ``utils.check_stream`` accepts it,
    appended with cluster id -1 to
    ``<output_dir>/<subfolder>/<cfg.output_tfrecords_dir_negatives>/<args.file_name>``.

    Uses the module-level names ``output_dir``, ``dataset_dir``, ``cfg`` and
    ``args``.

    Args:
        stream_files: iterable of mseed file names inside the noise directory.
        subfolder: sub-directory of ``output_dir`` that receives the output.
    """
    target_dir = os.path.join(output_dir, subfolder,
                              cfg.output_tfrecords_dir_negatives)
    # makedirs also creates the intermediate <output_dir>/<subfolder>.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Every window written by this function carries the label -1.
    cluster_id = -1

    writer = DataWriter(os.path.join(target_dir, args.file_name))
    noise_dir = os.path.join(dataset_dir, cfg.mseed_noise_dir)
    try:
        for stream_file in stream_files:
            st_event = read(os.path.join(noise_dir, stream_file))

            # Frequency filtering is optional and driven by the config.
            if cfg.filterfreq:
                st_event = utils.filter_stream(st_event)

            # Select only the specified channels
            st_event_select = utils.select_components(st_event, cfg)

            if utils.check_stream(st_event_select, cfg):
                writer.write(st_event_select, cluster_id)

        print("[tfrecords negatives] Number of windows written={}".format(
            writer._written))
    finally:
        # Close the writer even when reading/processing a stream fails.
        writer.close()
Exemplo n.º 3
0
def processMseed(stream_path, cat, output_dir, plot, onlyStation):
    """Cut one mseed file into per-station positive and negative windows.

    The stream is read and preprocessed, then handled station by station
    (one iteration per Z-component trace). For each station the P arrival
    time is looked up with ``getPtime``; when one is found the station's
    sub-stream is saved (and optionally plotted) and then slid over with
    windows of ``cfg.window_size`` every ``cfg.window_stride``:

    * a window that fully contains ``[ptime - pwave_window/2,
      ptime + pwave_window/2]`` is written to ``cfg.mseed_event_dir``;
    * a window ending more than ``cfg.window_avoid_negatives_before`` before
      that interval, or starting more than ``cfg.window_avoid_negatives_after``
      after it, is written to ``cfg.mseed_noise_dir``;
    * any other valid window is skipped, and windows rejected by
      ``utils.check_stream`` are counted as errors.

    Args:
        stream_path: path of the mseed file to process.
        cat: catalog object forwarded to ``getPtime``.
        output_dir: base directory containing the ``cfg.*_dir`` sub-folders.
        plot: when truthy, also save PNG plots next to the mseed outputs.
        onlyStation: if not None, only this station code is processed.
    """
    stream_file = os.path.basename(stream_path)
    print("[obtain training windows] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('[obtain training windows] Preprocessing stream')
    stream = utils.preprocess_stream(stream)

    stream_start_time = stream[0].stats.starttime
    stream_end_time = stream[-1].stats.endtime

    print(stream)

    for z_stream in stream.select(component="Z"):
        # Slice the input stream horizontally, for one station
        station = z_stream.stats.station

        print("[obtain training windows] found station "+station+".")

        if onlyStation is not None and onlyStation != station:
            continue

        print("[obtain training windows] ---------- Station "+station+" ---------")

        substream = stream.select(station=station)
        ptime = getPtime(cat, stream_start_time, stream_end_time, station, phase_hint="P")

        if ptime is None:
            print("[obtain training windows] \033[93m WARNING\033[0m No event detected at station "+station+" from "+stream_path+".")
            continue

        print("[obtain training windows] Found event in the catalog: "+str(ptime))

        event_window_start = ptime - cfg.pwave_window/2
        event_window_end = ptime + cfg.pwave_window/2

        # Shared stem of every file produced for this station.
        station_stem = utils.fileNameWithoutExtension(stream_file)+"_"+station

        # Log the path that is really written (the name without extension).
        station_mseed = os.path.join(output_dir, cfg.mseed_dir, station_stem+".mseed")
        print("[obtain training windows] Extracting full stream and saving into "+station_mseed)

        if plot:
            customPlot(substream, ptime, os.path.join(output_dir, cfg.png_dir, station_stem+".png"))
        substream.write(station_mseed, format="MSEED")

        # Slice the input stream vertically, by time
        sys.stdout.write("[obtain training windows] Extracting positive and negative windows and saving into "+output_dir+":\n")
        win_gen = substream.slide(window_length=cfg.window_size,
                                  step=cfg.window_stride,
                                  include_partial_windows=False)
        num_negatives = 0
        num_positives = 0
        num_errors = 0
        num_skipped = 0
        for idx, win in enumerate(win_gen):
            if not utils.check_stream(win, cfg, False):
                sys.stdout.write("\033[93m.\033[0m")
                num_errors += 1
            else:
                window_start = win[0].stats.starttime.timestamp
                window_end = win[-1].stats.endtime.timestamp

                if (window_start <= event_window_start) and (window_end >= event_window_end):
                    # Positive: the window covers the whole event interval.
                    win_stem = station_stem+"_"+str(idx)
                    win.write(os.path.join(output_dir, cfg.mseed_event_dir, win_stem+".mseed"), format="MSEED")
                    if plot:
                        win.plot(outfile=os.path.join(output_dir, cfg.png_event_dir, win_stem+".png"))
                    sys.stdout.write("\033[92m.\033[0m")
                    num_positives += 1
                elif (window_end < event_window_start-cfg.window_avoid_negatives_before) or (window_start > event_window_end+cfg.window_avoid_negatives_after):
                    # Negative: the window lies outside the guard margins.
                    win_stem = station_stem+"_noise"+str(idx)
                    win.write(os.path.join(output_dir, cfg.mseed_noise_dir, win_stem+".mseed"), format="MSEED")
                    if plot:
                        win.plot(outfile=os.path.join(output_dir, cfg.png_noise_dir, win_stem+".png"))
                    sys.stdout.write("\033[91m.\033[0m")
                    num_negatives += 1
                else:
                    # Neither clearly positive nor clearly negative.
                    sys.stdout.write(".")
                    num_skipped += 1
            # Flush after every marker so progress is shown as it happens.
            sys.stdout.flush()

        print("\n[obtain training windows] "+str(num_positives)+" positive windows obtained.")
        print("[obtain training windows] "+str(num_negatives)+" negative windows obtained.")
        print("[obtain training windows] "+str(num_skipped)+" windows skipped (neither clear positive nor negative).")
        print("[obtain training windows] "+str(num_errors)+" windows discarded because of errors (config debug=True for details).")
Exemplo n.º 4
0
def main(args):
    """Classify one fixed window of ``args.stream_path`` with the trained model.

    The output directory ``cfg.OUTPUT_PREDICT_BASE_DIR`` is recreated from
    scratch, the stream is read and preprocessed, and a single window of
    ``cfg.WINDOW_SIZE`` seconds starting at the hard-coded time
    2015-02-05T05:40:18.01 is fed to the model restored from
    ``cfg.CHECKPOINT_DIR``. A detected event is appended to a CSV catalog and
    plotted under ``viz``; the whole stream is plotted with ``customPlot``.

    The process exits with status 1 when the checkpoint directory is missing
    or when the window fails ``utils.check_stream``.

    Args:
        args: parsed command-line arguments; only ``stream_path`` is read.
    """
    setproctitle.setproctitle('quakenet_predict')

    if not os.path.exists(cfg.CHECKPOINT_DIR):
        print("\033[91m ERROR!!\033[0m Missing directory "+cfg.CHECKPOINT_DIR+". Run step 4 first.")
        # Report the failure to the caller instead of a success status.
        sys.exit(1)

    # Remove previous output directory
    if os.path.exists(cfg.OUTPUT_PREDICT_BASE_DIR):
        shutil.rmtree(cfg.OUTPUT_PREDICT_BASE_DIR)
    os.makedirs(cfg.OUTPUT_PREDICT_BASE_DIR)
    os.makedirs(os.path.join(cfg.OUTPUT_PREDICT_BASE_DIR, "viz"))
    if cfg.save_sac:
        os.makedirs(os.path.join(cfg.OUTPUT_PREDICT_BASE_DIR, "sac"))

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = utils.preprocess_stream(stream)

    # Create catalog name in which the events are stored
    catalog_name = stream_file.split(".mseed")[0] + ".csv"
    output_catalog = os.path.join(cfg.OUTPUT_PREDICT_BASE_DIR, catalog_name)
    print('Catalog created to store events {}'.format(output_catalog))

    # Dictionary to store info on detected events
    events_dic = {"start_time": [],
                  "end_time": [],
                  "cluster_id": [],
                  "clusters_prob": []}

    # stream data with a placeholder
    samples = {
        'data': tf.placeholder(tf.float32,
                               shape=(1, cfg.win_size, 3),
                               name='input_data'),
        'cluster_id': tf.placeholder(tf.int64,
                                     shape=(1,),
                                     name='input_label')
    }

    # set up model and validation metrics
    model = models.get(cfg.model, samples, cfg,
                       cfg.CHECKPOINT_DIR,
                       is_training=False)

    with tf.Session() as sess:

        model.load(sess)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))

        n_events = 0
        time_start = time.time()

        # Defined before the try block so the interrupt handler can always
        # read them, even when the interrupt arrives before the slice.
        win = None
        idx = 0

        try:
            fromTime = UTCDateTime(2015, 2, 5, 5, 40, 18, 10000)
            win = stream.slice(fromTime, fromTime + cfg.WINDOW_SIZE).copy()
            win.plot(outfile=cfg.OUTPUT_PREDICT_BASE_DIR+"/"+stream_file+"_input_window_.png")

            # Fetch class_proba and label
            to_fetch = [samples['data'],
                        model.layers['class_prob'],
                        model.layers['class_prediction']]
            # Feed window and fake cluster_id (needed by the net) but
            # will be predicted
            if not utils.check_stream(win, cfg):
                print("\033[91m ERROR!!\033[0m Incomplete data.")
                sys.exit(1)

            feed_dict = {samples['data']: utils.fetch_window_data(win, cfg),
                         samples['cluster_id']: np.array([0])}
            sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict)

            # Keep only clusters proba, remove noise proba
            clusters_prob = class_prob_[0, 1:]
            # label for noise = -1, label for cluster \in {0:n_clusters}
            cluster_id -= 1

            is_event = cluster_id[0] > -1
            if is_event:
                n_events += 1
                events_dic["start_time"].append(win[0].stats.starttime)
                events_dic["end_time"].append(win[0].stats.endtime)
                events_dic["cluster_id"].append(cluster_id[0])
                events_dic["clusters_prob"].append(list(clusters_prob))

            print("Analyzing {} records".format(win[0].stats.starttime))

            if is_event:
                # NOTE(review): cluster_id is the fetched array, so its
                # brackets end up in the file name; kept as in the original.
                win.copy().plot(outfile=os.path.join(
                    cfg.OUTPUT_PREDICT_BASE_DIR, "viz",
                    "event_{}_cluster_{}.png".format(idx, cluster_id)))

            print("found {} events".format(n_events))

        except KeyboardInterrupt:
            if win is not None:
                print('Interrupted at time {}.'.format(win[0].stats.starttime))
            else:
                print('Interrupted before the window was loaded.')
            print("processed {} windows, found {} events".format(idx+1, n_events))
            print("Run time:  {}".format(time.time() - time_start))

    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    customPlot(stream, cfg.OUTPUT_PREDICT_BASE_DIR+"/"+stream_file+".png", events_dic["start_time"])

    print("Run time:  {}".format(time.time() - time_start))
def processMseed(stream_path, cat, output_dir, plot, onlyStation):
    """Cut one mseed file into per-station positive and negative windows.

    The stream is read and preprocessed, then handled station by station
    (one iteration per Z-component trace). For each station the P arrival
    time is looked up with ``cat.getPtime``; when one is found the station's
    sub-stream is saved (and optionally plotted) and then slid over with
    windows of ``cfg.window_size`` every ``cfg.window_step_negatives``:

    * a window that fully contains ``[ptime - pwave_window/2,
      ptime + pwave_window/2]`` is written to ``cfg.mseed_event_dir``;
    * a window lying more than ``cfg.window_avoid_negatives`` before or
      after that interval is written to ``cfg.mseed_noise_dir``;
    * any other valid window is skipped, and windows rejected by
      ``utils.check_stream`` are counted as errors.

    Args:
        stream_path: path of the mseed file to process.
        cat: catalog object providing ``getPtime(start, end, station)``.
        output_dir: base directory containing the ``cfg.*_dir`` sub-folders.
        plot: when truthy, also save PNG plots next to the mseed outputs.
        onlyStation: if not None, only this station code is processed.
    """
    stream_file = os.path.basename(stream_path)
    print("[obtain training windows] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('[obtain training windows] Preprocessing stream')
    stream = preprocess_stream(stream)
    stream_start_time = stream[0].stats.starttime
    stream_end_time = stream[-1].stats.endtime

    for z_stream in stream.select(component="Z"):
        # Slice the input stream horizontally, for one station
        station = z_stream.stats.station

        if onlyStation is not None and onlyStation != station:
            continue

        print("[obtain training windows] ---------- Station " + station +
              " ---------")

        substream = stream.select(station=station)
        ptime = cat.getPtime(stream_start_time, stream_end_time, station)

        if ptime is None:
            print(
                "[obtain training windows] \033[93m WARNING\033[0m No event detected at station "
                + station + " from " + stream_path + ".")
            continue

        print("[obtain training windows] Found event in the catalog: " +
              str(ptime))

        event_window_start = ptime - cfg.pwave_window / 2
        event_window_end = ptime + cfg.pwave_window / 2

        # Shared stem of every file produced for this station.
        station_stem = (utils.fileNameWithoutExtension(stream_file) + "_" +
                        station)

        # Log the path that is really written (the name without extension).
        station_mseed = os.path.join(output_dir, cfg.mseed_dir,
                                     station_stem + ".mseed")
        print(
            "[obtain training windows] Extracting full stream and saving into "
            + station_mseed)

        if plot:
            customPlot(
                substream, ptime,
                os.path.join(output_dir, cfg.png_dir, station_stem + ".png"))
        substream.write(station_mseed, format="MSEED")

        # Slice the input stream vertically, by time
        sys.stdout.write(
            "[obtain training windows] Extracting positive and negative windows and saving into "
            + output_dir + ":\n")
        win_gen = substream.slide(window_length=cfg.window_size,
                                  step=cfg.window_step_negatives,
                                  include_partial_windows=False)
        num_negatives = 0
        num_positives = 0
        num_errors = 0
        num_skipped = 0
        for idx, win in enumerate(win_gen):
            if not utils.check_stream(win, cfg, False):
                sys.stdout.write("\033[93m.\033[0m")
                num_errors += 1
            else:
                window_start = win[0].stats.starttime.timestamp
                window_end = win[-1].stats.endtime.timestamp

                if (window_start <= event_window_start) and (
                        window_end >= event_window_end):
                    # Positive: the window covers the whole event interval.
                    win_stem = station_stem + "_" + str(idx)
                    win.write(os.path.join(output_dir, cfg.mseed_event_dir,
                                           win_stem + ".mseed"),
                              format="MSEED")
                    if plot:
                        win.plot(outfile=os.path.join(
                            output_dir, cfg.png_event_dir, win_stem + ".png"))
                    sys.stdout.write("\033[92m.\033[0m")
                    num_positives += 1
                elif (window_end <
                      event_window_start - cfg.window_avoid_negatives) or (
                          window_start >
                          event_window_end + cfg.window_avoid_negatives):
                    # Negative: the window lies outside the guard margin.
                    win_stem = station_stem + "_noise" + str(idx)
                    win.write(os.path.join(output_dir, cfg.mseed_noise_dir,
                                           win_stem + ".mseed"),
                              format="MSEED")
                    if plot:
                        win.plot(outfile=os.path.join(
                            output_dir, cfg.png_noise_dir, win_stem + ".png"))
                    sys.stdout.write("\033[91m.\033[0m")
                    num_negatives += 1
                else:
                    # Neither clearly positive nor clearly negative.
                    sys.stdout.write(".")
                    num_skipped += 1
            # Flush after every marker so progress is shown as it happens.
            sys.stdout.flush()

        print("\n[obtain training windows] " + str(num_positives) +
              " positive windows obtained.")
        print("[obtain training windows] " + str(num_negatives) +
              " negative windows obtained.")
        print("[obtain training windows] " + str(num_skipped) +
              " windows skipped (neither clear positive nor negative).")
        print(
            "[obtain training windows] " + str(num_errors) +
            " windows discarded because of errors (config debug=True for details)."
        )
Exemplo n.º 6
0
def predict(path, stream_file, sess, model, samples, cat):
    """Classify every sliding window of one stream and score it against ``cat``.

    The stream ``<path>/<stream_file>`` is read, preprocessed and reduced to
    the configured components, then slid over with windows of
    ``cfg.window_size`` every ``cfg.window_step_predict``. Each window that
    passes ``utils.check_stream`` is run through ``sess``; a predicted class
    greater than 0 counts as an event. Detected events are saved to
    ``<output_dir>/<stream name>.csv`` and the whole stream is plotted with
    ``customPlot``.

    Ground truth: a window is a real positive when it fully contains
    ``[ptime - pwave_window/2, ptime + pwave_window/2]``, where ``ptime``
    comes from ``cat.getPtime``. When ``cat`` is None, or the catalog returns
    no P time, every window is treated as a real negative.

    When the module-level flag ``evaluation`` is truthy, the module-level
    counters ``truePositives``, ``falsePositives``, ``trueNegatives`` and
    ``falseNegatives`` are updated. ``perf_total_time`` and
    ``perf_total_predictions`` are always updated.

    Args:
        path: directory containing the stream file.
        stream_file: file name of the mseed stream.
        sess: TensorFlow session holding the restored model.
        model: model object exposing ``layers`` and ``global_step``.
        samples: dict with the ``'data'`` and ``'cluster_id'`` placeholders.
        cat: catalog object providing ``getPtime``, or None.

    Returns:
        dict with the keys ``start_time``, ``end_time``, ``cluster_id`` and
        ``clusters_prob``, one list entry per detected event.
    """
    global truePositives
    global falsePositives
    global trueNegatives
    global falseNegatives

    global perf_total_predictions
    global perf_total_time

    def _new_record():
        # One empty per-category table of detections.
        return {"start_time": [],
                "end_time": [],
                "cluster_id": [],
                "clusters_prob": []}

    def _record(table, window, predicted_id, probabilities):
        # Append one classified window to a per-category table.
        table["start_time"].append(window[0].stats.starttime)
        table["end_time"].append(window[0].stats.endtime)
        table["cluster_id"].append(predicted_id[0])
        table["clusters_prob"].append(list(probabilities))

    # Load stream
    stream_path = os.path.join(path, stream_file)
    stream_file = os.path.split(stream_path)[-1]
    stream_file_without_extension = stream_file.split(".mseed")[0]
    print("[classify] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('[classify] Preprocessing stream')
    stream = utils.preprocess_stream(stream)

    # Select only the specified channels
    stream_select = utils.select_components(stream, cfg)

    # Recreate the per-stream output tree from scratch.
    outputSubdir = os.path.join(output_dir, stream_file_without_extension)
    if os.path.exists(outputSubdir):
        shutil.rmtree(outputSubdir)
    os.makedirs(outputSubdir)
    os.makedirs(os.path.join(outputSubdir, "subPlots"))
    os.makedirs(os.path.join(outputSubdir, "viz"))
    sac_dir = os.path.join(outputSubdir, "sac")
    if cfg.save_sac:
        os.makedirs(sac_dir)

    # Create catalog name in which the events are stored
    catalog_name = stream_file_without_extension + ".csv"
    output_catalog = os.path.join(output_dir, catalog_name)
    print('[classify] Catalog created to store events {}'.format(output_catalog))

    # Tables of detected events, split by evaluation outcome.
    events_dic = _new_record()
    true_positive_dic = _new_record()
    false_positive_dic = _new_record()
    false_negative_dic = _new_record()
    missed_dic = _new_record()

    # Windows generator
    win_gen = stream_select.slide(window_length=cfg.window_size,
                                  step=cfg.window_step_predict,
                                  include_partial_windows=False)

    total_time_in_sec = stream_select[0].stats.endtime - stream_select[0].stats.starttime
    max_windows = (total_time_in_sec - cfg.window_size) / cfg.window_step_predict

    n_events = 0
    time_start = time.time()

    # Ground-truth event interval; stays None when there is no catalog or
    # the catalog has no P time for this stream.
    event_window_start = None
    event_window_end = None
    if cat is not None:
        station = stream[0].stats.station
        stream_start_time = stream[0].stats.starttime
        stream_end_time = stream[-1].stats.endtime
        ptime = cat.getPtime(stream_start_time, stream_end_time, station)
        print("ptime="+str(ptime))
        if ptime is not None:
            event_window_start = ptime - cfg.pwave_window/2
            event_window_end = ptime + cfg.pwave_window/2
            print("event_window_start="+str(event_window_start)+"<->event_window_end="+str(event_window_end))

    # Fetch class_proba and label (the same tensors for every window).
    to_fetch = [samples['data'], model.layers['class_prob'], model.layers['class_prediction']]

    # Defined before the loop so they exist even when no window is produced.
    idx = -1
    win = None

    try:
        for idx, win in enumerate(win_gen):
            # Check the groundtruth
            isPositive = False
            if cat is not None:
                window_start = win[0].stats.starttime
                window_end = win[-1].stats.endtime

                print("window: "+str(window_start)+"-"+str(window_end))

                isPositive = (event_window_start is not None
                              and window_start <= event_window_start
                              and window_end >= event_window_end)
                if isPositive:
                    print("REAL POSITIVE")
                else:
                    print("REAL NEGATIVE")

            # Windows that fail validation are only remembered as missed.
            if not utils.check_stream(win, cfg):
                missed_dic["start_time"].append(win[0].stats.starttime)
                continue

            # Feed window and fake cluster_id (needed by the net) but
            # will be predicted
            feed_dict = {samples['data']: utils.fetch_window_data(win, cfg),
                         samples['cluster_id']: np.array([0])}
            perf_start_time = time.time()
            sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict)
            perf_elapsed_time = time.time() - perf_start_time
            perf_total_time = perf_total_time + perf_elapsed_time
            perf_total_predictions = perf_total_predictions + 1
            print("Prediction time = "+str(perf_elapsed_time))
            print("Average time per prediction in secs = "+str(perf_total_time/perf_total_predictions))

            # Keep only clusters proba, remove noise proba
            clusters_prob = class_prob_[0, 1:]
            # label for noise = -1, label for cluster \in {0:n_clusters}
            cluster_id -= 1

            is_event = cluster_id[0] > -1

            if is_event:
                print("PREDICTED POSITIVE")
                n_events += 1
                _record(events_dic, win, cluster_id, clusters_prob)
                if evaluation:
                    if isPositive:
                        sys.stdout.write("\033[92mP\033[0m")
                        sys.stdout.flush()
                        truePositives = truePositives+1
                        _record(true_positive_dic, win, cluster_id, clusters_prob)
                    else:
                        sys.stdout.write("\033[91mP\033[0m")
                        sys.stdout.flush()
                        falsePositives = falsePositives+1
                        _record(false_positive_dic, win, cluster_id, clusters_prob)
            else:
                print("PREDICTED NEGATIVE")
                if evaluation:
                    if isPositive:
                        sys.stdout.write("\033[91mN\033[0m")
                        sys.stdout.flush()
                        falseNegatives = falseNegatives+1
                        _record(false_negative_dic, win, cluster_id, clusters_prob)
                    else:
                        sys.stdout.write("\033[92mN\033[0m")
                        sys.stdout.flush()
                        trueNegatives = trueNegatives+1

            if cfg.save_sac and is_event:
                # Write into the "sac" directory created above for this
                # stream; <output_dir>/sac is never created here.
                # NOTE(review): cluster_id is the fetched array, so its
                # brackets end up in the file name; kept as in the original.
                win.copy().write(os.path.join(sac_dir,
                                 "event_{}_cluster_{}.sac".format(idx, cluster_id)),
                                 format="SAC")

            if idx >= max_windows:
                print("[classify] stopped after {} windows".format(max_windows))
                print("[classify] found {} events".format(n_events))
                break

    except KeyboardInterrupt:
        if win is not None:
            print('[classify] Interrupted at time {}.'.format(win[0].stats.starttime))
        else:
            print('[classify] Interrupted before the first window.')
        print("[classify] processed {} windows, found {} events".format(idx+1, n_events))
        print("[classify] Run time:  {}".format(time.time() - time_start))

    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    # Plot everything; idx is the last window index (-1 if there was none).
    customPlot(stream, output_dir+"/"+stream_file+"_"+str(idx)+".png", events_dic["start_time"], missed_dic["start_time"], true_positive_dic["start_time"], false_positive_dic["start_time"], false_negative_dic["start_time"])

    print("\n[classify] Run time:  {}".format(time.time() - time_start))

    return events_dic
def write(stream_files, subfolder):
    """Write the selected components of each event stream to one .tfrecords file.

    The output file is ``<output_dir>/<subfolder>/
    <cfg.output_tfrecords_dir_positives>/<args.file_name>``; missing
    directories on that path are created first.

    This function relies on module-level names defined elsewhere in the
    file: ``output_dir``, ``dataset_dir``, ``cfg``, ``args``, ``cat``,
    ``clusters``, ``utils``, ``DataWriter`` and ``read``.

    Args:
        stream_files: iterable of stream file names, each resolved relative
            to ``<dataset_dir>/<cfg.mseed_event_dir>``.
        subfolder: name of the sub-directory of ``output_dir`` that receives
            the output.

    Returns:
        None. The effect is the written file plus progress messages on stdout.
    """
    subfolder_dir = os.path.join(output_dir, subfolder)
    positives_dir = os.path.join(subfolder_dir,
                                 cfg.output_tfrecords_dir_positives)
    for directory in (subfolder_dir, positives_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Event waveforms and their cluster_id all go into this single file.
    output_path = os.path.join(positives_dir, args.file_name)
    # Loop-invariant: every input stream lives in the same directory.
    events_dir = os.path.join(dataset_dir, cfg.mseed_event_dir)

    writer = DataWriter(output_path)
    try:
        for stream_file in stream_files:
            st_event = read(os.path.join(events_dir, stream_file))

            # Optional filtering, enabled through the configuration.
            if cfg.filterfreq:
                st_event = utils.filter_stream(st_event)

            # Select only the specified channels.
            st_event_select = utils.select_components(st_event, cfg)

            # LOCATION CLUSTERS
            # Without a catalog there is a single location (cluster id = 0).
            cluster_id = 0
            if cat is not None:
                stream_start_time = st_event[0].stats.starttime
                stream_end_time = st_event[-1].stats.endtime
                station = st_event[0].stats.station
                lat, lon, depth = cat.getLatLongDepth(
                    stream_start_time, stream_end_time, station)
                nearest = clusters.nearest_cluster(lat, lon, depth)
                # nearest can be None with polygons-based clustering; -1 then
                # marks the event as one that has to be discarded.
                cluster_id = nearest.id if nearest is not None else -1
                print("[tfrecords positives] Assigning cluster "+str(cluster_id)+" to event (lat =  "+str(lat)+", lon = "+str(lon)+").")

                if cluster_id < 0:
                    print("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding point as no cluster found for the given lat="+str(lat)+", lon="+str(lon)+", depth="+str(depth))
                    continue

            # Streams rejected by utils.check_stream are skipped silently.
            if utils.check_stream(st_event_select, cfg):
                writer.write(st_event_select, cluster_id)

        print("[tfrecords positives] Number of windows written={}".format(writer._written))
    finally:
        # Close the writer even if reading or processing a stream raised, so
        # the output file handle is not leaked.
        writer.close()