def main(_):
    """Print the time span and label of up to ``FLAGS.windows`` windows.

    Builds a non-training ``DataPipeline`` over ``FLAGS.data_path`` with a
    batch size of one and a single epoch, then fetches one window at a time
    until ``FLAGS.windows`` windows were read or the pipeline raises
    ``tf.errors.OutOfRangeError``.

    Args:
        _: Unused; the function only reads the module-level ``FLAGS``.
    """
    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1

    data_pipeline = dpp.DataPipeline(FLAGS.data_path,
                                     config=cfg,
                                     is_training=False)
    to_fetch = [
        data_pipeline.samples, data_pipeline.labels,
        data_pipeline.start_time, data_pipeline.end_time
    ]

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        # Count successful fetches explicitly: relying on the loop index
        # fails when FLAGS.windows is 0 and overcounts by one when the
        # pipeline is exhausted in the middle of the loop.
        n_seen = 0
        try:
            for _window in range(FLAGS.windows):
                _sample, label, starttime, endtime = sess.run(to_fetch)
                print('starttime {}, endtime {}'.format(
                    UTCDateTime(starttime), UTCDateTime(endtime)))
                print("label", label[0])
                n_seen += 1
        except tf.errors.OutOfRangeError:
            print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
        finally:
            # Always stop the queue-runner threads, even on an unexpected
            # error, so the process can exit.
            print("{} windows seen".format(n_seen))
            coord.request_stop()
            coord.join(threads)
Пример #2
0
def fetch_window_data(stream):
    """Copy the first three traces of a stream window into one array.

    Args:
        stream: Indexable container whose items 0..2 expose a ``.data``
            array of ``config.Config().win_size`` points each.

    Returns:
        A float64 numpy array of shape ``(1, win_size, 3)`` with one
        column per trace.
    """
    n_points = config.Config().win_size
    window = np.empty((n_points, 3))
    for channel in (0, 1, 2):
        # Values are rounded through float32 before landing in the buffer.
        window[:, channel] = stream[channel].data.astype(np.float32)
    # Prepend the batch axis.
    return window[np.newaxis, :, :]
Пример #3
0
def data_is_complete(stream):
    """Return True if the first three traces hold ``3 * win_size`` points.

    The expected window size comes from ``config.Config().win_size``. A
    stream whose traces cannot be read (too few traces, missing ``.data``,
    ...) is treated as holding zero points.

    Args:
        stream: Indexable container of traces exposing a ``.data`` sequence.

    Returns:
        bool: Whether the total number of points matches the expected size.
    """
    cfg = config.Config()
    try:
        data_size = sum(len(stream[i].data) for i in range(3))
    except Exception:
        # Best effort: a malformed stream counts as empty. ``Exception``
        # (not a bare ``except``) lets KeyboardInterrupt/SystemExit through.
        data_size = 0
    return data_size == int(cfg.win_size) * 3
Пример #4
0
def main(args):
    """Train a model on the positive and negative examples of a dataset.

    Reads ``<args.dataset>/positive`` and ``<args.dataset>/negative``
    through two training ``DataPipeline`` objects, concatenates their
    batches along axis 0 and hands the result to the model returned by
    ``models.get``.

    Args:
        args: Parsed command-line arguments; the function reads
            ``n_clusters``, ``batch_size``, ``dataset``, ``model``,
            ``checkpoint_dir``, ``learning_rate``, ``resume`` and
            ``profiling``.

    Raises:
        ValueError: If ``args.n_clusters`` is None.
    """
    setproctitle.setproctitle('quakenet')

    tf.set_random_seed(1234)

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    cfg = config.Config()
    cfg.batch_size = args.batch_size
    cfg.add = 1
    # One class more than the requested clusters
    # (presumably the noise class -- TODO confirm).
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1

    pos_path = os.path.join(args.dataset, "positive")
    neg_path = os.path.join(args.dataset, "negative")

    # data pipeline for positive and negative examples
    pos_pipeline = dp.DataPipeline(pos_path, cfg, True)
    neg_pipeline = dp.DataPipeline(neg_path, cfg, True)

    # NOTE: ``tf.concat(axis, values)`` is the argument order used by the
    # TensorFlow release this script targets; keep it as is.
    samples = {
        "data":
        tf.concat(0, [pos_pipeline.samples, neg_pipeline.samples]),
        "cluster_id":
        tf.concat(0, [pos_pipeline.labels, neg_pipeline.labels])
    }

    # model
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=True)

    # train loop
    model.train(args.learning_rate,
                resume=args.resume,
                profiling=args.profiling,
                summary_step=10)
Пример #5
0
def main(args):
  """Train a model on the signal and noise examples of a dataset.

  Reads ``<args.dataset>/signal`` and ``<args.dataset>/noise`` through two
  training ``DataPipeline`` objects, concatenates their batches along
  axis 0 and hands the result to the model returned by ``models.get``.

  Args:
    args: Parsed command-line arguments; the function reads ``batch_size``,
      ``n_clusters``, ``dataset``, ``model``, ``checkpoint_dir``,
      ``learning_rate``, ``resume`` and ``profiling``.
  """
  #setproctitle.setproctitle('quakenet')

  # Reset the graph BEFORE seeding: the graph-level seed is stored on the
  # default graph, so resetting afterwards would silently discard it.
  tf.reset_default_graph()
  tf.set_random_seed(1234)

  cfg = config.Config()
  cfg.batch_size = args.batch_size
  cfg.add = 1
  # One class more than the requested clusters
  # (presumably the noise class -- TODO confirm).
  cfg.n_clusters = args.n_clusters
  cfg.n_clusters += 1

  pos_path = os.path.join(args.dataset, "signal")
  neg_path = os.path.join(args.dataset, "noise")

  # data pipeline for positive and negative examples
  pos_pipeline = dp.DataPipeline(pos_path, cfg, True)
  neg_pipeline = dp.DataPipeline(neg_path, cfg, True)

  samples = {
    "data": tf.concat([pos_pipeline.samples, neg_pipeline.samples], 0),
    "cluster_id": tf.concat([pos_pipeline.labels, neg_pipeline.labels], 0)
    }

  # model
  model = models.get(args.model, samples, cfg, args.checkpoint_dir,
                     is_training=True)

  # train loop
  model.train(
    args.learning_rate,
    resume=args.resume,
    profiling=args.profiling,
    summary_step=10)
def main(_):
    """Plot the first ``FLAGS.windows`` windows of a dataset as PDF files.

    Each window read from the non-training ``DataPipeline`` over
    ``FLAGS.data_path`` is drawn as three stacked subplots (one per column
    of the sample) and saved to
    ``FLAGS.output_path/window_XXXX.pdf``.

    Args:
        _: Unused; the function only reads the module-level ``FLAGS``.
    """
    cfg = config.Config()
    cfg.batch_size = 1

    # Make dirs
    if not os.path.exists(FLAGS.output_path):
        os.makedirs(FLAGS.output_path)

    data_pipeline = dpp.DataPipeline(FLAGS.data_path,
                                     config=cfg,
                                     is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            print('+ Plotting {} windows'.format(FLAGS.windows))
            for i in tqdm(range(FLAGS.windows)):
                sample, label = sess.run([samples, labels])
                # Drop the batch axis (batch size is 1).
                sample = np.squeeze(sample, axis=(0, ))
                target = np.squeeze(label, axis=(0, ))

                fig, ax = plt.subplots(3, 1)
                for t in range(sample.shape[1]):
                    ax[t].plot(sample[:, t])
                    ax[t].set_xlabel('time (samples)')
                    ax[t].set_ylabel('amplitude')
                ax[0].set_title(
                    'window {:04d}, cluster_id: {}'.format(i, target))
                fig.savefig(
                    os.path.join(FLAGS.output_path,
                                 'window_{:04d}.pdf'.format(i)))
                # Close this exact figure so figures never pile up.
                plt.close(fig)
        finally:
            # Stop the queue runners even if fetching or plotting failed.
            coord.request_stop()
            coord.join(threads)
Пример #7
0
def train():
    """
    Train unet using specified args:

    All settings are read from the module-level ``FLAGS``. The loop runs
    until the input pipeline raises ``tf.errors.OutOfRangeError`` or the
    coordinator asks to stop; it logs progress every 1000 steps and saves
    a checkpoint to ``FLAGS.checkpoint_dir`` every 5000 steps.
    """

    data_files, data_size = load_datafiles(FLAGS.tfrecords_prefix)
    print('{} {}'.format(data_files, data_size))
    setproctitle.setproctitle('quakenet')

    tf.set_random_seed(1234)

    cfg = config.Config()
    cfg.batch_size = FLAGS.batch_size
    cfg.add = 1
    cfg.n_clusters = FLAGS.num_classes
    cfg.n_clusters += 1

    # data pipeline for positive and negative examples
    pos_pipeline = dp.DataPipeline(FLAGS.tfrecords_dir, cfg, True)
    #  images:[batch_size, n_channels, n_points]
    images = pos_pipeline.samples
    labels = pos_pipeline.labels
    logits = unet.build_30s(images, FLAGS.num_classes, True)
    accuracy = unet.accuracy(logits, labels)
    print('accuarcy,recall,f1 {}'.format(accuracy))
    #load class weights if available
    # NOTE(review): class_weight_tensor is built but never passed to
    # unet.loss below -- confirm whether the loss should receive it.
    if FLAGS.class_weights is not None:
        weights = np.load(FLAGS.class_weights)
        class_weight_tensor = tf.constant(weights,
                                          dtype=tf.float32,
                                          shape=[FLAGS.num_classes, 1])
    else:
        class_weight_tensor = None
    loss = unet.loss(logits, labels, FLAGS.weight_decay_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = unet.train(loss, FLAGS.learning_rate,
                          FLAGS.learning_rate_decay_steps,
                          FLAGS.learning_rate_decay_rate, global_step)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    saver = tf.train.Saver()
    session_manager = tf.train.SessionManager(
        local_init_op=tf.local_variables_initializer())
    sess = session_manager.prepare_session("",
                                           init_op=init_op,
                                           saver=saver,
                                           checkpoint_dir=FLAGS.checkpoint_dir)

    writer = tf.summary.FileWriter(FLAGS.checkpoint_dir + "/train_logs",
                                   sess.graph)

    merged = tf.summary.merge_all()

    coord = tf.train.Coordinator()

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    start_time = time.time()

    try:
        while not coord.should_stop():

            step = tf.train.global_step(sess, global_step)
            _, loss_value, summary = sess.run([train_op, loss, merged])
            writer.add_summary(summary, step)
            if step % 1000 == 0:
                # NOTE: this extra run pulls one more batch through the
                # pipeline just to evaluate the accuracy tensors.
                acc_seg_value = sess.run([accuracy])
                epoch = step * FLAGS.batch_size / data_size
                duration = time.time() - start_time
                start_time = time.time()

                print('[PROGRESS]\tEpoch %d | Step %d | loss = %.2f | P. acc. =  %.3f \
                      | S. acc. =  %.3f | N. acc. =  %.3f | dur. = (%.3f sec)'\
                      % (epoch, step, loss_value, acc_seg_value[0][1][1],acc_seg_value[0][1][2], acc_seg_value[0][1][0],\
                         duration))
            if step % 5000 == 0:
                print('[PROGRESS]\tSaving checkpoint')
                checkpoint_path = os.path.join(FLAGS.checkpoint_dir,
                                               'unet.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('[INFO    ]\tDone training for %d epochs, %d steps.' %
              (FLAGS.num_epochs, step))

    finally:
        # When done, ask the threads to stop, then release every resource.
        # Doing this here (rather than after the try block) guarantees the
        # cleanup also runs when an unexpected exception or Ctrl-C escapes.
        coord.request_stop()
        try:
            # Wait for threads to finish.
            coord.join(threads)
        finally:
            writer.close()
            sess.close()
Пример #8
0
def main(_):
    """Evaluate a trained model window by window and plot probability maps.

    Every window returned by ``fetch_streams_list(args.dataset)`` is fed to
    the model through placeholders; per-window validation metrics are
    printed, followed by their average over all windows.

    Args:
        _: Unused. The settings are read from ``args``, which must exist at
            module scope (it is not defined in this function).
    """
    setproctitle.setproctitle('quakenet_viz')

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)

    windows_list = fetch_streams_list(args.dataset)
    if len(windows_list) == 0:
        # Nothing to evaluate; also avoids dividing by zero when averaging.
        print('No windows found in {}'.format(args.dataset))
        return

    # stream data with a placeholder
    samples = {
            'data': tf.placeholder(tf.float32,
                                   shape=(cfg.batch_size, 1001, 3),
                                   name='input_data'),
            'cluster_id': tf.placeholder(tf.int64,
                                         shape=(cfg.batch_size,),
                                         name='input_label')
        }

    # set up model and validation metrics
    model = models.get(args.model, samples, cfg,
                       args.checkpoint_dir,
                       is_training=False)
    metrics = model.validation_metrics()

    # (clusters_map, lat, lon), loaded from disk the first time a map is
    # plotted instead of on every plotted window.
    cluster_maps = None

    with tf.Session() as sess:

        model.load(sess, args.step)
        print('Evaluating at step {}'.format(sess.run(model.global_step)))

        mean_metrics = {key: 0 for key in metrics}

        for n, window in enumerate(windows_list):

            # Get One stream and label from the list
            stream, cluster_id = fetch_window_and_label(window)

            # Get coordinates of the event
            lat_event, lon_event = fetch_lat_and_lon(window)

            # Fetch metrics and class probabilities
            feed_dict = {samples['data']: stream,
                         samples['cluster_id']: cluster_id}
            metrics_, class_prob_ = sess.run(
                [metrics, model.layers['class_prob']], feed_dict)
            # Keep only clusters proba, remove noise proba
            clusters_prob = class_prob_[0, 1::]

            # NOTE(review): the original comment said "Print Misclassified
            # window", yet the test selects localization_accuracy >= 1.0 --
            # confirm which windows are meant to be plotted.
            if metrics_['localization_accuracy'] >= 1.0:
                if cluster_maps is None:
                    cluster_maps = (
                        np.load('cluster_ids_{}_comp.npy'.format(
                            args.n_clusters)),
                        np.load("cluster_ids_{}_comp_lat.npy".format(
                            args.n_clusters)),
                        np.load("cluster_ids_{}_comp_lon.npy".format(
                            args.n_clusters)),
                    )
                clusters_map, lat, lon = cluster_maps
                plot_proba_map(n, lat, lon, clusters_map, clusters_prob,
                               cluster_id, lat_event, lon_event)

            for key in metrics:
                mean_metrics[key] += cfg.batch_size * metrics_[key]

            mess = model.validation_metrics_message(metrics_)
            print('{:03d} | '.format(n) + mess)

        for key in metrics:
            mean_metrics[key] /= len(windows_list)

        mess = model.validation_metrics_message(mean_metrics)
        print('Average | ' + mess)
Пример #9
0
def _evaluate_checkpoint(model_file, step, cfg, summary_dir):
    """Evaluate one checkpoint over a single pass of the dataset.

    Args:
        model_file: Path of the checkpoint's ``.meta`` file.
        step: Step string parsed from the checkpoint file name.
        cfg: Pipeline configuration handed to ``dp.DataPipeline``.
        summary_dir: Directory for the summary writer, or None to skip
            summaries entirely.

    Raises:
        ValueError: If ``model_file`` does not exist.
    """
    # data pipeline for positive and negative examples
    pos_pipeline = dp.DataPipeline(FLAGS.tfrecords_dir, cfg, True)
    #  images:[batch_size, n_channels, n_points]
    images = pos_pipeline.samples
    labels = pos_pipeline.labels
    logits = unet.build_30s(images, FLAGS.num_classes, False)

    predicted_images = unet.predict(logits, FLAGS.batch_size,
                                    FLAGS.image_size)

    accuracy = unet.accuracy(logits, labels)
    loss = unet.loss(logits, labels, FLAGS.weight_decay_rate)

    summary_writer = None
    if summary_dir is not None:
        summary_writer = tf.summary.FileWriter(summary_dir, None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = []
    try:
        sess.run(init_op)

        saver = tf.train.Saver()

        if not tf.gfile.Exists(model_file):
            raise ValueError("Can't find checkpoint file")
        print('[INFO    ]\tFound checkpoint file, restoring model.')
        saver.restore(sess, model_file.split(".meta")[0])

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        global_accuracy = 0.0
        global_p_accuracy = 0.0
        global_s_accuracy = 0.0
        global_n_accuracy = 0.0
        global_loss = 0.0

        n = 0

        try:
            while not coord.should_stop():
                (acc_seg_value, loss_value, predicted_images_value,
                 images_value) = sess.run(
                     [accuracy, loss, predicted_images, images])
                accuracy_p_value = acc_seg_value[1][1]
                accuracy_s_value = acc_seg_value[1][2]
                accuracy_n_value = acc_seg_value[1][0]
                # NOTE(review): acc_seg_value is indexed as a nested
                # structure just above, so adding it whole to a float looks
                # wrong -- confirm which element is the total accuracy.
                global_accuracy += acc_seg_value
                global_p_accuracy += accuracy_p_value
                global_s_accuracy += accuracy_s_value
                global_n_accuracy += accuracy_n_value
                global_loss += loss_value
                filenames_value = []
                if (FLAGS.plot):
                    maybe_save_images(predicted_images_value, images_value,
                                      filenames_value)
                print(
                    '[PROGRESS]\tAccuracy for current batch: |  P. acc. =%.5f| S. acc. =%.5f| '
                    'noise. acc. =%.5f.' %
                    (accuracy_p_value, accuracy_s_value, accuracy_n_value))
                n += cfg.batch_size
                print(n)
        except KeyboardInterrupt:
            print('stopping evaluation')
        except tf.errors.OutOfRangeError:
            print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
            print("{} windows seen".format(n))
            if n > 0:
                # NOTE(review): totals are accumulated once per batch but
                # divided by the number of windows (n grows by batch_size);
                # confirm the intended normalisation for batch_size > 1.
                # Average the accumulated loss, not the last batch's loss.
                mean_loss = global_loss / n
                global_accuracy /= n
                global_p_accuracy /= n
                global_s_accuracy /= n
                global_n_accuracy /= n
                if FLAGS.save_summary and summary_writer is not None:
                    scalars = [
                        ('loss/val', mean_loss),
                        ('accuracy/val', global_accuracy),
                        ('accuracy/val_p', global_p_accuracy),
                        ('accuracy/val_s', global_s_accuracy),
                        ('accuracy/val_noise', global_n_accuracy),
                    ]
                    for tag, value in scalars:
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag=tag, simple_value=value)
                        ])
                        summary_writer.add_summary(summary,
                                                   global_step=step)
                print(
                    '[End of evaluation for current epoch]\n\nAccuracy for current epoch:%s | total. acc. =%.5f| P. acc. =%.5f| S. acc. =%.5f| '
                    'noise. acc. =%.5f.' %
                    (step, global_accuracy, global_p_accuracy,
                     global_s_accuracy, global_n_accuracy))
                # A negative interval means "do not wait" (see the
                # checkpoint polling in evaluate); time.sleep would raise.
                if FLAGS.eval_interval >= 0:
                    print('Sleeping for {}s'.format(FLAGS.eval_interval))
                    time.sleep(FLAGS.eval_interval)
            if summary_writer is not None:
                summary_writer.flush()
    finally:
        # Release this checkpoint's threads, session and graph before the
        # next checkpoint is evaluated.
        print('joining data threads')
        coord.request_stop()
        try:
            # Wait for threads to finish.
            coord.join(threads)
        finally:
            sess.close()
            if summary_writer is not None:
                summary_writer.close()
            tf.reset_default_graph()


def evaluate():
    """
    Eval unet using specified args:

    Waits until ``FLAGS.checkpoint_path`` holds a checkpoint (unless
    ``FLAGS.eval_interval`` is negative), then evaluates every ``*.meta``
    checkpoint found there, in sorted file-name order. Summaries are only
    written when ``FLAGS.events`` is set.
    """
    summary_dir = None
    if FLAGS.events:
        summary_dir = os.path.join(FLAGS.checkpoint_path, "events")
    while True:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
        if FLAGS.eval_interval < 0 or ckpt:
            print('Evaluating model')
            break
        print('Waiting for training job to save a checkpoint')
        time.sleep(FLAGS.eval_interval)

    setproctitle.setproctitle('quakenet')

    tf.set_random_seed(1234)

    cfg = config.Config()
    cfg.batch_size = FLAGS.batch_size
    cfg.add = 1
    cfg.n_clusters = FLAGS.num_classes
    cfg.n_clusters += 1
    cfg.n_epochs = 1
    model_files = [
        name for name in os.listdir(FLAGS.checkpoint_path)
        if fnmatch.fnmatch(name, '*.meta')
    ]
    for model_file in sorted(model_files):
        # File names are expected to look like "<prefix>-<step>.meta".
        step = model_file.split(".meta")[0].split("-")[1]
        print(step)
        model_file = os.path.join(FLAGS.checkpoint_path, model_file)
        _evaluate_checkpoint(model_file, step, cfg, summary_dir)
Пример #10
0
def main(args):
    """Run a trained model over a dataset and store detections in a CSV.

    Windows are read one at a time from a non-training ``DataPipeline``
    over ``args.dataset``. Every window whose predicted class is not the
    first one is recorded, and the catalog is written to
    ``<args.output>/catalog_detection.csv``.

    Args:
        args: Parsed command-line arguments; the function reads
            ``n_clusters``, ``checkpoint_dir``, ``output``, ``plot``,
            ``dataset``, ``model``, ``max_windows`` and ``step``.

    Raises:
        ValueError: If ``args.n_clusters`` is None.
    """
    setproctitle.setproctitle('quakenet_eval')

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1
    cfg.n_epochs = 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))

    # data pipeline
    data_pipeline = DataPipeline(args.dataset, config=cfg, is_training=False)
    samples = {
        'data': data_pipeline.samples,
        'cluster_id': data_pipeline.labels,
        'start_time': data_pipeline.start_time,
        'end_time': data_pipeline.end_time
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)

    if args.max_windows is None:
        max_windows = 2**31
    else:
        max_windows = args.max_windows

    # Dictonary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "utc_timestamp": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Create catalog name in which the events are stored
    output_catalog = os.path.join(args.output, 'catalog_detection.csv')
    print('Catalog created to store events {}'.format(output_catalog))

    # Run ConvNetQuake
    # NOTE: ``tf_config`` is read from module scope; it is not defined here.
    with tf.Session(config=tf_config) as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            model.load(sess, args.step)
            print('Predicting using model at step {}'.format(
                sess.run(model.global_step)))

            # Fetch class_proba, prediction and window bounds
            to_fetch = [
                model.layers['class_prob'],
                model.layers['class_prediction'], samples['start_time'],
                samples['end_time']
            ]

            n_events = 0
            idx = 0
            time_start = time.time()
            while True:
                try:
                    class_prob_, cluster_id, start_time, end_time = sess.run(
                        to_fetch)

                    # Keep only clusters proba, remove noise proba
                    clusters_prob = class_prob_[0, 1::]
                    # label for noise = -1,
                    # label for cluster \in {0:n_clusters}
                    cluster_id -= 1

                    is_event = cluster_id[0] > -1
                    if is_event:
                        n_events += 1

                    idx += 1
                    if idx % 1000 == 0:
                        print("processed {} windows".format(idx))

                    if is_event:
                        events_dic["start_time"].append(
                            UTCDateTime(start_time))
                        events_dic["end_time"].append(UTCDateTime(end_time))
                        events_dic["utc_timestamp"].append(
                            (start_time + end_time) / 2.0)
                        events_dic["cluster_id"].append(cluster_id[0])
                        events_dic["clusters_prob"].append(
                            list(clusters_prob))

                    if idx >= max_windows:
                        print("stopped after {} windows".format(max_windows))
                        print("found {} events".format(n_events))
                        break

                except KeyboardInterrupt:
                    print("processed {} windows, found {} events".format(
                        idx, n_events))
                    print("Run time:  {}".format(time.time() - time_start))
                    # Without this break Ctrl-C could never stop the loop.
                    break

                except tf.errors.OutOfRangeError:
                    print('Evaluation completed ({} epochs).'.format(
                        cfg.n_epochs))
                    break

            print('joining data threads')
            m, s = divmod(time.time() - time_start, 60)
            print("Prediction took {} min {} seconds".format(m, s))
        finally:
            # Stop the queue runners even when loading or prediction failed.
            coord.request_stop()
            coord.join(threads)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)
def main(args):
    """Predict with a saved Keras model and store detections in a CSV.

    Loads the model from ``models/model.json`` / ``models/model.h5``,
    predicts class probabilities for the dataset produced by
    ``dg.DataGenerator`` and records every window whose most probable class
    is greater than 0 in ``<args.output>/catalog_detection.csv``.

    Args:
        args: Parsed command-line arguments; the function reads
            ``n_clusters``, ``output``, ``plot``, ``dataset`` and
            ``max_windows``.

    Raises:
        ValueError: If ``args.n_clusters`` is None.
    """
    setproctitle.setproctitle('quakenet_eval')

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    cfg = config.Config()
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1
    cfg.n_epochs = 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))

    # data pipeline
    data_generator = dg.DataGenerator(args.dataset, cfg, is_training=False)
    dataset = data_generator.read()

    # load json and create model; ``with`` closes the file even on error
    with open("models/model.json", 'r') as json_file:
        loaded_model = model_from_json(json_file.read())
    # load weights into new model
    loaded_model.load_weights("models/model.h5")
    print("Loaded model from disk")
    loaded_model.compile(optimizer='adam',
                         loss='sparse_categorical_crossentropy',
                         metrics=['accuracy'])

    if args.max_windows is None:
        max_windows = 2**31
    else:
        max_windows = args.max_windows

    # Dictonary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "utc_timestamp": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Create catalog name in which the events are stored
    output_catalog = os.path.join(args.output, 'catalog_detection.csv')
    print('Catalog created to store events', output_catalog)
    time_start = time.time()

    clusters_prob = loaded_model.predict(dataset, steps=max_windows)
    print(clusters_prob.shape)

    cluster_id = np.argmax(clusters_prob, axis=1)
    print(cluster_id.shape)

    # Second pass over the dataset to pair each prediction with the
    # start/end time of its window.
    features = []
    with tf.Session() as sess:
        data_generator = dg.DataGenerator(args.dataset, cfg, is_training=False)
        dataset = data_generator.getFeatures()
        dataset = dataset.batch(1)
        iterator = dataset.make_one_shot_iterator()
        next_window = iterator.get_next()
        for idx, predicted in enumerate(cluster_id):
            values = sess.run(next_window)
            # Class 0 is skipped (presumably the noise class -- TODO confirm).
            if predicted > 0:
                features.append({
                    "start_time": values['start_time'],
                    "end_time": values['end_time'],
                    "cluster_id": predicted
                })
                events_dic["start_time"].append(
                    UTCDateTime(values['start_time']))
                events_dic["end_time"].append(UTCDateTime(values['end_time']))
                events_dic["utc_timestamp"].append(
                    (values['start_time'] + values['end_time']) / 2.0)
                events_dic["cluster_id"].append(predicted)
                events_dic["clusters_prob"].append(list(clusters_prob[idx]))

    for i, feature in enumerate(features):
        print(i, feature['start_time'], feature['end_time'],
              feature['cluster_id'])

    m, s = divmod(time.time() - time_start, 60)
    print("Prediction took {} min {} seconds".format(m, s))

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)
Пример #12
0
def main(_):
    """Write a noise-augmented copy of a tfrecords dataset.

    Every window read from ``FLAGS.tfrecords`` gets noise added and is then
    optionally compressed, stretched and/or shifted (``FLAGS.compress_data``,
    ``FLAGS.stretch_data``, ``FLAGS.shift_data``) before being written to
    ``FLAGS.output``. With ``FLAGS.plot`` set, each original/augmented pair
    is also plotted.

    Args:
        _: Unused; the function only reads the module-level ``FLAGS``.
    """
    if FLAGS.stretch_data:
        print("ADD NOISE AND STRETCH DATA")
    if FLAGS.compress_data:
        print("ADD NOISE AND COMPRESS DATA")
    if FLAGS.shift_data:
        print("ADD NOISE AND SHIFT DATA")

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    # output_dir is '' when FLAGS.output is a bare file name; os.makedirs('')
    # would raise, and the current directory already exists.
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        for sub_dir in ("true_data", "augmented_data"):
            plot_dir = os.path.join(output_dir, sub_dir)
            if not os.path.exists(plot_dir):
                os.makedirs(plot_dir)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1

    data_pipeline = DataPipeline(FLAGS.tfrecords,
                                 config=cfg,
                                 is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        writer = DataWriter(FLAGS.output)
        n_examples = 0
        try:
            while True:
                try:
                    sample, label = sess.run([samples, labels])
                    # Drop the batch axis (batch size is 1).
                    sample = np.squeeze(sample, axis=0)
                    label = label[0]

                    noised_sample = add_noise_to_signal(np.copy(sample))
                    if FLAGS.compress_data:
                        noised_sample = compress_signal(noised_sample)
                    if FLAGS.stretch_data:
                        noised_sample = stretch_signal(noised_sample)
                    if FLAGS.shift_data:
                        noised_sample = shift_signal(noised_sample)

                    if FLAGS.plot:
                        plot_true_and_augmented_data(sample, noised_sample,
                                                     label, n_examples)

                    stream = convert_np_to_stream(noised_sample)
                    writer.write(stream, label)

                    n_examples += 1

                except KeyboardInterrupt:
                    print('stopping data augmentation')
                    break

                except tf.errors.OutOfRangeError:
                    # n_examples already equals the number of windows
                    # written; subtracting one under-reported it.
                    print('Augmentation completed ({} epochs, {} examples seen).'
                          .format(cfg.n_epochs, n_examples))
                    break
        finally:
            # Flush the output file and stop the readers even on failure.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def main(args):
    """Slide a window over one seismic stream and catalog detected events.

    The stream at ``args.stream_path`` is read and preprocessed, every
    complete window is fed to the model restored from
    ``args.checkpoint_dir``, and the windows whose predicted class is not
    the first one (treated as noise here) are written to a CSV catalog in
    ``args.output``.  A plot and/or a SAC file can be saved per event.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``checkpoint_dir``, ``n_clusters``, ``output``, ``plot``,
            ``save_sac``, ``stream_path``, ``window_size``, ``window_step``,
            ``max_windows``, ``model`` and ``step``.

    Side effects:
        Deletes and recreates ``args.output``.
    """
    setproctitle.setproctitle('quakenet_predict')

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    # One extra class on top of the requested clusters.
    cfg.n_clusters += 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))
    if args.save_sac:
        os.makedirs(os.path.join(args.output, "sac"))

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Create catalog name in which the events are stored
    catalog_name = os.path.split(stream_file)[-1].split(".mseed")[0] + ".csv"
    output_catalog = os.path.join(args.output, catalog_name)
    print('Catalog created to store events', output_catalog)

    # Dictionary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Windows generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time_in_sec = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time_in_sec - args.window_size) / args.window_step
    else:
        max_windows = args.max_windows

    # Stream data through placeholders.  The window length comes from the
    # configuration (instead of a literal 1001) so the placeholder always
    # agrees with the arrays built for each window.
    samples = {
        'data':
        tf.placeholder(tf.float32,
                       shape=(cfg.batch_size, int(cfg.win_size), 3),
                       name='input_data'),
        'cluster_id':
        tf.placeholder(tf.int64, shape=(cfg.batch_size, ), name='input_label')
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)

    with tf.Session() as sess:

        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))

        # Class probabilities and predicted class; identical for every
        # window, so built once.
        to_fetch = [
            model.layers['class_prob'], model.layers['class_prediction']
        ]

        n_events = 0
        # Pre-bound so the interrupt handler below is safe even when the
        # interrupt arrives before the first window was produced.
        idx = -1
        win = None
        time_start = time.time()
        try:
            for idx, win in enumerate(win_gen):

                if not data_is_complete(win):
                    continue

                # Feed window and fake cluster_id (needed by the net) but
                # will be predicted
                feed_dict = {
                    samples['data']: fetch_window_data(win),
                    samples['cluster_id']: np.array([0])
                }
                class_prob_, cluster_id = sess.run(to_fetch, feed_dict)

                # Keep only clusters proba, remove noise proba
                clusters_prob = class_prob_[0, 1::]
                # label for noise = -1, label for cluster \in {0:n_clusters}
                cluster_id -= 1

                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1
                    events_dic["start_time"].append(win[0].stats.starttime)
                    events_dic["end_time"].append(win[0].stats.endtime)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx % 1000 == 0:
                    print("Analyzing {} records".format(
                        win[0].stats.starttime))

                if args.plot and is_event:
                    win_filtered = win.copy()
                    win_filtered.plot(outfile=os.path.join(
                        args.output, "viz", "event_{}_cluster_{}.png".format(
                            idx, cluster_id)))

                if args.save_sac and is_event:
                    win_filtered = win.copy()
                    win_filtered.write(os.path.join(
                        args.output, "sac",
                        "event_{}_cluster_{}.sac".format(idx, cluster_id)),
                                       format="SAC")

                # NOTE(review): this check is only reached for complete
                # windows, and windows 0..max_windows inclusive are analysed.
                if idx >= max_windows:
                    print("stopped after {} windows".format(max_windows))
                    print("found {} events".format(n_events))
                    break

        except KeyboardInterrupt:
            if win is not None:
                print('Interrupted at time {}.'.format(
                    win[0].stats.starttime))
            else:
                print('Interrupted before the first window.')
            print("processed {} windows, found {} events".format(
                idx + 1, n_events))
            print("Run time: ", time.time() - time_start)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    print("Run time: ", time.time() - time_start)
Пример #14
0
def evaluate():
    """Run a restored U-Net over the input pipeline and save its predictions.

    Settings are read from the module-level ``FLAGS``: ``batch_size``,
    ``num_classes``, ``tfrecords_dir``, ``image_size`` and
    ``checkpoint_path``.  Batches are pulled from the data pipeline until it
    raises ``OutOfRangeError``; each batch of predictions is handed to
    ``maybe_save_images`` together with generated ``"<step>_<i>.png"`` names.

    Raises:
        ValueError: if ``FLAGS.checkpoint_path + '.meta'`` does not exist.
    """
    setproctitle.setproctitle('quakenet')

    # Fail fast, before any graph or session is created, so a missing
    # checkpoint cannot leave an open session behind.
    if not tf.gfile.Exists(FLAGS.checkpoint_path + '.meta'):
        raise ValueError("Can't find checkpoint file")

    tf.set_random_seed(1234)

    cfg = config.Config()
    cfg.batch_size = FLAGS.batch_size
    cfg.add = 1
    cfg.n_clusters = FLAGS.num_classes
    cfg.n_clusters += 1

    # data pipeline for positive and negative examples
    pos_pipeline = dp.DataPipeline(FLAGS.tfrecords_dir, cfg, True)
    #  images:[batch_size, n_channels, n_points]
    images = pos_pipeline.samples
    labels = pos_pipeline.labels
    print("images", images,labels)
    logits = unet.build_30s(images, FLAGS.num_classes, False)

    predicted_images = unet.predict(logits, FLAGS.batch_size, FLAGS.image_size)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # The context manager closes the session on every exit path, including
    # unexpected exceptions raised inside the evaluation loop.
    with tf.Session() as sess:
        sess.run(init_op)

        saver = tf.train.Saver()
        print('[INFO    ]\tFound checkpoint file, restoring model.')
        saver.restore(sess, FLAGS.checkpoint_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        step = 0
        try:
            while not coord.should_stop():
                print(predicted_images, images)
                predicted_images_value, images_value = sess.run(
                    [predicted_images, images])
                print (predicted_images_value, images_value)
                filenames_value = [
                    str(step) + "_" + str(i) + ".png"
                    for i in range(FLAGS.batch_size)
                ]
                maybe_save_images(predicted_images_value, images_value,
                                  filenames_value)
                step += 1

        except tf.errors.OutOfRangeError:
            print('[INFO    ]\tDone evaluating in %d steps.' % step)

        finally:
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)
Пример #15
0
def main(args):
  """Plot the network input and recorded activations for a few examples.

  Restores the model from ``args.checkpoint_dir``, then for up to 10 batches
  fetches the model input plus every tensor in the graph's ``ACTIVATIONS``
  collection.  Each fetched array has its leading axis squeezed away; 2-D
  results are plotted column by column.  One PDF per tensor and iteration is
  written to ``args.output`` as ``"<iteration>_<name>.pdf"``.

  Args:
    args: parsed command-line namespace.  Attributes read here: ``output``,
      ``n_clusters``, ``dataset``, ``model`` and ``checkpoint_dir``.

  Raises:
    ValueError: if ``args.n_clusters`` is not set.
  """
  setproctitle.setproctitle('quakenet_debug')

  if not os.path.exists(args.output):
    os.makedirs(args.output)

  if args.n_clusters is None:
    raise ValueError('Define the number of clusters with --n_clusters')

  cfg = config.Config()
  cfg.batch_size = 1
  cfg.n_epochs = 1
  cfg.add = 2
  cfg.n_clusters = args.n_clusters
  cfg.n_clusters +=1

  # data pipeline
  data_pipeline = dp.DataPipeline(args.dataset, cfg, False)

  samples = {
    'data': data_pipeline.samples,
    'cluster_id': data_pipeline.labels
    }

  # model
  model_name = args.model
  model = models.get(model_name, samples,
                     cfg, args.checkpoint_dir, is_training=False)

  with tf.Session() as sess:
    coord = tf.train.Coordinator()
    tf.initialize_local_variables().run()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
      model.load(sess)
      step = sess.run(model.global_step)
      print('Debugging at step {}'.format(step))

      activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)

      # Tensors to fetch, keyed by a file-name-safe label.
      toget = {'0_input': model.inputs['data']}
      for i, a in enumerate(activations):
        name = a.name.replace('/', '_').replace(':', '_')
        toget['{}_{}'.format(i+1, name)] = a

      for it in range(10):
        print('running session')
        try:
          fetched = sess.run(toget)
        except tf.errors.OutOfRangeError:
          # The pipeline is limited to one epoch, so it can run dry before
          # 10 batches were read.
          print('No more data after {} iterations'.format(it))
          break
        print(fetched)

        print(it)
        for f in fetched:
          d = np.squeeze(fetched[f], axis=0)

          plt.figure()
          if len(d.shape) == 2:
            for i in range(d.shape[1]):
              plt.plot(d[:, i])
          plt.savefig(os.path.join(args.output, '{}_{}.pdf'.format(it, f)))
          # Close (not just clear) the figure so figures do not accumulate
          # across tensors and iterations.
          plt.close()
    finally:
      # Stop the input threads only once all iterations are done; stopping
      # them inside the loop starves every iteration after the first.
      coord.request_stop()

    coord.join(threads)
Пример #16
0
def main(args):
    """Evaluate a trained model on a dataset and print a confusion matrix.

    Waits until a checkpoint exists in ``args.checkpoint_dir`` (unless
    ``args.eval_interval`` is negative), then repeatedly rebuilds the data
    pipeline and model, runs one pass over ``args.dataset``, prints
    per-batch and average validation metrics and optionally writes
    summaries.  With a negative ``args.eval_interval`` a single pass is
    made; otherwise the evaluation is repeated every ``eval_interval``
    seconds.  After the loop the confusion matrix of the last pass is
    printed and, with ``args.save_false``, the time spans of the wrong
    predictions are saved to ``output/false_predictions/false_preds.csv``.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``n_clusters``, ``noise``, ``events``, ``checkpoint_dir``,
            ``save_false``, ``eval_interval``, ``dataset``, ``model``,
            ``step`` and ``save_summary``.

    Raises:
        ValueError: if ``args.n_clusters`` is not set, or if neither
            ``args.noise`` nor ``args.events`` is set.
    """
    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')
    if not args.noise and not args.events:
        raise ValueError("Define if evaluating accuracy on noise or events")

    # Directory in which the evaluation summaries are written
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")
    if args.save_false:
        false_start = []
        false_end = []
        false_origintime = []
        false_dir = os.path.join("output", "false_predictions")
        if not os.path.exists(false_dir):
            os.makedirs(false_dir)

    # Block until the training job has produced a checkpoint.
    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print('Evaluating model')
            break
        print('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)

    cfg = config.Config()
    if args.noise:
        cfg.batch_size = 256
    if args.events:
        cfg.batch_size = 1
    if args.save_false:
        cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1

    while True:
        # Pre-bound so the cleanup below never hits an unbound name when
        # the setup fails part-way (which would hide the real error).
        coord = None
        threads = []
        summary_writer = None
        try:
            # data pipeline
            data_pipeline = DataPipeline(args.dataset,
                                         config=cfg,
                                         is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'cluster_id': data_pipeline.labels,
                "start_time": data_pipeline.start_time,
                "end_time": data_pipeline.end_time
            }

            # set up model and validation metrics
            model = models.get(args.model,
                               samples,
                               cfg,
                               args.checkpoint_dir,
                               is_training=False)
            metrics = model.validation_metrics()
            # Validation summary writer
            summary_writer = tf.train.SummaryWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                model.load(sess, args.step)
                print('Evaluating at step {}'.format(
                    sess.run(model.global_step)))

                step = tf.train.global_step(sess, model.global_step)
                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0

                to_fetch = [
                    metrics, model.layers["class_prediction"],
                    samples["cluster_id"], samples["start_time"],
                    samples["end_time"]
                ]

                n = 0
                # Per-batch label arrays, concatenated once after the pass
                # instead of re-copying a growing array on every batch.
                pred_batches = []
                true_batches = []
                while True:
                    try:
                        (metrics_, batch_pred_label, batch_true_label,
                         starttime, endtime) = sess.run(to_fetch)

                        # Shift predictions by one class (same convention
                        # as the prediction scripts: noise becomes -1).
                        batch_pred_label -= 1
                        pred_batches.append(np.ravel(batch_pred_label))
                        true_batches.append(np.ravel(batch_true_label))

                        # Save times of false preds (batch size is forced
                        # to 1 above when save_false is set)
                        if args.save_false and \
                                batch_pred_label != batch_true_label:
                            print("---False prediction---")
                            print('{} {}'.format(str(UTCDateTime(starttime)),
                                                 str(UTCDateTime(endtime))))
                            false_origintime.append(
                                (starttime[0] + endtime[0]) / 2)
                            false_end.append(UTCDateTime(endtime))
                            false_start.append(UTCDateTime(starttime))

                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print('{:03d} | '.format(n) + mess)

                    except KeyboardInterrupt:
                        print('stopping evaluation')
                        break

                    except tf.errors.OutOfRangeError:
                        print('Evaluation completed ({} epochs).'.format(
                            cfg.n_epochs))
                        print("{} windows seen".format(n))
                        break

                pred_labels = (np.concatenate(pred_batches)
                               if pred_batches else np.empty(0))
                true_labels = (np.concatenate(true_batches)
                               if true_batches else np.empty(0))

                if n > 0:
                    for key in metrics:
                        mean_metrics[key] /= n
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag='{}/val'.format(key),
                                             simple_value=mean_metrics[key])
                        ])
                        if args.save_summary:
                            summary_writer.add_summary(summary,
                                                       global_step=step)

                summary_writer.flush()

                mess = model.validation_metrics_message(mean_metrics)
                print('Average | ' + mess)

                if args.eval_interval < 0:
                    print('End of evaluation')
                    break

            tf.reset_default_graph()
            print('Sleeping for {}s'.format(args.eval_interval))
            time.sleep(args.eval_interval)

        finally:
            print('joining data threads')
            if coord is not None:
                coord.request_stop()
                # Join the threads of this pass; a new set is started on
                # every pass.
                coord.join(threads)
            if summary_writer is not None:
                # A new writer is opened per pass, so close this one.
                summary_writer.close()

    if args.save_false:
        false_preds = {}
        false_preds["start_time"] = false_start
        false_preds["end_time"] = false_end
        false_preds["origintime"] = false_origintime
        df = pd.DataFrame(false_preds)
        df.to_csv(os.path.join(false_dir, "false_preds.csv"))

    print("---Confusion Matrix----")
    print(confusion_matrix(true_labels, pred_labels))
Пример #17
0
def main(args):
    """Detect events in one ``.mseed`` file or in every one in a directory.

    ``args.stream_path`` may be a directory (all ``*.mseed`` files directly
    inside it are processed) or the path of a single stream file.  Each
    stream is loaded, preprocessed and scanned with a sliding window.
    Windows that pass the amplitude filters are normalized and classified
    by the model restored from ``args.checkpoint_dir``.  For every stream a
    CSV catalog of the detected events is written to ``args.output``;
    plots and SAC files are saved on request.

    Args:
        args: parsed command-line namespace.  Attributes read here:
            ``checkpoint_dir``, ``n_clusters``, ``output``, ``plot``,
            ``save_sac``, ``stream_path``, ``window_size``, ``window_step``,
            ``max_windows``, ``model`` and ``step``.

    Side effects:
        Deletes and recreates ``args.output``.
    """
    setproctitle.setproctitle('quakenet_predict')

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))
        os.makedirs(os.path.join(args.output, "viz_not"))
        # Select the non-interactive backend once, not once per window.
        import matplotlib
        matplotlib.use('Agg')
    if args.save_sac:
        os.makedirs(os.path.join(args.output, "sac"))

    # Build the list of stream files.  A single-file path becomes a
    # one-element list; iterating over the bare file name would loop over
    # its characters instead.
    stream_path = args.stream_path
    if os.path.isdir(stream_path):
        stream_dir = stream_path
        stream_files = sorted(
            name for name in os.listdir(stream_dir)
            if fnmatch.fnmatch(name, '*.mseed'))
    else:
        stream_dir, single_file = os.path.split(stream_path)
        stream_files = [single_file]

    # stream data with a placeholder
    samples = {
        'data':
        tf.placeholder(tf.float32,
                       shape=(cfg.batch_size, cfg.win_size, 3),
                       name='input_data'),
        'cluster_id':
        tf.placeholder(tf.int64, shape=(cfg.batch_size, ), name='input_label')
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)
    with tf.Session() as sess:
        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))

        # Class probabilities and predicted class; identical for every
        # window, so built once.
        to_fetch = [
            model.layers['class_prob'], model.layers['class_prediction']
        ]

        # Running total over all stream files.
        n_events = 0
        time_start = time.time()
        for stream_file in stream_files:
            stream_path1 = os.path.join(stream_dir, stream_file)
            print(" + Loading stream {}".format(stream_file))
            stream = load_stream(stream_path1)
            print(" + Preprocess stream")
            stream = preprocess_stream(stream)
            print(" -- Stream is ready, starting detection")

            # Create catalog name in which the events are stored
            catalog_name = os.path.split(stream_file)[-1].split(
                ".mseed")[0] + ".csv"
            output_catalog = os.path.join(args.output, catalog_name)
            print('Catalog created to store events {}'.format(output_catalog))

            # Dictionary to store info on detected events
            events_dic = {
                "start_time": [],
                "end_time": [],
                "cluster_id": [],
                "clusters_prob": []
            }

            if args.save_sac:
                # Always keep the first window of the stream as a SAC file.
                first_slice = stream.slice(
                    stream[0].stats.starttime,
                    stream[0].stats.starttime + args.window_size)
                first_slice.write(os.path.join(
                    args.output, "sac", "{}_{}_{}.sac".format(
                        stream[0].stats.station, '0',
                        str(stream[0].stats.starttime).replace(':', '_'))),
                                  format="SAC")
            if args.max_windows is None:
                total_time_in_sec = stream[0].stats.endtime - stream[
                    0].stats.starttime
                max_windows = (total_time_in_sec -
                               args.window_size) / args.window_step
            else:
                max_windows = args.max_windows

            # Pre-bound so the interrupt handler is safe even when the
            # interrupt arrives before the first window was produced.
            idx = -1
            win = None
            try:
                # Start offsets of the sliding-window passes.
                offsets = [0]
                for offset in offsets:

                    win_gen = stream.slide(window_length=args.window_size,
                                           step=args.window_step,
                                           offset=offset,
                                           include_partial_windows=False)
                    for idx, win in enumerate(win_gen):
                        if not data_is_complete(win):
                            continue

                        # Amplitude-based filters; thresholds kept as
                        # found.  NOTE(review): units and meaning of the
                        # filter_small_ampitude values are not visible here.
                        ampl_e, ampl_n, ampl_z = filter_small_ampitude(win)
                        if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                            continue
                        ampm_e = max(abs(win[0].data))
                        ampm_n = max(abs(win[1].data))
                        ampm_z = max(abs(win[2].data))
                        if ampm_e < 600 and ampm_n < 600 and ampm_z < 600:
                            continue

                        # Feed window and fake cluster_id (needed by the
                        # net) but will be predicted
                        feed_dict = {
                            samples['data']:
                            fetch_window_data(win.copy().normalize()),
                            samples['cluster_id']:
                            np.array([0])
                        }
                        class_prob_, cluster_id = sess.run(
                            to_fetch, feed_dict)

                        # Keep only clusters proba, remove noise proba
                        clusters_prob = class_prob_[0, 1::]
                        # label for noise = -1, label for cluster \in
                        # {0:n_clusters}
                        cluster_id -= 1

                        is_event = cluster_id[0] > -1
                        # Highest cluster probability, rounded to 5
                        # decimals; used in file names and as the SAC
                        # saving criterion (>= 0.1).
                        probs = '{:.5f}'.format(max(list(clusters_prob)))
                        save_event = (float(probs) - 0.1) >= 0
                        if is_event:
                            n_events += 1
                            print("event {} ,cluster id {}".format(
                                is_event, class_prob_))
                            events_dic["start_time"].append(
                                win[0].stats.starttime)
                            events_dic["end_time"].append(win[0].stats.endtime)
                            events_dic["cluster_id"].append(cluster_id[0])
                            events_dic["clusters_prob"].append(
                                list(clusters_prob))

                        if idx % 1000 == 0:
                            print("Analyzing {} records".format(
                                win[0].stats.starttime))

                        if args.plot:
                            # Events and non-events share the file-name
                            # pattern and differ only in the directory.
                            plot_dir = "viz" if is_event else "viz_not"
                            win_filtered = win.copy()
                            win_filtered.plot(outfile=os.path.join(
                                args.output, plot_dir,
                                "{}_{}_{}.png".format(
                                    win[0].stats.station, str(probs),
                                    str(win[0].stats.starttime).replace(
                                        ':', '_'))))

                        if args.save_sac and save_event:
                            # Save the window with 12 s before and 10 s
                            # after it.
                            save_start = win[0].stats.starttime - 12
                            save_end = win[0].stats.endtime + 10
                            win_filtered = stream.slice(save_start, save_end)
                            win_filtered.write(os.path.join(
                                args.output, "sac", "{}_{}_{}.sac".format(
                                    win[0].stats.station, str(probs),
                                    str(win[0].stats.starttime).replace(
                                        ':', '_'))),
                                               format="SAC")

                        if idx >= max_windows:
                            print("stopped after {} windows".format(
                                max_windows))
                            print("found {} events".format(n_events))
                            break

            except KeyboardInterrupt:
                if win is not None:
                    print('Interrupted at time {}.'.format(
                        win[0].stats.starttime))
                else:
                    print('Interrupted before the first window.')
                print("processed {} windows, found {} events".format(
                    idx + 1, n_events))
                print("Run time:  {}".format(time.time() - time_start))

            # Dump this stream's events into its csv catalog
            df = pd.DataFrame.from_dict(events_dic)
            df.to_csv(output_catalog)

    print("Run time:  {}".format(time.time() - time_start))
def _collect_picks(boxes, window_start):
    """Turn groups of labelled sample indices into pick times.

    Args:
        boxes: sequence of non-empty groups of sample indices, one group per
            detected phase arrival.
        window_start: start time of the window; must support
            ``window_start + seconds``.

    Returns:
        A ``(offsets, timestamps)`` pair of equally long lists: the pick
        offsets in seconds from the window start, and the matching absolute
        timestamps.
    """
    offsets = []
    timestamps = []
    for box in boxes:
        # Centre of the group: (min + max) / 2 samples, converted to seconds.
        # NOTE(review): the 200.0 divisor presumably encodes a 100 Hz
        # sampling rate -- confirm against the data.
        offset = float(min(box) / 200.00 + max(box) / 200.00)
        offsets.append(offset)
        timestamps.append(UTCDateTime(window_start + offset).timestamp)
    return offsets, timestamps


def evaluate():
    """Run the trained unet over stream files and catalogue P/S picks.

    Every ``*.mseed`` file found under ``FLAGS.stream_path`` is loaded,
    normalised and cut into sliding windows (at offsets 0, 10 and 20).  Each
    complete window is fed to the network; samples labelled 1 are treated as
    P and samples labelled 2 as S.  Windows with picks are appended to a
    catalogue that is written to ``PS_pick_blocks.csv`` in
    ``FLAGS.output_dir``.  Depending on ``FLAGS.save_sac`` / ``FLAGS.plot``
    the window, with the predicted labels attached as an extra "LAB" trace,
    is also written as SAC and/or plotted as PNG.

    Raises:
        ValueError: if ``FLAGS.checkpoint_path + '.meta'`` does not exist.
    """
    setproctitle.setproctitle('quakenet')

    tf.set_random_seed(1234)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.add = 1
    cfg.n_clusters = FLAGS.num_classes
    cfg.n_clusters += 1

    # stream data with a placeholder
    samples = {
        'data':
        tf.placeholder(tf.float32,
                       shape=(cfg.batch_size, cfg.win_size, 3),
                       name='input_data')
    }
    stream_path = FLAGS.stream_path
    try:
        stream_files = [
            path for path in tree(stream_path)
            if fnmatch.fnmatch(path, '*.mseed')
        ]
    except Exception:
        # Fall back to treating stream_path as a single stream file.  This
        # must be a list: a bare string would be iterated character by
        # character by the loop over stream_files below.
        stream_files = [stream_path]
        print("stream_files", stream_files)
    n_events = 0
    print(" + Loading stream files {}".format(stream_files))

    events_dic = {
        "slice_start_time": [],
        "P_pick": [],
        "stname": [],
        "utc_timestamp_p": [],
        "utc_timestamp_s": [],
        "S_pick": []
    }
    with tf.Session() as sess:

        logits = unet.build_30s(samples['data'], FLAGS.num_classes, False)
        time_start = time.time()
        catalog_name = "PS_pick_blocks.csv"
        output_catalog = os.path.join(FLAGS.output_dir, catalog_name)
        print('Catalog created to store events', output_catalog)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        sess.run(init_op)
        saver = tf.train.Saver()

        if not tf.gfile.Exists(FLAGS.checkpoint_path + '.meta'):
            raise ValueError("Can't find checkpoint file")
        print('[INFO    ]\tFound checkpoint file, restoring model.')
        saver.restore(sess, FLAGS.checkpoint_path)

        # Build the prediction op once.  Creating it per window would add
        # new nodes to the graph on every iteration.
        predicted_images = unet.predict(logits, cfg.batch_size,
                                        FLAGS.image_size)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for stream_file in stream_files:
                print(" + Loading stream {}".format(stream_file))
                stream = load_stream(stream_file)
                stream = stream.normalize()
                print(" + Preprocess stream")
                print(" -- Stream is ready, starting detection")
                # Bound up front so the interrupt handler can always report.
                win = None
                idx = -1
                try:
                    for offset in np.arange(0, 30, 10):
                        win_gen = stream.slide(
                            window_length=FLAGS.window_size,
                            step=FLAGS.window_step,
                            offset=offset,
                            include_partial_windows=False)
                        for idx, win in enumerate(win_gen):
                            if not data_is_complete(win):
                                continue

                            feed_dict = {
                                samples['data']:
                                fetch_window_data(win.copy().normalize(), 3)
                            }
                            predicted_images_value = sess.run(
                                predicted_images, feed_dict)

                            clusters_p = np.where(
                                predicted_images_value[0, :] == 1)
                            clusters_s = np.where(
                                predicted_images_value[0, :] == 2)
                            p_boxes = group_consecutives(clusters_p[0])
                            s_boxes = group_consecutives(clusters_s[0])

                            # NOTE(review): a lone group is ignored here;
                            # presumably group_consecutives returns a single
                            # (possibly empty) group when nothing was
                            # detected -- confirm.
                            has_p = len(p_boxes) > 1
                            has_s = len(s_boxes) > 1
                            window_start = win[0].stats.starttime
                            tp, tpstamp = [], []
                            ts, tsstamp = [], []
                            if has_p:
                                tp, tpstamp = _collect_picks(
                                    p_boxes, window_start)
                            if has_s:
                                ts, tsstamp = _collect_picks(
                                    s_boxes, window_start)
                            if has_p or has_s:
                                events_dic["slice_start_time"].append(
                                    window_start)
                                events_dic["stname"].append(
                                    win[0].stats.station)
                                events_dic["P_pick"].append(tp)
                                events_dic["S_pick"].append(ts)
                                events_dic["utc_timestamp_p"].append(tpstamp)
                                events_dic["utc_timestamp_s"].append(tsstamp)
                                n_events += 1

                            # Attach the predicted labels as an extra trace
                            # so they are saved / plotted with the waveforms.
                            win_filtered = win.copy()
                            lab = win_filtered[2].copy()
                            lab.stats.channel = "LAB"

                            print("predicted_images_value",
                                  predicted_images_value.shape)
                            lab.data[...] = predicted_images_value[0, :]
                            win_filtered += lab

                            base_name = "{}_{}".format(
                                win_filtered[0].stats.station,
                                str(win_filtered[0].stats.starttime).replace(
                                    ':', '_'))
                            if FLAGS.save_sac:
                                output_sac = os.path.join(
                                    FLAGS.output_dir, "sac",
                                    base_name + ".sac")
                                print(output_sac, win_filtered)
                                win_filtered.write(output_sac, format="SAC")
                            if FLAGS.plot:
                                win_filtered.plot(outfile=os.path.join(
                                    FLAGS.output_dir, "viz",
                                    base_name + ".png"))
                except KeyboardInterrupt:
                    # NOTE(review): as before, processing continues with the
                    # next stream file after an interrupt.
                    if win is not None:
                        print('Interrupted at time {}.'.format(
                            win[0].stats.starttime))
                    print("processed {} windows, found {} events".format(
                        idx + 1, n_events))
                    print("Run time: ", time.time() - time_start)
        finally:
            # Ask any queue-runner threads to stop, then wait for them once.
            coord.request_stop()
            coord.join(threads)
        df = pd.DataFrame.from_dict(events_dic)
        df.to_csv(output_catalog)

    print("Run time: ", time.time() - time_start)