def predict(self, tfr_list, batch_size=500):
    """
    Main prediction loop.

    Loads the trained model, runs inference over every example in the
    given TFRecords and writes each predicted instance back out as an
    individual .tfrecord under the model's eval directory, organised
    as eval/<model_name>/<year>/<network>/<station>/.

    :param tfr_list: List of .tfrecord paths.
    :param int batch_size: Number of examples predicted per batch.
    """
    model_path = self.get_model_dir(self.model_name)
    self.model = tf.keras.models.load_model(
        model_path,
        custom_objects={
            'TransformerBlockE': TransformerBlockE,
            'TransformerBlockD': TransformerBlockD,
            'MultiHeadSelfAttention': MultiHeadSelfAttention,
            'ResBlock': ResBlock
        })
    dataset = seisnn.io.read_dataset(tfr_list)

    # Config and the output root never change per example — build once
    # instead of re-creating them inside the inner loop.
    config = seisnn.utils.Config()
    sub_dir = os.path.join(getattr(config, 'eval'), self.model_name)

    n = 0
    for val in dataset.prefetch(100).batch(batch_size):
        progbar = tf.keras.utils.Progbar(len(val['label']))
        val['predict'] = self.model.predict(val['trace'])
        iterator = seisnn.example_proto.batch_iterator(val)
        for _ in range(len(val['predict'])):
            instance = Instance(next(iterator))
            file_name = instance.get_tfrecord_name()
            # Name layout: NET.STA.LOC.CHAN.YEAR.JULDAY.SUFFIX
            net, sta, loc, chan, year, julday, suffix = file_name.split('.')
            tfr_dir = os.path.join(sub_dir, year, net, sta)
            seisnn.utils.make_dirs(tfr_dir)
            # Prefix a zero-padded running counter to keep files unique
            # even when the same station/day appears more than once.
            save_file = os.path.join(tfr_dir, f'{n:0>6}.' + file_name)
            instance.to_tfrecord(save_file)
            progbar.add(1)
            n += 1
def eval(self, dataset, batch_size=100):
    """
    Main evaluation loop.

    Loads the trained model, predicts over the named dataset, and writes
    one .tfrecord per batch (containing the predictions) into the
    evaluation directory.

    :param str dataset: Dataset name.
    :param int batch_size: Number of examples evaluated per batch.
    """
    model_path = self.get_model_dir(self.model_name)
    # Only the eval output directory is needed here.
    _, eval_path = self.get_eval_dir(dataset)
    dataset = seisnn.io.read_dataset(dataset)
    self.model = tf.keras.models.load_model(
        model_path,
        custom_objects={
            'TransformerBlockE': TransformerBlockE,
            'TransformerBlockD': TransformerBlockD,
            'MultiHeadSelfAttention': MultiHeadSelfAttention,
            'ResBlock': ResBlock
        })

    data_len = self.get_dataset_length(self.database)
    progbar = tf.keras.utils.Progbar(data_len)

    n = 0
    for val in dataset.prefetch(100).batch(batch_size):
        progbar.add(batch_size)
        title = f"eval_{n:0>5}"
        val['predict'] = self.model.predict(val['trace'])
        # Tag the batch with the output title so the serialized example
        # carries its own file identity.
        val['id'] = tf.convert_to_tensor(
            title.encode('utf-8'), dtype=tf.string)[tf.newaxis]
        example = next(seisnn.example_proto.batch_iterator(val))
        instance = Instance(example)
        instance.to_tfrecord(os.path.join(eval_path, title + '.tfrecord'))
        n += 1
def STA_LTA(self, dataset, batch_size=100, short_window=30,
            long_window=200):
    """
    STA/LTA baseline picker.

    Runs a recursive short-term-average / long-term-average
    characteristic function over the first trace component of every
    example, stores it in the 'predict' channel, and writes one
    .tfrecord per batch into the evaluation directory.

    :param str dataset: Dataset name.
    :param int batch_size: Number of examples processed per batch.
    :param int short_window: STA window length in samples.
    :param int long_window: LTA window length in samples.
    """
    _, eval_path = self.get_eval_dir(dataset)
    dataset = seisnn.io.read_dataset(dataset)
    data_len = self.get_dataset_length(self.database)
    progbar = tf.keras.utils.Progbar(data_len)

    n = 0
    for val in dataset.prefetch(100).batch(batch_size):
        progbar.add(batch_size)
        title = f"eval_{n:0>5}"
        trace_len = val['trace'].shape[2]
        batch_len = val['trace'].shape[0]
        # Size the output from the actual trace length instead of the
        # previous hard-coded 3008 so other input lengths also work.
        predict = np.zeros((batch_len, 1, trace_len, 3))
        for i in range(batch_len):
            z_trace = val['trace'].numpy()[i, :, :, 0].reshape(trace_len)
            cft = recursive_sta_lta(z_trace, short_window, long_window)
            # Only the first phase channel carries the STA/LTA output;
            # the remaining channels stay zero.
            predict[i, :, :, 0] = cft
        val['predict'] = predict
        val['id'] = tf.convert_to_tensor(
            title.encode('utf-8'), dtype=tf.string)[tf.newaxis]
        example = next(seisnn.example_proto.batch_iterator(val))
        instance = Instance(example)
        instance.to_tfrecord(os.path.join(eval_path, title + '.tfrecord'))
        n += 1
def score(self, tfr_list, batch_size=500, delta=0.1, height=0.5,
          error_distribution=True):
    """
    Compute precision / recall / F1 of predicted picks against labels.

    A predicted pick counts toward true positives when it shares the
    phase of a labeled pick and lies within +/- delta seconds of it.

    :param tfr_list: List of .tfrecord paths with label and predict data.
    :param int batch_size: Number of examples scored per batch.
    :param float delta: Matching tolerance in seconds.
    :param float height: Peak-height threshold for pick extraction.
    :param bool error_distribution: If True, plot time-residual histograms.
    """
    # Per-phase tallies in plain dicts; replaces the original
    # eval(f'{phase}_...') dynamic-name lookups, which are fragile
    # and a well-known anti-pattern.
    true_positive = {'P': 0, 'S': 0}
    error_array = {'P': [], 'S': []}
    num_predict = {'P': 0, 'S': 0}
    num_label = {'P': 0, 'S': 0}

    dataset = seisnn.io.read_dataset(tfr_list)
    for val in dataset.prefetch(100).batch(batch_size):
        iterator = seisnn.example_proto.batch_iterator(val)
        progbar = tf.keras.utils.Progbar(len(val['predict']))
        for _ in range(len(val['predict'])):
            instance = Instance(next(iterator))
            instance.label.get_picks(height=height)
            instance.predict.get_picks(height=height)

            for pick in instance.label.picks:
                phase = pick.phase
                if phase not in true_positive:
                    continue  # only P and S phases are scored
                for p_pick in instance.predict.picks:
                    if (p_pick.phase == phase
                            and pick.time - delta
                            <= p_pick.time
                            <= pick.time + delta):
                        true_positive[phase] += 1
                        error_array[phase].append(p_pick.time - pick.time)
                num_label[phase] += 1

            for pick in instance.predict.picks:
                if pick.phase in num_predict:
                    num_predict[pick.phase] += 1
            progbar.add(1)

    print(f"num_P_predict = {num_predict['P']}, "
          f"num_S_predict = {num_predict['S']}")
    print(f"num_P_label = {num_label['P']}, num_S_label = {num_label['S']}")
    for phase in ['P', 'S']:
        precision, recall, f1 = seisnn.qc.precision_recall_f1_score(
            true_positive=true_positive[phase],
            val_count=num_label[phase],
            pred_count=num_predict[phase])
        if error_distribution:
            plot_error_distribution(error_array[phase])
        print(
            f'{phase}: precision = {precision},recall = {recall},f1 = {f1}'
        )
def train_loop(self, tfr_list, model_name, epochs=1, batch_size=1,
               log_step=100, plot=False, remove=False):
    """
    Main training loop.

    :param tfr_list: List of TFRecord path.
    :param str model_name: Model directory name.
    :param int epochs: Epoch number.
    :param int batch_size: Batch size.
    :param int log_step: Logging step interval.
    :param bool plot: Plot training validation, False save fig,
        True show fig.
    :param bool remove: If True, remove model folder before training.
    :return:
    """
    model_path, history_path = self.get_model_dir(model_name, remove=remove)
    ckpt = tf.train.Checkpoint(model=self.model, optimizer=self.optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, model_path,
                                              max_to_keep=100)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        last_epoch = len(ckpt_manager.checkpoints)
        print(f'Latest checkpoint epoch {last_epoch} restored!!')

    dataset = seisnn.io.read_dataset(tfr_list)
    dataset = dataset.shuffle(100000)
    # One fixed validation batch, reused for every logging snapshot.
    val = next(iter(dataset.batch(1)))

    metrics_names = ['loss', 'val']
    data_len = self.get_dataset_length(self.database, tfr_list)
    for epoch in range(epochs):
        print(f'epoch {epoch + 1} / {epochs}')
        n = 0
        progbar = tf.keras.utils.Progbar(data_len,
                                         stateful_metrics=metrics_names)
        loss_buffer = []
        for train in dataset.prefetch(100).batch(batch_size):
            train_loss, val_loss = self.train_step(train, val)
            loss_buffer.append([train_loss, val_loss])
            values = [('loss', train_loss.numpy()),
                      ('val', val_loss.numpy())]
            progbar.add(batch_size, values=values)

            if n % log_step == 0:
                seisnn.logger.save_loss(loss_buffer, model_name, model_path)
                loss_buffer.clear()

                # Snapshot the validation prediction for visual review.
                title = f'epoch{epoch + 1:0>2}_step{n:0>5}___'
                val['predict'] = self.model.predict(val['trace'])
                val['id'] = tf.convert_to_tensor(
                    title.encode('utf-8'), dtype=tf.string)[tf.newaxis]
                example = next(seisnn.example_proto.batch_iterator(val))
                instance = Instance(example)
                if plot:
                    instance.plot()
                else:
                    instance.plot(save_dir=history_path)
            n += 1

        # Flush losses accumulated since the last log step; previously
        # the tail of each epoch was silently dropped.
        if loss_buffer:
            seisnn.logger.save_loss(loss_buffer, model_name, model_path)
            loss_buffer.clear()

        ckpt_save_path = ckpt_manager.save()
        print(f'Saving checkpoint to {ckpt_save_path}')
def train_loop(self, tfr_list, model_name, epochs=1, batch_size=1,
               log_step=100, plot=False, remove=False):
    """
    Main training loop.

    :param tfr_list: List of TFRecord path.
    :param str model_name: Model directory name.
    :param int epochs: Epoch number.
    :param int batch_size: Batch size.
    :param int log_step: Logging step interval.
    :param bool plot: Plot training validation, False save fig,
        True show fig.
    :param bool remove: If True, remove model folder before training.
    :return:
    """
    model_path, history_path = self.get_model_dir(model_name, remove=remove)
    # One checkpoint object tracks all three models and optimizers of
    # the conditional-GAN setup so they restore together.
    ckpt = tf.train.Checkpoint(
        generator_model=self.generator_model,
        discriminator_model=self.discriminator_model,
        cgan_model=self.cgan_model,
        generator_optimizer=self.generator_optimizer,
        discriminator_optimizer=self.discriminator_optimizer,
        cgan_optimizer=self.cgan_optimizer,
    )
    ckpt_manager = tf.train.CheckpointManager(ckpt, model_path,
                                              max_to_keep=100)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        # Epoch count is inferred from how many checkpoints exist.
        last_epoch = len(ckpt_manager.checkpoints)
        print(f'Latest checkpoint epoch {last_epoch} restored!!')
    dataset = seisnn.io.read_dataset(tfr_list)
    dataset = dataset.shuffle(100000)
    # One fixed validation batch, reused for every logging snapshot.
    val = next(iter(dataset.batch(1)))
    metrics_names = ['loss', 'val']
    data_len = self.get_dataset_length(self.database, tfr_list)
    for epoch in range(epochs):
        print(f'epoch {epoch + 1} / {epochs}')
        n = 0
        progbar = tf.keras.utils.Progbar(data_len,
                                         stateful_metrics=metrics_names)
        for train in dataset.prefetch(100).batch(batch_size):
            # One combined discriminator/generator step per batch.
            d_loss, g_loss = self.train_step(train, val)
            values = [('d_loss', d_loss), ('g_loss', g_loss)]
            # Advance by the actual batch length (the final batch of an
            # epoch may be smaller than batch_size).
            progbar.add(len(train['id']), values=values)
            n += 1
            if n % log_step == 0:
                val['predict'] = self.generator_model(val['trace'])
                # Feed trace + prediction pairs to the discriminator,
                # stacked along the channel axis, and print its score.
                concate = concatenate([val['trace'], val['predict']],
                                      axis=3)
                score = self.discriminator_model(concate)
                print(score)
                title = f'epoch{epoch + 1:0>2}_step{n:0>5}___'
                val['id'] = tf.convert_to_tensor(
                    title.encode('utf-8'), dtype=tf.string)[tf.newaxis]
                example = next(seisnn.example_proto.batch_iterator(val))
                instance = Instance(example)
                if plot:
                    instance.plot()
                else:
                    instance.plot(save_dir=history_path)
        ckpt_save_path = ckpt_manager.save()
        print(f'Saving checkpoint to {ckpt_save_path}')
        # NOTE(review): hard-coded user home path — this breaks for any
        # other user/machine; consider saving under model_path instead.
        self.generator_model.save(f'/home/andy/Models/{model_name}.h5')
"""Plot the loss curve and training-history instances for a model.

Reads the model's loss log and history TFRecords, then renders loss and
per-instance figures into the model's png directory.
"""
import os

# Silence TensorFlow C++ logging before any TF-dependent import runs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import argparse

from seisnn.core import Instance
from seisnn.utils import get_config
from seisnn.io import read_dataset
from seisnn.plot import plot_loss
from seisnn.example_proto import batch_iterator

ap = argparse.ArgumentParser()
# required=True: every path below depends on the model name; without it
# os.path.join(..., None) would raise an opaque TypeError instead of a
# clear argparse error.
ap.add_argument('-m', '--model', required=True, help='model', type=str)
args = ap.parse_args()

config = get_config()
SAVE_MODEL_PATH = os.path.join(config['MODELS_ROOT'], args.model)
SAVE_HISTORY_PATH = os.path.join(SAVE_MODEL_PATH, 'history')
SAVE_PNG_PATH = os.path.join(SAVE_MODEL_PATH, 'png')

# Loss log is named after the model inside its own directory.
loss_log = os.path.join(SAVE_MODEL_PATH, f'{args.model}.log')
plot_loss(loss_log, SAVE_MODEL_PATH)

dataset = read_dataset(SAVE_HISTORY_PATH)
for batch in dataset.batch(2):
    for example in batch_iterator(batch):
        feature = Instance(example)
        feature.plot(title=feature.id, save_dir=SAVE_PNG_PATH)
        print(feature.id)