def main():
    config = Config()
    parser = argparse.ArgumentParser(
        description='Code for evaluating dialog models\' responses with ' +
                    '17 evaluation metrics (arxiv.org/abs/1905.05471)')
    parser.add_argument('-tns', '--train_source', default=config.train_source,
                        help='Path to the train source file, where each line ' +
                             'corresponds to one train input', metavar='')
    parser.add_argument('-tts', '--test_source', default=config.test_source,
                        help='Path to the test source file, where each line ' +
                             'corresponds to one test input', metavar='')
    parser.add_argument('-ttt', '--test_target', default=config.test_target,
                        help='Path to the test target file, where each line ' +
                             'corresponds to one test target', metavar='')
    parser.add_argument('-r', '--test_responses', default=config.test_responses,
                        help='Path to the test model responses file', metavar='')
    parser.add_argument('-tv', '--text_vocab', default=config.text_vocab,
                        help='A file where each line is a word in the vocab',
                        metavar='')
    parser.add_argument('-vv', '--vector_vocab', default=config.vector_vocab,
                        help='A file where each line is a word in the vocab ' +
                             'followed by a vector', metavar='')
    parser.add_argument('-s', '--bleu_smoothing', default=config.bleu_smoothing,
                        help='Bleu smoothing method (choices: %(choices)s)',
                        metavar='', type=int,  # parse as int so values match the integer choices
                        choices=[0, 1, 2, 3, 4, 5, 6, 7])
    parser.add_argument('-t', '--t', default=config.t,
                        help='t value for confidence level calculation ' +
                             '(default: %(default)s)', metavar='', type=int)

    parser.parse_args(namespace=config)

    m = Metrics(config)
    m.run()
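# A minimal sketch of the Config object consumed by main() above, assuming it
# simply carries a default value for every command-line flag so that
# parser.parse_args(namespace=config) can overwrite them in place. The paths
# and values below are placeholders, not the project's real defaults.
class Config:
    def __init__(self):
        self.train_source = 'data/train_source.txt'
        self.test_source = 'data/test_source.txt'
        self.test_target = 'data/test_target.txt'
        self.test_responses = 'data/test_responses.txt'
        self.text_vocab = 'data/vocab.txt'
        self.vector_vocab = 'data/vector_vocab.txt'
        self.bleu_smoothing = 4
        self.t = 2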
def _setup_metrics(self):
    self.metrics_list = {}
    if self.logparams['metrics']['loss']:
        self.metrics_list['loss'] = Metrics('loss_curve')
    if self.logparams['metrics']['accuracy']:
        self.metrics_list['accuracy'] = Metrics('acc_curve')
    if self.logparams['metrics']['cosine-dists']:
        if not self.logparams['metrics']['cosine-dists']['stats-only']:
            self.metrics_list['cosine_dists'] = Metrics('cos_dists')
        self.metrics_list['cosine_dists_hist'] = Metrics('cosine_dists_hist')
        self.metrics_list['cosine_dists_diff'] = Metrics('cosine_dists_diff')
        self.metrics_list['cosine_dists_mean'] = Metrics('cosine_dists_mean')
    if self.logparams['metrics']['gradient-projections']:
        self.metrics_list['mean_grad'] = Metrics('mean_grad')
        self.metrics_list['diff_grad'] = Metrics('diff_grad')
    if self.logparams['metrics']['test-accuracy']:
        self.metrics_list['test_accuracy'] = Metrics('test_accuracy')
    if self.logparams['metrics']['weights']:
        for i in range(self.num_runs):
            os.makedirs(os.path.join(self.logdir, 'weight_history',
                                     'run_' + str(i)), exist_ok=True)
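# The nested logparams layout that _setup_metrics expects can be read off the
# key accesses above; an illustrative value (the key names come from the code,
# the concrete booleans are placeholders):
logparams = {
    'metrics': {
        'loss': True,
        'accuracy': True,
        'cosine-dists': {'stats-only': False},  # any falsy value skips the cosine metrics
        'gradient-projections': False,
        'test-accuracy': True,
        'weights': False,
    }
}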
def _setup_metrics(self):
    self.metrics_list = {}
    if self.logparams['metrics']['loss']:
        self.metrics_list['loss'] = Metrics('loss_curve')
    if self.logparams['metrics']['accuracy']:
        self.metrics_list['accuracy'] = Metrics('acc_curve')
    if self.logparams['metrics']['test-accuracy']:
        self.metrics_list['test_accuracy'] = Metrics('test_accuracy')
    if self.logparams['metrics']['synth-grad-norm']:
        self.metrics_list['grad_norm'] = Metrics('synth_grad_norm')
    if self.logparams['metrics']['weights']:
        for i in range(self.num_runs):
            os.makedirs(os.path.join(self.logdir, 'weight_history',
                                     'run_' + str(i)), exist_ok=True)
def __init__(self, args, sess, model):
    """
    Call the constructor of the base class, init summaries, init loading data.
    :param args:
    :param sess:
    :param model:
    :return:
    """
    super().__init__(args, sess, model)

    # Init load data and generator
    self.generator = None
    self.run = None

    # Load the data for the selected mode
    if self.args.data_mode == "realsense":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_realsence_data()
    elif self.args.data_mode == "cityscapes_val":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_val_data()
    elif self.args.data_mode == "cityscapes_test":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_test_data()
    elif self.args.data_mode == "video":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_vid_data()

    # Pick the routine to run for the selected task
    if self.args.task == "test":
        self.run = self.test
    elif self.args.task == "realsense":
        self.run = self.realsense_inference
    elif self.args.task == "realsense_imgs":
        self.run = self.realsense_imgs
    else:
        print("ERROR Please select a proper task BYE")
        exit(-1)

    # Init metrics class
    self.metrics = Metrics(self.args.num_classes)
    # Init reporter class
    self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args)
def __init__(self, args):
    self.args = args

    # Get the model class from globals by selecting it by arguments
    if self.args.model == 'FCN8sMobileNet':
        self.model = FCN8sMobileNet
    elif self.args.model == 'FCN8sShuffleNet':
        self.model = FCN8sShuffleNet
    elif self.args.model == 'UNetMobileNet':
        self.model = UNetMobileNet
    elif self.args.model == 'UNetShuffleNet':
        self.model = UNetShuffleNet
    else:
        raise NameError(self.args.model + ' unknown!!')

    # Reset the graph
    tf.reset_default_graph()

    # Create the session
    gpu_options = tf.GPUOptions(allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                 allow_soft_placement=True))

    # Create the model class and build it
    with self.sess.as_default():
        self.build_model()

    # Initialize metrics
    self.metrics = Metrics(self.args.num_classes)
def train_epoch(model, device, train_loader, criterion, optimizer, k, warm_up,
                lr, writer, epoch):
    # training phase
    print("Training Progress:")
    metrics = Metrics(args.dataset, train=True)
    model.train()
    for batch_idx, (batch, labels) in enumerate(tqdm(train_loader)):
        iteration = epoch * len(train_loader) + batch_idx
        optimizer.zero_grad()
        batch = batch.type(torch.FloatTensor).to(device)
        labels = labels.to(device)
        outputs = model(batch)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # warm up the learning rate
        if k <= warm_up:
            k = learning_rate_scheduler(optimizer, k, warm_up, lr)
        # Batch metrics
        metrics.update_metrics(outputs, labels, loss)
        if iteration % 10 == 0:
            metrics.write_to_tensorboard(writer, iteration)
    # Epoch metrics
    final_metrics = metrics.get_epoch_metrics()
    return (final_metrics, k)
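# learning_rate_scheduler is called above but not shown in this snippet.
# Judging from the call site, it ramps the learning rate over the first
# warm_up steps and returns the advanced step counter. A plausible sketch,
# assuming a linear warm-up schedule:
def learning_rate_scheduler(optimizer, k, warm_up, lr):
    # Scale the target lr by the fraction of warm-up steps completed,
    # then advance the counter used by the caller.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr * k / warm_up
    return k + 1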
def __init__(self, hparams, batch_fn=None):
    super(RaceBaseModel, self).__init__()
    if batch_fn:
        self.batch_fn = batch_fn
    else:
        self.batch_fn = self.default_batch_fn

    self.hparams = hparams
    self.save_hyperparameters(hparams)

    # Tokenizer:
    self.tokenizer = AutoTokenizer.from_pretrained(self.hparams.pretrained_model)
    self.tokenizer.add_special_tokens(
        {"additional_special_tokens": self.hparams.special_tokens})

    # Metrics:
    self.metrics = Metrics()
def __init__(self):
    """ Sets default values for the Pair class """
    self._warning = None
    self._ground_truth = None
    self._matched = False
    self._metric_version = None
    self._performer = None
    self._provider = None
    self._lead_time = None
    self._utility_time = None
    self._confidence = None
    self._probability = None
    self._quality = None
    self._event_type_similarity = None
    self._event_details_similarity = None
    self._occurrence_time_similarity = None
    self._targets_similarity = None
    self._metrics = Metrics()
class CrossValidation:
    """ This class does k-fold cross validation """

    def __init__(self, model, hyperparameters, kfold):
        self.metrics = Metrics()
        cross_validation = StratifiedKFold(n_splits=kfold, shuffle=True)
        self.clf = GridSearchCV(model, hyperparameters, cv=cross_validation,
                                n_jobs=-1, verbose=1)

    def fit_and_predict(self, x_train, y_train, x_test, y_test, metrics):
        prediction = self.clf.fit(x_train, y_train).best_estimator_.predict(x_test)
        if metrics == "accuracy":
            self.metrics.accuracy(self.clf, y=y_test, pred=prediction)
        elif metrics == "confusion_matrix":
            self.metrics.confusion_matrix(self.clf, y=y_test, pred=prediction)
        elif metrics == "roc":
            prob = self.clf.fit(x_train, y_train).best_estimator_.predict_proba(x_test)
            self.metrics.plot_roc(self.clf, y=y_test, prob=prob[:, 1])

    def get_score(self, x_test, y_test):
        return round(self.clf.score(x_test, y_test) * 100, 2)
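# For illustration, CrossValidation might be driven like this; the SVC model,
# parameter grid, and iris data are hypothetical stand-ins, not taken from the
# project (probability=True is only needed for the "roc" branch):
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y)
cv = CrossValidation(SVC(probability=True), {'C': [0.1, 1, 10]}, kfold=5)
cv.fit_and_predict(x_train, y_train, x_test, y_test, metrics="accuracy")
print(cv.get_score(x_test, y_test))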
class AbstractClassifier:
    """
    Parent class of all project classifiers.

    Attributes:
        model   : An object that defines the classifier model to implement.
        metrics : An object that defines the different metrics that can be
                  used to evaluate a model.
        X_train : The features of the training data.
        Y_train : The targets of the training data (the ground-truth labels).
        X_test  : The features of the testing data.
        Y_test  : The targets of the testing data (the ground-truth labels).
    """

    def __init__(self, model, mode='0'):
        self.model = model
        self.metrics = Metrics()
        if mode == '0':
            self.X_train, self.Y_train, self.X_test, self.Y_test = \
                DataPreprocessing().naive_preprocessing_data()
        elif mode == '1':
            self.X_train, self.Y_train, self.X_test, self.Y_test = \
                DataPreprocessing().advanced_preprocessing_data()

    def train(self):
        self.model.fit(self.X_train, self.Y_train)

    def predict(self, x):
        return self.model.predict(x)

    def evaluate(self, label="Training", metrics="accuracy"):
        if label == 'Training':
            x, y = self.X_train, self.Y_train
        else:
            x, y = self.X_test, self.Y_test
        if metrics == "accuracy":
            self.metrics.accuracy(self.model, y, x, label)
        elif metrics == "confusion_matrix":
            self.metrics.confusion_matrix(self.model, y, x, label)
        elif metrics == "roc":
            self.metrics.plot_roc(self.model, y, x, label)

    def tunning_model(self, hyperparameters, kfold, metrics):
        cross_validate_model = CrossValidation(self.model, hyperparameters, kfold)
        cross_validate_model.fit_and_predict(self.X_train, self.Y_train,
                                             self.X_test, self.Y_test, metrics)
        return cross_validate_model.get_score(self.X_test, self.Y_test)
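# A hypothetical driver for AbstractClassifier; LogisticRegression is an
# arbitrary stand-in, and DataPreprocessing (used by __init__) must be
# importable for this to run:
from sklearn.linear_model import LogisticRegression

clf = AbstractClassifier(LogisticRegression(max_iter=1000), mode='0')
clf.train()
clf.evaluate(label="Testing", metrics="accuracy")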
def __init__(self, hparams, batch_fn=None):
    """
    :param batch_fn: function to process batch
    """
    super(RaceModule, self).__init__(hparams, batch_fn)

    if self.hparams.pretrained_model in ["t5-base", "t5-small"]:
        # Model:
        config = T5Config(decoder_start_token_id=self.hparams.padding_token)
        self.model = T5ForConditionalGeneration(config).from_pretrained(
            self.hparams.pretrained_model)

        # Tokenizer:
        self.tokenizer = AutoTokenizer.from_pretrained(self.hparams.pretrained_model)
        self.tokenizer.add_special_tokens(
            {"additional_special_tokens": ["[CON]", "[QUE]", "[ANS]", "[DIS]"]})

        # Metrics:
        self.metrics = Metrics()

        try:
            self.model.resize_token_embeddings(self.hparams.tokenizer_len)
        except AttributeError:
            # Fall back to the known vocab size when hparams has no tokenizer_len
            self.model.resize_token_embeddings(32104)
    else:
        raise NotImplementedError
def validate_epoch(model, device, validation_loader, criterion, scheduler,
                   writer, epoch):
    with torch.no_grad():
        # validation phase
        print("Validation Progress:")
        metrics = Metrics(args.dataset, train=False)
        model.eval()
        for batch_idx, (batch, labels) in enumerate(tqdm(validation_loader)):
            batch = batch.type(torch.FloatTensor).to(device)
            labels = labels.to(device)
            outputs = model(batch)
            loss = criterion(outputs, labels)
            # Batch metrics
            metrics.update_metrics(outputs, labels, loss)
        # Epoch metrics
        final_metrics = metrics.get_epoch_metrics()
        metrics.write_to_tensorboard(writer, epoch)
        scheduler.step(final_metrics["Loss"])
        return final_metrics
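# Taken together, train_epoch and validate_epoch are presumably driven by an
# outer loop along these lines (a sketch; the model, loaders, optimizer,
# scheduler, writer, and warm-up settings are assumed to be set up elsewhere):
k = 0  # warm-up step counter threaded through train_epoch
for epoch in range(num_epochs):
    _, k = train_epoch(model, device, train_loader, criterion, optimizer,
                       k, warm_up, lr, writer, epoch)
    val_metrics = validate_epoch(model, device, validation_loader, criterion,
                                 scheduler, writer, epoch)
    print(f"epoch {epoch}: validation loss {val_metrics['Loss']:.4f}")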
fx_dm.setup()

# Trainer:
trainer = pl.Trainer.from_argparse_args(args)

fx_model = RaceModule.load_from_checkpoint("models/ckpts/t5.ckpt")
fx_model.setup_tune(top_p=0.95, top_k=50, no_repeat_ngram_size=2,
                    num_samples=NUM_SAMPLES)
# qj_model = BertForSequenceClassification.from_pretrained("iarfmoose/bert-base-cased-qa-evaluator").cuda()
# qj_tokenizer = AutoTokenizer.from_pretrained("iarfmoose/bert-base-cased-qa-evaluator")

fx_model.eval()
# qj_model.eval()

metrics = Metrics()
summary = {
    "bleu_1": 0.0,
    "bleu_2": 0.0,
    "bleu_3": 0.0,
    "bleu_4": 0.0,
    "meteor": 0.0,
    "rouge_l": 0.0
}
count = 0

print("Total Length", len(fx_dm.test_dataloader()))
for x, y in fx_dm.test_dataloader():
    output = fx_model.generate(x)
class Train(BasicTrain):
    """
    Trainer class
    """

    def __init__(self, args, sess, train_model, test_model):
        """
        Call the constructor of the base class
        init summaries
        init loading data
        :param args:
        :param sess:
        :param train_model:
        :param test_model:
        :return:
        """
        super().__init__(args, sess, train_model, test_model)
        ##################################################################################
        # Init summaries

        # Summary variables
        self.scalar_summary_tags = ['mean_iou_on_val',
                                    'train-loss-per-epoch', 'val-loss-per-epoch',
                                    'train-acc-per-epoch', 'val-acc-per-epoch']
        self.images_summary_tags = [
            ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]),
            ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])]
        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        # init summaries and their operators
        self.init_summaries()
        # Create summary writer
        self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph)
        ##################################################################################
        # Init load data and generator
        self.generator = None
        if self.args.data_mode == "experiment_tfdata":
            self.data_session = None
            self.train_next_batch, self.train_data_len = self.init_tfdata(
                self.args.batch_size, self.args.abs_data_dir,
                (self.args.img_height, self.args.img_width), mode='train')
            self.num_iterations_training_per_epoch = self.train_data_len // self.args.batch_size
            self.generator = self.train_tfdata_generator
        elif self.args.data_mode == "experiment_h5":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data_h5()
            self.generator = self.train_h5_generator
        elif self.args.data_mode == "experiment_v2":
            self.targets_resize = self.args.targets_resize
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data(v2=True)
            self.generator = self.train_generator
        elif self.args.data_mode == "experiment":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data()
            self.generator = self.train_generator
        elif self.args.data_mode == "test_tfdata":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_tfdata_generator
        elif self.args.data_mode == "test":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_eval":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.names_mapper = None
            self.load_test_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_v2":
            self.targets_resize = self.args.targets_resize
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data(v2=True)
            self.generator = self.test_generator
        elif self.args.data_mode == "video":
            self.args.data_mode = "test"
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_vid_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "debug":
            print("Debugging photo loading..")
            # self.debug_x = misc.imread('/data/menna/cityscapes/leftImg8bit/val/lindau/lindau_000048_000019_leftImg8bit.png')
            # self.debug_y = misc.imread('/data/menna/cityscapes/gtFine/val/lindau/lindau_000048_000019_gtFine_labelIds.png')
            # self.debug_x = np.expand_dims(misc.imresize(self.debug_x, (512, 1024)), axis=0)
            # self.debug_y = np.expand_dims(misc.imresize(self.debug_y, (512, 1024)), axis=0)
            self.debug_x = np.load('data/debug/debug_x.npy')
            self.debug_y = np.load('data/debug/debug_y.npy')
            print("Debugging photo loaded")
        else:
            print("ERROR Please select a proper data_mode BYE")
            exit(-1)
        ##################################################################################
        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class
        if self.args.mode in ('train', 'overfit'):
            self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args)
        elif self.args.mode == 'test':
            self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args)
        ##################################################################################

    def crop(self):
        sh = self.val_data['X'].shape
        temp_val_data = {'X': np.zeros((sh[0] * 2, sh[1], sh[2] // 2, sh[3]), self.val_data['X'].dtype),
                         'Y': np.zeros((sh[0] * 2, sh[1], sh[2] // 2), self.val_data['Y'].dtype)}
        for i in range(sh[0]):
            temp_val_data['X'][i * 2, :, :, :] = self.val_data['X'][i, :, :sh[2] // 2, :]
            temp_val_data['X'][i * 2 + 1, :, :, :] = self.val_data['X'][i, :, sh[2] // 2:, :]
            temp_val_data['Y'][i * 2, :, :] = self.val_data['Y'][i, :, :sh[2] // 2]
            temp_val_data['Y'][i * 2 + 1, :, :] = self.val_data['Y'][i, :, sh[2] // 2:]
        self.val_data = temp_val_data

    def init_tfdata(self, batch_size, main_dir, resize_shape, mode='train'):
        self.data_session = tf.Session()
        print("Creating the iterator for training data")
        with tf.device('/cpu:0'):
            segdl = SegDataLoader(main_dir, batch_size,
                                  (resize_shape[0], resize_shape[1]),
                                  resize_shape,  # * 2), resize_shape,
                                  'data/cityscapes_tfdata/train.txt')
            iterator = Iterator.from_structure(segdl.data_tr.output_types,
                                               segdl.data_tr.output_shapes)
            next_batch = iterator.get_next()
            self.init_op = iterator.make_initializer(segdl.data_tr)
            self.data_session.run(self.init_op)

        print("Loading Validation data in memory for faster training..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        # self.crop()
        # import cv2
        # cv2.imshow('crop1', self.val_data['X'][0, :, :, :])
        # cv2.imshow('crop2', self.val_data['X'][1, :, :, :])
        # cv2.imshow('seg1', self.val_data['Y'][0, :, :])
        # cv2.imshow('seg2', self.val_data['Y'][1, :, :])
        # cv2.waitKey()
        self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size
        # self.num_iterations_validation_per_epoch = (
        #     self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        self.num_iterations_validation_per_epoch = self.val_data_len // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

        return next_batch, segdl.data_len

    @timeit
    def load_overfit_data(self):
        print("Loading data..")
        self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"),
                           'Y': np.load(self.args.data_dir + "Y_train.npy")}
        self.train_data_len = self.train_data['X'].shape[0] - self.train_data['X'].shape[0] % self.args.batch_size
        self.num_iterations_training_per_epoch = (
            self.train_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Train-shape-x -- " + str(self.train_data['X'].shape))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations in one epoch -- " + str(self.num_iterations_training_per_epoch))
        print("Overfitting data is loaded")

        print("Loading Validation data..")
        self.val_data = self.train_data
        self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size
        self.num_iterations_validation_per_epoch = (
            self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    def overfit_generator(self):
        start = 0
        new_epoch_flag = True
        idx = None
        while True:
            # init index array if it is a new epoch
            if new_epoch_flag:
                if self.args.shuffle:
                    idx = np.random.choice(self.train_data_len, self.train_data_len, replace=False)
                else:
                    idx = np.arange(self.train_data_len)
                new_epoch_flag = False
            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.train_data['X'][mask]
            y_batch = self.train_data['Y'][mask]
            start += self.args.batch_size
            if start >= self.train_data_len:
                start = 0
                new_epoch_flag = True
            yield x_batch, y_batch

    def init_summaries(self):
        """
        Create the summary part of the graph
        :return:
        """
        with tf.variable_scope('train-summary-per-epoch'):
            for tag in self.scalar_summary_tags:
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag)
                self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag])
            for tag, shape in self.images_summary_tags:
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag)
                self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag], max_outputs=10)

    def add_summary(self, step, summaries_dict=None, summaries_merged=None):
        """
        Add the summaries to tensorboard
        :param step:
        :param summaries_dict:
        :param summaries_merged:
        :return:
        """
        if summaries_dict is not None:
            summary_list = self.sess.run([self.summary_ops[tag] for tag in summaries_dict.keys()],
                                         {self.summary_placeholders[tag]: value
                                          for tag, value in summaries_dict.items()})
            for summary in summary_list:
                self.summary_writer.add_summary(summary, step)
        if summaries_merged is not None:
            self.summary_writer.add_summary(summaries_merged, step)

    @timeit
    def load_train_data(self, v2=False):
        print("Loading Training data..")
        self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"),
                           'Y': np.load(self.args.data_dir + "Y_train.npy")}
        self.train_data = self.resize(self.train_data)
        if v2:
            out_shape = (self.train_data['Y'].shape[1] // self.targets_resize,
                         self.train_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.train_data['Y'].shape[0], out_shape[0], out_shape[1]),
                          dtype=self.train_data['Y'].dtype)
            for y in range(self.train_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.train_data['Y'][y, ...], out_shape, interp='nearest')
            self.train_data['Y'] = yy
        self.train_data_len = self.train_data['X'].shape[0]
        self.num_iterations_training_per_epoch = (
            self.train_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch))
        print("Training data is loaded")

        print("Loading Validation data..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        self.val_data['Y_large'] = self.val_data['Y']
        if v2:
            out_shape = (self.val_data['Y'].shape[1] // self.targets_resize,
                         self.val_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.val_data['Y'].shape[0], out_shape[0], out_shape[1]),
                          dtype=self.train_data['Y'].dtype)
            for y in range(self.val_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.val_data['Y'][y, ...], out_shape, interp='nearest')
            self.val_data['Y'] = yy
        self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size
        self.num_iterations_validation_per_epoch = (
            self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    @timeit
    def load_train_data_h5(self):
        print("Loading Training data..")
        self.train_data = h5py.File(self.args.data_dir + self.args.h5_train_file, 'r')
        self.train_data_len = self.args.h5_train_len
        self.num_iterations_training_per_epoch = (
            self.train_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch))
        print("Training data is loaded")

        print("Loading Validation data..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size
        self.num_iterations_validation_per_epoch = (
            self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    @timeit
    def load_vid_data(self):
        print("Loading Video data..")
        self.test_data = {'X': np.load(self.args.data_dir + "X_vid.npy")}
        self.test_data['Y'] = np.zeros(self.test_data['X'].shape[:3])
        self.test_data_len = self.test_data['X'].shape[0]
        print("Vid-shape-x -- " + str(self.test_data['X'].shape))
        print("Vid-shape-y -- " + str(self.test_data['Y'].shape))
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Video data is loaded")

    @timeit
    def load_val_data(self, v2=False):
        print("Loading Validation data..")
        self.test_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                          'Y': np.load(self.args.data_dir + "Y_val.npy")}
        self.test_data = self.resize(self.test_data)
        self.test_data['Y_large'] = self.test_data['Y']
        if v2:
            out_shape = (self.test_data['Y'].shape[1] // self.targets_resize,
                         self.test_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.test_data['Y'].shape[0], out_shape[0], out_shape[1]),
                          dtype=self.test_data['Y'].dtype)
            for y in range(self.test_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.test_data['Y'][y, ...], out_shape, interp='nearest')
            self.test_data['Y'] = yy
        self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size
        print("Validation-shape-x -- " + str(self.test_data['X'].shape))
        print("Validation-shape-y -- " + str(self.test_data['Y'].shape))
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Validation data is loaded")

    @timeit
    def load_test_data(self):
        print("Loading Testing data..")
        self.test_data = {'X': np.load(self.args.data_dir + "X_test.npy")}
        self.names_mapper = {'X': np.load(self.args.data_dir + "xnames_test.npy"),
                             'Y': np.load(self.args.data_dir + "ynames_test.npy")}
        self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size
        print("Test-shape-x -- " + str(self.test_data['X'].shape))
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Test data is loaded")

    def test_generator(self):
        start = 0
        new_epoch_flag = True
        idx = None
        while True:
            # init index array if it is a new epoch
            if new_epoch_flag:
                if self.args.shuffle:
                    idx = np.random.choice(self.test_data_len, self.test_data_len, replace=False)
                else:
                    idx = np.arange(self.test_data_len)
                new_epoch_flag = False
            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.test_data['X'][mask]
            y_batch = self.test_data['Y'][mask]
            # update start idx
            start += self.args.batch_size
            if start >= self.test_data_len:
                start = 0
                new_epoch_flag = True
            yield x_batch, y_batch

    def train_generator(self):
        start = 0
        idx = np.random.choice(self.train_data_len,
                               self.num_iterations_training_per_epoch * self.args.batch_size,
                               replace=True)
        while True:
            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.train_data['X'][mask]
            y_batch = self.train_data['Y'][mask]
            # update start idx
            start += self.args.batch_size
            yield x_batch, y_batch
            if start >= self.train_data_len:
                return

    def train_tfdata_generator(self):
        with tf.device('/cpu:0'):
            while True:
                x_batch, y_batch = self.data_session.run(self.train_next_batch)
                yield x_batch, y_batch[:, :, :, 0]

    def train_h5_generator(self):
        start = 0
        idx = np.random.choice(self.train_data_len, self.train_data_len, replace=False)
        while True:
            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.train_data['X'][sorted(mask.tolist())]
            y_batch = self.train_data['Y'][sorted(mask.tolist())]
            # update start idx
            start += self.args.batch_size
            if start >= self.train_data_len:
                return
            yield x_batch, y_batch

    def resize(self, data):
        X = []
        Y = []
        for i in range(data['X'].shape[0]):
            X.append(misc.imresize(data['X'][i, ...], (self.args.img_height, self.args.img_width)))
            Y.append(misc.imresize(data['Y'][i, ...], (self.args.img_height, self.args.img_width), 'nearest'))
        data['X'] = np.asarray(X)
        data['Y'] = np.asarray(Y)
        return data

    def train(self):
        print("Training mode will begin NOW ..")
        # curr_lr = self.model.args.learning_rate
        for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1,
                               self.args.num_epochs + 1, 1):
            # init tqdm and get the epoch value
            tt = tqdm(self.generator(), total=self.num_iterations_training_per_epoch,
                      desc="epoch-" + str(cur_epoch) + "-")
            # init the current iterations
            cur_iteration = 0
            # init acc and loss lists
            loss_list = []
            acc_list = []
            # loop by the number of iterations
            for x_batch, y_batch in tt:
                # get the cur_it for the summary
                cur_it = self.model.global_step_tensor.eval(self.sess)
                # Feed these variables to the network
                feed_dict = {self.model.x_pl: x_batch,
                             self.model.y_pl: y_batch,
                             self.model.is_training: True
                             # self.model.curr_learning_rate: curr_lr
                             }
                # Run the feed-forward; on the last iteration, finalize the epoch
                if cur_iteration < self.num_iterations_training_per_epoch - 1:
                    # run the feed_forward
                    _, loss, acc, summaries_merged = self.sess.run(
                        [self.model.train_op, self.model.loss, self.model.accuracy,
                         self.model.merged_summaries],
                        feed_dict=feed_dict)
                    # log loss and acc
                    loss_list += [loss]
                    acc_list += [acc]
                    # summarize
                    # self.add_summary(cur_it, summaries_merged=summaries_merged)
                else:
                    # run the feed_forward
                    if self.args.data_mode == 'experiment_v2':
                        _, loss, acc, summaries_merged = self.sess.run(
                            [self.model.train_op, self.model.loss, self.model.accuracy,
                             self.model.merged_summaries],
                            feed_dict=feed_dict)
                    else:
                        _, loss, acc, summaries_merged, segmented_imgs = self.sess.run(
                            [self.model.train_op, self.model.loss, self.model.accuracy,
                             self.model.merged_summaries, self.model.segmented_summary],
                            feed_dict=feed_dict)
                    # log loss and acc
                    loss_list += [loss]
                    acc_list += [acc]
                    total_loss = np.mean(loss_list)
                    total_acc = np.mean(acc_list)
                    # summarize
                    summaries_dict = dict()
                    summaries_dict['train-loss-per-epoch'] = total_loss
                    summaries_dict['train-acc-per-epoch'] = total_acc
                    if self.args.data_mode != 'experiment_v2':
                        summaries_dict['train_prediction_sample'] = segmented_imgs
                    # self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged)
                    # report
                    self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch),
                                                               str(total_acc))
                    self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch),
                                                               str(total_loss))
                    self.reporter.finalize()
                    # Update the Global step
                    self.model.global_step_assign_op.eval(session=self.sess,
                                                          feed_dict={self.model.global_step_input: cur_it + 1})
                    # Update the Cur Epoch tensor
                    # it is the last thing because if training is interrupted it repeats this epoch
                    self.model.global_epoch_assign_op.eval(session=self.sess,
                                                           feed_dict={self.model.global_epoch_input: cur_epoch + 1})
                    # print in console
                    tt.close()
                    print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) +
                          "-" + " acc:" + str(total_acc)[:6])
                    # Break the loop to finalize this epoch
                    break
                # Update the Global step
                self.model.global_step_assign_op.eval(session=self.sess,
                                                      feed_dict={self.model.global_step_input: cur_it + 1})
                # update the cur_iteration
                cur_iteration += 1

            # Save the current checkpoint
            if cur_epoch % self.args.save_every == 0:
                self.save_model()
            # Test the model on validation
            if cur_epoch % self.args.test_every == 0:
                self.test_per_epoch(step=self.model.global_step_tensor.eval(self.sess),
                                    epoch=self.model.global_epoch_tensor.eval(self.sess))
            # if cur_epoch % self.args.learning_decay_every == 0:
            #     curr_lr = curr_lr * self.args.learning_decay
            #     print('Current learning rate is ', curr_lr)

        print("Training Finished")

    def test_per_epoch(self, step, epoch):
        print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..")
        # init tqdm and get the epoch value
        tt = tqdm(range(self.num_iterations_validation_per_epoch),
                  total=self.num_iterations_validation_per_epoch,
                  desc="Val-epoch-" + str(epoch) + "-")
        # init acc and loss lists
        loss_list = []
        acc_list = []
        inf_list = []
        # idx of minibatch
        idx = 0
        # reset metrics
        self.metrics.reset()
        # get the maximum iou to compare with and save the best model
        max_iou = self.model.best_iou_tensor.eval(self.sess)
        # loop by the number of iterations
        for cur_iteration in tt:
            # load minibatches
            x_batch = self.val_data['X'][idx:idx + self.args.batch_size]
            y_batch = self.val_data['Y'][idx:idx + self.args.batch_size]
            if self.args.data_mode == 'experiment_v2':
                y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size]
            # update idx of minibatch
            idx += self.args.batch_size
            # Feed these variables to the network
            feed_dict = {self.model.x_pl: x_batch,
                         self.model.y_pl: y_batch,
                         self.model.is_training: False
                         }
            # Run the feed-forward; on the last iteration, finalize the epoch
            if cur_iteration < self.num_iterations_validation_per_epoch - 1:
                start = time.time()
                # run the feed_forward
                out_argmax, loss, acc, summaries_merged = self.sess.run(
                    [self.model.out_argmax, self.model.loss, self.model.accuracy,
                     self.model.merged_summaries],
                    feed_dict=feed_dict)
                end = time.time()
                # log loss and acc
                loss_list += [loss]
                acc_list += [acc]
                inf_list += [end - start]
                if self.args.data_mode == 'experiment_v2':
                    yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]),
                                  dtype=np.uint32)
                    out_argmax = np.asarray(out_argmax, dtype=np.uint8)
                    for y in range(out_argmax.shape[0]):
                        yy[y, ...] = misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest')
                    y_batch = y_batch_large
                    out_argmax = yy
                # log metrics
                self.metrics.update_metrics_batch(out_argmax, y_batch)
            else:
                start = time.time()
                # run the feed_forward
                if self.args.data_mode == 'experiment_v2':
                    # Issues in concatenating gt and img with diff sizes now for segmented_imgs
                    out_argmax, acc = self.sess.run(
                        [self.test_model.out_argmax, self.test_model.accuracy],
                        feed_dict=feed_dict)
                else:
                    out_argmax, acc, segmented_imgs = self.sess.run(
                        [self.test_model.out_argmax, self.test_model.accuracy,
                         self.test_model.segmented_summary],
                        feed_dict=feed_dict)
                end = time.time()
                # log loss and acc
                acc_list += [acc]
                inf_list += [end - start]
                # log metrics
                self.metrics.update_metrics_batch(out_argmax, y_batch)
                # mean over batches
                total_acc = np.mean(acc_list)
                mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch)
                mean_iou_arr = self.metrics.iou
                mean_inference = str(np.mean(inf_list)) + '-seconds'
                # summarize
                summaries_dict = dict()
                summaries_dict['val-acc-per-epoch'] = total_acc
                summaries_dict['mean_iou_on_val'] = mean_iou
                if self.args.data_mode != 'experiment_v2':
                    # Issues in concatenating gt and img with diff sizes now for segmented_imgs
                    summaries_dict['val_prediction_sample'] = segmented_imgs
                # self.add_summary(step, summaries_dict=summaries_dict, summaries_merged=summaries_merged)
                # report
                self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch),
                                                           str(total_acc))
                self.reporter.report_experiment_statistics('avg_inference_time_on_validation',
                                                           'epoch-' + str(epoch), str(mean_inference))
                self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr)
                self.reporter.finalize()
                # print in console
                tt.close()
                print("Val-epoch-" + str(epoch) + "-" + "acc:" + str(total_acc)[:6] +
                      "-mean_iou:" + str(mean_iou))
                print("Last_max_iou: " + str(max_iou))
                if mean_iou > max_iou:
                    print("This validation got a new best iou. so we will save this one")
                    # save the best model
                    self.save_best_model()
                    # Set the new maximum
                    self.model.best_iou_assign_op.eval(session=self.sess,
                                                       feed_dict={self.model.best_iou_input: mean_iou})
                else:
                    print("hmm not the best validation epoch :/..")
                # Break the loop to finalize this epoch
                break

    def linknet_postprocess(self, gt):
        gt2 = gt - 1
        gt2[gt == -1] = 19
        return gt2

    def test(self, pkl=False):
        print("Testing mode will begin NOW..")
        # load the best model checkpoint to test on it
        if not pkl:
            self.load_best_model()
        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # naming = np.load(self.args.data_dir + 'names_train.npy')
        # init acc and loss lists
        acc_list = []
        img_list = []
        # idx of image
        idx = 0
        # reset metrics
        self.metrics.reset()
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            y_batch = self.test_data['Y'][idx:idx + 1]
            if self.args.data_mode == 'test_v2':
                y_batch_large = self.test_data['Y_large'][idx:idx + 1]
            idx += 1
            # Feed these variables to the network
            if self.args.random_cropping:
                feed_dict = {self.test_model.x_pl_before: x_batch,
                             self.test_model.y_pl_before: y_batch,
                             self.test_model.is_training: False,
                             }
            else:
                feed_dict = {self.test_model.x_pl: x_batch,
                             self.test_model.y_pl: y_batch,
                             self.test_model.is_training: False
                             }
            # run the feed_forward
            if self.args.data_mode == 'test_v2':
                out_argmax, acc = self.sess.run(
                    [self.test_model.out_argmax, self.test_model.accuracy],
                    feed_dict=feed_dict)
            else:
                out_argmax, acc, segmented_imgs = self.sess.run(
                    [self.test_model.out_argmax, self.test_model.accuracy,
                     # self.test_model.merged_summaries, self.test_model.segmented_summary],
                     self.test_model.segmented_summary],
                    feed_dict=feed_dict)
            if self.args.data_mode == 'test_v2':
                yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]),
                              dtype=np.uint32)
                out_argmax = np.asarray(out_argmax, dtype=np.uint8)
                for y in range(out_argmax.shape[0]):
                    yy[y, ...] = misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest')
                y_batch = y_batch_large
                out_argmax = yy

            if pkl:
                out_argmax[0] = self.linknet_postprocess(out_argmax[0])
                segmented_imgs = decode_labels(out_argmax, 20)

            # print('mean preds ', out_argmax.mean())
            # np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0])
            if self.args.data_mode == 'test':
                plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0])

            # log loss and acc
            acc_list += [acc]

            # log metrics
            if self.args.random_cropping:
                y1 = np.expand_dims(y_batch[0, :, :512], axis=0)
                y2 = np.expand_dims(y_batch[0, :, 512:], axis=0)
                y_batch = np.concatenate((y1, y2), axis=0)
                self.metrics.update_metrics(out_argmax, y_batch, 0, 0)
            else:
                self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0)

        # mean over batches
        total_loss = 0
        total_acc = np.mean(acc_list)
        mean_iou = self.metrics.compute_final_metrics(self.test_data_len)

        # print in console
        tt.close()
        print("Here the statistics")
        print("Total_loss: " + str(total_loss))
        print("Total_acc: " + str(total_acc)[:6])
        print("mean_iou: " + str(mean_iou))
        print("Plotting imgs")
        for i in range(len(img_list)):
            plt.imsave(self.args.imgs_dir + 'test_' + str(i) + '.png', img_list[i])

    def test_eval(self, pkl=False):
        print("Testing mode will begin NOW..")
        # load the best model checkpoint to test on it
        if not pkl:
            self.load_best_model()
        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # idx of image
        idx = 0
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            # Feed these variables to the network
            if self.args.random_cropping:
                feed_dict = {self.test_model.x_pl_before: x_batch,
                             self.test_model.is_training: False,
                             }
            else:
                feed_dict = {self.test_model.x_pl: x_batch,
                             self.test_model.is_training: False
                             }
            # run the feed_forward
            out_argmax, segmented_imgs = self.sess.run(
                [self.test_model.out_argmax, self.test_model.segmented_summary],
                feed_dict=feed_dict)
            if pkl:
                out_argmax[0] = self.linknet_postprocess(out_argmax[0])
                segmented_imgs = decode_labels(out_argmax, 20)
            # Colored results for visualization
            colored_save_path = self.args.out_dir + 'imgs/' + str(self.names_mapper['Y'][idx])
            if not os.path.exists(os.path.dirname(colored_save_path)):
                os.makedirs(os.path.dirname(colored_save_path))
            plt.imsave(colored_save_path, segmented_imgs[0])
            # Results for official evaluation
            save_path = self.args.out_dir + 'results/' + str(self.names_mapper['Y'][idx])
            if not os.path.exists(os.path.dirname(save_path)):
                os.makedirs(os.path.dirname(save_path))
            output = postprocess(out_argmax[0])
            misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest'))
            idx += 1
        # print in console
        tt.close()

    def test_inference(self):
        """
        Like the testing function, but this one is for calculating the inference
        time and measuring the frames per second
        """
        print("INFERENCE mode will begin NOW..")
        # load the best model checkpoint to test on it
        self.load_best_model()
        # output_node: network/output/Argmax
        # input_node: network/input/Placeholder
        # for n in tf.get_default_graph().as_graph_def().node:
        #     if 'input' in n.name:  # if 'Argmax' in n.name:
        #         import pdb; pdb.set_trace()
        print("Saving graph...")
        tf.train.write_graph(self.sess.graph_def, ".", 'graph.pb')
        print("Graph saved successfully.\n\n")
        exit(1)

        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # idx of image
        idx = 0
        # create the FPS Meter
        fps_meter = FPSMeter()
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            y_batch = self.test_data['Y'][idx:idx + 1]
            # update idx of mini_batch
            idx += 1
            # Feed these variables to the network
            if self.args.random_cropping:
                feed_dict = {self.test_model.x_pl_before: x_batch,
                             self.test_model.y_pl_before: y_batch
                             # self.test_model.is_training: False,
                             }
            else:
                feed_dict = {self.test_model.x_pl: x_batch,
                             self.test_model.y_pl: y_batch
                             # self.test_model.is_training: False
                             }
            # calculate the time of one inference
            start = time.time()
            # run the feed_forward
            _ = self.sess.run([self.test_model.out_argmax], feed_dict=feed_dict)
            # update the FPS meter
            fps_meter.update(time.time() - start)
        fps_meter.print_statistics()

    def finalize(self):
        self.reporter.finalize()
        self.summary_writer.close()
        self.save_model()

    def debug_layers(self):
        """
        This function is responsible for outputting the outputs of all layers
        and dumping them in a pickle
        :return:
        """
        print("Debugging mode will begin NOW..")
        layers = tf.get_collection('debug_layers')
        print("All layers in the collection that I want to run: {}".format(len(layers)))
        for layer in layers:
            print(layer)
        # exit(0)
        # reset metrics
        self.metrics.reset()
        print('mean image ', self.debug_x.mean())
        print('mean gt ', self.debug_y.mean())
        self.debug_y = self.linknet_preprocess_gt(self.debug_y)
        feed_dict = {self.test_model.x_pl: self.debug_x,
                     self.test_model.y_pl: self.debug_y,
                     self.test_model.is_training: False
                     }
        # var = [v for v in tf.all_variables() if v.op.name == "network/decoder_block_4/deconv/deconv/weights"]
        # conv_w = self.sess.run(var[0])
        # var = [v for v in tf.all_variables() if v.op.name == "network/decoder_block_4/deconv/deconv/biases"]
        # bias = self.sess.run(var[0])
        # run the feed_forward
        out_layers = self.sess.run(layers, feed_dict=feed_dict)
        for layer in out_layers:
            print(layer.shape)
        # dict_out = torchfile.load('out_networks_layers/dict_out.t7')
        ## init = tf.constant_initializer(conv_w)
        ## conv_w1 = tf.get_variable('my_weights', [3, 3, 128, 128], tf.float32, initializer=init, trainable=True)
        # pp = tf.nn.relu(layers[39])
        # out_relu = self.sess.run(pp, feed_dict={self.test_model.x_pl: self.debug_x,
        #                                         self.test_model.y_pl: self.debug_y,
        #                                         self.test_model.is_training: False
        #                                         })
        ## pp = tf.nn.conv2d_transpose(layers[39], conv_w, (1, 32, 64, 128), strides=(1, 2, 2, 1), padding="SAME")
        ## pp = tf.image.resize_images(layers[39], (32, 64))
        ## pp = tf.nn.conv2d(pp, conv_w, strides=(1, 1, 1, 1), padding="SAME")
        ## bias1 = tf.get_variable('my_bias', 128, tf.float32, tf.constant_initializer(bias))
        # pp = tf.nn.bias_add(pp, bias)
        # self.sess.run(conv_w1.initializer)
        # self.sess.run(bias1.initializer)
        # out_deconv = self.sess.run(pp, feed_dict={self.test_model.x_pl: self.debug_x,
        #                                           self.test_model.y_pl: self.debug_y,
        #                                           self.test_model.is_training: False
        #                                           })
        # out_deconv_direct = self.sess.run(layers[40], feed_dict={self.test_model.x_pl: self.debug_x,
        #                                                          self.test_model.y_pl: self.debug_y,
        #                                                          self.test_model.is_training: False
        #                                                          })
        # pdb.set_trace()
        # print(out_layers)
        # exit(0)
        # dump them in a pickle
        with open("out_networks_layers/out_linknet_layers.pkl", "wb") as f:
            pickle.dump(out_layers, f, protocol=2)
        # run the feed_forward again to see argmax and segmented output
        out_argmax, segmented_imgs = self.sess.run(
            [self.test_model.out_argmax, self.test_model.segmented_summary],
            feed_dict=feed_dict)
        print('mean preds ', out_argmax[0].mean())
        plt.imsave(self.args.out_dir + 'imgs/' + 'debug.png', segmented_imgs[0])
        self.metrics.update_metrics(out_argmax[0], self.debug_y, 0, 0)
        mean_iou = self.metrics.compute_final_metrics(1)
        print("mean_iou_of_debug: " + str(mean_iou))
def train(model, dataset, model_dir, summary_writer, epochs, lr, conf_thres,
          nms_thres, iou_thres, lambda_coord=5, lambda_no_obj=0.5,
          gradient_accumulations=2, clip_gradients=False, limit=None,
          debug=False, print_every=10, save_every=None, log_to_neptune=False):
    if log_to_neptune:
        env_path = Path(os.environ['HOME'], 'workspace/setup-box/neptune.env')
        load_dotenv(dotenv_path=env_path)
        neptune.init('petersiemen/sandbox', api_token=os.getenv("NEPTUNE_API_TOKEN"))

    total = limit if limit is not None else len(dataset)
    logger.info(
        f'Start training on {total} images. Using lr: {lr}, '
        f'lambda_coord: {lambda_coord}, lambda_no_obj: {lambda_no_obj}, '
        f'conf_thres: {conf_thres}, nms_thres: {nms_thres}, iou_thres: {iou_thres}, '
        f'gradient_accumulations: {gradient_accumulations}, '
        f'clip_gradients: {clip_gradients}, lambda_no_obj: {lambda_no_obj}')

    metrics = Metrics()
    model.to(DEVICE)
    model.train()
    optimizer = torch.optim.Adam(model.get_trainable_parameters(), lr=lr)
    grid_sizes = model.grid_sizes
    data_loader = DataLoader(dataset, batch_size=dataset.batch_size,
                             shuffle=True, collate_fn=dataset.collate_fn)
    class_names = model.class_names

    for epoch in range(1, epochs + 1):
        for batch_i, (images, ground_truth_boxes, image_paths) in tqdm(enumerate(data_loader), total=total):
            if len(images) != dataset.batch_size:
                logger.warning(
                    f"Skipping batch {batch_i} because it does not have correct size ({dataset.batch_size})")
                continue

            images = images.to(DEVICE)
            coordinates, class_scores, confidence = model(images)
            obj_mask, noobj_mask, cls_mask, target_coordinates, target_confidence, target_class_scores = \
                build_targets(coordinates, class_scores, ground_truth_boxes, grid_sizes)

            yolo_loss = YoloLoss(coordinates, confidence, class_scores,
                                 obj_mask, noobj_mask, cls_mask,
                                 target_coordinates, target_confidence, target_class_scores,
                                 lambda_coord=lambda_coord, lambda_no_obj=lambda_no_obj)

            class_scores = torch.sigmoid(class_scores)
            prediction = torch.cat((coordinates, confidence.unsqueeze(-1), class_scores), -1)
            detections = non_max_suppression(prediction=prediction,
                                             conf_thres=conf_thres,
                                             nms_thres=nms_thres)

            ground_truth_map_objects = list(GroundTruth.from_ground_truths(image_paths, ground_truth_boxes))
            detection_map_objects = list(Detection.from_detections(image_paths, detections))
            metrics.add_detections_for_batch(detection_map_objects, ground_truth_map_objects,
                                             iou_thres=iou_thres)

            if debug:
                plot_batch(detections, ground_truth_boxes, images, class_names)

            loss = yolo_loss.get()
            # backward pass to calculate the weight gradients
            loss.backward()

            if clip_gradients:
                logger.debug("Clipping gradients with max_norm = 1")
                clip_grad_norm_(model.parameters(), max_norm=1)

            if batch_i % print_every == 0:  # log every print_every batches
                yolo_loss.capture(summary_writer, batch_i, during='train')
                # plot_weights_and_gradients(model, summary_writer, epoch * batch_i)
                log_performance(epoch, epochs, batch_i, total, yolo_loss, metrics,
                                class_names, summary_writer, log_to_neptune)

            # Accumulate gradients before each optimizer step
            if batch_i % gradient_accumulations == 0:
                logger.debug(
                    f"Updating weights for batch {batch_i} (gradient_accumulations: {gradient_accumulations})")
                # update the weights
                optimizer.step()
                # zero the parameter (weight) gradients
                optimizer.zero_grad()

            del images
            del ground_truth_boxes

            if limit is not None and batch_i + 1 >= limit:
                logger.info('Stop here after training {} batches (limit: {})'.format(batch_i, limit))
                log_performance(epoch, epochs, batch_i, total, yolo_loss, metrics,
                                class_names, summary_writer, log_to_neptune)
                save_model(model_dir, model, epoch, batch_i)
                return

            if save_every is not None and batch_i % save_every == 0:
                save_model(model_dir, model, epoch, batch_i)

        # save model after every epoch
        save_model(model_dir, model, epoch, None)
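# save_model is called above but not defined in this snippet. A minimal sketch
# of such a helper, assuming it checkpoints the state dict under model_dir
# (the filename pattern here is hypothetical):
def save_model(model_dir, model, epoch, batch_i):
    suffix = str(epoch) if batch_i is None else f"{epoch}_{batch_i}"
    torch.save(model.state_dict(), os.path.join(model_dir, f"yolo_{suffix}.pt"))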
class NewTrain(object): def __init__(self, args, sess, model): print("\nTraining is initializing itself\n") self.args = args self.sess = sess self.model = model # shortcut for model params self.params = self.model.params # To initialize all variables self.init = None self.init_model() # Create a saver object self.saver = tf.train.Saver(max_to_keep=self.args.max_to_keep, keep_checkpoint_every_n_hours=10, save_relative_paths=True) self.saver_best = tf.train.Saver(max_to_keep=1, save_relative_paths=True) # Load from latest checkpoint if found self.load_model() ################################################################################## # Init summaries # Summary variables self.scalar_summary_tags = [ 'mean_iou_on_val', 'train-loss-per-epoch', 'val-loss-per-epoch', 'train-acc-per-epoch', 'val-acc-per-epoch' ] self.images_summary_tags = [ ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]) ] self.summary_tags = [] self.summary_placeholders = {} self.summary_ops = {} # init summaries and it's operators self.init_summaries() # Create summary writer self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) ################################################################################## if self.args.mode == 'train': self.num_iterations_training_per_epoch = self.args.tfrecord_train_len // self.args.batch_size self.num_iterations_validation_per_epoch = self.args.tfrecord_val_len // self.args.batch_size else: self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_test_data() ################################################################################## # Init metrics class self.metrics = Metrics(self.args.num_classes) # Init reporter class if self.args.mode == 'train' or 'overfit': self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) elif self.args.mode == 'test': self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) ################################################################################## @timeit def load_test_data(self): print("Loading Testing data..") self.test_data = { 'X': np.load(self.args.data_dir + "X_val.npy"), 'Y': np.load(self.args.data_dir + "Y_val.npy") } self.test_data_len = self.test_data['X'].shape[ 0] - self.test_data['X'].shape[0] % self.args.batch_size print("Test-shape-x -- " + str(self.test_data['X'].shape)) print("Test-shape-y -- " + str(self.test_data['Y'].shape)) self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size print("Test data is loaded") @timeit def init_model(self): print("Initializing the variables of the model") self.init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) self.sess.run(self.init) print("Initialization finished") def save_model(self): """ Save Model Checkpoint :return: """ print("saving a checkpoint") self.saver.save(self.sess, self.args.checkpoint_dir, self.model.global_step_tensor) print("Saved a checkpoint") def save_best_model(self): """ Save BEST Model Checkpoint :return: """ print("saving a checkpoint for the best model") self.saver_best.save(self.sess, self.args.checkpoint_best_dir, self.model.global_step_tensor) print("Saved a checkpoint for the best model") def load_best_model(self): """ Load the best model checkpoint :return: """ print("loading a checkpoint for BEST ONE") latest_checkpoint = 
tf.train.latest_checkpoint( self.args.checkpoint_best_dir) if latest_checkpoint: print( "Loading model checkpoint {} ...\n".format(latest_checkpoint)) self.saver_best.restore(self.sess, latest_checkpoint) else: print("ERROR NO best checkpoint found") exit(-1) print("BEST MODEL LOADED..") def init_summaries(self): """ Create the summary part of the graph :return: """ with tf.variable_scope('train-summary-per-epoch'): for tag in self.scalar_summary_tags: self.summary_tags.append(tag) self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag) self.summary_ops[tag] = tf.summary.scalar( tag, self.summary_placeholders[tag]) for tag, shape in self.images_summary_tags: self.summary_tags.append(tag) self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag) self.summary_ops[tag] = tf.summary.image( tag, self.summary_placeholders[tag], max_outputs=10) def add_summary(self, step, summaries_dict=None, summaries_merged=None): """ Add the summaries to tensorboard :param step: :param summaries_dict: :param summaries_merged: :return: """ if summaries_dict is not None: summary_list = self.sess.run( [self.summary_ops[tag] for tag in summaries_dict.keys()], { self.summary_placeholders[tag]: value for tag, value in summaries_dict.items() }) for summary in summary_list: self.summary_writer.add_summary(summary, step) if summaries_merged is not None: self.summary_writer.add_summary(summaries_merged, step) @timeit def load_model(self): """ Load the latest checkpoint :return: """ try: # This is for loading the pretrained weights if they can't be loaded during initialization. self.model.encoder.load_pretrained_weights(self.sess) except AttributeError: pass print("Searching for a checkpoint") latest_checkpoint = tf.train.latest_checkpoint( self.args.checkpoint_dir) if latest_checkpoint: print( "Loading model checkpoint {} ...\n".format(latest_checkpoint)) self.saver.restore(self.sess, latest_checkpoint) print("Model loaded from the latest checkpoint\n") else: print("\n.. 
No ckpt, SO First time to train :D ..\n") def train(self): print("Training mode will begin NOW ..") tf.train.start_queue_runners(sess=self.sess) curr_lr = self.model.args.learning_rate for cur_epoch in range( self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1): # init tqdm and get the epoch value tt = tqdm(range(self.num_iterations_training_per_epoch), total=self.num_iterations_training_per_epoch, desc="epoch-" + str(cur_epoch) + "-") # init acc and loss lists loss_list = [] acc_list = [] # loop by the number of iterations for cur_iteration in tt: # get the cur_it for the summary cur_it = self.model.global_step_tensor.eval(self.sess) # Feed this variables to the network feed_dict = { self.model.handle: self.model.training_handle, self.model.is_training: True, self.model.curr_learning_rate: curr_lr } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_training_per_epoch - 1: # run the feed_forward _, loss, acc, summaries_merged = self.sess.run( [ self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries ], feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] # summarize self.add_summary(cur_it, summaries_merged=summaries_merged) else: # run the feed_forward _, loss, acc, summaries_merged, segmented_imgs = self.sess.run( [ self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries, self.model.segmented_summary ], feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) # summarize summaries_dict = dict() summaries_dict['train-loss-per-epoch'] = total_loss summaries_dict['train-acc-per-epoch'] = total_acc summaries_dict['train_prediction_sample'] = segmented_imgs self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) # report self.reporter.report_experiment_statistics( 'train-acc', 'epoch-' + str(cur_epoch), str(total_acc)) self.reporter.report_experiment_statistics( 'train-loss', 'epoch-' + str(cur_epoch), str(total_loss)) self.reporter.finalize() # Update the Global step self.model.global_step_assign_op.eval( session=self.sess, feed_dict={self.model.global_step_input: cur_it + 1}) # Update the Cur Epoch tensor # it is the last thing because if it is interrupted it repeat this self.model.global_epoch_assign_op.eval( session=self.sess, feed_dict={ self.model.global_epoch_input: cur_epoch + 1 }) # print in console tt.close() print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[:6]) # Break the loop to finalize this epoch break # Update the Global step self.model.global_step_assign_op.eval( session=self.sess, feed_dict={self.model.global_step_input: cur_it + 1}) # Save the current checkpoint if cur_epoch % self.args.save_every == 0: self.save_model() # Test the model on validation if cur_epoch % self.args.test_every == 0: self.test_per_epoch( step=self.model.global_step_tensor.eval(self.sess), epoch=self.model.global_epoch_tensor.eval(self.sess)) if cur_epoch % self.args.learning_decay_every == 0: curr_lr = curr_lr * self.args.learning_decay print('Current learning rate is ', curr_lr) print("Training Finished") def test_per_epoch(self, step, epoch): print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..") # init tqdm and get the epoch value tt = tqdm(range(self.num_iterations_validation_per_epoch), 
total=self.num_iterations_validation_per_epoch, desc="Val-epoch-" + str(epoch) + "-") # init acc and loss lists loss_list = [] acc_list = [] inf_list = [] # reset metrics self.metrics.reset() # get the maximum iou to compare with and save the best model max_iou = self.model.best_iou_tensor.eval(self.sess) # init dataset to validation self.sess.run(self.model.validation_iterator.initializer) # loop by the number of iterations for cur_iteration in tt: # Feed this variables to the network feed_dict = { self.model.handle: self.model.validation_handle, self.model.is_training: False } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_validation_per_epoch - 1: start = time.time() # run the feed_forward next_img, out_argmax, loss, acc = self.sess.run( [ self.model.next_img, self.model.out_argmax, self.model.loss, self.model.accuracy ], feed_dict=feed_dict) end = time.time() # log loss and acc loss_list += [loss] acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, next_img[1]) else: start = time.time() # run the feed_forward next_img, out_argmax, loss, acc, segmented_imgs = self.sess.run( [ self.model.next_img, self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.segmented_summary ], feed_dict=feed_dict) end = time.time() # log loss and acc loss_list += [loss] acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, next_img[1]) # mean over batches total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics( self.num_iterations_validation_per_epoch) mean_iou_arr = self.metrics.iou mean_inference = str(np.mean(inf_list)) + '-seconds' # summarize summaries_dict = dict() summaries_dict['val-loss-per-epoch'] = total_loss summaries_dict['val-acc-per-epoch'] = total_acc summaries_dict['mean_iou_on_val'] = mean_iou summaries_dict['val_prediction_sample'] = segmented_imgs self.add_summary(step, summaries_dict=summaries_dict) self.summary_writer.flush() # report self.reporter.report_experiment_statistics( 'validation-acc', 'epoch-' + str(epoch), str(total_acc)) self.reporter.report_experiment_statistics( 'validation-loss', 'epoch-' + str(epoch), str(total_loss)) self.reporter.report_experiment_statistics( 'avg_inference_time_on_validation', 'epoch-' + str(epoch), str(mean_inference)) self.reporter.report_experiment_validation_iou( 'epoch-' + str(epoch), str(mean_iou), mean_iou_arr) self.reporter.finalize() # print in console tt.close() print("Val-epoch-" + str(epoch) + "-" + "loss:" + str(total_loss) + "-" + "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou)) print("Last_max_iou: " + str(max_iou)) if mean_iou > max_iou: print( "This validation got a new best iou. 
so we will save this one" ) # save the best model self.save_best_model() # Set the new maximum self.model.best_iou_assign_op.eval( session=self.sess, feed_dict={self.model.best_iou_input: mean_iou}) else: print("hmm not the best validation epoch :/..") # Break the loop to finalize this epoch break def test(self): print("Testing mode will begin NOW..") # load the best model checkpoint to test on it self.load_best_model() # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) naming = np.load(self.args.data_dir + 'names_train.npy') # init acc and loss lists loss_list = [] acc_list = [] img_list = [] # idx of image idx = 0 # reset metrics self.metrics.reset() # loop by the number of iterations for cur_iteration in tt: # load mini_batches x_batch = self.test_data['X'][idx:idx + 1] y_batch = self.test_data['Y'][idx:idx + 1] # update idx of mini_batch idx += 1 # Feed this variables to the network feed_dict = { self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False } # run the feed_forward out_argmax, loss, acc, summaries_merged, segmented_imgs = self.sess.run( [ self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries, self.model.segmented_summary ], feed_dict=feed_dict) np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0]) plt.imsave( self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0]) # log loss and acc loss_list += [loss] acc_list += [acc] # log metrics self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0) # mean over batches total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics(self.test_data_len) # print in console tt.close() print("Here the statistics") print("Total_loss: " + str(total_loss)) print("Total_acc: " + str(total_acc)[:6]) print("mean_iou: " + str(mean_iou)) print("Plotting imgs") def finalize(self): self.reporter.finalize() self.summary_writer.close() self.save_model()
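# NOTE (added sketch): the @timeit decorator applied to the loaders above is
# not defined anywhere in these snippets. A minimal stand-in with the behavior
# the call sites seem to expect (an assumption, not the author's actual code):
import functools
import time


def timeit(func):
    """Print the wall-clock duration of the wrapped call."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        print("%s took %.3f seconds" % (func.__name__, time.time() - start))
        return result
    return wrapper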
def main(): parser = argparse.ArgumentParser() parser.add_argument("-m","--model",help="model name") parser.add_argument("-p","--model_path",help="path to the pb file") parser.add_argument("-o","--out_path",help="path to save the segmentation numpy") parser.add_argument("-im","--image_path",help="path to the numpy file where rgb images are saved") parser.add_argument("-gt","--label_path",help="path to the numpy file where labels are saved") args = parser.parse_args() pb_path = args.model_path#"../fcn8s_mobilenet/checkpoints/best/final_model.pb"#"mobilenet_fcn8s.pb"#"unet_mobilenet"# "optimized_model.pb"#"mobilenet_fcn8s.pb" logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Ask tensorflow logger not to propagate logs to parent (which causes # duplicated logging) logging.getLogger('tensorflow').propagate = False # build_trt_pb(model_name, pb_path, download_dir='data') logger.info('loading TRT graph from pb: %s' % pb_path) trt_graph = load_trt_pb(pb_path) logger.info('starting up TensorFlow session') tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True tf_sess = tf.Session(config=tf_config, graph=trt_graph) tf_input = tf_sess.graph.get_tensor_by_name('network/input/Placeholder:0') tf_output = tf_sess.graph.get_tensor_by_name('network/output/ArgMax:0') logger.info('warming up the TRT graph with a dummy image') all_images = np.load(args.image_path) all_labels = np.load(args.label_path) print("------------------Data loaded!!------------------") #uid_name_map = [] #with open('map_uid_img_name.txt','r') as f: # for row in f: # row = row.strip('\n') # uid_name_map.append(row) #print(uid_name_map) elapsed = 0 metrics = Metrics(nclasses=18) means = [73.29132098, 83.04442645, 72.5238962] # bgr print("------------------Start Test!!------------------") for i in range(0, all_images.shape[0], 1): # pre process # subtract mean, normalize, then rgb to bgr image = all_images[i:i+1,:,:,:] new_image = copy.deepcopy(image).astype(float) new_image[0,:,:,0] = (image[0,:,:,2] - means[0])/255.0 #b new_image[0,:,:,1] = (image[0,:,:,1] - means[1])/255.0 #g new_image[0,:,:,2] = (image[0,:,:,0] - means[2])/255.0 #r start = time.time() segmentation = tf_sess.run(tf_output, feed_dict={tf_input: new_image}) elapsed = time.time() - start # write records #img_name = uid_name_map[i] #uid=int(img_name.split('-')[0]) #curr_record = dict(uid=uid, # command='predict_segmentation', # environment='tx2', # building=None, # time=elapsed*1000, # metric=None, # misc=None, # tag=None) #output_records.append(curr_record) #print(curr_record) #print("segmentation: ", segmentation.shape) #segmentation = np.argmax(segmentation, axis=1)#.astype(int)#tf.argmax(segmentation, axis=1, output_type=tf.int32) #segmentation = segmentation.reshape((512, 512))#tf.reshape(segmentation,[512, 512]) #if args.out_path is not None: # seg_img = Image.fromarray(np.uint8(segmentation[0])) # seg_img.save(os.path.join(args.out_path, img_name)) # update metrics label = all_labels[i:i+1,:,:] metrics.update_metrics(segmentation, label, 0, 0) if i%100 == 0: print(i) #with open(args.model+'.json','w') as f: # json.dump(output_records, f, indent=2) # print(elapsed/(i+1)) print("segmentation size:", segmentation.shape) nonignore = [1,2,3,4,5,6,7,8,9,10,11,12,13,14] iou, mean_iou = metrics.compute_final_metrics(1, nonignore=nonignore) print("mean IOU: ", mean_iou) print("Per class IOU: ", iou)
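# NOTE (added sketch): the loop above does the mean-subtraction / RGB->BGR
# swap inline with hard-coded channel indices. The same step factored into a
# self-contained function, assuming uint8 RGB input and the BGR `means` above:
import numpy as np


def preprocess_rgb_to_bgr(image, means=(73.29132098, 83.04442645, 72.5238962)):
    """image: (H, W, 3) uint8 RGB. Returns (H, W, 3) float BGR in [~-0.3, 1]."""
    bgr = image[..., ::-1].astype(float)   # reverse channel axis: RGB -> BGR
    bgr -= np.asarray(means, dtype=float)  # subtract per-channel BGR means
    return bgr / 255.0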
def evaluate(model, dataset, summary_writer, images_results_dir, iou_thres, conf_thres, nms_thres, log_every=None, limit=None, plot=False, save=False): if save: assert dir_exists_and_is_empty( images_results_dir ), f'{images_results_dir} is not empty or does not exist.' logger.info( f'Start evaluating model with iou_thres: {iou_thres}, conf_thres: {conf_thres} and nms_thres: {nms_thres}' ) metrics = Metrics() model.to(DEVICE) model.eval() with torch.no_grad(): data_loader = DataLoader(dataset, batch_size=dataset.batch_size, shuffle=True, collate_fn=dataset.collate_fn) class_names = model.class_names total = limit if limit is not None else len(data_loader) for batch_i, (images, ground_truth_boxes, image_paths) in tqdm(enumerate(data_loader), total=total): if len(images) != dataset.batch_size: logger.warning( f"Skipping batch {batch_i} because it does not have correct size ({dataset.batch_size})" ) continue images = images.to(DEVICE) coordinates, class_scores, confidence = model(images) class_scores = torch.sigmoid(class_scores) prediction = torch.cat( (coordinates, confidence.unsqueeze(-1), class_scores), -1) detections = non_max_suppression(prediction=prediction, conf_thres=conf_thres, nms_thres=nms_thres) if plot: plot_batch(detections, ground_truth_boxes, images, class_names) if save: save_batch(image_paths, images_results_dir, detections, ground_truth_boxes, images, class_names) ground_truth_map_objects = list( GroundTruth.from_ground_truths(image_paths, ground_truth_boxes)) detection_map_objects = list( Detection.from_detections(image_paths, detections)) metrics.add_detections_for_batch(detection_map_objects, ground_truth_map_objects, iou_thres=iou_thres) if limit is not None and batch_i >= limit: logger.info(f"Stop evaluation here after {batch_i} batches") break if batch_i != 0 and log_every is not None and batch_i % log_every == 0: log_average_precision_for_classes(metrics, class_names, summary_writer, batch_i) log_average_precision_for_classes(metrics, class_names, summary_writer, total)
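# NOTE (added sketch): the iou_thres matching above happens inside
# Metrics.add_detections_for_batch, which is not shown. For reference, a
# minimal box IoU in the common (x1, y1, x2, y2) convention -- the convention
# actually used by Metrics is an assumption here:
import torch


def box_iou(a, b):
    """a, b: 1-D tensors (x1, y1, x2, y2). Returns IoU as a 0-dim tensor."""
    ix1 = torch.max(a[0], b[0])
    iy1 = torch.max(a[1], b[1])
    ix2 = torch.min(a[2], b[2])
    iy2 = torch.min(a[3], b[3])
    inter = (ix2 - ix1).clamp(min=0) * (iy2 - iy1).clamp(min=0)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-16)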
class Pair(object): def __init__(self): """ Sets default values for the Pair class """ self._warning = None self._ground_truth = None self._matched = False self._metric_version = None self._performer = None self._provider = None self._lead_time = None self._utility_time = None self._confidence = None self._probability = None self._quality = None self._event_type_similarity = None self._event_details_similarity = None self._occurrence_time_similarity = None self._targets_similarity = None self._metrics = Metrics() def csv_row(self): """ :return: a human readable string representation of the object :rtype: str """ return '%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s' % (int(self.warning.id), int(self.ground_truth.id), self.matched, self.metric_version, self.performer, self.provider, self.lead_time, self.utility_time, self.probability, self.quality, self.event_type_similarity, self.event_details_similarity, self.occurrence_time_similarity, self.targets_similarity) def to_json_dict(self): attributes = filter(lambda x: x[0] not in ('_ground_truth', '_warning', '_metrics'), self.__dict__.items()) json_content = {x[0][1:]: x[1] for x in attributes} return json.dumps(json_content) def db_row(self): return (self.metric_version, self.warning.id, self.ground_truth.id, self.lead_time, self.utility_time, self.quality, self.event_type_similarity, self.event_details_similarity, self.occurrence_time_similarity, self.targets_similarity, self.probability) def csv_head(self): return 'warning_id, ground_truth_id, matched, metric_version, performer, provider, lead_time, utility_time, probability, quality, event_type_similarity, event_details_similarity, occurrence_time_similarity, targets_similarity' warning = property(fget=attrgetter("_warning"), fdel=attrdeleter("_warning")) @warning.setter def warning(self, value): """ :param value: warning component of the pairing :type value: MetricWarning """ self._warning = value ground_truth = property(fget=attrgetter("_ground_truth"), fdel=attrdeleter("_ground_truth")) @ground_truth.setter def ground_truth(self, value): """ :param value: ground truth component of the pairing :type value: MetricGroundTruth """ self._ground_truth = value matched = property(fget=attrgetter("_matched"), fdel=attrdeleter("_matched")) @matched.setter def matched(self, value): self._matched = value metric_version = property(fget=attrgetter("_metric_version"), fdel=attrdeleter("_metric_version")) @metric_version.setter def metric_version(self, value): self._metric_version = value performer = property(fget=attrgetter("_performer"), fdel=attrdeleter("_performer")) @performer.setter def performer(self, value): self._performer = value provider = property(fget=attrgetter("_provider"), fdel=attrdeleter("_provider")) @provider.setter def provider(self, value): self._provider = value lead_time = property(fget=attrgetter("_lead_time"), fdel=attrdeleter("_lead_time")) @lead_time.setter def lead_time(self, value): self._lead_time = value utility_time = property(fget=attrgetter("_utility_time"), fdel=attrdeleter("_utility_time")) @utility_time.setter def utility_time(self, value): self._utility_time = value confidence = property(fget=attrgetter("_confidence"), fdel=attrdeleter("_confidence")) @confidence.setter def confidence(self, value): self._confidence = value probability = property(fget=attrgetter("_probability"), fdel=attrdeleter("_probability")) @probability.setter def probability(self, value): self._probability = value quality = property(fget=attrgetter("_quality"), fdel=attrdeleter("_quality")) @quality.setter def quality(self, value): self._quality = value event_type_similarity = property(fget=attrgetter("_event_type_similarity"), fdel=attrdeleter("_event_type_similarity")) @event_type_similarity.setter def event_type_similarity(self, value): self._event_type_similarity = value event_details_similarity = property(fget=attrgetter("_event_details_similarity"), fdel=attrdeleter("_event_details_similarity")) @event_details_similarity.setter def event_details_similarity(self, value): self._event_details_similarity = value occurrence_time_similarity = property(fget=attrgetter("_occurrence_time_similarity"), fdel=attrdeleter("_occurrence_time_similarity")) @occurrence_time_similarity.setter def occurrence_time_similarity(self, value): self._occurrence_time_similarity = value targets_similarity = property(fget=attrgetter("_targets_similarity"), fdel=attrdeleter("_targets_similarity")) @targets_similarity.setter def targets_similarity(self, value): self._targets_similarity = value @classmethod def build(cls, warn, gt, performer, provider): ''' :param warn: warning json dict to be compared to gt :type warn: dict :param gt: ground truth json dict to be compared to warn :type gt: dict :param performer: name of performer :type performer: str :param provider: name of provider :type provider: str ''' mgt = MetricGroundTruth() mwn = MetricWarning() warn = mwn.from_dict(warn) gt = mgt.from_dict(gt) pair = cls() pair.warning = warn pair.ground_truth = gt pair.metric_version = METRIC_VERSION pair.performer = performer pair.provider = provider pair.lead_time = pair._metrics.lead_time_score(gt, warn) pair.utility_time = pair._metrics.utility_time_score(gt, warn) pair.quality = pair._metrics.quality_score(gt, warn) pair.event_type_similarity = pair._metrics.event_type_score(gt, warn) pair.event_details_similarity = pair._metrics.event_details_score_all(gt, warn) pair.occurrence_time_similarity = pair._metrics.occ_time_score(gt, warn) pair.targets_similarity = pair._metrics.target_score(gt, warn) return pair @classmethod def build_unpaired_warn(cls, warn, performer, provider): ''' :param warn: MetricWarning object representation of unpaired warning :type warn: MetricWarning :param performer: name of performer :type performer: str :param provider: name of provider :type provider: str ''' pair = cls() pair.warning = warn pair.metric_version = METRIC_VERSION pair.performer = performer pair.provider = provider pair.set_probability(0) return pair @classmethod def build_unpaired_gt(cls, gt, performer, provider): ''' :param gt: MetricGroundTruth object representation of unpaired ground truth :type gt: MetricGroundTruth :param performer: name of performer :type performer: str :param provider: name of provider :type provider: str ''' pair = cls() pair.ground_truth = gt pair.metric_version = METRIC_VERSION pair.performer = performer pair.provider = provider return pair def get_pair_ids(self): ''' returns the ids of warning and ground truth in a tuple ''' return (self._warning.id, self._ground_truth.id) def pair_id_string(self): ''' returns a string representation of the tuple returned by get_pair_ids ''' return '('+str(self._warning.id)+','+str(self._ground_truth.id)+')' def sim_score(self): ''' returns the mean similarity score for creating the cost matrix for pairing ''' return self._mean_similarity def mean_pair_score(self): ''' returns the mean of all collected scores ''' quality = None if self._quality is None else self._quality / 4.0 scores = [x for x in (self._lead_time, self._utility_time, self._probability, quality, self._mean_similarity) if x is not None] mean_score = self._metrics.mean(scores) return mean_score def set_probability(self, m): ''' :param m: either 1 if matched or 0 if unmatched :type m: int ''' self._matched = bool(m) self._probability = self._metrics.probability_score(m, self._warning) @classmethod def from_db_dict(cls, db_dict): ''' :param db_dict: the dict that is generated by the sql commands :type db_dict: dict ''' pair = cls() pair.metric_version = db_dict['metric_version'] mgt = MetricGroundTruth() mwn = MetricWarning() pair.warning = mwn.from_dict({'id': db_dict['warn_id']}) pair.ground_truth = mgt.from_dict({'id': db_dict['gt_id']}) pair.lead_time = db_dict['lead_time'] pair.utility_time = db_dict['utility_time'] pair.quality = db_dict['quality'] pair.event_type_similarity = db_dict['event_type_similarity'] pair.event_details_similarity = db_dict['event_details_similarity'] pair.occurrence_time_similarity = db_dict['occurrence_time_similarity'] pair.targets_similarity = db_dict['targets_similarity'] pair.probability = db_dict['probability'] return pair
class Train(BasicTrain): """ Trainer class """ name = 'Train' def __init__(self, args, sess, model): """ Call the constructor of the base class init summaries init loading data :param args: :param sess: :param model: :return: """ super().__init__(args, sess, model) ################################################################################## # Init summaries # Summary variables self.scalar_summary_tags = [ 'mean_iou_on_val', 'train-loss-per-epoch', 'val-loss-per-epoch', 'train-acc-per-epoch', 'val-acc-per-epoch' ] self.images_summary_tags = [ ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]) ] # self.summary_tags = [] self.summary_placeholders = {} self.summary_ops = {} # self.merged_summaries = None # init summaries and it's operators self.init_summaries() # Create summary writer self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) ################################################################################## # Init load data and generator self.generator = None self.run = None if self.args.data_mode == "experiment_tfdata": self.data_session = None self.init_op = None self.train_next_batch, self.train_data_len = self.init_tfdata( self.args.batch_size, self.args.abs_data_dir, (self.args.img_height, self.args.img_width), mode='train') self.num_iterations_training_per_epoch = self.train_data_len // self.args.batch_size self.generator = self.train_tfdata_generator elif self.args.data_mode == "experiment": self.train_data = None self.train_data_len = None self.val_data = None self.val_data_len = None self.num_iterations_training_per_epoch = None self.num_iterations_validation_per_epoch = None self.load_train_data() self.generator = self.train_generator self.run = self.train elif self.args.data_mode == "debug": print("Debugging photo loading..") # self.debug_x= misc.imread('/leftImg8bit/val/lindau/lindau_000048_000019_leftImg8bit.png') # self.debug_y= misc.imread('/gtFine/val/lindau/lindau_000048_000019_gtFine_labelIds.png') # self.debug_x= np.expand_dims(misc.imresize(self.debug_x, (512,1024)), axis=0) # self.debug_y= np.expand_dims(misc.imresize(self.debug_y, (512,1024)), axis=0) self.debug_x = np.load('data/debug/debug_x.npy') self.debug_y = np.load('data/debug/debug_y.npy') print("Debugging photo loaded") else: print("ERROR Please select a proper data_mode BYE") exit(-1) ################################################################################## # Init metrics class self.metrics = Metrics(self.args.num_classes) # Init reporter class self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) ################################################################################## def crop(self): sh = self.val_data['X'].shape temp_val_data = { 'X': np.zeros((sh[0] * 2, sh[1], sh[2] // 2, sh[3]), self.val_data['X'].dtype), 'Y': np.zeros((sh[0] * 2, sh[1], sh[2] // 2), self.val_data['Y'].dtype) } for i in range(sh[0]): temp_val_data['X'][i * 2, :, :, :] = self.val_data['X'][i, :, :sh[2] // 2, :] temp_val_data['X'][i * 2 + 1, :, :, :] = self.val_data['X'][i, :, sh[2] // 2:, :] temp_val_data['Y'][i * 2, :, :] = self.val_data['Y'][i, :, :sh[2] // 2] temp_val_data['Y'][i * 2 + 1, :, :] = self.val_data['Y'][i, :, sh[2] // 2:] self.val_data = temp_val_data def init_tfdata(self, batch_size, main_dir, resize_shape, mode='train'): self.data_session = tf.Session() print("Creating the iterator for training data") with 
tf.device('/cpu:0'): segdl = SegDataLoader( main_dir, batch_size, (resize_shape[0], resize_shape[1]), resize_shape, # * 2), resize_shape, 'data/cityscapes_tfdata/train.txt') iterator = tf.data.Iterator.from_structure( segdl.data_tr.output_types, segdl.data_tr.output_shapes) next_batch = iterator.get_next() self.init_op = iterator.make_initializer(segdl.data_tr) self.data_session.run(self.init_op) print("Loading Validation data in memory for faster training..") self.val_data = { 'X': np.load(self.args.data_dir + "X_val.npy"), 'Y': np.load(self.args.data_dir + "Y_val.npy") } # self.crop() # import cv2 # cv2.imshow('crop1', self.val_data['X'][0,:,:,:]) # cv2.imshow('crop2', self.val_data['X'][1,:,:,:]) # cv2.imshow('seg1', self.val_data['Y'][0,:,:]) # cv2.imshow('seg2', self.val_data['Y'][1,:,:]) # cv2.waitKey() self.val_data_len = self.val_data['X'].shape[ 0] - self.val_data['X'].shape[0] % self.args.batch_size # self.num_iterations_validation_per_epoch = # (self.val_data_len + self.args.batch_size - 1) // self.args.batch_size self.num_iterations_validation_per_epoch = self.val_data_len // self.args.batch_size print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) print("Val-shape-y -- " + str(self.val_data['Y'].shape)) print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) print("Validation data is loaded") return next_batch, segdl.data_len @timeit def load_overfit_data(self): print("Loading data..") self.train_data = { 'X': np.load(self.args.data_dir + "X_train.npy"), 'Y': np.load(self.args.data_dir + "Y_train.npy") } self.train_data_len = self.train_data['X'].shape[ 0] - self.train_data['X'].shape[0] % self.args.batch_size self.num_iterations_training_per_epoch = (self.train_data_len + self.args.batch_size - 1) // self.args.batch_size print("Train-shape-x -- " + str(self.train_data['X'].shape)) print("Train-shape-y -- " + str(self.train_data['Y'].shape)) print("Num of iterations in one epoch -- " + str(self.num_iterations_training_per_epoch)) print("Overfitting data is loaded") print("Loading Validation data..") self.val_data = self.train_data self.val_data_len = self.val_data['X'].shape[ 0] - self.val_data['X'].shape[0] % self.args.batch_size self.num_iterations_validation_per_epoch = (self.val_data_len + self.args.batch_size - 1) // self.args.batch_size print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) print("Val-shape-y -- " + str(self.val_data['Y'].shape)) print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) print("Validation data is loaded") def overfit_generator(self): start = 0 new_epoch_flag = True idx = None while True: # init index array if it is a new_epoch if new_epoch_flag: if self.args.shuffle: idx = np.random.choice(self.train_data_len, self.train_data_len, replace=False) else: idx = np.arange(self.train_data_len) new_epoch_flag = False # select the mini_batches mask = idx[start:start + self.args.batch_size] x_batch = self.train_data['X'][mask] y_batch = self.train_data['Y'][mask] start += self.args.batch_size if start >= self.train_data_len: start = 0 new_epoch_flag = True yield x_batch, y_batch def init_summaries(self): """ Create the summary part of the graph :return: """ with tf.variable_scope('train-summary-per-epoch'): for tag in self.scalar_summary_tags: # self.summary_tags += tag self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag) self.summary_ops[tag] = 
tf.summary.scalar( tag, self.summary_placeholders[tag]) for tag, shape in self.images_summary_tags: # self.summary_tags += tag self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag) self.summary_ops[tag] = tf.summary.image( tag, self.summary_placeholders[tag], max_outputs=10) # self.merged_summaries = tf.summary.merge_all() # s = tf.get_collection(tf.GraphKeys.SUMMARIES) # for i in s: # if i.name == 'train-summary-per-epoch/train_prediction_sample_1:0': # print(i.name) def add_summary(self, step, summaries_dict=None, summaries_merged=None): """ Add the summaries to tensorboard :param step: :param summaries_dict: :param summaries_merged: :return: """ if summaries_dict is not None: summary_list = self.sess.run( [self.summary_ops[tag] for tag in summaries_dict.keys()], { self.summary_placeholders[tag]: value for tag, value in summaries_dict.items() }) for summary in summary_list: self.summary_writer.add_summary(summary, step) if summaries_merged is not None: self.summary_writer.add_summary(summaries_merged, step) @timeit def load_train_data(self): print("Loading Training data..") self.train_data = { 'X': np.load(self.args.data_dir + "X_train.npy"), 'Y': np.load(self.args.data_dir + "Y_train.npy") } self.train_data = self.resize(self.train_data) self.train_data_len = self.train_data['X'].shape[0] self.num_iterations_training_per_epoch = (self.train_data_len + self.args.batch_size - 1) // self.args.batch_size print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len)) print("Train-shape-y -- " + str(self.train_data['Y'].shape)) print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch)) print("Training data is loaded") print("Loading Validation data..") self.val_data = { 'X': np.load(self.args.data_dir + "X_val.npy"), 'Y': np.load(self.args.data_dir + "Y_val.npy") } self.val_data['Y_large'] = self.val_data['Y'] self.val_data_len = self.val_data['X'].shape[ 0] - self.val_data['X'].shape[0] % self.args.batch_size self.num_iterations_validation_per_epoch = (self.val_data_len + self.args.batch_size - 1) // self.args.batch_size print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) print("Val-shape-y -- " + str(self.val_data['Y'].shape)) print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) print("Validation data is loaded") def train_generator(self): start = 0 idx = np.random.choice(self.train_data_len, self.num_iterations_training_per_epoch * self.args.batch_size, replace=True) while True: # select the mini_batches mask = idx[start:start + self.args.batch_size] x_batch = self.train_data['X'][mask] y_batch = self.train_data['Y'][mask] # update start idx start += self.args.batch_size yield x_batch, y_batch if start >= self.train_data_len: return def train_tfdata_generator(self): with tf.device('/cpu:0'): while True: x_batch, y_batch = self.data_session.run(self.train_next_batch) yield x_batch, y_batch[:, :, :, 0] def resize(self, data): X = [] Y = [] for i in range(data['X'].shape[0]): X.append( misc.imresize(data['X'][i, ...], (self.args.img_height, self.args.img_width))) Y.append( misc.imresize(data['Y'][i, ...], (self.args.img_height, self.args.img_width), 'nearest')) data['X'] = np.asarray(X) data['Y'] = np.asarray(Y) return data def train(self): print("Training will begin NOW ..") # curr_lr= self.model.args.learning_rate for cur_epoch in range( self.model.global_epoch_tensor.eval(self.sess) + 1, 
self.args.num_epochs + 1, 1): # init tqdm and get the epoch value tt = tqdm(self.generator(), total=self.num_iterations_training_per_epoch, desc="epoch-" + str(cur_epoch) + "-") # init the current iterations cur_iteration = 0 # init acc and loss lists loss_list = [] acc_list = [] # loop by the number of iterations for x_batch, y_batch in tt: # get the cur_it for the summary cur_it = self.model.global_step_tensor.eval(self.sess) # Feed this variables to the network feed_dict = { self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: True # self.model.curr_learning_rate:curr_lr } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_training_per_epoch - 1: # run the feed_forward _, loss, acc = self.sess.run([ self.model.train_op, self.model.loss, self.model.accuracy ], feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] # summarize # self.add_summary(cur_it, summaries_merged=summaries_merged) else: # run the feed_forward _, loss, acc, summaries_merged, segmented_imgs = self.sess.run( [ self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries, self.model.segmented_summary ], feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) # summarize summaries_dict = dict() summaries_dict['train-loss-per-epoch'] = total_loss summaries_dict['train-acc-per-epoch'] = total_acc summaries_dict['train_prediction_sample'] = segmented_imgs self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) # report self.reporter.report_experiment_statistics( 'train-acc', 'epoch-' + str(cur_epoch), str(total_acc)) self.reporter.report_experiment_statistics( 'train-loss', 'epoch-' + str(cur_epoch), str(total_loss)) self.reporter.finalize() # Update the Global step self.model.global_step_assign_op.eval( session=self.sess, feed_dict={self.model.global_step_input: cur_it + 1}) # Update the Cur Epoch tensor # it is the last thing because if it is interrupted it repeat this self.model.global_epoch_assign_op.eval( session=self.sess, feed_dict={ self.model.global_epoch_input: cur_epoch + 1 }) # print in console tt.close() print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[:6]) # Break the loop to finalize this epoch break # Update the Global step self.model.global_step_assign_op.eval( session=self.sess, feed_dict={self.model.global_step_input: cur_it + 1}) # update the cur_iteration cur_iteration += 1 # Save the current checkpoint if cur_epoch % self.args.save_every == 0: self.save_model() # Test the model on validation if cur_epoch % self.args.test_every == 0: self.test_per_epoch( step=self.model.global_step_tensor.eval(self.sess), epoch=self.model.global_epoch_tensor.eval(self.sess)) print("Training Finished") def test_per_epoch(self, step, epoch): print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..") # init tqdm and get the epoch value tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch, desc="Val-epoch-" + str(epoch) + "-") # init acc and loss lists loss_list = [] acc_list = [] inf_list = [] # idx of minibatch idx = 0 # reset metrics self.metrics.reset() # get the maximum iou to compare with and save the best model max_iou = self.model.best_iou_tensor.eval(self.sess) # loop by the number of iterations for cur_iteration in tt: # load minibatches x_batch 
= self.val_data['X'][idx:idx + self.args.batch_size] y_batch = self.val_data['Y'][idx:idx + self.args.batch_size] # if self.args.data_mode == 'experiment_v2': # y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size] # update idx of minibatch idx += self.args.batch_size # Feed this variables to the network feed_dict = { self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_validation_per_epoch - 1: start = time.time() # run the feed_forward out_argmax, loss, acc = self.sess.run([ self.model.out_argmax, self.model.loss, self.model.accuracy ], feed_dict=feed_dict) end = time.time() # log loss and acc loss_list += [loss] acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, y_batch) else: start = time.time() # run the feed_forward out_argmax, acc, segmented_imgs = self.sess.run( [ self.model.out_argmax, self.model.accuracy, self.model.segmented_summary ], feed_dict=feed_dict) end = time.time() # log loss and acc acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, y_batch) # mean over batches total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics( self.num_iterations_validation_per_epoch) mean_iou_arr = self.metrics.iou mean_inference = str(np.mean(inf_list)) + '-seconds' # summarize summaries_dict = dict() summaries_dict['val-acc-per-epoch'] = total_acc summaries_dict['mean_iou_on_val'] = mean_iou summaries_dict['val_prediction_sample'] = segmented_imgs self.add_summary(step, summaries_dict=summaries_dict) # report self.reporter.report_experiment_statistics( 'validation-acc', 'epoch-' + str(epoch), str(total_acc)) self.reporter.report_experiment_statistics( 'avg_inference_time_on_validation', 'epoch-' + str(epoch), str(mean_inference)) self.reporter.report_experiment_validation_iou( 'epoch-' + str(epoch), str(mean_iou), mean_iou_arr) self.reporter.finalize() # print in console tt.close() print("Val-epoch-" + str(epoch) + "-" + "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou)) print("Last_max_iou: " + str(max_iou)) if mean_iou > max_iou: print( "This validation got a new best iou. so we will save this one" ) # save the best model self.save_best_model() # Set the new maximum self.model.best_iou_assign_op.eval( session=self.sess, feed_dict={self.model.best_iou_input: mean_iou}) else: print("hmm not the best validation epoch :/..") break def finalize(self): self.reporter.finalize() self.summary_writer.close() self.save_model()
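# NOTE (added sketch): train_generator above samples indices with replacement
# and stops after one epoch's worth of batches, while overfit_generator cycles
# a shuffled permutation forever. The epoch-bounded pattern in isolation, as a
# simplified without-replacement variant (names here are illustrative):
import numpy as np


def epoch_batches(data_len, batch_size, shuffle=True):
    """Yield one epoch of index masks, dropping the last partial batch."""
    idx = np.random.permutation(data_len) if shuffle else np.arange(data_len)
    for start in range(0, data_len - data_len % batch_size, batch_size):
        yield idx[start:start + batch_size]

# for mask in epoch_batches(10, 4): print(mask)   # two batches of 4, then stop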
class PairBank(object): def __init__(self): super().__init__() self._metrics = Metrics() self._ground_truth_bank = None self._warning_bank = None self._from_db = None self._bank = [] def __iter__(self): for b in self._bank: yield b def trim(self, list_of_pair_ids): ''' returns a trimmed PairBank of Pair objects :param list_of_pair_ids: a list of warn/gt id tuples :type list_of_pair_ids: [(int, int)] ''' trimmed = [x for x in self if x.get_pair_ids() in list_of_pair_ids] pb_trim = PairBank() pb_trim._bank = trimmed return pb_trim def __getitem__(self, ii): ''' :param ii: the ii_th item in the PairBank :type ii: int ''' return self._bank[ii] def __len__(self): ''' returns length of pair bank list ''' return len(self._bank) def get_by_ids(self, i, j): ''' :param i: warning id :type i: int :param j: ground truth id :type j: int ''' warn = self.get_from_warns_by_id(i).to_dict() gt = self.get_from_gt_by_id(j).to_dict() return Pair.build(warn, gt, self._warning_bank.performer, self._ground_truth_bank.provider) def get_from_warns_by_id(self, i): ''' :param i: warning id :type i: int ''' return next(x for x in self._warning_bank if x.id == i) def get_from_gt_by_id(self, j): ''' :param j: ground truth id :type j: int ''' return next(x for x in self._ground_truth_bank if x.id == j) def get_by_ids_from_banks(self, i, j): ''' :param i: warning id :type i: int :param j: ground truth id :type j: int ''' # don't make pair if it is already in the _from_db bank if (i, j) in [x.get_pair_ids() for x in self._from_db]: pair = next(x for x in self._from_db if x.get_pair_ids() == (i, j)) return pair else: warn = self.get_from_warns_by_id(i) gt = self.get_from_gt_by_id(j) mkpair = Pair.build(warn.to_dict(), gt.to_dict(), self._warning_bank.performer, self._ground_truth_bank.provider) self.add_pair(mkpair) return mkpair def __str__(self): ''' returns a csv row for each Pair in PairBank ''' return '\n'.join([x.csv_row() for x in self]) def add_pair(self, pair): ''' Add a Pair object ''' self._bank.append(pair) def __add__(self, x): ''' An addition operation for adding one PairBank to another ''' y = PairBank() y._bank = self._bank + x._bank return y def get_warn_ids(self): ''' returns the warning ids of the pair bank ''' if self._warning_bank is not None: ids = unique([x.id for x in self._warning_bank]) else: ids = None return ids def get_gt_ids(self): ''' returns ground truth ids of the pair bank ''' if self._ground_truth_bank is not None: ids = unique([x.id for x in self._ground_truth_bank]) else: ids = None return ids def base_crit(self, i, j): ''' :param i: warning id :type i: int :param j: ground truth id :type j: int ''' wn = self.get_from_warns_by_id(i) gt = self.get_from_gt_by_id(j) return self._metrics.base_criteria(gt, wn) def run_hungarian(self): ''' Run the hungarian algorithm to get valid Pair objects ''' warn_ids = self.get_warn_ids() gt_ids = self.get_gt_ids() # rows are ground truths, columns are warnings; negate quality so the # Munkres minimization maximizes total pair quality quals = [[-self.get_by_ids_from_banks(j, i).quality if self.base_crit(j, i) else 0.0 for j in warn_ids] for i in gt_ids] munk = Munkres() pairings = munk.compute(quals) pairings_ids = [(warn_ids[y], gt_ids[x]) for x, y in pairings] unpaired_warns_ids = [x for x in warn_ids if x not in [y[0] for y in pairings_ids]] unpaired_gt_ids = [x for x in gt_ids if x not in [y[1] for y in pairings_ids]] return pairings_ids, unpaired_warns_ids, unpaired_gt_ids def generate_pairs(self, performer, provider, start_date, end_date): ''' This method generates the ground truth/warning pairs and the unpaired ground truths and warnings by building the warning and ground truth banks within the pair bank, pulling the already existing pairs from the persistent pair bank to check the boundaries, and calculates recall and precision. :param performer: name of performer :type performer: str :param provider: name of provider :type provider: str :param start_date: start of the scoring period :type start_date: str in the pattern of YYYY-MM-DD :param end_date: end of the scoring period :type end_date: str in the pattern of YYYY-MM-DD :return: a list of warn/gt pairs, a list of unpaired warns and gt, a recall score, and a precision score :rtype: ([Pair], [Pair], float, float) ''' # fill WarningBank wb = WarningBank(performer, start_date, end_date) wb.fill() self._warning_bank = wb # fill GroundTruthBank gtb = GroundTruthBank(provider, start_date, end_date) gtb.fill() self._ground_truth_bank = gtb # db setup config = PasswordPSQLConnectionConfig(database=DB_CONFIG['database'], user=DB_CONFIG['user'], password=DB_CONFIG['password']) psqlconn = PSQLConnector(config) pair_resource = PairDBResource(psqlconn) # populate _from_db bank with all valid pairs curr_pair_dicts = pair_resource.get_current_pairs() self._from_db = [Pair.from_db_dict(x) for x in curr_pair_dicts] # get ids of paired and unpaired warnings and gt pair_ids, unpaired_warns, unpaired_gt = self.run_hungarian() # replace warnings bank and ground truths bank with unpaired warnings and ground truth unp_wrn = [x for x in self._warning_bank if x.id in unpaired_warns] unp_gt = [x for x in self._ground_truth_bank if x.id in unpaired_gt] # reset valid flag on all existing pairings pair_resource.set_valid_false() # report paired warnings and ground truth with scores to db for pair in self.trim(pair_ids): pair_resource.sql_insert(pair, psqlconn) # calculate recall recall = len(pair_ids) / (len(pair_ids) + len(unpaired_gt)) # calculate precision # should these include warns and gts from the previous month? precision = len(pair_ids) / (len(pair_ids) + len(unpaired_warns)) # build unpaired bank unpaired_list = unp_wrn + unp_gt unpaired_bank = PairBank() unpaired_bank._bank = unpaired_list return self._bank, unpaired_bank, recall, precision
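# NOTE (added arithmetic check): with the definitions in generate_pairs above,
#   recall    = paired / (paired + unpaired ground truths)
#   precision = paired / (paired + unpaired warnings)
# e.g. 8 pairs, 2 unpaired ground truths and 4 unpaired warnings give
# recall = 8 / 10 = 0.8 and precision = 8 / 12 ~= 0.667.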
class RaceBaseModel(pl.LightningModule): @staticmethod def default_batch_fn(batch): x, y = batch['inputs'], batch['targets'] return x, y @staticmethod def top_p_filtering(score, top_p): """ Args: score (bsz, vocab_size): output of the last layer top_p float value: keep the top tokens with cumulative probability >= top_p (nucleus filtering). Returns: score (bsz, vocab_size): output after redistributing the prob with top-p """ sorted_logits, sorted_indices = torch.sort(score, descending=True) cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) # Remove tokens with cumulative probability above the threshold sorted_indices_to_remove = cumulative_probs >= top_p # Shift the indices to the right to keep also the first token above the threshold sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[ ..., :-1].clone() sorted_indices_to_remove[..., 0] = 0 indices_to_remove = torch.zeros_like( sorted_indices_to_remove, dtype=sorted_indices_to_remove.dtype).scatter_( dim=-1, index=sorted_indices, src=sorted_indices_to_remove) score[indices_to_remove] = -float('Inf') return score def __init__(self, hparams, batch_fn=None): super(RaceBaseModel, self).__init__() if batch_fn: self.batch_fn = batch_fn else: self.batch_fn = self.default_batch_fn self.hparams = hparams self.save_hyperparameters(hparams) # Tokenizer: self.tokenizer = AutoTokenizer.from_pretrained( self.hparams.pretrained_model) self.tokenizer.add_special_tokens( {"additional_special_tokens": self.hparams.special_tokens}) # Metrics: self.metrics = Metrics() def test_step(self, batch, batch_idx): # Prepare data: inputs, targets = self.batch_fn(batch) # Generations: generations_list = self.generate(inputs, pred_len=64, sample_num=2) # Compute metrics: references = [ self.tokenizer.decode(target, skip_special_tokens=True) for target in targets ] # Multiple generations: average each metric over the sampled generations metric_names = ['bleu_1', 'bleu_2', 'bleu_3', 'bleu_4', 'meteor', 'rouge_l'] final_metrics = dict(zip(metric_names, [0] * len(metric_names))) for generations in generations_list: predictions = [ self.tokenizer.decode(generation, skip_special_tokens=True) for generation in generations ] metric_inputs = Input(predictions=predictions, references=references) scores = self.metrics.compute_metrics(metric_inputs) for k in scores: final_metrics[k] += scores[k] for k in final_metrics: final_metrics[k] /= len(generations_list) # Log: self.log_dict(final_metrics) return final_metrics def generate_sentence(self, article, answer, question=None, pred_len=64, sample_num=1, top_p=0.95, skip_special_tokens=True): """Args: article (str) answer (str) question (str): if not none, generating distractors pred_len (int): Length of predicted sequence. sample_num (int): number of samples top_p (float): top_p for generation skip_special_tokens (bool): skip special_tokens while decoding :return: list of generated sentences, len(list) = sample_num """ if question: context = " ".join([ answer, self.tokenizer.sep_token, article, self.tokenizer.sep_token, question ]) else: context = " ".join([answer, self.tokenizer.sep_token, article]) inputs = self.tokenizer([context], padding=True, truncation=True, max_length=512, return_tensors="pt") questions = self.generate(inputs, pred_len, sample_num=sample_num, top_p=top_p) return [ self.tokenizer.decode(question.squeeze(), skip_special_tokens=skip_special_tokens) for question in questions ]
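# NOTE (added sketch): top_p_filtering above only masks logits; actually
# drawing a token from the filtered distribution might look roughly like this
# (the 0.95 threshold and shapes are illustrative, not the model's defaults):
import torch
import torch.nn.functional as F


def nucleus_sample(logits, top_p=0.95):
    """logits: (bsz, vocab_size) tensor. Returns (bsz, 1) sampled token ids."""
    # clone first, since top_p_filtering mutates its argument in place
    filtered = RaceBaseModel.top_p_filtering(logits.clone(), top_p)
    probs = F.softmax(filtered, dim=-1)  # -inf logits become probability 0
    return torch.multinomial(probs, num_samples=1)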
class Test(BasicTest): """ Trainer class """ name = 'Test' def __init__(self, args, sess, model): """ Call the constructor of the base class init summaries init loading data :param args: :param sess: :param model: :return: """ super().__init__(args, sess, model) # Init load data and generator self.generator = None self.run = None # 加载数据 if self.args.data_mode == "realsense": self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_realsence_data() elif self.args.data_mode == "cityscapes_val": self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_val_data() elif self.args.data_mode == "cityscapes_test": self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_test_data() elif self.args.data_mode == "video": self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_vid_data() if self.args.task == "test": self.run = self.test elif self.args.task == "realsense": self.run = self.realsense_inference elif self.args.task == "realsense_imgs": self.run = self.realsense_imgs else: print("ERROR Please select a proper data_mode BYE") exit(-1) # Init metrics class self.metrics = Metrics(self.args.num_classes) # Init reporter class self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) def resize(self, data): X = [] Y = [] for i in range(data['X'].shape[0]): X.append(misc.imresize(data['X'][i, ...], (self.args.img_height, self.args.img_width))) Y.append(misc.imresize(data['Y'][i, ...], (self.args.img_height, self.args.img_width), 'nearest')) data['X'] = np.asarray(X) data['Y'] = np.asarray(Y) return data @timeit def load_vid_data(self): print("Loading Video data..") self.test_data = {'X': np.load(self.args.data_dir + "X_vid.npy")} self.test_data['Y'] = np.zeros(self.test_data['X'].shape[:3]) self.test_data_len = self.test_data['X'].shape[0] print("Vid-shape-x -- " + str(self.test_data['X'].shape)) print("Vid-shape-y -- " + str(self.test_data['Y'].shape)) self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size print("Video data is loaded") @timeit def load_val_data(self): print("Loading Validation data..") self.test_data = {'X': np.load(self.args.data_dir + "X_val.npy"), 'Y': np.load(self.args.data_dir + "Y_val.npy")} self.test_data = self.resize(self.test_data) self.test_data['Y_large'] = self.test_data['Y'] self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size print("Validation-shape-x -- " + str(self.test_data['X'].shape)) print("Validation-shape-y -- " + str(self.test_data['Y'].shape)) self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size print("Validation data is loaded") @timeit def load_realsence_data(self): print("Loading RealSense data..") self.test_data = {'X': np.load(self.args.data_dir + "/realsense/x_inference.npy"), 'names': np.load(self.args.data_dir + "/realsense/name_inference.npy")} self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size print("RealSense-shape-x -- " + str(self.test_data['X'].shape)) print("RealSense-shape-name -- " + str(self.test_data['names'].shape)) self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size print("RealSense data is loaded") @timeit def load_test_data(self): print("Loading Testing 
data..") self.test_data = {'X': np.load(self.args.data_dir + "X_test.npy")} self.names_mapper = {'X': np.load(self.args.data_dir + "xnames_test.npy"), 'Y': np.load(self.args.data_dir + "ynames_test.npy")} self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size print("Test-shape-x -- " + str(self.test_data['X'].shape)) self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size print("Test data is loaded") def test_generator(self): start = 0 new_epoch_flag = True idx = None while True: # init index array if it is a new_epoch if new_epoch_flag: if self.args.shuffle: idx = np.random.choice(self.test_data_len, self.test_data_len, replace=False) else: idx = np.arange(self.test_data_len) new_epoch_flag = False # select the mini_batches mask = idx[start:start + self.args.batch_size] x_batch = self.test_data['X'][mask] y_batch = self.test_data['Y'][mask] # update start idx start += self.args.batch_size if start >= self.test_data_len: start = 0 new_epoch_flag = True yield x_batch, y_batch @staticmethod def linknet_postprocess(gt): gt2 = gt - 1 gt2[gt == -1] = 19 return gt2 def test(self, pkl=False): print("Testing will begin NOW..") # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) # naming = np.load(self.args.data_dir + 'names_train.npy') # init acc and loss lists acc_list = [] img_list = [] # idx of image idx = 0 # reset metrics self.metrics.reset() # loop by the number of iterations for cur_iteration in tt: # load mini_batches x_batch = self.test_data['X'][idx:idx + 1] y_batch = self.test_data['Y'][idx:idx + 1] idx += 1 # Feed this variables to the network if self.args.random_cropping: feed_dict = {self.model.x_pl_before: x_batch, self.model.y_pl_before: y_batch, self.model.is_training: False, } else: feed_dict = {self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False } # run the feed_forward out_argmax, acc, segmented_imgs = self.sess.run( [self.model.out_argmax, self.model.accuracy, # self.model.merged_summaries, self.model.segmented_summary], self.model.segmented_summary], feed_dict=feed_dict) if pkl: out_argmax[0] = self.linknet_postprocess(out_argmax[0]) segmented_imgs = decode_labels(out_argmax, 20) # print('mean preds ', out_argmax.mean()) # np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0]) misc.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0]) # log loss and acc acc_list += [acc] # log metrics if self.args.random_cropping: y1 = np.expand_dims(y_batch[0, :, :512], axis=0) y2 = np.expand_dims(y_batch[0, :, 512:], axis=0) y_batch = np.concatenate((y1, y2), axis=0) self.metrics.update_metrics(out_argmax, y_batch, 0, 0) else: self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0) # mean over batches total_loss = 0 total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics(self.test_data_len) # print in console tt.close() print("Here the statistics") print("Total_loss: " + str(total_loss)) print("Total_acc: " + str(total_acc)[:6]) print("mean_iou: " + str(mean_iou)) print("Plotting imgs") for i in range(len(img_list)): misc.imsave(self.args.imgs_dir + 'test_' + str(i) + '.png', img_list[i]) def realsense_imgs(self): print("realsense_imgs will begin NOW..") # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) # idx of image idx = 0 # loop by the number of iterations for cur_iteration in tt: # load mini_batches x_batch = 
    def realsense_imgs(self):
        print("realsense_imgs will begin NOW..")
        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # idx of image
        idx = 0
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            idx += 1
            feed_dict = {self.model.x_pl: x_batch,
                         self.model.is_training: False
                         }
            # run the feed_forward
            segmented_imgs = self.sess.run([self.model.segmented_summary],
                                           feed_dict=feed_dict)
            # plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0][0])
            misc.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png',
                        segmented_imgs[0][0])
        tt.close()
        print("realsense_imgs finished~")

    def test_eval(self, pkl=False):
        print("Testing will begin NOW..")
        # load the best model checkpoint to test on it
        if not pkl:
            self.load_best_model()
        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # idx of image
        idx = 0
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            # Feed these variables to the network
            if self.args.random_cropping:
                feed_dict = {self.model.x_pl_before: x_batch,
                             self.model.is_training: False,
                             }
            else:
                feed_dict = {self.model.x_pl: x_batch,
                             self.model.is_training: False
                             }
            # run the feed_forward
            out_argmax, segmented_imgs = self.sess.run(
                [self.model.out_argmax, self.model.segmented_summary],
                feed_dict=feed_dict)
            if pkl:
                out_argmax[0] = self.linknet_postprocess(out_argmax[0])
                segmented_imgs = decode_labels(out_argmax, 20)
            # Colored results for visualization
            colored_save_path = self.args.out_dir + 'imgs/' + str(self.names_mapper['Y'][idx])
            if not os.path.exists(os.path.dirname(colored_save_path)):
                os.makedirs(os.path.dirname(colored_save_path))
            misc.imsave(colored_save_path, segmented_imgs[0])
            # Results for official evaluation
            save_path = self.args.out_dir + 'results/' + str(self.names_mapper['Y'][idx])
            if not os.path.exists(os.path.dirname(save_path)):
                os.makedirs(os.path.dirname(save_path))
            output = postprocess(out_argmax[0])
            misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest'))
            idx += 1
        # print in console
        tt.close()

    def realsense_inference(self):
        print("INFERENCE will begin NOW..")
        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # idx of image
        idx = 0
        # create the FPS Meter
        fps_meter = FPSMeter()
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            # y_batch = self.test_data['Y'][idx:idx + 1]
            # update idx of mini_batch
            idx += 1
            # Feed these variables to the network
            feed_dict = {self.model.x_pl: x_batch,
                         self.model.is_training: False}
            # time a single forward pass
            start = time.time()
            # run the feed_forward
            _ = self.sess.run([self.model.out_argmax], feed_dict=feed_dict)
            # update the FPS meter
            fps_meter.update(time.time() - start)
        fps_meter.print_statistics()
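    # Note on the FPS measurement above: each update() receives the latency
    # of one forward pass in seconds, so (assuming FPSMeter averages its
    # samples, which this fragment does not show) the reported rate would be
    # fps = 1.0 / mean_latency; e.g. a mean latency of 0.025 s is 40 FPS.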
    def test_inference(self):
        """
        Like the testing function, but this one calculates the inference
        time and measures frames per second.
        """
        print("INFERENCE will begin NOW..")
        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # idx of image
        idx = 0
        # create the FPS Meter
        fps_meter = FPSMeter()
        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches
            x_batch = self.test_data['X'][idx:idx + 1]
            y_batch = self.test_data['Y'][idx:idx + 1]
            # update idx of mini_batch
            idx += 1
            # Feed these variables to the network
            if self.args.random_cropping:
                feed_dict = {self.model.x_pl_before: x_batch,
                             self.model.y_pl_before: y_batch,
                             self.model.is_training: False,
                             }
            else:
                feed_dict = {self.model.x_pl: x_batch,
                             self.model.y_pl: y_batch,
                             self.model.is_training: False
                             }
            # time a single forward pass
            start = time.time()
            # run the feed_forward
            _ = self.sess.run([self.model.out_argmax], feed_dict=feed_dict)
            # update the FPS meter
            fps_meter.update(time.time() - start)
        fps_meter.print_statistics()

    def finalize(self):
        self.reporter.finalize()
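# Usage sketch (illustrative only; `build_args` and `build_model` are
# hypothetical stand-ins for this project's real entry point, and the
# attribute names mirror what Test reads from `args`):
#
#   args = build_args(data_mode="cityscapes_val", task="test",
#                     batch_size=1, num_classes=20, shuffle=False,
#                     random_cropping=False, data_dir="data/", out_dir="out/")
#   with tf.Session() as sess:
#       tester = Test(args, sess, build_model(args))
#       tester.run()        # bound to test/realsense_* in __init__ per args.task
#       tester.finalize()   # writes out_dir + 'report_test.json'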
def __init__(self, model, hyperparameters, kfold):
    # assumes: from sklearn.model_selection import GridSearchCV, StratifiedKFold
    # (Metrics is this repo's own metrics helper)
    self.metrics = Metrics()
    # stratified k-fold keeps the class proportions in every fold
    cross_validation = StratifiedKFold(n_splits=kfold, shuffle=True)
    # exhaustive search over `hyperparameters`, parallelized across all cores
    self.clf = GridSearchCV(model, hyperparameters, cv=cross_validation,
                            n_jobs=-1, verbose=1)
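# Usage sketch (illustrative only; the enclosing class name is not shown in
# this fragment, so `WrapperClass` below is a hypothetical stand-in, as are
# the estimator and grid):
#
#   from sklearn.svm import SVC
#   wrapper = WrapperClass(SVC(), {'C': [0.1, 1, 10]}, kfold=5)
#   wrapper.clf.fit(X_train, Y_train)   # runs the stratified grid search
#   print(wrapper.clf.best_params_)     # best combination found by CV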