def apply_model(params):
    """
    Function for using a previously trained model for predicting.
    """
    ########### Load data
    dataset = build_dataset(params)
    ###########

    ########### Load model
    ing_model = loadModel(params['STORE_PATH'], params['RELOAD'])
    ing_model.setOptimizer()
    ###########

    ########### Apply sampling
    for s in params["EVAL_ON_SETS"]:
        # Apply model predictions
        params_prediction = {'batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s],
                             'normalize': params['NORMALIZE_IMAGES'],
                             'mean_substraction': params['MEAN_SUBSTRACTION']}
        predictions = ing_model.predictNet(dataset, params_prediction)[s]

        # Format predictions
        predictions = decode_multilabel(predictions,
                                        dataset.extra_variables['idx2word_binary'],
                                        min_val=params['MIN_PRED_VAL'],
                                        verbose=1)

        # Store result
        filepath = ing_model.model_path + '/' + s + '_labels.pred'  # results file
        listoflists2file(filepath, predictions)

        # Evaluate result
        extra_vars = dict()
        extra_vars[s] = dict()
        extra_vars[s]['word2idx'] = dataset.extra_variables['word2idx_binary']
        y_split = getattr(dataset, 'Y_' + s)
        extra_vars[s]['references'] = y_split[params['OUTPUTS_IDS_DATASET'][0]]

        for metric in params['METRICS']:
            logging.info('Evaluating on metric ' + metric)
            # Evaluate on the chosen metric
            metrics = evaluation.select[metric](pred_list=predictions,
                                                verbose=1,
                                                extra_vars=extra_vars,
                                                split=s)
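
# --- Usage sketch for apply_model() (illustrative, not a project default) ---
# The helpers used above come from the keras_wrapper toolkit (e.g. loadModel /
# saveModel in keras_wrapper.cnn_model, the evaluation.select dictionary in
# keras_wrapper.extra.evaluation); build_dataset() is assumed to be the
# project's own data-loading routine. A minimal params dict, restricted to the
# keys apply_model() actually reads, could look like this (all values are
# assumptions):
#
# params = {
#     'STORE_PATH': 'trained_models/my_model/',  # hypothetical checkpoint dir
#     'RELOAD': 10,                              # checkpoint (epoch) to reload
#     'EVAL_ON_SETS': ['val', 'test'],
#     'BATCH_SIZE': 64,
#     'PARALLEL_LOADERS': 8,
#     'NORMALIZE_IMAGES': True,
#     'MEAN_SUBSTRACTION': False,
#     'MIN_PRED_VAL': 0.5,
#     'OUTPUTS_IDS_DATASET': ['labels'],         # hypothetical output id
#     'METRICS': ['multilabel_metrics'],
# }
# apply_model(params)
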
def evaluate(self, epoch, counter_name='epoch', logs=None):
    if logs is None:
        logs = {}
    # Change inputs and outputs mappings for evaluation
    self.changeInOutMappings()

    # Evaluate on each set separately
    all_metrics = []

    for s in self.set_name:
        # Apply model predictions
        if self.beam_search:
            params_prediction = {'max_batch_size': self.batch_size,
                                 'n_parallel_loaders': self.extra_vars['n_parallel_loaders'],
                                 'predict_on_sets': [s],
                                 'beam_batch_size': self.beam_batch_size if self.beam_batch_size is not None else self.batch_size,
                                 'pos_unk': False,
                                 'normalize': self.normalize,
                                 'normalization_type': self.normalization_type,
                                 'max_eval_samples': self.max_eval_samples}
            params_prediction.update(checkDefaultParamsBeamSearch(self.extra_vars))
            predictions_all = self.model_to_eval.predictBeamSearchNet(self.ds, params_prediction)[s]
        else:
            orig_size = self.extra_vars.get('eval_orig_size', False)
            params_prediction = {'batch_size': self.batch_size,
                                 'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 8),
                                 'predict_on_sets': [s],
                                 'normalize': self.normalize,
                                 'normalization_type': self.normalization_type,
                                 'max_eval_samples': self.max_eval_samples,
                                 'model_name': self.model_name}
            # Convert predictions
            postprocess_fun = None
            if self.is_3DLabel:
                postprocess_fun = [self.ds.convert_3DLabels_to_bboxes,
                                   self.extra_vars[s]['references_orig_sizes']]
            elif orig_size:
                postprocess_fun = [self.ds.resize_semantic_output,
                                   self.extra_vars[s]['eval_orig_size_id']]
            predictions_all = \
                self.model_to_eval.predictNet(self.ds, params_prediction,
                                              postprocess_fun=postprocess_fun)[s]

        # Single-output model
        if not self.gt_pos or self.gt_pos == 0 or len(self.gt_pos) == 1:
            if len(predictions_all) != 2:
                predictions_all = [predictions_all]
            gt_positions = [0]

        # Multi-output model
        else:
            gt_positions = self.gt_pos

        # Select each output to evaluate separately
        for gt_pos, type_out, these_metrics, gt_id, write_type, index2word_y, index2word_x in zip(
                gt_positions,
                self.output_types,
                self.metric_name,
                self.gt_id,
                self.write_type,
                self.index2word_y,
                self.index2word_x):

            predictions = predictions_all[gt_pos]

            if self.verbose > 0:
                print('')
                logging.info('Prediction output ' + str(gt_pos) + ': ' +
                             str(gt_id) + ' (' + str(type_out) + ')')

            # Postprocess outputs of type text
            if type_out == 'text':
                if params_prediction.get('pos_unk', False):
                    samples = predictions[0]
                    alphas = predictions[1]

                    if eval('self.ds.loaded_raw_' + s + '[0]'):
                        sources = predictions[2]
                    else:
                        sources = []
                        for preds in predictions[2]:
                            for src in preds[self.input_text_id]:
                                sources.append(src)
                        sources = decode_predictions_beam_search(sources,
                                                                 index2word_x,
                                                                 pad_sequences=True,
                                                                 verbose=self.verbose)
                    heuristic = self.extra_vars['heuristic']
                else:
                    samples = predictions
                    alphas = None
                    heuristic = None
                    sources = None

                if self.out_pred_idx is not None:
                    samples = samples[self.out_pred_idx]

                # Convert predictions into sentences
                if self.beam_search:
                    predictions = decode_predictions_beam_search(samples,
                                                                 index2word_y,
                                                                 alphas=alphas,
                                                                 x_text=sources,
                                                                 heuristic=heuristic,
                                                                 mapping=self.extra_vars.get('mapping', None),
                                                                 verbose=self.verbose)
                else:
                    probs = predictions
                    predictions = decode_predictions(predictions,
                                                     1,  # always set temperature to 1
                                                     index2word_y,
                                                     self.sampling_type,
                                                     verbose=self.verbose)

                # Apply detokenization function if needed
                if self.extra_vars.get('apply_detokenization', False):
                    predictions = list(map(self.extra_vars['detokenize_f'], predictions))

            # Postprocess outputs of type binary
            elif type_out == 'binary':
                predictions = decode_multilabel(predictions,
                                                index2word_y,
                                                min_val=self.min_pred_multilabel[gt_pos],
                                                verbose=self.verbose)

                # Prepare references
                y_split = getattr(self.ds, 'Y_' + s)
                y_raw = y_split[gt_id]
                self.extra_vars[gt_pos][s]['references'] = self.ds.loadBinary(y_raw, gt_id)

            # Postprocess outputs of type 3DLabel
            elif type_out == '3DLabel':
                self.extra_vars[gt_pos][s] = dict()
                y_split = getattr(self.ds, 'Y_' + s)
                ref = y_split[gt_id]
                [ref, original_sizes] = self.ds.convert_GT_3DLabels_to_bboxes(ref)
                self.extra_vars[gt_pos][s]['references'] = ref
                self.extra_vars[gt_pos][s]['references_orig_sizes'] = original_sizes

            # Postprocess outputs of type 3DSemanticLabel
            elif type_out == '3DSemanticLabel':
                self.extra_vars[gt_pos]['eval_orig_size'] = self.eval_orig_size
                self.extra_vars[gt_pos][s] = dict()
                y_split = getattr(self.ds, 'Y_' + s)
                ref = y_split[gt_id]
                if self.eval_orig_size:
                    old_crop = copy.deepcopy(self.ds.img_size_crop)
                    self.ds.img_size_crop = copy.deepcopy(self.ds.img_size)
                    self.extra_vars[gt_pos][s]['eval_orig_size_id'] = np.array([gt_id] * len(ref))
                ref = self.ds.load_GT_3DSemanticLabels(ref, gt_id)
                if self.eval_orig_size:
                    self.ds.img_size_crop = copy.deepcopy(old_crop)
                self.extra_vars[gt_pos][s]['references'] = ref

            # Other output data types
            else:
                y_split = getattr(self.ds, 'Y_' + s)
                self.extra_vars[gt_pos][s]['references'] = y_split[gt_id]

            # Store predictions
            if self.write_samples:
                # Store result
                filepath = self.save_path + '/' + s + '_' + counter_name + '_' + str(epoch) + '_output_' + str(gt_pos) + '.pred'  # results file
                if write_type == 'list':
                    list2file(filepath, predictions)
                elif write_type == 'vqa':
                    try:
                        y_split = getattr(self.ds, 'Y_' + s)
                        refs = y_split[gt_id]
                    except Exception:
                        refs = ['N/A' for _ in range(probs.shape[0])]
                    extra_data_plot = {'reference': refs,
                                       'probs': probs,
                                       'vocab': index2word_y}
                    list2vqa(filepath, predictions,
                             self.extra_vars[gt_pos][s]['question_ids'],
                             extra=extra_data_plot)
                elif write_type == 'listoflists':
                    listoflists2file(filepath, predictions)
                elif write_type == 'numpy':
                    numpy2file(filepath, predictions)
                elif write_type == '3DLabels':
                    raise NotImplementedError('Write 3DLabels function is not implemented')
                elif write_type == '3DSemanticLabel':
                    folder_path = self.save_path + '/' + s + '_' + counter_name + '_' + str(epoch)  # results folder
                    numpy2imgs(folder_path, predictions,
                               eval('self.ds.X_' + s + '["' + self.input_id + '"]'),
                               self.ds)
                else:
                    raise NotImplementedError('The store type "' + str(write_type) + '" is not implemented.')

            # Evaluate on each metric
            for metric in these_metrics:
                if self.verbose > 0:
                    logging.info('Evaluating on metric ' + metric)
                filepath = self.save_path + '/' + s + '.' + metric  # results file

                if s == 'train':
                    logging.info("WARNING: evaluation results on 'train' split might be incorrect when "
                                 "applying random image shuffling.")

                # Evaluate on the chosen metric
                metrics = evaluation.select[metric](pred_list=predictions,
                                                    verbose=self.verbose,
                                                    extra_vars=self.extra_vars[gt_pos],
                                                    split=s)

                # Print results to file and store in model log
                with open(filepath, 'a') as f:
                    header = counter_name + ','
                    line = str(epoch) + ','
                    # Store in model log
                    self.model_to_eval.log(s, counter_name, epoch)
                    for metric_ in sorted(metrics):
                        value = metrics[metric_]
                        # Multiple-output model
                        if self.gt_pos and self.gt_pos != 0:
                            metric_ += '_output_' + str(gt_pos)
                        all_metrics.append(metric_)
                        header += metric_ + ','
                        line += str(value) + ','
                        # Store in model log
                        self.model_to_eval.log(s, metric_, value)
                    if not self.written_header:
                        f.write(header + '\n')
                        self.written_header = True
                    f.write(line + '\n')

                if self.verbose > 0:
                    logging.info('Done evaluating on metric ' + metric)

    # Store losses
    if logs.get('loss') is not None:
        self.model_to_eval.log('train', 'train_loss', logs['loss'])
    if logs.get('valid_loss') is not None:
        self.model_to_eval.log('val', 'val_loss', logs['valid_loss'])

    # Plot results so far
    if self.do_plot:
        if self.metric_name:
            self.model_to_eval.plot(counter_name, set(all_metrics),
                                    self.set_name, upperbound=self.max_plot)

    # Save the model
    if self.save_each_evaluation:
        from keras_wrapper.cnn_model import saveModel
        saveModel(self.model_to_eval, epoch, store_iter=not self.eval_on_epochs)

    # Recover inputs and outputs mappings for resume training
    self.recoverInOutMappings()
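
# --- Illustrative content of a results file written by evaluate() above ---
# Each call appends one CSV-style row to '<save_path>/<split>.<metric>'; the
# header is written only once. The metric column names depend on the chosen
# evaluation function, so the ones shown here are assumptions, not the actual
# output of any particular metric:
#
#   epoch,Bleu_4,METEOR,ROUGE_L,
#   1,0.231,0.201,0.442,
#   2,0.257,0.214,0.460,
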
def evaluate(self, epoch, counter_name='epoch', logs=None):
    if logs is None:
        logs = {}
    # Change inputs and outputs mappings for evaluation
    self.changeInOutMappings()

    # Evaluate on each set separately
    all_metrics = []

    for s in self.set_name:
        # Apply model predictions
        if self.beam_search:
            params_prediction = {'max_batch_size': self.batch_size,
                                 'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 1),
                                 'predict_on_sets': [s],
                                 'beam_batch_size': self.beam_batch_size if self.beam_batch_size is not None else self.batch_size,
                                 'pos_unk': False,
                                 'normalize': self.normalize,
                                 'normalization_type': self.normalization_type,
                                 'max_eval_samples': self.max_eval_samples}
            params_prediction.update(checkDefaultParamsBeamSearch(self.extra_vars))
            predictions_all = self.model_to_eval.predictBeamSearchNet(self.ds, params_prediction)[s]
        else:
            orig_size = self.extra_vars.get('eval_orig_size', False)
            params_prediction = {'batch_size': self.batch_size,
                                 'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 1),
                                 'predict_on_sets': [s],
                                 'normalize': self.normalize,
                                 'normalization_type': self.normalization_type,
                                 'max_eval_samples': self.max_eval_samples,
                                 'model_name': self.model_name}
            # Convert predictions
            postprocess_fun = None
            if self.is_3DLabel:
                postprocess_fun = [self.ds.convert_3DLabels_to_bboxes,
                                   self.extra_vars[s]['references_orig_sizes']]
            elif orig_size:
                postprocess_fun = [self.ds.resize_semantic_output,
                                   self.extra_vars[s]['eval_orig_size_id']]
            predictions_all = \
                self.model_to_eval.predictNet(self.ds, params_prediction,
                                              postprocess_fun=postprocess_fun)[s]

        # Single-output model
        if not self.gt_pos or self.gt_pos == 0 or len(self.gt_pos) == 1:
            if len(predictions_all) != 2:
                predictions_all = [predictions_all]
            gt_positions = [0]

        # Multi-output model
        else:
            gt_positions = self.gt_pos

        # Select each output to evaluate separately
        for gt_pos, type_out, these_metrics, gt_id, write_type, index2word_y, index2word_x in zip(
                gt_positions,
                self.output_types,
                self.metric_name,
                self.gt_id,
                self.write_type,
                self.index2word_y,
                self.index2word_x):

            predictions = predictions_all[gt_pos]

            if self.verbose > 0:
                print('')
                logging.info('Prediction output ' + str(gt_pos) + ': ' +
                             str(gt_id) + ' (' + str(type_out) + ')')

            # Postprocess outputs of type text
            if type_out == 'text':
                if params_prediction.get('pos_unk', False):
                    samples = predictions[0]
                    alphas = predictions[1]

                    if eval('self.ds.loaded_raw_' + s + '[0]'):
                        sources = predictions[2]
                    else:
                        sources = []
                        for preds in predictions[2]:
                            for src in preds[self.input_text_id]:
                                sources.append(src)
                        sources = decode_predictions_beam_search(sources,
                                                                 index2word_x,
                                                                 pad_sequences=True,
                                                                 verbose=self.verbose)
                    heuristic = self.extra_vars['heuristic']
                else:
                    samples = predictions
                    alphas = None
                    heuristic = None
                    sources = None

                if self.out_pred_idx is not None:
                    samples = samples[self.out_pred_idx]

                # Convert predictions into sentences
                if self.beam_search:
                    predictions = decode_predictions_beam_search(samples,
                                                                 index2word_y,
                                                                 glossary=self.extra_vars.get('glossary', None),
                                                                 alphas=alphas,
                                                                 x_text=sources,
                                                                 heuristic=heuristic,
                                                                 mapping=self.extra_vars.get('mapping', None),
                                                                 verbose=self.verbose)
                else:
                    probs = predictions
                    predictions = decode_predictions(predictions,
                                                     1,  # always set temperature to 1
                                                     index2word_y,
                                                     self.sampling_type,
                                                     verbose=self.verbose)

                # Apply detokenization function if needed
                if self.extra_vars.get('apply_detokenization', False):
                    predictions = list(map(self.extra_vars['detokenize_f'], predictions))

            # Postprocess outputs of type binary
            elif type_out == 'binary':
                predictions = decode_multilabel(predictions,
                                                index2word_y,
                                                min_val=self.min_pred_multilabel[gt_pos],
                                                verbose=self.verbose)

                # Prepare references
                y_split = getattr(self.ds, 'Y_' + s)
                y_raw = y_split[gt_id]
                self.extra_vars[gt_pos][s]['references'] = self.ds.loadBinary(y_raw, gt_id)

            # Postprocess outputs of type 3DLabel
            elif type_out == '3DLabel':
                self.extra_vars[gt_pos][s] = dict()
                y_split = getattr(self.ds, 'Y_' + s)
                ref = y_split[gt_id]
                [ref, original_sizes] = self.ds.convert_GT_3DLabels_to_bboxes(ref)
                self.extra_vars[gt_pos][s]['references'] = ref
                self.extra_vars[gt_pos][s]['references_orig_sizes'] = original_sizes

            # Postprocess outputs of type 3DSemanticLabel
            elif type_out == '3DSemanticLabel':
                self.extra_vars[gt_pos]['eval_orig_size'] = self.eval_orig_size
                self.extra_vars[gt_pos][s] = dict()
                y_split = getattr(self.ds, 'Y_' + s)
                ref = y_split[gt_id]
                if self.eval_orig_size:
                    old_crop = copy.deepcopy(self.ds.img_size_crop)
                    self.ds.img_size_crop = copy.deepcopy(self.ds.img_size)
                    self.extra_vars[gt_pos][s]['eval_orig_size_id'] = np.array([gt_id] * len(ref))
                ref = self.ds.load_GT_3DSemanticLabels(ref, gt_id)
                if self.eval_orig_size:
                    self.ds.img_size_crop = copy.deepcopy(old_crop)
                self.extra_vars[gt_pos][s]['references'] = ref

            # Other output data types
            else:
                y_split = getattr(self.ds, 'Y_' + s)
                self.extra_vars[gt_pos][s]['references'] = y_split[gt_id]

            # Store predictions
            if self.write_samples:
                # Store result
                filepath = self.save_path + '/' + s + '_' + counter_name + '_' + str(epoch) + '_output_' + str(gt_pos) + '.pred'  # results file
                if write_type == 'list':
                    list2file(filepath, predictions)
                elif write_type == 'vqa':
                    try:
                        y_split = getattr(self.ds, 'Y_' + s)
                        refs = y_split[gt_id]
                    except Exception:
                        refs = ['N/A' for _ in range(probs.shape[0])]
                    extra_data_plot = {'reference': refs,
                                       'probs': probs,
                                       'vocab': index2word_y}
                    list2vqa(filepath, predictions,
                             self.extra_vars[gt_pos][s]['question_ids'],
                             extra=extra_data_plot)
                elif write_type == 'listoflists':
                    listoflists2file(filepath, predictions)
                elif write_type == 'numpy':
                    numpy2file(filepath, predictions)
                elif write_type == '3DLabels':
                    raise NotImplementedError('Write 3DLabels function is not implemented')
                elif write_type == '3DSemanticLabel':
                    folder_path = self.save_path + '/' + s + '_' + counter_name + '_' + str(epoch)  # results folder
                    numpy2imgs(folder_path, predictions,
                               eval('self.ds.X_' + s + '["' + self.input_id + '"]'),
                               self.ds)
                else:
                    raise NotImplementedError('The store type "' + str(write_type) + '" is not implemented.')

            # Evaluate on each metric
            for metric in these_metrics:
                if self.verbose > 0:
                    logging.info('Evaluating on metric ' + metric)
                filepath = self.save_path + '/' + s + '.' + metric  # results file

                if s == 'train':
                    logging.info("WARNING: evaluation results on 'train' split might be incorrect when "
                                 "applying random image shuffling.")

                # Evaluate on the chosen metric
                metrics = evaluation.select[metric](pred_list=predictions,
                                                    verbose=self.verbose,
                                                    extra_vars=self.extra_vars[gt_pos],
                                                    split=s)

                # Print results to file and store in model log
                with open(filepath, 'a') as f:
                    header = counter_name + ','
                    line = str(epoch) + ','
                    # Store in model log
                    self.model_to_eval.log(s, counter_name, epoch)
                    for metric_ in sorted(metrics):
                        value = metrics[metric_]
                        # Multiple-output model
                        if self.gt_pos and self.gt_pos != 0:
                            metric_ += '_output_' + str(gt_pos)
                        all_metrics.append(metric_)
                        header += metric_ + ','
                        line += str(value) + ','
                        # Store in model log
                        self.model_to_eval.log(s, metric_, value)
                    if not self.written_header:
                        f.write(header + '\n')
                        self.written_header = True
                    f.write(line + '\n')

                if self.verbose > 0:
                    logging.info('Done evaluating on metric ' + metric)

    # Store losses
    if logs.get('loss') is not None:
        self.model_to_eval.log('train', 'train_loss', logs['loss'])
    if logs.get('valid_loss') is not None:
        self.model_to_eval.log('val', 'val_loss', logs['valid_loss'])

    # Plot results so far
    if self.do_plot:
        if self.metric_name:
            self.model_to_eval.plot(counter_name, set(all_metrics),
                                    self.set_name, upperbound=self.max_plot)

    # Save the model
    if self.save_each_evaluation:
        from keras_wrapper.cnn_model import saveModel
        saveModel(self.model_to_eval, epoch, store_iter=not self.eval_on_epochs)

    # Recover inputs and outputs mappings for resume training
    self.recoverInOutMappings()
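
# --- Sketch of the extra_vars structure that evaluate() expects ---
# Reconstructed from the accesses in the methods above; the key names are the
# ones this code reads, while the concrete values are illustrative assumptions
# only.
#
# extra_vars = {
#     'n_parallel_loaders': 8,          # optional, read with .get()
#     'heuristic': 0,                   # only read when 'pos_unk' is enabled
#     'mapping': None,                  # optional, passed to beam-search decoding
#     'glossary': None,                 # optional, passed to beam-search decoding
#     'apply_detokenization': False,
#     'detokenize_f': my_detokenizer,   # required only if apply_detokenization is True
#     'eval_orig_size': False,          # only used for 3DSemanticLabel outputs
#     0: {                              # one entry per evaluated output position (gt_pos)
#         'val': {
#             'references': [...],      # ground truth for the split; filled in above
#                                       # for binary/3DLabel/3DSemanticLabel outputs
#             # 'question_ids': [...],  # needed only for the 'vqa' write_type
#         },
#     },
# }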