def score_corpus(args, params): print "Using an ensemble of %d models" % len(args.models) models = [loadModel(m, -1, full_path=True) for m in args.models] dataset = loadDataset(args.dataset) if args.source is not None: dataset = update_dataset_from_file(dataset, args.source, params, splits=args.splits, output_text_filename=args.target, compute_state_below=True) params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]] # Apply scoring extra_vars = dict() extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD']) for s in args.splits: # Apply model predictions params_prediction = {'max_batch_size': params['BATCH_SIZE'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'predict_on_sets': [s]} if params['BEAM_SEARCH']: params_prediction['beam_size'] = params['BEAM_SIZE'] params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST'] params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH'] params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL'] params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL'] params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET'] params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET'] params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False) params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0) params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False) params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False) params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0) params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0) params_prediction['pos_unk'] = params.get('POS_UNK', False) params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \ else params.get('MAX_OUTPUT_TEXT_LEN', 50) params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True) params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3) params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True) params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2) beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, verbose=args.verbose) scores = beam_searcher.scoreNet()[s] # Store result if args.dest is not None: filepath = args.dest # results file if params['SAMPLING_SAVE_MODE'] == 'list': list2file(filepath, scores) elif params['SAMPLING_SAVE_MODE'] == 'numpy': numpy2file(filepath, scores) else: raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.') else: print scores
def evaluate(self, epoch, counter_name='epoch', logs=None):
    """
    Evaluation function. Works for evaluators external to Keras.
    Computes the predictions according to the configuration and evaluates them, storing the results.
    :param epoch: Current epoch or update.
    :param counter_name: 'epoch' or 'update', string used for logging.
    :param logs:
    :return:
    """
    if logs is None:
        logs = {}
    # Change inputs and outputs mappings for evaluation
    self.changeInOutMappings()

    # Evaluate on each set separately
    all_metrics = []

    for s in self.set_name:
        # Apply model predictions
        if self.beam_search:
            params_prediction = {'max_batch_size': self.batch_size,
                                 'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 1),
                                 'predict_on_sets': [s],
                                 'beam_batch_size': self.beam_batch_size if self.beam_batch_size is not None else self.batch_size,
                                 'pos_unk': False,
                                 'normalize': self.normalize,
                                 'normalization_type': self.normalization_type,
                                 'max_eval_samples': self.max_eval_samples}

            params_prediction.update(checkDefaultParamsBeamSearch(self.extra_vars))
            predictions_all = self.model_to_eval.predictBeamSearchNet(self.ds, params_prediction)[s]
        else:
            orig_size = self.extra_vars.get('eval_orig_size', False)
            params_prediction = {'batch_size': self.batch_size,
                                 'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 1),
                                 'predict_on_sets': [s],
                                 'normalize': self.normalize,
                                 'normalization_type': self.normalization_type,
                                 'max_eval_samples': self.max_eval_samples,
                                 'model_name': self.model_name}
            # Convert predictions
            postprocess_fun = None
            if self.is_3DLabel:
                postprocess_fun = [self.ds.convert_3DLabels_to_bboxes,
                                   self.extra_vars[s]['references_orig_sizes']]
            elif orig_size:
                postprocess_fun = [self.ds.resize_semantic_output,
                                   self.extra_vars[s]['eval_orig_size_id']]
            predictions_all = self.model_to_eval.predictNet(self.ds,
                                                            params_prediction,
                                                            postprocess_fun=postprocess_fun)[s]

        # Single-output model
        if not self.gt_pos or self.gt_pos == 0 or len(self.gt_pos) == 1:
            if len(predictions_all) != 2:
                predictions_all = [predictions_all]
            gt_positions = [0]
        # Multi-output model
        else:
            gt_positions = self.gt_pos

        # Select each output to evaluate separately
        for gt_pos, type_out, these_metrics, gt_id, write_type, index2word_y, index2word_x in zip(
                gt_positions,
                self.output_types,
                self.metric_name,
                self.gt_id,
                self.write_type,
                self.index2word_y,
                self.index2word_x):

            predictions = predictions_all[gt_pos]
            prediction_costs = None

            if self.verbose > 0:
                print('')
                logger.info('Prediction output ' + str(gt_pos) + ': ' +
                            str(gt_id) + ' (' + str(type_out) + ')')

            # Postprocess outputs of type text
            if type_out == 'text':
                samples = predictions['samples']
                prediction_costs = predictions['costs']
                alphas = None
                sources = None
                if params_prediction.get('pos_unk', False):
                    alphas = predictions['alphas']
                    if eval('self.ds.loaded_raw_' + s + '[0]'):
                        sources = predictions['sources']
                    else:
                        sources = []
                        for preds in predictions['sources']:
                            for src in preds[self.input_text_id]:
                                sources.append(src)
                        sources = decode_predictions_beam_search(sources,
                                                                 index2word_x,
                                                                 pad_sequences=True,
                                                                 verbose=self.verbose)

                if self.out_pred_idx is not None:
                    samples = samples[self.out_pred_idx]

                # Convert predictions into sentences
                if self.beam_search:
                    decoded_predictions = decode_predictions_beam_search(samples,
                                                                         index2word_y,
                                                                         glossary=self.extra_vars.get('glossary', None),
                                                                         alphas=alphas,
                                                                         x_text=sources,
                                                                         heuristic=self.extra_vars.get('heuristic', 0),
                                                                         mapping=self.extra_vars.get('mapping', None),
                                                                         verbose=self.verbose)
                else:
                    probs = predictions
                    decoded_predictions = decode_predictions(predictions,
                                                             1,  # always set temperature to 1
                                                             index2word_y,
                                                             self.sampling_type,
                                                             verbose=self.verbose)

                # Apply detokenization function if needed
                if self.extra_vars.get('apply_detokenization', False):
                    decoded_predictions = list(map(self.extra_vars['detokenize_f'], decoded_predictions))

            # Postprocess outputs of type binary
            elif type_out == 'binary':
                decoded_predictions = decode_multilabel(predictions,
                                                        index2word_y,
                                                        min_val=self.min_pred_multilabel[gt_pos],
                                                        verbose=self.verbose)

                # Prepare references
                y_split = getattr(self.ds, 'Y_' + s)
                y_raw = y_split[gt_id]
                self.extra_vars[gt_pos][s]['references'] = self.ds.loadBinary(y_raw, gt_id)

            # Postprocess outputs of type 3DLabel
            elif type_out == '3DLabel':
                self.extra_vars[gt_pos][s] = dict()
                y_split = getattr(self.ds, 'Y_' + s)
                ref = y_split[gt_id]
                [ref, original_sizes] = self.ds.convert_GT_3DLabels_to_bboxes(ref)
                self.extra_vars[gt_pos][s]['references'] = ref
                self.extra_vars[gt_pos][s]['references_orig_sizes'] = original_sizes

            # Postprocess outputs of type 3DSemanticLabel
            elif type_out == '3DSemanticLabel':
                self.extra_vars[gt_pos]['eval_orig_size'] = self.eval_orig_size
                self.extra_vars[gt_pos][s] = dict()
                y_split = getattr(self.ds, 'Y_' + s)
                ref = y_split[gt_id]
                if self.eval_orig_size:
                    old_crop = copy.deepcopy(self.ds.img_size_crop)
                    self.ds.img_size_crop = copy.deepcopy(self.ds.img_size)
                    self.extra_vars[gt_pos][s]['eval_orig_size_id'] = np.array([gt_id] * len(ref))
                ref = self.ds.load_GT_3DSemanticLabels(ref, gt_id)
                if self.eval_orig_size:
                    self.ds.img_size_crop = copy.deepcopy(old_crop)
                self.extra_vars[gt_pos][s]['references'] = ref

            # Other output data types
            else:
                y_split = getattr(self.ds, 'Y_' + s)
                self.extra_vars[gt_pos][s]['references'] = y_split[gt_id]

            # Store predictions
            if self.write_samples:
                # Store result
                filepath = os.path.join(self.save_path,
                                        s + '_' + counter_name + '_' + str(epoch) +
                                        '_output_' + str(gt_pos) + '.pred')  # results file
                if write_type == 'list':
                    list2file(filepath, decoded_predictions)
                elif write_type == 'vqa':
                    try:
                        y_split = getattr(self.ds, 'Y_' + s)
                        refs = y_split[gt_id]
                    except Exception:
                        refs = ['N/A' for _ in range(probs.shape[0])]
                    extra_data_plot = {'reference': refs,
                                       'probs': probs,
                                       'vocab': index2word_y}
                    list2vqa(filepath,
                             decoded_predictions,
                             self.extra_vars[gt_pos][s]['question_ids'],
                             extra=extra_data_plot)
                elif write_type == 'listoflists':
                    listoflists2file(filepath, decoded_predictions)
                elif write_type == 'numpy':
                    numpy2file(filepath, decoded_predictions)
                elif write_type == '3DLabels':
                    raise NotImplementedError('Write 3DLabels function is not implemented')
                elif write_type == '3DSemanticLabel':
                    folder_path = os.path.join(self.save_path,
                                               s + '_' + counter_name + '_' + str(epoch))
                    numpy2imgs(folder_path,
                               decoded_predictions,
                               eval('self.ds.X_' + s + '["' + self.input_id + '"]'),
                               self.ds)
                else:
                    raise NotImplementedError('The store type "' + self.write_type + '" is not implemented.')

            # Store current epoch/iteration in model log
            self.model_to_eval.log(s, counter_name, epoch)

            # Evaluate on each metric
            for metric in these_metrics:
                if self.verbose > 0:
                    logger.info('Evaluating on metric ' + metric)
                filepath = os.path.join(self.save_path, s + '.' + metric)

                if s == 'train':
                    logger.info("WARNING: evaluation results on 'train' split might be incorrect when "
                                "applying random image shuffling.")

                # Evaluate on the chosen metric
                metrics = evaluation.select[metric](pred_list=decoded_predictions,
                                                    verbose=self.verbose,
                                                    extra_vars=self.extra_vars[gt_pos],
                                                    split=s,
                                                    costs=prediction_costs)
                self.model_to_eval.log_tensorboard(metrics, epoch, split=s)

                # Print results to file and store in model log
                with open(filepath, 'a') as f:
                    header = counter_name + ','
                    line = str(epoch) + ','
                    for metric_ in sorted(metrics):
                        value = metrics[metric_]
                        # Multiple-output model
                        if self.gt_pos and self.gt_pos != 0:
                            metric_ += '_output_' + str(gt_pos)
                        all_metrics.append(metric_)
                        header += metric_ + ','
                        line += str(value) + ','
                        # Store in model log
                        self.model_to_eval.log(s, metric_, value)
                    if not self.written_header:
                        f.write(header + '\n')
                        self.written_header = True
                    f.write(line + '\n')

                if self.verbose > 0:
                    logger.info('Done evaluating on metric ' + metric)

    # Store losses
    if logs.get('loss') is not None:
        self.model_to_eval.log('train', 'train_loss', logs['loss'])
    if logs.get('valid_loss') is not None:
        self.model_to_eval.log('val', 'val_loss', logs['valid_loss'])

    # Plot results so far
    if self.do_plot:
        if self.metric_name:
            self.model_to_eval.plot(counter_name, set(all_metrics),
                                    self.set_name, upperbound=self.max_plot)

    # Save the model
    if self.save_each_evaluation:
        from keras_wrapper.cnn_model import saveModel
        saveModel(self.model_to_eval, epoch, store_iter=not self.eval_on_epochs)

    # Recover inputs and outputs mappings for resume training
    self.recoverInOutMappings()
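
# --- Usage sketch (illustrative only) ---------------------------------------
# How the evaluate() method above is typically driven: it belongs to an
# evaluation callback that already holds the dataset (self.ds), the model
# (self.model_to_eval) and the metric configuration. Assuming such a callback
# instance `eval_callback` has been built elsewhere, an external training loop
# can trigger one evaluation per epoch (or per update). `train_one_epoch` and
# `n_epochs` are placeholders, not part of the code above.
n_epochs = 10  # placeholder
for epoch in range(1, n_epochs + 1):
    train_one_epoch()                                    # placeholder training step
    eval_callback.evaluate(epoch, counter_name='epoch')  # predict, score and log this epoch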
def score_corpus(args, params): """ Use one or several translation models for scoring source--target pairs- :param argparse.Namespace args: Arguments given to the method: * dataset: Dataset instance with data. * source: Text file with source sentences. * target: Text file with target sentences. * splits: Splits to sample. Should be already included in the dataset object. * dest: Output file to save scores. * weights: Weight given to each model in the ensemble. You should provide the same number of weights than models. By default, it applies the same weight to each model (1/N). * verbose: Be verbose or not. * config: Config .pkl for loading the model configuration. If not specified, hyperparameters are read from config.py. * models: Path to the models. :param dict params: parameters of the translation model. """ from data_engine.prepare_data import update_dataset_from_file from keras_wrapper.dataset import loadDataset from keras_wrapper.cnn_model import loadModel from keras_wrapper.model_ensemble import BeamSearchEnsemble logging.info("Using an ensemble of %d models" % len(args.models)) models = [loadModel(m, -1, full_path=True) for m in args.models] dataset = loadDataset(args.dataset) dataset = update_dataset_from_file(dataset, args.source, params, splits=args.splits, output_text_filename=args.target, compute_state_below=True) params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['OUTPUTS_IDS_DATASET'][0]] # Apply scoring extra_vars = dict() extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD']) model_weights = args.weights if model_weights is not None and model_weights != []: assert len(model_weights) == len( models ), 'You should give a weight to each model. You gave %d models and %d weights.' 
% ( len(models), len(model_weights)) model_weights = map(float, model_weights) if len(model_weights) > 1: logger.info('Giving the following weights to each model: %s' % str(model_weights)) for s in args.splits: # Apply model predictions params_prediction = { 'max_batch_size': params['BATCH_SIZE'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'predict_on_sets': [s] } if params['BEAM_SEARCH']: params_prediction['beam_size'] = params['BEAM_SIZE'] params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST'] params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH'] params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL'] params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL'] params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET'] params_prediction['dataset_outputs'] = params[ 'OUTPUTS_IDS_DATASET'] params_prediction['normalize_probs'] = params.get( 'NORMALIZE_SAMPLING', False) params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0) params_prediction['coverage_penalty'] = params.get( 'COVERAGE_PENALTY', False) params_prediction['length_penalty'] = params.get( 'LENGTH_PENALTY', False) params_prediction['length_norm_factor'] = params.get( 'LENGTH_NORM_FACTOR', 0.0) params_prediction['coverage_norm_factor'] = params.get( 'COVERAGE_NORM_FACTOR', 0.0) params_prediction['pos_unk'] = params.get('POS_UNK', False) params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \ else params.get('MAX_OUTPUT_TEXT_LEN', 50) params_prediction['output_max_length_depending_on_x'] = params.get( 'MAXLEN_GIVEN_X', True) params_prediction[ 'output_max_length_depending_on_x_factor'] = params.get( 'MAXLEN_GIVEN_X_FACTOR', 3) params_prediction['output_min_length_depending_on_x'] = params.get( 'MINLEN_GIVEN_X', True) params_prediction[ 'output_min_length_depending_on_x_factor'] = params.get( 'MINLEN_GIVEN_X_FACTOR', 2) params_prediction['attend_on_output'] = params.get( 'ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower()) beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, model_weights=model_weights, verbose=args.verbose) scores = beam_searcher.scoreNet()[s] # Store result if args.dest is not None: filepath = args.dest # results file if params['SAMPLING_SAVE_MODE'] == 'list': list2file(filepath, scores) elif params['SAMPLING_SAVE_MODE'] == 'numpy': numpy2file(filepath, scores) else: raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.') else: print(scores)
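
# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal example of driving score_corpus() from a script. Every path and
# value below is a placeholder, and load_parameters() stands in for however the
# hyperparameter dict is normally obtained (config.py or a stored .pkl); only
# the attribute names on `args` are taken from the docstring above.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(models=['trained_models/model_1'],  # placeholder model path(s)
                     dataset='datasets/Dataset.pkl',     # placeholder Dataset pickle
                     source='data/test.src',             # placeholder source text file
                     target='data/test.trg',             # placeholder target text file
                     splits=['test'],
                     dest='scores.txt',
                     weights=[],                         # empty: uniform weights (1/N)
                     verbose=0,
                     config=None)
    params = load_parameters()  # assumed helper returning the params dict
    score_corpus(args, params)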
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)

        mapping = None if dataset.mapping == dict() else dataset.mapping
        if params['POS_UNK']:
            params_prediction['heuristic'] = params['HEURISTIC']
            input_text_id = params['INPUTS_IDS_DATASET'][0]
            vocab_src = dataset.vocabulary[input_text_id]['idx2words']
        else:
            input_text_id = None
            vocab_src = None
            mapping = None

        beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, verbose=args.verbose)
        scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] +
                                ' is not currently supported.')
        else:
            print(scores)
def score_corpus(args, params):
    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    if args.source is not None:
        dataset = update_dataset_from_file(dataset, args.source, params,
                                           splits=args.splits,
                                           output_text_filename=args.target,
                                           compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Apply scoring
    extra_vars = dict()
    extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD'])

    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(models), \
            'You should give a weight to each model. You gave %d models and %d weights.' % \
            (len(models), len(model_weights))
        model_weights = list(map(float, model_weights))
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' % str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction = {'max_batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s]}

        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT',
                                                               'transformer' in params['MODEL_TYPE'].lower())

        beam_searcher = BeamSearchEnsemble(models,
                                           dataset,
                                           params_prediction,
                                           model_weights=model_weights,
                                           verbose=args.verbose)
        scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] +
                                ' is not currently supported.')
        else:
            print(scores)
def apply_Feature_Extractor_model(params, dataset=None, extractor_model=None):
    """
    Function for using a previously trained model for sampling.
    """
    ########### Load data
    if dataset is None:
        dataset = build_dataset(params)

    ########### Load model
    if extractor_model is None and params['RELOAD'] > 0:
        extractor_model = loadModel(params['STORE_PATH'], params['RELOAD'])
    else:
        extractor_model = Feature_Extractor(params,
                                            type=params['MODEL_TYPE'],
                                            verbose=params['VERBOSE'],
                                            model_name=params['MODEL_NAME'],
                                            store_path=params['STORE_PATH'])
        # Define the inputs and outputs mapping from our Dataset instance to our model
        inputMapping = dict()
        for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
            if len(extractor_model.ids_inputs) > i:
                pos_source = dataset.ids_inputs.index(id_in)
                id_dest = extractor_model.ids_inputs[i]
                inputMapping[id_dest] = pos_source
        extractor_model.setInputsMapping(inputMapping)

    ########### Apply sampling
    extra_vars = dict()
    for s in params["EVAL_ON_SETS"]:
        # Apply model predictions
        params_prediction = {'batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s],
                             'verbose': 0}
        logging.info("<<< Predicting outputs of " + s + " set >>>")

        if params['SAMPLING_SAVE_MODE'] == 'list':
            filepath = extractor_model.model_path + '/' + s + '_sampling.pred'  # results file
            list2file(filepath, [], permission='w')

        start_time = time.time()
        eta = -1
        mode = 'w'
        for n_sample in range(0, eval('dataset.len_' + s), params.get('PREDICTION_STEP', 100)):
            params_prediction['init_sample'] = n_sample
            params_prediction['final_sample'] = min(n_sample + params.get('PREDICTION_STEP', 100),
                                                    eval('dataset.len_' + s))
            predictions = extractor_model.predictNet(dataset, params_prediction)[s]

            # Store result
            if params['SAMPLING_SAVE_MODE'] == 'list':
                filepath = extractor_model.model_path + '/' + s + '_sampling.pred'  # results file
                list2file(filepath, predictions, permission='a')
            elif params['SAMPLING_SAVE_MODE'] == 'npy':
                filepath = extractor_model.model_path + '/' + s + '_' + params.get('MODEL_TYPE', '') + '_features.npy'
                numpy2file(filepath, predictions, permission=mode)
            elif params['SAMPLING_SAVE_MODE'] == 'hdf5':
                filepath = extractor_model.model_path + '/' + s + '_' + params.get('MODEL_TYPE', '') + '_features.hdf5'
                numpy2hdf5(filepath, predictions, permission=mode)
            else:
                raise Exception('Only "list", "npy" or "hdf5" are allowed in "SAMPLING_SAVE_MODE"')
            mode = 'a'

            sys.stdout.write('\r')
            sys.stdout.write("\t Processed %d/%d - ETA: %ds " % (n_sample, eval('dataset.len_' + s), int(eta)))
            sys.stdout.flush()
            eta = (eval('dataset.len_' + s) - n_sample) * (time.time() - start_time) / max(n_sample, 1)
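
# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal params dict covering the keys read by apply_Feature_Extractor_model
# above. All values are placeholders; build_dataset(), Feature_Extractor and the
# real key values come from the surrounding project's configuration.
params = {
    'RELOAD': 0,                                # > 0 reloads a stored model from STORE_PATH
    'STORE_PATH': 'trained_models/extractor/',  # placeholder path
    'MODEL_TYPE': 'feature_extractor_cnn',      # placeholder model type
    'MODEL_NAME': 'feature_extractor',          # placeholder model name
    'VERBOSE': 1,
    'INPUTS_IDS_DATASET': ['images'],           # placeholder dataset input id
    'EVAL_ON_SETS': ['val', 'test'],
    'BATCH_SIZE': 50,
    'PARALLEL_LOADERS': 1,
    'PREDICTION_STEP': 100,                     # samples predicted per chunk
    'SAMPLING_SAVE_MODE': 'hdf5',               # 'list', 'npy' or 'hdf5'
}
apply_Feature_Extractor_model(params)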