def predict_dataset(self, dataset, progress_bar=True, apply_preproc=True):
    """Predict every sample of a dataset.

    Parameters
    ----------
    dataset : Dataset
        Dataset to predict
    progress_bar : bool, optional
        hide or show a progress bar
    apply_preproc : bool, optional
        whether to apply this predictor's data pre-processor and
        text post-processor while iterating the dataset

    Yields
    ------
    tuple of (PredictionResult, dict)
        a single prediction result paired with the dataset entry
        (sample) it was produced from
    """
    # Select the processors up front; `None` disables processing inside
    # InputDataset.
    if apply_preproc:
        data_proc = self.data_preproc
        text_proc = self.text_postproc
    else:
        data_proc = None
        text_proc = None

    wrapped = InputDataset(dataset, data_proc, text_proc)

    # Predictions come back in dataset order, so pair them with the
    # corresponding samples by position.
    results = self.predict_input_dataset(wrapped, progress_bar)
    for pair in zip(results, dataset.samples()):
        yield pair
def run(self, _sentinel=None, gt_dataset=None, pred_dataset=None, processes=1, progress_bar=False):
    """Evaluate a prediction dataset against a ground-truth dataset.

    Parameters
    ----------
    _sentinel : do not use
        guard that forces callers to pass all arguments by keyword,
        for safety
    gt_dataset : Dataset, optional
        the ground truth (ignored if a preloaded ground truth exists)
    pred_dataset : Dataset
        the prediction dataset
    processes : int, optional
        the processes to use for preprocessing and evaluation
    progress_bar : bool, optional
        show a progress bar

    Returns
    -------
    dict
        evaluation dictionary as produced by `evaluate`

    Raises
    ------
    Exception
        if any argument is passed positionally
    """
    # `is not None` rather than truthiness: even a falsy positional
    # value (0, "", False) means the caller bypassed the keyword guard.
    if _sentinel is not None:
        raise Exception("You must call run by using parameter names.")

    if self.preloaded_gt:
        gt_data = self.preloaded_gt
    else:
        gt_data = self._load_preprocessed_texts(gt_dataset, processes, progress_bar)

    pred_data = self._load_preprocessed_texts(pred_dataset, processes, progress_bar)

    return self.evaluate(gt_data=gt_data, pred_data=pred_data, processes=processes,
                         progress_bar=progress_bar, skip_empty_gt=self.skip_empty_gt)

def _load_preprocessed_texts(self, dataset, processes, progress_bar):
    """Run the text preprocessor over `dataset` and collect the text of each sample."""
    input_dataset = InputDataset(dataset, None, self.text_preprocessor, processes=processes)
    # generator(text_only=True) yields (data, text, params) triples; only
    # the text component is needed for evaluation.
    return [
        txt for _, txt, _ in tqdm_wrapper(
            input_dataset.generator(text_only=True),
            total=len(dataset),
            progress_bar=progress_bar,
        )
    ]
def predict_dataset(self, dataset, progress_bar=True):
    """Predict a dataset with all predictors of this multi-model predictor.

    Parameters
    ----------
    dataset : Dataset
        Dataset to predict
    progress_bar : bool, optional
        hide or show a progress bar

    Yields
    ------
    tuple of (tuple, dict)
        per-sample tuple of one prediction object per predictor, paired
        with the dataset entry (sample) it was produced from

    Raises
    ------
    Exception
        if the predictors do not share the same preprocessor
    """
    start_time = time.time()

    # Preprocessing is shared across all models; heterogeneous
    # preprocessors are not supported during prediction.
    if not self.same_preproc:
        raise Exception(
            'Different preprocessors are currently not allowed during prediction'
        )

    input_dataset = InputDataset(
        dataset,
        self.predictors[0].data_preproc,
        self.predictors[0].text_postproc,
        None,
        processes=self.processes,
    )

    def progress_bar_wrapper(l):
        # Wrap the iterable with tqdm only when requested; the total is
        # the number of batches, not samples.
        if progress_bar:
            return tqdm(l, total=int(np.ceil(len(dataset) / self.batch_size)),
                        desc="Prediction")
        else:
            return l

    def batched_data_params():
        # Group generator output into batches of `self.batch_size`;
        # the final batch may be smaller.
        batch = []
        for data_idx, (image, _, params) in enumerate(
                input_dataset.generator(epochs=1)):
            batch.append((data_idx, image, params))
            if len(batch) == self.batch_size:
                yield batch
                batch = []
        if len(batch) > 0:
            yield batch

    # Hoisted out of the batch loop: previously dataset.samples() was
    # re-invoked once per batch.
    all_samples = dataset.samples()

    for batch in progress_bar_wrapper(batched_data_params()):
        sample_ids, batch_images, batch_params = zip(*batch)
        samples = [all_samples[i] for i in sample_ids]

        # One raw dataset per predictor, each wrapping the same batch data.
        raw_datasets = [
            RawInputDataset(
                DataSetMode.PREDICT,
                batch_images,
                [None] * len(batch_images),
                batch_params,
                None,
                None,
            ) for _ in self.predictors
        ]

        # predict_input_dataset returns a list of prediction objects per model
        predictions = [
            predictor.predict_input_dataset(ds, progress_bar=False)
            for ds, predictor in zip(raw_datasets, self.predictors)
        ]

        # Transpose model-major predictions to sample-major and pair each
        # per-sample tuple with its dataset entry.
        for result, sample in zip(zip(*predictions), samples):
            yield result, sample

    print("Prediction of {} models took {}s".format(
        len(self.predictors), time.time() - start_time))