def execute(self,
            dataset: Dataset,
            execution_scripts,
            train=False,
            compute_losses=True,
            summaries=True,
            batch_size=None,
            log_progress: int = 0) -> List[ExecutionResult]:
    if batch_size is None:
        batch_size = len(dataset)
    batched_dataset = dataset.batch_dataset(batch_size)
    last_log_time = time.process_time()

    batch_results = [
        [] for _ in execution_scripts]  # type: List[List[ExecutionResult]]
    for batch_id, batch in enumerate(batched_dataset):
        if 0 < log_progress < time.process_time() - last_log_time:
            log("Processed {} examples.".format(batch_id * batch_size))
            last_log_time = time.process_time()

        executables = [s.get_executable(compute_losses=compute_losses,
                                        summaries=summaries,
                                        num_sessions=len(self.sessions))
                       for s in execution_scripts]

        # Keep running the sessions until every executable has produced
        # a result for this batch (some executables may need several
        # rounds before they finish).
        while not all(ex.result is not None for ex in executables):
            self._run_executables(batch, executables, train)

        for script_list, executable in zip(batch_results, executables):
            script_list.append(executable.result)

    # Reduce the list of per-batch results to one result per script.
    collected_results = []  # type: List[ExecutionResult]
    for result_list in batch_results:
        collected_results.append(reduce_execution_results(result_list))

    return collected_results
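

# ``_run_executables`` is not shown in this snippet. Below is a minimal
# sketch of what it plausibly looks like, reconstructed from the inlined
# loop body of the older ``execute`` variants that follow; treat it as an
# assumption about the helper, not its actual implementation.
def _run_executables(self, batch, executables, train):
    all_feedables = set()
    all_tensors_to_execute = {}
    additional_feed_dicts = []

    # Ask each unfinished executable what to fetch and feed next.
    for executable in executables:
        if executable.result is None:
            (feedables, tensors_to_execute,
             add_feed_dict) = executable.next_to_execute()
            all_feedables = all_feedables.union(feedables)
            all_tensors_to_execute[executable] = tensors_to_execute
            additional_feed_dicts.append(add_feed_dict)

    # Build one feed dict for the whole batch and run all sessions on
    # the union of the requested fetches.
    feed_dict = _feed_dicts(batch, all_feedables, train=train)
    for fdict in additional_feed_dicts:
        feed_dict.update(fdict)
    session_results = [sess.run(all_tensors_to_execute, feed_dict=feed_dict)
                       for sess in self.sessions]

    # Hand each executable its own slice of the per-session results.
    for executable in executables:
        if executable.result is None:
            executable.collect_results(
                [res[executable] for res in session_results])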


def execute(self,
            dataset: Dataset,
            execution_scripts,
            train=False,
            compute_losses=True,
            summaries=True,
            batch_size=None) -> List[ExecutionResult]:
    if batch_size is None:
        batch_size = len(dataset)
    batched_dataset = dataset.batch_dataset(batch_size)

    batch_results = [
        [] for _ in execution_scripts]  # type: List[List[ExecutionResult]]
    for batch in batched_dataset:
        executables = [s.get_executable(compute_losses=compute_losses,
                                        summaries=summaries)
                       for s in execution_scripts]

        while not all(ex.result is not None for ex in executables):
            all_feedables = set()  # type: Set[Any]
            all_tensors_to_execute = {}  # type: Dict[Executable, tf.Tensor]
            additional_feed_dicts = []
            tensor_list_lengths = []  # type: List[int]

            for executable in executables:
                if executable.result is None:
                    (feedables,
                     tensors_to_execute,
                     add_feed_dict) = executable.next_to_execute()
                    all_feedables = all_feedables.union(feedables)
                    all_tensors_to_execute[executable] = tensors_to_execute
                    additional_feed_dicts.append(add_feed_dict)
                    tensor_list_lengths.append(len(tensors_to_execute))
                else:
                    tensor_list_lengths.append(0)

            feed_dict = _feed_dicts(batch, all_feedables, train=train)
            for fdict in additional_feed_dicts:
                feed_dict.update(fdict)

            session_results = [sess.run(all_tensors_to_execute,
                                        feed_dict=feed_dict)
                               for sess in self.sessions]

            for executable in executables:
                if executable.result is None:
                    executable.collect_results(
                        [res[executable] for res in session_results])

        for script_list, executable in zip(batch_results, executables):
            script_list.append(executable.result)

    collected_results = []  # type: List[ExecutionResult]
    for result_list in batch_results:
        collected_results.append(reduce_execution_results(result_list))

    return collected_results
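

# The loop above relies on an implicit executable protocol:
# ``next_to_execute`` returns (feedable model parts, tensors to fetch,
# extra feeds), ``collect_results`` receives one result per session, and
# ``result`` becomes non-None once the executable is finished. A toy,
# hypothetical implementation of that protocol (the class name and its
# single-round behaviour are assumptions for illustration only):
class ConstantExecutable:

    def __init__(self, fetches, feed_dict=None):
        self._fetches = fetches            # tensors to run
        self._feed_dict = feed_dict or {}  # extra placeholder values
        self.result = None                 # set once execution finishes

    def next_to_execute(self):
        return set(), self._fetches, self._feed_dict

    def collect_results(self, session_results):
        # One entry per session; a real executable might aggregate the
        # per-session results or keep iterating before setting ``result``.
        self.result = session_results[0]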


def execute(self,
            dataset: Dataset,
            execution_scripts,
            train=False,
            compute_losses=True,
            summaries=True,
            batch_size=None,
            temp=None,
            log_progress: int = 0) -> List[ExecutionResult]:
    if batch_size is None:
        batch_size = len(dataset)
    batched_dataset = dataset.batch_dataset(batch_size)
    last_log_time = time.process_time()

    batch_results = [
        [] for _ in execution_scripts]  # type: List[List[ExecutionResult]]
    for batch_id, batch in enumerate(batched_dataset):
        if (time.process_time() - last_log_time > log_progress
                and log_progress > 0):
            log("Processed {} examples.".format(batch_id * batch_size))
            last_log_time = time.process_time()

        executables = [s.get_executable(compute_losses=compute_losses,
                                        summaries=summaries)
                       for s in execution_scripts]

        while not all(ex.result is not None for ex in executables):
            all_feedables = set()  # type: Set[Any]
            all_tensors_to_execute = {}
            additional_feed_dicts = []
            tensor_list_lengths = []  # type: List[int]

            for executable in executables:
                if executable.result is None:
                    (feedables,
                     tensors_to_execute,
                     add_feed_dict) = executable.next_to_execute()
                    if temp is not None:
                        # Feed the extra ``temp`` value into the
                        # executable's first additional placeholder.
                        add_feed_dict[executable.placeholder[0]] = temp
                    all_feedables = all_feedables.union(feedables)
                    all_tensors_to_execute[executable] = tensors_to_execute
                    additional_feed_dicts.append(add_feed_dict)
                    tensor_list_lengths.append(len(tensors_to_execute))
                else:
                    tensor_list_lengths.append(0)

            feed_dict = _feed_dicts(batch, all_feedables, train=train)
            for fdict in additional_feed_dicts:
                feed_dict.update(fdict)

            session_results = [sess.run(all_tensors_to_execute,
                                        feed_dict=feed_dict)
                               for sess in self.sessions]

            for executable in executables:
                if executable.result is None:
                    executable.collect_results(
                        [res[executable] for res in session_results])

        for script_list, executable in zip(batch_results, executables):
            script_list.append(executable.result)

    collected_results = []  # type: List[ExecutionResult]
    for result_list in batch_results:
        collected_results.append(reduce_execution_results(result_list))

    return collected_results
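

# Usage sketch for the ``temp`` variant above, assuming an initialized
# ``TensorFlowManager`` and a list of runners built elsewhere; the
# function and variable names here are illustrative, not part of the
# original code.
def decode_with_temperature(tf_manager, dataset, runners, temperature):
    # ``temp`` ends up in each executable's first extra placeholder
    # (``executable.placeholder[0]``), e.g. a softmax temperature.
    return tf_manager.execute(dataset, runners,
                              train=False,
                              compute_losses=False,
                              temp=temperature,
                              log_progress=20)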


def run_on_dataset(tf_manager: TensorFlowManager,
                   runners: List[BaseRunner],
                   dataset: Dataset,
                   postprocess: Postprocess,
                   batching_scheme: BatchingScheme,
                   write_out: bool = False,
                   log_progress: int = 0) -> Tuple[
                       List[ExecutionResult], Dict[str, List[Any]]]:
    """Apply the model on a dataset and optionally write outputs to files.

    This function processes the dataset in batches and optionally logs
    the execution progress.

    Args:
        tf_manager: TensorFlow manager with initialized sessions.
        runners: List of runners to apply to the dataset.
        dataset: The dataset on which the model will be executed.
        postprocess: Dataset-level postprocessors.
        batching_scheme: Scheme used for batching.
        write_out: Flag whether the outputs should be written to the files
            defined in the dataset object.
        log_progress: Log progress every X seconds.

    Returns:
        Tuple of the per-runner execution results and a dictionary that
        maps output series names to lists of resulting sentences/numpy
        arrays.
    """
    # If the dataset contains the target series, compute losses as well.
    contains_targets = all(dataset.has_series(runner.decoder_data_id)
                           for runner in runners
                           if runner.decoder_data_id is not None)

    last_log_time = time.process_time()
    batch_results = [
        [] for _ in runners]  # type: List[List[ExecutionResult]]

    feedables = set.union(*[runner.feedables for runner in runners])

    processed_examples = 0
    for batch in dataset.batches(batching_scheme):
        if 0 < log_progress < time.process_time() - last_log_time:
            log("Processed {} examples.".format(processed_examples))
            last_log_time = time.process_time()

        execution_results = tf_manager.execute(
            batch, feedables, runners, compute_losses=contains_targets)
        processed_examples += len(batch)

        for script_list, ex_result in zip(batch_results, execution_results):
            script_list.append(ex_result)

    # Transpose runner interim results.
    all_results = [reduce_execution_results(res) for res in batch_results]

    # Convert execution results to a dictionary.
    result_data = {runner.output_series: result.outputs
                   for runner, result in zip(runners, all_results)}

    # Run dataset-level postprocessing.
    if postprocess is not None:
        for series_name, postprocessor in postprocess:
            postprocessed = postprocessor(dataset, result_data)
            if not hasattr(postprocessed, "__len__"):
                postprocessed = list(postprocessed)
            result_data[series_name] = postprocessed

    # Check output series lengths.
    for series_id, data in result_data.items():
        if len(data) != len(dataset):
            warn("Output '{}' for dataset '{}' has length {}, but "
                 "len(dataset) == {}".format(series_id, dataset.name,
                                             len(data), len(dataset)))

    if write_out and dataset.outputs is not None:
        for series_id, data in result_data.items():
            if series_id in dataset.outputs:
                path, writer = dataset.outputs[series_id]
                writer(path, data)
            else:
                log("There is no file for output series '{}' in dataset: "
                    "'{}'".format(series_id, dataset.name), color="red")
    elif write_out:
        log("Dataset does not have any outputs, nothing to write out.",
            color="red")

    return all_results, result_data
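

# Usage sketch: decoding a test set and writing the outputs to the files
# configured on the dataset. ``tf_manager``, ``runners``, ``test_data``
# and ``batching_scheme`` are assumed to be constructed elsewhere in the
# codebase; the wrapper function name is illustrative.
def decode_test_set(tf_manager, runners, test_data, batching_scheme):
    execution_results, output_data = run_on_dataset(
        tf_manager, runners, test_data,
        postprocess=None,
        batching_scheme=batching_scheme,
        write_out=True,      # write each series to its dataset output file
        log_progress=20)     # report progress roughly every 20 seconds
    return execution_results, output_data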