def __init__(self,
             inputs: Union[None, str, Iterable[str]] = None,
             outputs: Union[None, str, Iterable[str]] = None,
             mode: Union[None, str, Iterable[str]] = None,
             ds_id: Union[None, str, Iterable[str]] = None) -> None:
    """Normalize and store the keys, modes, and dataset ids of this object.

    Args:
        inputs: Input key(s): None, a single string, or an iterable of strings. Converted to a list and passed
            through `check_io_names`.
        outputs: Output key(s): None, a single string, or an iterable of strings. Converted to a list and passed
            through `check_io_names`.
        mode: Mode value(s). Converted to a set and passed through `parse_modes`.
        ds_id: Dataset id value(s). Converted to a set and passed through `check_ds_id`.
    """
    input_keys = to_list(inputs)
    self.inputs = check_io_names(input_keys)
    output_keys = to_list(outputs)
    self.outputs = check_io_names(output_keys)
    self.mode = parse_modes(to_set(mode))
    self.ds_id = check_ds_id(to_set(ds_id))
    # Remember whether the caller handed over a collection (True) or a bare string / None (False), judged on the
    # raw arguments rather than on the normalized lists stored above.
    self.in_list = inputs is not None and not isinstance(inputs, str)
    self.out_list = outputs is not None and not isinstance(outputs, str)
def __init__(self,
             inputs: Union[None, str, Iterable[str]] = None,
             outputs: Union[None, str, Iterable[str]] = None,
             mode: Union[None, str, Iterable[str]] = None,
             ds_id: Union[None, str, Iterable[str]] = None) -> None:
    """Normalize and store the keys, modes, and dataset ids of this object.

    Args:
        inputs: Input key(s): None, a single string, or an iterable of strings. Converted to a list and passed
            through `check_io_names`.
        outputs: Output key(s): None, a single string, or an iterable of strings. Converted to a list and passed
            through `check_io_names`.
        mode: Mode value(s). Converted to a set and passed through `parse_modes`.
        ds_id: Dataset id value(s). Converted to a set and passed through `check_ds_id`.
    """
    input_keys = to_list(inputs)
    self.inputs = check_io_names(input_keys)
    output_keys = to_list(outputs)
    self.outputs = check_io_names(output_keys)
    mode_set = to_set(mode)
    self.mode = parse_modes(mode_set)
    ds_id_set = to_set(ds_id)
    self.ds_id = check_ds_id(ds_id_set)
    # The use-case here is rare enough that we don't want to add this to the init sig
    self.fe_monitor_names = set()
def __init__(self,
             index: str,
             metric: str,
             n_max_to_keep: int = 5,
             n_min_to_keep: int = 5,
             list_to_keep: Optional[Iterable[Any]] = None,
             epoch_frequency: int = 1,
             mode: Union[None, str, Set[str]] = "eval",
             outputs: Optional[str] = None):
    """Set up per-index tracking of a metric.

    Args:
        index: Key of the index values; used as the first input key.
        metric: Key of the metric values; used as the second input key.
        n_max_to_keep: Must be non-negative; stored on the instance.
        n_min_to_keep: Must be non-negative; stored on the instance.
        list_to_keep: Optional collection of values, stored as a set in `idx_to_keep`.
        epoch_frequency: Stored on the instance without validation.
        mode: Mode value(s) forwarded to the parent constructor.
        outputs: Output key. When falsy, "<metric>_by_<index>" is used instead.

    Raises:
        ValueError: If `n_max_to_keep` or `n_min_to_keep` is negative.
    """
    # TODO - highlight 'interesting' samples (sudden changes in relative ordering?)
    if outputs:
        output_key = outputs
    else:
        output_key = f"{metric}_by_{index}"
    super().__init__(inputs=[index, metric], outputs=output_key, mode=mode)
    self.points = []

    if n_max_to_keep < 0:
        raise ValueError(f"n_max_to_keep must be non-negative, but got {n_max_to_keep}")
    self.n_max_to_keep = n_max_to_keep

    if n_min_to_keep < 0:
        raise ValueError(f"n_min_to_keep must be non-negative, but got {n_min_to_keep}")
    self.n_min_to_keep = n_min_to_keep

    self.idx_to_keep = to_set(list_to_keep)
    # Ideally the step and metric would be separated to save space, but a given idx may not appear each epoch
    # Layout: {mode: {idx: [(step, metric)]}}
    self.index_history = defaultdict(lambda: defaultdict(list))
    self.epoch_frequency = epoch_frequency
def get_current_items(items: Iterable[Union[T, Scheduler[T]]],
                      run_modes: Optional[Union[str, Iterable[str]]] = None,
                      epoch: Optional[int] = None) -> List[T]:
    """Pick the items which apply to the requested mode(s) and epoch.

    Args:
        items: Candidate items, each either a plain item or a Scheduler wrapping items.
        run_modes: The desired execution mode. One or more of "train", "eval", "test", or "infer". If None, items
            of all modes will be returned.
        epoch: The desired execution epoch. If None, every value held by a Scheduler is considered.

    Returns:
        The selected items, in the order they were encountered.
    """
    wanted_modes = to_set(run_modes)
    chosen = []
    for entry in items:
        # Unwrap schedulers into the concrete candidate(s) for this epoch
        if not isinstance(entry, Scheduler):
            candidates = [entry]
        elif epoch is None:
            candidates = entry.get_all_values()
        else:
            candidates = [entry.get_current_value(epoch)]

        for candidate in candidates:
            if not candidate:
                continue  # Falsy entries (e.g. an empty scheduler slot) are never selected
            # A candidate is rejected only when a mode filter was requested, the candidate declares a non-empty
            # `mode`, and that mode shares nothing with the requested ones.
            if (wanted_modes and hasattr(candidate, "mode") and candidate.mode
                    and not candidate.mode.intersection(wanted_modes)):
                continue
            chosen.append(candidate)
    return chosen
def __init__(self,
             pipeline: Pipeline,
             network: BaseNetwork,
             epochs: int,
             train_steps_per_epoch: Optional[int] = None,
             eval_steps_per_epoch: Optional[int] = None,
             traces: Union[None, Trace, Scheduler[Trace], Iterable[Union[Trace, Scheduler[Trace]]]] = None,
             log_steps: Optional[int] = 100,
             monitor_names: Union[None, str, Iterable[str]] = None):
    """Collect the pipeline, network, and traces into a `System` object.

    Args:
        pipeline: Forwarded to `System` as `pipeline`.
        network: Forwarded to `System` as `network`; its loss keys are always added to the monitored names.
        epochs: Forwarded to `System` as `total_epochs`.
        train_steps_per_epoch: Forwarded to `System` unchanged.
        eval_steps_per_epoch: Forwarded to `System` unchanged.
        traces: Trace(s) and/or schedulers of traces; converted to a list before being handed to `System`.
        log_steps: Must be None or >= 0 (the assertion message describes 0 as disabling only train logging).
        monitor_names: Extra key(s) to monitor, in addition to the network's loss keys.
    """
    self.traces_in_use = []
    # Record this for history tracking. The frame index is relative to this function, so the lookup must stay
    # inline rather than move into a helper.
    caller_frame = inspect.stack()[2]
    self.filepath = os.path.realpath(caller_frame.filename)
    assert log_steps is None or log_steps >= 0, \
        "log_steps must be None or positive (or 0 to disable only train logging)"
    requested_monitors = to_set(monitor_names)
    self.monitor_names = requested_monitors | network.get_loss_keys()
    system_kwargs = dict(network=network,
                         pipeline=pipeline,
                         traces=to_list(traces),
                         log_steps=log_steps,
                         total_epochs=epochs,
                         train_steps_per_epoch=train_steps_per_epoch,
                         eval_steps_per_epoch=eval_steps_per_epoch,
                         system_config=self.fe_summary())
    self.system = System(**system_kwargs)
def __init__(self,
             label: str,
             metric: str,
             label_mapping: Optional[Dict[str, Any]] = None,
             bounds: Union[None, str, Iterable[Union[str, None]]] = "std",
             mode: Union[None, str, Iterable[str]] = "eval",
             ds_id: Union[None, str, Iterable[str]] = None,
             outputs: Optional[str] = None):
    """Set up per-label tracking of a metric.

    Args:
        label: Key of the label values; used as the first input key.
        metric: Key of the metric values; used as the second input key.
        label_mapping: Optional {name: label value} dictionary. It is stored inverted ({label value: name}), or
            as None when not provided / empty.
        bounds: Which bound type(s) to use: None, "std", "range", or an iterable of those.
        mode: Mode value(s) forwarded to the parent constructor.
        ds_id: Dataset id value(s) forwarded to the parent constructor.
        outputs: Output key. When falsy, "<metric>_by_<label>" is used instead.

    Raises:
        ValueError: If any entry of `bounds` is not None, "std", or "range".
    """
    super().__init__(inputs=[label, metric], outputs=outputs or f"{metric}_by_{label}", mode=mode, ds_id=ds_id)
    self.points = []
    self.label_summaries = DefaultKeyDict(default=lambda x: Summary(name=x))
    self.label_mapping = {val: key for key, val in label_mapping.items()} if label_mapping else None
    # Work on a copy so that inserting the None placeholder below can never modify a set owned by the caller
    # (whether `to_set` copies its argument is not visible from here).
    bounds = set(to_set(bounds))
    if not bounds:
        bounds.add(None)
    for option in bounds:
        if option not in (None, "std", "range"):
            # Name the real parameter and report only the offending entry
            raise ValueError(f"'bounds' must be either None, 'std', or 'range', but got '{option}'.")
    self.bounds = bounds
def __init__(self,
             inputs: Union[None, str, Iterable[str]] = None,
             outputs: Union[None, str, Iterable[str]] = None,
             mode: Union[None, str, Iterable[str]] = None) -> None:
    """Normalize and store the keys and modes of this object.

    Args:
        inputs: Input key(s): None, a single string, or an iterable of strings. Stored as a list.
        outputs: Output key(s): None, a single string, or an iterable of strings. Stored as a list.
        mode: Mode value(s). Converted to a set and passed through `parse_modes`.
    """
    input_keys = to_list(inputs)
    output_keys = to_list(outputs)
    mode_set = to_set(mode)
    self.inputs = input_keys
    self.outputs = output_keys
    self.mode = parse_modes(mode_set)
def __init__(self,
             inputs: Union[None, str, Iterable[str]] = None,
             outputs: Union[None, str, Iterable[str]] = None,
             mode: Union[None, str, Iterable[str]] = None) -> None:
    """Normalize and store the keys and modes of this object.

    Args:
        inputs: Input key(s): None, a single string, or an iterable of strings. Stored as a list.
        outputs: Output key(s): None, a single string, or an iterable of strings. Stored as a list.
        mode: Mode value(s). Converted to a set and passed through `parse_modes`.
    """
    input_keys = to_list(inputs)
    output_keys = to_list(outputs)
    mode_set = to_set(mode)
    self.inputs = input_keys
    self.outputs = output_keys
    self.mode = parse_modes(mode_set)
    # Remember whether the caller handed over a collection (True) or a bare string / None (False), judged on the
    # raw arguments rather than on the normalized lists stored above.
    self.in_list = inputs is not None and not isinstance(inputs, str)
    self.out_list = outputs is not None and not isinstance(outputs, str)
def _warmup(self, eager: bool = True) -> None:
    """Perform a test run of each pipeline and network signature epoch to make sure that training won't fail later.

    Traces are not executed in the warmup since they are likely to contain state variables which could become
    corrupted by running extra steps.

    Args:
        eager: Whether to run the training in eager mode. This is only related to TensorFlow training because
            PyTorch by nature is always in eager mode.
    """
    every_trace = get_current_items(self.traces_in_use, run_modes={"train", "eval"})
    # This ensures that the traces can sort properly for on_begin and on_end (the result itself is not needed)
    sort_traces(every_trace)
    missing_monitors = self.monitor_names
    for mode in self.pipeline.get_modes() - {"test"}:
        scheduled_items = self.pipeline.get_scheduled_items(mode) + self.network.get_scheduled_items(
            mode) + self.get_scheduled_items(mode)
        signature_epochs = get_signature_epochs(scheduled_items, self.system.total_epochs, mode=mode)
        epochs_with_data = self.pipeline.get_epochs_with_data(total_epochs=self.system.total_epochs, mode=mode)
        for epoch in signature_epochs:
            if epoch not in epochs_with_data:
                continue
            network_output_keys = self.network.get_all_output_keys(mode, epoch)
            network_input_keys = self.network.get_effective_input_keys(mode, epoch)
            trace_input_keys = set()
            trace_output_keys = {"*"}
            epoch_traces = get_current_items(self.traces_in_use, run_modes=mode, epoch=epoch)
            for position, trace in enumerate(epoch_traces):
                if position > 0:  # ignore TrainEssential and EvalEssential's inputs for unmet requirement checking
                    trace_input_keys.update(trace.inputs)
                trace_output_keys.update(trace.outputs)
            # key checking
            requested_keys = (trace_input_keys - network_output_keys) | network_input_keys
            raw_loader = self.pipeline.get_loader(mode, epoch, output_keys=requested_keys)
            loader = self._configure_loader(raw_loader)
            with Suppressor():
                # Pull a single batch; a tf.data.Dataset needs take() whereas other loaders are plain iterables
                if isinstance(loader, tf.data.Dataset):
                    batch = list(loader.take(1))[0]
                else:
                    batch = next(iter(loader))
            batch = self._configure_tensor(loader, batch)
            assert isinstance(batch, dict), "please make sure data output format is dictionary"
            pipeline_output_keys = to_set(batch.keys())
            provided_keys = pipeline_output_keys | network_output_keys

            # A monitored name is satisfied as soon as any signature epoch provides it
            missing_monitors = missing_monitors - provided_keys
            unmet_requirements = trace_input_keys - (provided_keys | trace_output_keys)
            assert not unmet_requirements, \
                "found missing key(s) during epoch {} mode {}: {}".format(epoch, mode, unmet_requirements)
            sort_traces(epoch_traces, available_outputs=provided_keys)
            # The first trace was skipped during requirement checking, but the network must still emit its inputs
            trace_input_keys.update(epoch_traces[0].inputs)
            self.network.load_epoch(mode, epoch, output_keys=trace_input_keys, warmup=True, eager=eager)
            self.network.run_step(batch)
            self.network.unload_epoch()
    assert not missing_monitors, "found missing key(s): {}".format(missing_monitors)
def __init__(self,
             log_dir: str = 'logs',
             update_freq: Union[None, int, str] = 100,
             write_graph: bool = True,
             write_images: Union[None, str, List[str]] = None,
             weight_histogram_freq: Union[None, int, str] = None,
             paint_weights: bool = False,
             embedding_freq: Union[None, int, str] = 'epoch',
             write_embeddings: Union[None, str, List[str]] = None,
             embedding_labels: Union[None, str, List[str]] = None,
             embedding_images: Union[None, str, List[str]] = None) -> None:
    """Configure what gets written to the log directory and how often.

    Args:
        log_dir: Stored as `root_log_dir`.
        update_freq: Frequency specification, parsed by `_parse_freq`.
        write_graph: Stored on the instance.
        write_images: Key(s) of images to write; also registered as inputs.
        weight_histogram_freq: Frequency specification, parsed by `_parse_freq`.
        paint_weights: Stored on the instance. If True while the parsed histogram frequency is 0, the histogram
            frequency is forced to a non-step frequency of 1.
        embedding_freq: Frequency specification, parsed by `_parse_freq`. Ignored (treated as None) when no
            embedding-related keys are given.
        write_embeddings: Key(s) of embeddings to write; also registered as inputs.
        embedding_labels: Key(s) of labels, one per entry of `write_embeddings` (use None to pad).
        embedding_images: Key(s) of images, one per entry of `write_embeddings` (use None to pad).

    Raises:
        AssertionError: If `embedding_labels` or `embedding_images` is given with a different length than
            `write_embeddings`.
    """
    super().__init__(inputs=["*"] + to_list(write_images) + to_list(write_embeddings) + to_list(embedding_labels) +
                     to_list(embedding_images))
    self.root_log_dir = log_dir
    self.update_freq = self._parse_freq(update_freq)
    self.write_graph = write_graph
    self.painted_graphs = set()
    self.write_images = to_set(write_images)
    self.histogram_freq = self._parse_freq(weight_histogram_freq)
    if paint_weights and self.histogram_freq.freq == 0:
        self.histogram_freq.is_step = False
        self.histogram_freq.freq = 1
    self.paint_weights = paint_weights
    if write_embeddings is None and embedding_labels is None and embedding_images is None:
        # Speed up if-check short-circuiting later
        embedding_freq = None
    self.embedding_freq = self._parse_freq(embedding_freq)

    write_embeddings = to_list(write_embeddings)
    n_embeddings = len(write_embeddings)

    embedding_labels = to_list(embedding_labels)
    if embedding_labels:
        # The message is built from adjacent literals so that no source indentation leaks into it
        assert len(embedding_labels) == n_embeddings, (
            f"Expected {n_embeddings} embedding_labels keys, but received {len(embedding_labels)}. "
            "Use None to pad out the list if you have labels for only a subset of all embeddings.")
    else:
        embedding_labels = [None] * n_embeddings

    embedding_images = to_list(embedding_images)
    if embedding_images:
        assert len(embedding_images) == n_embeddings, (
            f"Expected {n_embeddings} embedding_images keys, but received {len(embedding_images)}. "
            "Use None to pad out the list if you have images for only a subset of all embeddings.")
    else:
        embedding_images = [None] * n_embeddings

    # One (feature, label, image) key triple per embedding
    self.write_embeddings = list(zip(write_embeddings, embedding_labels, embedding_images))
    self.collected_embeddings = defaultdict(list)
def _run_epoch(self) -> None:
    """Run a single epoch of activity.

    The current mode and epoch must already be set on the self.system object before this method is called.
    """
    active_traces = get_current_items(self.traces_in_use, run_modes=self.system.mode, epoch=self.system.epoch_idx)
    trace_input_keys = set()
    for active_trace in active_traces:
        trace_input_keys.update(active_trace.inputs)
    raw_loader = self.pipeline.get_loader(self.system.mode, self.system.epoch_idx)
    loader = self._configure_loader(raw_loader)
    iterator = iter(loader)
    self.network.load_epoch(mode=self.system.mode, epoch=self.system.epoch_idx, output_keys=trace_input_keys)
    self.system.batch_idx = None
    # The first batch is fetched up front so that its keys can be used when sorting the traces
    with Suppressor():
        batch = next(iterator)
    known_keys = to_set(batch.keys()) | self.network.get_all_output_keys(self.system.mode, self.system.epoch_idx)
    active_traces = self._sort_traces(active_traces, available_outputs=known_keys)
    self._run_traces_on_epoch_begin(traces=active_traces)
    while True:
        try:
            if self.system.mode == "train":
                self.system.update_global_step()
            self.system.update_batch_idx()
            batch = self._configure_tensor(loader, batch)
            self._run_traces_on_batch_begin(batch, traces=active_traces)
            batch, prediction = self.network.run_step(batch)
            self._run_traces_on_batch_end(batch, prediction, traces=active_traces)
            if isinstance(loader, DataLoader):
                # A DataLoader is cut off manually once the per-epoch step limit for the current mode is reached
                train_limit_hit = (self.system.batch_idx == self.system.max_train_steps_per_epoch
                                   and self.system.mode == "train")
                if train_limit_hit or (self.system.batch_idx == self.system.max_eval_steps_per_epoch
                                       and self.system.mode == "eval"):
                    break
            with Suppressor():
                batch = next(iterator)
        except StopIteration:
            # The loader ran out of data
            break
    self._run_traces_on_epoch_end(traces=active_traces)
    self.network.unload_epoch()
def __init__(self, *numpy_ops: NumpyOp) -> None:
    """Wrap several ops, taking the shared configuration from the first one.

    Args:
        *numpy_ops: The ops to wrap. All must agree with the first op on `in_list`, `out_list`, and `mode`.

    Raises:
        AssertionError: If any op disagrees with the first op on `in_list`, `out_list`, or `mode`.
    """
    first_op = numpy_ops[0]
    inputs = to_set(first_op.inputs)
    outputs = to_set(first_op.outputs)
    mode = first_op.mode
    ds_id = first_op.ds_id
    self.in_list = first_op.in_list
    self.out_list = first_op.out_list
    for other_op in numpy_ops[1:]:
        assert self.in_list == other_op.in_list, "All ops within OneOf must share the same input configuration"
        assert self.out_list == other_op.out_list, "All ops within OneOf must share the same output configuration"
        assert mode == other_op.mode, "All ops within a OneOf must share the same mode"
        inputs.update(other_op.inputs)
        outputs.update(other_op.outputs)

    # Bypassing OneOf Op's restriction of same input and output key(s) on the list of passed NumpyOps.
    combined_inputs = inputs.union(outputs)
    super(OneOf, self).__init__(inputs=combined_inputs, outputs=outputs, mode=mode, ds_id=ds_id)
    self.ops = numpy_ops
def load_epoch(self, epoch, mode):
    """Look up the ops scheduled for the given epoch and record their loss keys.

    The output keys of every `Loss` op are gathered and stored (as a list) in `self.epoch_losses`.

    Args:
        epoch: Training epoch number
        mode: 'train' or 'eval'

    Returns:
        The ops which the schedule for `mode` yields at `epoch`.
    """
    ops = self.op_schedule[mode].get_current_value(epoch)
    loss_keys = set()
    for loss_op in (op for op in ops if isinstance(op, Loss)):
        loss_keys.update(to_set(loss_op.outputs))
    self.epoch_losses = to_list(loss_keys)
    return ops
def get_modes(self, epoch: Optional[int] = None) -> Set[str]:
    """Get the modes for which the Pipeline has data.

    Args:
        epoch: The current epoch index. If None, every mode present in `self.data` is reported regardless of
            whether its dataset is empty.

    Returns:
        The modes for which the Pipeline has data.
    """
    if epoch is None:
        return to_set(set(self.data.keys()))

    modes_with_data = []
    for mode, dataset in self.data.items():
        # A scheduled dataset is resolved to whatever it holds at this epoch
        current = dataset.get_current_value(epoch) if isinstance(dataset, Scheduler) else dataset
        if current:
            modes_with_data.append(mode)
    return to_set(modes_with_data)
def __init__(self,
             pipeline: Pipeline,
             network: BaseNetwork,
             epochs: int,
             max_train_steps_per_epoch: Optional[int] = None,
             max_eval_steps_per_epoch: Optional[int] = None,
             traces: Union[None, Trace, Scheduler[Trace], Iterable[Union[Trace, Scheduler[Trace]]]] = None,
             log_steps: Optional[int] = 100,
             monitor_names: Union[None, str, Iterable[str]] = None):
    """Collect the pipeline, network, and traces into a `System` object.

    Args:
        pipeline: Forwarded to `System` as `pipeline`.
        network: Forwarded to `System` as `network`; its loss keys are always added to the monitored names.
        epochs: Forwarded to `System` as `total_epochs`.
        max_train_steps_per_epoch: Forwarded to `System` unchanged.
        max_eval_steps_per_epoch: Forwarded to `System` unchanged.
        traces: Trace(s) and/or schedulers of traces; converted to a list before being handed to `System`.
        log_steps: Must be None or >= 0 (the assertion message describes 0 as disabling only train logging).
        monitor_names: Extra key(s) to monitor, in addition to the network's loss keys.
    """
    self.traces_in_use = []
    assert log_steps is None or log_steps >= 0, \
        "log_steps must be None or positive (or 0 to disable only train logging)"
    requested_monitors = to_set(monitor_names)
    self.monitor_names = requested_monitors | network.get_loss_keys()
    system_kwargs = dict(network=network,
                         pipeline=pipeline,
                         traces=to_list(traces),
                         log_steps=log_steps,
                         total_epochs=epochs,
                         max_train_steps_per_epoch=max_train_steps_per_epoch,
                         max_eval_steps_per_epoch=max_eval_steps_per_epoch,
                         system_config=self.fe_summary())
    self.system = System(**system_kwargs)
def __init__(self,
             test_cases: Union[TestCase, List[TestCase]],
             save_path: str,
             test_title: Optional[str] = None,
             data_id: str = None) -> None:
    """Sort the test cases into groups and prepare the report output directories.

    Args:
        test_cases: One test case or a list of them. Each is filed under `aggregate_cases` or `instance_cases`
            according to its `aggregate` attribute, and its `criteria_inputs` are added to the input keys.
        save_path: Where the report goes. The report directory is named after the last path component up to its
            first '.', falling back to 'report' when that component is empty.
        test_title: Stored on the instance.
        data_id: Stored on the instance and included in the input keys.
    """
    self.check_pdf_dependency()
    self.test_title = test_title
    self.report_name = None
    self.instance_cases = []
    self.aggregate_cases = []
    self.data_id = data_id

    all_inputs = to_set(self.data_id)
    for case in to_list(test_cases):
        all_inputs.update(case.criteria_inputs)
        bucket = self.aggregate_cases if case.aggregate else self.instance_cases
        bucket.append(case)

    absolute_path = os.path.abspath(os.path.normpath(save_path))
    root_dir, base_name = os.path.split(absolute_path)
    report = (base_name or 'report').partition('.')[0]

    self.save_dir = os.path.join(root_dir, report)
    self.resource_dir = os.path.join(self.save_dir, "resources")
    for directory in (self.save_dir, self.resource_dir):
        os.makedirs(directory, exist_ok=True)

    self.json_summary = {}
    # PDF document related
    self.doc = None
    self.test_id = None

    super().__init__(inputs=all_inputs, mode="test")
def _run_epoch(self, eager: bool) -> None:
    """Run a single epoch of activity, one dataset id at a time.

    The current mode and epoch must already be set on the self.system object before this method is called.

    Args:
        eager: Whether to run the training in eager mode. This is only related to TensorFlow training because
            PyTorch by nature is always in eager mode.
    """
    ds_ids = self.pipeline.get_ds_ids(self.system.epoch_idx, self.system.mode)
    candidate_traces = get_current_items(self.traces_in_use, run_modes=self.system.mode, epoch=self.system.epoch_idx)
    epoch_traces = sort_traces(candidate_traces, ds_ids=ds_ids)
    self._run_traces_on_epoch_begin(traces=epoch_traces)
    self.system.batch_idx = None
    # We will aggregate data over on_ds_end and put it into on_epoch_end for printing
    end_epoch_data = Data()

    # run for each dataset (the loop variable is written straight onto the system object)
    for self.system.ds_id in ds_ids:
        ds_traces = get_current_items(self.traces_in_use,
                                      run_modes=self.system.mode,
                                      epoch=self.system.epoch_idx,
                                      ds_id=self.system.ds_id)
        trace_input_keys = set()
        for ds_trace in ds_traces:
            trace_input_keys.update(ds_trace.inputs)
        network_input_keys = self.network.get_effective_input_keys(mode=self.system.mode,
                                                                   epoch=self.system.epoch_idx,
                                                                   ds_id=self.system.ds_id)
        network_output_keys = self.network.get_all_output_keys(mode=self.system.mode,
                                                               epoch=self.system.epoch_idx,
                                                               ds_id=self.system.ds_id)
        self.network.load_epoch(mode=self.system.mode,
                                epoch=self.system.epoch_idx,
                                ds_id=self.system.ds_id,
                                output_keys=trace_input_keys,
                                eager=eager)

        requested_keys = (trace_input_keys - network_output_keys) | network_input_keys
        with self.pipeline(mode=self.system.mode,
                           epoch=self.system.epoch_idx,
                           ds_id=self.system.ds_id,
                           steps_per_epoch=self.system.steps_per_epoch,
                           output_keys=requested_keys) as loader:
            loader = self._configure_loader(loader)
            iterator = iter(loader)
            # The first batch is fetched up front so that its keys can be used when sorting the traces
            with Suppressor():
                batch = next(iterator)
            ds_traces = sort_traces(ds_traces,
                                    available_outputs=to_set(batch.keys()) | network_output_keys,
                                    ds_ids=ds_ids)
            per_ds_traces = [trace for trace in ds_traces if isinstance(trace, PerDSTrace)]
            self._run_traces_on_ds_begin(traces=per_ds_traces)
            while True:
                try:
                    if self.system.mode == "train":
                        self.system.update_global_step()
                    self.system.update_batch_idx()
                    batch = self._configure_tensor(loader, batch)
                    self._run_traces_on_batch_begin(batch, traces=ds_traces)
                    batch, prediction = self.network.run_step(batch)
                    self._run_traces_on_batch_end(batch, prediction, traces=ds_traces)
                    if isinstance(loader, DataLoader):
                        # A DataLoader is cut off manually once the per-epoch step limit for the mode is reached
                        train_limit_hit = (self.system.batch_idx == self.system.train_steps_per_epoch
                                           and self.system.mode == "train")
                        if train_limit_hit or (self.system.batch_idx == self.system.eval_steps_per_epoch
                                               and self.system.mode == "eval"):
                            break
                    with Suppressor():
                        batch = next(iterator)
                except StopIteration:
                    # The loader ran out of data
                    break
            self._run_traces_on_ds_end(traces=per_ds_traces, data=end_epoch_data)
        self.network.unload_epoch()
    self._run_traces_on_epoch_end(traces=epoch_traces, data=end_epoch_data)
def get_fe_loss_keys(self) -> Set[str]:
    """Return the value of `self.loss_name` converted to a set.

    Returns:
        The loss key(s) held in `self.loss_name`, as a set.
    """
    loss_keys = to_set(self.loss_name)
    return loss_keys
def plot_logs(experiments: List[Summary],
              smooth_factor: float = 0,
              share_legend: bool = True,
              ignore_metrics: Optional[Set[str]] = None,
              pretty_names: bool = False,
              include_metrics: Optional[Set[str]] = None) -> plt.Figure:
    """A function which will plot experiment histories for comparison viewing / analysis.

    Args:
        experiments: Experiment(s) to plot.
        smooth_factor: A non-negative float representing the magnitude of gaussian smoothing to apply (zero for none).
        share_legend: Whether to have one legend across all graphs (True) or one legend per graph (False).
        pretty_names: Whether to modify the metric names in graph titles (True) or leave them alone (False).
        ignore_metrics: Any keys to ignore during plotting. The 'epoch' key is always ignored.
        include_metrics: A whitelist of keys to include during plotting. If None then all will be included.

    Returns:
        The handle of the pyplot figure. When there is nothing to plot, a figure holding one empty axis.
    """
    # Sort to keep same colors between multiple runs of visualization
    experiments = humansorted(to_list(experiments), lambda exp: exp.name)
    n_experiments = len(experiments)
    if n_experiments == 0:
        # plt.subplots() takes grid dimensions (unlike plt.subplot(111)), so no arguments are passed here
        return plt.subplots()[0]

    # Build a fresh set rather than updating in place, so that a set supplied by the caller is never modified
    ignore_keys = to_set(ignore_metrics or set()) | {'epoch'}
    include_keys = to_set(include_metrics)
    # TODO: epoch should be indicated on the axis (top x axis?). Problem - different epochs per experiment.
    # TODO: figure out how ignore_metrics should interact with mode

    mode_order = {'train': 0, 'eval': 1, 'test': 2, 'infer': 3}
    metric_histories = defaultdict(_MetricGroup)  # metric: MetricGroup
    for idx, experiment in enumerate(experiments):
        history = experiment.history
        # Since python dicts remember insertion order, sort the history so that train mode is always plotted on bottom
        for mode, metrics in sorted(history.items(), key=lambda item: mode_order.get(item[0], 4)):
            for metric, step_val in metrics.items():
                if len(step_val) == 0:
                    continue  # Ignore empty metrics
                if metric in ignore_keys:
                    continue
                if include_keys and metric not in include_keys:
                    continue
                metric_histories[metric].add(idx, mode, step_val)

    metric_list = sorted(metric_histories.keys())
    if len(metric_list) == 0:
        return plt.subplots()[0]

    # If sharing legend and there is more than 1 plot, then dedicate 1 subplot for the legend
    share_legend = share_legend and (len(metric_list) > 1)
    n_legends = math.ceil(n_experiments / 4)
    n_plots = len(metric_list) + (share_legend * n_legends)

    # map the metrics into an n x n grid, then remove any extra columns. Final grid will be n x m with m <= n
    n_rows = math.ceil(math.sqrt(n_plots))
    n_cols = math.ceil(n_plots / n_rows)
    metric_grid_location = {}
    nd1_metrics = []
    idx = 0
    for metric in metric_list:
        if metric_histories[metric].ndim() == 1:
            # Delay placement of the 1D plots until the end
            nd1_metrics.append(metric)
        else:
            metric_grid_location[metric] = (idx // n_cols, idx % n_cols)
            idx += 1
    for metric in nd1_metrics:
        metric_grid_location[metric] = (idx // n_cols, idx % n_cols)
        idx += 1

    sns.set_context('paper')
    # squeeze=False always yields a 2D array of axes, so axs[row][col] is valid for every grid shape (including
    # a single row, a single column, or a single plot) without any manual re-wrapping.
    fig, axs = plt.subplots(n_rows, n_cols, sharex='all', squeeze=False, figsize=(4 * n_cols, 2.8 * n_rows))

    for metric in metric_grid_location.keys():
        axis = axs[metric_grid_location[metric][0]][metric_grid_location[metric][1]]
        if metric_histories[metric].ndim() == 1:
            axis.grid(linestyle='')
        else:
            axis.grid(linestyle='--')
            axis.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3))
        axis.set_title(metric if not pretty_names else prettify_metric_name(metric), fontweight='bold')
        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
        axis.spines['bottom'].set_visible(False)
        axis.spines['left'].set_visible(False)
        axis.tick_params(bottom=False, left=False)

    # some of the later rows/columns might be unused or reserved for legends, so disable them
    last_row_idx = math.ceil(len(metric_list) / n_cols) - 1
    last_column_idx = len(metric_list) - last_row_idx * n_cols - 1
    for c in range(n_cols):
        if c <= last_column_idx:
            axs[last_row_idx][c].set_xlabel('Steps')
            axs[last_row_idx][c].xaxis.set_tick_params(which='both', labelbottom=True)
        else:
            axs[last_row_idx][c].axis('off')
            axs[last_row_idx - 1][c].set_xlabel('Steps')
            axs[last_row_idx - 1][c].xaxis.set_tick_params(which='both', labelbottom=True)
        for r in range(last_row_idx + 1, n_rows):
            axs[r][c].axis('off')

    # the 1D metrics don't need x axis, so move them up, starting with the last in case multiple rows of them
    for metric in reversed(nd1_metrics):
        row = metric_grid_location[metric][0]
        col = metric_grid_location[metric][1]
        axs[row][col].axis('off')
        if row > 0:
            axs[row - 1][col].set_xlabel('Steps')
            axs[row - 1][col].xaxis.set_tick_params(which='both', labelbottom=True)

    colors = sns.hls_palette(n_colors=n_experiments, s=0.95) if n_experiments > 10 else sns.color_palette("colorblind")
    color_offset = defaultdict(lambda: 0)
    # If there is only 1 experiment, we will use alternate colors based on mode
    if n_experiments == 1:
        color_offset['eval'] = 1
        color_offset['test'] = 2
        color_offset['infer'] = 3

    # Per-mode drawing styles; any mode not listed falls back to the final default
    mode_markers = {'train': 'o', 'eval': 'v', 'test': '*'}
    mode_linestyles = {'train': 'solid', 'eval': 'dashed', 'test': 'dotted'}

    handles = []
    labels = []
    has_label = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: False)))  # exp_id : {mode: {type: True}}
    ax_text = defaultdict(lambda: (0.0, 0.9))  # Where to put the text on a given axis
    for exp_idx, experiment in enumerate(experiments):
        for metric, group in metric_histories.items():
            axis = axs[metric_grid_location[metric][0]][metric_grid_location[metric][1]]
            if group.ndim() == 1:
                # Single value
                for mode in group.modes(exp_idx):
                    ax_id = id(axis)
                    prefix = f"{experiment.name} ({mode})" if n_experiments > 1 else f"{mode}"
                    axis.text(ax_text[ax_id][0],
                              ax_text[ax_id][1],
                              f"{prefix}: {group.get_val(exp_idx, mode)}",
                              color=colors[exp_idx + color_offset[mode]],
                              transform=axis.transAxes)
                    # Step the text cursor down one line, wrapping to a new column at the bottom of the axis
                    ax_text[ax_id] = (ax_text[ax_id][0], ax_text[ax_id][1] - 0.1)
                    if ax_text[ax_id][1] < 0:
                        ax_text[ax_id] = (ax_text[ax_id][0] + 0.5, 0.9)
            elif group.ndim() == 2:
                for mode, data in group[exp_idx].items():
                    title = f"{experiment.name} ({mode})" if n_experiments > 1 else f"{mode}"
                    if data.shape[0] < 2:
                        # This particular mode only has a single data point, so need to draw a shape instead of a line
                        xy = [data[0][0], data[0][1]]
                        style = MarkerStyle(marker=mode_markers.get(mode, 's'), fillstyle='full')
                        if isinstance(xy[1], ValWithError):
                            # We've got error bars
                            x = xy[0]
                            y = xy[1]
                            # Plotting requires positive values for error
                            y_err = [[max(1e-9, y.y - y.y_min)], [max(1e-9, y.y_max - y.y)]]
                            axis.errorbar(x=x,
                                          y=y.y,
                                          yerr=y_err,
                                          ecolor=colors[exp_idx + color_offset[mode]],
                                          elinewidth=1.5,
                                          capsize=4.0,
                                          capthick=1.5,
                                          zorder=3)  # zorder to put markers on top of line segments
                            xy[1] = y.y
                        s = axis.scatter(xy[0],
                                         xy[1],
                                         s=40,
                                         c=[colors[exp_idx + color_offset[mode]]],
                                         marker=style,
                                         linewidth=1.0,
                                         edgecolors='black',
                                         zorder=4)  # zorder to put markers on top of line segments
                        if not has_label[exp_idx][mode]['patch']:
                            labels.append(title)
                            handles.append(s)
                            has_label[exp_idx][mode]['patch'] = True
                    else:
                        # We can draw a line
                        y = data[:, 1]
                        y_min = None
                        y_max = None
                        if isinstance(y[0], ValWithError):
                            # Stacking the values gives columns which are read as (min, value, max)
                            y = np.stack(y)
                            y_min = y[:, 0]
                            y_max = y[:, 2]
                            y = y[:, 1]
                            if smooth_factor != 0:
                                y_min = gaussian_filter1d(y_min, sigma=smooth_factor)
                                y_max = gaussian_filter1d(y_max, sigma=smooth_factor)
                        if smooth_factor != 0:
                            y = gaussian_filter1d(y, sigma=smooth_factor)
                        x = data[:, 0]
                        ln = axis.plot(x,
                                       y,
                                       color=colors[exp_idx + color_offset[mode]],
                                       label=title,
                                       linewidth=1.5,
                                       linestyle=mode_linestyles.get(mode, 'dashdot'))
                        if not has_label[exp_idx][mode]['line']:
                            labels.append(title)
                            handles.append(ln[0])
                            has_label[exp_idx][mode]['line'] = True
                        if y_max is not None and y_min is not None:
                            axis.fill_between(x.astype(np.float32),
                                              y_max,
                                              y_min,
                                              facecolor=colors[exp_idx + color_offset[mode]],
                                              alpha=0.3,
                                              zorder=-1)
            else:
                # Some kind of image or matrix. Not implemented yet.
                pass

    plt.tight_layout()

    if labels:
        if share_legend:
            # Sort the labels
            handles = [h for _, h in sorted(zip(labels, handles), key=lambda pair: pair[0])]
            labels = sorted(labels)
            # Split the labels over multiple legends if there are too many to fit in one axis
            elems_per_legend = math.ceil(len(labels) / n_legends)
            i = 0
            for r in range(last_row_idx, n_rows):
                for c in range(last_column_idx + 1 if r == last_row_idx else 0, n_cols):
                    if len(handles) <= i:
                        break
                    axs[r][c].legend(
                        handles[i:i + elems_per_legend],
                        labels[i:i + elems_per_legend],
                        loc='center',
                        fontsize='large' if elems_per_legend <= 6 else 'medium' if elems_per_legend <= 8 else 'small')
                    i += elems_per_legend
        else:
            for i in range(n_rows):
                for j in range(n_cols):
                    if i == last_row_idx and j > last_column_idx:
                        break
                    axs[i][j].legend(loc='best', fontsize='small')
    return fig
def __init__(self,
             inputs: Union[str, Iterable[str]],
             outputs: Union[str, Iterable[str]],
             mode: Union[None, str, Iterable[str]] = None,
             ds_id: Union[None, str, Iterable[str]] = None,
             choices: Union[str, NumpyOp, List[Union[str, NumpyOp]]] = "defaults",
             level: Union[int, float] = 18):
    """Build the pool of augmentation ops and decide how many of them to apply.

    Args:
        inputs: Input key(s) handed to every default augmentation op.
        outputs: Output key(s) handed to every default augmentation op.
        mode: Mode value(s) handed to every default augmentation op and to the parent constructor.
        ds_id: Dataset id value(s) handed to every default augmentation op and to the parent constructor.
        choices: Which augmentations to draw from; converted to a list and resolved by `_parse_aug_choices`.
        level: Augmentation level. `level / 30` is passed to `_parse_aug_choices` as the magnitude coefficient,
            and the level also scales the number of augmentations applied.
    """
    shared = {"inputs": inputs, "outputs": outputs, "mode": mode, "ds_id": ds_id}
    self.default_aug_dict = {
        "Rotate": Rotate(limit=90, **shared),
        "Identity": Identity(**shared),
        "AutoContrast": AutoContrast(**shared),
        "Equalize": Equalize(**shared),
        "Posterize": Posterize(num_bits=7, **shared),
        "Solarize": Solarize(threshold=256, **shared),
        "Sharpness": Sharpness(limit=0.9, **shared),
        "Contrast": Contrast(limit=0.9, **shared),
        "Color": Color(limit=0.9, **shared),
        "Brightness": Brightness(limit=0.9, **shared),
        "ShearX": ShearX(shear_coef=0.5, **shared),
        "ShearY": ShearY(shear_coef=0.5, **shared),
        "TranslateX": TranslateX(shift_limit=0.33, **shared),
        "TranslateY": TranslateY(shift_limit=0.33, **shared)
    }
    aug_options = self._parse_aug_choices(magnitude_coef=(level / 30.), choices=to_list(choices))

    # The wrapper reads and writes the union of the keys used by all selected ops
    inputs, outputs = to_set(inputs), to_set(outputs)
    for aug_op in aug_options:
        inputs.update(aug_op.inputs)
        outputs.update(aug_op.outputs)
    super().__init__(inputs=inputs.union(outputs), outputs=outputs, mode=mode, ds_id=ds_id)

    # Calculating number of augmentation to apply at each training iteration
    fewest = 1
    most = min(len(aug_options), 5)
    expected_count = level * (most - fewest) / 30 + fewest
    guaranteed = int(expected_count)
    leftover_prob = expected_count % 1
    self.ops = [OneOfMultiVar(*aug_options) for _ in range(guaranteed)]
    if leftover_prob > 0:
        # The fractional part becomes one extra augmentation applied with that probability
        self.ops.append(Sometimes(OneOfMultiVar(*aug_options), prob=leftover_prob))
def _sort_traces(traces: List[Trace], available_outputs: Optional[Set[str]] = None) -> List[Trace]:
    """Order traces so that each one runs after the traces which produce its inputs.

    This is essentially a topological sort, but it doesn't seem worthwhile to convert the data into a graph
    representation in order to get the slightly better asymptotic runtime complexity.

    Args:
        traces: A list of traces (not inside schedulers) to be sorted.
        available_outputs: What output keys are already available for the traces to use. If None are provided,
            the sorting algorithm will assume that any keys not generated by traces are being provided by the
            system. This results in a less rigorous sorting.

    Returns:
        The sorted list of `traces`.

    Raises:
        AssertionError: If Traces have circular dependencies or require input keys which are not available.
    """
    produced_by_traces = {key for trace in traces for key in trace.outputs}
    weak_sort = available_outputs is None
    if weak_sort:
        # Assume that anything not generated by a Trace is provided by the system
        consumed_by_traces = {key for trace in traces for key in trace.inputs}
        available_outputs = consumed_by_traces - produced_by_traces
    else:
        available_outputs = to_set(available_outputs)

    def _is_ready(ins: Set[str], outs: Set[str], available: Set[str]) -> bool:
        # Ready when all inputs are available, or (weak mode only) when the inputs which are neither available
        # nor self-produced are not produced by any trace at all.
        if ins <= available:
            return True
        return weak_sort and (ins - outs - available).isdisjoint(produced_by_traces)

    ordered = []
    wildcard_traces = deque()
    pending = deque()
    pending_outputs = set()

    # First pass: place everything which can be placed right away, and set the rest aside
    for trace in traces:
        ins, outs = set(trace.inputs), set(trace.outputs)
        if not ins or isinstance(trace, (TrainEssential, EvalEssential)):
            ordered.append(trace)
            available_outputs |= outs
        elif "*" in ins:
            # Wildcard traces go last; those which also produce outputs are put in front of those which do not
            if outs:
                wildcard_traces.appendleft(trace)
            else:
                wildcard_traces.append(trace)
        elif _is_ready(ins, outs, available_outputs):
            ordered.append(trace)
            available_outputs |= outs
        else:
            pending.append(trace)
            pending_outputs |= outs

    # Second pass: keep cycling through the pending traces until all are placed or no progress is possible
    seen_without_progress = set()
    while pending:
        trace = pending.popleft()
        ins, outs = set(trace.inputs), set(trace.outputs)
        seen_without_progress.add(trace)
        if _is_ready(ins, outs, available_outputs):
            ordered.append(trace)
            available_outputs |= outs
            seen_without_progress.clear()
        elif ins <= (available_outputs | pending_outputs):
            pending.append(trace)
        else:
            raise AssertionError("The {} trace has unsatisfiable inputs: {}".format(
                type(trace).__name__, ", ".join(ins - (available_outputs | pending_outputs))))
        if pending and len(seen_without_progress) == len(pending):
            # Every remaining trace was revisited without any of them becoming ready
            raise AssertionError("Dependency cycle detected amongst traces: {}".format(", ".join(
                [type(tr).__name__ for tr in seen_without_progress])))

    ordered.extend(wildcard_traces)
    return ordered
def plot_logs(experiments,
              smooth_factor=0,
              share_legend=True,
              ignore_metrics=None,
              pretty_names=False,
              include_metrics=None):
    """A function which will plot experiment histories for comparison viewing / analysis

    Each plotted "mode: metric" pair gets its own subplot. A metric with a single recorded value is drawn as a bar,
    anything else is drawn as a line over steps.

    Args:
        experiments (list, Experiment): Experiment(s) to plot
        smooth_factor (float): A non-negative float representing the magnitude of gaussian smoothing to apply (zero for
            none)
        share_legend (bool): Whether to have one legend across all graphs (true) or one legend per graph (false)
        pretty_names (bool): Whether to modify the metric names in graph titles (true) or leave them alone (false)
        ignore_metrics (set): Any keys to ignore during plotting. The provided collection is not modified.
        include_metrics (set): A whitelist of keys to include during plotting. If None then all will be included.

    Returns:
        The handle of the pyplot figure
    """
    experiments = to_list(experiments)
    # Copy before extending so that keys added below can never leak back into the caller's `ignore_metrics`
    ignore_keys = set(to_set(ignore_metrics)) if ignore_metrics else set()
    ignore_keys |= {'epoch', 'progress', 'total_train_steps'}
    include_keys = to_set(include_metrics) if include_metrics else None
    # TODO: epoch should be indicated on the axis (top x axis?)
    # TODO: figure out how ignore_metrics should interact with mode

    # First pass: find the largest recorded step and decide which "mode: key" pairs will get a subplot
    max_time = 0
    metric_keys = set()
    for experiment in experiments:
        history = experiment.history
        for mode, metrics in history.items():
            for key, value in metrics.items():
                if value.keys():
                    max_time = max(max_time, max(value.keys()))
                if key in ignore_keys:
                    continue
                if include_keys and key not in include_keys:
                    ignore_keys.add(key)
                    continue
                if any(isinstance(entry, np.ndarray) for _, entry in value.items()):
                    ignore_keys.add(key)
                    continue  # TODO: nd array not currently supported. maybe in future visualize as heat map?
                metric_keys.add("{}: {}".format(mode, key))
    metric_list = sorted(metric_keys)  # Sort the metrics alphabetically for consistency
    num_metrics = len(metric_list)
    num_experiments = len(experiments)

    if num_metrics == 0:
        # Nothing to draw: hand back an empty figure (note that plt.subplots(111) would create 111 rows of axes)
        return plt.subplots()[0]

    # map the metrics into an n x n grid, then remove any extra rows. Final grid will be m x n with m <= n
    num_cols = math.ceil(math.sqrt(num_metrics))
    metric_grid_location = {key: (idx // num_cols, idx % num_cols) for (idx, key) in enumerate(metric_list)}
    num_rows = math.ceil(num_metrics / num_cols)

    sns.set_context('paper')
    # squeeze=False guarantees a 2D grid of axes, even when there is only a single row and/or column
    fig, axs = plt.subplots(num_rows,
                            num_cols,
                            sharex='all',
                            figsize=(4 * num_cols, 2.8 * num_rows),
                            squeeze=False)

    for metric, (row, col) in metric_grid_location.items():
        axis = axs[row][col]
        axis.set_title(metric if not pretty_names else prettify_metric_name(metric))
        axis.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3))
        axis.grid(linestyle='--')
        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
        axis.spines['bottom'].set_visible(False)
        axis.spines['left'].set_visible(False)
        axis.tick_params(bottom=False, left=False)

    for i in range(num_cols):
        axs[num_rows - 1][i].set_xlabel('Steps')

    # some of the columns in the last row might be unused, so disable them
    last_column_idx = num_cols - (num_rows * num_cols - num_metrics) - 1
    for i in range(last_column_idx + 1, num_cols):
        axs[num_rows - 1][i].axis('off')
        # The plot directly above a disabled cell becomes the bottom of its column, so it needs the x labels
        axs[num_rows - 2][i].set_xlabel('Steps')
        axs[num_rows - 2][i].xaxis.set_tick_params(which='both', labelbottom=True)

    colors = sns.hls_palette(n_colors=num_experiments,
                             s=0.95) if num_experiments > 10 else sns.color_palette("colorblind")

    # Second pass: draw every experiment onto the subplot of each of its metrics
    handles = []
    labels = []
    bar_counter = defaultdict(int)  # How many bars have already been drawn on each metric's subplot
    for (color_idx, experiment) in enumerate(experiments):
        labels.append(experiment.name)
        metrics = {
            "{}: {}".format(mode, key): val
            for mode, sub in experiment.history.items() for key, val in sub.items() if key not in ignore_keys
        }
        for (idx, (metric, value)) in enumerate(metrics.items()):
            row, col = metric_grid_location[metric]
            data = np.array(list(value.items()))
            if len(data) == 1:
                # A single recorded value is drawn as a bar rather than as a line
                y = data[0][1]
                if isinstance(y, str):
                    # NOTE: this pattern needs at least two digits, so a string such as "5" is left unparsed
                    vals = [float(x) for x in re.findall(r'\d+\.?\d+', y)]
                    if len(vals) == 1:
                        y = vals[0]
                width = max(10, max_time // 10)
                # Bars for the same metric alternate to the left and right of the middle of the x axis
                x = max_time // 2 + (2 * (bar_counter[metric] % 2) - 1) * width * math.ceil(bar_counter[metric] / 2)
                ln = axs[row][col].bar(x=x, height=y, color=colors[color_idx], label=experiment.name, width=width)
                bar_counter[metric] += 1
            else:
                y = data[:, 1] if smooth_factor == 0 else gaussian_filter1d(data[:, 1], sigma=smooth_factor)
                ln = axs[row][col].plot(data[:, 0], y, color=colors[color_idx], label=experiment.name, linewidth=1.5)
            if idx == 0:
                # One legend handle per experiment, taken from the first metric which it draws
                handles.append(ln[0])

    plt.tight_layout()

    if len(labels) > 1 or labels[0]:
        if share_legend and num_rows > 1:
            if last_column_idx == num_cols - 1:
                # The grid is completely full, so the shared legend goes underneath the plots
                fig.subplots_adjust(bottom=0.15)
                fig.legend(handles, labels, loc='lower center', ncol=num_cols + 1)
            else:
                # Otherwise the first unused cell of the last row hosts the shared legend
                axs[num_rows - 1][last_column_idx + 1].legend(handles, labels, loc='center', fontsize='large')
        else:
            for i in range(num_rows):
                for j in range(num_cols):
                    if i == num_rows - 1 and j > last_column_idx:
                        break
                    axs[i][j].legend(loc='best', fontsize='small')
    return fig
def get_current_items(items: Iterable[Union[T, Scheduler[T]]],
                      run_modes: Optional[Union[str, Iterable[str]]] = None,
                      epoch: Optional[int] = None,
                      ds_id: Optional[str] = None) -> List[T]:
    """Pick out the items which apply to a particular mode, epoch, and dataset id.

    Args:
        items: The candidate items, any of which may be wrapped in a Scheduler.
        run_modes: The execution mode(s) of interest. One or more of "train", "eval", "test", or "infer". If None,
            items are returned regardless of their mode.
        epoch: The epoch of interest. If None, every value held by a Scheduler is considered.
        ds_id: The dataset id of interest. If None, items are returned regardless of their ds_id. An empty string
            never satisfies a positive requirement ('' != 'ds1'), but does satisfy negated ones ('' == '!ds1').

    Returns:
        The items which should be executed.
    """
    wanted_modes = to_set(run_modes)

    def _mode_ok(candidate) -> bool:
        """Check whether `candidate` is allowed to run under `wanted_modes`."""
        if not hasattr(candidate, "mode"):
            return True
        declared = candidate.mode
        # An item with no mode restrictions always runs, as does one whose modes overlap with the request
        if not declared or declared.intersection(wanted_modes):
            return True
        # With no requested modes, everything is allowed
        return not wanted_modes

    def _ds_id_ok(candidate) -> bool:
        """Check whether `candidate` is allowed to run for the dataset `ds_id`."""
        if not hasattr(candidate, "ds_id"):
            return True
        declared = candidate.ds_id
        # If the object has no requirements, then allow it
        if not declared:
            return True
        # blacklist check (before whitelist due to desired empty string behavior)
        # if any of ds_id starts with "!", then they will all start with "!"
        negated = [name.startswith("!") for name in declared]
        if any(negated) and all(ds_id != name[1:] for name in declared):
            # Note that empty string will pass this check (unless target is literally "!")
            return True
        # whitelist check. Note that empty string will fail this check
        if ds_id in declared:
            return True
        # With no requested ds_id, everything is allowed
        return ds_id is None

    selected = []
    for entry in items:
        # Unwrap schedulers into the concrete value(s) which they hold
        if not isinstance(entry, Scheduler):
            candidates = [entry]
        elif epoch is None:
            candidates = entry.get_all_values()
        else:
            candidates = [entry.get_current_value(epoch)]
        for candidate in candidates:
            mode_ok = _mode_ok(candidate)
            ds_ok = _ds_id_ok(candidate)
            # Falsy candidates (such as a None placeholder) are never selected
            if candidate and mode_ok and ds_ok:
                selected.append(candidate)
    return selected