Example #1
 def __init__(self,
              inputs: Union[None, str, Iterable[str]] = None,
              outputs: Union[None, str, Iterable[str]] = None,
              mode: Union[None, str, Iterable[str]] = None,
              ds_id: Union[None, str, Iterable[str]] = None) -> None:
     self.inputs = check_io_names(to_list(inputs))
     self.outputs = check_io_names(to_list(outputs))
     self.mode = parse_modes(to_set(mode))
     self.ds_id = check_ds_id(to_set(ds_id))
     self.in_list = not isinstance(inputs, (str, type(None)))
     self.out_list = not isinstance(outputs, (str, type(None)))
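All of these examples lean on FastEstimator's to_list/to_set utilities to normalize user-facing arguments that may be None, a single value, or an iterable. Below is a minimal sketch of their assumed behavior (an illustration, not the library source): None maps to an empty container, a lone value is wrapped, and any other iterable is converted.

def to_list(data):
    # Assumed behavior: None -> [], lone value -> [value], iterable -> list(iterable)
    if data is None:
        return []
    if isinstance(data, (str, bytes)) or not hasattr(data, "__iter__"):
        return [data]
    return list(data)

def to_set(data):
    # Assumed behavior: None -> set(), lone value -> {value}, iterable -> set(iterable)
    if data is None:
        return set()
    if isinstance(data, (str, bytes)) or not hasattr(data, "__iter__"):
        return {data}
    return set(data)

print(to_list("x"), to_list(None), to_set(("a", "b")))  # ['x'] [] {'a', 'b'} (set order may vary)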
Example #2
 def __init__(self,
              inputs: Union[None, str, Iterable[str]] = None,
              outputs: Union[None, str, Iterable[str]] = None,
              mode: Union[None, str, Iterable[str]] = None,
              ds_id: Union[None, str, Iterable[str]] = None) -> None:
     self.inputs = check_io_names(to_list(inputs))
     self.outputs = check_io_names(to_list(outputs))
     self.mode = parse_modes(to_set(mode))
     self.ds_id = check_ds_id(to_set(ds_id))
     self.fe_monitor_names = set(
     )  # The use-case here is rare enough that we don't want to add this to the init sig
Example #3
 def __init__(self,
              index: str,
              metric: str,
              n_max_to_keep: int = 5,
              n_min_to_keep: int = 5,
              list_to_keep: Optional[Iterable[Any]] = None,
              epoch_frequency: int = 1,
              mode: Union[None, str, Set[str]] = "eval",
              outputs: Optional[str] = None):
     # TODO - highlight 'interesting' samples (sudden changes in relative ordering?)
     super().__init__(inputs=[index, metric],
                      outputs=outputs or f"{metric}_by_{index}",
                      mode=mode)
     self.points = []
     if n_max_to_keep < 0:
         raise ValueError(
             f"n_max_to_keep must be non-negative, but got {n_max_to_keep}")
     self.n_max_to_keep = n_max_to_keep
     if n_min_to_keep < 0:
         raise ValueError(
             f"n_min_to_keep must be non-negative, but got {n_min_to_keep}")
     self.n_min_to_keep = n_min_to_keep
     self.idx_to_keep = to_set(list_to_keep)
     # Ideally the step and metric would be separated to save space, but a given idx may not appear each epoch
     self.index_history = defaultdict(
         lambda: defaultdict(list))  # {mode: {idx: [(step, metric)]}}
     self.epoch_frequency = epoch_frequency
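The index_history attribute above is a two-level defaultdict, so both the mode level and the index level materialize lazily on first access. A minimal sketch of how it accumulates (step, metric) pairs:

from collections import defaultdict

# {mode: {idx: [(step, metric)]}} -- both levels are created on demand
index_history = defaultdict(lambda: defaultdict(list))
index_history["eval"][42].append((100, 0.87))  # idx 42 scored 0.87 at step 100
index_history["eval"][42].append((200, 0.91))  # same idx, a later epoch
print(index_history["eval"][42])  # [(100, 0.87), (200, 0.91)]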
Example #4
def get_current_items(items: Iterable[Union[T, Scheduler[T]]],
                      run_modes: Optional[Union[str, Iterable[str]]] = None,
                      epoch: Optional[int] = None) -> List[T]:
    """Select items which should be executed for given mode and epoch.

    Args:
        items: A list of possible items or Schedulers of items to choose from.
        run_modes: The desired execution mode. One or more of "train", "eval", "test", or "infer". If None, items of
            all modes will be returned.
        epoch: The desired execution epoch. If None, items across all epochs will be returned.

    Returns:
        The items which should be executed.
    """
    selected_items = []
    run_modes = to_set(run_modes)
    for item in items:
        if isinstance(item, Scheduler):
            if epoch is None:
                item = item.get_all_values()
            else:
                item = [item.get_current_value(epoch)]
        else:
            item = [item]
        for item_ in item:
            if item_ and (not run_modes or not hasattr(item_, "mode")
                          or not item_.mode
                          or item_.mode.intersection(run_modes)):
                selected_items.append(item_)
    return selected_items
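The compound conditional at the end reduces to a simple predicate: an item passes the filter when no run_modes were requested, when it declares no mode restriction of its own, or when the two mode sets intersect. A standalone sketch of that logic (hypothetical inputs, not the real Trace/Scheduler API):

def mode_matches(item_mode, run_modes):
    # Keep the item if either side imposes no restriction, or the sets overlap
    return not run_modes or not item_mode or bool(item_mode & run_modes)

print(mode_matches({"train", "eval"}, {"eval"}))  # True  (sets intersect)
print(mode_matches(set(), {"test"}))              # True  (item has no restriction)
print(mode_matches({"train"}, {"test"}))          # False (disjoint sets)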
Example #5
 def __init__(self,
              pipeline: Pipeline,
              network: BaseNetwork,
              epochs: int,
              train_steps_per_epoch: Optional[int] = None,
              eval_steps_per_epoch: Optional[int] = None,
              traces: Union[None, Trace, Scheduler[Trace],
                            Iterable[Union[Trace,
                                           Scheduler[Trace]]]] = None,
              log_steps: Optional[int] = 100,
              monitor_names: Union[None, str, Iterable[str]] = None):
     self.traces_in_use = []
     self.filepath = os.path.realpath(
         inspect.stack()[2].filename)  # Record this for history tracking
     assert log_steps is None or log_steps >= 0, \
         "log_steps must be None or positive (or 0 to disable only train logging)"
     self.monitor_names = to_set(monitor_names) | network.get_loss_keys()
     self.system = System(network=network,
                          pipeline=pipeline,
                          traces=to_list(traces),
                          log_steps=log_steps,
                          total_epochs=epochs,
                          train_steps_per_epoch=train_steps_per_epoch,
                          eval_steps_per_epoch=eval_steps_per_epoch,
                          system_config=self.fe_summary())
Example #6
 def __init__(self,
              label: str,
              metric: str,
              label_mapping: Optional[Dict[str, Any]] = None,
              bounds: Union[None, str, Iterable[Union[str, None]]] = "std",
              mode: Union[None, str, Iterable[str]] = "eval",
              ds_id: Union[None, str, Iterable[str]] = None,
              outputs: Optional[str] = None):
     super().__init__(inputs=[label, metric],
                      outputs=outputs or f"{metric}_by_{label}",
                      mode=mode,
                      ds_id=ds_id)
     self.points = []
     self.label_summaries = DefaultKeyDict(
         default=lambda x: Summary(name=x))
     self.label_mapping = {val: key
                           for key, val in label_mapping.items()
                           } if label_mapping else None
     bounds = to_set(bounds)
     if not bounds:
         bounds.add(None)
     for option in bounds:
         if option not in (None, "std", "range"):
             raise ValueError(
                 f"'interval' must be either None, 'std', or 'range', but got '{bounds}'."
             )
     self.bounds = bounds
Example #7
 def __init__(self,
              inputs: Union[None, str, Iterable[str]] = None,
              outputs: Union[None, str, Iterable[str]] = None,
              mode: Union[None, str, Iterable[str]] = None) -> None:
     self.inputs = to_list(inputs)
     self.outputs = to_list(outputs)
     self.mode = parse_modes(to_set(mode))
Example #8
 def __init__(self,
              inputs: Union[None, str, Iterable[str]] = None,
              outputs: Union[None, str, Iterable[str]] = None,
              mode: Union[None, str, Iterable[str]] = None) -> None:
     self.inputs = to_list(inputs)
     self.outputs = to_list(outputs)
     self.mode = parse_modes(to_set(mode))
     self.in_list = not isinstance(inputs, (str, type(None)))
     self.out_list = not isinstance(outputs, (str, type(None)))
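The in_list/out_list flags above record whether the caller passed a collection rather than a single key or None, presumably so the op can hand results back in the same shape it received them. A quick illustration of the check:

for arg in (None, "x", ["x"]):
    print(repr(arg), not isinstance(arg, (str, type(None))))
# None False / 'x' False / ['x'] True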
Example #9
    def _warmup(self, eager: bool = True) -> None:
        """Perform a test run of each pipeline and network signature epoch to make sure that training won't fail later.

        Traces are not executed in the warmup since they are likely to contain state variables which could become
        corrupted by running extra steps.

        Args:
            eager: Whether to run the training in eager mode. This is only related to TensorFlow training because
                PyTorch by nature is always in eager mode.
        """
        all_traces = get_current_items(self.traces_in_use, run_modes={"train", "eval"})
        sort_traces(all_traces)  # This ensures that the traces can sort properly for on_begin and on_end
        monitor_names = self.monitor_names
        for mode in self.pipeline.get_modes() - {"test"}:
            scheduled_items = self.pipeline.get_scheduled_items(mode) + self.network.get_scheduled_items(
                mode) + self.get_scheduled_items(mode)
            signature_epochs = get_signature_epochs(scheduled_items, self.system.total_epochs, mode=mode)
            epochs_with_data = self.pipeline.get_epochs_with_data(total_epochs=self.system.total_epochs, mode=mode)
            for epoch in signature_epochs:
                if epoch not in epochs_with_data:
                    continue
                network_output_keys = self.network.get_all_output_keys(mode, epoch)
                network_input_keys = self.network.get_effective_input_keys(mode, epoch)
                trace_input_keys = set()
                trace_output_keys = {"*"}
                traces = get_current_items(self.traces_in_use, run_modes=mode, epoch=epoch)
                for idx, trace in enumerate(traces):
                    if idx > 0:  # ignore TrainEssential and EvalEssential's inputs for unmet requirement checking
                        trace_input_keys.update(trace.inputs)
                    trace_output_keys.update(trace.outputs)
                # key checking
                loader = self._configure_loader(
                    self.pipeline.get_loader(mode,
                                             epoch,
                                             output_keys=trace_input_keys - network_output_keys | network_input_keys))
                with Suppressor():
                    if isinstance(loader, tf.data.Dataset):
                        batch = list(loader.take(1))[0]
                    else:
                        batch = next(iter(loader))
                batch = self._configure_tensor(loader, batch)
                assert isinstance(batch, dict), "please make sure the data output format is a dictionary"
                pipeline_output_keys = to_set(batch.keys())

                monitor_names = monitor_names - (pipeline_output_keys | network_output_keys)
                unmet_requirements = trace_input_keys - (pipeline_output_keys | network_output_keys | trace_output_keys)
                assert not unmet_requirements, \
                    "found missing key(s) during epoch {} mode {}: {}".format(epoch, mode, unmet_requirements)
                sort_traces(traces, available_outputs=pipeline_output_keys | network_output_keys)
                trace_input_keys.update(traces[0].inputs)
                self.network.load_epoch(mode, epoch, output_keys=trace_input_keys, warmup=True, eager=eager)
                self.network.run_step(batch)
                self.network.unload_epoch()
        assert not monitor_names, "found missing key(s): {}".format(monitor_names)
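The unmet-requirements check above is plain set arithmetic: a trace input counts as missing only if neither the pipeline, the network, nor another trace can produce it. A worked example with made-up keys:

# Hypothetical keys, purely to illustrate the check above
trace_input_keys = {"y_pred", "y_true", "lr"}
pipeline_output_keys = {"x", "y_true"}
network_output_keys = {"y_pred", "ce"}
trace_output_keys = {"*"}  # traces may also consume other traces' outputs
unmet = trace_input_keys - (pipeline_output_keys | network_output_keys | trace_output_keys)
print(unmet)  # {'lr'} -> this would trigger the assertion above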
Example #10
    def __init__(self,
                 log_dir: str = 'logs',
                 update_freq: Union[None, int, str] = 100,
                 write_graph: bool = True,
                 write_images: Union[None, str, List[str]] = None,
                 weight_histogram_freq: Union[None, int, str] = None,
                 paint_weights: bool = False,
                 embedding_freq: Union[None, int, str] = 'epoch',
                 write_embeddings: Union[None, str, List[str]] = None,
                 embedding_labels: Union[None, str, List[str]] = None,
                 embedding_images: Union[None, str, List[str]] = None) -> None:
        super().__init__(inputs=["*"] + to_list(write_images) +
                         to_list(write_embeddings) +
                         to_list(embedding_labels) + to_list(embedding_images))
        self.root_log_dir = log_dir
        self.update_freq = self._parse_freq(update_freq)
        self.write_graph = write_graph
        self.painted_graphs = set()
        self.write_images = to_set(write_images)
        self.histogram_freq = self._parse_freq(weight_histogram_freq)
        if paint_weights and self.histogram_freq.freq == 0:
            self.histogram_freq.is_step = False
            self.histogram_freq.freq = 1
        self.paint_weights = paint_weights
        if write_embeddings is None and embedding_labels is None and embedding_images is None:
            # Speed up if-check short-circuiting later
            embedding_freq = None
        self.embedding_freq = self._parse_freq(embedding_freq)
        write_embeddings = to_list(write_embeddings)
        embedding_labels = to_list(embedding_labels)
        if embedding_labels:
            assert len(embedding_labels) == len(write_embeddings), \
                f"Expected {len(write_embeddings)} embedding_labels keys, but recieved {len(embedding_labels)}. Use \
                None to pad out the list if you have labels for only a subset of all embeddings."

        else:
            embedding_labels = [None for _ in range(len(write_embeddings))]
        embedding_images = to_list(embedding_images)
        if embedding_images:
            assert len(embedding_images) == len(write_embeddings), \
                f"Expected {len(write_embeddings)} embedding_images keys, but recieved {len(embedding_images)}. Use \
                None to pad out the list if you have labels for only a subset of all embeddings."

        else:
            embedding_images = [None for _ in range(len(write_embeddings))]
        self.write_embeddings = [
            (feature, label, img_label) for feature, label, img_label in zip(
                write_embeddings, embedding_labels, embedding_images)
        ]
        self.collected_embeddings = defaultdict(list)
Example #11
    def _run_epoch(self) -> None:
        """A method to perform an epoch of activity.

        This method requires that the current mode and epoch already be specified within the self.system object.
        """
        traces = get_current_items(self.traces_in_use,
                                   run_modes=self.system.mode,
                                   epoch=self.system.epoch_idx)
        trace_input_keys = set()
        for trace in traces:
            trace_input_keys.update(trace.inputs)
        loader = self._configure_loader(
            self.pipeline.get_loader(self.system.mode, self.system.epoch_idx))
        iterator = iter(loader)
        self.network.load_epoch(mode=self.system.mode,
                                epoch=self.system.epoch_idx,
                                output_keys=trace_input_keys)
        self.system.batch_idx = None
        with Suppressor():
            batch = next(iterator)
        traces = self._sort_traces(
            traces,
            available_outputs=to_set(batch.keys())
            | self.network.get_all_output_keys(self.system.mode,
                                               self.system.epoch_idx))
        self._run_traces_on_epoch_begin(traces=traces)
        while True:
            try:
                if self.system.mode == "train":
                    self.system.update_global_step()
                self.system.update_batch_idx()
                batch = self._configure_tensor(loader, batch)
                self._run_traces_on_batch_begin(batch, traces=traces)
                batch, prediction = self.network.run_step(batch)
                self._run_traces_on_batch_end(batch, prediction, traces=traces)
                if isinstance(loader, DataLoader) and (
                    (self.system.batch_idx
                     == self.system.max_train_steps_per_epoch
                     and self.system.mode == "train") or
                    (self.system.batch_idx
                     == self.system.max_eval_steps_per_epoch
                     and self.system.mode == "eval")):
                    raise StopIteration
                with Suppressor():
                    batch = next(iterator)
            except StopIteration:
                break
        self._run_traces_on_epoch_end(traces=traces)
        self.network.unload_epoch()
Example #12
    def __init__(self, *numpy_ops: NumpyOp) -> None:
        inputs = to_set(numpy_ops[0].inputs)
        outputs = to_set(numpy_ops[0].outputs)
        mode = numpy_ops[0].mode
        ds_id = numpy_ops[0].ds_id
        self.in_list = numpy_ops[0].in_list
        self.out_list = numpy_ops[0].out_list
        for op in numpy_ops[1:]:
            assert self.in_list == op.in_list, "All ops within OneOf must share the same input configuration"
            assert self.out_list == op.out_list, "All ops within OneOf must share the same output configuration"
            assert mode == op.mode, "All ops within a OneOf must share the same mode"

            for inp in op.inputs:
                inputs.add(inp)

            for out in op.outputs:
                outputs.add(out)

        # Bypassing OneOf Op's restriction of same input and output key(s) on the list of passed NumpyOps.
        super(OneOf, self).__init__(inputs=inputs.union(outputs),
                                    outputs=outputs,
                                    mode=mode,
                                    ds_id=ds_id)
        self.ops = numpy_ops
Example #13
    def load_epoch(self, epoch, mode):
        """ This function loads stable computational graph for the current epoch.

        Args:
            epoch: Training epoch number
            mode: 'train' or 'eval'

        Returns:
             list of the models, epoch losses
        """
        ops = self.op_schedule[mode].get_current_value(epoch)
        epoch_losses = set()
        for op in ops:
            if isinstance(op, Loss):
                epoch_losses |= to_set(op.outputs)
        self.epoch_losses = to_list(epoch_losses)
        return ops
Example #14
    def get_modes(self, epoch: Optional[int] = None) -> Set[str]:
        """Get the modes for which the Pipeline has data.

        Args:
            epoch: The current epoch index

        Returns:
            The modes for which the Pipeline has data.
        """
        if epoch is None:
            all_modes = set(self.data.keys())
        else:
            all_modes = []
            for mode, dataset in self.data.items():
                if isinstance(dataset, Scheduler):
                    dataset = dataset.get_current_value(epoch)
                if dataset:
                    all_modes.append(mode)
        return to_set(all_modes)
Example #15
 def __init__(self,
              pipeline: Pipeline,
              network: BaseNetwork,
              epochs: int,
              max_train_steps_per_epoch: Optional[int] = None,
              max_eval_steps_per_epoch: Optional[int] = None,
              traces: Union[None, Trace, Scheduler[Trace], Iterable[Union[Trace, Scheduler[Trace]]]] = None,
              log_steps: Optional[int] = 100,
              monitor_names: Union[None, str, Iterable[str]] = None):
     self.traces_in_use = []
     assert log_steps is None or log_steps >= 0, \
         "log_steps must be None or positive (or 0 to disable only train logging)"
     self.monitor_names = to_set(monitor_names) | network.get_loss_keys()
     self.system = System(network=network,
                          pipeline=pipeline,
                          traces=to_list(traces),
                          log_steps=log_steps,
                          total_epochs=epochs,
                          max_train_steps_per_epoch=max_train_steps_per_epoch,
                          max_eval_steps_per_epoch=max_eval_steps_per_epoch,
                          system_config=self.fe_summary())
Example #16
    def __init__(self,
                 test_cases: Union[TestCase, List[TestCase]],
                 save_path: str,
                 test_title: Optional[str] = None,
                 data_id: Optional[str] = None) -> None:

        self.check_pdf_dependency()

        self.test_title = test_title
        self.report_name = None

        self.instance_cases = []
        self.aggregate_cases = []
        self.data_id = data_id

        all_inputs = to_set(self.data_id)
        for case in to_list(test_cases):
            all_inputs.update(case.criteria_inputs)
            if case.aggregate:
                self.aggregate_cases.append(case)
            else:
                self.instance_cases.append(case)

        path = os.path.normpath(save_path)
        path = os.path.abspath(path)
        root_dir = os.path.dirname(path)
        report = os.path.basename(path) or 'report'
        report = report.split('.')[0]
        self.save_dir = os.path.join(root_dir, report)
        self.resource_dir = os.path.join(self.save_dir, "resources")
        os.makedirs(self.save_dir, exist_ok=True)
        os.makedirs(self.resource_dir, exist_ok=True)

        self.json_summary = {}
        # PDF document related
        self.doc = None
        self.test_id = None

        super().__init__(inputs=all_inputs, mode="test")
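The save-path handling above turns whatever the user passes (a bare name, a relative path, or a file name with an extension) into a report directory plus a resources subfolder. A worked example with a hypothetical path:

import os

path = os.path.abspath(os.path.normpath("out/report.pdf"))
root_dir = os.path.dirname(path)                             # .../out
report = (os.path.basename(path) or 'report').split('.')[0]  # 'report'
save_dir = os.path.join(root_dir, report)                    # .../out/report
resource_dir = os.path.join(save_dir, "resources")           # .../out/report/resources
print(save_dir, resource_dir)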
Example #17
    def _run_epoch(self, eager: bool) -> None:
        """A method to perform an epoch of activity.

        This method requires that the current mode and epoch already be specified within the self.system object.

        Args:
            eager: Whether to run the training in eager mode. This is only related to TensorFlow training because
                PyTorch by nature is always in eager mode.
        """
        ds_ids = self.pipeline.get_ds_ids(self.system.epoch_idx,
                                          self.system.mode)
        epoch_traces = sort_traces(get_current_items(
            self.traces_in_use,
            run_modes=self.system.mode,
            epoch=self.system.epoch_idx),
                                   ds_ids=ds_ids)
        self._run_traces_on_epoch_begin(traces=epoch_traces)
        self.system.batch_idx = None
        end_epoch_data = Data(
        )  # We will aggregate data over on_ds_end and put it into on_epoch_end for printing
        # run for each dataset
        for self.system.ds_id in ds_ids:
            ds_traces = get_current_items(self.traces_in_use,
                                          run_modes=self.system.mode,
                                          epoch=self.system.epoch_idx,
                                          ds_id=self.system.ds_id)
            trace_input_keys = set()
            for ds_trace in ds_traces:
                trace_input_keys.update(ds_trace.inputs)
            network_input_keys = self.network.get_effective_input_keys(
                mode=self.system.mode,
                epoch=self.system.epoch_idx,
                ds_id=self.system.ds_id)
            network_output_keys = self.network.get_all_output_keys(
                mode=self.system.mode,
                epoch=self.system.epoch_idx,
                ds_id=self.system.ds_id)
            self.network.load_epoch(mode=self.system.mode,
                                    epoch=self.system.epoch_idx,
                                    ds_id=self.system.ds_id,
                                    output_keys=trace_input_keys,
                                    eager=eager)

            with self.pipeline(
                    mode=self.system.mode,
                    epoch=self.system.epoch_idx,
                    ds_id=self.system.ds_id,
                    steps_per_epoch=self.system.steps_per_epoch,
                    output_keys=trace_input_keys - network_output_keys
                    | network_input_keys) as loader:
                loader = self._configure_loader(loader)
                iterator = iter(loader)
                with Suppressor():
                    batch = next(iterator)
                ds_traces = sort_traces(ds_traces,
                                        available_outputs=to_set(batch.keys())
                                        | network_output_keys,
                                        ds_ids=ds_ids)
                per_ds_traces = [
                    trace for trace in ds_traces
                    if isinstance(trace, PerDSTrace)
                ]
                self._run_traces_on_ds_begin(traces=per_ds_traces)
                while True:
                    try:
                        if self.system.mode == "train":
                            self.system.update_global_step()
                        self.system.update_batch_idx()
                        batch = self._configure_tensor(loader, batch)
                        self._run_traces_on_batch_begin(batch,
                                                        traces=ds_traces)
                        batch, prediction = self.network.run_step(batch)
                        self._run_traces_on_batch_end(batch,
                                                      prediction,
                                                      traces=ds_traces)
                        if isinstance(loader, DataLoader) and (
                            (self.system.batch_idx
                             == self.system.train_steps_per_epoch
                             and self.system.mode == "train") or
                            (self.system.batch_idx
                             == self.system.eval_steps_per_epoch
                             and self.system.mode == "eval")):
                            raise StopIteration
                        with Suppressor():
                            batch = next(iterator)
                    except StopIteration:
                        break
                self._run_traces_on_ds_end(traces=per_ds_traces,
                                           data=end_epoch_data)
            self.network.unload_epoch()
        self._run_traces_on_epoch_end(traces=epoch_traces, data=end_epoch_data)
Example #18
 def get_fe_loss_keys(self) -> Set[str]:
     return to_set(self.loss_name)
Example #19
def plot_logs(experiments: List[Summary],
              smooth_factor: float = 0,
              share_legend: bool = True,
              ignore_metrics: Optional[Set[str]] = None,
              pretty_names: bool = False,
              include_metrics: Optional[Set[str]] = None) -> plt.Figure:
    """A function which will plot experiment histories for comparison viewing / analysis.

    Args:
        experiments: Experiment(s) to plot.
        smooth_factor: A non-negative float representing the magnitude of Gaussian smoothing to apply (zero for none).
        share_legend: Whether to have one legend across all graphs (True) or one legend per graph (False).
        pretty_names: Whether to modify the metric names in graph titles (True) or leave them alone (False).
        ignore_metrics: Any keys to ignore during plotting.
        include_metrics: A whitelist of keys to include during plotting. If None then all will be included.

    Returns:
        The handle of the pyplot figure.
    """
    # Sort to keep same colors between multiple runs of visualization
    experiments = humansorted(to_list(experiments), lambda exp: exp.name)
    n_experiments = len(experiments)
    if n_experiments == 0:
        return plt.subplots(111)[0]

    ignore_keys = ignore_metrics or set()
    ignore_keys = to_set(ignore_keys)
    ignore_keys |= {'epoch'}
    include_keys = to_set(include_metrics)
    # TODO: epoch should be indicated on the axis (top x axis?). Problem - different epochs per experiment.
    # TODO: figure out how ignore_metrics should interact with mode

    metric_histories = defaultdict(_MetricGroup)  # metric: MetricGroup
    for idx, experiment in enumerate(experiments):
        history = experiment.history
        # Since python dicts remember insertion order, sort the history so that train mode is always plotted on bottom
        for mode, metrics in sorted(history.items(),
                                    key=lambda x: 0 if x[0] == 'train' else 1
                                    if x[0] == 'eval' else 2
                                    if x[0] == 'test' else 3
                                    if x[0] == 'infer' else 4):
            for metric, step_val in metrics.items():
                if len(step_val) == 0:
                    continue  # Ignore empty metrics
                if metric in ignore_keys:
                    continue
                if include_keys and metric not in include_keys:
                    continue
                metric_histories[metric].add(idx, mode, step_val)

    metric_list = list(sorted(metric_histories.keys()))
    if len(metric_list) == 0:
        return plt.subplots(111)[0]

    # If sharing legend and there is more than 1 plot, then dedicate 1 subplot for the legend
    share_legend = share_legend and (len(metric_list) > 1)
    n_legends = math.ceil(n_experiments / 4)
    n_plots = len(metric_list) + (share_legend * n_legends)

    # map the metrics into an n x n grid, then remove any extra columns. Final grid will be n x m with m <= n
    n_rows = math.ceil(math.sqrt(n_plots))
    n_cols = math.ceil(n_plots / n_rows)
    metric_grid_location = {}
    nd1_metrics = []
    idx = 0
    for metric in metric_list:
        if metric_histories[metric].ndim() == 1:
            # Delay placement of the 1D plots until the end
            nd1_metrics.append(metric)
        else:
            metric_grid_location[metric] = (idx // n_cols, idx % n_cols)
            idx += 1
    for metric in nd1_metrics:
        metric_grid_location[metric] = (idx // n_cols, idx % n_cols)
        idx += 1

    sns.set_context('paper')
    fig, axs = plt.subplots(n_rows,
                            n_cols,
                            sharex='all',
                            figsize=(4 * n_cols, 2.8 * n_rows))

    # If only one row, need to re-format the axs object for consistency. Likewise for columns
    if n_rows == 1:
        axs = [axs]
        if n_cols == 1:
            axs = [axs]

    for metric in metric_grid_location.keys():
        axis = axs[metric_grid_location[metric][0]][
            metric_grid_location[metric][1]]
        if metric_histories[metric].ndim() == 1:
            axis.grid(linestyle='')
        else:
            axis.grid(linestyle='--')
            axis.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3))
        axis.set_title(
            metric if not pretty_names else prettify_metric_name(metric),
            fontweight='bold')
        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
        axis.spines['bottom'].set_visible(False)
        axis.spines['left'].set_visible(False)
        axis.tick_params(bottom=False, left=False)

    # some of the later rows/columns might be unused or reserved for legends, so disable them
    last_row_idx = math.ceil(len(metric_list) / n_cols) - 1
    last_column_idx = len(metric_list) - last_row_idx * n_cols - 1
    for c in range(n_cols):
        if c <= last_column_idx:
            axs[last_row_idx][c].set_xlabel('Steps')
            axs[last_row_idx][c].xaxis.set_tick_params(which='both',
                                                       labelbottom=True)
        else:
            axs[last_row_idx][c].axis('off')
            axs[last_row_idx - 1][c].set_xlabel('Steps')
            axs[last_row_idx - 1][c].xaxis.set_tick_params(which='both',
                                                           labelbottom=True)
        for r in range(last_row_idx + 1, n_rows):
            axs[r][c].axis('off')

    # the 1D metrics don't need x axis, so move them up, starting with the last in case multiple rows of them
    for metric in reversed(nd1_metrics):
        row = metric_grid_location[metric][0]
        col = metric_grid_location[metric][1]
        axs[row][col].axis('off')
        if row > 0:
            axs[row - 1][col].set_xlabel('Steps')
            axs[row - 1][col].xaxis.set_tick_params(which='both',
                                                    labelbottom=True)

    colors = sns.hls_palette(
        n_colors=n_experiments,
        s=0.95) if n_experiments > 10 else sns.color_palette("colorblind")
    color_offset = defaultdict(lambda: 0)
    # If there is only 1 experiment, we will use alternate colors based on mode
    if n_experiments == 1:
        color_offset['eval'] = 1
        color_offset['test'] = 2
        color_offset['infer'] = 3

    handles = []
    labels = []
    has_label = defaultdict(lambda: defaultdict(lambda: defaultdict(
        lambda: False)))  # exp_id : {mode: {type: True}}
    ax_text = defaultdict(lambda:
                          (0.0, 0.9))  # Where to put the text on a given axis
    for exp_idx, experiment in enumerate(experiments):
        for metric, group in metric_histories.items():
            axis = axs[metric_grid_location[metric][0]][
                metric_grid_location[metric][1]]
            if group.ndim() == 1:
                # Single value
                for mode in group.modes(exp_idx):
                    ax_id = id(axis)
                    prefix = f"{experiment.name} ({mode})" if n_experiments > 1 else f"{mode}"
                    axis.text(ax_text[ax_id][0],
                              ax_text[ax_id][1],
                              f"{prefix}: {group.get_val(exp_idx, mode)}",
                              color=colors[exp_idx + color_offset[mode]],
                              transform=axis.transAxes)
                    ax_text[ax_id] = (ax_text[ax_id][0],
                                      ax_text[ax_id][1] - 0.1)
                    if ax_text[ax_id][1] < 0:
                        ax_text[ax_id] = (ax_text[ax_id][0] + 0.5, 0.9)
            elif group.ndim() == 2:
                for mode, data in group[exp_idx].items():
                    title = f"{experiment.name} ({mode})" if n_experiments > 1 else f"{mode}"
                    if data.shape[0] < 2:
                        # This particular mode only has a single data point, so need to draw a shape instead of a line
                        xy = [data[0][0], data[0][1]]
                        if mode == 'train':
                            style = MarkerStyle(marker='o', fillstyle='full')
                        elif mode == 'eval':
                            style = MarkerStyle(marker='v', fillstyle='full')
                        elif mode == 'test':
                            style = MarkerStyle(marker='*', fillstyle='full')
                        else:
                            style = MarkerStyle(marker='s', fillstyle='full')
                        if isinstance(xy[1], ValWithError):
                            # We've got error bars
                            x = xy[0]
                            y = xy[1]
                            # Plotting requires positive values for error
                            y_err = [[max(1e-9, y.y - y.y_min)],
                                     [max(1e-9, y.y_max - y.y)]]
                            axis.errorbar(
                                x=x,
                                y=y.y,
                                yerr=y_err,
                                ecolor=colors[exp_idx + color_offset[mode]],
                                elinewidth=1.5,
                                capsize=4.0,
                                capthick=1.5,
                                zorder=3
                            )  # zorder to put markers on top of line segments
                            xy[1] = y.y
                        s = axis.scatter(
                            xy[0],
                            xy[1],
                            s=40,
                            c=[colors[exp_idx + color_offset[mode]]],
                            marker=style,
                            linewidth=1.0,
                            edgecolors='black',
                            zorder=4
                        )  # zorder to put markers on top of line segments
                        if not has_label[exp_idx][mode]['patch']:
                            labels.append(title)
                            handles.append(s)
                            has_label[exp_idx][mode]['patch'] = True
                    else:
                        # We can draw a line
                        y = data[:, 1]
                        y_min = None
                        y_max = None
                        if isinstance(y[0], ValWithError):
                            y = np.stack(y)
                            y_min = y[:, 0]
                            y_max = y[:, 2]
                            y = y[:, 1]
                            if smooth_factor != 0:
                                y_min = gaussian_filter1d(y_min,
                                                          sigma=smooth_factor)
                                y_max = gaussian_filter1d(y_max,
                                                          sigma=smooth_factor)
                        if smooth_factor != 0:
                            y = gaussian_filter1d(y, sigma=smooth_factor)
                        x = data[:, 0]
                        ln = axis.plot(
                            x,
                            y,
                            color=colors[exp_idx + color_offset[mode]],
                            label=title,
                            linewidth=1.5,
                            linestyle='solid'
                            if mode == 'train' else 'dashed' if mode == 'eval'
                            else 'dotted' if mode == 'test' else 'dashdot')
                        if not has_label[exp_idx][mode]['line']:
                            labels.append(title)
                            handles.append(ln[0])
                            has_label[exp_idx][mode]['line'] = True
                        if y_max is not None and y_min is not None:
                            axis.fill_between(
                                x.astype(np.float32),
                                y_max,
                                y_min,
                                facecolor=colors[exp_idx + color_offset[mode]],
                                alpha=0.3,
                                zorder=-1)
            else:
                # Some kind of image or matrix. Not implemented yet.
                pass

    plt.tight_layout()

    if labels:
        if share_legend:
            # Sort the labels
            handles = [
                h for _, h in sorted(zip(labels, handles),
                                     key=lambda pair: pair[0])
            ]
            labels = sorted(labels)
            # Split the labels over multiple legends if there are too many to fit in one axis
            elems_per_legend = math.ceil(len(labels) / n_legends)
            i = 0
            for r in range(last_row_idx, n_rows):
                for c in range(last_column_idx + 1 if r == last_row_idx else 0,
                               n_cols):
                    if len(handles) <= i:
                        break
                    axs[r][c].legend(
                        handles[i:i + elems_per_legend],
                        labels[i:i + elems_per_legend],
                        loc='center',
                        fontsize='large' if elems_per_legend <= 6 else
                        'medium' if elems_per_legend <= 8 else 'small')
                    i += elems_per_legend
        else:
            for i in range(n_rows):
                for j in range(n_cols):
                    if i == last_row_idx and j > last_column_idx:
                        break
                    axs[i][j].legend(loc='best', fontsize='small')
    return fig
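The grid sizing in the function above aims for a near-square layout: n_rows is the ceiling of the square root of the total plot count, and n_cols then trims any unused columns. A worked example with illustrative numbers (7 metrics, shared legend, 6 experiments):

import math

n_legends = math.ceil(6 / 4)            # 6 experiments -> 2 legend slots
n_plots = 7 + n_legends                 # 7 metric plots + 2 legends = 9
n_rows = math.ceil(math.sqrt(n_plots))  # 3
n_cols = math.ceil(n_plots / n_rows)    # 3
print(n_rows, n_cols)                   # 3 3 -> a 3x3 grid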
Example #20
    def __init__(self,
                 inputs: Union[str, Iterable[str]],
                 outputs: Union[str, Iterable[str]],
                 mode: Union[None, str, Iterable[str]] = None,
                 ds_id: Union[None, str, Iterable[str]] = None,
                 choices: Union[str, NumpyOp,
                                List[Union[str, NumpyOp]]] = "defaults",
                 level: Union[int, float] = 18):
        self.default_aug_dict = {
            "Rotate":
            Rotate(inputs=inputs,
                   outputs=outputs,
                   mode=mode,
                   ds_id=ds_id,
                   limit=90),
            "Identity":
            Identity(inputs=inputs, outputs=outputs, mode=mode, ds_id=ds_id),
            "AutoContrast":
            AutoContrast(inputs=inputs,
                         outputs=outputs,
                         mode=mode,
                         ds_id=ds_id),
            "Equalize":
            Equalize(inputs=inputs, outputs=outputs, mode=mode, ds_id=ds_id),
            "Posterize":
            Posterize(inputs=inputs,
                      outputs=outputs,
                      mode=mode,
                      ds_id=ds_id,
                      num_bits=7),
            "Solarize":
            Solarize(inputs=inputs,
                     outputs=outputs,
                     mode=mode,
                     ds_id=ds_id,
                     threshold=256),
            "Sharpness":
            Sharpness(inputs=inputs,
                      outputs=outputs,
                      mode=mode,
                      ds_id=ds_id,
                      limit=0.9),
            "Contrast":
            Contrast(inputs=inputs,
                     outputs=outputs,
                     mode=mode,
                     ds_id=ds_id,
                     limit=0.9),
            "Color":
            Color(inputs=inputs,
                  outputs=outputs,
                  mode=mode,
                  ds_id=ds_id,
                  limit=0.9),
            "Brightness":
            Brightness(inputs=inputs,
                       outputs=outputs,
                       mode=mode,
                       ds_id=ds_id,
                       limit=0.9),
            "ShearX":
            ShearX(inputs=inputs,
                   outputs=outputs,
                   mode=mode,
                   ds_id=ds_id,
                   shear_coef=0.5),
            "ShearY":
            ShearY(inputs=inputs,
                   outputs=outputs,
                   mode=mode,
                   ds_id=ds_id,
                   shear_coef=0.5),
            "TranslateX":
            TranslateX(inputs=inputs,
                       outputs=outputs,
                       mode=mode,
                       ds_id=ds_id,
                       shift_limit=0.33),
            "TranslateY":
            TranslateY(inputs=inputs,
                       outputs=outputs,
                       mode=mode,
                       ds_id=ds_id,
                       shift_limit=0.33)
        }
        aug_options = self._parse_aug_choices(magnitude_coef=(level / 30.),
                                              choices=to_list(choices))

        inputs, outputs = to_set(inputs), to_set(outputs)
        for op in aug_options:
            for inp in op.inputs:
                inputs.add(inp)

            for out in op.outputs:
                outputs.add(out)
        super().__init__(inputs=inputs.union(outputs),
                         outputs=outputs,
                         mode=mode,
                         ds_id=ds_id)

        # Calculate the number of augmentations to apply at each training iteration
        N_min = 1
        N_max = min(len(aug_options), 5)
        N = level * (N_max - N_min) / 30 + N_min
        N_guarantee, N_p = int(N), N % 1

        self.ops = [OneOfMultiVar(*aug_options) for _ in range(N_guarantee)]
        if N_p > 0:
            self.ops.append(Sometimes(OneOfMultiVar(*aug_options), prob=N_p))
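For the default level=18 with all 14 default augmentations available, the calculation above works out to 3 guaranteed augmentations plus a roughly 40% chance of a fourth:

level = 18
n_min, n_max = 1, min(14, 5)              # N_max is capped at 5
n = level * (n_max - n_min) / 30 + n_min  # 18 * 4 / 30 + 1 = 3.4
n_guarantee, n_p = int(n), n % 1          # 3 guaranteed ops, p ~= 0.4 for one more
print(n_guarantee, round(n_p, 1))         # 3 0.4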
Example #21
    def _sort_traces(
            traces: List[Trace],
            available_outputs: Optional[Set[str]] = None) -> List[Trace]:
        """Sort traces to attempt to resolve any dependency issues.

        This is essentially a topological sort, but it doesn't seem worthwhile to convert the data into a graph
        representation in order to get the slightly better asymptotic runtime complexity.

        Args:
            traces: A list of traces (not inside schedulers) to be sorted.
            available_outputs: What output keys are already available for the traces to use. If None are provided, the
                sorting algorithm will assume that any keys not generated by traces are being provided by the system.
                This results in a less rigorous sorting.

        Returns:
            The sorted list of `traces`.

        Raises:
            AssertionError: If Traces have circular dependencies or require input keys which are not available.
        """
        sorted_traces = []
        trace_outputs = {
            output
            for trace in traces for output in trace.outputs
        }
        if available_outputs is None:
            # Assume that anything not generated by a Trace is provided by the system
            available_outputs = {
                inp
                for trace in traces for inp in trace.inputs
            } - trace_outputs
            weak_sort = True
        else:
            available_outputs = to_set(available_outputs)
            weak_sort = False
        end_traces = deque()
        intermediate_traces = deque()
        intermediate_outputs = set()
        trace_deque = deque(traces)
        while trace_deque:
            trace = trace_deque.popleft()
            ins = set(trace.inputs)
            outs = set(trace.outputs)
            if not ins or isinstance(trace, (TrainEssential, EvalEssential)):
                sorted_traces.append(trace)
                available_outputs |= outs
            elif "*" in ins:
                if outs:
                    end_traces.appendleft(trace)
                else:
                    end_traces.append(trace)
            elif ins <= available_outputs or (
                    weak_sort and
                (ins - outs - available_outputs).isdisjoint(trace_outputs)):
                sorted_traces.append(trace)
                available_outputs |= outs
            else:
                intermediate_traces.append(trace)
                intermediate_outputs |= outs

        already_seen = set()
        while intermediate_traces:
            trace = intermediate_traces.popleft()
            ins = set(trace.inputs)
            outs = set(trace.outputs)
            already_seen.add(trace)
            if ins <= available_outputs or (
                    weak_sort and
                (ins - outs - available_outputs).isdisjoint(trace_outputs)):
                sorted_traces.append(trace)
                available_outputs |= outs
                already_seen.clear()
            elif ins <= (available_outputs | intermediate_outputs):
                intermediate_traces.append(trace)
            else:
                raise AssertionError(
                    "The {} trace has unsatisfiable inputs: {}".format(
                        type(trace).__name__,
                        ", ".join(ins -
                                  (available_outputs | intermediate_outputs))))

            if intermediate_traces and len(already_seen) == len(
                    intermediate_traces):
                raise AssertionError(
                    "Dependency cycle detected amongst traces: {}".format(
                        ", ".join([type(tr).__name__ for tr in already_seen])))
        sorted_traces.extend(list(end_traces))
        return sorted_traces
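Stripped of the Trace-specific special cases, the heart of the sort above is: repeatedly emit any item whose inputs are already available, add its outputs to the available pool, and declare a cycle if a full pass over the pending queue makes no progress. A minimal sketch with (inputs, outputs) tuples standing in for traces:

from collections import deque

def sort_by_availability(items, available):
    ordered, pending = [], deque(items)
    stalled = 0
    while pending:
        ins, outs = pending[0]
        if set(ins) <= available:
            ordered.append(pending.popleft())
            available |= set(outs)
            stalled = 0
        else:
            pending.rotate(-1)  # retry this item after the others
            stalled += 1
            if stalled == len(pending):
                raise AssertionError("Dependency cycle or unsatisfiable inputs")
    return ordered

# 'b'->'c' depends on 'a'->'b'; only 'a' is available up front
print(sort_by_availability([(("b",), ("c",)), (("a",), ("b",))], {"a"}))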
Example #22
def plot_logs(experiments,
              smooth_factor=0,
              share_legend=True,
              ignore_metrics=None,
              pretty_names=False,
              include_metrics=None):
    """A function which will plot experiment histories for comparison viewing / analysis

    Args:
        experiments (list, Experiment): Experiment(s) to plot
        smooth_factor (float): A non-negative float representing the magnitude of Gaussian smoothing to apply
            (zero for none)
        share_legend (bool): Whether to have one legend across all graphs (true) or one legend per graph (false)
        pretty_names (bool): Whether to modify the metric names in graph titles (true) or leave them alone (false)
        ignore_metrics (set): Any keys to ignore during plotting
        include_metrics (set): A whitelist of keys to include during plotting. If None then all will be included.
    Returns:
        The handle of the pyplot figure
    """
    experiments = to_list(experiments)

    ignore_keys = ignore_metrics or set()
    ignore_keys = to_set(ignore_keys)
    ignore_keys |= {'epoch', 'progress', 'total_train_steps'}
    include_keys = to_set(include_metrics) if include_metrics else None
    # TODO: epoch should be indicated on the axis (top x axis?)
    # TODO: figure out how ignore_metrics should interact with mode

    max_time = 0
    metric_keys = set()
    for experiment in experiments:
        history = experiment.history
        for mode, metrics in history.items():
            for key, value in metrics.items():
                if value.keys():
                    max_time = max(max_time, max(value.keys()))
                if key in ignore_keys:
                    continue
                if include_keys and key not in include_keys:
                    ignore_keys.add(key)
                    continue
                if any(
                        map(lambda x: isinstance(x[1], np.ndarray),
                            value.items())):
                    ignore_keys.add(key)
                    continue  # TODO: nd array not currently supported. maybe in future visualize as heat map?
                metric_keys.add("{}: {}".format(mode, key))
    metric_list = sorted(
        list(metric_keys))  # Sort the metrics alphabetically for consistency
    num_metrics = len(metric_list)
    num_experiments = len(experiments)

    if num_metrics == 0:
        return plt.subplots(111)[0]

    # map the metrics into an n x n grid, then remove any extra rows. Final grid will be m x n with m <= n
    num_cols = math.ceil(math.sqrt(num_metrics))
    metric_grid_location = {
        key: (idx // num_cols, idx % num_cols)
        for (idx, key) in enumerate(metric_list)
    }
    num_rows = math.ceil(num_metrics / num_cols)

    sns.set_context('paper')
    fig, axs = plt.subplots(num_rows,
                            num_cols,
                            sharex='all',
                            figsize=(4 * num_cols, 2.8 * num_rows))

    # If only one row, need to re-format the axs object for consistency. Likewise for columns
    if num_rows == 1:
        axs = [axs]
        if num_cols == 1:
            axs = [axs]

    for metric in metric_grid_location.keys():
        axis = axs[metric_grid_location[metric][0]][
            metric_grid_location[metric][1]]
        axis.set_title(
            metric if not pretty_names else prettify_metric_name(metric))
        axis.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3))
        axis.grid(linestyle='--')
        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
        axis.spines['bottom'].set_visible(False)
        axis.spines['left'].set_visible(False)
        axis.tick_params(bottom=False, left=False)

    for i in range(num_cols):
        axs[num_rows - 1][i].set_xlabel('Steps')

    # some of the columns in the last row might be unused, so disable them
    last_column_idx = num_cols - (num_rows * num_cols - num_metrics) - 1
    for i in range(last_column_idx + 1, num_cols):
        axs[num_rows - 1][i].axis('off')
        axs[num_rows - 2][i].set_xlabel('Steps')
        axs[num_rows - 2][i].xaxis.set_tick_params(which='both',
                                                   labelbottom=True)

    colors = sns.hls_palette(
        n_colors=num_experiments,
        s=0.95) if num_experiments > 10 else sns.color_palette("colorblind")

    handles = []
    labels = []
    bar_counter = defaultdict(lambda: 0)
    for (color_idx, experiment) in enumerate(experiments):
        labels.append(experiment.name)
        metrics = {
            "{}: {}".format(mode, key): val
            for mode, sub in experiment.history.items()
            for key, val in sub.items() if key not in ignore_keys
        }
        for (idx, (metric, value)) in enumerate(metrics.items()):
            data = np.array(list(value.items()))
            if len(data) == 1:
                y = data[0][1]
                if isinstance(y, str):
                    vals = [float(x) for x in re.findall(r'\d+\.?\d+', y)]
                    if len(vals) == 1:
                        y = vals[0]
                width = max(10, max_time // 10)
                x = max_time // 2 + (2 * (bar_counter[metric] % 2) -
                                     1) * width * math.ceil(
                                         bar_counter[metric] / 2)
                ln = axs[metric_grid_location[metric][0]][
                    metric_grid_location[metric][1]].bar(
                        x=x,
                        height=y,
                        color=colors[color_idx],
                        label=experiment.name,
                        width=width)
                bar_counter[metric] += 1
            else:
                y = data[:, 1] if smooth_factor == 0 else gaussian_filter1d(
                    data[:, 1], sigma=smooth_factor)
                ln = axs[metric_grid_location[metric][0]][
                    metric_grid_location[metric][1]].plot(
                        data[:, 0],
                        y,
                        color=colors[color_idx],
                        label=experiment.name,
                        linewidth=1.5)
            if idx == 0:
                handles.append(ln[0])

    plt.tight_layout()

    if len(labels) > 1 or labels[0]:
        if share_legend and num_rows > 1:
            if last_column_idx == num_cols - 1:
                fig.subplots_adjust(bottom=0.15)
                fig.legend(handles,
                           labels,
                           loc='lower center',
                           ncol=num_cols + 1)
            else:
                axs[num_rows - 1][last_column_idx + 1].legend(handles,
                                                              labels,
                                                              loc='center',
                                                              fontsize='large')
        else:
            for i in range(num_rows):
                for j in range(num_cols):
                    if i == num_rows - 1 and j > last_column_idx:
                        break
                    axs[i][j].legend(loc='best', fontsize='small')
    return fig
Example #23
def get_current_items(items: Iterable[Union[T, Scheduler[T]]],
                      run_modes: Optional[Union[str, Iterable[str]]] = None,
                      epoch: Optional[int] = None,
                      ds_id: Optional[str] = None) -> List[T]:
    """Select items which should be executed for given mode and epoch.

    Args:
        items: A list of possible items or Schedulers of items to choose from.
        run_modes: The desired execution mode. One or more of "train", "eval", "test", or "infer". If None, items of
            all modes will be returned.
        epoch: The desired execution epoch. If None, items across all epochs will be returned.
        ds_id: The desired execution dataset id. If None, items across all ds_ids will be returned. An empty string
            indicates that positive matches should be excluded ('' != 'ds1'), but that negative matches are satisfied
            ('' == '!ds1').

    Returns:
        The items which should be executed.
    """
    selected_items = []
    run_modes = to_set(run_modes)
    for item in items:
        if isinstance(item, Scheduler):
            if epoch is None:
                item = item.get_all_values()
            else:
                item = [item.get_current_value(epoch)]
        else:
            item = [item]
        for item_ in item:
            # mode matching
            mode_match = False
            if not run_modes:
                mode_match = True
            if not hasattr(item_, "mode"):
                mode_match = True
            else:
                if not item_.mode:
                    mode_match = True
                elif item_.mode.intersection(run_modes):
                    mode_match = True

            # ds_id matching
            ds_id_match = False
            if ds_id is None:
                ds_id_match = True
            if not hasattr(item_, "ds_id"):
                ds_id_match = True
            else:
                # If the object has no requirements, then allow it
                if not item_.ds_id:
                    ds_id_match = True
                # blacklist check (before whitelist due to desired empty string behavior)
                # if any of ds_id starts with "!", then they will all start with "!"
                elif any([x.startswith("!") for x in item_.ds_id]) and all(
                    [ds_id != x[1:] for x in item_.ds_id]):
                    ds_id_match = True  # Note that empty string will pass this check (unless target is literally "!")
                # whitelist check
                elif ds_id in item_.ds_id:
                    ds_id_match = True  # Note that empty string will fail this check
            if item_ and mode_match and ds_id_match:
                selected_items.append(item_)
    return selected_items
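The ds_id matching in this newer version layers blacklist ('!ds1') and whitelist ('ds1') semantics on top of the mode check, with the empty string behaving as the docstring describes: it fails whitelist matches but passes blacklist ones. A standalone sketch of the predicate (semantics assumed from the docstring and comments above):

def ds_id_matches(target, requirements):
    if target is None or not requirements:
        return True  # no filter requested, or the item imposes no requirements
    if any(r.startswith("!") for r in requirements):  # blacklist check comes first
        return all(target != r[1:] for r in requirements)
    return target in requirements  # whitelist check

print(ds_id_matches("ds1", {"!ds1"}))  # False (explicitly blacklisted)
print(ds_id_matches("", {"!ds1"}))     # True  ('' == '!ds1')
print(ds_id_matches("", {"ds1"}))      # False ('' != 'ds1')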