Example #1
def test_merge() -> None:
    merged = hip.Experiment.merge({
        "xp1":
        hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": "b"})]),
        "xp2":
        hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": "c"})]),
    })
    assert len(merged.datapoints) == 2, merged
    merged.validate()
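This test demonstrates that merging preserves both datapoints even though they share the uid "1", so merge disambiguates uids per source experiment. A minimal sketch of rendering such a merged experiment outside a test (the output file name is illustrative):

import hiplot as hip

merged = hip.Experiment.merge({
    "xp1": hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": "b"})]),
    "xp2": hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": "c"})]),
})
merged.to_html("merged.html")  # standalone HTML file, no server needed
# merged.display()             # interactive view when running inside a Jupyter notebook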
Example #2
def search(table_name):
    if os.path.exists(databases_root_folder + table_name):

        db = ScaDatabase(databases_root_folder + table_name)
        analysis_all = db.select_all(Analysis)
        analyses = []

        hp = []

        for analysis in analysis_all:

            final_key_ranks = db.select_final_key_rank_json_from_analysis(KeyRank, analysis.id)

            if len(final_key_ranks) > 0:
                hyper_parameters = db.select_from_analysis(HyperParameter, analysis.id)
                training_hyper_parameters = hyper_parameters.hyper_parameters
                training_hyper_parameters[0]['guessing_entropy'] = final_key_ranks[0][0]['key_rank']
                hp.append(training_hyper_parameters[0])

        exp = hip.Experiment().from_iterable(hp)
        exp.display_data(hip.Displays.PARALLEL_PLOT).update({
            'hide': ['uid', 'key_rank', 'key'],  # Hide some columns
        'order': ['guessing_entropy'],  # Put guessing_entropy first on the left
        })
        exp.validate()
        exp.to_html("webapp/templates/hiplot.html")

        return render_template("dashboard/search.html", analyses=analyses)
    return render_template("dashboard/search.html", analyses=[])
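For reference, Experiment.from_iterable accepts any iterable of flat dicts, one per datapoint; calling it through hip.Experiment() as above works, but it can also be invoked on the class directly. A minimal standalone sketch with illustrative column names:

import hiplot as hip

hp = [
    {"learning_rate": 0.001, "batch_size": 128, "guessing_entropy": 12},
    {"learning_rate": 0.010, "batch_size": 256, "guessing_entropy": 3},
]
exp = hip.Experiment.from_iterable(hp)
exp.display_data(hip.Displays.PARALLEL_PLOT).update({"order": ["guessing_entropy"]})
exp.to_html("hiplot.html")  # illustrative output path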
Example #3
def test_validation_missing_parent() -> None:
    xp = hip.Experiment(datapoints=[hip.Datapoint(uid="1", from_uid="2", values={})])
    with pytest.raises(hip.ExperimentValidationMissingParent):
        xp.validate()
    xp.remove_missing_parents()
    assert xp.datapoints[0].from_uid is None
    xp.validate()
Example #4
    def get_search(self, databases_root_folder, table_name):

        analyses = []
        if os.path.exists(f"{databases_root_folder}{table_name}"):

            db_select = DBSelect(f"{databases_root_folder}{table_name}")
            analysis_all = db_select.select_all(Analysis)

            hp = []

            for analysis in analysis_all:

                final_key_ranks = db_select.select_all_guessing_entropy_from_analysis(
                    GuessingEntropy, analysis.id)

                if len(final_key_ranks) > 0:
                    hyper_parameters = db_select.select_from_analysis(
                        HyperParameter, analysis.id)
                    training_hyper_parameters = hyper_parameters.hyper_parameters
                    training_hyper_parameters[0][
                        'guessing_entropy'] = final_key_ranks[0][0]['key_rank']
                    hp.append(training_hyper_parameters[0])

            exp = hip.Experiment().from_iterable(hp)
            exp.display_data(hip.Displays.PARALLEL_PLOT).update({
                'hide': ['uid', 'key_rank', 'key'],  # Hide some columns
                'order': ['guessing_entropy'],  # Put guessing_entropy first on the left
            })
            exp.validate()
            exp.to_html("webapp/templates/hiplot.html")

        return analyses
Example #5
    def to_hiplot_experiment(self, max_list_elements: int = 24) -> tp.Any:  # no typing here since Hiplot is not a hard requirement
        """Converts the logs into an hiplot experiment for display.


        Example
        -------
        exp = logs.to_hiplot_experiment()
        exp.display(force_full_width=True)

        Note
        ----
        - You can easily change the axes of the XY plot:
          exp.display_data(hip.Displays.XY).update({'axis_x': '0#0', 'axis_y': '0#1'})
        - For more context about hiplot, check:
          - blogpost: https://ai.facebook.com/blog/hiplot-high-dimensional-interactive-plots-made-easy/
          - github repo: https://github.com/facebookresearch/hiplot
          - documentation: https://facebookresearch.github.io/hiplot/
        """
        import hiplot as hip
        exp = hip.Experiment()
        for xp in self.load_flattened(max_list_elements=max_list_elements):
            dp = hip.Datapoint(
                from_uid=xp.get("#parents_uids#0"),
                uid=xp["#uid"],
                values={x: y for x, y in xp.items() if not (x.startswith("#") and ("uid" in x or "ask" in x))}
            )
            exp.datapoints.append(dp)
        exp.display_data(hip.Displays.XY).update({'axis_x': '#num-tell', 'axis_y': '#loss'})
        # for the record, some more options:
        exp.display_data(hip.Displays.XY).update({'lines_thickness': 1.0, 'lines_opacity': 1.0})
        return exp
Example #6
def test_validation_circular_ref() -> None:
    with pytest.raises(hip.ExperimentValidationCircularRef):
        hip.Experiment(datapoints=[
            hip.Datapoint(uid="1", from_uid="2", values={}),
            hip.Datapoint(uid="2", from_uid="3", values={}),
            hip.Datapoint(uid="3", from_uid="4", values={}),
            hip.Datapoint(uid="4", from_uid="2", values={}),
        ]).validate()
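One way to recover from such a cycle is to clear the parent link on one of the datapoints in the loop, after which validation passes. A minimal sketch:

import hiplot as hip

xp = hip.Experiment(datapoints=[
    hip.Datapoint(uid="1", from_uid="2", values={}),
    hip.Datapoint(uid="2", from_uid="3", values={}),
    hip.Datapoint(uid="3", from_uid="4", values={}),
    hip.Datapoint(uid="4", from_uid="2", values={}),
])
xp.datapoints[3].from_uid = None  # break the 2 -> 3 -> 4 -> 2 cycle
xp.validate()                     # no longer raises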
Example #7
    def to_hiplot_experiment(
        self,
        max_list_elements: int = 24
    ) -> tp.Any:  # no typing here since Hiplot is not a hard requirement
        """Converts the logs into an hiplot experiment for display.

        Parameters
        ----------
        max_list_elements: int
            maximum number of elements of lists/arrays to export (only the first elements are extracted)

        Example
        -------
        .. code-block:: python

            exp = logs.to_hiplot_experiment()
            exp.display(force_full_width=True)

        Note
        ----
        - You can easily change the axes of the XY plot:
          :code:`exp.display_data(hip.Displays.XY).update({'axis_x': '0#0', 'axis_y': '0#1'})`
        - For more context about hiplot, check:

          - blogpost: https://ai.facebook.com/blog/hiplot-high-dimensional-interactive-plots-made-easy/
          - github repo: https://github.com/facebookresearch/hiplot
          - documentation: https://facebookresearch.github.io/hiplot/
        """
        # pylint: disable=import-outside-toplevel
        try:
            import hiplot as hip
        except ImportError as e:
            raise ImportError(
                f"{self.__class__.__name__} requires hiplot which is not installed by default "
                "(pip install hiplot)") from e
        exp = hip.Experiment()
        for xp in self.load_flattened(max_list_elements=max_list_elements):
            dp = hip.Datapoint(
                from_uid=xp.get("#parents_uids#0"),
                uid=xp["#uid"],
                values={
                    x: y
                    for x, y in xp.items()
                    if not (x.startswith("#") and ("uid" in x or "ask" in x))
                },
            )
            exp.datapoints.append(dp)
        exp.display_data(hip.Displays.XY).update({
            "axis_x": "#num-tell",
            "axis_y": "#loss"
        })
        # for the record, some more options:
        exp.display_data(hip.Displays.XY).update({
            "lines_thickness": 1.0,
            "lines_opacity": 1.0
        })
        return exp
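For context, the "#num-tell" and "#loss" columns suggest this method belongs to an optimization-log wrapper along the lines of nevergrad's ParametersLogger; that attribution is an assumption, not stated in the snippet. A hedged usage sketch under that assumption:

import nevergrad as ng

logger = ng.callbacks.ParametersLogger("./log.json")       # one record per tell
optimizer = ng.optimizers.OnePlusOne(parametrization=2, budget=50)
optimizer.register_callback("tell", logger)
optimizer.minimize(lambda x: float((x ** 2).sum()))        # toy objective
exp = logger.to_hiplot_experiment()
exp.to_html("optimization.html")                           # or exp.display() in a notebook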
Example #8
    def get_hiplot(
        self, query: str = None, sort: str = None, limit: int = None, offset: int = None
    ):
        import hiplot

        data = self.get_runs_io(query=query, sort=sort, limit=limit, offset=offset)
        exp = hiplot.Experiment()
        for d in data:
            dp = hiplot.Datapoint(
                uid=d["uid"], values=d["values"],
            )
            exp.datapoints.append(dp)
        return exp
Example #9
def _create_experiment_from_dataframe(df: DataFrame,
                                      include_tags: bool) -> hiplot.Experiment:
    """Generate HiPlot experiment from MLFlow runs.

    Parameters
    ----------
    df: pandas.DataFrame
        A dataframe (normally returned by ``mlflow.search_runs``)
        to process
    include_tags: bool
        Whether or not to include tags in the results (False)

    Returns
    -------
    hiplot.Experiment
        The processed experiment
    """
    exp = hiplot.Experiment()
    params = [p for p in df.columns if p.startswith("params.")]
    metrics = [m for m in df.columns if m.startswith("metrics.")]
    if include_tags:
        tags = [t for t in df.columns if t.startswith("tags.")]
    for _, row in df.iterrows():
        values = {}
        for p in params:
            values[p] = row[p]

        for m in metrics:
            if isfinite(row[m]):
                values[m] = row[m]

        if include_tags:
            for t in tags:
                values[t] = row[t]

        dp = hiplot.Datapoint(
            uid=str(uuid.UUID(row["run_id"])),
            values=values,
        )
        exp.datapoints.append(dp)
    return exp
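A hypothetical driver for this helper, assuming the dataframe comes from mlflow.search_runs as the docstring suggests (the output path is illustrative):

import mlflow

df = mlflow.search_runs()  # searches the currently active MLflow experiment
exp = _create_experiment_from_dataframe(df, include_tags=False)
exp.to_html("mlflow_runs.html")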
Example #10
SEARCH_RUNS_RESULT = DataFrame({
    "run_id": [RUN_ID_1, RUN_ID_2],
    "params.numeric": [0, 1],
    "params.category": ["value1", "value2"],
    "metrics.third": [2, 4],
    "tags.test": ["yes", "no"],
})
EXPERIMENT = hiplot.Experiment([
    hiplot.Datapoint(
        uid=RUN_ID_1,
        values={
            "params.numeric": 0,
            "params.category": "value1",
            "metrics.third": 2,
        },
    ),
    hiplot.Datapoint(
        uid=RUN_ID_2,
        values={
            "params.numeric": 1,
            "params.category": "value2",
            "metrics.third": 4,
        },
    ),
])
EXPERIMENT_WITH_TAGS = hiplot.Experiment([
    hiplot.Datapoint(
        uid=RUN_ID_1,
        values={
            "params.numeric": 0,
            "params.category": "value1",
            "metrics.third": 2,
Example #11
def fetcher(uri):
    """Prepare param sweep output for hiplot
    Collects the sweep results and simplifies them for easy display using hiplot.
    :param uri: root dir that containing all the param_sweeping results.
    :returns: hiplot Experiment Object for display
    """

    print("got request for %s, collecting logs" % uri)

    exp = hip.Experiment()
    exp.display_data(hip.Displays.XY).update({
        "axis_x": "step",
        "axis_y": "cumulative_reward"
    })

    dfs = collect_logs(Path(uri))  # list of (name, df) pairs
    cfg_variants = {}
    cfgs = {}
    for name, _dfs in dfs:
        # first collect each config
        print("loading config from %s" % name)
        target = Path(name)
        configpath = target / "config.yaml"
        cfg = flatten(OmegaConf.load(str(configpath)))
        cfgs[name] = cfg
        for k, v in cfg.items():
            if k not in cfg_variants:
                cfg_variants[k] = set()
            cfg_variants[k].add(v)

    print("Read in %d logs successfully" % len(cfgs))

    order = []
    order.append("mean_final_reward")
    # cfg_variants are hyperparams with more than one value
    for key, vals in cfg_variants.items():
        if len(vals) > 1:
            order.append(key)
    order.append("cumulative_reward")
    print("headers found to plot: ", order)
    exp.display_data(hip.Displays.PARALLEL_PLOT).update(
        hide=["step", "uid", "from_uid"], order=order)

    # min_points = min(len(df["step"]) for _name, df in dfs)
    # max_points = max(len(df["step"]) for _name, df in dfs)
    ave_points = sum(len(df["step"]) for _name, df in dfs) // len(dfs)
    step_size = ave_points // 100 + 1  # I want an average of 100 points per experiment
    print("ave_points:", ave_points, "step_size:", step_size)

    for name, df in dfs:
        # now go through each dataframe
        cfg = cfgs[name]

        hyperparams = dict()
        for key, val in cfg.items():
            if len(cfg_variants[key]) > 1:
                try:
                    hyperparams[key] = float(val)
                except ValueError:
                    hyperparams[key] = str(val)

        steps = df["step"]
        prev_name = None
        cum_sum = df["mean_episode_return"].cumsum()

        for idx in range(0, len(cum_sum), step_size):
            step = int(steps[idx])
            cumulative_reward = cum_sum[idx]
            curr_name = "{},step{}".format(name, step)
            sp = hip.Datapoint(
                uid=curr_name,
                values=dict(step=step, cumulative_reward=cumulative_reward),
            )
            if prev_name is not None:
                sp.from_uid = prev_name
            exp.datapoints.append(sp)
            prev_name = curr_name

        mean_final_reward = float(df["mean_episode_return"][-10000:].mean())
        peak_performance = float(
            df["mean_episode_return"].rolling(window=1000).mean().max())
        end_vals = copy.deepcopy(hyperparams)
        end_vals.update(
            step=int(steps.iloc[-1]),
            cumulative_reward=cum_sum.iloc[-1],
            mean_final_reward=mean_final_reward,
            peak_performance=peak_performance,
        )
        dp = hip.Datapoint(uid=name, from_uid=prev_name, values=end_vals)
        exp.datapoints.append(dp)

    return exp
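A fetcher like this is meant to be registered with the HiPlot server, which calls it with the URI typed into the web UI; a minimal sketch as Python comments, assuming the function is importable as sweep_fetcher.fetcher (the module name is an assumption):

# Launch the HiPlot web server with the custom fetcher registered:
#     hiplot sweep_fetcher.fetcher
# Then open the served page (http://127.0.0.1:5005 by default) and enter the
# sweep's root directory as the experiment URI so fetcher(uri) gets invoked.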
Example #12
def make_parallel_coordinates_plot(html_file_path=None,
                                   metrics=False,
                                   text_logs=False,
                                   params=True,
                                   properties=False,
                                   experiment_id=None,
                                   state=None,
                                   owner=None,
                                   tag=None,
                                   min_running_time=None):
    """Visualize experiments on the parallel coordinates plot.

    This function, when executed in a notebook, displays an interactive parallel coordinates plot in the cell's output.
    Another option is to save the visualization to a standalone HTML file.
    You can also inspect the lineage of experiments.

    **See** `example <https://neptune-contrib.readthedocs.io/examples/hiplot_visualizations.html>`_
    **for the full use case.**

    Axes are ordered as follows: first axis is neptune ``experiment id``,
    second is experiment ``owner``,
    then ``params`` and ``properties`` in alphabetical order.
    Finally, ``metrics`` on the right side (alphabetical order as well).

    This visualization is built using `HiPlot <https://facebookresearch.github.io/hiplot/index.html>`_.
    It is a library published by the Facebook AI group.
    Learn more about the `parallel coordinates plot <https://en.wikipedia.org/wiki/Parallel_coordinates>`_.

    Tip:
        Use ``metrics``, ``params`` and ``properties`` arguments to select what data you want to see as axes.

        Use ``experiment_id``, ``state``, ``owner``, ``tag``, ``min_running_time`` arguments to filter experiments
        included in a plot. Only experiments matching all the criteria will be returned.

    Note:
        Make sure you have your project set: ``neptune.init('USERNAME/example-project')``

    Args:
        html_file_path (:obj:`str`, optional, default is ``None``):
            | Saves visualization as a standalone html file. No external dependencies needed.
        metrics (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``False``):
            | Metrics to display on the plot (as axes).
            | If `True`, then display all metrics.
            | If `False`, then exclude all metrics.
        text_logs (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``False``):
            | Text logs to display on the plot (as axes).
            | If `True`, then display all text logs.
            | If `False`, then exclude all text logs.
        params (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``True``):
            | Parameters to display on the plot (as axes).
            | If `True`, then display all parameters.
            | If `False`, then exclude all parameters.
        properties (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``False``):
            | Properties to display on the plot (as axes).
            | If `True`, then display all properties.
            | If `False`, then exclude all properties.
        experiment_id (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | An experiment id like ``'SAN-1'`` or list of ids like ``['SAN-1', 'SAN-2']``.
            | Matching any element of the list is sufficient to pass criterion.
        state (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | An experiment state like ``'succeeded'`` or list of states like ``['succeeded', 'running']``.
            | Possible values: ``'running'``, ``'succeeded'``, ``'failed'``, ``'aborted'``.
            | Matching any element of the list is sufficient to pass criterion.
        owner (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | *Username* of the experiment owner (User who created experiment is an owner) like ``'josh'``
              or list of owners like ``['frederic', 'josh']``.
            | Matching any element of the list is sufficient to pass criterion.
        tag (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | An experiment tag like ``'lightGBM'`` or list of tags like ``['pytorch', 'cycleLR']``.
            | Only experiments that have all specified tags will match this criterion.
        min_running_time (:obj:`int`, optional, default is ``None``):
            Minimum running time of an experiment in seconds, like ``2000``.

    Returns:

        :obj:`ExperimentDisplayed`, object that can be used to get a ``list`` of ``Datapoint`` objects,
        like this: ``ExperimentDisplayed.get_selected()``. This is only implemented for Jupyter notebook. Check
        `HiPlot docs
        <https://facebookresearch.github.io/hiplot/py_reference.html?highlight=display#hiplot.Experiment.display>`_.

    Examples:

        .. code:: python3

            # Make sure you have your project set:
            neptune.init('USERNAME/example-project')

            # (example 1) visualization for all experiments in project
            make_parallel_coordinates_plot()

            # (example 2) visualization for experiment with tag 'optuna' and saving to html file.
            make_parallel_coordinates_plot(html_file_path='visualizations.html', tag='optuna')

            # (example 3) visualization with all params, two metrics for experiment with tag 'optuna'
            make_parallel_coordinates_plot(tag='optuna', metrics=['epoch_accuracy', 'eval_accuracy'])

            # (example 4) visualization with all params and two metrics. All experiments created by john.
            make_parallel_coordinates_plot(metrics=['epoch_accuracy', 'eval_accuracy'], owner='john')
    """
    _all_metrics = []
    _all_text_logs = []
    _all_params = []
    _all_properties = []

    if neptune.project is None:
        msg = """You do not have project, from which to fetch data.
                 Use neptune.init() to set project, for example: neptune.init('USERNAME/example-project').
                 See docs: https://docs.neptune.ai/neptune-client/docs/neptune.html#neptune.init"""
        raise ValueError(msg)

    df = neptune.project.get_leaderboard(id=experiment_id,
                                         state=state,
                                         owner=owner,
                                         tag=tag,
                                         min_running_time=min_running_time)
    assert df.shape[0] != 0, 'No experiments to show. Try other filters.'

    # Cast columns to int or str
    for column in df.columns.to_list():
        if column.startswith('channel_'):
            try:
                df = df.astype({column: float})
                _all_metrics.append((column, column.replace('channel_', '')))
            except ValueError:
                df = df.astype({column: str})
                _all_text_logs.append((column, column.replace('channel_', '')))
        elif column.startswith('parameter_'):
            try:
                df = df.astype({column: float})
            except ValueError:
                df = df.astype({column: str})
            _all_params.append((column, column.replace('parameter_', '')))
        elif column.startswith('property_'):
            try:
                df = df.astype({column: float})
            except ValueError:
                df = df.astype({column: str})
            _all_properties.append((column, column.replace('property_', '')))

    # Validate each type of input
    metrics = _validate_input(metrics, _all_metrics, 'metric')
    text_logs = _validate_input(text_logs, _all_text_logs, 'text log')
    params = _validate_input(params, _all_params, 'parameter')
    properties = _validate_input(properties, _all_properties, 'property')

    # Check for name conflicts
    for column in [k for k, v in Counter(metrics + text_logs + params + properties).items() if v > 1]:
        if column in metrics:
            metrics = ['metric__' + column if j == column else j for j in metrics]
            _all_metrics = [(j[0], 'metric__' + column) if j[1] == column else j for j in _all_metrics]
        if column in text_logs:
            text_logs = ['text_log__' + column if j == column else j for j in text_logs]
            _all_text_logs = [(j[0], 'text_log__' + column) if j[1] == column else j for j in _all_text_logs]
        if column in params:
            params = ['param__' + column if j == column else j for j in params]
            _all_params = [(j[0], 'param__' + column) if j[1] == column else j for j in _all_params]
        if column in properties:
            properties = ['property__' + column if j == column else j for j in properties]
            _all_properties = [(j[0], 'property__' + column) if j[1] == column else j for j in _all_properties]

    # Rename columns in DataFrame and sort experiments by neptune id
    new_col_names = {'id': 'neptune_id',
                     'owner': 'owner'}

    metrics = [(j[0], j[1]) for j in _all_metrics if j[1] in metrics]
    text_logs = [(j[0], j[1]) for j in _all_text_logs if j[1] in text_logs]
    params = [(j[0], j[1]) for j in _all_params if j[1] in params]
    properties = [(j[0], j[1]) for j in _all_properties if j[1] in properties]

    new_col_names.update(metrics)
    new_col_names.update(text_logs)
    new_col_names.update(params)
    new_col_names.update(properties)

    df = df[new_col_names.keys()]
    df = df.rename(columns=new_col_names)
    _exp_ids_series = df['neptune_id'].apply(lambda x: int(x.split('-')[-1]))
    df.insert(loc=0, column='neptune_exp_number', value=_exp_ids_series)
    df = df.astype({'neptune_exp_number': int})
    df = df.sort_values(by='neptune_exp_number', ascending=True)
    df = df.drop(columns='neptune_exp_number')

    # Prepare order of axes, where 'neptune_id' is first, metrics to the right.
    all_axes = df.columns.to_list()
    if metrics:
        metric_names = [j[1] for j in metrics]
        metric_names.sort()
        for metric in metric_names:
            all_axes.remove(metric)
        all_axes.sort()
        all_axes.sort(reverse=True, key='owner'.__eq__)
        all_axes.sort(reverse=True, key='neptune_id'.__eq__)
        all_axes = all_axes + metric_names

    # Prepare HiPlot visualization
    input_to_hiplot = df.T.to_dict().values()
    hiplot_vis = hip.Experiment().from_iterable(input_to_hiplot)
    for j, datapoint in enumerate(hiplot_vis.datapoints[1:], 1):
        datapoint.from_uid = hiplot_vis.datapoints[j - 1].uid

    # Save to html if requested
    if html_file_path is not None:
        assert isinstance(html_file_path, str), \
            '"html_file_path" should be string, but {} is given'.format(type(html_file_path))
        if os.path.dirname(html_file_path):
            os.makedirs(os.path.dirname(html_file_path), exist_ok=True)
        hiplot_vis.to_html(html_file_path)
    hiplot_vis.display_data(hip.Displays.PARALLEL_PLOT).update({'categoricalMaximumValues': df.shape[0],
                                                                'hide': ['uid', 'from_uid'],
                                                                'order': all_axes})
    return hiplot_vis.display()