def test_merge() -> None:
    """Merging two experiments keeps both datapoints even though they share uid "1"."""
    experiments = {
        name: hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": value})])
        for name, value in (("xp1", "b"), ("xp2", "c"))
    }
    merged = hip.Experiment.merge(experiments)
    datapoint_count = len(merged.datapoints)
    assert datapoint_count == 2, merged
    # The merged experiment must still be internally consistent.
    merged.validate()
def search(table_name):
    """Regenerate the HiPlot page for one database and render the search dashboard.

    When the database file exists, every analysis that has at least one final
    key rank contributes its first hyper-parameter set (extended with a
    ``guessing_entropy`` entry) to a HiPlot experiment, which is written to
    ``webapp/templates/hiplot.html``. When the file does not exist, only the
    dashboard template is rendered.

    :param table_name: database file name, appended to ``databases_root_folder``.
    :returns: the rendered ``dashboard/search.html`` template.
    """
    db_path = databases_root_folder + table_name
    if not os.path.exists(db_path):
        return render_template("dashboard/search.html", analyses=[])

    db = ScaDatabase(db_path)
    # Never populated in this function: the template always receives an empty list.
    analyses = []
    plot_rows = []
    for analysis in db.select_all(Analysis):
        final_key_ranks = db.select_final_key_rank_json_from_analysis(KeyRank, analysis.id)
        if len(final_key_ranks) == 0:
            # Analyses without a final key rank are left out of the plot.
            continue
        stored = db.select_from_analysis(HyperParameter, analysis.id)
        first_hyper_parameters = stored.hyper_parameters[0]
        first_hyper_parameters['guessing_entropy'] = final_key_ranks[0][0]['key_rank']
        plot_rows.append(first_hyper_parameters)

    exp = hip.Experiment().from_iterable(plot_rows)
    parallel_plot_options = {
        'hide': ['uid', 'key_rank', 'key'],  # columns kept out of the plot
        'order': ['guessing_entropy'],  # axis order requested for the plot
    }
    exp.display_data(hip.Displays.PARALLEL_PLOT).update(parallel_plot_options)
    exp.validate()
    exp.to_html("webapp/templates/hiplot.html")
    return render_template("dashboard/search.html", analyses=analyses)
def test_validation_missing_parent() -> None:
    """A dangling ``from_uid`` fails validation until missing parents are removed."""
    orphan = hip.Datapoint(uid="1", from_uid="2", values={})
    xp = hip.Experiment(datapoints=[orphan])

    # uid "2" does not exist in the experiment, so validation must complain.
    with pytest.raises(hip.ExperimentValidationMissingParent):
        xp.validate()

    xp.remove_missing_parents()
    first_datapoint = xp.datapoints[0]
    assert first_datapoint.from_uid is None
    xp.validate()
def get_search(self, databases_root_folder, table_name):
    """Regenerate the HiPlot page for one database and return the analyses list.

    When ``databases_root_folder + table_name`` exists, every analysis with at
    least one guessing-entropy record contributes its first hyper-parameter set
    (extended with a ``guessing_entropy`` entry) to a HiPlot experiment that is
    written to ``webapp/templates/hiplot.html``.

    :param databases_root_folder: folder prefix, concatenated as-is with ``table_name``.
    :param table_name: database file name.
    :returns: a list that this method never populates, so it is always empty.
    """
    analyses = []
    db_path = f"{databases_root_folder}{table_name}"
    if not os.path.exists(db_path):
        return analyses

    db_select = DBSelect(db_path)
    plot_rows = []
    for analysis in db_select.select_all(Analysis):
        final_key_ranks = db_select.select_all_guessing_entropy_from_analysis(
            GuessingEntropy, analysis.id)
        if len(final_key_ranks) == 0:
            # Analyses without guessing-entropy data are left out of the plot.
            continue
        stored = db_select.select_from_analysis(HyperParameter, analysis.id)
        first_hyper_parameters = stored.hyper_parameters[0]
        first_hyper_parameters['guessing_entropy'] = final_key_ranks[0][0]['key_rank']
        plot_rows.append(first_hyper_parameters)

    exp = hip.Experiment().from_iterable(plot_rows)
    parallel_plot_options = {
        'hide': ['uid', 'key_rank', 'key'],  # columns kept out of the plot
        'order': ['guessing_entropy'],  # axis order requested for the plot
    }
    exp.display_data(hip.Displays.PARALLEL_PLOT).update(parallel_plot_options)
    exp.validate()
    exp.to_html("webapp/templates/hiplot.html")
    return analyses
def to_hiplot_experiment(self, max_list_elements: int = 24) -> tp.Any:  # no typing here since Hiplot is not a hard requirement
    """Converts the logs into an hiplot experiment for display.

    Parameters
    ----------
    max_list_elements: int
        forwarded to :code:`load_flattened` to bound how many elements of
        list/array values are exported

    Returns
    -------
    hiplot.Experiment
        one datapoint per flattened log entry, with the XY display preset to
        plot :code:`#loss` against :code:`#num-tell`

    Raises
    ------
    ImportError
        if hiplot is not installed

    Example
    -------
    exp = logs.to_hiplot_experiment()
    exp.display(force_full_width=True)

    Note
    ----
    - You can easily change the axes of the XY plot:
      exp.display_data(hip.Displays.XY).update({'axis_x': '0#0', 'axis_y': '0#1'})
    - For more context about hiplot, check:

      - blogpost: https://ai.facebook.com/blog/hiplot-high-dimensional-interactive-plots-made-easy/
      - github repo: https://github.com/facebookresearch/hiplot
      - documentation: https://facebookresearch.github.io/hiplot/
    """
    # hiplot is an optional dependency: import lazily and fail with an actionable message.
    # pylint: disable=import-outside-toplevel
    try:
        import hiplot as hip
    except ImportError as e:
        raise ImportError(
            f"{self.__class__.__name__} requires hiplot which is not installed by default "
            "(pip install hiplot)"
        ) from e

    exp = hip.Experiment()
    for xp in self.load_flattened(max_list_elements=max_list_elements):
        dp = hip.Datapoint(
            from_uid=xp.get("#parents_uids#0"),
            uid=xp["#uid"],
            # Drop "#"-prefixed bookkeeping keys that mention "uid" or "ask"; keep everything else.
            values={x: y for x, y in xp.items() if not (x.startswith("#") and ("uid" in x or "ask" in x))},
        )
        exp.datapoints.append(dp)
    exp.display_data(hip.Displays.XY).update({'axis_x': '#num-tell', 'axis_y': '#loss'})
    # for the record, some more options:
    exp.display_data(hip.Displays.XY).update({'lines_thickness': 1.0, 'lines_opacity': 1.0})
    return exp
def test_validation_circular_ref() -> None:
    """A parent chain that loops back on itself (2 -> 3 -> 4 -> 2) must fail validation."""
    # (uid, from_uid) pairs; uid "4" points back at "2", closing the cycle.
    links = (("1", "2"), ("2", "3"), ("3", "4"), ("4", "2"))
    with pytest.raises(hip.ExperimentValidationCircularRef):
        looped = hip.Experiment(datapoints=[
            hip.Datapoint(uid=uid, from_uid=parent, values={})
            for uid, parent in links
        ])
        looped.validate()
def to_hiplot_experiment(
    self, max_list_elements: int = 24
) -> tp.Any:  # no typing here since Hiplot is not a hard requirement
    """Build a hiplot experiment out of the logged entries, ready for display.

    Parameters
    ----------
    max_list_elements: int
        maximum number of elements of list/arrays to export
        (only the first elements are extracted)

    Example
    -------
    .. code-block:: python

        exp = logs.to_hiplot_experiment()
        exp.display(force_full_width=True)

    Note
    ----
    - The axes of the XY plot can be changed afterwards:
      :code:`exp.display_data(hip.Displays.XY).update({'axis_x': '0#0', 'axis_y': '0#1'})`
    - More context about hiplot:

      - blogpost: https://ai.facebook.com/blog/hiplot-high-dimensional-interactive-plots-made-easy/
      - github repo: https://github.com/facebookresearch/hiplot
      - documentation: https://facebookresearch.github.io/hiplot/
    """
    # pylint: disable=import-outside-toplevel
    try:
        import hiplot as hip
    except ImportError as e:
        raise ImportError(
            f"{self.__class__.__name__} requires hiplot which is not installed by default "
            "(pip install hiplot)") from e

    def _is_exported(key: str) -> bool:
        # Everything is kept except "#"-prefixed keys that mention "uid" or "ask".
        return not key.startswith("#") or ("uid" not in key and "ask" not in key)

    exp = hip.Experiment()
    exp.datapoints.extend(
        hip.Datapoint(
            from_uid=entry.get("#parents_uids#0"),
            uid=entry["#uid"],
            values={key: value for key, value in entry.items() if _is_exported(key)},
        )
        for entry in self.load_flattened(max_list_elements=max_list_elements)
    )
    exp.display_data(hip.Displays.XY).update(axis_x="#num-tell", axis_y="#loss")
    # for the record, some more options:
    exp.display_data(hip.Displays.XY).update(lines_thickness=1.0, lines_opacity=1.0)
    return exp
def gey_hiplot(
    self, query: str = None, sort: str = None, limit: int = None, offset: int = None
):
    """Build a HiPlot experiment from the runs matching a search.

    The four arguments are forwarded unchanged to ``self.get_runs_io``. Each
    item it yields is read through its ``"uid"`` and ``"values"`` keys and
    becomes one HiPlot datapoint.

    Args:
        query: search query forwarded to ``get_runs_io``.
        sort: sort specification forwarded to ``get_runs_io``.
        limit: maximum number of runs, forwarded to ``get_runs_io``.
        offset: pagination offset, forwarded to ``get_runs_io``.

    Returns:
        hiplot.Experiment: one datapoint per returned run.
    """
    # NOTE(review): the method name looks like a typo for "get_hiplot"; it is
    # kept as-is because callers may depend on it.
    import hiplot

    runs = self.get_runs_io(query=query, sort=sort, limit=limit, offset=offset)
    exp = hiplot.Experiment()
    for run in runs:
        # Index the current run, not the whole collection (previously `data[...]`,
        # which fails for a list and repeats the same values for a mapping).
        exp.datapoints.append(hiplot.Datapoint(uid=run["uid"], values=run["values"]))
    return exp
def _create_experiment_from_dataframe(df: DataFrame, include_tags: bool) -> hiplot.Experiment:
    """Generate a HiPlot experiment from MLFlow runs.

    Parameters
    ----------
    df: pandas.DataFrame
        A dataframe (normally the one returned by ``mlflow.search_runs``) to process
    include_tags: bool
        Whether or not to include the ``tags.*`` columns in the datapoint values

    Returns
    -------
    hiplot.Experiment
        The processed experiment, with one datapoint per dataframe row
    """
    param_cols = [col for col in df.columns if col.startswith("params.")]
    metric_cols = [col for col in df.columns if col.startswith("metrics.")]
    tag_cols = [col for col in df.columns if col.startswith("tags.")] if include_tags else []

    exp = hiplot.Experiment()
    for _, row in df.iterrows():
        values = {col: row[col] for col in param_cols}
        # Metrics that are NaN or infinite for this run are left out.
        values.update({col: row[col] for col in metric_cols if isfinite(row[col])})
        values.update({col: row[col] for col in tag_cols})
        # Round-tripping through uuid.UUID yields the canonical dashed form of the run id.
        run_uid = str(uuid.UUID(row["run_id"]))
        exp.datapoints.append(hiplot.Datapoint(uid=run_uid, values=values))
    return exp
SEARCH_RUNS_RESULT = DataFrame({ "run_id": [RUN_ID_1, RUN_ID_2], "params.numeric": [0, 1], "params.category": ["value1", "value2"], "metrics.third": [2, 4], "tags.test": ["yes", "no"], }) EXPERIMENT = hiplot.Experiment([ hiplot.Datapoint( uid=RUN_ID_1, values={ "params.numeric": 0, "params.category": "value1", "metrics.third": 2, }, ), hiplot.Datapoint( uid=RUN_ID_2, values={ "params.numeric": 1, "params.category": "value2", "metrics.third": 4, }, ), ]) EXPERIMENT_WITH_TAGS = hiplot.Experiment([ hiplot.Datapoint( uid=RUN_ID_1, values={ "params.numeric": 0, "params.category": "value1", "metrics.third": 2,
def fetcher(uri):
    """Prepare param sweep output for hiplot

    Collects the sweep results and simplifies them for easy display using
    hiplot. Each run contributes a chain of sub-sampled
    ``(step, cumulative_reward)`` datapoints linked through ``from_uid``,
    followed by one summary datapoint that also carries the hyper-parameters
    that vary across runs.

    :param uri: root dir containing all the param_sweeping results.
    :returns: hiplot Experiment Object for display (without datapoints when no
        logs are found)
    """
    print("got request for %s, collecting logs" % uri)
    exp = hip.Experiment()
    exp.display_data(hip.Displays.XY).update({
        "axis_x": "step",
        "axis_y": "cumulative_reward"
    })
    dfs = collect_logs(Path(uri))  # unpacked below as (name, dataframe) pairs

    # First pass: load each run's config and record every value seen per key.
    cfg_variants = {}
    cfgs = {}
    for name, _df in dfs:
        print("loading config from %s" % name)
        cfg = flatten(OmegaConf.load(str(Path(name) / "config.yaml")))
        cfgs[name] = cfg
        for k, v in cfg.items():
            cfg_variants.setdefault(k, set()).add(v)
    print("Read in %d logs successfully" % len(cfgs))

    # Only hyper-parameters taking more than one value across runs are worth an axis.
    varying_keys = [key for key, vals in cfg_variants.items() if len(vals) > 1]
    order = ["mean_final_reward", *varying_keys, "cumulative_reward"]
    print("headers found to plot: ", order)
    exp.display_data(hip.Displays.PARALLEL_PLOT).update(
        hide=["step", "uid", "from_uid"], order=order)

    if len(dfs) == 0:
        # Nothing to plot; also avoids dividing by zero in the average below.
        return exp

    ave_points = sum(len(df["step"]) for _name, df in dfs) // len(dfs)
    step_size = ave_points // 100 + 1  # I want an average of 100 points per experiment
    print("ave_points:", ave_points, "step_size:", step_size)

    varying = set(varying_keys)
    # Second pass: turn each dataframe into a chain of datapoints.
    for name, df in dfs:
        hyperparams = {}
        for key, val in cfgs[name].items():
            if key not in varying:
                continue
            try:
                hyperparams[key] = float(val)
            except (ValueError, TypeError):
                # TypeError covers values such as None that float() rejects outright.
                hyperparams[key] = str(val)

        steps = df["step"]
        returns = df["mean_episode_return"]
        cum_sum = returns.cumsum()
        prev_name = None
        for idx in range(0, len(cum_sum), step_size):
            # Positional access: idx is a row offset, not an index label.
            step = int(steps.iloc[idx])
            curr_name = "{},step{}".format(name, step)
            exp.datapoints.append(hip.Datapoint(
                uid=curr_name,
                from_uid=prev_name,
                values=dict(step=step, cumulative_reward=cum_sum.iloc[idx]),
            ))
            prev_name = curr_name

        # Summary datapoint closing the chain for this run.
        end_vals = dict(
            hyperparams,
            step=int(steps.iloc[-1]),
            cumulative_reward=cum_sum.iloc[-1],
            mean_final_reward=float(returns[-10000:].mean()),
            peak_performance=float(returns.rolling(window=1000).mean().max()),
        )
        exp.datapoints.append(hip.Datapoint(uid=name, from_uid=prev_name, values=end_vals))
    return exp
def _cast_to_float_or_str(df, column):
    """Cast ``column`` of ``df`` to float, falling back to str; return ``(df, cast_to_float)``."""
    try:
        return df.astype({column: float}), True
    except (ValueError, TypeError):
        return df.astype({column: str}), False


def make_parallel_coordinates_plot(html_file_path=None,
                                   metrics=False,
                                   text_logs=False,
                                   params=True,
                                   properties=False,
                                   experiment_id=None,
                                   state=None,
                                   owner=None,
                                   tag=None,
                                   min_running_time=None):
    """Visualize experiments on the parallel coordinates plot.

    This function, when executed in Notebook, displays interactive parallel coordinates plot in the cell's output.
    Another option is to save visualization to the standalone html file.
    You can also inspect the lineage of experiments.

    **See** `example <https://neptune-contrib.readthedocs.io/examples/hiplot_visualizations.html>`_
    **for the full use case.**

    Axes are ordered as follows: first axis is neptune ``experiment id``,
    second is experiment ``owner``,
    then ``params`` and ``properties`` in alphabetical order.
    Finally, ``metrics`` on the right side (alphabetical order as well).

    This visualization is built using `HiPlot <https://facebookresearch.github.io/hiplot/index.html>`_.
    It is a library published by the Facebook AI group.
    Learn more about the `parallel coordinates plot <https://en.wikipedia.org/wiki/Parallel_coordinates>`_.

    Tip:
        Use ``metrics``, ``params`` and ``properties`` arguments to select what data you want to see as axes.

        Use ``experiment_id``, ``state``, ``owner``, ``tag``, ``min_running_time`` arguments to filter experiments
        included in a plot. Only experiments matching all the criteria will be returned.

    Note:
        Make sure you have your project set: ``neptune.init('USERNAME/example-project')``

    Args:
        html_file_path (:obj:`str`, optional, default is ``None``):
            | Saves visualization as a standalone html file. No external dependencies needed.
        metrics (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``False``):
            | Metrics to display on the plot (as axes).
            | If `True`, then display all metrics.
            | If `False`, then exclude all metrics.
        text_logs (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``False``):
            | Text logs to display on the plot (as axes).
            | If `True`, then display all text logs.
            | If `False`, then exclude all text logs.
        params (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``True``):
            | Parameters to display on the plot (as axes).
            | If `True`, then display all parameters.
            | If `False`, then exclude all parameters.
        properties (:obj:`bool` or :obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``False``):
            | Properties to display on the plot (as axes).
            | If `True`, then display all properties.
            | If `False`, then exclude all properties.
        experiment_id (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | An experiment id like ``'SAN-1'`` or list of ids like ``['SAN-1', 'SAN-2']``.
            | Matching any element of the list is sufficient to pass criterion.
        state (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | An experiment state like ``'succeeded'`` or list of states like ``['succeeded', 'running']``.
            | Possible values: ``'running'``, ``'succeeded'``, ``'failed'``, ``'aborted'``.
            | Matching any element of the list is sufficient to pass criterion.
        owner (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | *Username* of the experiment owner (User who created experiment is an owner) like ``'josh'``
              or list of owners like ``['frederic', 'josh']``.
            | Matching any element of the list is sufficient to pass criterion.
        tag (:obj:`str` or :obj:`list` of :obj:`str`, optional, default is ``None``):
            | An experiment tag like ``'lightGBM'`` or list of tags like ``['pytorch', 'cycleLR']``.
            | Only experiments that have all specified tags will match this criterion.
        min_running_time (:obj:`int`, optional, default is ``None``):
            Minimum running time of an experiment in seconds, like ``2000``.

    Returns:
        :obj:`ExperimentDisplayed`, object that can be used to get a ``list`` of ``Datapoint`` objects,
        like this: ``ExperimentDisplayed.get_selected()``. This is only implemented for Jupyter notebook.
        Check `HiPlot docs
        <https://facebookresearch.github.io/hiplot/py_reference.html?highlight=display#hiplot.Experiment.display>`_.

    Raises:
        ValueError: if no neptune project is set.
        AssertionError: if no experiment matches the filters, or ``html_file_path`` is not a string.

    Examples:

        .. code:: python3

            # Make sure you have your project set:
            neptune.init('USERNAME/example-project')

            # (example 1) visualization for all experiments in project
            make_parallel_coordinates_plot()

            # (example 2) visualization for experiment with tag 'optuna' and saving to html file.
            make_parallel_coordinates_plot(html_file_path='visualizations.html', tag='optuna')

            # (example 3) visualization with all params, two metrics for experiment with tag 'optuna'
            make_parallel_coordinates_plot(tag='optuna', metrics=['epoch_accuracy', 'eval_accuracy'])

            # (example 4) visualization with all params and two metrics. All experiments created by john.
            make_parallel_coordinates_plot(metrics=['epoch_accuracy', 'eval_accuracy'], owner='john')
    """
    # Each list holds (leaderboard column name, display name) pairs.
    _all_metrics = []
    _all_text_logs = []
    _all_params = []
    _all_properties = []

    if neptune.project is None:
        msg = (
            "You do not have project, from which to fetch data. "
            "Use neptune.init() to set project, for example: neptune.init('USERNAME/example-project'). "
            "See docs: https://docs.neptune.ai/neptune-client/docs/neptune.html#neptune.init"
        )
        raise ValueError(msg)

    df = neptune.project.get_leaderboard(id=experiment_id,
                                         state=state,
                                         owner=owner,
                                         tag=tag,
                                         min_running_time=min_running_time)
    assert df.shape[0] != 0, 'No experiments to show. Try other filters.'

    # Cast columns to float or str. Only the leading prefix is stripped from the
    # display name: str.replace() would also remove later occurrences of it.
    for column in df.columns.to_list():
        if column.startswith('channel_'):
            # A channel that casts to float is a metric, otherwise a text log.
            df, is_numeric = _cast_to_float_or_str(df, column)
            target = _all_metrics if is_numeric else _all_text_logs
            target.append((column, column[len('channel_'):]))
        elif column.startswith('parameter_'):
            df, _ = _cast_to_float_or_str(df, column)
            _all_params.append((column, column[len('parameter_'):]))
        elif column.startswith('property_'):
            df, _ = _cast_to_float_or_str(df, column)
            _all_properties.append((column, column[len('property_'):]))

    # Validate each type of input
    metrics = _validate_input(metrics, _all_metrics, 'metric')
    text_logs = _validate_input(text_logs, _all_text_logs, 'text log')
    params = _validate_input(params, _all_params, 'parameter')
    properties = _validate_input(properties, _all_properties, 'property')

    # Check for name conflicts: a display name used by more than one kind gets a kind-specific prefix.
    name_counts = Counter(metrics + text_logs + params + properties)
    for column in [name for name, count in name_counts.items() if count > 1]:
        if column in metrics:
            metrics = ['metric__' + column if j == column else j for j in metrics]
            _all_metrics = [(j[0], 'metric__' + column) if j[1] == column else j for j in _all_metrics]
        if column in text_logs:
            text_logs = ['text_log__' + column if j == column else j for j in text_logs]
            _all_text_logs = [(j[0], 'text_log__' + column) if j[1] == column else j for j in _all_text_logs]
        if column in params:
            params = ['param__' + column if j == column else j for j in params]
            _all_params = [(j[0], 'param__' + column) if j[1] == column else j for j in _all_params]
        if column in properties:
            properties = ['property__' + column if j == column else j for j in properties]
            _all_properties = [(j[0], 'property__' + column) if j[1] == column else j for j in _all_properties]

    # Rename columns in DataFrame and sort experiments by neptune id
    new_col_names = {'id': 'neptune_id', 'owner': 'owner'}
    metrics = [(j[0], j[1]) for j in _all_metrics if j[1] in metrics]
    text_logs = [(j[0], j[1]) for j in _all_text_logs if j[1] in text_logs]
    params = [(j[0], j[1]) for j in _all_params if j[1] in params]
    properties = [(j[0], j[1]) for j in _all_properties if j[1] in properties]
    new_col_names.update(metrics)
    new_col_names.update(text_logs)
    new_col_names.update(params)
    new_col_names.update(properties)
    df = df[list(new_col_names)]
    df = df.rename(columns=new_col_names)
    # Sort on the numeric suffix of the id so that 'SAN-10' comes after 'SAN-2'.
    _exp_ids_series = df['neptune_id'].apply(lambda x: int(x.split('-')[-1]))
    df.insert(loc=0, column='neptune_exp_number', value=_exp_ids_series)
    df = df.astype({'neptune_exp_number': int})
    df = df.sort_values(by='neptune_exp_number', ascending=True)
    df = df.drop(columns='neptune_exp_number')

    # Prepare order of axes, where 'neptune_id' is first, metrics to the right.
    # The ordering is applied whether or not metrics were requested.
    all_axes = df.columns.to_list()
    metric_names = sorted(j[1] for j in metrics)
    for metric in metric_names:
        all_axes.remove(metric)
    all_axes.sort()
    # Stable sorts: move 'owner' to the front, then 'neptune_id' in front of it.
    all_axes.sort(reverse=True, key='owner'.__eq__)
    all_axes.sort(reverse=True, key='neptune_id'.__eq__)
    all_axes = all_axes + metric_names

    # Prepare HiPlot visualization
    input_to_hiplot = df.T.to_dict().values()
    hiplot_vis = hip.Experiment().from_iterable(input_to_hiplot)
    # Chain each datapoint to its predecessor so the lineage can be inspected.
    for previous, datapoint in zip(hiplot_vis.datapoints, hiplot_vis.datapoints[1:]):
        datapoint.from_uid = previous.uid

    # Configure the plot before any export so the saved html carries the same settings.
    hiplot_vis.display_data(hip.Displays.PARALLEL_PLOT).update({'categoricalMaximumValues': df.shape[0],
                                                                'hide': ['uid', 'from_uid'],
                                                                'order': all_axes})

    # Save to html if requested
    if html_file_path is not None:
        assert isinstance(html_file_path, str), \
            '"html_file_path" should be string, but {} is given'.format(type(html_file_path))
        if os.path.dirname(html_file_path):
            os.makedirs(os.path.dirname(html_file_path), exist_ok=True)
        hiplot_vis.to_html(html_file_path)

    return hiplot_vis.display()