Example #1
0
def experiment_trials(experiment_id: int) -> List[bindings.v1GetTrialResponse]:
    """Return the full GetTrial response for every trial of an experiment."""
    summary = bindings.get_GetExperimentTrials(test_session(),
                                               experimentId=experiment_id)
    # Each GetTrial response bundles the trial together with its workloads.
    return [
        bindings.get_GetTrial(test_session(), trialId=t.id)
        for t in summary.trials
    ]
Example #2
0
def write_api_call(args: Namespace, temp_dir: str) -> Tuple[str, str]:
    """Dump the raw experiment and trial API responses to JSON files.

    Writes ``api_experiment_call.json`` and ``api_trial_call.json`` into
    *temp_dir* and returns their paths as ``(experiment_path, trial_path)``.
    """
    api_experiment_filepath = os.path.join(temp_dir,
                                           "api_experiment_call.json")
    api_trial_filepath = os.path.join(temp_dir, "api_trial_call.json")

    # Build the session once and reuse it for both API calls (the original
    # constructed a fresh session per request; `describe` reuses one).
    session = setup_session(args)
    trial_obj = bindings.get_GetTrial(session, trialId=args.trial_id).trial
    experiment_id = trial_obj.experimentId
    exp_obj = bindings.get_GetExperiment(session, experimentId=experiment_id)

    create_json_file_in_dir(exp_obj.to_json(), api_experiment_filepath)
    create_json_file_in_dir(trial_obj.to_json(), api_trial_filepath)
    return api_experiment_filepath, api_trial_filepath
Example #3
0
def describe(args: Namespace) -> None:
    """Print (or export as CSV) details for one or more experiments.

    Renders three sections -- experiments, trials, and workloads -- either to
    stdout or, when ``args.outdir`` is set, to ``experiments.csv``,
    ``trials.csv``, and ``workloads.csv`` inside that directory.  With
    ``args.json`` the raw API responses are dumped as JSON and nothing else
    is printed.  ``args.metrics`` additionally expands per-workload scalar
    training/validation metric columns.
    """
    session = setup_session(args)
    exps = []
    for experiment_id in args.experiment_ids.split(","):
        r = bindings.get_GetExperiment(session, experimentId=experiment_id)
        if args.json:
            exps.append(r.to_json())
        else:
            exps.append(r.experiment)

    if args.json:
        print(json.dumps(exps, indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Name",
        "Description",
        "Archived",
        "Resource Pool",
        "Labels",
    ]
    values = [[
        exp.id,
        exp.state.value.replace("STATE_", ""),
        render.format_percent(exp.progress),
        render.format_time(exp.startTime),
        render.format_time(exp.endTime),
        exp.name,
        exp.description,
        exp.archived,
        exp.resourcePool,
        ", ".join(sorted(exp.labels or [])),
    ] for exp in exps]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    trials_for_experiment: Dict[str, Sequence[bindings.trialv1Trial]] = {}
    for exp in exps:
        trials_for_experiment[exp.id] = bindings.get_GetExperimentTrials(
            session, experimentId=exp.id).trials

    headers = [
        "Trial ID", "Experiment ID", "State", "Start Time", "End Time",
        "H-Params"
    ]
    values = [[
        trial.id,
        exp.id,
        trial.state.value.replace("STATE_", ""),
        render.format_time(trial.startTime),
        render.format_time(trial.endTime),
        json.dumps(trial.hparams, indent=4),
    ] for exp in exps for trial in trials_for_experiment[exp.id]]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    t_metrics_headers: List[str] = []
    t_metrics_names: List[str] = []
    v_metrics_headers: List[str] = []
    v_metrics_names: List[str] = []
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all provided experiments.
        for exp in exps:
            exp_trials = trials_for_experiment[exp.id]
            if not exp_trials:
                # FIX: an experiment may have no trials yet; previously
                # indexing [0] here raised IndexError.
                continue
            # Metric names are sampled from the first trial only.
            sample_trial = exp_trials[0]
            sample_workloads = bindings.get_GetTrial(
                session, trialId=sample_trial.id).workloads
            t_metrics_names += scalar_training_metrics_names(sample_workloads)
            v_metrics_names += scalar_validation_metrics_names(
                sample_workloads)
        t_metrics_names = sorted(set(t_metrics_names))
        t_metrics_headers = [
            "Training Metric: {}".format(name) for name in t_metrics_names
        ]
        v_metrics_names = sorted(set(v_metrics_names))
        v_metrics_headers = [
            "Validation Metric: {}".format(name) for name in v_metrics_names
        ]

    headers = (["Trial ID", "# of Batches", "State", "Report Time"] +
               t_metrics_headers + [
                   "Checkpoint State",
                   "Checkpoint Report Time",
                   "Validation State",
                   "Validation Report Time",
               ] + v_metrics_headers)

    # NOTE(review): rows are keyed by totalBatches across *all* trials, so
    # workloads from different trials with the same batch count merge into
    # one row -- presumably intentional for legacy-describe compatibility;
    # confirm before relying on per-trial workload output.
    wl_output: Dict[int, List[Any]] = {}
    for exp in exps:
        for trial in trials_for_experiment[exp.id]:
            workloads = bindings.get_GetTrial(session,
                                              trialId=trial.id).workloads
            for workload in workloads:
                t_metrics_fields = []
                # wl_detail ends up holding the last non-empty sub-workload
                # (training -> checkpoint -> validation) for the shared
                # columns below.
                wl_detail: Optional[
                    Union[bindings.v1MetricsWorkload,
                          bindings.v1CheckpointWorkload]] = None
                if workload.training:
                    wl_detail = workload.training
                    for name in t_metrics_names:
                        if wl_detail.metrics and (name in wl_detail.metrics):
                            t_metrics_fields.append(wl_detail.metrics[name])
                        else:
                            t_metrics_fields.append(None)
                else:
                    t_metrics_fields = [None for name in t_metrics_names]

                if workload.checkpoint:
                    wl_detail = workload.checkpoint

                if workload.checkpoint and wl_detail:
                    checkpoint_state = wl_detail.state.value
                    checkpoint_end_time = wl_detail.endTime
                else:
                    checkpoint_state = ""
                    checkpoint_end_time = None

                v_metrics_fields = []
                if workload.validation:
                    wl_detail = workload.validation
                    validation_state = wl_detail.state.value
                    validation_end_time = wl_detail.endTime
                    for name in v_metrics_names:
                        if wl_detail.metrics and (name in wl_detail.metrics):
                            v_metrics_fields.append(wl_detail.metrics[name])
                        else:
                            v_metrics_fields.append(None)
                else:
                    validation_state = ""
                    validation_end_time = None
                    v_metrics_fields = [None for name in v_metrics_names]

                if wl_detail:
                    if wl_detail.totalBatches in wl_output:
                        # condense training, checkpoints, validation workloads into one step-like
                        # row for compatibility with previous versions of describe
                        merge_row = wl_output[wl_detail.totalBatches]
                        merge_row[3] = max(
                            merge_row[3],
                            render.format_time(wl_detail.endTime))
                        # Fill metric cells only where the merged row has no
                        # value yet, so earlier workloads win.
                        for idx, tfield in enumerate(t_metrics_fields):
                            if tfield and merge_row[4 + idx] is None:
                                merge_row[4 + idx] = tfield
                        start_checkpoint = 4 + len(t_metrics_fields)
                        if checkpoint_state:
                            merge_row[
                                start_checkpoint] = checkpoint_state.replace(
                                    "STATE_", "")
                            merge_row[start_checkpoint +
                                      1] = render.format_time(
                                          checkpoint_end_time)
                        if validation_end_time:
                            merge_row[start_checkpoint +
                                      3] = render.format_time(
                                          validation_end_time)
                        if validation_state:
                            merge_row[start_checkpoint +
                                      2] = validation_state.replace(
                                          "STATE_", "")
                        for idx, vfield in enumerate(v_metrics_fields):
                            if vfield and merge_row[start_checkpoint + idx +
                                                    4] is None:
                                merge_row[start_checkpoint + idx + 4] = vfield
                    else:
                        row = ([
                            trial.id,
                            wl_detail.totalBatches,
                            wl_detail.state.value.replace("STATE_", ""),
                            render.format_time(wl_detail.endTime),
                        ] + t_metrics_fields + [
                            checkpoint_state.replace("STATE_", ""),
                            render.format_time(checkpoint_end_time),
                            validation_state.replace("STATE_", ""),
                            render.format_time(validation_end_time),
                        ] + v_metrics_fields)
                        wl_output[wl_detail.totalBatches] = row

    if not args.outdir:
        outfile = None
        print("\nWorkloads:")
    else:
        outfile = args.outdir.joinpath("workloads.csv")
    # Sort rows by batch count (column 1) for stable output.
    values = sorted(wl_output.values(), key=lambda a: int(a[1]))
    render.tabulate_or_csv(headers, values, args.csv, outfile)