def runner(self, concurrent_index, job_id, delay, duration, child_count,
               reports, search_type):
        ws_name = "quick-test"
        exper_name = "qtexper"

        fn = "code/miniSweeps.yaml"
        yd = file_utils.load_yaml(fn)
        hd = yd[constants.HPARAM_DIST]

        # simulate a controller for each concurrent runner
        hparam_search = HParamSearch()

        for index in range(child_count):
            # create a new RUN record
            run_name = self.store.start_run(ws_name,
                                            exper_name=exper_name,
                                            is_parent=False,
                                            job_id=job_id,
                                            node_index=0,
                                            search_type=search_type,
                                            search_style="dynamic")

            os.environ["XT_RUN_NAME"] = run_name
            os.environ["XT_WORKSPACE_NAME"] = ws_name
            os.environ["XT_EXPERIMENT_NAME"] = exper_name

            fake_context = cmd_core.build_mock_context(self.config, job_id,
                                                       ws_name, exper_name,
                                                       run_name)
            metric_name = fake_context.primary_metric

            xt_run = Run(self.config, self.store, supress_normal_output=True)
            xt_run.direct_run = True
            xt_run.context = fake_context

            #print("  starting: concurrent_index={}, child_index={}".format(concurrent_index, index))
            # delay start
            sleep_time = delay * random.random()
            time.sleep(sleep_time)

            hp_set = xt_run.get_next_hp_set_in_search(
                hd, search_type, hparam_search=hparam_search)
            self._assert("channels1" in hp_set)

            # log HPARAMS
            xt_run.log_hparams(hp_set)

            for i in range(reports):
                run_time = (duration / reports) * random.random()
                time.sleep(run_time)

                # log METRICS
                fake_metric = random.random()
                md = {"epoch": 1 + i, "acc": fake_metric}
                xt_run.log_metrics(md, step_name="epoch", stage="test")

            # mark the run as completed
            xt_run.close()
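
A minimal driver sketch (not part of the original test) showing one way such a runner could be exercised concurrently. The `quick_test` instance name, the delay/duration/child/report values, and the "random" search type are illustrative assumptions.

import threading

def launch_concurrent_runners(quick_test, job_id, runner_count=3):
    # Hypothetical helper: spins up several threads, each simulating one
    # concurrent runner with a small start delay and a couple of child runs.
    threads = []
    for concurrent_index in range(runner_count):
        t = threading.Thread(
            target=quick_test.runner,
            args=(concurrent_index, job_id, 0.5, 2.0, 2, 3, "random"))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()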
Example #2
class NexusLogger():
    '''
        Centralizes various forms of low-frequency output, such as occasional metric reports.
        Not intended for high-frequency logging (multiple calls per second throughout a run).
    '''
    def __init__(self):
        self.file_paths = []
        self.stage_num = 0
        self.last_metric = None
        self.last_y_axis_name = None
        self.last_x_axis_name = None
        self.last_x_axis_value = None
        self.last_stage_num = None
        # XT related
        self.xt_run_name = os.getenv("XT_RUN_NAME", None)
        self.xt_run = None
        if self.xt_run_name:
            from xtlib.run import Run as XTRun
            self.xt_run = XTRun()

    def add_output_file(self, file_path):
        # Add one console output file.
        file_path = os.path.abspath(file_path)
        self.file_paths.append(file_path)
        utils.ensure_dir_exists(file=file_path)
        output_file = open(file_path, 'w')
        output_file.close()

    def write_line(self, line):
        # Write one line to stdout and all console files.
        print(line)
        for path in self.file_paths:
            output_file = open(path, 'a')
            output_file.write(line + '\n')
            output_file.close()

    def write_and_condense_metrics(self, total_seconds, x_axis_name,
                                   x_axis_value, saved, metrics, tf_writer):
        '''
            Outputs the given metric values for the last reporting period and condenses the metrics.
        '''
        hours = total_seconds / 3600
        self.last_x_axis_name = x_axis_name
        self.last_x_axis_value = x_axis_value
        self.last_stage_num = self.stage_num

        # Report one line.
        sz = "{:7.3f} hrs  {:12,d} {}".format(hours, x_axis_value, x_axis_name)

        # Write one line of formatted metrics.
        for metric in metrics:
            sz_format = '      {} {{}}'.format(metric.formatting_string)
            sz += sz_format.format(metric.aggregate_value, metric.short_name)
        if saved:
            sz += "    SAVED"
        self.write_line(sz)

        if self.xt_run:
            # Log metrics to XT
            xt_metrics = {}
            xt_metrics["hrs"] = hours
            xt_metrics[x_axis_name] = x_axis_value
            for metric in metrics:
                xt_metrics[metric.short_name] = metric.aggregate_value
            self.xt_run.log_metrics(data_dict=xt_metrics,
                                    step_name=x_axis_name,
                                    stage='s{}'.format(self.stage_num))

        if tf_writer:
            # Log metrics to tensorboard.
            for metric in metrics:
                tf_writer.add_scalar(metric.long_name, metric.aggregate_value,
                                     x_axis_value)
            tf_writer.flush()

        # Condense the metrics
        for metric in metrics:
            metric.condense_values()

    def summarize_stage(self, metric):
        '''
            Outputs the metric value for the entire processing stage.
        '''
        metric.condense_values()  # Condense any values accumulated since the last report.
        sz_format = 'Stage summary (mean {{}}):  {}'.format(metric.formatting_string)
        self.write_line(sz_format.format(metric.long_name, metric.lifetime_value))
        self.last_metric = metric
        self.last_y_axis_name = metric.short_name
        return metric.lifetime_value

    def finish_run(self, in_hp_search):
        '''
            Outputs the final stage's summary metric as hpmax (used for hyperparameter tuning).
        '''
        if self.last_metric:
            # Log hpmax.
            explanation = 'Objective that would be maximized by hyperparameter tuning (hpmax):'
            hpmax = self.last_metric.lifetime_value
            if not self.last_metric.higher_is_better:
                hpmax = -hpmax
            if self.xt_run:
                # Log hpmax to XT
                xt_metrics = {}
                xt_metrics[self.last_x_axis_name] = self.last_x_axis_value
                xt_metrics['hpmax'] = hpmax
                self.xt_run.log_metrics(data_dict=xt_metrics,
                                        step_name=self.last_x_axis_name)
                self.xt_run.tag_job({
                    'plotted_metric':
                    's{}-{}'.format(self.last_stage_num, self.last_y_axis_name)
                })
                # self.xt_run.tag_job({'primary_metric': 'hpmax'})  # To override xt_config.yaml's default of 'hpmax'.
                # self.xt_run.tag_job({'step_name': 'iters'})  # To override xt_config.yaml's default of 'iters'.
                if in_hp_search:
                    explanation = 'Objective being maximized by hyperparameter tuning (hpmax):'
            sz_format = '{}  {}\n'.format(explanation,
                                          self.last_metric.formatting_string)
            self.write_line(sz_format.format(hpmax))
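
A minimal usage sketch (not from the original source). `FakeMetric` only illustrates the attributes and methods NexusLogger reads from a metric object; the output path is illustrative, and `add_output_file` relies on the `os` and `utils` imports of the original module.

class FakeMetric:
    # Stand-in exposing the interface NexusLogger expects from a metric.
    def __init__(self):
        self.formatting_string = '{:8.4f}'
        self.short_name = 'rew'
        self.long_name = 'reward'
        self.aggregate_value = 0.0
        self.lifetime_value = 0.0
        self.higher_is_better = True

    def condense_values(self):
        # A real metric would fold recent samples into aggregate_value /
        # lifetime_value here; the stand-in keeps its fixed values.
        pass

logger = NexusLogger()
logger.add_output_file('results/console.txt')   # illustrative path
metric = FakeMetric()
logger.write_and_condense_metrics(total_seconds=120, x_axis_name='iters',
                                  x_axis_value=1000, saved=False,
                                  metrics=[metric], tf_writer=None)
logger.summarize_stage(metric)
logger.finish_run(in_hp_search=False)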
Example #3
class OPELogger():
    '''
        Centralizes various forms of low-frequency output, such as occasional metric reports.
        Not intended for high-frequency logging (multiple calls per second throughout a run).
    '''
    def __init__(self):
        self.file_paths = []
        self.stage_num = 0
        self.last_metric = None
        self.last_y_axis_name = None
        self.last_x_axis_name = None
        self.last_x_axis_value = None
        self.last_stage_num = None
        # XT related
        self.xt_run_name = os.getenv("XT_RUN_NAME", None)
        self.xt_run = None
        if self.xt_run_name:
            from xtlib.run import Run as XTRun
            self.xt_run = XTRun()
        self.estimate_record = {}
        self.mse_record = {}
        self.all_estimators = [
            'PDIS', 'WPDIS', 'MB-K', 'LSTD', 'LSTDQ', 'TDREG-K', 'MWL', 'MSWL',
            'MQL', 'DualDICE', 'TDREG-N', 'FQE', 'MB-N', 'W-Regression', 'FL',
            'On_Policy', 'Behavior']
        for estimator in self.all_estimators:
            self.estimate_record[estimator] = []
            self.mse_record[estimator] = []
        # self.estimate_record['On_Policy'] = []
        # self.estimate_record['LSTDQ'] = []
        # self.mse_record['LSTDQ'] = []
        # self.estimate_record['MB'] = []
        # self.mse_record['MB'] = []
        # self.estimate_record['TDREG_Neural'] = []
        # self.mse_record['TDREG_Neural'] = []
        # self.estimate_record['FQE'] = []
        # self.mse_record['FQE'] = []


    def add_output_file(self, file_path):
        # Add one console output file.
        file_path = os.path.abspath(file_path)
        self.file_paths.append(file_path)
        utils.ensure_dir_exists(file=file_path)
        output_file = open(file_path, 'w')
        output_file.close()

    def write_line(self, line):
        # Write one line to stdout and all console files.
        print(line)
        for path in self.file_paths:
            output_file = open(path, 'a')
            output_file.write(line + '\n')
            output_file.close()

    def write_ope_metrics(self, dataset_seed, metrics, result):
        # report one line
        formatting_string = '{:6.4f}'
        sz = "Dataset {} - Relative Error:".format(dataset_seed)
        for estimator, error in metrics.items():
            sz_format = ' {{}}: {}'.format(formatting_string)
            sz += sz_format.format(estimator, error)
        self.write_line(sz)
        self.estimate_record['On_Policy'].append(result['On_Policy'])
        self.estimate_record['Behavior'].append(result['Behavior'])
        for estimator, error in metrics.items():
            self.estimate_record[estimator].append(result[estimator])
            self.mse_record[estimator].append(error)
        # summary_metrics = {}
        # summary_metrics['On_Policy'] = sum(self.estimate_record['On_Policy']) / len(self.estimate_record['On_Policy'])
        # summary_metrics['LSTDQ'] = sum(self.estimate_record['LSTDQ']) / len(self.estimate_record['LSTDQ'])
        # summary_metrics['squared_error'] = sum(self.mse_record['LSTDQ']) / len(self.mse_record['LSTDQ'])
        # print(summary_metrics)

        # if self.xt_run:
        #     xt_metrics = {}
        #     xt_metrics["True Val"] = result['On_Policy']
        #     # xt_metrics[x_axis_name] = x_axis_value
        #     for estimator, error in metrics.items():
        #         xt_metrics[estimator] = result[estimator]
        #         xt_metrics['squared_error'] = error
        #         # xt_metrics[estimator] = error
        #     # self.xt_run.log_metrics(data_dict=xt_metrics, step_name="Dataset")
        #     self.xt_run.log_metrics(data_dict=xt_metrics)


    def write_and_condense_metrics(self, total_seconds, x_axis_name, x_axis_value, saved, metrics, tf_writer):
        '''
            Outputs the given metric values for the last reporting period and condenses the metrics.
        '''
        hours = total_seconds / 3600
        self.last_x_axis_name = x_axis_name
        self.last_x_axis_value = x_axis_value
        self.last_stage_num = self.stage_num

        # Report one line.
        sz = "{:7.3f} hrs  {:12,d} {}".format(hours, x_axis_value, x_axis_name)

        # Write one line of formatted metrics.
        for metric in metrics:
            sz_format = '      {} {{}}'.format(metric.formatting_string)
            sz += sz_format.format(metric.aggregate_value, metric.short_name)
        if saved:
            sz += "    SAVED"
        self.write_line(sz)

        if self.xt_run:
            # Log metrics to XT
            xt_metrics = {}
            xt_metrics["hrs"] = hours
            xt_metrics[x_axis_name] = x_axis_value
            for metric in metrics:
                xt_metrics[metric.short_name] = metric.aggregate_value
            self.xt_run.log_metrics(data_dict=xt_metrics, step_name=x_axis_name, stage='s{}'.format(self.stage_num))

        if tf_writer:
            # Log metrics to tensorboard.
            for metric in metrics:
                tf_writer.add_scalar(metric.long_name, metric.aggregate_value, x_axis_value)
            tf_writer.flush()

        # Condense the metrics
        for metric in metrics:
            metric.condense_values()

    def summarize_stage(self, metric):
        '''
            Outputs the metric value for the entire processing stage.
        '''
        metric.condense_values() # Condense any values accumulated since the last report.
        sz_format = 'Stage summary (mean {{}}):  {}'.format(metric.formatting_string)
        self.write_line(sz_format.format(metric.long_name, metric.lifetime_value))
        self.last_metric = metric
        self.last_y_axis_name = metric.short_name
        return metric.lifetime_value

    def finish_run(self, in_hp_search):
        '''
            Outputs the final stage's summary metric as hpmax (used for hyperparameter tuning).
        '''
        summary_metrics = {}
        summary_metrics['On_Policy'] = sum(self.estimate_record['On_Policy']) / len(self.estimate_record['On_Policy'])
        summary_metrics['Behavior'] = sum(self.estimate_record['Behavior']) / len(self.estimate_record['Behavior'])
        # summary_metrics['MB'] = sum(self.estimate_record['MB']) / len(self.estimate_record['MB'])
        # summary_metrics['squared_error'] = sum(self.mse_record['MB']) / len(self.mse_record['MB'])
        # summary_metrics['TDREG_Neural'] = sum(self.estimate_record['TDREG_Neural']) / len(self.estimate_record['TDREG_Neural'])
        # summary_metrics['squared_error'] = sum(self.mse_record['TDREG_Neural']) / len(self.mse_record['TDREG_Neural'])
        for estimator in self.all_estimators:
            if estimator not in ('On_Policy', 'Behavior') and len(self.estimate_record[estimator]) > 0:
                summary_metrics[estimator] = sum(self.estimate_record[estimator]) / len(self.estimate_record[estimator])
                summary_metrics[estimator + '_se'] = sum(self.mse_record[estimator]) / len(self.mse_record[estimator])
        # summary_metrics['FQE'] = sum(self.estimate_record['FQE']) / len(self.estimate_record['FQE'])
        # summary_metrics['squared_error'] = sum(self.mse_record['FQE']) / len(self.mse_record['FQE'])

        # print(summary_metrics)

        if self.xt_run:
            self.xt_run.log_metrics(data_dict=summary_metrics)
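
A minimal usage sketch (not from the original source) feeding one dataset's off-policy evaluation results to OPELogger. The estimator values, errors, and output path are illustrative; `result` must contain 'On_Policy' and 'Behavior' entries plus one entry per estimator key in the error dict.

ope_logger = OPELogger()
ope_logger.add_output_file('results/ope_console.txt')   # illustrative path

# Per-dataset off-policy estimates and the corresponding errors.
result = {'On_Policy': 1.00, 'Behavior': 0.80, 'FQE': 0.95, 'PDIS': 0.90}
errors = {'FQE': 0.0025, 'PDIS': 0.0100}

ope_logger.write_ope_metrics(dataset_seed=0, metrics=errors, result=result)
ope_logger.finish_run(in_hp_search=False)   # averages each estimator's records; logs to XT when available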
Example #4
    "epochs": args.epochs,
    "lr": args.lr,
    "momentum": args.momentum,
    "channels1": args.channels1,
    "channels2": args.channels2,
    "kernel_size": args.kernel_size,
    "mlp-units": args.mlp_units,
    "weight-decay": args.weight_decay,
    "optimizer": args.optimizer,
    "mid-conv": args.mid_conv
}

run.log_hparams(hp_dict)

with open("userapp.txt", "at") as tfile:
    tfile.write("starting...\n")

for epoch in range(1, 1 + args.epochs):
    accuracy = np.random.random()
    with open("userapp.txt", "at") as tfile:
        tfile.write("epoch=" + str(epoch) + "\n")

    print("epoch={}, test-acc={}".format(epoch, accuracy))
    run.log_metrics({"epoch": epoch, "test-acc": accuracy})

    time.sleep(2)

with open("userapp.txt", "at") as tfile:
    tfile.write("completed.\n")

print("end of ML run.")