Example #1
def _get_all_accuracies(tensorboard_hdfs_logdir, args_dict, number_params):
    """
    Retrieves all accuracies from the parallel executions (each one is in a
    different file, one per combination of wrapper function parameter)

    Args:
        :tensorboard_hdfs_logdir:
        :args_dict:
        :number_params:

    Returns:

    """

    results = []

    # Important: keys must be ordered the same way as in the _parse_to_dict function
    population_dict = diff_evo.get_dict()
    global generation_id  # module-level counter for the current generation
    for i in range(number_params):
        path_to_log = tensorboard_hdfs_logdir + "/generation." + str(
            generation_id - 1) + "/"
        for k in population_dict:
            path_to_log += k + '=' + str(args_dict[k][i]) + '&'
        path_to_log = path_to_log[:-1]  # drop the trailing '&'
        path_to_log += '/.metric'

        with hdfs.open_file(path_to_log, flags="r") as fi:
            metric = fi.read()  # the with-block closes the file; no explicit close needed

        results.append(metric)

    return [float(res) for res in results]
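
For context, a minimal sketch of the layout this function reads: each trial writes a single float to a ".metric" file under generation.<N>/<param-string>/. The generation number, parameter names, and paths below are hypothetical; hdfs.dump is the hops helper used elsewhere in these examples.

from hops import hdfs

# Hypothetical trial output for generation 3, params lr=0.01 and batch=64
trial_dir = hdfs.project_path() + "Experiments/app_1/generation.2/lr=0.01&batch=64"
hdfs.dump("0.93", trial_dir + "/.metric")  # the file _get_all_accuracies reads back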
Example #2
 def init_logger(self, trial_log_file):
     """Initializes the trial log file
     """
     self.trial_log_file = trial_log_file
     # Open trial log file descriptor
     if not hopshdfs.exists(self.trial_log_file):
         hopshdfs.dump("", self.trial_log_file)
     self.trial_fd = hopshdfs.open_file(self.trial_log_file, flags="w")
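
The exists/dump/open sequence above is the recurring pattern in these examples for safely opening an HDFS file for writing: dump creates an empty file first so that open_file with flags="w" has something to truncate. A hedged standalone sketch of the same pattern (the path is hypothetical):

from hops import hdfs as hopshdfs

log_path = hopshdfs.project_path() + "Logs/trial_0.log"  # hypothetical location
if not hopshdfs.exists(log_path):
    hopshdfs.dump("", log_path)  # create the file before opening it for writing
fd = hopshdfs.open_file(log_path, flags="w")
fd.write("trial started\n")  # some hops versions may expect bytes here
fd.close()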
Example #3
def _get_best(root_logdir, direction):

    min_val = sys.float_info.max
    min_logdir = None

    # sys.float_info.min is the smallest *positive* float, not the most negative
    # one, so use -sys.float_info.max to handle negative metrics correctly
    max_val = -sys.float_info.max
    max_logdir = None

    generation_folders = hdfs.ls(root_logdir)
    generation_folders.sort()

    for generation in generation_folders:
        for individual in hdfs.ls(generation):
            individual_files = hdfs.ls(individual, recursive=True)
            for file in individual_files:
                if file.endswith("/.metric"):
                    val = hdfs.load(file)
                    val = float(val)

                    if val > max_val:
                        max_val = val
                        max_logdir = file[:-8]  # strip the trailing "/.metric"

                    if val < min_val:
                        min_val = val
                        min_logdir = file[:-8]

    best_logdir = max_logdir if direction.upper() == Direction.MAX else min_logdir

    with hdfs.open_file(best_logdir + '/.outputs.json', flags="r") as fi:
        return_dict = json.loads(fi.read())
    return best_logdir, return_dict
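
A hedged sketch of how one individual's directory might be populated so that _get_best can find it; the run directory, hyperparameter names, and values are all hypothetical:

import json
from hops import hdfs

root_logdir = hdfs.project_path() + "Experiments/app_1/run_1"  # hypothetical
ind_dir = root_logdir + "/generation.0/lr=0.01&dropout=0.3"
hdfs.dump("0.91", ind_dir + "/.metric")  # the value _get_best compares
hdfs.dump(json.dumps({"accuracy": 0.91}), ind_dir + "/.outputs.json")  # what it returns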
Example #4
def _run(sc,
         train_fn,
         run_id,
         local_logdir=False,
         name="no-name",
         evaluator=False):
    """

    Args:
        sc:
        train_fn:
        local_logdir:
        name:

    Returns:

    """
    app_id = str(sc.applicationId)

    num_executions = util.num_executors()

    # Each TF task should run on one executor
    nodeRDD = sc.parallelize(range(num_executions), num_executions)

    # Make the Spark UI intuitive by grouping jobs
    sc.setJobGroup(
        os.environ['ML_ID'],
        "{} | ParameterServerStrategy - Distributed Training".format(name))

    server = parameter_server_reservation.Server(num_executions)

    server_addr = server.start()

    num_ps = util.num_param_servers()

    # Force execution on the executors, since the GPUs are located there
    nodeRDD.foreachPartition(
        _prepare_func(app_id, run_id, train_fn, local_logdir, server_addr,
                      num_ps, evaluator))

    logdir = experiment_utils._get_logdir(app_id, run_id)

    print('Finished Experiment\n')

    path_to_return = logdir + '/.outputs.json'
    if hdfs.exists(path_to_return):
        with hdfs.open_file(path_to_return, flags="r") as fi:
            return logdir, json.loads(fi.read())

    return logdir, None
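
_run only returns a dict when the training function left an .outputs.json in the experiment's log directory. A hedged sketch of a helper a train_fn could call to produce that file; the helper name and metric are made up for illustration:

import json
from hops import hdfs

def write_outputs(logdir, metrics):
    # Hypothetical helper: persist a metrics dict where _run will look for it
    hdfs.dump(json.dumps(metrics), logdir + "/.outputs.json")

# e.g. write_outputs(logdir, {"accuracy": 0.95})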
Example #5
    def initialize_logger(self, exp_dir):
        """Initialize logger of optimizer

        :param exp_dir: path of experiment directory
        :rtype exp_dir: str
        """

        # configure logger
        self.log_file = exp_dir + "/pruner.log"
        if not hdfs.exists(self.log_file):
            hdfs.dump("", self.log_file)
        self.fd = hdfs.open_file(self.log_file, flags="w")
        self._log("Initialized Pruner Logger")
Example #6
File: reporter.py (project: carlee0/maggy)
    def __init__(self, log_file, partition_id, task_attempt, print_executor):
        self.metric = None
        self.lock = threading.RLock()
        self.stop = False
        self.trial_id = None
        self.trial_log_file = None
        self.logs = ""
        self.log_file = log_file
        self.partition_id = partition_id
        self.task_attempt = task_attempt
        self.print_executor = print_executor

        # Open executor log file descriptor
        # This log is for all maggy system related log messages
        if not hopshdfs.exists(log_file):
            hopshdfs.dump("", log_file)
        self.fd = hopshdfs.open_file(log_file, flags="w")
        self.trial_fd = None
Example #7
def _get_best(args_dict, num_combinations, arg_names, arg_count,
              hdfs_appid_dir, optimization_key):

    if not optimization_key:
        optimization_key = 'metric'

    # Set on the first iteration below
    max_hp = None
    max_val = None

    min_hp = None
    min_val = None

    min_return_dict = {}
    max_return_dict = {}

    results = []

    first = True

    for i in range(num_combinations):

        argIndex = 0
        param_string = ''

        num_args = arg_count

        while num_args > 0:
            # Build the "name=value&..." string for this combination
            param_name = arg_names[argIndex]
            param_val = args_dict[param_name][i]
            param_string += str(param_name) + '=' + str(param_val) + '&'
            num_args -= 1
            argIndex += 1

        param_string = param_string[:-1]

        path_to_return = hdfs_appid_dir + '/' + param_string + '/.outputs.json'

        assert hdfs.exists(
            path_to_return), 'Could not find .outputs.json on path: {}'.format(
                path_to_return)

        with hdfs.open_file(path_to_return, flags="r") as fi:
            return_dict = json.loads(fi.read())

            # handle case when dict with 1 key is returned
            if optimization_key == 'metric' and len(return_dict.keys()) == 1:
                optimization_key = list(return_dict.keys())[0]

            metric = float(return_dict[optimization_key])

            if first:
                max_hp = param_string
                max_val = metric
                max_return_dict = return_dict
                min_hp = param_string
                min_val = metric
                min_return_dict = return_dict
                first = False

            if metric > max_val:
                max_val = metric
                max_hp = param_string
                max_return_dict = return_dict
            if metric < min_val:
                min_val = metric
                min_hp = param_string
                min_return_dict = return_dict

        results.append(metric)

    avg = sum(results) / float(len(results))

    return max_val, max_hp, min_val, min_hp, avg, max_return_dict, min_return_dict
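
A hedged usage sketch for this grid-search variant; the hyperparameter names, values, and directory are hypothetical:

args_dict = {"lr": [0.1, 0.01], "dropout": [0.3, 0.5]}  # two combinations
best = _get_best(args_dict,
                 num_combinations=2,
                 arg_names=["lr", "dropout"],  # must match the order used when writing
                 arg_count=2,
                 hdfs_appid_dir="Experiments/application_1",  # hypothetical
                 optimization_key="accuracy")
max_val, max_hp, min_val, min_hp, avg, max_out, min_out = best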
Example #8
    def __init__(self, experiment_type, **kwargs):

        global driver_secret

        # COMMON EXPERIMENT SETUP
        self._final_store = []
        self._trial_store = {}
        self.num_executors = kwargs.get("num_executors")
        self._message_q = queue.Queue()
        self.name = kwargs.get("name")
        self.experiment_done = False
        self.worker_done = False
        self.hb_interval = kwargs.get("hb_interval")
        self.description = kwargs.get("description")
        self.experiment_type = experiment_type
        self.es_interval = kwargs.get("es_interval")
        self.es_min = kwargs.get("es_min")

        # TYPE-SPECIFIC EXPERIMENT SETUP
        if self.experiment_type == "optimization":
            # set up an optimization experiment

            self.num_trials = kwargs.get("num_trials", 1)

            searchspace = kwargs.get("searchspace")
            if isinstance(searchspace, Searchspace):
                self.searchspace = searchspace
            elif searchspace is None:
                self.searchspace = Searchspace()
            else:
                raise Exception(
                    "The experiment's search space should be an instance of maggy.Searchspace, "
                    "but it is {0} (of type '{1}').".format(
                        str(searchspace),
                        type(searchspace).__name__))

            optimizer = kwargs.get("optimizer")

            if optimizer is None:
                if len(self.searchspace.names()) == 0:
                    self.optimizer = SingleRun()
                else:
                    raise Exception(
                        "The searchspace has to be empty or None to run without an optimizer."
                    )
            elif isinstance(optimizer, str):
                if optimizer.lower() == "randomsearch":
                    self.optimizer = RandomSearch()
                elif optimizer.lower() == "asha":
                    self.optimizer = Asha()
                elif optimizer.lower() == "none":
                    if len(self.searchspace.names()) == 0:
                        self.optimizer = SingleRun()
                    else:
                        raise Exception(
                            "The searchspace has to be empty or None to run without an optimizer."
                        )
                else:
                    raise Exception(
                        "Unknown Optimizer. Can't initialize experiment driver."
                    )
            elif isinstance(optimizer, AbstractOptimizer):
                self.optimizer = optimizer
                print("Custom Optimizer initialized.")
            else:
                raise Exception(
                    "The experiment's optimizer should either be an string indicating the name "
                    "of an implemented optimizer (such as 'randomsearch') or an instance of "
                    "maggy.optimizer.AbstractOptimizer, "
                    "but it is {0} (of type '{1}').".format(
                        str(optimizer),
                        type(optimizer).__name__))

            direction = kwargs.get("direction", "max")
            if isinstance(direction,
                          str) and direction.lower() in ["min", "max"]:
                self.direction = direction.lower()
            else:
                raise Exception(
                    "The experiment's direction should be an string (either 'min' or 'max') "
                    "but it is {0} (of type '{1}').".format(
                        str(direction),
                        type(direction).__name__))

            es_policy = kwargs.get("es_policy")
            if isinstance(es_policy, str):
                if es_policy.lower() == "median":
                    self.earlystop_check = MedianStoppingRule.earlystop_check
                elif es_policy.lower() == "none":
                    self.earlystop_check = NoStoppingRule.earlystop_check
                else:
                    raise Exception(
                        "The experiment's early stopping policy should either be a string ('median' or 'none') "
                        "or a custom policy that is an instance of maggy.earlystop.AbstractEarlyStop, "
                        "but it is {0} (of type '{1}').".format(
                            str(es_policy),
                            type(es_policy).__name__))
            elif isinstance(es_policy, AbstractEarlyStop):
                self.earlystop_check = es_policy.earlystop_check
                print("Custom Early Stopping policy initialized.")
            else:
                raise Exception(
                    "The experiment's early stopping policy should either be a string ('median' or 'none') "
                    "or a custom policy that is an instance of maggy.earlystop.AbstractEarlyStop, "
                    "but it is {0} (of type '{1}').".format(
                        str(es_policy),
                        type(es_policy).__name__))

            self.result = {
                "best_val": "n.a.",
                "num_trials": 0,
                "early_stopped": 0
            }

        elif self.experiment_type == "ablation":
            # set up an ablation study experiment
            self.earlystop_check = NoStoppingRule.earlystop_check

            ablation_study = kwargs.get("ablation_study")
            if isinstance(ablation_study, AblationStudy):
                self.ablation_study = ablation_study
            else:
                raise Exception(
                    "The experiment's ablation study configuration should be an instance of "
                    "maggy.ablation.AblationStudy, "
                    "but it is {0} (of type '{1}').".format(
                        str(ablation_study),
                        type(ablation_study).__name__))

            searchspace = kwargs.get("searchspace")
            if not searchspace:
                self.searchspace = Searchspace()
            else:
                raise Exception(
                    "The experiment's search space should be None for ablation experiments, "
                    "but it is {0} (of type '{1}').".format(
                        str(searchspace),
                        type(searchspace).__name__))

            ablator = kwargs.get("ablator")
            if isinstance(ablator, str):
                if ablator.lower() == "loco":
                    self.ablator = LOCO(ablation_study, self._final_store)
                    self.num_trials = self.ablator.get_number_of_trials()
                    if self.num_executors > self.num_trials:
                        self.num_executors = self.num_trials
                else:
                    raise Exception(
                        "The experiment's ablation study policy should either be a string ('loco') "
                        "or a custom policy that is an instance of maggy.ablation.ablation.AbstractAblator, "
                        "but it is {0} (of type '{1}').".format(
                            str(ablator),
                            type(ablator).__name__))
            elif isinstance(ablator, AbstractAblator):
                self.ablator = ablator
                print("Custom Ablator initialized. \n")
            else:
                raise Exception(
                    "The experiment's ablation study policy should either be a string ('loco') "
                    "or a custom policy that is an instance of maggy.ablation.ablation.AbstractAblator, "
                    "but it is {0} (of type '{1}').".format(
                        str(ablator),
                        type(ablator).__name__))

            self.result = {
                "best_val": "n.a.",
                "num_trials": 0,
                "early_stopped": "n.a"
            }
        else:
            raise Exception(
                "Unknown experiment type. experiment_type should be either 'optimization' or 'ablation', "
                "but it is {0}.".format(str(self.experiment_type)))

        # FINALIZE EXPERIMENT SETUP
        self.server = rpc.Server(self.num_executors)
        if not driver_secret:
            driver_secret = self._generate_secret(
                ExperimentDriver.SECRET_BYTES)
        self._secret = driver_secret
        self.job_start = datetime.now()
        self.executor_logs = ""
        self.maggy_log = ""
        self.log_lock = threading.RLock()
        self.log_file = kwargs.get("log_dir") + "/maggy.log"
        self.log_dir = kwargs.get("log_dir")
        self.exception = None

        # Open an HDFS file descriptor for logging
        if not hopshdfs.exists(self.log_file):
            hopshdfs.dump("", self.log_file)
        self.fd = hopshdfs.open_file(self.log_file, flags="w")
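
A hedged construction sketch for the optimization branch, using only the kwargs this __init__ reads; the searchspace tuple form follows maggy's convention (an assumption here), and all values are illustrative:

driver = ExperimentDriver(
    "optimization",
    num_executors=4,
    name="demo-experiment",
    hb_interval=1,
    description="illustrative run",
    searchspace=Searchspace(lr=("DOUBLE", [0.001, 0.1])),  # assumed constructor form
    optimizer="randomsearch",
    direction="max",
    es_policy="median",
    es_interval=300,
    es_min=10,
    num_trials=20,
    log_dir="Experiments/application_1",  # hypothetical
)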
Example #9
 def open_file(self, hdfs_path, project=None, flags="r", buff_size=0):
     return hopshdfs.open_file(
         hdfs_path, project=project, flags=flags, buff_size=buff_size
     )
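
Since this method only forwards its arguments to hopshdfs.open_file, callers can use it as a drop-in context manager, as in the other examples here; the instance name is hypothetical:

with env.open_file("Logs/example.txt", flags="r") as fi:  # env: object exposing the wrapper
    contents = fi.read()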