def node_from_yaml(layer_spec):
    """ Load the layer specification and initialize the layer nodes.

    Depending on the ``scheme`` parameter (default ``"1vs1"``) one
    classifier node is instantiated per class label (``"1vR"``) or per
    unordered pair of class labels (pairwise, ``"1vs1"``).

    :param layer_spec: YAML-derived dict; ``layer_spec["parameters"]``
        must contain the keys ``"node"`` and ``"class_labels"``.
    :returns: a ``MultiClassLayerNode`` wrapping all generated nodes
    :raises AssertionError: if the required specification keys are missing
    """
    assert ("parameters" in layer_spec
            and "class_labels" in layer_spec["parameters"]
            and "node" in layer_spec["parameters"]), \
        "Node requires specification of a node and classification labels!"
    scheme = layer_spec["parameters"].pop("scheme", "1vs1")
    # Create all nodes that are packed together in this layer
    layer_nodes = []
    node_spec = layer_spec["parameters"]["node"][0]
    classes = layer_spec["parameters"]["class_labels"]
    if scheme == '1vR':
        # one node per class label (one-vs-rest)
        for label in classes:
            node_obj = BaseNode.node_from_yaml(
                NodeChainFactory.instantiate(node_spec, {"LABEL": label}))
            layer_nodes.append(node_obj)
    else:
        # pairwise scheme: one node per unordered pair of labels
        n = len(classes)
        for i in range(n - 1):
            for j in range(i + 1, n):
                replace_dict = {"LABEL1": classes[i], "LABEL2": classes[j]}
                node_obj = BaseNode.node_from_yaml(
                    NodeChainFactory.instantiate(node_spec, replace_dict))
                layer_nodes.append(node_obj)
    # the consumed keys must not be forwarded to MultiClassLayerNode
    layer_spec["parameters"].pop("node")
    layer_spec["parameters"].pop("class_labels")
    # Create the node object
    node_obj = MultiClassLayerNode(nodes=layer_nodes,
                                   **layer_spec["parameters"])
    return node_obj
def node_from_yaml(layer_spec):
    """ Load the specs and initialize the layer nodes """
    assert ("parameters" in layer_spec
            and "class_labels" in layer_spec["parameters"]
            and "node" in layer_spec["parameters"]), \
        "Node requires specification of a node and classification labels!"
    parameters = layer_spec["parameters"]
    pairing_scheme = parameters.pop("scheme", "1vs1")
    # Build every node that is packed together in this layer
    generated_nodes = []
    template = parameters["node"][0]
    labels = parameters["class_labels"]
    if pairing_scheme == '1vR':
        # one-vs-rest: a single node for every class label
        for current_label in labels:
            generated_nodes.append(BaseNode.node_from_yaml(
                NodeChainFactory.instantiate(template,
                                             {"LABEL": current_label})))
    else:
        # pairwise: a node for every unordered pair of labels
        num_labels = len(labels)
        for first in range(num_labels - 1):
            for second in range(first + 1, num_labels):
                mapping = {"LABEL1": labels[first],
                           "LABEL2": labels[second]}
                generated_nodes.append(BaseNode.node_from_yaml(
                    NodeChainFactory.instantiate(template, mapping)))
    # remove the keys consumed above before forwarding the parameters
    parameters.pop("node")
    parameters.pop("class_labels")
    # Construct and hand back the wrapping layer node
    return MultiClassLayerNode(nodes=generated_nodes, **parameters)
def _stop_training(self):
    """ Do the optimization step and define final parameter choice

    This is the main method of this node!  It either optimizes directly
    (``self.nom_rng is None``) or runs one optimization per point of the
    nominal parameter grid, then re-trains the best flow on all
    training data.

    .. todo:: Allow also parallelization over nominal_ranges!
    """
    self._log("Starting optimization Process.")
    # expand the number of runs into distinct, run_number-dependent seeds
    self.runs = [10 * self.run_number + run for run in range(self.runs)]
    original_flow_template = copy.copy(self.flow_template)
    # Fill in validation parameters in the template
    # (truthiness instead of the original convoluted `not x == {}`)
    if self.validation_parameter_settings:
        self.flow_template = [
            NodeChainFactory.instantiate(
                template=node,
                parametrization=self.validation_parameter_settings)
            for node in original_flow_template]
    if self.nom_rng is None:
        # single optimization over the numeric parameter space
        self.prepare_optimization()
        self.best_parametrization, self.best_performance = \
            self.get_best_parametrization()
        self.performance_dict[self.p2key(self.best_parametrization)] = \
            (self.best_performance, self.best_parametrization)
    else:
        # one complete optimization per nominal parameter combination
        nom_grid = self.search_grid(self.nom_rng)
        iterations = 0
        search_history = []
        # copy flow_template since we have to instantiate for every nom_par
        flow_template = copy.copy(self.flow_template)
        for nom_par in nom_grid:
            # for getting the best parameterization,
            # the class attribute flow_template must be overwritten
            self.flow_template = [
                NodeChainFactory.instantiate(template=node,
                                             parametrization=nom_par)
                for node in flow_template]
            self.prepare_optimization()
            parametrization, performance = self.get_best_parametrization()
            self.performance_dict[self.p2key(nom_par)] = (performance,
                                                          parametrization)
            iterations += self.iterations
            search_history.append((nom_par, self.search_history))
            # reinitialize optimization parameters
            self.re_init()
        # reconstructing the overwritten flow for further usage
        self.flow_template = flow_template
        self.iterations = iterations
        self.search_history = sorted(
            search_history, key=lambda t: t[1][-1]["best_performance"])
        # sorted() makes the winner deterministic when performances tie
        best_key = max(sorted(self.performance_dict.items()),
                       key=lambda t: t[1])[0]
        self.best_performance, self.best_parametrization = \
            self.performance_dict[best_key]
        self.best_parametrization.update(dict(best_key))
    # when best parameter dict is calculated, this has to be logged
    # or saved and the chosen parameter is used for training on the
    # whole data set, independent of the chosen algorithm
    self._log("Using parameterization %s with optimal performance %s for "
              "metric %s." % (self.best_parametrization,
                              self.best_performance, self.metric))
    # Fill in the final parameters in the flow template
    if self.final_training_parameter_settings:
        self.flow_template = [
            NodeChainFactory.instantiate(
                template=node,
                parametrization=self.final_training_parameter_settings)
            for node in original_flow_template]
    else:
        self.flow_template = original_flow_template
    # BUGFIX: copy before mutating — the original aliased
    # self.flow_template here, so the splitter insertion and pop below
    # silently corrupted the stored template
    best_flow_template = copy.copy(self.flow_template)
    best_flow_template[1] = {'node': 'All_Train_Splitter'}
    # delete last node (presumably the evaluation/sink node -- confirm)
    best_flow_template.pop(-1)
    self.flow = self.generate_subflow(best_flow_template,
                                      self.best_parametrization, NodeChain)
    self.flow[-1].set_run_number(self.run_number)
    self.flow[0].set_generator(self.train_instances)
    self.flow.train()
    self._log("Training of optimal flow finished")
    # delete training instances that would be stored to disk if this node
    # is saved
    del self.train_instances
def _stop_training(self):
    """ Do the optimization step and define final parameter choice

    This is the main method of this node!

    .. todo:: Allow also parallelization over nominal_ranges!
    """
    self._log("Starting optimization Process.")
    self.runs = [10 * self.run_number + run for run in range(self.runs)]
    untouched_template = copy.copy(self.flow_template)
    # Substitute the validation parameters into the template
    if self.validation_parameter_settings != {}:
        self.flow_template = [
            NodeChainFactory.instantiate(
                template=node,
                parametrization=self.validation_parameter_settings)
            for node in untouched_template]
    if self.nom_rng is None:
        self.prepare_optimization()
        self.best_parametrization, self.best_performance = \
            self.get_best_parametrization()
        self.performance_dict[self.p2key(self.best_parametrization)] = \
            (self.best_performance, self.best_parametrization)
    else:
        grid_of_nominals = self.search_grid(self.nom_rng)
        total_iterations = 0
        collected_history = []
        # remember the template: it is re-instantiated per nominal value
        template_backup = copy.copy(self.flow_template)
        for nominal_setting in grid_of_nominals:
            # get_best_parametrization works on self.flow_template,
            # so the class attribute is temporarily overwritten here
            self.flow_template = [
                NodeChainFactory.instantiate(
                    template=node, parametrization=nominal_setting)
                for node in template_backup]
            self.prepare_optimization()
            parametrization, performance = self.get_best_parametrization()
            self.performance_dict[self.p2key(nominal_setting)] = \
                (performance, parametrization)
            total_iterations += self.iterations
            collected_history.append((nominal_setting, self.search_history))
            # reset the optimizer state before the next nominal setting
            self.re_init()
        # restore the overwritten flow template for further usage
        self.flow_template = template_backup
        self.iterations = total_iterations
        self.search_history = sorted(
            collected_history,
            key=lambda entry: entry[1][-1]["best_performance"])
        best_key = max(sorted(self.performance_dict.items()),
                       key=lambda entry: entry[1])[0]
        self.best_performance, self.best_parametrization = \
            self.performance_dict[best_key]
        self.best_parametrization.update(dict(best_key))
    # the chosen parameters are logged before training on the whole
    # data set, independent of the chosen algorithm
    self._log("Using parameterization %s with optimal performance %s for "
              "metric %s." % (self.best_parametrization,
                              self.best_performance, self.metric))
    # Substitute the final training parameters into the template
    if self.final_training_parameter_settings != {}:
        self.flow_template = [
            NodeChainFactory.instantiate(
                template=node,
                parametrization=self.final_training_parameter_settings)
            for node in untouched_template]
    else:
        self.flow_template = untouched_template
    best_flow_template = self.flow_template
    best_flow_template[1] = {'node': 'All_Train_Splitter'}
    # delete last node
    best_flow_template.pop(-1)
    self.flow = self.generate_subflow(
        best_flow_template, self.best_parametrization, NodeChain)
    self.flow[-1].set_run_number(self.run_number)
    self.flow[0].set_generator(self.train_instances)
    self.flow.train()
    self._log("Training of optimal flow finished")
    # delete training instances that would be stored to disk if this node
    # is saved
    del self.train_instances