def test_count_nodes(self):
    """Check the tree/node counts reported for the LightGBM model file.

    ``ProxyLightGBM._count_nodes`` is called on ``model_file`` and its
    two results are compared both against the expected constants and
    against the sizes of the arrays held by ``self.model``.
    """
    tree_count, node_count = ProxyLightGBM._count_nodes(model_file)

    # Absolute expectations for the model file under test.
    assert_equal(tree_count, 2)
    assert_equal(node_count, 10)

    # The counts must also agree with the already-loaded model:
    # one root per tree and one value per node.
    assert_equal(tree_count, self.model.trees_root.size)
    assert_equal(node_count, self.model.trees_nodes_value.size)
def save(self, f, format="QuickRank"):
    """
    Save the model onto the file identified by ``f``, using the given
    model format.

    The actual writing is delegated to the ``save`` method of the proxy
    class matching ``format``. The proxy is imported only when its branch
    is selected.

    Parameters
    ----------
    f : str
        The path to the filename where the model has to be saved
    format : str
        The format to use for saving the model. Supported values are
        'QuickRank', 'LightGBM', 'XGBoost' and 'ScikitLearn'.

    Returns
    -------
    status : bool
        The value returned by the selected proxy's ``save``; expected to
        be true if the save is successful, false otherwise.

    Raises
    ------
    TypeError
        If ``format`` is not one of the supported model formats.
    """
    if format == "QuickRank":
        from rankeval.model import ProxyQuickRank
        return ProxyQuickRank.save(f, self)
    elif format == "LightGBM":
        from rankeval.model import ProxyLightGBM
        return ProxyLightGBM.save(f, self)
    elif format == "XGBoost":
        from rankeval.model import ProxyXGBoost
        return ProxyXGBoost.save(f, self)
    elif format == "ScikitLearn":
        from rankeval.model import ProxyScikitLearn
        return ProxyScikitLearn.save(f, self)
    else:
        # Wrap the argument in a 1-tuple: with a bare ``% format`` a tuple
        # value would be unpacked as several format arguments and the
        # intended message would be lost.
        raise TypeError("Model format %s not yet supported!" % (format,))
def __init__(self, file_path, name=None, format="QuickRank",
             base_score=None, learning_rate=1, n_trees=None):
    """
    Load the model from the file identified by file_path using the given
    format.

    Parameters
    ----------
    file_path : str
        The path to the filename where the model has been saved
    name : str
        The name to be given to the current model. If None, the name
        defaults to "RTEnsemble: " followed by file_path.
    format : ['QuickRank', 'ScikitLearn', 'XGBoost', 'LightGBM']
        The format of the model to load.
    base_score : None or float
        The initial prediction score of all instances, global bias.
        If None and the format is 'XGBoost', 0.5 is used. For the other
        formats the attribute is left as None by this constructor.
        NOTE(review): earlier documentation promised a 0.0 default for
        the other formats; presumably that is applied elsewhere (loader
        or scorer) -- confirm.
    learning_rate : None or float
        The learning rate used by the model to shrinks the contribution
        of each tree. By default it is set to 1 (no shrinking at all).
    n_trees : None or int
        The maximum number of trees to load from the model. By default
        it is set to None, meaning the method will load all the trees.
        When given and smaller than the number of loaded trees, the
        model is pruned with ``self._prune_model(n_trees)``.

    Attributes
    ----------
    file : str
        The path to the filename where the model has been saved
    name : str
        The name to be given to the current model
    n_trees : integer
        The number of regression trees in the ensemble.
    n_nodes : integer
        The total number of nodes (splitting nodes and leaves) in the
        ensemble
    trees_root: list of integers
        Numpy array modelling the indexes of the root nodes of the
        regression trees composing the ensemble. The indexes refer to
        the following data structures:
        * trees_left_child
        * trees_right_child
        * trees_nodes_value
        * trees_nodes_feature
    trees_weight: list of floats
        Numpy array modelling the weights of the regression trees
        composing the ensemble.
    trees_left_child: list of integers
        Numpy array modelling the structure (shape) of the regression
        trees, considering only the left children. Given a node of a
        regression tree (a single cell in this array), the value
        identifies the index of the left child. If the node is a leaf,
        the child assumes -1 value.
    trees_right_child: list of integers
        Numpy array modelling the structure (shape) of the regression
        trees, considering only the right children. Given a node of a
        regression tree (a single cell in this array), the value
        identifies the index of the right child. If the node is a leaf,
        the child assumes -1 value.
    trees_nodes_value: numpy array
        Numpy array modelling either the output of a leaf node (when
        the node is a leaf) or the splitting value of the node in the
        regression trees (with respect to the feature identified by the
        trees_nodes_feature data structure).
    trees_nodes_feature: list of integers
        Numpy array modelling the feature-id used by the selected
        splitting node (or -1 if the node is a leaf).

    Raises
    ------
    TypeError
        If ``format`` is not one of the supported model formats.
    """
    self.file = file_path
    self.name = "RTEnsemble: " + file_path
    if name is not None:
        self.name = name

    self.learning_rate = learning_rate

    self.base_score = base_score
    if self.base_score is None and format == "XGBoost":
        self.base_score = 0.5

    # Structural attributes start empty; the proxy `load` call below
    # receives `self` and is expected to fill them in.
    self.n_trees = None
    self.n_nodes = None

    self.trees_root = None
    self.trees_weight = None
    self.trees_left_child = None
    self.trees_right_child = None
    self.trees_nodes_value = None
    self.trees_nodes_feature = None

    # Starts as an empty dict (presumably a per-model scorer cache --
    # confirm against its users).
    self._cache_scorer = dict()

    # Each proxy is imported only inside the branch that needs it.
    if format == "QuickRank":
        from rankeval.model import ProxyQuickRank
        ProxyQuickRank.load(file_path, self)
    elif format == "LightGBM":
        from rankeval.model import ProxyLightGBM
        ProxyLightGBM.load(file_path, self)
    elif format == "XGBoost":
        from rankeval.model import ProxyXGBoost
        ProxyXGBoost.load(file_path, self)
    elif format == "ScikitLearn":
        from rankeval.model import ProxyScikitLearn
        ProxyScikitLearn.load(file_path, self)
    else:
        # Wrap the argument in a 1-tuple: with a bare ``% format`` a tuple
        # value would be unpacked as several format arguments and the
        # intended message would be lost.
        raise TypeError("Model format %s not yet supported!" % (format,))

    # Keep only the first n_trees trees when a smaller limit is requested.
    if n_trees is not None and n_trees < self.n_trees:
        self._prune_model(n_trees)