def __init__(self, *nodes):
    """Create the node and attach a built copy of every item in ``nodes``.

    Each item is deep-copied, passed through ``NodeFactory.build`` and added
    as a child. While the keys of the leaves are not all distinct, the tree
    is visited level by level, starting from the direct children: nodes of
    the current level that share the same key receive a ``signature_args``
    restricted to the parameter names returned by ``dict_diff`` (presumably
    the parameters whose values differ between them -- confirm against
    ``dict_diff``). Only the children of such colliding nodes are visited
    on the next level.

    Parameters
    ----------
    *nodes:
        Objects accepted by ``NodeFactory.build``.

    Raises
    ------
    ValueError
        If the leaf keys are still not unique once the walk has finished.
    """
    super(Methods, self).__init__()
    for given in nodes:
        self.add_child(NodeFactory.build(copy.deepcopy(given)))

    def _leaf_keys():
        # Keys of every leaf currently reachable from this node.
        return [leaf.get_key() for leaf in self.walk_leaves()]

    level = self.children
    leaf_keys = _leaf_keys()
    level_keys = [member.get_key() for member in level]
    while len(set(leaf_keys)) != len(leaf_keys) and level:
        level_params = [member.get_parameters() for member in level]
        next_level = []
        for shared_key in set(level_keys):
            clashing = _list_indices(level_keys, shared_key)
            if len(clashing) == 1:
                # This key is used by a single node: nothing to disambiguate.
                continue
            differing = dict_diff(
                *[level_params[pos] for pos in clashing]).keys()
            for pos in clashing:
                member = level[pos]
                if differing:
                    member.signature_args = _sub_dict(level_params[pos],
                                                      differing)
                next_level.extend(member.children)
        level = next_level
        level_keys = [member.get_key() for member in level]
        leaf_keys = _leaf_keys()

    # The leaf keys are read once more after the loop before the final check.
    leaf_keys = _leaf_keys()
    if len(set(leaf_keys)) != len(leaf_keys):
        raise ValueError("Some methods are identical, they could not be "
                         "differentiated according to their arguments")
def __init__(self, *nodes):
    """Create the node and attach a built copy of every item in ``nodes``.

    Each item is deep-copied, passed through ``NodeFactory.build`` and added
    as a child. While the keys of the leaves are not all distinct, the tree
    is visited level by level, starting from the direct children: nodes of
    the current level that share the same key receive a ``signature_args``
    restricted to the parameter names returned by ``dict_diff`` (presumably
    the parameters whose values differ between them -- confirm against
    ``dict_diff``). Only the children of such colliding nodes are visited
    on the next level.

    Parameters
    ----------
    *nodes:
        Objects accepted by ``NodeFactory.build``.

    Raises
    ------
    ValueError
        If the leaf keys are still not unique once the walk has finished.
    """
    super(Methods, self).__init__()
    for given in nodes:
        self.add_child(NodeFactory.build(copy.deepcopy(given)))

    def _leaf_keys():
        # Keys of every leaf currently reachable from this node.
        return [leaf.get_key() for leaf in self.walk_leaves()]

    level = self.children
    leaf_keys = _leaf_keys()
    level_keys = [member.get_key() for member in level]
    while len(set(leaf_keys)) != len(leaf_keys) and level:
        level_params = [member.get_parameters() for member in level]
        next_level = []
        for shared_key in set(level_keys):
            clashing = _list_indices(level_keys, shared_key)
            if len(clashing) == 1:
                # This key is used by a single node: nothing to disambiguate.
                continue
            differing = dict_diff(
                *[level_params[pos] for pos in clashing]).keys()
            for pos in clashing:
                member = level[pos]
                if differing:
                    member.signature_args = _sub_dict(level_params[pos],
                                                      differing)
                next_level.extend(member.children)
        level = next_level
        level_keys = [member.get_key() for member in level]
        leaf_keys = _leaf_keys()

    # The leaf keys are read once more after the loop before the final check.
    leaf_keys = _leaf_keys()
    if len(set(leaf_keys)) != len(leaf_keys):
        raise ValueError("Some methods are identical, they could not be "
                         "differentiated according to their arguments")
def __init__(self, node, n_perms=100, permute="y", random_state=None,
             reducer=PvalPerms(), col_or_row=False, **kwargs):
    """Set up the permutation node around ``node``.

    Parameters
    ----------
    node:
        Passed to ``NodeFactory.build``; the result becomes the child of
        the internal ``CRSlicer``.
    n_perms:
        Stored on the instance and used as the ``size`` of the
        ``VirtualList`` holding the children.
    permute:
        Name of the block to be permuted; also handed to the slicer as
        ``apply_on``.
    random_state:
        Stored unchanged on the instance.
    reducer:
        Stored unchanged on the instance.
        NOTE(review): the default object is created once, when the
        function is defined, so it is shared by every instance that
        relies on the default.
    col_or_row:
        Forwarded to the slicer and stored on the instance.
    **kwargs:
        Forwarded to the parent class constructor.
    """
    super(Perms, self).__init__(**kwargs)
    self.n_perms = n_perms
    self.permute = permute
    self.random_state = random_state
    self.reducer = reducer

    perm_slicer = CRSlicer(
        signature_name="Perm",
        nb=0,
        apply_on=permute,
        col_or_row=col_or_row,
    )
    self.slicer = perm_slicer
    self.children = VirtualList(size=n_perms, parent=self)

    # The slicer is attached to this node first, then receives the subtree.
    perm_slicer.parent = self
    perm_slicer.add_child(NodeFactory.build(node))
    self.col_or_row = col_or_row
def __init__(self, node, n_folds=5, random_state=None,
             cv_type="stratified", cv_key="y",
             reducer=ClassificationReport(), **kwargs):
    """Set up the cross-validation node around ``node``.

    Parameters
    ----------
    node:
        Passed to ``NodeFactory.build``; the result becomes the child of
        the internal ``CRSlicer``.
    n_folds:
        Stored on the instance and used as the ``size`` of the
        ``VirtualList`` holding the children.
    random_state:
        Stored unchanged on the instance.
    cv_type:
        Stored unchanged on the instance (default ``"stratified"``).
    cv_key:
        Stored unchanged on the instance (default ``"y"``).
    reducer:
        Stored unchanged on the instance.
        NOTE(review): the default object is created once, when the
        function is defined, so it is shared by every instance that
        relies on the default.
    **kwargs:
        Forwarded to the parent class constructor.
    """
    super(CV, self).__init__(**kwargs)
    self.n_folds = n_folds
    self.random_state = random_state
    self.cv_type = cv_type
    self.cv_key = cv_key
    self.reducer = reducer

    fold_slicer = CRSlicer(
        signature_name="CV",
        nb=0,
        apply_on=None,
        col_or_row=False,
    )
    self.slicer = fold_slicer
    self.children = VirtualList(size=n_folds, parent=self)

    # The slicer is attached to this node first, then receives the subtree.
    fold_slicer.parent = self
    fold_slicer.add_child(NodeFactory.build(node))
def __init__(self, node, indices_of_groups, col_or_row=True):
    """Set up the splitter around ``node``.

    Parameters
    ----------
    node:
        Passed to ``NodeFactory.build``; the result becomes the child of
        the internal ``CRSlicer``.
    indices_of_groups:
        Mapping that is iterated over its keys and indexed by them; each
        value is reduced to its distinct elements and kept in
        ``self.uni_indices_of_groups``.
    col_or_row:
        Forwarded to the slicer.

    Notes
    -----
    ``self.size`` is the product of the number of distinct elements of
    every group (1 when there is no group); it is the size of the
    ``VirtualList`` holding the children.
    """
    super(CRSplitter, self).__init__()
    self.indices_of_groups = indices_of_groups
    self.slicer = CRSlicer(
        signature_name=self.__class__.__name__,
        nb=0,
        apply_on=None,
        col_or_row=col_or_row,
    )

    # Distinct elements of each group, in the order produced by set().
    self.uni_indices_of_groups = {
        group: list(set(indices_of_groups[group]))
        for group in indices_of_groups
    }

    n_combinations = 1
    for distinct in self.uni_indices_of_groups.values():
        n_combinations *= len(distinct)
    self.size = n_combinations

    self.children = VirtualList(size=self.size, parent=self)
    self.slicer.parent = self
    self.slicer.add_child(NodeFactory.build(node))
def Pipe(*nodes):
    """Pipeline: sequential execution of nodes.

    Every item of ``nodes`` is built with ``NodeFactory.build``. The first
    built node is the root of the pipeline; each following node is handed
    to ``__insert_node_at_leaf`` together with the node built just before
    it.

    Parameters
    ----------
    *nodes:
        task [, task]*

    Returns
    -------
    The node built from the first item, or ``None`` when called without
    any argument.

    Example
    -------
    >>> from epac import Pipe
    >>> from sklearn import datasets
    >>> from sklearn.feature_selection import SelectKBest
    >>> from sklearn.lda import LDA
    >>> from sklearn.svm import SVC
    >>>
    >>> X, y = datasets.make_classification(n_samples=12,
    ...                                     n_features=10,
    ...                                     n_informative=2,
    ...                                     random_state=1)
    >>> pipe = Pipe(SelectKBest(k=2), LDA())
    >>> pipe.transform(X=X, y=y)
    {'y': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'X': array([[-0.34385368,  0.75623409],
           [ 0.19829972, -1.16389861],
           [-0.74715829,  0.86977629],
           [ 1.13162939,  0.90876519],
           [ 0.23009474, -0.68017257],
           [ 0.16003707, -1.55458039],
           [ 0.40349164,  1.38791468],
           [-1.11731035,  0.23476552],
           [ 1.19891788,  0.0888684 ],
           [-0.75439794, -0.90039992],
           [ 0.12015895,  2.05996541],
           [-0.20889423,  2.05313908]])}
    >>>
    >>> pipe2 = Pipe(Pipe(SelectKBest(k=2), LDA()), SVC())
    >>> pipe2.transform(X=X, y=y)
    {'y': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'X': array([[-0.34385368,  0.75623409],
           [ 0.19829972, -1.16389861],
           [-0.74715829,  0.86977629],
           [ 1.13162939,  0.90876519],
           [ 0.23009474, -0.68017257],
           [ 0.16003707, -1.55458039],
           [ 0.40349164,  1.38791468],
           [-1.11731035,  0.23476552],
           [ 1.19891788,  0.0888684 ],
           [-0.75439794, -0.90039992],
           [ 0.12015895,  2.05996541],
           [-0.20889423,  2.05313908]])}
    """
    root = None
    prev = None
    for node in nodes:
        curr = NodeFactory.build(node)
        # Compare with None explicitly: a built node that happens to be
        # falsy must not be mistaken for "no root yet".
        if root is None:
            root = curr
        else:
            __insert_node_at_leaf(curr, prev)
        prev = curr
    return root
def Pipe(*nodes):
    """Pipeline: sequential execution of nodes.

    Every item of ``nodes`` is built with ``NodeFactory.build``. The first
    built node is the root of the pipeline; each following node is handed
    to ``__insert_node_at_leaf`` together with the node built just before
    it.

    Parameters
    ----------
    *nodes:
        task [, task]*

    Returns
    -------
    The node built from the first item, or ``None`` when called without
    any argument.

    Example
    -------
    >>> from epac import Pipe
    >>> from sklearn import datasets
    >>> from sklearn.feature_selection import SelectKBest
    >>> from sklearn.lda import LDA
    >>> from sklearn.svm import SVC
    >>>
    >>> X, y = datasets.make_classification(n_samples=12,
    ...                                     n_features=10,
    ...                                     n_informative=2,
    ...                                     random_state=1)
    >>> pipe = Pipe(SelectKBest(k=2), LDA())
    >>> pipe.transform(X=X, y=y)
    {'y': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'X': array([[-0.34385368,  0.75623409],
           [ 0.19829972, -1.16389861],
           [-0.74715829,  0.86977629],
           [ 1.13162939,  0.90876519],
           [ 0.23009474, -0.68017257],
           [ 0.16003707, -1.55458039],
           [ 0.40349164,  1.38791468],
           [-1.11731035,  0.23476552],
           [ 1.19891788,  0.0888684 ],
           [-0.75439794, -0.90039992],
           [ 0.12015895,  2.05996541],
           [-0.20889423,  2.05313908]])}
    >>>
    >>> pipe2 = Pipe(Pipe(SelectKBest(k=2), LDA()), SVC())
    >>> pipe2.transform(X=X, y=y)
    {'y': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'X': array([[-0.34385368,  0.75623409],
           [ 0.19829972, -1.16389861],
           [-0.74715829,  0.86977629],
           [ 1.13162939,  0.90876519],
           [ 0.23009474, -0.68017257],
           [ 0.16003707, -1.55458039],
           [ 0.40349164,  1.38791468],
           [-1.11731035,  0.23476552],
           [ 1.19891788,  0.0888684 ],
           [-0.75439794, -0.90039992],
           [ 0.12015895,  2.05996541],
           [-0.20889423,  2.05313908]])}
    """
    root = None
    prev = None
    # Iterate over the arguments directly: no index is needed, and this
    # avoids ``xrange``, which does not exist on Python 3.
    for node in nodes:
        curr = NodeFactory.build(node)
        # Compare with None explicitly: a built node that happens to be
        # falsy must not be mistaken for "no root yet".
        if root is None:
            root = curr
        else:
            __insert_node_at_leaf(curr, prev)
        prev = curr
    return root