def test_get_trimmed_trees():
    """Check the alpha values reported by ``t.get_trimmed_trees``.

    Two two-level trees are built by hand; only the alpha component of each
    ``(alpha, tree)`` pair in the result is compared.
    """
    # Case 1: root gain .50 (total .75) above a left child with gain .25.
    left_child = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.25, tot_gain=.25,
                      left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0))
    tree = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.50, tot_gain=.75,
                left=left_child, right=Leaf(0, 0, 0, 0))
    results = t.get_trimmed_trees(tree)
    alphas = [alpha for alpha, _ in results]
    assert alphas == [-np.inf, 0.25, 0.5]

    # Case 2: root gain .15 (total .40) above the same kind of left child.
    left_child = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.25, tot_gain=.25,
                      left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0))
    tree = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.15, tot_gain=.40,
                left=left_child, right=Leaf(0, 0, 0, 0))
    results = t.get_trimmed_trees(tree)
    alphas = [alpha for alpha, _ in results]
    assert alphas == [-np.inf, 0.2]
def meta_or(self, children, meta):
    """Build a ``meta_or`` node whose children are all ``meta_and`` nodes.

    The children are first passed through ``self._flatten_bool("meta_or", ...)``;
    every resulting term that is not already a ``meta_and`` node is wrapped in
    a single-child ``meta_and``. ``meta`` is accepted but not used.
    """
    terms = []
    for term in self._flatten_bool("meta_or", children):
        if term.T != "meta_and":
            term = Node("meta_and", [term])
        terms.append(term)
    return Node("meta_or", terms)
def test_get_trim_levels():
    """Check ``t.get_min_trim`` on a single-split tree and a two-level tree."""
    # One split with gain == total gain == .5.
    stump = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.5, tot_gain=.5,
                 left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0))
    assert t.get_min_trim(stump) == 0.5

    # Root gain .15 (total .40) above a left child with gain .25.
    left_child = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.25, tot_gain=.25,
                      left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0))
    tree = Node(Leaf(0, 0, 0, 0), 0, 0, gain=.15, tot_gain=.40,
                left=left_child, right=Leaf(0, 0, 0, 0))
    assert t.get_min_trim(tree) == 0.2
def dataset(self, args):
    """Build a ``dataset`` node from one or two parsed arguments.

    The node always has two children: ``args[0]`` and either ``args[1]`` or
    ``None`` when only one argument was given (dataset without meta filter).
    """
    assert len(args) in (1, 2)
    first = args[0]
    second = args[1] if len(args) == 2 else None
    return Node("dataset", [first, second])
def test_feature_importance_works_with_weights():
    """Check ``t.feature_importance`` on a hand-built tree with sample weights.

    The expected vector is the per-feature gains (.15 and .25) scaled by 1.0
    and 0.25 respectively, then normalised to sum to one.
    """
    # NOTE(review): the factors 1.0 and 0.25 are presumably the weight
    # fractions reaching each split -- confirm against feature_importance.
    inner_split = Node(Leaf(0, 0, 0, 0), 1, 8, gain=.25, tot_gain=.25,
                       left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0))
    tree = Node(Leaf(0, 0, 0, 0), 0, 4, gain=.15, tot_gain=.40,
                left=inner_split, right=Leaf(0, 0, 0, 0))

    X = np.array([[1, 10], [2, 9], [3, 8], [4, 7],
                  [5, 6], [6, 5], [7, 4], [8, 3]])
    y = np.array([10, 20, 30, 40, 50, 60, 70, 80], dtype=np.float64)
    w = np.array([1, 1, 1, 1, 3, 3, 3, 3], dtype=np.float64).reshape(-1, 1)
    w /= w.sum()  # normalise weights in place so they sum to one

    dat = dataset(w, X, y)
    importance = t.feature_importance(tree, dat)

    expected = np.array([1.0 * .15, 0.25 * .25])
    expected /= expected.sum()
    assert np.all(importance == expected)
def arrayToBST(arr, start, end):
    """Build a binary tree from ``arr[start..end]`` (both bounds inclusive).

    The middle element of the range becomes the node; the elements before it
    form the ``l`` subtree and the elements after it the ``r`` subtree.
    The result is a search tree only if ``arr`` is sorted (assumed, not checked).

    Returns:
        The root ``Node`` of the range, or ``None`` when the range is empty.
    """
    if end < start:
        return None
    # Floor division: plain ``/`` yields a float on Python 3, and a float
    # is not a valid list index.
    mid = (start + end) // 2
    node = Node(arr[mid])
    node.l = arrayToBST(arr, start, mid - 1)
    node.r = arrayToBST(arr, mid + 1, end)
    return node
def arrayToBST(arr, start, end):
    """Build a binary tree from ``arr[start..end]`` (both bounds inclusive).

    The middle element of the range becomes the node; the elements before it
    form the ``l`` subtree and the elements after it the ``r`` subtree.
    The result is a search tree only if ``arr`` is sorted (assumed, not checked).

    Returns:
        The root ``Node`` of the range, or ``None`` when the range is empty.
    """
    if end < start:
        return None
    # Floor division: plain ``/`` yields a float on Python 3, and a float
    # is not a valid list index.
    mid = (start + end) // 2
    node = Node(arr[mid])
    node.l = arrayToBST(arr, start, mid - 1)
    node.r = arrayToBST(arr, mid + 1, end)
    return node
def children_of(self, node, _):
    """Distribute ``children_of`` over a ``union`` operand.

    ``node`` must have exactly one child. When that child is a ``union`` node,
    a new ``union`` is returned whose members are the walked
    ``children_of(member)`` nodes. For any other child nothing is returned
    (``None``).
    """
    operands = node.C
    assert len(operands) == 1
    operand = operands[0]
    if not (isinstance(operand, Node) and operand.T == "union"):
        # NOTE(review): presumably None tells the walker to keep the node
        # unchanged -- confirm against the walker implementation.
        return None
    distributed = []
    for member in operand.C:
        distributed.append(self.walk(Node("children_of", [member])))
    return Node("union", distributed)
def _default(self, node, limit):
    """Fallback handler: stop pushing the limit down and apply it here.

    Without a limit, the children are visited with ``None`` and that result
    is returned. With a limit, a copy of the node is made, its children are
    visited with ``None``, and the copy is wrapped in a ``limit`` node carrying
    the limit as meta.
    """
    print("_LimitPusher._default: node:", node.pretty())
    if limit is None:
        return self.visit_children(node, None)
    copied = Node(node.T, node.C, node.M)
    self.visit_children(copied, None)
    return Node("limit", [copied], meta=limit)
def qualified_name(self, args):
    """Build a ``qualified_name`` node with meta ``[namespace, name]``.

    With two arguments their ``.value`` attributes are namespace and name;
    with one argument the namespace is ``None``.
    """
    assert len(args) in (1, 2)
    if len(args) == 1:
        namespace, name = None, args[0].value  # no namespace
    else:
        namespace, name = args[0].value, args[1].value
    return Node("qualified_name", meta=[namespace, name])
def createBinarySearchTree(values):
    """Build a binary search tree from an ordered array of distinct integers.

    The (lower) middle element becomes the root; the elements before it are
    built recursively into ``left`` and the elements after it into ``right``.

    Args:
        values: Ordered sequence of distinct integers.

    Returns:
        The root ``Node``, or ``None`` when ``values`` is empty.
    """
    # An empty input used to index values[-1] and raise IndexError.
    if len(values) == 0:
        return None
    # Same index as ceil(len / 2) - 1 for len >= 1, but in pure integer
    # arithmetic.
    midIdx = (len(values) - 1) // 2
    newNode = Node(values[midIdx])
    if midIdx > 0:
        newNode.left = createBinarySearchTree(values[:midIdx])
    if midIdx < len(values) - 1:
        newNode.right = createBinarySearchTree(values[midIdx + 1:])
    return newNode
def assemble(self, db, default_namespace=None, limit=None):
    """Return the assembled query tree, building and caching it on first use.

    The tree is produced by walking the parsed query with
    ``_Assembler(db, default_namespace)`` and stored in ``self.Assembled``.
    When ``limit`` is given, the cached tree is wrapped in a ``limit`` node
    and the wrapped tree replaces the cache.
    """
    if self.Assembled is None:
        parsed_tree = self.parse()
        assembler = _Assembler(db, default_namespace)
        self.Assembled = assembler.walk(parsed_tree)
    if limit is None:
        return self.Assembled
    # NOTE(review): the wrapped tree is written back to the cache, so each
    # further call with a limit nests another "limit" node -- confirm
    # this is intended.
    self.Assembled = Node("limit", [self.Assembled], meta=limit)
    return self.Assembled
def meta_filter(self, node, meta_exp):
    """Merge the node's own meta expression into ``meta_exp`` and walk on.

    ``node`` has two children: the inner query and its meta expression. If
    only one of the two expressions is present it is used as is; if both are,
    they are combined as ``meta_or[meta_and[meta_exp, node_exp]]``. The inner
    query is then walked with the combined expression.
    """
    inner_query, own_exp = node.C
    if meta_exp is None:
        combined = own_exp
    elif own_exp is None:
        combined = meta_exp
    else:
        conjunction = Node("meta_and", [meta_exp, own_exp])
        combined = Node("meta_or", [conjunction])
    return self.walk(inner_query, combined)
def mult(self, args):
    """Combine two operands into a ``join``.

    If the left operand is already a ``join`` node, the right operand is
    appended to it via ``left + [right]``; otherwise a new two-child ``join``
    node is created.
    """
    assert len(args) == 2
    lhs, rhs = args
    extends_existing = isinstance(lhs, Node) and lhs.T == "join"
    if not extends_existing:
        return Node("join", [lhs, rhs])
    return lhs + [rhs]
class MyTestCase(unittest.TestCase):
    """Smoke tests for ProximityTree training, prediction and depth lookup."""

    # Fixtures are class attributes: built once and shared by all tests.
    pforest = ProximityForest.ProximityForest(61)
    ptree1 = ProximityTree.ProximityTree(42, pforest)
    ptree2 = ProximityTree.ProximityTree(47, forest=None)
    node = Node.Node(parent=None, label="NodeOne", node_id=26, tree=ptree1)

    def test_train_1(self):
        """Training on a generated dataset must not raise."""
        training_set = rdnumbers.randomNumbers.generate_dataset(5, 6)
        self.ptree1.train(training_set)

    def test_predict_1(self):
        """Prediction on a random series after training must not raise."""
        training_set = rdnumbers.randomNumbers.generate_dataset(5, 6)
        # Generated but never used afterwards; kept so the sequence of
        # generator calls is unchanged.
        unused_set = rdnumbers.randomNumbers.generate_dataset(5, 6)
        self.ptree1.train(training_set)
        series = rdnumbers.randomNumbers.generate_random_array(6)
        self.ptree1.predict(series)

    def test_get_min_depth(self):
        """Train, predict, then print the minimum depth for the fixture node."""
        training_set = rdnumbers.randomNumbers.generate_dataset(5, 6)
        # Generated but never used afterwards; kept so the sequence of
        # generator calls is unchanged.
        unused_set = rdnumbers.randomNumbers.generate_dataset(5, 6)
        self.ptree1.train(training_set)
        series = rdnumbers.randomNumbers.generate_random_array(6)
        self.ptree1.predict(series)
        print(self.ptree1.get_min_depth(self.node))

    def test_something(self):
        """Placeholder assertion that always fails."""
        # NOTE(review): looks like an IDE template leftover -- confirm
        # whether this test should be removed or replaced.
        self.assertEqual(True, False)
def meta_or(self, args):
    """Build a ``meta_or`` node, flattening directly nested ``meta_or`` arguments.

    The children of any argument that is itself ``meta_or`` are spliced into
    the result in place; other arguments are kept as they are.
    """
    flattened = []
    for arg in args:
        if arg.T != "meta_or":
            flattened.append(arg)
        else:
            flattened.extend(arg.C)
    return Node("meta_or", flattened)
def train(self, data):
    """Create a new root node for this tree and train it on ``data``.

    The tree's node counter is incremented first and its new value becomes
    the root's ``node_id``.
    """
    self.node_counter += 1
    root_kwargs = dict(
        parent=None,
        label=None,
        node_id=self.node_counter,
        depth=self.tree_depth,
        tree=self,
    )
    self.root = Node.Node(**root_kwargs)
    self.root.train(data)
def query(self, node, params):
    """Apply a query node's own parameters to its inner query.

    A two-child node is treated as ``(parameters, inner query)``: the
    parameters are merged over a copy of ``params`` and the inner query is
    walked with the merged mapping, then wrapped in a new one-child ``query``
    node. Any other node is returned unchanged.

    Args:
        node: Query node; its children are read from ``node.C``.
        params: Mapping of parameters inherited from the enclosing scope.
            It is copied, never modified.
    """
    if len(node.C) != 2:
        return node
    # The original unpacked an undefined name ``args`` here, which raised
    # NameError for every two-child node.
    p, q = node.C
    new_params = params.copy()
    new_params.update(p)
    return Node("query", [self.walk(q, new_params)])
def increasingBST(root):
    """Rebuild a binary tree as a right-leaning chain in in-order sequence.

    The tree is traversed in order (left, node, right) collecting
    ``node.value``; a fresh chain of ``Node`` objects linked through ``right``
    is built from those values.

    Args:
        root: Root of the input tree, or ``None`` for an empty tree.

    Returns:
        The head of the new chain, or ``None`` when the input tree is empty.
    """
    values = []

    def inorder(node):
        if node:
            inorder(node.left)
            values.append(node.value)
            inorder(node.right)

    inorder(root)
    # An empty tree used to raise IndexError from pop(0) on an empty list.
    if not values:
        return None
    head = Node(values[0])
    tail = head
    # Slicing once avoids the repeated O(n) shift of list.pop(0).
    for value in values[1:]:
        tail.right = Node(value)
        tail = tail.right
    return head
def meta_and(self, children, meta):
    """Build a ``meta_and`` node, distributing it over any ``meta_or`` child.

    The children are flattened with ``self._flatten_bool("meta_and", ...)``.
    If none of them is ``meta_or`` a plain ``meta_and`` node is returned.
    Otherwise the terms produced by ``self._generate_and_terms`` are each
    flattened and wrapped in ``meta_and``, and the result is a ``meta_or`` of
    those. ``meta`` is accepted but not used.
    """
    flat = self._flatten_bool("meta_and", children)
    if not any(child.T == "meta_or" for child in flat):
        return Node("meta_and", flat)
    term_lists = list(self._generate_and_terms([], flat))
    term_lists = [self._flatten_bool("meta_and", terms) for terms in term_lists]
    conjunctions = [Node("meta_and", terms) for terms in term_lists]
    return Node("meta_or", conjunctions)
def _make_DNF_lists(exp): if exp is None: return None if exp.T in CMP_OPS or exp.T == "in": return self._make_DNF(Node("meta_and", [exp])) elif exp.T == "meta_and": return self._make_DNF(Node("meta_or", [exp])) elif exp.T == "meta_or": or_exp = [] assert exp.T == "meta_or" for meta_and in exp.C: and_exp = [] assert meta_and.T == "meta_and" for c in meta_and.C: assert c.T in CMP_OPS or c.T == "in", "Unknown operation %s, expected cmp op or 'in'" % ( c.T, ) and_exp.append((c.T, c.C[0], c.C[1])) or_exp.append(and_exp) return or_exp
def meta_and(self, args):
    """Build a ``meta_and`` node, flattening directly nested ``meta_and`` arguments.

    A single argument is returned as is, without wrapping. Otherwise the
    children of any ``meta_and`` argument are spliced into the result in place.
    """
    if len(args) == 1:
        return args[0]
    flattened = []
    for arg in args:
        if arg.T != "meta_and":
            flattened.append(arg)
        else:
            flattened.extend(arg.C)
    return Node("meta_and", flattened)
def insert_recursive(self, key, value, node_input, parent_node, went_left):
    """Recursively find the insertion point for ``key`` and attach a new node.

    # ARGUMENTS
    key         -> key to insert
    value       -> value to insert
    node_input  -> node currently examined (None once the free slot is reached)
    parent_node -> parent of ``node_input``
    went_left   -> True if the last step went left, False if it went right

    Returns the newly created node.
    """
    if node_input is not None:
        # Keys less than or equal to the current key go left, others right.
        goes_left = (key - node_input.key) <= 0
        if goes_left:
            next_node = node_input.left_child
        else:
            next_node = node_input.right_child
        return self.insert_recursive(key, value, next_node, node_input,
                                     went_left=goes_left)

    # Free slot reached: create the node and link it to its parent.
    created = Node(key=key, value=value)
    created.parent = parent_node
    if went_left:
        parent_node.left_child = created
    else:
        parent_node.right_child = created
    return created
def union(self, args):
    """Build a ``union`` node, flattening directly nested unions.

    ``args`` holds one wrapper node whose children are the union members.
    A single member is returned as is. Otherwise the children of nested
    ``union`` members are placed first, followed by the remaining members in
    their original order.

    Args:
        args: One-element list; the members are read from ``args[0].C``.
    """
    assert len(args) == 1
    members = args[0].C
    if len(members) == 1:
        return members[0]
    unions = []
    others = []
    for member in members:
        if isinstance(member, Node) and member.T == "union":
            # Read children via .C, as the sibling join() does; the
            # original sliced the node with a[1:].
            unions += member.C
        else:
            others.append(member)
    return Node("union", unions + others)
def test_0(self):
    """``tree_by_levels`` gives [] for None and level order for a 6-node tree."""
    self.assertEqual(tree_by_levels(None), [])

    # Level 1: 1; level 2: 2, 3; level 3: 4 (under 2), 5 and 6 (under 3).
    left_subtree = Node(None, Node(None, None, 4), 2)
    right_subtree = Node(Node(None, None, 5), Node(None, None, 6), 3)
    root = Node(left_subtree, right_subtree, 1)
    self.assertEqual(tree_by_levels(root), [1, 2, 3, 4, 5, 6])
def insert(self, key, value):
    """Insert ``key``/``value`` into the tree and return the new node.

    An empty tree gets a new root; otherwise the insertion is delegated to
    ``insert_recursive`` starting at the root.
    """
    if self.root is not None:
        return self.insert_recursive(key, value, self.root, None,
                                     went_left=True)
    self.root = Node(key, value)
    return self.root
def _apply_not(self, node): if node.T == "meta_and": return Node("meta_or", [self._apply_not(c) for c in node.C]) elif node.T == "meta_or": return Node("meta_and", [self._apply_not(c) for c in node.C]) elif node.T == "meta_not": return node.C[0] elif node.T in CMP_OPS: new_op = { "~~": "!~~", "!~~": "~~", "~~*": "!~~*", "!~~*": "~~*", ">": "<=", "<": ">=", ">=": "<", "<=": ">", "=": "!=", "==": "!=", "!=": "==" }[node.T] return Node(new_op, node.C)
def get_phylogenetic_weights(languages):
    """Returns the phylogenetic weights for each language.

    A tree of nodes is built under a synthetic "ALL" root, one node per
    distinct name part; each language's weight is then obtained from
    ``get_phylogenetic_weight`` on the node of its last name part.

    Args:
        languages (list of str): The list of languages. The languages need to
            have a full phylogenetic name, e.g. "IE, Germanic, German".

    Returns:
        list of float: The phylogenetic weights in the same order as the
            input languages.
    """
    nodes = {"ALL": Node("ALL", None, [])}
    for full_name in languages:
        parent_name = "ALL"
        for part in full_name.split(", "):
            if part not in nodes:
                created = Node(part, None, [], nodes[parent_name])
                nodes[part] = created
                nodes[parent_name].children.append(created)
            # The next part hangs below this one.
            parent_name = part
    weights = []
    for full_name in languages:
        leaf_name = full_name.split(", ")[-1]
        weights.append(get_phylogenetic_weight(nodes[leaf_name]))
    return weights
def test_prune_tree():
    """Check ``t.prune_tree`` at two thresholds on a hand-built tree.

    At 0.25 the result has two leaves, the input tree still has three, and
    the left prediction is 1. At 0.375 the result is a single ``Leaf``
    predicting 2.
    """
    inner_split = Node(Leaf(1, 1, 1, 1), 0, 0, gain=.25, tot_gain=.25,
                       left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0))
    tree = Node(Leaf(2, 2, 2, 2), 0, 0, gain=.50, tot_gain=.75,
                left=inner_split, right=Leaf(1, 1, 1, 1))

    pruned = t.prune_tree(tree, 0.25)
    assert pruned.leaves() == 2
    assert tree.leaves() == 3  # the input tree keeps its three leaves
    assert pruned.left.prediction == 1

    pruned = t.prune_tree(tree, 0.375)
    assert isinstance(pruned, Leaf)
    assert pruned.prediction == 2
def insert(self, key, value):
    """Insert ``key``/``value``, then update heights and rebalance.

    Rotation cases handled by the rebalancing step:
      R child, R subtree: Left rotation
      L child, L subtree: Right rotation
      R child, L subtree: Right-Left rotation
      L child, R subtree: Left-Right rotation

    Returns nothing.
    """
    if self.root is None:
        # First node: becomes the root, nothing to rebalance.
        self.root = Node(key, value)
        self.set_node_height(self.root)
        return
    new_node = super(AvlTree, self).insert(key, value)
    self.set_node_height(new_node)
    # Rebalancing starts at the inserted node.
    self.rebalance(node_input=new_node)
def join(self, args):
    """Build a ``join`` node, flattening directly nested joins.

    ``args`` holds one wrapper node whose children are the join operands.
    A single operand is returned as is. Otherwise the children of nested
    ``join`` operands are placed first, followed by the remaining operands in
    their original order.
    """
    assert len(args) == 1
    operands = args[0].C
    if len(operands) == 1:
        return operands[0]
    nested = []
    plain = []
    for operand in operands:
        if isinstance(operand, Node) and operand.T == "join":
            nested.extend(operand.C)
        else:
            plain.append(operand)
    return Node("join", nested + plain)