def gen_random_node(scope, nodetype):
    """Recursively build a random node structure over ``scope``.

    A scope of length 1 yields a ``NormalLeafNode``; any other scope of
    length <= 2 yields a ``MultiNormalLeafNode``.  Larger scopes yield a
    ``ProductNode`` when ``nodetype`` is ``'P'`` and a ``SumNode`` for any
    other value.  Product children are requested with type ``'S'`` and sum
    children with type ``'P'``, so the two node kinds alternate by level.

    Args:
        scope: Indexable sequence of variable identifiers.  The product
            branch indexes it with integer index arrays, so it is
            presumably a NumPy array -- TODO confirm against callers.
        nodetype: ``'P'`` to build a product node at this level; anything
            else builds a sum node.

    Returns:
        The root node of the randomly generated structure.
    """
    size = len(scope)
    if size == 1:
        return NormalLeafNode(0, scope[0])
    if size <= 2:
        return MultiNormalLeafNode.create(0, scope)

    if nodetype != 'P':
        sum_node = SumNode(0, scope)
        # The upper bound of randint is exclusive, so this always draws 2.
        child_count = np.random.randint(2, 3)
        sum_node.add_children(
            *[gen_random_node(scope, 'P') for _ in range(child_count)]
        )
        return sum_node

    # Assign every position of the scope to side 0 or side 1 at random.
    side = np.random.randint(2, size=size)
    left = np.where(side == 0)[0]
    right = np.where(side == 1)[0]
    if len(left) == 0 or len(right) == 0:
        # One side came out empty: not a valid split, so draw again.
        return gen_random_node(scope, 'P')

    product = ProductNode(0, scope, 'normal')
    first = gen_random_node(scope[left], 'S')
    second = gen_random_node(scope[right], 'S')
    product.add_children(first, second)
    return product
def make_children(data, scope, n):
    """Build ``n`` product nodes, each fitted on one contiguous row batch.

    The rows of ``data`` are cut into ``n`` consecutive batches of
    ``data.shape[0] // n`` rows each; rows left over after the last full
    batch are not used.  Every batch produces one ``ProductNode`` holding
    one ``NormalLeafNode`` per variable in ``scope``, parameterised with
    the mean and variance of that variable's column within the batch.

    Args:
        data: 2-D array indexed as ``data[rows, columns]``.
        scope: Iterable of integer column indices into ``data``.
        n: Number of batches, and therefore of product nodes, to create.

    Returns:
        A list with the ``n`` product nodes, in batch order.
    """
    nodes = []
    for part in range(n):
        # Evaluated inside the loop so that n == 0 never performs the division.
        rows_per_part = data.shape[0] // n
        start = rows_per_part * part
        stop = start + rows_per_part

        product = ProductNode(0, scope, 'normal')
        for var in scope:
            column = data[start:stop, var:var + 1]
            product.add_child(
                NormalLeafNode(0, var, np.mean(column), np.var(column))
            )
        nodes.append(product)
    return nodes
def learn(data, scope=None):
    """Recursively learn a node structure from ``data``.

    The variables in ``scope`` are first partitioned greedily: a variable
    joins the first existing group that contains a member for which
    ``is_ind`` returns a falsy value, and otherwise starts a new group.
    (``is_ind`` is presumably an independence test -- confirm against its
    definition.)

    * More than one group: a ``ProductNode`` is returned with one child per
      group -- a ``NormalLeafNode`` for a single-variable group, or the
      result of a recursive call restricted to that group's variables.
    * Exactly one group and fewer than 100 samples: a
      ``MultiNormalLeafNode`` is created for the group and updated with
      ``data``.
    * Exactly one group and at least 100 samples: a ``SumNode`` is returned
      whose children are learned from each element of ``cluster(data)``.

    Args:
        data: 2-D array of shape ``(samples, variables)``.
        scope: Iterable of column indices to model; defaults to every
            column of ``data``.

    Returns:
        The root node of the learned structure.
    """
    samples, variables = data.shape
    if scope is None:
        scope = np.arange(variables)

    groups = []
    for var in scope:
        home = None
        for members in groups:
            # all() stops at the first falsy is_ind result, which keeps the
            # sequence of is_ind calls the same as an explicit break.
            independent = all(
                is_ind(data[:, var], data[:, other]) for other in members
            )
            if not independent:
                home = members
                break
        if home is None:
            groups.append([var])
        else:
            home.append(var)

    if len(groups) > 1:
        node = ProductNode(samples, scope, "normal")
        for members in groups:
            if len(members) == 1:
                var = members[0]
                column = data[:, var]
                child = NormalLeafNode(
                    samples, var, np.mean(column), np.var(column)
                )
            else:
                child = learn(data, scope=np.array(members))
            node.add_child(child)
        return node

    if samples < 100:
        node = MultiNormalLeafNode.create(0, groups[0])
        node.update(data, None)
        return node

    node = SumNode(samples, scope)
    for part in cluster(data):
        node.add_child(learn(part, scope))
    return node