Пример #1
0
def to_layers(spn, sparse=True, copy=True):
    """Convert an SPN into a list of layer objects.

    The network is optionally copied, pruned with
    ``contract_single_parents=False``, passed through ``complete_layers`` and
    split into node layers by ``get_topological_order_layers``. The first node
    layer is wrapped in a ``LeafLayer``; each following one becomes a
    ``SumLayer`` when its first node is a ``Sum`` and a ``ProductLayer``
    otherwise. The value of the ``elapsed_timer`` callable is printed after
    every stage.

    :param spn: root node of the network to convert
    :param sparse: forwarded as the third argument of ``get_scope``
    :param copy: when truthy, work on ``Copy(spn)`` instead of ``spn``
    :return: list of layer objects, in the order of the node layers
    """
    with elapsed_timer() as e:
        if copy:
            spn = Copy(spn)
        print('copy', e())
        spn = Prune(spn, contract_single_parents=False)
        print('prune', e())
        complete_layers([spn], type(spn))
        print('complete layers', e())
        node_layers = get_topological_order_layers(spn)
        print('topo search', e())
        print('nr layers', len(node_layers))

        layers = [LeafLayer(node_layers[0])]
        for depth in tqdm(range(1, len(node_layers))):
            current = node_layers[depth]
            below = node_layers[depth - 1]
            layer_scope = get_scope(current, below, sparse)

            # The type of the first node decides the type of the whole layer.
            if not isinstance(current[0], Sum):
                layers.append(ProductLayer(current, layer_scope))
                continue

            # Sum layers carry the weights of all their nodes in one array.
            stacked_weights = np.concatenate(
                [sum_node.weights for sum_node in current])
            layers.append(SumLayer(current, layer_scope, stacked_weights))
        print('to layer objects', e())
        return layers
Пример #2
0
def condition(spn, evidence):
    """Condition ``spn`` on ``evidence`` and return the pruned result.

    The positions ``i`` in ``range(len(spn.scope))`` whose entry in the first
    row of ``evidence`` is not NaN are collected and passed as ``scope`` to
    ``eval_spn_bottom_up`` together with one handler per node type: every
    leaf type found in the network is mapped to ``leaf_condition``, ``Sum`` to
    ``sum_condition`` and ``Product`` to ``prod_condition``.

    :param spn: root node of the network to condition
    :param evidence: array-like accepted by ``np.isnan``; only its first row
        is inspected here, the full object is passed on as ``input_vals``
    :return: ``Prune`` applied to the new root after ``assign_ids``
    """
    # Build the NaN mask once; the original re-evaluated np.isnan over the
    # whole evidence array for every scope position.
    nan_mask = np.isnan(evidence)[0]
    scope = {i for i in range(len(spn.scope)) if not nan_mask[i]}

    node_conditions = {
        type(leaf): leaf_condition
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_conditions.update({Sum: sum_condition, Product: prod_condition})

    # The second element of the result is not needed here.
    new_root, _ = eval_spn_bottom_up(spn,
                                     node_conditions,
                                     input_vals=evidence,
                                     scope=scope)
    assign_ids(new_root)
    return Prune(new_root)
Пример #3
0
    def learn_spmn(self, data):
        """Learn the SPMN structure from ``data`` and return its pruned root.

        The initial information-set scope is the list of positions of the
        first entry of ``self.params.partial_order``; the remaining-variables
        scope is the list of positions of ``self.params.feature_names``.

        :param data: training data, forwarded unchanged to the structure
            learner
        :return: learned spmn (also stored in ``self.spmn_structure``)
        """

        index = 0
        curr_information_set_scope = list(
            range(len(self.params.partial_order[0])))
        remaining_vars_scope = list(range(len(self.params.feature_names)))
        self.set_next_operation('Any')

        self.spmn_structure = self.__learn_spmn_structure(
            data, remaining_vars_scope, curr_information_set_scope, index)

        # Keep the node returned by Prune instead of discarding it: Prune is
        # consumed through its return value, so the pruned root is not
        # guaranteed to be the object that was passed in.
        self.spmn_structure = Prune(self.spmn_structure)
        return self.spmn_structure
Пример #4
0
def to_compressed_layers(spn):
    """Convert an SPN into layer objects, fusing sum-over-product layers.

    Works on a copy of ``spn`` that is pruned with
    ``contract_single_parents=False``, completed with ``complete_layers`` and
    split by ``get_topological_order_layers``. The first node layer becomes a
    ``LeafLayer``. A sum layer that sits directly on a product layer replaces
    the previously emitted product layer with a single ``SumProductLayer``;
    any other layer becomes a ``SumLayer`` or ``ProductLayer``. The value of
    the ``elapsed_timer`` callable is printed after every stage.

    :param spn: root node of the network to convert
    :return: list of layer objects
    """
    with elapsed_timer() as e:
        spn = Copy(spn)
        print('copy', e())
        spn = Prune(spn, contract_single_parents=False)
        print('prune', e())
        complete_layers([spn], type(spn))
        print('complete layers', e())
        node_layers = get_topological_order_layers(spn)
        print('topo search', e())
        print('nr layers', len(node_layers))

        layers = [LeafLayer(node_layers[0])]
        for depth in range(1, len(node_layers)):
            current = node_layers[depth]
            below = node_layers[depth - 1]

            # The first node of a layer determines how the layer is treated.
            sum_on_top = isinstance(current[0], Sum)
            product_below = isinstance(below[0], Product)

            if not sum_on_top:
                product_scope = get_scope(current, below, True)
                layers.append(ProductLayer(current, product_scope))
                continue

            weights = [sum_node.weights for sum_node in current]

            if product_below:
                # Fuse with the product layer emitted in the previous
                # iteration: drop it and connect the sums to the layer
                # two levels down.
                layers.pop()
                fused_scopes = get_two_layer_scopes(
                    current, node_layers[depth - 2], True)
                layers.append(SumProductLayer(current, fused_scopes, weights))
            else:
                sum_scope = get_scope(current, below, True)
                layers.append(SumLayer(current, sum_scope, weights))
        print('to layer objects', e())
        return layers
Пример #5
0
def marginalize(node, keep):
    """Return a pruned copy of the network restricted to the features in ``keep``.

    Sub-networks whose scope shares no feature with ``keep`` are dropped.
    Leaves that overlap ``keep`` are deep-copied; inner nodes are rebuilt with
    the surviving children, the intersected scope and, for ``Sum`` nodes, the
    original weights. The result gets fresh ids, is pruned and is checked
    with ``is_valid``.

    :param node: root of the network to marginalize
    :param keep: iterable of features that you want to keep
    :return: root of the marginalized, pruned network
    :raises Exception: if a leaf with more than one scope variable overlaps
        ``keep``
    :raises AssertionError: if ``is_valid`` rejects the result
    """
    keep = set(keep)

    def _restrict(current):
        kept_scope = keep & set(current.scope)

        if not kept_scope:
            # we are summing out this node
            return None

        if isinstance(current, Leaf):
            if len(current.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')

            return deepcopy(current)

        rebuilt = current.__class__()

        if isinstance(current, Sum):
            rebuilt.weights.extend(current.weights)

        for child in current.children:
            restricted_child = _restrict(child)
            if restricted_child is not None:
                rebuilt.children.append(restricted_child)

        rebuilt.scope.extend(kept_scope)
        return rebuilt

    # NOTE(review): when `keep` shares nothing with the root scope this is
    # None and is handed to assign_ids as-is -- confirm callers never do that.
    result = _restrict(node)
    assign_ids(result)
    result = Prune(result)
    valid, err = is_valid(result)
    assert valid, err

    return result
Пример #6
0
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    """Learn an SPN structure by repeatedly splitting ``dataset``.

    A queue of tasks is processed breadth-first. For every task
    ``next_operation`` chooses what to do with the current data slice:
    remove uninformative features, split rows (creates a ``Sum``), split
    columns (creates a ``Product``), factorize naively into one leaf per
    column, or create a single leaf. The finished network gets ids, is
    validated, pruned and validated again.

    :param dataset: 2-D data; ``dataset.shape[1]`` is its number of columns
    :param ds_context: passed through to ``split_rows``, ``split_cols`` and
        ``create_leaf``
    :param split_rows: ``split_rows(data, ds_context, scope)`` returning a
        sequence of ``(data_slice, scope_slice, proportion)`` triples
    :param split_cols: ``split_cols(data, ds_context, scope)`` returning a
        sequence of ``(data_slice, scope_slice, _)`` triples
    :param create_leaf: ``create_leaf(data, ds_context, scope)`` returning a
        leaf node
    :param next_operation: callable returning ``(operation, op_params)`` for
        a data slice
    :param initial_scope: scope of the root; defaults to all column indices.
        When shorter than the number of columns, the difference is passed to
        ``data_slicer`` as ``num_conditional_cols``
    :param data_slicer: ``data_slicer(data, cols, num_conditional_cols)``
        returning the selected columns of ``data``
    :return: root node of the learned, pruned network
    :raises AssertionError: on a ``None`` argument, an ``initial_scope``
        longer than the number of columns, or an invalid resulting network
    :raises Exception: if ``next_operation`` returns an unknown operation
    """
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    # Placeholder parent so that the real root is attached like any child.
    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
        num_conditional_cols = None
    elif len(initial_scope) < dataset.shape[1]:
        num_conditional_cols = dataset.shape[1] - len(initial_scope)
    else:
        num_conditional_cols = None
        # Only an exact match is left as a valid case here. The original
        # check used ">", which rejected a scope of exactly the right length
        # and accepted one that is longer than the data has columns.
        assert len(initial_scope) == dataset.shape[
            1], "check initial scope: %s" % initial_scope

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope, False, False))

    while tasks:

        (local_data, parent, children_pos, scope, no_clusters,
         no_independencies) = tasks.popleft()

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            # op_params holds the column positions to split off; each gets
            # its own single-column task.
            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False

            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            # Column positions and the scope values they map to, in the same
            # iteration order.
            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]

            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))

            continue

        elif operation == Operation.SPLIT_ROWS:

            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                # No row split found: retry the same slice with row
                # clustering disabled.
                tasks.append(
                    (local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope,
                              False, False))

            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                # No column split found: retry the same slice with the
                # independence test disabled.
                tasks.append(
                    (local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1,
                              scope_slice, False, False))

            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            for col in range(len(scope)):
                node.children.append(None)
                local_tasks.append(len(node.children) - 1)
                child_data_slice = data_slicer(local_data, [col],
                                               num_conditional_cols)
                local_children_params.append(
                    (child_data_slice, ds_context, [scope[col]]))

            # One leaf per column, created through the module-level pool.
            result_nodes = pool.starmap(create_leaf, local_children_params)
            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()

            logging.debug(
                "\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                    len(scope), split_end_t - split_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()

            logging.debug(
                "\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                    node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            # Format instead of concatenating: "+" with a non-string
            # operation raised TypeError before the message could be built.
            raise Exception("Invalid operation: {}".format(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node
Пример #7
0
    rg_layers = rg.make_layers()
    print("random graph built in  ", (time.perf_counter() - start))

    start = time.perf_counter()
    vector_list, root = Make_SPN_from_RegionGraph(rg_layers, np.random.RandomState(100),
                                                  num_classes=1, num_gauss=20, num_sums=20)
    print("Make_SPN_from_RegionGraph in  ", (time.perf_counter() - start))

    start = time.perf_counter()
    print(get_structure_stats(root))
    print("get_structure_stats in  ", (time.perf_counter() - start))

    old_root = Copy(root)

    start = time.perf_counter()
    root = Prune(root)
    print("Prune in  ", (time.perf_counter() - start))

    start = time.perf_counter()
    root = SPN_Reshape(root, 2)
    print("SPN_Reshape in  ", (time.perf_counter() - start))

    start = time.perf_counter()
    print(get_structure_stats(root))
    print("get_structure_stats in  ", (time.perf_counter() - start))

    start = time.perf_counter()
    layers, layer_types = get_execution_layers(root)
    print("get_execution_layers in  ", (time.perf_counter() - start))

    for i, lt in enumerate(layer_types):
def spn_for_evidence(spn,
                     evidence_ranges,
                     node_likelihood=None,
                     distribution_update_ranges=None):
    """Build a copy of ``spn`` updated for the given evidence ranges.

    Leaves whose entry in ``evidence_ranges`` is ``None`` are copied
    unchanged with probability 1. For the other leaves the probability of the
    evidence is computed with ``node_likelihood[type(leaf)]`` and, when it is
    non-zero, the copied leaf is updated in place by
    ``distribution_update_ranges[type(leaf)]``. Sum nodes drop children whose
    weighted probability is not positive and renormalize the remaining
    weights; product nodes multiply the probabilities of their children.

    :param spn: root node of the network
    :param evidence_ranges: sequence indexed by scope variable; ``None``
        entries mean "no evidence for this variable"
    :param node_likelihood: mapping from leaf type to a likelihood function
        called as ``f(node, ranges, node_likelihood=node_likelihood)``; it is
        accessed as soon as a leaf has evidence
    :param distribution_update_ranges: mapping from leaf type to a function
        called as ``f(new_leaf, evidence_range)``
    :return: ``(prob, new_root)`` where ``new_root`` has fresh ids, is pruned
        and passed ``is_valid``
    :raises Exception: for a leaf with more than one scope variable or a leaf
        type missing from ``node_likelihood``
    :raises TypeError: for a node that is neither Leaf, Sum nor Product
    """
    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    def spn_for_evidence_recursive(node):

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            leaf_range = evidence_ranges[node.scope[0]]
            if leaf_range is None:
                return 1, deepcopy(node)

            t_node = type(node)
            if t_node not in node_likelihood:
                raise Exception(
                    'No log-likelihood method specified for node type: ' +
                    str(type(node)))

            ranges = np.array([evidence_ranges])
            prob = node_likelihood[t_node](
                node, ranges, node_likelihood=node_likelihood)[0][0]

            newNode = deepcopy(node)
            # A leaf with zero probability is kept as a plain copy; it is
            # filtered out by the parent sum node.
            if prob != 0:
                distribution_update_ranges[t_node](newNode, leaf_range)

            return prob, newNode

        newNode = node.__class__()
        # Copy the scope so the new network does not share the list object
        # with the input network.
        newNode.scope = list(node.scope)

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []

            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)

            new_weights = np.array(new_weights)
            total = np.sum(new_weights)
            newNode.weights = new_weights / total
            newNode.children = new_childs
            return total, newNode

        elif isinstance(node, Product):
            new_childs = []

            new_prob = 1.
            for c in node.children:
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)

            newNode.children = new_childs
            return new_prob, newNode

        # Previously this fell through and returned None, which surfaced as
        # an opaque unpacking TypeError in the caller.
        raise TypeError("Unsupported node type: " + str(type(node)))

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err

    return prob, newNode
Пример #9
0
def learn_structure_cnet(
    dataset,
    ds_context,
    conditioning,
    create_leaf,
    next_operation_cnet=get_next_operation_cnet(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    """Learn a network by repeatedly conditioning on a single column.

    Tasks are processed breadth-first. For a ``CONDITIONING`` operation the
    ``conditioning`` callable picks a column; the rows are split by whether
    that column equals 1, giving a ``Sum`` node with one ``Product`` child
    per row cluster. Each product gets two pending children: one for the
    data without the conditioning column and one for the conditioning column
    alone. When no conditioning column is found, or the operation is
    ``CREATE_LEAF``, a leaf is created for the current slice. The finished
    network gets ids, is validated, pruned and validated again.

    :param dataset: 2-D data; ``dataset.shape[1]`` is its number of columns
    :param ds_context: passed through to ``create_leaf``
    :param conditioning: ``conditioning(data)`` returning
        ``(col_conditioning, found_conditioning)``
    :param create_leaf: ``create_leaf(data, ds_context, scope)`` returning a
        leaf node
    :param next_operation_cnet: callable returning ``(operation, op_params)``
        for ``(data, scope)``
    :param initial_scope: scope of the root; defaults to all column indices
    :param data_slicer: accepted for interface compatibility; not used in
        this function
    :return: root node of the learned, pruned network
    :raises AssertionError: on a ``None`` argument or an invalid network
    :raises Exception: if ``next_operation_cnet`` returns an unknown operation
    """
    assert dataset is not None
    assert ds_context is not None
    assert create_leaf is not None
    assert next_operation_cnet is not None

    # Placeholder parent so that the real root is attached like any child.
    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope))

    while tasks:

        local_data, parent, children_pos, scope = tasks.popleft()

        operation, op_params = next_operation_cnet(local_data, scope)

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.CONDITIONING:
            from spn.algorithms.splitting.Base import split_data_by_clusters

            conditioning_start_t = perf_counter()

            col_conditioning, found_conditioning = conditioning(local_data)

            if not found_conditioning:
                node = create_leaf(local_data, ds_context, scope)
                parent.children[children_pos] = node

                continue

            # Two row clusters: conditioning column equal to 1, or not.
            clusters = (local_data[:, col_conditioning] == 1).astype(int)
            data_slices = split_data_by_clusters(local_data,
                                                 clusters,
                                                 scope,
                                                 rows=True)

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.weights.append(proportion)

                product_node = Product()
                node.children.append(product_node)
                node.children[-1].scope.extend(scope)

                # Everything except the conditioning column.
                right_data_slice = np.hstack(
                    (data_slice[:, :col_conditioning],
                     data_slice[:, (col_conditioning + 1):])).reshape(
                         data_slice.shape[0], data_slice.shape[1] - 1)
                product_node.children.append(None)
                tasks.append((
                    right_data_slice,
                    product_node,
                    len(product_node.children) - 1,
                    scope_slice[:col_conditioning] +
                    scope_slice[col_conditioning + 1:],
                ))

                # The conditioning column on its own.
                left_data_slice = data_slice[:, col_conditioning].reshape(
                    data_slice.shape[0], 1)
                product_node.children.append(None)
                tasks.append((left_data_slice, product_node,
                              len(product_node.children) - 1,
                              [scope_slice[col_conditioning]]))

            conditioning_end_t = perf_counter()
            logging.debug("\t\tconditioning  (in {:.5f} secs)".format(
                conditioning_end_t - conditioning_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
        else:
            # Format instead of concatenating: "+" with a non-string
            # operation raised TypeError before the message could be built.
            raise Exception("Invalid operation: {}".format(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node
Пример #10
0
def marg_rang_special(spn,
                      rang,
                      node_likelihood,
                      distribution_update_ranges=distribution_update_ranges):
    """Build a copy of ``spn`` updated for the ranges in ``rang``.

    Leaves whose entry in ``rang`` is ``None`` are copied unchanged with
    probability 1. For the other leaves the probability is computed with
    ``node_likelihood[type(leaf)]`` and, when it is non-zero, the copied leaf
    is updated in place by ``distribution_update_ranges[type(leaf)]``. Sum
    nodes drop children whose weighted probability is not positive and
    renormalize the remaining weights; product nodes multiply the
    probabilities of their children.

    :param spn: root node of the network
    :param rang: sequence indexed by scope variable; ``None`` entries mean
        "no range for this variable"
    :param node_likelihood: mapping from leaf type to a likelihood function
        called as ``f(node, ranges, node_likelihood=node_likelihood)``
    :param distribution_update_ranges: mapping from leaf type to a function
        called as ``f(new_leaf, range)``
    :return: ``(prob, new_root)`` where ``new_root`` has fresh ids, is pruned
        and passed ``is_valid``
    :raises Exception: for a leaf with more than one scope variable or a leaf
        type missing from ``node_likelihood``
    :raises TypeError: for a node that is neither Leaf, Sum nor Product
    """
    def spn_for_evidence_recursive(node):

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            leaf_range = rang[node.scope[0]]
            if leaf_range is None:
                return 1, deepcopy(node)

            t_node = type(node)
            if t_node not in node_likelihood:
                raise Exception(
                    'No log-likelihood method specified for node type: ' +
                    str(type(node)))

            ranges = np.array([rang])
            prob = node_likelihood[t_node](
                node, ranges, node_likelihood=node_likelihood)[0][0]

            newNode = deepcopy(node)
            # A leaf with zero probability is kept as a plain copy; it is
            # filtered out by the parent sum node.
            if prob != 0:
                distribution_update_ranges[t_node](newNode, leaf_range)

            return prob, newNode

        newNode = node.__class__()
        # Copy the scope so the new network does not share the list object
        # with the input network.
        newNode.scope = list(node.scope)

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []

            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)

            new_weights = np.array(new_weights)
            total = np.sum(new_weights)
            newNode.weights = new_weights / total
            newNode.children = new_childs
            return total, newNode

        elif isinstance(node, Product):
            new_childs = []

            new_prob = 1.
            for c in node.children:
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)

            newNode.children = new_childs
            return new_prob, newNode

        # Previously this fell through and returned None, which surfaced as
        # an opaque unpacking TypeError in the caller.
        raise TypeError("Unsupported node type: " + str(type(node)))

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newSPN = Prune(newNode)
    valid, err = is_valid(newSPN)
    assert valid, err

    return prob, newSPN
Пример #11
0
def get_flat_spn(spn, target_id):
    """Flatten ``spn`` into one sum over products with respect to ``target_id``.

    The network is walked from the root. Sum nodes multiply the running
    probability by each child weight and recurse into every child. At a
    product node, children that do not contain ``target_id`` are replaced by
    one mixed node per feature (built by the ``distribution_mix`` entry for
    the node type); a single-variable child containing ``target_id`` ends the
    walk and emits one product node under the flat root, weighted by the
    running probability; a larger child containing ``target_id`` is walked
    next.

    :param spn: root node of the network to flatten
    :param target_id: feature id to flatten the network around
    :return: root of the flat network after ``assign_ids``, ``Prune`` and a
        successful ``is_valid`` check
    :raises Exception: if the walk reaches a node that is neither Sum nor
        Product
    """

    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    flat_spn = Sum()
    # Copy so the flat network does not share the scope list with the input.
    flat_spn.scope = list(spn.scope)

    def create_flat_spn_recursive(node, distribution_mix, prob=1.0, independent_nodes=None):
        # A fresh list per top-level call instead of a mutable default.
        if independent_nodes is None:
            independent_nodes = []

        if isinstance(node, Sum):
            for i, c in enumerate(node.children):
                forwarded_weight = node.weights[i] * prob
                # Every branch of a sum continues with its own copy of the
                # nodes collected so far.
                create_flat_spn_recursive(c, distribution_mix, forwarded_weight, independent_nodes.copy())

        elif isinstance(node, Product):

            stop = False
            next_node = None

            for c in node.children:
                if target_id in c.scope:
                    if len(c.scope) == 1:
                        stop = True
                        independent_nodes.append(deepcopy(c))
                    else:
                        next_node = c
                else:
                    for feature_id in c.scope:
                        weighted_nodes = get_nodes_with_weight(c, feature_id)
                        t_node = type(weighted_nodes[0][1])
                        mixed_node = distribution_mix[t_node](weighted_nodes)
                        independent_nodes.append(mixed_node)

            if stop:
                flat_spn.weights.append(prob)
                prod = Product(children=independent_nodes)
                prod.scope = list(spn.scope)
                flat_spn.children.append(prod)

            else:
                create_flat_spn_recursive(next_node, distribution_mix, prob, independent_nodes)

        else:
            raise Exception("Can only iterate over Sum and Product nodes")

    from simple_spn.internal.MixDistributions import mix_categorical

    distribution_mix = {Categorical : mix_categorical}

    create_flat_spn_recursive(spn, distribution_mix)
    assign_ids(flat_spn)
    flat_spn = Prune(flat_spn)
    valid, err = is_valid(flat_spn)
    assert valid, err

    return flat_spn
Пример #12
0
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    num_conditional_cols=None,
    data_slicer=default_slicer,
    l_rfft=None,
    is_2d=False,
):
    """Learn an SPN structure, optionally pairing columns in naive factorization.

    A queue of tasks is processed breadth-first. For every task
    ``next_operation`` chooses what to do with the current data slice:
    remove uninformative features, split rows (creates a ``Sum``), split
    columns (creates a ``Product``), factorize naively, or create a single
    leaf. The finished network gets ids, is validated, pruned and validated
    again.

    :param dataset: 2-D data; the initial task uses the columns selected by
        ``initial_scope``
    :param ds_context: passed through to ``split_rows``, ``split_cols`` and
        ``create_leaf``
    :param split_rows: ``split_rows(data, ds_context, scope)`` returning a
        sequence of ``(data_slice, scope_slice, proportion)`` triples
    :param split_cols: ``split_cols(data, ds_context, scope, l_rfft, is_2d)``
        returning a sequence of ``(data_slice, scope_slice, _)`` triples
    :param create_leaf: ``create_leaf(data, ds_context, scope)`` returning a
        leaf node
    :param next_operation: callable returning ``(operation, op_params)`` for
        a data slice
    :param initial_scope: scope of the root; defaults to all column indices
    :param num_conditional_cols: forwarded as third argument to
        ``data_slicer``
    :param data_slicer: ``data_slicer(data, cols, num_conditional_cols)``
        returning the selected columns of ``data``
    :param l_rfft: integer used to pair scope values ``s`` and ``s + l_rfft``
        during naive factorization; must be set when ``is_2d`` is truthy and
        a factorized scope has more than one variable
    :param is_2d: when truthy, naive factorization over more than one
        variable creates two-column leaves for paired scope values
    :return: root node of the learned, pruned network
    :raises AssertionError: on a ``None`` argument, a task whose column count
        differs from its scope length, or an invalid resulting network
    :raises Exception: if ``next_operation`` returns an unknown operation
    """
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    # Placeholder parent so that the real root is attached like any child.
    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))

    tasks = deque()

    # NOTE(review): the initial slice always uses default_slicer, not the
    # data_slicer argument used everywhere else -- confirm this is intended.
    tasks.append((default_slicer(dataset, initial_scope), root, 0, initial_scope, False, False))

    while tasks:

        local_data, parent, children_pos, scope, no_clusters, no_independencies = tasks.popleft()
        assert(local_data.shape[1]==len(scope))

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            # op_params holds the column positions to split off; each gets
            # its own single-column task.
            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append(
                    (
                        data_slicer(local_data, [col], num_conditional_cols),
                        node,
                        len(node.children) - 1,
                        [scope[col]],
                        True,
                        True,
                    )
                )

            next_final = False

            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            # Column positions and the scope values they map to, in the same
            # iteration order.
            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]

            tasks.append(
                (
                    data_slicer(local_data, rest_cols, num_conditional_cols),
                    node,
                    c_pos,
                    rest_scope,
                    next_final,
                    next_final,
                )
            )

            continue

        elif operation == Operation.SPLIT_ROWS:

            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug(
                "\t\tfound {} row clusters (in {:.5f} secs)".format(len(data_slices), split_end_t - split_start_t)
            )

            if len(data_slices) == 1:
                # No row split found: retry the same slice with row
                # clustering disabled.
                tasks.append((local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope, False, False))

            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope, l_rfft, is_2d)
            split_end_t = perf_counter()
            logging.debug(
                "\t\tfound {} col clusters (in {:.5f} secs)".format(len(data_slices), split_end_t - split_start_t)
            )

            if len(data_slices) == 1:
                # No column split found: retry the same slice with the
                # independence test disabled.
                tasks.append((local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1, scope_slice, False, False))

            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            # modified by zhongjie on 04.10.2019
            # 1. if is_2d==False --> no Multi Variate Gaussian here, use Univariate Gaussian to model all RVs
            #    or if scope=1, the factorization ends with univariate Gaussian
            if not is_2d or len(scope) == 1:
                for col in range(len(scope)):
                    node.children.append(None)
                    local_tasks.append(len(node.children) - 1)
                    child_data_slice = data_slicer(local_data, [col], num_conditional_cols)
                    local_children_params.append((child_data_slice, ds_context, [scope[col]]))
            # 2. if is_2d=True and #scope>1, Multi Variate Gaussian leaf will be created
            #    the factorization ends with pairs of coefs
            else:
                if local_data.shape[0] == 1:
                    # trick, to avoid single instance? by zhongjie
                    local_data = np.concatenate([local_data, local_data], axis=0)
                for col in range(len(scope)):
                    freq = scope[col] % (l_rfft * 2)
                    # if it is not freq 0 or freq \Pi, consider first only the real part of coef
                    if l_rfft - 1 > freq > 0:
                        node.children.append(None)
                        local_tasks.append(len(node.children) - 1)
                        # then find corresponding imag coefs --> scope[real]+l_rfft
                        child_data_slice = data_slicer(
                            local_data, [col, scope.index(scope[col] + l_rfft)], num_conditional_cols
                        )
                        local_children_params.append((child_data_slice, ds_context, [scope[col], scope[col] + l_rfft]))
                    # if it is freq 0 or freq \pi, which has no imag part, do normally
                    elif freq == 0 or freq == l_rfft - 1:
                        node.children.append(None)
                        local_tasks.append(len(node.children) - 1)
                        child_data_slice = data_slicer(local_data, [col], num_conditional_cols)
                        local_children_params.append((child_data_slice, ds_context, [scope[col]]))
                    # Any other column gets no child of its own here.

            # Both branches create their leaves through the module-level pool.
            result_nodes = pool.starmap(create_leaf, local_children_params)
            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()

            logging.debug(
                "\t\tnaive factorization {} columns (in {:.5f} secs)".format(len(scope), split_end_t - split_start_t)
            )

            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()

            logging.debug(
                "\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                    node.__class__.__name__, scope, leaf_end_t - leaf_start_t
                )
            )

        else:
            # Format instead of concatenating: "+" with a non-string
            # operation raised TypeError before the message could be built.
            raise Exception("Invalid operation: {}".format(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node