def naive_factorization(data=None,
                        node_id=0,
                        context=None,
                        scope=None,
                        **kwargs):
    """Split every variable of ``scope`` into its own leaf below a new Product.

    ``data`` is split into ``(y, x)`` by ``get_YX(data, context.feature_size)``.
    For the variable at position ``i`` of ``scope`` a ``CREATE_LEAF_NODE``
    operation is scheduled whose data is column ``i`` of ``y`` (as a single
    column) recombined with ``x`` through ``concatenate_yx``.

    Args:
        data: 2-D array handed to ``get_YX``.
        node_id: Identifier stored on the new Product and used as
            ``parent_id`` in the scheduled operations.
        context: Object exposing ``feature_size``.
        scope: Sequence of variable identifiers; must not be ``None``.
        **kwargs: Accepted and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations.CREATE_LEAF_NODE, params)`` tuples, one per
        variable, and ``product_node.children`` holds one ``None``
        placeholder per scheduled leaf.

    Raises:
        AssertionError: If ``scope`` is ``None``.
    """
    assert scope is not None, "No scope"

    product = Product()
    product.scope = scope
    product.id = node_id

    y_part, x_part = get_YX(data, context.feature_size)

    operations = []
    for column, variable in enumerate(scope):
        # Reserve the child slot first; its index is the length before append.
        slot = len(product.children)
        product.children.append(None)

        leaf_params = {
            "data": concatenate_yx(y_part[:, column].reshape(-1, 1), x_part),
            "parent_id": product.id,
            "pos": slot,
            "scope": [variable],
        }
        operations.append((SplittingOperations.CREATE_LEAF_NODE, leaf_params))

    return product, operations
# Example #2
# 0
def prod_condition(node, children, input_vals=None, scope=None):
    """Condition a Product node given the already-processed child results.

    Args:
        node: Product node being processed; only ``node.scope`` is read.
        children: Iterable of indexable results, one per child. Entry ``[0]``
            is the processed child (appended when truthy) and entry ``[1]`` is
            a number that is accumulated.
        input_vals: Unused.
        scope: Set of variable identifiers; must support ``intersection`` and
            set difference.

    Returns:
        ``(Copy(node), 0)`` when ``node.scope`` shares nothing with ``scope``;
        otherwise ``(new_product, total)`` where ``new_product.scope`` is
        ``node.scope`` minus ``scope`` and ``total`` is the sum of
        ``float(result[1])`` over all child results.
    """
    if scope.intersection(node.scope):
        conditioned = Product()
        conditioned.scope = list(set(node.scope) - scope)

        total = 0
        for child_result in children:
            if child_result[0]:
                conditioned.children.append(child_result[0])
            total += float(child_result[1])

        return conditioned, total

    # Nothing to condition on below this node: hand back a copy unchanged.
    return Copy(node), 0
def remove_non_informative_features(data=None,
                                    node_id=0,
                                    scope=None,
                                    context=0,
                                    uninformative_features_idx=None,
                                    **kwargs):
    """Factor flagged (uninformative) variables out into individual leaves.

    ``data`` is split into ``(y, x)`` by ``get_YX(data, context.feature_size)``.
    Position ``idx`` of ``uninformative_features_idx`` refers to ``scope[idx]``
    and to column ``idx`` of ``y``. Every flagged variable gets a
    ``CREATE_LEAF_NODE`` operation; all remaining variables, if any, are
    grouped into a single ``GET_NEXT_OP`` operation.

    Args:
        data: 2-D array handed to ``get_YX``.
        node_id: Identifier stored on the new Product and used as
            ``parent_id`` in the scheduled operations.
        scope: Sequence of variable identifiers, indexable by position.
        context: Object exposing ``feature_size`` (the default ``0`` is not
            usable and is kept only for interface compatibility).
        uninformative_features_idx: Iterable of truthy/falsy flags, one per
            position of ``scope``; must not be ``None``.
        **kwargs: Accepted and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations, params)`` tuples and
        ``product_node.children`` holds one ``None`` placeholder per
        scheduled operation.

    Raises:
        AssertionError: If ``uninformative_features_idx`` is ``None`` or no
            variable is flagged.
    """
    assert uninformative_features_idx is not None, "parameter uninformative_features_idx can't be None"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    y, x = get_YX(data, context.feature_size)

    non_zero_variance_rvs = []
    non_zero_variance_idx = []
    result = []
    for idx, zero_var in enumerate(uninformative_features_idx):
        rv = scope[idx]

        if not zero_var:
            non_zero_variance_rvs.append(rv)
            non_zero_variance_idx.append(idx)
            continue

        prod_node.children.append(None)
        data_slice = concatenate_yx(y[:, idx].reshape(-1, 1), x)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))
    assert len(result) > 0

    if non_zero_variance_idx:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                # The collected indices are positions within ``y`` (the same
                # index space used for the leaf slices above), so slice ``y``
                # rather than the raw ``data`` that still contains ``x``.
                "data": concatenate_yx(y[:, non_zero_variance_idx], x),
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": non_zero_variance_rvs,
            },
        ))

    return prod_node, result
def remove_non_informative_features(data=None,
                                    node_id=0,
                                    scope=None,
                                    **kwargs):
    """Factor zero-variance variables out into individual leaves.

    The variance of every column ``data[:, scope]`` is computed along axis 0.
    Each variable whose variance equals zero gets a ``CREATE_LEAF_NODE``
    operation on its own column; all remaining variables, if any, are grouped
    into a single ``GET_NEXT_OP`` operation.

    Args:
        data: 2-D NumPy array indexed by the variable identifiers in
            ``scope``.
        node_id: Identifier stored on the new Product and used as
            ``parent_id`` in the scheduled operations.
        scope: Sequence of column indices / variable identifiers.
        **kwargs: Accepted and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations, params)`` tuples and
        ``product_node.children`` holds one ``None`` placeholder per
        scheduled operation.

    Raises:
        AssertionError: If no variable in ``scope`` has zero variance.
    """
    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    uninformative_features_idx = np.var(data[:, scope], 0) == 0

    # Collect the survivors while iterating instead of deleting by position
    # from a copy of ``scope``: positional deletes shift the remaining entries,
    # so the second delete would drop the wrong variable or raise IndexError.
    informative_rvs = []
    result = []
    for rv, zero_var in zip(scope, uninformative_features_idx):
        if not zero_var:
            informative_rvs.append(rv)
            continue
        prod_node.children.append(None)
        data_slice = data[:, rv].reshape(-1, 1)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))
    assert len(result) > 0

    # Only schedule further processing when some variable is left; an
    # operation with an empty scope would have nothing to work on.
    if informative_rvs:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": data[:, informative_rvs],
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": informative_rvs,
            },
        ))
    return prod_node, result
def naive_factorization(data=None, node_id=0, scope=None, **kwargs):
    """Split every variable of ``scope`` into its own leaf below a new Product.

    For each variable ``rv`` in ``scope`` a ``CREATE_LEAF_NODE`` operation is
    scheduled on the single column ``data[:, rv]``.

    Args:
        data: 2-D array indexed by the variable identifiers in ``scope``.
        node_id: Identifier of the new Product; also used as ``parent_id``
            in the scheduled operations.
        scope: Iterable of column indices / variable identifiers; must not
            be ``None``.
        **kwargs: Accepted and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations.CREATE_LEAF_NODE, params)`` tuples, one per
        variable, and ``product_node.children`` holds one ``None``
        placeholder per scheduled leaf.

    Raises:
        AssertionError: If ``scope`` is ``None``.
    """
    assert scope is not None, "No scope"

    prod_node = Product()
    prod_node.scope = scope
    # The scheduled operations refer to this node via "parent_id": node_id,
    # so the node's ``id`` has to carry the same value.
    prod_node.id = node_id
    # Kept for backward compatibility with code that reads ``node_id``.
    prod_node.node_id = node_id

    result = []
    for rv in scope:
        prod_node.children.append(None)
        data_slice = data[:, rv].reshape(-1, 1)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    return prod_node, result
def get_credit_spn():
    """Build and return a small hand-specified SPN over variables 0..3.

    The network is assembled from ``Categorical`` leaves using the node
    operators (``*`` for products, weighted ``+`` for sums) plus one
    explicitly constructed ``Product``. The root's scope is sorted before
    it is returned.

    Returns:
        The root node of the assembled SPN.
    """
    from spn.structure.Base import Product
    from spn.structure.leaves.parametric.Parametric import Categorical

    # Two products over variables 2 and 3, later mixed 0.3 / 0.7.
    first_var2 = Categorical(p=[0.0, 1.0], scope=[2])
    first_var3 = Categorical(p=[0.5, 0.5], scope=[3])
    first_branch = first_var2 * first_var3

    second_var2 = Categorical(p=[1.0, 0.0], scope=[2])
    second_var3 = Categorical(p=[0.1, 0.9], scope=[3])
    second_branch = second_var2 * second_var3

    inner_mixture = 0.3 * first_branch + 0.7 * second_branch
    gated_mixture = Categorical(p=[0.0, 1.0], scope=[1]) * inner_mixture

    # Explicit product over variables 1, 2 and 3; its scope is set by hand.
    fixed_leaves = [
        Categorical(p=[1.0, 0.0], scope=[1]),
        Categorical(p=[0.0, 1.0], scope=[2]),
        Categorical(p=[1.0, 0.0], scope=[3]),
    ]
    fixed_product = Product(fixed_leaves)
    fixed_product.scope = [1, 2, 3]

    outer_mixture = 0.8 * gated_mixture + 0.2 * fixed_product
    root = outer_mixture * Categorical(p=[0.2, 0.8], scope=[0])

    root.scope = sorted(root.scope)
    return root
# Example #7
# 0
 def create_flat_spn_recursive(node, distribution_mix, prob=1.0, independent_nodes=None):
     """Walk ``node`` and append one flattened Product per path to ``flat_spn``.

     Relies on names from the enclosing scope: ``target_id``, ``flat_spn``,
     ``spn``, ``get_nodes_with_weight`` and ``deepcopy``.

     * Sum node: recurse into every child with ``prob`` multiplied by the
       child's weight, giving each branch its own copy of
       ``independent_nodes``.
     * Product node: children whose scope does not contain ``target_id``
       are replaced, per variable in their scope, by a mixed node built via
       ``distribution_mix[type(first weighted node)](weighted_nodes)``.
       A single-variable child containing ``target_id`` is deep-copied and
       ends the descent: ``prob`` and a new Product over the collected
       nodes are appended to ``flat_spn``. A multi-variable child
       containing ``target_id`` is descended into next.

     Args:
         node: Current node; must be a ``Sum`` or ``Product``.
         distribution_mix: Mapping from node type to a callable that takes
             the list returned by ``get_nodes_with_weight``.
         prob: Accumulated weight along the path so far.
         independent_nodes: Nodes collected so far along the current path.
             ``None`` (the default) starts with a fresh empty list.

     Raises:
         Exception: If ``node`` is neither a ``Sum`` nor a ``Product``
             (including the case where a Product has no child containing
             ``target_id``, so the descent continues with ``None``).
     """
     # A list literal as default would be created once and shared by every
     # call; the Product branch appends to it, so earlier calls would leak
     # their nodes into later ones.
     if independent_nodes is None:
         independent_nodes = []

     if isinstance(node, Sum):
         for i, c in enumerate(node.children):
             forwarded_weight = node.weights[i] * prob
             # Each branch of the sum extends its own copy of the list.
             create_flat_spn_recursive(c, distribution_mix, forwarded_weight, independent_nodes.copy())

     elif isinstance(node, Product):

         stop = False
         next_node = None

         for c in node.children:
             if target_id in c.scope:
                 if len(c.scope) == 1:
                     stop = True
                     independent_nodes.append(deepcopy(c))
                 else:
                     next_node = c
             else:
                 for feature_id in c.scope:
                     weighted_nodes = get_nodes_with_weight(c, feature_id)
                     t_node = type(weighted_nodes[0][1])
                     mixed_node = distribution_mix[t_node](weighted_nodes)
                     independent_nodes.append(mixed_node)

         if stop:
             flat_spn.weights.append(prob)
             prod = Product(children=independent_nodes)
             prod.scope = spn.scope
             flat_spn.children.append(prod)

         else:
             create_flat_spn_recursive(next_node, distribution_mix, prob, independent_nodes)

     else:
         raise Exception("Can only iterate over Sum and Product nodes")
# Example #8
# 0
  

# Script entry point: builds a small SPN by hand from Categorical leaves and
# passes it to extract_rules (defined outside this block).
if __name__ == '__main__':
    
   
    # Sum and Leaf are not referenced in this block; they may be needed by
    # code outside it, since these imports bind module-level names.
    from spn.structure.Base import Sum, Product, Leaf
    from spn.structure.leaves.parametric.Parametric import Categorical 
    
    
    # Two products over variables 2 and 3, mixed with weights 0.3 / 0.7,
    # then multiplied with a leaf over variable 1.
    spn1 = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3]) 
    spn2 = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3]) 
    spn3 = 0.3 * spn1 + 0.7 * spn2
    spn4 = Categorical(p=[0.0, 1.0], scope=[1]) * spn3
    
    # Explicitly constructed product over variables 1, 2 and 3; its scope is
    # assigned by hand after construction.
    spn6 = Product([Categorical(p=[1.0, 0.0], scope=[1]), Categorical(p=[0.0, 1.0], scope=[2]), Categorical(p=[1.0, 0.0], scope=[3])])
    spn6.scope = [1,2,3]
    
    # Mix both sub-networks (0.8 / 0.2) and attach a leaf over variable 0
    # to form the root.
    spn7 = 0.8 * spn4 + 0.2 * spn6
    spn = spn7 * Categorical(p=[0.2, 0.8], scope=[0])
    
    # Disabled: plotting of the SPN to a PDF.
    #spn_util.plot_spn(spn, "rule_spn.pdf")
    
    
    
    extract_rules(spn)
    
    # Disabled: frequent-item extraction and printing of its result.
    #res = get_frequent_items(spn)
    #print(res)