Пример #1
0
def test_unnest_tuple():
    """Check that ``unnest_tuple`` turns nested tuples into one flat tuple."""
    # Each case is a pair: (input given to unnest_tuple, flat tuple expected back).
    # Note: ``(3)`` in the third case is the plain integer 3, not a 1-tuple,
    # so that input is equal to the one of the second case.
    cases = (
        ((1, 2, 3), (1, 2, 3)),
        ((1, (2, 3)), (1, 2, 3)),
        ((1, (2, (3))), (1, 2, 3)),
        (((1, ), (2, ), (3, 4)), (1, 2, 3, 4)),
    )

    for case in cases:
        given, wanted = case
        flattened = unnest_tuple(given)
        assert flattened == wanted
Пример #2
0
def add_columns_selector(Graph, var_type_node_dico, var_type_columns_dico,
                         all_models_params):
    """Include a columns selector wherever a starting model needs one.

    For every starting node of the graph (composition steps excluded) that has
    a variable type, either:

    * insert a new selector node above it in the graph, or
    * add a 'columns_to_use' entry to the parameters of that node.

    Which of the two is done is decided by ``_must_include_selector``.

    Parameters
    ----------
    Graph : nx.DiGraph
        Graph representation of the model.

    var_type_node_dico : dict
        Dictionary indicating which node works on which type of columns.
        keys = node of Graph (ie: name of models)
        values = variable type (None means no restriction)

    var_type_columns_dico : dict
        Dictionary indicating which type of variable corresponds to which columns.
        keys = variable type
        values = list of columns

    all_models_params : dict
        Dictionary holding the parameters of each model.
        keys = node of Graph (ie: name of models)
        values = parameters of each model

        WILL BE MODIFIED IN PLACE!

    Returns
    -------
    Graph
        The graph, with selector nodes inserted where needed.
    all_models_params
        The same dictionary that was passed in, updated.

    Raises
    ------
    ValueError
        If the selector node that should be inserted already exists in Graph.
    """

    nodes_no_composition = [
        n for n in Graph.nodes if not StepCategories.is_composition_step(n[0])
    ]
    sub_Graph = Graph.subgraph(nodes_no_composition)

    starting_nodes = gh.get_starting_nodes(sub_Graph)

    for node in starting_nodes:
        vtype = var_type_node_dico[node]
        needs_selector_node = _must_include_selector(node[1])

        if vtype is None:
            # No variable type: nothing to select, whichever way the selection
            # would have been done.
            continue

        if not needs_selector_node:
            # The selection is passed to the model through its parameters
            all_models_params[node]["columns_to_use"] = _get_columns(
                vtype, var_type_columns_dico)
            continue

        # Build the selector name from the flattened variable type(s).
        # Applying '%' directly to the flattened tuple only works when it holds
        # exactly one item (TypeError otherwise), so the types are joined
        # explicitly; the name is unchanged in the single-type case.
        flat_types = unnest_tuple(vtype)
        if not isinstance(flat_types, tuple):
            flat_types = (flat_types, )
        name_of_cat = "Selector_%s" % "_".join(str(t) for t in flat_types)

        new_node = (name_of_cat, (name_of_cat, SpecialModels.ColumnsSelector))
        if new_node in Graph.nodes:
            raise ValueError("Please check, I have duplicate names : %s" %
                             str(new_node))

        Graph = gh.insert_node_above(Graph, node, new_node=new_node)

        all_models_params[new_node] = {
            "columns_to_use": _get_columns(vtype, var_type_columns_dico)
        }

    return Graph, all_models_params
Пример #3
0
def create_graphical_representation(steps):
    """Create the graph representation of the model from an ordered dict of steps.

    Parameters
    ----------
    steps : OrderedDict
        keys = (step_name, model_name) 2-uple
        values = variable type of the step

    Returns
    -------
    G : nx.DiGraph
        Graph whose nodes are the keys of ``steps``, plus the selector nodes
        that had to be created along the way.
    new_steps : OrderedDict
        The selector nodes created here (keys) with the variable types they
        cover (values).

    Raises
    ------
    ValueError
        If the same (step_name, model_name) appears twice among the
        non-composition steps, if a step is missing from the final graph, or if
        the graph holds a node that is neither a step nor a created selector.
    """

    # Remark: ideally the step number should be stored in the graph, with
    # correct labels, so that several nodes could share the same name
    # (Ex: Scaler...)

    ### 1) Split Composition Steps vs Rest
    all_composition_steps = []
    all_others = []
    for (step_name, model_name), var_type in steps.items():
        if StepCategories.is_composition_step(step_name):
            all_composition_steps.append((step_name, model_name, var_type))
        else:
            all_others.append((step_name, model_name, var_type))

    ### 2) Create Graph for non-composition step
    new_steps = OrderedDict()

    G = nx.DiGraph()
    for step_name, model_name, var_type in all_others:

        unested_var_type = unnest_tuple(var_type)

        # Terminal nodes: the new step will be plugged on one (or more) of those
        terminal_nodes = gh.get_terminal_nodes(G)

        # flattened variable type -> terminal node working on that type
        ending_node_type = {
            unnest_tuple(steps[node]): node
            for node in terminal_nodes
        }

        node_name = (step_name, model_name)  # 2-uple
        if node_name in G.nodes:
            raise ValueError("This node already exists '(%s,%s)'" % node_name)

        # Three possibilities:
        # 1) attach the new node to ONE terminal node
        # 2) create a new branch (new node attached to nothing)
        # 3) attach the new node to SEVERAL terminal nodes

        if unested_var_type in ending_node_type:
            ### 1) I already have a branch of this type
            last_node = ending_node_type[unested_var_type]
            G = gh.add_node_after(G, node_name, last_node)
            continue

        ### I don't have a branch of exactly this type ###
        # I need to look where I want to plug it
        all_candidates = [(t, n) for t, n in ending_node_type.items()
                          if tuple_include(t, unested_var_type)]

        if len(all_candidates) == 0:
            ### 2) No node to attach to: I must create a new branch
            G = gh.add_node_after(G, node_name)
            continue

        ### 3) I attach the new node to several nodes

        # Sometimes a node must also be added UPSTREAM, when some of the types
        # are not covered by any of the candidates
        types_added = unnest_tuple([t for t, n in all_candidates])
        types_not_added = diff(unested_var_type, types_added)
        if len(types_not_added) > 0:

            # Applying '%' directly to the flattened tuple only works when it
            # holds exactly one item (TypeError otherwise), so the types are
            # joined explicitly; the name is unchanged in the single-type case.
            flat_missing = unnest_tuple(types_not_added)
            if not isinstance(flat_missing, tuple):
                flat_missing = (flat_missing, )
            name_of_cat = "Selector_%s" % "_".join(
                str(t) for t in flat_missing)

            new_node = (name_of_cat, (name_of_cat,
                                      SpecialModels.ColumnsSelector))

            G = gh.add_node_after(G, new_node)

            # I also must dynamically add the node to the list of steps
            new_steps[new_node] = types_not_added

            all_candidates = all_candidates + [(types_not_added, new_node)]

        G = gh.add_node_after(G, node_name, *[n for t, n in all_candidates])

    ### 3) Include composition node on top
    # Iterating in reverse order, each composition step is linked to the
    # current starting nodes of the graph.
    for step_name, model_name, _ in reversed(all_composition_steps):
        starting_nodes = gh.get_starting_nodes(G)
        for n in starting_nodes:
            G.add_edge((step_name, model_name), n)

    ### 4) Verify the Graph structure

    # all the steps are in the graph
    for (step_name, model_name), _ in steps.items():
        if (step_name, model_name) not in G:
            raise ValueError("'(%s , %s)' should be in graph" %
                             (step_name, model_name))
    # all nodes were in the steps (or were created here)
    for node in G.nodes():
        if node not in steps and node not in new_steps:
            raise ValueError("'(%s,%s)' shouldn't be in graph" % node)

    assert_model_graph_structure(G)

    return G, new_steps