示例#1
0
def combine_pareto(input_data,
                   PA1,
                   PA2,
                   idx_list_1,
                   idx_list_2,
                   PA,
                   sep_type="+"):
    """Combine two Pareto fronts of partial expressions into candidates on ``PA``.

    Every expression on ``PA1`` is paired with every expression on ``PA2``.
    Each partial expression is written in its own local variables
    ``x0, x1, ...``; before joining, local variable ``xk`` is renamed to
    ``x<idx_list[k]>``. The two renamed parts are joined as
    ``(part1)<sep_type>(part2)`` and added to ``PA`` as a ``Point`` whose
    ``x`` is ``get_expr_complexity(new_eq)``, whose ``y`` is
    ``get_symbolic_expr_error(input_data, new_eq)`` and whose ``data`` is the
    combined expression string.

    Args:
        input_data: Passed unchanged to ``get_symbolic_expr_error``.
        PA1: Object exposing ``get_pareto_points()``; the expression is read
            from index 2 of each point.
        PA2: Same as ``PA1``, for the second part.
        idx_list_1: ``idx_list_1[k]`` is the index that local variable ``xk``
            of a ``PA1`` expression is renamed to.
        idx_list_2: Same as ``idx_list_1``, for ``PA2`` expressions.
        PA: Pareto set that receives the combined candidates via ``PA.add``.
        sep_type: String placed between the two parenthesised parts.

    Returns:
        ``PA``, after all combinations that could be scored were added.
    """
    import re

    # Match whole variable tokens only, so that renaming x1 can never touch
    # the "x1" prefix of x10, x12, ...
    local_variable = re.compile(r"\bx(\d+)\b")

    def remap_variables(expression, idx_list):
        """Rename every local ``xk`` in ``expression`` in a single pass."""

        def rename(match):
            local_idx = int(match.group(1))
            if local_idx < len(idx_list):
                return "x%d" % idx_list[local_idx]
            # Variables without an entry in idx_list are left untouched.
            return match.group(0)

        return local_variable.sub(rename, expression)

    PA1 = np.array(PA1.get_pareto_points()).astype('str')
    PA2 = np.array(PA2.get_pareto_points()).astype('str')
    for i in range(len(PA1)):
        for j in range(len(PA2)):
            try:
                # replace the variables from the separated parts with the
                # variables reflecting the new combined equation
                exp1 = remap_variables(PA1[i][2], idx_list_1)
                exp2 = remap_variables(PA2[j][2], idx_list_2)
                new_eq = "(" + exp1 + ")" + sep_type + "(" + exp2 + ")"
                compl = get_expr_complexity(new_eq)
                PA.add(
                    Point(x=compl,
                          y=get_symbolic_expr_error(input_data, new_eq),
                          data=new_eq))
            except Exception:
                # Best effort: a pair that cannot be built or scored is
                # skipped, the remaining pairs are still tried.
                continue
    return PA
def add_bf_on_numbers_on_pareto(pathdir, filename, PA, math_expr):
    """Replace each number of ``math_expr`` by brute-force proposals and score the results.

    For every atomic number in the expression, the number is written to
    ``pathdir + "number_for_bf_<w>.txt"``, ``brute_force_number`` is run on
    that file, and column 1 of ``results.dat`` is read back as a list of RPN
    strings. Each proposal is converted with ``RPN_to_eq`` and substituted
    for the number under consideration (all other numbers keep their
    original values). Every resulting expression is then simplified, scored
    and added to ``PA``.

    Args:
        pathdir: Directory prefix for the temporary number files; also
            forwarded to ``get_symbolic_expr_error``.
        filename: Forwarded to ``get_symbolic_expr_error``.
        PA: Pareto set that receives the new points via ``PA.add``.
        math_expr: Expression (anything whose ``str()`` ``parse_expr``
            accepts).

    Returns:
        ``PA``.
    """

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        number_types = (sympy.numbers.Float, sympy.numbers.Integer,
                        sympy.numbers.Rational, sympy.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        unsnapped_args = [
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ]
        return expr.func(*unsnapped_args)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        underscore = "_" if is_underscore else ""
        i = 0
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    def is_atomic_number(node):
        return node.is_Atom and node.is_number

    expr = parse_expr(str(math_expr))
    # Get the numbers appearing in the expression
    eq_numbers = [
        node for node in preorder_traversal(expr) if is_atomic_number(node)
    ]

    # The parametrised template depends only on `expr`, so it is built once
    # instead of once per number. The 'p' entry is a placeholder that makes
    # the generated names start at p0.
    unsnapped_param_dict = {'p': 1}
    try:
        template = unsnap_recur(expr, {}, unsnapped_param_dict)
    except Exception:
        # Without a template no candidate can be produced.
        return PA
    param_names = [name for name in unsnapped_param_dict if name != "p"]

    # Do bf on one parameter at a time
    bf_on_numbers_expr = []
    for w in range(len(eq_numbers)):
        try:
            number_file = "number_for_bf_%s.txt" % w
            np.savetxt(pathdir + number_file, [eq_numbers[w]])
            brute_force_number(pathdir, number_file)
            # Load the predictions made by the bf code. ndmin=1 keeps a
            # results file with a single line iterable (it would otherwise
            # load as a 0-d array and the proposal would be lost).
            bf_numbers = np.loadtxt("results.dat",
                                    usecols=(1, ),
                                    dtype="str",
                                    ndmin=1)
            new_numbers = copy.deepcopy(eq_numbers)

            # replace the number under consideration by all the proposed bf numbers
            for proposal in bf_numbers:
                new_numbers[w] = parse_expr(RPN_to_eq(proposal))
                candidate = template
                for jj, name in enumerate(param_names):
                    candidate = candidate.subs(name, new_numbers[jj])
                bf_on_numbers_expr.append(candidate)
        except Exception:
            # Best effort: move on to the next number.
            continue

    for candidate in bf_on_numbers_expr:
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(candidate))
            # Calculate the complexity of the new, snapped expression
            snapped = simplify(powsimp(candidate))
            snapped_complexity = 0
            for number in [
                    node for node in preorder_traversal(snapped)
                    if is_atomic_number(node)
            ]:
                snapped_complexity = snapped_complexity + get_number_DL_snapped(
                    float(number))
            # Add the complexity due to symbols
            n_variables = len(snapped.free_symbols)
            n_operations = len(count_ops(snapped, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                snapped_complexity = snapped_complexity + (
                    n_variables + n_operations) * np.log2(
                        (n_variables + n_operations))

            PA.add(
                Point(x=snapped_complexity,
                      y=snapped_error,
                      data=str(snapped)))
        except Exception:
            continue

    return (PA)
示例#3
0
def run_aifeynman(pathdir,filename,BF_try_time,BF_ops_file_type, polyfit_deg=4, NN_epochs=4000, vars_name=[],test_percentage=20):
    """Run the whole pipeline on one data file and write the Pareto front to disk.

    Steps, in order:
      1. If ``vars_name`` is non-empty, run ``dimensionalAnalysis`` and
         continue with the ``<filename>_dim_red`` file.
      2. Randomly split the rows into a training part (written to
         ``<filename>_train``) and a test part (written to
         ``<filename>_test`` when non-empty).
      3. Run ``run_AI_all`` on the training file.
      4. Refine the Pareto front: brute force on numbers, snapping, a final
         gradient descent, and snapping again.
      5. Save the final front to ``results/solution_<original filename>``.
         When no dimensional reduction was done and the test set is
         non-empty, the error on the test file is saved as the first column.

    Args:
        pathdir: Directory prefix of the data file.
        filename: Name of the data file inside ``pathdir``.
        BF_try_time, BF_ops_file_type, polyfit_deg, NN_epochs: Forwarded to
            ``run_AI_all``.
        vars_name: Variable names; a non-empty value triggers the
            dimensional analysis step.
        test_percentage: Percentage of the rows held out for testing.

    Returns:
        None. Results are written to files.
    """
    # If the variable names are passed, do the dimensional analysis first
    filename_orig = filename
    try:
        if vars_name!=[]:
            dimensionalAnalysis(pathdir,filename,vars_name)
            DR_file = filename + "_dim_red_variables.txt"
            filename = filename + "_dim_red"
        else:
            DR_file = ""
    except Exception:
        # Fall back to the untransformed data if the analysis fails.
        DR_file = ""

    # Split the data into train and test set
    input_data = np.loadtxt(pathdir+filename)
    sep_idx = np.random.permutation(len(input_data))

    # The test rows start exactly where the training rows end, so the two
    # sets are disjoint and test_percentage=0 yields an empty test set.
    n_train = (100-test_percentage)*len(input_data)//100
    train_data = input_data[sep_idx[:n_train]]
    test_data = input_data[sep_idx[n_train:]]

    np.savetxt(pathdir+filename+"_train",train_data)
    if test_data.size != 0:
        np.savetxt(pathdir+filename+"_test",test_data)

    PA = ParetoSet()
    # Run the code on the train data
    PA = run_AI_all(pathdir,filename+"_train",BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA=PA)
    PA_list = PA.get_pareto_points()

    # Run bf snap on the resulted equations
    for point in PA_list:
        try:
            PA = add_bf_on_numbers_on_pareto(pathdir,filename,PA,point[-1])
        except Exception:
            continue
    PA_list = PA.get_pareto_points()

    np.savetxt("results/solution_before_snap_%s.txt" %filename,PA_list,fmt="%s")

    # Run zero, integer and rational snap on the resulted equations
    for point in PA_list:
        PA = add_snap_expr_on_pareto(pathdir,filename,point[-1],PA, "")

    PA_list = PA.get_pareto_points()
    np.savetxt("results/solution_first_snap_%s.txt" %filename,PA_list,fmt="%s")

    # Run gradient descent on the data one more time
    for point in PA_list:
        try:
            gd_update = final_gd(pathdir,filename,point[-1])
            PA.add(Point(x=gd_update[1],y=gd_update[0],data=gd_update[2]))
        except Exception:
            continue

    PA_list = PA.get_pareto_points()
    for point in PA_list:
        PA = add_snap_expr_on_pareto(pathdir,filename,point[-1],PA, DR_file)

    list_dt = np.array(PA.get_pareto_points())
    data_file_len = len(np.loadtxt(pathdir+filename))
    # Column 1 of every Pareto point is the error; store its log2 and the
    # log2 scaled by the number of data rows.
    log_err = np.array([np.log2(float(row[1])) for row in list_dt])
    log_err_all = np.array([data_file_len*np.log2(float(row[1])) for row in list_dt])

    # Try the found expressions on the test data
    if DR_file=="" and test_data.size != 0:
        test_errors = np.array([
            get_symbolic_expr_error(pathdir,filename+"_test",str(row[-1]))
            for row in list_dt
        ])
        # Save all the data to file
        save_data = np.column_stack((test_errors,log_err,log_err_all,list_dt))
    else:
        save_data = np.column_stack((log_err,log_err_all,list_dt))
    np.savetxt("results/solution_%s" %filename_orig,save_data,fmt="%s")
示例#4
0
def _wrap_for_output_type(inner, output_type):
    """Return ``inner`` wrapped in the template selected by ``output_type``, or None if unknown."""
    # Each key is wrapped in the inverse of the function it names. "sin" maps
    # to asin, mirroring the cos -> acos and tan -> atan entries.
    wrappers = {
        "": ("", ""),
        "acos": ("cos(", ")"),
        "asin": ("sin(", ")"),
        "atan": ("tan(", ")"),
        "cos": ("acos(", ")"),
        "exp": ("log(", ")"),
        "inverse": ("1/(", ")"),
        "log": ("exp(", ")"),
        "sin": ("asin(", ")"),
        "sqrt": ("(", ")**2"),
        "squared": ("sqrt(", ")"),
        "tan": ("atan(", ")"),
    }
    if output_type not in wrappers:
        return None
    prefix, suffix = wrappers[output_type]
    return prefix + inner + suffix


def _symbol_complexity(expr):
    """Return (v+o)*log2(v+o) for v free symbols and o distinct operations of ``expr``, else 0."""
    n_variables = len(expr.free_symbols)
    n_operations = len(count_ops(expr,visual=True).free_symbols)
    if n_operations!=0 or n_variables!=0:
        return (n_variables+n_operations)*np.log2((n_variables+n_operations))
    return 0


def _add_bf_output_to_pareto(pathdir, filename, PA, sep_type, output_type):
    """Read ``results.dat``, score each brute-force expression and add it (plus its GD refinement) to ``PA``."""
    try:
        # load the BF output data; ndmin=2 keeps a single-row file indexable
        # by column instead of failing on bf_all_output[:,2]
        bf_all_output = np.loadtxt("results.dat", dtype="str", ndmin=2)
        express = bf_all_output[:,2]
        prefactors = [str(p) for p in bf_all_output[:,1]]

        # Calculate the complexity of the bf expression the same way as for gradient descent case
        scored = []         # (complexity, error, equation), always aligned
        gd_candidates = []  # every equation that could be built
        for prefactor, rpn in zip(prefactors, express):
            try:
                eqn = _wrap_for_output_type(prefactor + sep_type + RPN_to_eq(rpn), output_type)
                if eqn is None:
                    # Unknown output_type: no candidate can be formed.
                    continue
                gd_candidates.append(eqn)
                error = get_symbolic_expr_error(pathdir,filename,eqn)
                expr = parse_expr(eqn)
                numbers_expr = [node for node in preorder_traversal(expr) if node.is_Atom and node.is_number]
                compl = 0
                for number in numbers_expr:
                    try:
                        compl = compl + get_number_DL(float(number))
                    except Exception:
                        # Penalise numbers whose description length cannot be computed.
                        compl = compl + 1000000

                # Add the complexity due to symbols
                compl = compl + _symbol_complexity(expr)
                # Record the triple only once every part succeeded, so a
                # failure above cannot pair an equation with the wrong score.
                scored.append((compl, error, eqn))
            except Exception:
                continue

        # add the BF output to the Pareto plot
        for compl, error, eqn in scored:
            PA.add(Point(x=compl, y=error, data=eqn))

        # run gradient descent of BF output parameters and add the results to the Pareto plot
        for eqn in gd_candidates:
            try:
                bf_gd_update = RPN_to_pytorch(pathdir+filename,eqn)
                PA.add(Point(x=bf_gd_update[1],y=bf_gd_update[0],data=bf_gd_update[2]))
            except Exception:
                continue
    except Exception:
        # Best effort: a missing or malformed results file skips this pass.
        pass


def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg=4, output_type=""):
    """Run brute force ("+" and "*") and a polynomial fit, adding every result to ``PA``.

    ``brute_force`` and ``polyfit`` are run on ``pathdir_transformed +
    filename``. Every expression they produce is wrapped according to
    ``output_type`` (for example ``"acos"`` wraps it in ``cos(...)``) and
    its error is evaluated with ``get_symbolic_expr_error(pathdir,
    filename, ...)``. Presumably ``output_type`` names the transformation
    that produced the data in ``pathdir_transformed`` -- confirm with the
    caller.

    Args:
        pathdir: Directory of the data file used for error evaluation.
        pathdir_transformed: Directory of the file handed to ``brute_force``
            and ``polyfit``.
        filename: Data file name (same name in both directories).
        BF_try_time, BF_ops_file_type: Forwarded to ``brute_force``.
        PA: Pareto set that receives the new points via ``PA.add``.
        polyfit_deg: Forwarded to ``polyfit``.
        output_type: One of "", "acos", "asin", "atan", "cos", "exp",
            "inverse", "log", "sin", "sqrt", "squared", "tan". For any other
            value the brute-force passes add nothing and the polynomial is
            used unwrapped.

    Returns:
        ``PA``. The current Pareto points are also printed.
    """
    # run BF on the data (+)
    print("Checking for brute force + \n")
    brute_force(pathdir_transformed,filename,BF_try_time,BF_ops_file_type,"+")
    _add_bf_output_to_pareto(pathdir, filename, PA, "+", output_type)

    # run BF on the data (*)
    print("Checking for brute force * \n")
    brute_force(pathdir_transformed,filename,BF_try_time,BF_ops_file_type,"*")
    _add_bf_output_to_pareto(pathdir, filename, PA, "*", output_type)

    # run polyfit on the data
    print("Checking polyfit \n")
    polyfit_result = polyfit(polyfit_deg, pathdir_transformed+filename)
    eqn = str(polyfit_result[0])
    wrapped = _wrap_for_output_type(eqn, output_type)
    if wrapped is not None:
        eqn = wrapped

    # Calculate the complexity of the polyfit expression the same way as for gradient descent case
    polyfit_err = get_symbolic_expr_error(pathdir,filename,eqn)
    expr = parse_expr(eqn)
    numbers_expr = [node for node in preorder_traversal(expr) if node.is_Atom and node.is_number]
    complexity = 0
    for number in numbers_expr:
        complexity = complexity + get_number_DL(float(number))
    try:
        # Add the complexity due to symbols (taken from the unwrapped polynomial)
        complexity = complexity + _symbol_complexity(polyfit_result[0])
    except Exception:
        pass

    #run zero snap on polyfit output
    PA_poly = ParetoSet()
    PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn)))
    PA_poly = add_snap_expr_on_pareto_polyfit(pathdir, filename, str(eqn), PA_poly)

    for point in PA_poly.get_pareto_points():
        PA.add(Point(point[0],point[1],point[2]))

    print("Complexity  RMSE  Expression")
    for point in PA.get_pareto_points():
        print(point)

    return PA
示例#5
0
def final_gd(data, math_expr, lr=1e-2, N_epochs=5000):
    """Refine the numerical constants of ``math_expr`` by gradient descent on ``data``.

    Every number in the expression is turned into a trainable parameter.
    The parameters are trained on the mean squared error for ``N_epochs``
    steps with step size ``lr`` and then ``N_epochs`` more steps with
    ``lr / 10``. The trained values are substituted back and the resulting
    expression is scored.

    Args:
        data: 2-D numpy array; every column but the last is an input
            variable (``x0``, ``x1``, ...) and the last column is the target.
        math_expr: Expression string accepted by ``parse_expr``.
        lr: Step size of the first training phase.
        N_epochs: Number of steps in each of the two training phases.

    Returns:
        ``(error, complexity, expression)``. When a trained parameter is NaN
        or larger than 1e7 in magnitude, the sentinel
        ``(1000000, 10000000, "1")`` is returned instead.
    """
    param_dict = {}
    # The 'p' entry is a placeholder that makes the generated names start at p0.
    unsnapped_param_dict = {'p': 1}

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        number_types = (sympy.numbers.Float, sympy.numbers.Integer,
                        sympy.numbers.Rational, sympy.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        unsnapped_args = [
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ]
        return expr.func(*unsnapped_args)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        underscore = "_" if is_underscore else ""
        i = 0
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    # Turn BF expression to pytorch expression
    eq = parse_expr(math_expr)
    eq = unsnap_recur(eq, param_dict, unsnapped_param_dict)

    N_vars = len(data[0]) - 1
    # Parameter names in creation order (p0, p1, ...), placeholder excluded.
    param_names = [name for name in unsnapped_param_dict if name != "p"]
    variables = ["x%s" % i for i in range(N_vars)]

    # Argument order of f: first the parameters, then the variables.
    f = lambdify(param_names + variables, N(eq), torch)

    # Set the trainable parameters in the expression. They are kept in an
    # ordinary list; creating them through vars()[name] depends on writes to
    # the locals() mapping being persistent, which is not guaranteed.
    trainable_parameters = [
        torch.tensor(unsnapped_param_dict[name], requires_grad=True)
        for name in param_names
    ]

    # Prepare the loaded data
    real_variables = [
        torch.from_numpy(data[:, i]).float() for i in range(N_vars)
    ]
    model_inputs = trainable_parameters + real_variables
    y = torch.from_numpy(data[:, -1]).float()

    # Two phases of plain gradient descent: coarse steps, then fine steps.
    for step_size in (lr, lr / 10):
        for _ in range(N_epochs):
            yy = f(*model_inputs)
            loss = torch.mean((yy - y)**2)
            loss.backward()
            with torch.no_grad():
                for parameter in trainable_parameters:
                    parameter -= step_size * parameter.grad
                    parameter.grad.zero_()
            if torch.isnan(loss):
                # Ends the current phase only; the next phase still starts.
                break

    for parameter in trainable_parameters:
        if torch.isnan(parameter) or abs(parameter) > 1e7:
            return 1000000, 10000000, "1"

    # get the updated symbolic regression
    for name, parameter in zip(param_names, trainable_parameters):
        eq = eq.subs(name, parameter)

    numbers_expr = [
        node for node in preorder_traversal(eq)
        if node.is_Atom and node.is_number
    ]
    complexity = 0
    for number in numbers_expr:
        try:
            complexity = complexity + get_number_DL_snapped(float(number))
        except Exception:
            # Penalise numbers whose description length cannot be computed.
            complexity = complexity + 1000000
    n_variables = len(eq.free_symbols)
    n_operations = len(count_ops(eq, visual=True).free_symbols)
    if n_operations != 0 or n_variables != 0:
        complexity = complexity + (n_variables + n_operations) * np.log2(
            (n_variables + n_operations))

    error = get_symbolic_expr_error(data, str(eq))
    return error, complexity, eq
def add_snap_expr_on_pareto_polyfit(pathdir, filename, math_expr, PA):
    """Add integer- and rational-snapped variants of ``math_expr`` to ``PA``.

    The data is loaded from ``pathdir + filename``. For ``w = 1 .. number of
    numbers in the expression``, ``integerSnap(numbers, w)`` and
    ``rationalSnap(numbers, w)`` each return a mapping from parameter index
    to a replacement value; the replacements are written into the
    expression and the result is kept as a candidate. Every candidate is
    then scored and added to ``PA``.

    Args:
        pathdir: Directory prefix of the data file.
        filename: Name of the data file.
        math_expr: Expression (anything whose ``str()`` ``parse_expr``
            accepts).
        PA: Pareto set that receives the new points via ``PA.add``.

    Returns:
        ``PA``.
    """
    input_data = np.loadtxt(pathdir + filename)

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        number_types = (sympy.numbers.Float, sympy.numbers.Integer,
                        sympy.numbers.Rational, sympy.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        unsnapped_args = [
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ]
        return expr.func(*unsnapped_args)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        underscore = "_" if is_underscore else ""
        i = 0
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    def is_atomic_number(node):
        return node.is_Atom and node.is_number

    base_expr = parse_expr(str(math_expr))

    # Whole parameter tokens only (p0, p1, ...). A looser pattern would also
    # hit the "p" inside "exp", turning it into a format field that has no
    # value and discarding the candidate.
    param_token = re.compile(r"\b(p\d+)\b")

    def snap_one_at_a_time(use_rational):
        """Build the snapped candidates using rationalSnap or integerSnap."""
        # Get the numbers appearing in the expression
        eq_numbers = [
            node for node in preorder_traversal(base_expr)
            if is_atomic_number(node)
        ]
        candidates = []
        for w in range(len(eq_numbers)):
            try:
                snap = rationalSnap if use_rational else integerSnap
                unsnapped_param_dict = {'p': 1}
                eq = unsnap_recur(base_expr, {}, unsnapped_param_dict)
                del unsnapped_param_dict["p"]
                values = copy.deepcopy(unsnapped_param_dict)
                new_numbers = snap(eq_numbers, w + 1)
                values.update(
                    {"p" + str(k): v for k, v in new_numbers.items()})
                # Turn every parameter into a format field and fill in the
                # (partly snapped) values.
                new_eq = param_token.sub(r"{\1}", str(eq))
                new_eq = new_eq.format_map(values)
                candidates.append(parse_expr(new_eq))
            except Exception:
                continue
        return candidates

    # Do integer snap, then rational snap, one parameter at a time
    integer_snapped_expr = snap_one_at_a_time(False)
    rational_snapped_expr = snap_one_at_a_time(True)

    snapped_expr = np.append(integer_snapped_expr, rational_snapped_expr)

    for i in range(len(snapped_expr)):
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(input_data,
                                                    str(snapped_expr[i]))
            # Calculate the complexity of the new, snapped expression.
            # The expression is re-parsed from its string form with the
            # default parser namespace, so that names of this function's
            # local variables can never leak into the parsed expression.
            snapped = parse_expr(str(snapped_expr[i]))
            snapped = intify(snapped)
            numbers_expr = [
                node for node in preorder_traversal(snapped)
                if is_atomic_number(node)
            ]
            snapped_complexity = 0
            for number in numbers_expr:
                snapped_complexity = snapped_complexity + get_number_DL_snapped(
                    float(number))
            # Add the complexity due to symbols
            n_variables = len(snapped.free_symbols)
            n_operations = len(count_ops(snapped, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                snapped_complexity = snapped_complexity + (
                    n_variables + n_operations) * np.log2(
                        (n_variables + n_operations))
            PA.add(
                Point(x=snapped_complexity,
                      y=snapped_error,
                      data=str(snapped)))
        except Exception:
            continue

    return (PA)
def add_snap_expr_on_pareto_polyfit(pathdir, filename, math_expr, PA):
    """Add zero-snapped variants of ``math_expr`` to the Pareto frontier ``PA``.

    The numeric atoms of the parsed expression are collected, ``zeroSnap`` is
    called once per atom (second argument ``1 .. len(numbers)``), and every
    resulting expression is scored and added to ``PA`` as
    ``Point(x=complexity, y=error, data=str(expr))``.

    Args:
        pathdir: Forwarded unchanged to ``get_symbolic_expr_error``.
        filename: Forwarded unchanged to ``get_symbolic_expr_error``.
        math_expr: Expression (or string) whose ``str()`` form is parseable
            by ``parse_expr``.
        PA: Pareto-frontier object exposing ``add(Point)``.

    Returns:
        The same ``PA`` object that was passed in.

    Notes:
        Both loops are best effort: a candidate whose construction or scoring
        raises an ``Exception`` is skipped. ``KeyboardInterrupt`` and
        ``SystemExit`` are deliberately *not* swallowed.

        Local variable names are significant here because ``locals()`` is
        handed to ``parse_expr`` as its local dictionary; do not rename them
        without checking that call.
    """

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter.

        Every Float/Integer/Rational/Pi node is replaced by a fresh symbol
        (``p0``, ``p1``, ...) whose float value is recorded in
        ``unsnapped_param_dict``; symbols are returned untouched and any other
        node is rebuilt from its transformed arguments.
        """
        # Import from sympy.core explicitly instead of relying on the
        # `sympy.numbers` / `sympy.symbol` attribute aliases, which are not
        # guaranteed to exist in every SymPy release.
        from sympy import Symbol
        from sympy.core.numbers import Float, Integer, Pi, Rational
        if isinstance(expr, (Float, Integer, Rational, Pi)):
            used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        elif isinstance(expr, Symbol):
            return expr
        else:
            unsnapped_sub_expr_list = [
                unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
                for sub_expr in expr.args
            ]
            return expr.func(*unsnapped_sub_expr_list)

    def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        underscore = "_" if is_underscore else ""
        i = 0
        while "{}{}{}{}{}".format(key, underscore, midfix, i, suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    eq = parse_expr(str(math_expr))
    expr = eq

    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [subexpression for subexpression in preorder_traversal(expr) if is_atomic_number(subexpression)]

    # Do zero snap one parameter at a time
    zero_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            param_dict = {}
            # The seed key 'p' forces generated names to be p0, p1, ...
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            # Each entry of new_numbers is used as (index into eq_numbers, new value).
            new_numbers = zeroSnap(eq_numbers, w + 1)
            # eq_numbers is updated in place, so snapped values persist into
            # the following iterations.
            for kk in range(len(new_numbers)):
                eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
            # Substitute the (possibly snapped) numbers back, in the order the
            # parameters were created.
            jj = 0
            for parm in unsnapped_param_dict:
                if parm != "p":
                    eq = eq.subs(parm, eq_numbers[jj])
                    jj = jj + 1
            zero_snapped_expr.append(eq)
        except Exception:
            # Best effort: skip candidates that cannot be built.
            continue

    for i in range(len(zero_snapped_expr)):
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename, str(zero_snapped_expr[i]))
            # Calculate the complexity of the new, snapped expression
            expr = simplify(powsimp(zero_snapped_expr[i]))
            # NOTE(review): this loop only rebinds the local name `s`; since
            # parse_expr receives locals() keyed by Python variable names, the
            # real=True assumption does not appear to reach the parsed symbols.
            # Left unchanged so that results stay identical.
            for s in (expr.free_symbols):
                s = symbols(str(s), real=True)
            expr = simplify(parse_expr(str(zero_snapped_expr[i]), locals()))
            expr = intify(expr)

            numbers_expr = [subexpression for subexpression in preorder_traversal(expr) if is_atomic_number(subexpression)]

            # Complexity contribution of the numeric constants
            snapped_complexity = 0
            for j in numbers_expr:
                snapped_complexity = snapped_complexity + get_number_DL_snapped(float(j))

            # Add the complexity due to symbols: k*log2(k), where k is the
            # number of free symbols plus the number of distinct symbols in
            # count_ops(..., visual=True).
            n_variables = len(expr.free_symbols)
            n_operations = len(count_ops(expr, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                snapped_complexity = snapped_complexity + (n_variables + n_operations) * np.log2((n_variables + n_operations))

            PA.add(Point(x=snapped_complexity, y=snapped_error, data=str(expr)))
        except Exception:
            # Best effort: report and skip candidates that cannot be scored.
            print("error")
            print("")
            continue
    return PA
        
        
        
            
def add_snap_expr_on_pareto(pathdir, filename, math_expr, PA, DR_file=""):
    """Add integer- and rational-snapped variants of ``math_expr`` to ``PA``.

    The numeric atoms of the parsed expression are collected; ``integerSnap``
    and ``rationalSnap`` are each called once per atom (second argument
    ``1 .. len(numbers)``). Every resulting expression is scored and added to
    ``PA`` as ``Point(x=complexity, y=error, data=str(expr))``.

    Args:
        pathdir: Forwarded unchanged to ``get_symbolic_expr_error``.
        filename: Forwarded unchanged to ``get_symbolic_expr_error``.
        math_expr: Expression (or string) whose ``str()`` form is parseable
            by ``parse_expr``.
        PA: Pareto-frontier object exposing ``add(Point)``.
        DR_file: Optional path to a comma-separated text file read with
            ``np.loadtxt``. When non-empty, entry ``1`` is multiplied onto the
            expression and entries ``2, 3, ...`` replace the variables
            ``x0, x1, ...`` before the symbol/operation complexity is
            computed and before the expression is stored.

    Returns:
        The same ``PA`` object that was passed in.

    Notes:
        All loops are best effort: a candidate whose construction or scoring
        raises an ``Exception`` is skipped. ``KeyboardInterrupt`` and
        ``SystemExit`` are deliberately *not* swallowed.

        Local variable names are significant here because ``locals()`` is
        handed to ``parse_expr`` as its local dictionary; do not rename them
        without checking those calls.
    """

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter.

        Every Float/Integer/Rational/Pi node is replaced by a fresh symbol
        (``p0``, ``p1``, ...) whose float value is recorded in
        ``unsnapped_param_dict``; symbols are returned untouched and any other
        node is rebuilt from its transformed arguments.
        """
        # Import from sympy.core explicitly instead of relying on the
        # `sympy.numbers` / `sympy.symbol` attribute aliases, which are not
        # guaranteed to exist in every SymPy release.
        from sympy import Symbol
        from sympy.core.numbers import Float, Integer, Pi, Rational
        if isinstance(expr, (Float, Integer, Rational, Pi)):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        elif isinstance(expr, Symbol):
            return expr
        else:
            unsnapped_sub_expr_list = [
                unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
                for sub_expr in expr.args
            ]
            return expr.func(*unsnapped_sub_expr_list)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        underscore = "_" if is_underscore else ""
        i = 0
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    eq = parse_expr(str(math_expr))
    expr = eq

    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [
        subexpression for subexpression in preorder_traversal(expr)
        if is_atomic_number(subexpression)
    ]

    # Do integer snap one parameter at a time
    integer_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            param_dict = {}
            # The seed key 'p' forces generated names to be p0, p1, ...
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            # Each entry of new_numbers is used as (index into eq_numbers, new value).
            new_numbers = integerSnap(eq_numbers, w + 1)
            # eq_numbers is updated in place, so snapped values persist into
            # the following iterations.
            for kk in range(len(new_numbers)):
                eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
            jj = 0
            for parm in unsnapped_param_dict:
                if parm != "p":
                    eq = eq.subs(parm, eq_numbers[jj])
                    jj = jj + 1
            integer_snapped_expr.append(eq)
        except Exception:
            # Best effort: skip candidates that cannot be built.
            continue

    # Zero snapping is disabled in this function.

    # Get the numbers appearing in the expression again: the integer pass
    # above overwrote entries of eq_numbers in place.
    eq_numbers = [
        subexpression for subexpression in preorder_traversal(expr)
        if is_atomic_number(subexpression)
    ]

    # Do rational snap one parameter at a time
    rational_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            # Work on a copy so the snapped values of one iteration are not
            # written back into eq_numbers.
            eq_numbers_snap = copy.deepcopy(eq_numbers)
            param_dict = {}
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            new_numbers = rationalSnap(eq_numbers, w + 1)
            # Elements 1 and 2 of each snap result are kept; they are used
            # below as the two arguments of Rational(...).
            for kk in range(len(new_numbers)):
                eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
            jj = 0
            for parm in unsnapped_param_dict:
                if parm != "p":
                    try:
                        eq = eq.subs(
                            parm,
                            Rational(eq_numbers_snap[jj][0],
                                     eq_numbers_snap[jj][1]))
                    except Exception:
                        # Entry was not snapped to a pair: substitute the
                        # stored value as is.
                        eq = eq.subs(parm, eq_numbers_snap[jj])
                    jj = jj + 1
            rational_snapped_expr.append(eq)
        except Exception:
            # Best effort: skip candidates that cannot be built.
            continue

    snapped_expr = np.append(integer_snapped_expr, rational_snapped_expr)

    for i in range(len(snapped_expr)):
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(snapped_expr[i]))
            # Calculate the complexity of the new, snapped expression
            expr = simplify(powsimp(snapped_expr[i]))
            # NOTE(review): this loop only rebinds the local name `s`; since
            # parse_expr receives locals() keyed by Python variable names, the
            # real=True assumption does not appear to reach the parsed symbols.
            # Left unchanged so that results stay identical.
            for s in (expr.free_symbols):
                s = symbols(str(s), real=True)
            expr = simplify(parse_expr(str(snapped_expr[i]), locals()))
            expr = intify(expr)
            numbers_expr = [
                subexpression for subexpression in preorder_traversal(expr)
                if is_atomic_number(subexpression)
            ]

            # If a DR file is provided, replace the variables with the actual
            # ones before calculating the symbol/operation complexity.
            if DR_file != "":
                dr_data = np.loadtxt(DR_file, dtype="str", delimiter=",")

                expr = str(expr)
                old_vars = ["x%s" % k for k in range(len(dr_data) - 3)]
                # Replace the highest index first so that e.g. "x1" cannot
                # clobber the prefix of "x10" (same order as combine_pareto).
                for i_dr in range(len(old_vars) - 1, -1, -1):
                    expr = expr.replace(old_vars[i_dr],
                                        "(" + dr_data[i_dr + 2] + ")")
                expr = "(" + dr_data[1] + ")*(" + expr + ")"

                expr = parse_expr(expr)
                for s in (expr.free_symbols):
                    s = symbols(str(s), real=True)
                expr = simplify(parse_expr(str(expr), locals()))
                # NOTE(review): numbers_expr was collected before this
                # substitution, so constants introduced by the DR expressions
                # are not counted below - confirm this is intended.

            # Complexity contribution of the numeric constants
            snapped_complexity = 0
            for j in numbers_expr:
                snapped_complexity = snapped_complexity + get_number_DL_snapped(
                    float(j))

            # Add the complexity due to symbols: k*log2(k), where k is the
            # number of free symbols plus the number of distinct symbols in
            # count_ops(..., visual=True).
            n_variables = len(expr.free_symbols)
            n_operations = len(count_ops(expr, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                snapped_complexity = snapped_complexity + (
                    n_variables + n_operations) * np.log2(
                        (n_variables + n_operations))

            PA.add(Point(x=snapped_complexity, y=snapped_error,
                         data=str(expr)))
        except Exception:
            # Best effort: skip candidates that cannot be scored.
            continue
    return PA