Exemplo n.º 1
0
def do_lasso(X, Y):
    """
    Run a lasso grid search on the input data and refit with the best alpha.

    Inputs
    ------
    X: dataframe, n*m, n is number of data points,
        m is number of features
    Y: experimental electrical conductivity, one value per data point;
        it is flattened to 1-D before fitting

    Returns
    ------
    lasso : sklearn Lasso object, fitted on (X, Y) with the alpha that
        the grid search selected
    """
    # ravel() returns a new array rather than flattening in place, so the
    # result must be rebound. np.ravel also accepts lists and dataframes.
    Y = np.ravel(Y)

    alphas = np.array([0.1, 0.01, 0.001, 0.0001])
    # `normalize` is deliberately not passed: False was its default
    # behaviour and the keyword was removed in scikit-learn 1.2.
    lasso = Lasso(alpha=0.001,
                  fit_intercept=True,
                  precompute=False,
                  copy_X=True,
                  max_iter=10000,
                  tol=0.001,
                  positive=False,
                  random_state=None,
                  selection='cyclic')
    # GridSearchCV fits clones, so `lasso` itself is still unfitted here.
    gs = GridSearchCV(lasso, param_grid=dict(alpha=alphas))
    gs.fit(X, Y)

    # The regularisation strength is the `alpha` parameter. Assigning to
    # `alpha_` would create an unused attribute and leave alpha at 0.001.
    lasso.set_params(alpha=gs.best_params_['alpha'])

    lasso.fit(X, Y)

    return lasso
Exemplo n.º 2
0
def test_do_lasso():
    """
    Test running a lasso grid search on a small hard-coded data set.

    The test builds its own inputs:
    X: list of lists, n*m, n is number of data points,
        m is number of features
    y: list of n target values

    Returns
    ------
    lasso : sklearn Lasso object fitted with the selected alpha

    Checking:
    1. X and y have the same datatype
    2. X and y have the same length
    3. The input matrix has enough data points to be split in the regressor
    4. The returned model uses the alpha chosen by the grid search
    """
    X = [
        [0, 6, 234.321, 5, 0, 1, 1, 0, 1, 18.000, 1, 0, 0, 0, 0, 298.15,
         101, 0.004],
        [1, 6, 234.321, 5, 0, 1, 1, 0, 1, 18.000, 1, 0, 0, 0, 0, 304.15,
         101, 0.007],
        [2, 6, 234.321, 5, 0, 1, 1, 0, 1, 18.000, 1, 0, 0, 0, 0, 300.15,
         101, 0.005],
        [3, 6, 234.321, 5, 0, 1, 1, 0, 1, 18.000, 1, 0, 0, 0, 0, 302.15,
         101, 0.006],
        [4, 6, 234.321, 5, 0, 1, 1, 0, 1, 18.000, 1, 0, 0, 0, 0, 306.15,
         101, 0.005],
    ]
    y = [0.02, 0.03, 0.03, 0.04, 0.05]
    assert isinstance(X, type(y)), "The two input should has the same datatype"
    assert len(X) == len(y), "Dimension mismatch between two input matrix"
    assert len(X) >= 5, "Need more data points in the input data"

    alphas = np.array([5, 4, 3, 2, 1, 0.1, 0.01, 0.001, 0.0001])
    # `normalize` is deliberately not passed: False was its default
    # behaviour and the keyword was removed in scikit-learn 1.2.
    lasso = Lasso(alpha=0.001,
                  fit_intercept=True,
                  precompute=False,
                  copy_X=True,
                  max_iter=10000,
                  tol=0.001,
                  positive=False,
                  random_state=None,
                  selection='cyclic')
    # NOTE(review): with only five rows, default cross-validation may leave
    # a single sample per validation fold, where the default R^2 score is
    # undefined - confirm the selected alpha is meaningful.
    gs = GridSearchCV(lasso, param_grid=dict(alpha=alphas))
    gs.fit(X, y)

    # The regularisation strength is the `alpha` parameter. Assigning to
    # `alpha_` would create an unused attribute and leave alpha at 0.001.
    best_alpha = gs.best_params_['alpha']
    lasso.set_params(alpha=best_alpha)

    lasso.fit(X, y)
    assert lasso.alpha == best_alpha, "Final model must use the selected alpha"

    return lasso