Example #1
import numpy as np
from operator import itemgetter
from scipy.stats import randint as sp_randint

from sklearn import ensemble
from sklearn.decomposition import FastICA
from sklearn.pipeline import Pipeline
from sklearn.grid_search import RandomizedSearchCV  # sklearn.model_selection in scikit-learn >= 0.18

# Project-local modules from the original repository.
import ICAize
import random_forest_spectra

# random_state, ica_max_iter and n_iter_search are module-level settings
# defined elsewhere in the original script.

def main():
    path = "."
    metadata_path = ".."

    # ICA dimensionality reduction feeding a random forest regressor.
    rfr = Pipeline([('ica', FastICA(random_state=random_state, max_iter=ica_max_iter)),
                    ('rfr', ensemble.RandomForestRegressor(random_state=random_state, n_jobs=-1))])

    param_grid = {
        "ica__n_components": sp_randint(15, 200),
        "rfr__n_estimators": sp_randint(25, 400),
        # note: recent scikit-learn requires an integer min_samples_split >= 2
        "rfr__min_samples_split": sp_randint(1, 10)
        # "rfr__max_features": [None, "log2", "sqrt"]
    }

    randsearch = RandomizedSearchCV(rfr, param_grid, n_iter=n_iter_search)

    flux_arr, exp_arr, wavelengths = ICAize.load_all_in_dir(path=path, use_con_flux=True, recombine_flux=False)

    # Load the observation metadata, keep only rows whose EXP_ID appears in
    # the loaded spectra, and align both tables on EXP_ID.
    obs_metadata = random_forest_spectra.trim_observation_metadata(random_forest_spectra.load_observation_metadata(metadata_path))
    reduced_obs_metadata = obs_metadata[np.in1d(obs_metadata['EXP_ID'], exp_arr)]
    reduced_obs_metadata.sort('EXP_ID')
    sorted_inds = np.argsort(exp_arr)
    reduced_obs_metadata.remove_column('EXP_ID')
    md_len = len(reduced_obs_metadata)

    # Flatten the metadata table into a 2-D float array for scikit-learn.
    X_arr = np.array(reduced_obs_metadata).view('f8').reshape((md_len, -1))

    # Fit flux -> metadata, with flux rows reordered to match the EXP_ID sort.
    randsearch.fit(flux_arr[sorted_inds], X_arr)

    # Report the five best parameter sets found by the randomized search
    # (grid_scores_ is the scikit-learn < 0.20 results API).
    top_scores = sorted(randsearch.grid_scores_, key=itemgetter(1), reverse=True)[:5]
    for i, score in enumerate(top_scores):
        print("Model with rank:", i)
        print("Mean validation score/std:", score.mean_validation_score, np.std(score.cv_validation_scores))
        print("Parameters:", score.parameters)
        print("")
Example #2
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression as Linear  # 'Linear' is assumed to alias LinearRegression in the original module

# load_observation_metadata, trim_observation_metadata and load_all_in_dir,
# along with the module-level settings metadata_path, spectra_path and
# restrict_delta, are defined elsewhere in the original script.

def main():
    obs_metadata = trim_observation_metadata(load_observation_metadata(metadata_path))
    flux_arr, exp_arr, ivar_arr, mask_arr, wavelengths = \
                load_all_in_dir(spectra_path, use_con_flux=False, recombine_flux=False,
                                pattern="stacked*exp??????.csv", ivar_cutoff=0.001)

    sorted_inds = np.argsort(exp_arr)

    # Keep only metadata rows whose EXP_ID appears in the loaded spectra and
    # align both tables on EXP_ID.
    reduced_obs_metadata = obs_metadata[np.in1d(obs_metadata['EXP_ID'], exp_arr)]
    reduced_obs_metadata.sort('EXP_ID')
    md_len = len(reduced_obs_metadata)

    # Flatten the metadata table into a 2-D float array for scikit-learn.
    X_arr = np.array(reduced_obs_metadata).view('f8').reshape((md_len, -1))

    '''
    test_fold = np.zeros((X_arr.shape[0], ), dtype=int)
    test_fold[600:]=1
    ps = PredefinedSplit(test_fold=test_fold)
    print(len(ps))
    '''

    # Manual leave-one-out over the first 1000 exposures: hold out one sample
    # at a time, fit on the rest, and print per-model error terms.
    test_inds = range(0, 1000)
    linear = Linear(fit_intercept=True, copy_X=True, n_jobs=-1)
    poly_linear = Pipeline([('poly', PolynomialFeatures(degree=2)),
                            ('linear', Linear(fit_intercept=True, copy_X=True, n_jobs=-1))])


    for test_ind in test_inds:
        # Hold out a single sample; reshape to (1, n_features) since
        # scikit-learn expects 2-D input to predict().
        test_X = X_arr[test_ind].reshape(1, -1)
        train_X = np.vstack([X_arr[:test_ind], X_arr[test_ind+1:]])
        test_y = (flux_arr[sorted_inds])[test_ind]
        train_y = np.vstack([(flux_arr[sorted_inds])[:test_ind], (flux_arr[sorted_inds])[test_ind+1:]])

        linear.fit(train_X, train_y)
        poly_linear.fit(train_X, train_y)

        lin_predictions = linear.predict(test_X)[0]
        plin_predictions = poly_linear.predict(test_X)[0]

        # Ignore pixels with zero inverse variance or near-zero predictions;
        # restrict_delta additionally limits the error terms to pixels below
        # index 2700.
        mask = (ivar_arr[test_ind] == 0) | np.isclose(lin_predictions, 0)
        if restrict_delta:
            delta_mask = mask.copy()
            delta_mask[2700:] = True
        else:
            delta_mask = mask

        lin_delta = lin_predictions - test_y
        err_term = np.sum(np.power(lin_delta[~delta_mask], 2))/len(wavelengths[~delta_mask])
        err_sum = np.sum(lin_delta[~delta_mask])/len(lin_delta[~delta_mask])
        print(err_term, err_sum, end=' ')

        plin_delta = plin_predictions - test_y
        err_term = np.sum(np.power(plin_delta[~delta_mask], 2))/len(wavelengths[~delta_mask])
        err_sum = np.sum(plin_delta[~delta_mask])/len(plin_delta[~delta_mask])
        print(err_term, err_sum)



    '''
    ransac = RANSAC()
    poly_ransac = Pipeline([('poly', PolynomialFeatures(degree=2)),
                        ('ransac', RANSAC())])
    print(X_arr_train.shape, flux_arr_train.shape)
    ransac.fit(np.copy(X_arr_train), np.copy(flux_arr_train))
    poly_ransac.fit(X_arr_train, flux_arr_train)

    r_predictions = ransac.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, r_predictions)
    print(mse)
    pr_predictions = poly_ransac.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, pr_predictions)
    print(mse)
    '''

    '''
    #gp = GaussianProcess(nugget=np.power(flux_arr_train, 2)/ivar_train) #regr="quadratic")
    gp = GaussianProcess()
    gp.fit(X_arr_train, flux_arr_train)
    gp_predictions = gp.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, gp_predictions)
    print(mse)
    '''

    '''
    del lin_predictions
    del plin_predictions
    del linear
    del poly_linear

    ridge = RidgeCV()
    poly_ridge = Pipeline([('poly', PolynomialFeatures(degree=2)),
                        ('ridge', RidgeCV())])
    ridge.fit(X_arr_train, flux_arr_train)
    poly_ridge.fit(X_arr_train, flux_arr_train)

    ridge_predictions = ridge.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, ridge_predictions)
    print(mse)
    pridge_predictions = poly_ridge.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, pridge_predictions)
    print(mse)

    del ridge_predictions
    del pridge_predictions
    del ridge
    del poly_ridge

    lasso = LassoCV(n_jobs=-1)
    poly_lasso = Pipeline([('poly', PolynomialFeatures(degree=2)),
                        ('lasso', LassoCV(n_jobs=-1))])
    lasso.fit(X_arr_train, flux_arr_train)
    poly_lasso.fit(X_arr_train, flux_arr_train)

    lasso_predictions = lasso.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, lasso_predictions)
    print(mse)
    plasso_predictions = poly_lasso.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, plasso_predictions)
    print(mse)

    del lasso_predictions
    del plasso_predictions
    del lasso
    del poly_lasso

    elastic = ElasticNetCV(n_jobs=-1)
    poly_elastic = Pipeline([('poly', PolynomialFeatures(degree=2)),
                        ('elastic', ElasticNetCV(n_jobs=-1))])
    elastic.fit(X_arr_train, flux_arr_train)
    poly_elastic.fit(X_arr_train, flux_arr_train)

    elastic_predictions = elastic.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, elastic_predictions)
    print(mse)
    pelastic_predictions = poly_elastic.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, pelastic_predictions)
    print(mse)

    del elastic_predictions
    del pelastic_predictions
    del elastic
    del poly_elastic
    '''

    '''
    pls = PLS(n_components=8, max_iter=2000)
    pls.fit(X_arr_train, flux_arr_train)
    pls_predictions = pls.predict(X_arr_test)
    mse = mean_squared_error(flux_arr_test, pls_predictions)
    '''
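The commented-out experiments above use aliases and classes from older scikit-learn releases (Linear, RANSAC, GaussianProcess and PLS appear to be module-local aliases for LinearRegression, RANSACRegressor, GaussianProcess and PLSRegression). Below is a minimal, standalone sketch of the same kind of model comparison with current class names, run on synthetic data in place of the spectra arrays; all data and names in it are illustrative, not the original experiment.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.cross_decomposition import PLSRegression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import mean_squared_error

# Synthetic stand-ins for X_arr_train / flux_arr_train (multi-output targets).
X, y = make_regression(n_samples=300, n_features=8, n_targets=5, noise=0.1,
                       random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

models = {
    'linear': LinearRegression(),
    'poly_ridge': Pipeline([('poly', PolynomialFeatures(degree=2)),
                            ('ridge', RidgeCV())]),
    'pls': PLSRegression(n_components=4, max_iter=2000),  # current name for PLS
    'gp': GaussianProcessRegressor(),                     # current name for GaussianProcess
}

for name, model in models.items():
    model.fit(X_train, y_train)
    print(name, mean_squared_error(y_test, model.predict(X_test)))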
    '''