def main():
    """Write blue/red flux summaries for each loaded spectrum to a CSV file.

    All spectra matching ``stacked*exp??????.csv`` under ``spectra_path`` are
    loaded, and every row of the flux array is reduced along axis 1 to three
    statistics: plain mean, ivar-weighted mean and median.  Each statistic is
    computed separately for the columns before index 2700 (written under the
    ``__blue`` names) and for the columns from index 2700 onwards (``__red``).
    The table, keyed by ``exp_id``, is saved as ``flux_averages.csv``.
    """
    flux_arr, exp_arr, ivar_arr, mask_arr, wavelengths = load_all_in_dir(
        spectra_path,
        use_con_flux=False,
        recombine_flux=False,
        pattern="stacked*exp??????.csv"
    )

    # Column index separating the two bands; slicing yields views, not copies.
    blue_cols = slice(None, 2700)
    red_cols = slice(2700, None)
    blue_flux = flux_arr[:, blue_cols]
    red_flux = flux_arr[:, red_cols]
    blue_ivar = ivar_arr[:, blue_cols]
    red_ivar = ivar_arr[:, red_cols]

    t = Table()
    t["exp_id"] = exp_arr

    # Unweighted mean per row.
    t["flux_flat_avg__blue"] = np.mean(blue_flux, axis=1)
    t["flux_flat_avg__red"] = np.mean(red_flux, axis=1)

    # Mean per row, weighted by the matching inverse-variance values.
    t["flux_weighted_avg__blue"] = np.average(blue_flux, axis=1,
                                              weights=blue_ivar)
    t["flux_weighted_avg__red"] = np.average(red_flux, axis=1,
                                             weights=red_ivar)

    # Median per row.
    t["flux_median__blue"] = np.median(blue_flux, axis=1)
    t["flux_median__red"] = np.median(red_flux, axis=1)

    t.write("flux_averages.csv", format="ascii.csv")
def main():
    """Write per-wavelength flux and ivar statistics to ``flux_stats.csv``.

    All spectra matching ``stacked*exp??????.csv`` under ``spectra_path`` are
    loaded and reduced along axis 0, giving one output row per wavelength.
    Columns, in order: the wavelength, the count of entries whose mask is
    False, the mean / standard deviation / kurtosis of the ivar values, and
    the mean / ivar-weighted mean / standard deviation / derived inverse
    variance (``stdev ** -2``) / kurtosis of the flux values.

    Wavelengths with no unmasked entries get kurtosis values of 0 and a
    uniformly weighted (i.e. plain) mean in place of the ivar-weighted one.
    Wavelengths whose flux standard deviation is exactly 0 get a derived
    inverse variance of 0.
    """
    flux_arr, exp_arr, ivar_arr, mask_arr, wavelengths = \
        load_all_in_dir(spectra_path, use_con_flux=False, recombine_flux=False,
                        pattern="stacked*exp??????.csv")

    t = Table()
    t["wavelengths"] = wavelengths

    # Count the unmasked entries in each wavelength column and remember which
    # columns have none at all.
    valid_per_wl = np.sum(~mask_arr, axis=0)
    no_valid_data = (valid_per_wl == 0)
    t["valid_per_wl"] = valid_per_wl

    t["ivar_avg_per_wl"] = np.mean(ivar_arr, axis=0)
    t["ivar_stdev_per_wl"] = np.std(ivar_arr, axis=0)
    ivar_kurtosis_per_wl = kurtosis(ivar_arr, axis=0, bias=False)
    ivar_kurtosis_per_wl[no_valid_data] = 0
    t["ivar_kurtosis_per_wl"] = ivar_kurtosis_per_wl

    t["flux_flat_avg_per_wl"] = np.mean(flux_arr, axis=0)

    # np.average raises ZeroDivisionError when a column's weights sum to
    # zero, so columns without valid data are given uniform weights.  A copy
    # is used so the loaded ivar values are left untouched.
    weights = np.array(ivar_arr)
    weights[:, no_valid_data] = 1
    t["flux_weighted_avg_per_wl"] = np.average(flux_arr, axis=0,
                                               weights=weights)

    flux_stdev_per_wl = np.std(flux_arr, axis=0)
    t["flux_stdev_per_wl"] = flux_stdev_per_wl

    # Inverse variance implied by the flux scatter.  Only non-zero standard
    # deviations are inverted; the rest stay at 0, which avoids the
    # divide-by-zero the original "== 1" / "== 0" no-op statements were
    # evidently meant to prevent.
    nonzero_stdev = (flux_stdev_per_wl != 0)
    flux_derived_ivar_per_wl = np.zeros(flux_stdev_per_wl.shape,
                                        dtype=flux_stdev_per_wl.dtype)
    flux_derived_ivar_per_wl[nonzero_stdev] = \
        np.power(flux_stdev_per_wl[nonzero_stdev], -2)
    t["flux_derived_ivar_per_wl"] = flux_derived_ivar_per_wl

    flux_kurtosis_per_wl = kurtosis(flux_arr, axis=0, bias=False)
    flux_kurtosis_per_wl[no_valid_data] = 0
    t["flux_kurtosis_per_wl"] = flux_kurtosis_per_wl

    t.write("flux_stats.csv", format="ascii.csv")
def main(): obs_metadata = trim_observation_metadata(load_observation_metadata(metadata_path)) flux_arr, exp_arr, ivar_arr, mask_arr, wavelengths = \ load_all_in_dir(spectra_path, use_con_flux=False, recombine_flux=False, pattern="stacked*exp??????.csv", ivar_cutoff=0.001) sorted_inds = np.argsort(exp_arr) reduced_obs_metadata = obs_metadata[np.in1d(obs_metadata['EXP_ID'], exp_arr)] reduced_obs_metadata.sort('EXP_ID') md_len = len(reduced_obs_metadata) X_arr = np.array(reduced_obs_metadata).view('f8').reshape((md_len,-1)) ''' test_fold = np.zeros((X_arr.shape[0], ), dtype=int) test_fold[600:]=1 ps = PredefinedSplit(test_fold=test_fold) print len(ps) ''' test_inds = range(0, 1000) linear = Linear(fit_intercept=True, copy_X=True, n_jobs=-1) poly_linear = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', Linear(fit_intercept=True, copy_X=True, n_jobs=-1))]) for test_ind in test_inds: test_X = X_arr[test_ind] train_X = np.vstack( [X_arr[:test_ind], X_arr[test_ind+1:]] ) test_y = (flux_arr[sorted_inds])[test_ind] train_y = np.vstack( [(flux_arr[sorted_inds])[:test_ind], (flux_arr[sorted_inds])[test_ind+1:]] ) linear.fit(train_X, train_y) poly_linear.fit(train_X, train_y) lin_predictions = linear.predict(test_X)[0] plin_predictions = poly_linear.predict(test_X)[0] mask = (ivar_arr[test_ind] == 0) | np.isclose(lin_predictions, 0) if restrict_delta: delta_mask = mask.copy() delta_mask[2700:] = True else: delta_mask = mask lin_delta = lin_predictions - test_y err_term = np.sum(np.power(lin_delta[~delta_mask], 2))/len(wavelengths[~delta_mask]) err_sum = np.sum(lin_delta[~delta_mask])/len(lin_delta[~delta_mask]) print err_term, err_sum, plin_delta = plin_predictions - test_y err_term = np.sum(np.power(plin_delta[~delta_mask], 2))/len(wavelengths[~delta_mask]) err_sum = np.sum(plin_delta[~delta_mask])/len(plin_delta[~delta_mask]) print err_term, err_sum ''' ransac = RANSAC() poly_ransac = Pipeline([('poly', PolynomialFeatures(degree=2)), ('ransac', RANSAC())]) print 
X_arr_train.shape, flux_arr_train.shape ransac.fit(np.copy(X_arr_train), np.copy(flux_arr_train)) poly_ransac.fit(X_arr_train, flux_arr_train) r_predictions = ransac.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, r_predictions) print mse pr_predictions = poly_ransac.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, pr_predictions) print mse ''' ''' #gp = GaussianProcess(nugget=np.power(flux_arr_train, 2)/ivar_train) #regr="quadratic") gp = GaussianProcess() gp.fit(X_arr_train, flux_arr_train) gp_predictions = gp.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, gp_predictions) print mse ''' ''' del lin_predictions del plin_predictions del linear del poly_linear ridge = RidgeCV() poly_ridge = Pipeline([('poly', PolynomialFeatures(degree=2)), ('ridge', RidgeCV())]) ridge.fit(X_arr_train, flux_arr_train) poly_ridge.fit(X_arr_train, flux_arr_train) ridge_predictions = ridge.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, ridge_predictions) print mse pridge_predictions = poly_ridge.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, pridge_predictions) print mse del ridge_predictions del pridge_predictions del ridge del poly_ridge lasso = LassoCV(n_jobs=-1) poly_lasso = Pipeline([('poly', PolynomialFeatures(degree=2)), ('lasso', LassoCV(n_jobs=-1))]) lasso.fit(X_arr_train, flux_arr_train) poly_lasso.fit(X_arr_train, flux_arr_train) lasso_predictions = lasso.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, lasso_predictions) print mse plasso_predictions = poly_lasso.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, plasso_predictions) print mse del lasso_predictions del plasso_predictions del lasso del poly_lasso elastic = ElasticNetCV(n_jobs=-1) poly_elastic = Pipeline([('poly', PolynomialFeatures(degree=2)), ('elastic', ElasticNetCV(n_jobs=-1))]) elastic.fit(X_arr_train, flux_arr_train) poly_elastic.fit(X_arr_train, flux_arr_train) elastic_predictions = elastic.predict(X_arr_test) mse = 
mean_squared_error(flux_arr_test, elastic_predictions) print mse pelastic_predictions = poly_elastic.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, pelastic_predictions) print mse del elastic_predictions del pelastic_predictions del elastic del poly_elastic ''' ''' pls = PLS(n_components=8, max_iter=2000) pls.fit(X_arr_train, flux_arr_train) pls_predictions = pls.predict(X_arr_test) mse = mean_squared_error(flux_arr_test, pls_predictions) ''' '''