def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description= 'Compute PCA/ICA/NMF/etc. components over set of stacked spectra, save those out, and pickle model' ) parser.add_argument('--pattern', type=str, default='stacked*exp??????.*', metavar='PATTERN', help='File pattern for stacked sky fibers.') parser.add_argument('--path', type=str, default='.', metavar='PATH', help='Path to work from, if not ' '.' '') parser.add_argument( '--compacted_path', type=str, default=None, metavar='COMPATED_PATH', help= 'Path to find compacted/arrayized data; setting this will cause --path, --pattern to be ignored' ) parser.add_argument('--n_components', type=int, default=40, metavar='N_COMPONENTS', help='Number of ICA/PCA/etc. components') parser.add_argument( '--method', type=str, default='ICA', metavar='METHOD', choices=['ICA', 'PCA', 'SPCA', 'NMF', 'ISO', 'KPCA', 'FA', 'DL'], help='Which dim. reduction method to use') parser.add_argument( '--scale', action='store_true', help= 'Should inputs variance be scaled? Defaults to mean subtract and value scale, but w/out this does not scale variance.' ) parser.add_argument('--no_scale', action='store_true', help='Suppresses all scaling') parser.add_argument( '--ivar_cutoff', type=float, default=0.001, metavar='IVAR_CUTOFF', help='data with inverse variace below cutoff is masked as if ivar==0') parser.add_argument( '--n_iter', type=int, default=1200, metavar='MAX_ITER', help= 'Maximum number of iterations to allow for convergence. For SDSS data 1000 is a safe number of ICA, while SPCA requires larger values e.g. ~2000 to ~2500' ) parser.add_argument('--n_jobs', type=int, default=None, metavar='N_JOBS', help='N_JOBS') args = parser.parse_args() comb_flux_arr, comb_exposure_arr, comb_ivar_arr, comb_masks, comb_wavelengths = iz.load_data( args) model = iz.get_model(args.method, n=args.n_components, n_neighbors=None, max_iter=args.n_iter, random_state=iz.random_state, n_jobs=args.n_jobs) ss = None if args.no_scale: scaled_flux_arr = comb_flux_arr else: ss = skpp.StandardScaler(with_std=False) if args.scale: ss = skpp.StandardScaler(with_std=True) scaled_flux_arr = ss.fit_transform(comb_flux_arr) #Heavily copied from J. Vanderplas/astroML bayesian_blocks.py N = comb_wavelengths.size step = args.n_components * 4 edges = np.concatenate([ comb_wavelengths[:1:step], 0.5 * (comb_wavelengths[1::step] + comb_wavelengths[:-1:step]), comb_wavelengths[-1::step] ]) block_length = comb_wavelengths[-1::step] - edges # arrays to store the best configuration nn_vec = np.ones(N / step) * step best = np.zeros(N, dtype=float) last = np.zeros(N, dtype=int) for R in range(N / step): print("R: " + str(R)) width = block_length[:R + 1] - block_length[R + 1] count_vec = np.cumsum(nn_vec[:R + 1][::-1])[::-1] #width = nn_vec[:R + 1] - nn_vec[R + 1] #count_vec = np.cumsum(nn_vec[:R + 1][::-1])[::-1] #print(width) #print(count_vec) #raw_input("Pausing... ") fit_vec = map( lambda n: iz.score_via_CV(['LL'], scaled_flux_arr[:, :n], model, ss, args.method, folds=3, n_jobs=args.n_jobs), count_vec) fit_vec = [d["mle"] for d in fit_vec] #print(fit_vec) fit_vec[1:] += best[:R] #print(fit_vec) i_max = np.argmax(fit_vec) last[R] = i_max best[R] = fit_vec[i_max] #print(best) change_points = np.zeros(N / step, dtype=int) i_cp = N / step ind = N / step while True: i_cp -= 1 change_points[i_cp] = ind if ind == 0: break ind = last[ind - 1] change_points = change_points[i_cp:] print(edges[change_points]) '''
def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Compute PCA/ICA/NMF/etc. components over set of stacked spectra, save those out, and pickle model' ) subparsers = parser.add_subparsers(dest='subparser_name') parser.add_argument( '--pattern', type=str, default='stacked*exp??????.*', metavar='PATTERN', help='File pattern for stacked sky fibers.' ) parser.add_argument( '--path', type=str, default='.', metavar='PATH', help='Path to work from, if not ''.''' ) parser.add_argument( '--compacted_path', type=str, default=None, metavar='COMPATED_PATH', help='Path to find compacted/arrayized data; setting this will cause --path, --pattern to be ignored' ) parser.add_argument( '--method', type=str, default=['ICA'], metavar='METHOD', choices=['ICA', 'PCA', 'SPCA', 'NMF', 'ISO', 'KPCA', 'FA', 'DL'], nargs='+', help='Which dim. reduction method to use' ) parser.add_argument( '--scale', action='store_true', help='Should inputs be scaled? Will mean subtract and value scale, but does not scale variace.' ) parser.add_argument( '--ivar_cutoff', type=float, default=0.001, metavar='IVAR_CUTOFF', help='data with inverse variace below cutoff is masked as if ivar==0' ) parser.add_argument( '--n_iter', type=int, default=1200, metavar='MAX_ITER', help='Maximum number of iterations to allow for convergence. For SDSS data 1000 is a safe number of ICA, while SPCA requires larger values e.g. ~2000 to ~2500' ) parser.add_argument( '--n_jobs', type=int, default=None, metavar='N_JOBS', help='N_JOBS' ) parser_compare = subparsers.add_parser('compare') parser_compare.add_argument( '--max_components', type=int, default=50, metavar='COMP_MAX', help='Max number of components to use/test' ) parser_compare.add_argument( '--min_components', type=int, default=0, metavar='COMP_MIN', help='Min number of compoenents to use/test' ) parser_compare.add_argument( '--step_size', type=int, default=5, metavar='COMP_STEP', help='Step size from comp_min to comp_max' ) parser_compare.add_argument( '--comparison', choices=['EXP_VAR', 'R2', 'MSE', 'MAE'], nargs='*', default=['EXP_VAR'], help='Comparison methods: Explained variance (score), R2 (score), mean sq. error (loss), MEDIAN absolute error (loss)' ) parser_compare.add_argument( '--mle_if_avail', action='store_true', help='In additon to --comparison, include MLE if PCA or FA methods specified' ) parser_compare.add_argument( '--plot_example_reconstruction', action='store_true', help='Pick a random spectrum, plot its actual and reconstructed versions' ) parser_build = subparsers.add_parser('build') parser_build.add_argument( '--n_components', type=int, default=40, metavar='N_COMPONENTS', help='Number of ICA/PCA/etc. components' ) parser_build.add_argument( '--n_neighbors', type=int, default=10, metavar='N_NEIGHBORS', help='Number of neighbots for e.g. IsoMap' ) args = parser.parse_args() comb_flux_arr, comb_exposure_arr, comb_ivar_arr, comb_masks, comb_wavelengths = iz.load_data(args) if 'DL' in args.method: flux_arr = comb_flux_arr.astype(dtype=np.float64) else: flux_arr = comb_flux_arr scaled_flux_arr = None ss = None if args.scale: ss = skpp.StandardScaler(with_std=False) scaled_flux_arr = ss.fit_transform(flux_arr) else: scaled_flux_arr = flux_arr if args.subparser_name == 'compare': fig, ax1 = plt.subplots() ax2 = ax1.twinx() for method in args.method: model = iz.get_model(method, max_iter=args.n_iter, random_state=iz.random_state, n_jobs=args.n_jobs) scores = {} mles_and_covs = args.mle_if_avail and (method == 'FA' or method == 'PCA') n_components = np.arange(args.min_components, args.max_components+1, args.step_size) for n in n_components: print("Cross validating for n=" + str(n) + " on method " + method) model.n_components = n comparisons = iz.score_via_CV(args.comparison, flux_arr if method == 'NMF' else scaled_flux_arr, model, method, n_jobs=args.n_jobs, include_mle=mles_and_covs, modeler=_iter_modeler, scorer=_iter_scorer) for key, val in comparisons.items(): if key in scores: scores[key].append(val) else: scores[key] = [val] if mles_and_covs: #ax2.axhline(cov_mcd_score(scaled_flux_arr, args.scale), color='violet', label='MCD Cov', linestyle='--') ax2.axhline(cov_lw_score(scaled_flux_arr, args.scale), color='orange', label='LW Cov', linestyle='--') for key, score_list in scores.items(): if key != 'mle': ax1.plot(n_components, score_list, label=method + ':' + key + ' scores') else: ax2.plot(n_components, score_list, '-.', label=method + ' mle scores') ax1.set_xlabel('nb of components') ax1.set_ylabel('CV scores', figure=fig) ax1.legend(loc='lower left') ax2.legend(loc='lower right') plt.show()
def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Compute PCA/ICA/NMF/etc. components over set of stacked spectra, save those out, and pickle model' ) parser.add_argument( '--pattern', type=str, default='stacked*exp??????.*', metavar='PATTERN', help='File pattern for stacked sky fibers.' ) parser.add_argument( '--path', type=str, default='.', metavar='PATH', help='Path to work from, if not ''.''' ) parser.add_argument( '--compacted_path', type=str, default=None, metavar='COMPATED_PATH', help='Path to find compacted/arrayized data; setting this will cause --path, --pattern to be ignored' ) parser.add_argument( '--n_components', type=int, default=40, metavar='N_COMPONENTS', help='Number of ICA/PCA/etc. components' ) parser.add_argument( '--method', type=str, default='ICA', metavar='METHOD', choices=['ICA', 'PCA', 'SPCA', 'NMF', 'ISO', 'KPCA', 'FA', 'DL'], help='Which dim. reduction method to use' ) parser.add_argument( '--scale', action='store_true', help='Should inputs variance be scaled? Defaults to mean subtract and value scale, but w/out this does not scale variance.' ) parser.add_argument( '--no_scale', action='store_true', help='Suppresses all scaling' ) parser.add_argument( '--ivar_cutoff', type=float, default=0.001, metavar='IVAR_CUTOFF', help='data with inverse variace below cutoff is masked as if ivar==0' ) parser.add_argument( '--n_iter', type=int, default=1200, metavar='MAX_ITER', help='Maximum number of iterations to allow for convergence. For SDSS data 1000 is a safe number of ICA, while SPCA requires larger values e.g. ~2000 to ~2500' ) parser.add_argument( '--n_jobs', type=int, default=None, metavar='N_JOBS', help='N_JOBS' ) args = parser.parse_args() comb_flux_arr, comb_exposure_arr, comb_ivar_arr, comb_masks, comb_wavelengths = iz.load_data(args) model = iz.get_model(args.method, n=args.n_components, n_neighbors=None, max_iter=args.n_iter, random_state=iz.random_state, n_jobs=args.n_jobs) ss = None if args.no_scale: scaled_flux_arr = comb_flux_arr else: ss = skpp.StandardScaler(with_std=False) if args.scale: ss = skpp.StandardScaler(with_std=True) scaled_flux_arr = ss.fit_transform(comb_flux_arr) #Heavily copied from J. Vanderplas/astroML bayesian_blocks.py N = comb_wavelengths.size step = args.n_components * 4 edges = np.concatenate([comb_wavelengths[:1:step], 0.5 * (comb_wavelengths[1::step] + comb_wavelengths[:-1:step]), comb_wavelengths[-1::step]]) block_length = comb_wavelengths[-1::step] - edges # arrays to store the best configuration nn_vec = np.ones(N/step) * step best = np.zeros(N, dtype=float) last = np.zeros(N, dtype=int) for R in range(N/step): print("R: " + str(R)) width = block_length[:R + 1] - block_length[R + 1] count_vec = np.cumsum(nn_vec[:R + 1][::-1])[::-1] #width = nn_vec[:R + 1] - nn_vec[R + 1] #count_vec = np.cumsum(nn_vec[:R + 1][::-1])[::-1] #print(width) #print(count_vec) #raw_input("Pausing... ") fit_vec = map(lambda n: iz.score_via_CV(['LL'], scaled_flux_arr[:, :n], model, ss, args.method, folds=3, n_jobs=args.n_jobs), count_vec) fit_vec = [d["mle"] for d in fit_vec] #print(fit_vec) fit_vec[1:] += best[:R] #print(fit_vec) i_max = np.argmax(fit_vec) last[R] = i_max best[R] = fit_vec[i_max] #print(best) change_points = np.zeros(N/step, dtype=int) i_cp = N/step ind = N/step while True: i_cp -= 1 change_points[i_cp] = ind if ind == 0: break ind = last[ind - 1] change_points = change_points[i_cp:] print(edges[change_points]) '''