def prep(n, labelcol):
    """
    Load the data and build the feature (x) and target (y) extractors.

    Parameters
    ----------
    n : passed straight to ``pf.open``; the table is read from extension 1.
    labelcol : column identifier passed to ``data.field`` to fetch labels.
        If the lookup fails, every label is set to the placeholder -99.

    Returns
    -------
    x : FeatureExtractor built from ``featurenames``, rows cut by ``ylim``.
    y : FeatureExtractor built from ``targetnames`` and reduced to 10 columns.
    labels : array of labels aligned with the rows kept in ``x`` and ``y``.
    """
    f = pf.open(n)
    try:
        data = f[1].data
    finally:
        # always release the file handle, even if extension 1 is unreadable
        f.close()

    try:
        labels = data.field(labelcol)
    except Exception:
        # Best-effort fallback: no usable label column, so mark every row
        # with -99.  `Exception` (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        labels = np.full(data.field(0).size, -99.0)

    featurenames = ['cmodelmag', 'psffwhm', 'petror50', 'petror90']
    targetnames = ['psfmag', 'cmodelmag']
    filters = ['u', 'g', 'r', 'i', 'z']

    x = FeatureExtractor(data, featurenames, filters, color_band='r',
                         scale_kind=None, mag_range=None)

    # keep only the rows the feature extractor selected (x.idx)
    data = data[x.idx]
    labels = labels[x.idx]

    y = FeatureExtractor(data, targetnames, filters, color_band=None,
                         scale_kind=None, mag_range=None)

    # tailor to target, set for psf - model
    # NOTE(review): assumes columns 0:5 / 5:10 are psfmag / cmodelmag per
    # filter and 10:15 / 15:20 their uncertainties -- confirm against
    # FeatureExtractor.
    y.features[:, :5] = y.features[:, :5] - y.features[:, 5:10]
    y.features[:, 5:10] = np.sqrt(y.features[:, 10:15] ** 2. +
                                  y.features[:, 15:20] ** 2.)
    y.features = y.features[:, :10]

    # restrict y range (column 2; presumably the 'r' filter, the third
    # entry of `filters`)
    ylim = 10.
    ind = y.features[:, 2] < ylim
    x.features = x.features[ind]
    y.features = y.features[ind]
    labels = labels[ind]
    # NOTE(review): only y.Ndata is refreshed here; x.Ndata (if it exists)
    # is left untouched, as in the original.
    y.Ndata = y.features.shape[0]

    return x, y, labels
# Build the input features, then the regression targets on the rows the
# feature extractor kept (x.idx).
x = FeatureExtractor(
    data,
    featurenames,
    filters,
    color_band="r",
    scale_kind=None,
    mag_range=None,
)
data = data[x.idx]
y = FeatureExtractor(
    data,
    targetnames,
    filters,
    color_band=None,
    scale_kind=None,
    mag_range=None,
)

# tailor to target, set for psf - model
# NOTE(review): assumes columns 0:5 / 5:10 hold the two target magnitudes
# per filter and 10:15 / 15:20 their uncertainties -- confirm against
# FeatureExtractor.
psf_minus_model = y.features[:, :5] - y.features[:, 5:10]
combined_err = np.sqrt(
    y.features[:, 10:15] ** 2.0 + y.features[:, 15:20] ** 2.0
)
y.features[:, :5] = psf_minus_model
y.features[:, 5:10] = combined_err
y.features = y.features[:, :10]

# restrict x range: keep rows whose x column 2 lies strictly inside xlim
xlim = (19.5, 20.5)
x_col = x.features[:, 2]
ind = (x_col > xlim[0]) & (x_col < xlim[1])
x.features, y.features = x.features[ind], y.features[ind]
y.Ndata = y.features.shape[0]

# restrict y range: keep rows whose y column 2 is below ylim
ylim = 0.5
ind = y.features[:, 2] < ylim
x.features, y.features = x.features[ind], y.features[ind]
y.Ndata = y.features.shape[0]

# specify scikit regressor
rname = "RF"
if rname == "KNN":
    rgr = KNeighborsRegressor(n_neighbors=8)
elif rname == "RF":
    rgr = RandomForestRegressor(n_estimators=128)