示例#1
0
def prep(n, labelcol):
    """
    Load a FITS table and build the feature/target extractors used for fitting.

    Parameters
    ----------
    n : str
        Path of the file handed to ``pf.open``; the table is read from
        ``f[1]``.
    labelcol : str
        Name of the column holding the labels.  If the column cannot be
        read, every label falls back to the sentinel value -99.

    Returns
    -------
    x : FeatureExtractor
        Extractor built from ``featurenames``; its ``features`` rows are
        restricted by the same mask applied to ``y``.
    y : FeatureExtractor
        Extractor built from ``targetnames``; ``features`` is reduced to 10
        columns (see the inline comments) and ``Ndata`` is refreshed to the
        remaining row count.
    labels : numpy.ndarray
        Labels aligned row-for-row with ``x.features`` / ``y.features``.
    """
    # Close the file even if reading the table raises.
    f = pf.open(n)
    try:
        data = f[1].data
    finally:
        f.close()

    try:
        labels = data.field(labelcol)
    except Exception:
        # Best-effort fallback: no usable label column, so mark every row
        # with -99.  `Exception` (rather than a bare except) keeps
        # KeyboardInterrupt / SystemExit propagating.
        labels = np.zeros(data.field(0).size) - 99

    featurenames = ['cmodelmag', 'psffwhm', 'petror50', 'petror90']
    targetnames = ['psfmag', 'cmodelmag']
    filters = ['u', 'g', 'r', 'i', 'z']

    x = FeatureExtractor(data, featurenames, filters, color_band='r', scale_kind=None,
                         mag_range=None)
    # Keep only the rows selected by the feature extractor so that the
    # targets and labels stay aligned with x.
    data = data[x.idx]
    labels = labels[x.idx]
    y = FeatureExtractor(data, targetnames, filters, color_band=None, scale_kind=None,
                         mag_range=None)

    # Tailor to target: columns 0-4 become (cols 0-4) - (cols 5-9), i.e.
    # psf - model, and columns 5-9 become the quadrature sum of cols 10-14
    # and cols 15-19.
    # NOTE(review): presumably the layout is [psfmag x5, cmodelmag x5,
    # psfmag err x5, cmodelmag err x5] -- confirm against FeatureExtractor.
    y.features[:, :5] = y.features[:, :5] - y.features[:, 5:10]
    y.features[:, 5:10] = np.sqrt(y.features[:, 10:15] ** 2. + y.features[:, 15:20] ** 2.)
    y.features = y.features[:, :10]

    # Restrict y range: keep rows whose target column 2 is below ylim
    # (presumably the 'r' band, given the filter order -- TODO confirm).
    ylim = 10.
    ind = y.features[:, 2] < ylim
    x.features = x.features[ind]
    y.features = y.features[ind]
    labels = labels[ind]
    # NOTE(review): only y.Ndata is refreshed here; x.Ndata (if it exists)
    # is left untouched, as in the original code.
    y.Ndata = y.features.shape[0]

    return x, y, labels
    x = FeatureExtractor(data, featurenames, filters, color_band="r", scale_kind=None, mag_range=None)
    data = data[x.idx]
    y = FeatureExtractor(data, targetnames, filters, color_band=None, scale_kind=None, mag_range=None)

    # tailor to target, set for psf - model
    y.features[:, :5] = y.features[:, :5] - y.features[:, 5:10]
    y.features[:, 5:10] = np.sqrt(y.features[:, 10:15] ** 2.0 + y.features[:, 15:20] ** 2.0)
    y.features = y.features[:, :10]

    # restrict x range
    xlim = (19.5, 20.5)
    ind = (x.features[:, 2] > xlim[0]) & (x.features[:, 2] < xlim[1])
    x.features = x.features[ind]
    y.features = y.features[ind]
    y.Ndata = y.features.shape[0]

    # restrict y range
    ylim = 0.5
    ind = y.features[:, 2] < ylim
    x.features = x.features[ind]
    y.features = y.features[ind]
    y.Ndata = y.features.shape[0]

    # specify scikit regressor
    rname = "RF"
    if rname == "KNN":
        rgr = KNeighborsRegressor(n_neighbors=8)
    if rname == "RF":
        rgr = RandomForestRegressor(n_estimators=128)