Exemplo n.º 1
0
def ExtractFeature(feature, target_spec_ids = None, data_dir = './repaired/*.pkl', nproc = 1):
    '''Extract the designated feature (and related info) from the data files.

    Expands the data_dir wildcard into concrete input files, builds a
    FeatureExtractor reducer for the requested feature / spec ids, and
    runs it through a ReduceEngine with nproc workers.
    '''
    files = ExpandWildcard(data_dir)
    extractor = FeatureExtractor(feature, target_spec_ids)
    return mr.ReduceEngine(extractor, nproc).Start(files)
Exemplo n.º 2
0
        dummy, tag = dr7pca.DoPCA(None, feature, 0)
        pca_model_file = "pca_model_{0}.pkl".format(tag)
        if os.path.exists(pca_model_file):
            log.info(
                'Using existing PCA model from {0}'.format(pca_model_file))
            pca_model = ml.PCA(pca_model_file)
        else:
            pca_model = dr7pca.DoPCA(input_files, feature, 0, poolsize)[0]
            pca_model.Save(pca_model_file)
        pca_model.R = 2  # !!
    else:
        pca_model = None

    # clustering
    reducer = ClusteringReducer(feature, n_clusters, sample_rate, pca_model)
    engine = mr.ReduceEngine(reducer, poolsize)
    centers, distortion = engine.Start(input_files)
    run += '[{0:.3}]'.format(distortion)

    reducer = QuantizationReducer(feature, centers,
                                  pca_model if dopca else None)
    engine = mr.ReduceEngine(reducer, poolsize)
    result = engine.Start(input_files)

    coord = result['feature'].T
    f = figure()
    scatter(coord[0], coord[1], s=20, c=result['cluster_id'])
    show()

    # SavePickle("{0}_{1}.pkl".format(output_file, run), result)
Exemplo n.º 3
0
    # zmax = 0.36

    log.info('Rebin: raw wave in [%0.1f,%0.1f], c in [%0.5f,%0.5f], z in [%0.2f,%0.2f], No O line = %s' % (
            10**rebin_c0, 10**(rebin_c0 + rebin_c1*rebin_nbin),
            rebin_c0, rebin_c0 + rebin_c1*rebin_nbin,
            zmin, zmax, remove_sky_absorption))

    # fields to extract from the SDSS DB
    fields = ['specObjID', 'mjd', 'plate', 'fiberID', 'bestObjID', 'specClass', 'fiberMag_g', 'fiberMag_r', 'fiberMag_i', 'fiberMag_u', 'fiberMag_z']

    # retrieving the plate list
    log.info('Retrieving object list...')
    db = GetDB(input_dir + '/sdss.db3', 1000)
    cmd = "SELECT {0}, fits_url from object_list where {1}".format(','.join(fields), selector)
    cur = db.execute(cmd)

    rows = [list(r) for r in cur]
    plates = [r[2] for r in rows]
    # update the path to the fits file
    for r in rows:
        r[-1] = GetFITSPath(input_dir, r[1], r[2], r[3])
    log.info('{0} objects found'.format(len(rows)))

    reducer = PlateReducer(fields, rebin_c0, rebin_c1, rebin_nbin, zmin, zmax, remove_sky_absorption, {1215.67:'Ly\\d\\ga'})
    engine = mr.ReduceEngine(reducer, nproc)

    jobs = zip(plates, rows)
    nobjs = engine.Start(jobs)

    log.info("{0} objects processed".format(nobjs))