示例#1
0
    def run(self, modelcard=None, testcard=None, stokes='I'):
        """
        Cross-validate sets of image-plane models on a set of testing samples.

        Files matching ``modelcard`` and ``testcard`` are globbed and sorted.
        The sorted model files are then split into consecutive groups of
        ``len(testfiles)`` entries. Within a group the ``j``-th model file is
        scored against the ``j``-th test file, and the scores of the group
        are summarised by their mean and standard deviation.

        :param modelcard:
            Wildcard of file names ~ 'model_0i_0jofN.txt', where model in
            'model_0i_0jofN.txt' file is from modelling ``0j``-th training
            sample ('train_0jofN.FITS') with ``0i``-th model.

        :param testcard:
            Wildcard of file names ~ 'test_0jofN.FITS'.

        :param stokes: (optional)
            Stokes parameter passed to ``Model.add_from_txt`` and to
            ``UVData.cv_score``. (default: ``'I'``)

        :return:
            List of lists ['model#<i>', mean CV-score, std of CV-scores], one
            entry per group of model files, with ``i`` counted from 1.

        :raise ValueError:
            If no test files match ``testcard`` or if the number of model
            files is not a multiple of the number of test files.
        """

        modelfiles = sorted(glob.glob(modelcard))
        testfiles = sorted(glob.glob(testcard))
        ntest = len(testfiles)

        # Validate before dividing: an empty test set would otherwise show up
        # as a bare ZeroDivisionError, and ``assert`` is stripped under -O.
        if not ntest:
            raise ValueError("No test files match " + repr(testcard))
        if len(modelfiles) % ntest:
            raise ValueError("Number of model files (" + str(len(modelfiles)) +
                             ") is not a multiple of the number of test" +
                             " files (" + str(ntest) + ")")

        # Floor division keeps ``nmodels`` an integer so that ``range``
        # accepts it when ``/`` is true division.
        nmodels = len(modelfiles) // ntest

        print("modelfiles : " + str(modelfiles))
        print("testfiles : " + str(testfiles))

        result = list()

        for i in range(nmodels):
            # Model files of the i-th group, in the same order as ``testfiles``
            models = modelfiles[ntest * i:ntest * (i + 1)]
            print("Using models " + str(models) +
                  " and testing sample " + str(testfiles))
            cv_scores = list()
            for modelfile, testfile in zip(models, testfiles):
                model = Model()
                model.add_from_txt(modelfile, stoke=stokes)
                print("Using test file " + str(testfile))
                data = UVData(testfile)
                cv_score = data.cv_score(model, stokes=stokes)
                print("cv_score for one testing sample is " + str(cv_score))
                cv_scores.append(cv_score)

            mean_cv_score = np.mean(cv_scores)
            std_cv_score = np.std(cv_scores)
            print(mean_cv_score, std_cv_score)

            result.append(["model#" + str(i + 1), mean_cv_score, std_cv_score])

        return result
示例#2
0
# Observed visibilities that every sampled model is scored against
uv_fits_path = os.path.join(data_dir, uv_fits)
uvdata = UVData(uv_fits_path)

# Components of the difmap model; used below as templates (class and number
# of parameters) for rebuilding models from the sampled parameter vectors.
original_comps = import_difmap_model(mdl_file, data_dir)
lnpost, sampler = fit_model_with_mcmc(
    uv_fits_path, os.path.join(data_dir, mdl_file),
    samples_file='samples_of_mcmc.txt',
    outdir='/home/ilya/code/vlbi_errors/bin_c1/')
# Keep every 10th row of the flattened chain
samples = sampler.flatchain[::10, :]

# Create a sample of models with parameters from posterior distribution:
# 100 rows of ``samples`` are picked at random (with replacement).
models = list()
picked_rows = np.random.randint(len(samples), size=100)
for params in samples[picked_rows]:
    model = Model(stokes='I')
    start = 0
    for template in original_comps:
        # Each component consumes the next ``template.size`` parameters
        stop = start + template.size
        model.add_component(template.__class__(*(params[start: stop])))
        start = stop
    models.append(model)

# Score every sampled model on the same pair of baselines
cv_scores = [uvdata.cv_score(model, baselines=[774, 1546])
             for model in models]

np.savetxt(os.path.join(data_dir, 'cv_scores_eg.txt'), np.array(cv_scores))

# # Now check delta
# modelfit_difmap(uv_fits, '0235+164.c1.2008_09_02_cgauss.mdl',
#                 '0235+164.c1.2008_09_02_cgauss_fitted_fitted.mdl', niter=100,
#                 path=data_dir, mdl_path=data_dir, out_path=data_dir)
示例#3
0
    n_folds = 10
    for niter in cc_pars:
        print "Using niter = {}".format(niter)
        kfold = KFoldCV(uv_fits, n_folds)
        cv = list()
        for j, (tr_fname, ts_fname) in enumerate(kfold):
            clean_n(kfold.train_fname,
                    'trained_model_{}.FITS'.format(niter),
                    'I', (1024, 0.1),
                    niter=niter,
                    path_to_script=path_to_script,
                    show_difmap_output=True)
            tr_model = create_model_from_fits_file(
                'trained_model_{}.FITS'.format(niter))
            ts_uvdata = UVData(ts_fname)
            score = ts_uvdata.cv_score(tr_model)
            print "{} of {} gives {}".format(j + 1, n_folds, score)
            cv.append(score)
        cv_scores[niter] = (np.nanmean(cv), np.nanstd(cv))
        print "CV gives {} +/- {}".format(np.nanmean(cv), np.nanstd(cv))

    print cv_scores
    n = cv_scores.keys()
    scores = [cv_scores[i][0] for i in n]
    errors = [cv_scores[i][1] for i in n]

    import matplotlib
    label_size = 12
    matplotlib.rcParams['xtick.labelsize'] = label_size
    matplotlib.rcParams['ytick.labelsize'] = label_size
    matplotlib.rcParams['axes.titlesize'] = label_size