Exemplo n.º 1
0
class KFoldCV(object):
    """Iterate over ``k`` train/test splits of a UV data file, folded per baseline.

    The samples of every baseline are split into ``k`` folds independently.
    Iterating over the instance writes, for each fold, one training and one
    testing file (always the same two file names, overwritten every time) and
    yields their names.

    :param fname:
        Path passed to ``UVData`` to load the data.
    :param k:
        Number of folds.
    :param basename: (optional)
        Prefix of the output files ``<basename>_train.FITS`` and
        ``<basename>_test.FITS``. (default: ``'cv'``)
    :param seed: (optional)
        Passed to ``KFold`` as ``random_state``. (default: ``None``)
    """

    def __init__(self, fname, k, basename='cv', seed=None):
        self.fname = fname
        self.uvdata = UVData(fname)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname = "{}_test.FITS".format(basename)
        self.train_fname = "{}_train.FITS".format(basename)
        self.baseline_folds = None
        self.create_folds()

    def create_folds(self):
        """Build ``self.baseline_folds``.

        The result maps a baseline to a list of ``k`` tuples
        ``(train_mask, test_mask)`` of boolean arrays with the length of that
        baseline's index array. Baselines for which ``KFold`` raises
        ``ValueError`` (``k`` larger than the number of samples) are left out.
        """
        baseline_folds = dict()
        for bl, indxs in self.uvdata._indxs_baselines.items():
            n_samples = np.count_nonzero(indxs)
            print("Baseline {} has {} samples".format(bl, n_samples))
            # Positions (in the full record array) of this baseline's samples.
            sample_positions = np.nonzero(indxs)[0]
            try:
                # NOTE(review): this is the ``KFold(n, n_folds)`` call style
                # where the object itself is iterated - confirm it matches
                # the KFold imported by this file.
                kfold = KFold(n_samples,
                              self.k,
                              shuffle=True,
                              random_state=self.seed)
                folds = list()
                for train, test in kfold:
                    tr = to_boolean_array(sample_positions[train], len(indxs))
                    te = to_boolean_array(sample_positions[test], len(indxs))
                    folds.append((tr, te))
            # When ``k`` is larger than the number of baseline samples
            except ValueError:
                continue
            # Register the baseline only once all its folds were built, so a
            # partially filled list can never be indexed in ``__iter__``.
            baseline_folds[bl] = folds
        self.baseline_folds = baseline_folds

    def __iter__(self):
        """Write the train/test files of each fold and yield their names.

        :return:
            Generator of tuples ``(train_fname, test_fname)``.
        """
        for i in range(self.k):
            data = self.uvdata.hdu.data
            # Boolean masks from the start: a float array of zeros is not a
            # valid index if no baseline contributes any fold.
            train_indxs = np.zeros(len(data), dtype=bool)
            test_indxs = np.zeros(len(data), dtype=bool)
            for kfolds in self.baseline_folds.values():
                itrain, itest = kfolds[i]
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = data[train_indxs]
            test_data = data[test_indxs]
            self.uvdata.save(self.test_fname, test_data, rewrite=True)
            self.uvdata.save(self.train_fname, train_data, rewrite=True)

            yield self.train_fname, self.test_fname
Exemplo n.º 2
0
class KFoldCV(object):
    """K-fold cross-validation of a UV data file, folded per baseline.

    Samples with positive weight of each selected baseline are split into
    ``k`` folds. All other baselines go entirely to the training set of every
    fold.

    :param uv_fits_path:
        Path passed to ``UVData`` to load the data.
    :param k:
        Number of folds.
    :param basename: (optional)
        Prefix of the output files ``<basename>_train_<i>.fits`` and
        ``<basename>_test_<i>.fits``. (default: ``'cv'``)
    :param seed: (optional)
        Stored on the instance. It is not forwarded to ``KFold`` because the
        folds are built without shuffling. (default: ``None``)
    :param baselines: (optional)
        Iterable of baselines to fold. If ``None`` then all baselines of the
        data are folded. (default: ``None``)
    :param stokes: (optional)
        One of ``'I'``, ``'RR'``, ``'LL'``. (default: ``'I'``)

    :raises Exception:
        If ``stokes`` is not one of the supported values.
    """

    def __init__(self,
                 uv_fits_path,
                 k,
                 basename='cv',
                 seed=None,
                 baselines=None,
                 stokes='I'):
        if stokes not in ('I', 'RR', 'LL'):
            raise Exception("Only stokes (I, RR, LL) supported!")
        self.stokes = stokes
        self.uv_fits_path = uv_fits_path
        self.uvdata = UVData(uv_fits_path)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname_base = "{}_test".format(basename)
        self.train_fname_base = "{}_train".format(basename)
        self.baseline_folds = None
        self.create_folds(baselines)

    def create_folds(self, baselines=None):
        """Build ``self.baseline_folds``.

        The result maps a baseline to a list of ``k`` tuples
        ``(train_mask, test_mask)`` of boolean arrays. A selected baseline
        that can not be split (``KFold`` raises ``ValueError``, e.g. ``k`` is
        larger than its number of usable samples) is reported and left out.

        :param baselines: (optional)
            Iterable of baselines to fold. If ``None`` then all baselines of
            the data are folded. (default: ``None``)
        """
        if baselines is None:
            baselines = self.uvdata.baselines

        if self.stokes == 'I':
            stokes = ['RR', 'LL']
            average_stokes = True
        elif self.stokes in ('RR', 'LL'):
            stokes = [self.stokes]
            average_stokes = False
        else:
            raise Exception("Only stokes (I, RR, LL) supported!")

        baseline_folds = dict()
        for bl in baselines:
            bl_indxs = self.uvdata._indxs_baselines[bl]
            print("Baseline {} has {}"
                  " samples".format(bl, np.count_nonzero(bl_indxs)))
            bl_indxs_pw = self.uvdata.pw_indxs_baseline(
                bl,
                average_bands=True,
                stokes=stokes,
                average_stokes=average_stokes)
            bl_indxs = mask_boolean_with_boolean(bl_indxs, bl_indxs_pw)
            print("Baseline {} has {} samples with"
                  " positive weight".format(bl, np.count_nonzero(bl_indxs)))

            # Positions (in the full record array) of the usable samples.
            sample_positions = np.nonzero(bl_indxs)[0]
            try:
                # ``random_state`` is deliberately not passed: without
                # shuffling it has no effect, and recent scikit-learn
                # versions raise ``ValueError`` for that combination.
                kfold = KFold(self.k, shuffle=False)
                folds = list()
                # ``split`` needs an array-like with one entry per sample,
                # not the number of samples.
                for train, test in kfold.split(sample_positions):
                    tr = to_boolean_array(
                        sample_positions[train], len(bl_indxs))
                    te = to_boolean_array(
                        sample_positions[test], len(bl_indxs))
                    folds.append((tr, te))
            # E.g. when ``k`` is larger than the number of baseline samples
            except ValueError as e:
                print("Skipping baseline {}: {}".format(bl, e))
                continue
            # Register the baseline only once all its folds were built, so an
            # empty or partial list can never be indexed later.
            baseline_folds[bl] = folds

        # Add all other baselines data w/o folding - all data to train &
        # nothing to test
        selected = set(baselines)
        for bl in self.uvdata.baselines:
            if bl in selected:
                continue
            bl_indxs = self.uvdata._indxs_baselines[bl]
            baseline_folds[bl] = [
                (bl_indxs, np.zeros(len(bl_indxs), dtype=bool))
                for _ in range(self.k)]

        self.baseline_folds = baseline_folds

    def create_train_test_data(self, outdir=None):
        """Write the training and testing files of every fold.

        Fold ``i`` is saved to ``<train_fname_base>_<i>.fits`` and
        ``<test_fname_base>_<i>.fits`` inside ``outdir``.

        :param outdir: (optional)
            Directory for the output files. If ``None`` then the current
            working directory is used. (default: ``None``)
        """
        if outdir is None:
            outdir = os.getcwd()
        for i in range(self.k):
            data = self.uvdata.hdu.data
            # Boolean masks from the start: a float array of zeros is not a
            # valid index if no baseline contributes any fold.
            train_indxs = np.zeros(len(data), dtype=bool)
            test_indxs = np.zeros(len(data), dtype=bool)
            for kfolds in self.baseline_folds.values():
                itrain, itest = kfolds[i]
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = data[train_indxs]
            test_data = data[test_indxs]
            self.uvdata.save(os.path.join(
                outdir, self.test_fname_base + '_{}.fits'.format(i)),
                             test_data,
                             rewrite=True)
            self.uvdata.save(os.path.join(
                outdir, self.train_fname_base + '_{}.fits'.format(i)),
                             train_data,
                             rewrite=True)

    def cv_score(self,
                 initial_dfm_model_path=None,
                 data_dir=None,
                 niter=100,
                 path_to_script=None,
                 mapsize_clean=None):
        """Fit every training file and score the result on both data sets.

        Training and testing files are found in ``data_dir`` by globbing
        ``<train_fname_base>*`` and ``<test_fname_base>*`` and are paired in
        sorted order.

        :param initial_dfm_model_path: (optional)
            Path to the initial difmap model. If given then each training
            file is fitted with ``modelfit_difmap``, otherwise it is processed
            with ``clean_n``. (default: ``None``)
        :param data_dir: (optional)
            Directory with the train/test files; outputs are written there
            too. If ``None`` then the current working directory is used.
            (default: ``None``)
        :param niter: (optional)
            Number of iterations passed to the fitting routine.
            (default: ``100``)
        :param path_to_script: (optional)
            Passed to ``clean_n``. (default: ``None``)
        :param mapsize_clean: (optional)
            Passed to ``clean_n``. (default: ``None``)

        :return:
            Tuple ``(cv_scores, train_scores)`` of lists with the value of
            ``score`` for the testing and the training file of each split.
        """
        if data_dir is None:
            data_dir = os.getcwd()
        train_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.train_fname_base + '*')))
        test_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.test_fname_base + '*')))

        use_modelfit = initial_dfm_model_path is not None
        if use_modelfit:
            dfm_model_dir, dfm_model_fname = os.path.split(
                initial_dfm_model_path)

        cv_scores = list()
        train_scores = list()
        for i, (train_uv_fits_path, test_uv_fits_path) in enumerate(
                zip(train_uv_fits_paths, test_uv_fits_paths)):
            if use_modelfit:
                print("Calculating CV-score for {} of {} splits".format(
                    i + 1, self.k))
                print("Training FITS: {}".format(train_uv_fits_path))
                print("Testing FITS: {}".format(test_uv_fits_path))
                out_mdl_fname = 'train_{}.mdl'.format(i)
                modelfit_difmap(train_uv_fits_path,
                                dfm_model_fname,
                                out_mdl_fname,
                                niter=niter,
                                path=data_dir,
                                mdl_path=dfm_model_dir,
                                out_path=data_dir,
                                stokes=self.stokes,
                                show_difmap_output=True)
            else:
                out_mdl_fname = 'train_{}.fits'.format(i)
                # ``clean_n`` is used when different numbers of iterations
                # are tested; learning curves were created with
                # ``clean_difmap`` instead.
                clean_n(
                    train_uv_fits_path,
                    out_mdl_fname,
                    'I',
                    mapsize_clean,
                    niter=niter,
                    path_to_script=path_to_script,
                    outpath=data_dir,
                    show_difmap_output=True,
                )
            out_mdl_path = os.path.join(data_dir, out_mdl_fname)
            cv_scores.append(score(test_uv_fits_path, out_mdl_path))
            train_scores.append(score(train_uv_fits_path, out_mdl_path))

        return cv_scores, train_scores
Exemplo n.º 3
0
# For every frequency: create 100 artificial data sets from one fixed
# three-component model plus noise, fit each of them with difmap and then read
# the fitted models back.
for freq, uv_fits_fname in uv_fits_fnames.items():
    uv_fits_path = os.path.join(data_dir, uv_fits_fname)
    # Components of the model that replaces the observed visibilities.
    # NOTE(review): the meaning of the positional arguments is defined by
    # ``CGComponent`` and is not visible here.
    cg1 = CGComponent(2.0, 0., 0., 0.2)
    cg2 = CGComponent(1.0, 0., 0.3, 0.3)
    cg3 = CGComponent(0.5, 0., 1.5, 0.4)
    mdl = Model(stokes='I')
    mdl.add_components(cg1, cg2, cg3)
    # Noise is taken once from the original data and reused for every
    # realization below.
    uvdata = UVData(uv_fits_path)
    noise = uvdata.noise()
    for i in range(1, 101):
        # Reload the original file so each realization starts from the same
        # data.
        uvdata = UVData(uv_fits_path)
        uvdata.substitute([mdl])
        uvdata.noise_add(noise)
        art_fits_fname = 'art_{}_{}.fits'.format(freq, i)
        art_fits_path = os.path.join(data_dir, art_fits_fname)
        uvdata.save(art_fits_path)

        # Here we should MCMC posterior
        # For now each artificial data set is fitted with difmap, always
        # starting from the same 'initial.mdl'.
        modelfit_difmap(art_fits_fname,
                        'initial.mdl',
                        'out_{}_{}.mdl'.format(freq, i),
                        niter=100,
                        path=data_dir,
                        mdl_path=data_dir,
                        out_path=data_dir)

    # Collect parameters of the fitted components of all 100 realizations.
    params = list()
    for i in range(1, 101):
        comps = import_difmap_model('out_{}_{}.mdl'.format(freq, i), data_dir)
        params.append([
            comps[0].p[0], comps[0].p[2], comps[1].p[0], comps[1].p[2],
Exemplo n.º 4
0
    # model.add_components(ccmodel)

    # ``use_V`` is switched on for the noise estimate only when Stokes I is
    # processed.
    if stokes == "I":
        use_V = True
    else:
        use_V = False
    noise = uvdata_template.noise(use_V=use_V)

    params = list()

    # ``n_mc`` realizations: every pass overwrites the same file
    # "artificial.uvf" in ``data_dir``.
    for i in range(n_mc):
        # Replace the visibilities with the model ones, multiply them by
        # ``corrections`` and add noise.
        # NOTE(review): ``corrections`` is defined outside this excerpt -
        # presumably gain corrections; confirm its shape matches
        # ``uvdata_template.uvdata``.
        uvdata_template.substitute([ccmodel])
        uvdata_template.uvdata = uvdata_template.uvdata * corrections
        uvdata_template.noise_add(noise)
        uvdata_template.save(os.path.join(data_dir, "artificial.uvf"),
                             rewrite=True,
                             downscale_by_freq=True)

        # Self-calibrate
        # Input and output names are the same, so the artificial file is
        # replaced by its self-calibrated version.
        selfcal_difmap(
            fname="artificial.uvf",
            outfname="artificial.uvf",
            path=data_dir,
            path_to_script="/home/ilya/github/ve/difmap/auto_selfcal",
            outpath=data_dir,
            show_difmap_output=True)

        modelfit_difmap("artificial.uvf",
                        "{}.mod".format(epoch),
                        "boot_artificial.mdl",
                        niter=300,
Exemplo n.º 5
0
# Process every difmap model file: derive the epoch from the file name, clean
# the header of the matching UV data file and prepare the output file names.
for path in original_dfm_models:
    fname = os.path.split(path)[-1]
    # Epoch is the file name without its last 4 characters.
    # NOTE(review): assumes a 3-letter extension such as '.mdl' - confirm.
    epoch = fname[:-4]
    print("Processing epoch : {}".format(epoch))
    # Epochs listed in ``epochs_ready`` are not processed again.
    if epoch in epochs_ready:
        print("Skipping epoch {}".format(epoch))
        continue
    original_model_fname = fname
    original_model_path = os.path.join(data_dir, original_model_fname)
    uv_fits_fname = mojave_uv_fits_fname('0316+413', 'u', epoch)
    uv_fits_path = os.path.join(data_dir, uv_fits_fname)

    uvdata = UVData(uv_fits_path)
    # Remove HISTORY from the header and save the data; when the header has
    # no HISTORY the ``KeyError`` is ignored and nothing is saved.
    # NOTE(review): ``save`` is called without a file name - presumably it
    # overwrites the loaded file; confirm.
    try:
        del uvdata.hdu.header['HISTORY']
        uvdata.save(rewrite=True, downscale_by_freq=False)
    except KeyError:
        pass

    # Per-epoch output files, both placed in ``txt_file_dir``.
    out_txt_file = os.path.join(txt_file_dir, 'errors_{}.mod'.format(epoch))
    out_png_file = os.path.join(txt_file_dir, 'errors_{}.png'.format(epoch))
    try:
        bootstrap_uvfits_with_difmap_model(uv_fits_path,
                                           original_model_path,
                                           n_boot=100,
                                           boot_dir=boot_dir,
                                           out_txt_file=out_txt_file,
                                           out_plot_file=out_png_file,
                                           clean_after=True,
                                           niter=200,
                                           out_rchisq_file=os.path.join(
Exemplo n.º 6
0
# Scale the coordinate arrays in place by ``mas_to_rad``.
x *= mas_to_rad
y *= mas_to_rad
image = transfer.image()

# Image component built from the image and one 1D slice of each coordinate
# array (first row of ``x``, first element along the last axis of ``y``).
icomp = ImageComponent(image, x[0], y[..., 0])
uvdata = UVData(os.path.join(data_dir, '0952+179.U1.2007_04_30.PINAL'))
# uvdata = UVData('/home/ilya/Dropbox/ACC/3c120/uvdata/0430+052.u.2006_05_24.uvf')
model = Model(stokes='I')
model.add_component(icomp)
# Noise is taken from the data before the visibilities are replaced by the
# model ones, then increased 10 times for every baseline.
noise = uvdata.noise(use_V=True)
uvdata.substitute([model])
for bl in noise:
    noise[bl] *= 10
uvdata.noise_add(noise)
uvdata.save(os.path.join(data_dir, '0952_15GHz_BK.fits'))

# NOTE(review): the disabled call below reads '15GHz_BK.fits' and writes
# 'u_BK_cc.fits', while the code saves '0952_15GHz_BK.fits' above and loads
# '0952_15GHz_BK_cc.fits' below - the file names need adjusting before it is
# re-enabled.
# clean_difmap('15GHz_BK.fits', 'u_BK_cc.fits', 'I', (1024, 0.1), path=data_dir,
#              path_to_script=path_to_script, show_difmap_output=True,
#              outpath=data_dir)

# The CLEAN image is loaded from a file that this excerpt does not create.
ccimage = create_clean_image_from_fits_file(os.path.join(data_dir, '0952_15GHz_BK_cc.fits'))
beam = ccimage.beam
rms = rms_image(ccimage)
blc, trc = find_bbox(ccimage.image, rms, 10)
# Plot with the lowest level at 3 rms inside the box found above.
iplot(ccimage.image, x=ccimage.x, y=ccimage.y, min_abs_level=3*rms, beam=beam,
      show_beam=True, blc=blc, trc=trc, core=tuple(p_map))

# ``r_15`` holds the values of ``r`` relative to its last element; ``nu`` has
# one entry per element of ``r``.
# NOTE(review): presumably ``nu`` are frequencies in GHz - confirm.
r = np.array([9.86391978e-01, 6.43996321e-01, 3.53391595e-01])
r_15 = np.array(r) - r[-1]
nu = np.array([5., 8., 15.])
Exemplo n.º 7
0
# DOESN'T WORK - ERRORS ARE STILL SMALL
import os
from mojave import mojave_uv_fits_fname
from uv_data import UVData
from bootstrap import bootstrap_uvfits_with_difmap_model

# Input locations: the difmap model and the UV data of one epoch.
data_dir = '/home/ilya/silke'
epoch = '2017_01_28'
original_model_fname = '2017_01_28us'
original_model_path = os.path.join(data_dir, original_model_fname)
uv_fits_fname = mojave_uv_fits_fname('0851+202', 'u', epoch)
uv_fits_path = os.path.join(data_dir, uv_fits_fname)

# Add the same amount of noise to every baseline and store the result in a
# new file.
uvdata = UVData(uv_fits_path)
added_noise = dict()
for baseline in uvdata.baselines:
    added_noise[baseline] = [0.137]
uvdata.noise_add(added_noise)
new_fits_path = os.path.join(data_dir, 'added_noise.fits')
uvdata.save(new_fits_path)

# Bootstrap the noisier data with the original model.
boot_dir = os.path.join(data_dir, 'boot')
bootstrap_uvfits_with_difmap_model(new_fits_path,
                                   original_model_path,
                                   boot_dir=boot_dir)
Exemplo n.º 8
0
# Second model component with priors on two of its parameters. Each prior is
# a tuple (log-pdf function, positional arguments, keyword arguments).
# NOTE(review): for ``scipy.stats.uniform`` the two positional arguments are
# ``loc`` and ``scale`` - i.e. [0., 5.] is uniform on [0, 5]; confirm this is
# how ``add_prior`` calls the function.
cg2 = CGComponent(0.5, 0., -2., 0.55)
cg2.add_prior(flux=(
    sp.stats.uniform.logpdf,
    [0., 5.],
    dict(),
),
              bmaj=(
                  sp.stats.uniform.logpdf,
                  [0, 20.],
                  dict(),
              ))
# Replace the visibilities with those of the two-component model (``cg1`` is
# created before this excerpt), add noise and save the artificial data.
model = Model(stokes='I')
model.add_components(cg1, cg2)
uvdata.substitute([model])
uvdata.noise_add(noise)
uvdata.save(os.path.join(data_dir, 'fake.uvf'))
# Clean uv-data
clean_difmap(
    'fake.uvf',
    'fake_cc.fits',
    'I', (1024, 0.1),
    path=data_dir,
    path_to_script='/home/ilya/code/vlbi_errors/difmap/final_clean_nw',
    outpath=data_dir,
    show_difmap_output=True)
# Load the resulting CLEAN image and find the box to plot: threshold is
# 2 rms, ``delta`` is the integer part of the first beam parameter.
image = create_clean_image_from_fits_file(
    os.path.join(data_dir, 'fake_cc.fits'))
rms = rms_image(image)
blc, trc = find_bbox(image.image, 2. * rms, delta=int(image._beam.beam[0]))
# Plot image
iplot(image.image,
Exemplo n.º 9
0
# Pixel-index grids of shape (imsize[1], imsize[0]): ``y`` takes the
# ``imsize[0]`` values along its rows, ``z`` takes the ``imsize[1]`` values
# along its columns.
y, z = np.meshgrid(np.arange(imsize[0]), np.arange(imsize[1]))
# Shift both grids so that zero is at the image centre. Each axis must be
# centred with its own size; using ``imsize[0]`` for ``z`` is only right for
# square images.
y = y - imsize[0] / 2. + 0.5
z = z - imsize[1] / 2. + 0.5
# Convert pixels -> ``mas_in_pix`` units -> radians.
y_mas = y * mas_in_pix
z_mas = z * mas_in_pix
y_rad = mas_to_rad * y_mas
z_rad = mas_to_rad * z_mas

# The component takes one 1D coordinate slice per axis: first row of
# ``y_rad``, first column of ``z_rad``.
icomp = ImageComponent(image_g, y_rad[0, :], z_rad[:, 0])

# Noise is taken from the data and reduced 10 times for every baseline.
noise = uvdata.noise(use_V=True)
for key, value in noise.items():
    noise[key] = 0.1 * value
model = Model(stokes='I')
model.add_component(icomp)

# jet_comp = CGComponent(0.5, 1., 0., 0.3)
# model.add_component(jet_comp)

# Replace the visibilities with the model ones, add noise and overwrite the
# test file.
uvdata.substitute([model])
uvdata.noise_add(noise)
uvdata.save('/home/ilya/github/bck/jetshow/uvf/test.fits', rewrite=True)

# Fit the saved file with difmap starting from 'initial_cg.mdl'; the result
# is written next to the data.
modelfit_difmap('test.fits',
                'initial_cg.mdl',
                'out_test.mdl',
                niter=300,
                path='/home/ilya/github/bck/jetshow/uvf',
                mdl_path='/home/ilya/github/bck/jetshow',
                out_path='/home/ilya/github/bck/jetshow/uvf')