Example #1
def run(
        dataset='bpic_naco',
        cube_size=50,
        tscale=1.,
        xscale=1.,
        sep_from=1.,
        sep_to=4.,
        n_jobs=None,
        verbose=10,
        prad=1.,
        model=None,
        opts=dict(),
        planets=[],
        planets_flevel_noise=False,
):
    dataset_name = dataset
    model_name = model

    if n_jobs is None:
        n_jobs = max(1, (multiprocessing.cpu_count() - 4) // THREADS_LIMIT)

    if model is None:
        print("Please provide a model")
        exit(1)

    ### MODEL LOADING ###

    model = globals()[model_name](**opts)

    matrix_completion = ('complete' in model.__class__.__dict__)

    ### DATASET LOADING ###

    # Load data
    dataset = loader(dataset_name)
    print("Loaded dataset {}".format(dataset_name))

    # Rescale, if requested
    if xscale != 1. or tscale != 1.:
        dataset = dataset.resample(spatial=xscale, temporal=tscale)
        print("Cube resampled by factors {} (spatially) and {} (temporally)".
              format(xscale, tscale))

    # Resize
    dataset.crop_frames(cube_size)
    print("Cube resized to ({}, {})".format(*dataset.cube.shape[1:]))

    ### INJECT PLANETS, MAYBE ###

    if planets and planets_flevel_noise:
        noise = noise_per_sep(dataset, [sep_from, sep_to])

    for planet in planets:
        sep, angle, flevel = planet
        if planets_flevel_noise:
            flevel = flevel * noise(sep)
        print(
            "Injecting companion at distance {:.2f} FWHM, angle {:.1f} deg, flevel {:.2f}"
            .format(sep, angle, flevel))
        dataset = dataset.inject_companion(sep * dataset.fwhm, angle, flevel)

    ### WRITING HEADER ###

    params = dict(
        dataset=dataset_name,
        cube_size=cube_size,
        tscale=tscale,
        xscale=xscale,
        planets=planets,
        planets_flevel_noise=planets_flevel_noise,
        sep_from=sep_from,
        sep_to=sep_to,
        prad=prad,
        model=model_name,
        opts=opts,
        matrix_completion=matrix_completion,
    )

    output_filename = 'perf/maps_{}_{}.pkl'.format(thetime, params['model'])
    print("Writing results to \"{}\"".format(output_filename))

    with open(output_filename, "ab") as f:
        pickle.dump(params, f)

    ### DATA COLLECTION ###

    start_time = time.time()

    print()
    print("Starting data collection on loaded dataset, with parameters:")
    print(" - Annulus {}--{} (FWHM)".format(sep_from, sep_to))
    print(" - Model \"{}\"{}".format(
        type(model).__name__,
        " (matrix completion with radius {} FWHM)".format(prad)
        if matrix_completion else ""))
    print("   with parameters: {}".format(', '.join(
        '{}={}'.format(k, repr(v)) for (k, v) in opts.items())))
    print(" - Nbr of parallel processes {}".format(n_jobs))
    print()

    cy, cx = vip.var.frame_center(dataset.cube[0])
    fwhm = dataset.fwhm

    if not matrix_completion:

        scoring_functions = {
            'flux': flux_cube,
            'stim': stim_cube,
            'snr': snr_cube,
        }

        score_maps = _fit_score(dataset,
                                model,
                                scoring_functions=scoring_functions)

    else:

        scoring_functions = {
            ('flux', 'loglr'): flux_loglr_path,
        }

        score_maps = _fit_complete_score_parallel(
            dataset,
            model,
            prad,
            pixels_in_annulus(dataset.cube.shape[1:], (cy, cx),
                              sep_from * fwhm, sep_to * fwhm),
            parallel=Parallel(n_jobs=n_jobs, verbose=verbose),
            scoring_functions=scoring_functions)

    # Flatten the score maps: replace tuple-keyed entries by one entry per name
    for key, val in list(score_maps.items()):
        if isinstance(key, tuple):
            for name, scores in zip(key, val):
                score_maps[name] = scores
            del score_maps[key]

    for name in list(score_maps):
        score_maps[name] = frame_crop_annulus(score_maps[name], (cy, cx),
                                              sep_from * fwhm, sep_to * fwhm)

    with open(output_filename, "ab") as f:
        pickle.dump(score_maps, f)

    stop_time = time.time()

    print()
    print("Finished")
    print("Elapsed time: {}".format(hms_string(stop_time - start_time)))
Example #2
H_proto = struct(
    batch_size=64,
    val_batch_size=1000,
    epochs=10,
    lr=0.01,
    caps=[10, 20, 50, 70, 100, 200, 500],
    momentum=0.5,
)
S_proto = struct(epoch=0, bn=0)
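
# Note (inferred from usage; the definition is not shown): `struct`, imported
# from the logger module in the other examples, acts as an attribute-access
# record: fields are read as H_proto.batch_size and copied via H_proto.copy().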

log = Logger('mnist_capacity_nodrop',
             H_proto,
             S_proto,
             load=True,
             metric_show_freq=1)

tn_loader, val_loader = loader(mnist(), H_proto.batch_size,
                               H_proto.val_batch_size)

for epoch in range(S_proto.epoch, len(H_proto.caps)):
    S_proto.epoch = epoch
    log.flush()
    cap = H_proto.caps[epoch]
    H = H_proto.copy()
    H.cap = cap
    S = struct(epoch=1, bn=1)
    inner_log = Logger(
        'cap{}'.format(cap),
        H,
        S,
        # overwrite=True,
        load=True,
        metric_show_freq=0,
Example #3
import torch.nn as nn
import torch.nn.functional as F

from datasets import cifar10, loader
from logger import struct, Logger
from train import train, acc_metric, nll_metric
from models import vgg

H = struct(
    batch_size=128,
    val_batch_size=100,
    epochs=10,
    lr=0.1,
    momentum=0.9,
)
S = struct(epoch=1, bn=1)

log = Logger('cifar10_standard', H, S, overwrite=True, metric_show_freq=100)

tn_loader, val_loader = loader(cifar10(), H.batch_size, H.val_batch_size)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.features = vgg('vgg11')
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        out = F.log_softmax(out, dim=1)
        return out
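
# Added sanity-check sketch (not in the original, assuming 32x32 CIFAR-10
# inputs): vgg('vgg11') applies five 2x2 max-pools, reducing a 3x32x32 image
# to a 512x1x1 feature map, i.e. 512 features after flattening, which is why
# the classifier above is nn.Linear(512, 10). For example:
#
#     import torch
#     net = Net()
#     assert net.features(torch.randn(2, 3, 32, 32)).view(2, -1).size(1) == 512
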
Example #4
def run_perf_assess(
    dataset='bpic_naco_empty',
    cube_size=50, tscale=1., xscale=1.,
    n_samples_per_res=5, sep_from=1., sep_to=4., flevel_from=50, flevel_to=200,
    seed=0, n_jobs=None,
    prad=1., model=None, opts=dict(),
):
    dataset_name = dataset
    model_name = model
    
    if n_jobs is None:
        n_jobs = max(1, (multiprocessing.cpu_count() - 4) // THREADS_LIMIT)
    
    if model is None:
        print("Please provide a model")
        exit(1)
    
    
    ### MODEL LOADING ###

    n_samples = int(n_samples_per_res * 4 * (sep_to**2 - sep_from**2))
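    # With the defaults (5 samples per resolution element, annulus 1 to 4 FWHM)
    # this is int(5 * 4 * (16 - 1)) = 300 samples: the annulus covers
    # pi*(sep_to**2 - sep_from**2) FWHM^2, and one FWHM-diameter resolution
    # element covers pi/4 FWHM^2, hence the factor 4*(sep_to**2 - sep_from**2).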
    model = globals()[model_name](**opts)
    
    
    ### DATASET LOADING ###
    
    # Load data
    dataset = loader(dataset_name)
    print("Loaded dataset {}".format(dataset_name))
    
    # Rescale, if requested
    if xscale != 1. or tscale != 1.:
        dataset = dataset.resample(spatial=xscale, temporal=tscale)
        print("Cube resampled by factors {} (spatially) and {} (temporally)".format(xscale, tscale))
    
    # Resize
    dataset = dataset.resize(cube_size)
    print("Cube resized to ({}, {})".format(*dataset.cube.shape[1:]))
    
    
    ### WRITING HEADER ###

    params = dict(
        dataset=dataset_name,
        cube_size=cube_size,
        tscale=tscale,
        xscale=xscale,
        
        n_samples=n_samples,
        sep_from=sep_from,
        sep_to=sep_to,
        flevel_from=flevel_from,
        flevel_to=flevel_to,
        seed=seed,
        
        model=model_name,
        opts=opts,
    )
    
    output_filename = 'perf/{}_gomez2017_{}.pkl'.format(thetime, params['model'])
    print("Writing results to \"{}\"".format(output_filename))
    
    
    ### DATA COLLECTION ###
    
    start_time = time.time()

    print()
    print("Starting data collection on loaded dataset, with parameters:")
    print(" - Samples {}".format(n_samples))
    print(" - Annulus {}--{} (FWHM)".format(sep_from, sep_to))
    print(" - Injected flux level {}--{}".format(flevel_from, flevel_to))
    print(" - Model \"{}\"".format(
        type(model).__name__
    ))
    print("   with parameters: {}".format(
        ', '.join('{}={}'.format(k, repr(v)) for (k, v) in opts.items())
    ))
    print(" - Random seed {}".format(seed))
    print(" - Nbr of parallel processes {}".format(n_jobs))
    print()
    
    negatives, positives = perf_assess_gomez2017(
        dataset, n_samples, [sep_from, sep_to], [flevel_from, flevel_to],
        model,
        random_state=seed,
        n_jobs=n_jobs, verbose=10
    )
    
    with open(output_filename, 'wb') as f:
        pickle.dump(params, f)
        pickle.dump(negatives, f)
        pickle.dump(positives, f)
    
    stop_time = time.time()
    
    print()
    print("Finished")
    print("Elapsed time: {}".format(hms_string(stop_time-start_time)))
Example #5
import torch.nn as nn
import torch.nn.functional as F

from datasets import mnist, loader
from logger import struct, Logger
from train import train

H = struct(
    batch_size=64,
    val_batch_size=1000,
    epochs=2,
    lr=0.01,
    momentum=0.5,
)
S = struct(epoch=1, bn=1)

log = Logger('mnist_scratch', H, S, overwrite=True, metric_show_freq=100)

tn_loader, val_loader = loader(mnist(), H.batch_size, H.val_batch_size)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
Example #6
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from datasets import mnist, loader

batch_size = 64
ts_batch_size = 1000
epochs = 2
lr = 0.01
momentum = 0.5
seed = 1
log_freq = 100

tn_loader, ts_loader = loader(mnist(), batch_size, ts_batch_size)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
Example #7
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = F.relu(self.fc1(x))
            x = F.dropout(x, training=self.training)
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    return Net()


def optimizer(H, params):
    return optim.SGD(params, lr=H.lr, momentum=H.momentum)


loader_fn = lambda H: loader(mnist(), H.batch_size, H.val_batch_size)

metrics = lambda model, tn_loader, val_loader: struct(
    val_acc=acc_metric(model, val_loader),
    val_loss=nll_metric(model, val_loader),
    tn_loss=nll_metric(model, tn_loader),
    tn_acc=acc_metric(model, tn_loader))

meta_metrics = struct(best_acc=max_metametric('val_acc'),
                      best_acc_idx=argmax_metametric('val_acc'),
                      best_loss=min_metametric('val_loss'),
                      best_loss_idx=argmin_metametric('val_loss'),
                      best_tn_acc=max_metametric('tn_acc'),
                      best_tn_acc_idx=argmax_metametric('tn_acc'),
                      best_tn_loss=min_metametric('tn_loss'),
                      best_tn_loss_idx=argmin_metametric('tn_loss'))
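
# Note (inferred from the names only): each *_metametric presumably reduces
# the per-epoch history of one logged metric, e.g. max_metametric('val_acc')
# behaving roughly like lambda history: max(history['val_acc']).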
Example #8
        if i >= warmup:
            imin = max(0, min(i-margin, margin))
            model = scipy.stats.linregress(fluxes[imin:i+1], snrs[imin:i+1])
            
            if model.slope > 0:
                flux_range = (snr_range - model.intercept) / model.slope
                flux_range[0] = max(0, flux_range[0])
                # print(flux_range)
    
    return fluxes[margin:], snrs[margin:]


if __name__ == '__main__':
    dataset_name = sys.argv[1]
    dataset = loader(dataset_name).resize(50)
    cube_model = PCA(rank=17)
    
    random_state = np.random.RandomState(0)
    
    snr_range = (1, 2)
    seps = [1.5, 2, 2.5, 3, 3.5]
    
    fluxes_min, fluxes_max = np.zeros(len(seps)), np.zeros(len(seps))
    
    for i, sep in tqdm(enumerate(seps), total=len(seps)):
        fluxes, snrs = flux_snr_regression(
            dataset, cube_model, sep, snr_range,
            n_samples=100, margin=15, random_state=random_state)
        
        model = scipy.stats.linregress(fluxes, snrs)
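        # (Truncated here; presumably the loop then inverts the fit as in the
        # fragment above, along the lines of:
        # fluxes_min[i], fluxes_max[i] =
        #     (np.array(snr_range) - model.intercept) / model.slope)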
Example #9
def run_perf_assess(
        dataset='bpic_naco_empty',
        cube_size=50,
        tscale=1.,
        xscale=1.,
        n_samples_per_res=5,
        sep_from=1.,
        sep_to=4.,
        flevel_from=50,
        flevel_to=200,
        flevel_noise=None,
        flevel_file=None,
        seed=0,
        n_jobs=None,
        prad=1.,
        model=None,
        opts=dict(),
):
    dataset_name = dataset
    model_name = model

    if n_jobs is None:
        n_jobs = max(1, (multiprocessing.cpu_count() - 4) // THREADS_LIMIT)

    if model is None:
        print("Please provide a model")
        exit(1)

    if flevel_noise is not None:
        flevel_from = None
        flevel_to = None

    ### MODEL LOADING ###

    n_samples = int(n_samples_per_res * 4 * (sep_to**2 - sep_from**2))
    model = globals()[model_name](**opts)

    matrix_completion = ('complete' in model.__class__.__dict__)

    ### DATASET LOADING ###

    # Load data
    dataset = loader(dataset_name)
    print("Loaded dataset {}".format(dataset_name))

    # Rescale, if requested
    if xscale != 1. or tscale != 1.:
        dataset = dataset.resample(spatial=xscale, temporal=tscale)
        print("Cube resampled by factors {} (spatially) and {} (temporally)".
              format(xscale, tscale))

    # Resize
    dataset.crop_frames(cube_size)
    print("Cube resized to ({}, {})".format(*dataset.cube.shape[1:]))

    ### WRITING HEADER ###

    params = dict(
        dataset=dataset_name,
        cube_size=cube_size,
        tscale=tscale,
        xscale=xscale,
        n_samples=n_samples,
        sep_from=sep_from,
        sep_to=sep_to,
        flevel_from=flevel_from,
        flevel_to=flevel_to,
        flevel_noise=flevel_noise,
        flevel_file=flevel_file,
        seed=seed,
        prad=prad,
        model=model_name,
        opts=opts,
        matrix_completion=matrix_completion,
    )

    output_filename = 'perf/{}_{}.pkl'.format(thetime, params['model'])
    print("Writing results to \"{}\"".format(output_filename))

    manager = multiprocessing.Manager()
    writer_queue = manager.Queue()

    writer_process = multiprocessing.Process(target=writer,
                                             args=(output_filename,
                                                   writer_queue))
    writer_process.start()

    writer_queue.put(params)

    ### DATA COLLECTION ###

    start_time = time.time()

    print()
    print("Starting data collection on loaded dataset, with parameters:")
    print(" - Samples {}".format(n_samples))
    print(" - Annulus {}--{} (FWHM)".format(sep_from, sep_to))
    if flevel_file is not None:
        print(" - Injected flux level to match SNR requirements from '{}'".
              format(flevel_file))
    elif flevel_noise is not None:
        print(" - Injected flux level {} times the ambiant noise level".format(
            flevel_noise))
    else:
        print(" - Injected flux level {}--{}".format(flevel_from, flevel_to))
    print(" - Model \"{}\"{}".format(
        type(model).__name__,
        " (matrix completion with radius {} FWHM)".format(prad)
        if matrix_completion else ""))
    print("   with parameters: {}".format(', '.join(
        '{}={}'.format(k, repr(v)) for (k, v) in opts.items())))
    print(" - Random seed {}".format(seed))
    print(" - Nbr of parallel processes {}".format(n_jobs))
    print()

    if flevel_file is not None:
        flevels = flevel_file
    elif flevel_noise is not None:
        flevels = 'noise'
    else:
        flevels = [flevel_from, flevel_to]

    negatives, positives = perf_assess(
        dataset,
        n_samples,
        [sep_from, sep_to],
        flevels,
        model,
        prad=prad,
        flevel_noise_coef=flevel_noise,
        random_state=seed,
        n_jobs=n_jobs,
        verbose=10,
        output_queue=writer_queue,
    )

    stop_time = time.time()

    print()
    print("Finished")
    print("Elapsed time: {}".format(hms_string(stop_time - start_time)))

    ### JOINING STUFF ###

    writer_queue.put(None)
    writer_process.join()
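
# `writer` is referenced above but not shown in this example. Below is a
# minimal sketch consistent with the queue protocol used here (items are
# pickled as they arrive; a None sentinel shuts the writer down); the real
# implementation may differ.
def writer(filename, queue):
    with open(filename, 'ab') as f:
        while True:
            item = queue.get()
            if item is None:  # sentinel pushed after perf_assess finishes
                break
            pickle.dump(item, f)
            f.flush()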