Example #1
def build_model ():
    assert FLAGS.fix_width > 0
    assert FLAGS.fix_height > 0
    model = make_model(FLAGS.net, [FLAGS.fix_height, FLAGS.fix_width, FLAGS.channels])
    model.compile(optimizer=Adam(lr=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=[acc])
    return model
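A hypothetical sketch of the flag plumbing this example assumes: build_model() reads FLAGS.net, FLAGS.fix_width, FLAGS.fix_height and FLAGS.channels, so the sketch declares them with absl.flags; the original project may define them elsewhere or with a different flags module, and the default values below are placeholders.

# Hypothetical flag declarations matching what build_model() above reads.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string('net', 'resnet34', 'backbone name passed to make_model')
flags.DEFINE_integer('fix_width', 256, 'fixed input width fed to the network')
flags.DEFINE_integer('fix_height', 256, 'fixed input height fed to the network')
flags.DEFINE_integer('channels', 3, 'number of input channels')

FLAGS(['example'])        # parse defaults so the flags are readable outside app.run()
model = build_model()     # build_model() is the function above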
Example #2
 def calculate_metrics_for_td(self, td, batches):
     if td.metric_eval_timestamp != 'None':
         return td
     else:
         model = make_model(td.model_name, (None, None, 3))
         model.load_weights(opjoin(self.nn_models_dir, td.model_file_name))
         td.add_metrics(self.calculate_metrics_for_model(model, batches))
         return td
Example #3
 def visualize_for_train_data(self, td, batch, number_to_show=4):
     model = make_model(td.model_name, (None, None, 3))
     model.load_weights(opjoin(self.nn_models_dir, td.model_file_name))
     x, y = batch
     pred = model.predict(x, batch_size=16)
     for i in range(number_to_show):
         image, image_parts = x[i]
         mask, mask_parts = y[i]
         pred_parts = model.predict(image_parts, batch_size=len(image_parts))
         pred_parts = pred_parts.reshape(16, 16, 256, 256).swapaxes(1, 2).reshape(16 * 256, 16 * 256)
         pred_parts = cv2.resize(pred_parts, (256, 256))
         show(image, mask, pred_parts, self.predict_threshold)
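The reshape/swapaxes chain above stitches a row-major grid of square tiles back into one mosaic. A self-contained NumPy sketch of that pattern, using the same 16x16 grid of 256x256 tiles (random data, only the shapes and the index check matter):

# Standalone illustration of the reshape/swapaxes stitching used above:
# 256 per-tile predictions of size 256x256, ordered row-major on a 16x16 grid,
# are assembled into a single 4096x4096 mosaic.
import numpy as np

grid, tile = 16, 256
tiles = np.random.rand(grid * grid, tile, tile)        # one prediction per tile

mosaic = (tiles.reshape(grid, grid, tile, tile)        # (row, col, tile_h, tile_w)
               .swapaxes(1, 2)                         # (row, tile_h, col, tile_w)
               .reshape(grid * tile, grid * tile))     # (4096, 4096)

# Tile (r, c) lands in the block [r*tile:(r+1)*tile, c*tile:(c+1)*tile]
r, c = 3, 7
assert np.array_equal(mosaic[r * tile:(r + 1) * tile, c * tile:(c + 1) * tile],
                      tiles[r * grid + c])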
Example #4
def generate_predictions(file_name, model_name, target_size, test_iter):
    """
    Using model model_name and the weights saved in file_name.h5, generates predictions for the files yielded by test_iter
    """
    weight = os.path.join('{}.h5'.format(file_name))
    model = make_model(model_name, (target_size, target_size, 3), 1, "sigmoid")
    model.load_weights(weight, by_name=True)
    #model = load_model(weight)
    predictions = model.predict_generator(test_iter,
                                          steps=len(test_iter.filenames),
                                          verbose=1)
    return predictions
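A hypothetical usage sketch: test_iter is assumed to be a Keras DirectoryIterator, since the function reads test_iter.filenames for the step count; the directory, weights file name and network name below are placeholders.

# Hypothetical driver for generate_predictions() above.
from keras.preprocessing.image import ImageDataGenerator

target_size = 256                              # must match the resolution the weights were trained at
test_iter = ImageDataGenerator(rescale=1. / 255).flow_from_directory(
    'test_images/',                            # hypothetical directory of test images
    target_size=(target_size, target_size),
    class_mode=None,                           # no labels at prediction time
    batch_size=1,                              # one file per step, matching steps=len(filenames)
    shuffle=False)                             # keep predictions aligned with filenames

# 'resnet34_fold0' and 'resnet34' are placeholder names; the call loads resnet34_fold0.h5
predictions = generate_predictions('resnet34_fold0', 'resnet34', target_size, test_iter)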
Example #5
def run_parallel_map():
    """ Run a test with synthetic data and MCMC inference
    """
    # Parse command line args
    (options, args) = parse_cmd_line_args()

    # Load the data
    data = load_data(options)
    # Get a model for the data
    model_type = 'standard_glm'
    model = make_model(model_type, N=data['N'])

    # Get parallel clients
    rc = Client(profile="sge")
    dview = rc[:]
    # dview = get_engines(n_workers=8)

    # Load imports on the client
    load_imports_on_client(dview)

    # Initialize population objects on the clients
    dview.apply_sync(initialize_client, (model_type, data['N'], data))
Example #6
def run_parallel_map():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0,T_split))
    xv_data = segment_data(data, (T_split,data['T']))

    # Sample random initial state
    x0 = popn.sample()

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    use_existing = True

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i,model) in enumerate(models):
        print "Evaluating model %d" % i
        set_hyperparameters_on_engines(client[:], model)
        set_data_on_engines(client[:], train_data)

        if use_existing and  \
           os.path.exists(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)):
            print "Found existing results for model %d" % i
            with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)) as f:
                (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
                train_lls[i] = ll_train
                xv_lls[i] = ll_xv
                total_lls[i] = ll_total

        else:
            x0 = copy.deepcopy(best_x)
            # set_data_on_engines(client[:], train_data)
            ll0 = parallel_compute_ll(client[:], x0, data['N'])
            print "Training LL0: %f" % ll0

            # Perform inference
            x_inf = parallel_coord_descent(client, data['N'], x0=x0, maxiter=1,
                                           use_hessian=False,
                                           use_rop=False)

            ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Training LL_inf: %f" % ll_train
            train_lls[i] = ll_train

            # Compute log lkhd on xv data
            set_data_on_engines(client[:], xv_data)
            ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Cross Validation LL: %f" % ll_xv
            xv_lls[i] = ll_xv

            # Compute log lkhd on total dataset
            set_data_on_engines(client[:], data)
            ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Total LL: %f" % ll_total
            total_lls[i] = ll_total

            print "Saving partial results"
            with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i),'w') as f:
                cPickle.dump((x_inf, ll_train, ll_xv, ll_total) ,f, protocol=-1)

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    print "Training the best model (%d) with the full dataset" % best_ind
    # Set the best hyperparameters
    set_hyperparameters_on_engines(client[:], best_model)
    set_data_on_engines(client[:], data)

    # Fit the best model on the full training data
    best_x = parallel_coord_descent(client, data['N'], x0=best_x, maxiter=1,
                                    use_hessian=False,
                                    use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x, data['N'])
    print "True LL: %f" % popn_true.compute_ll(x_true)


    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
        cPickle.dump(best_x, f, protocol=-1)

    # Plot results
    plot_results(popn, best_x,
                 popn_true, x_true,
                 do_plot_imp_responses=(model['N']<64),
                 resdir=options.resultsDir)
Example #7
    def one_step_forecast(test_data, opt_manager, model_name="rnf"):
        """
        N.b. Given our main use case is for real-time data streams where speed is key,
            we test the RNF by running it continuously (i.e. states are not reset during operation)
        """

        print("*** Running one-step-ahead predictions ***")

        # Get hyperparam stuff
        params = opt_manager.get_best_params()
        checkpoint_path = opt_manager.checkpoint_path

        # Param setup
        params['total_time_steps'] = int(1e4)
        params['minibatch_size'] = 1
        output_size = int(params['output_size'])
        hidden_layer_size = int(params['hidden_layer_size'])

        # Setup data
        batcher = configs.get_batcher(model_name)
        col_defn = data_formatter.get_column_definition()

        test_batches = batcher.batch(test_data,
                                     col_defn,
                                     lookback=params['total_time_steps'])

        test_inputs, test_outputs, test_flags = test_batches

        # To make both LSTM & RNF outputs consistent
        if not isinstance(test_inputs, list):
            test_inputs = [test_inputs]

        batches, timesteps, _ = test_outputs.shape

        # Set model
        K.clear_session()

        states = [np.zeros((1, hidden_layer_size)) for _ in range(2)]
        model = model_factory.make_model(model_name,
                                         params,
                                         set_initial_states=True)
        _ = model.predict([ip[:1, ...] for ip in test_inputs] + states)
        model.load_weights(checkpoint_path)

        preds = []

        for i in range(batches):
            print("Predicting {}/{} trajs".format(i + 1, batches))
            outputs, state_h, state_c = model.predict(
                [ip[i:i + 1, ...] for ip in test_inputs] + states)

            states = [state_h, state_c]
            means, stds = outputs[..., :output_size], outputs[...,
                                                              output_size:]

            preds.append(means)

        preds = np.concatenate(preds, axis=0)

        mse = np.sum(
            (preds - test_outputs)**2 * test_flags[..., np.newaxis]) / np.sum(
                test_flags[..., np.newaxis])

        return mse
Example #8
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    raise Exception("Make sur ethe sparsity is set properly!")

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):

                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn, popn.model, x0)

    # !!!!DEBUG!!!!!
    # Initialize with true variables
    # import copy
    # x0 = copy.deepcopy(x_true)

    use_existing = False
    
    if use_existing and  \
       os.path.exists(os.path.join(options.resultsDir, 'results.pkl')):

        print "Found existing results"
        with open(os.path.join(options.resultsDir, 'results.pkl')) as f:
            x_smpls = cPickle.load(f)
            N_samples = len(x_smpls)
    else:
        N_samples = 1000

        # Define a callback to evaluate log likelihoods and predictive log likelihoods
        print "Creating synthetic test data"
        T_test = 15
        popn_test = Population(popn.model)
        test_data = gen_synth_data(data['N'], T_test, popn_true, x_true)
        popn_test.set_data(test_data)

        # Compute pred ll under true model
        popn_true.set_data(test_data)
        x_true['predll'] = popn_true.compute_ll(x_true)
        popn_true.set_data(data)

        # Compute the predictive log likelihood under a homogeneous PP model with MLE
        # homog_pred_lls[j] = compute_homog_pp(train_data, test_data)

        pred_lls = np.zeros(N_samples)
        smpl = [0]
        def pred_ll_cbk(x):
            pred_ll = popn_test.compute_ll(x)
            pred_lls[smpl[0]] = pred_ll
            x['predll'] = pred_ll
            smpl[0] += 1
            print "Pred LL: %.2f" % pred_ll
        pred_ll_cbk = None

        # Perform inference
        print "Performing parallel inference"
        start_time = time.time()
        x_smpls = parallel_gibbs_sample(client, data,
                                        x0=x0, N_samples=N_samples,
                                        save_interval=50, results_dir=options.resultsDir,
                                        callback=pred_ll_cbk)
        stop_time = time.time()

        # Save results
        print "Saving results to %s" % os.path.join(options.resultsDir, 'results.pkl')
        with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
            cPickle.dump(x_smpls, f, protocol=-1)

        # Save runtime
        with open(os.path.join(options.resultsDir, 'runtime.pkl'),'w') as f:
            cPickle.dump(stop_time-start_time, f, protocol=-1)


    # Plot average of last 20% of samples
    print "Plotting results"
    smpl_frac = 1.0

    # Only plot the impulse response matrix for small N
    do_plot = data['N'] < 20
    do_plot_imp_responses = data['N'] < 30

    if do_plot:
        plot_results(popn,
                    x_smpls[-1*int(smpl_frac*len(x_smpls)):],
                    popn_true,
                    x_true,
                    do_plot_imp_responses=do_plot_imp_responses,
                    resdir=options.resultsDir)
Example #9
def load_set_of_results(N, T, graph_model='er', sample_frac=0.1):
    data_dir = os.path.join('/group', 'hips', 'scott', 'pyglm', 'data', 'synth', graph_model, 'N%dT%d' % (N, T))

    # Evaluate the state for each of the parameter settings
    s_infs_mcmc = []
    s_infs_map = []
    s_trues = []

    # Enumerate the subdirectories containing the data
    subdirs = os.listdir(data_dir)
    subdirs = reduce(lambda sd, d: sd + [d] \
                                   if os.path.isdir(os.path.join(data_dir, d)) \
                                   else sd,
                     subdirs, [])

    # For each data subdirectory, load the true data, the MAP estimate, and the MCMC results
    print "WARNING: Make sure we sample all subdirs"
    # import pdb; pdb.set_trace()
    for d in subdirs:
        print "Loading data and results from %s" % d
        print "Loading true data"
        with open(os.path.join(data_dir, d, 'data.pkl'), 'r') as f:
            data = cPickle.load(f)

        print "Loading model"
        with open(os.path.join(data_dir, d, 'model.pkl'), 'r') as f:
            model_data = cPickle.load(f)
            #HACK
            if 'N_dims' not in model_data['network']['graph']:
                model_data['network']['graph']['N_dims'] = 1
            if 'location_prior' not in model_data['network']['graph']:
                model_data['network']['graph']['location_prior'] = \
                         {
                             'type' : 'gaussian',
                             'mu' : 0.0,
                             'sigma' : 1.0
                         }
            if 'L' in data['vars']['net']['graph']:
                data['vars']['net']['graph']['L'] = data['vars']['net']['graph']['L'].ravel()
        popn_data = Population(model_data)
        popn_data.set_data(data)
        s_trues.append(popn_data.eval_state(data['vars']))

        try:
            print "Loading map estimate"
            with open(os.path.join(data_dir, d, 'map', 'results.pkl'), 'r') as f:
                x_map = cPickle.load(f)

            model_map = make_model('standard_glm', N=data['N'])
            popn_map = Population(model_map)
            popn_map.set_data(data)
            print "Evaluating MAP state"
            s_infs_map.append(popn_map.eval_state(x_map))

        except Exception as e:
            print "ERROR: Failed to load MAP estimate"

        try:
            print "Loading mcmc estimate"
            with open(os.path.join(data_dir, d, 'mcmc', 'results.pkl'), 'r') as f:
                x_mcmc = cPickle.load(f)

            model_mcmc = make_model('sparse_weighted_model', N=data['N'])
            popn_mcmc = Population(model_mcmc)
            popn_mcmc.set_data(data)

            # Now compute the true and false positive rates for MCMC
            # For MCMC results, only consider the tail of the samples
            print "Evaluating MCMC states"
            N_samples = len(x_mcmc)
            start_smpl = int(np.floor(N_samples - sample_frac*N_samples))

            # Evaluate the state
            this_s_mcmc = []
            for i in range(start_smpl, N_samples):
                this_s_mcmc.append(popn_mcmc.eval_state(x_mcmc[i]))
            s_infs_mcmc.append(this_s_mcmc)
        except Exception as e:
            print "ERROR: Failed to load MCMC estimate"

    return s_trues, s_infs_map, s_infs_mcmc
Example #10
def main(args):
    #initialize Horovod.
    hvd.init()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    K.set_session(tf.Session(config=config))
    
    fold = args.data_path.split("fold_")[1]
    if hvd.rank()==0:
        print("================================")
        if args.use_lovasz:
            print("Fine tuning with ")
        print("Fold {}".format(fold))
        
    #Find best saved model
    best_model_file = 'weights/{}/fold_{}_{epoch}_best.h5'.format(args.model, fold, epoch='{epoch}')
    resume_from_epoch = 0
    for try_epoch in range(args.epochs, 0, -1):
        if os.path.exists(best_model_file.format(epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break
    if hvd.rank()==0:
        print("Last model saved: {}".format(best_model_file.format(epoch=resume_from_epoch)))
    resume_from_epoch = hvd.broadcast(resume_from_epoch, 0, name='resume_from_epoch')
    #verbose mode for one node
    if hvd.rank()==0:
        verbose = 1
    else:
        verbose = 0
   
    #Create dataset
    
    dataset = TGSDataset(data_path=args.data_path, batch_size=args.batch_size)
    input_shape = (args.target_size, args.target_size)
    mask_shape = (101, 101)
    train_data_generator = dataset.get_train_data_generator(input_size=input_shape, mask_size=mask_shape, seed=np.random.rand())
    val_data_generator = dataset.get_val_data_generator(input_size=input_shape, mask_size=mask_shape, seed=np.random.rand())
    train_step_size = dataset.train_step_size // hvd.size()
    val_step_size = dataset.val_step_size // hvd.size()
    #Create model
    model = make_model(args.model, (args.target_size, args.target_size, 3), 2)

    #load weights
    if resume_from_epoch > 0:
        model.load_weights(best_model_file.format(epoch=resume_from_epoch))
        
    size = hvd.size()
    opt = hvd.DistributedOptimizer(SGD(lr=args.learning_rate * size, momentum=0.9, nesterov=True))

    #Loss
    loss = losses.c_lovasz_loss if args.use_lovasz else losses.c_binary_crossentropy
    
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=[metrics.c_binary_accuracy, metrics.c_iou])

    #h5 model
    best_model = ModelCheckpointMGPU(model, filepath=best_model_file, monitor='val_loss',
                                     verbose=1,
                                     mode='min',
                                     period=1,
                                     save_best_only=True,
                                     save_weights_only=True)
    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),

        # Horovod: average metrics among workers at the end of every epoch.
        #
        # Note: This callback must be in the list before the ReduceLROnPlateau,
        # TensorBoard, or other metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),

        # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
        # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
        # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
        hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=args.warmup_epochs, verbose=True)
    ]

    # Horovod: save checkpoints only on the first worker to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(keras.callbacks.TensorBoard(args.log_dir))
        callbacks.append(best_model)
    
    #Fit model
    history = model.fit_generator(train_data_generator,
                        steps_per_epoch=train_step_size,
                        callbacks=callbacks,
                        epochs=args.epochs,
                        verbose=verbose,
                        workers=4,
                        initial_epoch=resume_from_epoch,
                        validation_data=val_data_generator,
                        validation_steps=val_step_size)
  

    score = hvd.allreduce(model.evaluate_generator(val_data_generator, val_step_size, workers=4))
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
Example #11
    valid_inputs, valid_outputs, valid_flags = valid_batches
    test_inputs, test_outputs, test_flags = test_batches

    while len(opt_manager.results.columns) < hyperparam_iterations:

        K.clear_session()

        print()
        print("# -----------------------------------------------------------")
        print("# Running hyperparam optimisation {} of {} for {}".format(
            len(opt_manager.results.columns) + 1, hyperparam_iterations, expt_name))
        print("# -----------------------------------------------------------")
        print()

        params = opt_manager.get_next_parameters()
        model = model_factory.make_model(model_name, params)

        tmp_folder = os.path.join(model_folder, 'tmp')
        tmp_model = os.path.join(tmp_folder, "model")

        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=early_stopping,
                min_delta=1e-4),
            tf.keras.callbacks.ModelCheckpoint(
                filepath=tmp_model,
                monitor='val_loss',
                save_best_only=True,
                save_weights_only=True),
            tf.keras.callbacks.TerminateOnNaN()
Example #12
def postprocess(popn, x_inf, popn_true, x_true, options):
    """ Compute an ROC curve from a set of inferred samples and the true state
    """
    true_state = popn_true.eval_state(x_true)

    # Make sure we have a list of x's
    if not isinstance(x_inf, list):
        x_inf = [x_inf]

    inf_state = [popn.eval_state(x) for x in x_inf]


    # Check whether the inference model is a standard GLM or a network GLM,
    # then compute the true and false positive rates for the MAP estimate
    (map_tpr, map_fpr) = compute_roc_from_std_glm(true_state, map_state)
    map_tprs.append(map_tpr)
    map_fprs.append(map_fpr)

    print "Loading mcmc estimate"
    x_mcmc = None
    with open(os.path.join(data_dir, d, 'mcmc', 'results.pkl'), 'r') as f:
        x_mcmc = cPickle.load(f)


    model_mcmc = make_model('sparse_weighted_model', N=data['N'])
    popn_mcmc = Population(model_mcmc)
    popn_mcmc.set_data(data)

    # Evaluate the state
    mcmc_state = []
    for x in x_mcmc:
        mcmc_state.append(popn_mcmc.eval_state(x))


    # Now compute the true and false positive rates for MCMC
    # For MCMC results, only consider the tail of the samples
    N_samples = len(mcmc_state)
    sample_frac = 0.2
    start_smpl = int(np.floor(N_samples - sample_frac*N_samples))
    (mcmc_tpr, mcmc_fpr) = compute_roc_from_sparse_glm_smpls(true_state, mcmc_state[start_smpl:])
    mcmc_tprs.append(mcmc_tpr)
    mcmc_fprs.append(mcmc_fpr)


    # Pickle the roc results
    with open(PKL_FNAME, 'w') as f:
        # TODO Dump the MCMC results too
        cPickle.dump({'map_tprs' : map_tprs,
                      'map_fprs' : map_fprs},
                     f,
                     protocol=-1)

    # Plot the actual ROC curve
    # Subsample to get about 10 errorbars
    subsample = N*N//10
    f = plt.figure()
    ax = f.add_subplot(111)
    plot_roc_curve(map_tprs, map_fprs, ax=ax, color='b', subsample=subsample)
    # plot_roc_curve(mcmc_tprs, mcmc_fprs, ax=ax, color='r', subsample=subsample)
    fname = os.path.join(PLOTDIR,'roc_N%dT%d.pdf' % (N,T))
    print "Saving ROC to %s" % fname
    f.savefig(fname)
    plt.close(f)
Example #13
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):

                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample()
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn, popn.model, x0)
    
    use_existing = False
    
    if use_existing and  \
       os.path.exists(os.path.join(options.resultsDir, 'results.pkl')):

        print "Found existing results"
        with open(os.path.join(options.resultsDir, 'results.pkl')) as f:
            x_smpls = cPickle.load(f)
            N_samples = len(x_smpls)
    else:
        # Perform inference
        print "Performing parallel inference"
        N_samples = 1000
        x_smpls = parallel_gibbs_sample(client, data,
                                        x0=x0, N_samples=N_samples,
                                        save_interval=50, results_dir=options.resultsDir)
        
        # Save results
        print "Saving results to %s" % os.path.join(options.resultsDir, 'results.pkl')
        with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
            cPickle.dump(x_smpls, f, protocol=-1)

    # Plot average of last 20% of samples
    print "Plotting results"
    smpl_frac = 0.5

    # Only plot the impulse response matrix for small N
    do_plot = data['N'] < 20
    do_plot_imp_responses = data['N'] < 30

    if do_plot:
        plot_results(popn,
                    x_smpls[-1*int(smpl_frac*N_samples):],
                    popn_true,
                    x_true,
                    do_plot_imp_responses=do_plot_imp_responses,
                    resdir=options.resultsDir)
Example #14
def run_synth_test():
    """ Run a test with synthetic data and MAP inference with cross validation
    """
    options, popn, data, popn_true, x_true = initialize_test_harness()
    
    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # TODO Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0,T_split))
    xv_data = segment_data(data, (T_split,data['T']))
    
    # Sample random initial state
    x0 = popn.sample()

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i,model) in enumerate(models):
        print "Training model %d" % i
        x0 = copy.deepcopy(best_x)
        popn.set_hyperparameters(model)
        popn.set_data(train_data)
        ll0 = popn.compute_log_p(x0)
        print "Training LL0: %f" % ll0

        # Perform inference
        x_inf = coord_descent(popn, data, x0=x0, maxiter=1,
                              use_hessian=False,
                              use_rop=False)
        ll_train = popn.compute_log_p(x_inf)
        print "Training LL_inf: %f" % ll_train
        train_lls[i] = ll_train

        
        # Compute log lkhd on xv data
        popn.set_data(xv_data)
        ll_xv = popn.compute_ll(x_inf)
        print "Cross Validation LL: %f" % ll_xv
        xv_lls[i] = ll_xv

        # Compute log lkhd on total dataset
        popn.set_data(data)
        ll_total = popn.compute_ll(x_inf)
        print "Tota LL: %f" % ll_total
        total_lls[i] = ll_total

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)
        
    # Create a population with the best model
    popn.set_hyperparameters(best_model)
    popn.set_data(data)

    # Fit the best model on the full training data
    best_x = coord_descent(popn, data, x0=x0, maxiter=1,
                           use_hessian=False,
                           use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % popn.compute_ll(best_x)
    print "True LL: %f" % popn_true.compute_ll(x_true)

    # Save results
    results_file = os.path.join(options.resultsDir, 'results.pkl')
    print "Saving results to %s" % results_file
    with open(results_file, 'w') as f:
        cPickle.dump(best_x, f)

    # Plot results
    plot_results(popn, best_x, popn_true, x_true, resdir=options.resultsDir)
Example #15
if __name__ == '__main__':

    logging.basicConfig(
        format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s',
        datefmt='%d-%b-%y %H:%M:%S')
    logger = logging.getLogger("main")
    logger.setLevel(logging.INFO)

    t0 = timeit.default_timer()
    logger.info("Loading model weights")
    weights = [path.join(args.models_dir, m) for m in args.models]
    models = []
    num_tiles = int(args.num_tiles)
    for w in weights:
        model = make_model(args.network, (None, None, 3))
        logger.info("Building model {} from weights {} ".format(
            args.network, w))
        model.load_weights(w)
        models.append(model)
    os.makedirs(test_pred, exist_ok=True)
    #print('Predicting test')
    for d in tqdm(listdir(test_folder)):
        logger.info("Predicting Image: {}".format(d))
        fid = d
        full_img = cv2.imread(path.join(test_folder, fid),
                              cv2.IMREAD_COLOR)[..., ::-1]

        if num_tiles is None:
            num_tiles = 1
Example #16
def gibbs_sample(population, 
                 data, 
                 N_samples=1000,
                 x0=None, 
                 init_from_mle=True):
    """
    Sample the posterior distribution over parameters using MCMC.
    """
    N = population.model['N']

    # Draw initial state from prior if not given
    if x0 is None:
        x0 = population.sample()
        
        if init_from_mle:
            print "Initializing with coordinate descent"
            from models.model_factory import make_model, convert_model
            from population import Population
            mle_model = make_model('standard_glm', N=N)
            mle_popn = Population(mle_model)
            mle_popn.set_data(data)
            mle_x0 = mle_popn.sample()

            # Initialize with MLE under standard GLM
            mle_x0 = coord_descent(mle_popn, data, x0=mle_x0, maxiter=1)

            # Convert between inferred parameters of the standard GLM
            # and the parameters of this model. Eg. Convert unweighted 
            # networks to weighted networks with normalized impulse responses.
            x0 = convert_model(mle_popn, mle_model, mle_x0, population, population.model, x0)

    # Create updates for this population
    serial_updates, parallel_updates = initialize_updates(population)

    # DEBUG Profile the Gibbs sampling loop
    import cProfile, pstats, StringIO
    pr = cProfile.Profile()
    pr.enable()

    # Alternate fitting the network and fitting the GLMs
    x_smpls = [x0]
    x = x0

    import time
    start_time = time.clock()

    for smpl in np.arange(N_samples):
        # Print the current log likelihood
        lp = population.compute_log_p(x)

        # Compute iters per second
        stop_time = time.clock()
        if stop_time - start_time == 0:
            print "Gibbs iteration %d. Iter/s exceeds time resolution. Log prob: %.3f" % (smpl, lp)
        else:
            print "Gibbs iteration %d. Iter/s = %f. Log prob: %.3f" % (smpl,
                                                                       1.0/(stop_time-start_time),
                                                                       lp)
        start_time = stop_time

        # Go through each parallel MH update
        for parallel_update in parallel_updates:
            for n in np.arange(N):
                parallel_update.update(x, n)

        # Sample the serial updates
        for serial_update in serial_updates:
            serial_update.update(x)

        x_smpls.append(copy.deepcopy(x))

    pr.disable()
    s = StringIO.StringIO()
    sortby = 'cumulative'
    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
    ps.print_stats()

    with open('mcmc.prof.txt', 'w') as f:
        f.write(s.getvalue())
        f.close()

    return x_smpls
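A hypothetical driver for gibbs_sample(), wired together from the same pyglm-style helpers the function itself imports (make_model, Population); data is assumed to be a spike-data dict with an 'N' entry, as in the other examples here, loaded beforehand.

# Hypothetical usage sketch for gibbs_sample() above; `data` is assumed preloaded.
from models.model_factory import make_model
from population import Population

model = make_model('sparse_weighted_model', N=data['N'])
popn = Population(model)
popn.set_data(data)

x_smpls = gibbs_sample(popn, data, N_samples=200)      # list of posterior samples; x_smpls[0] is x0
print "Final log prob: %f" % popn.compute_log_p(x_smpls[-1])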
Example #17
def main(data_path='/data/SN6_buildings/train/AOI_11_Rotterdam/',
         config_path='/project/configs/senet154_gcc_fold1.py',
         gpu='0'):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    config = get_config(config_path)
    model_name = config['model_name']
    fold_number = config['fold_number']
    alias = config['alias']
    log_path = osp.join(config['logs_path'],
                        alias + str(fold_number) + '_' + model_name)

    device = torch.device(config['device'])
    weights = config['weights']
    loss_name = config['loss']
    optimizer_name = config['optimizer']
    lr = config['lr']
    decay = config['decay']
    momentum = config['momentum']
    epochs = config['epochs']
    fp16 = config['fp16']
    n_classes = config['n_classes']
    input_channels = config['input_channels']
    main_metric = config['main_metric']

    best_models_count = config['best_models_count']
    minimize_metric = config['minimize_metric']
    min_delta = config['min_delta']

    train_images = data_path
    data_type = config['data_type']
    masks_data_path = config['masks_data_path']
    folds_file = config['folds_file']
    train_augs = config['train_augs']
    preprocessing_fn = config['preprocessing_fn']
    limit_files = config['limit_files']
    batch_size = config['batch_size']
    shuffle = config['shuffle']
    num_workers = config['num_workers']
    valid_augs = config['valid_augs']
    val_batch_size = config['val_batch_size']
    multiplier = config['multiplier']

    train_dataset = SemSegDataset(images_dir=train_images,
                                  data_type=data_type,
                                  masks_dir=masks_data_path,
                                  mode='train',
                                  n_classes=n_classes,
                                  folds_file=folds_file,
                                  fold_number=fold_number,
                                  augmentation=train_augs,
                                  preprocessing=preprocessing_fn,
                                  limit_files=limit_files,
                                  multiplier=multiplier)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              num_workers=num_workers)

    valid_dataset = SemSegDataset(images_dir=train_images,
                                  data_type=data_type,
                                  mode='valid',
                                  folds_file=folds_file,
                                  n_classes=n_classes,
                                  fold_number=fold_number,
                                  augmentation=valid_augs,
                                  preprocessing=preprocessing_fn,
                                  limit_files=limit_files)

    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=val_batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    model = make_model(model_name=model_name,
                       weights=weights,
                       n_classes=n_classes,
                       input_channels=input_channels).to(device)

    loss = get_loss(loss_name=loss_name)
    optimizer = get_optimizer(optimizer_name=optimizer_name,
                              model=model,
                              lr=lr,
                              momentum=momentum,
                              decay=decay)

    if config['scheduler'] == 'reduce_on_plateau':
        print('reduce lr')
        alpha = config['alpha']
        patience = config['patience']
        threshold = config['thershold']
        min_lr = config['min_lr']
        mode = config['scheduler_mode']
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            factor=alpha,
            verbose=True,
            patience=patience,
            mode=mode,
            threshold=threshold,
            min_lr=min_lr)
    elif config['scheduler'] == 'steps':
        print('steps lr')
        steps = config['steps']
        step_gamma = config['step_gamma']
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                         milestones=steps,
                                                         gamma=step_gamma)
    else:
        scheduler = None

    callbacks = []

    dice_callback = DiceCallback()
    callbacks.append(dice_callback)
    callbacks.append(CheckpointCallback(save_n_best=best_models_count))
    callbacks.append(
        EarlyStoppingCallback(patience=config['early_stopping'],
                              metric=main_metric,
                              minimize=minimize_metric,
                              min_delta=min_delta))

    runner = SupervisedRunner(device=device)
    loaders = {'train': train_loader, 'valid': valid_loader}

    runner.train(model=model,
                 criterion=loss,
                 optimizer=optimizer,
                 loaders=loaders,
                 scheduler=scheduler,
                 callbacks=callbacks,
                 logdir=log_path,
                 num_epochs=epochs,
                 verbose=True,
                 main_metric=main_metric,
                 minimize_metric=minimize_metric,
                 fp16=fp16)
Example #18
def main(config_path='/project/configs/senet154_gcc_fold1.py',
         test_images='/data/SN6_buildings/test_public/AOI_11_Rotterdam/',
         test_predict_result='/wdata/folds_predicts/',
         batch_size=1,
         workers=1,
         gpu='1'):

    with torch.no_grad():

        config = get_config(config_path)
        model_name = config['model_name']
        weights_path = config['load_from']
        device = config['device']
        val_batch_size = batch_size
        input_channels = config['input_channels']

        original_size = config['original_size']
        cropper = albu.Compose(
            [albu.CenterCrop(original_size[0], original_size[1], p=1.0)])
        n_classes = config['n_classes']
        preprocessing_fn = config['preprocessing_fn']
        valid_augs = config['valid_augs']
        limit_files = config['limit_files']
        num_workers = workers
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        if not os.path.exists(test_predict_result):
            os.mkdir(test_predict_result)
        fold_name = weights_path.split('/')[-3]
        folder_to_save = os.path.join(test_predict_result, fold_name)
        if os.path.exists(folder_to_save):
            shutil.rmtree(folder_to_save)

        os.mkdir(folder_to_save)

        test_dataset = TestSemSegDataset(images_dir=os.path.join(
            test_images, 'SAR-Intensity'),
                                         preprocessing=preprocessing_fn,
                                         augmentation=valid_augs,
                                         limit_files=limit_files)

        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=val_batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)
        print('Loading {}'.format(weights_path))
        model = make_model(model_name=model_name,
                           weights=None,
                           n_classes=n_classes,
                           input_channels=input_channels).to(device)

        model.load_state_dict(torch.load(weights_path)['model_state_dict'])

        model.eval()
        model = tta.TTAWrapper(model, flip_image2mask)
        model = torch.nn.DataParallel(model).cuda()

        file_names = sorted(test_dataset.ids)

        for batch_i, test_batch in enumerate(tqdm(test_loader)):
            runner_out = model(test_batch.cuda())
            image_pred = runner_out

            image_pred = image_pred.cpu().detach().numpy()
            names = file_names[batch_i * val_batch_size:(batch_i + 1) *
                               val_batch_size]
            for i in range(len(names)):
                file_name = os.path.join(folder_to_save,
                                         names[i].split('.')[0] + '.png')

                data = image_pred[i, ...]
                data = np.moveaxis(data, 0, -1)
                sample = cropper(image=data)
                data = sample['image']
                data = (data * 255).astype(np.uint8)
                cv2.imwrite(file_name, data)
Example #20
def main(args):

    num_of_folds = len(os.listdir(args.data_path))
    print("Found {} number of folds".format(num_of_folds))
    for fold_index in range(num_of_folds):
        print("================================")
        print("Starting fold {}".format(fold_index))

        #Create dataset
        dataset = TGSDataset(data_path="{}/fold_{}".format(
            args.data_path, fold_index),
                             batch_size=args.batch_size)
        input_shape = (args.target_size, args.target_size)
        mask_shape = (101, 101)
        train_data_generator = dataset.get_train_data_generator(
            input_size=input_shape, mask_size=mask_shape, seed=args.seed)
        val_data_generator = dataset.get_val_data_generator(
            input_size=input_shape, mask_size=mask_shape, seed=args.seed)

        #Find best saved model
        best_model_file = 'weights/{}/fold_{}_{epoch}_best.h5'.format(
            args.model, fold_index, epoch='{epoch}')
        resume_from_epoch = 0
        for try_epoch in range(args.epochs, 0, -1):
            if os.path.exists(best_model_file.format(epoch=try_epoch)):
                resume_from_epoch = try_epoch
                break

        if resume_from_epoch > 0:
            print("Resuming from epoch {}".format(resume_from_epoch))
            model = load_model(best_model_file.format(epoch=resume_from_epoch),
                               custom_objects={'c_iou': metrics.c_iou})
        else:
            model = make_model(args.model,
                               (args.target_size, args.target_size, 3), 1)

        #Optimizer
        opt = adam(lr=args.learning_rate)

        #Compile model
        model.compile(loss=binary_crossentropy,
                      optimizer=opt,
                      metrics=[binary_accuracy, metrics.c_iou])

        #Keras callbacks
        callbacks = [
            keras.callbacks.TensorBoard(args.log_dir),
            keras.callbacks.ModelCheckpoint(best_model_file,
                                            save_best_only=True,
                                            save_weights_only=False),
            keras.callbacks.EarlyStopping(monitor='c_iou',
                                          patience=20,
                                          verbose=0,
                                          mode='max')
        ]

        train_step_size = dataset.train_step_size
        val_step_size = dataset.val_step_size

        history = model.fit_generator(train_data_generator,
                                      steps_per_epoch=train_step_size,
                                      callbacks=callbacks,
                                      epochs=args.epochs,
                                      verbose=args.v,
                                      workers=4,
                                      initial_epoch=resume_from_epoch,
                                      validation_data=val_data_generator,
                                      validation_steps=val_step_size)

        #Load weights
        resume_from_epoch = 0
        for try_epoch in range(args.epochs, 0, -1):
            if os.path.exists(best_model_file.format(epoch=try_epoch)):
                resume_from_epoch = try_epoch
                break

        if resume_from_epoch > 0:
            print("Resuming from epoch {}".format(resume_from_epoch))
            model_with_lovasz = load_model(
                best_model_file.format(epoch=resume_from_epoch),
                custom_objects={"c_iou": metrics.c_iou})
        else:
            model_with_lovasz = make_model(
                args.model, (args.target_size, args.target_size, 3), 1)
            #Lovasz Loss

        #Optimizer
        #Keras callbacks
        callbacks = [
            keras.callbacks.TensorBoard(args.log_dir),
            keras.callbacks.ModelCheckpoint(best_model_file,
                                            save_best_only=True,
                                            save_weights_only=False),
            keras.callbacks.EarlyStopping(monitor='c_iou_zero',
                                          mode='max',
                                          patience=20,
                                          verbose=0)
        ]

        train_data_generator = dataset.get_train_data_generator(
            input_size=input_shape, mask_size=mask_shape, seed=args.seed)
        val_data_generator = dataset.get_val_data_generator(
            input_size=input_shape, mask_size=mask_shape, seed=args.seed)

        model_with_lovasz = Model(model_with_lovasz.layers[0].input,
                                  model_with_lovasz.layers[-1].input)
        opt = adam(lr=args.learning_rate)
        model_with_lovasz.compile(
            loss=losses.c_lovasz_loss,
            optimizer=opt,
            metrics=[binary_accuracy, metrics.c_iou_zero])
        print("Fine tuning with lovasz loss")
        model_with_lovasz.fit_generator(train_data_generator,
                                        steps_per_epoch=train_step_size,
                                        callbacks=callbacks,
                                        epochs=args.epochs,
                                        verbose=args.v,
                                        workers=4,
                                        initial_epoch=resume_from_epoch,
                                        validation_data=val_data_generator,
                                        validation_steps=val_step_size)

        # Evaluate the model on the validation data set.
        score = model_with_lovasz.evaluate_generator(val_data_generator,
                                                     val_step_size)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
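The Model(model_with_lovasz.layers[0].input, model_with_lovasz.layers[-1].input) call above re-wires the network so its output is the input of the final layer, i.e. the pre-sigmoid logits that the Lovasz loss expects. A minimal sketch of that slicing pattern on a toy head (the toy layers are hypothetical; only the re-wiring mirrors the example):

# Toy segmentation head ending in a sigmoid activation.
from keras.models import Model
from keras.layers import Input, Conv2D, Activation

inp = Input((None, None, 3))
logits = Conv2D(1, (1, 1), name='logits')(inp)
probs = Activation('sigmoid', name='probs')(logits)
seg_model = Model(inp, probs)

# Re-wire: the new model's output is the *input* of the last layer, i.e. the logits.
logit_model = Model(seg_model.layers[0].input,
                    seg_model.layers[-1].input)
print(logit_model.output_shape)                         # (None, None, None, 1)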
Example #21
def fit_latent_network_to_mle():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, popn_true, x_true = initialize_test_harness()

    import pdb; pdb.set_trace()
    # Load MLE parameters from command line
    mle_x = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            mle_x = cPickle.load(f)

            mle_model = make_model('standard_glm', N=data['N'])
            mle_popn = Population(mle_model)
            mle_popn.set_data(data)

    # Create a location sampler
    print "Initializing latent location sampler"
    loc_sampler = LatentDistanceNetworkUpdate()
    loc_sampler.preprocess(popn)

    # Convert the mle results into a weighted adjacency matrix
    x_aw = popn.sample()
    x_aw = convert_model(mle_popn, mle_model, mle_x, popn, popn.model, x_aw)

    # Get rid of unnecessary keys
    del x_aw['glms']

    # Fit the latent distance network to a thresholded adjacency matrix
    ws = np.sort(np.abs(x_aw['net']['weights']['W']))

    wperm = np.argsort(np.abs(x_aw['net']['weights']['W']))
    nthrsh = 20
    threshs = np.arange(ws.size, step=ws.size/nthrsh)

    res = []

    N = popn.N
    for th in threshs:
        print "Fitting network for threshold: %.3f" % th
        A = np.zeros_like(ws, dtype=np.int8)
        A[wperm[th:]] = 1
        A = A.reshape((N,N))
        # A = (np.abs(x_aw['net']['weights']['W']) >= th).astype(np.int8).reshape((N,N))

        # Make sure the diag is still all 1s
        A[np.diag_indices(N)] = 1

        x = copy.deepcopy(x_aw)
        x['net']['graph']['A'] = A
        smpls = fit_latent_network_given_A(x, loc_sampler)

        # Index the results by the overall sparsity of A
        key = (np.sum(A)-N) / (np.float(np.size(A))-N)
        res.append((key, smpls))

    # Save results
    results_file = os.path.join(options.resultsDir, 'fit_latent_network_results.pkl')
    print "Saving results to %s" % results_file
    with open(results_file, 'w') as f:
        cPickle.dump(res, f)
Example #22
def main():
    if args.crop_size:
        print('Using crops of shape ({}, {})'.format(args.crop_size,
                                                     args.crop_size))
    else:
        print('Using full size images')
    folds = [int(f) for f in args.fold.split(",")]
    for fold in folds:
        channels = 3
        if args.multi_gpu:
            with K.tf.device("/cpu:0"):
                model = make_model(args.network, (None, None, 3))
        else:
            model = make_model(args.network, (None, None, channels))
        if args.weights is None:
            print('No weights passed, training from scratch')
        else:
            weights_path = args.weights.format(fold)
            print('Loading weights from {}'.format(weights_path))
            model.load_weights(weights_path, by_name=True)
        freeze_model(model, args.freeze_till_layer)
        optimizer = RMSprop(lr=args.learning_rate)
        if args.optimizer:
            if args.optimizer == 'rmsprop':
                optimizer = RMSprop(lr=args.learning_rate,
                                    decay=float(args.decay))
            elif args.optimizer == 'adam':
                optimizer = Adam(lr=args.learning_rate,
                                 decay=float(args.decay))
            elif args.optimizer == 'amsgrad':
                optimizer = Adam(lr=args.learning_rate,
                                 decay=float(args.decay),
                                 amsgrad=True)
            elif args.optimizer == 'sgd':
                optimizer = SGD(lr=args.learning_rate,
                                momentum=0.9,
                                nesterov=True,
                                decay=float(args.decay))
        dataset = DSB2018BinaryDataset(args.images_dir,
                                       args.masks_dir,
                                       args.labels_dir,
                                       fold,
                                       args.n_folds,
                                       seed=args.seed)
        random_transform = aug_mega_hardcore()
        train_generator = dataset.train_generator(
            (args.crop_size, args.crop_size),
            args.preprocessing_function,
            random_transform,
            batch_size=args.batch_size)
        val_generator = dataset.val_generator(args.preprocessing_function,
                                              batch_size=1)
        best_model_file = '{}/best_{}{}_fold{}.h5'.format(
            args.models_dir, args.alias, args.network, fold)

        best_model = ModelCheckpointMGPU(model,
                                         filepath=best_model_file,
                                         monitor='val_loss',
                                         verbose=1,
                                         mode='min',
                                         period=args.save_period,
                                         save_best_only=True,
                                         save_weights_only=True)
        last_model_file = '{}/last_{}{}_fold{}.h5'.format(
            args.models_dir, args.alias, args.network, fold)

        last_model = ModelCheckpointMGPU(model,
                                         filepath=last_model_file,
                                         monitor='val_loss',
                                         verbose=1,
                                         mode='min',
                                         period=args.save_period,
                                         save_best_only=False,
                                         save_weights_only=True)
        if args.multi_gpu:
            model = multi_gpu_model(model, len(gpus))
        model.compile(
            loss=make_loss(args.loss_function),
            optimizer=optimizer,
            metrics=[binary_crossentropy, hard_dice_coef_ch1, hard_dice_coef])

        def schedule_steps(epoch, steps):
            for step in steps:
                if step[1] > epoch:
                    print("Setting learning rate to {}".format(step[0]))
                    return step[0]
            print("Setting learning rate to {}".format(steps[-1][0]))
            return steps[-1][0]

        callbacks = [best_model, last_model]

        if args.schedule is not None:
            steps = [(float(step.split(":")[0]), int(step.split(":")[1]))
                     for step in args.schedule.split(",")]
            lrSchedule = LearningRateScheduler(
                lambda epoch: schedule_steps(epoch, steps))
            callbacks.insert(0, lrSchedule)
        tb = TensorBoard("logs/{}_{}".format(args.network, fold))
        callbacks.append(tb)
        steps_per_epoch = len(dataset.train_ids) / args.batch_size + 1
        if args.steps_per_epoch > 0:
            steps_per_epoch = args.steps_per_epoch
        validation_data = val_generator
        validation_steps = len(dataset.val_ids)

        model.fit_generator(train_generator,
                            steps_per_epoch=steps_per_epoch,
                            epochs=args.epochs,
                            validation_data=validation_data,
                            validation_steps=validation_steps,
                            callbacks=callbacks,
                            max_queue_size=5,
                            verbose=1,
                            workers=args.num_workers)

        del model
        K.clear_session()
        gc.collect()
Example #23
def run_gen_synth_data():
    """ Run a test with synthetic data and MCMC inference
    """
    options, args = parse_cmd_line_args()
    
    # Create the model
    model = make_model(options.model, N=options.N)
    # Set the sparsity level to minimize the risk of unstable networks
    stabilize_sparsity(model)

    print "Creating master population object"
    popn = Population(model)

    # Sample random parameters from the model
    x_true = popn.sample()

    # Check stability of matrix
    assert check_stability(model, x_true, options.N), "ERROR: Sampled network is unstable!"


    # Save the model so it can be loaded alongside the data
    fname_model = os.path.join(options.resultsDir, 'model.pkl')
    print "Saving data to %s" % fname_model
    with open(fname_model,'w') as f:
        cPickle.dump(model, f, protocol=-1)

    print "Generating synthetic data with %d neurons and %.2f seconds." % \
          (options.N, options.T_stop)

    # Set simulation parameters
    dt = 0.001
    dt_stim = 0.1
    D_stim = 1
    stim = np.random.randn(options.T_stop/dt_stim, D_stim)

    data = gen_synth_data(options.N, options.T_stop, popn, x_true, dt, dt_stim, D_stim, stim)
    # Set the data so that the population state can be evaluated
    popn.set_data(data)
    
    # DEBUG Evaluate the firing rate and the simulated firing rate
    state = popn.eval_state(x_true)
    for n in np.arange(options.N):
        lam_true = state['glms'][n]['lam']
        lam_sim =  popn.glm.nlin_model.f_nlin(data['X'][:,n])
        assert np.allclose(lam_true, lam_sim)

    # Save the data for reuse
    #fname_mat = os.path.join(options.resultsDir, 'data.mat')
    #print "Saving data to %s" % fname_mat
    #scipy.io.savemat(fname_mat, data, oned_as='row')
        
    # Pickle the data so we can open it more easily
    fname_pkl = os.path.join(options.resultsDir, 'data.pkl')
    print "Saving data to %s" % fname_pkl
    with open(fname_pkl,'w') as f:
        cPickle.dump(data, f, protocol=-1)

    # Plot firing rates, stimulus responses, etc
    plot_results(popn, data['vars'],
                 resdir=options.resultsDir,
                 do_plot_stim_resp=False,
                 do_plot_imp_responses=False)