Exemplo n.º 1
0
def run_me(location_dir, max_t, max_d):
    """Fit a SeppStochasticNn model to pickled simulation data and save it.

    Reads ``IN_DIR/location_dir/simulation.pickle``, trains a stochastic
    nearest-neighbour SEPP model, and pickles the model (in whatever state
    it reached, even if training failed) to ``OUT_DIR/location_dir``.
    Progress is logged to a per-run log file in the output directory.

    :param location_dir: subdirectory name under both IN_DIR and OUT_DIR
    :param max_t: maximum trigger time window; also embedded in filenames
    :param max_d: maximum trigger distance; also embedded in filenames
    """
    data_file = os.path.join(IN_DIR, location_dir, 'simulation.pickle')
    out_dir = os.path.join(OUT_DIR, location_dir)
    log_file = os.path.join(out_dir,
                            'simulation_%.2f-%.2f.log' % (max_t, max_d))

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # set loggers
    logger = logging.getLogger('simulation_study')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # route the library loggers into the same log file
    for t in ('point_process.models', 'kde.models'):
        this_logger = logging.getLogger(t)
        this_logger.setLevel(logging.DEBUG)
        this_logger.handlers = []  # clear existing handlers
        this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")
    logger.info("Simulation study.")

    # load data -- pickle files must be opened in BINARY mode ('rb', not 'r')
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    logger.info("Loaded data.")

    logger.info("Instantiating SEPP object")

    # bg_kde_kwargs / trigger_kde_kwargs / niter are module-level settings
    # defined outside this function -- TODO confirm against file header
    r = pp_models.SeppStochasticNn(data=data,
                                   max_delta_d=max_d,
                                   max_delta_t=max_t,
                                   bg_kde_kwargs=bg_kde_kwargs,
                                   trigger_kde_kwargs=trigger_kde_kwargs)
    p = estimation.estimator_bowers(data, r.linkage)
    r.p = p
    r.set_seed(42)

    try:
        logger.info("Starting training run.")
        r.train(niter=niter)
    except Exception as exc:
        # logger.exception records the full traceback, not just the repr
        logger.exception(repr(exc))
    finally:
        file_stem = os.path.join(out_dir,
                                 'simulation_%.2f-%.2f' % (max_t, max_d))
        logger.info("Saving results (or None).")
        # binary mode is required by pickle.dump ('wb', not 'w')
        with open(file_stem + '-sepp_obj.pickle', 'wb') as f:
            pickle.dump(r, f)
Exemplo n.º 2
0
def different_time_kernel():
    """Compare a plain normal time kernel with its reflective counterpart.

    First plots the two analytic kernel densities (plus the mirrored
    negative-time tail of the plain kernel), then trains one normal and one
    reflective SEPP model on the same Mohler simulation and plots the
    inferred trigger time densities against the true exponential trigger.
    """
    mean = 1.
    sigma = 1.
    times = np.linspace(-3, 5, 500)
    kern_plain = k_models.kernels.MultivariateNormal([mean], [sigma])
    kern_refl = k_models.kernels.SpaceTimeNormalReflective([mean], [sigma])

    dens_plain = kern_plain.pdf(times)
    dens_refl = kern_refl.pdf(times)

    # mirrored negative-time portion of the plain kernel
    neg_times = times[times < 0][::-1]
    dens_neg = kern_plain.pdf(neg_times)

    import seaborn as sns
    sns.set_context("paper", font_scale=2.0)
    plt.figure(figsize=(8, 6))
    plt.plot(times, dens_plain, 'k')
    plt.plot(times, dens_refl, 'r-')
    plt.plot(-neg_times, dens_neg, 'r--')
    plt.legend(('Normal', 'Reflective'))
    plt.xlabel('Time (days)')
    plt.ylabel('Density')

    # train both model variants on the same simulated dataset
    sim = simulate.MohlerSimulation()
    sim.run()
    data = sim.data
    max_delta_t = 100
    max_delta_d = 1.
    init_est_params = {
        'ct': 10,
        'cd': .05,
        'frac_bg': 0.5,
    }
    model_plain = pp_models.SeppStochasticNn(
        data=data, max_delta_d=max_delta_d, max_delta_t=max_delta_t)
    model_refl = pp_models.SeppStochasticNnReflected(
        data=data, max_delta_d=max_delta_d, max_delta_t=max_delta_t)
    model_plain.train(niter=20)
    model_refl.train(niter=20)

    # inferred marginal trigger time densities, scaled per datum
    times = np.linspace(-10, 60, 500)
    dens_a = model_plain.trigger_kde.marginal_pdf(
        times, dim=0, normed=False) / float(model_plain.ndata)
    dens_b = model_refl.trigger_kde.marginal_pdf(
        times, dim=0, normed=False) / float(model_refl.ndata)
    decay = sim.trigger_params['time_decay']
    intensity = sim.trigger_params['intensity']
    dens_true = intensity * decay * np.exp(-decay * times)
    dens_true[times < 0] = 0

    plt.figure(figsize=(8, 6))
    plt.plot(times, dens_a, 'k-')
    plt.plot(times, dens_b, 'r-')
    plt.plot(times, dens_true, 'k--')
    plt.legend(('Inferred, normal', 'Inferred, reflective', 'True'))
    plt.xlabel('Time (days)')
    plt.ylabel('Density')
Exemplo n.º 3
0
def simulation_mohler():
    """Run a Mohler-style simulation and fit a stochastic NN SEPP model.

    Trains for 50 iterations, starting from an exponential-time /
    Gaussian-space initial estimate, then draws the standard diagnostic
    multiplot comparing the fit with the known simulation parameters.
    """
    sim = simulate.MohlerSimulation()
    sim.run()

    def first_guess(x, y):
        # exponential time decay / Gaussian space initial probability matrix
        return estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=0.1)

    model = pp_models.SeppStochasticNn(data=sim.data,
                                       max_delta_d=1.,
                                       max_delta_t=100.,
                                       estimation_function=first_guess)
    model.train(niter=50)
    pp_plotting.multiplots(model, sim)
Exemplo n.º 4
0
def apply_sepp_stochastic_nn(data,
                             data_index,
                             domain,
                             grid_squares=None,
                             max_t=90,
                             max_d=500,
                             num_nn=(100, 15),
                             niter_training=50,
                             num_sample_points=10,
                             seed=43):
    """Train a stochastic NN SEPP model and validate it over a spatial domain.

    :param data: input data array for the SEPP model
    :param data_index: per-datum index passed through to the validator
    :param domain: spatial domain over which validation is carried out
    :param grid_squares: optional predefined sample units; when omitted the
        validator generates its own sample units of size 250
    :param max_t: maximum trigger time window
    :param max_d: maximum trigger distance
    :param num_nn: nearest-neighbour counts; the last entry feeds the trigger
        KDE, the full value feeds the background KDE
    :param niter_training: training iterations per validation step
    :param seed: PRNG seed for the SEPP model
    :return: result structure from the validator's run method
    """
    def initial_estimate(x, y):
        # fixed-proportion background initial estimator
        return estimation.estimator_bowers_fixed_proportion_bg(
            x, y, ct=1, cd=10, frac_bg=0.5)

    sepp_model = pp_models.SeppStochasticNn(
        data=data,
        max_delta_t=max_t,
        max_delta_d=max_d,
        seed=seed,
        estimation_function=initial_estimate,
        trigger_kde_kwargs={'strict': False, 'number_nn': num_nn[-1]},
        bg_kde_kwargs={'strict': True, 'number_nn': num_nn})

    validator = validate.SeppValidationFixedModelIntegration(
        data=data,
        model=sepp_model,
        data_index=data_index,
        spatial_domain=domain,
        cutoff_t=INITIAL_CUTOFF + T0)

    # either use the caller-supplied grid or let the validator build one
    if grid_squares:
        validator.roc.set_sample_units_predefined(grid_squares,
                                                  num_sample_points)
    else:
        validator.set_sample_units(250, num_sample_points)

    return validator.run(time_step=1,
                         n_iter=100,
                         verbose=True,
                         train_kwargs={'niter': niter_training})
Exemplo n.º 5
0
# Centre the spatial columns (columns 1:) on their mean, then rotate the
# coordinate frame by 45 degrees via rot_mat.  Column 0 (presumably time --
# TODO confirm against the code that builds `training`) is left untouched.
training_xy = training[:, 1:] - training[:, 1:].mean(axis=0)
training_xy = np.dot(rot_mat(np.pi / 4.), training_xy.transpose()).transpose()

# write the rotated coordinates back in place
training[:, 1:] = training_xy

# Alternative model variants previously tried, kept for reference:
# sepp_isotropic = pp_models.SeppStochasticNnIsotropicTrigger(data=training, **model_kwargs)
# ps_isotropic = sepp_isotropic.train(niter=niter)

# sepp_local = pp_models.LocalSeppDeterministicNn(data=training, **model_kwargs)
# ps_local = sepp_local.train(niter=niter)

# sepp_det = pp_models.SeppDeterministicNn(data=training, **model_kwargs)
# ps_det = sepp_det.train(niter=niter)

# Active model: stochastic NN SEPP with the trigger KDE class swapped for one
# constrained to non-negative times.
sepp_xy = pp_models.SeppStochasticNn(data=training, **model_kwargs)
sepp_xy.trigger_kde_class = k_models.VariableBandwidthNnTimeGteZeroKde
ps_xy = sepp_xy.train(niter=niter)

# Fixed-bandwidth alternatives, kept for reference:
# sepp_fixed = pp_models.SeppStochasticPluginBandwidth(data=training, **model_kwargs)
# model_kwargs['bg_kde_kwargs']['bandwidths'] = [1., 10., 10.]
# model_kwargs['trigger_kde_kwargs']['bandwidths'] = [1., 10., 10.]
# sepp_fixed = pp_models.SeppStochastic(data=training, **model_kwargs)
# ps_fixed = sepp_fixed.train(niter=niter)

from scipy.stats import wilcoxon

## standardise the data
# s = np.std(training, axis=0, ddof=1)

## to preserve scaling, use same std on both X and Y
        # NOTE(review): excerpt of a larger script -- the enclosing try block,
        # s0/s1 (grids of candidate trigger sigmas), sepp_obj, t_total,
        # num_iter, the KDE kwargs and LOCK_FILE are all defined above this
        # view.  Sweep every candidate sigma pair, simulating + fitting each.
        for i in range(s0.size):
            # canonical (sorted) sigma pair; its tuple is the results key
            tt = sorted([s0.flat[i], s1.flat[i]])
            if tuple(tt) in sepp_obj:
                continue  # already simulated and fitted
            c = simulate.MohlerSimulation()
            c.t_total = t_total
            c.bg_params[0]['sigma'] = [1., 1.]
            c.bg_params[0]['intensity'] = 5
            c.trigger_sigma = list(tt)
            # initial estimate scaled by the mean of the candidate sigmas
            init_est = lambda d, t: estimation.estimator_exp_gaussian(
                d, t, ct=0.1, cd=np.mean(tt))
            c.run()
            data = c.data
            r = models.SeppStochasticNn(data=data,
                                        max_delta_d=max_delta_d,
                                        max_delta_t=max_delta_t,
                                        estimation_function=init_est,
                                        seed=42,
                                        bg_kde_kwargs=bg_kde_kwargs,
                                        trigger_kde_kwargs=trigger_kde_kwargs)
            try:
                _ = r.train(niter=num_iter)
            except Exception:
                # best-effort sweep: skip sigma pairs whose training fails
                continue
            sepp_obj[tuple(tt)] = copy.deepcopy(r)

            # persist each fitted model as soon as it completes
            fullfile = os.path.join(OUT_DIR,
                                    '%.2f_%.2f.pickle' % (tt[0], tt[1]))
            r.pickle(fullfile)
    finally:
        # always release the lock file, even on error
        os.remove(LOCK_FILE)
Exemplo n.º 7
0
        # NOTE(review): fragment -- this line completes an estimation-function
        # lambda whose opening line lies above this excerpt; data, max_t,
        # max_d and seed are likewise defined outside this view.
        x, y, ct=1, cd=10, frac_bg=0.5)
    # trigger_kde_kwargs = {'strict': False}
    # fixed trigger bandwidths, ordered [time, x, y] -- TODO confirm ordering
    trigger_kde_kwargs = {'bandwidths': [10, 40, 40]}
    bg_kde_kwargs = {'strict': False}

    # Alternative: fixed-trigger variant, kept for reference:
    # sepp = pp_models.SeppStochasticNnBgFixedTrigger(data=data,
    #                                                 max_delta_t=max_t,
    #                                                 max_delta_d=max_d,
    #                                                 seed=seed,
    #                                                 estimation_function=est_fun,
    #                                                 trigger_kde_kwargs=trigger_kde_kwargs,
    #                                                 bg_kde_kwargs=bg_kde_kwargs)

    # Active model: stochastic NN SEPP
    sepp = pp_models.SeppStochasticNn(data=data,
                                      max_delta_t=max_t,
                                      max_delta_d=max_d,
                                      seed=seed,
                                      estimation_function=est_fun,
                                      trigger_kde_kwargs=trigger_kde_kwargs,
                                      bg_kde_kwargs=bg_kde_kwargs)

    # Alternative: isotropic reflected trigger, kept for reference:
    # sepp = pp_models.SeppStochasticNnIsotropicReflectedTrigger(data=data,
    #                                   max_delta_t=max_t,
    #                                   max_delta_d=max_d,
    #                                   seed=seed,
    #                                   estimation_function=est_fun,
    #                                   trigger_kde_kwargs=trigger_kde_kwargs,
    #                                   bg_kde_kwargs=bg_kde_kwargs)

    ps = sepp.train(niter=50)
Exemplo n.º 8
0
    # data[:, 1:] /= scaling
    # NOTE(review): scaling is disabled (set to 1), so the divisions below
    # are currently no-ops.
    scaling = 1.

    # est_fun = lambda x, y: estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=50, frac_bg=0.8)
    # initial estimator: exponential time decay / Gaussian space
    est_fun = lambda x, y: estimation.estimator_exp_gaussian(
        x, y, ct=0.1, cd=50 / scaling, frac_bg=None)

    # Sweep the nearest-neighbour settings, fitting and pickling one model
    # per configuration.  nns, data, max_t, max_d, niter, outdir and
    # res_chic_n are defined outside this excerpt.
    for num_nn in nns:
        # last entry -> trigger KDE; full list -> background KDE
        trigger_kde_kwargs = {'strict': False, 'number_nn': num_nn[-1]}
        bg_kde_kwargs = {'strict': False, 'number_nn': list(num_nn)}

        sepp = pp_models.SeppStochasticNn(
            data=data,
            max_delta_t=max_t,
            max_delta_d=max_d / scaling,
            seed=42,
            estimation_function=est_fun,
            trigger_kde_kwargs=trigger_kde_kwargs,
            bg_kde_kwargs=bg_kde_kwargs,
            remove_coincident_pairs=True)
        sepp.train(niter=niter)
        # persist to disk and keep an in-memory copy of each fitted model
        filename = 'nn_%d_%d.pickle' % num_nn
        sepp.pickle(os.path.join(outdir, filename))
        res_chic_n[num_nn] = copy.deepcopy(sepp)

    # settings for the subsequent run (continues beyond this excerpt)
    trigger_kde_kwargs = {
        'strict': False,
    }
    bg_kde_kwargs = {
        'strict': True,
    }
Exemplo n.º 9
0
# Set up the training + validation window.  start_date, training_size,
# num_validation, model_kwargs, niter and south are defined outside this
# excerpt.
pre_start_date = start_date - datetime.timedelta(days=training_size)
ndays = training_size + num_validation

# end date is the last date retrieved from the database of crimes
# have to cast this to a date since the addition operation automatically produces a datetime
end_date = start_date + datetime.timedelta(days=num_validation - 1)

# fetch burglary records spanning the training and validation periods
res, t0, cid = chicago.get_crimes_by_type(
    crime_type='burglary',
    start_date=pre_start_date,
    end_date=end_date,
)

# train a model
# Keep only records before the cutoff; column 0 is presumably the time
# offset in days -- TODO confirm against get_crimes_by_type.
training_data = res[res[:, 0] < training_size]
r = models.SeppStochasticNn(data=training_data, **model_kwargs)
r.train(niter=niter)

# centroid method

vb_centroid = {}
res_centroid = {}

# validate with sample units of size 500 (units unspecified here -- likely
# metres; verify), one-day steps, one iteration per validation day
vb_centroid_500 = validate.SeppValidationPredefinedModel(
    data=res, model=r, spatial_domain=south, cutoff_t=training_size)
vb_centroid_500.set_sample_units(500)
res_centroid[500] = vb_centroid_500.run(time_step=1,
                                        n_iter=num_validation,
                                        verbose=True)
vb_centroid[500] = vb_centroid_500