def simulation_mohler():
    c = simulate.MohlerSimulation()
    c.run()
    data = c.data
    max_delta_t = 100.
    max_delta_d = 1.
    initial_est = lambda x, y: estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=0.1)
    r = pp_models.SeppStochasticNn(data=data,
                                   max_delta_d=max_delta_d,
                                   max_delta_t=max_delta_t,
                                   estimation_function=initial_est)
    r.train(niter=50)
    pp_plotting.multiplots(r, c)
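# A hedged quick-run variant of the routine above (not part of the original
# script): same pipeline, but with a fixed seed and fewer EM iterations, which
# is convenient as a smoke test before committing to the full 50-iteration run.
# The function name and argument defaults are illustrative assumptions.
def simulation_mohler_quick(niter=10, seed=42):
    c = simulate.MohlerSimulation()
    c.run()
    initial_est = lambda x, y: estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=0.1)
    r = pp_models.SeppStochasticNn(data=c.data,
                                   max_delta_d=1.,
                                   max_delta_t=100.,
                                   estimation_function=initial_est,
                                   seed=seed)
    r.train(niter=niter)
    return r, c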
def validate_point_process(
        start_date=datetime.date(2011, 3, 1),
        end_date=datetime.date(2012, 3, 31),
        initial_cutoff=212,
        num_validation=100,
        num_pp_iter=100,
        grid=250,
        n_sample_per_grid=20,
        domain=None,
        pp_class=pp_models.SeppStochasticNn,
        model_kwargs=None):

    if not model_kwargs:
        model_kwargs = {
            'max_delta_t': 150,
            'max_delta_d': 500,
            'bg_kde_kwargs': {'number_nn': [101, 16]},
            'trigger_kde_kwargs': {'number_nn': 15},
            'remove_coincident_pairs': False,
            'estimation_function': lambda x, y: estimation.estimator_exp_gaussian(
                x, y, 0.1, 50, frac_bg=None),
            'seed': 42,
        }

    if start_date + datetime.timedelta(days=initial_cutoff + num_validation) > end_date:
        warnings.warn("Requested number of validation runs is too large for the data size")

    if domain is None:
        domain = compute_chicago_region()

    data, t0, cid = get_crimes_by_type(
        crime_type='burglary',
        start_date=start_date,
        end_date=end_date,
        domain=domain,
    )

    sepp = pp_class(data=data, **model_kwargs)
    vb = validate.SeppValidationFixedModelIntegration(data,
                                                      spatial_domain=domain,
                                                      cutoff_t=initial_cutoff)
    vb.set_sample_units(grid, n_sample_per_grid=n_sample_per_grid)

    # TODO: check the number of iterations reported is as expected here
    res = vb.run(time_step=1,
                 n_iter=num_validation,
                 train_kwargs={'niter': num_pp_iter},
                 verbose=True)
    return res, vb
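# Hedged usage sketch (assumes the same module context as validate_point_process):
# a shorter validation run over the default Chicago burglary data. The argument
# values are illustrative, not the settings used for any reported results.
def example_validation_run():
    res, vb = validate_point_process(initial_cutoff=212,
                                     num_validation=30,
                                     num_pp_iter=50,
                                     grid=250,
                                     n_sample_per_grid=20)
    return res, vb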
def construct_sepp(data,
                   sepp_class=pp_models.SeppStochasticNn,
                   seed=42,
                   max_delta_t=60,
                   max_delta_d=500,
                   initial_est=None,
                   remove_coincident_pairs=False,
                   bg_kde_kwargs=None,
                   trigger_kde_kwargs=None,
                   min_bandwidth=None):
    if initial_est is None:
        # define initial estimator
        initial_est = lambda x, y: estimation.estimator_exp_gaussian(
            x, y, ct=0.1, cd=50)
    if bg_kde_kwargs is None:
        bg_kde_kwargs = {'strict': False}
        if min_bandwidth is not None:
            bg_kde_kwargs['min_bandwidth'] = min_bandwidth
    if trigger_kde_kwargs is None:
        trigger_kde_kwargs = {'strict': False}
        if min_bandwidth is not None:
            trigger_kde_kwargs['min_bandwidth'] = min_bandwidth
    r = sepp_class(data=data,
                   max_delta_d=max_delta_d,
                   max_delta_t=max_delta_t,
                   bg_kde_kwargs=bg_kde_kwargs,
                   trigger_kde_kwargs=trigger_kde_kwargs,
                   estimation_function=initial_est,
                   remove_coincident_pairs=remove_coincident_pairs,
                   seed=seed)
    return r
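# Hedged usage sketch for construct_sepp (not part of the original script):
# `crime_data` stands for a (t, x, y) data array such as that returned by
# get_crimes_by_type, and the bandwidth values are the "suggested" ones quoted
# elsewhere in these scripts rather than tuned settings. numpy is assumed to be
# imported as np, as in the other scripts here.
def example_construct_and_train(crime_data):
    r = construct_sepp(crime_data,
                       max_delta_t=90,
                       max_delta_d=400,
                       min_bandwidth=np.array([0.3, 5., 5.]))
    r.train(niter=50)
    return r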
    # (tail of the model_kwargs dict passed to each SEPP fit; cf. simulate_data_and_train below)
    'max_delta_t': 90,   # set on each iteration
    'max_delta_d': 500,  # set on each iteration
    'bg_kde_kwargs': {
        'number_nn': [100, 15],
        'min_bandwidth': None,
        'strict': False
    },
    'trigger_kde_kwargs': {
        'number_nn': 15,
        'min_bandwidth': None,
        'strict': False
    },
    'estimation_function': lambda x, y: estimation.estimator_exp_gaussian(x, y, **estimate_kwargs),
    'seed': 42,  # doesn't matter what this is, just want it fixed
    'remove_coincident_pairs': False
}

for col_space in col_spacings:
    print "row_space: %d, col_space: %d" % (row_space, col_space)

    # store containers
    data = []
    sepp = []
    for i in range(niter):
        net = create_grid_network(domain_extent, row_space, col_space)
def apply_point_process(start_date=datetime.datetime(2010, 3, 1, 0),
                        end_date=datetime.datetime(2010, 6, 1, 0),
                        domain=None,
                        niter=15,
                        min_bandwidth=None,
                        max_delta_t=40,
                        max_delta_d=300,
                        num_nn=None,
                        estimate_kwargs=None,
                        pp_class=pp_models.SeppStochasticNnReflected,
                        seed=42,
                        remove_coincident_pairs=False):

    print "Getting data..."
    res, t0, cid = get_crimes_by_type(
        crime_type='burglary',
        start_date=start_date,
        end_date=end_date,
        domain=domain
    )

    # suggested minimum bandwidths:
    # if min_bandwidth is None:
    #     min_bandwidth = np.array([0.3, 5., 5.])

    if num_nn is not None:
        if len(num_nn) != 2:
            raise AttributeError("Must supply two num_nn values: [1D case, 2/3D case]")
        num_nn_bg = num_nn
        num_nn_trig = num_nn[1]
    else:
        num_nn_bg = [101, 16]
        num_nn_trig = 15

    bg_kde_kwargs = {
        'number_nn': num_nn_bg,
        'strict': False,
    }

    trigger_kde_kwargs = {
        'min_bandwidth': min_bandwidth,
        'number_nn': num_nn_trig,
        'strict': False,
    }

    if not estimate_kwargs:
        estimate_kwargs = {
            'ct': 0.1,
            'cd': 50,
            'frac_bg': None
        }

    print "Instantiating SEPP class..."
    r = pp_class(data=res,
                 max_delta_d=max_delta_d,
                 max_delta_t=max_delta_t,
                 bg_kde_kwargs=bg_kde_kwargs,
                 trigger_kde_kwargs=trigger_kde_kwargs,
                 remove_coincident_pairs=remove_coincident_pairs)
    # alternative SEPP classes (SeppStochasticNn, SeppStochasticNnReflected,
    # SeppStochasticNnOneSided) can be selected via the pp_class argument

    print "Computing initial probability matrix estimate..."
    p = estimation.estimator_exp_gaussian(res, r.linkage, **estimate_kwargs)
    r.p = p

    # train on ALL data
    if seed:
        r.set_seed(seed)

    print "Starting training..."
    ps = r.train(niter=niter)
    return r
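# Hedged usage sketch for the Chicago variant above: dates, iteration count and
# NN settings are illustrative. num_nn follows the convention documented in the
# function itself, [1D (time) case, 2/3D (space) case], with the second value
# reused for the trigger KDE.
def example_apply_point_process_chicago():
    r = apply_point_process(start_date=datetime.datetime(2010, 3, 1, 0),
                            end_date=datetime.datetime(2010, 6, 1, 0),
                            niter=20,
                            num_nn=[101, 16],
                            estimate_kwargs={'ct': 0.1, 'cd': 50, 'frac_bg': None})
    return r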
# keep any existing results if the sweep has already started in this session
try:
    sepp_obj
except NameError:
    sepp_obj = {}

for i in range(s0.size):
    tt = sorted([s0.flat[i], s1.flat[i]])
    if tuple(tt) in sepp_obj:
        # this sigma combination has already been fitted
        continue
    c = simulate.MohlerSimulation()
    c.t_total = t_total
    c.bg_params[0]['sigma'] = [1., 1.]
    c.bg_params[0]['intensity'] = 5
    c.trigger_sigma = list(tt)
    init_est = lambda d, t: estimation.estimator_exp_gaussian(
        d, t, ct=0.1, cd=np.mean(tt))
    c.run()
    data = c.data
    r = models.SeppStochasticNn(data=data,
                                max_delta_d=max_delta_d,
                                max_delta_t=max_delta_t,
                                estimation_function=init_est,
                                seed=42,
                                bg_kde_kwargs=bg_kde_kwargs,
                                trigger_kde_kwargs=trigger_kde_kwargs)
    try:
        _ = r.train(niter=num_iter)
    except Exception:
        continue
    sepp_obj[tuple(tt)] = copy.deepcopy(r)
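# s0 and s1 are not defined in the fragment above. An assumed helper (not taken
# from the original script) that would produce compatible arrays is a meshgrid
# over candidate trigger sigma values; .flat and .size then visit every pairing
# exactly once, and sorted() collapses (a, b) / (b, a) duplicates.
def make_sigma_grids(sigma_values=(0.01, 0.05, 0.1, 0.5, 1.)):
    vals = np.asarray(sigma_values, dtype=float)
    s0, s1 = np.meshgrid(vals, vals)
    return s0, s1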
def apply_point_process(
        nicl_type=3,
        only_new=False,
        start_date=None,
        end_date=None,
        niter=50,
        num_nn=None,
        min_bandwidth=None,
        jiggle_scale=None,
        max_delta_t=60,   # days
        max_delta_d=500,  # metres
        sepp_class=pp_models.SeppStochasticNnReflected,
        data=None,
        rng_seed=42,
        initial_est=None,
        remove_coincident_pairs=False,
):
    # suggested value:
    # min_bandwidth = np.array([0.3, 5., 5.])

    # get data
    if data is not None:
        res = data
    else:
        res, t0, cid = get_crimes_by_type(nicl_type=nicl_type,
                                          only_new=only_new,
                                          jiggle_scale=jiggle_scale,
                                          start_date=start_date,
                                          end_date=end_date)

    if initial_est is None:
        # define initial estimator
        initial_est = lambda x, y: estimation.estimator_exp_gaussian(
            x, y, ct=0.1, cd=50)

    if num_nn is not None:
        if len(num_nn) != 2:
            raise AttributeError(
                "Must supply two num_nn values: [1D case, 2/3D case]")
        num_nn_bg = num_nn
        num_nn_trig = num_nn[1]
    else:
        num_nn_bg = [101, 16]
        num_nn_trig = 15

    bg_kde_kwargs = {
        'number_nn': num_nn_bg,
        'strict': False  # attempt to restore order even if number of BG becomes less than requested NNs
    }

    trigger_kde_kwargs = {
        'min_bandwidth': min_bandwidth,
        'number_nn': num_nn_trig,
        'strict': False  # attempt to restore order even if number of trig becomes less than requested NNs
    }

    return apply_sepp_to_data(
        res,
        max_delta_t=max_delta_t,
        max_delta_d=max_delta_d,
        estimation_function=initial_est,
        niter=niter,
        bg_kde_kwargs=bg_kde_kwargs,
        trigger_kde_kwargs=trigger_kde_kwargs,
        sepp_class=sepp_class,
        rng_seed=rng_seed,
        remove_coincident_pairs=remove_coincident_pairs,
    )
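# Hedged usage sketch: passing data= skips the database call, which is useful
# when sweeping model settings over the same crimes. The max_delta_d values and
# the min_bandwidth are illustrative (the latter is the "suggested value" noted
# in the function above); numpy is assumed to be imported as np.
def example_apply_point_process_sweep(crime_data):
    results = {}
    for max_d in (300, 500, 1000):
        results[max_d] = apply_point_process(data=crime_data,
                                             niter=30,
                                             max_delta_d=max_d,
                                             min_bandwidth=np.array([0.3, 5., 5.]))
    return results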
def simulate_data_and_train():
    niter = 10
    sim_t_total = 1000.
    sim_num_to_prune = 400
    sim_bg_sigma = 100.  # std dev of each background Gaussian component
    train_kwargs = {
        'niter': 100,
    }
    estimate_kwargs = {
        'ct': 0.1,
        'cd': 50.,
        'frac_bg': None,
    }
    model_kwargs = {
        'parallel': True,
        'max_delta_t': 90,   # set on each iteration
        'max_delta_d': 500,  # set on each iteration
        'bg_kde_kwargs': {
            'number_nn': [100, 15],
            'min_bandwidth': None,
            'strict': False
        },
        'trigger_kde_kwargs': {
            'number_nn': 15,
            'min_bandwidth': None,
            'strict': False
        },
        'estimation_function': lambda x, y: estimation.estimator_exp_gaussian(x, y, **estimate_kwargs),
        'seed': 42,  # doesn't matter what this is, just want it fixed
        'remove_coincident_pairs': False
    }

    for col_space in col_spacings:
        print "row_space: %d, col_space: %d" % (row_space, col_space)

        # generate simulation parameters
        bg_params = []
        x_bg = np.arange(domain_extent[0], domain_extent[2], col_space)
        y_bg = np.arange(domain_extent[1], domain_extent[3], row_space)
        n_bg = x_bg.size * y_bg.size
        for x in x_bg:
            for y in y_bg:
                bg_params.append({
                    'location': [x, y],
                    'intensity': 2. / n_bg,
                    'sigma': [sim_bg_sigma, sim_bg_sigma]
                })

        # store containers
        data = []
        sepp = []
        for i in range(niter):
            c = simulate.PatchyGaussianSumBackground(bg_params=bg_params)
            c.trigger_params['sigma'] = [50., 50.]  # TODO: change this, too?
            c.run(t_total=sim_t_total, num_to_prune=sim_num_to_prune)
            data.append(c.data)
            r = models.SeppStochasticNnReflected(data=c.data, **model_kwargs)
            _ = r.train(**train_kwargs)
            sepp.append(r)

        out = {
            'row_spacing': row_space,
            'col_spacing': col_space,
            'domain_extent': domain_extent,
            'sim_t_total': sim_t_total,
            'sim_num_to_prune': sim_num_to_prune,
            'train_kwargs': train_kwargs,
            'model_kwargs': model_kwargs,
            'data': data,
            'sepp': sepp,
        }

        if not os.path.exists(subdir):
            os.makedirs(subdir)
        fn = 'simulated_data_patchy_bg_row_%d_col_%d.dill' % (row_space, col_space)
        with open(os.path.join(subdir, fn), 'wb') as f:
            dill.dump(out, f)
domain_nw = polys['Northwest']
domain_s = polys['South']
# domain = get_chicago_polys()['Northwest']

tmp = get_chicago_data(domain=domain_nw)
data, t0, cid = tmp['burglary']

# normalise spatial component of data
# data[:, 1] -= data[:, 1].min()
# data[:, 2] -= data[:, 2].min()
# scaling = np.mean(np.std(data[:, 1:], axis=0, ddof=1))
# data[:, 1:] /= scaling
scaling = 1.

# est_fun = lambda x, y: estimation.estimator_exp_gaussian(x, y, ct=0.1, cd=50, frac_bg=0.8)
est_fun = lambda x, y: estimation.estimator_exp_gaussian(
    x, y, ct=0.1, cd=50 / scaling, frac_bg=None)

for num_nn in nns:
    trigger_kde_kwargs = {'strict': False, 'number_nn': num_nn[-1]}
    bg_kde_kwargs = {'strict': False, 'number_nn': list(num_nn)}
    sepp = pp_models.SeppStochasticNn(
        data=data,
        max_delta_t=max_t,
        max_delta_d=max_d / scaling,
        seed=42,
        estimation_function=est_fun,
        trigger_kde_kwargs=trigger_kde_kwargs,
        bg_kde_kwargs=bg_kde_kwargs,
        remove_coincident_pairs=True)
    sepp.train(niter=niter)