def run_me(data, data_index, domain, out_dir, run_name):
    """Run one validation job, logging to and pickling results under ``out_dir``.

    A log file ``<run_name>.log`` and a result file
    ``<run_name>-validation.pickle`` are written into ``out_dir`` (created if
    missing). If the validation run raises, the error is logged and ``None``
    is pickled instead of a result.

    Besides its arguments, the function reads these module-level names:
    ``start_day_number``, ``num_validation``, ``model_kwargs``, ``grid_size``
    and ``num_sample_points``.

    :param data: array indexed as ``data[-1, 0]``; that value is compared with
        ``start_day_number + num_validation - 1`` to cap the iteration count
        (presumably the time of the last record -- confirm with the caller).
    :param data_index: passed through to ``validation.ValidationIntegration``.
    :param domain: passed as ``spatial_domain`` to the validation object.
    :param out_dir: directory receiving the log and pickle files.
    :param run_name: logger name and file stem for the outputs.
    :raises OSError: if ``out_dir`` cannot be created and still does not exist
        after a short wait.
    """
    # Imported locally under an alias so this function does not depend on
    # whether the module-level name `time` is bound to the module or to the
    # `time.time` function.
    import time as _time

    log_file = os.path.join(out_dir, '%s.log' % run_name)

    if not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # wait a moment, just in case another process has just done the
            # folder creation
            _time.sleep(1)
            if not os.path.isdir(out_dir):
                raise

    # set loggers
    # NOTE(review): the file handler is never removed or closed, so repeated
    # calls with the same run_name attach additional handlers to that logger.
    logger = logging.getLogger(run_name)
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    this_logger = logging.getLogger('validation.validation')
    this_logger.setLevel(logging.DEBUG)
    this_logger.handlers = []  # clear existing handlers
    this_logger.addHandler(fh)  # replace with the same file output

    logger.info("Logger set.  Script started.")

    # check that num_validation iterations is feasible
    if start_day_number + num_validation - 1 > data[-1, 0]:
        this_num_validation = int(data[-1, 0]) - start_day_number + 1
        logger.info("Can only do %d validation runs" % this_num_validation)
    else:
        this_num_validation = num_validation

    logger.info("Instantiating validation object")
    hs = hotspot.STGaussianNn(**model_kwargs)
    vb = validation.ValidationIntegration(data,
                                          model=hs,
                                          data_index=data_index,
                                          spatial_domain=domain,
                                          cutoff_t=start_day_number)

    logger.info("Setting validation grid")
    vb.set_sample_units(grid_size, num_sample_points)
    file_stem = os.path.join(out_dir, run_name)

    # Bind the result up front so the `finally` clause can always pickle
    # something, even when the run is interrupted by a non-Exception error
    # such as KeyboardInterrupt.
    res = None
    try:
        logger.info("Starting validation run.")
        res = vb.run(time_step=1, n_iter=this_num_validation, verbose=True)
    except Exception as exc:
        # The error is deliberately swallowed; keep the traceback in the log
        # so the failure can still be diagnosed.
        logger.exception(repr(exc))
        res = None
    finally:
        logger.info("Saving results (or None).")
        # Pickle streams are binary data: open the file in binary mode.
        with open(file_stem + '-validation.pickle', 'wb') as f:
            pickle.dump(res, f)
def compute_and_save_planar_grid(
        outfile='planar_grid_prediction_sample_units.dill'):
    """Recompute the planar sample-unit grid and save it with dill.

    The grid is built over the polygon returned by ``load_boundary_file()``
    using the module-level ``GRID_LENGTH`` and ``N_SAMPLES_PER_GRID``. A dict
    with keys ``'extent'`` (``roc.sample_units``) and ``'polys'``
    (``roc.grid_polys``) is written to ``OUT_DIR/birmingham/<outfile>``.

    :param outfile: file name of the dill output inside ``OUT_DIR/birmingham``.
    """
    # have run this now, but for the record it's how we can recompute the
    # planar grid
    poly = load_boundary_file()

    # Random 10x3 array stands in for real data; presumably only the grid
    # geometry matters here -- TODO confirm.
    vb_planar = validation.ValidationIntegration(np.random.rand(10, 3),
                                                 None,
                                                 spatial_domain=poly,
                                                 include_predictions=True)
    vb_planar.set_sample_units(GRID_LENGTH, N_SAMPLES_PER_GRID)

    sample_unit_polys = vb_planar.roc.grid_polys
    sample_unit_extent = vb_planar.roc.sample_units

    # dill writes a pickle stream, which is binary data: open in binary mode.
    with open(os.path.join(OUT_DIR, 'birmingham', outfile), 'wb') as f:
        dill.dump({
            'extent': sample_unit_extent,
            'polys': sample_unit_polys
        }, f)
def run_planar_validation_compare_by_grid(data, topt, dopt):
    """Run the planar grid-based validation and print how long it took.

    Uses the module-level ``START_DAY_NUMBER``, ``GRID_LENGTH``,
    ``N_SAMPLES_PER_GRID`` and ``NUM_VALIDATION`` settings and the polygon
    returned by ``load_boundary_file()``.

    :param data: data passed to ``validation.ValidationIntegration``.
    :param topt: passed as ``mean_time`` to the hotspot model.
    :param dopt: passed as ``radius`` to the hotspot model.
    :return: whatever ``vb_planar.run`` returns.
    """
    # Imported locally under an alias so the timing code does not depend on
    # whether the module-level name `time` is bound to the module or to the
    # `time.time` function.
    import time as _time

    poly = load_boundary_file()
    sk_planar = hotspot.STLinearSpaceExponentialTime(radius=dopt,
                                                     mean_time=topt)
    vb_planar = validation.ValidationIntegration(data,
                                                 sk_planar,
                                                 spatial_domain=poly,
                                                 include_predictions=True)
    vb_planar.set_t_cutoff(START_DAY_NUMBER)
    vb_planar.set_sample_units(GRID_LENGTH, N_SAMPLES_PER_GRID)

    tic = _time.time()
    vb_res = vb_planar.run(1, n_iter=NUM_VALIDATION)
    toc = _time.time()
    # Elapsed wall-clock seconds; the single-argument call form prints the
    # same thing under both the print statement and the print function.
    print(toc - tic)

    return vb_res
def apply_historic_kde(data,
                       data_index,
                       domain,
                       grid_squares=None,
                       num_sample_points=10,
                       time_window=60):
    """Validate the historic spatial KDE model with integration sampling.

    Per the original note, this is the historic spatial KDE (Scott bandwidth).
    The validation cutoff time is the module-level ``INITIAL_CUTOFF + T0``.

    :param data: data passed to ``validation.ValidationIntegration``.
    :param data_index: passed through as ``data_index``.
    :param domain: passed through as ``spatial_domain``.
    :param grid_squares: if truthy, used as predefined sample units; otherwise
        sample units are set with a first argument of 250.
    :param num_sample_points: second argument to the sample-unit setup call.
    :param time_window: sole argument to ``hotspot.SKernelHistoric``.
    :return: result of running 100 iterations with ``time_step=1``.
    """
    kde_model = hotspot.SKernelHistoric(time_window)
    cutoff = INITIAL_CUTOFF + T0
    validator = validation.ValidationIntegration(data,
                                                 model=kde_model,
                                                 data_index=data_index,
                                                 spatial_domain=domain,
                                                 cutoff_t=cutoff)

    if not grid_squares:
        # No predefined squares supplied: build the sample units directly.
        validator.set_sample_units(250, num_sample_points)
    else:
        validator.roc.set_sample_units_predefined(grid_squares,
                                                  num_sample_points)

    return validator.run(time_step=1, n_iter=100, verbose=True)
def apply_historic_kde_variable_bandwidth(data,
                                          data_index,
                                          domain,
                                          grid_squares=None,
                                          num_nn=20,
                                          num_sample_points=10,
                                          time_window=60):
    """Validate the variable-bandwidth (nearest-neighbour) historic KDE model.

    The validation cutoff time is the module-level ``INITIAL_CUTOFF + T0``.

    :param data: data passed to ``validation.ValidationIntegration``.
    :param data_index: passed through as ``data_index``.
    :param domain: passed through as ``spatial_domain``.
    :param grid_squares: if truthy, used as predefined sample units; otherwise
        sample units are set with a first argument of 250.
    :param num_nn: passed as ``nn`` to the hotspot model.
    :param num_sample_points: second argument to the sample-unit setup call.
    :param time_window: passed as ``dt`` to the hotspot model.
    :return: result of running 100 iterations with ``time_step=1``.
    """
    kde_model = hotspot.SKernelHistoricVariableBandwidthNn(dt=time_window,
                                                           nn=num_nn)
    cutoff = INITIAL_CUTOFF + T0
    validator = validation.ValidationIntegration(data,
                                                 model=kde_model,
                                                 data_index=data_index,
                                                 spatial_domain=domain,
                                                 cutoff_t=cutoff)

    if not grid_squares:
        # No predefined squares supplied: build the sample units directly.
        validator.set_sample_units(250, num_sample_points)
    else:
        validator.roc.set_sample_units_predefined(grid_squares,
                                                  num_sample_points)

    outcome = validator.run(time_step=1, n_iter=100, verbose=True)
    return outcome
n_samples) # 2nd argument refers to interval between sample points import time tic = time.time() vb_res = vb.run(1, n_iter=n_test) toc = time.time() print toc - tic # compare with grid-based method with same parameters cb_poly = camden_boundary() data_txy = all_data.time.adddim(all_data.space.to_cartesian(), type=models.CartesianSpaceTimeData) sk_planar = hotspot.STLinearSpaceExponentialTime(radius=h, mean_time=t_decay) vb_planar = validation.ValidationIntegration(data_txy, sk_planar, spatial_domain=cb_poly, include_predictions=True) vb_planar.set_t_cutoff(INITAL_CUTOFF) vb_planar.set_sample_units(grid_length, n_samples) tic = time.time() vb_res_planar = vb_planar.run(1, n_iter=n_test) toc = time.time() print toc - tic # compare with grid-based method using intersecting network segments to measure sample unit size vb_planar_segment = validation.ValidationIntegrationByNetworkSegment( data_txy, sk_planar, spatial_domain=cb_poly, graph=itn_net) vb_planar_segment.set_t_cutoff(INITAL_CUTOFF) vb_planar_segment.set_sample_units(grid_length, n_samples)