""" scratch data """

# Record accessors (project-defined): each maps a burglary record id to a field.
location_f = crime_data_fxns.house_break_f('latlng')
year_f = crime_data_fxns.house_break_f('year')

# Hoisted out of the filter predicate: the Cambridge bounding region is built
# from constants, so construct it once instead of once per record.
_cambridge_region = utils.latlng_grid_region(
    crime_data.constants.cambridge_min_lat,
    crime_data.constants.cambridge_max_lat,
    crime_data.constants.cambridge_min_lng,
    crime_data.constants.cambridge_max_lng,
)

def _in_scope(crime_id):
    """True for 2003-2005 burglaries located inside the Cambridge region."""
    # chained comparison evaluates year_f once; location check short-circuits
    return 2003 <= year_f(crime_id) <= 2005 and location_f(crime_id) in _cambridge_region

data_id_iterable = list(itertools.ifilter(_in_scope, crime_data_fxns.AllHouseBurglaryIterable()))
# alternative (no year restriction):
# data_id_iterable = list(itertools.ifilter(lambda crime_id: location_f(crime_id) in _cambridge_region, crime_data_fxns.AllHouseBurglaryIterable()))

# Categorical features over entry location and entry means, binned to the
# most common categories.
cat_fs = [
    utils.categorical_f(crime_data_fxns.house_break_f('location_of_entry'),
                        [utils.equals_bin('Door: Front'), utils.equals_bin('Window: Ground'), utils.equals_bin('Door: Rear')]),
    utils.categorical_f(crime_data_fxns.house_break_f('means_of_entry'),
                        [utils.equals_bin('Pried'), utils.equals_bin('Unlocked'), utils.equals_bin('Shoved/Forced'), utils.equals_bin('Broke')]),
    # utils.categorical_f(crime_data_fxns.house_break_f('categorization'), [utils.equals_bin('Professional'), utils.equals_bin('Unprofessional'), utils.equals_bin('Attempt')]),
]
int_cat_fs = [utils.int_f_from_categorical_f(cat_f) for cat_f in cat_fs]
x_f = utils.series_f(*int_cat_fs)
# x_f = utils.series_f(utils.hard_code_f(0))

time_f = crime_data_fxns.house_break_f('date_num')
in_pattern_f = crime_data_fxns.in_pattern_f()
pattern_f = crime_data_fxns.house_break_f('pattern')

# Materialize one datum per in-scope record.
scratch_data = [
    tensor_scan_fxns.datum(crime_id, time_f(crime_id), location_f(crime_id),
                           x_f(crime_id), in_pattern_f(crime_id), pattern_f(crime_id))
    for crime_id in data_id_iterable
]

""" scratch pattern_finder """

lat_min, lat_max, lng_min, lng_max = (
    crime_data.constants.cambridge_min_lat,
    crime_data.constants.cambridge_max_lat,
    crime_data.constants.cambridge_min_lng,
    crime_data.constants.cambridge_max_lng,
)
num_lat, num_lng = 16, 16
regions_F = utils.latlng_grid_regions_F(num_lat, num_lng)
# Background model: binned region counts (0.00001 is presumably a pseudocount,
# cf. pseudocounts_iterable below -- confirm) crossed with an independent
# categorical joint distribution over the x features.
background_count_F = tensor_scan_fxns.region_x_independent_tensor_count_F(
    tensor_scan_fxns.bin_region_count_F(0.00001),
    tensor_scan_fxns.joint_x_distribution_F(utils.independent_categorical_joint_distribution_F()),
)
# Foreground counts are taken directly from the data.
foreground_count_F = tensor_scan_fxns.empirical_tensor_count_F()
# NOTE(review): 10 and 0.001 look like an iteration cap and a convergence
# tolerance for the coordinate-cycling argmax -- confirm against utils.iterative_argmax_F.
optimizer_F = utils.iterative_argmax_F(utils.get_initial_subset_x_random(1.0), utils.cycle_through_coord_iterative_step(), 10, 0.001)
p_value_F = tensor_scan_fxns.identity_test_stat_F()
pattern_F = tensor_scan_fxns.pattern_F(background_count_F, foreground_count_F, optimizer_F, tensor_scan_fxns.pattern_test_stat, p_value_F)
pattern_finder_regions_F = tensor_scan_fxns.background_and_foreground_regions_F(regions_F)
num_windows = 20
raw_pattern_finder_F = tensor_scan_fxns.raw_pattern_finder_F(tensor_scan_fxns.many_windows_iterator(num_windows), pattern_finder_regions_F, pattern_F)
""" scratch data_iterable """

# All burglary record ids -- no year or geography filtering in this section.
data_id_iterable = crime_data_fxns.AllHouseBurglaryIterable()

# Two candidate categorical features; the powerset below tries every subset
# of them as a feature combination.
cat_fs = [
    utils.categorical_f(crime_data_fxns.house_break_f('location_of_entry'),
                        [utils.equals_bin('Door: Front'), utils.equals_bin('Window: Ground'), utils.equals_bin('Door: Rear')]),
    utils.categorical_f(crime_data_fxns.house_break_f('categorization'),
                        [utils.equals_bin('Professional'), utils.equals_bin('Unprofessional'), utils.equals_bin('Attempt')]),
]
int_cat_fs = [utils.int_f_from_categorical_f(cat_f) for cat_f in cat_fs]
int_cat_fs_set_iterable = utils.get_powerset_iterator(int_cat_fs)
x_f_iterable = itertools.starmap(utils.series_f, int_cat_fs_set_iterable)

# Record accessors used when materializing datum objects.
location_f = crime_data_fxns.house_break_f('latlng')
time_f = crime_data_fxns.house_break_f('date_num')
in_pattern_f = crime_data_fxns.in_pattern_f()

def _data_for_x_f(x_f):
    # One datum per record, with the feature vector drawn through x_f.
    return [tensor_scan_fxns.datum(rec, time_f(rec), location_f(rec), x_f(rec), in_pattern_f(rec))
            for rec in data_id_iterable]

# Lazy: yields one full dataset per feature-subset combination.
scratch_data_iterable = itertools.imap(_data_for_x_f, x_f_iterable)

""" scratch pattern_finder_iterable """

# Square grid resolutions to sweep.
num_lat_iterable = [5, 10, 15, 20, 30]
num_lng_iterable = num_lat_iterable
num_lat_num_lng_iterable = itertools.izip(num_lat_iterable, num_lng_iterable)
regions_F_iterable = itertools.starmap(utils.latlng_grid_regions_F, num_lat_num_lng_iterable)

pseudocounts_iterable = [0.001]
bin_region_count_F_iterable = itertools.imap(tensor_scan_fxns.bin_region_count_F, pseudocounts_iterable)
raw_joint_distribution_F_iterable = [utils.independent_categorical_joint_distribution_F()]
joint_x_distribution_F_iterable = itertools.imap(tensor_scan_fxns.joint_x_distribution_F, raw_joint_distribution_F_iterable)
# Cross product of every region-count factory with every x-distribution factory.
background_count_F_iterable = itertools.starmap(
    tensor_scan_fxns.region_x_independent_tensor_count_F,
    itertools.product(bin_region_count_F_iterable, joint_x_distribution_F_iterable))
foreground_count_F_iterable = [tensor_scan_fxns.empirical_tensor_count_F()]
# Simulated data for aggregate-statistic checks: one background sample plus
# three "pattern" samples, each intended to differ from background along
# exactly one axis (time, location, or x).
background_agg_N = 300
pattern_agg_N = 300
# time: background uniform over [0, 10); pattern concentrated near 5.0
agg_background_time_f = functools.partial(np.random.uniform, 0.0, 10.0)
agg_pattern_time_f = functools.partial(np.random.uniform, 4.9, 5.1)
# location: wide normal around (1, 1) vs. tight normal around (-1, -1)
agg_background_location_f = functools.partial(np.random.normal, (1.0, 1.0), 10.0)
agg_pattern_location_f = functools.partial(np.random.normal, (-1.0, -1.0), 0.2)
# x: two independent categorical draws per record
agg_background_x_f = utils.series_f(\
functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),\
functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),\
)
# NOTE(review): agg_pattern_x_f uses the SAME category probabilities as
# agg_background_x_f, so pattern_x_diff_agg_data below carries no x-axis
# signal. Looks like a bug -- presumably the pattern probabilities were meant
# to differ from background; confirm intended values.
agg_pattern_x_f = utils.series_f(\
functools.partial(utils.random_categorical, [0.1, 0.1, 0.8]),\
functools.partial(utils.random_categorical, [0.1, 0.8, 0.1]),\
)
# Final datum argument: 0 for background rows, 1 for pattern rows.
background_agg_data = [fxns.datum(i, agg_background_time_f(), agg_background_location_f(), agg_background_x_f(), 0) for i in xrange(background_agg_N)]
pattern_time_diff_agg_data = [fxns.datum(i, agg_pattern_time_f(), agg_background_location_f(), agg_background_x_f(), 1) for i in xrange(pattern_agg_N)]
pattern_location_diff_agg_data = [fxns.datum(i, agg_background_time_f(), agg_pattern_location_f(), agg_background_x_f(), 1) for i in xrange(pattern_agg_N)]
pattern_x_diff_agg_data = [fxns.datum(i, agg_background_time_f(), agg_background_location_f(), agg_pattern_x_f(), 1) for i in xrange(pattern_agg_N)]
""" simulated data for subsetscan. have 2 different time distributions for back/foreground. should be mostly disjoint. location_f should be quite concentrated for foreground, so that it's higher at its mode than background. """
background_ss_N = 1900
pattern_ss_N = 100
# background times cover 0-999 and 1100-1999; pattern times fill the 1000-1099 gap
ss_background_time_f = utils.generator_f(itertools.chain(iter(xrange(0,1000)), iter(xrange(1100,2000))))
ss_pattern_time_f = utils.generator_f(iter(xrange(1000, 1100)))
# locations: background uniform on [0,10]^2; pattern uniform on the disjoint box [-10,-5]^2
ss_background_location_f = functools.partial(utils.multivariate_random_uniform, [(0,10),(0,10)])
ss_pattern_location_f = functools.partial(utils.multivariate_random_uniform, [(-10,-5),(-10,-5)])
# (statement continues past this chunk)
ss_background_x_f = utils.series_f(\