def get_box_networks(distance, bottom, left, width, height, num_box_x,
                     num_box_y, limits=None, keep_frac=1):
    """
    Gets a number of networks contained in boxes.

    :param distance: The maximum distance between connected nodes
    :param bottom: The bottom coordinate of the group of boxes
    :param left: The left coordinate of the group of boxes
    :param width: The total width of the group of boxes
    :param height: The total height of the group of boxes
    :param num_box_x: The number of boxes in a row
    :param num_box_y: The number of boxes in a column
    :param limits: Any restrictions on database output eg: {'type': 'Theft'}
    :param keep_frac: Fraction of crimes kept per box (forwarded unchanged
        to get_box_network) -- TODO confirm exact semantics with that helper.
    :return: A map from the (x,y) index of a box to the network of crimes
        in the box.
    :raises RuntimeError: If any box failed to build its network.
    """
    box_width = width / num_box_x
    box_height = height / num_box_y
    # One parameter dict per (x, y) box; the single-valued arguments are
    # wrapped in lists because combinations() takes iterables of choices.
    params = multithreading.combinations(width=[box_width],
                                         height=[box_height],
                                         x=range(num_box_x),
                                         y=range(num_box_y),
                                         gl_bottom=[bottom],
                                         gl_left=[left],
                                         limits=[limits],
                                         distance=[distance],
                                         keep_frac=[keep_frac])
    logger.debug('Broke into {} boxes'.format(len(params)))
    if _multiprocess:
        results = multithreading.map_kwargs(get_box_network, params,
                                            failsafe=True)
    else:
        # Materialize the map() so the membership test and pairing below
        # work on Python 3, where map() returns a one-shot iterator.
        results = list(map(lambda args: get_box_network(**args), params))

    # fail if we are missing boxes (failsafe=True reports failures as False)
    if False in results:
        logger.fatal('Some Boxes Failed!')
        raise RuntimeError

    # Pair each box's (x, y) index with its network.
    # I know dict is ghetto, 2d array would be better
    return {(args['x'], args['y']): network
            for args, network in zip(params, results)}
def stitch_rows(rows, distance, row_overlap):
    """
    Stitches a number of rows into a single network.

    It is assumed the rows are ordered bottom to top in the given list.
    This method does log(N) parallel row merges to stitch the network.

    :param rows: The list of rows to stitch
    :param distance: The distance to connect edges
    :param row_overlap: The maximum distance into another network needed to
        check for edges. This value is in degrees.
    :return: A single unified network
    :raises RuntimeError: If any pairwise stitch failed.
    """
    finished = False
    while not finished:
        logger.debug('{} crimes in {} rows'.format(
            sum(row.vcount() for row in rows), len(rows)))

        # Pair each even-indexed row with the row above it; pop shrinks the
        # list as we go so `i` always points at the next un-paired base row.
        params = []
        i = 0
        while i + 1 < len(rows):
            base = rows[i]
            i += 1
            top = rows.pop(i)
            params.append({
                'base': base,
                'other': top,
                'distance': distance,
                'overlap': row_overlap
            })
        logger.info('Stitching into {} rows ({} stitches)'.format(
            len(rows), len(params)))

        if _multiprocess:
            results = multithreading.map_kwargs(stitch_two_rows, params,
                                                failsafe=True)
        else:
            # Materialize the map() so we can append to it and run the
            # failure check below on Python 3, where map() is a lazy
            # one-shot iterator with no append().
            results = list(map(lambda args: stitch_two_rows(**args), params))

        # An odd row count leaves one unpaired row; carry it into the next
        # merge round unchanged.
        if i == len(rows) - 1:
            results.append(rows[i])
        rows = results

        # check we didn't throw up on a row (failsafe reports False)
        if False in results:
            logger.fatal('Some Rows Failed!')
            raise RuntimeError

        finished = len(rows) == 1
    return rows[0]
def stitch_boxes(networks, distance):
    """
    Turns box networks into a list of rows.

    :param networks: A map<(x, y), network>, where (x, y) is the index of
        the box.
    :param distance: The maximum distance between linked crimes.
    :return: A list of row networks. Each row is composed of all boxes with
        the same y index. The list of rows is ordered from bottom to top.
        ie: the index of a row in the list is the same as its y index in
        the input map.
    :raises RuntimeError: If any row failed to build.
    """
    # The grid is assumed dense: every (x, y) with 0 <= x <= max_x and
    # 0 <= y <= max_y must be present in `networks`.
    max_x = max(k[0] for k in networks)
    max_y = max(k[1] for k in networks)

    params = []
    for y in range(max_y + 1):
        params.append({
            'network_row': [networks[(x, y)] for x in range(max_x + 1)],
            'distance': distance,
            'row_number': y
        })

    # stitch into rows
    logger.info('Stitching into {} rows'.format(len(params)))
    if _multiprocess:
        rows = multithreading.map_kwargs(make_row_network, params,
                                         failsafe=True)
    else:
        # Materialize the map(): on Python 3 the `False in rows` test below
        # would otherwise exhaust the iterator and the caller would get an
        # empty result.
        rows = list(map(lambda args: make_row_network(**args), params))

    # make sure no rows died (failsafe reports failures as False)
    if False in rows:
        logger.fatal('Some Rows Failed!')
        raise RuntimeError

    return rows
crime_types=[['all', 'assault', 'burglary', 'theft']], distances=[[3.2, 2.4, 1.6, 0.8, 0.1]], node_types=[['zip']], filenames=[year_files], algorithms=[['label_propagation']], iterations_list=[[1000]]) # filter out bad combinations params = filter( lambda args: args['clustering'] == 'average' and args['level'] == 50000 or args['clustering'] == 'single' and args['level'] == 25000 or args[ 'clustering'] == 'complete' and args['level'] == 25000, params) logger.info('{} Base Networks Found'.format(len(params))) score_lists = multithreading.map_kwargs(get_z_scores, params) logger.info('Combining Results') data = [('area', 'clustering', 'level', 'crime_type', 'distance', 'node_type', 'filename', 'algorithm', 'iterations', 'zscore') ] # fill with the header row logger.debug(params) logger.debug(score_lists) for args, results in zip(params, score_lists): for score in results: data.append((args['area'], args['clustering'], args['level']) + score) logger.info('Writing Data File') # write csv with open('border_comparason.csv', 'a') as output: writer = csv.writer(output, delimiter=',')
logger.info('Starting!') path = 'data/{}/{}/distance/{}/{}'.format(city, crime_name, distance, node_type) network = load_network(path, filename) add_regions(network, path, filename, region_type) _ = get_communities(network, iterations, path, filename, algorithm=algorithm) save_borders(path, filename, region_type, iterations, algorithm) # figure_path = 'output/{}.svg'.format(unique_id) # if not os.path.exists(figure_path): # fig = plotting.get_border_fig(path, region_type, algorithm, filename, iterations) # fig.savefig(figure_path) # else: # logger.info('Figure Exists, skipping') logger.info('Done!') return True _results = multithreading.map_kwargs(work, params, failsafe=True) logger.info(_results) for _r in zip(params, _results): if not _r[1]: logger.info('{} => {}'.format(_r[0].values(), _r[1]))
todo = 'year' areas = ['baltimore', 'los_angeles', 'miami'] _distances = [0.1, 0.8, 1.6, 2.4, 3.2] node_types = ['crime'] _crime_types = [None, ['Theft'], ['Burglary'], ['Assault']] logger.info('Starting') if todo == 'month': params = multithreading.combinations(years=[range(2007, 2011)], area_name=areas, distance=_distances, node_type=node_types, crime_types=_crime_types) multithreading.map_kwargs(save_dynamic_distance_month_graph, params) elif todo == 'week': params = multithreading.combinations( initial=[datetime.datetime(2007, 1, 1)], final=[datetime.datetime(2011, 1, 1)], delta_name=['week'], area_name=areas, distance=_distances, node_type=node_types, crime_types=_crime_types) logger.info('Generating {} dynamic networks'.format(len(params))) multithreading.map_kwargs(save_dynamic_distance_delta_graph, params) # map(lambda args: save_dynamic_distance_graph(**args), params) elif todo == 'year': params = multithreading.combinations(years=[range(2007, 2011)], area_name=areas,