示例#1
0
def get_box_networks(distance,
                     bottom,
                     left,
                     width,
                     height,
                     num_box_x,
                     num_box_y,
                     limits=None,
                     keep_frac=1):
    """ Gets a number of networks contained in boxes.

        The region is split into a num_box_x by num_box_y grid of equally
        sized boxes and get_box_network is called once per box, either via
        multithreading.map_kwargs or serially, depending on the module-level
        _multiprocess flag.

        :param distance: The maximum distance between connected nodes
        :param bottom: The bottom coordinate of the group of boxes
        :param left: The left coordinate of the group of boxes
        :param width: The total width of the group of boxes
        :param height: The total height of the group of boxes
        :param num_box_x: The number of boxes in a row
        :param num_box_y: The number of boxes in a column
        :param limits: Any restrictions on database output
        eg: {'type': 'Theft'}
        :param keep_frac: Forwarded unchanged to get_box_network (presumably
        the fraction of crimes to keep -- confirm against get_box_network)
        :return: A map from the (x,y) index of a box to the network of crimes
        in the box.
        :raises RuntimeError: If any box result is False
    """
    # float() avoids floor division when both operands are ints on Python 2
    # (which would leave part of the region uncovered); it changes nothing
    # under true division.
    box_width = float(width) / num_box_x
    box_height = float(height) / num_box_y

    # one kwargs dict per box; single-element lists are constants shared by
    # every box, x and y enumerate the grid indices
    params = multithreading.combinations(width=[box_width],
                                         height=[box_height],
                                         x=range(num_box_x),
                                         y=range(num_box_y),
                                         gl_bottom=[bottom],
                                         gl_left=[left],
                                         limits=[limits],
                                         distance=[distance],
                                         keep_frac=[keep_frac])

    logger.debug('Broke into {} boxes'.format(len(params)))

    if _multiprocess:
        results = multithreading.map_kwargs(get_box_network,
                                            params,
                                            failsafe=True)
    else:
        # a real list (not a lazy map object) so the membership test below
        # does not consume the results before they are returned
        results = [get_box_network(**args) for args in params]

    # fail if we are missing boxes
    # (a False entry presumably marks a call that failed under failsafe=True
    # -- confirm against multithreading.map_kwargs)
    if False in results:
        logger.fatal('Some Boxes Failed!')
        raise RuntimeError('Some Boxes Failed!')

    # get a map<(x,y), network>; results are in the same order as params
    return {(args['x'], args['y']): network
            for args, network in zip(params, results)}
示例#2
0
def stitch_rows(rows, distance, row_overlap):
    """ Stitches a number of rows into a single network.

        It is assumed the rows are ordered bottom to top in the given list.
        Each round merges neighbouring pairs of rows (in parallel when the
        module-level _multiprocess flag is set), so the list is roughly
        halved per round and log(N) rounds are needed. The given list is not
        modified.

        :param rows: The list of rows to stitch
        :param distance: The distance to connect edges
        :param row_overlap: The maximum distance into another network needed to
        check for edges. This value is in degrees.
        :return: A single unified network
        :raises ValueError: If rows is empty
        :raises RuntimeError: If any pairwise stitch result is False
    """
    # work on a copy so the caller's list is left intact
    rows = list(rows)
    if not rows:
        # nothing to merge; without this guard the loop below never ends
        raise ValueError('No rows to stitch')

    while True:
        logger.debug('{} crimes in {} rows'.format(
            sum(row.vcount() for row in rows), len(rows)))

        # stitch every other row together: (0, 1), (2, 3), ...
        params = [{
            'base': rows[i],
            'other': rows[i + 1],
            'distance': distance,
            'overlap': row_overlap
        } for i in range(0, len(rows) - 1, 2)]
        # with an odd number of rows the top row has no partner this round
        # and is carried over unchanged
        leftover = rows[-1:] if len(rows) % 2 else []

        logger.info('Stitching into {} rows ({} stitches)'.format(
            len(params) + len(leftover), len(params)))

        if _multiprocess:
            results = list(
                multithreading.map_kwargs(stitch_two_rows,
                                          params,
                                          failsafe=True))
        else:
            results = [stitch_two_rows(**args) for args in params]

        # check we didn't throw up on a row
        # (a False entry presumably marks a call that failed under
        # failsafe=True -- confirm against multithreading.map_kwargs)
        if False in results:
            logger.fatal('Some Rows Failed!')
            raise RuntimeError('Some Rows Failed!')

        # merged pairs stay in bottom-to-top order, unpaired row goes on top
        rows = results + leftover
        if len(rows) == 1:
            return rows[0]
示例#3
0
def stitch_boxes(networks, distance):
    """ Turns box networks into a list of rows.

        One make_row_network call is issued per y index, either through
        multithreading.map_kwargs or serially, depending on the module-level
        _multiprocess flag.

        :param networks: A map<(x, y), network>, where (x, y) is the index of
        the box. Every (x, y) from (0, 0) up to the largest x and y present
        must be in the map.
        :param distance: The maximum distance between linked crimes.
        :return: A list of row networks. Each row is composed of all boxes with
        the same y index. The list of rows is ordered from bottom to top.
        ie: the index of a row in the list is the same as its y index in the
        input map.
        :raises ValueError: If networks is empty
        :raises KeyError: If a box inside the grid is missing from networks
        :raises RuntimeError: If any row result is False
    """
    if not networks:
        # max() would raise ValueError here anyway; say why instead
        raise ValueError('No box networks to stitch')

    max_x = max(key[0] for key in networks)
    max_y = max(key[1] for key in networks)

    # one kwargs dict per row, boxes ordered by x within the row
    params = [{
        'network_row': [networks[(x, y)] for x in range(max_x + 1)],
        'distance': distance,
        'row_number': y
    } for y in range(max_y + 1)]

    # stitch into rows
    logger.info('Stitching into {} rows'.format(len(params)))
    if _multiprocess:
        rows = multithreading.map_kwargs(make_row_network,
                                         params,
                                         failsafe=True)
    else:
        # a real list (not a lazy map object) so the membership test below
        # does not consume the rows before they are returned
        rows = [make_row_network(**args) for args in params]

    # make sure no rows died
    # (a False entry presumably marks a call that failed under failsafe=True
    # -- confirm against multithreading.map_kwargs)
    if False in rows:
        logger.fatal('Some Rows Failed!')
        raise RuntimeError('Some Rows Failed!')
    return rows
示例#4
0
        crime_types=[['all', 'assault', 'burglary', 'theft']],
        distances=[[3.2, 2.4, 1.6, 0.8, 0.1]],
        node_types=[['zip']],
        filenames=[year_files],
        algorithms=[['label_propagation']],
        iterations_list=[[1000]])

    # filter out bad combinations
    params = filter(
        lambda args: args['clustering'] == 'average' and args['level'] == 50000
        or args['clustering'] == 'single' and args['level'] == 25000 or args[
            'clustering'] == 'complete' and args['level'] == 25000, params)

    logger.info('{} Base Networks Found'.format(len(params)))

    score_lists = multithreading.map_kwargs(get_z_scores, params)
    logger.info('Combining Results')
    data = [('area', 'clustering', 'level', 'crime_type', 'distance',
             'node_type', 'filename', 'algorithm', 'iterations', 'zscore')
            ]  # fill with the header row
    logger.debug(params)
    logger.debug(score_lists)
    for args, results in zip(params, score_lists):
        for score in results:
            data.append((args['area'], args['clustering'], args['level']) +
                        score)

    logger.info('Writing Data File')
    # write csv
    with open('border_comparason.csv', 'a') as output:
        writer = csv.writer(output, delimiter=',')
示例#5
0
        logger.info('Starting!')

        path = 'data/{}/{}/distance/{}/{}'.format(city, crime_name, distance,
                                                  node_type)
        network = load_network(path, filename)
        add_regions(network, path, filename, region_type)

        _ = get_communities(network,
                            iterations,
                            path,
                            filename,
                            algorithm=algorithm)

        save_borders(path, filename, region_type, iterations, algorithm)

        # figure_path = 'output/{}.svg'.format(unique_id)
        # if not os.path.exists(figure_path):
        #     fig = plotting.get_border_fig(path, region_type, algorithm, filename, iterations)
        #     fig.savefig(figure_path)
        # else:
        #     logger.info('Figure Exists, skipping')

        logger.info('Done!')
        return True

    _results = multithreading.map_kwargs(work, params, failsafe=True)
    logger.info(_results)
    for _r in zip(params, _results):
        if not _r[1]:
            logger.info('{} => {}'.format(_r[0].values(), _r[1]))
示例#6
0
    todo = 'year'

    areas = ['baltimore', 'los_angeles', 'miami']
    _distances = [0.1, 0.8, 1.6, 2.4, 3.2]
    node_types = ['crime']
    _crime_types = [None, ['Theft'], ['Burglary'], ['Assault']]

    logger.info('Starting')
    if todo == 'month':
        params = multithreading.combinations(years=[range(2007, 2011)],
                                             area_name=areas,
                                             distance=_distances,
                                             node_type=node_types,
                                             crime_types=_crime_types)
        multithreading.map_kwargs(save_dynamic_distance_month_graph, params)
    elif todo == 'week':
        params = multithreading.combinations(
            initial=[datetime.datetime(2007, 1, 1)],
            final=[datetime.datetime(2011, 1, 1)],
            delta_name=['week'],
            area_name=areas,
            distance=_distances,
            node_type=node_types,
            crime_types=_crime_types)
        logger.info('Generating {} dynamic networks'.format(len(params)))
        multithreading.map_kwargs(save_dynamic_distance_delta_graph, params)
        # map(lambda args: save_dynamic_distance_graph(**args), params)
    elif todo == 'year':
        params = multithreading.combinations(years=[range(2007, 2011)],
                                             area_name=areas,