示例#1
0
def save_dynamic_distance_year_graph(years,
                                     area_name,
                                     distance,
                                     node_type,
                                     crime_types=None):
    """ Saves a number of networks, where each network represents a year.

        Each network is saved to disk at a location that matches the parameters
        used to construct it. This method uses the box network method to make
        network creation faster. This means each network may not contain all
        the crimes available for that period of time.

        Networks whose output file already exists are skipped. As a side
        effect this function sets ``box_networks._multiprocess`` to False.

        :param years: The time frame of networks to create. Should be a
        non-empty list of integers in ascending order (the year after the
        last entry closes the final window).
        :param area_name: The name of the area to search for crimes. This
        should be a valid entry in cities.json
        :param distance: The maximum distance between two connected crimes.
        :param node_type: What each node represents. In this function it is
        only used to build the output path.
        :param crime_types: The types of crimes to include in the network.
        None means no restriction on type.

        Examples
        --------
        >>> save_dynamic_distance_year_graph([2008, 2009], 'miami', 1.6,
        ...   'crime', ['Theft'])
    """
    # Build a real list (map() is lazy on Python 3 and has no append) and
    # sort it so each window runs from one start time to the next one
    # chronologically, whatever order combinations() yields them in.
    start_times = sorted(
        datetime.datetime(**args)
        for args in multithreading.combinations(
            month=[1], year=years, day=[1]))
    # add the end point of the final network to the list
    start_times.append(datetime.datetime(year=years[-1] + 1, month=1, day=1))
    path = 'data/{}/{}/distance/{}/{}'.format(area_name,
                                              get_crime_name(crime_types),
                                              distance, node_type)
    box_networks._multiprocess = False  # this is very bad, do not multiprocess!
    # Close the file deterministically instead of relying on garbage
    # collection.
    with open('cities.json', 'r') as cities_file:
        zipcodes = json.load(cities_file)[area_name]

    limits = {'zipcode': {'$in': zipcodes}}
    if crime_types is not None:
        limits['type'] = {'$in': crime_types}

    # Walk consecutive (start, end) pairs; each pair is one half-open window.
    for window_start, window_end in zip(start_times, start_times[1:]):
        network_path = '{}/networks/{}_{}.graphml'.format(
            path, 'year', date_string(window_start))
        if os.path.exists(network_path):
            logger.info('Network exists, skipping')
            continue
        limits['date'] = {'$gte': window_start, '$lt': window_end}
        logger.info('Building {}'.format(limits))
        g = box_networks.distance_crime_network_by_box(distance,
                                                       area_name,
                                                       limits=limits)
        save_graph(g, network_path)
        # presumably frees the just-saved graph before the next build
        gc.collect()
示例#2
0
def get_box_networks(distance,
                     bottom,
                     left,
                     width,
                     height,
                     num_box_x,
                     num_box_y,
                     limits=None,
                     keep_frac=1):
    """ Gets a number of networks contained in boxes.

        The area is split into ``num_box_x * num_box_y`` equally sized boxes
        and ``get_box_network`` is called once per box, either through
        ``multithreading.map_kwargs`` or serially depending on the module
        level ``_multiprocess`` flag.

        :param distance: The maximum distance between connected nodes
        :param bottom: The bottom coordinate of the group of boxes
        :param left: The left coordinate of the group of boxes
        :param width: The total width of the group of boxes
        :param height: The total height of the group of boxes
        :param num_box_x: The number of boxes in a row
        :param num_box_y: The number of boxes in a column
        :param limits: Any restrictions on database output
        eg: {'type': 'Theft'}
        :param keep_frac: Forwarded unchanged to get_box_network for every
        box.
        :return: A map from the (x,y) index of a box to the network of crimes
        in the box.
        :raises RuntimeError: If any box result is False.
    """
    # NOTE(review): on Python 2 this is integer division when both operands
    # are ints -- confirm callers pass float widths/heights.
    box_width = width / num_box_x
    box_height = height / num_box_y

    params = multithreading.combinations(width=[box_width],
                                         height=[box_height],
                                         x=range(num_box_x),
                                         y=range(num_box_y),
                                         gl_bottom=[bottom],
                                         gl_left=[left],
                                         limits=[limits],
                                         distance=[distance],
                                         keep_frac=[keep_frac])

    logger.debug('Broke into {} boxes'.format(len(params)))

    if _multiprocess:
        results = multithreading.map_kwargs(get_box_network,
                                            params,
                                            failsafe=True)
    else:
        # A concrete list is required: the results are scanned for failures
        # and then paired with params below, which a lazy map() would not
        # survive.
        results = [get_box_network(**args) for args in params]

    # fail if we are missing boxes
    if False in results:
        logger.fatal('Some Boxes Failed!')
        raise RuntimeError('Some Boxes Failed!')

    # get a map<(x,y), network>
    return {(args['x'], args['y']): network
            for args, network in zip(params, results)}
示例#3
0
def save_dynamic_distance_month_graph(years,
                                      area_name,
                                      distance,
                                      node_type,
                                      crime_types=None):
    """ Saves a number of networks, where each network represents a month.

        Each network is saved to disk at a location that matches the parameters
        used to construct it. Networks whose output file already exists are
        skipped.

        :param years: The time frame of networks to create. Should be a
        non-empty list of integers in ascending order (January of the year
        after the last entry closes the final window).
        :param area_name: The name of the area to search for crimes. This
        should be a valid entry in cities.json
        :param distance: The maximum distance between two connected crimes.
        :param node_type: What each node represents. The is passed on to
        network_creation.distance_graph.
        :param crime_types: The types of crimes to include in the network.

        Examples
        --------
        >>> save_dynamic_distance_month_graph([2008, 2009], 'miami', 1.6,
        ...   'crime', ['Theft'])
    """
    # Build a real list (map() is lazy on Python 3 and has no append) and
    # sort it: both month and year vary here, so the windows below are only
    # valid if the start times are in chronological order regardless of how
    # combinations() orders its output.
    start_times = sorted(
        datetime.datetime(**args)
        for args in multithreading.combinations(
            month=range(1, 13), year=years, day=[1]))
    # add the end point of the final network to the list
    start_times.append(datetime.datetime(year=years[-1] + 1, month=1, day=1))

    path = 'data/{}/{}/distance/{}/{}'.format(area_name,
                                              get_crime_name(crime_types),
                                              distance, node_type)
    # Close the file deterministically instead of relying on garbage
    # collection.
    with open('cities.json', 'r') as cities_file:
        zipcodes = json.load(cities_file)[area_name]

    # Walk consecutive (start, end) pairs; each pair bounds one network.
    for window_start, window_end in zip(start_times, start_times[1:]):
        network_path = '{}/networks/{}_{}.graphml'.format(
            path, 'month', date_string(window_start))
        if os.path.exists(network_path):
            logger.info('Network exists, skipping')
            continue
        data = crime_window(window_start, window_end, zipcodes, crime_types)
        logger.info('{} crimes found for {}'.format(
            len(data), window_start))
        g = network_creation.distance_graph(data, distance, node_type)
        save_graph(g, network_path)
示例#4
0
    # Network file-name lists at week, month and year granularity. In the
    # code visible here only year_files is used further down.
    week_files = save_networks.week_files(datetime.datetime(2007, 1, 1),
                                          datetime.datetime(2011, 1, 1))
    month_files = save_networks.month_files(range(2007, 2011))
    year_files = save_networks.year_files(range(2007, 2011))

    logger.debug(year_files)

    _cities = ['baltimore', 'los_angeles', 'miami']
    _clusterings = ['average', 'single', 'complete']
    _levels = [50000, 25000]

    # Only area, clustering and level are varied. The remaining keys are
    # wrapped in a one-element outer list so every parameter set receives
    # the whole inner list as a single value.
    params = multithreading.combinations(
        area=_cities,
        clustering=_clusterings,
        level=_levels,
        crime_types=[['all', 'assault', 'burglary', 'theft']],
        distances=[[3.2, 2.4, 1.6, 0.8, 0.1]],
        node_types=[['zip']],
        filenames=[year_files],
        algorithms=[['label_propagation']],
        iterations_list=[[1000]])

    # filter out bad combinations
    # Kept pairs: average/50000, single/25000 and complete/25000.
    params = filter(
        lambda args: args['clustering'] == 'average' and args['level'] == 50000
        or args['clustering'] == 'single' and args['level'] == 25000 or args[
            'clustering'] == 'complete' and args['level'] == 25000, params)

    # NOTE(review): len() needs params to be a list; filter() only returns
    # a list on Python 2.
    logger.info('{} Base Networks Found'.format(len(params)))

    # One get_z_scores call per remaining parameter set.
    score_lists = multithreading.map_kwargs(get_z_scores, params)
    logger.info('Combining Results')
示例#5
0
if __name__ == '__main__':
    # write info and debug to different files
    # (the logging setup itself is read from logging_config.json)
    logging.config.dictConfig(json.load(open('logging_config.json', 'r')))
    fiona.log.setLevel(logging.WARNING)  # I don't care about the fiona logs

    # Network file names at every granularity; all three lists are
    # concatenated into the 'filename' axis below.
    week_files = save_networks.week_files(datetime.datetime(2007, 1, 1),
                                          datetime.datetime(2011, 1, 1))
    month_files = save_networks.month_files(range(2007, 2011))
    year_files = save_networks.year_files(range(2007, 2011))

    # NOTE(review): the '+' below requires week_files/month_files/year_files
    # to be lists.
    params = multithreading.combinations(
        city=['los_angeles', 'baltimore', 'miami'],
        crime_name=['all', 'theft', 'burglary', 'assault'],
        distance=[3.2, 2.4, 1.6, .8, .1],
        node_type=['crime', 'zip'],
        region_type=['voronoi', 'zip'],
        algorithm=['label_propagation'],
        filename=week_files + month_files + year_files,
        iterations=[100])

    # filter out bad combinations
    # Kept pairs: node_type 'crime' with region_type 'voronoi', and
    # node_type 'zip' with region_type 'zip'.
    params = filter(
        lambda d: d['node_type'] == 'crime' and d['region_type'] == 'voronoi'
        or d['node_type'] == 'zip' and d['region_type'] == 'zip', params)

    # Worker taking one parameter set as keyword arguments; only its first
    # statement is visible here.
    # NOTE(review): crime_name is accepted but is not part of unique_id, so
    # ids repeat across crime names -- confirm this is intended.
    def work(city, crime_name, distance, node_type, region_type, algorithm,
             filename, iterations):
        unique_id = '{}-{}-{}-{}-{}-{}-{}'.format(city, distance, node_type,
                                                  region_type, algorithm,
                                                  filename, iterations)
示例#6
0
def month_files(years, months=range(1, 13)):
    """ Builds 'month_YYYY-MM-DD' names for the first day of each month.

        :param years: The years to generate names for.
        :param months: The months to generate names for. Defaults to all
        twelve months.
        :return: A list of strings, one per parameter set produced by
        multithreading.combinations, with month and day zero-padded to two
        characters.
    """
    # Return a concrete list rather than a map() result so callers can
    # concatenate it with other name lists and take its length.
    return ['month_{year}-{month:0>2}-{day:0>2}'.format(**args)
            for args in multithreading.combinations(
                year=years, month=months, day=[1])]
示例#7
0
def year_files(years):
    """ Builds 'year_YYYY-01-01' names for the first day of each year.

        :param years: The years to generate names for.
        :return: A list of strings, one per parameter set produced by
        multithreading.combinations, with month and day zero-padded to two
        characters.
    """
    # Return a concrete list rather than a map() result so callers can
    # concatenate it with other name lists and take its length.
    return ['year_{year}-{month:0>2}-{day:0>2}'.format(**args)
            for args in multithreading.combinations(
                year=years, month=[1], day=[1])]
示例#8
0
if __name__ == '__main__':
    # Logging setup is read from logging_config.json.
    logging.config.dictConfig(json.load(open('logging_config.json', 'r')))
    # logging.basicConfig(level=logging.DEBUG)

    # Selects which branch below runs.
    # NOTE(review): only 'month' and 'week' branches are visible here; the
    # handling for 'year' is presumably further down -- confirm.
    todo = 'year'

    areas = ['baltimore', 'los_angeles', 'miami']
    _distances = [0.1, 0.8, 1.6, 2.4, 3.2]
    node_types = ['crime']
    # None is passed through as crime_types=None to the save function.
    _crime_types = [None, ['Theft'], ['Burglary'], ['Assault']]

    logger.info('Starting')
    if todo == 'month':
        # years is wrapped in an outer list so each parameter set gets the
        # whole range as one value.
        params = multithreading.combinations(years=[range(2007, 2011)],
                                             area_name=areas,
                                             distance=_distances,
                                             node_type=node_types,
                                             crime_types=_crime_types)
        multithreading.map_kwargs(save_dynamic_distance_month_graph, params)
    elif todo == 'week':
        # initial, final and delta_name are fixed; area, distance, node type
        # and crime types vary.
        params = multithreading.combinations(
            initial=[datetime.datetime(2007, 1, 1)],
            final=[datetime.datetime(2011, 1, 1)],
            delta_name=['week'],
            area_name=areas,
            distance=_distances,
            node_type=node_types,
            crime_types=_crime_types)
        logger.info('Generating {} dynamic networks'.format(len(params)))
        multithreading.map_kwargs(save_dynamic_distance_delta_graph, params)
        # map(lambda args: save_dynamic_distance_graph(**args), params)