def _cache_hit_or_run(md5hash, func, crime_rows, column_names, num_clusters=None):
    cached_file = None
    if USE_CACHING:
        # Fold every crime row plus the clustering function's name into the
        # hash, so the cache key is unique to this exact input and algorithm.
        for r in crime_rows:
            formatted_r = list(r)
            formatted_r[4] = str(r[4])
            md5hash.update(json.dumps(formatted_r, sort_keys=True).encode('utf-8'))
        md5hash.update(func.__name__.encode('utf-8'))
    # Build the cache file paths only after the hash has been fully updated;
    # otherwise they would embed a stale digest and never match the files
    # written below.
    convex_hulls_file_path = CACHE_DIR \
        + 'convex_hulls__' + md5hash.hexdigest() + '.cache'
    cluster_points_file_path = CACHE_DIR \
        + 'cluster_points__' + md5hash.hexdigest() + '.cache'
    if USE_CACHING:
        # Check for a cached version
        print("Files in cache: ")
        print("current hash: " + md5hash.hexdigest())
        for f in os.listdir(CACHE_DIR):
            print(f)
            # Match against the convex-hulls file name actually written below,
            # not the bare digest, or the cache would never hit.
            if f == ('convex_hulls__' + md5hash.hexdigest() + '.cache'):
                cached_file = f
                break
    # On a cache miss (or with caching disabled), run the clustering function
    # and cache the result.
    if not USE_CACHING or cached_file is None:
        start_time = time.perf_counter()
        if num_clusters is not None:
            cluster_convex_hulls, cluster_points = func(
                crime_rows, column_names, num_clusters)
        else:
            cluster_convex_hulls, cluster_points = func(crime_rows, column_names)
        if cluster_convex_hulls is None or cluster_points is None:
            return None
        end_time = time.perf_counter()
        print("Clustering ran in time: " + str(end_time - start_time))
        if USE_CACHING:
            with open(convex_hulls_file_path, 'w') as new_cache_file:
                json.dump(cluster_convex_hulls, new_cache_file, cls=DatetimeEncoder)
            with open(cluster_points_file_path, 'w') as new_cache_file:
                json.dump(cluster_points, new_cache_file, cls=DatetimeEncoder)
    else:
        print("RETRIEVED CACHED VERSION!")
        # json.load's cls parameter expects a JSONDecoder subclass; the
        # datetime-aware encoder only applies when dumping, so the plain
        # loader is used here.
        with open(convex_hulls_file_path, 'r') as cache_file:
            cluster_convex_hulls = json.load(cache_file)
        with open(cluster_points_file_path, 'r') as cache_file:
            cluster_points = json.load(cache_file)
    # Per-cluster statistics, keyed by cluster index.
    statistics = {}
    for i in range(len(cluster_points)):
        cluster_statistics = {}
        cluster_statistics['top_5_crimes'] = crime_statistics.top_n_crimes(
            cluster_points[i], column_names, n=5)
        """
        cluster_statistics['crime_per_year'] = crime_statistics.crimes_per_year(
            cluster_points[i])
        """
        statistics[i] = cluster_statistics
    return_dict = {}
    return_dict['area_outline'] = cluster_convex_hulls
    return_dict['statistics'] = statistics
    return return_dict
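# `DatetimeEncoder` is defined elsewhere in this project and not shown in this
# section. A minimal sketch of an encoder with the behavior the cache layer
# above relies on (datetime values serialized as ISO-8601 strings) might look
# like the stand-in below; the name `_ExampleDatetimeEncoder` is hypothetical
# and chosen to avoid shadowing the real class.
import datetime
import json

class _ExampleDatetimeEncoder(json.JSONEncoder):
    """Serialize datetime objects as ISO-8601 strings; defer everything else
    to the default JSONEncoder behavior."""
    def default(self, obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        return super().default(obj)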
def get_dc_outline():
    print("Get DC Outline request!")
    print(request)
    print(request.args)
    request_args = request.args
    db_session = scoped_session(sessionmaker(bind=engine))
    dc_row = db_session.query(StateOutlines)\
        .filter(StateOutlines.name == 'District of Columbia')
    # Parse out the coordinates. GeoJSON MultiPolygon coordinates are nested
    # polygon -> ring -> point, so the outer ring of the first polygon lives
    # two levels deep at coordinates[0][0].
    outline_points = json.loads(db_session.scalar(dc_row[0].geom.ST_AsGeoJSON()))
    dc_outline_points = list()
    for i in range(len(outline_points['coordinates'][0][0])):
        dc_point_dict = dict()
        # longitude
        dc_point_dict['x_cord'] = outline_points['coordinates'][0][0][i][0]
        # latitude
        dc_point_dict['y_cord'] = outline_points['coordinates'][0][0][i][1]
        dc_outline_points.append(dc_point_dict)
    # Crimes for statistics
    dc_crime_rows = db_session.query(
            DCCrime.id, DCCrime.report_date, DCCrime.offense,
            DCCrime.address, DCCrime.ward,
            DCCrime.x_cord, DCCrime.y_cord)\
        .order_by(DCCrime.id)
    formatted_dc_crime_rows = [dict(zip(dc_column_names, r))
                               for r in dc_crime_rows]
    crime_type_tuples = crime_statistics.top_n_crimes(
        formatted_dc_crime_rows,
        column_names=dc_column_names,
        offense_column_name='offense')
    db_session.close()
    return jsonify({
        'area_outline': {'District of Columbia': dc_outline_points},
        'area_statistics': {
            'District of Columbia': {'top_5_crimes': crime_type_tuples}
        }
    })
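# `crime_statistics.top_n_crimes` lives in a separate module. Judging from the
# call sites in this file, it tallies the offense column across row dicts and
# returns the n most common (offense, count) tuples, most frequent first. A
# minimal sketch under that assumption (the name `_example_top_n_crimes` is
# hypothetical):
from collections import Counter

def _example_top_n_crimes(rows, column_names, offense_column_name='offense', n=5):
    """Count offenses across row dicts and return the n most common
    (offense, count) pairs, most frequent first."""
    counts = Counter(row[offense_column_name] for row in rows)
    return counts.most_common(n)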
def get_nova_county_outlines():
    """
    SELECT c.gid,
           s.name AS state_name,
           s.statefp,
           c.name AS county_name,
           c.geom AS geom
    FROM state_outlines s, county_outlines c
    WHERE s.name::text = 'Virginia'::text;
    """
    print("Get Nova County Outlines request!")
    print(request)
    print(request.args)
    request_args = request.args
    db_session = scoped_session(sessionmaker(bind=engine))
    # or_() takes individual clauses, so the generator must be unpacked;
    # CountyOutlines.name.in_(county_names) would be an equivalent filter.
    nova_county_rows = db_session.query(StateOutlines, CountyOutlines)\
        .join(CountyOutlines, CountyOutlines.statefp == StateOutlines.statefp)\
        .filter(StateOutlines.name == 'Virginia')\
        .filter(or_(*(CountyOutlines.name == county_name
                      for county_name in county_names)))
    county_outlines = {}
    county_statistics = {}
    for state, county in nova_county_rows:
        # Parse out the coordinates. GeoJSON MultiPolygon coordinates are
        # nested polygon -> ring -> point, so the outer ring of the first
        # polygon lives two levels deep at coordinates[0][0].
        outline_points = json.loads(db_session.scalar(county.geom.ST_AsGeoJSON()))
        county_outline_points = list()
        for i in range(len(outline_points['coordinates'][0][0])):
            county_point_dict = dict()
            # longitude
            county_point_dict['x_cord'] = outline_points['coordinates'][0][0][i][0]
            # latitude
            county_point_dict['y_cord'] = outline_points['coordinates'][0][0][i][1]
            county_outline_points.append(county_point_dict)
        # A second match on the same name is stored under a '_2' suffix
        # instead of overwriting the first.
        if county.name in county_outlines:
            county_outlines[county.name + '_2'] = county_outline_points
        else:
            county_outlines[county.name] = county_outline_points
        # Crimes for statistics
        nova_crime_rows = db_session.query(
                NovaCrime.id, NovaCrime.report_date,
                NovaCrime.offense_specific, NovaCrime.offense_general,
                NovaCrime.address, NovaCrime.city,
                NovaCrime.county, NovaCrime.zip_code,
                NovaCrime.x_cord, NovaCrime.y_cord)\
            .filter(NovaCrime.county == county.name)\
            .order_by(NovaCrime.id)
        formatted_nova_crime_rows = [dict(zip(nova_column_names, r))
                                     for r in nova_crime_rows]
        crime_type_tuples = crime_statistics.top_n_crimes(
            formatted_nova_crime_rows,
            column_names=nova_column_names,
            offense_column_name='offense_specific')
        if county.name in county_statistics:
            county_statistics[county.name + '_2'] = {'top_5_crimes': crime_type_tuples}
        else:
            county_statistics[county.name] = {'top_5_crimes': crime_type_tuples}
    db_session.close()
    print("Returned: " + str(len(county_outlines)) + " county outlines")
    return jsonify({'area_outline': county_outlines,
                    'area_statistics': county_statistics})
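# The GeoJSON ring-parsing loop is duplicated between get_dc_outline and
# get_nova_county_outlines above; a shared helper could factor it out. A
# sketch under the same coordinates[0][0] assumption (the name
# `_geojson_ring_to_points` is hypothetical):
def _geojson_ring_to_points(geojson):
    """Convert the outer ring of the first polygon in a parsed GeoJSON
    MultiPolygon into a list of {'x_cord': lon, 'y_cord': lat} dicts."""
    return [{'x_cord': point[0], 'y_cord': point[1]}
            for point in geojson['coordinates'][0][0]]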