def _cache_hit_or_run(md5hash, func, crime_rows, column_names, num_clusters=None):
    cached_file = None
    if USE_CACHING:
        # Fold every crime row plus the clustering function's name into the
        # hash, so the cache key is unique to this exact input and algorithm.
        for r in crime_rows:
            formatted_r = list(r)
            formatted_r[4] = str(r[4])
            md5hash.update(json.dumps(formatted_r, sort_keys=True).encode('utf-8'))
        md5hash.update(func.__name__.encode('utf-8'))
    # Build the cache file paths only after the hash has been fully updated;
    # otherwise they would embed a stale digest and never match the files
    # written below.
    convex_hulls_file_path = CACHE_DIR \
        + 'convex_hulls__' + md5hash.hexdigest() + '.cache'
    cluster_points_file_path = CACHE_DIR \
        + 'cluster_points__' + md5hash.hexdigest() + '.cache'
    if USE_CACHING:
        # Check for a cached version
        print("Files in cache: ")
        print("current hash: " + md5hash.hexdigest())
        for f in os.listdir(CACHE_DIR):
            print(f)
            # Match against the convex-hulls file name actually written below,
            # not the bare digest, or the cache would never hit.
            if f == ('convex_hulls__' + md5hash.hexdigest() + '.cache'):
                cached_file = f
                break
    # On a cache miss (or with caching disabled), run the clustering function
    # and cache the result.
    if not USE_CACHING or cached_file is None:
        start_time = time.perf_counter()
        if num_clusters is not None:
            cluster_convex_hulls, cluster_points = func(
                crime_rows, column_names, num_clusters)
        else:
            cluster_convex_hulls, cluster_points = func(crime_rows, column_names)
        if cluster_convex_hulls is None or cluster_points is None:
            return None
        end_time = time.perf_counter()
        print("Clustering ran in time: " + str(end_time - start_time))
        if USE_CACHING:
            with open(convex_hulls_file_path, 'w') as new_cache_file:
                json.dump(cluster_convex_hulls, new_cache_file, cls=DatetimeEncoder)
            with open(cluster_points_file_path, 'w') as new_cache_file:
                json.dump(cluster_points, new_cache_file, cls=DatetimeEncoder)
    else:
        print("RETRIEVED CACHED VERSION!")
        # json.load's cls parameter expects a JSONDecoder subclass; the
        # datetime-aware encoder only applies when dumping, so the plain
        # loader is used here.
        with open(convex_hulls_file_path, 'r') as cache_file:
            cluster_convex_hulls = json.load(cache_file)
        with open(cluster_points_file_path, 'r') as cache_file:
            cluster_points = json.load(cache_file)
    # Per-cluster statistics, keyed by cluster index.
    statistics = {}
    for i in range(len(cluster_points)):
        cluster_statistics = {}
        cluster_statistics['top_5_crimes'] = crime_statistics.top_n_crimes(
            cluster_points[i], column_names, n=5)
        """
        cluster_statistics['crime_per_year'] = crime_statistics.crimes_per_year(
            cluster_points[i])
        """
        statistics[i] = cluster_statistics
    return_dict = {}
    return_dict['area_outline'] = cluster_convex_hulls
    return_dict['statistics'] = statistics
    return return_dict
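# `DatetimeEncoder` is defined elsewhere in this project and not shown in this
# section. A minimal sketch of an encoder with the behavior the cache layer
# above relies on (datetime values serialized as ISO-8601 strings) might look
# like the stand-in below; the name `_ExampleDatetimeEncoder` is hypothetical
# and chosen to avoid shadowing the real class.
import datetime
import json

class _ExampleDatetimeEncoder(json.JSONEncoder):
    """Serialize datetime objects as ISO-8601 strings; defer everything else
    to the default JSONEncoder behavior."""
    def default(self, obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        return super().default(obj)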
def get_dc_outline():
    print("Get DC Outline request!")
    print(request)
    print(request.args)
    request_args = request.args
    db_session = scoped_session(sessionmaker(bind=engine))
    dc_row = db_session.query(StateOutlines)\
        .filter(StateOutlines.name == 'District of Columbia')
    # Parse out the coordinates. GeoJSON MultiPolygon coordinates are nested
    # polygon -> ring -> point, so the outer ring of the first polygon lives
    # two levels deep at coordinates[0][0].
    outline_points = json.loads(db_session.scalar(dc_row[0].geom.ST_AsGeoJSON()))
    dc_outline_points = list()
    for i in range(len(outline_points['coordinates'][0][0])):
        dc_point_dict = dict()
        # longitude
        dc_point_dict['x_cord'] = outline_points['coordinates'][0][0][i][0]
        # latitude
        dc_point_dict['y_cord'] = outline_points['coordinates'][0][0][i][1]
        dc_outline_points.append(dc_point_dict)
    # Crimes for statistics
    dc_crime_rows = db_session.query(
            DCCrime.id, DCCrime.report_date, DCCrime.offense,
            DCCrime.address, DCCrime.ward,
            DCCrime.x_cord, DCCrime.y_cord)\
        .order_by(DCCrime.id)
    formatted_dc_crime_rows = [dict(zip(dc_column_names, r))
                               for r in dc_crime_rows]
    crime_type_tuples = crime_statistics.top_n_crimes(
        formatted_dc_crime_rows,
        column_names=dc_column_names,
        offense_column_name='offense')
    db_session.close()
    return jsonify({
        'area_outline': {'District of Columbia': dc_outline_points},
        'area_statistics': {
            'District of Columbia': {'top_5_crimes': crime_type_tuples}
        }
    })
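# `crime_statistics.top_n_crimes` lives in a separate module. Judging from the
# call sites in this file, it tallies the offense column across row dicts and
# returns the n most common (offense, count) tuples, most frequent first. A
# minimal sketch under that assumption (the name `_example_top_n_crimes` is
# hypothetical):
from collections import Counter

def _example_top_n_crimes(rows, column_names, offense_column_name='offense', n=5):
    """Count offenses across row dicts and return the n most common
    (offense, count) pairs, most frequent first."""
    counts = Counter(row[offense_column_name] for row in rows)
    return counts.most_common(n)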
def get_nova_county_outlines():
    """
    SELECT c.gid,
           s.name AS state_name,
           s.statefp,
           c.name AS county_name,
           c.geom AS geom
    FROM state_outlines s, county_outlines c
    WHERE s.name::text = 'Virginia'::text;
    """
    print("Get Nova County Outlines request!")
    print(request)
    print(request.args)
    request_args = request.args
    db_session = scoped_session(sessionmaker(bind=engine))
    # or_() takes individual clauses, so the generator must be unpacked;
    # CountyOutlines.name.in_(county_names) would be an equivalent filter.
    nova_county_rows = db_session.query(StateOutlines, CountyOutlines)\
        .join(CountyOutlines, CountyOutlines.statefp == StateOutlines.statefp)\
        .filter(StateOutlines.name == 'Virginia')\
        .filter(or_(*(CountyOutlines.name == county_name
                      for county_name in county_names)))
    county_outlines = {}
    county_statistics = {}
    for state, county in nova_county_rows:
        # Parse out the coordinates. GeoJSON MultiPolygon coordinates are
        # nested polygon -> ring -> point, so the outer ring of the first
        # polygon lives two levels deep at coordinates[0][0].
        outline_points = json.loads(db_session.scalar(county.geom.ST_AsGeoJSON()))
        county_outline_points = list()
        for i in range(len(outline_points['coordinates'][0][0])):
            county_point_dict = dict()
            # longitude
            county_point_dict['x_cord'] = outline_points['coordinates'][0][0][i][0]
            # latitude
            county_point_dict['y_cord'] = outline_points['coordinates'][0][0][i][1]
            county_outline_points.append(county_point_dict)
        # A second match on the same name is stored under a '_2' suffix
        # instead of overwriting the first.
        if county.name in county_outlines:
            county_outlines[county.name + '_2'] = county_outline_points
        else:
            county_outlines[county.name] = county_outline_points
        # Crimes for statistics
        nova_crime_rows = db_session.query(
                NovaCrime.id, NovaCrime.report_date,
                NovaCrime.offense_specific, NovaCrime.offense_general,
                NovaCrime.address, NovaCrime.city,
                NovaCrime.county, NovaCrime.zip_code,
                NovaCrime.x_cord, NovaCrime.y_cord)\
            .filter(NovaCrime.county == county.name)\
            .order_by(NovaCrime.id)
        formatted_nova_crime_rows = [dict(zip(nova_column_names, r))
                                     for r in nova_crime_rows]
        crime_type_tuples = crime_statistics.top_n_crimes(
            formatted_nova_crime_rows,
            column_names=nova_column_names,
            offense_column_name='offense_specific')
        if county.name in county_statistics:
            county_statistics[county.name + '_2'] = {'top_5_crimes': crime_type_tuples}
        else:
            county_statistics[county.name] = {'top_5_crimes': crime_type_tuples}
    db_session.close()
    print("Returned: " + str(len(county_outlines)) + " county outlines")
    return jsonify({'area_outline': county_outlines,
                    'area_statistics': county_statistics})
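# The GeoJSON ring-parsing loop is duplicated between get_dc_outline and
# get_nova_county_outlines above; a shared helper could factor it out. A
# sketch under the same coordinates[0][0] assumption (the name
# `_geojson_ring_to_points` is hypothetical):
def _geojson_ring_to_points(geojson):
    """Convert the outer ring of the first polygon in a parsed GeoJSON
    MultiPolygon into a list of {'x_cord': lon, 'y_cord': lat} dicts."""
    return [{'x_cord': point[0], 'y_cord': point[1]}
            for point in geojson['coordinates'][0][0]]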