def merge_map_data(path, featuredf, store=False):
    """
    For each set of features, take results of graph reduction, assign cluster
    numbers, and compute all other data for insertion into final geojson file.

    The null map (cut list 'xx') is processed first: its feature bars use
    every column listed in FDICT and its rows carry an empty ``mapno``.
    Every other cut list found in ``path`` whose map number is at most six
    characters long is then processed in directory-listing order.

    Args:
        path : path to directory containing graph cut list files, string
        featuredf : scaled features, pandas dataframe; the 'lat' and 'lon'
            columns and the columns named in FDICT are read from it
        store : if True, write the null map to 'results/geojson.csv' and
            append every further map to that file as soon as it is computed

    Returns:
        Data for insertion into geojson, pandas dataframe with one row per
        (map, cluster) and the columns cnum, polygon, rgmatrix, mapno, fbars

    Raises:
        ValueError : if ``path`` contains no cut list file for the null
            map 'xx'

    Side effects:
        The combined dataframe is always pickled to
        'results/geojsondf.pkl'.
        NOTE(review): this happens even when ``store`` is False -- confirm
        that is intended.
    """
    # get filenames: keep the 'CL*' entries and strip the two-character
    # prefix and the last four characters, leaving the bare map number
    files = [name[2:-4] for name in os.listdir(path) if name.startswith('CL')]
    # null map is handled separately below
    files.remove('xx')
    # only allow 3 or less features
    mapnos = [mapno for mapno in files if len(mapno) <= 6]
    fnums = [mapno2list(mapno) for mapno in mapnos]
    # column names per map; built as real lists (not map objects) so they
    # can be paired with mapnos and used as column indexers
    fnames = [[FDICT[n] for n in nums] for nums in fnums]
    # fixed number of clusters
    nclustersmax = 28

    # make null map
    cnum = cut2cluster('xx', nclustersmax, allowed_nodes=featuredf.index)
    # retain only mutual nodes
    # NOTE(review): .ix was removed in pandas 1.0; kept here because the
    # rest of the file targets the older API.
    nodelist = set(featuredf.index).intersection(set(cnum.index))
    featuredf = featuredf.ix[nodelist]
    cnum = cnum.ix[nodelist]

    # shape file polygons; loaded once and reused for every map
    fn = 'data/uscensus/tl_2010_06075_tabblock10/tl_2010_06075_tabblock10.dbf'
    mergedf = merge_shapefiles(featuredf[['lat', 'lon']], fn)

    def _map_frame(cnum, columns, mapno):
        # Build the per-cluster rows (colors, bars, polygon) for one map.
        clusters = cnum.unique()
        # similarity colors
        rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))
        # feature bar graph data
        fbars = feature_bars(featuredf[columns], cnum)
        polys = make_shapefiles(featuredf[['lat', 'lon']], mergedf.polys, cnum)
        # compile into single dataframe
        frame = pd.DataFrame({'cnum': clusters, 'polygon': polys})
        frame['rgmatrix'] = [list(rgmatrix.ix[c]) for c in clusters]
        frame['mapno'] = mapno
        frame['fbars'] = [list(row) for row in fbars.round(2).values]
        return frame

    frames = [_map_frame(cnum, list(FDICT.values()), '')]
    # store results
    if store:
        frames[0].to_csv('results/geojson.csv')

    # make all other maps
    for mapno, columns in zip(mapnos, fnames):
        cnum = cut2cluster(mapno, nclustersmax, allowed_nodes=featuredf.index)
        onedf = _map_frame(cnum, columns, mapno)
        # append results after each map so partial output survives a crash
        if store:
            with open('results/geojson.csv', 'a') as storefile:
                onedf.to_csv(storefile, header=False)
        frames.append(onedf)

    # one concat at the end instead of re-copying the growing frame per map
    alldf = pd.concat(frames, axis=0, ignore_index=True)

    with open('results/geojsondf.pkl', 'wb') as pklfile:
        pickle.dump(alldf, pklfile)
    return alldf
# NOTE(review): a function with this same name is defined earlier in this
# file; if both live in one module, this later definition is the one used.
def merge_map_data(path, featuredf, store=False):
    """
    Build the per-cluster geojson data for the null map and a fixed map.

    The null map (cut list 'xx') is processed first: its feature bars use
    every column listed in FDICT and its rows carry an empty ``mapno``.
    After that only the hard-coded map '020408' is processed; the cut lists
    found in ``path`` are not used to choose maps.

    Args:
        path : path to directory containing graph cut list files, string
        featuredf : scaled features, pandas dataframe; the 'lat' and 'lon'
            columns and the columns named in FDICT are read from it
        store : if True, write the null map to 'results/alldf.csv', append
            each further map to that file, and pickle the combined
            dataframe to 'results/alldf.pkl'

    Returns:
        pandas dataframe with one row per (map, cluster) and the columns
        cnum, polygon, rgmatrix, mapno, fbars

    Raises:
        ValueError : if ``path`` contains no cut list file for the null
            map 'xx'
    """
    # The listing is only used as a sanity check here: it fails early when
    # the directory is unreadable or the null-map cut list is missing.
    files = [name[2:-4] for name in os.listdir(path) if name.startswith('CL')]
    files.remove('xx')

    # NOTE(review): restricted to a single map; the original computed the
    # list of maps with at most 6 characters from `files` and then
    # immediately overwrote it with this value. Confirm whether the
    # restriction is still wanted.
    mapnos = ['020408']
    fnums = [mapno2list(mapno) for mapno in mapnos]
    # column names per map; built as real lists (not map objects) so they
    # can be paired with mapnos and used as column indexers
    fnames = [[FDICT[n] for n in nums] for nums in fnums]
    nclustersmax = 28

    # make null map
    cnum = cut2cluster('xx', nclustersmax, allowed_nodes=featuredf.index)
    # retain only mutual nodes
    # NOTE(review): .ix was removed in pandas 1.0; kept here because the
    # rest of the file targets the older API.
    nodelist = set(featuredf.index).intersection(set(cnum.index))
    featuredf = featuredf.ix[nodelist]
    cnum = cnum.ix[nodelist]
    nclusters = len(cnum.unique())
    # clist is not used below (the 'color' column is disabled); the call is
    # retained in case gencolors has side effects -- TODO confirm and remove
    clist = gencolors(nclusters)

    rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))
    fbars = feature_bars(featuredf[list(FDICT.values())], cnum)
    fn = 'data/uscensus/tl_2010_06075_tabblock10/tl_2010_06075_tabblock10.dbf'
    mergedf = merge_shapefiles(featuredf[['lat', 'lon']], fn)
    polys = make_shapefiles(featuredf[['lat', 'lon']], mergedf.polys, cnum)

    clusters = cnum.unique()
    nulldf = pd.DataFrame({'cnum': clusters, 'polygon': polys})
    nulldf['rgmatrix'] = [list(rgmatrix.ix[c]) for c in clusters]
    nulldf['mapno'] = ''
    nulldf['fbars'] = [list(row) for row in fbars.round(2).values]
    # store results
    if store:
        nulldf.to_csv('results/alldf.csv')
    frames = [nulldf]

    # make all other maps
    for mapno, columns in zip(mapnos, fnames):
        # progress output; print(x) with one argument behaves the same on
        # Python 2 and Python 3
        print(mapno)
        cnum = cut2cluster(mapno, nclustersmax, allowed_nodes=featuredf.index)
        rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))
        fbars = feature_bars(featuredf[columns], cnum)
        polys = make_shapefiles(featuredf[['lat', 'lon']], mergedf.polys, cnum)

        clusters = cnum.unique()
        onedf = pd.DataFrame({'cnum': clusters, 'polygon': polys})
        onedf['rgmatrix'] = [list(rgmatrix.ix[c]) for c in clusters]
        onedf['mapno'] = mapno
        onedf['fbars'] = [list(row) for row in fbars.round(2).values]
        # append results after each map so partial output survives a crash
        if store:
            with open('results/alldf.csv', 'a') as storefile:
                onedf.to_csv(storefile, header=False)
        frames.append(onedf)

    # one concat at the end instead of re-copying the growing frame per map
    alldf = pd.concat(frames, axis=0, ignore_index=True)

    if store:
        with open('results/alldf.pkl', 'wb') as pklfile:
            pickle.dump(alldf, pklfile)
    return alldf