def _expand_place_ids(self, terms):
    """ Look up all of the place identifiers to get gvids.

    Args:
        terms (str or unicode): terms to look up

    Returns:
        str or list: the given terms when no identifiers were found,
            otherwise a list of identifier vids.
    """
    vids = []
    best_type = None

    for hit in self.backend.identifier_index.search(terms):
        if not best_type:
            best_type = hit.type
        # Ignore ones that aren't the same type as the best match
        if hit.type == best_type:
            vids.append(hit.vid)

    if not vids:
        return terms

    # Add the 'all region' gvids for the higher level
    covering = set()
    for vid in vids:
        covering.update(iallval(GVid.parse(vid)))
    vids.extend(str(g) for g in covering)

    return vids
def partition(self, pvid):
    """Render the detail page for the partition identified by *pvid*."""
    from geoid.civick import GVid

    tmpl = self.env.get_template('bundle/partition.html')

    part = self.doc_cache.partition(pvid)
    part['table'] = self.doc_cache.table(part['table_vid'])

    if 'geo_coverage' in part:
        idents = self.library.search.identifier_map
        names = part['geo_coverage']['names']

        for gvid in part['geo_coverage']['vids']:
            try:
                names.append(idents[gvid])
            except KeyError:
                # Unknown gvid: describe it as "all <level>" within its parent region.
                g = GVid.parse(gvid)
                try:
                    names.append("All {} in {} ".format(
                        g.level_plural.title(), idents[str(g.promote())]))
                except KeyError:
                    pass  # parent region is unknown too; skip it

    return self.render(tmpl, p=part, **self.cc())
def expand_place_ids(self, terms):
    """ Look up all of the place identifiers to get gvids.

    :param terms: search terms naming one or more places
    :return: the given terms when nothing matched, otherwise a list of gvid strings
    """
    from geoid.civick import GVid
    from geoid.util import iallval
    import itertools

    vids = []
    best_type = None

    for _score, vid, ident_type, _name in self.search_identifiers(terms):
        if not best_type:
            best_type = ident_type
        # Ignore ones that aren't the same type as the best match
        if ident_type == best_type:
            vids.append(vid)

    if not vids:
        return terms

    # Add the 'all region' gvids for the higher level
    higher = set(itertools.chain.from_iterable(
        iallval(GVid.parse(v)) for v in vids))
    vids += [str(g) for g in higher]

    return vids
def resum(g):
    """Collapse gvid *g* to its summary form; return *g* unchanged when it can't be parsed."""
    try:
        summarized = str(GVid.parse(g).summarize())
    except KeyError:
        return g
    except ValueError:
        # Malformed gvid string: log it with the owning partition for debugging.
        logger.debug("Failed to parse gvid '{}' from partition '{}' grain coverage"
                     .format(g, partition.identity.vname))
        return g
    return summarized
def name_to_gvid(self, row):
    """Resolve the state/county/city name in *row* to a GVid.

    Side effects: resets ``self.city_name`` on every call, and sets either
    ``self.last_county`` (county rows) or ``self.city_name`` (place rows).
    """
    from geoid.civick import GVid

    name = row.state_county_city.lower()
    self.city_name = None

    if name == 'california':
        from geoid.civick import State
        return State(6)

    if name == 'balance of county':
        return None

    if self.non_null_row_count == 1:
        # Sole non-null column: treat the value as a county name.
        self.last_county = row.state_county_city
        return GVid.parse(self.county_map()[name])

    # Otherwise it is a place (city) name.
    # NOTE(review): place_map().get() may return None for unknown names —
    # confirm GVid.parse handles that.
    self.city_name = row.state_county_city
    return GVid.parse(self.place_map().get(name))
def build_modify_row(self, row_gen, p, source, row):
    """Called for every row to allow subclasses to modify rows. """
    from geoid.civick import GVid, Zip

    row['name'] = row['name'].decode('latin1')
    row['county_gvid'] = self.counties.get(row['county'].lower())
    row['year'] = self.year

    zip_code = row['zip']
    if zip_code and GVid.parse(row['county_gvid']):
        # Only attach a zip gvid when the county gvid parsed successfully.
        row['zip_gvid'] = str(Zip(int(zip_code)))
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID.

    :param v: a gvid or ACS geoid string
    :return: a GVid instance
    :raises ValueError: when *v* parses as neither form
    """
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    try:
        return GVid.parse(v)
    except ValueError as gvid_err:
        first_error = str(gvid_err)

    # Fall back to the ACS form, converting the result to a GVid.
    try:
        return AcsGeoid.parse(v).convert(GVid)
    except ValueError as acs_err:
        raise ValueError("Failed to parse to either ACS or GVid: {}; {}".format(
            first_error, str(acs_err)))
def get_boundaries(gvid, sl):
    """ Return a cached, static geojson file of boundaries for a region

    :param gvid: The GVID of the region
    :param sl: The summary level of the subdivisions of the region.
    :return: a Flask response serving the cached geojson file
    """
    from geojson import Feature, FeatureCollection, dumps  # Point was unused; dropped
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache('ui/geo')
    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    # Build and cache the geojson file on first request.
    if not exists(fn_path):
        p = aac.library.partition('census.gov-tiger-2015-counties')
        features = []
        for i, row in enumerate(p):
            if row.statefp == 6:  # In dev, assume counties in California
                # Use a distinct name so we don't shadow the 'gvid' parameter.
                row_gvid = GVid.parse(row.gvid)
                f = Feature(geometry=loads(row.geometry).simplify(0.01),
                            properties={
                                'gvid': row.gvid,
                                'state': row_gvid.state,
                                'county': row_gvid.county,
                                'count_name': row.name
                            })
                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, 'w') as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False,
                               mimetype='application/vnd.geo+json')
def get_boundaries(gvid, sl):
    """ Return a cached, static geojson file of boundaries for a region

    :param gvid: The GVID of the region
    :param sl: The summary level of the subdivisions of the region.
    :return: a Flask response serving the cached geojson file
    """
    from geojson import Feature, FeatureCollection, dumps  # Point was unused; dropped
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache("ui/geo")
    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    # Build and cache the geojson file on first request.
    if not exists(fn_path):
        p = aac.library.partition("census.gov-tiger-2015-counties")
        features = []
        for i, row in enumerate(p):
            if row.statefp == 6:  # In dev, assume counties in California
                # Use a distinct name so we don't shadow the 'gvid' parameter.
                row_gvid = GVid.parse(row.gvid)
                f = Feature(
                    geometry=loads(row.geometry).simplify(0.01),
                    properties={"gvid": row.gvid, "state": row_gvid.state,
                                "county": row_gvid.county, "count_name": row.name},
                )
                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, "w") as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False,
                               mimetype="application/vnd.geo+json")
def generate_all(sumlevel, d):
    """Generate a dict that includes all of the available geoid values, with
    keys for the most common names for those values.

    :param sumlevel: census summary level, int or int-like
    :param d: dict of geoid components
    :return: dict with 'gvid', 'geoid' and 'geoidt' string forms, or {} when
        the summary level has no GVid class
    """
    from geoid.civick import GVid
    from geoid.tiger import TigerGeoid
    from geoid.acs import AcsGeoid

    sumlevel = int(sumlevel)
    d = dict(d.items())  # copy, so the caller's dict isn't mutated below

    # Map common name variants
    if 'cousub' in d:
        d['cosub'] = d['cousub']
        del d['cousub']

    if 'blkgrp' in d:
        d['blockgroup'] = d['blkgrp']
        del d['blkgrp']

    if 'zcta5' in d:
        d['zcta'] = d['zcta5']
        del d['zcta5']

    gvid_class = GVid.resolve_summary_level(sumlevel)

    if not gvid_class:
        return {}

    geoidt_class = TigerGeoid.resolve_summary_level(sumlevel)
    geoid_class = AcsGeoid.resolve_summary_level(sumlevel)

    # The original wrapped this in `try: ... except: raise`, a no-op bare
    # except; construction errors propagate unchanged either way.
    return dict(
        gvid=str(gvid_class(**d)),
        geoid=str(geoid_class(**d)),
        geoidt=str(geoidt_class(**d))
    )
def resum(g):
    """Return the summarized form of gvid *g*, or *g* unchanged when it fails to parse."""
    try:
        summarized = str(GVid.parse(g).summarize())
    except (KeyError, ValueError):
        summarized = g
    return summarized
def bundle_search(self, terms):
    """Incremental search, search as you type.

    Builds a ranked list of dataset documents (each with up to five of its
    partitions attached), resolves geo-coverage gvids to display names, and
    renders the search results template along with year/source/state facets.
    """
    from geoid.civick import GVid

    parsed = self.library.search.make_query_from_terms(terms)

    final_results = []

    init_results = self.library.search.search_datasets(parsed)

    # Cap the number of partitions shown per dataset.
    pvid_limit = 5

    all_idents = self.library.search.identifier_map

    # Best-scoring datasets first.
    for result in sorted(init_results.values(), key=lambda e: e.score, reverse=True):
        d = self.doc_cache.dataset(result.vid)
        d['partition_count'] = len(result.partitions)
        d['partitions'] = {}

        for pvid in list(result.partitions)[:pvid_limit]:
            p = self.doc_cache.partition(pvid)
            p['table'] = self.doc_cache.table(p['table_vid'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    try:
                        p['geo_coverage']['names'].append(all_idents[gvid])
                    except KeyError:
                        # Unknown gvid: describe it as "all <level>" within
                        # its parent (promoted) region, when that is known.
                        g = GVid.parse(gvid)
                        try:
                            phrase = "All {} in {} ".format(
                                g.level_plural.title(),
                                all_idents[str(g.promote())])
                            p['geo_coverage']['names'].append(phrase)
                        except KeyError:
                            pass

            d['partitions'][pvid] = p

        final_results.append(d)

    template = self.env.get_template('search/results.html')

    # Collect facets to display to the user, for additional sorting
    facets = {
        'years': set(),
        'sources': set(),
        'states': set()
    }

    for r in final_results:
        facets['sources'].add(r['source'])

        for p in r['partitions'].values():
            if 'time_coverage' in p and p['time_coverage']:
                facets['years'] |= set(p['time_coverage']['years'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    g = GVid.parse(gvid)
                    # Only concrete (non-summary) state gvids become facets.
                    if g.level == 'state' and not g.is_summary:
                        try:
                            facets['states'].add(all_idents[gvid])
                        except KeyError:
                            pass  # TODO Should probably announce an error

    return self.render(
        template,
        query = parsed,
        results=final_results,
        facets=facets,
        **self.cc())
def resum(g):
    """Return the summarized form of gvid *g*, or *g* unchanged when it fails to parse.

    Also catches ValueError (raised by GVid.parse for malformed gvid strings),
    matching the other ``resum`` variants in this file, rather than letting the
    parse error escape.
    """
    try:
        return str(GVid.parse(g).summarize())
    except (KeyError, ValueError):
        return g
def bundle_search(self, terms):
    """Incremental search, search as you type.

    Runs the structured bundle/partition search, attaches up to five
    partitions (with geo-coverage names resolved) to each dataset document,
    and renders the results template with year/source/state facets.
    """
    from geoid.civick import GVid

    results = []

    (b_query, p_query, terms), bp_set = self.library.search.search_bundles(
        {k: v.strip() for k, v in terms.items()})

    # Cap the number of partitions shown per dataset.
    pvid_limit = 5

    all_idents = self.library.search.identifier_map

    for bvid, pvids in bp_set.items():
        d = self.doc_cache.dataset(bvid)
        d['partition_count'] = len(pvids)
        d['partitions'] = {}

        for pvid in pvids[:pvid_limit]:
            p = self.doc_cache.partition(pvid)
            p['table'] = self.doc_cache.table(p['table_vid'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    try:
                        p['geo_coverage']['names'].append(all_idents[gvid])
                    except KeyError:
                        # Unknown gvid: describe it as "all <level>" within
                        # its parent (promoted) region, when that is known.
                        g = GVid.parse(gvid)
                        try:
                            phrase = "All {} in {} ".format(
                                g.level_plural.title(),
                                all_idents[str(g.promote())])
                            p['geo_coverage']['names'].append(phrase)
                        except KeyError:
                            pass

            d['partitions'][pvid] = p

        results.append(d)

    template = self.env.get_template('search/results.html')

    results = sorted(results, key=lambda x: x['vname'])

    # Collect facets to display to the user, for additional sorting
    facets = {
        'years': set(),
        'sources': set(),
        'states': set()
    }

    for r in results:
        facets['sources'].add(r['source'])

        for p in r['partitions'].values():
            if 'time_coverage' in p and p['time_coverage']:
                facets['years'] |= set(p['time_coverage']['years'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    g = GVid.parse(gvid)
                    if g.level == 'state' and not g.is_summary:
                        # NOTE(review): unlike the other bundle_search variant,
                        # this lookup is not guarded against KeyError — it may
                        # raise for a state gvid missing from the identifier
                        # map. Confirm whether that can happen here.
                        facets['states'].add(all_idents[gvid])

    return self.render(
        template,
        queries=dict(
            b_query=b_query,
            p_query=p_query,
            terms=terms),
        results=results,
        facets=facets,
        **self.cc())
def search_bundles(self, search, limit=None):
    """Search for datasets and partitions using a structured search object.

    Builds Whoosh-style query strings from the structured components
    ('source', 'about', 'in', 'by', 'from', 'to', 'with'), runs the dataset
    search and then the partition search, and groups partition hits by their
    owning dataset.

    :param search: a dict, with values for each of the search components.
    :param limit: unused here; kept for interface compatibility.
    :return: a tuple ``((d_term, p_term, search), rtrn)`` where ``rtrn`` maps
        bundle vids to lists of partition vids.
    """
    from ..identity import ObjectNumber
    from collections import defaultdict
    from geoid.civick import GVid

    # A free-form 'all' value is re-parsed into structured components.
    if search.get('all', False):
        search = SearchTermParser().parse(search['all'])

    bvid_term = about_term = source_term = with_term = grain_term = years_term = in_term = ''

    if search.get('source', False):
        source_term = "source:" + search.get('source', '').strip()

    if search.get('about', False):
        about_term = "doc:({})".format(search.get('about', '').strip())

    d_term = ' AND '.join(x for x in [source_term, about_term] if bool(x))

    # This is the doc terms we'll move to the partition search if the
    # partition search returns nothing, but only if the about term was the
    # only one specified.
    dt_p_term = about_term if not source_term else None

    if search.get('in', False):
        place_vids = list(
            x[1] for x in self.search_identifiers(
                search['in']))  # Convert generator to list

        if place_vids:
            # Add the 'all region' gvids for the higher level
            all_set = set(str(GVid.parse(x).allval()) for x in place_vids)

            place_vids += list(all_set)

            in_term = "coverage:({})".format(' OR '.join(place_vids))

    if search.get('by', False):
        grain_term = "coverage:" + search.get('by', '').strip()

    # The wackiness with the converts to int and str, and adding ' ', is because there
    # can't be a space between the 'TO' and the brackets in the time range
    # when one end is open

    # NOTE(review): when int() raises ValueError below, the pass leaves
    # from_year/to_year unbound, so the later `bool(from_year)` check would
    # raise NameError — confirm inputs are always numeric or fix upstream.
    if search.get('from', False):
        try:
            from_year = str(int(search.get('from', False))) + ' '
        except ValueError:
            pass
    else:
        from_year = ''

    if search.get('to', False):
        try:
            to_year = ' ' + str(int(search.get('to', False)))
        except ValueError:
            pass
    else:
        to_year = ''

    if bool(from_year) or bool(to_year):
        years_term = "coverage:[{}TO{}]".format(from_year, to_year)

    if search.get('with', False):
        with_term = 'schema:({})'.format(search.get('with', False))

    if bool(d_term):
        # list(...) : the return from search_datasets is a generator, so it
        # can only be read once.
        bvids = list(self.search_datasets(d_term))
    else:
        bvids = []

    p_term = ' AND '.join(
        x for x in [
            in_term, years_term, grain_term, with_term] if bool(x))

    if bool(p_term):
        if bvids:
            # Restrict partition hits to the datasets found above.
            p_term += " AND bvid:({})".format(' OR '.join(bvids))
        elif dt_p_term:
            # In case the about term didn't generate any hits for the
            # bundle.
            p_term += " AND {}".format(dt_p_term)
    else:
        if not bvids and dt_p_term:
            p_term = dt_p_term

    if p_term:
        pvids = list(self.search_partitions(p_term))

        if pvids:
            # Group partition vids by their owning dataset (bundle) vid.
            bp_set = defaultdict(set)
            for p in pvids:
                bvid = str(ObjectNumber.parse(p).as_dataset)
                bp_set[bvid].add(p)

            rtrn = {b: list(p) for b, p in bp_set.items()}
        else:
            rtrn = {}
    else:
        # No partition query: return the matched bundles with no partitions.
        rtrn = {b: [] for b in bvids}

    return (d_term, p_term, search), rtrn