def expand_place_ids(self, terms):
    """ Look up all of the place identifiers to get gvids

    :param terms: search terms naming one or more places
    :return: the given terms if no identifiers matched, otherwise a list of gvid strings
    """
    from geoid.civick import GVid
    from geoid.util import iallval
    import itertools

    place_vids = []
    first_type = None

    for score, vid, t, name in self.search_identifiers(terms):

        if not first_type:
            first_type = t

        if t != first_type:
            # Ignore ones that aren't the same type as the best match
            continue

        place_vids.append(vid)

    if place_vids:
        # Add the 'all region' gvids for the higher levels
        all_set = set(itertools.chain.from_iterable(iallval(GVid.parse(x)) for x in place_vids))

        place_vids += list(str(x) for x in all_set)

        return place_vids
    else:
        return terms
def _expand_place_ids(self, terms):
    """ Looks up all of the place identifiers to get gvids

    Args:
        terms (str or unicode): terms to look up

    Returns:
        str or list: given terms if no identifiers found, otherwise list of identifiers.
    """
    import itertools

    from geoid.civick import GVid
    from geoid.util import iallval

    place_vids = []
    first_type = None

    for result in self.backend.identifier_index.search(terms):

        if not first_type:
            first_type = result.type

        if result.type != first_type:
            # Ignore ones that aren't the same type as the best match
            continue

        place_vids.append(result.vid)

    if place_vids:
        # Add the 'all region' gvids for the higher levels
        all_set = set(itertools.chain.from_iterable(iallval(GVid.parse(x)) for x in place_vids))
        place_vids += list(str(x) for x in all_set)

        return place_vids
    else:
        return terms
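# A sketch isolating the expansion step shared by the two functions above. It
# uses only the calls they already use: GVid.parse() on a gvid string, and
# geoid.util.iallval() to enumerate the 'all region' variants at the higher
# summary levels. `vids` is a placeholder list of matched place gvid strings.
def expand_all_regions(vids):
    import itertools
    from geoid.civick import GVid
    from geoid.util import iallval

    # Every parsed gvid contributes its higher-level 'all region' variants
    all_set = set(itertools.chain.from_iterable(iallval(GVid.parse(v)) for v in vids))
    return vids + [str(g) for g in all_set]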
def partition(self, pvid):
    from geoid.civick import GVid

    template = self.env.get_template('bundle/partition.html')

    p = self.doc_cache.partition(pvid)
    p['table'] = self.doc_cache.table(p['table_vid'])

    if 'geo_coverage' in p:
        all_idents = self.library.search.identifier_map

        for gvid in p['geo_coverage']['vids']:
            try:
                p['geo_coverage']['names'].append(all_idents[gvid])
            except KeyError:
                # No identifier entry for this gvid; build a phrase like
                # 'All Counties in California' from the parent region instead
                g = GVid.parse(gvid)
                try:
                    phrase = "All {} in {}".format(g.level_plural.title(), all_idents[str(g.promote())])
                    p['geo_coverage']['names'].append(phrase)
                except KeyError:
                    pass

    return self.render(template, p=p, **self.cc())
def resum(g):
    # Reduce a gvid to its summary form; fall back to the raw value when it
    # can't be parsed. `logger` and `partition` come from the enclosing scope.
    try:
        return str(GVid.parse(g).summarize())
    except KeyError:
        return g
    except ValueError:
        logger.debug("Failed to parse gvid '{}' from partition '{}' grain coverage"
                     .format(g, partition.identity.vname))
        return g
def name_to_gvid(self, row):
    from geoid.civick import GVid

    v = row.state_county_city.lower()

    self.city_name = None

    if v == 'california':
        from geoid.civick import State
        return State(6)  # FIPS state code 6 is California
    elif v == 'balance of county':
        return None
    elif self.non_null_row_count == 1:
        # A row with a single non-null value names a county
        self.last_county = row.state_county_city
        return GVid.parse(self.county_map()[v])
    else:
        self.city_name = row.state_county_city
        return GVid.parse(self.place_map().get(v))
def build_modify_row(self, row_gen, p, source, row):
    """Called for every row to allow subclasses to modify rows. """
    from geoid.civick import GVid, Zip

    row['name'] = row['name'].decode('latin1')
    row['county_gvid'] = self.counties.get(row['county'].lower())
    row['year'] = self.year

    if row['zip']:
        # Only attach a zip gvid when the county gvid parsed successfully
        cg = GVid.parse(row['county_gvid'])
        if cg:
            zg = Zip(int(row['zip']))
            row['zip_gvid'] = str(zg)
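# Illustrative construction of the zip gvid used above, assuming, as the import
# there implies, that geoid.civick.Zip accepts an integer ZIP code. 92037 is a
# placeholder value, not from the source.
def zip_gvid_example():
    from geoid.civick import Zip
    return str(Zip(92037))  # civick gvid string for the ZIP area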
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID"""
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    m1 = ''

    try:
        return GVid.parse(v)
    except ValueError as e:
        m1 = str(e)

    try:
        return AcsGeoid.parse(v).convert(GVid)
    except ValueError as e:
        raise ValueError("Failed to parse to either ACS or GVid: {}; {}".format(m1, str(e)))
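# Usage sketch for parse_to_gvid: either notation yields a civick GVid. The
# argument names are placeholders, assuming only the behavior visible in the
# function body.
#
#   parse_to_gvid(civick_gvid_string)   # parsed directly by GVid.parse
#   parse_to_gvid(acs_geoid_string)     # parsed by AcsGeoid, converted to GVid
#   parse_to_gvid('not-a-geoid')        # raises ValueError carrying both parse errors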
def get_boundaries(gvid, sl):
    """ Return a cached, static geojson file of boundaries for a region

    :param gvid: The GVID of the region
    :param sl: The summary level of the subdivisions of the region.
    :return:
    """
    from geojson import Feature, FeatureCollection, dumps
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache('ui/geo')

    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    if not exists(fn_path):
        p = aac.library.partition('census.gov-tiger-2015-counties')

        features = []

        for i, row in enumerate(p):
            if row.statefp == 6:  # In dev, assume counties in California
                gvid = GVid.parse(row.gvid)
                f = Feature(geometry=loads(row.geometry).simplify(0.01),
                            properties={
                                'gvid': row.gvid,
                                'state': gvid.state,
                                'county': gvid.county,
                                'count_name': row.name})

                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, 'w') as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False, mimetype='application/vnd.geo+json')
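# get_boundaries reads like a Flask view (it returns send_from_directory), so a
# minimal wiring sketch follows; the route shape and the `app` object are
# assumptions, not from the source:
#
#   @app.route('/geo/<gvid>/<int:sl>/boundaries.geojson')
#   def boundaries(gvid, sl):
#       return get_boundaries(gvid, sl)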
def resum(g):
    # Reduce a gvid to its summary form, falling back to the raw value when it
    # can't be parsed
    try:
        return str(GVid.parse(g).summarize())
    except (KeyError, ValueError):
        return g
def bundle_search(self, terms):
    """Incremental search, search as you type."""
    from geoid.civick import GVid

    parsed = self.library.search.make_query_from_terms(terms)

    final_results = []

    init_results = self.library.search.search_datasets(parsed)

    pvid_limit = 5

    all_idents = self.library.search.identifier_map

    for result in sorted(init_results.values(), key=lambda e: e.score, reverse=True):

        d = self.doc_cache.dataset(result.vid)

        d['partition_count'] = len(result.partitions)
        d['partitions'] = {}

        for pvid in list(result.partitions)[:pvid_limit]:
            p = self.doc_cache.partition(pvid)
            p['table'] = self.doc_cache.table(p['table_vid'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    try:
                        p['geo_coverage']['names'].append(all_idents[gvid])
                    except KeyError:
                        g = GVid.parse(gvid)
                        try:
                            phrase = "All {} in {}".format(g.level_plural.title(),
                                                           all_idents[str(g.promote())])
                            p['geo_coverage']['names'].append(phrase)
                        except KeyError:
                            pass

            d['partitions'][pvid] = p

        final_results.append(d)

    template = self.env.get_template('search/results.html')

    # Collect facets to display to the user, for additional sorting
    facets = {
        'years': set(),
        'sources': set(),
        'states': set()
    }

    for r in final_results:
        facets['sources'].add(r['source'])

        for p in r['partitions'].values():
            if 'time_coverage' in p and p['time_coverage']:
                facets['years'] |= set(p['time_coverage']['years'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    g = GVid.parse(gvid)

                    if g.level == 'state' and not g.is_summary:
                        try:
                            facets['states'].add(all_idents[gvid])
                        except KeyError:
                            pass  # TODO Should probably announce an error

    return self.render(template, query=parsed, results=final_results, facets=facets, **self.cc())
def resum(g):
    # Reduce a gvid to its summary form; return the raw value on lookup failure
    try:
        return str(GVid.parse(g).summarize())
    except KeyError:
        return g
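# The resum() variants above differ only in which parse failures they swallow.
# A usage sketch, assuming only GVid.parse() and .summarize() as used there;
# the argument is a placeholder gvid string:
#
#   resum(gvid_string)    # summary-level form of the gvid, as a string
#   resum('not-a-gvid')   # falls back to returning the input unchanged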
def bundle_search(self, terms):
    """Incremental search, search as you type."""
    from geoid.civick import GVid

    results = []

    (b_query, p_query, terms), bp_set = self.library.search.search_bundles(
        {k: v.strip() for k, v in terms.items()})

    pvid_limit = 5

    all_idents = self.library.search.identifier_map

    for bvid, pvids in bp_set.items():

        d = self.doc_cache.dataset(bvid)

        d['partition_count'] = len(pvids)
        d['partitions'] = {}

        for pvid in pvids[:pvid_limit]:
            p = self.doc_cache.partition(pvid)
            p['table'] = self.doc_cache.table(p['table_vid'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    try:
                        p['geo_coverage']['names'].append(all_idents[gvid])
                    except KeyError:
                        g = GVid.parse(gvid)
                        try:
                            phrase = "All {} in {}".format(g.level_plural.title(),
                                                           all_idents[str(g.promote())])
                            p['geo_coverage']['names'].append(phrase)
                        except KeyError:
                            pass

            d['partitions'][pvid] = p

        results.append(d)

    template = self.env.get_template('search/results.html')

    results = sorted(results, key=lambda x: x['vname'])

    # Collect facets to display to the user, for additional sorting
    facets = {
        'years': set(),
        'sources': set(),
        'states': set()
    }

    for r in results:
        facets['sources'].add(r['source'])

        for p in r['partitions'].values():
            if 'time_coverage' in p and p['time_coverage']:
                facets['years'] |= set(p['time_coverage']['years'])

            if 'geo_coverage' in p:
                for gvid in p['geo_coverage']['vids']:
                    g = GVid.parse(gvid)

                    if g.level == 'state' and not g.is_summary:
                        # Guard against gvids with no identifier entry
                        try:
                            facets['states'].add(all_idents[gvid])
                        except KeyError:
                            pass

    return self.render(template,
                       queries=dict(b_query=b_query, p_query=p_query, terms=terms),
                       results=results, facets=facets, **self.cc())
def search_bundles(self, search, limit=None):
    """Search for datasets and partitions using a structured search object.

    :param search: a dict, with values for each of the search components.
    :param limit:
    :return:
    """
    from ..identity import ObjectNumber
    from collections import defaultdict
    from geoid.civick import GVid

    if search.get('all', False):
        search = SearchTermParser().parse(search['all'])

    about_term = source_term = with_term = grain_term = years_term = in_term = ''

    if search.get('source', False):
        source_term = "source:" + search.get('source', '').strip()

    if search.get('about', False):
        about_term = "doc:({})".format(search.get('about', '').strip())

    d_term = ' AND '.join(x for x in [source_term, about_term] if bool(x))

    # This is the doc term we'll move to the partition search if the dataset
    # search returns nothing, but only if the about term was the only one specified.
    dt_p_term = about_term if not source_term else None

    if search.get('in', False):
        place_vids = list(x[1] for x in self.search_identifiers(search['in']))  # Convert generator to list

        if place_vids:
            # Add the 'all region' gvids for the higher level
            all_set = set(str(GVid.parse(x).allval()) for x in place_vids)
            place_vids += list(all_set)

            in_term = "coverage:({})".format(' OR '.join(place_vids))

    if search.get('by', False):
        grain_term = "coverage:" + search.get('by', '').strip()

    # The wackiness with the converts to int and str, and adding ' ', is because there
    # can't be a space between the 'TO' and the brackets in the time range
    # when one end is open. Both years start empty so a failed parse leaves
    # that end of the range open.
    from_year = ''
    to_year = ''

    if search.get('from', False):
        try:
            from_year = str(int(search.get('from', False))) + ' '
        except ValueError:
            pass

    if search.get('to', False):
        try:
            to_year = ' ' + str(int(search.get('to', False)))
        except ValueError:
            pass

    if bool(from_year) or bool(to_year):
        years_term = "coverage:[{}TO{}]".format(from_year, to_year)

    if search.get('with', False):
        with_term = 'schema:({})'.format(search.get('with', False))

    if bool(d_term):
        # list(...) : the return from search_datasets is a generator, so it
        # can only be read once.
        bvids = list(self.search_datasets(d_term))
    else:
        bvids = []

    p_term = ' AND '.join(x for x in [in_term, years_term, grain_term, with_term] if bool(x))

    if bool(p_term):
        if bvids:
            p_term += " AND bvid:({})".format(' OR '.join(bvids))
        elif dt_p_term:
            # In case the about term didn't generate any hits for the bundle.
            p_term += " AND {}".format(dt_p_term)
    else:
        if not bvids and dt_p_term:
            p_term = dt_p_term

    if p_term:
        pvids = list(self.search_partitions(p_term))

        if pvids:
            bp_set = defaultdict(set)
            for p in pvids:
                bvid = str(ObjectNumber.parse(p).as_dataset)
                bp_set[bvid].add(p)

            rtrn = {b: list(p) for b, p in bp_set.items()}
        else:
            rtrn = {}
    else:
        rtrn = {b: [] for b in bvids}

    return (d_term, p_term, search), rtrn
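# A sketch of the structured search dict consumed by search_bundles, using only
# the keys the function reads ('all', 'source', 'about', 'in', 'by', 'from',
# 'to', 'with'); the values and the `library.search` access path are
# illustrative assumptions:
#
#   (d_term, p_term, search), hits = library.search.search_bundles({
#       'about': 'health',        # -> doc:(health)
#       'source': 'cdph.ca.gov',  # -> source:cdph.ca.gov
#       'in': 'california',       # -> coverage:(<matched place gvids>)
#       'from': '2010',
#       'to': '2014',             # together -> coverage:[2010 TO 2014]
#   })
#   # `hits` maps bundle vids to lists of matching partition vids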