def ccp_cache_populate(self):
    """Build the gtfsdb cache tables inside a single locked transaction.

    Expects self.qb.db to have been cleared by the caller; opens a fresh
    handle, takes the 'gtfsdb_cache_links' table lock, records this build
    in 'gtfsdb_cache_register', clears and re-saves the cache, commits,
    and releases the handle (leaving self.qb.db as None again).
    """
    # We cleared the db handle earlier, so get a new one, and lock it.
    g.assurt_soft(self.qb.db is None)
    self.qb.db = db_glue.new()
    # FIXME: What's gtfsdb_cache_edges? Or don't we care?
    #self.qb.db.transaction_begin_rw('gtfsdb_cache_edges',
    #                                'gtfsdb_cache_links')
    # EXPLAIN: Who are we competing with? Just other instances of this
    #          script?
    # NOTE(review): presumably transaction_lock_try also begins the rw
    # transaction -- confirm against db_glue before relying on it.
    locked = self.qb.db.transaction_lock_try('gtfsdb_cache_links',
                                             caller='gtfsdb_build_cache')
    g.assurt(locked)
    # Register who built this cache, and against what branch, revision,
    # and transit feed date.
    self.qb.db.insert(
        'gtfsdb_cache_register',
        {'username': self.qb.username,
         'branch_id': self.qb.branch_hier[0][0],
         'revision_id': self.revision_id,
         'gtfs_caldate': self.tfeed_zipdate,},
        {})
    self.ccp_clear_cache()
    self.ccp_save_cache()
    log.info('Committing transaction [ccp_cache_populate]')
    self.qb.db.transaction_commit()
    # Release the handle; the next phase is expected to grab its own.
    self.qb.db.close()
    self.qb.db = None
def save_rstop(self, qb, route, stop_number):
    """Save one route stop (this item) belonging to *route*.

    Copies the route's identifiers onto this stop, back-fills a missing
    node ID by matching the stop's (x, y) against byway endpoints, and
    inserts the row into the route-stop table.
    """
    log.debug('save_rstop: route: %s / stop_number: %s'
              % (route, stop_number,))
    #item_helper.One.save_core(self, qb)
    self.route_id = route.system_id
    self.stop_number = stop_number
    # FIXME: 2012.09.24: Drop the stack_id and version, eh?
    self.route_stack_id = route.stack_id
    self.route_version = route.version
    # 2014.09.13: There's been a bug until now wherein the user drags a new
    # route in the client, and when the route is saved, the client does not
    # send the intermediate stops' node IDs.
    if not self.node_id:
        pt_xy = (self.x, self.y,)
        nodes_byway = node_byway.Many()
        nodes_byway.search_by_endpoint_xy(qb, pt_xy,
                                          internals_ok=False,
                                          limit_one=True)
        if len(nodes_byway) == 1:
            self.node_id = nodes_byway[0].node_stack_id
        else:
            g.assurt_soft(len(nodes_byway) == 0)
            # Fixed log message typo: previously read "endpoint no found".
            log.warning('save_rstop: endpoint not found: %s' % (pt_xy,))
            g.assurt_soft(False)
    self.save_insert(qb, One.item_type_table, One.psql_defns)
def wire_link_tag(self, qb, lval_tag):
    """Apply one tag link-value to this item's in-memory ``tagged`` set.

    A non-deleted link adds the tag's name; a deleted link removes it
    (silently, if it was never present). A stack ID missing from the tag
    cache is logged and otherwise ignored.
    """
    g.assurt(qb.item_mgr.cache_tags is not None)
    tag_sid = lval_tag.lhs_stack_id
    try:
        the_tag = qb.item_mgr.cache_tags[tag_sid]
    except KeyError:
        log.warning('wire_link_tag: missing tag! item_mgr: %s / stack_id: %d'
                    % (qb.item_mgr, tag_sid,))
        return
    if not the_tag.name:
        # A nameless tag is unexpected.
        g.assurt_soft(False)
    elif lval_tag.deleted:
        # discard() is the no-raise equivalent of remove() in a try/except.
        self.tagged.discard(the_tag.name)
    else:
        self.tagged.add(the_tag.name)
def __eq__(self, other): attrs_equal = True # MAYBE: We only compare item_base.One's __slots__. # Derived classes should really override this fcn. # and compare whatever __slots__ they use and care about. if attrs_equal: # NOTE: self.__slots__ is the derived class's. # MAYBE: If derived classes care, override this fcn. and check # their own One.__slots__ #for key in self.__slots__: for key in One.__slots__: if (getattr(self, key, None) != getattr(other, key, None)): #import rpdb2;rpdb2.start_embedded_debugger('password', # fAllowRemote=True) attrs_equal = False break if attrs_equal: for attr_defn in self.attr_defns: attr_name = attr_defn[One.scol_pyname] try: # If this is a floating point number, round it, # otherwise __eq__ is liable to be not. self_value = round(getattr(self, attr_name, None), attr_defn[One.scol_precision]) other_value = round(getattr(other, attr_name, None), attr_defn[One.scol_precision]) if self_value != other_value: attrs_equal = False break else: continue except IndexError: pass except TypeError: g.assurt_soft(False) pass if (getattr(self, attr_name, None) != getattr(other, attr_name, None)): attrs_equal = False break return attrs_equal
def __eq__(self, other): attrs_equal = True # MAYBE: We only compare item_base.One's __slots__. # Derived classes should really override this fcn. # and compare whatever __slots__ they use and care about. if attrs_equal: # NOTE: self.__slots__ is the derived class's. # MAYBE: If derived classes care, override this fcn. and check # their own One.__slots__ #for key in self.__slots__: for key in One.__slots__: if (getattr(self, key, None) != getattr(other, key, None)): #import rpdb2;rpdb2.start_embedded_debugger('password', # fAllowRemote=True) attrs_equal = False break if attrs_equal: for attr_defn in self.attr_defns: attr_name = attr_defn[One.scol_pyname] try: # If this is a floating point number, round it, # otherwise __eq__ is liable to be not. self_value = round(getattr(self, attr_name, None), attr_defn[One.scol_precision]) other_value = round(getattr(other, attr_name, None), attr_defn[One.scol_precision]) if self_value != other_value: attrs_equal = False break else: continue except IndexError: pass except TypeError: g.assurt_soft(False) pass if (getattr(self, attr_name, None) != getattr( other, attr_name, None)): attrs_equal = False break return attrs_equal
def wire_link_tag(self, qb, lval_tag):
    """Apply one tag link-value to this item's in-memory ``tagged`` set.

    Non-deleted links add the tag's name; deleted links remove it if
    present. A stack ID missing from the tag cache is logged and ignored.
    """
    g.assurt(qb.item_mgr.cache_tags is not None)
    try:
        the_tag = qb.item_mgr.cache_tags[lval_tag.lhs_stack_id]
        if the_tag.name:
            if not lval_tag.deleted:
                self.tagged.add(the_tag.name)
            else:
                try:
                    self.tagged.remove(the_tag.name)
                except KeyError:
                    # Removing a tag that was never added; ignore.
                    pass
        else:
            # A nameless tag is unexpected.
            g.assurt_soft(False)
    except KeyError:
        log.warning(
            'wire_link_tag: missing tag! item_mgr: %s / stack_id: %d' % (
                qb.item_mgr,
                lval_tag.lhs_stack_id,
            ))
def __init__(self, byway, forward): GenericPyPayload.__init__(self) # NOTE: graphserver.core crashes if you don't set self.type self.type = LGSTypes.ENUM_edgepayload_t.PL_EXTERNVALUE self.rise = 0.0 #self.fall = 0.0 self.slog = 1.0 self.byway = byway self.forward = forward # Calculate the slope of the street. # Graphserver's Street defines a rise and a fall. The rise is the number # of meters of elevation as you travel the line segment, and the fall is # the number of meters of descending. Since Cyclopath only stores the # elevation at the endpoints, we can only supply one of those values, and # it might be less than its true value. if byway.geometry_len > 0.0: try: if forward: elevation_delta = (float(byway.node_rhs_elevation_m) - float(byway.node_lhs_elevation_m)) else: elevation_delta = (float(byway.node_lhs_elevation_m) - float(byway.node_rhs_elevation_m)) self.average_grade = elevation_delta / byway.geometry_len if elevation_delta > 0.0: self.rise = elevation_delta except TypeError, e: log.error('TypeError: %s' % (str(e), )) log.error('byway: %s' % (str(byway), )) log.error('beg_node_id: %s' % (byway.beg_node_id, )) log.error('fin_node_id: %s' % (byway.fin_node_id, )) log.error('node_lhs_elevation_m: %s' % (byway.node_lhs_elevation_m, )) log.error('node_rhs_elevation_m: %s' % (byway.node_rhs_elevation_m, )) self.average_grade = 0.0 #self.rise = 0.0 # Don't raise, so that we keep loading, even if we can't figure # out the elevation. # Nope: raise g.assurt_soft(False)
def __init__(self, byway, forward): GenericPyPayload.__init__(self) # NOTE: graphserver.core crashes if you don't set self.type self.type = LGSTypes.ENUM_edgepayload_t.PL_EXTERNVALUE self.rise = 0.0 #self.fall = 0.0 self.slog = 1.0 self.byway = byway self.forward = forward # Calculate the slope of the street. # Graphserver's Street defines a rise and a fall. The rise is the number # of meters of elevation as you travel the line segment, and the fall is # the number of meters of descending. Since Cyclopath only stores the # elevation at the endpoints, we can only supply one of those values, and # it might be less than its true value. if byway.geometry_len > 0.0: try: if forward: elevation_delta = (float(byway.node_rhs_elevation_m) - float(byway.node_lhs_elevation_m)) else: elevation_delta = (float(byway.node_lhs_elevation_m) - float(byway.node_rhs_elevation_m)) self.average_grade = elevation_delta / byway.geometry_len if elevation_delta > 0.0: self.rise = elevation_delta except TypeError, e: log.error('TypeError: %s' % (str(e),)) log.error('byway: %s' % (str(byway),)) log.error('beg_node_id: %s' % (byway.beg_node_id,)) log.error('fin_node_id: %s' % (byway.fin_node_id,)) log.error('node_lhs_elevation_m: %s' % (byway.node_lhs_elevation_m,)) log.error('node_rhs_elevation_m: %s' % (byway.node_rhs_elevation_m,)) self.average_grade = 0.0 #self.rise = 0.0 # Don't raise, so that we keep loading, even if we can't figure # out the elevation. # Nope: raise g.assurt_soft(False)
def save_rstop(self, qb, route, stop_number):
    """Save one route stop (this item) belonging to *route*.

    Copies the route's identifiers onto this stop, back-fills a missing
    node ID by matching the stop's (x, y) against byway endpoints, and
    inserts the row into the route-stop table.
    """
    log.debug('save_rstop: route: %s / stop_number: %s' % (
        route,
        stop_number,
    ))
    #item_helper.One.save_core(self, qb)
    self.route_id = route.system_id
    self.stop_number = stop_number
    # FIXME: 2012.09.24: Drop the stack_id and version, eh?
    self.route_stack_id = route.stack_id
    self.route_version = route.version
    # 2014.09.13: There's been a bug until now wherein the user drags a new
    # route in the client, and when the route is saved, the client does not
    # send the intermediate stops' node IDs.
    if not self.node_id:
        pt_xy = (
            self.x,
            self.y,
        )
        nodes_byway = node_byway.Many()
        nodes_byway.search_by_endpoint_xy(qb,
                                          pt_xy,
                                          internals_ok=False,
                                          limit_one=True)
        if len(nodes_byway) == 1:
            self.node_id = nodes_byway[0].node_stack_id
        else:
            g.assurt_soft(len(nodes_byway) == 0)
            # Fixed log message typo: previously read "endpoint no found".
            log.warning('save_rstop: endpoint not found: %s' % (pt_xy, ))
            g.assurt_soft(False)
    self.save_insert(qb, One.item_type_table, One.psql_defns)
def sql_apply_query_filter_by_text_tc(self, qb, table_col, stop_words,
                                      where_clause, conjunction,
                                      use_outer, outer_where):
    """Append text-search conditions for one searchable column.

    Honors qb.filters.filter_by_text_exact, filter_by_names_exact,
    filter_by_text_loose (regex match), and filter_by_text_smart /
    filter_by_text_full (Postgres full text search), OR-ing each enabled
    filter onto where_clause (or outer_where, when use_outer applies).

    Returns the tuple (where_clause, conjunction, outer_where).
    """
    # Only select items whose name matches the user's search query.
    # But if multiple search columns or search filters are specified,
    # just OR them all together (this is so, e.g., search threads
    # looks in both the thread name and the post body).
    # See below for a bunch of comments about the different postgres
    # string comparison operators (=, ~/~*, and @@).
    if qb.filters.filter_by_text_exact:
        filter_by_text_exact_lower = qb.filters.filter_by_text_exact.lower()
        where_clause += (
            """
            %s (LOWER(%s) = %s)
            """ % (conjunction,
                   table_col,
                   # qb.db.quoted(qb.filters.filter_by_text_exact),
                   # %s (LOWER(%s) = LOWER(%s))
                   qb.db.quoted(filter_by_text_exact_lower),))
        conjunction = "OR"
    # This is like the previous filter but allows the user to specify a list.
    if qb.filters.filter_by_names_exact:
        item_names = [x.strip().lower()
                      for x in qb.filters.filter_by_names_exact.split(',')]
        name_clauses = []
        for item_name in item_names:
            # item_name is the empty string if input contained ,,
            if item_name:
                name_clauses.append("(LOWER(gia.name) = %s)"
                                    % (qb.db.quoted(item_name),))
        name_clauses = " OR ".join(name_clauses)
        where_clause += (
            """
            %s (%s)
            """ % (conjunction, name_clauses,))
        conjunction = "OR"
    if qb.filters.filter_by_text_loose:
        # NOTE: ~* does case-insensitive regex matching. This is slower than
        #       using =, but this is how we get a loose search. Consider
        #          select 'a' ~ 'a b c';  ==> false
        #          select 'a b c' ~ 'a';  ==> true
        #       meaning if the user searches 'lake' they get all the lakes.
        if not use_outer:
            where_clause += (
                """
                %s (%s ~* %s)
                """ % (conjunction,
                       table_col,
                       qb.db.quoted(qb.filters.filter_by_text_loose),))
            conjunction = "OR"
        else:
            sub_where = (" (%s ~* %s) "
                         % (table_col,
                            qb.db.quoted(qb.filters.filter_by_text_loose),))
            if not outer_where:
                outer_where = sub_where
            else:
                outer_where = (
                    " (%s OR (%s ~* %s)) "
                    % (outer_where,
                       table_col,
                       qb.db.quoted(qb.filters.filter_by_text_loose),))
    # For filter_by_text_smart and filter_by_text_full:
    tsquery = None
    # Callers should only specify columns that are properly indexed for
    # full text search, since that's the column we really want (if we use
    # the normal-named column, Postgres does an inline index on the text).
    (table, column) = table_col.split('.', 1)
    table_col = '%s.tsvect_%s' % (table, column,)
    # FIXME: l18n. Hard-coding 'english' for now.
    # NOTE: Avoid plainto_tsquery, which applies & and not |.
    #       Or is that what we want??
    #where_clause += (
    #   """
    #   %s (%s @@ plainto_tsquery('english', %s))
    #   """ % (conjunction,
    #          table_col,
    #          qb.db.quoted(qb.filters.filter_by_text_smart),))
    if qb.filters.filter_by_text_smart:
        # Get a list of "quoted phrases".
        query_text = qb.filters.filter_by_text_smart
        query_terms = re.findall(r'\"[^\"]*\"', query_text)
        # Remove the quotes from each multi-word term.
        raw_terms = [t.strip('"').strip() for t in query_terms]
        # Cull the "quoted phrases" we just extracted from the query string.
        (remainder, num_subs) = re.subn(r'\"[^\"]*\"', r' ', query_text)
        # Add the remaining single-word terms.
        raw_terms.extend(remainder.split())
        # Remove all non-alphanums and search just clean words.
        clean_terms = set()
        for raw_term in raw_terms:
            cleaned = re.sub(r'\W', ' ', raw_term).split()
            for clean_word in cleaned:
                if (not stop_words) or (clean_word not in stop_words.lookup):
                    clean_terms.add(clean_word)
            # Add the original string-term, too.
            if cleaned and ((len(cleaned) > 1) or (cleaned[0] != raw_term)):
                if (not stop_words) or (raw_term not in stop_words.lookup):
                    # 2014.08.19: Watch out for, e.g., "Ruttger's" (as in,
                    # (Ruttger's Resort), which splits on the \W to "Ruttger s"
                    # but whose raw term remains "Ruttger's": the single quote
                    # is special to full text search so remove 'em all.
                    raw_sans_single_quote = re.sub("'", '', raw_term)
                    clean_terms.add(raw_sans_single_quote)
        approved_terms = []
        for clean_term in clean_terms:
            # MAGIC_NUMBER: Short terms are okay when &'ed to another term,
            # but on their own, they're not very meaningful. E.g., searching
            # 'st' would return half the byways. And [lb] cannot think of any
            # one- or two-letter words that would be important to search on
            # their own.
            if len(clean_term) > 2:
                approved_terms.append(clean_term)
        if not approved_terms:
            nothing_to_query = True
        else:
            # Special Case: Check if query is all stop words.
            sql_tsquery = ("SELECT to_tsquery('%s')"
                           % ('|'.join(approved_terms),))
            dont_fetchall = qb.db.dont_fetchall
            qb.db.dont_fetchall = False
            rows = qb.db.sql(sql_tsquery)
            qb.db.dont_fetchall = dont_fetchall
            g.assurt(len(rows) == 1)
            nothing_to_query = not rows[0]['to_tsquery']
        if nothing_to_query:
            approved_terms = []
            log.info(
                'sql_apply_query_filter_by_text_tc: only stop words: %s'
                % (qb.filters.filter_by_text_smart,))
            # Stop processing the request now.
            #raise GWIS_Warning(
            #   'Too vague: Please try using more specific search terms.')
        # Quote each and everything.
        # BUGFIX: list.sort() returns None, so the old test
        # (raw_terms.sort() != approved_terms.sort()) was always False and
        # the joined raw-terms phrase was never added (and the lists were
        # sorted in place as a side effect). Compare sorted copies instead.
        if raw_terms and (sorted(raw_terms) != sorted(approved_terms)):
            quoted_terms = ["'%s'" % (' '.join([x for x in raw_terms]),),]
        else:
            quoted_terms = []
        quoted_terms.extend([
            "'%s'" % (qb.db.quoted(term),) for term in approved_terms])
        tsquery = "|".join(quoted_terms)
    if qb.filters.filter_by_text_full:
        # This is only used internally. It's a ready-to-go string, like
        #   ''123 main st''|''minneapolis''|''main st''
        tsquery = qb.filters.filter_by_text_full
    if tsquery and (qb.filters.filter_by_text_smart
                    or qb.filters.filter_by_text_full):
        where_clause += (
            """
            %s (%s @@ to_tsquery('english', '%s'))
            """ % (conjunction, table_col, tsquery,))
        conjunction = "OR"
        # Sort the full text results by relevance.
        if True:
            # The ts_rank_cd function returns a number from 0 to whatever,
            # adding 0.1 for every matching word. E.g.,
            #   select ts_rank_cd(
            #      to_tsvector('english', 'route|route|hello|hello'),
            #      to_tsquery('english', 'hello|route'));
            # returns 0.4 and not just 0.2 because the query includes
            # the same words twice... so if the user includes a search
            # term multiple times, any results with that term will be
            # ranked even higher.
            qb.sql_clauses.outer.enabled = True
            # An example of how one might use debuggery_print_next_sql:
            #  conf.debuggery_print_next_sql += 1
            qb.sql_clauses.inner.shared += (
                """
                , %s
                """ % (table_col,))
            qb.sql_clauses.outer.select += (
                """
                , ts_rank_cd(group_item.tsvect_%s,
                             to_tsquery('english', '%s'))
                  AS fts_rank_%s
                """ % (column, tsquery, column,))
            qb.sql_clauses.outer.group_by += (
                """
                , fts_rank_%s
                """ % (column,))
            qb.sql_clauses.outer.order_by_enable = True
            # Route will add edited_date DESC, which we don't want when
            # ranking by text.
            comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
            if comma_maybe:
                check_ordering = qb.sql_clauses.outer.order_by.strip()
                if check_ordering == 'edited_date DESC':
                    qb.sql_clauses.outer.order_by = ''
                    comma_maybe = ''
                # When searching multiple columns, we'll order by each of
                # them, e.g., when geocoding, we'll search text in the
                # item name and also look for item comments, so the order-
                # by is a collection of fts_rank_*, e.g.,
                #   ORDER BY fts_rank_name DESC, fts_rank_comments DESC
                # not that ordering by the second column probably does much.
                elif not check_ordering.startswith('fts_rank_'):
                    log.warning(
                        'sql_apply_query_filter_by_text_tc: check_ordering: %s'
                        % (check_ordering,))
                    log.warning(
                        'sql_apply_query_filter_by_text_tc: qb: %s' % (qb,))
                    g.assurt_soft(False)
            qb.sql_clauses.outer.order_by += (
                """
                %s fts_rank_%s DESC
                """ % (comma_maybe, column,))
        if False:
            # We could use levenshtein distance, but that doesn't work well
            # when terms are scrambled, e.g., comparing "my favorite route"
            # to "route favorite my" is same as comparing "my favorite route"
            # to "completely different", i.e., not a good way to rank results
            # (but good for looking for duplicate line segments by name, like
            # how geofeature_io.py works).
            #   sudo apt-get install postgresql-contrib
            #   psql -U postgres ccpv3_lite
            #   ccpv3_lite=# create extension fuzzystrmatch;
            qb.sql_clauses.outer.enabled = True
            qb.sql_clauses.outer.select += (
                """
                , levenshtein(LOWER(group_item.name), %s) AS leven_dist
                """ % (qb.db.quoted(qb.filters.filter_by_text_smart),))
            qb.sql_clauses.outer.group_by += (
                """
                , leven_dist
                """)
            qb.sql_clauses.outer.order_by_enable = True
            comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
            qb.sql_clauses.outer.order_by += (
                """
                %s leven_dist ASC
                """ % (comma_maybe,))
    return (where_clause, conjunction, outer_where,)
def save_core(self, qb):
    """Save the attachment core rows, then this tag's own table row.

    Tags must be named (soft assert).
    """
    g.assurt_soft(self.name)
    attachment.One.save_core(self, qb)
    # Save to the 'tag' table.
    self.save_insert(qb, One.item_type_table, One.psql_defns)
def geocode_mapquest_process(json_in, geocoded): # We only sent one address (i.e., not a bulk query) so 'results' will # have length of 0 or 1, and 'locations' within it might have many. g.assurt_soft(len(json_in['results']) <= 1) log.debug('geocode_mapquest_proc: received %s results' % (str(len(json_in['results'][0]['locations'])) if json_in['results'] else 'zero',)) for matches in json_in['results']: for result in matches['locations']: addr_g = address.Address() addr_g.text = ', '.join( [x for x in [result['street'], # House/Street or Xsct result['adminArea5'], # City result['adminArea3'], # State result['postalCode'],] # ZIP(r) if x]) # MAYBE: This includes house number or intersection... should be # fine? addr_g.street = result['street'] addr_g.city = result['adminArea5'] g.assurt_soft(result['adminArea5Type'] == 'City') addr_g.state = result['adminArea3'] g.assurt_soft(result['adminArea3Type'] == 'State') addr_g.county = result['adminArea4'] g.assurt_soft(result['adminArea4Type'] == 'County') addr_g.country = result['adminArea1'] g.assurt_soft(result['adminArea1Type'] == 'Country') addr_g.zip = result['postalCode'] # EXPLAIN: Is there a difference btw. displayLatLng and latLng? addr_g.y = float(result['latLng']['lat']) addr_g.x = float(result['latLng']['lng']) # See: http://www.mapquestapi.com/geocoding/geocodequality.html if (result['geocodeQuality'] in Geocode.mapquest_confident_categories): addr_g.gc_confidence = 100 else: # From MapQuest docs: "The geocodeQualityCode value in a # Geocode Response is a five character string which # describes the quality of the geocoding results. 
# Character Position 1 2 3 4 5 # Value G S F A P # where: G = Granularity Code # S = Granularity Sub-Code # F = Full Street Name Confidence Level # A = Administrative Area Confidence Level # P = Postal Code Confidence Level # # We handle granularities (see mapquest_confident_categories) # and administrative and postal codes specially, so # all we want to check out is the full street confidence. # It's one of: 'A', 'B', 'C', 'X'. # http://www.mapquestapi.com/geocoding/geocodequality.html # MAGIC_NUMBER: 2 is Full Street Name Confidence Level Index. # That is, no street name confidence, 0 overall. try: addr_g.gc_confidence = Geocode.mapquest_confidence_lookup[ result['geocodeQualityCode'][2]] except Exception, e: log.warning('geocode_mapquest_proc: what gQC?: %s / %s / %s' % (result['geocodeQualityCode'][2], pprint.pprint(json_in), str(e),)) addr_g.gc_confidence = 0 addr_g.gc_fulfiller = 'mapq' log.debug('geocode_mapquest_proc: adding: %s' % (addr_g,)) geocoded.append(addr_g)
def sql_apply_query_filter_by_text_tc(self, qb, table_col, stop_words,
                                      where_clause, conjunction, use_outer,
                                      outer_where):
    """Append text-search conditions for one searchable column.

    Honors qb.filters.filter_by_text_exact, filter_by_names_exact,
    filter_by_text_loose (regex match), and filter_by_text_smart /
    filter_by_text_full (Postgres full text search), OR-ing each enabled
    filter onto where_clause (or outer_where, when use_outer applies).

    Returns the tuple (where_clause, conjunction, outer_where).
    """
    # Only select items whose name matches the user's search query.
    # But if multiple search columns or search filters are specified,
    # just OR them all together (this is so, e.g., search threads
    # looks in both the thread name and the post body).
    # See below for a bunch of comments about the different postgres
    # string comparison operators (=, ~/~*, and @@).
    if qb.filters.filter_by_text_exact:
        filter_by_text_exact_lower = qb.filters.filter_by_text_exact.lower(
        )
        where_clause += ("""
            %s (LOWER(%s) = %s)
            """ % (
            conjunction,
            table_col,
            # qb.db.quoted(qb.filters.filter_by_text_exact),
            # %s (LOWER(%s) = LOWER(%s))
            qb.db.quoted(filter_by_text_exact_lower),
        ))
        conjunction = "OR"
    # This is like the previous filter but allows the user to specify a list.
    if qb.filters.filter_by_names_exact:
        item_names = [
            x.strip().lower()
            for x in qb.filters.filter_by_names_exact.split(',')
        ]
        name_clauses = []
        for item_name in item_names:
            # item_name is the empty string if input contained ,,
            if item_name:
                name_clauses.append("(LOWER(gia.name) = %s)" %
                                    (qb.db.quoted(item_name), ))
        name_clauses = " OR ".join(name_clauses)
        where_clause += ("""
            %s (%s)
            """ % (
            conjunction,
            name_clauses,
        ))
        conjunction = "OR"
    if qb.filters.filter_by_text_loose:
        # NOTE: ~* does case-insensitive regex matching. This is slower than
        #       using =, but this is how we get a loose search. Consider
        #          select 'a' ~ 'a b c';  ==> false
        #          select 'a b c' ~ 'a';  ==> true
        #       meaning if the user searches 'lake' they get all the lakes.
        if not use_outer:
            where_clause += ("""
                %s (%s ~* %s)
                """ % (
                conjunction,
                table_col,
                qb.db.quoted(qb.filters.filter_by_text_loose),
            ))
            conjunction = "OR"
        else:
            sub_where = (" (%s ~* %s) " % (
                table_col,
                qb.db.quoted(qb.filters.filter_by_text_loose),
            ))
            if not outer_where:
                outer_where = sub_where
            else:
                outer_where = (" (%s OR (%s ~* %s)) " % (
                    outer_where,
                    table_col,
                    qb.db.quoted(qb.filters.filter_by_text_loose),
                ))
    # For filter_by_text_smart and filter_by_text_full:
    tsquery = None
    # Callers should only specify columns that are properly indexed for
    # full text search, since that's the column we really want (if we use
    # the normal-named column, Postgres does an inline index on the text).
    (table, column) = table_col.split('.', 1)
    table_col = '%s.tsvect_%s' % (
        table,
        column,
    )
    # FIXME: l18n. Hard-coding 'english' for now.
    # NOTE: Avoid plainto_tsquery, which applies & and not |.
    #       Or is that what we want??
    #where_clause += (
    #   """
    #   %s (%s @@ plainto_tsquery('english', %s))
    #   """ % (conjunction,
    #          table_col,
    #          qb.db.quoted(qb.filters.filter_by_text_smart),))
    if qb.filters.filter_by_text_smart:
        # Get a list of "quoted phrases".
        query_text = qb.filters.filter_by_text_smart
        query_terms = re.findall(r'\"[^\"]*\"', query_text)
        # Remove the quotes from each multi-word term.
        raw_terms = [t.strip('"').strip() for t in query_terms]
        # Cull the "quoted phrases" we just extracted from the query string.
        (remainder, num_subs) = re.subn(r'\"[^\"]*\"', r' ', query_text)
        # Add the remaining single-word terms.
        raw_terms.extend(remainder.split())
        # Remove all non-alphanums and search just clean words.
        clean_terms = set()
        for raw_term in raw_terms:
            cleaned = re.sub(r'\W', ' ', raw_term).split()
            for clean_word in cleaned:
                if (not stop_words) or (clean_word not in stop_words.lookup):
                    clean_terms.add(clean_word)
            # Add the original string-term, too.
            if cleaned and ((len(cleaned) > 1) or (cleaned[0] != raw_term)):
                if (not stop_words) or (raw_term not in stop_words.lookup):
                    # 2014.08.19: Watch out for, e.g., "Ruttger's" (as in,
                    # (Ruttger's Resort), which splits on the \W to "Ruttger s"
                    # but whose raw term remains "Ruttger's": the single quote
                    # is special to full text search so remove 'em all.
                    raw_sans_single_quote = re.sub("'", '', raw_term)
                    clean_terms.add(raw_sans_single_quote)
        approved_terms = []
        for clean_term in clean_terms:
            # MAGIC_NUMBER: Short terms are okay when &'ed to another term,
            # but on their own, they're not very meaningful. E.g., searching
            # 'st' would return half the byways. And [lb] cannot think of any
            # one- or two-letter words that would be important to search on
            # their own.
            if len(clean_term) > 2:
                approved_terms.append(clean_term)
        if not approved_terms:
            nothing_to_query = True
        else:
            # Special Case: Check if query is all stop words.
            sql_tsquery = ("SELECT to_tsquery('%s')" %
                           ('|'.join(approved_terms), ))
            dont_fetchall = qb.db.dont_fetchall
            qb.db.dont_fetchall = False
            rows = qb.db.sql(sql_tsquery)
            qb.db.dont_fetchall = dont_fetchall
            g.assurt(len(rows) == 1)
            nothing_to_query = not rows[0]['to_tsquery']
        if nothing_to_query:
            approved_terms = []
            log.info(
                'sql_apply_query_filter_by_text_tc: only stop words: %s' %
                (qb.filters.filter_by_text_smart, ))
            # Stop processing the request now.
            #raise GWIS_Warning(
            #   'Too vague: Please try using more specific search terms.')
        # Quote each and everything.
        # BUGFIX: list.sort() returns None, so the old test
        # (raw_terms.sort() != approved_terms.sort()) was always False and
        # the joined raw-terms phrase was never added (and the lists were
        # sorted in place as a side effect). Compare sorted copies instead.
        if raw_terms and (sorted(raw_terms) != sorted(approved_terms)):
            quoted_terms = [
                "'%s'" % (' '.join([x for x in raw_terms]), ),
            ]
        else:
            quoted_terms = []
        quoted_terms.extend(
            ["'%s'" % (qb.db.quoted(term), ) for term in approved_terms])
        tsquery = "|".join(quoted_terms)
    if qb.filters.filter_by_text_full:
        # This is only used internally. It's a ready-to-go string, like
        #   ''123 main st''|''minneapolis''|''main st''
        tsquery = qb.filters.filter_by_text_full
    if tsquery and (qb.filters.filter_by_text_smart
                    or qb.filters.filter_by_text_full):
        where_clause += ("""
            %s (%s @@ to_tsquery('english', '%s'))
            """ % (
            conjunction,
            table_col,
            tsquery,
        ))
        conjunction = "OR"
        # Sort the full text results by relevance.
        if True:
            # The ts_rank_cd function returns a number from 0 to whatever,
            # adding 0.1 for every matching word. E.g.,
            #   select ts_rank_cd(
            #      to_tsvector('english', 'route|route|hello|hello'),
            #      to_tsquery('english', 'hello|route'));
            # returns 0.4 and not just 0.2 because the query includes
            # the same words twice... so if the user includes a search
            # term multiple times, any results with that term will be
            # ranked even higher.
            qb.sql_clauses.outer.enabled = True
            # An example of how one might use debuggery_print_next_sql:
            #  conf.debuggery_print_next_sql += 1
            qb.sql_clauses.inner.shared += ("""
                , %s
                """ % (table_col, ))
            qb.sql_clauses.outer.select += ("""
                , ts_rank_cd(group_item.tsvect_%s,
                             to_tsquery('english', '%s'))
                  AS fts_rank_%s
                """ % (
                column,
                tsquery,
                column,
            ))
            qb.sql_clauses.outer.group_by += ("""
                , fts_rank_%s
                """ % (column, ))
            qb.sql_clauses.outer.order_by_enable = True
            # Route will add edited_date DESC, which we don't want when
            # ranking by text.
            comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
            if comma_maybe:
                check_ordering = qb.sql_clauses.outer.order_by.strip()
                if check_ordering == 'edited_date DESC':
                    qb.sql_clauses.outer.order_by = ''
                    comma_maybe = ''
                # When searching multiple columns, we'll order by each of
                # them, e.g., when geocoding, we'll search text in the
                # item name and also look for item comments, so the order-
                # by is a collection of fts_rank_*, e.g.,
                #   ORDER BY fts_rank_name DESC, fts_rank_comments DESC
                # not that ordering by the second column probably does much.
                elif not check_ordering.startswith('fts_rank_'):
                    log.warning(
                        'sql_apply_query_filter_by_text_tc: check_ordering: %s'
                        % (check_ordering, ))
                    log.warning(
                        'sql_apply_query_filter_by_text_tc: qb: %s' % (qb, ))
                    g.assurt_soft(False)
            qb.sql_clauses.outer.order_by += ("""
                %s fts_rank_%s DESC
                """ % (
                comma_maybe,
                column,
            ))
        if False:
            # We could use levenshtein distance, but that doesn't work well
            # when terms are scrambled, e.g., comparing "my favorite route"
            # to "route favorite my" is same as comparing "my favorite route"
            # to "completely different", i.e., not a good way to rank results
            # (but good for looking for duplicate line segments by name, like
            # how geofeature_io.py works).
            #   sudo apt-get install postgresql-contrib
            #   psql -U postgres ccpv3_lite
            #   ccpv3_lite=# create extension fuzzystrmatch;
            qb.sql_clauses.outer.enabled = True
            qb.sql_clauses.outer.select += ("""
                , levenshtein(LOWER(group_item.name), %s) AS leven_dist
                """ % (qb.db.quoted(qb.filters.filter_by_text_smart), ))
            qb.sql_clauses.outer.group_by += ("""
                , leven_dist
                """)
            qb.sql_clauses.outer.order_by_enable = True
            comma_maybe = ', ' if qb.sql_clauses.outer.order_by else ''
            qb.sql_clauses.outer.order_by += ("""
                %s leven_dist ASC
                """ % (comma_maybe, ))
    return (
        where_clause,
        conjunction,
        outer_where,
    )