def makeSearchString(q, index, limits, handler=None):
    """Translate search parameters into Solr query syntax.

    Args:
        q: the raw query text. Underscores become spaces, words found in
            STOPWORDS are dropped, SEARCH_CHARACTER_REPLACEMENTS is applied
            and the result is URL-quoted.
        index: name of the field to search. Only used when no handler (or
            the "standard" handler) is given. If it is listed in
            allFacetCodes the query is wrapped in double quotes (exact
            match); otherwise it is left unquoted.
        limits: iterable of URL-quoted "field:term" strings. Each one is
            appended to the query as an ' AND field:"term"' clause. None is
            treated as "no limits".
        handler: optional handler name. When it is given and is not
            "standard", `index` is ignored and the query starts with the
            bare quoted text.

    Returns:
        The assembled query string, with every space encoded as %20.
    """
    q = q.replace("_", " ")

    # Strip out stopwords here... this is necessary due to some unexpected
    # side-effects of the general keyword search.
    # TODO: do NOT strip stopwords if a quoted search, eg. "the the" or
    # "to be or not to be"
    q = " ".join([word for word in q.split(" ") if word not in STOPWORDS])

    for orig, replacement in SEARCH_CHARACTER_REPLACEMENTS.iteritems():
        q = q.replace(orig, replacement)
    q = urllib.quote(q).strip()

    # Compare by value: `is` tests object identity, which matches an equal
    # string only when the interpreter happens to share the object.
    if not handler or handler == "standard":
        if index in allFacetCodes:
            # It is a facet, not free text entered by the user, so treat it
            # as an exact match search.
            ret = '%s:"%s"' % (index, q)
        else:
            # It is a search index, not a facet -- NOT exact match.
            ret = '%s:%s' % (index, q)
    else:
        # If you specify a handler you can't also specify an index...
        ret = q

    for limitOn in limits or ():
        limitSplit = urllib.unquote(limitOn).split(":")
        logger.debug("limitSplit is %s\n\n\n\n" % limitSplit)

        # Everything before the first ":" is the field; the rest (which may
        # itself contain colons) is the term. A limit with no ":" yields an
        # empty term.
        # TODO: error handling for malformed limits.
        limitIndex = limitSplit[0]
        term = ":".join(limitSplit[1:])

        for orig, replacement in SEARCH_CHARACTER_REPLACEMENTS.iteritems():
            term = term.replace(orig, replacement)
            logger.debug("\n\nterm is now %s\n\n" % term)
        logger.debug("\n\nxxterm is now %s\n\n" % term)

        term = term.replace("_", " ").strip()
        logger.debug("\n\nyyterm is now %s\n\n" % term)

        # The whole term gets wrapped in exact-match quotes below, so it is
        # URL-quoted first to avoid double-quoting.
        term = urllib.quote(term)
        logger.debug("\n\n!!!term is now %s\n\n" % term)

        ret = '%s AND %s:"%s"' % (ret, limitIndex, term)

    # Get rid of any double spaces, then encode the remaining single ones.
    ret = ret.replace("  ", " ")
    ret = ret.replace(" ", "%20")
    logger.debug("search string is %s" % ret)
    return ret.strip()
def spellCheck(phraseToCheck):
    """Return a list of potential spelling suggestions for a phrase.

    Nothing is looked up unless config.USE_YAHOO_SPELLING_WEB_SERVICE is
    set. Otherwise the raw JSON response is read from the cache or, on a
    cache miss, fetched from the URL built from
    config.YAHOO_WEB_SERVICE_URL and then stored in the cache with
    config.YAHOO_SPELLCHECK_CACHE_TIME.

    Args:
        phraseToCheck: the phrase to check.

    Returns:
        A list containing the response's ResultSet -> Result value, or an
        empty list when the service is disabled, the response cannot be
        parsed, or the response has no such entry.
    """
    ret = []
    if config.USE_YAHOO_SPELLING_WEB_SERVICE:
        # TODO: will it handle utf8?
        # Check cache first.
        cacheKey = "spellcheck~~%s~~" % phraseToCheck
        suggestionsFromCache = cache.get(cacheKey)

        # Stays None when the service response cannot be parsed, so the
        # lookup below is skipped instead of hitting an unbound name.
        respObject = None
        if suggestionsFromCache:
            logger.debug("got spelling suggestion from cache")
            respObject = simplejson.loads(suggestionsFromCache)
        else:
            query = urllib.quote(phraseToCheck)
            urlToGet = config.YAHOO_WEB_SERVICE_URL % dict(
                YAHOO_APPID=config.YAHOO_APPID, query=query)
            logger.debug("fetching URL %s" % urlToGet)
            response = urllib.urlopen(urlToGet)
            try:
                data = response.read()
            finally:
                # Always release the connection, even if the read fails.
                response.close()
            try:
                respObject = simplejson.loads(data)
                # Cache only after a successful parse, to prevent caching
                # bad data.
                cache.set(cacheKey, data, config.YAHOO_SPELLCHECK_CACHE_TIME)
                # TODO: figure out if it will *ever* return multiple
                # suggestions...
            except Exception:
                logger.error("exception doing spellCheck service")

        # Only trust the payload if it has the expected dict-in-dict shape.
        resultSet = None
        if isinstance(respObject, dict):
            resultSet = respObject.get("ResultSet")
        if isinstance(resultSet, dict) and "Result" in resultSet:
            ret.append(resultSet["Result"])

    logger.debug("returning %s" % ret)
    return ret