def makeSearchString(q, index, limits, handler=None):
    """Translate search parameters into a URL-escaped Solr query string.

    q       -- query text; underscores are treated as spaces and any word
               found in STOPWORDS is dropped.
    index   -- field to search.  Only used when no handler, or the
               "standard" handler, is given.  If it is one of allFacetCodes
               the query is wrapped in double quotes (exact match).
    limits  -- iterable of "field:term" strings (URL-quoted or not); each one
               is ANDed onto the query as a quoted term.  None or empty means
               no limits.
    handler -- optional request handler name.  Any handler other than
               "standard" means the query is sent without an index prefix.

    Returns the assembled query with spaces encoded as %20.
    """
    q = q.replace("_", " ")

    # strip out stopwords here... this is necessary due to some
    # unexpected side-effects of the general keyword search
    # TODO: do NOT strip stopwords if a quoted search, eg. "the the" or "to be or not to be"
    q = " ".join([word for word in q.split(" ") if word not in STOPWORDS])

    for orig, replacement in SEARCH_CHARACTER_REPLACEMENTS.iteritems():
        q = q.replace(orig, replacement)
    q = urllib.quote(q).strip()

    # Compare by value: "is" tests object identity, which only matches a
    # string literal when the interpreter happens to have interned it.
    if not handler or handler == "standard":
        if index in allFacetCodes:
            # then treat as an exact match search; it is a facet, not free text entered by user
            ret = '%s:"%s"' % (index, q)
        else:
            # then it is a search index, not a facet -- NOT exact match
            ret = '%s:%s' % (index, q)
    else:
        # if you specify a handler you can't also specify an index...
        ret = q

    for limitOn in limits or ():
        # Limits may arrive URL-quoted; decode before picking them apart.
        rawLimit = urllib.unquote(limitOn)

        # Split on the first colon only, so a term may itself contain colons.
        # A limit with no colon yields an empty term.  A separate local is
        # used so the caller's `index` argument is not overwritten.
        limitIndex, _sep, term = rawLimit.partition(":")

        for orig, replacement in SEARCH_CHARACTER_REPLACEMENTS.iteritems():
            term = term.replace(orig, replacement)
        term = term.replace("_", " ").strip()

        # we are going to put exact quotes around the whole thing, so we don't want to double-quote
        term = urllib.quote(term)
        logger.debug("limit %r parsed as index %r, term %r", limitOn, limitIndex, term)
        ret = '%s AND %s:"%s"' % (ret, limitIndex, term)

    # get rid of any double spaces, then escape the remaining ones.
    ret = ret.replace("  ", " ")
    ret = ret.replace(" ", "%20")
    logger.debug("search string is %s" % ret)
    return ret.strip()
Пример #2
0
def spellCheck( phraseToCheck ):
    """Return a list of spelling suggestions for phraseToCheck.

    When config.USE_YAHOO_SPELLING_WEB_SERVICE is false no lookup is made and
    an empty list is returned.  Otherwise the raw service response is looked
    up in the cache under a key derived from the phrase and, on a miss,
    fetched from config.YAHOO_WEB_SERVICE_URL.  A freshly fetched response is
    cached only after it has been parsed as JSON.

    The returned list holds at most one item: the value found at
    response["ResultSet"]["Result"].  It is empty when the response could not
    be parsed or does not have that shape.
    """
    ret = []
    if config.USE_YAHOO_SPELLING_WEB_SERVICE:
        # TODO: will it handle utf8?

        # Stays None if the response cannot be parsed, so the shape check
        # below is skipped instead of hitting an unbound local.
        respObject = None

        # check cache first.
        cacheKey = "spellcheck~~%s~~" % phraseToCheck
        suggestionsFromCache = cache.get( cacheKey )
        if suggestionsFromCache:
            logger.debug( "got spelling suggestion from cache" )
            respObject = simplejson.loads( suggestionsFromCache )
        else:
            query = urllib.quote( phraseToCheck )
            urlToGet = config.YAHOO_WEB_SERVICE_URL % dict( YAHOO_APPID = config.YAHOO_APPID, query = query )
            logger.debug( "fetching URL %s" % urlToGet )
            data = urllib.urlopen( urlToGet ).read()
            try:
                respObject = simplejson.loads( data )
                # note we only cache after a successful parse, to prevent caching bad data
                cache.set( cacheKey, data, config.YAHOO_SPELLCHECK_CACHE_TIME )
                # TODO: figure out if it will *ever* return multiple suggestions...
            except Exception:
                # Best effort: a bad response just means no suggestions.
                logger.error( "exception doing spellCheck service" )

        # Valid JSON is not necessarily an object, so check the type at each
        # level before indexing into it.
        if isinstance( respObject, dict ):
            resultSet = respObject.get( "ResultSet" )
            if isinstance( resultSet, dict ) and "Result" in resultSet:
                ret.append( resultSet["Result"] )
    logger.debug( "returning %s" % ret )
    return ret

                
    #return ['pants', 'pantaloons']    # csdebug