Пример #1
0
 def testQuotingForwardSlashes(self):
     # solr 4 supports regular expressions and requires / to be escaped
     # Each pair is (raw query, string quote() is expected to return).
     expectations = (
         ("/", "\\/"),
         ("(/ OR x)", "(\\/ OR x)"),
         ('"/', '\\"\\/'),
         ('"/"', '"\\/"'),
         ('"(/ OR x)"', '"\\(\\/ OR x\\)"'),
     )
     for raw, expected in expectations:
         self.assertEqual(quote(raw), expected)
Пример #2
0
 def quoteitem(term):
     # Quote a single list item.  Unicode input is first encoded to UTF-8.
     # If quote() had to alter the term and the result is not already a
     # quoted phrase, the term is wrapped in double quotes and quoted again.
     literal = term.encode('utf-8') if isinstance(term, unicode) else term
     result = quote(literal)
     if result.startswith('"') or result == literal:
         return result
     return quote('"' + literal + '"')
Пример #3
0
 def testQuotingBoostingTerm(self):
     # Boost markers (^N) are expected to survive quote(); the multi-term
     # queries come back wrapped in parentheses.
     boosted = (
         ("jakarta^4 apache", "(jakarta^4 apache)"),
         ("jakarta^0.2 apache", "(jakarta^0.2 apache)"),
         (
             '"jakarta apache"^4 "Apache Lucene"',
             '("jakarta apache"^4 "Apache Lucene")',
         ),
     )
     for raw, expected in boosted:
         self.assertEqual(quote(raw), expected)
Пример #4
0
 def quoteitem(term):
     # Quote one list item: encode unicode to UTF-8, run it through quote(),
     # and fall back to quoting the term as a double-quoted phrase when
     # quote() changed it without producing a quoted phrase.
     if isinstance(term, unicode):
         encoded = term.encode('utf-8')
     else:
         encoded = term
     candidate = quote(encoded)
     needs_phrase = not (candidate.startswith('"') or candidate == encoded)
     if needs_phrase:
         return quote('"' + encoded + '"')
     return candidate
Пример #5
0
 def testQuotingForwardSlashes(self):
     # solr 4 supports regular expressions and requires / to be escaped
     # Maps are avoided on purpose: the checks run in the listed order.
     slash_cases = [
         ('/', '\\/'),
         ('(/ OR x)', '(\\/ OR x)'),
         ('"/', '\\"\\/'),
         ('"/"', '"\\/"'),
         ('"(/ OR x)"', '"\\(\\/ OR x\\)"'),
     ]
     for query, quoted in slash_cases:
         self.assertEqual(quote(query), quoted)
Пример #6
0
 def testQuotingEscapingSpecialCharacters(self):
     # Each pair is (raw query, string quote() is expected to return).
     pairs = [
         ("-+!^~:", "\\-\\+\\!\\^\\~\\:"),
         # Only quote * and ? if quoted
         ('"*?"', '"\\*\\?"'),
         # also quote multiple occurrences
         (":", "\\:"),
         (": :", "(\\: \\:)"),
         ("foo+ bar! nul:", "(foo\\+ bar\\! nul\\:)"),
     ]
     for raw, expected in pairs:
         self.assertEqual(quote(raw), expected)
Пример #7
0
 def testQuotingEscapingSpecialCharacters(self):
     # Table of (raw query, expected quote() output), checked in order.
     special_cases = (
         ('-+!^~:', '\\-\\+\\!\\^\\~\\:'),
         # Only quote * and ? if quoted
         ('"*?"', '"\\*\\?"'),
         # also quote multiple occurrences
         (':', '\\:'),
         (': :', '(\\: \\:)'),
         ('foo+ bar! nul:', '(foo\\+ bar\\! nul\\:)'),
     )
     for query, quoted in special_cases:
         self.assertEqual(quote(query), quoted)
Пример #8
0
 def testQuotingWildcardSearches(self):
     # Each scenario is (raw query, keyword options for quote(), expected).
     # Without prefix_wildcard a leading wildcard is expected to be dropped;
     # with it, a single leading wildcard is kept.  Repeated wildcards are
     # expected to collapse into one.
     scenarios = (
         ("te?t", {}, "te?t"),
         ("test*", {}, "test*"),
         ("test**", {}, "test*"),
         ("te*t", {}, "te*t"),
         ("?test", {}, "test"),
         ("*test", {}, "test"),
         ("*test", {"prefix_wildcard": True}, "*test"),
         ("?test", {"prefix_wildcard": True}, "?test"),
         ("**test", {"prefix_wildcard": True}, "*test"),
         ("??test", {"prefix_wildcard": True}, "?test"),
     )
     for raw, options, expected in scenarios:
         self.assertEqual(quote(raw, **options), expected)
Пример #9
0
 def testUnicode(self):
     self.assertEqual(quote('foø'), 'fo\xc3\xb8')
     self.assertEqual(quote('"foø'), '\\"fo\xc3\xb8')
     self.assertEqual(quote('whät?'), 'wh\xc3\xa4t?')
     self.assertEqual(quote('"whät?"'), '"wh\xc3\xa4t\?"')
     self.assertEqual(quote('"[ø]"'), '"\[\xc3\xb8\]"')
     self.assertEqual(quote('[ø]'), '\\[\xc3\xb8\\]')
     self.assertEqual(quote('"foø*"'), '"fo\xc3\xb8\*"')
     self.assertEqual(quote('"foø bar?"'), '"fo\xc3\xb8 bar\?"')
     self.assertEqual(quote(u'*****@*****.**'), '*****@*****.**')
Пример #10
0
 def testUnicode(self):
     # Each pair is (raw query, UTF-8 encoded bytes of the expected result);
     # the bytes are decoded before being compared with quote()'s output.
     utf8_cases = (
         ("foø", b"fo\xc3\xb8"),
         ('"foø', b'\\"fo\xc3\xb8'),
         ("whät?", b"wh\xc3\xa4t?"),
         ('"whät?"', b'"wh\xc3\xa4t\\?"'),
         ('"[ø]"', b'"\\[\xc3\xb8\\]"'),
         ("[ø]", b"\\[\xc3\xb8\\]"),
         ('"foø*"', b'"fo\xc3\xb8\\*"'),
         ('"foø bar?"', b'"fo\xc3\xb8 bar\\?"'),
     )
     for raw, encoded in utf8_cases:
         self.assertEqual(quote(raw), encoded.decode("utf-8"))
     self.assertEqual(quote(u"*****@*****.**"), "*****@*****.**")
Пример #11
0
def mangleSearchableText(value, config):
    """Expand a simple SearchableText query into per-term expressions.

    ``config`` falls back to ``getConfig()`` when falsy.  A value that
    ``isSimpleSearch`` rejects is returned untouched.  Otherwise every term
    from ``splitSimpleSearch`` is expanded by ``makeSimpleExpressions`` and
    the results are joined with spaces.

    Returns the joined expression string, or, when the configuration has a
    ``search_pattern``, a one-element set holding the UTF-8 encoded pattern
    formatted with ``value`` (quoted) and ``base_value``.
    """
    settings = config or getConfig()
    template = getattr(settings, 'search_pattern', u'')
    if template:
        template = template.encode('utf-8')
    distance = getattr(settings, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    expressions = [makeSimpleExpressions(term, distance)
                   for term in splitSimpleSearch(value)]
    joined_value = ' '.join(expr for expr, _ in expressions)
    joined_base = ' '.join(base for _, base in expressions)
    if not template:
        return joined_value
    literal = template.format(value=quote(joined_value),
                              base_value=joined_base)
    return set([literal])    # add literal query parameter
Пример #12
0
def mangleSearchableText(value, config):
    """Rewrite a simple SearchableText value into expanded term expressions.

    Uses ``getConfig()`` when ``config`` is falsy.  Values that are not a
    simple search (per ``isSimpleSearch``) are handed back unchanged.

    Returns either the space-joined "value" expressions, or a set with a
    single string when a ``search_pattern`` is configured: the UTF-8
    encoded pattern filled in with the quoted value and the base value.
    """
    active_config = config or getConfig()
    search_pattern = getattr(active_config, 'search_pattern', u'')
    if search_pattern:
        search_pattern = search_pattern.encode('utf-8')
    max_distance = getattr(active_config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    expanded = []
    bases = []
    for term in splitSimpleSearch(value):
        pair = makeSimpleExpressions(term, max_distance)
        term_value, term_base_value = pair
        expanded.append(term_value)
        bases.append(term_base_value)

    joined = ' '.join(expanded)
    if search_pattern:
        # add literal query parameter
        return set([search_pattern.format(value=quote(joined),
                                          base_value=' '.join(bases))])
    return joined
Пример #13
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the ``(value, base_value)`` search expressions for one term.

    A truthy ``levenstein_distance`` contributes a ``~<distance>`` suffix.
    Terms containing a double quote are used literally (plus that suffix),
    wildcard terms go through ``prepare_wildcard``, and any other term
    becomes ``<prefix><prepared>* OR <term><suffix>`` where the prefix is
    ``*`` only when the configuration enables ``prefix_wildcard``.

    Both returned expressions are wrapped in parentheses.
    """
    config = getConfig()
    leading_star = "*" if getattr(config, "prefix_wildcard", False) else ""
    fuzzy_suffix = "~%s" % levenstein_distance if levenstein_distance else ""

    if '"' in term:  # quoted literals
        value = base_value = "%s%s" % (term, fuzzy_suffix)
    elif isWildCard(term):
        value = prepare_wildcard(term)
        base_value = quote(term.replace("*", "").replace("?", ""))
    else:
        base_value = term
        pieces = (leading_star, prepare_wildcard(term), term, fuzzy_suffix)
        value = "%s%s* OR %s%s" % pieces
    return "(%s)" % value, "(%s)" % base_value
Пример #14
0
 def testSolrSpecifics(self):
     # http://wiki.apache.org/solr/SolrQuerySyntax
     # Each entry pairs a raw query with the string quote() must return.
     checks = [
         # Seems to be ok to quote function
         (
             '"recip(rord(myfield),1,2,3)"',
             '"recip\\(rord\\(myfield\\),1,2,3\\)"',
         ),
         ("[* TO NOW]", "[* TO NOW]"),
         (
             "[1976-03-06T23:59:59.999Z TO *]",
             "[1976-03-06T23:59:59.999Z TO *]",
         ),
         (
             "[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]",
             "[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]",
         ),
         (
             "[NOW-1YEAR/DAY TO NOW/DAY+1DAY]",
             "[NOW-1YEAR\\/DAY TO NOW\\/DAY+1DAY]",
         ),
         (
             "[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]",
             "[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]",
         ),
         # quoting date operators seems to be ok too
         (
             "[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]",
             "[1976-03-06T23:59:59.999Z\\/YEAR TO 1976-03-06T23:59:59.999Z]",
         ),
     ]
     for raw, expected in checks:
         self.assertEqual(quote(raw), expected)
Пример #15
0
 def testQuotingOperatorsGrouping(self):
     # Operators (+, -, NOT, OR, AND) and grouping parentheses are expected
     # to pass through quote(); pairs are (raw query, expected result).
     grouped = (
         ('+return +"pink panther"', '(+return +"pink panther")'),
         ('+jakarta lucene', '(+jakarta lucene)'),
         ('"jakarta apache" -"Apache Lucene"',
          '("jakarta apache" -"Apache Lucene")'),
         ('"jakarta apache" NOT "Apache Lucene"',
          '("jakarta apache" NOT "Apache Lucene")'),
         ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
         ('"jakarta apache" AND "Apache Lucene"',
          '("jakarta apache" AND "Apache Lucene")'),
         ('(jakarta OR apache) AND website',
          '((jakarta OR apache) AND website)'),
         ('(a AND (b OR c))', '(a AND (b OR c))'),
         ('((a AND b) OR c)', '((a AND b) OR c)'),
     )
     for raw, expected in grouped:
         self.assertEqual(quote(raw), expected)
Пример #16
0
def searchterms_from_value(value):
    """Split a search query into a list of quoted search terms.

    Parentheses and wildcards are stripped before quoting so that quote()
    does not try to escape them; parentheses are stripped once more
    afterwards because quote() may have added some.  The result is split
    on whitespace.
    """
    bare = strip_wildcards(strip_parens(value))
    quoted = quote(bare)
    return strip_parens(quoted).split()
Пример #17
0
def searchterms_from_value(value):
    """Return the individual search terms contained in a query string.

    The pipeline is: strip parentheses, strip wildcards, quote special
    characters, strip any parentheses introduced by quote(), then split on
    whitespace.
    """
    without_parens = strip_parens(value)
    without_wildcards = strip_wildcards(without_parens)
    # quote() may wrap the value in parentheses again, so strip them anew
    cleaned = strip_parens(quote(without_wildcards))
    return cleaned.split()
Пример #18
0
def mangle_searchable_text_query(value, pattern):
    """Fill ``pattern`` with the pieces of a lower-cased search value.

    The pattern is formatted with three fields: ``value`` (the lower-cased
    input with wildcards stripped, then quoted), ``value_lwc`` (result of
    ``leading_wildcards``) and ``value_twc`` (result of
    ``trailing_wildcards``).
    """
    lowered = value.lower()
    fields = {
        'value_lwc': leading_wildcards(lowered),
        'value_twc': trailing_wildcards(lowered),
    }
    fields['value'] = quote(strip_wildcards(lowered))
    return pattern.format(**fields)
Пример #19
0
def mangle_searchable_text_query(value, pattern):
    """Format ``pattern`` from a search value and its wildcard parts.

    The value is lower-cased first.  ``pattern.format`` then receives the
    quoted, wildcard-stripped text as ``value`` and the results of
    ``leading_wildcards`` / ``trailing_wildcards`` as ``value_lwc`` /
    ``value_twc``.
    """
    normalized = value.lower()
    leading = leading_wildcards(normalized)
    trailing = trailing_wildcards(normalized)
    core = quote(strip_wildcards(normalized))
    return pattern.format(value=core, value_lwc=leading, value_twc=trailing)
Пример #20
0
 def testQuotingWildcardSearches(self):
     # Inner and trailing wildcards are expected to be kept (a repeated "*"
     # collapses to one); a leading wildcard is expected to be dropped.
     wildcard_cases = (
         ("te?t", "te?t"),
         ("test*", "test*"),
         ("test**", "test*"),
         ("te*t", "te*t"),
         ("?test", "test"),
         ("*test", "test"),
     )
     for raw, expected in wildcard_cases:
         self.assertEqual(quote(raw), expected)
Пример #21
0
 def testQuotingWildcardSearches(self):
     # Pairs of (raw query, expected quote() output), checked in order.
     expected_by_query = [
         ('te?t', 'te?t'),
         ('test*', 'test*'),
         ('test**', 'test*'),
         ('te*t', 'te*t'),
         ('?test', 'test'),
         ('*test', 'test'),
     ]
     for query, quoted in expected_by_query:
         self.assertEqual(quote(query), quoted)
Пример #22
0
 def testQuotingRangeSearches(self):
     # Complete range expressions are expected to pass through unchanged;
     # a missing bound is expected to be filled in with "*".
     ranges = (
         ('[* TO NOW]', '[* TO NOW]'),
         ('[1972-05-11T00:00:00.000Z TO *]',
          '[1972-05-11T00:00:00.000Z TO *]'),
         ('[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]',
          '[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]'),
         ('[20020101 TO 20030101]', '[20020101 TO 20030101]'),
         ('{Aida TO Carmen}', '{Aida TO Carmen}'),
         ('{Aida TO}', '{Aida TO *}'),
         ('{TO Carmen}', '{* TO Carmen}'),
     )
     for raw, expected in ranges:
         self.assertEqual(quote(raw), expected)
Пример #23
0
 def testQuotingRangeSearches(self):
     # Each pair is (raw range query, string quote() is expected to return).
     range_cases = [
         ("[* TO NOW]", "[* TO NOW]"),
         (
             "[1972-05-11T00:00:00.000Z TO *]",
             "[1972-05-11T00:00:00.000Z TO *]",
         ),
         (
             "[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]",
             "[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]",
         ),
         ("[20020101 TO 20030101]", "[20020101 TO 20030101]"),
         ("{Aida TO Carmen}", "{Aida TO Carmen}"),
         # open-ended ranges get the missing bound replaced by "*"
         ("{Aida TO}", "{Aida TO *}"),
         ("{TO Carmen}", "{* TO Carmen}"),
     ]
     for query, quoted in range_cases:
         self.assertEqual(quote(query), quoted)
Пример #24
0
 def testSolrSpecifics(self):
     # http://wiki.apache.org/solr/SolrQuerySyntax
     # The expected function-query literal spells its backslashes as "\\":
     # "\(" and "\)" are not valid string escapes and only kept the
     # backslash by accident (newer Python versions warn about them).  The
     # string value is unchanged.
     self.assertEqual(quote('"recip(rord(myfield),1,2,3)"'),
                            '"recip\\(rord\\(myfield\\),1,2,3\\)"') # Seems to be ok to quote function
     # Range and date-math expressions are expected to pass through as-is.
     self.assertEqual(quote('[* TO NOW]'), '[* TO NOW]')
     self.assertEqual(quote('[1976-03-06T23:59:59.999Z TO *]'),
                            '[1976-03-06T23:59:59.999Z TO *]')
     self.assertEqual(quote('[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]'),
                            '[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]')
     self.assertEqual(quote('[NOW-1YEAR/DAY TO NOW/DAY+1DAY]'),
                            '[NOW-1YEAR/DAY TO NOW/DAY+1DAY]')
     self.assertEqual(quote('[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]'),
                            '[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]')
     self.assertEqual(quote('[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]'),
                            '[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]')
Пример #25
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the ``(value, base_value)`` search expressions for one term.

    A truthy ``levenstein_distance`` contributes a ``~<distance>`` suffix.
    Terms containing a double quote are used literally (plus that suffix)
    for both results; wildcard terms go through ``prepare_wildcard`` with
    a quoted, wildcard-free base value; any other term becomes
    ``<prepared>* OR <term><suffix>`` with the bare term as base value.

    Both returned expressions are wrapped in parentheses.
    """
    fuzzy_suffix = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:  # quoted literals
        value = base_value = '%s%s' % (term, fuzzy_suffix)
    elif isWildCard(term):
        value = prepare_wildcard(term)
        base_value = quote(term.replace('*', '').replace('?', ''))
    else:
        base_value = term
        value = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy_suffix)
    return '(%s)' % value, '(%s)' % base_value
Пример #26
0
def makeSimpleExpressions(term, levenstein_distance):
    """Return parenthesized ``(value, base_value)`` expressions for a term.

    ``levenstein_distance``, when truthy, is appended as ``~<distance>``.
    Quoted literals (terms containing ``"``) keep their text and share one
    expression for both results.  Wildcard terms are prepared with
    ``prepare_wildcard`` and their base value is the quoted term without
    ``*`` and ``?``.  Everything else turns into a prefix search OR-ed
    with the (optionally fuzzy) term, the base value being the term itself.
    """
    if levenstein_distance:
        distance_marker = "~%s" % levenstein_distance
    else:
        distance_marker = ""

    if '"' in term:  # quoted literals
        literal = "%s%s" % (term, distance_marker)
        return "(%s)" % literal, "(%s)" % literal

    if isWildCard(term):
        value = prepare_wildcard(term)
        base_value = quote(term.replace("*", "").replace("?", ""))
        return "(%s)" % value, "(%s)" % base_value

    value = "%s* OR %s%s" % (prepare_wildcard(term), term, distance_marker)
    return "(%s)" % value, "(%s)" % term
Пример #27
0
def mangleSearchableText(value, config):
    """Expand a simple SearchableText query into per-term expressions.

    A value that ``isSimpleSearch`` rejects is returned untouched.
    Otherwise each term from ``splitSimpleSearch`` is expanded with
    ``makeSimpleExpressions`` (using the configured
    ``levenshtein_distance``) and the results are joined with spaces.

    Returns the joined expression string, or, when ``config`` carries a
    ``search_pattern``, a one-element set holding that pattern formatted
    with ``value`` (quoted) and ``base_value``.
    """
    template = getattr(config, "search_pattern", "")
    distance = getattr(config, "levenshtein_distance", 0)

    if not isSimpleSearch(value):
        return value

    expressions = [
        makeSimpleExpressions(term, distance) for term in splitSimpleSearch(value)
    ]
    joined_value = " ".join(expr for expr, _ in expressions)
    joined_base = " ".join(base for _, base in expressions)
    if not template:
        return joined_value
    literal = template.format(value=quote(joined_value), base_value=joined_base)
    return set([literal])  # add literal query parameter
Пример #28
0
def mangleSearchableText(value, config, force_complex_search=False):
    """Rewrite a SearchableText query value according to the Solr settings.

    ``config`` falls back to ``getConfig()`` when falsy.  The settings read
    from it are ``search_pattern``, ``force_simple_search``,
    ``allow_complex_search``, ``levenshtein_distance`` and
    ``prefix_wildcard``.

    Outcomes, in the order they are checked:

    * unless ``force_simple_search`` is set, a value that is not a simple
      search (per ``isSimpleSearch``) is returned unchanged;
    * when complex searches are allowed and either the value starts with
      the ``solr:`` prefix or ``force_complex_search`` is true, the
      stripped value (prefix removed) is returned;
    * otherwise every term is expanded with ``makeSimpleExpressions`` and
      the results are joined with spaces.  With a ``search_pattern`` the
      formatted pattern is returned inside a one-element set, otherwise
      the joined string is returned.
    """
    config = config or getConfig()
    pattern = getattr(config, "search_pattern", u"")
    force_simple_search = getattr(config, "force_simple_search", False)
    allow_complex_search = getattr(config, "allow_complex_search", False)
    levenstein_distance = getattr(config, "levenshtein_distance", 0)
    prefix_wildcard = getattr(config, "prefix_wildcard", False)
    value_parts = []
    base_value_parts = []

    # Detect and remove the explicit "solr:" complex-search marker.
    stripped = value.strip()
    force_complex_search_prefix = False
    if stripped.startswith("solr:"):
        stripped = stripped.replace("solr:", "", 1).strip()
        force_complex_search_prefix = True

    if not force_simple_search and not isSimpleSearch(value):
        return value

    if allow_complex_search and (force_complex_search_prefix or force_complex_search):
        # FIXME: fold in catalog solr_complex_search parameter check
        return stripped

    if force_simple_search:
        value = removeSpecialCharactersAndOperators(value)

    for term in splitSimpleSearch(value):
        (term_value, term_base_value) = makeSimpleExpressions(term, levenstein_distance)
        value_parts.append(term_value)
        base_value_parts.append(term_base_value)

    base_value = " ".join(base_value_parts)
    value = " ".join(value_parts)
    if pattern:
        value = pattern.format(
            value=quote(value, prefix_wildcard=prefix_wildcard), base_value=base_value
        )
        return set([value])  # add literal query parameter
    # A second "if pattern:" block that re-encoded the pattern used to sit
    # here; it could never run because a truthy pattern has already
    # returned above, so it was removed.
    return value
Пример #29
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the ``(value, base_value)`` search expressions for one term.

    A truthy ``levenstein_distance`` contributes a ``~<distance>`` suffix.
    Terms containing a double quote are used literally (plus that suffix);
    wildcard terms go through ``prepare_wildcard`` with a quoted,
    wildcard-free base value; any other term becomes
    ``<prepared>* OR <term><suffix>`` with the bare term as base value.

    Only ``value`` is wrapped in parentheses; ``base_value`` is returned
    as-is.
    """
    fuzzy_suffix = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:  # quoted literals
        value = base_value = '%s%s' % (term, fuzzy_suffix)
    elif isWildCard(term):
        value = prepare_wildcard(term)
        base_value = quote(term.replace('*', '').replace('?', ''))
    else:
        base_value = term
        value = '%s* OR %s%s' % (prepare_wildcard(term), term,
                                 fuzzy_suffix)
    # Netsight: we removed the parenthesis around base_value
    #           the first element of the returned tuple (value) is
    #           not used.
    return '(%s)' % value, base_value
Пример #30
0
 def testQuotingProximitySearches(self):
     # A quoted phrase followed by a proximity distance (~N) is expected to
     # come back from quote() unchanged.
     proximity_query = '"jakarta apache"~10'
     self.assertEqual(quote(proximity_query), proximity_query)
Пример #31
0
 def testQuotingFuzzySearches(self):
     # Fuzzy markers (~ with or without a similarity value) are expected to
     # be left untouched by quote().
     for fuzzy_query in ('roam~', 'roam~0.8'):
         self.assertEqual(quote(fuzzy_query), fuzzy_query)
Пример #32
0
 def testQuotingFuzzySearches(self):
     # quote() is expected to return fuzzy queries exactly as given.
     unchanged = ["roam~", "roam~0.8"]
     for query in unchanged:
         self.assertEqual(quote(query), query)
Пример #33
0
 def testQuoting(self):
     # http://lucene.apache.org/java/2_3_2/queryparsersyntax.html
     # All backslashes in the literals below are written as explicit "\\"
     # escapes.  Sequences such as "\)", "\?", "\:" or "\[" are not valid
     # string escapes and only kept their backslash by accident (newer
     # Python versions warn about them); the string values are unchanged.
     self.assertEqual(quote(''), '')
     self.assertEqual(quote(' '), '')
     self.assertEqual(quote('foo'), 'foo')
     self.assertEqual(quote('foo '), 'foo')
     self.assertEqual(quote('"foo"'), '"foo"')
     self.assertEqual(quote('"foo'), '\\"foo')
     self.assertEqual(quote('foo"'), 'foo\\"')
     self.assertEqual(quote('foo bar'), '(foo bar)')
     self.assertEqual(quote('"foo bar" bah'), '("foo bar" bah)')
     self.assertEqual(quote('\\['), '\\[')
     self.assertEqual(quote(')'), '\\)')
     self.assertEqual(quote('"(foo bar)" bah'), '("\\(foo bar\\)" bah)')
     self.assertEqual(quote('"(foo\\"bar)" bah'), '("\\(foo\\"bar\\)" bah)')
     self.assertEqual(quote('"foo bar"'), '"foo bar"')
     self.assertEqual(quote('"foo bar'), '(\\"foo bar)')
     self.assertEqual(quote('foo bar what?'), '(foo bar what?)')
     self.assertEqual(quote('P|This&That'), 'P|This&That')
     self.assertEqual(quote('[]'), '')
     self.assertEqual(quote('()'), '')
     self.assertEqual(quote('{}'), '')
     self.assertEqual(quote('...""'), '...\\"\\"')
     self.assertEqual(quote('\\'), '\\\\') # Search for \ has to be quoted
     self.assertEqual(quote('\\?'), '\\?')
     self.assertEqual(quote('*****@*****.**'), '*****@*****.**')
     self.assertEqual(quote('http://machine/folder and item and some/path and and amilli3*'),
                            '(http\\://machine/folder and item and some/path and and amilli3*)')
     self.assertEqual(quote('"[]"'), '"\\[\\]"')
     self.assertEqual(quote('"{}"'), '"\\{\\}"')
     self.assertEqual(quote('"()"'), '"\\(\\)"')
     self.assertEqual(quote('foo and bar and 42"*'), '(foo and bar and 42\\"\\*)')
     # Can't use ? or * as beginning of new query
     self.assertEqual(quote('"fix and it"*'), '"fix and it"')
     self.assertEqual(quote('"fix and it"?'), '"fix and it"')
     self.assertEqual(quote('foo and bar and [foobar at foo.com]*'),
                            '(foo and bar and \\[foobar at foo.com\\])')
Пример #34
0
 def testQuotingOperatorsGrouping(self):
     # Operators (+, -, NOT, OR, AND, ||, &&) and grouping parentheses are
     # expected to pass through quote(); pairs are (raw query, expected).
     grouped = (
         ('+return +"pink panther"', '(+return +"pink panther")'),
         ("+jakarta lucene", "(+jakarta lucene)"),
         (
             '"jakarta apache" -"Apache Lucene"',
             '("jakarta apache" -"Apache Lucene")',
         ),
         (
             '"jakarta apache" NOT "Apache Lucene"',
             '("jakarta apache" NOT "Apache Lucene")',
         ),
         ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
         (
             '"jakarta apache" AND "Apache Lucene"',
             '("jakarta apache" AND "Apache Lucene")',
         ),
         (
             "(jakarta OR apache) AND website",
             "((jakarta OR apache) AND website)",
         ),
         ("(a AND (b OR c))", "(a AND (b OR c))"),
         ("((a AND b) OR c)", "((a AND b) OR c)"),
         ('"jakarta apache" || jakarta', '("jakarta apache" || jakarta)'),
         (
             '"jakarta apache" && "Apache Lucene"',
             '("jakarta apache" && "Apache Lucene")',
         ),
         (
             "(jakarta || apache) && website",
             "((jakarta || apache) && website)",
         ),
         ("(a && (b || c))", "(a && (b || c))"),
         ("((a && b) || c)", "((a && b) || c)"),
         ("P||This&&That", "(P||This&&That)"),
     )
     for raw, expected in grouped:
         self.assertEqual(quote(raw), expected)
Пример #35
0
 def testQuotingBoostingTerm(self):
     # Pairs of (raw query with a ^boost, expected quote() output).
     boost_cases = [
         ('jakarta^4 apache', '(jakarta^4 apache)'),
         ('jakarta^0.2 apache', '(jakarta^0.2 apache)'),
         ('"jakarta apache"^4 "Apache Lucene"',
          '("jakarta apache"^4 "Apache Lucene")'),
     ]
     for query, quoted in boost_cases:
         self.assertEqual(quote(query), quoted)
Пример #36
0
 def testQuotingOperatorsGrouping(self):
     # Table of (raw query, expected quote() output), checked in order.
     # Covers the word operators (NOT/OR/AND), the symbolic ones (||, &&),
     # the +/- prefixes and nested grouping parentheses.
     operator_cases = [
         ('+return +"pink panther"', '(+return +"pink panther")'),
         ('+jakarta lucene', '(+jakarta lucene)'),
         ('"jakarta apache" -"Apache Lucene"',
          '("jakarta apache" -"Apache Lucene")'),
         ('"jakarta apache" NOT "Apache Lucene"',
          '("jakarta apache" NOT "Apache Lucene")'),
         ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
         ('"jakarta apache" AND "Apache Lucene"',
          '("jakarta apache" AND "Apache Lucene")'),
         ('(jakarta OR apache) AND website',
          '((jakarta OR apache) AND website)'),
         ('(a AND (b OR c))', '(a AND (b OR c))'),
         ('((a AND b) OR c)', '((a AND b) OR c)'),
         ('"jakarta apache" || jakarta', '("jakarta apache" || jakarta)'),
         ('"jakarta apache" && "Apache Lucene"',
          '("jakarta apache" && "Apache Lucene")'),
         ('(jakarta || apache) && website',
          '((jakarta || apache) && website)'),
         ('(a && (b || c))', '(a && (b || c))'),
         ('((a && b) || c)', '((a && b) || c)'),
         ('P||This&&That', '(P||This&&That)'),
     ]
     for query, quoted in operator_cases:
         self.assertEqual(quote(query), quoted)
Пример #37
0
    def buildQueryAndParameters(self, default=None, **args):
        """Build per-field query fragments plus extra query parameters.

        ``default`` is the value searched in the schema's default search
        field; every keyword argument is a field name with its search value.

        Returns a ``(query, params)`` tuple.  ``query`` maps each field name
        (``None`` for the default field) to its query fragment; ``params``
        is the result of running ``args`` through ``subtractQueryParameters``
        and ``cleanupQueryParameters``.  ``query`` is an empty dict when a
        known, indexed, named field was given an empty non-boolean value.
        """
        schema = self.getManager().getSchema() or {}

        params = subtractQueryParameters(args)
        params = cleanupQueryParameters(params, schema)
        config = self.getConfig()

        prepareData(args)
        mangleQuery(args, config, schema)

        logger.debug('building query for "%r", %r', default, args)
        # NOTE(review): the schema is fetched a second time here; it looks
        # redundant with the lookup at the top -- confirm before removing.
        schema = self.getManager().getSchema() or {}
        defaultSearchField = getattr(schema, 'defaultSearchField', None)
        # the default search value is stored under the ``None`` key
        args[None] = default
        query = {}

        for name, value in sorted(args.items()):
            # a ``None`` name resolves to the schema's default search field
            field = schema.get(name or defaultSearchField, None)
            if field is None or not field.indexed:
                logger.info(
                    'dropping unknown search attribute "%s" '
                    ' (%r) for query: %r', name, value, args
                )
                continue
            if isinstance(value, bool):
                value = str(value).lower()
            elif not value:     # solr doesn't like empty fields (+foo:"")
                # an empty default value is skipped; an empty value for a
                # named field aborts the whole query
                if not name:
                    continue
                logger.info(
                    'empty search term form "%s:%s", aborting buildQuery' % (
                        name,
                        value
                    )
                )
                return {}, params
            elif field.class_ == 'solr.BoolField':
                # normalize every given value to a boolean; a mix of true
                # and false values matches everything, so the field is
                # left out of the query
                if not isinstance(value, (tuple, list)):
                    value = [value]
                falses = '0', 'False', MV
                true = lambda v: bool(v) and v not in falses
                value = set(map(true, value))
                if not len(value) == 1:
                    assert len(value) == 2      # just to make sure
                    continue                    # skip when "true or false"
                value = str(value.pop()).lower()
            elif isinstance(value, (tuple, list)):
                # list items should be treated as literals, but
                # nevertheless only get quoted when necessary
                value = '(%s)' % ' OR '.join(map(quote_iterable_item, value))
            elif isinstance(value, set):        # sets are taken literally
                # stored directly, without the "+name:" prefix added below
                if len(value) == 1:
                    query[name] = ''.join(value)
                else:
                    query[name] = '(%s)' % ' OR '.join(value)
                continue
            elif isinstance(value, basestring):
                if field.class_ == 'solr.TextField':
                    if isWildCard(value):
                        value = prepare_wildcard(value)
                    value = quote(value, textfield=True)
                    # if we have an intra-word hyphen, we need quotes
                    if '\\-' in value or '\\+' in value:
                        if value[0] != '"':
                            value = '"%s"' % value
                else:
                    value = quote(value)
                if not value:   # don't search for empty strings, even quoted
                    continue
            else:
                logger.info(
                    'skipping unsupported value "%r" (%s)', value, name
                )
                continue
            # make the fragment mandatory: "+value" for the default field
            # (unless it already starts with + or -), "+name:value" otherwise
            if name is None:
                if value and value[0] not in '+-':
                    value = '+%s' % value
            else:
                value = '+%s:%s' % (name, value)
            query[name] = value
        logger.debug('built query "%s"', query)

        if query:
            optimizeQueryParameters(query, params)
        return query, params
Пример #38
0
def buildQuery(self, default=None, **args):
    """Build per-field query fragments for simple use-cases.

    ``default`` is the value searched in the schema's default search field
    (only used when both it and that field are set); every keyword argument
    is a field name with its search value.

    Returns a dict mapping each field name (``None`` for the default field)
    to its query fragment.  Unknown or non-indexed fields, empty values and
    unsupported value types are left out.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    defaultSearchField = getattr(schema, 'defaultSearchField', None)
    # the default search value is stored under the ``None`` key
    if default is not None and defaultSearchField is not None:
        args[None] = default
    query = {}
    for name, value in sorted(args.items()):
        # a ``None`` name resolves to the schema's default search field
        field = schema.get(name or defaultSearchField, None)
        if field is None or not field.indexed:
            logger.warning(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args)
            continue
        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:  # solr doesn't like empty fields (+foo:"")
            continue
        elif field.class_ == 'solr.BoolField':
            # normalize every given value to a boolean; a mix of true and
            # false values matches everything, so the field is left out
            if not isinstance(value, (tuple, list)):
                value = [value]
            falses = '0', 'False', MV
            true = lambda v: bool(v) and v not in falses
            value = set(map(true, value))
            if not len(value) == 1:
                assert len(value) == 2  # just to make sure
                continue  # skip when "true or false"
            value = str(value.pop()).lower()
        elif isinstance(value, (tuple, list)):
            # list items should be treated as literals, but
            # nevertheless only get quoted when necessary
            def quoteitem(term):
                # encode unicode to UTF-8, quote, and retry as a
                # double-quoted phrase when quote() changed the term
                # without producing a quoted phrase
                if isinstance(term, unicode):
                    term = term.encode('utf-8')
                quoted = quote(term)
                if not quoted.startswith('"') and not quoted == term:
                    quoted = quote('"' + term + '"')
                return quoted

            value = '(%s)' % ' OR '.join(map(quoteitem, value))
        elif isinstance(value, set):  # sets are taken literally
            # stored directly, without the "+name:" prefix added below;
            # only forward slashes are escaped
            if len(value) == 1:
                query[name] = ''.join(value)
            else:
                query[name] = '(%s)' % ' OR '.join(value)
            if '/' in query[name]:
                query[name] = query[name].replace('/', '\\/')
            continue
        elif isinstance(value, basestring):
            if field.class_ == 'solr.TextField':
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                if '\\-' in value or '\\+' in value:
                    if value[0] != '"':
                        value = '"%s"' % value
            else:
                value = quote(value)
            if not value:  # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue
        # make the fragment mandatory: "+value" for the default field
        # (unless it already starts with + or -), "+name:value" otherwise
        if name is None:
            if value and value[0] not in '+-':
                value = '+%s' % value
        else:
            value = '+%s:%s' % (name, value)

        # Since Solr 4.0 slashes must be escaped
        # see: http://wiki.apache.org/solr/SolrQuerySyntax
        if '/' in value:
            value = value.replace('/', '\\/')

        query[name] = value

    logger.debug('built query "%s"', query)
    return query
Пример #39
0
def mangleQuery(keywords, config, schema):
    """ translate / mangle query parameters to replace zope specifics
        with equivalent constructs for solr

        `keywords` maps index names to query values.  it is modified in
        place and nothing is returned.  options for a value (`range`,
        `operator`, `depth`) can be passed in three ways:

        * as an old-style `<index>_usage` entry of the form
          `<option>:<setting>`
        * as a dict holding the actual value under the `query` key
        * as an object providing a `query` attribute

        dicts passed in as values are copied, i.e. they are not altered.

        `config` is checked for the optional settings `search_pattern`,
        `effective_steps` and `exclude_user`.  `schema` provides the
        field names used to detect path indexes:  a name qualifies if
        exactly three fields start with it (up to the first underscore)
        and end in `string`, `depth` or `parents`.  without a schema
        only `path` is treated as such.

        raises `ValueError` for an `_usage` value without a colon,
        `KeyError` for a dict value lacking `query` and `AssertionError`
        if options were given for a value they cannot be applied to.
    """
    extras = {}
    # iterate over a snapshot, because entries are removed along the way;
    # a live `items()` view must not change size while being iterated
    for key, value in list(keywords.items()):
        if key.endswith('_usage'):          # convert old-style parameters
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}     # strip off "_usage"
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            # work on a copy, so the caller's dict keeps its entries
            # and can be used for another query later on
            options = dict(value)
            keywords[key] = options.pop('query')
            extras[key] = options
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes
    if schema:
        epi_indexes = {}
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        # only names for which all three fields exist qualify
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    # again a snapshot is needed, since entries get added and removed;
    # parameters added below are therefore not processed themselves
    for key, value in list(keywords.items()):
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term:     # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value),
                    base_value=base_value)
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term:       # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
        if key in epi_indexes:
            # path queries are run against the `<index>_parents` field
            parents = '%s_parents' % key
            path = keywords[parents] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords[parents] = set()
                    for p in path:
                        base = len(p.split('/'))
                        # a depth of 0 matches the level of the path
                        # itself, otherwise the levels below it
                        params.add(tmpl % (key, base + (depth and 1),
                            base + depth, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round down to a multiple of `steps` seconds
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            del keywords[key]
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                user_id = getSecurityManager().getUser().getId()
                # NOTE(review): the id is presumably `None` for zope's
                # anonymous user, which used to break the concatenation
                # below -- there's no user token to remove in that case
                if user_id is not None:
                    token = 'user$' + user_id
                    if token in value:
                        value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        elif not isinstance(value, basestring):
            # all options should have been consumed at this point
            assert not args, 'unsupported usage: %r' % args
Пример #40
0
 def buildQuery(self, default=None, **args):
     """ turn the given search parameters into solr query terms

         `default` is the term for the schema's default search field,
         every other keyword argument is searched for in the field of
         the same name.  the result is a dict mapping each name (`None`
         for the default term) to its query term.  parameters that are
         empty, of an unsupported type, or whose field is unknown or not
         indexed are left out. """
     logger.debug('building query for "%r", %r', default, args)
     schema = self.getManager().getSchema() or {}
     defaultSearchField = getattr(schema, 'defaultSearchField', None)
     args[None] = default

     def quote_literal(term):
         # list items are meant to be literals:  if plain quoting had to
         # change the term, it's turned into a quoted phrase instead
         if isinstance(term, unicode):
             term = term.encode('utf-8')
         plain = quote(term)
         if plain.startswith('"') or plain == term:
             return plain
         return quote('"' + term + '"')

     query = {}
     for name, value in args.items():
         field = schema.get(name or defaultSearchField, None)
         if field is None or not field.indexed:
             logger.warning('dropping unknown search attribute "%s" '
                 ' (%r) for query: %r', name, value, args)
             continue
         if isinstance(value, bool):
             term = str(value).lower()
         elif not value:     # solr doesn't like empty fields (+foo:"")
             continue
         elif field.class_ == 'solr.BoolField':
             candidates = value
             if not isinstance(candidates, (tuple, list)):
                 candidates = [candidates]
             falses = '0', 'False', MV
             flags = set(bool(v) and v not in falses for v in candidates)
             if len(flags) != 1:
                 assert len(flags) == 2      # just to make sure
                 continue                    # skip when "true or false"
             term = str(flags.pop()).lower()
         elif isinstance(value, (tuple, list)):
             # items only get quoted when necessary
             alternatives = [quote_literal(item) for item in value]
             term = '(%s)' % ' OR '.join(alternatives)
         elif isinstance(value, set):        # sets are taken literally
             literal = ' OR '.join(value)
             if len(value) != 1:
                 literal = '(%s)' % literal
             query[name] = literal
             continue
         elif isinstance(value, basestring):
             if field.class_ != 'solr.TextField':
                 term = quote(value)
             else:
                 if isWildCard(value):
                     value = prepare_wildcard(value)
                 term = quote(value, textfield=True)
                 # if we have an intra-word hyphen, we need quotes
                 escaped_sign = '\\-' in term or '\\+' in term
                 if escaped_sign and term[0] != '"':
                     term = '"%s"' % term
             if not term:    # don't search for empty strings, even quoted
                 continue
         else:
             logger.info('skipping unsupported value "%r" (%s)',
                 value, name)
             continue
         if name is not None:
             term = '+%s:%s' % (name, term)
         elif term and term[0] not in '+-':
             # the default term is only marked as required
             # if it doesn't carry a sign of its own already
             term = '+%s' % term
         query[name] = term
     logger.debug('built query "%s"', query)
     return query
Пример #41
0
 def testQuoting(self):
     # expected results of `quote` for a range of inputs;  for the
     # query syntax see
     # http://lucene.apache.org/java/2_3_2/queryparsersyntax.html

     # empty and whitespace-only input yields an empty string,
     # trailing whitespace is dropped
     self.assertEqual(quote(""), "")
     self.assertEqual(quote(" "), "")
     self.assertEqual(quote("foo"), "foo")
     self.assertEqual(quote("foo "), "foo")
     # balanced double quotes are kept, a single one gets escaped
     self.assertEqual(quote('"foo"'), '"foo"')
     self.assertEqual(quote('"foo'), '\\"foo')
     self.assertEqual(quote('foo"'), 'foo\\"')
     # multiple terms get wrapped in parentheses
     self.assertEqual(quote("foo bar"), "(foo bar)")
     self.assertEqual(quote('"foo bar" bah'), '("foo bar" bah)')
     self.assertEqual(quote("\\["), "\\[")
     self.assertEqual(quote(")"), "\\)")
     # parentheses within a quoted phrase are escaped
     self.assertEqual(quote('"(foo bar)" bah'), '("\\(foo bar\\)" bah)')
     self.assertEqual(quote('"(foo\\"bar)" bah'), '("\\(foo\\"bar\\)" bah)')
     self.assertEqual(quote('"foo bar"'), '"foo bar"')
     self.assertEqual(quote('"foo bar'), '(\\"foo bar)')
     self.assertEqual(quote("foo bar what?"), "(foo bar what?)")
     self.assertEqual(quote("P|This&That"), "P|This&That")
     # empty pairs of brackets yield an empty string
     self.assertEqual(quote("[]"), "")
     self.assertEqual(quote("()"), "")
     self.assertEqual(quote("{}"), "")
     self.assertEqual(quote('...""'), '...\\"\\"')
     self.assertEqual(quote("\\"), "\\\\")  # Search for \ has to be quoted
     self.assertEqual(quote("\\?"), "\\?")
     self.assertEqual(quote("*****@*****.**"), "*****@*****.**")
     # colons and slashes are escaped, a trailing wildcard is kept
     self.assertEqual(
         quote("http://machine/folder and item and some/path "
               "and and amilli3*"),
         "(http\\:\\/\\/machine\\/folder and item and "
         "some\\/path and and amilli3*)",
     )
     # pairs of brackets within quotes are kept, but escaped
     self.assertEqual(quote('"[]"'), '"\\[\\]"')
     self.assertEqual(quote('"{}"'), '"\\{\\}"')
     self.assertEqual(quote('"()"'), '"\\(\\)"')
     self.assertEqual(quote('foo and bar and 42"*'), "(foo and bar and "
                      '42\\"\\*)')
     # Can't use ? or * as beginning of new query
     self.assertEqual(quote('"fix and it"*'), '"fix and it"')
     self.assertEqual(quote('"fix and it"?'), '"fix and it"')
     self.assertEqual(
         quote("foo and bar and [foobar at foo.com]*"),
         "(foo and bar and \\[foobar at foo.com\\])",
     )