Exemplo n.º 1
0
    def _CheckMatch(self, node, document):
        """Decide whether a document satisfies a query tree node.

        Sequence nodes match when every child matches or, failing that, when
        _MatchGlobalPhrase accepts the node. Conjunctions need every child to
        match, disjunctions need any child, and negations invert the result
        of their first child. Comparison nodes are dispatched on their
        left-hand side: global and function nodes go to _MatchGlobal and
        _MatchFunction, anything else is treated as a field comparison and
        handed to _MatchAnyField after validation. Every other node type
        yields False.

        Args:
          node: the query node to match
          document: the document to match

        Returns:
          True iff the query node matches the document.

        Raises:
          ExpressionTreeException: when the != operator is used, or when a
            non-inequality field comparison uses a field name accepted by
            _IsValidDateValue or _IsValidNumericValue. Inequality comparisons
            whose value is not a float are validated by
            _CheckValidDateComparison.
        """
        node_type = node.getType()

        def children_match():
            # Lazy on purpose: all()/any() stop at the first deciding child.
            return (self._CheckMatch(child, document)
                    for child in node.children)

        if node_type == QueryParser.SEQUENCE:
            return (all(children_match())
                    or self._MatchGlobalPhrase(node, document))
        if node_type == QueryParser.CONJUNCTION:
            return all(children_match())
        if node_type == QueryParser.DISJUNCTION:
            return any(children_match())
        if node_type == QueryParser.NEGATION:
            return not self._CheckMatch(node.children[0], document)
        if node_type == QueryParser.NE:
            raise ExpressionTreeException(
                '!= comparison operator is not available')
        if node_type not in query_parser.COMPARISON_TYPES:
            return False

        # From here on the node is a binary comparison: <lhs> <op> <match>.
        lhs, match = node.children
        lhs_type = lhs.getType()
        if lhs_type == QueryParser.GLOBAL:
            return self._MatchGlobal(match, document)
        if lhs_type == QueryParser.FUNCTION:
            return self._MatchFunction(lhs, match, node_type, document)

        field_name = self._GetFieldName(lhs)
        if node_type in INEQUALITY_COMPARISON_TYPES:
            try:
                # Only probing whether the value parses as a number; the
                # parsed result itself is not needed.
                float(query_parser.GetPhraseQueryNodeText(match))
            except ValueError:
                self._CheckValidDateComparison(field_name, match)
        elif (self._IsValidDateValue(field_name)
              or self._IsValidNumericValue(field_name)):
            raise ExpressionTreeException('Invalid field name "%s"' %
                                          field_name)
        return self._MatchAnyField(lhs, match, node_type, document)
Exemplo n.º 2
0
 def _CheckValidDateComparison(self, field_name, match):
   """Validate that a query node can be compared against a date field.

   Args:
     field_name: name of the field being compared; used in error messages.
     match: the query node holding the right-hand side of the comparison.

   Raises:
     ExpressionTreeException: when match is a function node, or when it is
       a value node whose text is rejected by _IsValidDateValue.
   """
   node_type = match.getType()

   if node_type == QueryParser.FUNCTION:
     # The first child is the function's name node; the second is unused.
     name, _ = match.children
     raise ExpressionTreeException('Unable to compare "%s" with "%s()"' %
                                   (field_name, name))

   if node_type != QueryParser.VALUE:
     # Other node kinds are not validated here.
     return

   match_val = query_parser.GetPhraseQueryNodeText(match)
   if self._IsValidDateValue(match_val):
     return
   raise ExpressionTreeException('Unable to compare "%s" with "%s"' %
                                 (field_name, match_val))
Exemplo n.º 3
0
    def _CheckValidDateComparison(self, field_name, match):
        """Check that a value node holds a date in YYYY-MM-DD form.

        Nodes that are not of type QueryParser.VALUE are accepted without
        any check.

        Args:
          field_name: name of the field being compared; used in the error
            message only.
          match: the query node holding the right-hand side of the
            comparison.

        Raises:
          ExpressionTreeException: when the node text cannot be parsed with
            the '%Y-%m-%d' format.
        """
        if match.getType() != QueryParser.VALUE:
            return

        # Extract the text outside the try block: the handler below formats
        # match_val, so it must already be bound when ValueError is caught.
        match_val = query_parser.GetPhraseQueryNodeText(match)
        try:
            datetime.datetime.strptime(match_val, '%Y-%m-%d')
        except ValueError:
            raise ExpressionTreeException(
                'Unable to compare "%s" with "%s"' %
                (field_name, match_val))
Exemplo n.º 4
0
    def _MatchPhrase(self, field, match, document):
        """Match a textual field with a phrase query node.

        ATOM fields are delegated to _MatchRawPhraseWithRawAtom using the raw
        texts. For every other field type an empty phrase text never matches.
        UNTOKENIZED_PREFIX fields match when the normalized field text starts
        with the normalized phrase. All remaining field types are tokenized:
        the posting of the first phrase token for this document is looked up,
        and the phrase is compared word by word at each recorded position.
        For TOKENIZED_PREFIX fields a document word also matches when it
        merely starts with the corresponding phrase word.

        Args:
          field: the document field whose text is searched.
          match: the phrase query node.
          document: the document containing the field; its id selects the
            posting to inspect.

        Returns:
          True iff the phrase matches the field. A non-empty phrase text
          that tokenizes to nothing is treated as a match.
        """
        raw_field_text = field.value().string_value()
        raw_phrase_text = query_parser.GetPhraseQueryNodeText(match)
        field_type = field.value().type()

        if field_type == document_pb.FieldValue.ATOM:
            return self._MatchRawPhraseWithRawAtom(raw_field_text,
                                                   raw_phrase_text)

        if not raw_phrase_text:
            return False

        if field_type == document_pb.FieldValue.UNTOKENIZED_PREFIX:
            phrase = self._parser.Normalize(raw_phrase_text, field_type)
            field_text = self._parser.Normalize(raw_field_text, field_type)
            return field_text.startswith(phrase)

        phrase = self._parser.TokenizeText(raw_phrase_text)
        field_text = self._parser.TokenizeText(raw_field_text)
        if not phrase:
            return True

        # Locate this document's posting for the first phrase token.
        posting = next(
            (post for post in self._PostingsForFieldToken(
                field.name(), phrase[0].chars)
             if post.doc_id == document.id()),
            None)
        if not posting:
            return False

        # Both are loop-invariant, so compute them once up front.
        is_prefix_field = (
            field_type == document_pb.FieldValue.TOKENIZED_PREFIX)
        phrase_words = [token.chars for token in phrase]

        for position in posting.positions:
            # zip() returns an iterator on Python 3, so the pairs must be
            # materialized before their count can be checked with len().
            word_pairs = list(
                zip((token.chars for token in field_text[position:]),
                    phrase_words))
            if len(word_pairs) != len(phrase_words):
                # Fewer field tokens remain than the phrase needs.
                continue

            if all(doc_word == phrase_word
                   or (is_prefix_field and doc_word.startswith(phrase_word))
                   for doc_word, phrase_word in word_pairs):
                return True
        return False
Exemplo n.º 5
0
    def _MatchComparableField(self, field, match, cast_to_type, op, document):
        """Compare a field value with a query value after casting both.

        Comparable types are anything that supports <, >, <=, >= and ==,
        which for this code means numbers and dates.

        Args:
          field: The document_pb.Field to test
          match: The query node to match against
          cast_to_type: Callable applied to the field's string value and to
            the query node text to obtain comparable values
          op: The query node type representing the comparison to perform
          document: The document that the field is in (not used here)

        Returns:
          True iff the field matches the query. False when the query node is
          not a VALUE node or when its text makes cast_to_type raise
          ValueError.

        Raises:
          ExpressionTreeException: Raised when the != operator is used.
          UnsupportedOnDevError: Raised when op is none of the supported
            comparison operators.
        """
        # The field value is cast first; a failure here propagates to the
        # caller, unlike a failure on the query value below.
        field_val = cast_to_type(field.value().string_value())

        if match.getType() != QueryParser.VALUE:
            return False
        try:
            match_val = cast_to_type(
                query_parser.GetPhraseQueryNodeText(match))
        except ValueError:
            return False

        if op in (QueryParser.EQ, QueryParser.HAS):
            return field_val == match_val
        elif op == QueryParser.NE:
            raise ExpressionTreeException(
                '!= comparison operator is not available')
        elif op == QueryParser.GT:
            return field_val > match_val
        elif op == QueryParser.GE:
            return field_val >= match_val
        elif op == QueryParser.LESSTHAN:
            return field_val < match_val
        elif op == QueryParser.LE:
            return field_val <= match_val
        else:
            raise search_util.UnsupportedOnDevError(
                'Operator %s not supported for numerical fields on development server.'
                % match.getText())
Exemplo n.º 6
0
  def _MatchPhrase(self, field, match, document):
    """Match a textual field with a phrase query node.

    ATOM fields are delegated to _MatchRawPhraseWithRawAtom using the raw
    texts. For other fields an empty phrase text never matches. Otherwise
    both texts are passed through search_util.RemoveAccentsNfkd and
    tokenized, and the phrase is compared word by word at every position
    recorded in this document's posting for the first phrase token.

    Args:
      field: the document field whose text is searched.
      match: the phrase query node.
      document: the document containing the field; its id selects the
        posting to inspect.

    Returns:
      True iff the phrase matches the field. A non-empty phrase text that
      tokenizes to nothing is treated as a match.
    """
    raw_field = field.value().string_value()
    raw_phrase = query_parser.GetPhraseQueryNodeText(match)

    if field.value().type() == document_pb.FieldValue.ATOM:
      return self._MatchRawPhraseWithRawAtom(raw_field, raw_phrase)

    if not raw_phrase:
      return False

    phrase_tokens = self._parser.TokenizeText(
        search_util.RemoveAccentsNfkd(raw_phrase))
    field_tokens = self._parser.TokenizeText(
        search_util.RemoveAccentsNfkd(raw_field))
    if not phrase_tokens:
      return True

    # First posting of the leading phrase token that belongs to this document.
    posting = next(
        (candidate
         for candidate in self._PostingsForFieldToken(
             field.name(), phrase_tokens[0].chars)
         if candidate.doc_id == document.id()),
        None)
    if not posting:
      return False

    phrase_words = [token.chars for token in phrase_tokens]
    for start in posting.positions:
      tail_words = (token.chars for token in field_tokens[start:])
      pairs = list(zip(tail_words, phrase_words))
      # A shorter pair list means the field ends before the phrase does.
      if len(pairs) != len(phrase_words):
        continue
      if not any(doc_word != phrase_word for doc_word, phrase_word in pairs):
        return True
    return False