def _Snippet(self, query, field, *args):
    """Create a snippet given a query and the field to query on.

    Each term of the query is looked up in the inverted index. The snippet is
    generated around the first recorded position of the first posting that
    belongs to this document. Only when none of the terms produce such a
    posting does the function fall back to the start of the field value.

    Args:
      query: A query node containing only a bare term (no operators).
      field: A node naming the field to query on.
      *args: Unused optional arguments. These are not used on dev_appserver.

    Returns:
      The value returned by _GenerateSnippet for the matching position. If no
      query term matches this document, the first
      search_util.DEFAULT_MAX_SNIPPET_LENGTH characters of the field value
      followed by '...'. An empty string if the field value is empty.

    Raises:
      ExpressionEvaluationError: if this is a sort expression, or if the
        index schema reports the field as a NUMBER field.
    """
    field = query_parser.GetQueryNodeText(field)
    query_text = query_parser.GetQueryNodeText(query)

    if self._is_sort_expression:
        raise ExpressionEvaluationError(
            'Failed to parse sort expression \'snippet(' + query_text + ', ' +
            field + ')\': snippet() is not supported in sort expressions')

    schema = self._inverted_index.GetSchema()
    if schema.IsType(field, document_pb.FieldValue.NUMBER):
        raise ExpressionEvaluationError(
            'Failed to parse field expression \'snippet(' + query_text +
            ', ' + field + ')\': snippet() argument 2 must be text')

    # The field value does not depend on the term or posting being examined,
    # so look it up once. Without a value neither a snippet nor the fallback
    # text can be produced.
    field_val = self._GetFieldValue(
        search_util.GetFieldInDocument(self._doc_pb, field))
    if not field_val:
        return ''

    doc_id = self._doc_pb.id()
    for term in self._tokenizer.TokenizeText(query_text.strip('"')):
        search_token = tokens.Token(chars=u'%s:%s' % (field, term.chars))
        for posting in self._inverted_index.GetPostingsForToken(search_token):
            if posting.doc_id != doc_id or not posting.positions:
                continue
            doc_words = [
                token.chars for token in
                self._case_preserving_tokenizer.TokenizeText(field_val)
            ]
            return self._GenerateSnippet(
                doc_words, posting.positions[0],
                search_util.DEFAULT_MAX_SNIPPET_LENGTH)

    # Reached only after every term has been tried. The previous for/else
    # form had no `break`, so its `else` could cut the search short after the
    # first term instead of acting as a "nothing matched" branch.
    return '%s...' % field_val[:search_util.DEFAULT_MAX_SNIPPET_LENGTH]
def _Eval(self, node):
    """Evaluate an expression node on the document.

    Args:
      node: The expression AST node representing an expression subtree.

    Returns:
      The Python value of the node: the result of the registered function for
      FN nodes, the result of the arithmetic helper for operator nodes, a
      float for INT/FLOAT literals, the unquoted text for PHRASE nodes, and
      for NAME nodes either the document score ('_score') or the value of the
      named field.

    Raises:
      _ExpressionError: if a NAME node refers to a field that is not in the
        document, or if the node type is not handled here.
    """
    # Compare token types by value. `is` tests object identity, which only
    # coincides with equality when both sides happen to be the same object.
    node_type = node.getType()

    if node_type == ExpressionParser.FN:
        func = self._function_table[query_parser.GetQueryNodeText(node)]
        return func(*node.children)
    if node_type == ExpressionParser.PLUS:
        return self._EvalBinaryOp(lambda a, b: a + b, 'addition', node)
    if node_type == ExpressionParser.MINUS:
        return self._EvalBinaryOp(lambda a, b: a - b, 'subtraction', node)
    if node_type == ExpressionParser.DIV:
        return self._EvalBinaryOp(lambda a, b: a / b, 'division', node)
    if node_type == ExpressionParser.TIMES:
        return self._EvalBinaryOp(lambda a, b: a * b, 'multiplication', node)
    if node_type == ExpressionParser.NEG:
        return self._EvalUnaryOp(lambda a: -a, 'negation', node)
    if node_type in (ExpressionParser.INT, ExpressionParser.FLOAT):
        return float(query_parser.GetQueryNodeText(node))
    if node_type == ExpressionParser.PHRASE:
        return query_parser.GetQueryNodeText(node).strip('"')
    if node_type == ExpressionParser.NAME:
        name = query_parser.GetQueryNodeText(node)
        if name == '_score':
            return self._doc.score
        field = search_util.GetFieldInDocument(self._doc_pb, name)
        if field:
            return search_util.GetFieldValue(field)
        raise _ExpressionError('No field %s in document' % name)

    raise _ExpressionError('Unable to handle node %s' % node)
def _Snippet(self, query, field, *args):
    """Create a snippet given a query and the field to query on.

    Each term of the query is looked up in the inverted index. The snippet is
    generated around the first recorded position of the first posting that
    belongs to this document. Only when none of the terms produce such a
    posting does the function fall back to the start of the field value.

    Args:
      query: A query node containing only a bare term (no operators).
      field: A node naming the field to query on.
      *args: Unused optional arguments. These are not used on dev_appserver.

    Returns:
      The value returned by _GenerateSnippet for the matching position. If no
      query term matches this document, the first
      search_util.DEFAULT_MAX_SNIPPET_LENGTH characters of the field value
      followed by '...'. None if the field value is empty.
    """
    field = query_parser.GetQueryNodeText(field)
    terms = self._tokenizer.TokenizeText(
        query_parser.GetQueryNodeText(query).strip('"'))

    # The field value does not depend on the term or posting being examined,
    # so look it up once. Without a value neither a snippet nor the fallback
    # text can be produced.
    field_val = search_util.GetFieldValue(
        search_util.GetFieldInDocument(self._doc_pb, field))
    if not field_val:
        return None

    doc_id = self._doc_pb.id()
    for term in terms:
        search_token = tokens.Token(chars=u'%s:%s' % (field, term.chars))
        for posting in self._inverted_index.GetPostingsForToken(search_token):
            if posting.doc_id != doc_id or not posting.positions:
                continue
            doc_words = [
                token.chars for token in
                self._case_preserving_tokenizer.TokenizeText(field_val)
            ]
            return self._GenerateSnippet(
                doc_words, posting.positions[0],
                search_util.DEFAULT_MAX_SNIPPET_LENGTH)

    # Reached only after every term has been tried. The previous for/else
    # form had no `break`, so its `else` could cut the search short after the
    # first term instead of acting as a "nothing matched" branch.
    return '%s...' % field_val[:search_util.DEFAULT_MAX_SNIPPET_LENGTH]
def _Eval(self, node):
    """Compute the value of an expression subtree for the current document.

    Args:
      node: The expression AST node at the root of the subtree to evaluate.

    Returns:
      The Python value of the node: the result of the registered function
      when the node type is in the function table, the result of the
      arithmetic helper for operator nodes, a float for INT/FLOAT literals,
      the unquoted text for PHRASE nodes, and for NAME nodes either the
      document score ('_score') or the value of the named field.

    Raises:
      _ExpressionError: The expression cannot be evaluated on this document,
        either because the node type is not handled or because the document
        lacks the referenced field. Callers should catch this and may log it;
        it is not fatal and only signals that the expression should not be
        set on this document.
    """
    node_type = node.getType()

    # Registered functions take precedence over the built-in node handling.
    if node_type in self._function_table:
        return self._function_table[node_type](*node.children)

    binary_ops = {
        ExpressionParser.PLUS: (lambda a, b: a + b, 'addition'),
        ExpressionParser.MINUS: (lambda a, b: a - b, 'subtraction'),
        ExpressionParser.DIV: (lambda a, b: a / b, 'division'),
        ExpressionParser.TIMES: (lambda a, b: a * b, 'multiplication'),
    }
    if node_type in binary_ops:
        operation, label = binary_ops[node_type]
        return self._EvalBinaryOp(operation, label, node)

    if node_type == ExpressionParser.NEG:
        return self._EvalUnaryOp(lambda a: -a, 'negation', node)

    if node_type in (ExpressionParser.INT, ExpressionParser.FLOAT):
        return float(query_parser.GetQueryNodeText(node))

    if node_type == ExpressionParser.PHRASE:
        return query_parser.GetQueryNodeText(node).strip('"')

    if node_type != ExpressionParser.NAME:
        raise _ExpressionError('Unable to handle node %s' % node)

    name = query_parser.GetQueryNodeText(node)
    if name == '_score':
        return self._doc.score

    doc_field = search_util.GetFieldInDocument(self._doc_pb, name)
    if not doc_field:
        raise _ExpressionError('No field %s in document' % name)
    return self._GetFieldValue(doc_field)
def SortKey(scored_doc):
    """Return the sort key for a document based on the request parameters.

    Uses `sort_spec` and `default_value`, which are defined outside this
    function.

    Args:
      scored_doc: The scored document; its `document` attribute is searched
        for the field named by the sort expression.

    Returns:
      `default_value` if the document has no such field. Otherwise the
      field's string value converted to float for number field types, to
      epoch time for DATE fields, or returned unchanged for every other type.
    """
    field = search_util.GetFieldInDocument(
        scored_doc.document, sort_spec.sort_expression())
    if not field:
        return default_value

    value = field.value()
    string_val = value.string_value()
    value_type = value.type()

    if value_type in search_util.NUMBER_DOCUMENT_FIELD_TYPES:
        return float(string_val)
    # Compare the type constant by value; `is` tests object identity and is
    # not a reliable equality check.
    if value_type == document_pb.FieldValue.DATE:
        return search_util.EpochTime(search_util.DeserializeDate(string_val))
    return string_val
def _MatchField(self, field_query_node, match, document):
    """Check if a field matches a query tree.

    Args:
      field_query_node: Either the field name as a str, or a node whose
        getText() yields the field name.
      match: The query subtree to match against the field; passed through to
        the type-specific matcher.
      document: The document in which the field is looked up.

    Returns:
      False if the document has no such field; otherwise the result of the
      text, numeric or date matcher selected by the field's value type.

    Raises:
      search_util.UnsupportedOnDevError: if the field's value type is not a
        text type, a number type, or DATE.
    """
    field_name = (
        field_query_node if isinstance(field_query_node, str)
        else field_query_node.getText())
    field = search_util.GetFieldInDocument(document, field_name)
    if not field:
        return False

    value_type = field.value().type()
    if value_type in search_util.TEXT_DOCUMENT_FIELD_TYPES:
        matcher = self._MatchTextField
    elif value_type in search_util.NUMBER_DOCUMENT_FIELD_TYPES:
        matcher = self._MatchNumericField
    elif value_type == document_pb.FieldValue.DATE:
        matcher = self._MatchDateField
    else:
        raise search_util.UnsupportedOnDevError(
            'Matching to field type of field "%s" (type=%d) is unsupported on '
            'dev server' % (field.name(), field.value().type()))

    return matcher(field, match, document)
def _CheckInvalidNumericComparison(self, match, document):
    """Reject numeric comparisons whose right hand side is not a constant.

    Valid numeric comparisons are "numeric_field OP numeric_constant" where
    OP is one of [>, <, >=, <=, =, :]. The right hand side text is looked up
    in the document as a NUMBER field; if that lookup finds something, the
    comparison is rejected.

    Args:
      match: The right hand side argument of the operator.
      document: The document we are checking for a match.

    Raises:
      ExpressionTreeException: when right hand side of numeric comparison is
        not a numeric constant.
    """
    rhs_text = query_parser.GetQueryNodeText(match)
    numeric_field = search_util.GetFieldInDocument(
        document, rhs_text, document_pb.FieldValue.NUMBER)
    if not numeric_field:
        return
    raise ExpressionTreeException(
        'Expected numeric constant, found \"' + rhs_text + '\"')
def _Snippet(self, query, field, *args):
    """Build a snippet of a field around the first matching query term.

    Args:
      query: A query node; its text, with surrounding double quotes stripped,
        is tokenized into the terms to look for.
      field: A node naming the field to query on.
      *args: Additional positional arguments; ignored.

    Returns:
      The value returned by _GenerateSnippet for the first posting that
      belongs to this document and has at least one position, or None if no
      term has such a posting.
    """
    field_name = query_parser.GetQueryNodeText(field)
    query_text = query_parser.GetQueryNodeText(query).strip('"')

    for query_term in self._tokenizer.TokenizeText(query_text):
        # Index tokens for a field are keyed as "<field>:<term>".
        index_token = tokens.Token(
            chars=u'%s:%s' % (field_name, query_term.chars))
        for hit in self._inverted_index.GetPostingsForToken(index_token):
            if hit.doc_id == self._doc_pb.id() and hit.positions:
                field_text = search_util.GetFieldValue(
                    search_util.GetFieldInDocument(self._doc_pb, field_name))
                words = [
                    tok.chars for tok in
                    self._case_preserving_tokenizer.TokenizeText(field_text)
                ]
                return self._GenerateSnippet(
                    words, hit.positions[0],
                    search_util.DEFAULT_MAX_SNIPPET_LENGTH)

    return None
def _Eval(self, node, return_type=None, allow_rank=True):
    """Compute the value of an expression subtree for the current document.

    Args:
      node: The expression AST node at the root of the subtree to evaluate.
      return_type: The type to retrieve for fields with multiple types in the
        expression, for use when the field type is ambiguous and cannot be
        inferred from context. It is forwarded to registered functions, the
        numeric operator helpers and the field lookup. If None, the first
        field type found in the doc list is retrieved.
      allow_rank: Whether '_rank' may be evaluated; intended for expressions
        used in a sort context.

    Returns:
      The Python value of the node: the result of the registered function
      when the node type is in the function table, the result of the numeric
      helper for operator nodes, a float for INT/FLOAT literals, the unquoted
      text for PHRASE nodes, and for NAME nodes the document score
      ('_score'), the document order id ('_rank') or the value of the named
      field.

    Raises:
      _ExpressionError: The expression cannot be evaluated on this document,
        either because the node type is not handled or because the document
        lacks the referenced field. Callers should catch this and may log it;
        it is not fatal and only signals that the expression should not be
        set on this document.
      QueryExpressionEvaluationError: '_rank' was referenced while allow_rank
        is false. Unlike _ExpressionError, this should be returned to users
        as a query error status.
    """
    node_type = node.getType()

    # Registered functions take precedence over the built-in node handling.
    if node_type in self._function_table:
        return self._function_table[node_type](return_type, *node.children)

    numeric_binary_ops = {
        ExpressionParser.PLUS: (lambda a, b: a + b, 'addition'),
        ExpressionParser.MINUS: (lambda a, b: a - b, 'subtraction'),
        ExpressionParser.DIV: (lambda a, b: a / b, 'division'),
        ExpressionParser.TIMES: (lambda a, b: a * b, 'multiplication'),
    }
    if node_type in numeric_binary_ops:
        operation, label = numeric_binary_ops[node_type]
        return self._EvalNumericBinaryOp(operation, label, node, return_type)

    if node_type == ExpressionParser.NEG:
        return self._EvalNumericUnaryOp(
            lambda a: -a, 'negation', node, return_type)

    if node_type in (ExpressionParser.INT, ExpressionParser.FLOAT):
        return float(query_parser.GetQueryNodeText(node))

    if node_type == ExpressionParser.PHRASE:
        return query_parser.GetQueryNodeText(node).strip('"')

    if node_type != ExpressionParser.NAME:
        raise _ExpressionError('Unable to handle node %s' % node)

    name = query_parser.GetQueryNodeText(node)
    if name == '_score':
        return self._doc.score
    if name == '_rank':
        if not allow_rank:
            raise QueryExpressionEvaluationError(
                'SortSpec order must be descending in \'_rank\'')
        return self._doc.document.order_id()

    doc_field = search_util.GetFieldInDocument(
        self._doc_pb, name, return_type)
    if not doc_field:
        raise _ExpressionError('No field %s in document' % name)
    return self._GetFieldValue(doc_field)