def _check_elements(self):
    """
    Validate the elements listed on the model.

    Nothing is checked when the model has no elements. Otherwise the model's
    kind must equal IngredientKinds.index.value and every element slug must
    be found via self.session.query(IngredientModel).get().

    :raises ValidationException: if a non-index kind has elements, or if an
                                 element slug cannot be found.
    :return: None
    """
    element_slugs = self.model.elements
    if not element_slugs:
        return

    # Only the index kind is allowed to carry elements.
    if self.model.kind != IngredientKinds.index.value:
        raise ValidationException(
            "Kind %s of %s cannot have elements." %
            (self.model.kind, self.model.slug))

    for element_slug in element_slugs:
        element = self.session.query(IngredientModel).get(element_slug)
        if element is None:
            raise ValidationException(
                "Element %s of %s does not exist." %
                (element_slug, self.model.slug))
def raw_to_obj(raw_citation):
    """
    Build a Citation object from a raw citation mapping.

    The raw input is first passed through CitationFactory.sanitize_raw().
    Each entry under 'notes' becomes a Text object, and 'date' is reduced
    to just its year. Every remaining key is handed to Citation() as a
    keyword argument.

    :param raw_citation: Raw citation mapping.
    :raises ValidationException: if 'date' is not a Date, str or int.
    :return: Citation object.
    """
    raw_citation = CitationFactory.sanitize_raw(raw_citation)

    # Convert the notes first and only then drop the key, so the mapping
    # is left untouched if building a Text fails.
    notes = [Text(**entry) for entry in raw_citation.get('notes', [])]
    raw_citation.pop('notes', None)

    # Only the year is kept; not every book makes its full publishing
    # date easily available.
    if 'date' not in raw_citation:
        year = None
    else:
        raw_date = raw_citation.pop('date')
        # The date arrives as a Date when loaded from YAML (the loader
        # auto-casts it) and as a str from the database or other sources.
        # Exact type checks are intentional here.
        date_type = type(raw_date)
        if date_type is Date:
            year = raw_date.year
        elif date_type is str:
            year = Date(*[int(part) for part in raw_date.split('-')]).year
        elif date_type is int:
            year = raw_date
        else:
            raise ValidationException("Date is invalid '%s'" % raw_date)

    return Citation(notes=notes, date=year, **raw_citation)
def _check_parent_kind(self):
    """
    Ensure the parent's kind is one that the model's kind may have as parent.

    A model of the top kind without a parent passes. A model of any other
    kind without a parent is rejected here rather than crashing on the
    missing parent.

    :raises ValidationException: if the parent is missing for a non-top
                                 kind, if a kind lookup fails with KeyError,
                                 or if the parent's kind is not in the
                                 model kind's allowed_parents.
    :return: None
    """
    parent = self._get_parent()

    if parent is None:
        if IngredientKinds(self.model.kind) == IngredientKinds.top:
            return
        # Without this guard the code below would dereference None and
        # raise AttributeError instead of a validation error.
        raise ValidationException("Parent of %s does not exist (%s)" %
                                  (self.model.slug, self.model.parent))

    # Keep the try body limited to the lookups that can raise KeyError.
    try:
        parent_kind = IngredientKinds(parent.kind).value
        allowed_parents = IngredientKinds(self.model.kind).allowed_parents
    except KeyError:
        raise ValidationException(
            "Parent (%s) of %s has bad kind (%s)" %
            (parent.slug, self.model.slug, parent.kind))

    if parent_kind not in allowed_parents:
        raise ValidationException(
            "Parent (%s) of %s has invalid kind (%s)." %
            (parent.slug, self.model.slug, parent.kind))
def _check_parent_existence(self):
    """
    Ensure that a model which is not of the top kind has a parent.

    :raises ValidationException: if self._get_parent() returns nothing
                                 truthy for a non-top model.
    :return: None
    """
    is_top_kind = self.model.kind == IngredientKinds.top.value
    if is_top_kind:
        # Models of the top kind are not required to have a parent.
        return

    if self._get_parent():
        return

    raise ValidationException("Parent of %s does not exist (%s)" %
                              (self.model.slug, self.model.parent))
def _validate_query_parameter(parameter, value, type_):
    """
    Check that a query parameter's value is exactly of the expected type.

    The comparison is on the exact type, so subclasses do not count as a
    match (for example a bool is not accepted where int is expected).
    :param parameter: URL parameter.
    :param value: Value as passed by the user (after modeling).
    :param type_: Expected python type class.
    :raises ValidationException: if the type of value is not type_.
    :return: None
    """
    actual_type = type(value)
    if actual_type is type_:
        return

    raise ValidationException(
        "Value of parameter '%s' is not a '%s' (got '%s')" %
        (parameter, type_, value))
def _parse_display_name(raw_input):
    """
    Replace the raw 'display_name' value with a DisplayName object.

    When no display name is present the inventory is named
    'Unnamed Inventory'. The decision is based on the looked-up value, not
    on whether the whole input is empty, so an input that has other keys
    but no display name also gets the default instead of being rejected.

    :param raw_input: Mapping of raw inventory values; updated in place.
    :raises ValidationException: if the display name is present but is not
                                 a str.
    :return: The same raw_input mapping with 'display_name' replaced.
    """
    value_key = 'display_name'
    old_value = raw_input.get(value_key)

    if old_value is None:
        new_value = DisplayName('Unnamed Inventory')
    elif type(old_value) is str:
        new_value = DisplayName(old_value)
    else:
        raise ValidationException(
            "Bad display name given for inventory (%s)" % old_value)

    raw_input.update({value_key: new_value})
    return raw_input
def add_query_parameter(self,
                        url_parameter,
                        query_class,
                        fields,
                        url_parameter_type=str,
                        invert=False,
                        occurrence=MustOccurrence,
                        value_parser=None,
                        **attributes):
    """
    Register a queriable parameter for this search index.

    The settings are stored in self.query_parameters, keyed by url_parameter.
    :param url_parameter: URL/input parameter to key from.
    :param query_class: ElasticSearch DSL query class.
    :param fields: Fields to search for this parameter's value(s).
    :param url_parameter_type: Python type class of this parameter from the URL.
    :param invert: Turn this from Must to MustNot at the overall level.
    :param occurrence: Occurrence object.
    :param value_parser: Function to parse the value of this parameter if it
                         cannot be done easily in the URL.
    :param attributes: Dictionary of extra params to pass to the query_class
                       constructor.
    :raises ValidationException: if the occurrence cannot be looked up.
    :return: None
    """
    # Reject anything the occurrence factory does not recognise.
    try:
        occurrence_factory.get_occurrence(occurrence.occur)
    except Exception as e:
        raise ValidationException("Invalid occurrence: %s" % e)

    # Gather everything known about this URL parameter and store it.
    settings = dict(
        url_parameter=url_parameter,
        url_parameter_type=url_parameter_type,
        query_class=query_class,
        attributes=attributes,
        fields=fields,
        occurrence=occurrence,
        invert=invert,
        value_parser=value_parser,
    )
    self.query_parameters[url_parameter] = settings
def _check_slug(self):
    """
    Ensure the model's slug equals the Slug built from its display_name.

    :raises ValidationException: if the two differ.
    :return: None
    """
    actual_slug = self.model.slug
    expected_slug = Slug(self.model.display_name)
    if actual_slug != expected_slug:
        raise ValidationException(
            "Slug (%s) is inconsistent with display_name." % self.model.slug)
def _check_kind(self):
    """
    Ensure the model's kind is one that IngredientKinds recognises.

    :raises ValidationException: if looking up the kind raises KeyError.
    :return: None
    """
    try:
        # Only the lookup matters here; the resulting kind is not needed.
        IngredientKinds(self.model.kind)
    except KeyError:
        # The exception was previously constructed but never raised, so
        # bad kinds passed validation silently.
        raise ValidationException("Ingredient %s has bad kind: %s" %
                                  (self.model.slug, self.model.kind))
def fail(self, message):
    """
    Report a validation failure.

    The message is always logged as an error. It is raised as an exception
    only when self.fatal is truthy.
    :param message: Description of the failure.
    :raises ValidationException: if self.fatal is truthy.
    :return: None
    """
    Log.error(message)
    if not self.fatal:
        return
    raise ValidationException(message)
def _check_id(self):
    """
    Ensure the model's id can be parsed as a UUID.

    :raises ValidationException: if the id is a malformed string, is None,
                                 or is not a string at all.
    :return: None
    """
    # Read the attribute outside the try so a missing model/id attribute is
    # not mistaken for a validation failure.
    candidate = self.model.id
    try:
        UUID(candidate)
    except (ValueError, TypeError, AttributeError) as e:
        # ValueError: malformed hex string. TypeError: id is None.
        # AttributeError: id is a non-string, which UUID() treats as a str.
        raise ValidationException(e) from e
def _build_search_query(self):
    """
    Build the overall Bool() query from every supplied url_parameter.

    "filter" = "must" without scoring. Better for caching.
    This function is built for Bool() queries only.

    Each name in self.supported_parameters is read off this object with
    getattr(); names whose value is None are skipped. Every remaining
    parameter becomes one Bool() sub-query, which lands in either the
    "must" or the "must_not" list of the final query.
    :raises ValidationException: if a parameter's configured type is not one
                                 of list, str, dict or bool.
    :return: Bool() query combining all parameters.
    """
    # The per-parameter queries are AND'd together, since we query like
    # "irish-whiskey AND stirred".
    musts = []
    must_nots = []

    # Types whose value is passed as-is to every field's condition.
    single_value_types = (str, dict, bool)

    for url_parameter in self.supported_parameters:
        # Example: "components=irish-whiskey,vermouth" gives a raw value of
        # ['irish-whiskey', 'vermouth'].
        raw_value = getattr(self, url_parameter, None)
        if raw_value is None:
            continue

        # Settings registered for this parameter in self.query_parameters.
        settings = self.query_parameters.get(url_parameter)

        # An optional value parser munges the raw value before any further
        # processing, translating URL input into ElasticSearch terms.
        value_parser = settings.get('value_parser')
        if value_parser:
            raw_value = value_parser(raw_value)

        # The (possibly parsed) value must match the configured type.
        expected_value_type = settings.get('url_parameter_type')
        self._validate_query_parameter(parameter=url_parameter,
                                       value=raw_value,
                                       type_=expected_value_type)

        # The Elasticsearch document fields to search for the value(s).
        fields = settings.get('fields')

        if expected_value_type is list:
            # Several values: each value gets its own Bool(should=...) over
            # all fields, i.e. OR across fields
            # (spec.components.slug || spec.components.parents). The
            # per-value sub-queries are then combined by the occurrence
            # below (rum && sherry when it is Must).
            # Should vs Must, tldr: Should == OR, Must == AND
            # https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query
            url_parameter_conditions = [
                Bool(should=[
                    self.get_query_condition(url_parameter=url_parameter,
                                             field=field,
                                             value=value)
                    for field in fields
                ])
                for value in raw_value
            ]
        elif any(expected_value_type is kind for kind in single_value_types):
            # Single str values, and dict/bool values that are handed
            # straight to the underlying query (e.g. implicit ranges), give
            # one condition per field.
            url_parameter_conditions = [
                self.get_query_condition(url_parameter=url_parameter,
                                         field=field,
                                         value=raw_value)
                for field in fields
            ]
        else:
            raise ValidationException(
                "Unsupported url_parameter data type: %s" %
                expected_value_type)

        # The occurrence decides how this parameter's conditions are
        # combined: Should (OR) or Must (AND). Boolean parameters (not to
        # be confused with ElasticSearch Bool queries) ignore the configured
        # occurrence and derive it from the value itself.
        if expected_value_type is bool:
            occurrence = MustOccurrence if raw_value else MustNotOccurrence
        else:
            occurrence = settings.get('occurrence')

        url_parameter_query = Bool(
            **{occurrence.occur: url_parameter_conditions})

        # Inverted parameters MUST NOT appear in the results, which is
        # useful for things like allergies.
        target = must_nots if settings.get('invert') else musts
        target.append(url_parameter_query)

    query = Bool(must=musts, must_not=must_nots)
    Log.info("Search Conditions are %s" % query)
    return query