def __init__(self, hosts=os.getenv('AMARI_ZOOKEEPER_HOSTS', '127.0.0.1:2181'), read_only=False):
    self.hosts = hosts
    self.read_only = read_only
    Log.info("Using Zookeeper hosts: \"%s\"" % hosts)
def invalidate(cls):
    """
    Invalidate (delete) the cache value and key.
    :return: None
    """
    Log.info("Invalidating cache key %s" % cls.cache_key)
    return Cache.delete(cls.cache_key)
def __init__(self, path):
    connection_string = "sqlite:///%s" % path
    Log.info("connection string is '%s'" % connection_string)
    self.engine = sqlalchemy.create_engine(connection_string)
    self.Session = sessionmaker(bind=self.engine)
def scrape_recipe(recipe):
    url = "%s/%s" % (url_base, endpoints.get('recipe') % recipe)
    Log.info("scraping %s" % url)
    parser = UpneatRecipeParser(slug=recipe, url=url)
    raw_recipe = parser.parse()
    return raw_recipe
def retrieve(cls):
    try:
        return pickle.loads(Cache.get(cls.cache_key))
    except KeyError:
        Log.warning("Attempted to retrieve '%s' but it was empty. Repopulating..." % cls.cache_key)
        cls.populate()
        return pickle.loads(Cache.get(cls.cache_key))
def get(self, path):
    self._connect()
    try:
        data, stat = self.zk.get(path)
        return data.decode("utf-8")
    except NoNodeError:
        raise KeyError("%s does not exist." % path)
    except Exception as e:
        # Unexpected errors are logged and swallowed; this implicitly returns None.
        Log.error(e.__class__)
        Log.error(e)
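# Hypothetical usage of get() above. The ZookeeperConnector class name is
# assumed from the surrounding snippets and the /amari/config path is
# illustrative only; a missing znode surfaces as a KeyError.
zk = ZookeeperConnector()
try:
    value = zk.get('/amari/config')
except KeyError:
    value = None  # The znode does not exist; fall back to a default.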
def execute(self, sort='_score'):
    """
    Actually talk to ElasticSearch and run the query.
    :param sort: ElasticSearch attribute on which to sort the results.
    :return: SearchResults child class.
    @TODO address the search range hacks here.
    """
    results = self.index_class.search()[0:1000].query(self.q).sort(sort).execute()
    Log.info("Got %s results." % results.hits.total.value)
    return SearchResults(hits=results)
def init(self):
    """
    Re-initialize all indexes. This calls rebuild on every registered
    index class. There be dragons here.
    :return: None
    """
    for name in self._indexes.keys():
        Log.debug("Init on %s" % name)
        try:
            self.rebuild(self._indexes.get(name))
        except (NotFoundError, KeyError, AttributeError) as e:
            Log.warning("Error re-initing index %s: %s" % (name, e))
def rebuild(self, index_class):
    """
    Re-create an index. This deletes the entire index (not just the
    contents, but the Whole Damn Thing(tm)) and re-creates it.
    :param index_class: elasticsearch_dsl.Document child representing this index.
    :return: None
    """
    try:
        index_class._index.delete()
        index_class.init()
        Log.info("Successfully rebuilt index %s" % index_class.Index.name)
    except NotFoundError:
        Log.warning("Index %s did not exist." % index_class.Index.name)
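# Hypothetical sketch of an index class that init()/rebuild() above operate
# on. Document, Text, and the inner Index class are real elasticsearch_dsl
# API; the field and index name here are illustrative, not from the source.
from elasticsearch_dsl import Document, Text

class ExampleIndex(Document):
    slug = Text()

    class Index:
        name = 'example'

# rebuild(ExampleIndex) would delete the 'example' index wholesale and then
# re-create it from this mapping via ExampleIndex.init().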
def __init__(self, host=os.getenv('AMARI_REDIS_HOST', default='127.0.0.1'),
             port=int(os.getenv('AMARI_REDIS_PORT', default=6379)),
             username=None, password=None, ssl=False):
    self.host = host
    self.port = port
    self.username = username
    self.password = password
    self.ssl = ssl
    Log.info("Using Redis host: \"%s:%i\"" % (host, port))
def _get_ingredient_primary_category(ingredient):
    category_mappings = IngredientCategoryMappingModel.query.filter(
        IngredientCategoryMappingModel.ingredient_id == ingredient.id)
    for category_id in [result.category_id for result in category_mappings]:
        category = IngredientCategoryModel.query.get(category_id)
        if category.position and category.position >= 5:
            return category.display_name
    Log.error("Could not find category for %s" % ingredient.canonical_name)
def retrieve(cls): """ Retrieve the cache's value :return: Various """ try: return Cache.get(cls.cache_key) except KeyError: Log.warning( "Attempted to retrieve '%s' but it was empty. Repopulating..." % cls.cache_key) cls.populate() return Cache.get(cls.cache_key)
def delete(cocktail_object):
    try:
        indexables = CocktailFactory.obj_to_index(cocktail_object, RecipeIndex)
        for indexable in indexables:
            try:
                RecipeIndex.delete(indexable)
            except NotFoundError:
                Log.warning("No cache entry found for %s" % indexable)
    except KeyError as e:
        # Since this is a DELETE we don't particularly care to correct the
        # problem; on a create (or similar) this would be more problematic.
        Log.error("Recipe has bad data: %s" % e)
def __init__(self, username, password, database, host='127.0.0.1', port=5432, debug_sql=False):
    connection_string = "postgres://%s:%s@%s:%i/%s" % (username, password, host, port, database)
    Log.info("Using Postgres host: \"%s\"" % host)
    self.engine = sqlalchemy.create_engine(connection_string, echo=debug_sql)
    self.Session = sessionmaker(bind=self.engine)
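# Hypothetical usage; the PostgresConnector class name and the credentials
# are illustrative. Sessions come from the sessionmaker bound in __init__().
db = PostgresConnector(username='amari', password='s3cret', database='amari',
                       debug_sql=True)
session = db.Session()
try:
    # ... query models against this session ...
    session.commit()
finally:
    session.close()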
def get_recipes():
    # A full crawl would be list(range(0, 10)) + list(string.ascii_uppercase);
    # this is currently restricted to the letter 'C' only.
    character_list = string.ascii_uppercase[2:3]
    raw_recipes = []
    for char in character_list:
        slugs = UpneatConnector._get_recipes_alpha(char)
        for slug in slugs:
            try:
                raw_recipes.append(UpneatConnector.scrape_recipe(slug))
            except Exception:
                # Log and skip any recipe that fails to scrape.
                Log.error("Error scraping %s" % slug)
    return raw_recipes
def _connect(self):
    if not hasattr(self, 'zk'):
        self.zk = KazooClient(hosts=self.hosts, read_only=self.read_only,
                              timeout=5, connection_retry=self._get_retry())
    elif self.zk.state == KazooState.CONNECTED:
        # Already connected; nothing to do.
        return
    else:
        # Client exists but is LOST/SUSPENDED; log it and (re)start below.
        Log.warning("ZooKeeper state is %s" % self.zk.state)

    try:
        return self.zk.start()
    except KazooTimeoutError as e:
        raise FatalException("Timeout connecting to ZooKeeper (%s)" % e)
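# Hypothetical sketch of the retry policy _connect() passes to KazooClient.
# KazooRetry and the connection_retry parameter are real kazoo API; the
# _get_retry() body and the values shown here are assumptions.
from kazoo.retry import KazooRetry

def _get_retry(self):
    # Retry the connection a few times with exponential backoff.
    return KazooRetry(max_tries=3, delay=0.5, backoff=2)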
def get_ingredients(self):
    all_ingredients = IngredientModel.query.all()
    standardized_ingredients = []
    orphan_count = 0

    for ingredient in all_ingredients:
        parent = self._get_parent_name(ingredient)
        if parent:
            kind = ProductKind.value
        else:
            kind = IngredientKind.value
            orphan_count += 1

        standardized_ingredient = {
            'display_name': ingredient.canonical_name,
            'slug': Slug(ingredient.canonical_name),
            'aliases': self._get_ingredient_aliases(ingredient),
            'parent': parent,
            'kind': kind,
        }
        standardized_ingredients.append(standardized_ingredient)

        # Log any orphan (parentless) ingredient for later inspection.
        if not standardized_ingredient['parent']:
            Log.info(standardized_ingredient)

    Log.info("Orphans at %i" % orphan_count)
    return standardized_ingredients
def resolve(cls, inventory, cocktail, spec_slug=None):
    results = []
    tree = IngredientTreeCache.retrieve()
    inventory.expand(tree=tree)

    Log.info("Cocktail specs: %s" % [spec.slug for spec in cocktail.specs])
    for spec in cocktail.specs:
        # Skip any specs that the user didn't ask for with the spec_slug
        # parameter.
        if spec_slug and spec.slug != spec_slug:
            Log.info("Skipping slug %s because you didn't want it." % spec.slug)
            continue

        # Parse the spec.
        results.append(cls._resolve_spec(inventory=inventory, cocktail=cocktail,
                                         spec=spec, tree=tree))

    # Return the list of results.
    return results
def _build_tree(self, passes, root=root_node):
    tree = Tree()
    pgconn = Registry.get_database_connection()

    with pgconn.get_session() as session:
        tree.create_node(root, root)

        # Categories attach to the root; families attach to their declared parent.
        for item in IngredientModel.get_by_kind(session, CategoryKind):
            tree.create_node(item.slug, item.slug, parent=root,
                             data=self._create_tree_data(item))
        for item in IngredientModel.get_by_kind(session, FamilyKind):
            tree.create_node(item.slug, item.slug, parent=item.parent,
                             data=self._create_tree_data(item))

        ingredients_to_place = list(IngredientModel.get_usable_ingredients(session))
        for i in range(1, passes + 1):
            Log.debug("Pass %i/%i" % (i, passes))
            # Iterate over a copy so items can be removed as they are placed.
            for item in ingredients_to_place[:]:
                if item.kind == FamilyKind.value:
                    ingredients_to_place.remove(item)
                    Log.debug("Skipping %s because it is a family." % item.slug)
                    continue
                try:
                    tree.create_node(item.slug, item.slug, parent=item.parent,
                                     data=self._create_tree_data(item))
                    ingredients_to_place.remove(item)
                except NodeIDAbsentError:
                    # Parent not in the tree yet; retry on a later pass.
                    Log.debug("Skipping %s (Attempt %i/%s)" % (item.slug, i, passes))

            if len(ingredients_to_place) == 0:
                Log.info("All done after pass %i" % i)
                break

    return tree
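# Standalone sketch of the multi-pass placement idea in _build_tree() above,
# using treelib's real API. Children whose parents are not in the tree yet
# raise NodeIDAbsentError and get retried on a later pass; the slugs and the
# pass count are illustrative.
from treelib import Tree
from treelib.exceptions import NodeIDAbsentError

items = [('overproof-rum', 'rum'), ('rum', 'spirits')]  # child listed before parent
tree = Tree()
tree.create_node('spirits', 'spirits')

remaining = list(items)
for attempt in range(1, 3):
    for slug, parent in remaining[:]:
        try:
            tree.create_node(slug, slug, parent=parent)
            remaining.remove((slug, parent))
        except NodeIDAbsentError:
            pass  # Parent not placed yet; retry on the next pass.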
def _build_search_query(self):
    """
    "filter" = "must" without scoring. Better for caching.
    This function is built for Bool() queries only.
    """
    # These lists contain the AND'd queries for each url_parameter.
    # They are AND because we query like "irish-whiskey AND stirred".
    musts = []
    must_nots = []

    for url_parameter in self.supported_parameters:
        # Each parameter is something like "components" or "construction" and
        # they are keys defined in the barbados.search.whatever.WhateverSearch
        # classes.

        # Should vs Must:
        # https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query
        # tl;dr: Should == OR, Must == AND.
        # For the purposes of multiple values per url_parameter, we have to
        # use AND (ex: components=irish-whiskey,vermouth should yield
        # irish-whiskey AND vermouth, not irish-whiskey OR vermouth).
        url_parameter_conditions = []

        # Get the value for the url_parameter as passed in from the URL.
        # Example: "components=irish-whiskey,vermouth" would mean a raw_value
        # of ['irish-whiskey', 'vermouth']. Native data types apply as defined
        # in the barbados.search.whatever.WhateverSearch class.
        raw_value = getattr(self, url_parameter, None)
        if raw_value is None:
            continue

        # A value parser is a function that is used to munge the raw_value
        # before further processing. Since we abstracted the shit out of the
        # search stuff, this is how we can transform things from the URL into
        # ElasticSearch-speak in a bespoke way.
        value_parser = self.query_parameters.get(url_parameter).get('value_parser')
        if value_parser:
            raw_value = value_parser(raw_value)

        # Ensure that the value we got matches the expected data type.
        expected_value_type = self.query_parameters.get(url_parameter).get('url_parameter_type')
        self._validate_query_parameter(parameter=url_parameter, value=raw_value,
                                       type_=expected_value_type)

        # These are the ElasticSearch document fields to search for the
        # particular value(s) we were given. These are defined in the
        # barbados.search.whatever.WhateverSearch class and are generally
        # a list of fields in ElasticSearch syntax.
        fields = self.query_parameters.get(url_parameter).get('fields')

        # When there are multiple values given in a url_parameter, we
        # interpret this to mean each value should be present in the expected
        # fields. For example, "components=irish-whiskey,vermouth" means both
        # "irish-whiskey" and "vermouth" must appear in the fields.
        if expected_value_type is list:
            for value in raw_value:
                # There's a lot going on here...
                # Since we want the OR condition between fields
                # (spec.components.slug || spec.components.parents) we use
                # Should. If we specified multiple values, we want the AND
                # condition (rum && sherry). This builds a sub-query of Bool()
                # for the former || situation and adds it to the list of all
                # conditions for this query for aggregation with other
                # url_parameters.
                field_conditions = Bool(should=[
                    self.get_query_condition(url_parameter=url_parameter,
                                             field=field, value=value)
                    for field in fields
                ])
                url_parameter_conditions.append(field_conditions)
        # Single-valued url_parameters are much easier to look for.
        elif expected_value_type is str:
            # Loop through every ElasticSearch document field that we were
            # told to search in and add each as a condition to this
            # url_parameter's conditions.
            for field in fields:
                url_parameter_conditions.append(
                    self.get_query_condition(url_parameter=url_parameter,
                                             field=field, value=raw_value))
        # Complex queries like implicit ranges take a direct dictionary of
        # values to pass to the underlying ElasticSearch query.
        elif expected_value_type is dict or expected_value_type is bool:
            # Loop through every ElasticSearch document field that we were
            # told to search in and add each as a condition to this
            # url_parameter's conditions.
            for field in fields:
                url_parameter_conditions.append(
                    self.get_query_condition(url_parameter=url_parameter,
                                             field=field, value=raw_value))
        else:
            raise ValidationException("Unsupported url_parameter data type: %s" % expected_value_type)

        # The occurrence is used to determine which method to use for
        # searching the index for this particular condition. There are times
        # when we want Should (OR), like matching slugs and display_names,
        # and others when we want Must (AND), like matching rum && sherry.
        occurrence = self.query_parameters.get(url_parameter).get('occurrence')

        # Boolean-based queries (not to be confused with ElasticSearch Bool
        # queries!) need to set their occurrence based on the value of the
        # boolean.
        if expected_value_type is bool:
            occurrence = MustOccurrence if raw_value else MustNotOccurrence

        # Now construct the Bool() query for this url_parameter.
        url_parameter_query = Bool(**{occurrence.occur: url_parameter_conditions})

        # Some parameters are inverted, aka MUST NOT appear in the search
        # results. This can be useful for, say, allergies, or if you have a
        # pathological hatred of anything pineapple.
        if self.query_parameters.get(url_parameter).get('invert'):
            must_nots.append(url_parameter_query)
        else:
            musts.append(url_parameter_query)

    # Build the overall query.
    query = Bool(must=musts, must_not=must_nots)
    Log.info("Search Conditions are %s" % query)
    return query
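# Standalone sketch of the Bool structure the builder above ends up with for
# components=irish-whiskey,vermouth. Bool and Match are real elasticsearch_dsl
# query classes; the document field names are illustrative.
from elasticsearch_dsl.query import Bool, Match

query = Bool(must=[
    Bool(should=[Match(**{'spec.components.slug': 'irish-whiskey'}),
                 Match(**{'spec.components.parents': 'irish-whiskey'})]),
    Bool(should=[Match(**{'spec.components.slug': 'vermouth'}),
                 Match(**{'spec.components.parents': 'vermouth'})]),
])
# Values within a parameter AND together (musts); fields within a value OR
# together (shoulds), matching the occurrence logic above.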
def fail(self, message):
    Log.error(message)
    if self.fatal:
        raise ValidationException(message)
def delete(cls, ingredient_object):
    index = IngredientFactory.obj_to_index(ingredient_object, cls.for_index)
    try:
        IngredientIndex.delete(index)
    except NotFoundError:
        Log.warning("Object %s was not found in index on DELETE. This probably isn't a problem?" % ingredient_object.slug)