Пример #1
0
def test_conversion_to_dict():
    """ Checks the dict produced by ``to_dict`` on a built product model.

        The model definition uses dotted attribute names ('b.c', 'b.d.e', 'b.d.f');
        the resulting dict is expected to expose them as nested dicts. The 'text'
        attribute must come back as the stems of its raw value, while the other
        attributes must equal the raw values.
    """
    definition = {
        'language': {'type': 'fixed', 'default': 'english'},
        'a': {'type': 'fixed', 'persisted': True},
        'b.c': {'type': 'fixed', 'persisted': True},
        'b.d.e': {'type': 'text', 'persisted': True},
        'b.d.f': {'type': 'numeric', 'persisted': True}
    }
    product_factory = ProductModelFactory(definition)

    innermost = {'e': 'some nested stuff', 'f': 12345}
    raw_product = {'a': 'foo', 'b': {'c': 'bar', 'd': innermost}}

    # The expected stems are computed with the same language the model defaults to.
    expected_stems = text.parse_text_to_stems('english', raw_product['b']['d']['e'])

    as_dict = product_factory.build('test_product', raw_product).to_dict()

    failure_message = 'Attribute does not match'
    nose.tools.eq_(as_dict['a'], raw_product['a'], failure_message)
    nose.tools.eq_(as_dict['b']['c'], raw_product['b']['c'], failure_message)
    nose.tools.assert_list_equal(as_dict['b']['d']['e'], expected_stems, failure_message)
    nose.tools.eq_(as_dict['b']['d']['f'], raw_product['b']['d']['f'], failure_message)
Пример #2
0
def test_stemmed_field():
    """ Checks that a 'text' attribute is exposed as the list of stems of its raw value.
    """
    language_spec = {'type': 'fixed', 'required': True, 'persisted': True}
    title_spec = {'type': 'text', 'required': True, 'persisted': True}
    definition = {'language': language_spec, 'resources.title': title_spec}

    raw_product = {
        'language': 'english',
        'resources': {'title': 'Roger Federer Ousts Novak Djokovic'},
    }
    expected_stems = ['roger', 'feder', 'oust', 'novak', 'djokov']

    built = ProductModelFactory(definition).build('product1', raw_product)
    nose.tools.eq_(built.get_attribute('resources.title'), expected_stems)
Пример #3
0
def test_numeric_type():
    """ Checks that a 'numeric' attribute is returned by ``get_attribute`` with its raw value.
    """
    language_spec = {
        'type': 'list',
        'required': True,
        'persisted': True,
        'default': 'portuguese',
    }
    price_spec = {'type': 'numeric', 'required': True, 'persisted': True}
    definition = {'language': language_spec, 'price': price_spec}

    raw_product = {'price': 5.7}
    built = ProductModelFactory(definition).build('product1', raw_product)

    nose.tools.eq_(built.get_attribute('price'), 5.7)
Пример #4
0
def test_default_attribute_values():
    """ Checks that an attribute absent from the raw product takes the default value
        declared for it in the model definition.
    """
    language_spec = {
        'type': 'list',
        'required': True,
        'persisted': True,
        'default': 'portuguese',
    }
    title_spec = {'type': 'text', 'required': True, 'persisted': True}
    definition = {'language': language_spec, 'resources.title': title_spec}

    # The raw product carries no 'language', so the default must kick in.
    raw_product = {
        'resources': {'title': 'O rato roeu a roupa do rei de Roma'},
        'source': ['NYT'],
    }
    built = ProductModelFactory(definition).build('product1', raw_product)

    nose.tools.eq_(built.get_attribute('language'), 'portuguese')
Пример #5
0
def test_external_product_parse_valid_attributes():
    """ Checks that 'fixed' and 'list' attributes are returned by ``get_attribute``
        exactly as they were given in the raw product.
    """
    language_spec = {'type': 'fixed', 'required': True, 'persisted': True}
    source_spec = {'type': 'list', 'required': True, 'persisted': True}
    product_factory = ProductModelFactory({'language': language_spec, 'source': source_spec})

    raw_product = {'language': 'english', 'source': ['NYT']}
    built = product_factory.build('product1', raw_product)

    nose.tools.eq_(built.get_attribute('language'), raw_product['language'])
    nose.tools.eq_(built.get_attribute('source'), raw_product['source'])
Пример #6
0
def test_invalid_setting_values():
    """ Builds a product from a model definition in which the 'required' setting of
        the 'source' attribute holds the non-boolean value 'maybe'.

        NOTE(review): the test name suggests a failure is expected, but nothing in
        this block asserts an exception -- confirm whether an expected-exception
        decorator is applied outside of the visible code.
    """
    language_spec = {'type': 'fixed', 'required': True, 'persisted': True}
    source_spec = {'type': 'list', 'required': 'maybe', 'persisted': True}
    title_spec = {'type': 'text', 'required': True, 'persisted': True}
    definition = {
        'language': language_spec,
        'source': source_spec,
        'resources.title': title_spec,
    }

    raw_product = {'language': 'english'}
    product_factory = ProductModelFactory(definition)
    product_factory.build('product1', raw_product)
Пример #7
0
def test_alright_when_required_field_is_missing_but_default_is_given():
    """ Builds a product that lacks a required attribute for which the model definition
        declares a default value. The test passes as long as the build does not raise.
    """
    language_spec = {
        'type': 'fixed',
        'required': True,
        'persisted': True,
        'default': 'portuguese',
    }
    source_spec = {'type': 'list', 'required': False, 'persisted': True}
    definition = {'language': language_spec, 'source': source_spec}

    # 'language' is required but absent here; only its default is available.
    raw_product = {'source': ['Whatever']}
    product_factory = ProductModelFactory(definition)
    product_factory.build('product1', raw_product)
Пример #8
0
def test_alright_when_non_required_field_is_missing():
    """ Builds a product that lacks the attributes declared as non-required in the
        model definition. The test passes as long as the build does not raise.
    """
    language_spec = {'type': 'fixed', 'required': True, 'persisted': True}
    source_spec = {'type': 'list', 'required': False, 'persisted': True}
    title_spec = {'type': 'text', 'required': False, 'persisted': True}
    definition = {
        'language': language_spec,
        'source': source_spec,
        'resources.title': title_spec,
    }

    # Only the single required attribute is supplied.
    raw_product = {'language': 'english'}
    product_factory = ProductModelFactory(definition)
    product_factory.build('product1', raw_product)
Пример #9
0
def test_required_field_values_are_present():
    """ Builds a product that lacks a required attribute ('resources.title') for which
        no default is declared; an exception is the intended outcome.

        NOTE(review): nothing in this block asserts that an exception is raised --
        confirm whether an expected-exception decorator is applied outside of the
        visible code.
    """
    language_spec = {'type': 'fixed', 'required': True, 'persisted': True}
    source_spec = {'type': 'list', 'required': False, 'persisted': True}
    title_spec = {'type': 'text', 'required': True, 'persisted': True}
    definition = {
        'language': language_spec,
        'source': source_spec,
        'resources.title': title_spec,
    }

    raw_product = {'language': 'english'}
    product_factory = ProductModelFactory(definition)
    product_factory.build('product1', raw_product)
Пример #10
0
def test_key_field():
    """ Checks that ``keys()`` of a built product model contains every attribute name
        declared in the model definition and supplied by the raw product.
    """
    language_spec = {'type': 'fixed', 'required': True, 'persisted': True}
    source_spec = {'type': 'list', 'required': True, 'persisted': True}
    title_spec = {'type': 'text', 'required': True, 'persisted': True}
    definition = {
        'language': language_spec,
        'source': source_spec,
        'resources.title': title_spec,
    }

    raw_product = {
        'language': 'english',
        'resources': {'title': 'Roger Federer Ousts Novak Djokovic'},
        'source': ['NYT'],
    }
    built = ProductModelFactory(definition).build('product1', raw_product)

    # All three expected names present <=> the expected set is a subset of the keys.
    expected_keys = frozenset(['source', 'resources.title', 'language'])
    nose.tools.ok_(expected_keys <= frozenset(built.keys()))
Пример #11
0
    def __init__(self, customer, db_proxy, database_settings, cache_settings):
        """
        :param customer: The customer name.
        :param db_proxy: May be a descendant class of BaseProxy or an instance of that descendant class. If it is the
        class itself, CustomerContext will create a new instance of it; otherwise, it will reuse the instance passed.
        :param database_settings: The settings to be used for accessing the database.
        :return: A new CustomerContext instance
        """

        super().__init__()

        self.customer = customer
        """ The customer for this context.
        """
        self.default_product_date_field = self._get_setting("DEFAULT_PRODUCT_DATE_FIELD")
        """ The name of the date field to be used in product queries concerning time when no other field is informed.
        """
        self.recommendable_product_start_date_field = self._get_setting("RECOMMENDABLE_PRODUCT_START_DATE_FIELD")
        """ The name of the date field that should be less or equal than the present moment for a product to be
            recommendable.
        """
        self.recommendable_product_end_date_field = self._get_setting("RECOMMENDABLE_PRODUCT_END_DATE_FIELD")
        """ The name of the date field that should be greater or equal than the present moment for a product to be
            recommendable.
        """
        self.database_settings = database_settings
        """ The database settings (host/database name).
        """
        self.data_proxy = None
        """ The database proxy instance used by this customer context.
        """
        self.set_data_proxy(db_proxy)

        self._present_date = None
        """ The date used to define the system current date.
            It is generally used as a hard limit when querying for activities and products.
            If no present_date is provided, the OS system date is used.
        """
        self.short_term_window = self._get_setting("SHORT_TERM_WINDOW")
        """ The short term time window used as a limit when querying for recent activity.
        """
        self.long_term_window = self._get_setting("LONG_TERM_WINDOW")
        """ The long term time window used generally to limit the data universe for template generation.
        """
        self.popularity_window = self._get_setting("POPULARITY_WINDOW")
        """ The number of days of the time window used for popularity counts (usually shorter than
            the short term window).
        """
        self.risk_factor = self._get_setting("RISK_FACTOR")
        """ The risk factor (between 0 and 1) for collaborative recommendations:
            0 is conservative (focus on at least 3 stars); 1 is aggressive (focus on 5 stars).
        """
        self.top_terms_count = self._get_setting("COUNT_TOP_TERMS")
        """ The number of relevant terms per document in text-based recommendations.
        """
        self.base_products_count = self._get_setting("COUNT_RECENT_PRODUCTS")
        """ The minimum number of recently consumed items that will be used as base for product-similarity algorithms.
        """
        self.user_templates_count = self._get_setting("COUNT_USER_TEMPLATES")
        """ The number of user templates to be considered.
        """
        self.product_templates_count = self._get_setting("COUNT_PRODUCT_TEMPLATES")
        """ The number of product templates to be considered (for each recently consumed 'base product').
        """
        self.user_user_strengths_window = self._get_setting("DAYS_FOR_USER_USER_STRENGTHS")
        """ The time window used as a limit when calculating product-product strengths.
        """
        self.product_product_strengths_window = self._get_setting("DAYS_FOR_PRODUCT_PRODUCT_STRENGTHS")
        """ The time window used as a limit when calculating product-product strengths.
        """
        self.product_product_strengths_tfidf_window = self._get_setting("DAYS_FOR_PRODUCT_PRODUCT_STRENGTHS_TFIDF")
        """ The time window used as a limit when calculating product-product strengths (tfidf).
        """
        self.should_consolidate_user_templates_on_the_fly = self._get_setting(
            "SHOULD_CONSOLIDATE_USER_TEMPLATES_ON_THE_FLY")
        """ If True, pre-renderization of user templates will take place during by the end of each update of
            user-user strengths.
        """
        self.should_consolidate_product_templates_on_the_fly = self._get_setting(
            "SHOULD_CONSOLIDATE_PRODUCT_TEMPLATES_ON_THE_FLY")
        """ If True, pre-renderization of product templates will take place during by the end of each update of
            product-product strengths.
        """
        self.bidirectional_uu_strength_updates = self._get_setting("BIDIRECTIONAL_UU_STRENGTH_UPDATES")
        """ If True, user-user strengths will be updated on both directions (user <--> template) on the fly.
        """
        self.bidirectional_pp_strength_updates = self._get_setting("BIDIRECTIONAL_PP_STRENGTH_UPDATES")
        """ If True, product-product strengths will be updated on both directions (product <--> template) on the fly.
        """
        self.history_decay_function_name = self._get_setting("HISTORY_DECAY_FUNC")
        self.history_decay_linear_function_ttl = self._get_setting("HISTORY_DECAY_LINEAR_FUNCTION_TTL")
        self.history_decay_exponential_function_halflife = self._get_setting(
            "HISTORY_DECAY_EXPONENTIAL_FUNCTION_HALFLIFE")
        self.history_decay_step_function_ttl = self._get_setting("HISTORY_DECAY_STEP_FUNCTION_TTL")
        """ The function to be used for decaying scores based on past recommendations (and eventual parameters).
        """
        self.product_age_decay_function_name = self._get_setting("PRODUCT_AGE_DECAY_FUNC")
        self.product_age_decay_linear_function_ttl = self._get_setting("PRODUCT_AGE_DECAY_LINEAR_FUNCTION_TTL")
        self.product_age_decay_exponential_function_halflife = self._get_setting(
            "PRODUCT_AGE_DECAY_EXPONENTIAL_FUNCTION_HALFLIFE")
        self.product_age_decay_step_function_ttl = self._get_setting("PRODUCT_AGE_DECAY_STEP_FUNCTION_TTL")
        """ The function to be used for decaying scores based on the product age (and eventual parameters).
        """
        self.previous_consumption_factor = self._get_setting("PREVIOUS_CONSUMPTION_FACTOR")
        """ A factor to be applied to the score of pre-filtered products already consumed by the target user.
        """
        self.near_identical_filter_field = self._get_setting("NEAR_IDENTICAL_FILTER_FIELD")
        """ The field used to remove duplicates if there are too many terms in common.
        """
        self.near_identical_filter_threshold = self._get_setting("NEAR_IDENTICAL_FILTER_THRESHOLD")
        """ The field used to remove duplicates if there are too many terms in common.
        """
        self.recommendations_page_size = self._get_setting("RECOMMENDATIONS_PAGE_SIZE")
        """ The most likely size of each recommendations page
            (to be used to avoid near-identical recommendations in the same page).
        """
        self.product_text_fields = []
        """ A raw list with all TEXT-type product attributes.
        """
        self.product_non_text_fields = []
        """ A raw list with all non-TEXT-type product attributes.
        """
        self.similarity_filters_by_type = {}
        """ A dict {attribute_type: list of attribute_name's} for calculating product similarities.
            Equality is required for all such fields so that two products may have non-zero mutual similarity.
        """
        self.similarity_weights_by_type = {}
        """ A dict {attribute_type: dict {attribute_name: attribute_weight}} for calculating
            product similarities. Each type is handled differently by the similarity functions, and
            the scores assigned to each attribute's contributions are multiplied by the corresponding
            attribute's weight.
        """
        self.date_similarity_halflife = self._get_setting("DATE_SIMILARITY_HALFLIFE")
        """ The difference between two products' dates (in days) that makes their date-based similarity be 0.5.
            Note that an inverse exponential function is used. If none is informed, than date-based similarities
            will always be 1.
        """
        self.min_user_user_strength_numerator = self._get_setting("MIN_USER_USER_STRENGTH_NUMERATOR")
        """ The minimum number of common products before a user-to-user strength can be non-zero.
        """
        self.min_product_product_strength_numerator = self._get_setting("MIN_PRODUCT_PRODUCT_STRENGTH_NUMERATOR")
        """ The minimum number of common users before a product-to-product strength can be non-zero.
        """
        self.page_size_user_user_numerators = self._get_setting("PAGE_SIZE_USER_USER_NUMERATORS")
        """ The number of products contributing to user-user strengths in each processing unit of numerators.
        """
        self.page_size_user_user_denominators = self._get_setting("PAGE_SIZE_USER_USER_DENOMINATORS")
        """ The number of target users in user-user pairs in each processing unit (page) of denominators.
        """
        self.page_size_product_product_numerators = self._get_setting("PAGE_SIZE_PRODUCT_PRODUCT_NUMERATORS")
        """ The number of users contributing to product-product strengths in each processing unit of numerators.
        """
        self.page_size_product_product_denominators = self._get_setting("PAGE_SIZE_PRODUCT_PRODUCT_DENOMINATORS")
        """ The number of template products in product-product pairs in each processing unit (page) of denominators.
        """
        self.page_size_batch_process_products = self._get_setting("PAGE_SIZE_BATCH_PROCESS_PRODUCTS")
        """ The number of products to be processed in each processing unit (page) during creation of product models.
        """
        self.max_workers_user_user_strengths = self._get_setting("MAX_WORKERS_USER_USER_STRENGTHS")
        """ The maximum number of parallel threads during user-user strengths generation in batch.
        """
        self.max_workers_product_product_strengths = self._get_setting("MAX_WORKERS_PRODUCT_PRODUCT_STRENGTHS")
        """ The maximum number of parallel threads during product-product strengths generation in batch.
        """
        self.max_workers_batch_process_products = self._get_setting("MAX_WORKERS_BATCH_PROCESS_PRODUCTS")
        """ The maximum number of parallel threads during batch creation of product models.
        """
        self.max_workers_template_consolidation = self._get_setting("MAX_WORKERS_TEMPLATE_CONSOLIDATION")
        """ The maximum number of parallel threads during batch consolidation of user/product templates.
        """
        self.flush_size = self._get_setting("FLUSH_SIZE")
        """ The number of queued db operations which forces a flush.
        """
        self.max_recommendations = self._get_setting("MAX_RECOMMENDATIONS")
        """ Hard limit for recommendation queries. If a query goes beyond the limit an Exception is raised.
        """
        self.recommendation_timeout = self._get_setting("RECOMMENDATION_TIMEOUT")
        """ The timeout in seconds a hybrid recommender will wait for a specialist request to return
            If a specialist reaches timeout its results are ignored
        """
        self.min_rating_recommendable_from_user = self._get_setting("MIN_RATING_RECOMMENDABLE_FROM_USER")
        """ The minimum rating of recommendable products in user-to-user strategies.
        """
        self.min_rating_recommendable_from_product = self._get_setting("MIN_RATING_RECOMMENDABLE_FROM_PRODUCT")
        """ The minimum rating of base consumed products in item-to-item strategies.
        """
        self.min_rating_conservative = self._get_setting("MIN_RATING_CONSERVATIVE")
        """ The minimum rating in conservative strengths.
        """
        self.min_rating_aggressive = self._get_setting("MIN_RATING_AGGRESSIVE")
        """ The minimum rating in aggressive strengths.
        """
        self.product_model_factory = ProductModelFactory(self._get_setting("PRODUCT_MODEL"))
        """ Product model definition.
        """
        self.supported_activities = []
        """ The supported activities.
            Activities that are not supported are simply disregarded by the recommender engine.
        """
        self.blocking_activities = []
        """ The blocking activities.
            A blocking activity prevents a product to be further recommended to a user within
            a same short_term_window.
        """
        self.activities_by_rating = {rating: [] for rating in range(1, 6)}  # ratings from 1 to 5
        """ A dict whose key is the (implicit) rating of to the activities it maps to.
        """
        self.rating_by_activity = {}
        """ A dict whose value is the (implicit) rating of the key activity.
        """
        self.in_boost_by_activity = {}
        """ A dict whose key is the activity type corresponding to the in-boost factor it maps to.
        """
        self.out_boost_by_activity = {}
        """ A dict whose key is the activity type corresponding to the out-boost factor it maps to.
        """
        self.impressions_enabled = self._get_setting("IMPRESSIONS_ENABLED")
        """ Indicates whether impressions (products shown to users) are kept track of.
        """
        self.filter_strategy = self._get_setting("FILTER_STRATEGY")
        """ Defines the filter strategy to be used when filters are applied to the recommenders.
            The supported filtering strategies are BEFORE_SCORING and AFTER_SCORING. The best choice depends on
            the cardinality of filtered products set and the cardinality of the recommendation candidates set
            as calculated by the various algorithms. The filter should be applied to the smallest set.
        """
        self.algorithm_weights = self._get_setting("ALGORITHM_WEIGHTS")
        """ Stores, for each hybrid recommender, the weight distribution of the algorithms in the form of a list of
            [algorithm_suffix, probability, extra_comma_separated_directives] tuples}, where:
                *algorithm_suffix* identifies a specialist algorithm,
                *probability* is the importance (slice size, merge probability, vote power, etc.)
                 assigned to an algorithm, and
                *extra_comma_separated_directives* are optional, hybrid-recommender-specific settings
                 associated to an algorithm, e.g. *nobonus* indicates that the "bonus time" in HRVoting
                 shall not apply to that specific algorithm.
            The sum of the probabilities must be 1.
            We use lists because, for some hybrid recommenders, the order matters.
        """
        self.fill_in_algorithm = self._get_setting("FILL_IN_ALGORITHM")

        self._load_activity_types()
        self._load_product_similarity_attributes()

        self.initial_date = self._get_setting("PRESENT_DATE")
        """ Used only in tests to set the present date """

        self.context_filters_cache = Cache(cache_settings, 'context_filters') if cache_settings else None
        """ A cache for product_ids that correspond to recently used context filters.