Example #1
def log_aidsearchevent(querystring='', source='', results_count=0):
    """
    Clean up the incoming search data and populate the AidSearchEvents table.
    Run asynchronously to avoid slowing down requests.
    """
    source_cleaned = get_subdomain_from_host(source)
    querystring_cleaned = clean_search_querystring(querystring)

    # sometimes we query our API for internal (e.g. admin) purposes
    # we don't want to log these searches
    is_internal_search = get_querystring_value_list_from_key(
        querystring_cleaned, 'internal')  # noqa

    if not is_internal_search:  # noqa
        targeted_audiences = get_querystring_value_list_from_key(
            querystring, 'targeted_audiences') or None  # noqa
        perimeter = get_querystring_perimeter(querystring)
        text = get_querystring_value_from_key(querystring, 'text')

        event = AidSearchEvent.objects.create(
            querystring=querystring_cleaned,
            source=source_cleaned,
            results_count=results_count,
            targeted_audiences=targeted_audiences,
            perimeter=perimeter,
            text=text)

        themes = get_querystring_themes(querystring)
        categories = get_querystring_categories(querystring)
        event.themes.set(themes)
        event.categories.set(categories)
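
The docstring notes that the logging should run asynchronously so the search request is not slowed down. A minimal sketch of one way to dispatch it, assuming a plain background thread rather than whatever task runner (Celery, django-rq, ...) the project actually uses; the helper name log_search_in_background and the argument values are hypothetical:

import threading

def log_search_in_background(querystring, source, results_count):
    # Hypothetical helper: fire-and-forget thread so the HTTP response is
    # returned without waiting for the AidSearchEvent insert to complete.
    threading.Thread(
        target=log_aidsearchevent,
        kwargs={'querystring': querystring,
                'source': source,
                'results_count': results_count},
        daemon=True,
    ).start()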
Example #2
def log_aidviewevent(aid_id, querystring='', source=''):
    source_cleaned = get_subdomain_from_host(source)
    querystring_cleaned = clean_search_querystring(querystring)

    AidViewEvent.objects.create(aid_id=aid_id,
                                querystring=querystring_cleaned,
                                source=source_cleaned)
Example #3
def perform_create(self, serializer):
    # clean host
    host = self.request.get_host()
    source_cleaned = get_site_from_host(host)
    # clean querystring
    querystring = serializer.validated_data.get('querystring')
    querystring_cleaned = clean_search_querystring(querystring)
    # save
    serializer.save(source=source_cleaned, querystring=querystring_cleaned)
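
perform_create is a Django REST Framework hook, so this method presumably lives on a create view or viewset that is not shown in the excerpt. A minimal sketch of the surrounding class, assuming a CreateAPIView; both class names are hypothetical and the real project's serializer is almost certainly richer:

from rest_framework import generics, serializers

class AidSearchEventSerializer(serializers.ModelSerializer):  # hypothetical stand-in
    class Meta:
        model = AidSearchEvent
        fields = ['querystring', 'source', 'results_count']

class AidSearchEventCreateView(generics.CreateAPIView):  # hypothetical view name
    serializer_class = AidSearchEventSerializer

    def perform_create(self, serializer):
        # Same logic as Example #3: clean the host and querystring before saving.
        host = self.request.get_host()
        serializer.save(
            source=get_site_from_host(host),
            querystring=clean_search_querystring(
                serializer.validated_data.get('querystring')))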
Example #4
def log_aidsearchevent(querystring='',
                       results_count=0,
                       source='',
                       request_ua=''):
    """
    Clean up the incoming search data and populate the AidSearchEvents table.
    Run asynchronously to avoid slowing down requests.
    """
    source_cleaned = get_site_from_host(source)
    querystring_cleaned = clean_search_querystring(querystring)

    # There are some cases where we don't want to log the search event:
    # - a crawler
    # - when there are unknown targeted_audiences (e.g. 'test', since May 2021)
    # - when we query our API for internal (e.g. admin) purposes
    # - page is greater than 1 (the user has scrolled to see more results) or has a strange value
    is_crawler = crawler_detect.isCrawler(request_ua)
    targeted_audiences = get_querystring_value_list_from_key(
        querystring, 'targeted_audiences') or None  # noqa
    is_wrong_search = (targeted_audiences and len(targeted_audiences)
                       and targeted_audiences[0] not in Aid.AUDIENCES)  # noqa
    is_internal_search = get_querystring_value_list_from_key(
        querystring_cleaned, 'internal')
    next_page = get_querystring_value_from_key(querystring_cleaned, 'page')
    is_next_page_search = next_page and (not next_page.isdigit()
                                         or int(next_page) > 1)

    if not any([is_crawler, is_wrong_search,
                is_internal_search, is_next_page_search]):
        perimeter = get_querystring_perimeter(querystring)
        text = get_querystring_value_from_key(querystring, 'text')

        event = AidSearchEvent.objects.create(
            querystring=querystring_cleaned,
            source=source_cleaned,
            results_count=results_count,
            targeted_audiences=targeted_audiences,
            perimeter=perimeter,
            text=text)

        themes = get_querystring_themes(querystring)
        categories = get_querystring_categories(querystring)
        backers = get_querystring_backers(querystring)
        programs = get_querystring_programs(querystring)
        event.themes.set(themes)
        event.categories.set(categories)
        event.backers.set(backers)
        event.programs.set(programs)
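
crawler_detect is used but never defined in these excerpts. Assuming it comes from the crawlerdetect package (a Python port of the PHP CrawlerDetect library), the module-level setup would look roughly like this:

from crawlerdetect import CrawlerDetect

crawler_detect = CrawlerDetect()

# Returns True for known bot user agents, False for regular browsers.
is_bot = crawler_detect.isCrawler(
    'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')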
Example #5
def log_aidviewevent(aid_id,
                     querystring='',
                     source='',
                     request_ua='',
                     request_referer=''):
    source_cleaned = get_site_from_host(source)
    querystring_cleaned = clean_search_querystring(querystring)

    # There are some cases where we don't want to log the view event:
    # - a crawler (bot)
    # - a scraper (user script that parses & pulls data from our website)
    is_crawler = crawler_detect.isCrawler(request_ua)
    is_scraper = 'sitemap.xml' in request_referer

    if not any([is_crawler, is_scraper]):
        targeted_audiences = get_querystring_value_list_from_key(
            querystring, 'targeted_audiences') or None  # noqa

        AidViewEvent.objects.create(aid_id=aid_id,
                                    targeted_audiences=targeted_audiences,
                                    querystring=querystring_cleaned,
                                    source=source_cleaned)
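
A sketch of how a caller might gather these arguments, assuming a plain Django view (the project's actual call site is not shown); the view name track_aid_view is hypothetical, and the user agent and referer come from the standard request.META keys:

def track_aid_view(request, aid_id):
    # Hypothetical caller: forward the query string, host, user agent and
    # referer headers to the logging helper above.
    log_aidviewevent(
        aid_id,
        querystring=request.META.get('QUERY_STRING', ''),
        source=request.get_host(),
        request_ua=request.META.get('HTTP_USER_AGENT', ''),
        request_referer=request.META.get('HTTP_REFERER', ''))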
Example #6
def test_clean_search_querystring_with_remove_extra_fields(
        input_querystring, expected_cleaned_querystring):  # noqa

    assert clean_search_querystring(
        input_querystring,
        remove_extra_fields=True) == expected_cleaned_querystring  # noqa
Example #7
def test_clean_search_querystring(input_querystring,
                                  expected_cleaned_querystring):  # noqa

    assert clean_search_querystring(
        input_querystring) == expected_cleaned_querystring  # noqa
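
Both test functions take (input_querystring, expected_cleaned_querystring) pairs, which strongly suggests they are driven by @pytest.mark.parametrize. A sketch of what that decoration could look like; the querystring pairs below are invented placeholders, not actual cases from the project's test suite:

import pytest

@pytest.mark.parametrize('input_querystring, expected_cleaned_querystring', [
    # Invented cases: an already-clean querystring is assumed to pass through
    # unchanged, and empty parameters are assumed to be stripped out.
    ('text=water', 'text=water'),
    ('text=water&perimeter=', 'text=water'),
])
def test_clean_search_querystring(input_querystring,
                                  expected_cleaned_querystring):
    assert clean_search_querystring(
        input_querystring) == expected_cleaned_querystring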
Example #8
def clean_querystring(self):
    self.querystring = clean_search_querystring(self.querystring)
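
This last helper rewrites the instance's own querystring field in place, which suggests it sits on the event model itself. A heavily hedged usage sketch, assuming the method lives on AidSearchEvent and is called before saving; the field value is a placeholder:

event = AidSearchEvent(querystring='text=water&page=')
event.clean_querystring()  # normalises event.querystring via clean_search_querystring
event.save()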