Exemplo n.º 1
0
def analytics(employer, company, candidate):
    """
    Look up the analytics documents stored in Solr for a single candidate,
    restricted to a single company.

    inputs:
    :employer: Must be one of company.admins.all(); otherwise Http404 is
        raised.
    :company: Its pk is used for the 'company_id' filter query.
    :candidate: Its pk is used for the 'User_id' query.

    outputs:
    The ``docs`` attribute of the Solr search result.
    """
    is_company_admin = employer in company.admins.all()
    if not is_company_admin:
        raise Http404

    # NOTE(review): `mail` is presumably django.core.mail, whose `outbox`
    # attribute only exists while running under the test runner - confirm.
    # When it is present the test Solr configuration is used.
    running_tests = hasattr(mail, 'outbox')
    solr_config = settings.TEST_SOLR_INSTANCE if running_tests else settings.SOLR

    connection = Solr(solr_config['current'])
    connection = connection.add_filter_query('company_id:%d' % company.pk)
    connection = connection.add_query('User_id:%d' % candidate.pk)
    return connection.search().docs
Exemplo n.º 2
0
    def test_savedsearch_no_user(self):
        """
        Saved searches that have no recipient (user=None) must never end up
        in Solr.
        """
        match_all = '*:*'
        index = Solr()

        # Baseline: a saved search that does have a recipient is indexed.
        SavedSearchFactory(user=self.user)
        update_solr_task(self.test_solr)
        # One hit for the search, one for its recipient
        self.assertEqual(index.search(q=match_all).hits, 2)

        # Start from an empty index, then try the recipient-less search.
        index.delete()
        orphan_search = SavedSearchFactory(user=None)
        self.assertEqual(object_to_dict(SavedSearch, orphan_search), None)
        update_solr_task(self.test_solr)
        self.assertEqual(index.search(q=match_all).hits, 0)
Exemplo n.º 3
0
    def test_analytics_delete_old_data(self):
        """
        Updating Solr with analytics data should be followed by a prune of
        every doc in the "current" collection that is older than 30 days.
        """
        log_types = ['analytics', 'redirect']
        index = Solr()

        def indexed_uids(expected_hits, failure_message):
            # Search for analytics docs, check the hit count, and hand back
            # the set of uids that were found.
            found = index.search(q='doc_type:analytics')
            self.assertEqual(found.hits, expected_hits, failure_message)
            return set(doc['uid'] for doc in found.docs)

        # Logs dated 31 days back; these are the ones the prune should remove
        stale_logs = [
            MockLog(log_type=kind, delta=datetime.timedelta(days=-31))
            for kind in log_types
        ]
        parse_log(stale_logs, self.test_solr)
        old_uids = indexed_uids(2, 'Old logs were not added')

        # Logs carrying today's timestamp
        fresh_logs = [MockLog(log_type=kind) for kind in log_types]
        parse_log(fresh_logs, self.test_solr)
        all_uids = indexed_uids(4, 'New logs were not added')

        # In read_new_logs the prune runs right after parse_logs; nothing has
        # triggered it so far in this test, so invoke it directly.
        delete_old_analytics_docs()
        new_uids = indexed_uids(2, 'Old logs were not deleted')

        # The surviving uids must share nothing with the stale ones...
        overlap = old_uids.intersection(new_uids)
        self.assertTrue(
            old_uids.isdisjoint(new_uids),
            'Sets are not disjoint; Intersecting elements: %s' % str(overlap))

        # ...and stale + surviving together must be exactly everything that
        # was indexed before the prune.
        combined = old_uids.union(new_uids)
        self.assertEqual(
            combined,
            all_uids,
            'Sets are not equal; difference: %s' %
            str(combined.symmetric_difference(all_uids)))
Exemplo n.º 4
0
def filter_by_microsite(company, user_solr=None, facet_solr=None):
    """
    Restricts both Solr instances to saved searches that belong to the
    given company and to users who opted in to employers.

    inputs:
    :company: The company to filter on; only its pk is used.
    :user_solr: A Solr instance used for retrieving SavedSearch documents
        from solr. A new Solr() is created when this is falsy.
    :facet_solr: A Solr instance used for retrieving facets based on
        ProfileUnits data. A new Solr() is created when this is falsy.

    outputs:
    A (user_solr, facet_solr) tuple. The constraints are attached to
    user_solr with add_filter_query and to facet_solr with add_query.

    """
    user_solr = user_solr or Solr()
    facet_solr = facet_solr or Solr()

    constraints = (
        'SavedSearch_company_id:%s' % company.pk,
        'User_opt_in_employers:true',
    )
    for constraint in constraints:
        user_solr = user_solr.add_filter_query(constraint)
        facet_solr = facet_solr.add_query(constraint)

    return user_solr, facet_solr
Exemplo n.º 5
0
def filter_by_domain(domain, user_solr=None, facet_solr=None):
    """
    Restricts both Solr instances to saved searches whose feed contains
    the given domain.

    inputs:
    :domain: The domain to filter the SavedSearches on. It is placed
        between wildcards in the SavedSearch_feed query as-is, without any
        escaping.
    :user_solr: A Solr instance used for retrieving SavedSearch documents
        from solr. A new Solr() is created when this is falsy.
    :facet_solr: A Solr instance used for retrieving facets based on
        ProfileUnits data. A new Solr() is created when this is falsy.

    outputs:
    A (user_solr, facet_solr) tuple, each with the domain filter query
    added.

    """
    user_solr = user_solr or Solr()
    facet_solr = facet_solr or Solr()

    feed_filter = 'SavedSearch_feed:*%s*' % domain

    filtered_user_solr = user_solr.add_filter_query(feed_filter)
    filtered_facet_solr = facet_solr.add_filter_query(feed_filter)
    return filtered_user_solr, filtered_facet_solr
Exemplo n.º 6
0
def apply_facets_and_filters(request, user_solr=None, facet_solr=None):
    """
    Applies facets to solr based on filters currently applied and creates
    a dictionary of removable terms and the resulting url with the term removed.

    For each of 'location', 'education' and 'license': when the parameter is
    absent from request.GET the matching field(s) are added to facet_solr as
    facet fields; when it is present its value is added as a query on
    user_solr and a filter query on facet_solr, and an entry is added to the
    returned filters dictionary.

    inputs:
    :request: The current request. Reads 'date_start' and 'date_end' from
        request.REQUEST and 'location', 'education' and 'license' from
        request.GET.
    :user_solr: A Solr instance used for retrieving SavedSearch documents
        from solr. A new Solr() is created when this is falsy.
    :facet_solr: A Solr instance used for retrieving facets based on
        ProfileUnits data. A new Solr() is created when this is falsy.

    outputs:
    A (user_solr, facet_solr, filters) tuple, where filters is a dictionary
    of {'the applied filter': 'resulting url if the filter were removed'}
    """
    url = request.build_absolute_uri()
    date_start = request.REQUEST.get('date_start', None)
    date_end = request.REQUEST.get('date_end', None)
    if date_start:
        url = update_url_param(url, 'date_start', date_start)
    if date_end:
        url = update_url_param(url, 'date_end', date_end)

    filters = {}
    user_solr = Solr() if not user_solr else user_solr
    facet_solr = Solr() if not facet_solr else facet_solr

    # The location parameter should be "Country-Region-City". Faceting
    # is determined on what is already in the location parameter.
    # No location facets on country, country parameter facets on state, and
    # country-state facets on city.
    if 'location' not in request.GET:
        facet_solr = facet_solr.add_facet_field('Address_country_code')
        facet_solr = facet_solr.add_facet_field('Address_region')
    else:
        fieldname = 'Address_full_location'

        term = urllib.unquote(request.GET.get('location'))
        term_list = term.split("-")
        term_len = len(term_list)

        # Location parts are joined by "##" in the query; spaces are
        # backslash-escaped so that they stay part of the term.
        # NOTE(review): other query-syntax characters in the location value
        # are passed through unescaped.
        search_term = term.replace("-", "##").replace(" ", "\\ ")
        if term_len == 3:
            q = '%s:%s' % (fieldname, search_term)
        else:
            # Partial location: match anything below it.
            q = '%s:%s##*' % (fieldname, search_term)
        user_solr = user_solr.add_query(q)
        facet_solr = facet_solr.add_filter_query(q)

        prefix = '%s##' % term.replace('-', '##')
        if term_len == 3:
            # Country, Region, City included. No reason to facet on location.
            city = term_list[2] if term_list[2] else 'None'
            region = term_list[1] if term_list[1] else 'None'
            country = term_list[0]
            remove_term = "%s, %s, %s" % (city, region, country)
            # Removing the city filter falls back to "Country-Region".
            new_val = "%s-%s" % (term_list[0], term_list[1])
            filters[remove_term] = update_url_param(url, 'location', new_val)
        elif term_len == 2:
            # Country, Region included.
            region = term_list[1] if term_list[1] else 'None'
            country = term_list[0]
            remove_term = "%s, %s" % (region, country)
            # Removing the region filter falls back to the country alone.
            filters[remove_term] = update_url_param(url, 'location',
                                                    term_list[0])
            facet_solr = facet_solr.add_facet_field(fieldname)
            facet_solr = facet_solr.add_facet_prefix(prefix,
                                                     fieldname=fieldname)
        elif term_len == 1:
            # Country included.
            country = country_codes.get(term_list[0], term_list[0])
            filters[country] = remove_param_from_url(url, 'location')
            facet_solr = facet_solr.add_facet_field('Address_region')
            facet_solr = facet_solr.add_facet_prefix(prefix,
                                                     fieldname=fieldname)

    if 'education' not in request.GET:
        facet_solr = facet_solr.add_facet_field(
            'Education_education_level_code')
    else:
        term = urllib.unquote(request.GET.get('education'))
        # NOTE(review): int() raises ValueError when the education parameter
        # is not numeric, and edu_codes.get returns None for unknown codes.
        term_name = edu_codes.get(int(term))
        filters[term_name] = remove_param_from_url(url, 'education')

        q = 'Education_education_level_code:%s' % term
        user_solr = user_solr.add_query(q)
        facet_solr = facet_solr.add_filter_query(q)

    if 'license' not in request.GET:
        facet_solr = facet_solr.add_facet_field('License_license_name')
    else:
        term = urllib.unquote(request.GET.get('license'))
        filters[term] = remove_param_from_url(url, 'license')

        # The value is placed inside a quoted phrase, so a backslash or a
        # double quote coming from the request must be escaped or it would
        # terminate the phrase and change the query.
        phrase = term.replace('\\', '\\\\').replace('"', '\\"')
        q = 'License_license_name:"%s"' % phrase
        user_solr = user_solr.add_query(q)
        facet_solr = facet_solr.add_filter_query(q)

    return user_solr, facet_solr, filters
Exemplo n.º 7
0
    def test_analytics_log_parsing(self):
        """
        Ensure that analytics logs are parsed and stored in solr correctly

        For both the 'analytics' and the 'redirect' log type this checks the
        number of stored hits, that only the 'facets' field is multi-valued,
        that 'aguid' is a valid UUID, and that 'User_user_guid' is only
        present once a user with the matching guid exists.
        """
        company = CompanyFactory(id=1)
        business_unit = BusinessUnitFactory(id=1000)
        company.job_source_ids.add(business_unit)

        # match and no_match will be used later to ensure that the correct
        # number of documents were associated with a company or associated
        # with the default company.
        # `doc` inside the lambdas is looked up when the mock is called, i.e.
        # it refers to the loop variable of the `for doc in ...` loop below.
        match = Mock(
            wraps=lambda: self.assertEqual(doc['company_id'], company.pk))
        no_match = Mock(
            wraps=lambda: self.assertEqual(doc['company_id'], 999999))

        for log_type in ['analytics', 'redirect']:
            log = MockLog(log_type=log_type)
            parse_log([log], self.test_solr)

            solr = Solr()
            results = solr.search(q='uid:analytics*')

            # fake logs contain two lines - one human and one bot hit
            # If it is getting processed correctly, there should be only one
            # hit recorded
            self.assertEqual(results.hits, 1)
            multi_field = 'facets'
            if log_type == 'redirect':
                with self.assertRaises(KeyError):
                    results.docs[0][multi_field]
            else:
                self.assertEqual(len(results.docs[0][multi_field]), 2)
            # Every field other than the multi-valued one must hold a single
            # value rather than a list.
            for field in results.docs[0]:
                if field != multi_field:
                    self.assertNotIsInstance(results.docs[0][field], list)
            # uuid.UUID raises ValueError if the stored value is malformed.
            uuid.UUID(results.docs[0]['aguid'])
            with self.assertRaises(KeyError):
                results.docs[0]['User_user_guid']

            for doc in results.docs:
                if doc['job_view_buid'] == business_unit.pk:
                    # If business units match, company ids should match
                    match()
                else:
                    # Business units don't match; company id should be set to
                    # the default company
                    no_match()

            # Parse the same log again, this time with a user whose guid is
            # set below; the stored doc should now carry User_user_guid.
            solr.delete()
            user = UserFactory(email="*****@*****.**")
            user.user_guid = '1e5f7e122156483f98727366afe06e0b'
            user.save()
            parse_log([log], self.test_solr)
            results = solr.search(q='uid:analytics*')
            for guid in ['aguid', 'User_user_guid']:
                uuid.UUID(results.docs[0][guid])

            solr.delete()
            user.delete()

        # We have already determined that there are only two documents.
        # Ensure that there is exactly one document that matches a specific
        # company and one document that was given the default company
        self.assertEqual(match.call_count, 1)
        self.assertEqual(no_match.call_count, 1)
Exemplo n.º 8
0
def dashboard(request, template="mydashboard/mydashboard.html",
              extra_context=None):
    """
    Returns a list of candidates who created a saved search for one of the
    microsites within the company microsite list or with the company name like
    jobs.jobs/company_name/careers for example between the given (optional)
    dates, together with analytics totals for the same company or microsite.

    Inputs:
    :request:               The current request. The company is resolved
                            from it with get_company. 'microsite',
                            'date_button' and 'page' are read from
                            request.REQUEST and 'search' from request.GET.
    :template:              Template to render.
    :extra_context:         Optional dict merged into the context last.

    Returns:
    :render_to_response:    renders template with context dict

    Raises Http404 when get_company returns nothing for the request.
    """
    # NOTE(review): `mail` is presumably django.core.mail, whose `outbox`
    # attribute only exists under the test runner - confirm. When present,
    # the test Solr configuration is used.
    if hasattr(mail, 'outbox'):
        solr = settings.TEST_SOLR_INSTANCE
    else:
        solr = settings.SOLR

    company = get_company(request)
    if not company:
        raise Http404

    user_solr = Solr(solr['current'])
    facet_solr = Solr(solr['current'])

    # Add the user join only if we're using facets, not if we're simply
    # searching: i.e. only when the request carries a parameter other than
    # 'search'/'company' and the paging/date parameters ignored below.
    query_params = {'search', 'company'}
    ignored_params = ['querystring_key', 'date_end', 'date_start', 'page']
    requested_params = {q_key for q_key in request.GET.keys()
                        if q_key not in ignored_params}
    if not query_params.issuperset(requested_params):
        user_solr = user_solr.add_join(from_field='ProfileUnits_user_id',
                                       to_field='User_id')
    facet_solr = facet_solr.add_join(from_field='User_id',
                                     to_field='ProfileUnits_user_id')
    # Only the facet counts are needed from this query, not the documents.
    facet_solr = facet_solr.rows_to_fetch(0)

    authorized_microsites, buids = get_company_microsites(company)

    admins = CompanyUser.objects.filter(company=company.id)

    # Removes main user from admin list to display other admins
    admins = admins.exclude(user=request.user)
    requested_microsite = request.REQUEST.get('microsite', '')
    requested_date_button = request.REQUEST.get('date_button', False)
    candidates_page = request.REQUEST.get('page', 1)

    # the url value for 'All' in the select box is company name
    # which then gets replaced with all microsite urls for that company
    site_name = ''
    if requested_microsite != '':
        active_microsites = authorized_microsites.filter(
            domain__startswith=requested_microsite)
        user_solr, facet_solr = filter_by_domain(requested_microsite,
                                                 user_solr, facet_solr)
    else:
        active_microsites = authorized_microsites
        site_name = company.name
    if not site_name:
        try:
            site_name = active_microsites[0]
        except IndexError:
            site_name = ''
    active_microsites = set(active_microsites)

    rng, date_start, date_end, date_display = filter_by_date(request)
    user_solr = user_solr.add_filter_query(rng)
    facet_solr = facet_solr.add_query(rng)

    if request.GET.get('search', False):
        user_solr = user_solr.add_query("%s" % request.GET['search'])
        facet_solr = facet_solr.add_query("%s" % request.GET['search'])

    user_solr, facet_solr = filter_by_microsite(company, user_solr, facet_solr)

    (user_solr, facet_solr, filters) = apply_facets_and_filters(
        request, user_solr, facet_solr)

    solr_results = user_solr.rows_to_fetch(100).search()

    # List of dashboard widgets to display.
    dashboard_widgets = ["home_views", "search_views", "job_views",
                         "apply_clicks", "candidates", "search",
                         "applied_filters", "filters"]

    # Date button highlighting
    if 'today' in request.REQUEST:
        requested_date_button = 'today'
    elif 'seven_days' in request.REQUEST:
        requested_date_button = 'seven_days'
    elif 'thirty_days' in request.REQUEST:
        requested_date_button = 'thirty_days'

    url = request.build_absolute_uri()
    facets = parse_facets(facet_solr.search(), request)

    context = {
        'admin_you': request.user,
        'applied_filters': filters,
        'candidates_page': candidates_page,
        'company_admins': admins,
        'company_id': company.id,
        'company_microsites': authorized_microsites,
        'company_name': company.name,
        'dashboard_widgets': dashboard_widgets,
        'date_button': requested_date_button,
        'date_display': date_display,
        'date_end': date_end,
        'date_start': date_start,
        'date_submit_url': url,
        'facets': facets,
        'site_name': site_name,
        'view_name': 'Company Dashboard',
    }

    results = solr_results.docs

    # Maps a page_category facet value to the suffix of the 'total_<suffix>'
    # context key that carries its count.
    facet_var_map = {
        'home': 'home',
        'listing': 'job_view',
        'results': 'search',
        'redirect': 'apply',
    }
    analytics_solr = Solr(solr['current']).add_facet_field(
        'page_category')
    if requested_microsite:
        analytics_solr = analytics_solr.add_query('domain:%s' %
                                                  requested_microsite)
    else:
        analytics_solr = analytics_solr.add_query('company_id:%d' % company.pk)

    rng = filter_by_date(request, field='view_date')[0]
    analytics_solr = analytics_solr.add_filter_query(rng)

    all_results = analytics_solr.rows_to_fetch(0).search()
    analytics_facets = all_results.facets.get('facet_fields', {}).get(
        'page_category', [])
    facet_dict = sequence_to_dict(analytics_facets)
    for key in facet_dict.keys():
        # A category missing from facet_var_map ends up under 'total_'.
        context_key = 'total_%s' % facet_var_map.get(key, '')
        context[context_key] = facet_dict[key]

    # Second analytics pass: only documents that have a User_id, faceted by
    # domain.
    analytics_solr = analytics_solr.add_filter_query('User_id:[* TO *]')
    analytics_solr = analytics_solr.add_facet_field('domain')

    auth_results = analytics_solr.search()
    analytics_facet_list = auth_results.facets.get(
        'facet_fields', {}).get('domain', [])

    analytics_facets = sequence_to_dict(analytics_facet_list)
    # NOTE(review): active_microsites is built from the microsite queryset,
    # while domain[0] comes from the Solr facet - confirm that the two
    # actually compare equal, otherwise this filter drops every entry.
    analytics_facets = [domain for domain in analytics_facets.items()
                        if domain[0] in active_microsites]

    results += auth_results.docs

    candidates = dict_to_object(results)

    domains = [get_domain(c.SavedSearch_feed) for c in candidates
               if hasattr(c, 'SavedSearch_feed')]
    search_facets = [(domain, domains.count(domain)) for domain in set(domains)]

    # Add up the counts per domain across both sources. A plain dict is used
    # because itertools.groupby only merges adjacent items, and the same
    # domain can appear once in each list.
    domain_facets = {}
    for domain, count in analytics_facets + search_facets:
        domain_facets[domain] = domain_facets.get(domain, 0) + count

    # Newest first; saved-search docs are ordered by their creation date,
    # all other docs by view_date.
    candidate_list = sorted(candidates,
                            key=lambda y: y.SavedSearch_created_on
                            if hasattr(y, 'SavedSearch_created_on')
                            else y.view_date,
                            reverse=True)

    context['domain_facets'] = domain_facets
    context['candidates'] = candidate_list
    # Number of distinct users. The list is ordered by date, not by user, so
    # the ids are collected into a set instead of being grouped.
    context['total_candidates'] = len(
        {candidate.User_id for candidate in candidate_list})

    if extra_context is not None:
        context.update(extra_context)
    return render_to_response(template, context,
                              context_instance=RequestContext(request))