def test_visitor_country(self):
     """Compare ``analyzer.visitor_country`` with a reference tally.

     The reference groups the page-request events by ``ip``, resolves each
     address with ``geoip.country`` and sums the event counts per country.
     The (country, count) pairs are then ordered by count, largest first,
     and both results are compared through their string representations.
     """
     raw_lines = TestAnalyzer.log_string.split("\n")
     collection = Collection([Event(log) for log in raw_lines])
     page_events = analyzer.filter_page_requests(collection)
     views_by_country = {}
     for ip, events in page_events.group("ip").items():
         country = geoip.country(ip)
         # Each group is passed through the page-request filter again
         # before it is counted.
         hits = analyzer.filter_page_requests(events).count()
         views_by_country[country] = views_by_country.get(country, 0) + hits
     expected_output = sorted(
         views_by_country.items(),
         key=operator.itemgetter(1),
         reverse=True,
     )
     submission_output = analyzer.visitor_country(collection)
     self.assertEqual(str(expected_output), str(submission_output))
 def test_filter_page_request(self):
     """Compare ``analyzer.filter_page_requests`` with a manual filter.

     The reference is a fresh ``Collection`` loaded from the same events
     and filtered with ``analyzer.predicate_page_request``. Both results
     are compared through their string representations.
     """
     events = [Event(log) for log in TestAnalyzer.log_string.split("\n")]
     collection = Collection(events)
     submission_output = analyzer.filter_page_requests(collection)

     reference = Collection()
     reference.load_from_collection(collection)
     # The return value of filter() is ignored here; presumably it filters
     # the collection in place -- TODO confirm against Collection.filter.
     reference.filter(analyzer.predicate_page_request)

     expected_text = str(reference)
     submission_text = str(submission_output)
     self.assertEqual(expected_text, submission_text)
 def test_popular_pages(self):
     """Compare ``analyzer.popular_pages`` with a reference tally.

     The reference groups the page-request events by ``http_request``,
     takes the second space-separated token of each request as the URL and
     sums the event counts per URL. The (url, count) pairs are ordered by
     count, largest first, and both results are compared as strings.
     """
     raw_lines = TestAnalyzer.log_string.split("\n")
     collection = Collection([Event(log) for log in raw_lines])
     page_events = analyzer.filter_page_requests(collection)
     # canonical code
     hits_by_url = {}
     for request, events in page_events.group("http_request").items():
         # presumably the request looks like "METHOD URL PROTOCOL" -- only
         # the token at index 1 is used.
         url = request.split(" ")[1]
         hits_by_url[url] = hits_by_url.get(url, 0) + events.count()
     expected_output = sorted(
         hits_by_url.items(),
         key=operator.itemgetter(1),
         reverse=True,
     )
     submission_output = analyzer.popular_pages(collection)
     self.assertEqual(str(expected_output), str(submission_output))
    def test_referrers(self):
        """Compare ``analyzer.referrers`` with a reference tally.

        The reference groups the page-request events by ``referrer``, skips
        every referrer containing ``www.cs.umd.edu/~bederson`` and sums the
        event counts of the remaining ones. The (referrer, count) pairs are
        ordered by count, largest first, and both results are compared
        through their string representations.
        """
        collection = Collection([Event(log) for log in TestAnalyzer.log_string.split("\n")])
        pages = analyzer.filter_page_requests(collection)
        # canonical code
        ref_pageview = {}
        for referrer, events in pages.group("referrer").items():
            # ignore referrers within the self domain
            if "www.cs.umd.edu/~bederson" in referrer:
                continue
            ref_pageview[referrer] = ref_pageview.get(referrer, 0) + events.count()

        # Sort with key= rather than cmp=: the cmp argument of sorted() was
        # removed in Python 3, and key= gives the same descending-by-count
        # order while matching the other tests in this class.
        expected_output = sorted(ref_pageview.items(), reverse=True, key=operator.itemgetter(1))
        submission_output = analyzer.referrers(collection)
        self.assertEqual(str(expected_output), str(submission_output))