def test_without_protocol(self):
    """Check utils.normalize_url against HTTP(S), about: and chrome: URLs.

    Each entry in ``test_urls`` pairs an input URL with the value that
    ``utils.normalize_url`` is expected to return for it.
    """
    test_urls = (
        ('http://www.example.com', 'http://example.com'),
        ('http://example.com', 'http://example.com'),
        ('http://example.com/the/place/to/be', 'http://example.com'),
        ('https://example.net:8080', 'https://example.net:8080'),
        ('https://example.net:8080/abc', 'https://example.net:8080'),
        # NOTE(review): the "[email protected]" text looks like an e-mail
        # obfuscation artifact; the URL presumably carried a "user@" prefix
        # in front of example.com -- confirm against the upstream test.
        ('https://[email protected]:8080/xyz', 'https://example.com:8080'),
        ('about:config', 'about:config'),
        ('chrome://something/exciting', 'chrome://something/exciting'),
    )
    # Iterate the tuple defined above. The loop previously referenced an
    # undefined name, so the test raised NameError before any case ran.
    for url, expected in test_urls:
        eq_(utils.normalize_url(url), expected)
def test_normalize_url(self):
    """Verify that utils.normalize_url maps every sample URL to its site.

    Each case is an ``(input URL, expected result)`` pair; the cases are
    checked in order with ``eq_``.
    """
    cases = [
        ('http://www.example.com', 'http://example.com'),
        ('http://example.com', 'http://example.com'),
        ('http://example.com/the/place/to/be', 'http://example.com'),
        ('https://example.net:8080', 'https://example.net:8080'),
        ('https://example.net:8080/abc', 'https://example.net:8080'),
        # NOTE(review): "[email protected]" looks like an e-mail obfuscation
        # artifact rather than an intended host -- confirm upstream.
        ('https://[email protected]:8080/xyz', 'https://example.com:8080'),
        ('https://*****:*****@example.com:8080/z', 'https://example.com:8080'),
        ('about:config', 'about:config'),
        ('chrome://something/exciting', 'chrome://something/exciting'),
    ]
    for raw_url, site in cases:
        normalized = utils.normalize_url(raw_url)
        eq_(normalized, site)
def __call__(self, data):
    """Yield ``(out_key, (m_id, message))`` pairs for usable comments.

    Every ``(key, value)`` pair from ``recombined(data)`` is counted in
    ``self.comments_in``. ``value`` is split on tabs into eleven fields.
    Records with an empty URL, or whose type is not one of the
    broken / issue / praise short codes, are skipped. For each remaining
    record ``self.comments_out`` is incremented once, and one pair is
    yielded per key returned by ``cartesian``.
    """
    accepted = {
        OPINION_BROKEN.short,
        OPINION_ISSUE.short,
        OPINION_PRAISE.short,
    }

    for _key, record in recombined(data):
        self.comments_in += 1

        # Exactly eleven tab-separated fields are expected; the message is
        # last, so any further tabs stay inside it (maxsplit=10).
        (m_id, _ts, kind, product, version, platform, _locale,
         _manufacturer, _device, url, message) = record.split('\t', 10)

        if not url:
            continue
        if kind not in accepted:
            continue

        app = '<%s>' % product
        site = normalize_url(url)
        keys = cartesian((version, ), (site, ), (app, platform, None),
                         (kind, ))
        payload = (m_id, message)

        # Counted once per accepted comment, not once per emitted key.
        self.comments_out += 1
        for out_key in keys:
            yield (out_key, payload)
def __call__(self, data):
    """Turn recombined comment records into keyed ``(m_id, message)`` pairs.

    Each value from ``recombined(data)`` is a tab-separated record of
    eleven fields. ``self.comments_in`` counts every record seen.
    ``self.comments_out`` counts those that have a URL and a supported
    opinion type. For each counted record, one ``(out_key, out_value)``
    pair is yielded per key produced by ``cartesian``.
    """
    wanted_types = frozenset(
        (OPINION_BROKEN.short, OPINION_ISSUE.short, OPINION_PRAISE.short)
    )
    for _unused_key, line in recombined(data):
        self.comments_in += 1
        fields = line.split("\t", 10)
        # Unpacking enforces the eleven-field layout of the record.
        (
            m_id,
            _ts,
            opinion,
            product,
            version,
            platform,
            _locale,
            _manufacturer,
            _device,
            url,
            message,
        ) = fields
        if url and opinion in wanted_types:
            app = "<%s>" % product
            site = normalize_url(url)
            out_keys = cartesian(
                (version,), (site,), (app, platform, None), (opinion,)
            )
            out_value = (m_id, message)
            # One increment per accepted record, before its keys are emitted.
            self.comments_out += 1
            for out_key in out_keys:
                yield (out_key, out_value)