def test_source_common():
    """
    Parse EXAMPLE_01 without an explicit ``log_format`` and verify each row.

    The sample must yield exactly two rows; every field of both rows is
    compared against the value expected from the log text.
    """
    with apache.ApacheSource(EXAMPLE_01.splitlines(True)) as source:
        # Stays None when the source yields nothing, so the trailing
        # ``assert entry`` fails on an empty parse.
        entry = None
        for index, entry in enumerate(source):
            if index == 0:
                assert entry.remote_host == dt.hostname('64.242.88.10')
                assert entry.ident is None
                assert entry.remote_user is None
                assert entry.time == dt.DateTime(2004, 3, 8, 0, 56, 39)
                assert entry.request == dt.Request(
                    'GET',
                    dt.url('/twiki/bin/view/Sandbox/WebHome?rev=1.6'),
                    'HTTP/1.1')
                assert entry.status == 200
                assert entry.size == 8545
                continue
            if index == 1:
                assert entry.remote_host == dt.hostname('lordgun.org')
                assert entry.ident is None
                assert entry.remote_user == 'foo'
                assert entry.time == dt.DateTime(2004, 3, 8, 1, 1, 53)
                assert entry.request == dt.Request(
                    'GET', dt.url('/razor.html'), 'HTTP/1.0')
                assert entry.status == 302
                assert entry.size == 2869
                continue
            # Any row beyond the second is unexpected.
            assert False
        assert entry
        assert index == 1
def test_source_combined():
    """
    Parse EXAMPLE_02 using ``apache.COMBINED`` and verify both rows.

    In addition to the fields checked for the common format, each row's
    ``req_Referer`` and ``req_User_Agent`` attributes are compared. The
    sample must yield exactly two rows.
    """
    msie_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0; byond_4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; InfoPath.2; OfficeLiveConnector.1.5; OfficeLivePatch.1.3; .NET4.0E; .NET4.0C)'
    firefox_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR; rv:1.9.2.23) Gecko/20110920 Firefox/3.6.23'
    lines = EXAMPLE_02.splitlines(True)
    with apache.ApacheSource(lines, log_format=apache.COMBINED) as source:
        # Pre-set so ``assert record`` below fails if nothing is yielded.
        record = None
        for position, record in enumerate(source):
            if position == 0:
                assert record.remote_host == dt.hostname('78.86.48.95')
                assert record.ident is None
                assert record.remote_user is None
                assert record.time == dt.DateTime(2011, 10, 27, 23, 0, 5)
                assert record.request == dt.Request(
                    'GET',
                    dt.url('/template/images/ITSheader.jpg'),
                    'HTTP/1.1')
                assert record.status == 200
                assert record.size == 14745
                assert record.req_Referer is None
                assert record.req_User_Agent == msie_agent
            elif position == 1:
                assert record.remote_host == dt.hostname('217.129.225.117')
                assert record.ident is None
                assert record.remote_user is None
                assert record.time == dt.DateTime(2011, 10, 27, 23, 0, 7)
                assert record.request == dt.Request(
                    'GET', dt.url('/images/spacer.gif'), 'HTTP/1.1')
                assert record.status == 200
                assert record.size == 43
                assert record.req_Referer == dt.url(
                    'http://eprints.lse.ac.uk/33718/')
                assert record.req_User_Agent == firefox_agent
            else:
                # A third row means the source produced too much.
                assert False
        assert record
        assert position == 1
def rows_null_first():
    """
    Return three sample ``Row`` records, the first of which carries ``None``
    in its third and fourth positions.

    The last record also has ``None`` in its final position. ``Row`` is not
    defined in this function; it is taken from the enclosing scope.
    """
    # presumably the two None values are the method and url fields --
    # confirm against the Row definition in scope
    with_nulls = Row(
        datatypes.datetime('2002-06-24 16:40:23'),
        datatypes.address('172.224.24.114'),
        None,
        None,
        0.01,
        408,
        0,
    )
    get_picture = Row(
        datatypes.datetime('2002-05-02 20:18:01'),
        datatypes.address('172.22.255.255'),
        'GET',
        datatypes.url('/images/picture.jpg'),
        0.1,
        302,
        16328,
    )
    head_picture = Row(
        datatypes.datetime('2002-05-29 12:34:56'),
        datatypes.address('9.180.235.203'),
        'HEAD',
        datatypes.url('/images/picture.jpg'),
        0.1,
        202,
        None,
    )
    return [with_nulls, get_picture, head_picture]
def test_source_combined():
    """
    Check that EXAMPLE_02 parses into exactly two rows when read with the
    ``apache.COMBINED`` log format, comparing every attribute of each row
    (including ``req_Referer`` and ``req_User_Agent``).
    """
    with apache.ApacheSource(
            EXAMPLE_02.splitlines(True),
            log_format=apache.COMBINED) as source:
        # None until the first row arrives; ``assert entry`` after the loop
        # therefore rejects an empty result.
        entry = None
        for index, entry in enumerate(source):
            if index == 0:
                assert entry.remote_host == dt.hostname('78.86.48.95')
                assert entry.ident is None
                assert entry.remote_user is None
                assert entry.time == dt.DateTime(2011, 10, 27, 23, 0, 5)
                assert entry.request == dt.Request(
                    'GET',
                    dt.url('/template/images/ITSheader.jpg'),
                    'HTTP/1.1',
                )
                assert entry.status == 200
                assert entry.size == 14745
                assert entry.req_Referer is None
                assert entry.req_User_Agent == 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0; byond_4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; InfoPath.2; OfficeLiveConnector.1.5; OfficeLivePatch.1.3; .NET4.0E; .NET4.0C)'
                continue
            if index == 1:
                assert entry.remote_host == dt.hostname('217.129.225.117')
                assert entry.ident is None
                assert entry.remote_user is None
                assert entry.time == dt.DateTime(2011, 10, 27, 23, 0, 7)
                assert entry.request == dt.Request(
                    'GET',
                    dt.url('/images/spacer.gif'),
                    'HTTP/1.1',
                )
                assert entry.status == 200
                assert entry.size == 43
                assert entry.req_Referer == dt.url(
                    'http://eprints.lse.ac.uk/33718/')
                assert entry.req_User_Agent == 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR; rv:1.9.2.23) Gecko/20110920 Firefox/3.6.23'
                continue
            # Only two rows are expected.
            assert False
        assert entry
        assert index == 1
def rows():
    """
    Return three sample ``Row`` records built from fixed values.

    ``Row`` is not defined here and is taken from the enclosing scope. The
    final record has ``None`` in its last position.
    """
    samples = []
    samples.append(Row(
        datatypes.datetime('2002-06-24 16:40:23'),
        datatypes.address('172.224.24.114'),
        'POST',
        datatypes.url('/Default.htm'),
        0.67,
        200,
        7930,
    ))
    samples.append(Row(
        datatypes.datetime('2002-05-02 20:18:01'),
        datatypes.address('172.22.255.255'),
        'GET',
        datatypes.url('/images/picture.jpg'),
        0.1,
        302,
        16328,
    ))
    samples.append(Row(
        datatypes.datetime('2002-05-29 12:34:56'),
        datatypes.address('9.180.235.203'),
        'HEAD',
        datatypes.url('/images/picture.jpg'),
        0.1,
        202,
        None,
    ))
    return samples
def rows():
    """
    Return three sample rows as instances of a locally defined ``Row``
    namedtuple.

    The namedtuple has the fields ``timestamp``, ``client``, ``method``,
    ``url``, ``time_taken``, ``status`` and ``size``. The last row's ``size``
    is ``None``.
    """
    # Construct some test rows with appropriate namedtuples
    field_names = (
        'timestamp',
        'client',
        'method',
        'url',
        'time_taken',
        'status',
        'size',
    )
    Row = namedtuple('Row', field_names)
    post_default = Row(
        timestamp=datatypes.datetime('2002-06-24 16:40:23'),
        client=datatypes.address('172.224.24.114'),
        method='POST',
        url=datatypes.url('/Default.htm'),
        time_taken=0.67,
        status=200,
        size=7930,
    )
    get_picture = Row(
        timestamp=datatypes.datetime('2002-05-02 20:18:01'),
        client=datatypes.address('172.22.255.255'),
        method='GET',
        url=datatypes.url('/images/picture.jpg'),
        time_taken=0.1,
        status=302,
        size=16328,
    )
    head_picture = Row(
        timestamp=datatypes.datetime('2002-05-29 12:34:56'),
        client=datatypes.address('9.180.235.203'),
        method='HEAD',
        url=datatypes.url('/images/picture.jpg'),
        time_taken=0.1,
        status=202,
        size=None,
    )
    return [post_default, get_picture, head_picture]
def test_source_date_formats():
    """
    Parse EXAMPLE_04 with a custom log format whose time field uses an
    explicit strftime-style pattern, and verify both resulting rows.

    The final index check requires the source to yield exactly two rows.
    """
    custom_format = "%{%Y-%m-%dT%H:%M:%S%z}t %H %m %U%q %>s %O"
    with apache.ApacheSource(
            EXAMPLE_04.splitlines(True), log_format=custom_format) as source:
        # Left as None when nothing is yielded so ``assert entry`` fails.
        entry = None
        for index, entry in enumerate(source):
            if index == 0:
                assert entry.time == dt.datetime('2004-03-08 00:56:39')
                assert entry.method == 'GET'
                assert entry.protocol == 'HTTP/1.0'
                assert entry.url_stem == dt.url(
                    '/twiki/bin/view/Sandbox/WebHome')
                assert entry.url_query == dt.url('?rev=1.6')
                assert entry.status == 200
                assert entry.bytes_sent == 8545
                continue
            if index == 1:
                assert entry.time == dt.datetime('2004-03-07 22:01:53')
                assert entry.method == 'HEAD'
                assert entry.protocol == 'HTTP/1.1'
                assert entry.url_stem == dt.url('/razor.html')
                assert entry.url_query is None
                assert entry.status == 302
                assert entry.bytes_sent == 2869
        assert entry
        assert index == 1
def test_request():
    """
    Check ``dt.request`` on well-formed and malformed request lines.

    Well-formed lines must compare equal to the matching ``dt.Request``; a
    ``*`` target is expected to come back as ``None``. Malformed lines must
    raise ``ValueError``.
    """
    assert dt.request('OPTIONS * HTTP/1.1') == dt.Request(
        'OPTIONS', None, 'HTTP/1.1')
    assert dt.request('GET / HTTP/1.0') == dt.Request(
        'GET', dt.url('/'), 'HTTP/1.0')
    assert dt.request('POST /foo/bar/baz?query HTTP/1.0') == dt.Request(
        'POST', dt.url('/foo/bar/baz?query'), 'HTTP/1.0')
    # The calls below are made directly rather than wrapped in ``assert``:
    # an assert statement is skipped entirely when assertions are disabled,
    # and would turn a falsy return into an unrelated AssertionError.
    with pytest.raises(ValueError):
        dt.request('')
    with pytest.raises(ValueError):
        dt.request('GET')
    with pytest.raises(ValueError):
        dt.request('GET HTTP/1.0')
    with pytest.raises(ValueError):
        dt.request('GET /foo/bar')
def test_request():
    """
    Verify that ``dt.request`` parses valid request lines into the expected
    ``dt.Request`` and raises ``ValueError`` for malformed ones.
    """
    assert dt.request('OPTIONS * HTTP/1.1') == dt.Request(
        'OPTIONS', None, 'HTTP/1.1')
    assert dt.request('GET / HTTP/1.0') == dt.Request(
        'GET', dt.url('/'), 'HTTP/1.0')
    assert dt.request('POST /foo/bar/baz?query HTTP/1.0') == dt.Request(
        'POST', dt.url('/foo/bar/baz?query'), 'HTTP/1.0')
    malformed_lines = ('', 'GET', 'GET HTTP/1.0', 'GET /foo/bar')
    for line in malformed_lines:
        # Call directly instead of via ``assert``: an assert statement is
        # dropped when assertions are disabled, so the call would never run.
        with pytest.raises(ValueError):
            dt.request(line)
def test_url_query():
    """
    Check the ``query`` mapping of a parsed URL.

    Keys present in the query string must be found, an absent key must not,
    and each value is expected as a list of strings (an empty value is
    expected as ``['']``).
    """
    url = dt.url('http://foo/bar?baz=quux&x=1&y=')
    assert 'baz' in url.query
    assert 'x' in url.query
    assert 'y' in url.query
    assert 'z' not in url.query
    assert url.query['baz'] == ['quux']
    assert url.query['x'] == ['1']
    assert url.query['y'] == ['']
def test_source_normal():
    """
    Run ``iis.IISSource`` over INTERNET_EXAMPLE and then INTRANET_EXAMPLE.

    For every row yielded, the source's header attributes and field list and
    the row's own attributes are compared with fixed expected values. After
    each run at least one row must have been seen and the number of rows
    iterated must equal ``source.count``.
    """
    internet_fields = [
        'date',
        'time',
        'c-ip',
        'cs-username',
        's-ip',
        's-port',
        'cs-method',
        'cs-uri-stem',
        'cs-uri-query',
        'sc-status',
        'sc-bytes',
        'cs-bytes',
        'time-taken',
        'cs(User-Agent)',
        'cs(Referrer)',
    ]
    intranet_fields = [
        'date',
        'time',
        'c-ip',
        'cs-username',
        's-ip',
        's-port',
        'cs-method',
        'cs-uri-stem',
        'cs-uri-query',
        'sc-status',
        'cs(User-Agent)',
    ]

    # Test two normal runs with INTERNET_EXAMPLE and INTRANET_EXAMPLE
    with iis.IISSource(INTERNET_EXAMPLE.splitlines(True)) as source:
        # None until a row is yielded, so ``assert entry`` catches an empty
        # result.
        entry = None
        for index, entry in enumerate(source):
            assert source.version == '1.0'
            assert source.software == 'Microsoft Internet Information Services 6.0'
            assert source.date == dt.DateTime(2002, 5, 24, 20, 18, 1)
            assert source.fields == internet_fields
            assert entry.date == dt.Date(2002, 5, 24)
            assert entry.time == dt.Time(20, 18, 1)
            assert str(entry.c_ip) == '172.224.24.114'
            assert entry.cs_username is None
            assert str(entry.s_ip) == '206.73.118.24'
            assert entry.s_port == 80
            assert entry.cs_method == 'GET'
            assert str(entry.cs_uri_stem) == '/Default.htm'
            assert entry.cs_uri_query is None
            assert entry.sc_status == 200
            assert entry.sc_bytes == 7930
            assert entry.cs_bytes == 248
            assert entry.time_taken == 31.0
            assert entry.cs_User_Agent == 'Mozilla/4.0 (compatible; MSIE 5.01; Windows 2000 Server)'
            assert entry.cs_Referrer == dt.url('http://64.224.24.114/')
        assert entry
        assert index + 1 == source.count

    with iis.IISSource(INTRANET_EXAMPLE.splitlines(True)) as source:
        entry = None
        for index, entry in enumerate(source):
            assert source.fields == intranet_fields
            assert entry.date == dt.Date(2002, 5, 2)
            assert entry.time == dt.Time(17, 42, 15)
            assert str(entry.c_ip) == '172.22.255.255'
            assert entry.cs_username is None
            assert str(entry.s_ip) == '172.30.255.255'
            assert entry.s_port == 80
            assert entry.cs_method == 'GET'
            assert str(entry.cs_uri_stem) == '/images/picture.jpg'
            assert entry.cs_uri_query is None
            assert entry.sc_status == 200
            assert entry.cs_User_Agent == 'Mozilla/4.0 (compatible;MSIE 5.5; Windows 2000 Server)'
        assert entry
        assert index + 1 == source.count
def url_parse(s):
    """
    Parse a URL string found in a log file.

    A variant on the standard Python urlparse.urlparse function: the result
    type adds a :meth:`~lars.datatypes.Url.__str__` method which outputs the
    reconstructed URL, and has specialized hostname and path properties
    which return enhanced objects instead of simple strings.

    :param str s:
        The string containing the URI to parse

    :returns:
        A :class:`~lars.datatypes.Url` tuple representing the URL, or
        ``None`` when *s* is ``'-'`` or the empty string
    """
    # '-' and '' are treated as "no URL" and are never handed to the parser.
    if s in ('-', ''):
        return None
    return dt.url(s)
def test_source_date_formats():
    """
    Verify that EXAMPLE_04 is parsed correctly when the log format gives an
    explicit pattern for the time field.

    Both rows are compared attribute by attribute, and the final index check
    requires exactly two rows.
    """
    lines = EXAMPLE_04.splitlines(True)
    with apache.ApacheSource(
            lines,
            log_format="%{%Y-%m-%dT%H:%M:%S%z}t %H %m %U%q %>s %O") as source:
        # Pre-set so the ``assert record`` below fails on an empty source.
        record = None
        for position, record in enumerate(source):
            if position == 0:
                assert record.time == dt.datetime('2004-03-08 00:56:39')
                assert record.method == 'GET'
                assert record.protocol == 'HTTP/1.0'
                assert record.url_stem == dt.url(
                    '/twiki/bin/view/Sandbox/WebHome')
                assert record.url_query == dt.url('?rev=1.6')
                assert record.status == 200
                assert record.bytes_sent == 8545
            elif position == 1:
                assert record.time == dt.datetime('2004-03-07 22:01:53')
                assert record.method == 'HEAD'
                assert record.protocol == 'HTTP/1.1'
                assert record.url_stem == dt.url('/razor.html')
                assert record.url_query is None
                assert record.status == 302
                assert record.bytes_sent == 2869
        assert record
        assert position == 1
def test_source_common():
    """
    Check that EXAMPLE_01, read without an explicit ``log_format``, yields
    exactly two rows with the expected attribute values.
    """
    lines = EXAMPLE_01.splitlines(True)
    with apache.ApacheSource(lines) as source:
        # None until the first row is yielded; the final ``assert record``
        # therefore rejects an empty result.
        record = None
        for position, record in enumerate(source):
            if position == 0:
                assert record.remote_host == dt.hostname('64.242.88.10')
                assert record.ident is None
                assert record.remote_user is None
                assert record.time == dt.DateTime(2004, 3, 8, 0, 56, 39)
                assert record.request == dt.Request(
                    'GET',
                    dt.url('/twiki/bin/view/Sandbox/WebHome?rev=1.6'),
                    'HTTP/1.1',
                )
                assert record.status == 200
                assert record.size == 8545
            elif position == 1:
                assert record.remote_host == dt.hostname('lordgun.org')
                assert record.ident is None
                assert record.remote_user == 'foo'
                assert record.time == dt.DateTime(2004, 3, 8, 1, 1, 53)
                assert record.request == dt.Request(
                    'GET',
                    dt.url('/razor.html'),
                    'HTTP/1.0',
                )
                assert record.status == 302
                assert record.size == 2869
            else:
                # More than two rows is a failure.
                assert False
        assert record
        assert position == 1
def test_url():
    """
    Check ``dt.url`` against hand-built ``dt.Url`` tuples, then inspect the
    attributes of one parsed URL in detail.
    """
    # (input text, six positional arguments for the expected dt.Url)
    cases = (
        ('foo', ('', '', 'foo', '', '', '')),
        ('//foo/bar', ('', 'foo', '/bar', '', '', '')),
        ('http://foo/', ('http', 'foo', '/', '', '', '')),
        ('http://foo/bar?baz=quux', ('http', 'foo', '/bar', '', 'baz=quux', '')),
        ('https://foo/bar#baz', ('https', 'foo', '/bar', '', '', 'baz')),
    )
    for text, parts in cases:
        assert dt.url(text) == dt.Url(*parts)

    parsed = dt.url('http://localhost/foo/bar#baz')
    assert parsed.scheme == 'http'
    assert parsed.netloc == 'localhost'
    assert parsed.path == dt.Path('/foo', 'bar', '')
    assert parsed.path_str == '/foo/bar'
    assert parsed.fragment == 'baz'
    assert parsed.username is None
    assert parsed.password is None
    assert parsed.port is None
    assert parsed.hostname == dt.hostname('localhost')
    # NOTE(review): presumably relies on 'localhost' resolving to 127.0.0.1
    # on the machine running the test -- confirm.
    assert parsed.hostname.address == dt.address('127.0.0.1')
def test_request_parse():
    """
    Check ``parsers.request_parse``: ``'-'`` must give ``None``, and request
    lines must compare equal to the matching ``datatypes.Request``.
    """
    parse = parsers.request_parse

    assert parse('-') is None

    options_request = parse('OPTIONS * HTTP/1.0')
    assert options_request == datatypes.Request('OPTIONS', None, 'HTTP/1.0')

    get_request = parse('GET /foo/bar HTTP/1.1')
    assert get_request == datatypes.Request(
        'GET', datatypes.url('/foo/bar'), 'HTTP/1.1')