def test07_explore_show_head(self):
    """Check the report printed by Explorer.explore_show_head().

    Three cases are exercised: an existing file whose content-length
    matches the expected value, the same file with a mismatching
    expectation, and a path that does not exist (a 404 status line is
    expected in the output).
    """
    explorer = Explorer()

    # Existing file; the expected content-length agrees with the output.
    # An extra 'unknown' header is also passed in, but nothing is
    # asserted about how it is reported.
    expected_headers = {'content-length': 20, 'unknown': 'abc'}
    with capture_stdout() as capturer:
        explorer.explore_show_head(uri='tests/testdata/dir1/file_a',
                                   check_headers=expected_headers)
    output = capturer.result
    for pattern in (r'HEAD tests/testdata/dir1/file_a',
                    r'content-length: 20 MATCHES EXPECTED VALUE'):
        self.assertTrue(re.search(pattern, output))

    # Same file, but the expected content-length is wrong.
    with capture_stdout() as capturer:
        explorer.explore_show_head('tests/testdata/dir1/file_a',
                                   check_headers={'content-length': 99})
    output = capturer.result
    for pattern in (r'HEAD tests/testdata/dir1/file_a',
                    r'content-length: 20 EXPECTED 99'):
        self.assertTrue(re.search(pattern, output))

    # Path that does not exist.
    with capture_stdout() as capturer:
        explorer.explore_show_head('tests/testdata/does_not_exist')
    output = capturer.result
    for pattern in (r'HEAD tests/testdata/does_not_exist',
                    r'status: 404'):
        self.assertTrue(re.search(pattern, output))
def test20_parse_document(self):
    """Check that Client.parse_document() reports document type and size.

    The key property under test is that parse_document() parses the
    document named by ``sitemap_name`` and identifies its type. Also
    covered: a missing document raises ClientFatalError, and verbose
    mode with ``max_sitemap_entries`` set truncates the listing.
    """
    client = Client()

    # (document, summary line expected on stdout)
    expectations = (
        ('tests/testdata/examples_from_spec/resourcesync_ex_1.xml',
         r'Parsed resourcelist document with 2 entries'),
        ('tests/testdata/examples_from_spec/resourcesync_ex_17.xml',
         r'Parsed resourcedump document with 3 entries'),
        ('tests/testdata/examples_from_spec/resourcesync_ex_19.xml',
         r'Parsed changelist document with 4 entries'),
        ('tests/testdata/examples_from_spec/resourcesync_ex_22.xml',
         r'Parsed changedump document with 3 entries'),
    )
    for document, summary in expectations:
        with capture_stdout() as capturer:
            client.sitemap_name = document
            client.parse_document()
        self.assertTrue(re.search(summary, capturer.result))

    # Document that doesn't exist
    client.sitemap_name = '/does_not_exist'
    self.assertRaises(ClientFatalError, client.parse_document)

    # Verbose output, truncated to a single entry
    with capture_stdout() as capturer:
        client.verbose = True
        client.sitemap_name = 'tests/testdata/examples_from_spec/resourcesync_ex_1.xml'
        client.max_sitemap_entries = 1
        client.parse_document()
    verbose_output = capturer.result
    self.assertTrue(re.search(r'Showing first 1 entries', verbose_output))
    self.assertTrue(re.search(r'\[0\] ', verbose_output))
    self.assertFalse(re.search(r'\[1\] ', verbose_output))
def test07_explore_show_head(self):
    """Check the report printed by Explorer.explore_show_head().

    Three cases are exercised: an existing file whose content-length
    matches the expected value, the same file with a mismatching
    expectation, and a path that does not exist (a 404 status line is
    expected in the output).
    """
    explorer = Explorer()

    # Existing file; the expected content-length agrees with the output.
    # An extra 'unknown' header is also passed in, but nothing is
    # asserted about how it is reported.
    expected_headers = {'content-length': 20, 'unknown': 'abc'}
    with capture_stdout() as capturer:
        explorer.explore_show_head(uri='tests/testdata/dir1/file_a',
                                   check_headers=expected_headers)
    output = capturer.result
    for pattern in (r'HEAD tests/testdata/dir1/file_a',
                    r'content-length: 20 MATCHES EXPECTED VALUE'):
        self.assertTrue(re.search(pattern, output))

    # Same file, but the expected content-length is wrong.
    with capture_stdout() as capturer:
        explorer.explore_show_head('tests/testdata/dir1/file_a',
                                   check_headers={'content-length': 99})
    output = capturer.result
    for pattern in (r'HEAD tests/testdata/dir1/file_a',
                    r'content-length: 20 EXPECTED 99'):
        self.assertTrue(re.search(pattern, output))

    # Path that does not exist.
    with capture_stdout() as capturer:
        explorer.explore_show_head('tests/testdata/does_not_exist')
    output = capturer.result
    for pattern in (r'HEAD tests/testdata/does_not_exist',
                    r'status: 404'):
        self.assertTrue(re.search(pattern, output))
def test47_write_source_description(self):
    """Check Client.write_source_description() to stdout and to a file.

    The stdout cases look for the urlset element, the description
    capability marker and per-capability-list entries; the file case
    only checks that more than 100 bytes were written.
    """
    client = Client()

    # Simple case to STDOUT (no assertion is made about entry 'c').
    with capture_stdout() as capturer:
        client.write_source_description(['a', 'b', 'c'])
    xml = capturer.result
    expected_fragments = (
        r'<urlset ',
        r'<rs:md capability="description" />',
        r'<url><loc>a</loc><rs:md capability="capabilitylist" /></url>',
        r'<url><loc>b</loc><rs:md capability="capabilitylist" /></url>',
    )
    for fragment in expected_fragments:
        self.assertTrue(re.search(fragment, xml))

    # More complex case to STDOUT, using keyword arguments and a link.
    with capture_stdout() as capturer:
        client.write_source_description(
            capability_lists=['http://a.b/'],
            links=[{'rel': 'c', 'href': 'd'}])
    self.assertTrue(re.search(r'http://a.b/', capturer.result))

    # To file (just check that something is written).
    outfile = os.path.join(self.tmpdir, 'sd_out.xml')
    client.write_source_description(
        capability_lists=['http://a.b/'],
        outfile=outfile,
        links=[{'rel': 'c', 'href': 'd'}])
    written = os.path.getsize(outfile)
    self.assertTrue(written > 100)
def test41_write_resource_list_path(self):
    """Check Client.write_resource_list() with an explicit ``paths`` value.

    With ``paths`` given, only resources under that path should be
    listed. Output to stdout, to an XML file, and as a dump (default
    and explicitly named base) is exercised; file outputs are only
    checked for being larger than 100 bytes.
    """
    client = Client()
    client.set_mappings(['http://example.org/', 'tests/testdata/'])
    links = [{'rel': 'uri_c', 'href': 'uri_d'}]

    # With an explicit paths setting only the specified paths are included.
    with capture_stdout() as capturer:
        client.write_resource_list(paths='tests/testdata/dir1', links=links)
    listing = capturer.result
    for present in (r'<rs:md at="\S+" capability="resourcelist"',
                    r'<url><loc>http://example.org/dir1/file_a</loc>',
                    r'<url><loc>http://example.org/dir1/file_b</loc>'):
        self.assertTrue(re.search(present, listing))
    self.assertFalse(
        re.search(r'<url><loc>http://example.org/dir2/file_x</loc>', listing))
    # Check link present
    for link_attr in (r'rel="uri_c"', r'href="uri_d"'):
        self.assertTrue(re.search(link_attr, listing))
    # Travis CI does not preserve timestamps from github so test here for the file
    # size but not the datestamp
    # self.assertTrue( re.search(r'<url><loc>http://example.org/dir1/file_a</loc><lastmod>[\w\-:]+</lastmod><rs:md length="20" /></url>', capturer.result ) )
    # self.assertTrue( re.search(r'<url><loc>http://example.org/dir1/file_b</loc><lastmod>[\w\-:]+</lastmod><rs:md length="45" /></url>', capturer.result ) )

    # To file
    xml_path = os.path.join(self.tmpdir, 'rl_out.xml')
    client.write_resource_list(paths='tests/testdata/dir1', outfile=xml_path)
    self.assertTrue(os.path.getsize(xml_path) > 100)

    # Dump instead (default file); the test expects the dump to appear
    # at the configured base name with '00000.zip' appended.
    client.default_resource_dump = os.path.join(self.tmpdir, 'rl_out_dump_def')
    default_zip = client.default_resource_dump + '00000.zip'
    self.assertFalse(os.path.exists(default_zip))
    client.write_resource_list(paths='tests/testdata/dir1', dump=True)
    self.assertTrue(os.path.getsize(default_zip) > 100)

    # Dump to a specific base name
    outbase = os.path.join(self.tmpdir, 'rl_out_dump')
    named_zip = outbase + '00000.zip'
    self.assertFalse(os.path.exists(named_zip))
    client.write_resource_list(paths='tests/testdata/dir1',
                               dump=True,
                               outfile=outbase)
    self.assertTrue(os.path.getsize(named_zip) > 100)
def test45_write_change_list(self):
    """Check Client.write_change_list() to stdout and to a file.

    First a reference sitemap is compared with itself, then with the
    test data on disk (where the test expects res1 to be reported as
    deleted), and finally the output is written to a file.
    """
    client = Client()
    reference = 'tests/testdata/examples_from_spec/resourcesync_ex_1.xml'
    changelist_marker = r'<rs:md capability="changelist"'

    # Reference compared against itself
    with capture_stdout() as capturer:
        client.write_change_list(ref_sitemap=reference,
                                 newref_sitemap=reference)
    self.assertTrue(re.search(changelist_marker, capturer.result))

    # Compare the reference with testdata on disk
    client.set_mappings(['http://example.org/', 'tests/testdata/'])
    with capture_stdout() as capturer:
        client.write_change_list(ref_sitemap=reference,
                                 paths='tests/testdata/dir1')
    changes = capturer.result
    self.assertTrue(re.search(changelist_marker, changes))
    self.assertTrue(
        re.search(
            r'<url><loc>http://example.com/res1</loc><rs:md change="deleted" /></url>',
            changes))

    # To file
    outfile = os.path.join(self.tmpdir, 'cl_out.xml')
    client.write_change_list(ref_sitemap=reference,
                             newref_sitemap=reference,
                             outfile=outfile)
    written = os.path.getsize(outfile)
    self.assertTrue(written > 100)
def test47_write_source_description(self):
    """Check Client.write_source_description() to stdout and to a file.

    The stdout cases look for the urlset element, the description
    capability marker and per-capability-list entries; the file case
    only checks that more than 100 bytes were written.
    """
    client = Client()

    # Simple case to STDOUT (no assertion is made about entry 'c').
    with capture_stdout() as capturer:
        client.write_source_description(['a', 'b', 'c'])
    xml = capturer.result
    expected_fragments = (
        r'<urlset ',
        r'<rs:md capability="description" />',
        r'<url><loc>a</loc><rs:md capability="capabilitylist" /></url>',
        r'<url><loc>b</loc><rs:md capability="capabilitylist" /></url>',
    )
    for fragment in expected_fragments:
        self.assertTrue(re.search(fragment, xml))

    # More complex case to STDOUT, using keyword arguments and a link.
    with capture_stdout() as capturer:
        client.write_source_description(
            capability_lists=['http://a.b/'],
            links=[{'rel': 'c', 'href': 'd'}])
    self.assertTrue(re.search(r'http://a.b/', capturer.result))

    # To file (just check that something is written).
    outfile = os.path.join(self.tmpdir, 'sd_out.xml')
    client.write_source_description(
        capability_lists=['http://a.b/'],
        outfile=outfile,
        links=[{'rel': 'c', 'href': 'd'}])
    written = os.path.getsize(outfile)
    self.assertTrue(written > 100)
def test48_write_dump_if_requested(self):
    """Check Client.write_dump_if_requested() with and without a dump file.

    With no dump file the return value must be falsy; with a dump file
    the captured stdout is expected to contain 'FIXME'.
    """
    client = Client()

    # No dump file requested
    outcome = client.write_dump_if_requested(ChangeList(), None)
    self.assertFalse(outcome)

    # Dump file requested
    with capture_stdout() as capturer:
        client.write_dump_if_requested(ChangeList(), '/tmp/a_file')
    printed = capturer.result
    self.assertTrue(re.search(r'FIXME', printed))
def test04_explore(self):
    """Run Explorer.explore() with a scripted 'q' input and check it finishes.

    The test passes when the captured stdout contains
    'resync-explorer done'.
    """
    explorer = Explorer()
    explorer.sitemap_name = 'tests/testdata/explore1/caps1.xml'
    with capture_stdout() as capturer:
        # fake_input stands in for interactive input; the original note
        # here says an attempt to read stdin would generate an IOError.
        explorer.fake_input = 'q'
        explorer.explore()
    session_output = capturer.result
    self.assertTrue(re.search(r'resync-explorer done', session_output))
def test48_write_dump_if_requested(self):
    """Check Client.write_dump_if_requested() with and without a dump file.

    With no dump file the return value must be falsy; with a dump file
    the captured stdout is expected to contain 'FIXME'.
    """
    client = Client()

    # No dump file requested
    outcome = client.write_dump_if_requested(ChangeList(), None)
    self.assertFalse(outcome)

    # Dump file requested
    with capture_stdout() as capturer:
        client.write_dump_if_requested(ChangeList(), '/tmp/a_file')
    printed = capturer.result
    self.assertTrue(re.search(r'FIXME', printed))
def test06_explore_show_summary(self):
    """Check the summary printed by Explorer.explore_show_summary().

    An empty CapabilityList is summarised first, then one holding two
    resources, for which the test expects a numbered listing with
    'uri:changelist' as [1] and 'uri:resourcelist' as [2].
    """
    explorer = Explorer()

    # Empty capability list
    with capture_stdout() as capturer:
        explorer.explore_show_summary(list=CapabilityList())
    self.assertTrue(
        re.search(r'Parsed \(unknown capability\) document with 0 entries:',
                  capturer.result))

    # Dummy capabilities object and display
    capabilities = CapabilityList()
    for uri in ('uri:resourcelist', 'uri:changelist'):
        capabilities.add(Resource(uri))
    with capture_stdout() as capturer:
        explorer.explore_show_summary(capabilities, False, [])
    summary = capturer.result
    for pattern in (r'Parsed \(unknown capability\) document with 2 entries:',
                    r'\[1\] uri:changelist',
                    r'\[2\] uri:resourcelist'):
        self.assertTrue(re.search(pattern, summary))
def test05_explore_uri(self):
    """Check Explorer.explore_uri() on a capability list with 'q' as input.

    With the scripted input 'q' the call is expected to raise
    ExplorerQuit, after printing which file is read and a summary of
    the parsed capability list.
    """
    explorer = Explorer()
    with capture_stdout() as capturer:
        explorer.fake_input = 'q'
        target = XResource('tests/testdata/explore1/caps1.xml')
        self.assertRaises(ExplorerQuit, explorer.explore_uri, target)
    transcript = capturer.result
    for pattern in (r'Reading tests/testdata/explore1/caps1.xml',
                    r'Parsed capabilitylist document with 4 entries:'):
        self.assertTrue(re.search(pattern, transcript))
def test49_read_reference_resource_list(self):
    """Check Client.read_reference_resource_list() in quiet and verbose modes.

    The same two-entry example is read three times: with default
    settings (nothing printed), with ``verbose`` set (entries are
    printed), and with ``max_sitemap_entries`` set to 1 (the printed
    listing is truncated while the returned list still has 2 entries).
    """
    client = Client()
    example = 'tests/testdata/examples_from_spec/resourcesync_ex_1.xml'

    # Default settings: nothing is written to stdout
    with capture_stdout() as capturer:
        resources = client.read_reference_resource_list(example)
    self.assertEqual(len(resources), 2)
    self.assertEqual('', capturer.result)

    # Verbose: the entries are shown
    client.verbose = True
    with capture_stdout() as capturer:
        resources = client.read_reference_resource_list(example)
    self.assertEqual(len(resources), 2)
    self.assertTrue(re.search(r'http://example.com/res2', capturer.result))

    # Verbose with the display limited to one entry
    client.verbose = True
    client.max_sitemap_entries = 1
    with capture_stdout() as capturer:
        resources = client.read_reference_resource_list(example)
    truncated = capturer.result
    self.assertEqual(len(resources), 2)
    self.assertTrue(re.search(r'http://example.com/res1', truncated))
    self.assertTrue(re.search(r'Showing first 1 entries', truncated))
    self.assertFalse(re.search(r'http://example.com/res2', truncated))
def test40_write_resource_list_mappings(self):
    """Check Client.write_resource_list() when only mappings are set.

    With no explicit paths setting the mapping is used, so the test
    expects resources from both dir1 and dir2 in the output.
    """
    client = Client()
    client.set_mappings(['http://example.org/', 'tests/testdata/'])

    with capture_stdout() as capturer:
        client.write_resource_list()
    listing = capturer.result

    expected = (
        r'<rs:md at="\S+" capability="resourcelist"',
        r'<url><loc>http://example.org/dir1/file_a</loc>',
        r'<url><loc>http://example.org/dir1/file_b</loc>',
        r'<url><loc>http://example.org/dir2/file_x</loc>',
    )
    for pattern in expected:
        self.assertTrue(re.search(pattern, listing))
def test20_parse_document(self):
    """Check that Client.parse_document() reports document type and size.

    The key property under test is that parse_document() parses the
    document named by ``sitemap_name`` and identifies its type. Also
    covered: a missing document raises ClientFatalError, and verbose
    mode with ``max_sitemap_entries`` set truncates the listing.
    """
    client = Client()

    # (document, summary line expected on stdout)
    expectations = (
        ('tests/testdata/examples_from_spec/resourcesync_ex_1.xml',
         r'Parsed resourcelist document with 2 entries'),
        ('tests/testdata/examples_from_spec/resourcesync_ex_17.xml',
         r'Parsed resourcedump document with 3 entries'),
        ('tests/testdata/examples_from_spec/resourcesync_ex_19.xml',
         r'Parsed changelist document with 4 entries'),
        ('tests/testdata/examples_from_spec/resourcesync_ex_22.xml',
         r'Parsed changedump document with 3 entries'),
    )
    for document, summary in expectations:
        with capture_stdout() as capturer:
            client.sitemap_name = document
            client.parse_document()
        self.assertTrue(re.search(summary, capturer.result))

    # Document that doesn't exist
    client.sitemap_name = '/does_not_exist'
    self.assertRaises(ClientFatalError, client.parse_document)

    # Verbose output, truncated to a single entry
    with capture_stdout() as capturer:
        client.verbose = True
        client.sitemap_name = 'tests/testdata/examples_from_spec/resourcesync_ex_1.xml'
        client.max_sitemap_entries = 1
        client.parse_document()
    verbose_output = capturer.result
    self.assertTrue(re.search(r'Showing first 1 entries', verbose_output))
    self.assertTrue(re.search(r'\[0\] ', verbose_output))
    self.assertFalse(re.search(r'\[1\] ', verbose_output))
def test46_write_capability_list(self):
    """Check Client.write_capability_list() to stdout and to a file.

    The stdout case looks for the urlset element, the capabilitylist
    marker and one entry per capability; the file case only checks
    that more than 100 bytes were written.
    """
    client = Client()
    caps = {'a': 'uri_a', 'b': 'uri_b'}

    # Simple case to STDOUT
    with capture_stdout() as capturer:
        client.write_capability_list(caps)
    xml = capturer.result
    expected_fragments = (
        r'<urlset ',
        r'<rs:md capability="capabilitylist" />',
        r'<url><loc>uri_a</loc><rs:md capability="a"',
        r'<url><loc>uri_b</loc><rs:md capability="b"',
    )
    for fragment in expected_fragments:
        self.assertTrue(re.search(fragment, xml))

    # To file (just check that something is written)
    outfile = os.path.join(self.tmpdir, 'caps_out.xml')
    client.write_capability_list(capabilities=caps, outfile=outfile)
    written = os.path.getsize(outfile)
    self.assertTrue(written > 100)
def test49_read_reference_resource_list(self):
    """Check Client.read_reference_resource_list() in quiet and verbose modes.

    The same two-entry example is read three times: with default
    settings (nothing printed), with ``verbose`` set (entries are
    printed), and with ``max_sitemap_entries`` set to 1 (the printed
    listing is truncated while the returned list still has 2 entries).
    """
    client = Client()
    example = 'tests/testdata/examples_from_spec/resourcesync_ex_1.xml'

    # Default settings: nothing is written to stdout
    with capture_stdout() as capturer:
        resources = client.read_reference_resource_list(example)
    self.assertEqual(len(resources), 2)
    self.assertEqual('', capturer.result)

    # Verbose: the entries are shown
    client.verbose = True
    with capture_stdout() as capturer:
        resources = client.read_reference_resource_list(example)
    self.assertEqual(len(resources), 2)
    self.assertTrue(re.search(r'http://example.com/res2', capturer.result))

    # Verbose with the display limited to one entry
    client.verbose = True
    client.max_sitemap_entries = 1
    with capture_stdout() as capturer:
        resources = client.read_reference_resource_list(example)
    truncated = capturer.result
    self.assertEqual(len(resources), 2)
    self.assertTrue(re.search(r'http://example.com/res1', truncated))
    self.assertTrue(re.search(r'Showing first 1 entries', truncated))
    self.assertFalse(re.search(r'http://example.com/res2', truncated))
def test45_write_change_list(self):
    """Check Client.write_change_list() to stdout and to a file.

    First a reference sitemap is compared with itself, then with the
    test data on disk (where the test expects res1 to be reported as
    deleted), and finally the output is written to a file.
    """
    client = Client()
    reference = 'tests/testdata/examples_from_spec/resourcesync_ex_1.xml'
    changelist_marker = r'<rs:md capability="changelist"'

    # Reference compared against itself
    with capture_stdout() as capturer:
        client.write_change_list(ref_sitemap=reference,
                                 newref_sitemap=reference)
    self.assertTrue(re.search(changelist_marker, capturer.result))

    # Compare the reference with testdata on disk
    client.set_mappings(['http://example.org/', 'tests/testdata/'])
    with capture_stdout() as capturer:
        client.write_change_list(ref_sitemap=reference,
                                 paths='tests/testdata/dir1')
    changes = capturer.result
    self.assertTrue(re.search(changelist_marker, changes))
    self.assertTrue(
        re.search(
            r'<url><loc>http://example.com/res1</loc><rs:md change="deleted" /></url>',
            changes))

    # To file
    outfile = os.path.join(self.tmpdir, 'cl_out.xml')
    client.write_change_list(ref_sitemap=reference,
                             newref_sitemap=reference,
                             outfile=outfile)
    written = os.path.getsize(outfile)
    self.assertTrue(written > 100)
def test41_write_resource_list_path(self):
    """Check Client.write_resource_list() with an explicit ``paths`` value.

    With ``paths`` given, only resources under that path should be
    listed. Output to stdout, to an XML file, and as a dump (default
    and explicitly named base) is exercised; file outputs are only
    checked for being larger than 100 bytes.
    """
    client = Client()
    client.set_mappings(['http://example.org/', 'tests/testdata/'])
    links = [{'rel': 'uri_c', 'href': 'uri_d'}]

    # With an explicit paths setting only the specified paths are included.
    with capture_stdout() as capturer:
        client.write_resource_list(paths='tests/testdata/dir1', links=links)
    listing = capturer.result
    for present in (r'<rs:md at="\S+" capability="resourcelist"',
                    r'<url><loc>http://example.org/dir1/file_a</loc>',
                    r'<url><loc>http://example.org/dir1/file_b</loc>'):
        self.assertTrue(re.search(present, listing))
    self.assertFalse(
        re.search(r'<url><loc>http://example.org/dir2/file_x</loc>', listing))
    # Check link present
    for link_attr in (r'rel="uri_c"', r'href="uri_d"'):
        self.assertTrue(re.search(link_attr, listing))
    # Travis CI does not preserve timestamps from github so test here for the file
    # size but not the datestamp
    # self.assertTrue( re.search(r'<url><loc>http://example.org/dir1/file_a</loc><lastmod>[\w\-:]+</lastmod><rs:md length="20" /></url>', capturer.result ) )
    # self.assertTrue( re.search(r'<url><loc>http://example.org/dir1/file_b</loc><lastmod>[\w\-:]+</lastmod><rs:md length="45" /></url>', capturer.result ) )

    # To file
    xml_path = os.path.join(self.tmpdir, 'rl_out.xml')
    client.write_resource_list(paths='tests/testdata/dir1', outfile=xml_path)
    self.assertTrue(os.path.getsize(xml_path) > 100)

    # Dump instead (default file); the test expects the dump to appear
    # at the configured base name with '00000.zip' appended.
    client.default_resource_dump = os.path.join(self.tmpdir, 'rl_out_dump_def')
    default_zip = client.default_resource_dump + '00000.zip'
    self.assertFalse(os.path.exists(default_zip))
    client.write_resource_list(paths='tests/testdata/dir1', dump=True)
    self.assertTrue(os.path.getsize(default_zip) > 100)

    # Dump to a specific base name
    outbase = os.path.join(self.tmpdir, 'rl_out_dump')
    named_zip = outbase + '00000.zip'
    self.assertFalse(os.path.exists(named_zip))
    client.write_resource_list(paths='tests/testdata/dir1',
                               dump=True,
                               outfile=outbase)
    self.assertTrue(os.path.getsize(named_zip) > 100)
def test40_write_resource_list_mappings(self):
    """Check Client.write_resource_list() when only mappings are set.

    With no explicit paths setting the mapping is used, so the test
    expects resources from both dir1 and dir2 in the output.
    """
    client = Client()
    client.set_mappings(['http://example.org/', 'tests/testdata/'])

    with capture_stdout() as capturer:
        client.write_resource_list()
    listing = capturer.result

    expected = (
        r'<rs:md at="\S+" capability="resourcelist"',
        r'<url><loc>http://example.org/dir1/file_a</loc>',
        r'<url><loc>http://example.org/dir1/file_b</loc>',
        r'<url><loc>http://example.org/dir2/file_x</loc>',
    )
    for pattern in expected:
        self.assertTrue(re.search(pattern, listing))
def test46_write_capability_list(self):
    """Check Client.write_capability_list() to stdout and to a file.

    The stdout case looks for the urlset element, the capabilitylist
    marker and one entry per capability; the file case only checks
    that more than 100 bytes were written.
    """
    client = Client()
    caps = {'a': 'uri_a', 'b': 'uri_b'}

    # Simple case to STDOUT
    with capture_stdout() as capturer:
        client.write_capability_list(caps)
    xml = capturer.result
    expected_fragments = (
        r'<urlset ',
        r'<rs:md capability="capabilitylist" />',
        r'<url><loc>uri_a</loc><rs:md capability="a"',
        r'<url><loc>uri_b</loc><rs:md capability="b"',
    )
    for fragment in expected_fragments:
        self.assertTrue(re.search(fragment, xml))

    # To file (just check that something is written)
    outfile = os.path.join(self.tmpdir, 'caps_out.xml')
    client.write_capability_list(capabilities=caps, outfile=outfile)
    written = os.path.getsize(outfile)
    self.assertTrue(written > 100)
def test10_expand_relative_uri(self):
    """Check Explorer.expand_relative_uri() resolves against a context URI.

    'abc' relative to 'https://example.org/ctx' must give
    'https://example.org/abc', and the captured stdout must mention
    'expanded relative URI'.
    """
    explorer = Explorer()
    with capture_stdout() as capturer:
        expanded = explorer.expand_relative_uri('https://example.org/ctx',
                                                'abc')
    notice = capturer.result
    self.assertEqual(expanded, 'https://example.org/abc')
    self.assertTrue(re.search(r'expanded relative URI', notice))