Exemplo n.º 1
0
 def test02_dump_check_files(self):
     """Check Dump.check_files() passes and total_size is 28 for two listed files."""
     changes = ChangeList()
     # Two updated resources whose declared lengths (7 + 21) add up to 28
     for name, size in (('a', 7), ('b', 21)):
         changes.add(Resource('http://ex.org/' + name,
                              length=size,
                              path='resync/test/testdata/' + name,
                              change="updated"))
     dump = Dump(resources=changes)
     self.assertTrue(dump.check_files())
     self.assertEqual(dump.total_size, 28)
Exemplo n.º 2
0
 def test48_write_dump_if_requested(self):
     """Check write_dump_if_requested() with and without a dump file name."""
     client = Client()
     # Without a dump file the call must return a false value
     no_dump_result = client.write_dump_if_requested(ChangeList(), None)
     self.assertFalse(no_dump_result)
     # With a dump file the text written to stdout must mention FIXME
     with capture_stdout() as captured:
         client.write_dump_if_requested(ChangeList(), '/tmp/a_file')
     fixme_match = re.search(r'FIXME', captured.result)
     self.assertTrue(fixme_match)
Exemplo n.º 3
0
 def write_change_list(self,outfile=None,ref_sitemap=None,newref_sitemap=None,
                       empty=None,links=None,dump=None):
     """Build a Change List and write it to outfile or print it to stdout.

     Unless empty is true, the reference resource list read from
     ref_sitemap is compared with either the resource list read from
     newref_sitemap or, when newref_sitemap is None, self.resource_list.
     The updated, deleted and created resources from that comparison are
     added to the Change List.

     Parameters:
     outfile - basename passed to ChangeList.write(); if None the XML is
         printed to stdout instead
     ref_sitemap - reference sitemap describing the old state
     newref_sitemap - optional sitemap describing the new state
     empty - if true, skip the comparison so that no changed resources
         are added
     links - passed to the ChangeList constructor as ln
     dump - passed on to self.write_dump_if_requested()
     """
     cl = ChangeList(ln=links)
     if (not empty):
         # 1. Get and parse reference sitemap
         old_rl = self.read_reference_resource_list(ref_sitemap)
         # 2. Depending on whether a newref_sitemap was specified, either
         # read that or use the resource list held on this object
         if (newref_sitemap is None):
             new_rl = self.resource_list
         else:
             new_rl = self.read_reference_resource_list(newref_sitemap,name='new reference')
         # 3. Calculate change list; unchanged resources are not recorded
         (_same,updated,deleted,created) = old_rl.compare(new_rl)
         cl.add_changed_resources( updated, change='updated' )
         cl.add_changed_resources( deleted, change='deleted' )
         cl.add_changed_resources( created, change='created' )
     # 4. Write out change list
     kwargs = { 'pretty_xml': True,
                'mapper' : self.mapper }
     if (self.max_sitemap_entries is not None):
         kwargs['max_sitemap_entries'] = self.max_sitemap_entries
     if (outfile is None):
         # Parenthesized single-argument form is valid as both the
         # Python 2 print statement and the Python 3 print function
         print(cl.as_xml(**kwargs))
     else:
         cl.write(basename=outfile,**kwargs)
     self.write_dump_if_requested(cl,dump)
Exemplo n.º 4
0
 def test_ex_03(self):
     """resourcesync_ex_3 is a simple change_list with 2 resources"""
     change_list = ChangeList()
     change_list.parse('tests/testdata/examples_from_spec/resourcesync_ex_3.xml')
     self.assertEqual(len(change_list.resources), 2, '2 resources')
     expected_uris = ['http://example.com/res2.pdf',
                      'http://example.com/res3.tiff']
     self.assertEqual(sorted(change_list.uris()), expected_uris)
     # Entries are checked in document order: first lastmod values, then change types
     for idx, lastmod in enumerate(['2013-01-02T13:00:00Z', '2013-01-02T18:00:00Z']):
         self.assertEqual(change_list.resources[idx].lastmod, lastmod)
     for idx, change in enumerate(['updated', 'deleted']):
         self.assertEqual(change_list.resources[idx].change, change)
Exemplo n.º 5
0
 def test3_change_list(self):
     """Five resources with distinct URIs give a ChangeList of length five."""
     change_list = ChangeList()
     # URIs 'a'..'e' are paired with timestamps 1..5
     for stamp, uri in enumerate('abcde', start=1):
         change_list.add(Resource(uri, timestamp=stamp))
     self.assertEqual(len(change_list), 5, "5 things in src")
Exemplo n.º 6
0
 def test4_iter(self):
     """Iterating a ChangeList yields the added resources in insertion order."""
     change_list = ChangeList()
     for stamp, uri in enumerate("abcd", start=1):
         change_list.add(Resource(uri, timestamp=stamp, change="created"))
     # Collect everything the iterator produces
     seen = list(change_list)
     self.assertEqual(len(seen), 4)
     self.assertEqual(seen[0].uri, "a")
     self.assertEqual(seen[3].uri, "d")
Exemplo n.º 7
0
    def test08_parse(self):
        """Parse a change list document with two entries and check resources and metadata."""
        # Adjacent literals are concatenated into one document string
        document = (
            "<?xml version='1.0' encoding='UTF-8'?>\n"
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<rs:md capability="changelist" from="2013-01-01"/>'
            '<url><loc>/tmp/rs_test/src/file_a</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="updated" length="12" /></url>'
            '<url><loc>/tmp/rs_test/src/file_b</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="deleted" length="32" /></url>'
            '</urlset>'
        )
        change_list = ChangeList()
        change_list.parse(fh=io.StringIO(document))
        self.assertEqual(len(change_list.resources), 2, 'got 2 resources')
        self.assertEqual(change_list.md['capability'], 'changelist', 'capability set')
        self.assertEqual(change_list.md['md_from'], '2013-01-01')
Exemplo n.º 8
0
 def test4_iter(self):
     """A ChangeList can be iterated and returns its four resources, 'a' first and 'd' last."""
     change_list = ChangeList()
     entries = (('a', 1), ('b', 2), ('c', 3), ('d', 4))
     for uri, stamp in entries:
         change_list.add(Resource(uri, timestamp=stamp))
     collected = list(change_list)
     self.assertEqual(len(collected), 4)
     first, last = collected[0], collected[3]
     self.assertEqual(first.uri, 'a')
     self.assertEqual(last.uri, 'd')
Exemplo n.º 9
0
 def test20_as_xml(self):
     """Serialize a two-entry change list and check the capability, from and resource a."""
     change_list = ChangeList()
     change_list.md_from = "1970-01-01T00:00:00Z"
     for stamp, uri in enumerate("ab", start=1):
         change_list.add(Resource(uri, timestamp=stamp, change="updated"))
     xml = change_list.as_xml()
     # Each pattern must be found somewhere in the serialized XML
     checks = (
         (r'<rs:md .*capability="changelist"',
          "XML has capability"),
         (r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"',
          "XML has from to seconds precision (and not more)"),
         (r"<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>",
          "XML has resource a"),
     )
     for pattern, message in checks:
         self.assertTrue(re.search(pattern, xml), message)
Exemplo n.º 10
0
 def test04_change_list(self):
     """Adding five created resources gives a ChangeList of length five."""
     change_list = ChangeList()
     for stamp, uri in enumerate('abcde', start=1):
         change_list.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(change_list), 5, "5 things in src")
Exemplo n.º 11
0
 def test02_set_with_repeats(self):
     """Repeated URIs are kept as separate entries in a ChangeList."""
     change_list = ChangeList()
     # 'a' and 'b' each appear twice, with different timestamps and change types
     entries = [
         ('a', 1, 'updated'),
         ('b', 1, 'created'),
         ('c', 1, 'deleted'),
         ('a', 2, 'deleted'),
         ('b', 2, 'updated'),
     ]
     for uri, stamp, change in entries:
         change_list.add(Resource(uri, timestamp=stamp, change=change))
     self.assertEqual(len(change_list), 5, "5 changes in change_list")
Exemplo n.º 12
0
 def test_build_ex_03(self):
     """Simple Change List document """
     change_list = ChangeList()
     change_list.md_from = '2013-01-02T00:00:00Z'
     change_list.md_until = '2013-01-03T00:00:00Z'
     updated_pdf = Resource(uri='http://example.com/res2.pdf',
                            lastmod='2013-01-02T13:00:00Z',
                            change="updated")
     change_list.add(updated_pdf)
     deleted_tiff = Resource(uri='http://example.com/res3.tiff',
                             lastmod='2013-01-02T18:00:00Z',
                             change='deleted')
     change_list.add(deleted_tiff)
     # Compare the generated XML with the stored example document
     expected_xml = self._open_ex('resourcesync_ex_3').read()
     self._assert_xml_equal(change_list.as_xml(), expected_xml)
Exemplo n.º 13
0
    def test09_parse_no_capability(self):
        """Parsing a document without a capability must raise SitemapParseError."""
        # missing capability is an error for changelist
        document = ''.join([
            "<?xml version='1.0' encoding='UTF-8'?>\n",
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">',
            '<url><loc>http://example.com/res1</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="updated"/></url>',
            '</urlset>',
        ])
        change_list = ChangeList()
        handle = io.StringIO(document)
        with self.assertRaises(SitemapParseError):
            change_list.parse(fh=handle)
Exemplo n.º 14
0
 def test_build_ex_28(self):
     """Build a change list with a resource and its metadata record; compare with example 28."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     # The PDF resource, linked to the metadata record that describes it
     pdf = Resource(
         uri='http://example.com/res2.pdf',
         lastmod='2013-01-03T18:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876,
         mime_type='application/pdf')
     pdf.link_set(
         rel='describedby',
         href='http://example.com/res2_dublin-core_metadata.xml',
         modified='2013-01-01T12:00:00Z',
         type='application/xml')
     # The metadata record, linked back to the PDF and to its profile
     metadata = Resource(
         uri='http://example.com/res2_dublin-core_metadata.xml',
         lastmod='2013-01-03T19:00:00Z',
         change='updated',
         mime_type='application/xml')
     metadata.link_set(
         rel='describes',
         href='http://example.com/res2.pdf',
         modified='2013-01-03T18:00:00Z',
         hash='md5:1584abdf8ebdc9802ac0c6a7402c03b6',
         length='8876',
         type='application/pdf')
     metadata.link_set(
         rel='profile',
         href='http://purl.org/dc/elements/1.1/')
     change_list.add([pdf, metadata])
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_28')
Exemplo n.º 15
0
 def test_build_ex_27(self):
     """Build a change list whose resources carry patch links; compare with example 27."""
     patch_rel = 'http://www.openarchives.org/rs/terms/patch'
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     json_res = Resource(
         uri='http://example.com/res4',
         lastmod='2013-01-03T17:00:00Z',
         change='updated',
         sha256='f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj',
         length=56778,
         mime_type='application/json')
     json_res.link_set(
         rel=patch_rel,
         href='http://example.com/res4-json-patch',
         modified='2013-01-03T17:00:00Z',
         hash='sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld',  # FIXME - inconsistent
         length=73,
         type='application/json-patch')
     tiff_res = Resource(
         uri='http://example.com/res5-full.tiff',
         lastmod='2013-01-03T18:00:00Z',
         change='updated',
         sha256='f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk',
         length='9788456778',
         mime_type='image/tiff')
     tiff_res.link_set(
         rel=patch_rel,
         href='http://example.com/res5-diff',
         modified='2013-01-03T18:00:00Z',
         hash='sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT',
         length=4533,
         type='application/x-tiff-diff')
     change_list.add([json_res, tiff_res])
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_27')
Exemplo n.º 16
0
 def test_build_ex_24(self):
     """Build a change list entry with three duplicate links; compare with example 24."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     res = Resource(
         uri='http://example.com/res1',
         lastmod='2013-01-03T18:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876,
         mime_type='text/html')
     # Resource.link_set will add or change a link depending on whether one
     # with the particular rel exists, unless allow_duplicates=True.
     # Resource.link_add will always add. Both are exercised here.
     modified = '2013-01-03T18:00:00Z'
     res.link_set(
         rel='duplicate',
         href='http://mirror1.example.com/res1',
         pri='1',
         modified=modified)
     res.link_set(
         rel='duplicate',
         href='http://mirror2.example.com/res1',
         pri='2',
         modified=modified,
         allow_duplicates=True)
     res.link_add(
         rel='duplicate',
         href='gsiftp://gridftp.example.com/res1',
         pri='3',
         modified=modified)
     change_list.add(res)
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_24')
Exemplo n.º 17
0
    def test10_parse_bad_capability(self):
        """Parsing a document with an unexpected capability must raise SitemapParseError."""
        # the <rs:md capability="bad_capability".. should give error
        document = (
            "<?xml version='1.0' encoding='UTF-8'?>\n"
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<rs:md capability="bad_capability" from="2013-01-01"/>'
            '<url><loc>http://example.com/bad_res_1</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="updated"/></url>'
            '</urlset>'
        )
        change_list = ChangeList()
        handle = io.StringIO(document)
        with self.assertRaises(SitemapParseError):
            change_list.parse(fh=handle)
Exemplo n.º 18
0
 def test1_set_with_repeats(self):
     """A ChangeList keeps every change, including repeats for the same URI."""
     change_list = ChangeList()
     changes = (
         ("a", 1, "updated"),
         ("b", 1, "created"),
         ("c", 1, "deleted"),
         ("a", 2, "deleted"),
         ("b", 2, "updated"),
     )
     for uri, stamp, kind in changes:
         change_list.add(Resource(uri, timestamp=stamp, change=kind))
     self.assertEqual(len(change_list), 5, "5 changes in change_list")
Exemplo n.º 19
0
 def test3_change_list(self):
     """Five created resources with distinct URIs are all counted by len()."""
     change_list = ChangeList()
     uris = ["a", "b", "c", "d", "e"]
     # Timestamps run 1..5 in step with the URI list
     for position, uri in enumerate(uris):
         change_list.add(Resource(uri, timestamp=position + 1, change="created"))
     self.assertEqual(len(change_list), 5, "5 things in src")
Exemplo n.º 20
0
 def test1_set_with_repeats(self):
     """Resources with repeated URIs are all retained by a ChangeList."""
     change_list = ChangeList()
     # 'a' and 'b' are added a second time with a later timestamp
     for uri, stamp in (('a', 1), ('b', 1), ('c', 1), ('a', 2), ('b', 2)):
         change_list.add(Resource(uri, timestamp=stamp))
     self.assertEqual(len(change_list), 5, "5 changes in change_list")
Exemplo n.º 21
0
 def test03_multiple(self):
     """Two capabilities added to a CapabilityList both show up in its XML."""
     capability_list = CapabilityList()
     capability_list.add_capability(ResourceList(), "rl.xml")
     capability_list.add_capability(ChangeList(), "cl.xml")
     self.assertEqual(len(capability_list), 2)
     xml = capability_list.as_xml()
     # One entry is expected per capability, each with its own location
     expected_patterns = (
         r'<loc>rl.xml</loc><rs:md capability="resourcelist" />',
         r'<loc>cl.xml</loc><rs:md capability="changelist" />',
     )
     for pattern in expected_patterns:
         self.assertTrue(re.search(pattern, xml))
Exemplo n.º 22
0
 def test_build_ex_13(self):
     """Capability List document with 4 entries"""
     capability_list = CapabilityList()
     capability_list.describedby = 'http://example.com/info_about_set1_of_resources.xml'
     capability_list.up = 'http://example.com/resourcesync_description.xml'
     # One capability object per document type, added in this order
     capabilities = (
         (ResourceList, 'http://example.com/dataset1/resourcelist.xml'),
         (ResourceDump, 'http://example.com/dataset1/resourcedump.xml'),
         (ChangeList, 'http://example.com/dataset1/changelist.xml'),
         (ChangeDump, 'http://example.com/dataset1/changedump.xml'),
     )
     for capability_class, uri in capabilities:
         capability_list.add_capability(capability=capability_class(uri=uri))
     expected_xml = self._open_ex('resourcesync_ex_13').read()
     self._assert_xml_equal(capability_list.as_xml(), expected_xml)
Exemplo n.º 23
0
 def test06_add_changed_resources(self):
     """Exercise add_changed_resources() and copying the changes into a ResourceList."""
     created_rl = ResourceList()
     for uri, stamp in (('a', 1), ('d', 4)):
         created_rl.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(created_rl), 2, "2 things in added resource_list")
     change_list = ChangeList()
     change_list.add_changed_resources(created_rl, change='created')
     self.assertEqual(len(change_list), 2, "2 things added")
     # Walk the first two entries of the change list by hand
     entries = iter(change_list)
     entry = next(entries)
     self.assertEqual(entry.uri, 'a', "changes[0].uri=a")
     self.assertEqual(entry.timestamp, 1, "changes[0].timestamp=1")
     self.assertEqual(entry.change, 'created')
     entry = next(entries)
     self.assertEqual(entry.timestamp, 4, "changes[1].timestamp=4")
     self.assertEqual(entry.change, 'created', "changes[1].change=createdd")
     # Now add some with updated (one URI already present, one new)
     updated_rl = ResourceList()
     for uri, stamp in (('a', 5), ('b', 6)):
         updated_rl.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(updated_rl), 2, "2 things in updated resource_list")
     change_list.add_changed_resources(updated_rl, change='updated')
     self.assertEqual(len(change_list), 4, "4 = 2 old + 2 things updated")
     # A resource list built from the changes should not have dupes
     merged = ResourceList()
     merged.add(change_list, replace=True)
     self.assertEqual(len(merged), 3, "3 unique resources")
     # For 'a' the later entry (timestamp 5) replaces the earlier one (timestamp 1)
     expected = (
         ('a', 5, 'updated'),
         ('b', 6, 'updated'),
         ('d', 4, 'created'),
     )
     for uri, stamp, change in expected:
         self.assertEqual(merged.resources[uri].timestamp, stamp)
         self.assertEqual(merged.resources[uri].change, change)
Exemplo n.º 24
0
 def test_build_ex_31(self):
     """Build a single-entry change list and compare with example 31."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     page = Resource(
         uri='http://original.example.com/res1.html',
         lastmod='2013-01-03T07:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876,
         mime_type='text/html')
     change_list.add(page)
     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_31')
Exemplo n.º 25
0
 def test07_as_xml(self):
     """Check that as_xml() output contains the capability, from and resource a."""
     change_list = ChangeList()
     change_list.md_from = '1970-01-01T00:00:00Z'
     change_list.add(Resource('a', timestamp=1, change='updated'))
     change_list.add(Resource('b', timestamp=2, change='updated'))
     xml = change_list.as_xml()
     # Search for each expected fragment, then assert on the match objects
     capability_match = re.search(r'<rs:md .*capability="changelist"', xml)
     from_match = re.search(r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"', xml)
     resource_match = re.search(r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>', xml)
     self.assertTrue(capability_match, 'XML has capability')
     self.assertTrue(from_match, 'XML has from to seconds precision (and not more)')
     self.assertTrue(resource_match, 'XML has resource a')
Exemplo n.º 26
0
 def test02_dump_check_files(self):
     """Check Dump.check_files() and total_size for two resources with local paths."""
     res_a = Resource('http://ex.org/a', length=7,
                      path='tests/testdata/a', change="updated")
     res_b = Resource('http://ex.org/b', length=21,
                      path='tests/testdata/b', change="updated")
     change_list = ChangeList()
     change_list.add(res_a)
     change_list.add(res_b)
     dump = Dump(resources=change_list)
     self.assertTrue(dump.check_files())
     # 28 is the sum of the two declared lengths, 7 + 21
     self.assertEqual(dump.total_size, 28)
Exemplo n.º 27
0
 def test_build_ex_30(self):
     """Build a change list entry with a collection link; compare with example 30."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     res = Resource(
         uri='http://example.com/res1',
         lastmod='2013-01-03T07:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876,
         mime_type='text/html')
     res.link_add(rel='collection',
                  href='http://example.com/aggregation/0601007')
     change_list.add(res)
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_30')
Exemplo n.º 28
0
 def test_build_ex_26(self):
     """Build a change list entry with a canonical link; compare with example 26."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     page = Resource(
         uri='http://example.com/res1.html',
         lastmod='2013-01-03T18:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876)
     page.link_add(
         rel='canonical',
         href='http://example.com/res1',
         modified='2013-01-03T18:00:00Z')
     change_list.add(page)
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_26')
Exemplo n.º 29
0
 def test03_with_repeats_again(self):
     """Adding a second Resource with an existing URI grows the ChangeList."""
     first = Resource(uri='a', length=1, change='created')
     second = Resource(uri='b', length=2, change='created')
     change_list = ChangeList()
     for res in (first, second):
         change_list.add(res)
     self.assertEqual(len(change_list), 2)
     # Can add another Resource with same URI
     repeat_of_first = Resource(uri='a', length=10, change='created')
     change_list.add(repeat_of_first)
     self.assertEqual(len(change_list), 3)
Exemplo n.º 30
0
 def test05_iter(self):
     """Iteration over a ChangeList returns the four created resources in order added."""
     change_list = ChangeList()
     for stamp, uri in enumerate('abcd', start=1):
         change_list.add(Resource(uri, timestamp=stamp, change='created'))
     iterated = list(change_list)
     self.assertEqual(len(iterated), 4)
     # Only the two ends of the sequence are checked
     self.assertEqual(iterated[0].uri, 'a')
     self.assertEqual(iterated[3].uri, 'd')
Exemplo n.º 31
0
 def test01_add(self):
     """Check CapabilityList.add() for single, duplicate, replaced and distinct entries."""
     res = Resource(uri='http://example.org/r1')
     # Adding a single entry
     capability_list = CapabilityList()
     capability_list.add(res)
     self.assertEqual(len(capability_list), 1)
     # Adding the same entry again is an error and leaves the list unchanged
     with self.assertRaises(ResourceSetDupeError):
         capability_list.add(res)
     self.assertEqual(len(capability_list), 1)
     # With replace=True the duplicate is accepted and only one entry is kept
     capability_list = CapabilityList()
     capability_list.add([res, res], replace=True)
     self.assertEqual(len(capability_list), 1)
     # Two entries with different URIs are both kept
     capability_list = CapabilityList()
     other = ChangeList(uri='http://example.org/r2')
     capability_list.add([res, other])
     self.assertEqual(len(capability_list), 2)
Exemplo n.º 32
0
 def test_build_ex_25(self):
     """Build a change list entry with two alternate links; compare with example 25."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T11:00:00Z'
     res = Resource(uri='http://example.com/res1',
                    lastmod='2013-01-03T18:00:00Z',
                    change='updated')
     res.link_add(
         rel='alternate',
         href='http://example.com/res1.html',
         modified='2013-01-03T18:00:00Z',
         type='text/html')  # FIXME - inconsistent
     res.link_add(
         rel='alternate',
         href='http://example.com/res1.pdf',
         modified='2013-01-03T18:00:00Z',
         type='application/pdf')
     change_list.add(res)
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_25')
Exemplo n.º 33
0
 def test_build_ex_33(self):
     """Build a change list entry with a via link; compare with example 33."""
     change_list = ChangeList()
     change_list.up = 'http://aggregator2.example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T12:00:00Z'
     page = Resource(
         uri='http://aggregator2.example.com/res1.html',
         lastmod='2013-01-04T09:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876,
         mime_type='text/html')
     # The via link carries its own hash, length and type attributes
     page.link_add(
         rel='via',
         href='http://original.example.com/res1.html',
         modified='2013-01-03T07:00:00Z',
         hash='md5:1584abdf8ebdc9802ac0c6a7402c03b6',
         length='8876',
         type='text/html')
     change_list.add(page)
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_33')
Exemplo n.º 34
0
 def test_build_ex_29(self):
     """Build a change list entry with memento, timegate and timemap links; compare with example 29."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = '2013-01-03T00:00:00Z'
     res = Resource(
         uri='http://example.com/res1',
         lastmod='2013-01-03T18:00:00Z',
         change='updated',
         md5='1584abdf8ebdc9802ac0c6a7402c03b6',
         length=8876,
         mime_type='text/html')
     res.link_add(
         rel='memento',
         href='http://example.com/20130103070000/res1',
         modified='2013-01-02T18:00:00Z',
         hash='md5:1584abdf8ebdc9802ac0c6a7402c03b6',
         length='8876',
         type='text/html')
     res.link_add(
         rel='timegate',
         href='http://example.com/timegate/http://example.com/res1')
     res.link_add(
         rel='timemap',
         href='http://example.com/timemap/http://example.com/res1',
         type='application/link-format')
     change_list.add(res)
     self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_29')
Exemplo n.º 35
0
 def test01_add_if_changed(self):
     """add_if_changed() accepts a resource with a change and rejects one without."""
     change_list = ChangeList()
     with_change = Resource('a', timestamp=1, change='updated')
     change_list.add_if_changed(with_change)
     self.assertEqual(len(change_list), 1)
     # A resource created without a change value must raise ChangeTypeError
     without_change = Resource('c', timestamp=3)
     with self.assertRaises(ChangeTypeError):
         change_list.add_if_changed(without_change)
Exemplo n.º 36
0
 def test_build_ex_21(self):
     """Change List which points back to index"""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.index = 'http://example.com/dataset1/changelist.xml'
     change_list.md_from = "2013-01-02T00:00:00Z"
     change_list.md_until = "2013-01-03T00:00:00Z"
     # (uri, lastmod, change) for each entry; res7.html appears twice
     entries = (
         ('http://example.com/res7.html', '2013-01-02T12:00:00Z', 'created'),
         ('http://example.com/res9.pdf', '2013-01-02T13:00:00Z', 'updated'),
         ('http://example.com/res5.tiff', '2013-01-02T19:00:00Z', 'deleted'),
         ('http://example.com/res7.html', '2013-01-02T20:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         change_list.add(Resource(uri=uri, lastmod=lastmod, change=change))
     expected_xml = self._open_ex('resourcesync_ex_21').read()
     self._assert_xml_equal(change_list.as_xml(), expected_xml)
Exemplo n.º 37
0
 def generate(self):
     """Return a new ChangeList holding every entry of self.changes."""
     result = ChangeList()
     # Entries are added one at a time, in the order self.changes yields them
     for entry in self.changes:
         result.add(entry)
     return result
Exemplo n.º 38
0
 def generate(self):
     """Build a ChangeList from self.changes and return it."""
     new_list = ChangeList()
     pending = iter(self.changes)
     # Copy each pending change across, preserving iteration order
     for item in pending:
         new_list.add(item)
     return new_list
Exemplo n.º 39
0
 def write_change_list(self,
                       paths=None,
                       outfile=None,
                       ref_sitemap=None,
                       newref_sitemap=None,
                       empty=None,
                       links=None,
                       dump=None):
     """Write a change list.

     Unless both ref_sitemap and newref_sitemap are specified, the Change
     List is calculated between the reference and the current state of
     files on disk. The files on disk are scanned based either on the paths
     setting or else on the mappings.

     Parameters:
     paths - passed to self.build_resource_list() when the new state is
         built rather than read from newref_sitemap
     outfile - basename passed to ChangeList.write(); if None the XML is
         printed to stdout instead
     ref_sitemap - reference sitemap describing the old state
     newref_sitemap - optional sitemap describing the new state
     empty - if true, skip the comparison so that no changed resources
         are added
     links - passed to the ChangeList constructor as ln
     dump - passed to self.build_resource_list() as set_path and to
         self.write_dump_if_requested()
     """
     cl = ChangeList(ln=links)
     if (not empty):
         # 1. Get and parse reference sitemap
         old_rl = self.read_reference_resource_list(ref_sitemap)
         # 2. Depending on whether a newref_sitemap was specified, either read that
         # or build resource list from files on disk
         if (newref_sitemap is None):
             # Get resource list from disk
             new_rl = self.build_resource_list(paths=paths, set_path=dump)
         else:
             new_rl = self.read_reference_resource_list(
                 newref_sitemap, name='new reference')
         # 3. Calculate change list; unchanged resources are not recorded
         (_same, updated, deleted, created) = old_rl.compare(new_rl)
         cl.add_changed_resources(updated, change='updated')
         cl.add_changed_resources(deleted, change='deleted')
         cl.add_changed_resources(created, change='created')
     # 4. Write out change list
     cl.mapper = self.mapper
     cl.pretty_xml = self.pretty_xml
     if (self.max_sitemap_entries is not None):
         cl.max_sitemap_entries = self.max_sitemap_entries
     if (outfile is None):
         # Parenthesized single-argument form is valid as both the
         # Python 2 print statement and the Python 3 print function
         print(cl.as_xml())
     else:
         cl.write(basename=outfile)
     self.write_dump_if_requested(cl, dump)
Exemplo n.º 40
0
    def incremental(self,
                    allow_deletion=False,
                    change_list_uri=None,
                    from_datetime=None):
        """Incremental synchronization

        Use Change List to do incremental sync: read the source Change
        List, prune changes before the starting timestamp and duplicate
        changes, then apply the remaining updates, creations and deletions
        to the destination given by self.mapper.

        Parameters:
        allow_deletion - passed to self.delete_resource() for each deleted
            resource; when false and there is nothing to update or create
            the method returns without applying anything
        change_list_uri - explicit Change List URI; when not given
            self.change_list_name is used instead
        from_datetime - datetime string giving the starting point; when
            None the timestamp stored by ClientState for self.sitemap is
            used

        Raises ClientFatalError for missing or unsafe mappings, a bad or
        missing starting timestamp, an unreadable Change List, a change
        with no timestamp, or a resource outside the authority of the
        Change List. Raises ClientError for an unknown change type.
        """
        self.logger.debug("Starting incremental sync")
        ### 0. Sanity checks
        if (len(self.mapper) < 1):
            raise ClientFatalError(
                "No source to destination mapping specified")
        if (self.mapper.unsafe()):
            raise ClientFatalError(
                "Source to destination mappings unsafe: %s" % str(self.mapper))
        from_timestamp = None
        if (from_datetime is not None):
            try:
                from_timestamp = str_to_datetime(from_datetime)
            except ValueError:
                raise ClientFatalError("Bad datetime in --from (%s)" %
                                       from_datetime)
        ### 1. Work out where to start from
        if (from_timestamp is None):
            from_timestamp = ClientState().get_state(self.sitemap)
            if (from_timestamp is None):
                raise ClientFatalError(
                    "Cannot do incremental sync. No stored timestamp for this site, and no explicit --from."
                )
        ### 2. Get URI of change list, from sitemap or explicit
        if (change_list_uri):
            # Translate as necessary using maps
            change_list = self.sitemap_uri(change_list_uri)
        else:
            # Try default name
            change_list = self.sitemap_uri(self.change_list_name)
        ### 3. Read change list from source
        try:
            self.logger.info("Reading change list %s" % (change_list))
            src_change_list = ChangeList()
            src_change_list.read(uri=change_list)
            self.logger.debug("Finished reading change list")
        except Exception as e:
            raise ClientFatalError(
                "Can't read source change list from %s (%s)" %
                (change_list, str(e)))
        self.logger.info("Read source change list, %d changes listed" %
                         (len(src_change_list)))
        #if (len(src_change_list)==0):
        #    raise ClientFatalError("Aborting as there are no resources to sync")
        if (self.checksum and not src_change_list.has_md5()):
            self.checksum = False
            self.logger.info(
                "Not calculating checksums on destination as not present in source change list"
            )
        # Check all changes have timestamp and record last
        self.last_timestamp = 0
        for resource in src_change_list:
            if (resource.timestamp is None):
                # Report the URI of the offending entry; the previous code
                # referenced an unbound local here and raised NameError
                raise ClientFatalError(
                    "Aborting - missing timestamp for change in %s" %
                    (resource.uri))
            if (resource.timestamp > self.last_timestamp):
                self.last_timestamp = resource.timestamp
        ### 4. Check that the change list has authority over URIs listed
        # FIXME - What does authority mean for change list? Here use both the
        # change list URI and, if we used it, the sitemap URI
        if (not self.noauth):
            uauth_cs = UrlAuthority(change_list, self.strictauth)
            # The sitemap authority is only consulted when the change list
            # location was derived from the sitemap rather than given
            uauth_sm = None
            if (not change_list_uri):
                uauth_sm = UrlAuthority(self.sitemap)
            # The loop runs for explicit change list URIs too; it was
            # previously nested under the condition above and so skipped
            for resource in src_change_list:
                if (not uauth_cs.has_authority_over(resource.uri) and
                    (change_list_uri
                     or not uauth_sm.has_authority_over(resource.uri))):
                    raise ClientFatalError(
                        "Aborting as change list (%s) mentions resource at a location it does not have authority over (%s), override with --noauth"
                        % (change_list, resource.uri))
        ### 5. Prune entries before starting timestamp and dupe changes for a resource
        num_skipped = src_change_list.prune_before(from_timestamp)
        if (num_skipped > 0):
            self.logger.info("Skipped %d changes before %s" %
                             (num_skipped, datetime_to_str(from_timestamp)))
        num_dupes = src_change_list.prune_dupes()
        if (num_dupes > 0):
            self.logger.info("Removed %d prior changes" % (num_dupes))
        # Review and log status before
        # FIXME - should at this stage prune the change list to pick out
        # only the last change for each resource
        to_update = 0
        to_create = 0
        to_delete = 0
        for resource in src_change_list:
            if (resource.change == 'updated'):
                to_update += 1
            elif (resource.change == 'created'):
                to_create += 1
            elif (resource.change == 'deleted'):
                to_delete += 1
            else:
                raise ClientError("Unknown change type %s" % (resource.change))
        # Log status based on what we know from the Change List. Exit if
        # either there are no changes or if there are only deletions and
        # we don't allow deletion
        in_sync = ((to_update + to_delete + to_create) == 0)
        self.log_status(in_sync=in_sync,
                        incremental=True,
                        created=to_create,
                        updated=to_update,
                        deleted=to_delete)
        if (in_sync or ((to_update + to_create) == 0 and not allow_deletion)):
            self.logger.debug("Completed incremental")
            return
        ### 6. Apply changes at same time or after from_timestamp
        delete_msg = (", and delete %d resources" %
                      to_delete) if (allow_deletion) else ''
        self.logger.warning("Will apply %d changes%s" %
                            (len(src_change_list), delete_msg))
        num_updated = 0
        num_deleted = 0
        num_created = 0
        for resource in src_change_list:
            uri = resource.uri
            # Destination for this resource according to the mapper
            dst_file = self.mapper.src_to_dst(uri)
            if (resource.change == 'updated'):
                self.logger.info("updated: %s -> %s" % (uri, dst_file))
                self.update_resource(resource, dst_file, 'updated')
                num_updated += 1
            elif (resource.change == 'created'):
                self.logger.info("created: %s -> %s" % (uri, dst_file))
                self.update_resource(resource, dst_file, 'created')
                num_created += 1
            elif (resource.change == 'deleted'):
                # delete_resource returns the number to add to the count
                num_deleted += self.delete_resource(resource, dst_file,
                                                    allow_deletion)
            else:
                raise ClientError("Unknown change type %s" % (resource.change))
        ### 7. Report status and planned actions
        self.log_status(incremental=True,
                        created=num_created,
                        updated=num_updated,
                        deleted=num_deleted,
                        to_delete=to_delete)
        ### 8. Record last timestamp we have seen
        if (self.last_timestamp > 0):
            ClientState().set_state(self.sitemap, self.last_timestamp)
            self.logger.info("Written last timestamp %s for incremental sync" %
                             (datetime_to_str(self.last_timestamp)))

        ### 9. Done
        self.logger.debug("Completed incremental sync")