def test01_as_xml(self):
    """Serialize a two-entry ChangeDump and spot-check the generated XML."""
    dump = ChangeDump()
    for name, stamp in (('a.zip', 1), ('b.zip', 2)):
        dump.add(Resource(name, timestamp=stamp))
    serialized = dump.as_xml()

    # The document-level metadata must advertise the changedump capability.
    capability_match = re.search(
        r'<rs:md .*capability="changedump"', serialized)
    self.assertTrue(capability_match, 'XML has capability')

    # The test expects timestamp=1 to serialize as one second past the epoch.
    resource_a_match = re.search(
        r'<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
        serialized)
    self.assertTrue(resource_a_match, 'XML has resource a')
def test01_as_xml(self):
    """Check that ChangeDump.as_xml() emits the capability and resource a.

    Two resources are added to an empty ChangeDump; the serialized XML must
    contain an ``<rs:md>`` element with ``capability="changedump"`` and the
    ``<url>`` entry for ``a.zip`` with its lastmod rendered from timestamp=1.
    """
    rd = ChangeDump()
    rd.add(Resource('a.zip', timestamp=1))
    rd.add(Resource('b.zip', timestamp=2))
    xml = rd.as_xml()
    # No debug printing here: tests should stay silent on success, and the
    # statement form of print does not parse on Python 3.
    self.assertTrue(
        re.search(r'<rs:md .*capability="changedump"', xml),
        'XML has capability')
    self.assertTrue(
        re.search(
            r'<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
            xml),
        'XML has resource a')
def test01_as_xml(self):
    """Check that ChangeDump.as_xml() emits the capability and both resources.

    Two resources are added with timestamps 1 and 2; the serialized XML must
    carry the ``changedump`` capability and one ``<url>`` entry per resource,
    each with its lastmod rendered as an ISO 8601 UTC time.
    """
    rd = ChangeDump()
    rd.add(Resource("a.zip", timestamp=1))
    rd.add(Resource("b.zip", timestamp=2))
    xml = rd.as_xml()
    self.assertTrue(
        re.search(r'<rs:md .*capability="changedump"', xml),
        "XML has capability")
    # Dots are escaped so the pattern matches the literal file name only.
    self.assertTrue(
        re.search(
            r"<url><loc>a\.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>",
            xml),
        "XML has resource a")
    # The second resource is added above, so verify it is serialized too.
    self.assertTrue(
        re.search(
            r"<url><loc>b\.zip</loc><lastmod>1970-01-01T00:00:02Z</lastmod></url>",
            xml),
        "XML has resource b")
def test10_parse(self):
    """Parse a two-entry Change Dump and verify metadata and resources.

    Checks the number of resources, the document-level capability and
    ``from`` value, and that each resource is present with the length given
    in its ``<rs:md length="...">`` attribute.
    """
    xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="changedump" from="2013-01-01"/>'
        '<url><loc>http://example.com/a.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="12345" /></url>'
        '<url><loc>http://example.com/b.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="56789" /></url>'
        '</urlset>'
    )
    rd = ChangeDump()
    rd.parse(fh=io.StringIO(xml))
    self.assertEqual(len(rd.resources), 2, 'got 2 resource dumps')
    self.assertEqual(rd.md['capability'], 'changedump', 'capability set')
    self.assertEqual(rd.md_from, '2013-01-01')
    self.assertIn('http://example.com/a.zip', rd.resources)
    # assertEqual, not assertTrue: assertTrue(value, 12345) would treat the
    # number as the failure message and pass for any truthy length.
    # int() keeps the comparison valid whether the parser stores the length
    # as an integer or as the raw attribute text.
    self.assertEqual(
        int(rd.resources['http://example.com/a.zip'].length), 12345)
    self.assertIn('http://example.com/b.zip', rd.resources)
    self.assertEqual(
        int(rd.resources['http://example.com/b.zip'].length), 56789)
def test10_parse(self):
    """Parse a Change Dump document and check what was read back.

    The fixture holds two ``<url>`` entries. The test verifies the resource
    count, the document-level ``capability`` and ``from`` metadata, and the
    per-resource length taken from each entry's ``<rs:md>`` element.
    """
    xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="changedump" from="2013-01-01"/>'
        '<url><loc>http://example.com/a.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="12345" /></url>'
        '<url><loc>http://example.com/b.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="56789" /></url>'
        '</urlset>'
    )
    rd = ChangeDump()
    rd.parse(fh=io.StringIO(xml))
    self.assertEqual(len(rd.resources), 2, 'got 2 resource dumps')
    self.assertEqual(rd.md['capability'], 'changedump', 'capability set')
    self.assertEqual(rd.md_from, '2013-01-01')

    expected_lengths = (
        ('http://example.com/a.zip', 12345),
        ('http://example.com/b.zip', 56789),
    )
    for uri, expected in expected_lengths:
        self.assertIn(uri, rd.resources)
        # The length must actually be compared: assertTrue(length, expected)
        # only checks truthiness and uses the second argument as a message.
        # int() tolerates the length being stored as either int or text.
        self.assertEqual(int(rd.resources[uri].length), expected)
def test_build_ex_22(self):
    """Build a Change Dump listing three zip files and compare to example 22."""
    cd = ChangeDump()
    cd.up = 'http://example.com/dataset1/capabilitylist.xml'
    cd.md_from = "2013-01-01T00:00:00Z"

    # (uri, lastmod, length, md_from, md_until, manifest) for each dump file.
    dump_specs = (
        ('http://example.com/20130101-changedump.zip',
         '2013-01-01T23:59:59Z', 3109,
         "2013-01-01T00:00:00Z", "2013-01-02T00:00:00Z",
         'http://example.com/20130101-changedump-manifest.xml'),
        ('http://example.com/20130102-changedump.zip',
         '2013-01-02T23:59:59Z', 6629,
         "2013-01-02T00:00:00Z", "2013-01-03T00:00:00Z",
         'http://example.com/20130102-changedump-manifest.xml'),
        ('http://example.com/20130103-changedump.zip',
         '2013-01-03T23:59:59Z', 8124,
         "2013-01-03T00:00:00Z", "2013-01-04T00:00:00Z",
         'http://example.com/20130103-changedump-manifest.xml'),
    )
    zips = []
    for uri, lastmod, length, md_from, md_until, manifest in dump_specs:
        entry = Resource(uri=uri,
                         lastmod=lastmod,
                         length=length,
                         md_from=md_from,
                         md_until=md_until,
                         mime_type="application/zip")
        # The manifest link is assigned after construction.
        entry.contents = manifest
        zips.append(entry)
    cd.add(zips)

    expected = self._open_ex('resourcesync_ex_22').read()
    self._assert_xml_equal(cd.as_xml(), expected)
def test12_parse_bad_capability(self):
    """Parsing must fail when <rs:md> names a capability other than changedump."""
    document = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="bad_capability" from="2013-01-01"/>'
        '<url><loc>http://example.com/bad_res_1</loc><lastmod>2012-03-14T18:37:36Z</lastmod></url>'
        '</urlset>'
    )
    change_dump = ChangeDump()
    # Build the stream first so only parse() runs under the assertion.
    stream = io.StringIO(document)
    with self.assertRaises(SitemapParseError):
        change_dump.parse(fh=stream)
def test11_parse_no_capability(self):
    """Parsing a document whose <rs:md> lacks a capability must raise."""
    # The document-level <rs:md> carries only an "at" attribute, so a
    # ChangeDump parse is expected to reject it.
    document = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md at="2013-01-01"/>'
        '<url><loc>http://example.com/a.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="12" /></url>'
        '</urlset>'
    )
    change_dump = ChangeDump()
    stream = io.StringIO(document)
    self.assertRaises(SitemapParseError, change_dump.parse, fh=stream)
def test_build_ex_13(self):
    """Build a four-entry Capability List and compare it to example 13."""
    cl = CapabilityList()
    cl.describedby = 'http://example.com/info_about_set1_of_resources.xml'
    cl.up = 'http://example.com/resourcesync_description.xml'

    # Capabilities are added in this order, one document class per entry.
    entries = (
        (ResourceList, 'http://example.com/dataset1/resourcelist.xml'),
        (ResourceDump, 'http://example.com/dataset1/resourcedump.xml'),
        (ChangeList, 'http://example.com/dataset1/changelist.xml'),
        (ChangeDump, 'http://example.com/dataset1/changedump.xml'),
    )
    for document_cls, document_uri in entries:
        cl.add_capability(capability=document_cls(uri=document_uri))

    expected = self._open_ex('resourcesync_ex_13').read()
    self._assert_xml_equal(cl.as_xml(), expected)
def get_change_dump_xml(self, from_date):
    """Build the Change Dump XML document for this repository.

    Every record change returned by ``_get_record_changes_with_interval``
    whose status is not ``'deleted'`` becomes one zip resource entry. Each
    entry covers the interval from the change's ``updated`` value until the
    ``updated`` value of the following change, or until the current UTC time
    when there is no following change. When ``change_dump_manifest`` is
    set, a ``contents`` link to the entry's manifest is attached.

    A failure while building one entry is logged and that entry is skipped,
    so the remaining entries are still returned.

    :param from_date: forwarded to ``_get_record_changes_with_interval`` to
        select the record changes to include.
    :return: the serialized Change Dump XML, or ``None`` when
        ``_validation()`` fails.
    """
    if not self._validation():
        return None

    url_root = request.url_root
    change_dump = ChangeDump()
    change_dump.up = '{}resync/capability.xml'.format(url_root)
    change_dump.index = '{}resync/{}/changedump.xml'.format(
        url_root, self.repository_id)

    record_changes = self._get_record_changes_with_interval(from_date)
    for data in record_changes:
        try:
            next_ch = self._next_change(data, record_changes)
            if data.get('status') == 'deleted':
                continue

            # "<record_id>.<record_version>" identifies both the zip and its
            # manifest, so format it once.
            record_key = '{}.{}'.format(
                data.get('record_id'), data.get('record_version'))
            loc = '{}resync/{}/{}/change_dump_content.zip'.format(
                url_root, self.repository_id, record_key)

            # The interval ends at the next change when one is known,
            # otherwise at "now" as a timezone-aware UTC timestamp.
            md_until = next_ch.get('updated') if next_ch else None
            if not md_until:
                md_until = datetime.datetime.now(
                    datetime.timezone.utc).isoformat()

            links = []
            if self.change_dump_manifest:
                links.append({
                    'rel': 'contents',
                    'href': '{}resync/{}/{}/changedump_manifest.xml'.format(
                        url_root, self.repository_id, record_key),
                    'type': 'application/xml'
                })

            rc = Resource(loc,
                          lastmod=data.get("updated"),
                          mime_type='application/zip',
                          md_from=data.get('updated'),
                          md_until=md_until,
                          ln=links)
            change_dump.add(rc)
        except Exception:
            # Best effort: one bad record must not abort the whole dump.
            # logger.exception records the traceback in the application log
            # rather than on stdout.
            current_app.logger.exception(
                'Failed to add a record change to the change dump')

    return change_dump.as_xml()