示例#1
0
 def write_change_list(self, outfile=None, ref_sitemap=None, newref_sitemap=None,
                       empty=None, links=None, dump=None):
     """Write a change list as XML, either to stdout or to ``outfile``.

     Unless ``empty`` is true, the reference resource list read from
     ``ref_sitemap`` is compared against either ``self.resource_list``
     (when ``newref_sitemap`` is None) or the resource list read from
     ``newref_sitemap``. Resources reported as updated, deleted or created
     are added to the change list; unchanged ones are ignored.

     Arguments:
         outfile -- basename handed to ``ChangeList.write``; when None the
             XML is printed instead
         ref_sitemap -- reference sitemap, read with
             ``self.read_reference_resource_list``
         newref_sitemap -- optional second sitemap used as the new state
         empty -- when true, skip the comparison so no changed resources
             are added here
         links -- passed to ``ChangeList`` as ``ln``
         dump -- passed to ``self.write_dump_if_requested``
     """
     cl = ChangeList(ln=links)
     if not empty:
         # 1. Get and parse reference sitemap
         old_rl = self.read_reference_resource_list(ref_sitemap)
         # 2. Depending on whether a newref_sitemap was specified, either
         # read that or use the resource list already held by this object
         if newref_sitemap is None:
             new_rl = self.resource_list
         else:
             new_rl = self.read_reference_resource_list(newref_sitemap,
                                                        name='new reference')
         # 3. Calculate change list (unchanged resources are not recorded)
         (_same, updated, deleted, created) = old_rl.compare(new_rl)
         cl.add_changed_resources(updated, change='updated')
         cl.add_changed_resources(deleted, change='deleted')
         cl.add_changed_resources(created, change='created')
     # 4. Write out change list
     kwargs = {'pretty_xml': True,
               'mapper': self.mapper}
     if self.max_sitemap_entries is not None:
         kwargs['max_sitemap_entries'] = self.max_sitemap_entries
     if outfile is None:
         # Parenthesised single-argument form is valid both as a Python 2
         # print statement and as a Python 3 print() call.
         print(cl.as_xml(**kwargs))
     else:
         cl.write(basename=outfile, **kwargs)
     self.write_dump_if_requested(cl, dump)
示例#2
0
文件: client.py 项目: EHRI/resync
 def write_change_list(self, paths=None, outfile=None, ref_sitemap=None,
                       newref_sitemap=None, empty=None, links=None, dump=None):
     """Write a change list as XML, either to stdout or to ``outfile``.

     Unless both ref_sitemap and newref_sitemap are specified, the Change
     List is calculated between the reference and the current state of
     files on disk. The files on disk are scanned based either on the
     paths setting or else on the mappings.

     Arguments:
         paths -- passed to ``self.build_resource_list`` when the new state
             is built from disk
         outfile -- basename handed to ``ChangeList.write``; when None the
             XML is printed instead
         ref_sitemap -- reference sitemap, read with
             ``self.read_reference_resource_list``
         newref_sitemap -- optional second sitemap used as the new state
         empty -- when true, skip the comparison so no changed resources
             are added here
         links -- passed to ``ChangeList`` as ``ln``
         dump -- passed to ``self.build_resource_list`` as ``set_path`` and
             to ``self.write_dump_if_requested``
     """
     cl = ChangeList(ln=links)
     if not empty:
         # 1. Get and parse reference sitemap
         old_rl = self.read_reference_resource_list(ref_sitemap)
         # 2. Depending on whether a newref_sitemap was specified, either
         # read that or build resource list from files on disk
         if newref_sitemap is None:
             new_rl = self.build_resource_list(paths=paths, set_path=dump)
         else:
             new_rl = self.read_reference_resource_list(newref_sitemap,
                                                        name='new reference')
         # 3. Calculate change list (unchanged resources are not recorded)
         (_same, updated, deleted, created) = old_rl.compare(new_rl)
         cl.add_changed_resources(updated, change='updated')
         cl.add_changed_resources(deleted, change='deleted')
         cl.add_changed_resources(created, change='created')
     # 4. Write out change list
     cl.mapper = self.mapper
     cl.pretty_xml = self.pretty_xml
     if self.max_sitemap_entries is not None:
         cl.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Parenthesised single-argument form is valid both as a Python 2
         # print statement and as a Python 3 print() call.
         print(cl.as_xml())
     else:
         cl.write(basename=outfile)
     self.write_dump_if_requested(cl, dump)
 def test_build_ex_21(self):
     """Change List with up and index links, compared to example 21."""
     changes = ChangeList()
     changes.up = 'http://example.com/dataset1/capabilitylist.xml'
     changes.index = 'http://example.com/dataset1/changelist.xml'
     changes.md_from = "2013-01-02T00:00:00Z"
     changes.md_until = "2013-01-03T00:00:00Z"
     # (uri, lastmod, change) triples, added in this order
     entries = (
         ('http://example.com/res7.html', '2013-01-02T12:00:00Z', 'created'),
         ('http://example.com/res9.pdf', '2013-01-02T13:00:00Z', 'updated'),
         ('http://example.com/res5.tiff', '2013-01-02T19:00:00Z', 'deleted'),
         ('http://example.com/res7.html', '2013-01-02T20:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         changes.add(Resource(uri=uri, lastmod=lastmod, change=change))
     expected_xml = self._open_ex('resourcesync_ex_21').read()
     self._assert_xml_equal(changes.as_xml(), expected_xml)
示例#4
0
 def test_build_ex_27(self):
     """Change List of two updated resources, each with a patch link.

     The generated XML is compared with the 'resourcesync_ex_27' example.
     """
     patch_rel = "http://www.openarchives.org/rs/terms/patch"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"

     json_res = Resource(
         uri="http://example.com/res4",
         lastmod="2013-01-03T17:00:00Z",
         change="updated",
         sha256="f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj",
         length=56778,
         mime_type="application/json")
     json_res.link_set(
         rel=patch_rel,
         href="http://example.com/res4-json-patch",
         modified="2013-01-03T17:00:00Z",
         # FIXME - this hash value is flagged as inconsistent
         hash="sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld",
         length=73,
         type="application/json-patch")

     # Note the length here is given as a string, unlike the first resource
     tiff_res = Resource(
         uri="http://example.com/res5-full.tiff",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         sha256="f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk",
         length="9788456778",
         mime_type="image/tiff")
     tiff_res.link_set(
         rel=patch_rel,
         href="http://example.com/res5-diff",
         modified="2013-01-03T18:00:00Z",
         hash="sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT",
         length=4533,
         type="application/x-tiff-diff")

     changes.add([json_res, tiff_res])
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_27')
示例#5
0
 def test_build_ex_28(self):
     """Change List with a PDF and its metadata record linked to each other.

     The generated XML is compared with the 'resourcesync_ex_28' example.
     """
     pdf_uri = "http://example.com/res2.pdf"
     metadata_uri = "http://example.com/res2_dublin-core_metadata.xml"

     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"

     pdf = Resource(
         uri=pdf_uri,
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="application/pdf")
     pdf.link_set(
         rel="describedby",
         href=metadata_uri,
         modified="2013-01-01T12:00:00Z",
         type="application/xml")

     metadata = Resource(
         uri=metadata_uri,
         lastmod="2013-01-03T19:00:00Z",
         change="updated",
         mime_type="application/xml")
     # The link length is a string here, whereas the resource uses an int
     metadata.link_set(
         rel="describes",
         href=pdf_uri,
         modified="2013-01-03T18:00:00Z",
         hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
         length="8876",
         type="application/pdf")
     metadata.link_set(
         rel="profile",
         href="http://purl.org/dc/elements/1.1/")

     changes.add([pdf, metadata])
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_28')
示例#6
0
 def test_build_ex_24(self):
     """Change List with one resource carrying three 'duplicate' links.

     The generated XML is compared with the 'resourcesync_ex_24' example.
     """
     modified = "2013-01-03T18:00:00Z"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     # Per the original author's note: Resource.link_set adds or changes a
     # link depending on whether one with that rel already exists, unless
     # allow_duplicates=True, while Resource.link_add always adds. Both
     # routes are exercised below.
     res.link_set(
         rel="duplicate",
         href="http://mirror1.example.com/res1",
         pri="1",
         modified=modified)
     res.link_set(
         rel="duplicate",
         href="http://mirror2.example.com/res1",
         pri="2",
         modified=modified,
         allow_duplicates=True)
     res.link_add(
         rel="duplicate",
         href="gsiftp://gridftp.example.com/res1",
         pri="3",
         modified=modified)
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_24')
示例#7
0
 def test07_as_xml(self):
     """Check the XML of a two-resource Change List against three patterns."""
     changes = ChangeList()
     changes.md_from = '1970-01-01T00:00:00Z'
     for uri, stamp in (('a', 1), ('b', 2)):
         changes.add(Resource(uri, timestamp=stamp, change='updated'))
     xml = changes.as_xml()
     # (pattern, failure message) pairs, asserted in this order
     expectations = (
         (r'<rs:md .*capability="changelist"',
          'XML has capability'),
         (r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"',
          'XML has from to seconds precision (and not more)'),
         (r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>',
          'XML has resource a'),
     )
     for pattern, message in expectations:
         self.assertTrue(re.search(pattern, xml), message)
示例#8
0
 def write_change_list(self,
                       paths=None,
                       outfile=None,
                       ref_sitemap=None,
                       newref_sitemap=None,
                       empty=None,
                       links=None,
                       dump=None):
     """Write a change list as XML, either to stdout or to ``outfile``.

     Unless both ref_sitemap and newref_sitemap are specified, the Change
     List is calculated between the reference and the current state of
     files on disk. The files on disk are scanned based either on the
     paths setting or else on the mappings.

     Arguments:
         paths -- passed to ``self.build_resource_list`` when the new state
             is built from disk
         outfile -- basename handed to ``ChangeList.write``; when None the
             XML is printed instead
         ref_sitemap -- reference sitemap, read with
             ``self.read_reference_resource_list``
         newref_sitemap -- optional second sitemap used as the new state
         empty -- when true, skip the comparison so no changed resources
             are added here
         links -- passed to ``ChangeList`` as ``ln``
         dump -- passed to ``self.build_resource_list`` as ``set_path`` and
             to ``self.write_dump_if_requested``
     """
     cl = ChangeList(ln=links)
     if not empty:
         # 1. Get and parse reference sitemap
         old_rl = self.read_reference_resource_list(ref_sitemap)
         # 2. Depending on whether a newref_sitemap was specified, either
         # read that or build resource list from files on disk
         if newref_sitemap is None:
             new_rl = self.build_resource_list(paths=paths, set_path=dump)
         else:
             new_rl = self.read_reference_resource_list(
                 newref_sitemap, name='new reference')
         # 3. Calculate change list (unchanged resources are not recorded)
         (_same, updated, deleted, created) = old_rl.compare(new_rl)
         cl.add_changed_resources(updated, change='updated')
         cl.add_changed_resources(deleted, change='deleted')
         cl.add_changed_resources(created, change='created')
     # 4. Write out change list
     cl.mapper = self.mapper
     cl.pretty_xml = self.pretty_xml
     if self.max_sitemap_entries is not None:
         cl.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Parenthesised single-argument form is valid both as a Python 2
         # print statement and as a Python 3 print() call.
         print(cl.as_xml())
     else:
         cl.write(basename=outfile)
     self.write_dump_if_requested(cl, dump)
示例#9
0
 def test_build_ex_31(self):
     """Change List with a single updated HTML resource and no links.

     The generated XML is compared with the 'resourcesync_ex_31' example.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     changes.add(Resource(
         uri="http://original.example.com/res1.html",
         lastmod="2013-01-03T07:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html"))
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_31')
示例#10
0
 def test_build_ex_03(self):
     """Simple Change List with one update and one deletion (example 3)."""
     changes = ChangeList()
     changes.md_from = '2013-01-02T00:00:00Z'
     changes.md_until = '2013-01-03T00:00:00Z'
     # (uri, lastmod, change) triples, added in this order
     entries = (
         ('http://example.com/res2.pdf', '2013-01-02T13:00:00Z', "updated"),
         ('http://example.com/res3.tiff', '2013-01-02T18:00:00Z', 'deleted'),
     )
     for uri, lastmod, change in entries:
         changes.add(Resource(uri=uri, lastmod=lastmod, change=change))
     expected_xml = self._open_ex('resourcesync_ex_3').read()
     self._assert_xml_equal(changes.as_xml(), expected_xml)
示例#11
0
 def test_build_ex_30(self):
     """Change List with one resource carrying a 'collection' link.

     The generated XML is compared with the 'resourcesync_ex_30' example.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T07:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     res.link_add(
         rel="collection",
         href="http://example.com/aggregation/0601007")
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_30')
示例#12
0
 def test20_as_xml(self):
     """Check the XML of a two-resource Change List against three patterns."""
     changes = ChangeList()
     changes.md_from = "1970-01-01T00:00:00Z"
     for uri, stamp in (("a", 1), ("b", 2)):
         changes.add(Resource(uri, timestamp=stamp, change="updated"))
     xml = changes.as_xml()
     # (pattern, failure message) pairs, asserted in this order
     expectations = (
         (r'<rs:md .*capability="changelist"',
          "XML has capability"),
         (r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"',
          "XML has from to seconds precision (and not more)"),
         (r"<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>",
          "XML has resource a"),
     )
     for pattern, message in expectations:
         self.assertTrue(re.search(pattern, xml), message)
示例#13
0
 def test_build_ex_26(self):
     """Change List with one resource carrying a 'canonical' link.

     The generated XML is compared with the 'resourcesync_ex_26' example.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     res = Resource(
         uri="http://example.com/res1.html",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876)
     res.link_add(
         rel="canonical",
         href="http://example.com/res1",
         modified="2013-01-03T18:00:00Z")
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_26')
示例#14
0
 def test_build_ex_25(self):
     """Change List with one resource carrying two 'alternate' links.

     The generated XML is compared with the 'resourcesync_ex_25' example.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T11:00:00Z"
     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T18:00:00Z",
         change="updated")
     # FIXME - the first (HTML) alternate link is flagged as inconsistent
     alternates = (
         ("http://example.com/res1.html", "text/html"),
         ("http://example.com/res1.pdf", "application/pdf"),
     )
     for href, mime in alternates:
         res.link_add(
             rel="alternate",
             href=href,
             modified="2013-01-03T18:00:00Z",
             type=mime)
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_25')
示例#15
0
 def test_build_ex_33(self):
     """Change List with one aggregator resource carrying a 'via' link.

     The generated XML is compared with the 'resourcesync_ex_33' example.
     """
     changes = ChangeList()
     changes.up = "http://aggregator2.example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T12:00:00Z"
     res = Resource(
         uri="http://aggregator2.example.com/res1.html",
         lastmod="2013-01-04T09:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     # The link length is a string here, whereas the resource uses an int
     res.link_add(
         rel="via",
         href="http://original.example.com/res1.html",
         modified="2013-01-03T07:00:00Z",
         hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
         length="8876",
         type="text/html")
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_33')
示例#16
0
 def test_build_ex_19(self):
     """Change List with 4 changes and no md_until set (example 19)."""
     changes = ChangeList()
     changes.up = 'http://example.com/dataset1/capabilitylist.xml'
     changes.md_from = "2013-01-03T00:00:00Z"
     # (uri, lastmod, change) triples, added in this order
     entries = (
         ('http://example.com/res1.html', '2013-01-03T11:00:00Z', 'created'),
         ('http://example.com/res2.pdf', '2013-01-03T13:00:00Z', 'updated'),
         ('http://example.com/res3.tiff', '2013-01-03T18:00:00Z', 'deleted'),
         ('http://example.com/res2.pdf', '2013-01-03T21:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         changes.add(Resource(uri=uri, lastmod=lastmod, change=change))
     expected_xml = self._open_ex('resourcesync_ex_19').read()
     self._assert_xml_equal(changes.as_xml(), expected_xml)
示例#17
0
 def test_build_ex_21(self):
     """Change List with up and index links, compared to example 21."""
     changes = ChangeList()
     changes.up = 'http://example.com/dataset1/capabilitylist.xml'
     changes.index = 'http://example.com/dataset1/changelist.xml'
     changes.md_from = "2013-01-02T00:00:00Z"
     changes.md_until = "2013-01-03T00:00:00Z"
     # Keyword arguments for each Resource, added in this order
     entries = [
         dict(uri='http://example.com/res7.html',
              lastmod='2013-01-02T12:00:00Z',
              change='created'),
         dict(uri='http://example.com/res9.pdf',
              lastmod='2013-01-02T13:00:00Z',
              change='updated'),
         dict(uri='http://example.com/res5.tiff',
              lastmod='2013-01-02T19:00:00Z',
              change='deleted'),
         dict(uri='http://example.com/res7.html',
              lastmod='2013-01-02T20:00:00Z',
              change='updated'),
     ]
     for fields in entries:
         changes.add(Resource(**fields))
     expected_xml = self._open_ex('resourcesync_ex_21').read()
     self._assert_xml_equal(changes.as_xml(), expected_xml)
示例#18
0
 def test_build_ex_29(self):
     """Change List with memento, timegate and timemap links on one resource.

     The generated XML is compared with the 'resourcesync_ex_29' example.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     # Keyword arguments for each link_add call, applied in this order
     link_specs = [
         dict(rel="memento",
              href="http://example.com/20130103070000/res1",
              modified="2013-01-02T18:00:00Z",
              hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
              length="8876",
              type="text/html"),
         dict(rel="timegate",
              href="http://example.com/timegate/http://example.com/res1"),
         dict(rel="timemap",
              href="http://example.com/timemap/http://example.com/res1",
              type="application/link-format"),
     ]
     for spec in link_specs:
         res.link_add(**spec)
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_29')
示例#19
0
    def get_change_list_content_xml(self,
                                    from_date,
                                    from_date_args=None,
                                    to_date_args=None):
        """
        Build the change list XML for this repository.

        :param from_date: passed to ``_get_record_changes_with_interval``
            to fetch the record changes.
        :param from_date_args: optional lower bound; when given, records
            whose ``updated`` value is earlier are skipped.
        :param to_date_args: optional upper bound; when given, records
            whose ``updated`` value is later are skipped.
        :return: ``None`` when ``_validation()`` fails, otherwise the
            result of ``ChangeList.as_xml()``.
        """
        if not self._validation():
            return None

        from .utils import parse_date
        lower_bound = parse_date(from_date_args) if from_date_args \
            else from_date_args
        upper_bound = parse_date(to_date_args) if to_date_args \
            else to_date_args

        changes = ChangeList()
        changes.up = INVENIO_CAPABILITY_URL.format(request.url_root)
        changes.index = '{}resync/{}/changelist.xml'.format(
            request.url_root,
            self.repository_id,
        )

        for data in self._get_record_changes_with_interval(from_date):
            try:
                # Drop records that fall outside the requested window
                if lower_bound and lower_bound > parse_date(
                        data.get("updated")):
                    continue
                if upper_bound and upper_bound < parse_date(
                        data.get("updated")):
                    continue

                pid_object = PersistentIdentifier.get('recid',
                                                      data.get('record_id'))
                latest_pid = PIDVersioning(child=pid_object).last_child
                versioned_id = "{}.{}".format(
                    data.get('record_id'), data.get('record_version'))
                is_latest = str(latest_pid.pid_value) == versioned_id

                # The latest version and deleted records use the bare
                # record id; any other version uses "<id>.<version>".
                if is_latest or data.get('status') == 'deleted':
                    record_ref = data.get('record_id')
                else:
                    record_ref = '{}.{}'.format(data.get('record_id'),
                                                data.get('record_version'))
                loc = '{}resync/{}/records/{}'.format(
                    request.url_root, self.repository_id, record_ref)

                changes.add(Resource(
                    loc,
                    lastmod=data.get("updated"),
                    change=data.get('status'),
                    md_at=data.get("updated"),
                ))
            except Exception:
                # Best effort: report the failure and move to the next record
                current_app.logger.error('-' * 60)
                traceback.print_exc(file=sys.stdout)
                current_app.logger.error('-' * 60)
                continue

        return changes.as_xml()