示例#1
0
    def all_resources(self):
        """Build the current resource state from the metadata files on disk.

        Every ``resourcelist_*.xml`` file is read first, in sorted path
        order, and its resources are stored by URI.  Every
        ``changelist_*.xml`` file is then replayed, also in sorted path
        order: "created" and "updated" entries add or overwrite a URI, and
        "deleted" entries remove a URI that is currently present.

        :return: dict mapping resource URI to the resource object
        """
        known = {}

        def load(path, container):
            # Parse one sitemap document from disk into the given container.
            with open(path, "r", encoding="utf-8") as handle:
                Sitemap().parse_xml(handle, resources=container)
            return container

        # Baseline: everything mentioned in the resource lists.
        list_pattern = self.paras.abs_metadata_path("resourcelist_*.xml")
        for path in sorted(glob(list_pattern)):
            for entry in load(path, ResourceList()).resources:
                known[entry.uri] = entry

        # Replay the change lists on top of that baseline.
        change_pattern = self.paras.abs_metadata_path("changelist_*.xml")
        for path in sorted(glob(change_pattern)):
            for entry in load(path, ChangeList()).resources:
                if entry.change in ("created", "updated"):
                    known[entry.uri] = entry
                elif entry.change == "deleted":
                    # Deleting a URI that was never recorded is a no-op.
                    known.pop(entry.uri, None)

        return known
示例#2
0
def test_features():
    """Read a ResourceSync source on localhost and try to dump its resources.

    Reads the capability list and the resource list from
    ``http://localhost:8888``, prints the entries of each, then hands the
    resource list to ``dump.Dump`` and calls ``write`` with the basename
    ``./dump_test/test.xml``.
    """
    # Capability List: print every capability the source advertises.
    capability_url = 'http://localhost:8888/capabilitylist.xml'
    capabilities = CapabilityList()
    capabilities.read(capability_url)
    print("\nAbout to show all capabilities...")
    for resource in capabilities:
        print(f"supports {resource.capability} (at {resource.uri})")

    # Resource List: this url is one of the capabilities printed above.
    resource_url = 'http://localhost:8888/resourcelist.xml'
    listed = ResourceList()
    listed.read(resource_url)
    for entry in listed:
        print(entry)

    # Attempting to download resources, but getting an error related to one
    # resource.
    dumper = dump.Dump(resources=listed)
    dumper.write(basename='./dump_test/test.xml')
示例#3
0
    def get_resource_list_xml(self, from_date=None, to_date=None):
        """
        Get content of resource list.

        Items whose ``_updated`` timestamp is earlier than ``from_date`` or
        later than ``to_date`` are left out; a bound that is not given (or
        is otherwise falsy) is not applied.

        :param from_date: optional lower bound, parsed by ``str_to_datetime``
        :param to_date: optional upper bound, parsed by ``str_to_datetime``
        :return: (xml) resource list content, or None if validation fails
        """
        if not self._validation():
            return None

        items = get_items_by_index_tree(self.repository_id)

        resource_list = ResourceList()
        resource_list.up = INVENIO_CAPABILITY_URL.format(request.url_root)

        for item in items:
            # Falsy entries in the result are skipped.
            if not item:
                continue

            source = item.get('_source')
            updated = source.get('_updated')
            modified_at = str_to_datetime(updated)

            too_old = from_date and str_to_datetime(from_date) > modified_at
            if too_old:
                continue
            too_new = to_date and str_to_datetime(to_date) < modified_at
            if too_new:
                continue

            record_id = source.get('control_number')
            record_url = '{}resync/{}/records/{}'.format(
                request.url_root,
                str(self.repository_id),
                str(record_id),
            )
            resource_list.add(Resource(record_url, lastmod=updated))

        return resource_list.as_xml()
示例#4
0
def sitemap(offset=0):
    """Return a resource list of instances as a ``text/xml`` response.

    Each row from ``__get_instances__`` becomes a resource whose URI is the
    instance value followed by ``.json``.  The last-modified value is the
    first ten characters of the row's date value when the row has a
    ``"date"`` key, otherwise the current UTC time formatted with
    ``W3C_DATE``.  Rows rejected with ``ResourceListDupeError`` are skipped.

    :param offset: a positive offset is reduced by one before the lookup;
        zero or negative values are passed through unchanged
    :return: ``Response`` wrapping the resource list XML
    """
    start = offset - 1 if offset > 0 else offset
    instances = __get_instances__(start)
    resource_list = ResourceList()
    duplicates = 0  # number of rows rejected as duplicates

    for row in instances:
        instance = row.get('instance')
        last_mod = (
            row.get("date").get("value")[:10]
            if "date" in row
            else datetime.datetime.utcnow().strftime(W3C_DATE)
        )
        try:
            resource_list.add(
                Resource("{}.json".format(instance.get("value")),
                         lastmod=last_mod))
        except ResourceListDupeError:
            duplicates += 1

    return Response(resource_list.as_xml(), mimetype="text/xml")
示例#5
0
    def create_index(self, sitemap_data_iter: iter):
        """Write a resource-list index when more than one sitemap exists.

        Nothing happens for zero or one sitemap.  Otherwise a
        ``ResourceList`` flagged as a sitemap index is filled with one entry
        per sitemap and handed to ``finish_sitemap`` with ordinal ``-1``.
        For each sitemap whose document was saved, ``update_rel_index`` is
        called with the index URL and the sitemap's path.

        :param sitemap_data_iter: sitemap data objects; ``len()`` is called
            on it, so it must be a sized collection
        """
        if len(sitemap_data_iter) <= 1:
            return

        index = ResourceList()
        index.sitemapindex = True
        index.md_at = self.date_start_processing
        index.md_completed = self.date_end_processing

        # The index URL is the url prefix plus the index file's location
        # relative to the resource directory.
        index_path = self.para.abs_metadata_path("resourcelist-index.xml")
        relative_path = os.path.relpath(index_path, self.para.resource_dir)
        index_url = (self.para.url_prefix
                     + defaults.sanitize_url_path(relative_path))
        index.link_set(rel="up", href=self.para.capabilitylist_url())

        for entry in sitemap_data_iter:
            listed = Resource(uri=entry.uri,
                              md_at=entry.doc_start,
                              md_completed=entry.doc_end)
            index.add(listed)
            if entry.document_saved:
                self.update_rel_index(index_url, entry.path, ResourceList())

        self.finish_sitemap(-1, index)
    def update_previous_state(self):
        """Populate ``self.previous_resources`` from metadata files, once.

        Does nothing when ``self.previous_resources`` is already set.
        Otherwise it is filled in two passes over the metadata files: the
        first parses each file as a ``ResourceList`` and stores every
        resource by URI, the second parses each file as a ``ChangeDump`` and
        replays its "created", "updated" and "deleted" entries.  The file
        lists are kept on ``self.resourcelist_files`` and
        ``self.changedump_files``; ``self.date_resourcelist_completed`` is
        set from each file read in the first pass.
        """
        if self.previous_resources is not None:
            return

        state = {}
        self.previous_resources = state

        # search for resourcelists
        # NOTE(review): this pass globs "changedump_*.xml", the same pattern
        # as the change-dump pass below, yet parses the files as resource
        # lists.  This looks like a copy-paste slip -- confirm the intended
        # file pattern.
        self.resourcelist_files = sorted(
            glob(self.param.abs_metadata_path("changedump_*.xml")))
        for path in self.resourcelist_files:
            parsed_list = ResourceList()
            with open(path, "r", encoding="utf-8") as handle:
                Sitemap().parse_xml(handle, resources=parsed_list)

            # Prefer the completion timestamp; fall back to md_at.
            completed = parsed_list.md_completed
            if completed is None:
                completed = parsed_list.md_at
            self.date_resourcelist_completed = completed

            for entry in parsed_list.resources:
                state[entry.uri] = entry

        # search for changedumps
        self.changedump_files = sorted(
            glob(self.param.abs_metadata_path("changedump_*.xml")))
        for path in self.changedump_files:
            parsed_dump = ChangeDump()
            with open(path, "r", encoding="utf-8") as handle:
                Sitemap().parse_xml(handle, resources=parsed_dump)

            for entry in parsed_dump.resources:
                if entry.change in ("created", "updated"):
                    state[entry.uri] = entry
                elif entry.change == "deleted":
                    # Deleting a URI that is not recorded is a no-op.
                    state.pop(entry.uri, None)
示例#7
0
        def generator() -> [SitemapData, ResourceList]:
            """Yield ``(sitemap_data, resourcelist)`` pairs, one per list.

            Resources are collected into a ``ResourceList``.  A list is
            finished and yielded each time the running resource count is a
            multiple of ``self.param.max_items_in_list``; whatever is left
            after the last resource is finished and yielded as well.
            """
            ordinal = self.find_ordinal(Capability.resourcelist.name)
            produce_resources = self.resource_generator()
            current_list = None
            started_at = None

            def close_out(batch, number, begun):
                # Stamp the completion time and hand the list over.
                finished = defaults.w3c_now()
                batch.md_completed = finished
                return self.finish_sitemap(number,
                                           batch,
                                           doc_start=begun,
                                           doc_end=finished)

            for count, resource in produce_resources(resource_metadata):
                # Start a fresh list lazily, on the first resource it gets.
                if current_list is None:
                    current_list = ResourceList()
                    started_at = defaults.w3c_now()
                    current_list.md_at = started_at

                current_list.add(resource)

                if count % self.param.max_items_in_list != 0:
                    continue

                # The list is full: finish it and start over.
                ordinal += 1
                yield close_out(current_list, ordinal, started_at), current_list
                current_list = None

            # Finish the partially filled list that remains, if any.
            if current_list:
                ordinal += 1
                yield close_out(current_list, ordinal, started_at), current_list
#!/usr/bin/env python
if (True): #keep indentation of README

    # Build a Resource List with two resources and print it as XML.
    from resync import Resource,ResourceList

    rl = ResourceList()
    rl.add( Resource('http://example.com/res1', lastmod='2013-01-01') )
    rl.add( Resource('http://example.com/res2', lastmod='2013-01-02') )
    # The Python 2 print statement is a SyntaxError on Python 3; the
    # parenthesised single-argument call prints the same text on both.
    print(rl.as_xml())
#!/usr/bin/env python
if (True):  #keep indentation of README

    # Build a Resource List with two resources and print it as XML.
    from resync import Resource, ResourceList

    rl = ResourceList()
    rl.add(Resource('http://example.com/res1', lastmod='2013-01-01'))
    rl.add(Resource('http://example.com/res2', lastmod='2013-01-02'))
    # The Python 2 print statement is a SyntaxError on Python 3; the
    # parenthesised single-argument call prints the same text on both.
    print(rl.as_xml())