Пример #1
0
    def execute(self, filenames: iter):
        """
        ``build step 0`` :samp:`Publish ResourceSync documents`

        Publish ResourceSync documents under conditions of
        current :class:`~rspub.core.rs_paras.RsParameters`.

        The steps are executed in this order: make sure the metadata directory
        exists, prepare it, generate the documents, post-process them, create the
        index, create the capability list and update the resource sync
        description. Observers are informed with ``ExecutorEvent.execution_start``
        before the first step and with ``ExecutorEvent.execution_end`` after the
        last one.

        :param filenames: iter of filenames and/or directories to scan
        :return: the :class:`SitemapData` collection returned by
            :func:`generate_rs_documents`
        """
        self.date_start_processing = defaults.w3c_now()
        self.observers_inform(self,
                              ExecutorEvent.execution_start,
                              date_start_processing=self.date_start_processing)

        # exist_ok=True replaces a separate os.path.exists() check: the directory
        # may be created by someone else between the check and the creation,
        # which would make a plain makedirs() raise FileExistsError.
        os.makedirs(self.para.abs_metadata_dir(), exist_ok=True)

        self.prepare_metadata_dir()
        sitemap_data_iter = self.generate_rs_documents(filenames)
        self.post_process_documents(sitemap_data_iter)
        # The end timestamp is taken here, i.e. before the index, the capability
        # list and the resource sync description are written.
        self.date_end_processing = defaults.w3c_now()
        self.create_index(sitemap_data_iter)

        capabilitylist_data = self.create_capabilitylist()
        self.update_resource_sync(capabilitylist_data)

        self.observers_inform(self,
                              ExecutorEvent.execution_end,
                              date_end_processing=self.date_end_processing,
                              new_sitemaps=sitemap_data_iter)
        return sitemap_data_iter
Пример #2
0
    def __init__(self,
                 resource_count=0,
                 ordinal=0,
                 uri=None,
                 path=None,
                 capability_name=None,
                 document_saved=False):
        """
        :samp:`Initialization`

        Keep the describing values of a single sitemap document. Besides the
        values passed in, two timestamps are initialized: ``doc_start`` is set to
        ``None`` and ``doc_end`` to the value of ``defaults.w3c_now()`` at the
        moment of construction.

        :param int resource_count: number of records contained in the sitemap
        :param int ordinal: ordinal number, as used in the sitemap filename and url
        :param str uri: url of the sitemap
        :param str path: local path of the sitemap
        :param str capability_name: capability the sitemap belongs to
        :param bool document_saved: True if the sitemap was written to disk, False otherwise
        """
        # size and sequence number
        self.resource_count, self.ordinal = resource_count, ordinal
        # where the document lives
        self.uri, self.path = uri, path
        # what it is and whether it was persisted
        self.capability_name, self.document_saved = capability_name, document_saved
        # processing timestamps; callers may overwrite both afterwards
        self.doc_start = None
        self.doc_end = defaults.w3c_now()
Пример #3
0
    def finish_sitemap(self,
                       ordinal,
                       sitemap,
                       doc_start=None,
                       doc_end=None) -> SitemapData:
        """
        Complete a sitemap document and report it to the observers.

        The file name is the capability name of the sitemap, followed by
        ``-index`` for a sitemap index or otherwise by the formatted ordinal
        (only if the ordinal is not negative), followed by ``.xml``. The ``up``
        link of the sitemap is set and, if the parameters ask for saving
        sitemaps, the document is saved at the computed path.

        :param ordinal: ordinal number of the sitemap
        :param sitemap: the document to finish
        :param doc_start: start timestamp to record on the returned data
        :param doc_end: end timestamp to record; a falsy value is replaced by
            ``defaults.w3c_now()``
        :return: :class:`SitemapData` describing the finished document
        """
        capability_name = sitemap.capability_name

        # <capability>[-index | <formatted ordinal>].xml
        if sitemap.sitemapindex:
            suffix = "-index"
        elif ordinal >= 0:
            suffix = self.format_ordinal(ordinal)
        else:
            suffix = ""
        file_name = capability_name + suffix + ".xml"

        abs_path = self.para.abs_metadata_path(file_name)
        uri = self.para.uri_from_path(abs_path)
        sitemap.link_set(rel="up", href=self.current_rel_up_for(sitemap))

        data = SitemapData(len(sitemap), ordinal, uri, abs_path, capability_name)
        data.doc_start = doc_start
        data.doc_end = doc_end or defaults.w3c_now()

        if self.para.is_saving_sitemaps:
            sitemap.pretty_xml = self.para.is_saving_pretty_xml
            self.save_sitemap(sitemap, abs_path)
            data.document_saved = True

        self.observers_inform(self,
                              ExecutorEvent.completed_document,
                              document=sitemap,
                              sitemap_data=data)
        return data
Пример #4
0
        def generator() -> [SitemapData, ResourceList]:
            """
            Yield ``(sitemap_data, resourcelist)`` pairs.

            Resources delivered by the resource generator are collected in a
            :class:`ResourceList`. Each time the running count delivered with a
            resource is a multiple of ``para.max_items_in_list`` the list is
            finished and yielded; a list still holding resources after the last
            resource is finished and yielded as well.
            """

            def close_list(rs_list, number, started):
                # Stamp the completion time on the list and finish it as a sitemap.
                finished = defaults.w3c_now()
                rs_list.md_completed = finished
                return self.finish_sitemap(number,
                                           rs_list,
                                           doc_start=started,
                                           doc_end=finished)

            current = None
            ordinal = self.find_ordinal(Capability.resourcelist.name)
            doc_start = None
            resource_generator = self.resource_generator()
            for count, resource in resource_generator(filenames):
                # start a fresh list on the first resource of every chunk
                if current is None:
                    current = ResourceList()
                    doc_start = defaults.w3c_now()
                    current.md_at = doc_start

                current.add(resource)

                # chunk not full yet: keep collecting
                if count % self.para.max_items_in_list != 0:
                    continue

                ordinal += 1
                yield close_list(current, ordinal, doc_start), current
                current = None

            # leftover resources form the last, partially filled list
            if current:
                ordinal += 1
                yield close_list(current, ordinal, doc_start), current