def execute(self, filenames: iter):
    """
    ``build step 0``
    :samp:`Publish ResourceSync documents`

    Publish ResourceSync documents under conditions of
    current :class:`~rspub.core.rs_paras.RsParameters`.

    The steps are carried out in this order: observers are told that execution
    started, the metadata directory is created (if needed) and prepared, the
    documents are generated and post-processed, an index is created, the
    capability list is created and handed to ``update_resource_sync``, and
    finally observers are told that execution ended.

    :param filenames: iter of filenames and/or directories to scan
    :return: list of :class:`SitemapData` of generated sitemaps (the value
        returned by ``generate_rs_documents``, passed through unchanged)
    :raises FileExistsError: if the metadata path exists but is not a directory
    """
    self.date_start_processing = defaults.w3c_now()
    self.observers_inform(self, ExecutorEvent.execution_start,
                          date_start_processing=self.date_start_processing)

    # exist_ok=True replaces the former exists()/makedirs() pair: it removes the
    # window in which another process could create the directory between the
    # check and the creation, and it resolves the directory path only once.
    os.makedirs(self.para.abs_metadata_dir(), exist_ok=True)

    self.prepare_metadata_dir()
    # NOTE(review): sitemap_data_iter is handed to post_process_documents,
    # create_index, the observers and the caller. This assumes
    # generate_rs_documents returns a re-iterable collection and not a
    # one-shot generator -- confirm.
    sitemap_data_iter = self.generate_rs_documents(filenames)
    self.post_process_documents(sitemap_data_iter)
    # The end-of-processing timestamp is taken before the index and the
    # capability list are written.
    self.date_end_processing = defaults.w3c_now()
    self.create_index(sitemap_data_iter)

    capabilitylist_data = self.create_capabilitylist()
    self.update_resource_sync(capabilitylist_data)

    self.observers_inform(self, ExecutorEvent.execution_end,
                          date_end_processing=self.date_end_processing,
                          new_sitemaps=sitemap_data_iter)
    return sitemap_data_iter
def __init__(self, resource_count=0, ordinal=0, uri=None, path=None, capability_name=None,
             document_saved=False):
    """
    :samp:`Initialization`

    Store the descriptive values of one sitemap document. ``doc_start`` starts
    out as ``None``; ``doc_end`` is set to the value of ``defaults.w3c_now()``
    at the time of construction.

    :param int resource_count: the amount of records in the sitemap
    :param int ordinal: the ordinal number as reflected in the sitemap filename and url
    :param str uri: the url of the sitemap
    :param str path: the local path of the sitemap
    :param str capability_name: the capability of the sitemap
    :param bool document_saved: True if the sitemap was saved to disk, False otherwise
    """
    # size and sequence number of the document
    self.resource_count, self.ordinal = resource_count, ordinal
    # where the document lives: remote url and local path
    self.uri, self.path = uri, path
    # what kind of document it is and whether it was written to disk
    self.capability_name, self.document_saved = capability_name, document_saved
    # processing timestamps
    self.doc_start = None
    self.doc_end = defaults.w3c_now()
def finish_sitemap(self, ordinal, sitemap, doc_start=None, doc_end=None) -> SitemapData:
    """
    :samp:`Finish a sitemap document`

    Derive file name, local path and url for the given sitemap, set its ``up``
    link, optionally save it and inform observers that the document is complete.

    The file name is the capability name of the sitemap followed by ``-index``
    for a sitemap index, or else by the formatted ordinal when ``ordinal`` is
    not negative, and always ends in ``.xml``.

    :param ordinal: ordinal number of the document; ignored in the file name
        when the sitemap is an index or when the ordinal is negative
    :param sitemap: the sitemap document to finish
    :param doc_start: timestamp recorded as start of the document
    :param doc_end: timestamp recorded as end of the document; when falsy the
        value of ``defaults.w3c_now()`` is used instead
    :return: :class:`SitemapData` describing the finished sitemap
    """
    capability = sitemap.capability_name
    if sitemap.sitemapindex:
        suffix = "-index"
    elif ordinal >= 0:
        suffix = self.format_ordinal(ordinal)
    else:
        suffix = ""
    document_name = capability + suffix + ".xml"

    local_path = self.para.abs_metadata_path(document_name)
    document_url = self.para.uri_from_path(local_path)
    up_href = self.current_rel_up_for(sitemap)
    sitemap.link_set(rel="up", href=up_href)

    data = SitemapData(len(sitemap), ordinal, document_url, local_path, capability)
    data.doc_start = doc_start
    data.doc_end = doc_end or defaults.w3c_now()

    # Saving is optional; document_saved stays at its default unless we really wrote the file.
    if self.para.is_saving_sitemaps:
        sitemap.pretty_xml = self.para.is_saving_pretty_xml
        self.save_sitemap(sitemap, local_path)
        data.document_saved = True

    self.observers_inform(self, ExecutorEvent.completed_document, document=sitemap, sitemap_data=data)
    return data
def generator() -> [SitemapData, ResourceList]:
    """
    Yield ``(SitemapData, ResourceList)`` pairs for the resources found under
    ``filenames`` (taken from the enclosing scope).

    Resources are collected in a resource list; the list is finished and
    yielded each time the running resource count is a multiple of
    ``self.para.max_items_in_list``. A list that is still open when the
    resources run out is finished and yielded last.

    NOTE(review): presumably the resource generator counts from 1; a count of 0
    would close a list after its first resource -- confirm.
    """
    def close_document(document, number, started):
        # Stamp the completion time on the list and hand it over to finish_sitemap.
        completed = defaults.w3c_now()
        document.md_completed = completed
        return self.finish_sitemap(number, document, doc_start=started, doc_end=completed)

    ordinal = self.find_ordinal(Capability.resourcelist.name)
    produce_resources = self.resource_generator()
    current = None
    started = None

    for count, resource in produce_resources(filenames):
        # open a new list lazily, on the first resource that goes into it
        if current is None:
            current = ResourceList()
            started = defaults.w3c_now()
            current.md_at = started
        current.add(resource)

        # the list is not full yet: keep collecting
        if count % self.para.max_items_in_list != 0:
            continue

        ordinal += 1
        yield close_document(current, ordinal, started), current
        current = None

    # a partially filled list is left over: finish and yield it as the last one
    if current:
        ordinal += 1
        yield close_document(current, ordinal, started), current