def lastmod(self, lastmod):
    """Set timestamp from a W3C Datetime Last-Modified value.

    ``None`` clears the timestamp. An empty string is rejected with
    ``ValueError``. Any other value is converted with
    ``str_to_datetime`` and stored in ``self.timestamp``.
    """
    if lastmod is None:
        parsed = None
    elif lastmod == "":
        raise ValueError("Attempt to set empty lastmod")
    else:
        parsed = str_to_datetime(lastmod)
    self.timestamp = parsed
def incremental(self, allow_deletion=False, change_list_uri=None, from_datetime=None):
    """Incremental synchronization

    Use Change List to do incremental sync: read the source change list,
    drop changes older than the starting timestamp and superseded changes
    for the same resource, then apply the remaining created/updated/deleted
    changes to the destination.

    Parameters:
        allow_deletion: passed through to ``delete_resource``; when false
            and the change list holds only deletions, the method logs
            status and returns without applying anything.
        change_list_uri: explicit change list location (mapped through
            ``sitemap_uri``); when not given, ``self.change_list_name``
            is used instead.
        from_datetime: datetime string parsed with ``str_to_datetime``
            giving the starting point; when ``None`` the timestamp stored
            in ``ClientState`` for ``self.sitemap`` is used.

    Raises:
        ClientFatalError: no or unsafe mappings, unparseable
            ``from_datetime``, no starting timestamp available, change
            list unreadable, a change without a timestamp, or a resource
            outside the change list's authority (unless ``self.noauth``).
        ClientError: a change has a type other than 'updated', 'created'
            or 'deleted'.
    """
    self.logger.debug("Starting incremental sync")
    ### 0. Sanity checks
    if len(self.mapper) < 1:
        raise ClientFatalError(
            "No source to destination mapping specified")
    if self.mapper.unsafe():
        raise ClientFatalError(
            "Source to destination mappings unsafe: %s" % str(self.mapper))
    from_timestamp = None
    if from_datetime is not None:
        try:
            from_timestamp = str_to_datetime(from_datetime)
        except ValueError:
            raise ClientFatalError("Bad datetime in --from (%s)" % from_datetime)
    ### 1. Work out where to start from
    if from_timestamp is None:
        # No explicit --from, fall back to the state stored for this site
        from_timestamp = ClientState().get_state(self.sitemap)
        if from_timestamp is None:
            raise ClientFatalError(
                "Cannot do incremental sync. No stored timestamp for this site, and no explicit --from."
            )
    ### 2. Get URI of change list, from sitemap or explicit
    if change_list_uri:
        # Translate as necessary using maps
        change_list = self.sitemap_uri(change_list_uri)
    else:
        # Try default name
        change_list = self.sitemap_uri(self.change_list_name)
    ### 3. Read change list from source
    try:
        self.logger.info("Reading change list %s" % (change_list))
        src_change_list = ChangeList()
        src_change_list.read(uri=change_list)
        self.logger.debug("Finished reading change list")
    except Exception as e:
        raise ClientFatalError(
            "Can't read source change list from %s (%s)" % (change_list, str(e)))
    self.logger.info("Read source change list, %d changes listed" %
                     (len(src_change_list)))
    # An empty change list is deliberately not an error here: it is
    # reported as "in sync" by the status check in step 5.
    if self.checksum and not src_change_list.has_md5():
        self.checksum = False
        self.logger.info(
            "Not calculating checksums on destination as not present in source change list"
        )
    # Check all changes have timestamp and record last
    self.last_timestamp = 0
    for resource in src_change_list:
        if resource.timestamp is None:
            # Use resource.uri: no local `uri` is bound at this point, so
            # the previous `% (uri)` raised NameError instead of this error
            raise ClientFatalError(
                "Aborting - missing timestamp for change in %s" % (resource.uri))
        if resource.timestamp > self.last_timestamp:
            self.last_timestamp = resource.timestamp
    ### 4. Check that the change list has authority over URIs listed
    # FIXME - What does authority mean for change list? Here use both the
    # change list URI and, if we used it, the sitemap URI
    if not self.noauth:
        uauth_cs = UrlAuthority(change_list, self.strictauth)
        if not change_list_uri:
            uauth_sm = UrlAuthority(self.sitemap)
        for resource in src_change_list:
            # uauth_sm only exists when no explicit change_list_uri was
            # given; the short-circuit `or` keeps it from being touched
            # otherwise
            if (not uauth_cs.has_authority_over(resource.uri) and
                    (change_list_uri or not uauth_sm.has_authority_over(resource.uri))):
                raise ClientFatalError(
                    "Aborting as change list (%s) mentions resource at a location it does not have authority over (%s), override with --noauth"
                    % (change_list, resource.uri))
    ### 5. Prune entries before starting timestamp and dupe changes for a resource
    num_skipped = src_change_list.prune_before(from_timestamp)
    if num_skipped > 0:
        self.logger.info("Skipped %d changes before %s" %
                         (num_skipped, datetime_to_str(from_timestamp)))
    num_dupes = src_change_list.prune_dupes()
    if num_dupes > 0:
        self.logger.info("Removed %d prior changes" % (num_dupes))
    # Review and log status before
    # FIXME - should at this stage prune the change list to pick out
    # only the last change for each resource
    to_update = 0
    to_create = 0
    to_delete = 0
    for resource in src_change_list:
        if resource.change == 'updated':
            to_update += 1
        elif resource.change == 'created':
            to_create += 1
        elif resource.change == 'deleted':
            to_delete += 1
        else:
            raise ClientError("Unknown change type %s" % (resource.change))
    # Log status based on what we know from the Change List. Exit if
    # either there are no changes or if there are only deletions and
    # we don't allow deletion
    in_sync = ((to_update + to_delete + to_create) == 0)
    self.log_status(in_sync=in_sync, incremental=True, created=to_create,
                    updated=to_update, deleted=to_delete)
    if in_sync or ((to_update + to_create) == 0 and not allow_deletion):
        self.logger.debug("Completed incremental")
        return
    ### 6. Apply changes at same time or after from_timestamp
    delete_msg = (", and delete %d resources" % to_delete) if allow_deletion else ''
    self.logger.warning("Will apply %d changes%s" %
                        (len(src_change_list), delete_msg))
    num_updated = 0
    num_deleted = 0
    num_created = 0
    for resource in src_change_list:
        uri = resource.uri
        dst_path = self.mapper.src_to_dst(uri)
        if resource.change == 'updated':
            self.logger.info("updated: %s -> %s" % (uri, dst_path))
            self.update_resource(resource, dst_path, 'updated')
            num_updated += 1
        elif resource.change == 'created':
            self.logger.info("created: %s -> %s" % (uri, dst_path))
            self.update_resource(resource, dst_path, 'created')
            num_created += 1
        elif resource.change == 'deleted':
            # delete_resource returns the number actually deleted
            num_deleted += self.delete_resource(resource, dst_path, allow_deletion)
        else:
            raise ClientError("Unknown change type %s" % (resource.change))
    ### 7. Report status and planned actions
    self.log_status(incremental=True, created=num_created, updated=num_updated,
                    deleted=num_deleted, to_delete=to_delete)
    ### 8. Record last timestamp we have seen
    if self.last_timestamp > 0:
        ClientState().set_state(self.sitemap, self.last_timestamp)
        self.logger.info("Written last timestamp %s for incremental sync" %
                         (datetime_to_str(self.last_timestamp)))
    ### 9. Done
    self.logger.debug("Completed incremental sync")
def incremental(self, allow_deletion=False, change_list_uri=None, from_datetime=None):
    """Incremental synchronization

    Read the source change list, drop changes older than the starting
    timestamp and superseded changes for the same resource, then apply
    the remaining created/updated/deleted changes to the destination.

    Parameters:
        allow_deletion: passed through to ``delete_resource``.
        change_list_uri: explicit change list location (mapped through
            ``sitemap_uri``); when not given, ``self.change_list_name``
            is used instead.
        from_datetime: datetime string parsed with ``str_to_datetime``
            giving the starting point; when ``None`` the timestamp stored
            in ``ClientState`` for ``self.sitemap`` is used.

    Raises:
        ClientFatalError: no mappings, unparseable ``from_datetime``, no
            starting timestamp available, change list unreadable, a
            change without a timestamp, or a resource outside the change
            list's authority (unless ``self.noauth``).
        ClientError: a change has a type other than 'updated', 'created'
            or 'deleted'.
    """
    self.logger.debug("Starting incremental sync")
    ### 0. Sanity checks
    if len(self.mappings) < 1:
        raise ClientFatalError("No source to destination mapping specified")
    from_timestamp = None
    if from_datetime is not None:
        try:
            from_timestamp = str_to_datetime(from_datetime)
        except ValueError:
            raise ClientFatalError("Bad datetime in --from (%s)" % from_datetime)
    ### 1. Work out where to start from
    if from_timestamp is None:
        # No explicit --from, fall back to the state stored for this site
        from_timestamp = ClientState().get_state(self.sitemap)
        if from_timestamp is None:
            raise ClientFatalError("No stored timestamp for this site, and no explicit --from")
    ### 2. Get URI of change list, from sitemap or explicit
    if change_list_uri:
        # Translate as necessary using maps
        change_list = self.sitemap_uri(change_list_uri)
    else:
        # Try default name
        change_list = self.sitemap_uri(self.change_list_name)
    ### 3. Read change list from source
    try:
        self.logger.info("Reading change list %s" % (change_list))
        src_change_list = ChangeList()
        src_change_list.read(uri=change_list)
        self.logger.debug("Finished reading change list")
    except Exception as e:
        raise ClientFatalError("Can't read source change list from %s (%s)" % (change_list, str(e)))
    self.logger.info("Read source change list, %d changes listed" % (len(src_change_list)))
    # An empty change list is deliberately not an error here; the apply
    # loop below simply does nothing and status is reported as in sync.
    if self.checksum and not src_change_list.has_md5():
        self.checksum = False
        self.logger.info("Not calculating checksums on destination as not present in source change list")
    # Check all changes have timestamp and record last
    self.last_timestamp = 0
    for resource in src_change_list:
        if resource.timestamp is None:
            # Use resource.uri: no local `uri` is bound at this point, so
            # the previous `% (uri)` raised NameError instead of this error
            raise ClientFatalError("Aborting - missing timestamp for change in %s" % (resource.uri))
        if resource.timestamp > self.last_timestamp:
            self.last_timestamp = resource.timestamp
    ### 4. Check that the change list has authority over URIs listed
    # FIXME - What does authority mean for change list? Here use both the
    # change list URI and, if we used it, the sitemap URI
    uauth_cs = UrlAuthority(change_list)
    if not change_list_uri:
        uauth_sm = UrlAuthority(self.sitemap)
    for resource in src_change_list:
        # uauth_sm only exists when no explicit change_list_uri was given;
        # the short-circuit `or` keeps it from being touched otherwise
        lacks_authority = (
            not uauth_cs.has_authority_over(resource.uri) and
            (change_list_uri or not uauth_sm.has_authority_over(resource.uri)))
        # With --noauth a violation is silently tolerated
        if lacks_authority and not self.noauth:
            raise ClientFatalError("Aborting as change list (%s) mentions resource at a location it does not have authority over (%s), override with --noauth" % (change_list, resource.uri))
    ### 5. Prune entries before starting timestamp and dupe changes for a resource
    num_skipped = src_change_list.prune_before(from_timestamp)
    if num_skipped > 0:
        self.logger.info("Skipped %d changes before %s" % (num_skipped, datetime_to_str(from_timestamp)))
    num_dupes = src_change_list.prune_dupes()
    if num_dupes > 0:
        self.logger.info("Removed %d prior changes" % (num_dupes))
    ### 6. Apply changes at same time or after from_timestamp
    self.logger.info("Applying %d changes" % (len(src_change_list)))
    num_updated = 0
    num_deleted = 0
    num_created = 0
    for resource in src_change_list:
        uri = resource.uri
        dst_path = self.mapper.src_to_dst(uri)
        if resource.change == 'updated':
            self.logger.info("updated: %s -> %s" % (uri, dst_path))
            self.update_resource(resource, dst_path, 'updated')
            num_updated += 1
        elif resource.change == 'created':
            self.logger.info("created: %s -> %s" % (uri, dst_path))
            self.update_resource(resource, dst_path, 'created')
            num_created += 1
        elif resource.change == 'deleted':
            # Counted for every 'deleted' change passed to delete_resource,
            # whatever allow_deletion is
            self.delete_resource(resource, dst_path, allow_deletion)
            num_deleted += 1
        else:
            raise ClientError("Unknown change type %s" % (resource.change))
    ### 7. Report status and planned actions
    self.log_status(in_sync=((num_updated + num_deleted + num_created) == 0),
                    incremental=True, created=num_created,
                    updated=num_updated, deleted=num_deleted)
    ### 8. Record last timestamp we have seen
    if self.last_timestamp > 0:
        ClientState().set_state(self.sitemap, self.last_timestamp)
        self.logger.info("Written last timestamp %s for incremental sync" % (datetime_to_str(self.last_timestamp)))
    ### 9. Done
    self.logger.debug("Completed incremental sync")
def md_until(self, md_until):
    """Set md_until value from a W3C Datetime value.

    The value is converted with ``str_to_datetime`` and stored through
    ``_set_extra`` under the ``ts_until`` key.
    """
    parsed = str_to_datetime(md_until, context='md_until datetime')
    self._set_extra('ts_until', parsed)
def md_from(self, md_from):
    """Set md_from value from a W3C Datetime value.

    The value is converted with ``str_to_datetime`` and stored through
    ``_set_extra`` under the ``ts_from`` key.
    """
    parsed = str_to_datetime(md_from, context='md_from datetime')
    self._set_extra('ts_from', parsed)
def md_completed(self, md_completed):
    """Set md_completed value from a W3C Datetime value.

    The value is converted with ``str_to_datetime`` and stored through
    ``_set_extra`` under the ``ts_completed`` key.
    """
    parsed = str_to_datetime(md_completed, context='md_completed datetime')
    self._set_extra('ts_completed', parsed)
def md_at(self, md_at):
    """Set md_at value from a W3C Datetime value.

    The value is converted with ``str_to_datetime`` and stored through
    ``_set_extra`` under the ``ts_at`` key.
    """
    parsed = str_to_datetime(md_at, context='md_at datetime')
    self._set_extra('ts_at', parsed)
def lastmod(self, lastmod):
    """Set timestamp from a W3C Datetime Last-Modified value.

    The value is converted with ``str_to_datetime`` (using the context
    label ``'lastmod'``) and stored in ``self.timestamp``.
    """
    parsed = str_to_datetime(lastmod, context='lastmod')
    self.timestamp = parsed