Exemplo n.º 1
0
    def list_patch_files(self, resourcelist, max_files=-1):
        """
        Add the resources matching the name pattern 'rdf_out_*' in resource_dir to a resourcelist.

        Candidates are taken in alphabetical sort order and the last one is always left out. A max_files
        greater than 0 caps the number of resources added; any other value means no cap.
        :param resourcelist: the resourcelist that receives the resources
        :param max_files: the maximum number of resources to add to the list
        :return: True if the list includes the one but last rdf_out_* file in resource_dir, False if the
            max_files cap stopped the listing before that file was reached
        """
        pattern = os.path.join(self.resource_dir, PATTERN_RDF_OUT + "*")
        candidates = sorted(glob(pattern))[:-1]  # the last file in sort order is never listed
        added = 0
        for added, path in enumerate(candidates, 1):
            name = os.path.basename(path)
            lastmod = self.extract_timestamp(path)
            size = os.stat(path).st_size
            checksum = compute_md5_for_file(path)
            resource = Resource(self.publish_url + name, md5=checksum, length=size, lastmod=lastmod, path=path)
            resourcelist.add(resource)
            if 0 < max_files and added == max_files:
                break

        return added == len(candidates)
Exemplo n.º 2
0
    def list_patch_files(self, resourcelist, max_files=-1):
        """
        Append resources with the name pattern 'rdf_out_*' to a resourcelist.

        The files found in resource_dir are handled in alphabetical sort order; the last one is
        skipped. When max_files is greater than 0, appending stops after max_files resources.
        :param resourcelist: the resourcelist to append to
        :param max_files: the maximum number of resources to append to the list
        :return: True if the list includes the one but last rdf_out_* file in resource_dir, False otherwise
        """
        rdf_files = glob(os.path.join(self.resource_dir, PATTERN_RDF_OUT + "*"))
        rdf_files.sort()
        if rdf_files:
            del rdf_files[-1]  # the last file in sort order is not listed

        total = len(rdf_files)
        count = 0
        while count < total:
            path = rdf_files[count]
            basename = os.path.basename(path)
            lastmod = self.extract_timestamp(path)
            size = os.stat(path).st_size
            digest = compute_md5_for_file(path)
            resourcelist.add(
                Resource(self.publish_url + basename,
                         md5=digest,
                         length=size,
                         lastmod=lastmod,
                         path=path))
            count += 1
            if 0 < max_files and count == max_files:
                break

        # True only when every candidate file has been appended.
        return count == total
Exemplo n.º 3
0
    def update_resource(self, resource, file, change=None):
        """Update resource from uri to file on local system.

        The directory that will hold file is created first (also in dryrun
        mode). In dryrun mode the intended GET is only logged; otherwise
        update means three things:
        1. GET the resource from resource.uri and store it in file.
        2. If resource.timestamp is set, set atime and mtime of file to that
           timestamp truncated to whole seconds.
           NOTE(review): it might be better to use the Last-Modified value of
           the GET response instead, or at least to warn when that differs
           from (or is earlier than) the lastmod expected from the resource
           list.
        3. Check that the downloaded file matches the expected length and,
           if self.checksum is set, the expected md5. Mismatches are only
           logged, they do not fail the update.

        Also update self.last_timestamp if the timestamp (in source frame) of
        this resource is later than the current value, and record the update
        with self.log_event.

        Returns the number of resources updated/created (0 or 1). 0 is
        returned in dryrun mode and when the GET fails while
        self.ignore_failures is set.

        Raises ClientFatalError if the GET fails and self.ignore_failures is
        not set.
        """
        path = os.path.dirname(file)
        distutils.dir_util.mkpath(path)
        num_updated = 0
        if (self.dryrun):
            self.logger.info("dryrun: would GET %s --> %s" %
                             (resource.uri, file))
            return num_updated
        # 1. GET
        try:
            urllib.urlretrieve(resource.uri, file)
        except IOError as e:
            msg = "Failed to GET %s -- %s" % (resource.uri, str(e))
            if (self.ignore_failures):
                self.logger.warning(msg)
                # Return the count (0), not None, so callers can always
                # add the result to a running total.
                return num_updated
            raise ClientFatalError(msg)
        num_updated += 1
        # 2. set timestamp if we have one
        if (resource.timestamp is not None):
            unixtime = int(resource.timestamp)  # no fractional
            os.utime(file, (unixtime, unixtime))
            if (resource.timestamp > self.last_timestamp):
                self.last_timestamp = resource.timestamp
        self.log_event(Resource(resource=resource, change=change))
        # 3. sanity check
        length = os.stat(file).st_size
        # An unknown expected length (None) cannot be compared and would
        # break the %d formatting below, so it is skipped.
        if (resource.length is not None and resource.length != length):
            self.logger.info(
                "Downloaded size for %s of %d bytes does not match expected %d bytes"
                % (resource.uri, length, resource.length))
        if (self.checksum and resource.md5 is not None):
            file_md5 = compute_md5_for_file(file)
            if (resource.md5 != file_md5):
                self.logger.info(
                    "MD5 mismatch for %s, got %s but expected %s" %
                    (resource.uri, file_md5, resource.md5))
        return num_updated
Exemplo n.º 4
0
    def update_resource(self, resource, file, change=None):
        """Update resource from uri to file on local system

        The parent directory of file is created before anything else, also
        when self.dryrun is set. With self.dryrun set the GET is only logged.

        Update means three things:
        1. GET resources
        2. set atime and mtime of the local file to resource.timestamp
        (whole seconds) when the resource has a timestamp.
        NOTE(review): perhaps the Last-Modified value of the GET response
        should be used instead, or a warning given if it differs from (or is
        just earlier than) the lastmod we expected from the resource_list
        3. check that resource matches expected information (length, and md5
        when self.checksum is set); a mismatch is logged, not raised

        Also update self.last_timestamp if the timestamp (in source frame) of this
        resource is later than the current value.

        Returns the number of resources updated/created (0 or 1). The result
        is 0 for a dry run and for a failed GET that is ignored because
        self.ignore_failures is set.

        Raises ClientFatalError when the GET fails and self.ignore_failures
        is not set.
        """
        path = os.path.dirname(file)
        distutils.dir_util.mkpath(path)
        num_updated = 0
        if (self.dryrun):
            self.logger.info("dryrun: would GET %s --> %s" % (resource.uri, file))
        else:
            # 1. GET
            try:
                urllib.urlretrieve(resource.uri, file)
                num_updated += 1
            except IOError as e:
                msg = "Failed to GET %s -- %s" % (resource.uri, str(e))
                if (self.ignore_failures):
                    self.logger.warning(msg)
                    # Nothing was updated: report 0 rather than None so the
                    # documented integer return value holds on this path too.
                    return num_updated
                else:
                    raise ClientFatalError(msg)
            # 2. set timestamp if we have one
            if (resource.timestamp is not None):
                unixtime = int(resource.timestamp)  # no fractional
                os.utime(file, (unixtime, unixtime))
                if (resource.timestamp > self.last_timestamp):
                    self.last_timestamp = resource.timestamp
            self.log_event(Resource(resource=resource, change=change))
            # 3. sanity check
            length = os.stat(file).st_size
            # Only compare when an expected length is known; formatting None
            # with %d would raise a TypeError.
            if (resource.length is not None and resource.length != length):
                self.logger.info("Downloaded size for %s of %d bytes does not match expected %d bytes" % (resource.uri, length, resource.length))
            if (self.checksum and resource.md5 is not None):
                file_md5 = compute_md5_for_file(file)
                if (resource.md5 != file_md5):
                    self.logger.info("MD5 mismatch for %s, got %s but expected %s" % (resource.uri, file_md5, resource.md5))
        return num_updated
Exemplo n.º 5
0
 def _create_resource(self,
                      basename=None,
                      file_path=None,
                      notify_observers=True):
     """Create a new resource, add it to the source, notify observers.

     Stores the modification time, size in bytes and md5 of the file at
     file_path in self._repository under the key basename.

     :param basename: key under which the resource is registered
     :param file_path: path of the local file that backs the resource
     :param notify_observers: if True, a Resource with change "created" is
         built from self.resource(basename) and passed to
         self.notify_observers
     """
     # Take the size from the open file instead of reading the whole content:
     # the handle is closed deterministically, the file is not loaded into
     # memory, and the length is the byte count rather than the number of
     # characters decoded in text mode.
     with open(file_path, 'rb') as stream:
         length = os.fstat(stream.fileno()).st_size
     md5 = compute_md5_for_file(file_path)
     self._repository[basename] = {
         'timestamp': os.path.getmtime(file_path),
         'length': length,
         'md5': md5
     }
     if notify_observers:
         change = Resource(resource=self.resource(basename),
                           change="created")
         self.notify_observers(change)
Exemplo n.º 6
0
    def create_zip(self,
                   resourcelist,
                   prefix,
                   write_list=False,
                   write_manifest=True):
        """
        Dump local resources in resourcelist to a zip file with the specified prefix. The index in the zip file name
        will be 1 higher than the highest index among the existing zip files in publish_dir that are named
        '<prefix><digits>.zip'; numbering starts at 0 when there is no such file. A manifest.xml will be included in
        the zip.
        --  The resync.Dump.write_zip method used in this method has the side effect of changing local paths in
            resourcelist into paths relative in zip.
        :param resourcelist: resources to zip
        :param prefix: prefix of the zip file
        :param write_list: True if resourcelist should be written to local disc. Default: False
        :param write_manifest: True if a separate manifest file should be written to disc, False otherwise. Default: True
        :return: the created zip as a resync.Resource.
        """

        # Left unset (instead of w3cdt.datetime_to_str(no_fractions=True)):
        # the attribute gets lost in a read > write cycle with the resync library.
        md_at = None

        # Find the highest index in use. Only names made of exactly the prefix, digits and '.zip' count, so
        # digits inside the prefix and files of a longer prefix that also match the glob are not mistaken
        # for an index.
        index = -1
        name_pattern = re.compile(
            re.escape(os.path.basename(prefix)) + r"(\d+)\.zip$")
        for zip_file in glob(os.path.join(self.publish_dir, prefix + "*.zip")):
            found = name_pattern.match(os.path.basename(zip_file))
            if found:
                index = max(index, int(found.group(1)))

        zip_name = "%s%05d" % (prefix, index + 1)
        if (write_list):
            # this is the given resourcelist with local paths. As such it is *not* the resourcedump_manifest.
            with open(os.path.join(self.publish_dir, zip_name + ".xml"),
                      "w") as rl_file:
                rl_file.write(resourcelist.as_xml())

        zip_path = os.path.join(self.publish_dir, zip_name + ".zip")
        dump = Dump()
        dump.path_prefix = self.resource_dir
        dump.write_zip(resourcelist,
                       zip_path)  # paths in resourcelist will be stripped.
        # Left unset for the same reason as md_at.
        md_completed = None

        loc = self.publish_url + zip_name + ".zip"  # mandatory
        lastmod = self.last_modified(resourcelist)  # optional
        md_type = "application/zip"  # recommended
        md_length = os.stat(zip_path).st_size
        md5 = compute_md5_for_file(zip_path)

        zip_resource = Resource(uri=loc,
                                lastmod=lastmod,
                                length=md_length,
                                md5=md5,
                                mime_type=md_type,
                                md_at=md_at,
                                md_completed=md_completed)
        if write_manifest:
            # Built after write_zip, so the manifest carries the paths as rewritten by write_zip.
            rdm = ResourceDumpManifest(resources=resourcelist.resources)
            rdm_url = self.publish_url + PREFIX_MANIFEST + zip_name + ".xml"
            with open(
                    os.path.join(self.publish_dir,
                                 PREFIX_MANIFEST + zip_name + ".xml"),
                    "w") as rdm_file:
                rdm_file.write(rdm.as_xml())
            zip_resource.link_set(rel="content", href=rdm_url)

        return zip_resource
Exemplo n.º 7
0
    def create_zip(self, resourcelist, prefix, write_list=False, write_manifest=True):
        """
        Dump local resources in resourcelist to a zip file with the specified prefix. The index in the zip file name
        will be 1 higher than the last zip file index with the same prefix, where only files in publish_dir named
        '<prefix><digits>.zip' are considered; the first zip gets index 0. A manifest.xml will be included in the
        zip.
        --  The resync.Dump.write_zip method used in this method has the side effect of changing local paths in
            resourcelist into paths relative in zip.
        :param resourcelist: resources to zip
        :param prefix: prefix of the zip file
        :param write_list: True if resourcelist should be written to local disc. Default: False
        :param write_manifest: True if a separate manifest file should be written to disc, False otherwise. Default: True
        :return: the created zip as a resync.Resource.
        """

        # md_at and md_completed stay None (rather than w3cdt.datetime_to_str(no_fractions=True)) because the
        # attributes get lost in a read > write cycle with the resync library.
        md_at = None
        md_completed = None

        # Collect the indexes of the zips that carry exactly this prefix. Matching '<prefix><digits>.zip'
        # keeps digits in the prefix, and zips of a longer prefix caught by the glob, from being read as
        # the index; taking the maximum does not depend on the alphabetical order of the file names.
        zip_name_re = re.compile(re.escape(os.path.basename(prefix)) + r"(\d+)\.zip$")
        used_indexes = [-1]
        for existing_zip in glob(os.path.join(self.publish_dir, prefix + "*.zip")):
            matched = zip_name_re.match(os.path.basename(existing_zip))
            if matched is not None:
                used_indexes.append(int(matched.group(1)))

        zip_name = "%s%05d" % (prefix, max(used_indexes) + 1)
        if write_list:
            # this is the given resourcelist with local paths. As such it is *not* the resourcedump_manifest.
            with open(os.path.join(self.publish_dir, zip_name + ".xml"), "w") as rl_file:
                rl_file.write(resourcelist.as_xml())

        zip_path = os.path.join(self.publish_dir, zip_name + ".zip")
        dump = Dump()
        dump.path_prefix = self.resource_dir
        dump.write_zip(resourcelist, zip_path)  # paths in resourcelist will be stripped.

        loc = self.publish_url + zip_name + ".zip"  # mandatory
        lastmod = self.last_modified(resourcelist)  # optional
        md_type = "application/zip"  # recommended
        md_length = os.stat(zip_path).st_size
        md5 = compute_md5_for_file(zip_path)

        zip_resource = Resource(
            uri=loc,
            lastmod=lastmod,
            length=md_length,
            md5=md5,
            mime_type=md_type,
            md_at=md_at,
            md_completed=md_completed,
        )
        if write_manifest:
            # Created after write_zip, so the manifest lists the resources with the paths write_zip left.
            rdm = ResourceDumpManifest(resources=resourcelist.resources)
            rdm_url = self.publish_url + PREFIX_MANIFEST + zip_name + ".xml"
            with open(os.path.join(self.publish_dir, PREFIX_MANIFEST + zip_name + ".xml"), "w") as rdm_file:
                rdm_file.write(rdm.as_xml())
            zip_resource.link_set(rel="content", href=rdm_url)

        return zip_resource