Пример #1
0
    def _scanDirectories(self, result = None, walker = None):
        """Process a single entry of the cache directory scan.

        Each call handles at most one entry from the walker and then either
        reschedules itself through the reactor or hands the entry to a
        threaded hash (which continues in L{_doneHashing}).

        @param result: the result of a DHT store request, not used (optional)
        @param walker: the walker to use to traverse the current directory
            (optional, defaults to creating a new walker from the first
            directory in the L{CacheManager.scanning} list)
        """
        def continue_walk():
            # Resume with the same walker on the next reactor iteration
            reactor.callLater(0, self._scanDirectories, None, walker)

        if walker is None:
            # No walk in progress: begin the next queued directory, if any
            if not self.scanning:
                log.msg('cache directory scan complete')
                return
            current = self.scanning[0]
            log.msg('started scanning directory: %s' % current.path)
            walker = current.walk()

        try:
            entry = walker.next()
        except StopIteration:
            # This directory is exhausted, drop it and start on the next one
            log.msg('done scanning directory: %s' % self.scanning[0].path)
            self.scanning.pop(0)
            reactor.callLater(0, self._scanDirectories)
            return

        # Only regular files are of interest
        if not entry.isfile():
            continue_walk()
            return

        # A true status means the DB already has this file, unchanged
        db_status = self.db.isUnchanged(entry)
        if db_status:
            continue_walk()
            return

        # Files in the cache directory are never hashed from here
        if self.scanning[0] == self.cache_dir:
            if db_status is not None:
                # Known to the DB but no longer matching it
                log.msg('removing changed cache file: %s' % entry.path)
                entry.remove()
            else:
                log.msg('ignoring unknown cache file: %s' % entry.path)
            continue_walk()
            return

        # Anything else gets hashed; _doneHashing receives the entry and walker
        log.msg('start hash checking file: %s' % entry.path)
        hasher = HashObject()
        hashing = hasher.hashInThread(entry)
        hashing.addBoth(self._doneHashing, entry, walker)
Пример #2
0
    def _save_complete(self, hash, url, destFile, destStream = None,
                       modtime = None, decFile = None):
        """Update the modification time and inform the main program.

        The outcome of C{hash.verify()} selects the path taken: a true result
        is logged as matching hashes, C{None} is logged as a freshly computed
        hash, and any other false result is logged as a mismatch.

        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        @param url: the URI of the actual mirror request
        @type destFile: C{twisted.python.FilePath}
        @param destFile: the file where the download was written to
        @type destStream: L{Streams.GrowingFileStream}
        @param destStream: the stream to notify that all data is available
        @type modtime: C{int}
        @param modtime: the modified time of the cached file (seconds since epoch)
            (optional, defaults to not setting the modification time of the file)
        @type decFile: C{twisted.python.FilePath}
        @param decFile: the file where the decompressed download was written to
            (optional, defaults to the file not having been compressed)
        """
        result = hash.verify()
        if result or result is None:
            if destStream:
                destStream.allAvailable()
            # Note: a modtime of 0 is skipped just like None
            if modtime:
                os.utime(destFile.path, (modtime, modtime))

            if result:
                log.msg('Hashes match: %s' % url)
                dht = True
            else:
                log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
                dht = False

            new_hash = self.db.storeFile(destFile, hash.digest(), dht,
                                         ''.join(hash.pieceDigests()))

            if self.manager:
                self.manager.new_cached_file(destFile, hash, new_hash, url)

            if decFile:
                # Hash the decompressed file and add it to the DB
                decHash = HashObject()

                # The decompressed URL is the original URL minus as many
                # trailing characters as the two file paths differ by.
                # Only slice for a positive difference: url[:-0] is the
                # empty string and a negative value would keep a prefix.
                ext_len = len(destFile.path) - len(decFile.path)
                dec_url = url
                if ext_len > 0:
                    dec_url = url[:-ext_len]

                df = decHash.hashInThread(decFile)
                df.addCallback(self._save_complete, dec_url, decFile, modtime = modtime)
                df.addErrback(self._save_error, dec_url, decFile)
        else:
            log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
            # Tell the stream to finish with remove set, and drop any
            # decompressed copy that was written alongside the download
            if destStream:
                destStream.allAvailable(remove = True)
            if decFile:
                decFile.remove()
Пример #3
0
    def _findHash(self, loadResult, path, d):
        """Look up the hash of a mirror path and fire the deferred with it.

        The index file records are tried first, then the binary package
        records, then the source package records. The deferred is called
        back with the first match, or with an unset L{HashObject} when
        nothing matches or the cache was not loaded.

        @type loadResult: C{boolean}
        @param loadResult: whether apt's cache was successfully loaded
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the result
        @return: C{loadResult} unchanged, so that other pending calls to
            this function still receive it
        """
        # Nothing can be searched if the cache did not load
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        found = HashObject()

        # First look for the path in the cache of index files
        for release_name in self.indexrecords:
            # The key minus its last 7 characters is used as the path prefix
            # (presumably stripping a trailing 'Release' file name)
            prefix = release_name[:-7]
            if not path.startswith(prefix):
                continue
            for index_name in self.indexrecords[release_name]:
                if prefix + index_name != path:
                    continue
                found.setFromIndexRecord(
                    self.indexrecords[release_name][index_name])
                d.callback(found)
                return loadResult

        # The package name is the file name up to the first underscore
        file_name = path.split('/')[-1]
        package = file_name.split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].version_list:
                size = version.size
                for ver_file in version.file_list:
                    if not self.records.lookup(ver_file):
                        continue
                    if '/' + self.records.filename != path:
                        continue
                    found.setFromPkgRecord(self.records, size)
                    d.callback(found)
                    return loadResult
        except KeyError:
            # Presumably the package is not in the cache; fall through
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.restart()
            if self.srcrecords.lookup(package):
                for src_file in self.srcrecords.files:
                    if path != '/' + src_file[2]:
                        continue
                    found.setFromSrcRecord(src_file)
                    d.callback(found)
                    return loadResult

        # No record matched, report the unset hash
        d.callback(found)

        # Have to pass the returned loadResult on in case other calls to this function are pending.
        return loadResult
Пример #4
0
    def refreshFiles(self, result=None):
        """Refresh any files in the DHT that are about to expire.

        At most one hash is refreshed per call. The method re-arms itself
        through L{nextRefresh}, using a short delay when a hash was just
        refreshed and a longer one otherwise.

        @param result: the outcome of a previous store, logged when it is
            not C{None} (optional)
        """
        if result is not None:
            log.msg('Storage resulted in: %r' % result)

        # Only ask the DB for more work once the current list is used up
        if not self.refreshingHashes:
            max_age = config.gettime('DEFAULT', 'KEY_REFRESH')
            self.refreshingHashes = self.db.expiredHashes(max_age)
            pending = len(self.refreshingHashes)
            if pending > 0:
                log.msg('Refreshing the keys of %d DHT values' % pending)

        if self.refreshingHashes:
            # Take the next entry, mark it refreshed and store it again
            entry = self.refreshingHashes.pop(0)
            self.db.refreshHash(entry['hash'])
            hash_obj = HashObject(entry['hash'], pieces=entry['pieces'])
            storing = self.store(hash_obj)
            storing.addBoth(self.refreshFiles)
            delay = 3
        else:
            delay = 60

        # Re-arm the timer, reusing the pending call when there is one
        if not self.nextRefresh.active():
            self.nextRefresh = reactor.callLater(delay, self.refreshFiles)
        else:
            self.nextRefresh.reset(delay)
Пример #5
0
 def check_freshness_error(self, err, req, url, d):
     """Log a failed freshness check and continue with the download.

     @param err: the failure produced by the HEAD request to the mirror
     @type req: L{twisted.web2.http.Request}
     @param req: the initial request sent to the HTTP server by apt
     @param url: the URI of the actual mirror request
     @param d: handed on unchanged to L{startDownload} (presumably the
         deferred that receives the final response)
     """
     log.err(err)

     # Start the download with a fresh, unset hash object
     empty_hash = HashObject()
     self.startDownload([], req, empty_hash, url, d)
Пример #6
0
    def _findHash(self, loadResult, path, d):
        """Search the apt records for the hash of a path.

        Index files, binary packages and source package files are checked
        in that order. The first match is delivered through the deferred;
        if none is found, or the cache was not loaded, an unset
        L{HashObject} is delivered instead.

        @type loadResult: C{boolean}
        @param loadResult: whether apt's cache was successfully loaded
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the result
        @return: the C{loadResult} that was passed in
        """
        if not loadResult:
            # The cache is unusable, answer with an unset hash
            d.callback(HashObject())
            return loadResult

        result_hash = HashObject()

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Key with its final 7 characters removed (presumably the
            # 'Release' file name), leaving the directory part
            release_dir = release[:-7]
            if path.startswith(release_dir):
                for name in self.indexrecords[release]:
                    if path == release_dir + name:
                        record = self.indexrecords[release][name]
                        result_hash.setFromIndexRecord(record)
                        d.callback(result_hash)
                        return loadResult

        # Package name: last path component, cut at the first underscore
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            versions = self.cache[package].version_list
            for version in versions:
                size = version.size
                for verFile in version.file_list:
                    if (self.records.lookup(verFile)
                            and '/' + self.records.filename == path):
                        result_hash.setFromPkgRecord(self.records, size)
                        d.callback(result_hash)
                        return loadResult
        except KeyError:
            # Presumably an unknown package; try the source records next
            pass

        # Check the source packages' files
        srcrecords = self.srcrecords
        if srcrecords:
            srcrecords.restart()
            if srcrecords.lookup(package):
                for entry in srcrecords.files:
                    if path == '/' + entry[2]:
                        result_hash.setFromSrcRecord(entry)
                        d.callback(result_hash)
                        return loadResult

        # Nothing matched
        d.callback(result_hash)

        # Have to pass the returned loadResult on in case other calls to this function are pending.
        return loadResult
Пример #7
0
 def check_freshness_done(self, resp, req, url, orig_resp, d):
     """Deliver the cached response if still fresh, otherwise redownload.

     A 304 status from the mirror means the cached response is returned
     through the deferred; any other status starts a new download.

     @type resp: L{twisted.web2.http.Response}
     @param resp: the response from the mirror to the HEAD request
     @type req: L{twisted.web2.http.Request}
     @param req: the initial request sent to the HTTP server by apt
     @param url: the URI of the actual mirror request
     @type orig_resp: L{twisted.web2.http.Response}
     @param orig_resp: the response from the cache to be sent to apt
     @param d: called back with C{orig_resp} when fresh, otherwise handed
         on to L{startDownload}
     """
     if resp.code != 304:
         # Anything but "not modified" is treated as stale
         log.msg('Stale, need to redownload: %s' % url)
         self.startDownload([], req, HashObject(), url, d)
         return

     log.msg('Still fresh, returning: %s' % url)
     d.callback(orig_resp)
Пример #8
0
 def check_freshness(self, req, url, orig_resp, d):
     """Ask the mirror, via a HEAD request, whether the cached response is still valid.

     The outcome is handled by L{check_freshness_done} on success and
     L{check_freshness_error} on failure.

     @type req: L{twisted.web2.http.Request}
     @param req: the initial request sent to the HTTP server by apt
     @param url: the URI of the actual mirror request
     @type orig_resp: L{twisted.web2.http.Response}
     @param orig_resp: the response from the cache to be sent to apt
     @param d: handed on to both handlers
     """
     log.msg('Checking if %s is still fresh' % url)

     # The cached response's Last-Modified value is sent along as modtime
     last_modified = orig_resp.headers.getHeader('Last-Modified')
     head_request = self.peers.get(HashObject(), url,
                                   method="HEAD", modtime=last_modified)

     # The error handler does not receive the cached response
     success_args = (req, url, orig_resp, d)
     failure_args = (req, url, d)
     head_request.addCallbacks(self.check_freshness_done,
                               self.check_freshness_error,
                               callbackArgs=success_args,
                               errbackArgs=failure_args)
Пример #9
0
 def findHash_error(self, failure, req, url, orig_resp, d):
     """Log a failed hash lookup and carry on with an unset L{HashObject}.

     The remaining arguments are handed on unchanged to L{findHash_done}.
     """
     reason = failure.getErrorMessage()
     log.msg('Hash lookup for %s resulted in an error: %s' % (url, reason))

     # Continue as if the lookup had found nothing
     empty_hash = HashObject()
     self.findHash_done(empty_hash, req, url, orig_resp, d)
Пример #10
0
 def _findHash_error(self, failure, path, d):
     """Log a failed lookup, answer with an unset hash and pass the failure on.

     @param failure: the failure that occurred during the lookup
     @param path: the path that was being looked up
     @param d: the deferred to callback with an unset L{HashObject}
     @return: the C{failure} that was received, unchanged
     """
     log.msg('An error occurred while looking up a hash for: %s' % path)
     log.err(failure)

     # The waiting caller still gets an answer, just an unset one
     empty_hash = HashObject()
     d.callback(empty_hash)
     return failure