def _scanDirectories(self, result = None, walker = None):
    """Walk each directory looking for cached files.

    @param result: the result of a DHT store request, not used (optional)
    @param walker: the walker to use to traverse the current directory
        (optional, defaults to creating a new walker from the first
        directory in the L{CacheManager.scanning} list)
    """
    # Need to start walking a new directory
    if walker is None:
        # If there are any left, get them
        if self.scanning:
            log.msg('started scanning directory: %s' % self.scanning[0].path)
            walker = self.scanning[0].walk()
        else:
            log.msg('cache directory scan complete')
            return

    try:
        # Get the next file in the directory
        file = walker.next()
    except StopIteration:
        # No files left, go to the next directory
        log.msg('done scanning directory: %s' % self.scanning[0].path)
        self.scanning.pop(0)
        reactor.callLater(0, self._scanDirectories)
        return

    # If it's not a file, ignore it
    if not file.isfile():
        reactor.callLater(0, self._scanDirectories, None, walker)
        return

    # If it's already properly in the DB, ignore it
    db_status = self.db.isUnchanged(file)
    if db_status:
        reactor.callLater(0, self._scanDirectories, None, walker)
        return

    # Don't hash files in the cache that are not in the DB
    if self.scanning[0] == self.cache_dir:
        if db_status is None:
            log.msg('ignoring unknown cache file: %s' % file.path)
        else:
            log.msg('removing changed cache file: %s' % file.path)
            file.remove()
        reactor.callLater(0, self._scanDirectories, None, walker)
        return

    # Otherwise hash it
    log.msg('start hash checking file: %s' % file.path)
    hash = HashObject()
    df = hash.hashInThread(file)
    df.addBoth(self._doneHashing, file, walker)
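# A minimal, self-contained sketch (not part of CacheManager) of the
# cooperative-scanning pattern used above: handle one file, then yield back
# to the reactor with reactor.callLater(0, ...) so a large directory walk
# never blocks other events. The directory and the printing are placeholders.
from twisted.internet import reactor
from twisted.python.filepath import FilePath

def scan(walker = None):
    if walker is None:
        walker = FilePath('.').walk()
    try:
        path = walker.next()
    except StopIteration:
        # No files left, stop the example reactor
        reactor.stop()
        return
    if path.isfile():
        print 'found file: %s' % path.path
    # Yield to the reactor between files instead of looping synchronously
    reactor.callLater(0, scan, walker)

reactor.callLater(0, scan)
reactor.run()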
def _save_complete(self, hash, url, destFile, destStream = None,
                   modtime = None, decFile = None):
    """Update the modification time and inform the main program.

    @type hash: L{Hash.HashObject}
    @param hash: the hash object containing the expected hash for the file
    @param url: the URI of the actual mirror request
    @type destFile: C{twisted.python.FilePath}
    @param destFile: the file where the download was written to
    @type destStream: L{Streams.GrowingFileStream}
    @param destStream: the stream to notify that all data is available
    @type modtime: C{int}
    @param modtime: the modified time of the cached file (seconds since epoch)
        (optional, defaults to not setting the modification time of the file)
    @type decFile: C{twisted.python.FilePath}
    @param decFile: the file where the decompressed download was written to
        (optional, defaults to the file not having been compressed)
    """
    result = hash.verify()
    if result or result is None:
        if destStream:
            destStream.allAvailable()
        if modtime:
            os.utime(destFile.path, (modtime, modtime))

        if result:
            log.msg('Hashes match: %s' % url)
            dht = True
        else:
            log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
            dht = False

        new_hash = self.db.storeFile(destFile, hash.digest(), dht,
                                     ''.join(hash.pieceDigests()))

        if self.manager:
            self.manager.new_cached_file(destFile, hash, new_hash, url)

        if decFile:
            # Hash the decompressed file and add it to the DB
            decHash = HashObject()
            ext_len = len(destFile.path) - len(decFile.path)
            df = decHash.hashInThread(decFile)
            df.addCallback(self._save_complete, url[:-ext_len], decFile,
                           modtime = modtime)
            df.addErrback(self._save_error, url[:-ext_len], decFile)
    else:
        log.msg("Hashes don't match %s != %s: %s" %
                (hash.hexexpected(), hash.hexdigest(), url))
        if destStream:
            destStream.allAvailable(remove = True)
        if decFile:
            decFile.remove()
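# A small illustration (with invented paths) of the url[:-ext_len] slicing
# above: the difference in length between the compressed and decompressed
# file paths is the length of the compression extension, so slicing that
# many characters off the URL gives the URL the uncompressed file would have.
destFile_path = '/var/cache/apt-p2p/Packages.gz'
decFile_path = '/var/cache/apt-p2p/Packages'
url = 'http://example.org/debian/dists/stable/main/binary-i386/Packages.gz'

ext_len = len(destFile_path) - len(decFile_path)
assert ext_len == 3       # the length of '.gz'
assert url[:-ext_len] == 'http://example.org/debian/dists/stable/main/binary-i386/Packages'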
def _findHash(self, loadResult, path, d):
    """Search the records for the hash of a path.

    @type loadResult: C{boolean}
    @param loadResult: whether apt's cache was successfully loaded
    @type path: C{string}
    @param path: the path within the mirror of the file to lookup
    @type d: L{twisted.internet.defer.Deferred}
    @param d: the deferred to callback with the result
    """
    if not loadResult:
        d.callback(HashObject())
        return loadResult

    h = HashObject()

    # First look for the path in the cache of index files
    for release in self.indexrecords:
        if path.startswith(release[:-7]):
            for indexFile in self.indexrecords[release]:
                if release[:-7] + indexFile == path:
                    h.setFromIndexRecord(self.indexrecords[release][indexFile])
                    d.callback(h)
                    return loadResult

    package = path.split('/')[-1].split('_')[0]

    # Check the binary packages
    try:
        for version in self.cache[package].version_list:
            size = version.size
            for verFile in version.file_list:
                if self.records.lookup(verFile):
                    if '/' + self.records.filename == path:
                        h.setFromPkgRecord(self.records, size)
                        d.callback(h)
                        return loadResult
    except KeyError:
        pass

    # Check the source packages' files
    if self.srcrecords:
        self.srcrecords.restart()
        if self.srcrecords.lookup(package):
            for f in self.srcrecords.files:
                if path == '/' + f[2]:
                    h.setFromSrcRecord(f)
                    d.callback(h)
                    return loadResult

    d.callback(h)

    # Have to pass the returned loadResult on in case other calls to
    # this function are pending.
    return loadResult
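# An illustration (with invented paths) of the release[:-7] slicing above:
# keys of indexrecords are paths to 'Release' files, so dropping the trailing
# 7 characters ('Release') leaves the distribution directory that the index
# file names are relative to.
release = '/debian/dists/stable/Release'
indexFile = 'main/binary-i386/Packages.bz2'
path = '/debian/dists/stable/main/binary-i386/Packages.bz2'

assert release[:-7] == '/debian/dists/stable/'
assert release[:-7] + indexFile == path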
def refreshFiles(self, result = None):
    """Refresh any files in the DHT that are about to expire."""
    if result is not None:
        log.msg('Storage resulted in: %r' % result)

    if not self.refreshingHashes:
        expireAfter = config.gettime('DEFAULT', 'KEY_REFRESH')
        self.refreshingHashes = self.db.expiredHashes(expireAfter)
        if len(self.refreshingHashes) > 0:
            log.msg('Refreshing the keys of %d DHT values' %
                    len(self.refreshingHashes))

    delay = 60
    if self.refreshingHashes:
        delay = 3
        refresh = self.refreshingHashes.pop(0)
        self.db.refreshHash(refresh['hash'])
        hash = HashObject(refresh['hash'], pieces = refresh['pieces'])
        storeDefer = self.store(hash)
        storeDefer.addBoth(self.refreshFiles)

    if self.nextRefresh.active():
        self.nextRefresh.reset(delay)
    else:
        self.nextRefresh = reactor.callLater(delay, self.refreshFiles)
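# A minimal sketch (names and timings are placeholders) of the rescheduling
# idiom used above: keep one DelayedCall handle, reset it if it is still
# pending, and create a new one once it has fired.
from twisted.internet import reactor

class Refresher(object):
    def __init__(self):
        self.nextRefresh = reactor.callLater(60, self.refresh)

    def refresh(self):
        # ... do one unit of work here ...
        delay = 3 if self.workRemaining() else 60
        if self.nextRefresh.active():
            # Called early (e.g. directly): push the pending call back
            self.nextRefresh.reset(delay)
        else:
            # The DelayedCall already fired, so schedule a fresh one
            self.nextRefresh = reactor.callLater(delay, self.refresh)

    def workRemaining(self):
        return False   # placeholder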
def check_freshness_error(self, err, req, url, d):
    """Mirror request failed, continue with download.

    @param err: the failure returned by the HEAD request to the mirror
    @type req: L{twisted.web2.http.Request}
    @param req: the initial request sent to the HTTP server by apt
    @param url: the URI of the actual mirror request
    @type d: L{twisted.internet.defer.Deferred}
    @param d: the deferred to callback with the response to send to apt
    """
    log.err(err)
    self.startDownload([], req, HashObject(), url, d)
def check_freshness_done(self, resp, req, url, orig_resp, d):
    """Return the fresh response, if stale start to redownload.

    @type resp: L{twisted.web2.http.Response}
    @param resp: the response from the mirror to the HEAD request
    @type req: L{twisted.web2.http.Request}
    @param req: the initial request sent to the HTTP server by apt
    @param url: the URI of the actual mirror request
    @type orig_resp: L{twisted.web2.http.Response}
    @param orig_resp: the response from the cache to be sent to apt
    """
    if resp.code == 304:
        log.msg('Still fresh, returning: %s' % url)
        d.callback(orig_resp)
    else:
        log.msg('Stale, need to redownload: %s' % url)
        self.startDownload([], req, HashObject(), url, d)
def check_freshness(self, req, url, orig_resp, d):
    """Send a HEAD to the mirror to check if the response from the cache is still valid.

    @type req: L{twisted.web2.http.Request}
    @param req: the initial request sent to the HTTP server by apt
    @param url: the URI of the actual mirror request
    @type orig_resp: L{twisted.web2.http.Response}
    @param orig_resp: the response from the cache to be sent to apt
    """
    log.msg('Checking if %s is still fresh' % url)
    modtime = orig_resp.headers.getHeader('Last-Modified')
    headDefer = self.peers.get(HashObject(), url, method = "HEAD",
                               modtime = modtime)
    headDefer.addCallbacks(self.check_freshness_done,
                           self.check_freshness_error,
                           callbackArgs = (req, url, orig_resp, d),
                           errbackArgs = (req, url, d))
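# A hedged sketch of the conditional-request idea behind check_freshness,
# using the standard library instead of the peers.get() machinery: send a
# HEAD with If-Modified-Since and treat a 304 reply as "still fresh". The
# host, path, and date are illustrative only.
import httplib

conn = httplib.HTTPConnection('example.org')
conn.request('HEAD', '/debian/dists/stable/Release',
             headers = {'If-Modified-Since': 'Sat, 01 Jan 2011 00:00:00 GMT'})
resp = conn.getresponse()
if resp.status == 304:
    print 'still fresh, serve the cached response'
else:
    print 'stale, redownload from the mirror'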
def findHash_error(self, failure, req, url, orig_resp, d):
    """Process the error in hash lookup by returning an empty L{HashObject}."""
    log.msg('Hash lookup for %s resulted in an error: %s' %
            (url, failure.getErrorMessage()))
    self.findHash_done(HashObject(), req, url, orig_resp, d)
def _findHash_error(self, failure, path, d):
    """An error occurred, return an empty hash."""
    log.msg('An error occurred while looking up a hash for: %s' % path)
    log.err(failure)
    d.callback(HashObject())
    return failure