def setRealFileName(nzbFile, filename, forceChange = False, settingSegmentNumber = None):
    """ Set the actual filename of the segment's parent nzbFile. If the filename wasn't
    already previously set, set the actual filename atomically and also atomically rename
    known temporary files belonging to that nzbFile to use the new real filename

    nzbFile -- the NZBFile whose filename is being set
    filename -- the new real filename
    forceChange -- when True, replace a real filename that was already set (and flag
                   nzbFile.forcedChangedFilename). When False a conflicting real filename
                   is ignored
    settingSegmentNumber -- optional segment number, only used in the error message that
                            is logged for a mismatching filename header

    Returns None. Exceptions raised while listing or renaming the files (os.listdir,
    shutil.move) propagate to the caller; nzbFile.tempFileNameLock is released in that
    case too """
    # FIXME: remove locking. actually, this function really needs to be locking when
    # nzb.destDir is changing (when the archive dir is moved around)
    switchedReal = False
    if nzbFile.filename is not None and nzbFile.filename != filename and \
            not isHellaTemp(nzbFile.filename):
        # This NZBFile already had a real filename set, and now something has triggered it
        # be changed
        switchedReal = True

        if forceChange:
            # Force change -- this segment has been found to be a duplicate and needs to
            # be renamed (but its parent NZBFile is currently being downloaded)
            nzbFile.forcedChangedFilename = True
        else:
            # Not a force change. Either ignore the supposed new real filename (we already
            # had one, we're just going to stick with it) and print an error about
            # receiving bad header data. Or if this NZBFile filename mismatches because it
            # was previously found to be a dupe (and its filename was renamed) just
            # completely ignore the new filename
            if not nzbFile.forcedChangedFilename:
                segmentInfo = ''
                if settingSegmentNumber is not None:
                    segmentInfo = ' segment: %i' % settingSegmentNumber

                error(nzbFile.showFilename + segmentInfo + \
                      ' has incorrect filename header!: ' + filename + ' should be: ' + \
                      nzbFile.showFilename)
            return

    elif nzbFile.filename == filename:
        # Nothing to change
        return

    # We might have been using a tempFileName previously, and just succesfully found
    # the real filename in the articleData. Immediately rename any files that were
    # using the temp name
    nzbFile.tempFileNameLock.acquire()
    try:
        # Maps an on disk basename to the basename it must be renamed to
        renameFilenames = {}

        renameSegments = []
        if switchedReal:
            notOnDisk = nzbFile.todoNzbSegments.union(nzbFile.dequeuedSegments)
            # Get the original segment filenames via getDestination() (before we change it)
            renameSegments = [(nzbSegment, nzbSegment.getDestination())
                              for nzbSegment in nzbFile.nzbSegments
                              if nzbSegment not in notOnDisk]

        # Change the filename
        nzbFile.filename = filename

        # Now get the new filenames via getDestination() (renameSegments is only
        # populated when a previous real filename is being replaced)
        for renameSegment, oldName in renameSegments:
            renameFilenames[os.path.basename(oldName)] = \
                os.path.basename(renameSegment.getDestination())

        # We also need a mapping of temp filenames to the new filename, incase we just found
        # the real file name (filename is None or filename was previously set to a temp name)
        for nzbSegment in nzbFile.nzbSegments:
            renameFilenames[nzbSegment.getTempFileName()] = \
                os.path.basename(nzbSegment.getDestination())

        # Rename all segments
        destDir = nzbFile.nzb.destDir
        for entry in os.listdir(destDir):
            if entry not in renameFilenames:
                continue

            orig = os.path.join(destDir, entry)
            new = os.path.join(destDir, renameFilenames[entry])
            shutil.move(orig, new)

            # Keep the onDiskSegments map in sync
            if orig in Hellanzb.queue.onDiskSegments:
                Hellanzb.queue.onDiskSegments[new] = \
                    Hellanzb.queue.onDiskSegments.pop(orig)
    finally:
        # Always release, otherwise a failed listdir/move would leave the lock held
        # forever
        nzbFile.tempFileNameLock.release()
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments=False):
    """ Faster version of needsDownload for multiple segments that do not have their real
    file name (for use by the Queue).

    When an NZB is loaded and parsed, NZBFiles not found on disk at the time of parsing
    are marked as needing to be downloaded. (An easy first pass of figuring out exactly
    what needs to be downloaded).

    This function is the second pass. It takes the child NZBSegments of those NZBFiles
    and scans Hellanzb.WORKING_DIR, detecting which segments are already on disk and can
    be skipped

    segmentList -- the NZBSegments to check
    overwriteZeroByteSegments -- passed through to validWorkingFile for every file found
                                 in the working dir

    Returns the tuple (needDlFiles, needDlSegments, onDiskSegments): the set of NZBFiles
    having at least one segment to download, the list of segments to download and the
    list of segments matched to a file on disk """
    # NOTE(review): this module defines segmentsNeedDownload a second time further down;
    # the later definition is the one left bound to the name once the module is loaded

    # Segment number -> names (with the .segmentXXXX suffix cut off) of the working dir
    # files carrying that segment number
    namesBySegmentNumber = {}

    needDlFiles = set() # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in a map of lists
    for entry in os.listdir(Hellanzb.WORKING_DIR):
        entryPath = os.path.join(Hellanzb.WORKING_DIR, entry)
        if not validWorkingFile(entryPath, overwriteZeroByteSegments):
            continue

        ext = getFileExtension(entry)
        if ext is None or not segmentEndRe.match(ext):
            # Not a segment file
            continue

        number = int(ext[-4:])
        # entry[:-12] cuts off .segmentXXXX
        namesBySegmentNumber.setdefault(number, []).append(entry[:-12])

    # Determine if each segment needs to be downloaded
    for segment in segmentList:
        candidates = namesBySegmentNumber.get(segment.number)
        if candidates is None:
            # No matching segment numbers, obviously needs to be downloaded
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        # We've matched to our on disk segment if we:
        # a) find that on disk segment's file name in our potential segment's subject
        # b) match that on disk segment's file name to our potential segment's temp
        # file name (w/ .segmentXXXX cutoff)
        foundFileName = None
        for candidate in candidates:
            if segment.nzbFile.subject.find(candidate) >= 0 or \
                    segment.getTempFileName()[:-12] == candidate:
                foundFileName = candidate
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in
            # memory. since we didnt see the temporary name on the filesystem, but we
            # found a subject match, that means we have the real name on the
            # filesystem. In the case where this happens we've figured out the real
            # filename (hopefully!). Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber=segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's
                # nzbFile's segments (because there are no segments in the queue at
                # this point). It will identifyPar the segment AND more importantly it
                # will mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue=True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        candidates.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments
class NZBSegmentQueue(PriorityQueue):
    """ priority fifo queue of segments to download. lower numbered segments are downloaded
    before higher ones

    Besides the queued segments themselves, the queue keeps track of the NZBs currently
    being downloaded (nzbs), the NZBFiles owning the queued segments (nzbFiles), the
    NZBFiles of postponed downloads (postponedNzbFiles), the segments known to be on disk
    (onDiskSegments, keyed by segment destination) and a RetryQueue (rQueue).

    nzbs is guarded by nzbsLock; nzbFiles and postponedNzbFiles are guarded by
    nzbFilesLock. Both locks are always released via try/finally """

    NZB_CONTENT_P = 100000 # normal nzb downloads
    # FIXME: EXTRA_PAR2_P isn't actually used
    EXTRA_PAR2_P = 0 # par2 after-the-fact downloads are more important

    def __init__(self, fileName=None, parent=None):
        """ Create an empty queue

        fileName -- when not None it is handed straight to parseNZB.
                    NOTE(review): parseNZB reads .nzbFileName from its argument, so
                    despite the name this looks like it has to be an NZB object rather
                    than a path -- confirm against the callers
        parent -- the queue whose onDiskSegments map this queue reads and updates;
                  defaults to this queue itself """
        PriorityQueue.__init__(self)

        if parent is not None:
            self.parent = parent
        else:
            self.parent = self

        # Segments currently on disk
        self.onDiskSegments = {}

        # Maintain a collection of the known nzbFiles belonging to the segments in this
        # queue. Set is much faster for _put & __contains__
        self.nzbFiles = set()
        self.postponedNzbFiles = set()
        self.nzbFilesLock = Lock()

        self.nzbs = []
        self.nzbsLock = Lock()

        self.totalQueuedBytes = 0

        self.fillServerPriority = 0

        self.retryQueueEnabled = False
        self.rQueue = RetryQueue()

        if fileName is not None:
            self.parseNZB(fileName)

    def cancel(self):
        """ Cancel the current download: postpone it without remembering its nzbFiles """
        self.postpone(cancel=True)

    def clear(self):
        """ Clear the queue of all its contents"""
        # NOTE(review): retryQueueEnabled is initialized to False and later assigned the
        # result of rQueue.needRetryQueue(), so unless that returns None this check is
        # always true. Left as is to keep the behavior unchanged
        if self.retryQueueEnabled is not None:
            self.rQueue.clear()
        PriorityQueue.clear(self)
        self.nzbs = []
        self.parent.onDiskSegments.clear()

    def postpone(self, cancel=False):
        """ Postpone the current download

        cancel -- when True the current nzbFiles are dropped instead of being remembered
                  in postponedNzbFiles """
        self.clear()

        self.nzbsLock.acquire()
        try:
            self.nzbFilesLock.acquire()
            try:
                if not cancel:
                    self.postponedNzbFiles.update(self.nzbFiles)
                self.nzbFiles.clear()
            finally:
                self.nzbFilesLock.release()
        finally:
            self.nzbsLock.release()

        self.totalQueuedBytes = 0

    def unpostpone(self, nzb):
        """ Recall a postponed NZB: forget the postponed nzbFiles whose NZB has the same
        archive name as the specified nzb """
        self.nzbFilesLock.acquire()
        try:
            arName = archiveName(nzb.nzbFileName)
            found = []
            for nzbFile in self.postponedNzbFiles:
                # FIXME:
                # Why is this not nzbFile.nzb == nzb?
                if nzbFile.nzb.archiveName == arName:
                    found.append(nzbFile)

            # Removed in a second pass: the set can't be changed while iterating it
            for nzbFile in found:
                self.postponedNzbFiles.remove(nzbFile)
        finally:
            self.nzbFilesLock.release()

    def _put(self, item):
        """ Add a segment to the queue

        item -- a (priority, NZBSegment) or (priority, NZBFile) tuple """
        priority, item = item

        # Support adding NZBFiles to the queue. Just adds all the NZBFile's NZBSegments
        if isinstance(item, NZBFile):
            offset = 0
            for nzbSegment in item.nzbSegments:
                PriorityQueue._put(self, (priority + offset, nzbSegment))
                offset += 1
        else:
            # Assume segment, add to list
            if item.nzbFile not in self.nzbFiles:
                self.nzbFiles.add(item.nzbFile)
            PriorityQueue._put(self, (priority, item))

    def calculateTotalQueuedBytes(self):
        """ Calculate how many bytes are queued to be downloaded in this queue. The result is
        stored in totalQueuedBytes """
        # NOTE: we don't maintain this calculation all the time, too much CPU work for
        # _put
        self.totalQueuedBytes = 0

        self.nzbFilesLock.acquire()
        try:
            files = self.nzbFiles.copy()
        finally:
            self.nzbFilesLock.release()

        # Total all the nzbFiles, then subtract their segments that don't need to be
        # downloaded
        for nzbFile in files:
            self.totalQueuedBytes += nzbFile.totalBytes

            if len(nzbFile.todoNzbSegments) != len(nzbFile.nzbSegments):
                for nzbSegment in nzbFile.nzbSegments:
                    if nzbSegment not in nzbFile.todoNzbSegments:
                        self.totalQueuedBytes -= nzbSegment.bytes

    def dequeueSegments(self, nzbSegments):
        """ Explicitly dequeue the specified nzb segments. Returns the list of segments that
        were actually dequeued (from this queue and, when enabled, the RetryQueue) """
        # ATOMIC:
        dequeued = self.dequeueItems([(nzbSegment.priority, nzbSegment)
                                      for nzbSegment in nzbSegments])
        dequeuedSegments = [segment for priority, segment in dequeued]
        if self.retryQueueEnabled:
            dequeuedSegments.extend(self.rQueue.dequeueSegments(nzbSegments))

        for nzbSegment in dequeuedSegments:
            self.segmentDone(nzbSegment, dequeue=True)

        return dequeuedSegments

    def addQueuedBytes(self, bytes):
        """ Add to the totalQueuedBytes count """
        self.totalQueuedBytes += bytes

    def currentNZBs(self):
        """ Return a copy of the list of nzbs currently being downloaded """
        self.nzbsLock.acquire()
        try:
            return self.nzbs[:]
        finally:
            self.nzbsLock.release()

    def nzbAdd(self, nzb):
        """ Denote this nzb as currently being downloaded """
        self.nzbsLock.acquire()
        try:
            self.nzbs.append(nzb)
        finally:
            self.nzbsLock.release()

    def nzbDone(self, nzb):
        """ NZB finished: no longer denote it as currently being downloaded """
        self.nzbsLock.acquire()
        try:
            self.nzbs.remove(nzb)
        except ValueError:
            # NZB might have been canceled
            pass
        finally:
            self.nzbsLock.release()

    def isNZBDone(self, nzb, postponed=None):
        """ Determine whether or not all of the specified NZB has been thoroughly downloaded

        postponed -- whether to look at the postponed nzbFiles instead of the current
                     ones. When None it is determined by checking whether the nzb is
                     among Hellanzb.queue.currentNZBs()

        Returns False as soon as one of the looked at nzbFiles belongs to the nzb, True
        otherwise """
        if postponed is None:
            if nzb not in Hellanzb.queue.currentNZBs():
                postponed = True
            else:
                postponed = False

        self.nzbFilesLock.acquire()
        try:
            if not postponed:
                queueFilesCopy = self.nzbFiles.copy()
            else:
                queueFilesCopy = self.postponedNzbFiles.copy()
        finally:
            self.nzbFilesLock.release()

        for nzbFile in queueFilesCopy:
            if nzbFile not in nzb.nzbFiles:
                continue

            debug('isNZBDone: NOT DONE: ' + nzbFile.getDestination())
            return False

        return True

    def serverAdd(self, serverFactory):
        """ Add the specified server pool, for use by the RetryQueue """
        self.rQueue.addServerPool(serverFactory.serverPoolName)

    def initRetryQueue(self):
        """ Initialize and enable use of the RetryQueue """
        self.retryQueueEnabled = self.rQueue.needRetryQueue()
        if self.retryQueueEnabled:
            self.rQueue.createQueues()

    def serverRemove(self, serverFactory):
        """ Remove the specified server pool """
        self.rQueue.removeServerPool(serverFactory.serverPoolName)

    def getSmart(self, serverFactory):
        """ Get the next available segment in the queue. The 'smart'ness first checks for
        segments in the RetryQueue, otherwise it falls back to the main queue

        Returns a (priority, segment) tuple; the segment's fromQueue is set to this queue.
        Raises EmptyForThisPool when only the retry queues of other serverPools have work
        left. Anything PriorityQueue.get_nowait raises on an empty main queue propagates """
        # Don't bother w/ retryQueue nonsense unless it's enabled (meaning there are
        # multiple serverPools)
        if self.retryQueueEnabled:
            try:
                priority, segment = self.rQueue.get(serverFactory.serverPoolName)
                segment.fromQueue = self
                return priority, segment
            except Empty:
                # All retry queues for this serverPool are empty. fall through
                pass

            if not len(self) and len(self.rQueue):
                # Catch the special case where both the main NZBSegmentQueue is empty, all
                # the retry queues for the serverPool are empty, but there is still more
                # left to download in the retry queue (scheduled for retry by other
                # serverPools)
                raise EmptyForThisPool()

        priority, segment = PriorityQueue.get_nowait(self)
        segment.fromQueue = self
        return priority, segment

    def requeue(self, serverFactory, segment):
        """ Requeue the segment for download. This differs from requeueMissing as it's for
        downloads that failed for reasons other than the file or group missing from the
        server (such as a connection timeout) """
        # This segment only needs to go back into the retry queue if the retry queue is
        # enabled AND the segment was previously requeueMissing()'d
        if self.retryQueueEnabled and len(segment.failedServerPools):
            self.rQueue.requeue(serverFactory.serverPoolName, segment)
        else:
            self.put((segment.priority, segment))

        # There's a funny case where other NZBLeechers in the calling NZBLeecher's factory
        # received Empty from the queue, then afterwards the connection is lost (say the
        # connection timed out), causing the requeue. Find and reactivate them because
        # they now have work to do
        self.nudgeIdleNZBLeechers(segment)

    def requeueMissing(self, serverFactory, segment):
        """ Requeue a missing segment. This segment will be added to the RetryQueue (if
        enabled), where other serverPools will find it and reattempt the download

        Raises PoolsExhausted when the RetryQueue is not enabled """
        # This serverPool has just failed the download
        assert (serverFactory.serverPoolName not in segment.failedServerPools)
        segment.failedServerPools.append(serverFactory.serverPoolName)

        if self.retryQueueEnabled:
            self.rQueue.requeue(serverFactory.serverPoolName, segment)

            # We might have just requeued a segment onto an idle server pool. Reactivate
            # any idle connections pertaining to this segment
            self.nudgeIdleNZBLeechers(segment)
        else:
            raise PoolsExhausted()

    def nudgeIdleNZBLeechers(self, requeuedSegment):
        """ Activate any idle NZBLeechers that might need to download the specified requeued
        segment. The actual work is scheduled via reactor.callLater(0, ...) """
        reactor.callLater(0, self._nudgeIdleNZBLeechers, requeuedSegment)

    def _nudgeIdleNZBLeechers(self, requeuedSegment):
        """ Activate any idle NZBLeechers that might need to download the specified requeued
        segment """
        if not Hellanzb.downloadPaused and not requeuedSegment.nzbFile.nzb.canceled:
            for nsf in Hellanzb.nsfs:
                # Only consider the factories sharing this queue's fillServerPriority
                if nsf.fillServerPriority != self.fillServerPriority:
                    continue

                # Skip the serverPools that already failed to download this segment
                if nsf.serverPoolName not in requeuedSegment.failedServerPools:
                    nsf.activated = True
                    nsf.fetchNextNZBSegment()

    def fileDone(self, nzbFile):
        """ Notify the queue a file is done. This is called after assembling a file into its
        final contents. Segments are really stored independently of individual Files in
        the queue, hence this function

        Does nothing when the nzbFile is not among this queue's nzbFiles """
        self.nzbFilesLock.acquire()
        try:
            if nzbFile not in self.nzbFiles:
                return
            self.nzbFiles.remove(nzbFile)
        finally:
            self.nzbFilesLock.release()

        if nzbFile.isAllSegmentsDecoded():
            # The segments are no longer individually on disk
            for nzbSegment in nzbFile.nzbSegments:
                self.parent.onDiskSegments.pop(nzbSegment.getDestination(), None)

            if nzbFile.isExtraPar and nzbFile.nzb.queuedBlocks > 0:
                fileBlocks = getParSize(nzbFile.filename)
                nzbFile.nzb.queuedBlocks -= fileBlocks
                nzbFile.nzb.neededBlocks -= fileBlocks

            if nzbFile.isSkippedPar:
                # If a skipped par file was actually assembled, it wasn't actually skipped
                nzbFile.isSkippedPar = False
                if nzbFile in nzbFile.nzb.skippedParFiles:
                    nzbFile.nzb.skippedParFiles.remove(nzbFile)
                if nzbFile.nzb.isSkippedParSubject(nzbFile.subject):
                    nzbFile.nzb.skippedParSubjects.remove(nzbFile.subject)

    def segmentDone(self, nzbSegment, dequeue=False):
        """ Simply decrement the queued byte count and register this nzbSegment as finished
        downloading, unless the segment is part of a postponed download

        dequeue -- True when the segment was dequeued rather than downloaded: it is then
                   recorded in its nzbFile's dequeuedSegments and is NOT registered in
                   onDiskSegments """
        # NOTE: old code locked here: but this block should only contend with itself (only
        # called from the ArticleDecoder) ArticleDecoder thread (only segmentDone() and
        # isAllSegmentsDecoded() touches todoNzbSegments, dequeuedSegments,
        # totalQueuedBytes?
        nzbFile = nzbSegment.nzbFile

        self.nzbsLock.acquire()
        try:
            if nzbSegment in nzbFile.todoNzbSegments:
                nzbFile.todoNzbSegments.remove(nzbSegment)
                if dequeue:
                    nzbFile.dequeuedSegments.add(nzbSegment)
                    debug('segmentDone: dequeued: %s %i' % (nzbFile.subject,
                                                            nzbSegment.number))
                elif nzbSegment in nzbFile.dequeuedSegments:
                    # NOTE: this should really never occur
                    # need this elif?
                    debug('*** segmentDone called on dequeued nzbSegment -- removing from '
                          'nzbFile.dequeuedSegments!')
                    nzbFile.dequeuedSegments.remove(nzbSegment)

                # Segments of NZBs that aren't currently downloading (postponed) don't
                # count towards the queued bytes
                if nzbFile.nzb in Hellanzb.queue.nzbs:
                    self.totalQueuedBytes -= nzbSegment.bytes
        finally:
            self.nzbsLock.release()

        if not dequeue:
            # NOTE: currently don't have to lock -- only the ArticleDecoder thread (via
            # ->handleDupeNZBSegment->isBeingDownloaded) reads onDiskSegments
            self.parent.onDiskSegments[nzbSegment.getDestination()] = nzbSegment

            if nzbSegment.isFirstSegment():
                nzbFile.nzb.firstSegmentsDownloaded += 1

    def isBeingDownloadedFile(self, segmentFilename):
        """ Whether or not the file on disk is currently in the middle of being
        downloaded/assembled. Return the NZBSegment representing the segment specified by
        the filename, or None when the filename is not a known on disk segment """
        # see segmentDone
        return self.parent.onDiskSegments.get(segmentFilename)

    def parseNZB(self, nzb, verbose=True):
        """ Initialize the queue from the specified nzb file

        nzb -- the NZB to parse; the XML is read from nzb.nzbFileName
        verbose -- when False the per file 'Assembling', the 'recovering ALL pars' and
                   the 'Assembled archive!' messages are not logged

        Returns True when the archive turned out to be complete (nothing was queued),
        False otherwise. Raises FatalError when the NZB file is not valid XML """
        # Create a parser
        parser = make_parser()

        # No XML namespaces here
        parser.setFeature(feature_namespaces, 0)
        parser.setFeature(feature_external_ges, 0)

        # Create the handler
        fileName = nzb.nzbFileName
        self.nzbAdd(nzb)
        needWorkFiles = []
        needWorkSegments = []
        nzbp = NZBParser(nzb, needWorkFiles, needWorkSegments)

        # Tell the parser to use it
        parser.setContentHandler(nzbp)

        nzb.calculatingBytes = True
        # Parse the input
        try:
            parser.parse(fileName)
        except SAXParseException as saxpe:
            nzb.calculatingBytes = False
            self.nzbDone(nzb)
            msg = 'Unable to parse invalid NZB file: %s: %s' % \
                (os.path.basename(fileName), saxpe.getException())
            raise FatalError(msg)
        nzb.calculatingBytes = False

        # We trust the segment number attribute of the NZB XML, but if the sequence of
        # segments does not begin at "1", the parser wouldn't have found the
        # nzbFile.firstSegment
        for needWorkFile in nzbp.needWorkFiles:
            if needWorkFile.firstSegment is None and len(needWorkFile.nzbSegments):
                # Set the firstSegment to the smallest segment number
                sortedSegments = [(nzbSegment.number, nzbSegment)
                                  for nzbSegment in needWorkFile.nzbSegments]
                sortedSegments.sort()
                needWorkFile.firstSegment = sortedSegments[0][1]
                needWorkFile.firstSegment.priority = NZBSegmentQueue.NZB_CONTENT_P

        # The parser will add all the segments of all the NZBFiles that have not already
        # been downloaded. After the parsing, we'll check if each of those segments have
        # already been downloaded. it's faster to check all segments at one time
        needDlFiles, needDlSegments, onDiskSegments = \
            segmentsNeedDownload(needWorkSegments,
                                 overwriteZeroByteSegments=nzb.overwriteZeroByteFiles)

        # firstSegmentsDownloaded needs to be tweaked if isSkippedPar and no segments were
        # found on disk by segmentsNeedDownload. i.e. first segments have ALWAYS already
        # been downloaded in isParRecovery mode
        fauxFirstSegmentsDownloaded = 0
        if Hellanzb.SMART_PAR and nzb.isParRecovery:
            for nzbFile in nzb.nzbFiles:
                if nzbFile.isSkippedPar and nzbFile.firstSegment not in onDiskSegments:
                    nzb.firstSegmentsDownloaded += 1
                    fauxFirstSegmentsDownloaded += 1

        # Calculate and print parsed/skipped/queued statistics
        skippedPars = 0
        queuedParBlocks = 0
        for nzbFile in needDlFiles:
            if nzbFile.isSkippedPar:
                skippedPars += 1
            elif nzb.isParRecovery and nzbFile.isExtraPar and \
                    not nzbFile.isSkippedPar and len(nzbFile.todoNzbSegments) and \
                    nzbFile.filename is not None and not isHellaTemp(nzbFile.filename):
                queuedParBlocks += getParSize(nzbFile.filename)

        onDiskBytes = 0
        for nzbSegment in onDiskSegments:
            onDiskBytes += nzbSegment.bytes
        for nzbFile in nzb.nzbFiles:
            if nzbFile not in needDlFiles:
                onDiskBytes += nzbFile.totalBytes

        onDiskFilesCount = nzbp.fileCount - len(needWorkFiles)
        onDiskSegmentsCount = len(onDiskSegments)
        info('Parsed: %i files (%i posts), %s' % (nzbp.fileCount, nzbp.segmentCount,
                                                  prettySize(nzb.totalBytes)))
        if onDiskFilesCount or onDiskSegmentsCount:
            filesMsg = segmentsMsg = separator = ''
            if onDiskFilesCount:
                filesMsg = '%i files' % onDiskFilesCount
            if onDiskSegmentsCount:
                segmentsMsg = '%i segments' % onDiskSegmentsCount
            if onDiskFilesCount and onDiskSegmentsCount:
                separator = ' and '
            info('Skipped (on disk): %s%s%s, %s' % (filesMsg, separator, segmentsMsg,
                                                    prettySize(onDiskBytes)))

        # Tally what was skipped for correct percentages in the UI
        for nzbSegment in onDiskSegments:
            nzbSegment.nzbFile.totalSkippedBytes += nzbSegment.bytes
            nzbSegment.nzbFile.nzb.totalSkippedBytes += nzbSegment.bytes

        # The needWorkFiles will tell us what nzbFiles are missing from the
        # FS. segmentsNeedDownload will further tell us what files need to be
        # downloaded. files missing from the FS (needWorkFiles) but not needing to be
        # downloaded (in needDlFiles) simply need to be assembled
        for nzbFile in needWorkFiles:
            if nzbFile not in needDlFiles:
                # Don't automatically 'finish' the NZB, we'll take care of that in this
                # function if necessary
                if verbose:
                    info(nzbFile.getFilename() + ': Assembling -- all segments were on disk')

                # NOTE: this function is destructive to the passed in nzbFile! And is only
                # called on occasion (might bite you in the ass one day)
                try:
                    assembleNZBFile(nzbFile, autoFinish=False)
                except OutOfDiskSpace:
                    self.nzbDone(nzb)
                    # FIXME: Shouldn't exit here
                    error('Cannot assemble ' + nzbFile.getFilename() +
                          ': No space left on device! Exiting..')
                    Hellanzb.Core.shutdown(True)

        for nzbSegment in needDlSegments:
            # smartDequeue called from segmentsNeedDownload would have set
            # isSkippedParFile for us
            if not nzbSegment.nzbFile.isSkippedPar:
                self.put((nzbSegment.priority, nzbSegment))
            else:
                # This would need to be downloaded if we didn't skip the segment, they are
                # officially dequeued, and can be requeued later
                nzbSegment.nzbFile.dequeuedSegments.add(nzbSegment)

        # Requeue files in certain situations
        if nzb.firstSegmentsDownloaded == len(nzb.nzbFiles):
            # NOTE: This block of code does not commonly happen with newzbin.com NZBs: due
            # to how the DupeHandler handles .NFO files. newzbin.com seems to always
            # duplicate the .NFO file in their NZBs
            smartRequeue(nzb)
            logSkippedPars(nzb)

        if nzb.isParRecovery and nzb.skippedParSubjects and len(nzb.skippedParSubjects) and \
                not len(self):
            # FIXME: This recovering ALL pars should be a mode (with a flag on the NZB
            # object). No par skipping would occur in this mode -- for the incredibly rare
            # case that first segments are lost prior to this mode taking place. What will
            # happen doesn't make sense: hellanzb will say 'recovering ALL pars', then
            # SmartPar will later skip pars
            msg = 'Par recovery download: No pars with prefix: %s -- recovering ALL pars' % \
                nzb.parPrefix
            if skippedPars:
                msg = '%s (%i par files)' % (msg, skippedPars)
            if verbose:
                warn(msg)

            for nzbSegment in needDlSegments:
                if nzbSegment.nzbFile.isSkippedPar:
                    self.put((nzbSegment.priority, nzbSegment))
                    nzbSegment.nzbFile.todoNzbSegments.add(nzbSegment)

            # Only reset the isSkippedPar flag after queueing
            for nzbSegment in needDlSegments:
                if nzbSegment.nzbFile.isSkippedPar:
                    nzbSegment.nzbFile.isSkippedPar = False

        # We might have faked the value of this: reset it
        nzb.firstSegmentsDownloaded -= fauxFirstSegmentsDownloaded

        if not len(self):
            self.nzbDone(nzb)
            if verbose:
                info(nzb.archiveName + ': Assembled archive!')

            reactor.callLater(0, Hellanzb.Daemon.handleNZBDone, nzb)

            # True == the archive is complete
            return True

        # Finally tally the size of the queue
        self.calculateTotalQueuedBytes()

        dlMsg = 'Queued: %s' % prettySize(self.totalQueuedBytes)
        if nzb.isParRecovery and queuedParBlocks:
            dlMsg += ' (recovering %i %s)' % (queuedParBlocks,
                                              getParRecoveryName(nzb.parType))
        info(dlMsg)

        # Archive not complete
        return False
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments = False):
    """ Faster version of needsDownload for multiple segments that do not have their real
    file name (for use by the Queue).

    When an NZB is loaded and parsed, NZBFiles not found on disk at the time of parsing
    are marked as needing to be downloaded. (An easy first pass of figuring out exactly
    what needs to be downloaded).

    This function is the second pass. It takes the child NZBSegments of those NZBFiles
    and scans Hellanzb.WORKING_DIR, detecting which segments are already on disk and can
    be skipped

    segmentList -- the NZBSegments to check
    overwriteZeroByteSegments -- passed through to validWorkingFile for every file found
                                 in the working dir

    Returns the tuple (needDlFiles, needDlSegments, onDiskSegments): the set of NZBFiles
    having at least one segment to download, the list of segments to download and the
    list of segments matched to a file on disk """
    # NOTE(review): an earlier definition of segmentsNeedDownload precedes this one in
    # this module; this later definition is the one left bound to the name

    # Segment number -> names (with the .segmentXXXX suffix cut off) of the working dir
    # files carrying that segment number
    namesBySegmentNumber = {}

    needDlFiles = set() # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in a map of lists
    for entry in os.listdir(Hellanzb.WORKING_DIR):
        entryPath = os.path.join(Hellanzb.WORKING_DIR, entry)
        if not validWorkingFile(entryPath, overwriteZeroByteSegments):
            continue

        ext = getFileExtension(entry)
        if ext is None or not segmentEndRe.match(ext):
            # Not a segment file
            continue

        number = int(ext[-4:])
        # entry[:-12] cuts off .segmentXXXX
        namesBySegmentNumber.setdefault(number, []).append(entry[:-12])

    # Determine if each segment needs to be downloaded
    for segment in segmentList:
        candidates = namesBySegmentNumber.get(segment.number)
        if candidates is None:
            # No matching segment numbers, obviously needs to be downloaded
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        # We've matched to our on disk segment if we:
        # a) find that on disk segment's file name in our potential segment's subject
        # b) match that on disk segment's file name to our potential segment's temp
        # file name (w/ .segmentXXXX cutoff)
        foundFileName = None
        for candidate in candidates:
            if segment.nzbFile.subject.find(candidate) >= 0 or \
                    segment.getTempFileName()[:-12] == candidate:
                foundFileName = candidate
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in
            # memory. since we didnt see the temporary name on the filesystem, but we
            # found a subject match, that means we have the real name on the
            # filesystem. In the case where this happens we've figured out the real
            # filename (hopefully!). Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber = segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's
                # nzbFile's segments (because there are no segments in the queue at
                # this point). It will identifyPar the segment AND more importantly it
                # will mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue = True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        candidates.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments