Exemplo n.º 1
0
def setRealFileName(nzbFile, filename, forceChange = False, settingSegmentNumber = None):
    """ Set the actual filename of the segment's parent nzbFile. If the filename wasn't
    already previously set, set the actual filename and rename the known temporary files
    belonging to that nzbFile (the ones found in nzbFile.nzb.destDir) to use the new real
    filename.

    nzbFile              -- the NZBFile whose filename is being set
    filename             -- the new real filename
    forceChange          -- when True, an already set real filename is replaced (and
                            nzbFile.forcedChangedFilename is flagged). When False a
                            conflicting new name is ignored
    settingSegmentNumber -- optional segment number, only used in the error message

    The filename change and the renames happen while holding nzbFile.tempFileNameLock.
    The lock is always released, even when listing the directory or moving a file
    raises (the exception itself is still propagated to the caller) """
    # FIXME: remove locking. actually, this function really needs to be locking when
    # nzb.destDir is changing (when the archive dir is moved around)
    switchedReal = False
    if nzbFile.filename is not None and nzbFile.filename != filename and \
            not isHellaTemp(nzbFile.filename):
        # This NZBFile already had a real filename set, and now something has triggered it
        # be changed
        switchedReal = True

        if forceChange:
            # Force change -- this segment has been found to be a duplicate and needs to
            # be renamed (but its parent NZBFile is currently being downloaded)
            nzbFile.forcedChangedFilename = True
        else:
            # Not a force change. Either ignore the supposed new real filename (we already
            # had one, we're just going to stick with it) and print an error about
            # receiving bad header data. Or if this NZBFile filename mismatches because it
            # was previously found to be a dupe (and its filename was renamed) just
            # completely ignore the new filename
            if not nzbFile.forcedChangedFilename:
                segmentInfo = ''
                if settingSegmentNumber is not None:
                    segmentInfo = ' segment: %i' % settingSegmentNumber

                error(nzbFile.showFilename + segmentInfo + \
                      ' has incorrect filename header!: ' + filename + ' should be: ' + \
                      nzbFile.showFilename)
            return
    elif nzbFile.filename == filename:
        # Nothing to change
        return

    # We might have been using a tempFileName previously, and just succesfully found
    # the real filename in the articleData. Immediately rename any files that were
    # using the temp name
    nzbFile.tempFileNameLock.acquire()
    try:
        # Maps an on disk base name to the base name it has to be renamed to
        renameFilenames = {}

        renameSegments = []
        if switchedReal:
            notOnDisk = nzbFile.todoNzbSegments.union(nzbFile.dequeuedSegments)
            # Get the original segment filenames via getDestination() (before we change
            # it)
            renameSegments = [(nzbSegment, nzbSegment.getDestination()) for nzbSegment in
                              nzbFile.nzbSegments if nzbSegment not in notOnDisk]

        # Change the filename
        nzbFile.filename = filename

        # Now get the new filenames via getDestination() (renameSegments is only
        # populated when a real filename was replaced)
        for renameSegment, oldName in renameSegments:
            renameFilenames[os.path.basename(oldName)] = \
                os.path.basename(renameSegment.getDestination())

        # We also need a mapping of temp filenames to the new filename, incase we just
        # found the real file name (filename is None or filename was previously set to a
        # temp name)
        for nzbSegment in nzbFile.nzbSegments:
            renameFilenames[nzbSegment.getTempFileName()] = \
                os.path.basename(nzbSegment.getDestination())

        # Rename all segments. destDir is read once so the listing and the renames are
        # guaranteed to refer to the same directory
        destDir = nzbFile.nzb.destDir
        for diskName in os.listdir(destDir):
            if diskName not in renameFilenames:
                continue

            orig = os.path.join(destDir, diskName)
            new = os.path.join(destDir, renameFilenames[diskName])
            shutil.move(orig, new)

            # Keep the onDiskSegments map in sync
            onDiskSegments = Hellanzb.queue.onDiskSegments
            if orig in onDiskSegments:
                onDiskSegments[new] = onDiskSegments.pop(orig)
    finally:
        # Never leave the lock held: a failed listdir/move used to dead lock every later
        # caller
        nzbFile.tempFileNameLock.release()
Exemplo n.º 2
0
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments=False):
    """ Bulk variant of needsDownload, for segments whose real file name is not known yet
    (used by the Queue).

    Parsing an NZB is the first pass: every NZB<file> not found on disk is flagged as
    needing work. This function is the second pass: it scans Hellanzb.WORKING_DIR once
    and matches the segment files found there against the passed in NZBSegments, so
    segments already on disk can be skipped.

    Returns a tuple of (set of NZBFiles needing download, list of NZBSegments needing
    download, list of NZBSegments found on disk)
    """
    # Segment number -> list of on disk file names (with the .segmentXXXX suffix cut
    # off). Every segment in segmentList is then only compared against the names sharing
    # its segment number
    diskNamesByNumber = {}

    needDlFiles = set()  # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # First, index the usable segment files of the WORKING_DIR
    for workingFile in os.listdir(Hellanzb.WORKING_DIR):
        workingPath = os.path.join(Hellanzb.WORKING_DIR, workingFile)
        if not validWorkingFile(workingPath, overwriteZeroByteSegments):
            continue

        ext = getFileExtension(workingFile)
        if ext is None or not segmentEndRe.match(ext):
            continue

        # The last 4 characters of the extension are the segment number, the trailing 12
        # characters of the file name are the whole .segmentXXXX suffix
        diskNamesByNumber.setdefault(int(ext[-4:]), []).append(workingFile[:-12])

    # Then decide, segment by segment, whether it has to be downloaded
    for segment in segmentList:
        candidates = diskNamesByNumber.get(segment.number)
        if candidates is None:
            # Nothing on disk carries this segment number: download it
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        # An on disk name belongs to this segment when either:
        # a) the name occurs in the segment's nzbFile subject
        # b) the name equals the segment's temp file name (minus .segmentXXXX)
        foundFileName = None
        for candidate in candidates:
            inSubject = segment.nzbFile.subject.find(candidate) > -1
            if inSubject or segment.getTempFileName()[:-12] == candidate:
                foundFileName = candidate
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None, so only the temporary name is known in memory. The
            # match was not against a temporary name, so it came from the subject: what
            # is on the filesystem is (hopefully!) the real name. Record it now
            setRealFileName(segment.nzbFile,
                            foundFileName,
                            settingSegmentNumber=segment.number)

            if Hellanzb.SMART_PAR:
                # Nothing is queued at this point, so smartDequeue will not really
                # dequeue anything. It does identifyPar the segment and, more
                # importantly, flags nzbFiles as isSkippedPar (used later by parseNZB)
                # and prints the 'Skipping par' message for them
                segment.smartDequeue(readOnlyQueue=True)

        onDiskSegments.append(segment)

        # segmentDone keeps the queue's onDiskSegments current (isBeingDownloaded relies
        # on it during Dupe renaming). Calling it here is like hellanzb having just
        # downloaded and decoded the segment. Its one wrong side effect, decrementing
        # totalQueuedBytes, is harmless: that total is reset and recalculated at the end
        # of parseNZB
        Hellanzb.queue.segmentDone(segment)

        # A matched name must not be matched again by a later segment (dupes)
        candidates.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments
Exemplo n.º 3
0
def setRealFileName(nzbFile, filename, forceChange = False, settingSegmentNumber = None):
    """ Set the actual filename of the segment's parent nzbFile. If the filename wasn't
    already previously set, set the actual filename and rename the known temporary files
    belonging to that nzbFile (the ones found in nzbFile.nzb.destDir) to use the new real
    filename.

    nzbFile              -- the NZBFile whose filename is being set
    filename             -- the new real filename
    forceChange          -- when True, an already set real filename is replaced (and
                            nzbFile.forcedChangedFilename is flagged). When False a
                            conflicting new name is ignored
    settingSegmentNumber -- optional segment number, only used in the error message

    The filename change and the renames happen while holding nzbFile.tempFileNameLock.
    The lock is always released, even when listing the directory or moving a file
    raises (the exception itself is still propagated to the caller) """
    # FIXME: remove locking. actually, this function really needs to be locking when
    # nzb.destDir is changing (when the archive dir is moved around)
    switchedReal = False
    if nzbFile.filename is not None and nzbFile.filename != filename and \
            not isHellaTemp(nzbFile.filename):
        # This NZBFile already had a real filename set, and now something has triggered it
        # be changed
        switchedReal = True

        if forceChange:
            # Force change -- this segment has been found to be a duplicate and needs to
            # be renamed (but its parent NZBFile is currently being downloaded)
            nzbFile.forcedChangedFilename = True
        else:
            # Not a force change. Either ignore the supposed new real filename (we already
            # had one, we're just going to stick with it) and print an error about
            # receiving bad header data. Or if this NZBFile filename mismatches because it
            # was previously found to be a dupe (and its filename was renamed) just
            # completely ignore the new filename
            if not nzbFile.forcedChangedFilename:
                segmentInfo = ''
                if settingSegmentNumber is not None:
                    segmentInfo = ' segment: %i' % settingSegmentNumber

                error(nzbFile.showFilename + segmentInfo + \
                      ' has incorrect filename header!: ' + filename + ' should be: ' + \
                      nzbFile.showFilename)
            return
    elif nzbFile.filename == filename:
        # Nothing to change
        return

    # We might have been using a tempFileName previously, and just succesfully found
    # the real filename in the articleData. Immediately rename any files that were
    # using the temp name
    nzbFile.tempFileNameLock.acquire()
    try:
        # Maps an on disk base name to the base name it has to be renamed to
        renameFilenames = {}

        renameSegments = []
        if switchedReal:
            notOnDisk = nzbFile.todoNzbSegments.union(nzbFile.dequeuedSegments)
            # Get the original segment filenames via getDestination() (before we change
            # it)
            renameSegments = [(nzbSegment, nzbSegment.getDestination()) for nzbSegment in
                              nzbFile.nzbSegments if nzbSegment not in notOnDisk]

        # Change the filename
        nzbFile.filename = filename

        # Now get the new filenames via getDestination() (renameSegments is only
        # populated when a real filename was replaced)
        for renameSegment, oldName in renameSegments:
            renameFilenames[os.path.basename(oldName)] = \
                os.path.basename(renameSegment.getDestination())

        # We also need a mapping of temp filenames to the new filename, incase we just
        # found the real file name (filename is None or filename was previously set to a
        # temp name)
        for nzbSegment in nzbFile.nzbSegments:
            renameFilenames[nzbSegment.getTempFileName()] = \
                os.path.basename(nzbSegment.getDestination())

        # Rename all segments. destDir is read once so the listing and the renames are
        # guaranteed to refer to the same directory
        destDir = nzbFile.nzb.destDir
        for diskName in os.listdir(destDir):
            if diskName not in renameFilenames:
                continue

            orig = os.path.join(destDir, diskName)
            new = os.path.join(destDir, renameFilenames[diskName])
            shutil.move(orig, new)

            # Keep the onDiskSegments map in sync
            onDiskSegments = Hellanzb.queue.onDiskSegments
            if orig in onDiskSegments:
                onDiskSegments[new] = onDiskSegments.pop(orig)
    finally:
        # Never leave the lock held: a failed listdir/move used to dead lock every later
        # caller
        nzbFile.tempFileNameLock.release()
Exemplo n.º 4
0
class NZBSegmentQueue(PriorityQueue):
    """ priority fifo queue of segments to download. lower numbered segments are downloaded
    before higher ones.

    Besides the queued segments themselves the queue tracks the NZBFiles those segments
    belong to (nzbFiles / postponedNzbFiles), the NZBs currently being downloaded (nzbs),
    a running total of queued bytes and a RetryQueue for segments that failed on a
    server pool. Segments found or written on disk are tracked in the onDiskSegments map
    of the parent queue (a queue without a parent is its own parent) """
    NZB_CONTENT_P = 100000  # normal nzb downloads
    # FIXME: EXTRA_PAR2_P isn't actually used
    EXTRA_PAR2_P = 0  # par2 after-the-fact downloads are more important

    def __init__(self, fileName=None, parent=None):
        """ Create the queue.

        fileName -- when not None it is handed straight to parseNZB(). NOTE(review):
                    despite the name, parseNZB reads .nzbFileName from it, so this looks
                    like it has to be an NZB object rather than a path -- confirm
        parent   -- the queue owning the shared onDiskSegments map. When None this queue
                    is its own parent and creates the map """
        PriorityQueue.__init__(self)

        if parent is not None:
            self.parent = parent
        else:
            self.parent = self

            # Segments curently on disk
            self.onDiskSegments = {}

        # Maintain a collection of the known nzbFiles belonging to the segments in this
        # queue. Set is much faster for _put & __contains__
        self.nzbFiles = set()
        self.postponedNzbFiles = set()
        self.nzbFilesLock = Lock()

        self.nzbs = []
        self.nzbsLock = Lock()

        self.totalQueuedBytes = 0

        self.fillServerPriority = 0

        self.retryQueueEnabled = False
        self.rQueue = RetryQueue()

        if fileName is not None:
            self.parseNZB(fileName)

    def cancel(self):
        """ Cancel the current download: same as postpone(), except the nzbFiles are not
        remembered as postponed """
        self.postpone(cancel=True)

    def clear(self):
        """ Clear the queue of all its contents"""
        # NOTE(review): retryQueueEnabled is initialized to False (never None) in
        # __init__, so this check looks like it is always true and the retry queue is
        # always cleared. Left as is -- confirm whether 'if self.retryQueueEnabled' was
        # intended
        if self.retryQueueEnabled is not None:
            self.rQueue.clear()
        PriorityQueue.clear(self)

        self.nzbs = []

        self.parent.onDiskSegments.clear()

    def postpone(self, cancel=False):
        """ Postpone the current download. The queue is cleared and, unless cancel is True,
        the current nzbFiles are remembered in postponedNzbFiles """
        self.clear()

        self.nzbsLock.acquire()
        try:
            self.nzbFilesLock.acquire()
            try:
                if not cancel:
                    self.postponedNzbFiles.update(self.nzbFiles)
                self.nzbFiles.clear()
            finally:
                self.nzbFilesLock.release()
        finally:
            self.nzbsLock.release()

        self.totalQueuedBytes = 0

    def unpostpone(self, nzb):
        """ Recall a postponed NZB: forget every postponed nzbFile whose NZB has the same
        archive name as the specified nzb """
        self.nzbFilesLock.acquire()
        try:
            arName = archiveName(nzb.nzbFileName)
            # FIXME:
            # Why is this not nzbFile.nzb == nzb?
            # Collect first: the set can't be modified while iterating over it
            found = [nzbFile for nzbFile in self.postponedNzbFiles
                     if nzbFile.nzb.archiveName == arName]
            for nzbFile in found:
                self.postponedNzbFiles.remove(nzbFile)
        finally:
            self.nzbFilesLock.release()

    def _put(self, item):
        """ Add a segment to the queue. item is a (priority, NZBSegment or NZBFile) tuple """
        priority, item = item

        # Support adding NZBFiles to the queue. Just adds all the NZBFile's NZBSegments
        if isinstance(item, NZBFile):
            offset = 0
            for nzbSegment in item.nzbSegments:
                # Each following segment gets a one higher priority value than the
                # previous one
                PriorityQueue._put(self, (priority + offset, nzbSegment))
                offset += 1
        else:
            # Assume segment, remember its nzbFile (set.add ignores already known files)
            self.nzbFiles.add(item.nzbFile)
            PriorityQueue._put(self, (priority, item))

    def calculateTotalQueuedBytes(self):
        """ Calculate how many bytes are queued to be downloaded in this queue """
        # NOTE: we don't maintain this calculation all the time, too much CPU work for
        # _put
        self.totalQueuedBytes = 0
        self.nzbFilesLock.acquire()
        try:
            files = self.nzbFiles.copy()
        finally:
            self.nzbFilesLock.release()

        # Total all the nzbFiles, then subtract their segments that don't need to be
        # downloaded
        for nzbFile in files:
            self.totalQueuedBytes += nzbFile.totalBytes

            if len(nzbFile.todoNzbSegments) != len(nzbFile.nzbSegments):
                for nzbSegment in nzbFile.nzbSegments:
                    if nzbSegment not in nzbFile.todoNzbSegments:
                        self.totalQueuedBytes -= nzbSegment.bytes

    def dequeueSegments(self, nzbSegments):
        """ Explicitly dequeue the specified nzb segments. Returns the list of segments
        that were actually dequeued (from this queue and, when enabled, the retry queue) """
        # ATOMIC:
        dequeued = self.dequeueItems([(nzbSegment.priority, nzbSegment) for nzbSegment in \
                                      nzbSegments])
        dequeuedSegments = [segment for priority, segment in dequeued]
        if self.retryQueueEnabled:
            dequeuedSegments.extend(self.rQueue.dequeueSegments(nzbSegments))

        for nzbSegment in dequeuedSegments:
            self.segmentDone(nzbSegment, dequeue=True)

        return dequeuedSegments

    def addQueuedBytes(self, bytes):
        """ Add to the totalQueuedBytes count """
        self.totalQueuedBytes += bytes

    def currentNZBs(self):
        """ Return a copy of the list of nzbs currently being downloaded """
        self.nzbsLock.acquire()
        try:
            return self.nzbs[:]
        finally:
            self.nzbsLock.release()

    def nzbAdd(self, nzb):
        """ Denote this nzb as currently being downloaded """
        self.nzbsLock.acquire()
        try:
            self.nzbs.append(nzb)
        finally:
            self.nzbsLock.release()

    def nzbDone(self, nzb):
        """ NZB finished """
        self.nzbsLock.acquire()
        try:
            try:
                self.nzbs.remove(nzb)
            except ValueError:
                # NZB might have been canceled
                pass
        finally:
            self.nzbsLock.release()

    def isNZBDone(self, nzb, postponed=None):
        """ Determine whether or not all of the specified NZB as been thoroughly downloaded.

        postponed -- whether to look at the postponed nzbFiles instead of the active ones.
                     When None it is derived from whether the nzb is one of
                     Hellanzb.queue's current NZBs """
        if postponed is None:
            postponed = nzb not in Hellanzb.queue.currentNZBs()

        self.nzbFilesLock.acquire()
        try:
            if not postponed:
                queueFilesCopy = self.nzbFiles.copy()
            else:
                queueFilesCopy = self.postponedNzbFiles.copy()
        finally:
            self.nzbFilesLock.release()

        # Any nzbFile of this NZB still known to the queue means the NZB isn't done
        for nzbFile in queueFilesCopy:
            if nzbFile not in nzb.nzbFiles:
                continue

            debug('isNZBDone: NOT DONE: ' + nzbFile.getDestination())
            return False
        return True

    def serverAdd(self, serverFactory):
        """ Add the specified server pool, for use by the RetryQueue """
        self.rQueue.addServerPool(serverFactory.serverPoolName)

    def initRetryQueue(self):
        """ Initialize and enable use of the RetryQueue """
        self.retryQueueEnabled = self.rQueue.needRetryQueue()
        if self.retryQueueEnabled:
            self.rQueue.createQueues()

    def serverRemove(self, serverFactory):
        """ Remove the specified server pool """
        self.rQueue.removeServerPool(serverFactory.serverPoolName)

    def getSmart(self, serverFactory):
        """ Get the next available segment in the queue. The 'smart'ness first checks for segments
        in the RetryQueue, otherwise it falls back to the main queue.

        Returns a (priority, segment) tuple. Raises EmptyForThisPool when nothing is left
        for this server pool but the retry queue still holds work for other pools;
        otherwise whatever PriorityQueue.get_nowait raises on an empty queue is
        propagated """
        # Don't bother w/ retryQueue nonsense unless it's enabled (meaning there are
        # multiple serverPools)
        if self.retryQueueEnabled:
            try:
                priority, segment = self.rQueue.get(
                    serverFactory.serverPoolName)
                segment.fromQueue = self
                return priority, segment
            except Empty:
                # All retry queues for this serverPool are empty. fall through
                pass

            if not len(self) and len(self.rQueue):
                # Catch the special case where both the main NZBSegmentQueue is empty, all
                # the retry queues for the serverPool are empty, but there is still more
                # left to download in the retry queue (scheduled for retry by other
                # serverPools)
                raise EmptyForThisPool()

        priority, segment = PriorityQueue.get_nowait(self)
        segment.fromQueue = self
        return priority, segment

    def requeue(self, serverFactory, segment):
        """ Requeue the segment for download. This differs from requeueMissing as it's for
        downloads that failed for reasons other than the file or group missing from the
        server (such as a connection timeout) """
        # This segment only needs to go back into the retry queue if the retry queue is
        # enabled AND the segment was previously requeueMissing()'d
        if self.retryQueueEnabled and len(segment.failedServerPools):
            self.rQueue.requeue(serverFactory.serverPoolName, segment)
        else:
            self.put((segment.priority, segment))

        # There's a funny case where other NZBLeechers in the calling NZBLeecher's factory
        # received Empty from the queue, then afterwards the connection is lost (say the
        # connection timed out), causing the requeue. Find and reactivate them because
        # they now have work to do
        self.nudgeIdleNZBLeechers(segment)

    def requeueMissing(self, serverFactory, segment):
        """ Requeue a missing segment. This segment will be added to the RetryQueue (if enabled),
        where other serverPools will find it and reattempt the download. Raises
        PoolsExhausted when the RetryQueue is not enabled """
        # This serverPool has just failed the download
        assert (serverFactory.serverPoolName not in segment.failedServerPools)
        segment.failedServerPools.append(serverFactory.serverPoolName)

        if self.retryQueueEnabled:
            self.rQueue.requeue(serverFactory.serverPoolName, segment)

            # We might have just requeued a segment onto an idle server pool. Reactivate
            # any idle connections pertaining to this segment
            self.nudgeIdleNZBLeechers(segment)
        else:
            raise PoolsExhausted()

    def nudgeIdleNZBLeechers(self, requeuedSegment):
        """ Activate any idle NZBLeechers that might need to download the specified requeued
        segment. The actual work is scheduled via reactor.callLater """
        reactor.callLater(0, self._nudgeIdleNZBLeechers, requeuedSegment)

    def _nudgeIdleNZBLeechers(self, requeuedSegment):
        """ Activate any idle NZBLeechers that might need to download the specified requeued
        segment """
        if not Hellanzb.downloadPaused and not requeuedSegment.nzbFile.nzb.canceled:
            for nsf in Hellanzb.nsfs:
                # Only factories of this queue's fill server priority are considered
                if nsf.fillServerPriority != self.fillServerPriority:
                    continue
                # Pools that already failed this segment can't help
                if nsf.serverPoolName not in requeuedSegment.failedServerPools:
                    nsf.activated = True
                    nsf.fetchNextNZBSegment()

    def fileDone(self, nzbFile):
        """ Notify the queue a file is done. This is called after assembling a file into its
        final contents. Segments are really stored independantly of individual Files in
        the queue, hence this function """
        self.nzbFilesLock.acquire()
        try:
            if nzbFile not in self.nzbFiles:
                # Unknown to (or already removed from) this queue: nothing to do
                return
            self.nzbFiles.remove(nzbFile)
        finally:
            self.nzbFilesLock.release()

        if nzbFile.isAllSegmentsDecoded():
            # The file's segments are no longer tracked as on disk segments
            for nzbSegment in nzbFile.nzbSegments:
                self.parent.onDiskSegments.pop(nzbSegment.getDestination(), None)

            if nzbFile.isExtraPar and nzbFile.nzb.queuedBlocks > 0:
                fileBlocks = getParSize(nzbFile.filename)
                nzbFile.nzb.queuedBlocks -= fileBlocks
                nzbFile.nzb.neededBlocks -= fileBlocks

            if nzbFile.isSkippedPar:
                # If a skipped par file was actually assembled, it wasn't actually skipped
                nzbFile.isSkippedPar = False
                if nzbFile in nzbFile.nzb.skippedParFiles:
                    nzbFile.nzb.skippedParFiles.remove(nzbFile)
                if nzbFile.nzb.isSkippedParSubject(nzbFile.subject):
                    nzbFile.nzb.skippedParSubjects.remove(nzbFile.subject)

    def segmentDone(self, nzbSegment, dequeue=False):
        """ Simply decrement the queued byte count and register this nzbSegment as finished
        downloading, unless the segment is part of a postponed download.

        dequeue -- True when the segment was dequeued rather than downloaded: it is then
                   recorded in its nzbFile's dequeuedSegments and NOT registered as an on
                   disk segment """
        # NOTE: old code locked here: but this block should only contend with itself (only
        # called from the ArticleDecoder) ArticleDecoder thread (only segmentDone() and
        # isAllSegmentsDecoded() touches todoNzbSegments, dequeuedSegments,
        # totalQueuedBytes?
        self.nzbsLock.acquire()
        try:
            if nzbSegment in nzbSegment.nzbFile.todoNzbSegments:
                nzbSegment.nzbFile.todoNzbSegments.remove(nzbSegment)
                if dequeue:
                    nzbSegment.nzbFile.dequeuedSegments.add(nzbSegment)
                    debug('segmentDone: dequeued: %s %i' %
                          (nzbSegment.nzbFile.subject, nzbSegment.number))
                elif nzbSegment in nzbSegment.nzbFile.dequeuedSegments:
                    # NOTE: this should really never occur
                    # need this elif?
                    debug(
                        '*** segmentDone called on dequeued nzbSegment -- removing from '
                        'nzbFile.dequeuedSegments!')
                    nzbSegment.nzbFile.dequeuedSegments.remove(nzbSegment)
                # Only segments of NZBs that are currently downloading count towards
                # the queued bytes
                if nzbSegment.nzbFile.nzb in Hellanzb.queue.nzbs:
                    self.totalQueuedBytes -= nzbSegment.bytes
        finally:
            self.nzbsLock.release()

        if not dequeue:
            # NOTE: currently don't have to lock -- only the ArticleDecoder thread (via
            # ->handleDupeNZBSegment->isBeingDownloaded) reads onDiskSegments
            self.parent.onDiskSegments[
                nzbSegment.getDestination()] = nzbSegment

            if nzbSegment.isFirstSegment():
                nzbSegment.nzbFile.nzb.firstSegmentsDownloaded += 1

    def isBeingDownloadedFile(self, segmentFilename):
        """ Whether or not the file on disk is currently in the middle of being
        downloaded/assembled. Return the NZBSegment representing the segment specified by
        the filename (None when the filename is not a known on disk segment) """
        # see segmentDone
        return self.parent.onDiskSegments.get(segmentFilename)

    def parseNZB(self, nzb, verbose=True):
        """ Initialize the queue from the specified nzb file.

        Parses nzb.nzbFileName, figures out which segments are already on disk, assembles
        files whose segments are all on disk and queues the remaining segments.

        Returns True when nothing is left in the queue afterwards (the archive is
        complete), False otherwise. Raises FatalError when the NZB XML can't be parsed """
        # Create a parser
        parser = make_parser()

        # No XML namespaces here
        parser.setFeature(feature_namespaces, 0)
        parser.setFeature(feature_external_ges, 0)

        # Create the handler
        fileName = nzb.nzbFileName
        self.nzbAdd(nzb)
        needWorkFiles = []
        needWorkSegments = []
        nzbp = NZBParser(nzb, needWorkFiles, needWorkSegments)

        # Tell the parser to use it
        parser.setContentHandler(nzbp)

        nzb.calculatingBytes = True
        # Parse the input
        try:
            parser.parse(fileName)
        except SAXParseException as saxpe:
            nzb.calculatingBytes = False
            self.nzbDone(nzb)
            msg = 'Unable to parse invalid NZB file: %s: %s' % \
                (os.path.basename(fileName), saxpe.getException())
            raise FatalError(msg)
        nzb.calculatingBytes = False

        # We trust the NZB XML's <segment number="111"> attribute, but if the sequence of
        # segments does not begin at "1", the parser wouldn't have found the
        # nzbFile.firstSegment
        for needWorkFile in nzbp.needWorkFiles:
            if needWorkFile.firstSegment is None and len(
                    needWorkFile.nzbSegments):
                # Set the firstSegment to the smallest segment number
                sortedSegments = [(nzbSegment.number, nzbSegment) for nzbSegment in \
                                  needWorkFile.nzbSegments]
                sortedSegments.sort()
                needWorkFile.firstSegment = sortedSegments[0][1]
                needWorkFile.firstSegment.priority = NZBSegmentQueue.NZB_CONTENT_P

        # The parser will add all the segments of all the NZBFiles that have not already
        # been downloaded. After the parsing, we'll check if each of those segments have
        # already been downloaded. it's faster to check all segments at one time
        needDlFiles, needDlSegments, onDiskSegments = segmentsNeedDownload(needWorkSegments,
                                                                           overwriteZeroByteSegments = \
                                                                           nzb.overwriteZeroByteFiles)

        # firstSegmentsDownloaded needs to be tweaked if isSkippedPar and no segments were
        # found on disk by segmentsNeedDownload. i.e. first segments have ALWAYS already
        # been downloaded in isParRecovery mode
        fauxFirstSegmentsDownloaded = 0
        if Hellanzb.SMART_PAR and nzb.isParRecovery:
            for nzbFile in nzb.nzbFiles:
                if nzbFile.isSkippedPar and nzbFile.firstSegment not in onDiskSegments:
                    nzb.firstSegmentsDownloaded += 1
                    fauxFirstSegmentsDownloaded += 1

        # Calculate and print parsed/skipped/queued statistics
        skippedPars = 0
        queuedParBlocks = 0
        for nzbFile in needDlFiles:
            if nzbFile.isSkippedPar:
                skippedPars += 1
            elif nzb.isParRecovery and nzbFile.isExtraPar and \
                    not nzbFile.isSkippedPar and len(nzbFile.todoNzbSegments) and \
                    nzbFile.filename is not None and not isHellaTemp(nzbFile.filename):
                queuedParBlocks += getParSize(nzbFile.filename)

        onDiskBytes = 0
        for nzbSegment in onDiskSegments:
            onDiskBytes += nzbSegment.bytes
        for nzbFile in nzb.nzbFiles:
            if nzbFile not in needDlFiles:
                onDiskBytes += nzbFile.totalBytes
        onDiskFilesCount = nzbp.fileCount - len(needWorkFiles)
        onDiskSegmentsCount = len(onDiskSegments)
        info('Parsed: %i files (%i posts), %s' %
             (nzbp.fileCount, nzbp.segmentCount, prettySize(nzb.totalBytes)))
        if onDiskFilesCount or onDiskSegmentsCount:
            filesMsg = segmentsMsg = separator = ''
            if onDiskFilesCount:
                filesMsg = '%i files' % onDiskFilesCount
            if onDiskSegmentsCount:
                segmentsMsg = '%i segments' % onDiskSegmentsCount
            if onDiskFilesCount and onDiskSegmentsCount:
                separator = ' and '
            info('Skipped (on disk): %s%s%s, %s' %
                 (filesMsg, separator, segmentsMsg, prettySize(onDiskBytes)))

        # Tally what was skipped for correct percentages in the UI
        for nzbSegment in onDiskSegments:
            nzbSegment.nzbFile.totalSkippedBytes += nzbSegment.bytes
            nzbSegment.nzbFile.nzb.totalSkippedBytes += nzbSegment.bytes

        # The needWorkFiles will tell us what nzbFiles are missing from the
        # FS. segmentsNeedDownload will further tell us what files need to be
        # downloaded. files missing from the FS (needWorkFiles) but not needing to be
        # downloaded (in needDlFiles) simply need to be assembled
        for nzbFile in needWorkFiles:
            if nzbFile not in needDlFiles:
                # Don't automatically 'finish' the NZB, we'll take care of that in this
                # function if necessary
                if verbose:
                    info(nzbFile.getFilename() +
                         ': Assembling -- all segments were on disk')

                # NOTE: this function is destructive to the passed in nzbFile! And is only
                # called on occasion (might bite you in the ass one day)
                try:
                    assembleNZBFile(nzbFile, autoFinish=False)
                except OutOfDiskSpace:
                    self.nzbDone(nzb)
                    # FIXME: Shouldn't exit here
                    error('Cannot assemble ' + nzbFile.getFilename() +
                          ': No space left on device! Exiting..')
                    Hellanzb.Core.shutdown(True)

        for nzbSegment in needDlSegments:
            # smartDequeue called from segmentsNeedDownload would have set
            # isSkippedParFile for us
            if not nzbSegment.nzbFile.isSkippedPar:
                self.put((nzbSegment.priority, nzbSegment))
            else:
                # This would need to be downloaded if we didn't skip the segment, they are
                # officially dequeued, and can be requeued later
                nzbSegment.nzbFile.dequeuedSegments.add(nzbSegment)

        # Requeue files in certain situations
        if nzb.firstSegmentsDownloaded == len(nzb.nzbFiles):
            # NOTE: This block of code does not commonly happen with newzbin.com NZBs: due
            # to how the DupeHandler handles .NFO files. newzbin.com seems to always
            # duplicate the .NFO file in their NZBs
            smartRequeue(nzb)
            logSkippedPars(nzb)

        if nzb.isParRecovery and nzb.skippedParSubjects and len(nzb.skippedParSubjects) and \
                not len(self):
            # FIXME: This recovering ALL pars should be a mode (with a flag on the NZB
            # object). No par skipping would occur in this mode -- for the incredibly rare
            # case that first segments are lost prior to this mode taking place. What will
            # happen doesn't make sense: hellanzb will say 'recovering ALL pars', then
            # SmartPar will later skip pars
            msg = 'Par recovery download: No pars with prefix: %s -- recovering ALL pars' % \
                nzb.parPrefix
            if skippedPars:
                msg = '%s (%i par files)' % (msg, skippedPars)
            if verbose:
                warn(msg)
            for nzbSegment in needDlSegments:
                if nzbSegment.nzbFile.isSkippedPar:
                    self.put((nzbSegment.priority, nzbSegment))
                    nzbSegment.nzbFile.todoNzbSegments.add(nzbSegment)

            # Only reset the isSkippedPar flag after queueing
            for nzbSegment in needDlSegments:
                if nzbSegment.nzbFile.isSkippedPar:
                    nzbSegment.nzbFile.isSkippedPar = False

            # We might have faked the value of this: reset it
            nzb.firstSegmentsDownloaded -= fauxFirstSegmentsDownloaded

        if not len(self):
            self.nzbDone(nzb)
            if verbose:
                info(nzb.archiveName + ': Assembled archive!')

            reactor.callLater(0, Hellanzb.Daemon.handleNZBDone, nzb)

            # True == the archive is complete
            return True

        # Finally tally the size of the queue
        self.calculateTotalQueuedBytes()
        dlMsg = 'Queued: %s' % prettySize(self.totalQueuedBytes)
        if nzb.isParRecovery and queuedParBlocks:
            dlMsg += ' (recovering %i %s)' % (queuedParBlocks,
                                              getParRecoveryName(nzb.parType))
        info(dlMsg)

        # Archive not complete
        return False
Exemplo n.º 5
0
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments = False):
    """ Faster version of needsDownload for multiple segments that do not have their real
    file name (for use by the Queue).

    When an NZB is loaded and parsed, NZBFiles not found on disk at the time of parsing
    are marked as needing to be downloaded. (An easy first pass of figuring out exactly
    what needs to be downloaded).

    This function is the second pass. It takes the child NZBSegments of those NZBFiles
    and scans Hellanzb.WORKING_DIR, detecting which segments are already on disk and can
    be skipped.

    segmentList -- the segments to check against the working directory
    overwriteZeroByteSegments -- passed through to validWorkingFile for every entry found
                                 in the working directory

    Returns a (needDlFiles, needDlSegments, onDiskSegments) tuple: the set of nzbFiles
    owning at least one segment that still needs downloading, the list of segments that
    still need downloading, and the list of segments that were matched to a file on disk.

    Side effects: every matched segment is passed to Hellanzb.queue.segmentDone. A matched
    first segment whose nzbFile has no filename yet may also have its real filename set
    (via setRealFileName) and, when Hellanzb.SMART_PAR is enabled, is run through
    smartDequeue(readOnlyQueue = True)
    """
    # Arrange all WORKING_DIR segment's filenames in a list. Key this list by segment
    # number in a map. Loop through the specified segmentList, doing a subject.find for
    # each segment filename with a matching segment number

    # Number of trailing characters cut off an on disk segment file name to drop its
    # '.segmentXXXX' suffix
    segmentSuffixLen = len('.segmentXXXX')

    onDiskSegmentsByNumber = {}

    needDlFiles = set() # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in a map of lists
    for workingFile in os.listdir(Hellanzb.WORKING_DIR):
        if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, workingFile),
                                overwriteZeroByteSegments):
            continue

        ext = getFileExtension(workingFile)
        if ext is None or not segmentEndRe.match(ext):
            continue

        # The segment number is the last four characters of the extension
        segmentNumber = int(ext[-4:])

        # cut off .segmentXXXX
        fileNoExt = workingFile[:-segmentSuffixLen]
        onDiskSegmentsByNumber.setdefault(segmentNumber, []).append(fileNoExt)

    # Determine if each segment needs to be downloaded
    for segment in segmentList:

        if segment.number not in onDiskSegmentsByNumber:
            # No matching segment numbers, obviously needs to be downloaded
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        segmentFileNames = onDiskSegmentsByNumber[segment.number]

        foundFileName = None
        for segmentFileName in segmentFileNames:
            # We've matched to our on disk segment if we:
            # a) find that on disk segment's file name in our potential segment's subject
            # b) match that on disk segment's file name to our potential segment's temp
            # file name (w/ .segmentXXXX cutoff)
            if segment.nzbFile.subject.find(segmentFileName) > -1 or \
                    segment.getTempFileName()[:-12] == segmentFileName:
                foundFileName = segmentFileName
                break

        if not foundFileName:
            # Nothing on disk matched this segment (an empty on disk name is deliberately
            # treated as no match, too)
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in
            # memory. since we didnt see the temporary name on the filesystem, but we
            # found a subject match, that means we have the real name on the
            # filesystem. In the case where this happens we've figured out the real
            # filename (hopefully!). Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber = segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's
                # nzbFile's segments (because there are no segments in the queue at
                # this point). It will identifyPar the segment AND more importantly it
                # will mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue = True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        segmentFileNames.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments