def startElement(self, name, attrs):
    """SAX callback invoked at the opening tag of an element.

    Handles the three element names this parser cares about:

    - 'file': creates a new NZBFile from the 'subject', 'date' and 'poster'
      attributes, decides whether it needs downloading and numbers it
    - 'group': starts collecting character data
    - 'segment': records the segment's 'bytes' and 'number' attributes and
      starts collecting character data

    name -- the element name
    attrs -- attribute mapping of the element (only its get() is used)
    """
    if name == 'file':
        subject = self.parseUnicode(attrs.get('subject'))
        poster = self.parseUnicode(attrs.get('poster'))

        self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)
        # Segment numbers are tracked per file
        self.segmentNumbers.clear()

        self.fileNeedsDownload = \
            self.file.needsDownload(workingDirListing = self.workingDirListing,
                                    workingDirDupeMap = self.workingDirDupeMap)

        # Special handling for par recovery downloads
        extraMsg = ''
        if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
            if not self.nzb.isSkippedParSubject(subject):
                # Only download previously marked pars
                self.fileNeedsDownload = False
                extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                self.file.nzb.firstSegmentsDownloaded += 1

            elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                # Previously marked par -- only download it if it pertains to the
                # particular par. We keep it set to needsDownload here so it gets to
                # parseNZB -- parseNZB won't actually queue it
                self.file.isSkippedPar = True

        if not self.fileNeedsDownload:
            debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg, self.file.getTempFileName(),
                                                       self.file.subject))

        self.fileCount += 1
        self.file.number = self.fileCount
        # Fallback numbering for this file's segments restarts at 1
        self.fileSegmentNumber = 1

    elif name == 'group':
        self.chars = []

    elif name == 'segment':
        # attrs.get() returns None for a missing attribute and int(None) raises
        # TypeError rather than ValueError, so both must be caught for the
        # fallbacks below to apply to absent as well as malformed attributes
        try:
            self.bytes = int(attrs.get('bytes'))
        except (TypeError, ValueError):
            self.bytes = 0
        try:
            self.number = int(attrs.get('number'))
        except (TypeError, ValueError):
            # Fall back to the position of the segment within its file
            self.number = self.fileSegmentNumber
        self.fileSegmentNumber += 1
        self.chars = []
def assertRecoveredSkippedPar(self, subject):
    """Assert that a skipped par subject survives a write/recover cycle of the
    state.

    Builds a par recovery NZB containing one NZBFile marked as a skipped par,
    calls writeState() and recoverState(), then checks that an NZB rebuilt via
    NZB.fromStateXML still reports the subject as a skipped par subject.

    subject -- subject line of the NZBFile to mark as a skipped par
    """
    nzb = NZB('test.nzb')
    nzb.isParRecovery = True
    nzb.parPrefix = 'test'
    nzb.parType = PAR2
    Hellanzb.queue.nzbAdd(nzb)

    # The NZBFile is handed the NZB on construction; it is only kept in a local
    # so the skipped par flag can be set on it
    skippedParFile = NZBFile(subject, 'today', '*****@*****.**', nzb)
    skippedParFile.isSkippedPar = True

    self.writeState()
    # Drop the local reference to the original NZB before recovering
    del nzb

    self.recoverState()
    recovered = NZB.fromStateXML('downloading', 'test')
    self.assertEqual(True, recovered.isSkippedParSubject(subject))
class NZBParser(ContentHandler):
    """ Parse an NZB 1.0 file into an NZBSegmentQueue
    http://www.newzbin.com/DTD/nzb/nzb-1.0.dtd """

    def __init__(self, nzb, needWorkFiles, needWorkSegments):
        """Prepare the parse state and take stock of the WORKING_DIR.

        nzb -- the NZB being parsed
        needWorkFiles -- list that receives the NZBFiles needing work
        needWorkSegments -- list that receives the NZBSegments needing work
        """
        # nzb file to parse
        self.nzb = nzb

        # to be populated with the files that either need to be downloaded or simply
        # assembled, and their segments
        self.needWorkFiles = needWorkFiles
        self.needWorkSegments = needWorkSegments

        # parsing variables
        self.file = None
        self.bytes = None
        self.number = None
        self.chars = None
        self.fileNeedsDownload = None

        self.fileCount = 0
        self.segmentCount = 0
        self.fileSegmentNumber = 1

        # All encountered segment numbers for the current NZBFile
        self.segmentNumbers = set()

        # Current listing of existing files in the WORKING_DIR
        self.workingDirListing = []

        # Map of duplicate filenames -- @see DupeHandler.handleDupeOnDisk
        self.workingDirDupeMap = {}

        # heapq priority
        from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
        self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P

        filenames = os.listdir(Hellanzb.WORKING_DIR)
        filenames.sort()
        for filename in filenames:
            # Anonymous duplicate file segments lying around are too painful to keep
            # track of. As are segments that previously failed on different servers
            if DUPE_SEGMENT_RE.match(filename) or \
                    FAILED_ALT_SERVER_SEGMENT_RE.match(filename):
                os.remove(os.path.join(Hellanzb.WORKING_DIR, filename))
                continue

            # Add an entry to the self.workingDirDupeMap if this file looks like a
            # duplicate, and also skip adding it to self.workingDirListing (dupes are
            # handled specially so we don't care for them there)
            if handleDupeOnDisk(filename, self.workingDirDupeMap):
                continue

            if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, filename),
                                    self.nzb.overwriteZeroByteFiles):
                continue

            self.workingDirListing.append(filename)

    def startElement(self, name, attrs):
        """SAX callback invoked at the opening tag of an element.

        'file' creates the current NZBFile and decides whether it needs
        downloading, 'group' and 'segment' start collecting character data;
        'segment' additionally records its 'bytes' and 'number' attributes.
        """
        if name == 'file':
            subject = self.parseUnicode(attrs.get('subject'))
            poster = self.parseUnicode(attrs.get('poster'))

            self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)
            # Segment numbers are tracked per file
            self.segmentNumbers.clear()

            self.fileNeedsDownload = \
                self.file.needsDownload(workingDirListing = self.workingDirListing,
                                        workingDirDupeMap = self.workingDirDupeMap)

            # Special handling for par recovery downloads
            extraMsg = ''
            if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
                if not self.nzb.isSkippedParSubject(subject):
                    # Only download previously marked pars
                    self.fileNeedsDownload = False
                    extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                    self.file.nzb.firstSegmentsDownloaded += 1

                elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                    # Previously marked par -- only download it if it pertains to the
                    # particular par. We keep it set to needsDownload here so it gets to
                    # parseNZB -- parseNZB won't actually queue it
                    self.file.isSkippedPar = True

            if not self.fileNeedsDownload:
                debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg,
                                                           self.file.getTempFileName(),
                                                           self.file.subject))

            self.fileCount += 1
            self.file.number = self.fileCount
            # Fallback numbering for this file's segments restarts at 1
            self.fileSegmentNumber = 1

        elif name == 'group':
            self.chars = []

        elif name == 'segment':
            # attrs.get() returns None for a missing attribute and int(None) raises
            # TypeError rather than ValueError, so both must be caught for the
            # fallbacks below to apply to absent as well as malformed attributes
            try:
                self.bytes = int(attrs.get('bytes'))
            except (TypeError, ValueError):
                self.bytes = 0
            try:
                self.number = int(attrs.get('number'))
            except (TypeError, ValueError):
                # Fall back to the position of the segment within its file
                self.number = self.fileSegmentNumber
            self.fileSegmentNumber += 1
            self.chars = []

    def characters(self, content):
        """SAX callback: buffer character data while inside a 'group' or
        'segment' element (i.e. while self.chars is a list)."""
        if self.chars is not None:
            self.chars.append(content)

    def endElement(self, name):
        """SAX callback invoked at the closing tag of an element.

        'file' files the finished NZBFile under needWorkFiles or accounts for it
        as skipped, 'group' stores the newsgroup name on the current file and
        'segment' creates the NZBSegment from the buffered message id.
        """
        if name == 'file':
            if self.fileNeedsDownload:
                self.needWorkFiles.append(self.file)
            else:
                # done adding all child segments to this NZBFile. make note that none
                # of them need to be downloaded
                self.file.nzb.totalSkippedBytes += self.file.totalBytes
                self.file.todoNzbSegments.clear()

            self.file = None
            self.fileNeedsDownload = None

        elif name == 'group':
            newsgroup = self.parseUnicode(''.join(self.chars))
            self.file.groups.append(newsgroup)
            self.chars = None

        elif name == 'segment':
            if self.number not in self.segmentNumbers:
                self.segmentNumbers.add(self.number)
                self.segmentCount += 1

                messageId = self.parseUnicode(''.join(self.chars))
                nzbs = NZBSegment(self.bytes, self.number, messageId, self.file)
                if self.number == 1:
                    self.file.firstSegment = nzbs

                if self.fileNeedsDownload:
                    # HACK: Maintain the order in which we encountered the segments by
                    # adding segmentCount to the priority. lame afterthought -- after
                    # realizing heapqs aren't ordered. nzbContentPriority must now be
                    # large enough so that it won't ever clash with EXTRA_PAR2_P + i
                    nzbs.priority = self.nzbContentPriority
                    if nzbs.number != 1:
                        nzbs.priority += self.segmentCount
                    self.needWorkSegments.append(nzbs)
            # else: this segment number was already registered -- it is ignored

            # Always clear the per-segment state, duplicates included, so that
            # characters() does not keep buffering text found between elements
            self.chars = None
            self.number = None
            self.bytes = None

    def parseUnicode(self, unicodeOrStr):
        """Return unicode input encoded as latin-1; any other value is returned
        unchanged."""
        if isinstance(unicodeOrStr, unicode):
            return unicodeOrStr.encode('latin-1')
        return unicodeOrStr
class NZBParser(ContentHandler):
    """ Parse an NZB 1.0 file into an NZBSegmentQueue
    http://www.newzbin.com/DTD/nzb/nzb-1.0.dtd """

    def __init__(self, nzb, needWorkFiles, needWorkSegments):
        """Prepare the parse state and take stock of the WORKING_DIR.

        nzb -- the NZB being parsed
        needWorkFiles -- list that receives the NZBFiles needing work
        needWorkSegments -- list that receives the NZBSegments needing work
        """
        # nzb file to parse
        self.nzb = nzb

        # to be populated with the files that either need to be downloaded or simply
        # assembled, and their segments
        self.needWorkFiles = needWorkFiles
        self.needWorkSegments = needWorkSegments

        # parsing variables
        self.file = None
        self.bytes = None
        self.number = None
        self.chars = None
        self.fileNeedsDownload = None

        self.fileCount = 0
        self.segmentCount = 0
        self.fileSegmentNumber = 1

        # All encountered segment numbers for the current NZBFile
        self.segmentNumbers = set()

        # Current listing of existing files in the WORKING_DIR
        self.workingDirListing = []

        # Map of duplicate filenames -- @see DupeHandler.handleDupeOnDisk
        self.workingDirDupeMap = {}

        # heapq priority
        from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
        self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P

        filenames = os.listdir(Hellanzb.WORKING_DIR)
        filenames.sort()
        for filename in filenames:
            # Anonymous duplicate file segments lying around are too painful to keep
            # track of. As are segments that previously failed on different servers
            if DUPE_SEGMENT_RE.match(filename) or \
                    FAILED_ALT_SERVER_SEGMENT_RE.match(filename):
                os.remove(os.path.join(Hellanzb.WORKING_DIR, filename))
                continue

            # Add an entry to the self.workingDirDupeMap if this file looks like a
            # duplicate, and also skip adding it to self.workingDirListing (dupes are
            # handled specially so we don't care for them there)
            if handleDupeOnDisk(filename, self.workingDirDupeMap):
                continue

            if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, filename),
                                    self.nzb.overwriteZeroByteFiles):
                continue

            self.workingDirListing.append(filename)

    def startElement(self, name, attrs):
        """SAX callback invoked at the opening tag of an element.

        'file' creates the current NZBFile and decides whether it needs
        downloading, 'group' and 'segment' start collecting character data;
        'segment' additionally records its 'bytes' and 'number' attributes.
        """
        if name == 'file':
            subject = self.parseUnicode(attrs.get('subject'))
            poster = self.parseUnicode(attrs.get('poster'))

            self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)
            # Segment numbers are tracked per file
            self.segmentNumbers.clear()

            self.fileNeedsDownload = \
                self.file.needsDownload(workingDirListing = self.workingDirListing,
                                        workingDirDupeMap = self.workingDirDupeMap)

            # Special handling for par recovery downloads
            extraMsg = ''
            if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
                if not self.nzb.isSkippedParSubject(subject):
                    # Only download previously marked pars
                    self.fileNeedsDownload = False
                    extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                    self.file.nzb.firstSegmentsDownloaded += 1

                elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                    # Previously marked par -- only download it if it pertains to the
                    # particular par. We keep it set to needsDownload here so it gets to
                    # parseNZB -- parseNZB won't actually queue it
                    self.file.isSkippedPar = True

            if not self.fileNeedsDownload:
                debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg,
                                                           self.file.getTempFileName(),
                                                           self.file.subject))

            self.fileCount += 1
            self.file.number = self.fileCount
            # Fallback numbering for this file's segments restarts at 1
            self.fileSegmentNumber = 1

        elif name == 'group':
            self.chars = []

        elif name == 'segment':
            # attrs.get() returns None for a missing attribute and int(None) raises
            # TypeError rather than ValueError, so both must be caught for the
            # fallbacks below to apply to absent as well as malformed attributes
            try:
                self.bytes = int(attrs.get('bytes'))
            except (TypeError, ValueError):
                self.bytes = 0
            try:
                self.number = int(attrs.get('number'))
            except (TypeError, ValueError):
                # Fall back to the position of the segment within its file
                self.number = self.fileSegmentNumber
            self.fileSegmentNumber += 1
            self.chars = []

    def characters(self, content):
        """SAX callback: buffer character data while inside a 'group' or
        'segment' element (i.e. while self.chars is a list)."""
        if self.chars is not None:
            self.chars.append(content)

    def endElement(self, name):
        """SAX callback invoked at the closing tag of an element.

        'file' files the finished NZBFile under needWorkFiles or accounts for it
        as skipped, 'group' stores the newsgroup name on the current file and
        'segment' creates the NZBSegment from the buffered message id.
        """
        if name == 'file':
            if self.fileNeedsDownload:
                self.needWorkFiles.append(self.file)
            else:
                # done adding all child segments to this NZBFile. make note that none
                # of them need to be downloaded
                self.file.nzb.totalSkippedBytes += self.file.totalBytes
                self.file.todoNzbSegments.clear()

            self.file = None
            self.fileNeedsDownload = None

        elif name == 'group':
            newsgroup = self.parseUnicode(''.join(self.chars))
            self.file.groups.append(newsgroup)
            self.chars = None

        elif name == 'segment':
            if self.number not in self.segmentNumbers:
                self.segmentNumbers.add(self.number)
                self.segmentCount += 1

                messageId = self.parseUnicode(''.join(self.chars))
                nzbs = NZBSegment(self.bytes, self.number, messageId, self.file)
                if self.number == 1:
                    self.file.firstSegment = nzbs

                if self.fileNeedsDownload:
                    # HACK: Maintain the order in which we encountered the segments by
                    # adding segmentCount to the priority. lame afterthought -- after
                    # realizing heapqs aren't ordered. nzbContentPriority must now be
                    # large enough so that it won't ever clash with EXTRA_PAR2_P + i
                    nzbs.priority = self.nzbContentPriority
                    if nzbs.number != 1:
                        nzbs.priority += self.segmentCount
                    self.needWorkSegments.append(nzbs)
            # else: this segment number was already registered -- it is ignored

            # Always clear the per-segment state, duplicates included, so that
            # characters() does not keep buffering text found between elements
            self.chars = None
            self.number = None
            self.bytes = None

    def parseUnicode(self, unicodeOrStr):
        """Return the value with surrounding whitespace stripped, encoding
        unicode input as latin-1 first.

        None (e.g. an attribute missing from the element) is returned unchanged
        instead of failing on the strip() call.
        """
        if unicodeOrStr is None:
            return None
        if isinstance(unicodeOrStr, unicode):
            unicodeOrStr = unicodeOrStr.encode('latin-1')
        return unicodeOrStr.strip()