def __init__(self, path):
    """Set up the in-memory state for the store kept at `path'.

    A LockFile.LockFile is created on `path' + ".lock", then self.lock()
    is called, the bookkeeping attributes are reset, and self.load() is
    called last.

    NOTE(review): lock() and load() are defined elsewhere; presumably
    they acquire self.lockfile and read the on-disk data -- confirm.
    """
    self.current_index = 0
    self.path = path
    lock_path = self.path + ".lock"
    self.lockfile = LockFile.LockFile(lock_path)
    # Take the lock before any of the remaining state is (re)built.
    self.lock()
    # Start out clean and empty; load() runs once everything is reset.
    self.__dirty = 0
    self.dict = {}
    self.sorted = []
    self.load()
def GetArchLock(self):
    """Try to acquire the archive lock for this list.

    Returns 1 when self._lock_file is already set or when the lock was
    acquired by this call, and 0 when LockFile.TimeOutError was raised
    while waiting (timeout is 0.5).

    The lock object is only remembered in self._lock_file once lock()
    has succeeded.  Storing it beforehand would leave a truthy
    self._lock_file behind after a timeout, so the next call would
    report success without the lock actually being held.
    """
    if self._lock_file:
        return 1
    lock_path = os.path.join(
        mm_cfg.LOCK_DIR,
        self.maillist.internal_name() + '-arch.lock')
    lock_file = LockFile.LockFile(lock_path)
    try:
        lock_file.lock(timeout=0.5)
    except LockFile.TimeOutError:
        # We do not hold the lock, so do not remember the lock object.
        return 0
    self._lock_file = lock_file
    return 1
def makelock():
    """Return a new LockFile.LockFile built from the module's LOCKFILE."""
    lock = LockFile.LockFile(LOCKFILE)
    return lock
def save_attachment(mlist, msg, dir):
    """Store the attachment carried by `msg' under the list's archive dir.

    The attachment is extracted from the message part pointed to by
    `msg' and written below os.path.join(mlist.archive_dir(), dir).
    This code comes from Mailman/Handlers/Scrubber.py with the Scrubber
    specific behavior removed.

    Returns a (path, url) pair: the physical filename that was written
    and its URL below mlist.GetBaseArchiveURL().  The return value is
    not used by this handler, see ftp_upload_attchment().
    """
    fsdir = os.path.join(mlist.archive_dir(), dir)
    makedirs(fsdir)
    # Figure out the attachment type and get the decoded data
    decodedpayload = msg.get_payload(decode=True)
    # BAW: mimetypes ought to handle non-standard, but commonly found types,
    # e.g. image/jpg (should be image/jpeg).  For now we just store such
    # things as application/octet-streams since that seems the safest.
    ctype = msg.get_content_type()
    filename = get_attachment_fname(mlist, msg)
    filename, fnext = os.path.splitext(filename)
    # HTML message doesn't have filename :-(
    ext = fnext or guess_extension(ctype, fnext)
    if not ext:
        # We don't know what it is, so assume it's just a shapeless
        # application/octet-stream, unless the Content-Type: is
        # message/rfc822, which is stored with a .txt extension.
        if ctype == 'message/rfc822':
            ext = '.txt'
        else:
            ext = '.bin'
    # Allow only alphanumerics, dash, underscore, and dot
    ext = sre.sub('', ext)
    path = None
    # We need a lock to calculate the next attachment number
    lockfile = os.path.join(fsdir, 'attachments.lock')
    lock = LockFile.LockFile(lockfile)
    lock.lock()
    try:
        # Now base the filename on what's in the attachment, uniquifying it
        # if necessary.
        if filename:
            # Sanitize the filename given in the message headers
            parts = pre.split(filename)
            filename = parts[-1]
            # Strip off leading dots
            filename = dre.sub('', filename)
            # Allow only alphanumerics, dash, underscore, and dot
            filename = sre.sub('', filename)
        # The extension was removed from the name above, so the one chosen
        # for `ext' is always the one tacked on.  If sanitizing left nothing
        # of the name (or there was none to begin with), fall back to a
        # generic base instead of producing a dot-file such as `.bin'.
        filebase = filename or 'attachment'
        # Now we're looking for a unique name for this file on the file
        # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
        # after filebase, e.g. msgdir/filebase-cnt.ext
        counter = 0
        extra = ''
        while True:
            path = os.path.join(fsdir, filebase + extra + ext)
            # Generally it is not a good idea to test for file existence
            # before just trying to create it, but the alternatives aren't
            # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
            # NFS-safe).
            if not os.path.exists(path):
                break
            counter += 1
            extra = '-%04d' % counter
    finally:
        lock.unlock()
    # `path' now contains the unique filename for the attachment.
    # NOTE(review): the file is created only after the lock is released, so
    # another process could still pick the same name in between -- confirm
    # whether the write should happen while the lock is held.
    #
    # The payload is decoded binary data, so write it in binary mode and
    # make sure the file is closed even if the write fails.
    fp = open(path, 'wb')
    try:
        fp.write(decodedpayload)
    finally:
        fp.close()
    # Now calculate the url
    baseurl = mlist.GetBaseArchiveURL()
    # Private archives will likely have a trailing slash.  Normalize.
    # endswith() also copes with an empty base URL.
    if not baseurl.endswith('/'):
        baseurl += '/'
    # The URL is returned bare (no brackets, no trailing space).
    url = baseurl + '%s/%s%s%s' % (dir, filebase, extra, ext)
    return path, url
def save_attachment(mlist, msg, dir, filter_html=True):
    """Save the attachment in `msg' to the archive and return its URL.

    The decoded payload is written below
    os.path.join(mlist.archive_dir(), dir) under a name that did not yet
    exist when it was chosen.  When `filter_html' is true and the part is
    text/html, the payload is first piped through the program configured
    in mm_cfg.ARCHIVE_HTML_SANITIZER and stored with a .txt extension.
    A message/rfc822 part is stored as the web-safe string form of its
    payload.

    Returns the attachment URL wrapped in angle brackets.
    """
    fsdir = os.path.join(mlist.archive_dir(), dir)
    makedirs(fsdir)
    # Figure out the attachment type and get the decoded data
    decodedpayload = msg.get_payload(decode=True)
    # BAW: mimetypes ought to handle non-standard, but commonly found types,
    # e.g. image/jpg (should be image/jpeg).  For now we just store such
    # things as application/octet-streams since that seems the safest.
    ctype = msg.get_content_type()
    # i18n file name is encoded
    lcset = Utils.GetCharSet(mlist.preferred_language)
    filename = Utils.oneline(msg.get_filename(''), lcset)
    filename, fnext = os.path.splitext(filename)
    # For safety, we should confirm this is valid ext for content-type
    # but we can use fnext if we introduce fnext filtering
    if mm_cfg.SCRUBBER_USE_ATTACHMENT_FILENAME_EXTENSION:
        # HTML message doesn't have filename :-(
        ext = fnext or guess_extension(ctype, fnext)
    else:
        ext = guess_extension(ctype, fnext)
    if not ext:
        # We don't know what it is, so assume it's just a shapeless
        # application/octet-stream, unless the Content-Type: is
        # message/rfc822, in which case we know we'll coerce the type to
        # text/plain below.
        if ctype == 'message/rfc822':
            ext = '.txt'
        else:
            ext = '.bin'
    # Allow only alphanumerics, dash, underscore, and dot
    ext = sre.sub('', ext)
    path = None
    # We need a lock to calculate the next attachment number
    lockfile = os.path.join(fsdir, 'attachments.lock')
    lock = LockFile.LockFile(lockfile)
    lock.lock()
    try:
        # Now base the filename on what's in the attachment, uniquifying it
        # if necessary.
        if not filename or mm_cfg.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME:
            filebase = 'attachment'
        else:
            # Sanitize the filename given in the message headers
            parts = pre.split(filename)
            filename = parts[-1]
            # Strip off leading dots
            filename = dre.sub('', filename)
            # Allow only alphanumerics, dash, underscore, and dot
            filename = sre.sub('', filename)
            # If the filename's extension doesn't match the type we guessed,
            # which one should we go with?  For now, let's go with the one
            # we guessed so attachments can't lie about their type.  Also,
            # if the filename /has/ no extension, then tack on the one we
            # guessed.  The extension was removed from the name above.
            #
            # If sanitizing left nothing of the name, fall back to the
            # generic base.  An empty base would yield a dot-file such as
            # `.html', which os.path.splitext() below treats as having no
            # extension, so the file written and the URL returned would
            # no longer agree.
            filebase = filename or 'attachment'
        # Now we're looking for a unique name for this file on the file
        # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
        # after filebase, e.g. msgdir/filebase-cnt.ext
        counter = 0
        extra = ''
        while True:
            path = os.path.join(fsdir, filebase + extra + ext)
            # Generally it is not a good idea to test for file existence
            # before just trying to create it, but the alternatives aren't
            # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
            # NFS-safe).
            if not os.path.exists(path):
                break
            counter += 1
            extra = '-%04d' % counter
    finally:
        lock.unlock()
    # `path' now contains the unique filename for the attachment.  There's
    # just one more step we need to do.  If the part is text/html and
    # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be
    # here), then send the attachment through the filter program for
    # sanitization
    # NOTE(review): the file is created only after the lock is released, so
    # another process could still pick the same name in between -- confirm.
    if filter_html and ctype == 'text/html':
        base, ext = os.path.splitext(path)
        tmppath = base + '-tmp' + ext
        fp = open(tmppath, 'wb')
        try:
            # Close the temp file even when the write fails, so it is never
            # still open when it gets unlinked below.
            try:
                fp.write(decodedpayload)
            finally:
                fp.close()
            cmd = mm_cfg.ARCHIVE_HTML_SANITIZER % {'filename': tmppath}
            progfp = os.popen(cmd, 'r')
            try:
                decodedpayload = progfp.read()
            finally:
                status = progfp.close()
            if status:
                syslog('error',
                       'HTML sanitizer exited with non-zero status: %s',
                       status)
        finally:
            os.unlink(tmppath)
        # BAW: Since we've now sanitized the document, it should be plain
        # text.  Blarg, we really want the sanitizer to tell us what the
        # type of the return data is. :(
        # NOTE(review): base + '.txt' was not checked for uniqueness above;
        # an existing file of that name would be overwritten -- confirm.
        ext = '.txt'
        path = base + '.txt'
    # Is it a message/rfc822 attachment?
    elif ctype == 'message/rfc822':
        submsg = msg.get_payload()
        # BAW: I'm sure we can eventually do better than this. :(
        decodedpayload = Utils.websafe(str(submsg))
    # Make sure the file is closed even if the write fails.
    fp = open(path, 'wb')
    try:
        fp.write(decodedpayload)
    finally:
        fp.close()
    # Now calculate the url
    baseurl = mlist.GetBaseArchiveURL()
    # Private archives will likely have a trailing slash.  Normalize.
    # endswith() also copes with an empty base URL.
    if not baseurl.endswith('/'):
        baseurl += '/'
    # A trailing space in url string may save users who are using
    # RFC-1738 compliant MUA (Not Mozilla).
    # Trailing space will definitely be a problem with format=flowed.
    # Bracket the URL instead.
    url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
    return url