def test_get_object_range(self):
    """Exercise closed, open-ended, suffix and multiple byte-range GETs.

    Uploads a 10-byte object, then verifies that each Range request
    yields a 206 response with the expected Content-Length, preserved
    user metadata and the correct body slice.  The multi-range case is
    decoded part by part with MultiFile.
    """
    obj = 'object'
    content = 'abcdefghij'
    headers = {'x-amz-meta-test': 'swift'}
    self.conn.make_request('PUT', self.bucket, obj,
                           headers=headers, body=content)

    # Closed range: bytes 1-5 inclusive -> 'bcdef' (5 bytes).
    headers = {'Range': 'bytes=1-5'}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertEqual(headers['content-length'], '5')
    self.assertTrue('x-amz-meta-test' in headers)
    self.assertEqual('swift', headers['x-amz-meta-test'])
    self.assertEqual(body, 'bcdef')

    # Open-ended range: byte 5 through the end -> 'fghij'.
    headers = {'Range': 'bytes=5-'}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertEqual(headers['content-length'], '5')
    self.assertTrue('x-amz-meta-test' in headers)
    self.assertEqual('swift', headers['x-amz-meta-test'])
    self.assertEqual(body, 'fghij')

    # Suffix range: the last 5 bytes -> 'fghij'.
    headers = {'Range': 'bytes=-5'}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertEqual(headers['content-length'], '5')
    self.assertTrue('x-amz-meta-test' in headers)
    self.assertEqual('swift', headers['x-amz-meta-test'])
    self.assertEqual(body, 'fghij')

    # Multiple ranges -> multipart/byteranges response.
    ranges = ['1-2', '4-5']
    headers = {'Range': 'bytes=%s' % ','.join(ranges)}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertTrue('content-type' in headers)  # sanity
    content_type, boundary = headers['content-type'].split(';')
    self.assertEqual('multipart/byteranges', content_type)
    self.assertTrue(boundary.startswith('boundary='))  # sanity
    boundary_str = boundary[len('boundary='):]
    sio = StringIO(body)
    mfile = MultiFile(sio)
    mfile.push(boundary_str)

    def check_line_header(line, expected_key, expected_value):
        # Assert a 'Key: value' header line matches the expectation.
        key, value = line.split(':', 1)
        self.assertEqual(expected_key, key.strip())
        self.assertEqual(expected_value, value.strip())

    for range_value in ranges:
        start, end = map(int, range_value.split('-'))
        # go to next section and check sanity
        self.assertTrue(mfile.next())
        lines = mfile.readlines()
        # first line should be content-type which includes the original
        # content-type, e.g. Content-Type: application/octet-stream
        check_line_header(lines[0].strip(), 'Content-Type',
                          'application/octet-stream')
        # second line should be byte range information
        # e.g. Content-Range: bytes 1-2/11
        expected_range = 'bytes %s/%s' % (range_value, len(content))
        check_line_header(lines[1].strip(), 'Content-Range', expected_range)
        # the rest of the part is the payload itself
        rest = [line for line in lines[2:] if line.strip()]
        self.assertEqual(1, len(rest))  # sanity
        # FIX: the original used assertTrue(content[start:end], rest[0]),
        # which only checks truthiness of the first argument (the second
        # is the failure message), so the body was never actually
        # compared.  HTTP byte ranges are end-inclusive, hence end + 1.
        self.assertEqual(content[start:end + 1], rest[0].strip())
    # no next section
    self.assertFalse(mfile.next())  # sanity
def process_archive(self, peer, sender, mail_options, recips, rcptopts, data):
    """Archives email meta data using a Backend.

    Parses the raw message in *data*, extracts envelope/header metadata
    (message-id, from, recipients, subject, date, size, attachments),
    hands it to ``self.backend`` and, on success, stamps the mail with
    an X-Archiver-ID header before relaying it to the next hop.

    Returns the result of ``self.sendmail`` on the relay path, or of
    ``self.do_exit`` with an SMTP-style status code on rejection.
    """
    LOG(E_INFO, "%s: Sender is <%s> - Recipients (Envelope): %s" %
        (self.type, sender, ",".join(recips)))

    size = len(data)
    if size < MINSIZE:
        return self.do_exit(550, "Invalid Mail")
    if not data.endswith(NL):
        data = data + NL

    args = {}
    aid = None
    stream = StringIO(data)
    msg = Message(stream)

    # Null return path (bounce) mails are relayed but never archived.
    if sender == "":
        LOG(E_INFO, "%s: Null return path mail, not archived" % (self.type))
        return self.sendmail("<>", mail_options, recips, rcptopts, data, aid)

    ## Check if I have msgid in my cache
    mid = msg.get("message-id", self.new_mid())
    # Renamed from 'hash' to avoid shadowing the builtin.
    msg_hash = hash_headers(msg.get)
    if self.hashdb.has_key(msg_hash):
        LOG(E_TRACE, "%s: Message-id: %s" % (self.type, mid))
        aid = self.hashdb[msg_hash]
        LOG(E_TRACE, "%s: Message already has year/pid pair, only adding header" % self.type)
        return self.sendmail(sender, mail_options, recips, rcptopts,
                             self.add_aid(data, msg, aid), aid, msg_hash)
    args["m_mid"] = mid
    args["hash"] = msg_hash

    ## Check for duplicate headers
    dupe = dupe_check(msg.headers)
    if dupe is not None:
        LOG(E_ERR, "%s: Duplicate header %s" % (self.type, dupe))
        return self.do_exit(552, "Duplicate header %s" % dupe)

    ## Extraction of From field
    m_from = msg.getaddrlist("From")
    if len(m_from) == 1:
        m_from = safe_parseaddr(m_from[0][1])
    else:
        m_from = None

    ## Empty or invalid 'From' field, try to use sender
    if m_from is None:
        LOG(E_ERR, "%s: no From header in mail using sender" % self.type)
        m_from = safe_parseaddr(sender)

    ## No luck
    if m_from is None:
        return self.do_exit(552, "Mail has not suitable From/Sender")
    args["m_from"] = m_from

    ## Extract 'To' field
    m_to = []
    for h in msg.getaddrlist("To"):
        rec = safe_parseaddr(h[1])
        if rec is None:
            continue
        m_to.append(rec)

    ## Empty 'To' field use recipients
    if len(m_to) == 0:
        LOG(E_ERR, "%s: no To header in mail using recipients" % self.type)
        for recipient in recips:
            rec = safe_parseaddr(recipient)
            if rec is None:
                continue
            m_to.append(rec)
    if len(m_to) == 0:
        return self.do_exit(552, "Mail has not suitable To/Recipient")

    ## Extract 'Cc' field
    for h in msg.getaddrlist("Cc"):
        rec = safe_parseaddr(h[1])
        if rec is None:
            continue
        m_to.append(rec)

    ## Cleanup: remove duplicates, preserving first-seen order
    recs = []
    for rec in m_to:
        if rec not in recs:
            recs.append(rec)
    args["m_rec"] = recs

    ## Extract 'Subject' field
    m_sub = mime_decode_header(msg.get("Subject", "No Subject"))
    if subjpattern is not None and m_sub.find(subjpattern) != -1:
        LOG(E_INFO, "%s: Subject pattern matched, not archived" % self.type)
        return self.sendmail(sender, mail_options, recips, rcptopts,
                             self.remove_aid(data, msg))
    args["m_sub"] = m_sub

    ## Whitelist check: From, To and Sender (envelope)
    checklist = [m_from] + m_to
    ss = safe_parseaddr(sender)
    if ss is not None:
        checklist.append(ss)
    for check in checklist:
        # The whitelist is matched against the local part only.
        if check.split("@", 1)[0] in whitelist:
            LOG(E_INFO, "%s: Mail to: %s in whitelist, not archived" % (self.type, check))
            return self.sendmail(sender, mail_options, recips, rcptopts,
                                 self.remove_aid(data, msg))

    ## Sender size limit check - in kb
    if dbchecker is not None and dbchecker.quota_check(m_from, size >> 10):
        # FIX: corrected typo in the error message ("execeded").
        return self.do_exit(422, "Sender quota exceeded")
    args["m_size"] = size

    ## Extract 'Date' field
    m_date = None
    if self.datefromemail:
        m_date = msg.getdate("Date")
        try:
            mktime(m_date)
        except Exception:
            # FIX: bare 'except:' narrowed so KeyboardInterrupt is not
            # swallowed; an unparsable Date falls back to localtime below.
            m_date = None
    if m_date is None:
        m_date = localtime(time())
    args["m_date"] = m_date

    ## Attachment extraction
    m_attach = []
    if msg.maintype != "multipart":
        m_parse = parse_message(msg)
        if m_parse is not None:
            m_attach.append(m_parse)
    else:
        filepart = MultiFile(stream)
        filepart.push(msg.getparam("boundary"))
        try:
            while filepart.next():
                submsg = Message(filepart)
                subpart = parse_message(submsg)
                if subpart is not None:
                    m_attach.append(subpart)
        except Exception:
            # FIX: bare 'except:' narrowed; a broken MIME structure is
            # logged and archiving proceeds with whatever was parsed.
            LOG(E_ERR, "%s: Error in multipart splitting" % self.type)
    args["m_attach"] = m_attach

    ## Collect data for mb lookup
    if dbchecker is not None:
        args["m_mboxes"] = dbchecker.mblookup([m_from] + m_to)
    else:
        args["m_mboxes"] = []

    year, pid, error = self.backend.process(args)
    if year == 0:
        LOG(E_ERR, "%s: Backend Error: %s" % (self.type, error))
        return self.do_exit(pid, error)

    ## Adding X-Archiver-ID: header
    aid = "%d-%d" % (year, pid)
    data = self.add_aid(data, msg, aid)
    LOG(E_TRACE, "%s: inserting %s msg in hashdb" % (self.type, aid))
    self.hashdb[msg_hash] = aid
    self.hashdb.sync()

    ## Next hop
    LOG(E_TRACE, "%s: backend worked fine" % self.type)
    LOG(E_TRACE, "%s: passing data to nexthop: %s:%s" % (self.type, self.output_address, self.output_port))
    return self.sendmail(sender, mail_options, recips, rcptopts, data, aid, msg_hash)
def expectMultipleRanges(self, range, sets, draft=0, rangeParse=re.compile('bytes\s*(\d+)-(\d+)/(\d+)')): req = self.app.REQUEST rsp = req.RESPONSE # Add headers req.environ['HTTP_RANGE'] = 'bytes=%s' % range if draft: req.environ['HTTP_REQUEST_RANGE'] = 'bytes=%s' % range body = self.doGET(req, rsp) self.failUnless(rsp.getStatus() == 206, 'Expected a 206 status, got %s' % rsp.getStatus()) self.failIf(rsp.getHeader('content-range'), 'The Content-Range header should not be set!') ct = string.split(rsp.getHeader('content-type'), ';')[0] draftprefix = draft and 'x-' or '' self.failIf(ct != 'multipart/%sbyteranges' % draftprefix, "Incorrect Content-Type set. Expected 'multipart/%sbyteranges', " "got %s" % (draftprefix, ct)) if rsp.getHeader('content-length'): self.failIf(rsp.getHeader('content-length') != str(len(body)), 'Incorrect Content-Length is set! Expected %s, got %s.' % ( str(len(body)), rsp.getHeader('content-length'))) # Decode the multipart message bodyfile = cStringIO.StringIO('Content-Type: %s\n\n%s' % ( rsp.getHeader('content-type'), body)) bodymessage = Message(bodyfile) partfiles = MultiFile(bodyfile) partfiles.push(bodymessage.getparam('boundary')) partmessages = [] add = partmessages.append while partfiles.next(): add(Message(cStringIO.StringIO(partfiles.read()))) # Check the different parts returnedRanges = [] add = returnedRanges.append for part in partmessages: range = part['content-range'] start, end, size = rangeParse.search(range).groups() start, end, size = int(start), int(end), int(size) end = end + 1 self.failIf(size != len(self.data), 'Part Content-Range header reported incorrect length. ' 'Expected %d, got %d.' % (len(self.data), size)) part.rewindbody() body = part.fp.read() # Whotcha! Bug in MultiFile; the CRLF that is part of the boundary # is returned as part of the body. Note that this bug is resolved # in Python 2.2. 
if body[-2:] == '\r\n': body = body[:-2] self.failIf(len(body) != end - start, 'Part (%d, %d) is of wrong length, expected %d, got %d.' % ( start, end, end - start, len(body))) self.failIf(body != self.data[start:end], 'Part (%d, %d) has incorrect data. Expected %s, got %s.' % ( start, end, `self.data[start:end]`, `body`)) add((start, end)) # Copmare the ranges used with the expected range sets. self.failIf(returnedRanges != sets, 'Got unexpected sets, expected %s, got %s' % ( sets, returnedRanges))
def test_get_object_range(self):
    """Exercise closed, open-ended, suffix and multiple byte-range GETs.

    Uploads a 10-byte object, then verifies that each Range request
    yields a 206 response with the expected Content-Length, preserved
    user metadata and the correct body slice.  The multi-range case is
    decoded part by part with MultiFile.
    """
    # NOTE: deprecated assertEquals alias replaced with assertEqual.
    obj = 'object'
    content = 'abcdefghij'
    headers = {'x-amz-meta-test': 'swift'}
    self.conn.make_request(
        'PUT', self.bucket, obj, headers=headers, body=content)

    # Closed range: bytes 1-5 inclusive -> 'bcdef' (5 bytes).
    headers = {'Range': 'bytes=1-5'}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertEqual(headers['content-length'], '5')
    self.assertTrue('x-amz-meta-test' in headers)
    self.assertEqual('swift', headers['x-amz-meta-test'])
    self.assertEqual(body, 'bcdef')

    # Open-ended range: byte 5 through the end -> 'fghij'.
    headers = {'Range': 'bytes=5-'}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertEqual(headers['content-length'], '5')
    self.assertTrue('x-amz-meta-test' in headers)
    self.assertEqual('swift', headers['x-amz-meta-test'])
    self.assertEqual(body, 'fghij')

    # Suffix range: the last 5 bytes -> 'fghij'.
    headers = {'Range': 'bytes=-5'}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertEqual(headers['content-length'], '5')
    self.assertTrue('x-amz-meta-test' in headers)
    self.assertEqual('swift', headers['x-amz-meta-test'])
    self.assertEqual(body, 'fghij')

    # Multiple ranges -> multipart/byteranges response.
    ranges = ['1-2', '4-5']
    headers = {'Range': 'bytes=%s' % ','.join(ranges)}
    status, headers, body = \
        self.conn.make_request('GET', self.bucket, obj, headers=headers)
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)
    self.assertTrue('content-type' in headers)  # sanity
    content_type, boundary = headers['content-type'].split(';')
    self.assertEqual('multipart/byteranges', content_type)
    self.assertTrue(boundary.startswith('boundary='))  # sanity
    boundary_str = boundary[len('boundary='):]
    sio = StringIO(body)
    mfile = MultiFile(sio)
    mfile.push(boundary_str)

    def check_line_header(line, expected_key, expected_value):
        # Assert a 'Key: value' header line matches the expectation.
        key, value = line.split(':', 1)
        self.assertEqual(expected_key, key.strip())
        self.assertEqual(expected_value, value.strip())

    for range_value in ranges:
        start, end = map(int, range_value.split('-'))
        # go to next section and check sanity
        self.assertTrue(mfile.next())
        lines = mfile.readlines()
        # first line should be content-type which includes the original
        # content-type, e.g. Content-Type: application/octet-stream
        check_line_header(
            lines[0].strip(), 'Content-Type', 'application/octet-stream')
        # second line should be byte range information
        # e.g. Content-Range: bytes 1-2/11
        expected_range = 'bytes %s/%s' % (range_value, len(content))
        check_line_header(
            lines[1].strip(), 'Content-Range', expected_range)
        # the rest of the part is the payload itself
        rest = [line for line in lines[2:] if line.strip()]
        self.assertEqual(1, len(rest))  # sanity
        # FIX: the original used assertTrue(content[start:end], rest[0]),
        # which only checks truthiness of the first argument (the second
        # is the failure message), so the body was never actually
        # compared.  HTTP byte ranges are end-inclusive, hence end + 1.
        self.assertEqual(content[start:end + 1], rest[0].strip())
    # no next section
    self.assertFalse(mfile.next())  # sanity
def process_archive(self, peer, sender, mail_options, recips, rcptopts, data):
    """Archives email meta data using a Backend.

    Parses the raw message in *data*, extracts envelope/header metadata
    (message-id, from, recipients, subject, date, size, attachments),
    hands it to ``self.backend`` and, on success, stamps the mail with
    an X-Archiver-ID header before relaying it to the next hop.

    Returns the result of ``self.sendmail`` on the relay path, or of
    ``self.do_exit`` with an SMTP-style status code on rejection.
    """
    LOG(
        E_INFO,
        '%s: Sender is <%s> - Recipients (Envelope): %s' %
        (self.type, sender, ','.join(recips)))

    size = len(data)
    if size < MINSIZE:
        return self.do_exit(550, 'Invalid Mail')
    if not data.endswith(NL):
        data = data + NL

    args = {}
    aid = None
    stream = StringIO(data)
    msg = Message(stream)

    # Null return path (bounce) mails are relayed but never archived.
    if sender == '':
        LOG(E_INFO, '%s: Null return path mail, not archived' % (self.type))
        return self.sendmail('<>', mail_options, recips, rcptopts, data, aid)

    ## Check if I have msgid in my cache
    mid = msg.get('message-id', self.new_mid())
    # Renamed from 'hash' to avoid shadowing the builtin.
    msg_hash = hash_headers(msg.get)
    if self.hashdb.has_key(msg_hash):
        LOG(E_TRACE, '%s: Message-id: %s' % (self.type, mid))
        aid = self.hashdb[msg_hash]
        LOG(
            E_TRACE,
            '%s: Message already has year/pid pair, only adding header' %
            self.type)
        return self.sendmail(sender, mail_options, recips, rcptopts,
                             self.add_aid(data, msg, aid), aid, msg_hash)
    args['m_mid'] = mid
    args['hash'] = msg_hash

    ## Check for duplicate headers
    dupe = dupe_check(msg.headers)
    if dupe is not None:
        LOG(E_ERR, '%s: Duplicate header %s' % (self.type, dupe))
        return self.do_exit(552, 'Duplicate header %s' % dupe)

    ## Extraction of From field
    m_from = msg.getaddrlist('From')
    if len(m_from) == 1:
        m_from = safe_parseaddr(m_from[0][1])
    else:
        m_from = None

    ## Empty or invalid 'From' field, try to use sender
    if m_from is None:
        LOG(E_ERR, '%s: no From header in mail using sender' % self.type)
        m_from = safe_parseaddr(sender)

    ## No luck
    if m_from is None:
        return self.do_exit(552, 'Mail has not suitable From/Sender')
    args['m_from'] = m_from

    ## Extract 'To' field
    m_to = []
    for h in msg.getaddrlist('To'):
        rec = safe_parseaddr(h[1])
        if rec is None:
            continue
        m_to.append(rec)

    ## Empty 'To' field use recipients
    if len(m_to) == 0:
        LOG(E_ERR, '%s: no To header in mail using recipients' % self.type)
        for recipient in recips:
            rec = safe_parseaddr(recipient)
            if rec is None:
                continue
            m_to.append(rec)
    if len(m_to) == 0:
        return self.do_exit(552, 'Mail has not suitable To/Recipient')

    ## Extract 'Cc' field
    for h in msg.getaddrlist('Cc'):
        rec = safe_parseaddr(h[1])
        if rec is None:
            continue
        m_to.append(rec)

    ## Cleanup: remove duplicates, preserving first-seen order
    recs = []
    for rec in m_to:
        if rec not in recs:
            recs.append(rec)
    args['m_rec'] = recs

    ## Extract 'Subject' field
    m_sub = mime_decode_header(msg.get('Subject', 'No Subject'))
    if subjpattern is not None and m_sub.find(subjpattern) != -1:
        LOG(E_INFO, '%s: Subject pattern matched, not archived' % self.type)
        return self.sendmail(sender, mail_options, recips, rcptopts,
                             self.remove_aid(data, msg))
    args['m_sub'] = m_sub

    ## Whitelist check: From, To and Sender (envelope)
    checklist = [m_from] + m_to
    ss = safe_parseaddr(sender)
    if ss is not None:
        checklist.append(ss)
    for check in checklist:
        # The whitelist is matched against the local part only.
        if check.split('@', 1)[0] in whitelist:
            LOG(
                E_INFO,
                '%s: Mail to: %s in whitelist, not archived' %
                (self.type, check))
            return self.sendmail(sender, mail_options, recips, rcptopts,
                                 self.remove_aid(data, msg))

    ## Sender size limit check - in kb
    if dbchecker is not None and dbchecker.quota_check(
            m_from, size >> 10):
        # FIX: corrected typo in the error message ('execeded').
        return self.do_exit(422, 'Sender quota exceeded')
    args['m_size'] = size

    ## Extract 'Date' field
    m_date = None
    if self.datefromemail:
        m_date = msg.getdate('Date')
        try:
            mktime(m_date)
        except Exception:
            # FIX: bare 'except:' narrowed so KeyboardInterrupt is not
            # swallowed; an unparsable Date falls back to localtime below.
            m_date = None
    if m_date is None:
        m_date = localtime(time())
    args['m_date'] = m_date

    ## Attachment extraction
    m_attach = []
    if msg.maintype != 'multipart':
        m_parse = parse_message(msg)
        if m_parse is not None:
            m_attach.append(m_parse)
    else:
        filepart = MultiFile(stream)
        filepart.push(msg.getparam('boundary'))
        try:
            while filepart.next():
                submsg = Message(filepart)
                subpart = parse_message(submsg)
                if subpart is not None:
                    m_attach.append(subpart)
        except Exception:
            # FIX: bare 'except:' narrowed; a broken MIME structure is
            # logged and archiving proceeds with whatever was parsed.
            LOG(E_ERR, '%s: Error in multipart splitting' % self.type)
    args['m_attach'] = m_attach

    ## Collect data for mb lookup
    if dbchecker is not None:
        args['m_mboxes'] = dbchecker.mblookup([m_from] + m_to)
    else:
        args['m_mboxes'] = []

    year, pid, error = self.backend.process(args)
    if year == 0:
        LOG(E_ERR, '%s: Backend Error: %s' % (self.type, error))
        return self.do_exit(pid, error)

    ## Adding X-Archiver-ID: header
    aid = '%d-%d' % (year, pid)
    data = self.add_aid(data, msg, aid)
    LOG(E_TRACE, '%s: inserting %s msg in hashdb' % (self.type, aid))
    self.hashdb[msg_hash] = aid
    self.hashdb.sync()

    ## Next hop
    LOG(E_TRACE, '%s: backend worked fine' % self.type)
    LOG(
        E_TRACE,
        '%s: passing data to nexthop: %s:%s' %
        (self.type, self.output_address, self.output_port))
    return self.sendmail(sender, mail_options, recips, rcptopts, data, aid,
                         msg_hash)