def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: parse a part whose body is itself a multipart document.

    Rebuilds ``self.list``: first one ``MiniFieldStorage`` per query-string
    pair (only when ``self.qs_on_post`` is set), then one storage object per
    MIME sub-part read from ``self.fp``.

    Raises:
        ValueError: if ``valid_boundary`` rejects ``self.innerboundary``.
    """
    boundary = self.innerboundary
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r' % (boundary,))

    self.list = []
    if self.qs_on_post:
        pairs = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing)
        for name, text in pairs:
            self.list.append(MiniFieldStorage(name, text))

    part_class = self.FieldStorageClass or self.__class__

    # The feed parser only splits on the boundary if it first sees a
    # Content-Type header carrying it, so a synthetic one is fed in front.
    feeder = email.parser.FeedParser()
    feeder.feed('Content-Type: %s; boundary=%s\r\n\r\n' % (self.type, boundary))
    feeder.feed(self.fp.read())
    document = feeder.close()

    # Wrap every sub-message in its own storage object.
    for sub_message in document.get_payload():
        body = StringIO(sub_message.get_payload())
        self.list.append(
            part_class(body, sub_message, boundary, environ,
                       keep_blank_values, strict_parsing))
    self.skip_lines()
def read_record(stream):
    """Read one header block, plus its body if any, from a binary stream.

    Leading lines that are exactly ``b"\\n"`` are skipped. Header lines are
    then fed to a bytes feed parser up to and including the first blank line.
    When a ``Content-length`` header is present, that many bytes are read as
    the record content.

    Returns:
        ``(message, content)``; ``content`` is ``None`` when the record has
        no ``Content-length`` header.

    Raises:
        EOFError: if the stream is exhausted before a record starts.
    """
    # Record bodies are supposed to be followed by a blank separator line,
    # and Node-path records tend to have one or two extra blank lines.
    line = stream.readline()
    while line == b"\n":
        line = stream.readline()
    if not line:
        raise EOFError()

    feeder = email.parser.BytesFeedParser()
    while True:
        assert line.endswith(b"\n")
        feeder.feed(line)
        if not line.rstrip(b"\r\n"):
            # The blank line ends the header block.
            break
        line = stream.readline()
    message = feeder.close()

    for defect in message.defects:
        warn(f"{stream.name}: {defect!r}")

    lengths = message.get_all("Content-length")
    if not lengths:
        return (message, None)
    # Exactly one Content-length header is expected; unpacking enforces it.
    (raw_length,) = lengths
    size = int(raw_length)
    content = stream.read(size)
    assert len(content) == size
    return (message, content)
def test_slo_multi_ranged_get(self):
    """A two-range GET on the SLO manifest yields a two-part MIME body."""
    manifest = self.env.container.file('manifest-abcde')
    body = manifest.read(
        hdrs={"Range": "bytes=1048571-1048580,2097147-2097156"})

    # See testMultiRangeGets for explanation
    feeder = email.parser.FeedParser()
    feeder.feed("Content-Type: %s\r\n\r\n" % manifest.content_type)
    feeder.feed(body)
    document = feeder.close()

    self.assertTrue(document.is_multipart())  # sanity check

    parts = document.get_payload()
    self.assertEqual(len(parts), 2)

    expected = (
        ("bytes 1048571-1048580/4194305", "aaaaabbbbb"),
        ("bytes 2097147-2097156/4194305", "bbbbbccccc"),
    )
    for part, (content_range, payload) in zip(parts, expected):
        self.assertEqual(part['Content-Type'], "application/octet-stream")
        self.assertEqual(part['Content-Range'], content_range)
        self.assertEqual(part.get_payload(), payload)
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    ``self.list`` is reset and refilled with the query-string fields (when
    ``self.qs_on_post`` is set) followed by one storage object for each
    sub-part found in ``self.fp``.

    Raises:
        ValueError: if ``self.innerboundary`` is not a valid boundary.
    """
    inner = self.innerboundary
    if not valid_boundary(inner):
        raise ValueError('Invalid boundary in multipart form: %r' % (inner,))

    self.list = []
    if self.qs_on_post:
        query_fields = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing)
        for field_name, field_value in query_fields:
            self.list.append(MiniFieldStorage(field_name, field_value))

    storage_class = self.FieldStorageClass or self.__class__

    # Create bogus content-type header for proper multipart parsing
    bogus_header = 'Content-Type: %s; boundary=%s\r\n\r\n' % (self.type, inner)
    mime_parser = email.parser.FeedParser()
    mime_parser.feed(bogus_header)
    mime_parser.feed(self.fp.read())
    whole = mime_parser.close()

    # One storage object per sub-part
    for sub in whole.get_payload():
        sub_fp = StringIO(sub.get_payload())
        sub_storage = storage_class(sub_fp, sub, inner, environ,
                                    keep_blank_values, strict_parsing)
        self.list.append(sub_storage)
    self.skip_lines()
def get_classpath_from_manifest(manifest):
    """Return the ``Class-Path`` entries declared in a JAR manifest.

    The manifest text is parsed as a block of RFC 822-style headers. A
    folded ``Class-Path`` value (line break followed by a single space) is
    joined back together whichever line terminator the manifest uses:
    CRLF, LF or CR.

    Args:
        manifest: Manifest contents as text.

    Returns:
        The class-path entries in declaration order. The list is empty when
        the header is missing or blank; empty entries produced by repeated
        spaces are dropped.
    """
    parser = email.parser.FeedParser()
    parser.feed(manifest)
    cp = parser.close().get('Class-Path')
    if cp is None:
        return []

    # Unfold continuation lines. CRLF goes first so its LF is not consumed
    # by the bare-LF rule, which would leave a stray CR behind.
    for fold in ('\r\n ', '\n ', '\r '):
        cp = cp.replace(fold, '')
    # Kept from the original: drop any remaining CRLF that was not a fold.
    cp = cp.replace('\r\n', '')

    return [entry for entry in cp.split(' ') if entry]
def process_single_email(filep, user):
    """Read one raw email from *filep*, parse it and process it for *user*.

    *filep* is closed once its content has been read, unless it is
    ``sys.stdin``. The parsed message is handed to ``process_message``.
    """
    raw = filep.read()
    # Same behavior as before refactoring
    if filep != sys.stdin:
        filep.close()

    feeder = email.parser.FeedParser()
    feeder.feed(raw)
    process_message(feeder.close(), user)
def parse_email(handle):
    """Parse the raw email text read from *handle* and return the message.

    Raises:
        Exception: with the text ``file format not valid`` when the parser
            rejects the data while it is being fed.
    """
    text = handle.read()
    feeder = email.parser.FeedParser()
    try:
        feeder.feed(text)
    except Exception:
        raise Exception('file format not valid')
    return feeder.close()
def all_messages(self):
    """Yield ``(number, message)`` for every message listed by ``self._conn``.

    Numbers run from 1 to the length of the listing returned by
    ``self._conn.list()``. Each message is retrieved with ``retr`` and
    parsed from its lines, a newline being fed after every line.
    """
    count = len(self._conn.list()[1])
    for number in range(1, count + 1):
        _response, lines, _octets = self._conn.retr(number)
        feeder = email.parser.BytesFeedParser()
        for raw_line in lines:
            feeder.feed(raw_line)
            feeder.feed(b'\n')
        yield (number, feeder.close())
def __iter__(self):
    """Yield the decoded JSON body of every part in the multipart stream.

    ``self._f`` is read line by line and closed when iteration ends. Each
    time a line equal to ``--<boundary>`` is met, the header lines that
    follow are parsed, ``Content-Length`` bytes are read, and the JSON
    document they contain is yielded.
    """
    with contextlib.closing(self._f) as stream:
        while True:
            line = stream.readline()
            if not line:
                break
            if line.rstrip() != '--{0}'.format(self._boundary).encode():
                continue

            # Headers run up to the first blank line after the boundary.
            feeder = email.parser.FeedParser()
            line = stream.readline()
            while line.rstrip():
                feeder.feed(line.decode())
                line = stream.readline()

            size = int(feeder.close()['Content-Length'])
            yield json.loads(stream.read(size).decode())
def parse_message(data):
    """Parse raw email text into a plain dictionary.

    Returns:
        A dict with the keys ``multipart``, ``headers``, ``message_id``,
        ``in_reply_to`` and ``references``, plus ``parts`` (a list of
        per-part dicts) for multipart messages or ``payload`` otherwise.
        A message without a Message-ID gets one from ``h.gen_message_id()``.
    """
    # https://bugs.python.org/issue25545 says
    # > A unicode string has no RFC definition as an email, so things do not
    # > work right...
    # > You do have to conditionalize your 2/3 code to use the bytes parser
    # > and generator if you are dealing with 8-bit messages. There's just
    # > no way around that.
    if six.PY2:
        feeder = email.feedparser.FeedParser()
        feeder.feed(data)
        msg = feeder.close()
    else:
        # works the same as BytesFeedParser, and better than non-"Bytes"
        # parsers for some messages
        msg = email.parser.BytesParser().parsebytes(data.encode('utf-8'))

    # Extract relevant data
    is_multipart = msg.is_multipart()
    result = {
        'multipart': is_multipart,
        'headers': dict(msg),
        'message_id': _parse_message_id(msg.get('Message-ID')),
        'in_reply_to': _parse_message_id(msg.get('In-Reply-To')),
        'references': _parse_message_id(msg.get('References')),
    }
    if result['message_id'] == []:
        result['message_id'] = h.gen_message_id()
    else:
        result['message_id'] = result['message_id'][0]

    if not is_multipart:
        result['payload'] = msg.get_payload(decode=True)
        # payload is sometimes already unicode (due to being saved in mongo?)
        if msg.get_content_maintype() == 'text':
            result['payload'] = six.ensure_text(result['payload'])
        return result

    result['parts'] = []
    for part in msg.walk():
        entry = dict(
            headers=dict(part),
            message_id=result['message_id'],
            in_reply_to=result['in_reply_to'],
            references=result['references'],
            content_type=part.get_content_type(),
            filename=part.get_filename(None),
            payload=part.get_payload(decode=True))
        # payload is sometimes already unicode (due to being saved in mongo?)
        if part.get_content_maintype() == 'text':
            entry['payload'] = six.ensure_text(entry['payload'])
        result['parts'].append(entry)
    return result
def get_cgi_reports(package, timeout, system='debian', http_proxy='',
                    archived=False, source=False, version=None):
    """Fetch the CGI bug listing for *package* and parse it.

    The URL comes from ``cgi_package_url`` and is opened with ``open_url``.
    Every line of the page is decoded as UTF-8 and fed to a
    ``BTSParser(cgi=True)``.

    Returns:
        ``(0, None, None)`` when ``open_url`` returns a false value,
        otherwise ``(parser.bugcount, parser.title, parser.hierarchy)``
        taken after ``parser.reorganize()``.

    Raises:
        NoNetwork: if building or opening the URL raises anything at all.
    """
    try:
        page = open_url(cgi_package_url(system, package, archived, source,
                                        version=version),
                        http_proxy, timeout)
    except:
        raise NoNetwork

    if not page:
        return (0, None, None)

    #content = page.read()
    #if 'Maintainer' not in content:
    #    return (0, None, None)

    parser = BTSParser(cgi=True)
    for line in page:
        try:
            line = line.decode('utf-8')  # BTS pages are encoded in utf-8
        except UnicodeDecodeError:
            # page has a bad char
            line = line.decode('utf-8', 'replace')
        parser.feed(line)
    parser.close()

    # Best effort: overwrite the recv attribute of the underlying socket
    # before closing; any failure (e.g. missing attribute) is ignored.
    # NOTE(review): the purpose of this hack is not visible here -- confirm.
    try:
        page.fp._sock.recv = None
    except:
        pass
    page.close()

    # Reorganize hierarchy to put recently-fixed bugs at top
    parser.reorganize()

    # Morph @ 2008-08-15; due to BTS output format changes
    # The entry is removed when present; its absence is silently ignored.
    try:
        parser.hierarchy.remove(('Select bugs', []))
    except:
        pass

    data = (parser.bugcount, parser.title, parser.hierarchy)
    del parser

    return data
def parse_html_report(number, url, http_proxy, timeout, followups=False, cgi=True):
    """Download one bug report page and split it into displayable items.

    The page is parsed with ``BTSParser``. Each block in ``parser.preblock``
    (a single block unless *followups* is true) is cut at its first blank
    line: a ``Date:`` line found in the leading part is kept, the rest
    becomes the item text.

    Returns:
        ``None`` when the page cannot be opened or no block has any text,
        otherwise ``(title, items)`` where *title* is
        ``"#<number>: <parser.title>"``.
    """
    page = open_url(url, http_proxy, timeout)
    if not page:
        return None

    parser = BTSParser(cgi=cgi, followups=followups)
    for line in page:
        parser.feed(line)
    parser.close()

    # Best effort socket tweak before closing; failures are ignored.
    try:
        page.fp._sock.recv = None
    except:
        pass
    page.close()

    blocks = parser.preblock
    title = "#%d: %s" % (number, parser.title)
    if not followups:
        blocks = [blocks]

    output = []
    for block in blocks:
        # Everything before the first blank line is the header part.
        head, _sep, remainder = block.partition('\n\n')
        found = re.search('^Date: (.*)$', head, re.M | re.I)
        date_submitted = ('Date: %s\n' % found.group(1)) if found else ''

        text = remainder.rstrip()
        if text:
            output.append(date_submitted + text + os.linesep)

    if output:
        return (title, output)
    return None
def process_message(self, peer, mailfrom, rcpttos, data): parser = email.parser.FeedParser() parser.feed(data) msg = parser.close() toAddress = msg['To'] fromAddress = msg['From'] subject = msg['Subject'] body = self._bmformat(msg) #Make sure we don't send an actually blank subject or body--this can cause problems. if not subject: subject = ' ' if not body: body = ' ' if bminterface.send(toAddress, fromAddress, subject, body): print "Message queued for sending..." else: print "There was an error trying to send the message..." return 0
def process_message(self, peer, mailfrom, rcpttos, data):
    """Parse an incoming message and pass it to ``bminterface.send``.

    The Subject header is decoded with ``email.header.decode_header``
    (chunks without a charset are read as UTF-8) and re-encoded as UTF-8.
    The body comes from ``self._bmformat``. The outcome is logged. Always
    returns 0.
    """
    feeder = email.parser.FeedParser()
    feeder.feed(data)
    msg = feeder.close()

    toAddress = msg['To']
    fromAddress = msg['From']

    chunks = email.header.decode_header(msg['Subject'])
    decoded = (unicode(chunk[0], chunk[1] or 'UTF-8') for chunk in chunks)
    subject = u' '.join(decoded).encode('UTF-8')
    body = self._bmformat(msg)

    #Make sure we don't send an actually blank subject or body--this can cause problems.
    subject = subject or ' '
    body = body or ' '

    queued = bminterface.send(toAddress, fromAddress, subject, body)
    if not queued:
        logging.info("There was an error trying to send the message...")
    else:
        logging.info("Message queued for sending...")

    return 0
async def headers(self):
    """Read a header block one character at a time and parse it.

    On entry ``self.c`` holds the first character of the headers. Each
    character is decoded as latin-1 and fed to a feed parser. Reading stops
    when ``self.c`` becomes falsy or when a newline arrives on a line that
    held nothing but carriage returns.

    Returns:
        The message produced by the parser from the characters read.

    Raises:
        ExcessError: if 30000 characters are consumed without reaching the
            end of the header block.
    """
    feeder = email.parser.FeedParser()
    line_is_empty = True
    for _ in range(30000):
        current = self.c
        if not current:
            break
        feeder.feed(current.decode("latin-1"))
        if current == b"\n":
            if line_is_empty:
                # A newline ending an empty line terminates the headers.
                break
            line_is_empty = True
        elif current != b"\r":
            line_is_empty = False
        await self.next_char()
    else:
        raise ExcessError("30000 or more headers")
    return feeder.close()
def process_message(self, peer, mailfrom, rcpttos, data): parser = email.parser.FeedParser() parser.feed(data) msg = parser.close() toAddress = msg['To'] fromAddress = msg['From'] subject = u' '.join( unicode(t[0], t[1] or 'UTF-8') for t in email.header.decode_header( msg['Subject'])).encode('UTF-8') body = self._bmformat(msg) #Make sure we don't send an actually blank subject or body--this can cause problems. if not subject: subject = ' ' if not body: body = ' ' if bminterface.send(toAddress, fromAddress, subject, body): print "Message queued for sending..." else: print "There was an error trying to send the message..." return 0
def parse_message(raw):
    """Parse the email text *raw* and return the resulting message object."""
    feeder = email.parser.FeedParser()
    feeder.feed(raw)
    message = feeder.close()
    return message
sys.exit(1) path = sys.argv[1] conn = sqlite3.connect('spamstat.db') c = conn.cursor() c.execute('CREATE TABLE IF NOT EXISTS spam (file TEXT PRIMARY KEY, msgid TEXT, recv TEXT, date DATE)') for f in os.listdir(path): fp = open(path + '/' + f) parser = email.parser.FeedParser() for line in fp: if line == "\n": message = parser.close() break parser.feed(line) fp.close() id = message['Message-ID'] to = email.utils.parseaddr(message['To'])[1] date_raw = email.utils.parsedate(message['Date']) if date_raw: date = time.mktime(date_raw) else: date = time.time() row = (f, id, to, date) c.execute('REPLACE INTO spam VALUES (?,?,?,?)', row) conn.commit()
def response_to_msg(resp: httpx.Response) -> email.message.EmailMessage:
    """Rebuild *resp* as an email message parsed with the HTTP policy.

    Every response header is written as a ``name: value`` line (latin-1
    encoded), followed by a blank line and the response content.
    """
    feeder = email.parser.BytesFeedParser(policy=email.policy.HTTP)
    for name, value in resp.headers.items():
        header_line = b"".join(
            (name.encode("latin-1"), b": ", value.encode("latin-1"), b"\r\n"))
        feeder.feed(header_line)
    # The blank line separates the headers from the body.
    feeder.feed(b"\r\n")
    feeder.feed(resp.content)
    message = feeder.close()
    return cast(email.message.EmailMessage, message)
formatter = logging.Formatter('%(name)s: [%(levelname)s] %(message)s') handler.setFormatter(formatter) log.addHandler(handler) # Read config try: config = configparser.ConfigParser() cfgfile = os.path.splitext(os.path.abspath(__file__))[0] + '.ini' if not config.read(cfgfile): raise RuntimeError('Config file %s not found', cfgfile) # Reads STDIN until two consecutive empty line detected (end of email) parser = email.parser.FeedParser() for line in sys.stdin: parser.feed(line) msg = parser.close() # Parse the message log.debug('message received, %d characters, subject: %s', len(str(msg)), msg['Subject']) # Try to match the message job = None for sec in config.sections(): if sec[:4] == 'job_': j = sec[4:] matched = None for k, v in config.items(sec): if k[0] != '_': if k in msg and re.match(v, msg[k]):
def deliver (): config = Config.factory() set_source('deliver') set_level(config.get('logging_level')) try: recipient = sys.argv[1] except IndexError as e: error('No recipient specified.') return notice("Delivering mail for '%s'" % recipient) data = [] size = 0 while True: _data = sys.stdin.read(4096) if len(_data) == 0: break size += len(_data) if size > config.get('max_message_size'): error("Maximum message size exceeded.") sys.exit(-1) data.append(_data) data = ''.join(data) archive_dir = config.get('archive_dir') archive_file = False if not archive_dir == '': if not os.path.isdir(archive_dir): error("Archive dir not found") else: archive_file = os.path.join(archive_dir, hashlib.sha1('%f' % time.time()).hexdigest()) with open(archive_file, 'w') as fp: fp.write(data) parser = email.parser.FeedParser() parser.feed(data) message = parser.close() doc = dict([(a.lower(), b) for a, b in message.items()]) payload = [] for part in message.walk(): if part.get_content_type() in ['text/plain', 'text/html']: payload.append({'content-type': part.get_content_type(), 'payload': part.get_payload()}) doc['payload'] = payload doc['mailbox'] = 'inbox' doc['tags'] = [] try: server = couch.Server('http://%s:%d' % (config.get('couchdb_host'), config.get('couchdb_port'))) users_db = server[config.get('couchdb_users')] db = server[users_db[recipient]['couchdb']] db.save(doc) except couchdb.http.ResourceNotFound as e: print e
def get_reply_string_with_custom_message_v2(payload, headers, isMultipart):
    """Extract the text a user typed above the "reply above this line" marker.

    Args:
        payload: For multipart mails an object offering ``get_payload()``,
            ``keys()`` and ``get()``; otherwise the body string itself.
        headers: Mapping indexed with ``'X-Mailer'`` to pick the
            client-specific clean-up.
        isMultipart: Selects how *payload* is interpreted (see above).

    Returns:
        The stripped reply text, or ``"NOT_FOUND"`` when neither marker
        sentence occurs in the decoded body.
    """
    con = "NOT_FOUND"
    if isMultipart:
        bodyString = payload.get_payload()
        ''' in case of multipart mails, check for the payload's header to identify Content-Type and Content-Transfer-Encoding '''
        if 'Content-Transfer-Encoding' in payload.keys():
            con = payload.get('Content-Transfer-Encoding')
        else:
            con = "NOT_FOUND"
    else:
        ''' if not multipart, payload itself is the bodyString '''
        bodyString = payload
    # print bodyString
    print "CONTENT-TRANSFER-ENCODING : " + str(con)
    # Anything that is not base64 -- including the "NOT_FOUND" placeholder --
    # is decoded as quoted-printable.
    if con.lower() == "base64":
        bodyString = base64.b64decode(bodyString)
    else:
        bodyString = quopri.decodestring(bodyString)
    ''' checking different custom messages being used. Also account for different translations of the message for i18n '''
    index = bodyString.find("Please reply above this line")
    if index == -1:
        index = bodyString.find("Please write your reply above this line")
    if index == -1:
        replyString = "NOT_FOUND"
    else:
        ''' 1. strip by the custom message '''
        # The slice stops at index-1, so the character right before the
        # marker is dropped as well.
        replyString = bodyString[0:index-1]
        ''' 2. strip by the custom message encoding, if at all '''
        # NOTE(review): con is compared case-sensitively here although the
        # base64 test above lower-cases it -- confirm this is intended.
        if con == "quoted-printable":
            index = replyString.find("=3D=3D=3D=3D=3D")
            replyString = replyString[0:index-1]
        ''' 3. strip by the sent notice '''
        # NOTE(review): when rfind() returns -1 the slice becomes [0:-2] and
        # silently drops the last two characters; same for step 4.
        index = replyString.rfind("From:")
        replyString = replyString[0:index-1]
        ''' 4. strip by the written notice from the sender '''
        index = replyString.rfind("On")
        replyString = replyString[0:index-1]
        ''' 5. CLIENT_DEPENDENT_PROCESSING '''
        ''' For MS Outlook '''
        if headers['X-Mailer'] is not None and headers['X-Mailer'].lower().find('outlook') != -1:
            ''' check for additional headers in newer versions of outlook > 12.0 :- '''
            newer = 0
            index = replyString.lower().find("_NextPart_".lower())
            if index != -1:
                newer = 1
            if newer:
                # Fixed offsets skip the matched keyword plus a presumed
                # header value length -- TODO confirm the 10/15/42 constants.
                replyString = replyString[index+1+10:]
                ''' further improvement could be searching & stripping all possible values of Content-Type :: standardised constant '''
                index = replyString.lower().find("Content-Type".lower())
                replyString = replyString[index+1+15:]
                ''' further improvement could be searching & stripping all possible values of Content-Transfer-Encoding :: standardised constant '''
                index = replyString.lower().find("Content-Transfer-Encoding".lower())
                replyString = replyString[index+1+42:]
        ''' For Lotus Notes '''
        if headers['X-Mailer'] is not None and headers['X-Mailer'].lower().find('lotus notes') != -1:
            ''' further improvement could be searching & stripping all possible values of Content-Type :: standardised constant '''
            # NOTE(review): if the keyword is absent, find() returns -1 and
            # the slice still removes the first 15 (then 42) characters.
            index = replyString.lower().find("Content-Type".lower())
            replyString = replyString[index+1+15:]
            ''' further improvement could be searching & stripping all possible values of Content-Transfer-Encoding :: standardised constant '''
            index = replyString.lower().find("Content-Transfer-Encoding".lower())
            replyString = replyString[index+1+42:]
        ''' For Blackberry '''
        if headers['X-Mailer'] is not None and headers['X-Mailer'].lower().find('blackberry') != -1:
            ''' since Blackberry encodes messages in the form of HTML, we need to parse HTML in the body string. '''
            parser = CustomHTMLParser()
            parser.feed(replyString)
            # parsedHTMLContent is not defined in this function; presumably a
            # module-level list filled by CustomHTMLParser -- verify.
            replyString = ''.join(parsedHTMLContent)
            ''' further improvement could be searching & stripping all possible values of Content-Type :: standardised constant '''
            index = replyString.lower().find("Content-Type".lower())
            replyString = replyString[index+1+15:]
            ''' further improvement could be searching & stripping all possible values of Content-Transfer-Encoding :: standardised constant '''
            index = replyString.lower().find("Content-Transfer-Encoding".lower())
            replyString = replyString[index+1+42:]
            ''' find and strip remaining Blackberry-specific email headers '''
            index = 0
            # NOTE(review): the final pass (find() == -1) still slices off the
            # first 10 characters before the loop ends.
            while index != -1:
                index = replyString.lower().find("X-".lower())
                replyString = replyString[index+1+10:]
        ''' try and remove all blank lines / spaces at the beginning of lines encoded by the client '''
        ''' temp = [] tempString = replyString.split("\n") for string in tempString : if string != "\r" : temp.append(string.strip()) replyString = " ".join(temp) '''
    return replyString.strip()
def test_get_object_range(self):
    """Ranged GETs answer 206 with the requested bytes, single and multi."""
    obj = 'object'
    content = b'abcdefghij'
    self.conn.make_request('PUT', self.bucket, obj,
                           headers={'x-amz-meta-test': 'swift'},
                           body=content)

    # Three single-range forms, each selecting five bytes.
    single_ranges = (
        ('bytes=1-5', b'bcdef'),
        ('bytes=5-', b'fghij'),
        ('bytes=-5', b'fghij'),
    )
    for range_header, expected_body in single_ranges:
        status, headers, body = self.conn.make_request(
            'GET', self.bucket, obj, headers={'Range': range_header})
        self.assertEqual(status, 206)
        self.assertCommonResponseHeaders(headers)
        self.assertTrue('content-length' in headers)
        self.assertEqual(headers['content-length'], '5')
        self.assertTrue('x-amz-meta-test' in headers)
        self.assertEqual('swift', headers['x-amz-meta-test'])
        self.assertEqual(body, expected_body)

    # Multiple ranges come back as a multipart/byteranges document.
    ranges = ['1-2', '4-5']
    status, headers, body = self.conn.make_request(
        'GET', self.bucket, obj,
        headers={'Range': 'bytes=%s' % ','.join(ranges)})
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertIn('content-length', headers)

    self.assertIn('content-type', headers)  # sanity
    content_type, boundary = headers['content-type'].split(';')
    self.assertEqual('multipart/byteranges', content_type)
    self.assertTrue(boundary.startswith('boundary='))  # sanity
    boundary_str = boundary[len('boundary='):]

    # TODO: Using swift.common.utils.multipart_byteranges_to_document_iters
    #       could be easy enough.
    if six.PY2:
        feeder = email.parser.FeedParser()
    else:
        feeder = email.parser.BytesFeedParser()
    feeder.feed(
        b"Content-Type: multipart/byterange; boundary=%s\r\n\r\n" %
        boundary_str.encode('ascii'))
    feeder.feed(body)
    message = feeder.close()

    self.assertTrue(message.is_multipart())  # sanity check
    mime_parts = message.get_payload()
    self.assertEqual(len(mime_parts), len(ranges))  # sanity

    for position, range_value in enumerate(ranges):
        first, last = map(int, range_value.split('-'))
        # go to next section and check sanity
        self.assertTrue(mime_parts[position])
        part = mime_parts[position]

        self.assertEqual('application/octet-stream',
                         part.get_content_type())
        self.assertEqual('bytes %s/%s' % (range_value, len(content)),
                         part.get('Content-Range'))
        # rest
        payload = part.get_payload(decode=True).strip()
        self.assertEqual(content[first:last + 1], payload)
def get_reports(package, timeout, system='debian', mirrors=None, version=None,
                http_proxy='', archived=False, source=False):
    """Return ``(bug count, title, hierarchy)`` for a package or bug list.

    For ``system == 'debian'`` the bugs are looked up through ``debianbts``
    and grouped by severity in ``SEVLIST`` order. For any other system a
    string *package* is resolved by scraping the BTS pages, and a
    non-string *package* is treated as an iterable of bug numbers fetched
    one by one with ``get_report``.

    Raises:
        NoNetwork: if fetching the CGI reports or opening the package URL
            raises anything (non-debian string path only).
    """
    if system == 'debian':
        if isinstance(package, basestring):
            if source:
                pkg_filter = 'src'
            else:
                pkg_filter = 'package'
            bugs = debianbts.get_bugs(pkg_filter, package)
        else:
            bugs = map(int, package)

        # retrieve bugs and generate the hierarchy
        stats = debianbts.get_status(bugs)

        d = defaultdict(list)
        for s in stats:
            # We now return debianbts.Bugreport objects, containing all the info
            # for a bug, so UIs can extract them as needed
            d[s.severity].append(s)

        # keep the bugs ordered per severity
        # XXX: shouldn't it be something UI-related?
        #
        # The hierarchy is a list of tuples:
        #     (description of the severity, list of bugs for that severity)
        hier = []
        for sev in SEVLIST:
            if sev in d:
                hier.append(('Bugs with severity %s' % sev, d[sev]))

        return (len(bugs), 'Bug reports for %s' % package, hier)

    # XXX: is the code below used at all now? can we remove it?
    if isinstance(package, basestring):
        # Prefer the CGI interface when the system declares one; fall back
        # to the static package page when it returns a false result.
        if SYSTEMS[system].get('cgiroot'):
            try:
                result = get_cgi_reports(package, timeout, system, http_proxy,
                                         archived, source, version=version)
            except:
                raise NoNetwork
            if result:
                return result

        url = package_url(system, package, mirrors, source)
        try:
            page = open_url(url, http_proxy, timeout)
        except:
            raise NoNetwork
        if not page:
            return (0, None, None)

        #content = page.read()
        #if 'Maintainer' not in content:
        #    return (0, None, None)

        parser = BTSParser()
        for line in page:
            parser.feed(line)
        parser.close()

        # Best effort socket tweak before closing; failures are ignored.
        try:
            page.fp._sock.recv = None
        except:
            pass
        page.close()

        return parser.bugcount, parser.title, parser.hierarchy

    # A list of bug numbers
    this_hierarchy = []
    package = [int(x) for x in package]
    package.sort()
    for bug in package:
        result = get_report(bug, timeout, system, mirrors, http_proxy,
                            archived)
        # Bugs for which get_report returns a false value are left out.
        if result:
            title, body = result
            this_hierarchy.append(title)
            #print title

    title = "Multiple bug reports"
    bugcount = len(this_hierarchy)
    hierarchy = [('Reports', this_hierarchy)]

    return bugcount, title, hierarchy
def get_reports(package, timeout, system='debian', mirrors=None, version=None,
                http_proxy='', archived=False, source=False):
    """Return ``(bug count, title, hierarchy)`` for a package or bug list.

    For ``system == 'debian'`` the bugs are looked up through ``debianbts``
    and grouped by severity in ``SEVLIST`` order. For any other system a
    string *package* is resolved by scraping the BTS pages, and a
    non-string *package* is treated as an iterable of bug numbers fetched
    one by one with ``get_report``.

    Raises:
        QuertBTSError: if ``debianbts.get_status`` raises anything.
        NoNetwork: if fetching the CGI reports or opening the package URL
            raises anything (non-debian string path only).
    """
    if system == 'debian':
        if isinstance(package, str):
            if source:
                pkg_filter = 'src'
            else:
                pkg_filter = 'package'
            bugs = debianbts.get_bugs(pkg_filter, package)
        else:
            bugs = list(map(int, package))

        try:
            # retrieve bugs and generate the hierarchy
            stats = debianbts.get_status(bugs)
        except:
            # NOTE(review): "QuertBTSError" looks like a misspelling of
            # "Query"; confirm it matches the exception's definition.
            raise QuertBTSError

        d = defaultdict(list)
        for s in stats:
            # We now return debianbts.Bugreport objects, containing all the info
            # for a bug, so UIs can extract them as needed
            d[s.severity].append(s)

        # keep the bugs ordered per severity
        # XXX: shouldn't it be something UI-related?
        #
        # The hierarchy is a list of tuples:
        #     (description of the severity, list of bugs for that severity)
        hier = []
        for sev in SEVLIST:
            if sev in d:
                hier.append(('Bugs with severity %s' % sev, d[sev]))

        return (len(bugs), 'Bug reports for %s' % package, hier)

    # XXX: is the code below used at all now? can we remove it?
    if isinstance(package, str):
        # Prefer the CGI interface when the system declares one; fall back
        # to the static package page when it returns a false result.
        if SYSTEMS[system].get('cgiroot'):
            try:
                result = get_cgi_reports(package, timeout, system, http_proxy,
                                         archived, source, version=version)
            except:
                raise NoNetwork
            if result:
                return result

        url = package_url(system, package, mirrors, source)
        try:
            page = open_url(url, http_proxy, timeout)
        except:
            raise NoNetwork
        if not page:
            return (0, None, None)

        # content = page.read()
        # if 'Maintainer' not in content:
        #    return (0, None, None)

        parser = BTSParser()
        # presumably open_url returns the page text here, since it is split
        # with splitlines() -- verify against open_url.
        for line in page.splitlines():
            parser.feed(line)
        parser.close()

        return parser.bugcount, parser.title, parser.hierarchy

    # A list of bug numbers
    this_hierarchy = []
    package = [int(x) for x in package]
    package.sort()
    for bug in package:
        result = get_report(bug, timeout, system, mirrors, http_proxy,
                            archived)
        # Bugs for which get_report returns a false value are left out.
        if result:
            title, body = result
            this_hierarchy.append(title)
            # print title

    title = "Multiple bug reports"
    bugcount = len(this_hierarchy)
    hierarchy = [('Reports', this_hierarchy)]

    return bugcount, title, hierarchy
with open("licenses/licenses.txt", mode="w", encoding="UTF-8") as out: for p in packages: print("processing " + p.project_name) if usereqlist and p.project_name not in reqlist: print("skipped, not in package list") continue row = p.project_name row += " " row += p.parsed_version.public if (p.has_metadata(p.PKG_INFO)): parser = email.parser.FeedParser() parser.feed(p.get_metadata(p.PKG_INFO)) headers = parser.close() if "license" in headers: row += " " + headers["license"] else: row += " unknown" if "home-page" in headers: url = urllib.parse.urlparse(headers["home-page"]) row += " " + url.geturl() urlpath = url.path if not urlpath.endswith("/"):
def pull(
    self, path: str, *, encoding: str = 'utf-8'
) -> typing.Union[typing.BinaryIO, typing.TextIO]:
    """Read a file's content from the remote system.

    Args:
        path: Path of the file to read from the remote system.
        encoding: Encoding to use for decoding the file's bytes to str,
            or None to specify no decoding.

    Returns:
        A readable file-like object, whose read() method will return str
        objects decoded according to the specified encoding, or bytes if
        encoding is None.

    Raises:
        ProtocolError: if the response has no boundary, names a different
            path, or lacks the "response" field or the file content.
    """
    response = self._request_raw(
        'GET', '/v1/files',
        {'action': 'read', 'path': path},
        {'Accept': 'multipart/form-data'})

    options = self._ensure_content_type(response.headers, 'multipart/form-data')
    boundary = options.get('boundary', '')
    if not boundary:
        raise ProtocolError('invalid boundary {!r}'.format(boundary))

    # email.parser expects the entire multipart message with headers, so the
    # Content-Type line carrying the boundary is written by hand first.
    feeder = email.parser.BytesFeedParser()
    feeder.feed(b''.join((
        b'Content-Type: multipart/form-data; boundary=',
        boundary.encode('utf-8'),
        b'\r\n\r\n',
    )))

    # Stream the rest of the response into the parser.
    while True:
        chunk = response.read(8192)
        if not chunk:
            break
        feeder.feed(chunk)
    message = feeder.close()

    # Collect the JSON "response" field and the file content from the parts.
    resp = None
    content = None
    for part in message.walk():
        field = part.get_param('name', header='Content-Disposition')
        if field == 'files':
            filename = part.get_filename()
            if filename != path:
                raise ProtocolError(
                    'path not expected: {}'.format(filename))
            # decode=True, ironically, avoids decoding bytes to str
            content = part.get_payload(decode=True)
        elif field == 'response':
            resp = _json_loads(part.get_payload())

    if resp is None:
        raise ProtocolError('no "response" field in multipart body')
    self._raise_on_path_error(resp, path)

    if content is None:
        raise ProtocolError('no file content in multipart response')

    if encoding is None:
        return io.BytesIO(content)
    return io.StringIO(content.decode(encoding))
def test_get_object_range(self):
    """Ranged GETs answer 206 with the requested bytes, single and multi."""
    obj = 'object'
    content = 'abcdefghij'
    self.conn.make_request(
        'PUT', self.bucket, obj,
        headers={'x-amz-meta-test': 'swift'}, body=content)

    # Three single-range forms, each selecting five bytes.
    single_ranges = (
        ('bytes=1-5', 'bcdef'),
        ('bytes=5-', 'fghij'),
        ('bytes=-5', 'fghij'),
    )
    for range_header, expected_body in single_ranges:
        status, headers, body = self.conn.make_request(
            'GET', self.bucket, obj, headers={'Range': range_header})
        self.assertEqual(status, 206)
        self.assertCommonResponseHeaders(headers)
        self.assertTrue('content-length' in headers)
        self.assertEqual(headers['content-length'], '5')
        self.assertTrue('x-amz-meta-test' in headers)
        self.assertEqual('swift', headers['x-amz-meta-test'])
        self.assertEqual(body, expected_body)

    # Multiple ranges come back as a multipart/byteranges document.
    ranges = ['1-2', '4-5']
    status, headers, body = self.conn.make_request(
        'GET', self.bucket, obj,
        headers={'Range': 'bytes=%s' % ','.join(ranges)})
    self.assertEqual(status, 206)
    self.assertCommonResponseHeaders(headers)
    self.assertTrue('content-length' in headers)

    self.assertTrue('content-type' in headers)  # sanity
    content_type, boundary = headers['content-type'].split(';')
    self.assertEqual('multipart/byteranges', content_type)
    self.assertTrue(boundary.startswith('boundary='))  # sanity
    boundary_str = boundary[len('boundary='):]

    # TODO: Using swift.common.utils.multipart_byteranges_to_document_iters
    #       could be easy enough.
    feeder = email.parser.FeedParser()
    feeder.feed(
        "Content-Type: multipart/byterange; boundary=%s\r\n\r\n" %
        boundary_str)
    feeder.feed(body)
    message = feeder.close()

    self.assertTrue(message.is_multipart())  # sanity check
    mime_parts = message.get_payload()
    self.assertEqual(len(mime_parts), len(ranges))  # sanity

    for position, range_value in enumerate(ranges):
        first, last = map(int, range_value.split('-'))
        # go to next section and check sanity
        self.assertTrue(mime_parts[position])
        part = mime_parts[position]

        self.assertEqual(
            'application/octet-stream', part.get_content_type())
        self.assertEqual(
            'bytes %s/%s' % (range_value, len(content)),
            part.get('Content-Range'))
        # rest
        payload = part.get_payload().strip()
        self.assertEqual(content[first:last + 1], payload)