def test_lxml_etree_bug(self):
    """Parse an ill-formed feed once lxml.etree has been imported.

    Importing lxml.etree currently causes libxml2 to throw SAXException
    instead of SAXParseException. The test passes as long as
    feedparser.parse() returns without raising; when lxml is not
    installed there is nothing to check and the test does nothing.
    """
    try:
        import lxml.etree
    except ImportError:
        # lxml is optional: without it the problematic import never happens.
        return

    malformed_feed = u"<feed>&illformed_charref</feed>".encode('utf8')
    stream = feedparser._StringIO(malformed_feed)
    feedparser.parse(stream)
    # Getting here without an exception is the whole assertion.
    self.assertTrue(True)
def send_head(self):
    """Send the status line and headers for the requested test file.

    Custom headers are defined inside the test case itself, for example::

        <!--
        Header:   Content-type: application/atom+xml
        Header:   X-Foo: bar
        -->

    A ``Status`` header found in the file overrides the computed status
    (304 when the request's ``if-modified-since`` / ``if-none-match`` value
    equals the file's ``Last-Modified`` / ``ETag`` header, otherwise 200).

    Returns an open binary file object for the response body. This method
    does not close it on success; it is closed here only if sending the
    headers raises.
    """
    # Short-circuit the HTTP status test `test_redirect_to_304()`
    if self.path == "/-/return-304.xml":
        self.send_response(304)
        self.send_header("Content-type", "text/xml")
        self.end_headers()
        return feedparser._StringIO(u"".encode("utf-8"))

    path = self.translate_path(self.path)
    # the compression tests' filenames determine the header sent
    if self.path.startswith("/tests/compression"):
        if self.path.endswith("gz"):
            headers = {"Content-Encoding": "gzip"}
        else:
            headers = {"Content-Encoding": "deflate"}
        headers["Content-type"] = "application/xml"
    else:
        # Read the header definitions through a context manager so the
        # handle used for discovery is closed instead of leaked.
        with open(path, "rb") as header_source:
            raw = header_source.read()
        headers = {
            k.decode("utf-8"): v.decode("utf-8").strip()
            for k, v in self.headers_re.findall(raw)
        }

    f = open(path, "rb")
    try:
        # The "nom"/"nomatch" defaults keep an absent request header (None)
        # from comparing equal to an absent file header.
        not_modified = (
            self.headers.get("if-modified-since") == headers.get("Last-Modified", "nom")
            or self.headers.get("if-none-match") == headers.get("ETag", "nomatch")
        )
        headers.setdefault("Status", 304 if not_modified else 200)
        self.send_response(int(headers["Status"]))
        headers.setdefault("Content-type", self.guess_type(path))
        self.send_header("Content-type", headers["Content-type"])
        # Size of the file that is actually open, without re-resolving the path.
        self.send_header("Content-Length", str(os.fstat(f.fileno()).st_size))
        for k, v in headers.items():
            if k not in ("Status", "Content-type"):
                self.send_header(k, v)
        self.end_headers()
    except Exception:
        # Do not leak the body file when a header could not be sent.
        f.close()
        raise
    return f
def send_head(self):
    """Send the status line and headers for the requested test file.

    Custom headers are defined inside the test case itself, for example:

    <!--
    Header:   Content-type: application/atom+xml
    Header:   X-Foo: bar
    -->

    A 'Status' header found in the file overrides the computed status
    (304 when the request's if-modified-since / if-none-match value equals
    the file's Last-Modified / ETag header, otherwise 200).

    Returns an open binary file object for the response body. This method
    does not close it on success; it is closed here only if sending the
    headers raises.
    """
    # Short-circuit the HTTP status test `test_redirect_to_304()`
    if self.path == '/-/return-304.xml':
        self.send_response(304)
        self.send_header('Content-type', 'text/xml')
        self.end_headers()
        return feedparser._StringIO(u''.encode('utf-8'))

    path = self.translate_path(self.path)
    # the compression tests' filenames determine the header sent
    if self.path.startswith('/tests/compression'):
        if self.path.endswith('gz'):
            headers = {'Content-Encoding': 'gzip'}
        else:
            headers = {'Content-Encoding': 'deflate'}
        headers['Content-type'] = 'application/xml'
    else:
        # Close the handle used for header discovery explicitly instead of
        # leaving it to the garbage collector.
        header_source = open(path, 'rb')
        try:
            raw = header_source.read()
        finally:
            header_source.close()
        headers = dict([
            (k.decode('utf-8'), v.decode('utf-8').strip())
            for k, v in self.headers_re.findall(raw)
        ])

    f = open(path, 'rb')
    try:
        # The 'nom'/'nomatch' defaults keep an absent request header (None)
        # from comparing equal to an absent file header.
        if (self.headers.get('if-modified-since') == headers.get('Last-Modified', 'nom')) \
                or (self.headers.get('if-none-match') == headers.get('ETag', 'nomatch')):
            status = 304
        else:
            status = 200
        headers.setdefault('Status', status)
        self.send_response(int(headers['Status']))
        headers.setdefault('Content-type', self.guess_type(path))
        self.send_header("Content-type", headers['Content-type'])
        # Size of the file that is actually open, without re-resolving the path.
        self.send_header("Content-Length", str(os.fstat(f.fileno()).st_size))
        for k, v in headers.items():
            if k not in ('Status', 'Content-type'):
                self.send_header(k, v)
        self.end_headers()
    except Exception:
        # Do not leak the body file when a header could not be sent.
        f.close()
        raise
    return f
def send_head(self):
    """Send the status line and headers for the requested test file.

    Custom headers are defined inside the test case itself, for example:

    <!--
    Header:   Content-type: application/atom+xml
    Header:   X-Foo: bar
    -->

    A 'Status' header found in the file overrides the computed status
    (304 when the request's if-modified-since / if-none-match value equals
    the file's Last-Modified / ETag header, otherwise 200). When no
    Content-type header is defined, self.guess_type(path) supplies it.

    Returns an open binary file object for the response body. This method
    does not close it on success; it is closed here only if sending the
    headers raises.
    """
    # Short-circuit the HTTP status test `test_redirect_to_304()`
    if self.path == '/-/return-304.xml':
        self.send_response(304)
        self.send_header('Content-type', 'text/xml')
        self.end_headers()
        return feedparser._StringIO(u''.encode('utf-8'))

    path = self.translate_path(self.path)
    # the compression tests' filenames determine the header sent
    if self.path.startswith('/tests/compression'):
        if self.path.endswith('gz'):
            headers = {'Content-Encoding': 'gzip'}
        else:
            headers = {'Content-Encoding': 'deflate'}
    else:
        # Read the header definitions and release the handle immediately;
        # the original one-liner never closed this file.
        definitions = open(path, 'rb')
        try:
            raw_headers = definitions.read()
        finally:
            definitions.close()
        headers = dict(
            [(k.decode('utf-8'), v.decode('utf-8').strip())
             for k, v in self.headers_re.findall(raw_headers)])

    f = open(path, 'rb')
    try:
        # The 'nom'/'nomatch' defaults keep an absent request header (None)
        # from comparing equal to an absent file header.
        if (self.headers.get('if-modified-since') == headers.get('Last-Modified', 'nom')) \
                or (self.headers.get('if-none-match') == headers.get('ETag', 'nomatch')):
            status = 304
        else:
            status = 200
        headers.setdefault('Status', status)
        self.send_response(int(headers['Status']))
        headers.setdefault('Content-type', self.guess_type(path))
        self.send_header("Content-type", headers['Content-type'])
        # Size of the file that is actually open, without re-resolving the path.
        self.send_header("Content-Length", str(os.fstat(f.fileno()).st_size))
        for k, v in headers.items():
            if k not in ('Status', 'Content-type'):
                self.send_header(k, v)
        self.end_headers()
    except Exception:
        # Do not leak the body file when a header could not be sent.
        f.close()
        raise
    return f
handlers = [handlers] try: f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers) data = f.read() except Exception, e: result['bozo'] = 1 result['bozo_exception'] = e data = '' f = None # if feed is gzip-compressed, decompress it if f and data and hasattr(f, 'headers'): if gzip and f.headers.get('content-encoding', '') == 'gzip': try: data = gzip.GzipFile(fileobj=_StringIO(data)).read() except Exception, e: # Some feeds claim to be gzipped but they're not, so # we get garbage. Ideally, we should re-request the # feed without the 'Accept-encoding: gzip' header, # but we don't. result['bozo'] = 1 result['bozo_exception'] = e data = '' elif zlib and f.headers.get('content-encoding', '') == 'deflate': try: data = zlib.decompress(data, -zlib.MAX_WBITS) except Exception, e: result['bozo'] = 1 result['bozo_exception'] = e data = ''
# Accept a single handler object as well as a sequence of handlers.
# An isinstance() check is used because comparing type() against
# types.InstanceType only matches instances of old-style classes, which
# left a lone new-style handler object unwrapped.
if not isinstance(handlers, (list, tuple)):
    handlers = [handlers]

# Fetch the raw feed. Any failure is recorded on the result ("bozo") rather
# than raised, and processing continues with empty data.
try:
    f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers)
    data = f.read()
except Exception as e:
    result['bozo'] = 1
    result['bozo_exception'] = e
    data = ''
    f = None

# if feed is gzip-compressed, decompress it
if f and data and hasattr(f, 'headers'):
    if gzip and f.headers.get('content-encoding', '') == 'gzip':
        try:
            data = gzip.GzipFile(fileobj=_StringIO(data)).read()
        except Exception as e:
            # Some feeds claim to be gzipped but they're not, so
            # we get garbage.  Ideally, we should re-request the
            # feed without the 'Accept-encoding: gzip' header,
            # but we don't.
            result['bozo'] = 1
            result['bozo_exception'] = e
            data = ''
    elif zlib and f.headers.get('content-encoding', '') == 'deflate':
        try:
            # Negative wbits: raw deflate stream without a zlib header.
            data = zlib.decompress(data, -zlib.MAX_WBITS)
        except Exception as e:
            result['bozo'] = 1
            result['bozo_exception'] = e
            data = ''
if len(xyz) == 0: sys.exit( "config file not setup. newsid wont be updated,setup new config before run" ) df = pd.read_csv( '../../PythonFlask/Extraction/Sources/RSS_ExtractionFormatV2.csv', index_col=0) # In[2]: df2 = df[['Name', 'Rss', 'NTags', 'SCOPE', 'Type']] # In[4]: feedparser._open_resource = lambda *args, **kwargs: feedparser._StringIO( requests.get(args[0], timeout=15).content) feeds = [] posts = [] description = [] counter = 0 for url in df2['Rss']: feed = feedparser.parse(url) Name = df2['Name'][counter] Ntags = df2['NTags'][counter] Scope = df2['SCOPE'][counter] Type = df2['Type'][counter] counter = counter + 1 for post in feed.entries: try: