def get_message(self, msgid):
    """
    Returns a FeedParserPlus object (which inherits from
    traditional FeedparserDict) representing one SeenThis message.

    msgid is converted with int() before being inserted in the
    endpoint template, so anything int() accepts can be passed.

    Raises InvalidResponse when parsing the answer raises an
    exception; the raw answer is then saved in a temporary file whose
    name is given in the error message.

    Raises NotFound when the parsed answer carries a "bozo_exception"
    key (see the comment in the body for why).
    """
    message_id = int(msgid)
    endpoint = get_endpoint_tmpl % message_id
    request = urllib2.Request(url=endpoint)
    self._add_headers(request)
    server = urllib2.urlopen(request)
    try:
        data = server.read()
    finally:
        # Release the connection even if reading fails.
        server.close()
    try:
        atom_entry = FeedParserPlus.parse(data)
    except Exception:
        # Only genuine errors are converted: KeyboardInterrupt and
        # SystemExit must not be reported as an invalid response.
        # tempfile is imported here because it is needed on this
        # error path only.
        import tempfile
        (datafilefd, datafilename) = tempfile.mkstemp(
            suffix=".atom", prefix="seenthis_", text=True)
        # Keep the offending payload for post-mortem inspection.
        with os.fdopen(datafilefd, "w") as datafile:
            datafile.write(data)
        raise InvalidResponse(
            "ATOM parsing error of the answer. The data has been saved in %s"
            % datafilename)
    # If the message-ID does not exist, SeenThis does not return a
    # 404 and sends back an invalid XML file :-(
    # http://seenthis.net/messages/70646 So, we hack: a parse that
    # recorded a bozo exception is treated as "no such message".
    if "bozo_exception" in atom_entry:
        raise NotFound("Message %i does not apparently exist" % message_id)
    return atom_entry
#!/usr/bin/env python
# Parse every Atom file named on the command line with FeedParserPlus.
# The parsed feed is discarded; an exception raised while reading or
# parsing a file propagates and stops the script.

import sys

from FeedParserPlus import FeedParserPlus

for atom in sys.argv[1:]:
    # Close each file as soon as it has been read instead of leaving
    # that to the garbage collector.
    with open(atom) as atom_file:
        atom_feed = FeedParserPlus.parse(atom_file.read())
    # Debugging aid, disabled:
    # print atom_feed.serialize()
def get(self, n=None, continue_on_error=False):
    """
    Retrieve messages page by page and merge them into one feed.

    n is the number of messages wanted: paging stops as soon as at
    least n entries have been collected, or when SeenThis sends back
    a page without entries. Pages are appended whole and there may
    be less messages on SeenThis, so there is no guarantee to have
    exactly n entries in the result. To get all the messages, just
    set n to a very high number.

    NOTE(review): earlier documentation stated that n=None retrieves
    only what SeenThis gives in one answer (the last 25 messages).
    The loop below stops on n only when n is not None, so with
    n=None it keeps paging until an empty page comes back. Confirm
    which behaviour is intended.

    continue_on_error selects what happens when parsing an answer
    raises: the raw answer is always saved in a temporary file and
    reported on standard error; then, if continue_on_error is true,
    the exception is printed on standard output and the loop goes
    on, otherwise the exception is re-raised.

    The result is a FeedParserPlus object (which inherits from
    traditional FeedparserDict): the first page parsed, with the
    entries of the following pages appended to its "entries".
    """
    total = 0
    over = False
    result = None
    step = 0
    while not over:
        # TODO: we could use feedparser to retrieve the feed, not
        # only to parse it...
        if step == 0:
            endpoint = self.retrieve_endpoint
        else:
            # Later requests append the number of entries received
            # so far to the endpoint.
            endpoint = "%s/%i" % (self.retrieve_endpoint, total)
        request = urllib2.Request(url=endpoint)
        self._add_headers(request)
        server = urllib2.urlopen(request)
        try:
            data = server.read()
        finally:
            # Release the connection even if reading fails.
            server.close()
        # Bad workaround for FeedParser's bug #91
        # http://code.google.com/p/feedparser/issues/detail?id=91
        # One replace pass is enough: the replacement text (a
        # backslash and a space inserted after the "!") cannot itself
        # contain "!DOCTYPE".
        data = data.replace("!DOCTYPE", "!\\ DOCTYPE")
        try:
            atom_feed = FeedParserPlus.parse(data)
        except Exception:
            # Only genuine errors are handled here, so that
            # KeyboardInterrupt can still stop the loop when
            # continue_on_error is set. tempfile is imported here
            # because it is needed on this error path only.
            import tempfile
            (datafilefd, datafilename) = tempfile.mkstemp(
                suffix=".atom", prefix="seenthis_", text=True)
            # Keep the offending payload for post-mortem inspection.
            with os.fdopen(datafilefd, "w") as datafile:
                datafile.write(data)
            sys.stderr.write(
                "Parsing error of the answer. The data has been saved in %s\n"
                % datafilename)
            if not continue_on_error:
                raise
            (exc_type, exc_value, exc_traceback) = sys.exc_info()
            print('Exception %s "%s" %s' % (
                exc_type,
                exc_value,
                traceback.format_list(traceback.extract_tb(exc_traceback)),
            ))
            # NOTE(review): total is not advanced here, so the next
            # request uses the same offset; an answer that fails to
            # parse every time would be retried forever.
            step += 1
            continue
        got = len(atom_feed["entries"])
        if got == 0:
            # An empty page means there is nothing left to fetch.
            over = True
        else:
            total += got
        step += 1
        sys.stderr.write("Got %i entries...\n" % total)
        if result is None:
            # The first page supplies the feed-level metadata.
            result = atom_feed
        else:
            result["entries"].extend(atom_feed["entries"])
        if n is not None and total >= n:
            over = True
    return result