def iter_threads(self): table = self.parser.select(self.document.getroot(), "table#listeMessages", 1) for tr in table.xpath("./tr"): if tr.attrib.get("class", "") not in ("msgLu", "msgNonLu"): continue author = unicode(self.parser.select(tr, "td.colEmetteur", 1).text) link = self.parser.select(tr, "td.colObjet a", 1) date_raw = self.parser.select(tr, "td.colDate1", 1).attrib["data"] jsparams = re.search("\((.+)\)", link.attrib["onclick"]).groups()[0] jsparams = [i.strip("'\" ") for i in jsparams.split(",")] page_id, _id, unread = jsparams # this means unread on the website unread = False if unread == "false" else True # 2012/02/29:01h30min45sec dt_match = re.match("(\d+)/(\d+)/(\d+):(\d+)h(\d+)min(\d+)sec", date_raw).groups() dt_match = [int(d) for d in dt_match] thread = Thread(_id) thread._link_id = (page_id, unread) thread.date = datetime(*dt_match) thread.title = unicode(link.text) message = Message(thread, 0) message.set_empty_fields(None) message.flags = message.IS_HTML message.title = thread.title message.date = thread.date message.sender = author message.content = NotLoaded # This is the only thing we are missing thread.root = message yield thread
def iter_threads(self): table = self.parser.select(self.document.getroot(), 'table#listeMessages', 1) for tr in table.xpath('./tr'): if tr.attrib.get('class', '') not in ('msgLu', 'msgNonLu'): continue author = unicode(self.parser.select(tr, 'td.colEmetteur', 1).text) link = self.parser.select(tr, 'td.colObjet a', 1) date_raw = self.parser.select(tr, 'td.colDate1', 1).attrib['data'] jsparams = re.search('\((.+)\)', link.attrib['onclick']).groups()[0] jsparams = [i.strip('\'" ') for i in jsparams.split(',')] page_id, _id, unread = jsparams # this means unread on the website unread = False if unread == "false" else True # 2012/02/29:01h30min45sec dt_match = re.match('(\d+)/(\d+)/(\d+):(\d+)h(\d+)min(\d+)sec', date_raw).groups() dt_match = [int(d) for d in dt_match] thread = Thread(_id) thread._link_id = (page_id, unread) thread.date = datetime(*dt_match) thread.title = unicode(link.text) message = Message(thread, 0) message.set_empty_fields(None) message.flags = message.IS_HTML message.title = thread.title message.date = thread.date message.sender = author message.content = NotLoaded # This is the only thing we are missing thread.root = message yield thread