def parse_list(page, soup):
    """Extract the message rows and the page count from an inbox listing.

    Args:
        page: Not used by this function.
        soup: Parsed document exposing ``findAll`` (presumably a
            BeautifulSoup tree -- confirm against the caller).

    Returns:
        A dict with two keys:

        * ``'message_lists'`` -- one dict per message row holding
          ``'topic'``, ``'link'`` and ``'sender'``.  The ``'unread'`` key
          (``True``) is present only on rows whose icon is
          ``images/memo_unread.gif``; it is absent otherwise.
        * ``'maxpages'`` -- a single digit read from the end of the pager
          text, or 10 when no digit can be read there.

    Raises:
        IndexError, KeyError, AttributeError: if the document does not have
            the expected row/pager structure, or a topic link does not
            match ``no_matcher``.
    """
    # Message rows are recognised by their hover-highlight handler.
    row_attrs = {'onmouseover': 'this.style.backgroundColor="#FFF5F5"'}

    messages = []
    for row in soup.findAll('tr', row_attrs):
        cells = row.contents
        item = {}
        if cells[3].contents[0]['src'] == u'images/memo_unread.gif':
            item['unread'] = True
        topic_cell = cells[5]
        item['topic'] = topic_cell.text
        msg_no = int(no_matcher.match(topic_cell.contents[3]['href']).group(1))
        item['link'] = '/inbox/view/%d' % msg_no
        item['sender'] = cells[7].contents[3].text
        messages.append(item)

    # The pager is the second table with cellpadding=5.  Spaces are dropped
    # and brackets become separators, so the last token is whatever trails
    # the final bracketed entry.
    pager_text = soup.findAll('table', {'cellpadding': '5'})[1].text
    tokens = (pager_text.replace(' ', '')
              .replace('[', ' ').replace(']', ' ').split())
    last_token = tokens[-1] if tokens else ''

    # Only the last character of that token is tried, then the one before
    # it.  Slices (rather than indexes) are used so that an empty or
    # one-character token falls through to the default instead of raising
    # IndexError.
    maxpages = 10
    for candidate in (last_token[-1:], last_token[-2:-1]):
        try:
            maxpages = int(candidate)
        except ValueError:
            continue
        break

    return {'message_lists': messages, 'maxpages': maxpages}
def parse_view(page, soup):
    """Extract the fields of a single message from its detail page.

    Args:
        page: Not used by this function.
        soup: Parsed document exposing ``findAll`` (presumably a
            BeautifulSoup tree -- confirm against the caller).  It is
            modified in place: the first child of the message-body cell is
            replaced with an empty string.

    Returns:
        A dict with the keys ``'name'``, ``'userid'``, ``'topic'``,
        ``'date'``, ``'body'`` and ``'uid'``.  ``'body'`` is the rendered
        contents of the body cell after its first child was blanked, and
        ``'uid'`` is the integer captured by ``no_matcher`` from a link in
        the ninth row.

    Raises:
        IndexError, KeyError, AttributeError: if the document does not have
            the expected table structure, or the link does not match
            ``no_matcher``.
    """
    layout_attrs = {'width': '100%', 'border': '0', 'cellspacing': '0', 'cellpadding': '0'}
    # The message lives in the third table matching the layout attributes.
    rows = soup.findAll('table', layout_attrs)[2].findAll('tr')

    sender_cell = rows[1].contents[3]
    sender_name = sender_cell.contents[3].text
    # Drops the first three characters and the last one of the raw node
    # (presumably surrounding punctuation -- confirm against a real page).
    sender_id = sender_cell.contents[5].contents[2][3:-1]

    topic = rows[3].text
    date = rows[5].text

    # The leading child must be blanked before rendering so that it does
    # not appear in the body output.
    content_cell = rows[7].contents[3]
    content_cell.contents[0].replaceWith('')
    rendered = content_cell.renderContents()

    uid_href = rows[8].contents[3].contents[0]['href']
    uid = int(no_matcher.match(uid_href).group(1))

    return {
        'name': sender_name,
        'userid': sender_id,
        'topic': topic,
        'date': date,
        'body': rendered,
        'uid': uid,
    }