def _bz_screenscrape(arg):
    """Build a feed dict for one bug by scraping its Bugzilla HTML page.

    Keys read from ``arg``:
      qs: query-string mapping; ``qs["arg"]`` is the bug id as a string and
          the optional ``qs["lang"]`` is copied into the result
      url: path to the bugzilla installation, without trailing slash
      version: integer 3 or 4 corresponding to the installation version
      warn_old: set to True through ``_default`` when absent; when truthy
          and the last comment is more than a year old, the entry returned
          by ``warn_old`` is appended to the feed

    Returns a dict with the keys ``id``, ``link``, ``title``, ``updated``
    and ``entries`` (plus ``lang`` when it was supplied).

    Problems with the request or the fetched page are reported through
    ``err``; an unsupported ``version`` raises ``ValueError``.
    """
    # TODO: not assume everyone's in PST
    from contextlib import closing
    from lxml import etree
    import urllib  # bugzilla.mozilla.org forces https which libxml2 balks at
    import datetime
    from util import warn_old

    if "arg" not in arg["qs"]:
        err(noarg)
    try:
        int(arg["qs"]["arg"])
    except (TypeError, ValueError):
        err("Bug IDs must be numerical.")
    _default(arg, 'warn_old', True)

    version = arg["version"]
    if version not in (3, 4):
        # Fail with a clear message instead of an unbound-variable error
        # in the middle of the comment loop.
        raise ValueError("unsupported Bugzilla version: %r" % (version,))

    # Every timestamp is stamped with this fixed offset (see the TODO above).
    assumed_utc_offset_hours = 7

    def pseudo_rfc3339(s):
        # "<date> <time> <zone>" -> "<date>T<time>-07:00"
        bits = s.split()
        return bits[0] + "T" + bits[1] + "-07:00"

    base_url = arg["url"]
    bugid = arg["qs"]["arg"]
    url = '%s/show_bug.cgi?id=%s' % (base_url, bugid)
    rval = {"id": url, "link": url, "entries": []}
    if "lang" in arg["qs"]:
        rval["lang"] = arg["qs"]["lang"]

    try:
        # closing() releases the HTTP response even when parsing fails.
        with closing(urllib.urlopen(url)) as response:
            tree = etree.parse(response, etree.HTMLParser(encoding="UTF-8"))
    except Exception:
        err(badfetch)

    comments = tree.xpath("//div[contains(@class, 'bz_comment')]")
    if len(comments) == 0:
        err(badparse)
    rval["title"] = tree.xpath('/html/head/title')[0].text

    # Keep long comment lines readable once they are embedded in a feed.
    for e in tree.xpath('//pre[@class="bz_comment_text"]'):
        e.attrib["style"] = "white-space:pre-wrap"

    for comment in comments:
        if version == 4:
            link = url + "#" + comment.attrib['id']
            posted = comment.xpath("div/span[@class='bz_comment_time']")[0].text.strip("\n ")
            fn = comment.xpath("div/span/span/span[@class='fn']")
            if len(fn) == 1:
                name = fn[0].text
            else:
                # user didn't give a full name to bugzilla
                name = comment.xpath("div/span/span")[0].text[:-1]  # random newline
            number = comment.attrib["id"][1:]
            content = etree.tostring(comment.xpath("pre[@class='bz_comment_text']")[0])
        else:  # version == 3
            anchor = comment.xpath("span/i/a")[0]
            link = base_url + "/" + anchor.attrib['href']
            posted = comment.xpath("span/i/span")[0].tail.strip("\n ")
            name = comment.xpath("span/i/span/a")[0].text  # everyone always has a name
            number = anchor.attrib["name"][1:]
            content = etree.tostring(comment.xpath("pre")[0])

        pseudo = pseudo_rfc3339(posted)
        title = "Comment %s - %s - %s" % (number, name, posted)
        rval["entries"].append({
            "id": link,
            "title": title,
            "content": content,
            "content_type": "html",
            "author": name,
            "updated": pseudo,
            "published": pseudo,
            "link": link,
        })

    # the last updated time of the global feed is the post time of the last
    # comment... for now
    rval["updated"] = pseudo

    if arg["warn_old"]:
        # Drop the "-07:00" suffix, then shift by the same offset so the
        # comparison against the UTC clock is like-for-like.
        last_comment_utc = (
            datetime.datetime.strptime(rval["updated"][:-6], "%Y-%m-%dT%H:%M:%S")
            + datetime.timedelta(hours=assumed_utc_offset_hours)
        )
        one_year_ago = datetime.datetime.utcnow() - datetime.timedelta(days=365)
        if last_comment_utc < one_year_ago:
            rval["entries"].append(warn_old(url, bugid))
    return rval
def _bz_xmlrpc(arg):
    """Build a feed dict for one bug through Bugzilla's XML-RPC interface.

    Keys read from ``arg``:
      qs: query-string mapping with
          arg: bug id as string
          history: put history changes in feed (optional, default true)
          ccs: include cc changes in history (optional, default false)
          lang: optional, copied into the result
          A ``history``/``ccs`` value starting with "F", "f" or "0" means
          false, any other non-empty value means true, and an empty value
          falls back to the default.
      url: path to bugzilla installation
      cursor: database cursor holding the ``bugzillas`` and
          ``bugzilla_users`` tables, used to cache login -> display name
      warn_old: set to True through ``_default`` when absent; when truthy
          and the bug has not changed for a year, the entry returned by
          ``warn_old`` is appended to the feed

    Returns a dict with the keys ``id``, ``link``, ``updated``, ``title``
    and ``entries`` (plus ``lang`` when supplied).  Entries are sorted by
    publication time.

    Failures are reported through ``err``.
    """
    import xmlrpclib
    import datetime
    import re
    from xml.sax.saxutils import escape
    from util import rfc3339
    from util import warn_old

    now = datetime.datetime.utcnow()
    qs = arg["qs"]

    if "arg" not in qs:
        err(noarg)
    try:
        int(qs['arg'])
    except (TypeError, ValueError):
        err("Bug IDs must be numerical.")

    def flag(name, default):
        # Parse an optional boolean query parameter.
        if name not in qs:
            return default
        value = qs[name]
        if not value:
            # "?history=" used to crash on value[0]; treat it as "not given".
            return default
        return value[0] not in "Ff0"

    history = flag("history", True)
    ccs = flag("ccs", False)
    _default(arg, 'warn_old', True)

    url = arg["url"]
    bugid = qs["arg"]
    p = xmlrpclib.ServerProxy(url + "/xmlrpc.cgi", use_datetime=True)

    try:
        bugdata = p.Bug.get({"ids": [bugid], "permissive": True})
    except Exception:
        err(badfetch)
    if len(bugdata['faults']) > 0:
        err(bugdata['faults'][0]['faultString'])
    bugdata = bugdata["bugs"][0]

    # get the ID in case the query string used the bug alias
    guid = '%s/show_bug.cgi?id=%s' % (url, str(bugdata['id']))
    rval = {
        "id": guid,
        "link": guid,
        "updated": rfc3339(bugdata['last_change_time']),
        "title": "Bug %s - " % bugid + bugdata['summary'],
        "entries": [],
    }
    if "lang" in qs:
        rval["lang"] = qs["lang"]

    try:
        bugcomments = p.Bug.comments({"ids": [bugid]})["bugs"][bugid]['comments']
    except Exception:
        err(badfetch)
    commenting_users = [x['author'] for x in bugcomments]

    if history:
        try:
            bug_history = p.Bug.history({"ids": [bugid]})['bugs'][0]['history']
        except Exception:
            err(badfetch)
        commenting_users.extend([h['who'] for h in bug_history])

    # --- display-name cache -------------------------------------------
    c = arg["cursor"]
    c.executescript("""pragma temp_store = MEMORY; create temp table email_queries (email text unique);""")
    c.execute("insert or ignore into bugzillas (id, url) values (NULL, ?)", (url,))
    bz_id = c.execute("select id from bugzillas where url = ?", (url,)).fetchall()[0][0]
    # ts is stored as year*100 + month; drop everything cached before the
    # current month.
    c.execute("delete from bugzilla_users where ts <= ?", (now.year*100 + now.month - 1,))
    c.executemany("insert or ignore into email_queries (email) values (?)",
                  ((e,) for e in commenting_users))
    cache_misses = c.execute(
        "select email from email_queries where not exists (select 1 from bugzilla_users where bugzilla_users.bz = ? and bugzilla_users.email = email_queries.email)",
        (bz_id,)).fetchall()
    if len(cache_misses) > 0:
        try:
            real_names = p.User.get({"names": [e[0] for e in cache_misses]})["users"]
        except Exception:
            err(badfetch)
        for user in real_names:
            # Fall back to the login when no real name is available.
            display_name = user.get('real_name') or user['name']
            c.execute("insert into bugzilla_users (email, name, ts, bz) values (?, ?, ?, ?)",
                      (user['name'], display_name, now.year*100 + now.month, bz_id))

    def rn(email):
        # Cached display name for a login; the login itself if the server
        # did not return that user.
        row = c.execute("select name from bugzilla_users where bz = ? and email = ?",
                        (bz_id, email)).fetchone()
        return row[0] if row else email

    def link_bug_numbers(text):
        # Turn every number in a dependson/blocked value into a bug link.
        return re.sub(
            r"(\d+)",
            lambda m: '<a href="%s/show_bug.cgi?id=%s">%s</a>' % (url, m.group(1), "Bug " + m.group(1)),
            text)

    def linkbugs(text):
        # Turn "bug 123" / "Bug 123" mentions into links.
        return re.sub(
            r"([Bb])ug (\d+)",
            lambda m: '<a href="%s/show_bug.cgi?id=%s">%s</a>' % (url, m.group(2), m.group(1) + "ug " + m.group(2)),
            text)

    # --- history entries ----------------------------------------------
    if history:
        for bug_history_change_no, bug_history_change in enumerate(bug_history):
            changes = bug_history_change['changes']
            # don't even create an rss entry if cc is the only thing that's
            # changed and we're ignoring ccs
            if not ccs and changes and all(fc['field_name'] == 'cc' for fc in changes):
                continue
            history_id = guid + "#h" + str(bug_history_change_no)
            content = ['<pre style="white-space:pre-wrap">']
            for field_change in changes:
                field_name = field_change['field_name']
                if field_name == 'cc' and not ccs:
                    continue
                # Values come from the remote server as plain text: escape
                # them before embedding them in HTML content.
                removed = escape(field_change.get('removed', ''))
                added = escape(field_change.get('added', ''))
                content.append("Field <b>%s</b>:\n" % escape(field_name))
                if field_name == 'attachments.isobsolete':
                    content.append('<a href="%s/attachment.cgi?id=%d">Attachment #%d</a> is obsolete\n'
                                   % (url, field_change['attachment_id'], field_change['attachment_id']))
                if field_name in ['dependson', 'blocked']:
                    # Escaping never introduces digits, so linking afterwards is safe.
                    removed = link_bug_numbers(removed)
                    added = link_bug_numbers(added)
                content.append("Removed:\n")
                content.append(" %s\n" % removed)
                content.append("Added:\n")
                content.append(" %s\n\n" % added)
            content.append("</pre>")
            real_name = rn(bug_history_change['who'])
            when = rfc3339(bug_history_change['when'])
            entry = {
                "id": history_id,
                "title": "%s changed at %s" % (real_name, when),
                "author": real_name,
                "updated": when,
                "published": bug_history_change['when'],  # keep for sorting
                "link": history_id,
                "content": "".join(content),
                "content_type": "html",
            }
            rval["entries"].append(entry)

    # --- comment entries ----------------------------------------------
    for comment_no, comment in enumerate(bugcomments):
        comment_id = guid + "#c" + str(comment_no)
        real_name = rn(comment['author'])
        comment_time_str = rfc3339(comment['time'])
        entry = {
            "id": comment_id,
            "title": u"Comment %s - %s - %s" % (str(comment_no), real_name, comment_time_str),
            # Escape first so user-written markup is shown literally, then
            # add our own bug links.
            "content": '<pre style="white-space:pre-wrap">' + linkbugs(escape(comment['text'])) + "</pre>",
            "content_type": "html",
            "author": real_name,
            "updated": comment_time_str,
            "published": comment['time'],  # keep for sorting
            "link": comment_id,
        }
        rval["entries"].append(entry)

    # "published" still holds the raw timestamps here so history and
    # comments interleave chronologically; format them afterwards.
    rval["entries"].sort(key=lambda e: e["published"])
    for entry in rval["entries"]:
        entry["published"] = rfc3339(entry["published"])

    if arg["warn_old"] and bugdata['last_change_time'] < (now - datetime.timedelta(days=365)):
        rval["entries"].append(warn_old(guid, bugid))
    return rval