示例#1
0
def genTweets(num, iden, insert=True, prime=None):
    '''Generate tweets under iden's name, according to iden's ideology
    Args:
        num:        number of tweets to generate
        iden:       legislator row, only iden[0] (the legislator id) is read
        insert:     when True the rows are inserted into table "contents",
                    otherwise they are only returned
        prime:      optional text handed to the sampler as "--prime"
    Returns:
        list:       one [id, time, u'post', text, author] row per tweet,
                    None if any step failed (the exception is logged)
    '''
    try:
        #look up the ideology of every legislator
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweets")
            idict = {row[0]: row[1] for row in idcur.fetchall()}
        finally:
            #release the connection even when the query fails
            if dbc is not None:
                dbc.close()
        #the model directory is selected by the integer part of the ideology
        modeldir = './data/twblobs/' + str(int(idict[iden[0]])) + '/model/'

        #generate tweets
        gentweets = []
        for _ in range(num):
            #15 to 29 words per tweet
            numwords = random.choice(range(15, 30))
            params = [
                "python", "./rnn/sample.py", "--save_dir", modeldir, "-n",
                str(numwords), "--sample",
                str(1)
            ]
            if prime is not None:
                params += ["--prime", prime]
            #only the second line of the sampler's output is kept
            gentweets.append(subprocess.check_output(params).split("\n")[1])
        print(("tweets" if insert else "reply") +
              " generation from model complete")

        collist = ["id", "time", "type", "contents", "author"]
        #the id is derived from the text, so identical texts share one id
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t)),
            unicode(datetime.datetime.now()), u'post', t, iden[0]
        ] for t in gentweets]
        if insert:
            #insert into database
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            try:
                dbc.text_factory = str
                if not dbmngr.insertMany(dbc, "contents", collist,
                                         contentlist):
                    raise Exception("Database Insertion Error at genTweets")
            finally:
                if dbc is not None:
                    dbc.close()

        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#2
0
def genLikes(contentid, idlist, authorideo=None, ideolist=None):
    '''Randomly choose the legislators who like a content and record the likes
    Args:
        contentid:  id of the liked content, stored in column "cid"
        idlist:     ids of the candidate legislators
        authorideo: ideology value of the content's author, optional
        ideolist:   ideology values, ideolist[k] is used for idlist[k],
                    optional
    Returns:
        list:       the ids from idlist that like the content,
                    None if operation failed (the exception is logged)
    '''
    try:
        if authorideo is not None and ideolist is not None:
            #the closer the ideologies, the higher the chance of a like
            problist = [0.01 * (4 - abs(i - authorideo)) for i in ideolist]
        else:
            #flat chance when no ideology information is given
            problist = [0.05 for _ in idlist]

        likes = [
            lid for k, lid in enumerate(idlist)
            if random.random() < problist[k]
        ]
        collist = ["id", "lid", "cid"]
        likelist = [[
            unicode(
                uuid.uuid3(uuid.NAMESPACE_DNS,
                           str(lid) + str(contentid) + 'l')), lid, contentid
        ] for lid in likes]

        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "likes", collist, likelist):
                raise Exception("Database Insertion Error at genLikes")
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        return likes
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#3
0
def genReplies(num, iden, replyto):
    '''Generate replies under iden's name and insert them into "contents"
    Args:
        num:        number of replies to generate
        iden:       legislator row of the replier, only iden[0] is stored
        replyto:    id of the content being replied to
    Returns:
        list:       one [id, time, u"reply", text, author, replyto] row per
                    reply, None if operation failed (the exception is logged)
    '''
    try:
        #generate with genTweets, without inserting them as posts
        replies = genTweets(num, iden, False)
        if replies is None:
            #genTweets has already logged its own failure
            raise Exception("Tweet Generation Error at genReplies")

        collist = [
            "id",
            "time",
            "type",
            "contents",
            "author",
            "replyto",
        ]
        #t[-2] is the generated text of a genTweets row
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t[-2])),
            unicode(datetime.datetime.now()), u"reply", t[-2], iden[0], replyto
        ] for t in replies]

        #insert into database
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "contents", collist, contentlist):
                raise Exception("Database Insertion Error at genReplies")
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#4
0
def genMemes(num, iden, background, insert=True, primer=None):
    '''Generate meme texts under iden's name
    Every generated text is cut into a top and a bottom part which are
    joined by the marker string <MEME>.
    Args:
        num:        number of memes to generate
        iden:       legislator row of the author, only iden[0] is stored
        background: value stored in column "memebg"
        insert:     when True the rows are inserted into table "contents"
        primer:     optional priming text forwarded to genTweets
    Returns:
        list:       one [id, time, u'meme', text, author, background] row
                    per meme, None if operation failed (exception is logged)
    '''
    try:
        #generate tweets with genTweets(num,iden)
        tw = genTweets(num, iden, False, primer)
        if tw is None:
            #genTweets has already logged its own failure
            raise Exception("Tweet Generation Error at genMemes")

        #break up tweets into 2 parts randomly
        memetexts = []
        for t in tw:
            words = t[-2].split(" ")
            #cut by word position: splitting on the word's text would drop
            #the word and cut at every other occurrence of it as well
            #texts shorter than 4 words are cut after their first word
            k = random.choice(range(1, max(2, len(words) - 2)))
            memetexts.append(" ".join(words[:k]) + "<MEME>" +
                             " ".join(words[k:]))
        print("top/bottom text generated, separated by string <MEME>")

        collist = ["id", "time", "type", "contents", "author", "memebg"]
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, text)),
            unicode(datetime.datetime.now()), u'meme', text, iden[0],
            background
        ] for text in memetexts]
        #insert into database
        if insert:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            try:
                dbc.text_factory = str
                if not dbmngr.insertMany(dbc, "contents", collist,
                                         contentlist):
                    raise Exception("Database Insertion Error at genMemes")
            finally:
                #release the connection even when the insertion fails
                if dbc is not None:
                    dbc.close()
        print("Meme generation completed")
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#5
0
def getParticipation(commInfo, insert=True):
    '''Scrape the members of every committee from govtrack
    Args:
        commInfo:   committee rows, only c[0] (the committee id) is read
        insert:     when True the memberships are inserted into table
                    "participates", otherwise they are only fetched
    Returns:
        bool:       True on success, False if operation failed
                    (the exception is logged)
    '''
    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        if insert:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
        for c in commInfo:
            endpoint = "/api/v2/committee_member?"
            endpoint += "committee=" + str(c[0])
            endpoint += "&limit=300"
            conn.request("GET", endpoint)
            res = conn.getresponse()
            #NOTE(review): presumably raises on a bad HTTP status - confirm
            checkResponse(res, endpoint)

            jd = json.loads(res.read())
            #rows of (id from gUID, person id, committee id, role)
            formatted = [(gUID(p[u'person'][u'id'],
                               p[u'committee'][u'id']), p[u'person'][u'id'],
                          p[u'committee'][u'id'], p[u'role'])
                         for p in jd[u'objects']]
            if insert:
                if not dbmngr.insertMany(dbc, "participates",
                                         ["id", "lid", "cid", "role"],
                                         formatted):
                    raise Exception("Database Insertion Error")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
    finally:
        #release both connections on success and on failure
        if dbc is not None:
            dbc.close()
        if conn is not None:
            conn.close()
示例#6
0
def logexc(exc, verbose=True):
    '''Record an exception in the log database
    Args:
        exc:        (type, value, traceback) triple as returned by
                    sys.exc_info()
        verbose:    when True the time, the exception and its traceback are
                    printed as well
    Returns:
        bool:       True if the exception was logged, False if logging
                    itself failed
    '''
    try:
        dbc = dbmngr.connectDB("./log/", "log", False)
        try:
            collist = ["id", "event", "datetime", "name", "stack"]
            ins = [
                unicode(uuid.uuid3(uuid.NAMESPACE_DNS, str(hash(exc)))),
                u'exception',
                unicode(datetime.datetime.now()),
                unicode(dbmngr.sanitize(str(exc[1]))),
                #only the first traceback entry is stored
                unicode(
                    dbmngr.sanitize(str(traceback.format_tb(exc[2])[0])))
            ]
            entry = dict(zip(collist, ins))
            dbmngr.insertEntry(dbc, "log", entry)
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        if verbose:
            print(datetime.datetime.now())
            print("exception: " + str(exc[1]))
            #format_tb returns the text; print_tb writes to stderr and
            #returns None, which used to be printed here
            print("stack:")
            print("".join(traceback.format_tb(exc[2])))
        return True
    except Exception as e:
        #the logger must never raise, report the failure on stdout instead
        print("rekt " + str(e))
        print(traceback.format_exc())
        return False
示例#7
0
def genTweetBlobs(twaccnts):
    '''Collect the tweets of every ideology group and train one model each
    Args:
        twaccnts:   dict {legislator id -> twitter account}
    Returns:
        bool:       True on success, False if operation failed
                    (the exception is logged)
    '''
    try:
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweetBlobs")
            idlist = idcur.fetchall()
        finally:
            #the connection is not needed during training, release it now
            if dbc is not None:
                dbc.close()
        print("query for id, ideology done")
        #one input file and one model per ideology value 0..4
        for i in range(5):
            acclist = [twaccnts[k[0]] for k in idlist if int(k[1]) == i]
            datadir = './data/twblobs/' + str(i) + '/'
            getAllTweets(acclist, datadir + 'input.txt')
            print("input file generation for ideology " + str(i) + " success")
            res = subprocess.call([
                "python", "./rnn/train.py", "--data_dir=" + datadir,
                "--save_dir=" + datadir + 'model/', "--rnn_size=" + str(32),
                "--num_epochs=" + str(1), "--seq_length=" + str(10),
                "--learning_rate=" + str(0.003), "--model=lstm"
            ])
            #a non-zero exit status means the training script failed
            if res != 0:
                raise Exception(
                    "Training subprocess call Error at genTweetBlobs")
            print("model trained for ideology " + str(i))
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
示例#8
0
def genVotes(billid, voters):
    '''Generate a random vote of every voter on a bill and record the votes
    Args:
        billid:     id of the bill, stored in column "cid"
        voters:     ids of the voting legislators
    Returns:
        list:       one [id, voter, billid, "yea" or "nay"] row per voter,
                    None if operation failed (the exception is logged)
    '''
    try:
        collist = ["id", "lid", "cid", "votes"]
        #both outcomes are equally likely
        voteres = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS,
                               str(i) + str(billid))), i, billid,
            random.choice(("nay", "yea"))
        ] for i in voters]

        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "votes", collist, voteres):
                raise Exception("Database Insertion Error at genVotes")
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        return voteres
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#9
0
def getIdeology():
    '''Get all of the legislators' ideology score
    Args:
        None
    Returns:
        dict:       A dictionary with {id->ideology}, None if operation failed
    '''
    try:
        dbc = dbmngr.connectDB('./data/', 'cspdb', False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            if idcur is None:
                raise Exception("Query Error at getIdeology")
            rows = idcur.fetchall()
        finally:
            #release the connection even when the query fails
            if dbc is not None:
                dbc.close()
        iddict = {row[0]: row[1] for row in rows}
        csplog.logevent("query", "queried all ideologies")
        return iddict
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#10
0
def genBills(num, committee, iden):
    '''Generate bill titles and insert them into table "contents"
    A title is made of 1 to 5 generated literals, e.g. "A, B and C Act of
    2017".
    Args:
        num:        number of bills to generate
        committee:  value stored in column "committees"
        iden:       legislator row of the author, only iden[0] is stored
    Returns:
        list:       one [id, time, u'bill', title, author, committee] row
                    per bill, None if operation failed (exception is logged)
    '''
    try:
        modeldir = "./data/bills/model/"
        res = []
        for i in range(num):
            #generate a random number k, k in [1,5]
            k = random.choice(range(1, 6))
            #generate k bill title literals from bill blob
            genlits = []
            for _ in range(k):
                #1 to 3 words per literal
                numwords = random.choice(range(1, 4))
                output = subprocess.check_output([
                    "python", "./rnn/sample.py", "--save_dir", modeldir,
                    "-n",
                    str(numwords), "--sample",
                    str(1)
                ])
                #only the second line of the sampler's output is kept
                genlits.append(output.split("\n")[1].capitalize())

            #concatenate literals
            if k > 1:
                title = ", ".join(genlits[:-1]) + " and " + genlits[-1]
            else:
                title = genlits[-1]
            res.append(title + " Act of 2017")
            print(res)
            print("{0}/{1} bills generated".format(i + 1, num))

        #insert into database
        collist = ["id", "time", "type", "contents", "author", "committees"]
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, r)),
            unicode(datetime.datetime.now()), u'bill', r, iden[0], committee
        ] for r in res]
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "contents", collist, contentlist):
                raise Exception("Database Insertion Error at genBills")
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        print("Bills insertion complete")
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
示例#11
0
def updateLegisImg():
    '''Download every legislator's photo from govtrack and store it
    Legislators whose photo cannot be fetched get the placeholder image
    ./data/noimg.jpeg instead.
    Args:
        None
    Returns:
        bool:       True on success, False if operation failed
                    (the exception is logged)
    '''
    def nonexist():
        '''handle the situation where person doesn't have an image on govtrack'''
        with open("./data/noimg.jpeg", "rb") as imgfile:
            return imgfile.read()

    def getImg(conn, iden):
        '''Gets the person 'iden's image from govtrack'''
        endpoint = "/data/photos/" + str(iden[0]) + "-200px.jpeg"
        try:
            conn.request("GET", endpoint)
            res = conn.getresponse()
            #always read the body: the connection cannot handle the next
            #request before the current response has been read completely
            body = res.read()
            if res.status == 200:
                return body
            #404 means this person doesn't have an image on the govtrack
            #database, anything else is reported
            if res.status != 404:
                print("HTTP error:" + str(res.status) + " at updateLegisImg")
        except Exception as e:
            print("image request failed: " + str(e))
            #drop the connection so that a half finished exchange cannot
            #break the following requests, it is reopened on the next one
            conn.close()
        print(endpoint)
        return nonexist()

    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
        if idcur is None:
            raise Exception("Query Error at updateLegisImg")
        idlist = idcur.fetchall()
        #rows of (image blob, legislator id)
        updlist = [(sqlite3.Binary(getImg(conn, p)), p[0]) for p in idlist]
        if not dbmngr.updateMany(dbc, "legislators", ["image"], updlist):
            raise Exception("Update Error at updateLegisImg")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
    finally:
        #release both connections on success and on failure
        if conn is not None:
            conn.close()
        if dbc is not None:
            dbc.close()
示例#12
0
def populate(t, r, insert=True):
    '''Generate tweets for every legislator, and replies to those tweets
    Args:
        t:          number of tweets per legislator, nothing is done for 0
        r:          number of replies per tweet, 0 for no replies; every
                    tweet is replied to by one randomly chosen legislator
        insert:     not used, tweets and replies are always inserted
    Returns:
        bool:       True on success, False if operation failed
                    (the exception is logged)
    '''
    try:
        if t == 0: return True
        #the legislators are needed whenever tweets are generated, not only
        #when there are replies
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at populate")
            idlist = idcur.fetchall()
        finally:
            #release the connection even when the query fails
            if dbc is not None:
                dbc.close()
        for l in idlist:
            tweets = genTweets(t, l, True)
            if tweets is None:
                #genTweets has already logged its own failure
                raise Exception("Tweet Generation Error at populate")
            if r == 0: continue
            for tw in tweets:
                reper = random.choice(idlist)
                #tw[0] is the id of the tweet being replied to
                genReplies(r, reper, tw[0])
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
示例#13
0
def main():
    '''Initializes empty log database for the project
       Also removes the old database if it exists, i.e., reset.
       Nothing is created when the database cannot be connected to.
    '''
    conn = dbmngr.connectDB("./log/","log",True)
    if conn is None:
        #closing a missing connection used to raise AttributeError here
        return
    try:
        #create log table
        d = {
                "id":       ["text","primary key","not null"],
                "event":    ["text","not null"],
                "datetime": ["text","not null"],
                "name":     ["text"],
                "stack":    ["text"],
                "content":  ["text"]
            }
        dbmngr.createTable(conn,"log",d,None)
    finally:
        #close exactly once, also when the table creation fails
        conn.close()
    return
示例#14
0
def logevent(event, description, verbose=True):
    '''Record an event in the log database
    Args:
        event:      name of the event, stored in column "name"
        description:text stored in column "content"
        verbose:    when True the event and its description are printed
    Returns:
        bool:       True if the event was logged, False if operation failed
                    (the exception is logged)
    '''
    try:
        dbc = dbmngr.connectDB("./log/", "log", False)
        try:
            #one timestamp for both the id and the datetime column
            now = datetime.datetime.now()
            collist = ["id", "event", "datetime", "name", "content"]
            ins = [
                unicode(
                    uuid.uuid3(uuid.NAMESPACE_DNS,
                               str(hash(event) + hash(now)))),
                u'event',
                unicode(now),
                unicode(event),
                unicode(description)
            ]
            entry = dict(zip(collist, ins))
            dbmngr.insertEntry(dbc, "log", entry)
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        if verbose:
            print(event)
            print(description)
        return True
    except Exception:
        logexc(sys.exc_info())
        return False
示例#15
0
def getCommInfo(insert=True):
    '''Scrape the current committees from govtrack
    Args:
        insert:     when True the committees are inserted into table
                    "committees", otherwise they are only returned
    Returns:
        list:       one (id, name, jurisdiction, committee_type) tuple per
                    committee, None if operation failed (exception is logged)
    '''
    conn = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        endpoint = "/api/v2/committee?obsolete=false&committee=null&limit=300"
        conn.request("GET", endpoint)
        res = conn.getresponse()
        #fail with a clear message instead of a JSON decoding error
        if res.status != 200:
            raise Exception(
                "HTTP error:" + str(res.status) + " at getCommInfo", endpoint)
        jd = json.loads(res.read())
        formatted = [(c[u'id'], c[u'name'], c[u'jurisdiction'],
                      c[u'committee_type']) for c in jd[u'objects']]
        if insert:
            #the connection only exists in this branch, so it is closed
            #here; closing it after the branch broke insert=False
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            try:
                if not dbmngr.insertMany(dbc, "committees",
                                         ["id", "name", "desc", "floor"],
                                         formatted):
                    raise Exception("Database Insertion Error")
            finally:
                if dbc is not None:
                    dbc.close()
        return formatted
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        #release the HTTP connection on success and on failure
        if conn is not None:
            conn.close()
示例#16
0
def getBasicInfo(insert=True):
    '''Scrape the current legislators and their ideology from govtrack
    The sponsorship analysis scores of both chambers are sorted and cut
    into 5 bins of equal size, every legislator gets the number of its bin
    (0 to 4) as ideology. The last bin also takes the remainder.
    Args:
        insert:     when True the legislators are inserted into table
                    "legislators"
    Returns:
        dict:       {person id -> twitter id} when insert is True
        list:       the legislator rows (id, "first last", name, role type,
                    party, state, ideology, None) when insert is False
        None if operation failed (the exception is logged)
    '''
    nbins = 5
    conn = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        endpoint = ("/api/v2/role?"
                    "current=true&"
                    "role_type__in=senator|representative&"
                    "fields=person__firstname,person__lastname,state,"
                    "person__twitterid,person__id,person__name,party,"
                    "role_type&"
                    "limit=600")

        conn.request("GET", endpoint)
        res = conn.getresponse()
        #NOTE(review): presumably raises on a bad HTTP status - confirm
        checkResponse(res, endpoint)
        data = json.loads(res.read())
        print("scraped basic info from govtrack")

        ideo = []
        for suffix, chamber in (("h", "house"), ("s", "senate")):
            endpoint = ("/data/us/" + caucusnum +
                        "/stats/sponsorshipanalysis_" + suffix + ".txt")
            conn.request("GET", endpoint)
            res = conn.getresponse()
            checkResponse(res, endpoint)
            #the first and the last line are dropped (presumably the header
            #and the empty string after the final newline)
            ideo.extend(res.read().split("\n")[1:-1])
            print("scraped " + chamber + " ideology")

        ideo = [k.split(",") for k in ideo]
        #the scores are read as text, compare them as numbers
        ideo.sort(key=lambda l: float(l[1]))
        binsize = len(ideo) // nbins
        for j, row in enumerate(ideo):
            #every row gets a bin, including the last row of each bin
            #with fewer rows than bins everything goes to the last bin
            row[1] = min(j // binsize, nbins - 1) if binsize else nbins - 1

        print("ideology formatted")

        ideo = {int(p[0]): p[1] for p in ideo}

        formatted = [
            (p[u'person'][u'id'],
             (p[u'person'][u'firstname'] + " " + p[u'person'][u'lastname']),
             p[u'person'][u'name'], p[u'role_type'], p[u'party'], p[u'state'],
             ideo[p[u'person'][u'id']], None) for p in data[u'objects']
        ]
        if not insert:
            return formatted

        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            if not dbmngr.insertMany(dbc, "legislators", [
                    "id", "name", "desc", "role", "party", "state",
                    "ideology", "image"
            ], formatted):
                raise Exception("Database Insertion Error")
        finally:
            #release the connection even when the insertion fails
            if dbc is not None:
                dbc.close()
        return {
            p[u'person'][u'id']: p[u'person'][u'twitterid']
            for p in data[u'objects']
        }

    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        #release the HTTP connection on success and on failure
        if conn is not None:
            conn.close()
示例#17
0
def main():
    '''Set up the empty project database with all of its tables
       An already existing database is removed first, i.e., reset.
       Nothing is created when the database cannot be connected to.
    '''
    conn = dbmngr.connectDB("./data/", "cspdb", True)
    if conn is None:
        return

    #(table name, columns, foreign keys) in the order of creation,
    #None stands for a table without foreign keys
    schema = [
        ##Entities
        #legislators
        ("legislators", {
            "id": ["int", "primary key", "not null"],
            "name": ["text", "not null"],
            "desc": ["text"],
            "role": ["text", "not null"],
            "party": ["text", "not null"],
            "state": ["text"],
            "ideology": ["real"],
            "image": ["text"]
        }, None),
        #committees
        ("committees", {
            "id": ["int", "primary key", "not null"],
            "name": ["text", "not null"],
            "desc": ["text"],
            "floor": ["text"]  #whether this is senate or house committees
        }, None),
        #contents
        ("contents", {
            "id": ["text", "primary key", "not null"],
            "time": ["text", "not null"],
            "type": ["text", "not null"],
            "contents": ["text", "not null"],
            "memebg": ["text"],
            "replyto": ["text"],
            "author": ["int", "not null"],
            "committees": ["int"]
        }, {
            "replyto": "contents(id)",
            "author": "legislators(id)",
            "committees": "committees(id)"
        }),
        ##Relations
        #participates: legislator is a member of committee
        ("participates", {
            "id": ["text", "primary key", "not null"],
            "cid": ["int", "not null"],
            "lid": ["int", "not null"],
            "role": ["text"]
        }, {
            "lid": "legislators(id)",
            "cid": "committees(id)"
        }),
        #likes: legislator likes content
        ("likes", {
            "id": ["text", "primary key", "not null"],
            "lid": ["int", "not null"],
            "cid": ["text", "not null"]
        }, {
            "lid": "legislators(id)",
            "cid": "contents(id)"
        }),
        #votes: legislator votes on content
        ("votes", {
            "id": ["text", "primary key", "not null"],
            "lid": ["int", "not null"],
            "cid": ["text", "not null"],
            "votes": ["text", "not null"]
        }, {
            "lid": "legislators(id)",
            "cid": "contents(id)"
        }),
    ]

    for tablename, columns, foreignkeys in schema:
        if foreignkeys is None:
            dbmngr.createTable(conn, tablename, columns)
        else:
            dbmngr.createTable(conn, tablename, columns, foreignkeys)
    conn.close()
    return