def genTweetBlobs(twaccnts):
    '''Build a tweet corpus and train one RNN model per ideology bucket.

    For each ideology value 0..4 the accounts of the legislators whose
    stored ideology (cast to int) equals that value are collected, handed to
    getAllTweets together with ./data/twblobs/<i>/input.txt, and
    ./rnn/train.py is then run on that directory.

    Args:
        twaccnts: container indexed by legislator id; the looked-up values
            are passed to getAllTweets (presumably Twitter account names --
            confirm against the caller).

    Returns:
        bool: True if all five buckets were processed, False if the query,
            an account lookup or a training run failed (the exception is
            logged through csplog).
    '''
    try:
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweetBlobs")
            idlist = idcur.fetchall()
        finally:
            # The rows are fully fetched; do not hold the connection open
            # during the (long) training runs, and never leak it on error.
            dbc.close()
        print("query for id, ideology done")

        for i in range(5):
            acclist = [twaccnts[k[0]] for k in idlist if int(k[1]) == i]
            datadir = './data/twblobs/' + str(i) + '/'
            getAllTweets(acclist, datadir + 'input.txt')
            print("input file generation for ideology " + str(i) + " success")
            res = subprocess.call([
                "python", "./rnn/train.py",
                "--data_dir=" + datadir,
                "--save_dir=" + datadir + 'model/',
                "--rnn_size=" + str(32),
                "--num_epochs=" + str(1),
                "--seq_length=" + str(10),
                "--learning_rate=" + str(0.003),
                "--model=lstm"
            ])
            # A non-zero exit status means the training script failed.
            if res != 0:
                raise Exception(
                    "Training subprocess call Error at genTweetBlobs")
            print("model trained for ideology " + str(i))
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
def genTweets(num, iden, insert=True, prime=None):
    '''Generate tweets under iden's name, according to iden's ideology.

    The legislator's ideology selects the model directory
    ./data/twblobs/<ideology>/model/, and ./rnn/sample.py is run once per
    tweet; the second line of its output is taken as the tweet text.

    Args:
        num: number of tweets to generate
        iden: legislator row; iden[0] is used as the legislator id
        insert: when True the generated rows are inserted into the
            "contents" table, otherwise they are only returned
        prime: optional text forwarded to sample.py as --prime

    Returns:
        list: one [id, time, type, contents, author] row per tweet,
            None if operation failed
    '''
    try:
        # acquire ideology blob
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweets")
            idict = {t[0]: t[1] for t in idcur.fetchall()}
        finally:
            # Close on every path, including a failed query.
            dbc.close()
        modeldir = './data/twblobs/' + str(int(idict[iden[0]])) + '/model/'

        # generate tweets
        gentweets = []
        for _ in range(num):
            # Each tweet gets a random length of 15..29 words.
            numwords = random.randrange(15, 30)
            params = [
                "python", "./rnn/sample.py",
                "--save_dir", modeldir,
                "-n", str(numwords),
                "--sample", str(1)
            ]
            if prime is not None:
                params += ["--prime", prime]
            # Index 1: the text is read from the second output line.
            gentweets.append(subprocess.check_output(params).split("\n")[1])
        print(("tweets" if insert else "reply") +
              " generation from model complete")

        collist = ["id", "time", "type", "contents", "author"]
        contentlist = [[
            # uuid3 is deterministic: identical text yields an identical id.
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t)),
            unicode(datetime.datetime.now()),
            u'post',
            t,
            iden[0]
        ] for t in gentweets]

        if insert:
            # insert into database
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            try:
                dbc.text_factory = str
                if not dbmngr.insertMany(dbc, "contents", collist,
                                         contentlist):
                    raise Exception("Database Insertion Error at genTweets")
            finally:
                dbc.close()
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
def getIdeology():
    '''Get all of the legislators' ideology score

    Args:
        None

    Returns:
        dict: A dictionary with {id->ideology}, None if operation failed
    '''
    try:
        dbc = dbmngr.connectDB('./data/', 'cspdb', False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            # The other callers in this file treat a None cursor as a
            # failed query; report it explicitly instead of failing on
            # the .fetchall() attribute lookup.
            if idcur is None:
                raise Exception("Query Error at getIdeology")
            rows = idcur.fetchall()
        finally:
            # Release the connection whether or not the query succeeded.
            dbc.close()
        iddict = {row[0]: row[1] for row in rows}
        csplog.logevent("query", "queried all ideologies")
        return iddict
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
def populate(t, r, insert=True):
    '''Generate tweets for every legislator and replies to those tweets.

    For each legislator row, genTweets is called for t tweets. When r is
    positive, genReplies is then called once per generated tweet with r, a
    randomly chosen legislator row and the tweet's id.

    Args:
        t: number of tweets to generate per legislator; 0 means there is
            nothing to do
        r: value forwarded to genReplies as its first argument (presumably
            the number of replies per tweet -- confirm against genReplies);
            0 or less skips reply generation
        insert: forwarded to genTweets to control whether the generated
            tweets are written to the database

    Returns:
        bool: True on success, False if the query or tweet generation
            failed (the exception is logged through csplog)
    '''
    try:
        if t == 0:
            return True

        # The legislator list is needed to author tweets, not only to pick
        # repliers, so it must be loaded even when r == 0.
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at populate")
            idlist = idcur.fetchall()
        finally:
            dbc.close()

        for legislator in idlist:
            tweets = genTweets(t, legislator, insert)
            # genTweets signals failure by returning None.
            if tweets is None:
                raise Exception("Tweet generation Error at populate")
            if r <= 0:
                continue
            for tw in tweets:
                reper = random.choice(idlist)
                genReplies(r, reper, tw[0])
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
def updateLegisImg():
    '''Download every legislator's photo and store it in the database.

    Requests /data/photos/<id>-200px.jpeg from govhost for each id in the
    "legislators" table and writes the result to the "image" column. When a
    photo cannot be fetched, the contents of ./data/noimg.jpeg are stored
    instead.

    Args:
        None

    Returns:
        bool: True if the images were updated, False if the query, reading
            the placeholder image or the update failed (the exception is
            logged through csplog)
    '''
    def nonexist():
        '''handle the situation where person doesn't have an image on govtrack'''
        with open("./data/noimg.jpeg", "rb") as placeholder:
            return placeholder.read()

    def getImg(conn, iden):
        '''Gets the person 'iden's image from govtrack'''
        endpoint = "/data/photos/" + str(iden[0]) + "-200px.jpeg"
        try:
            conn.request("GET", endpoint)
            res = conn.getresponse()
            # Always read the body, even for error statuses: the shared
            # connection cannot serve another request while a response is
            # left unread.
            body = res.read()
            if res.status == 200:
                return body
        except Exception:
            # Reset the connection so one failed transfer does not break
            # every later request; it is reopened on the next request.
            conn.close()
        # Any non-200 status (404 = no image on govtrack) or transport
        # error falls back to the placeholder image.
        print(endpoint)
        return nonexist()

    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
        if idcur is None:
            raise Exception("Query Error at updateLegisImg")
        idlist = idcur.fetchall()
        updlist = [(sqlite3.Binary(getImg(conn, p)), p[0]) for p in idlist]
        if not dbmngr.updateMany(dbc, "legislators", ["image"], updlist):
            raise Exception("Update Error at updateLegisImg")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
    finally:
        # Release both resources on success and on failure.
        if conn is not None:
            conn.close()
        if dbc is not None:
            dbc.close()