Пример #1
0
def genTweetBlobs(twaccnts):
    '''Build a tweet corpus per ideology value and train an RNN model on each.

    For every ideology value 0-4, gathers the accounts of the legislators
    whose stored ideology equals that value, has getAllTweets write their
    tweets to ./data/twblobs/<ideology>/input.txt, and runs ./rnn/train.py
    on that directory as a subprocess.

    Args:
        twaccnts:   mapping indexed by legislator id; the looked-up values
                    are handed to getAllTweets
    Returns:
        bool:       True if every model was trained, False if any step
                    failed (the exception is logged through csplog)
    '''
    try:
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweetBlobs")
            idlist = idcur.fetchall()
        finally:
            # The rows are in memory after fetchall(); release the connection
            # before the long-running training instead of leaking it.
            if dbc is not None:
                dbc.close()
        print("query for id, ideology done")

        for i in range(5):
            acclist = [twaccnts[k[0]] for k in idlist if int(k[1]) == i]
            datadir = './data/twblobs/' + str(i) + '/'
            getAllTweets(acclist, datadir + 'input.txt')
            print("input file generation for ideology " + str(i) + " success")
            res = subprocess.call([
                "python", "./rnn/train.py",
                "--data_dir=" + datadir,
                "--save_dir=" + datadir + 'model/',
                "--rnn_size=32",
                "--num_epochs=1",
                "--seq_length=10",
                "--learning_rate=0.003",
                "--model=lstm",
            ])
            # a non-zero exit status means the training script failed
            if res != 0:
                raise Exception(
                    "Training subprocess call Error at genTweetBlobs")
            print("model trained for ideology " + str(i))
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
Пример #2
0
def genTweets(num, iden, insert=True, prime=None):
    '''Generate tweets under iden's name, according to iden's ideology.

    Looks up iden's ideology, samples text from the model stored under
    ./data/twblobs/<ideology>/model/ by running ./rnn/sample.py once per
    tweet, and optionally stores the resulting rows.

    Args:
        num:        number of tweets to generate
        iden:       legislator row; iden[0] is the legislator id
        insert:     if True, insert the generated rows into the "contents"
                    table
        prime:      optional text handed to the sampler via --prime
    Returns:
        list:       one [id, time, type, contents, author] row per generated
                    tweet, None if operation failed
    '''
    try:
        # acquire the ideology of every legislator to locate iden's model
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweets")
            idict = {row[0]: row[1] for row in idcur.fetchall()}
        finally:
            # close even when the query fails so the connection never leaks
            if dbc is not None:
                dbc.close()
        modeldir = './data/twblobs/' + str(int(idict[iden[0]])) + '/model/'

        # generate tweets, each with a random length of 15-29 words
        gentweets = []
        for _ in range(num):
            numwords = random.randrange(15, 30)
            params = [
                "python", "./rnn/sample.py", "--save_dir", modeldir, "-n",
                str(numwords), "--sample",
                str(1)
            ]
            if prime is not None:
                params += ["--prime", prime]
            # keeps the second line of the sampler's output (presumably the
            # generated text; the first line is discarded)
            gentweets.append(subprocess.check_output(params).split("\n")[1])
        print(("tweets" if insert else "reply") +
              " generation from model complete")

        collist = ["id", "time", "type", "contents", "author"]
        # the row id is a name-based UUID derived from the tweet text
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t)),
            unicode(datetime.datetime.now()), u'post', t, iden[0]
        ] for t in gentweets]

        if insert:
            # insert into database
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            try:
                dbc.text_factory = str
                if not dbmngr.insertMany(dbc, "contents", collist,
                                         contentlist):
                    raise Exception("Database Insertion Error at genTweets")
            finally:
                if dbc is not None:
                    dbc.close()

        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
Пример #3
0
def getIdeology():
    '''Get all of the legislators' ideology score
    Args:
        None
    Returns:
        dict:       A dictionary with {id->ideology}, None if operation failed
    '''
    try:
        dbc = dbmngr.connectDB('./data/', 'cspdb', False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"],
                                      ["legislators"])
            # same failure check the other query helpers in this file use
            if idcur is None:
                raise Exception("Query Error at getIdeology")
            rows = idcur.fetchall()
        finally:
            # close even when the query fails so the connection never leaks
            if dbc is not None:
                dbc.close()
        iddict = {row[0]: row[1] for row in rows}
        csplog.logevent("query", "queried all ideologies")
        return iddict
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
Пример #4
0
def populate(t, r, insert=True):
    '''Generate t tweets for every legislator and r replies to each tweet.

    Args:
        t:          number of tweets to generate per legislator; 0 means
                    there is nothing to do
        r:          number of replies to generate per tweet (0 for none);
                    the replier is a legislator picked at random per tweet
        insert:     forwarded to genTweets
    Returns:
        bool:       True on success, False if operation failed (the
                    exception is logged through csplog)
    '''
    # no tweets means no replies either, so skip the database entirely
    if t == 0:
        return True
    try:
        # The legislator list drives the tweet loop, so it is needed whenever
        # tweets are generated, not only when replies are requested.
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at populate")
            idlist = idcur.fetchall()
        finally:
            if dbc is not None:
                dbc.close()

        for legis in idlist:
            tweets = genTweets(t, legis, insert)
            # genTweets reports failure by returning None
            if tweets is None:
                raise Exception("Tweet generation Error at populate")
            if r == 0:
                continue
            for tw in tweets:
                reper = random.choice(idlist)
                # tw[0] is the id column of the generated tweet row
                genReplies(r, reper, tw[0])
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
Пример #5
0
def updateLegisImg():
    '''Download every legislator's photo from govtrack and store it in the DB.

    Requests /data/photos/<id>-200px.jpeg from govhost for each legislator
    id and writes the results to the "image" column of the "legislators"
    table. A legislator whose photo cannot be fetched gets the local
    placeholder ./data/noimg.jpeg instead.

    Returns:
        bool:       True if the images were updated, False if operation
                    failed (the exception is logged through csplog)
    '''
    def nonexist():
        '''handle the situation where person doesn't have an image on govtrack'''
        with open("./data/noimg.jpeg", "rb") as imgfile:
            return imgfile.read()

    def getImg(conn, iden):
        '''Gets the person 'iden's image from govtrack'''
        endpoint = "/data/photos/" + str(iden[0]) + "-200px.jpeg"
        try:
            conn.request("GET", endpoint)
            res = conn.getresponse()
            # Always drain the body: the connection is reused for the next
            # legislator, and a new request cannot be sent until the previous
            # response has been read completely.
            body = res.read()
            if res.status == 200:
                return body
            # 404 means this person doesn't have an image on the govtrack
            # database; anything else is an unexpected HTTP error
            if res.status != 404:
                raise Exception(
                    "HTTP error:" + str(res.status) + " at updateLegisImg",
                    endpoint)
        except Exception:
            # Best effort: record the failure and fall back to the
            # placeholder. Closing resets the connection so the next request
            # reconnects from a clean state.
            csplog.logexc(sys.exc_info())
            conn.close()
        print(endpoint)
        return nonexist()

    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
        if idcur is None:
            raise Exception("Query Error at updateLegisImg")
        idlist = idcur.fetchall()
        # one (image blob, legislator id) pair per row for the bulk update
        updlist = [(sqlite3.Binary(getImg(conn, p)), p[0]) for p in idlist]
        if not dbmngr.updateMany(dbc, "legislators", ["image"], updlist):
            raise Exception("Update Error at updateLegisImg")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
    finally:
        # release both connections on success and on failure
        if conn is not None:
            conn.close()
        if dbc is not None:
            dbc.close()