def _mentioned_names(text, allowed):
    """Return the lower-cased names that follow each "@" in ``text``.

    A name is the run of characters taken from ``allowed`` that directly
    follows an "@".  An "@" with no such run after it (including an "@" that
    is the last character of the text) contributes nothing.
    """
    found = []
    at = text.find("@")
    while at != -1:
        begin = at + 1
        end = begin
        # Bounds check replaces the old "/" sentinel and the unguarded
        # read past the end of the tweet.
        while end < len(text) and text[end] in allowed:
            end += 1
        if end > begin:
            found.append(text[begin:end].lower())
        at = text.find("@", begin)
    return found


def dbconversation(tablename):
    """Build the directed mention graph for one tweet table and write it as GML.

    Reads the non-retweet rows (``IsRetweet=0``) of ``tablename`` in
    "tweetsdb.db" whose ``ConvertedTime`` is later than a fixed cut-off.  For
    every tweet that contains "@" but not "rt:", an edge is created from the
    tweet's author to each mentioned user, provided the mentioned user is also
    an author in the same selection and is not the tweet's own author.  The
    edge attribute ``weight`` counts how many times that mention occurred.

    The graph is written to "newconv<tablename>nortdir.gml".

    tablename -- name of the table to read.  It is spliced into the SQL text
                 (table names cannot be bound as parameters), so it must come
                 from a trusted source.
    """
    mintime = 1358090418
    graph = nx.DiGraph()
    # NOTE(review): this object is never used below; the construction is kept
    # only in case DBTweetGetter's constructor has side effects - confirm and
    # remove.
    mygetter = DBTweetGetter(None, None)

    con = lite.connect("tweetsdb.db")
    try:
        cur = con.cursor()
        cur.execute("SELECT DISTINCT ScreenName FROM " + tablename +
                    " WHERE ConvertedTime>? AND IsRetweet=0", (mintime,))
        # A set makes the per-mention membership test O(1).
        users = set(row[0].lower() for row in cur.fetchall())
        cur.execute("SELECT Tweet, ScreenName FROM " + tablename +
                    " WHERE ConvertedTime>? AND IsRetweet=0", (mintime,))
        tweets = cur.fetchall()
    finally:
        con.close()

    for text, screen_name in tweets:
        lowered = text.lower()
        if "@" not in lowered or "rt:" in lowered:
            continue
        author = screen_name.lower()
        for name in _mentioned_names(text, valid_characters):
            if name == author or name not in users:
                continue
            if graph.has_edge(author, name):
                graph[author][name]['weight'] += 1
            else:
                graph.add_edge(author, name, weight=1)

    print("Built graph")
    nx.write_gml(graph, "newconv" + tablename + "nortdir.gml")
    print("Wrote graph")
from classtweetgetter import DBTweetGetter
#from classtweetreader import DBTweetReader
import datetime

# Hashtags (without the leading "#") to harvest; each one is also passed to
# DBTweetGetter as its second argument.
tags = [
    "IPCC",
    "UNFCCC",
    "AR5",
    "WGII",
    "WGIII",
    "LTFchat",
    "Pages2k",
    "Pages",
    "HadCRUT",
    "GISS",
]

for name in tags:
    hashtag = "#%s" % name
    myTweets = DBTweetGetter("IPCCdb.db", name)
    myTweets.query2(hashtag, 1600000, False)
    # Log-file writing (timestamp plus tweet and user counts read back
    # through DBTweetReader) is disabled for this script.
import sqlite3 as lite
import sys
from classtweetgetter import DBTweetGetter

mytweetgetter = DBTweetGetter(None, None)

# Tweet database (read) and user database (read).
con = lite.connect("tweetsdb.db")
cur = con.cursor()
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()

tables = [
    'htclimatechange',
    'htclimate',
    'htglobalwarming',
    'ClimateChange',
    'GlobalWarming',
]

# Every distinct screen name per table, in table order; a name that occurs in
# several tables appears once for each of them.
names = []
for table in tables:
    cur.execute("SELECT DISTINCT ScreenName FROM " + table)
    names.extend(row[0] for row in cur.fetchall())

# Screen names that already have an entry in usermap.
ucur.execute("SELECT ScreenName FROM usermap")
already = [row[0] for row in ucur.fetchall()]

deleted = []
i = 0
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader
import datetime

tablename = "htclimate"
queryname = '#climate'
fname = "#climate"

# Harvest tweets matching the query into the table.
myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 160000, False)

#Write log file
with open("dblog" + fname + ".txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")

    readTweet = DBTweetReader("tweetsdb.db", tablename)

    # Each summary line is built once, then echoed and logged, so the
    # console and the log file can never disagree.
    summary = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets(tablename))
    print(summary)
    myfile.write(summary + "\n")

    udict = readTweet.getUserDict(tablename)
    summary = "Total number of users: " + str(len(udict))
    print(summary)
    myfile.write(summary + "\n")

    # Collect the keys first: deleting from a dict while iterating over its
    # items() only works on Python 2, where items() returns a copy.
    for user in [key for key, count in udict.items() if count < 3]:
        del udict[user]

    summary = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(summary)
    myfile.write(summary + "\n")
import sqlite3 as lite import sys import numpy as np from classtweetgetter import DBTweetGetter from time import sleep mygetter=DBTweetGetter(None,None) con=lite.connect("tweetsdb.db") cur=con.cursor() ucon=lite.connect("userdb.db") ucur=ucon.cursor() crawlers=[] chainlengths=[] nsame=[] noded={} chainlfile=open("chaindata.txt","w") nsamefile=open("nsamedata.txt","w") class chainCrawler(object): #note this method will repeat chains if there is V structure, should be minimal effect def __init__(self, node, n): self.n=n self.node=node self.stopwalk=False def walk(self): while self.stopwalk==False: self.step() return 0 def step(self):
def _cached_ids(ucur, select_sql, insert_sql, download, user, label):
    """Return the set of ids stored for ``user``, downloading them if absent.

    ``select_sql`` / ``insert_sql`` are parameterised statements for the
    cache table; ``download`` is called as ``download(user, [], -1)`` when
    the cache holds no rows.  Returns None when the download reports "FAIL".
    """
    ucur.execute(select_sql, (user,))
    rows = ucur.fetchall()
    if not rows:
        print("Downloading " + label + " for " + user)
        fetched = download(user, [], -1)
        # NOTE(review): the original layout was lost, so it is ambiguous
        # whether this pause followed every download or only a failed one;
        # pausing after every attempt is the conservative reading.
        sleep(10)
        if fetched == "FAIL":
            return None
        # int() stores the same value the original unquoted SQL literal did.
        ucur.executemany(insert_sql,
                         [(user, int(one_id)) for one_id in fetched])
        # Re-read so the ids have the database's representation whether they
        # were just downloaded or cached by an earlier run.
        ucur.execute(select_sql, (user,))
        rows = ucur.fetchall()
    return set(row[0] for row in rows)


def _user_id(ucur, ucon, getter, name):
    """Return the usermap id for ``name``, downloading and caching it if absent.

    Returns None when the download reports "FAIL".
    """
    select_sql = "SELECT UserId FROM usermap WHERE ScreenName=? COLLATE NOCASE"
    ucur.execute(select_sql, (name,))
    rows = ucur.fetchall()
    if not rows:
        print("Downloading userid for " + name)
        fetched = getter.getIDfromUser(name)
        sleep(10)  # see the note on the pause in _cached_ids
        if fetched == "FAIL":
            return None
        ucur.execute("INSERT INTO usermap VALUES(?,?)", (name, int(fetched)))
        ucon.commit()
        # The downloaded id was previously kept as the string that had been
        # spliced into the SQL, while cached ids came back from the database;
        # re-reading makes the first lookup behave like every later one.
        ucur.execute(select_sql, (name,))
        rows = ucur.fetchall()
    return rows[0][0]


def dbplotffnetwork():
    """Build the friend/follower graph of active users and write it as GML.

    Users are the lower-cased screen names with more than 7 rows in
    ``htglobalwarming`` (tweetsdb.db) inside a fixed time window, minus a
    hard-coded exclusion list.  Friend ids, follower ids and user ids are
    read from userdb.db and, when missing there, downloaded through
    DBTweetGetter and cached.  An edge ``a -> b`` is added when b's id is in
    a's friend list, or a's id is in b's follower list.  A user whose
    friends, followers or id cannot be downloaded is left out of all further
    processing.

    The graph is written to "newfriendfollowerhtccgt29.gml".
    """
    mintime = 1358090418
    maxtime = 1363963163
    min_tweets = 7  # a user needs MORE than this many tweets to be included
    excluded = set(["undercoverzen", "jivelad", "anabananazavala"])
    #TODO Formalise this

    graph = nx.DiGraph()
    mygetter = DBTweetGetter(None, None)
    con = lite.connect("tweetsdb.db")
    ucon = lite.connect("userdb.db")
    try:
        cur = con.cursor()
        ucur = ucon.cursor()

        cur.execute(
            "SELECT ScreenName FROM htglobalwarming WHERE ConvertedTime > ?"
            " AND ConvertedTime < ? COLLATE NOCASE", (mintime, maxtime))
        # Single counting pass; `ordered` keeps first-appearance order so the
        # resulting user list matches the original ordering.
        tweet_counts = {}
        ordered = []
        for row in cur.fetchall():
            name = row[0].lower()
            if name not in tweet_counts:
                ordered.append(name)
            tweet_counts[name] = tweet_counts.get(name, 0) + 1
        users = [name for name in ordered if tweet_counts[name] > min_tweets]
        print(len(users))

        # Each exclusion is applied independently; previously a missing first
        # name aborted the removal of the remaining ones.
        users = [name for name in users if name not in excluded]

        # Users that could not be resolved are recorded here instead of being
        # removed from `users` mid-iteration, which silently skipped the
        # element following each removal.
        failed = set()
        id_cache = {}
        position = 0
        for user in users:
            if user in failed:
                continue
            print("User " + str(position) + "/" + str(len(users) - len(failed)))
            position += 1

            #For each user check which other users are in friends, followers
            friend_ids = _cached_ids(
                ucur,
                "SELECT FriendId FROM friends WHERE ScreenName=? COLLATE NOCASE",
                "INSERT INTO friends VALUES(?,?)",
                mygetter.getFriends, user, "friends")
            follower_ids = _cached_ids(
                ucur,
                "SELECT FollowerId FROM followers WHERE ScreenName=? COLLATE NOCASE",
                "INSERT INTO followers VALUES(?,?)",
                mygetter.getFollowers, user, "followers")
            ucon.commit()

            # A failure in EITHER download excludes the user; the friends
            # failure used to be overwritten before the followers lookup.
            if friend_ids is None or follower_ids is None:
                failed.add(user)
                continue

            graph.add_node(user)
            for other in users:
                if other in failed:
                    continue
                if other not in id_cache:
                    id_cache[other] = _user_id(ucur, ucon, mygetter, other)
                sid = id_cache[other]
                if sid is None:
                    failed.add(other)
                    continue
                if sid in follower_ids:
                    graph.add_edge(other, user)
                if sid in friend_ids:
                    graph.add_edge(user, other)

        print("Built graph")
        nx.write_gml(graph, "newfriendfollowerhtccgt29.gml")
        ucon.commit()
    finally:
        con.close()
        ucon.close()
    print("Wrote graph")
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader
import datetime

tablename = "htclimatechange"
queryname = '#climatechange'
fname = "#climatechange"

# Harvest tweets matching the query into the table.
myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 160000, False)

#Write log file
with open("dblog" + fname + ".txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")

    readTweet = DBTweetReader("tweetsdb.db", tablename)

    # Each summary line is built once, then echoed and logged, so the
    # console and the log file can never disagree.
    summary = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets(tablename))
    print(summary)
    myfile.write(summary + "\n")

    udict = readTweet.getUserDict(tablename)
    summary = "Total number of users: " + str(len(udict))
    print(summary)
    myfile.write(summary + "\n")

    # Collect the keys first: deleting from a dict while iterating over its
    # items() only works on Python 2, where items() returns a copy.
    for user in [key for key, count in udict.items() if count < 3]:
        del udict[user]

    summary = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(summary)
    myfile.write(summary + "\n")

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
#     mytext=myfile.read()