class Twister:
    """Facade over a twister daemon reached through ``AuthServiceProxy``.

    The getters reshape raw RPC/DHT replies into plain dicts and lists.
    Most of them are wrapped in ``functioncache``; its first positional
    argument looks like a lifetime in seconds (TODO confirm against the
    decorator's definition).  Cached getters must return concrete
    containers, never one-shot iterators.
    """

    def __init__(self, url='http://*****:*****@127.0.0.1:28332', formatter=None):
        """Create the RPC proxy for *url*.

        formatter -- optional callable applied to message and bio text.
        """
        self.twister = AuthServiceProxy(url)
        self.formatter = formatter

    def _format_message(self, message):
        """Return *message* run through the formatter, if one is set.

        A falsy formatter result falls back to the raw message, exactly as
        the original ``and/or`` expression did.
        """
        if not self.formatter:
            return message
        return self.formatter(message) or message

    def clear_cache(self):
        """Empty the shelve behind the function cache."""
        # doesn't always work :( Need to figure this out.
        db = self.get_user_info._db  # It doesn't matter which method. It's the same handler
        db.setup(self.get_user_info)  # It needs the function to know which db (module's name)
        db.shelve.clear()
        db.shelve.sync()

    def _format_reply(self, r):
        """Describe the post being replied to; gracefully returns {} if empty."""
        if not r:
            return {}
        return {"user": self.get_user_info(r['n']), 'k': r['k']}

    def _format_post_info(self, p):
        """Flatten a raw post (a dict with a 'userpost' entry) for display.

        For a retwist ('rt' present) the message, author and k come from the
        original post, and the retwisting user is reported as 'rt_user'.
        """
        post = p['userpost']
        result = {
            "height": post['height'],
            "k": post['k'],
            "time": timestamp2iso(post['time']),
        }
        if 'rt' in post:  # was has_key(), which only exists on Python 2 dicts
            rt = post['rt']
            result.update({
                "message": self._format_message(rt['msg']),
                "user": self.get_user_info(rt['n']),
                "k": rt['k'],
                "rt_user": self.get_user_info(post['n']),
                "reply": self._format_reply(rt.get('reply', {})),
            })
        else:
            result.update({
                "message": self._format_message(post['msg']),
                "user": self.get_user_info(post['n']),
                "k": post['k'],
                "reply": self._format_reply(post.get('reply', {})),
            })
        return result

    @functioncache(ignore_instance=True)  # cache forever. not supposed to change
    def get_twist(self, username, k):
        """Return post number *k* of *username*.

        Raises SkipCache carrying a placeholder post when the DHT has no
        such entry (presumably so the miss is not cached -- confirm against
        functioncache).
        """
        p = self.twister.dhtget(username, 'post{0}'.format(k), 's')
        if p:
            return self._format_post_info(p[0]['p']['v'])
        raise SkipCache("Twist not found @{0}/{1}".format(username, k), {
            "user": self.get_user_info('nobody'),
            "k": 0,  # maybe something needs this
            "lastk": 0,  # or this
            "message": "Twist not found (maybe it's private?) 😭",
            "time": "Never"
        })

    @functioncache(60, ignore_instance=True)
    def get_twist_replies(self, username, k):
        """Return the replies to a post as a list, in reversed DHT order."""
        replies = [self._format_post_info(r['p']['v'])
                   for r in self.twister.dhtget(username, 'replies{0}'.format(k), 'm')]
        # We show them oldest first.  Reverse in place so the cached value is
        # a real list; a bare reversed() iterator is exhausted after one pass.
        replies.reverse()
        return replies

    @functioncache(60, ignore_instance=True)
    def get_twist_rts(self, username, k):
        """Return the retwists of a post."""
        return [self._format_post_info(r['p']['v'])
                for r in self.twister.dhtget(username, 'rts{0}'.format(k), 'm')]

    @functioncache(60 * 15, ignore_instance=True)
    def get_user_info(self, username):
        """Return the profile dict of *username*, with 'username' and 'avatar' added.

        'nobody' yields a fixed placeholder.  Raises SkipCache with a
        fallback dict when the profile or the avatar cannot be fetched.
        """
        if username == 'nobody':
            return {"username": "", "fullname": "Nobody"}  # Username is empty. Easier for mustache.
        result = self.twister.dhtget(username, 'profile', 's')
        if not result:
            raise SkipCache("user not found: @{0}".format(username),
                            {"username": username, "fullname": "???"})
        user = result[0]['p']['v']
        user['username'] = username  # handy
        if not user.get('fullname'):  # happens
            user['fullname'] = username.capitalize()  # Buddha is in the details
        user['bio'] = self._format_message(user.get('bio', ''))
        try:
            user['avatar'] = self.twister.dhtget(username, 'avatar', 's')[0]['p']['v']
            if user['avatar'] == 'img/genericPerson.png':  # ugly patch
                user['avatar'] = '/assets/img/genericPerson.png'
        except Exception:
            # Best effort: any lookup failure means "no avatar", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            user['avatar'] = None
            raise SkipCache("couldn't get avatar for @{0}, not caching".format(username), user)
        return user

    @functioncache(60 * 5, ignore_instance=True)
    def local_user_menu(self, active_user=None):
        """Return {'users': [...], 'active': ...} for the wallet's users.

        The list starts with a 'Nobody' entry; 'active' is the entry whose
        username equals *active_user*, or None.
        """
        users = [{'username': '', 'fullname': 'Nobody', 'active': active_user == ''}]
        active = users[0] if active_user == '' else None
        for u in self.twister.listwalletusers():
            user = self.get_user_info(u)
            if active_user == u:
                user.update({'active': True})
                active = user
            users.append(user)
        return {"users": users, "active": active}

    @functioncache(60 * 5, ignore_instance=True)
    def get_following(self, localusername):
        """Return the users followed by *localusername* as [{'username': ...}]."""
        return [{"username": u} for u in self.twister.getfollowing(localusername)]

    @functioncache(60 * 5, ignore_instance=True)
    def get_sponsored_posts(self, num=8):
        """Return up to *num* spam/sponsored posts as a list, in reversed order."""
        posts = [self._format_post_info(p) for p in self.twister.getspamposts(num)]
        posts.reverse()  # Don't ask me why reversed :)  (a list, so it can be cached)
        return posts

    @functioncache(60, ignore_instance=True)
    def get_tag_posts(self, tag):
        """Return the posts stored in the DHT under hashtag *tag*."""
        return [self._format_post_info(p['p']['v'])
                for p in self.twister.dhtget(tag, 'hashtag', 'm')]

    @functioncache(60, ignore_instance=True)
    def get_user_feed(self, localusername, num=8):
        """Return up to *num* posts from the users *localusername* follows."""
        return [self._format_post_info(p)
                for p in self.twister.getposts(num, self.get_following(localusername))]

    @functioncache(60, ignore_instance=True)
    def get_user_mentions(self, localusername):
        """Return the posts stored in the DHT as mentions of *localusername*."""
        return [self._format_post_info(p['p']['v'])
                for p in self.twister.dhtget(localusername, 'mention', 'm')]

    @functioncache(60, ignore_instance=True)
    def get_user_posts(self, username, num=8):
        """Return posts of *username*.

        Uses ``getposts`` first; when that yields nothing, the timeline is
        rebuilt by following each post's 'lastk' back from the DHT 'status'.
        NOTE(review): *num* is not applied to the rebuilt timeline.
        """
        result = [self._format_post_info(p)
                  for p in self.twister.getposts(num, [{'username': username}])]
        if result:
            return result
        # We're not following. Let's "knit" the best timeline we can
        chain = self.twister.dhtget(username, 'status', 's')
        while True:
            try:
                lastk = chain[-1]['p']['v']['userpost']['lastk']
                last = lastk and self.twister.dhtget(username, 'post{0}'.format(lastk), 's')
            except Exception:
                # A missing 'lastk', an empty chain or a failed lookup all
                # end the walk; interpreter-exit exceptions propagate.
                break
            if not last:
                break
            chain.append(last[0])
        return [self._format_post_info(s['p']['v']) for s in chain]

    @functioncache(60, ignore_instance=True)
    def get_trending_tags(self, num=8):
        """Return up to *num* trending hashtags as reported by the daemon."""
        return self.twister.gettrendinghashtags(num)
print("Checking following list...") print("Expecting at least: " + ', '.join(following)) while True: twister_following = twister.getfollowing(username) print("Got: " + ', '.join(twister_following)) if not set(following) <= set(twister_following): print("Updating following list...") twister.follow(username, following) time.sleep(5) else: break print("Checking subscriptions...") while True: data = twister.dhtget(username, "profile", "s") try: twister_subscriptions = data[0]["p"]["v"]["mqtt_topics"] except: twister_subscriptions = [] print("Got: " + ', '.join(twister_subscriptions)) if not set(subscriptions) <= set(twister_subscriptions): print("Updating subscriptions...") try: seq = data[0]["p"]["seq"] + 1 except: seq = 1 rc = twister.dhtput(username, "profile", "s", {"mqtt_topics": subscriptions}, username, seq) time.sleep(5) else:
# Make sure the daemon follows at least every user in `following`.
print("Checking following list...")
print("Expecting at least: " + ', '.join(following))
while True:
    twister_following = twister.getfollowing(username)
    print("Got: " + ', '.join(twister_following))
    if set(following) <= set(twister_following):
        break
    print("Updating following list...")
    twister.follow(username, following)
    time.sleep(5)  # wait before checking again

# Make sure the published profile lists at least every topic in `subscriptions`.
print("Checking subscriptions...")
while True:
    data = twister.dhtget(username, "profile", "s")
    try:
        twister_subscriptions = data[0]["p"]["v"]["mqtt_topics"]
    except (IndexError, KeyError, TypeError):
        # No profile yet, or a profile without the key: nothing subscribed.
        twister_subscriptions = []
    print("Got: " + ', '.join(twister_subscriptions))
    if set(subscriptions) <= set(twister_subscriptions):
        break
    print("Updating subscriptions...")
    try:
        seq = data[0]["p"]["seq"] + 1
    except (IndexError, KeyError, TypeError):
        seq = 1  # no previous entry to read a sequence number from
    # NOTE(review): the value written holds only "mqtt_topics"; if dhtput
    # replaces the whole profile, other profile fields are dropped -- confirm.
    twister.dhtput(username, "profile", "s", {"mqtt_topics": subscriptions}, username, seq)
    time.sleep(5)  # wait before checking again
print str(block["height"]) + "\r", usernames = block["usernames"] for u in usernames: if not db.usernames.has_key(u): db.usernames[u] = User() if block.has_key("nextblockhash"): nextHash = block["nextblockhash"] else: break now = time.time() for u in db.usernames.keys(): if db.usernames[u].updateTime + cacheTimeout < now: print "getting avatar for", u, "..." d = twister.dhtget(u, "avatar", "s") if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"): db.usernames[u].avatar = d[0]["p"]["v"] print "getting profile for", u, "..." d = twister.dhtget(u, "profile", "s") if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"): db.usernames[u].fullname = d[0]["p"]["v"]["fullname"] db.usernames[u].location = d[0]["p"]["v"]["location"] db.usernames[u].updateTime = now cPickle.dump(db, open(dbFileName, "w")) from HTML import HTML from cgi import escape
print str(block["height"]) + "\r", usernames = block["usernames"] for u in usernames: if not db.usernames.has_key(u): db.usernames[u] = User() if block.has_key("nextblockhash"): nextHash = block["nextblockhash"] else: break now = time.time() for u in db.usernames.keys(): if db.usernames[u].updateTime + cacheTimeout < now: print "getting avatar for", u, "..." d = twister.dhtget(u,"avatar","s") if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"): db.usernames[u].avatar = d[0]["p"]["v"] print "getting profile for", u, "..." d = twister.dhtget(u,"profile","s") if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"): db.usernames[u].fullname = d[0]["p"]["v"]["fullname"] db.usernames[u].location = d[0]["p"]["v"]["location"] db.usernames[u].updateTime = now cPickle.dump(db,open(dbFileName,"w")) from HTML import HTML
class Twister:
    """Facade over a twister daemon reached through ``AuthServiceProxy``.

    The getters reshape raw RPC/DHT replies into plain dicts and lists.
    Most of them are wrapped in ``functioncache``; its first positional
    argument looks like a lifetime in seconds (TODO confirm against the
    decorator's definition).
    """

    def __init__(self, url='http://*****:*****@127.0.0.1:28332', formatter=None):
        """Create the RPC proxy for *url*.

        formatter -- optional callable applied to message and bio text.
        """
        self.twister = AuthServiceProxy(url)
        self.formatter = formatter

    def _format_message(self, message):
        """Return *message* run through the formatter, if one is set.

        A falsy formatter result falls back to the raw message, exactly as
        the original ``and/or`` expression did.
        """
        if not self.formatter:
            return message
        return self.formatter(message) or message

    def clear_cache(self):
        """Empty the shelve behind the function cache."""
        # doesn't always work :( Need to figure this out.
        db = self.get_user_info._db  # It doesn't matter which method. It's the same handler
        db.setup(self.get_user_info)  # It needs the function to know which db (module's name)
        db.shelve.clear()
        db.shelve.sync()

    def _format_reply(self, r):
        """Describe the post being replied to; gracefully returns {} if empty."""
        if not r:
            return {}
        return {"user": self.get_user_info(r['n']), 'username': r['n'], 'k': r['k']}

    def _format_post_info(self, p):
        """Flatten a raw post (a dict with a 'userpost' entry) for display.

        Only usernames are emitted for the author and the retwister.
        Resolving full user info here was too heavy; it is done in an
        iframe instead.
        """
        post = p['userpost']
        result = {
            "height": post['height'],
            "k": post['k'],
            "time": timestamp2iso(post['time']),
        }
        if 'rt' in post:  # was has_key(), which only exists on Python 2 dicts
            rt = post['rt']
            result.update({
                "message": self._format_message(rt['msg']),
                "username": rt['n'],
                "k": rt['k'],
                "rt_username": post['n'],
                "reply": self._format_reply(rt.get('reply', {})),
            })
        else:
            result.update({
                "message": self._format_message(post['msg']),
                "username": post['n'],
                "k": post['k'],
                "reply": self._format_reply(post.get('reply', {})),
            })
        return result

    # Cache forever. One day we'll look at our old avatars and remember how stupid we used to be.
    @functioncache(ignore_instance=True)
    def get_twist(self, username, k):
        """Return post number *k* of *username*.

        Raises SkipCache carrying a placeholder post when the DHT has no
        such entry (presumably so the miss is not cached -- confirm against
        functioncache).
        """
        p = self.twister.dhtget(username, 'post{0}'.format(k), 's')
        if p:
            return self._format_post_info(p[0]['p']['v'])
        raise SkipCache("Twist not found @{0}/{1}".format(username, k), {
            "username": "",
            "k": 0,  # maybe something needs this
            "lastk": 0,  # or this
            "message": "Twist not found 😭",
            "time": "Never"
        })

    @functioncache(60, ignore_instance=True)
    def get_twist_replies(self, username, k):
        """Return the replies to a post as a list, in reversed DHT order."""
        replies = [self._format_post_info(r['p']['v'])
                   for r in self.twister.dhtget(username, 'replies{0}'.format(k), 'm')]
        replies.reverse()  # We show them oldest first
        return replies

    @functioncache(60, ignore_instance=True)
    def get_twist_rts(self, username, k):
        """Return the retwists of a post."""
        return [self._format_post_info(r['p']['v'])
                for r in self.twister.dhtget(username, 'rts{0}'.format(k), 'm')]

    @functioncache(60 * 15, ignore_instance=True)
    def get_user_info(self, username):
        """Return the profile dict of *username*, with 'username' and 'avatar' added.

        'nobody' yields a fixed placeholder.  A user without a profile gets
        a minimal dict built from the username.  A missing or generic
        avatar is reported as None.
        """
        if username == 'nobody':
            return {"username": "", "fullname": "Nobody"}  # Username is empty. Easier for mustache.
        result = self.twister.dhtget(username, 'profile', 's')
        if not result:
            return {"username": username, "fullname": username.capitalize()}
        user = result[0]['p']['v']
        user['username'] = username  # handy
        if not user.get('fullname'):  # happens
            user['fullname'] = username.capitalize()  # Buddha is in the details
        user['bio'] = self._format_message(user.get('bio', ''))
        # Tweak for "protocol-less" urls
        u = user.get('url')
        if u and not RE_URL_TWEAK.match(u):
            user['url'] = 'http://{0}'.format(u)
        try:
            user['avatar'] = self.twister.dhtget(username, 'avatar', 's')[0]['p']['v']
            if user['avatar'] == 'img/genericPerson.png':  # ugly patch
                user['avatar'] = None
        except Exception:
            # Best effort: any lookup failure means "no avatar", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            user['avatar'] = None
        return user

    @functioncache(60 * 5, ignore_instance=True)
    def local_user_menu(self, active_user=None):
        """Return {'users': [...], 'active': ...} for the wallet's users.

        The list starts with a 'Nobody' entry; 'active' is the entry whose
        username equals *active_user*, or None.
        """
        users = [{'username': '', 'fullname': 'Nobody', 'active': active_user == ''}]
        active = users[0] if active_user == '' else None
        for u in self.twister.listwalletusers():
            user = self.get_user_info(u)
            if active_user == u:
                user.update({'active': True})
                active = user
            users.append(user)
        return {"users": users, "active": active}

    @functioncache(60 * 5, ignore_instance=True)
    def get_following(self, localusername):
        """Return the users followed by *localusername* as [{'username': ...}]."""
        return [{"username": u} for u in self.twister.getfollowing(localusername)]

    @functioncache(60 * 5, ignore_instance=True)
    def get_promoted_posts(self, num=8):
        """Return up to *num* spam/promoted posts as a list, in reversed order."""
        posts = [self._format_post_info(p) for p in self.twister.getspamposts(num)]
        posts.reverse()  # Don't ask me why reversed :)
        return posts

    @functioncache(60, ignore_instance=True)
    def get_tag_posts(self, tag):
        """Return the posts stored in the DHT under hashtag *tag*."""
        return [self._format_post_info(p['p']['v'])
                for p in self.twister.dhtget(tag, 'hashtag', 'm')]

    @functioncache(60, ignore_instance=True)
    def get_user_feed(self, localusername, num=8):
        """Return up to *num* posts from the users *localusername* follows."""
        return [self._format_post_info(p)
                for p in self.twister.getposts(num, self.get_following(localusername))]

    @functioncache(60, ignore_instance=True)
    def get_user_mentions(self, localusername):
        """Return the posts stored in the DHT as mentions of *localusername*."""
        return [self._format_post_info(p['p']['v'])
                for p in self.twister.dhtget(localusername, 'mention', 'm')]

    @functioncache(60, ignore_instance=True)
    def get_user_messages(self, localusername, username=None, num=2):
        """Return direct-message threads of *localusername*, most recent first.

        With *username* only that peer's thread is requested; otherwise the
        threads with every followed user.  Each thread is a dict with keys
        'username', 'user', 'messages' and 'latest_ts'.  Messages are in
        the reverse of the order the daemon returned them.
        """
        if username:
            peers = [{"username": username}]
        else:
            peers = self.get_following(localusername)
        raw = self.twister.getdirectmsgs(localusername, num, peers)
        localuser = self.get_user_info(localusername)
        result = []
        # `peer` keeps the loop from rebinding the `username` parameter.
        for peer in raw:
            user = self.get_user_info(peer)
            messages = []
            latest_ts = 0
            for message in raw[peer]:
                if message['time'] > latest_ts:
                    latest_ts = message['time']
                message['time'] = timestamp2iso(message['time'])
                message['username'] = localusername if message['fromMe'] else peer
                message['user'] = localuser if message['fromMe'] else user
                message['message'] = self._format_message(message['text'])
                messages.append(message)
            messages.reverse()  # reverse order (newer first)
            result.append({'username': peer, 'user': user,
                           'messages': messages, 'latest_ts': latest_ts})
        return sorted(result, key=lambda thread: thread['latest_ts'], reverse=True)

    @functioncache(60, ignore_instance=True)
    def get_user_posts(self, username, num=8):
        """Return posts of *username*.

        Uses ``getposts`` first; when that yields nothing, the timeline is
        rebuilt by following each post's 'lastk' back from the DHT 'status'.
        NOTE(review): *num* is not applied to the rebuilt timeline.
        """
        result = [self._format_post_info(p)
                  for p in self.twister.getposts(num, [{'username': username}])]
        if result:
            return result
        # We're not following. Let's "knit" the best timeline we can
        chain = self.twister.dhtget(username, 'status', 's')
        while True:
            try:
                lastk = chain[-1]['p']['v']['userpost']['lastk']
                last = lastk and self.twister.dhtget(username, 'post{0}'.format(lastk), 's')
            except Exception:
                # A missing 'lastk', an empty chain or a failed lookup all
                # end the walk; interpreter-exit exceptions propagate.
                break
            if not last:
                break
            chain.append(last[0])
        return [self._format_post_info(s['p']['v']) for s in chain]

    def get_users_by_partial_name(self, prefix, num=8):
        """Return up to *num* usernames starting with *prefix* (not cached)."""
        return self.twister.listusernamespartial(prefix, num)

    @functioncache(60, ignore_instance=True)
    def get_trending_tags(self, num=8):
        """Return up to *num* trending hashtags as reported by the daemon."""
        return self.twister.gettrendinghashtags(num)

    def get_info(self):  # not cached
        """Return the daemon's ``getinfo`` reply."""
        return self.twister.getinfo()
class TwisterScraper:
    """Collect user details from a twister daemon into a pickled database.

    Usernames are discovered by walking the block chain.  Profile, avatar
    and following list are then fetched per user and stored in
    ``self.db.users``, keyed by username.
    """

    # Users whose details are older than this are fetched again.
    CACHE_MAX_DURATION = datetime.timedelta(days=7)

    def __init__(self, dbPath, server='localhost', port=28332, user='******',
                 password='******', protocol='http'):
        """Connect to the daemon and load (or create) the database at *dbPath*."""
        self.serverUrl = '{protocol}://{user}:{passwd}@{server}:{port}'.format(
            protocol=protocol, server=server, port=port, user=user, passwd=password)
        self.twister = AuthServiceProxy(self.serverUrl)
        self.dbFile = dbPath
        self.locService = GeoLocationService()
        try:
            with open(self.dbFile, 'rb') as dbFile:
                # NOTE(security): unpickling runs code embedded in the file;
                # only open database files this tool wrote itself.
                self.db = pickle.load(dbFile)
        except FileNotFoundError:
            self.db = TwisterDb()
            self.saveDb()

    def get_user(self, username):
        """Return the stored user called *username*, or None if unknown."""
        return self.db.users.get(username)

    def scrape_users(self):
        """Fetch details for every new user and every user with stale data.

        The database is saved every fifth user and once more at the end.
        An HTTPException while fetching one user is reported and skipped.
        """
        usernames = self._collect_usernames()

        if not self.db.users:  # first run
            # NOTE(review): the original built placeholder User objects here
            # but never stored them anywhere; only the save had an effect.
            self.saveDb()

        now = datetime.datetime.now()
        known = set(self.db.users)
        need_refresh = {u for u in known
                        if self.db.users[u].updateTime + self.CACHE_MAX_DURATION < now}
        to_fetch = (usernames - known) | need_refresh
        total_to_fetch = len(to_fetch)

        for n, u in enumerate(to_fetch):
            try:
                user = self._fetch_user_details(u)
                # Fixed: the check and the call used `u`, the username
                # string, so geolocation never ran.
                if hasattr(user, 'location'):
                    try:
                        user.locate()
                    except MaxGeoRequestsException:
                        print("Could not locate '{0}' because of max request "
                              "limit reached".format(u))
                self.db.users[user.username] = user
                if n % 5 == 0:
                    self.saveDb()
                print("({line} of {total}) Fetched {user} ...".format(
                    user=u, line=n + 1, total=total_to_fetch))
            except HTTPException as e:
                print("Connection error retrieving user {0}: {1}".format(u, str(e)))

        if to_fetch:
            # Users fetched after the last periodic save would otherwise be lost.
            self.saveDb()

    def _collect_usernames(self):
        """Walk the chain from block 0 and return every username seen."""
        usernames = set()
        nextHash = self.twister.getblockhash(0)
        while True:
            block = self.twister.getblock(nextHash)
            self.db.lastBlockHash = block['hash']
            before = len(usernames)
            usernames.update(block['usernames'])
            if len(usernames) > before:
                print('Found {0} usernames'.format(len(usernames)))
            if "nextblockhash" not in block:
                break
            nextHash = block["nextblockhash"]
        return usernames

    def saveDb(self):
        """Pickle the database to ``self.dbFile``.

        If the dump is interrupted or fails, it is attempted once more so
        the file is not left truncated, then the original error is re-raised.
        """
        print("Saving db")
        try:
            self._dump_db()
        except (KeyboardInterrupt, Exception):
            print("Closing db before quitting...")
            # The with-block already closed the handle, so just dump again.
            # (The original tested a file variable that may never be bound.)
            self._dump_db()
            # once clean, re-raise
            raise

    def _dump_db(self):
        """Write the pickled database, replacing the file's previous content."""
        with open(self.dbFile, 'wb') as dbFile:
            pickle.dump(self.db, dbFile)

    def get_posts_since(self, username, dateObj, maxNum=1000):
        """Return the posts of *username* made at or after *dateObj*, oldest first.

        At most *maxNum* posts are requested from the daemon.  *dateObj* is
        converted with ``time.mktime``, i.e. read as local time.
        """
        import bisect

        since_epoch = time.mktime(dateObj.timetuple())
        all_posts = self.twister.getposts(maxNum, [{'username': username}])
        all_posts = sorted(all_posts, key=lambda x: x['userpost']['time'])
        post_times = [post['userpost']['time'] for post in all_posts]
        # The hand-written binary search never ran (its loop guard was
        # always false), so the upper half of the list was returned.
        return all_posts[bisect.bisect_left(post_times, since_epoch):]

    def _dht_value(self, username, resource):
        """Return the single DHT value of *resource* for *username*, or None.

        None is returned unless the reply holds exactly one entry with a
        'p' -> 'v' path.
        """
        data = self.twister.dhtget(username, resource, 's')
        if len(data) == 1 and 'p' in data[0] and 'v' in data[0]['p']:
            return data[0]['p']['v']
        return None

    def _fetch_user_details(self, username):
        """Build a User holding avatar, profile fields and following list."""
        user = User()
        user.username = username

        avatar = self._dht_value(username, "avatar")
        if avatar is not None:
            user.avatar = avatar

        profile = self._dht_value(username, 'profile')
        if profile is not None:
            for key in ['location', 'url', 'bio', 'fullname']:
                if key in profile:
                    setattr(user, key, profile[key])

        user.following = self.twister.getfollowing(username)
        user.updateTime = datetime.datetime.now()
        return user
print str(block["height"]) + "\r", usernames = block["usernames"] for u in usernames: if not db.usernames.has_key(u): db.usernames[u] = User() if block.has_key("nextblockhash"): nextHash = block["nextblockhash"] else: break now = time.time() for u in db.usernames.keys(): if db.usernames[u].updateTime + cacheTimeout < now: print "getting avatar for", u, "..." d = commentchain.dhtget(u,"avatar","s") if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"): db.usernames[u].avatar = d[0]["p"]["v"] print "getting profile for", u, "..." d = commentchain.dhtget(u,"profile","s") if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"): db.usernames[u].fullname = d[0]["p"]["v"]["fullname"] db.usernames[u].location = d[0]["p"]["v"]["location"] db.usernames[u].updateTime = now cPickle.dump(db,open(dbFileName,"w")) from HTML import HTML
class TwisterScraper:
    """Collect user details from a twister daemon into a pickled database.

    Usernames are discovered by walking the block chain.  Profile, avatar
    and following list are then fetched per user and stored in
    ``self.db.users``, keyed by username.
    """

    # Users whose details are older than this are fetched again.
    CACHE_MAX_DURATION = datetime.timedelta(days=7)

    def __init__(self, dbPath, server='localhost', port=28332, user='******',
                 password='******', protocol='http'):
        """Connect to the daemon and load (or create) the database at *dbPath*."""
        self.serverUrl = '{protocol}://{user}:{passwd}@{server}:{port}'.format(
            protocol=protocol, server=server, port=port, user=user, passwd=password)
        self.twister = AuthServiceProxy(self.serverUrl)
        self.dbFile = dbPath
        self.locService = GeoLocationService()
        try:
            with open(self.dbFile, 'rb') as dbFile:
                # NOTE(security): unpickling runs code embedded in the file;
                # only open database files this tool wrote itself.
                self.db = pickle.load(dbFile)
        except FileNotFoundError:
            self.db = TwisterDb()
            self.saveDb()

    def get_user(self, username):
        """Return the stored user called *username*, or None if unknown."""
        return self.db.users.get(username)

    def scrape_users(self):
        """Fetch details for every new user and every user with stale data.

        The database is saved every fifth user and once more at the end.
        An HTTPException while fetching one user is reported and skipped.
        """
        usernames = self._collect_usernames()

        if not self.db.users:  # first run
            # NOTE(review): the original built placeholder User objects here
            # but never stored them anywhere; only the save had an effect.
            self.saveDb()

        now = datetime.datetime.now()
        known = set(self.db.users)
        need_refresh = {u for u in known
                        if self.db.users[u].updateTime + self.CACHE_MAX_DURATION < now}
        to_fetch = (usernames - known) | need_refresh
        total_to_fetch = len(to_fetch)

        for n, u in enumerate(to_fetch):
            try:
                user = self._fetch_user_details(u)
                # Fixed: the check and the call used `u`, the username
                # string, so geolocation never ran.
                if hasattr(user, 'location'):
                    try:
                        user.locate()
                    except MaxGeoRequestsException:
                        print("Could not locate '{0}' because of max request "
                              "limit reached".format(u))
                self.db.users[user.username] = user
                if n % 5 == 0:
                    self.saveDb()
                print("({line} of {total}) Fetched {user} ...".format(
                    user=u, line=n + 1, total=total_to_fetch))
            except HTTPException as e:
                print("Connection error retrieving user {0}: {1}".format(u, str(e)))

        if to_fetch:
            # Users fetched after the last periodic save would otherwise be lost.
            self.saveDb()

    def _collect_usernames(self):
        """Walk the chain from block 0 and return every username seen."""
        usernames = set()
        nextHash = self.twister.getblockhash(0)
        while True:
            block = self.twister.getblock(nextHash)
            self.db.lastBlockHash = block['hash']
            before = len(usernames)
            usernames.update(block['usernames'])
            if len(usernames) > before:
                print('Found {0} usernames'.format(len(usernames)))
            if "nextblockhash" not in block:
                break
            nextHash = block["nextblockhash"]
        return usernames

    def saveDb(self):
        """Pickle the database to ``self.dbFile``.

        If the dump is interrupted or fails, it is attempted once more so
        the file is not left truncated, then the original error is re-raised.
        """
        print("Saving db")
        try:
            self._dump_db()
        except (KeyboardInterrupt, Exception):
            print("Closing db before quitting...")
            # The with-block already closed the handle, so just dump again.
            # (The original tested a file variable that may never be bound.)
            self._dump_db()
            # once clean, re-raise
            raise

    def _dump_db(self):
        """Write the pickled database, replacing the file's previous content."""
        with open(self.dbFile, 'wb') as dbFile:
            pickle.dump(self.db, dbFile)

    def get_posts_since(self, username, dateObj, maxNum=1000):
        """Return the posts of *username* made at or after *dateObj*, oldest first.

        At most *maxNum* posts are requested from the daemon.  *dateObj* is
        converted with ``time.mktime``, i.e. read as local time.
        """
        import bisect

        since_epoch = time.mktime(dateObj.timetuple())
        all_posts = self.twister.getposts(maxNum, [{'username': username}])
        all_posts = sorted(all_posts, key=lambda x: x['userpost']['time'])
        post_times = [post['userpost']['time'] for post in all_posts]
        # The hand-written binary search never ran (its loop guard was
        # always false), so the upper half of the list was returned.
        return all_posts[bisect.bisect_left(post_times, since_epoch):]

    def _dht_value(self, username, resource):
        """Return the single DHT value of *resource* for *username*, or None.

        None is returned unless the reply holds exactly one entry with a
        'p' -> 'v' path.
        """
        data = self.twister.dhtget(username, resource, 's')
        if len(data) == 1 and 'p' in data[0] and 'v' in data[0]['p']:
            return data[0]['p']['v']
        return None

    def _fetch_user_details(self, username):
        """Build a User holding avatar, profile fields and following list."""
        user = User()
        user.username = username

        avatar = self._dht_value(username, "avatar")
        if avatar is not None:
            user.avatar = avatar

        profile = self._dht_value(username, 'profile')
        if profile is not None:
            for key in ['location', 'url', 'bio', 'fullname']:
                if key in profile:
                    setattr(user, key, profile[key])

        user.following = self.twister.getfollowing(username)
        user.updateTime = datetime.datetime.now()
        return user