# NOTE(review): the next five statements appear to be the tail of a per-board
# start-up loop whose header lies above this chunk -- confirm, and re-indent
# them under that loop when restoring the file's layout.
stdout.write(boardname + ' ')
stdout.flush()
boards.append(GET(boardname))
stdout.write('done!\n')
stdout.flush()

### MAIN LOOP
# Forever: refresh the board list, update every tracked board, report the
# estimated time until its next GET, and tweet once when that estimate falls
# inside the (MINUTES_GET_IS_UPON_US, MINUTES_GET_IS_SOON) window.
while True:
    print('Starting loop!')
    print('Updating boardnames...')
    boardnames = update_boards(boardnames)
    for board in boards:
        board.update()

        posts_to_go = board.next_GET - board.new_post['no']
        # Round to the first digit (a negative ndigits rounds left of the point).
        posts_to_go = round(posts_to_go, 1 - len(str(posts_to_go)))
        min_until = (board.time_GET_occurs - datetime.utcnow()).total_seconds() / 60.0

        # Built as one string so the output matches the former two-item print
        # statement exactly (label, separating space, duration).
        label = "Time until /" + board.board + '/ ' + GET_name(board.next_GET) + ": "
        print(label + " " + str(timedelta(minutes=round(min_until))))

        if MINUTES_GET_IS_UPON_US < min_until < MINUTES_GET_IS_SOON:
            if not board.tweeted:
                print('Tweeting!')
                msg = GET_TEXT.format(
                    board=board.board,
                    time_until=int(min_until),
                    get_name=GET_name(board.next_GET),
                    posts_to_go=int(posts_to_go),
                ).capitalize()
                try:
                    twitter.update_status(status=msg)
                except tweepy.error.TweepError as tweet_error:
                    # A failed tweet must not kill the polling loop; the flag
                    # stays unset so the next pass can try again.
                    print('Error: Tweepy: ' + str(tweet_error))
                else:
                    # Remember the tweet here: board.update() returns early on
                    # HTTP 304 without refreshing the flag, which would
                    # otherwise let the same GET be announced again.
                    board.tweeted = True
                    print(msg)
            else:
                print('Already tweeted.')
        sleep(2)  # to meet 4chan API timing standards
def update(self):
    """Refresh this board's newest/oldest post data and its GET estimate.

    One attempt does the following:

    1. Fetch ``threads.json`` with ``If-Modified-Since`` set from
       ``self.time_updated``; on HTTP 304 only ``time_updated`` is bumped
       and the method returns.
    2. Pick the threads with the largest and smallest ``last_modified`` and
       store the last post of each in ``self.new_post`` / ``self.old_post``.
    3. Compute ``self.next_GET`` (next multiple of 10**6 for boards in
       ``NON_DUBS_BOARDS``, ``repdigits.nextget`` otherwise).
    4. Set ``self.tweeted`` when a recent timeline status mentions the board.
    5. Derive ``self.posting_rate`` (posts per minute) and
       ``self.time_GET_occurs``, then stamp ``self.time_updated``.

    The whole attempt is tried up to 5 times. Request, Tweepy and
    invalid-JSON errors are reported, followed by a sleep (2 minutes for a
    404, otherwise 15 minutes); on the fifth failure the error is re-raised.
    Any other ``ValueError`` is re-raised immediately.
    """
    for attempt in range(0, 5):
        try:
            r = s.get(
                "http://a.4cdn.org/" + self.board + "/threads.json",
                headers={"If-Modified-Since": self.time_updated.strftime("%a, %d %b %Y %H:%M:%S GMT")},
            )
            if r.status_code == 304:
                # Not modified since the last update: keep the stored data.
                self.time_updated = datetime.utcnow()
                return
            r.raise_for_status()
            jthrs = json.loads(r.content)

            # Seed both candidates, then scan every page for the real extremes.
            newest_thr = jthrs[-1]["threads"][-1]
            oldest_thr = jthrs[0]["threads"][0]
            for page in jthrs:
                for thr in page["threads"]:
                    try:
                        if (
                            thr["last_modified"] > newest_thr["last_modified"]
                            and datetime.utcfromtimestamp(thr["last_modified"]) < datetime.utcnow()
                        ):
                            newest_thr = thr
                    except ValueError:
                        # Timestamp out of range: not a candidate for newest.
                        pass
                    if thr["last_modified"] < oldest_thr["last_modified"]:
                        oldest_thr = thr

            r = s.get("http://a.4cdn.org/" + self.board + "/thread/" + str(newest_thr["no"]) + ".json")
            r.raise_for_status()
            newest_thr = json.loads(r.content)
            self.new_post = newest_thr["posts"][-1]

            # The oldest thread may already have been pruned (404); fall back
            # to other old threads from the last page.  This loop has its own
            # counter so it cannot clobber the outer retry counter that the
            # exception handlers below rely on.
            for thread_try in range(0, 5):
                try:
                    sleep(1.0)  # one request per second
                    r = s.get("http://a.4cdn.org/" + self.board + "/thread/" + str(oldest_thr["no"]) + ".json")
                    r.raise_for_status()
                except HTTPError:
                    print(thread_try)
                    if r.status_code != 404 or thread_try == 4:
                        raise
                    oldest_thrs = sorted(jthrs[-1]["threads"], key=lambda x: x["last_modified"])
                    # Clamp so a last page with fewer than five threads
                    # cannot raise IndexError.
                    fallback = min(4 - thread_try, len(oldest_thrs) - 1)
                    oldest_thr = oldest_thrs[fallback]  # hope the rest haven't 404'd either
                    continue
                break
            oldest_thr = json.loads(r.content)
            self.old_post = oldest_thr["posts"][-1]

            if self.board in NON_DUBS_BOARDS:
                # Next multiple of one million strictly above the newest post.
                # Integer arithmetic also works for post numbers below
                # 1,000,000, where slicing the decimal string gave int("").
                self.next_GET = (self.new_post["no"] // 10 ** 6 + 1) * 10 ** 6
            else:
                self.next_GET = repdigits.nextget(self.new_post["no"], 6)

            self.tweeted = False
            timeline = twitter.user_timeline(count=20)
            for status in timeline:
                mentions_board = (
                    "/" + self.board + "/" in status.text
                    or "[" + self.board + "]" in status.text
                )
                if not mentions_board:
                    continue
                # Do not tweet if a human has mentioned the board in the past hour
                hours_ago = (datetime.utcnow() - status.created_at).total_seconds() / 3600.0
                if hours_ago < HOURS_HUMAN_TWEET_IS_RELEVANT:
                    self.tweeted = True
                    break

            elapsed_minutes = (self.new_post["time"] - self.old_post["time"]) / 60.0
            if elapsed_minutes > 0:
                self.posting_rate = (self.new_post["no"] - self.old_post["no"]) / elapsed_minutes
            else:
                self.posting_rate = 0.0

            newest_time = datetime.utcfromtimestamp(self.new_post["time"])
            if self.posting_rate != 0:
                self.time_GET_occurs = newest_time + timedelta(
                    minutes=(self.next_GET - self.new_post["no"]) / self.posting_rate
                )
            else:
                # No measurable rate: use a placeholder 10 days out.
                self.time_GET_occurs = newest_time + timedelta(days=10)
            self.time_updated = datetime.utcnow()
        except (RequestException, ConnectionError, HTTPError, Timeout) as e:
            # str(e) instead of e.message: the latter is whatever object was
            # passed to the exception and need not be a string, in which case
            # the substring test itself would raise.
            reason = str(e)
            if "404" in reason:
                print("ERROR: Internet error: 404. Sleeping 2 minutes...")
                sleep(2 * 60)
            else:
                print("ERROR: Internet error:  " + reason + " . Sleeping 15 minutes...")
                sleep(15 * 60)
            if attempt == 4:
                raise
            continue
        except tweepy.error.TweepError as TError:
            try:
                reason = TError.args[0][0]["message"]
            except (IndexError, KeyError, TypeError):
                # Payload is not the expected list of {"message": ...} dicts.
                reason = str(TError)
            if "Rate limit exceeded" in reason:
                print("Twitter rate limit exceeded when updating. Sleeping 15 minutes...")
            else:
                print("Error: Tweepy:  " + reason + " Sleeping 15 minutes...")
            sleep(15 * 60)
            if attempt == 4:
                raise
            continue
        except ValueError as e:
            if "json" not in str(e).lower():
                raise
            print("Error: Invalid JSON recieved from 4chan. Sleeping 15 minutes...")
            sleep(15 * 60)
            if attempt == 4:
                raise
            continue
        break