def get_top_subreddits(min_subscribers=500000):
    """Scraper for redditlist.com.

    Walks the paginated listing and collects subreddits from the
    subscriber column until the first entry with fewer than
    ``min_subscribers`` subscribers is encountered, or until a page no
    longer contains any subscriber entries.

    :param min_subscribers: filter for subreddits (int)
    :return: list of subreddits with at least ``min_subscribers``,
        sorted by rank (List[Subreddit])
    """
    pagination_template = "/?page={}"
    url = "http://www.redditlist.com{page}"
    subs = []
    page = 1
    while True:
        response = requests.get(
            url.format(page=pagination_template.format(page)))
        soup = BeautifulSoup(response.text, "html.parser")
        listings = soup.find_all("div", attrs={"class": "span4 listing"})

        # subscriber list is the middle one
        if len(listings) > 1:
            items = listings[1].find_all("div", {"class": "listing-item"})
        else:
            items = []

        if not items:
            # No subscriber entries on this page (ran past the last page or
            # the markup changed). Stop here rather than raising IndexError
            # or requesting further pages forever.
            break

        for sub in items:
            attrs = sub.attrs
            stat_text = sub.find("span", {"class": "listing-stat"}).get_text()
            num_subscribers = int(stat_text.replace(",", ""))

            # The scan stops at the first entry below the threshold.
            # NOTE(review): this presumes the site lists entries in
            # descending subscriber order -- confirm.
            if num_subscribers < min_subscribers:
                return sorted(subs, key=attrgetter("rank"))

            rank_text = sub.find("span", {"class": "rank-value"}).get_text()
            link = sub.find("span", {"class": "subreddit-url"}).find("a")
            subs.append(
                subreddit(
                    name=attrs["data-target-subreddit"],
                    rank=int(rank_text),
                    url=link.get("href"),
                    subscribers=num_subscribers,
                    type=attrs["data-target-filter"],
                ))
        page += 1

    return sorted(subs, key=attrgetter("rank"))
def random_submission():
    """Repost one randomly chosen high-scoring submission.

    Steps, in order:

    1. Build a one-day time window ending ``NUMBER_DAYS_FOR_POST_TO_BE_OLD``
       days ago.
    2. Pick the subreddits to search: ``SUBREDDIT_LIST`` when it is
       non-empty, otherwise the result of ``get_top_subreddits()``.
    3. For the first ``TOP_SUBREDDIT_NUM`` subreddits, fetch submissions in
       the window via ``get_submissions`` and keep those whose ``score`` is
       at least ``MIN_SCORE``.
    4. Choose one of those posts at random, skip it if its title matches one
       of the account's 20 newest submissions, otherwise submit the same
       title and selftext/url to the same subreddit.

    Returns ``None`` in every case. ``praw.exceptions.APIException`` raised
    while submitting is propagated; any other exception raised while
    submitting is logged and swallowed.

    NOTE(review): this file contains a second ``def random_submission``
    further down; if both live at module level the later definition
    replaces this one -- confirm which is intended to be used.
    """
    log.info("making random submission")

    # Get a random submission from a random subreddit
    end_date = datetime.datetime.now() - datetime.timedelta(
        days=NUMBER_DAYS_FOR_POST_TO_BE_OLD)
    start_date = end_date - datetime.timedelta(days=1)
    # NOTE(review): now() is naive local time but is differenced against a
    # naive 1970-01-01, so these values differ from true Unix timestamps by
    # the host's UTC offset -- confirm whether get_submissions cares.
    epoch = datetime.datetime(1970, 1, 1)
    end_ts = int((end_date - epoch).total_seconds())
    start_ts = int((start_date - epoch).total_seconds())
    log.info(start_date)
    log.info(end_date)
    log.info(start_ts)
    log.info(end_ts)

    log.info("choosing subreddits")
    if SUBREDDIT_LIST:
        log.info('using SUBREDDIT_LIST: {}'.format(SUBREDDIT_LIST))
        # Only the name is real here; the other fields are placeholders.
        subreddits = [
            subreddit(name=subname,
                      rank=1,
                      url="https://example.com",
                      subscribers=1000,
                      type="what") for subname in SUBREDDIT_LIST
        ]
    else:
        log.info("using get_top_subreddits")
        subreddits = get_top_subreddits()

    total_posts = []
    for sub in subreddits[:TOP_SUBREDDIT_NUM]:
        log.info("\n{}\n{}".format("#" * 20, sub))
        tops = get_submissions(start_ts, end_ts, sub.name)
        big_upvote_posts = [
            item for item in tops if item["score"] >= MIN_SCORE
        ]
        total_posts += big_upvote_posts
        log.info("found {} posts with score >= {}".format(
            len(big_upvote_posts), MIN_SCORE))

    if not total_posts:
        # random.choice() raises IndexError on an empty sequence.
        log.error("no posts with score >= {} found, nothing to repost".format(
            MIN_SCORE))
        return

    post_to_repost = random.choice(total_posts)
    rand_sub = api.submission(id=post_to_repost["id"])

    # Do not repost something this account submitted recently.
    own_user = api.redditor(REDDIT_USERNAME)
    for submission in own_user.submissions.new(limit=20):
        if submission.title == rand_sub.title:
            log.error(
                "I had posted the post I was just about to make in the last 20 posts, I'm not posting it again."
            )
            return
    log.info(rand_sub.title)

    if not rand_sub:
        log.error("something broke")
        return

    try:
        # Check if we're reposting a selfpost or a link post.
        # Set the required params accordingly, and reuse the content
        # from the old post
        log.info("submission title: " + rand_sub.title)
        # Log the same identifier that is used for the submit call below.
        log.info("posting to: {}".format(rand_sub.subreddit.display_name))
        if rand_sub.is_self:
            params = {
                "title": rand_sub.title,
                "selftext": rand_sub.selftext,
            }
        else:
            params = {"title": rand_sub.title, "url": rand_sub.url}

        # Submit the same content to the same subreddit.
        api.subreddit(rand_sub.subreddit.display_name).submit(**params)
    except praw.exceptions.APIException:
        raise
    except Exception as e:
        # Best effort: a failed repost must not stop the caller, but it
        # should be visible at error level.
        log.error("submission failed: {}".format(e))
def random_submission():
    """Repost one randomly chosen high-scoring submission, tracking reposts.

    Steps, in order:

    1. Build a one-day time window ending ``NUMBER_DAYS_FOR_POST_TO_BE_OLD``
       days ago and record a ``session`` entry in the ``DB_PERM`` TinyDB
       table.
    2. Pick the subreddits to search: ``SUBREDDIT_LIST`` when it is
       non-empty, otherwise ``get_top_subreddits()``; drop any whose
       lower-cased, stripped name is in ``DISALLOWED_SUBS``.
    3. For the first ``TOP_SUBREDDIT_NUM`` subreddits, fetch submissions in
       the window via ``get_submissions`` and keep those whose ``score`` is
       at least ``MIN_SCORE``.
    4. Draw random candidates (up to 10 tries) until one is found whose id
       is not yet recorded in the ``DB_PERM`` table.
    5. Skip the candidate if its title matches one of the account's 20
       newest submissions; otherwise submit it to the same subreddit
       (title optionally rewritten when ``DO_WE_SPIN_TITLES`` is set) and
       record its id in both ``db.json`` and ``DB_PERM``.

    Returns ``None`` in every case. ``praw.exceptions.APIException`` raised
    while submitting is propagated; any other exception raised while
    submitting or recording is logged and swallowed.
    """
    log.info("making random submission")

    # Get a random submission from a random subreddit
    end_date = datetime.datetime.now() - datetime.timedelta(
        days=NUMBER_DAYS_FOR_POST_TO_BE_OLD)
    start_date = end_date - datetime.timedelta(days=1)
    # NOTE(review): now() is naive local time but is differenced against a
    # naive 1970-01-01, so these values differ from true Unix timestamps by
    # the host's UTC offset -- confirm whether get_submissions cares.
    epoch = datetime.datetime(1970, 1, 1)
    end_ts = int((end_date - epoch).total_seconds())
    start_ts = int((start_date - epoch).total_seconds())
    log.info(start_date)
    log.info(end_date)
    log.info(start_ts)
    log.info(end_ts)

    db_perm = TinyDB(DB_PERM).table('table')
    with transaction(db_perm) as tr:
        tr.insert({'session': str(time.time())})

    log.info("choosing subreddits")
    if SUBREDDIT_LIST:
        log.info('using SUBREDDIT_LIST: {}'.format(SUBREDDIT_LIST))
        # Only the name is real here; the other fields are placeholders.
        subreddits = [
            subreddit(name=subname,
                      rank=1,
                      url="https://example.com",
                      subscribers=1000,
                      type="what") for subname in SUBREDDIT_LIST
        ]
    else:
        log.info("using get_top_subreddits")
        subreddits = get_top_subreddits()

    # filter disallowed
    subreddits = [
        y for y in subreddits
        if y.name.lower().strip() not in DISALLOWED_SUBS
    ]

    total_posts = []
    for sub in subreddits[:TOP_SUBREDDIT_NUM]:
        log.info("\n{}\n{}".format("#" * 20, sub))
        tops = get_submissions(start_ts, end_ts, sub.name)
        big_upvote_posts = [
            item for item in tops if item["score"] >= MIN_SCORE
        ]
        total_posts += big_upvote_posts
        log.info("found {} posts with score >= {}".format(
            len(big_upvote_posts), MIN_SCORE))

    if not total_posts:
        # random.choice() raises IndexError on an empty sequence.
        log.error("no posts with score >= {} found, nothing to repost".format(
            MIN_SCORE))
        return

    # Pick a post that has not been submitted before (by this or any other
    # thread sharing the DB). Give up after max_tries random draws.
    print('[SAN] Sumbitting now')
    max_tries = 10
    for attempt in range(1, max_tries + 1):
        post_to_repost = random.choice(total_posts)
        print('[SAN] Checking submission no. ' + str(attempt) + ' post_id=' +
              str(post_to_repost["id"]))
        already_submitted = db_perm.search(
            Query().id == post_to_repost["id"])
        if not already_submitted:
            break
        print('[SAN] [ALREADY SUBMITTED] ' + already_submitted[0]['id'])
    else:
        # Every draw hit an already-submitted post: do not repost.
        print('[SAN] All submissions are already submitted')
        return
    print('[SAN] Post available')

    rand_sub = api.submission(id=post_to_repost["id"])

    # Do not repost something this account submitted recently.
    own_user = api.redditor(REDDIT_USERNAME)
    for submission in own_user.submissions.new(limit=20):
        if submission.title == rand_sub.title:
            log.error(
                "I had posted the post I was just about to make in the last 20 posts, I'm not posting it again."
            )
            return
    log.info(rand_sub.title)

    if not rand_sub:
        log.error("something broke")
        return

    try:
        # Check if we're reposting a selfpost or a link post.
        # Set the required params accordingly, and reuse the content
        # from the old post
        title = rand_sub.title
        if DO_WE_SPIN_TITLES:
            title = rewrite_text(SPINNER_API, title)

        log.info("submission title: " + title)
        # Log the same identifier that is used for the submit call below.
        log.info("posting to: {}".format(rand_sub.subreddit.display_name))
        if rand_sub.is_self:
            params = {"title": title, "selftext": rand_sub.selftext}
        else:
            # DO_WE_REUPLOAD_TO_IMGUR made no difference in the original:
            # both of its branches built exactly these params (the author
            # noted the Imgur path still needs work), so it is not
            # consulted here.
            params = {
                "title": title,
                "url": append_params_to_url(DO_WE_ADD_PARAMS_REUPLOAD,
                                            rand_sub.url),
            }

        # Submit the same content to the same subreddit.
        api.subreddit(rand_sub.subreddit.display_name).submit(**params)

        # add the id to the tracked list to avoid resubmission by other
        # threads
        table = TinyDB('db.json').table('table')
        with transaction(table) as tr:
            tr.insert({'id': rand_sub.id})

        # also record it in the permanent DB consulted by the retry loop
        with transaction(db_perm) as tr:
            tr.insert({'id': rand_sub.id})
    except praw.exceptions.APIException:
        raise
    except Exception as e:
        # Best effort: a failed repost must not stop the caller, but it
        # should be visible at error level.
        log.error("submission failed: {}".format(e))