def build_post(new_users, number):
    """Assemble the title and markdown body for a user re-add log post.

    new_users: iterable of usernames to list in the post.
    number: member number assigned to the first user; the rest get
        consecutive numbers.
    Returns a (title, body) tuple.
    """
    title = "User re-add"
    if config.title_date:
        title = "{} - {}".format(helpers.date_string(), title)
    if config.title_number:
        # Bump and persist the running re-add counter used in the title.
        stats = helpers.load_data("stats")
        stats["re-add count"] += 1
        helpers.write_data("stats", stats)
        title += " #{}".format(stats["re-add count"])

    # One bullet per user, numbered consecutively starting at `number`.
    lines = [
        r"- \#{} /u/{}".format(member_num, user)
        for member_num, user in enumerate(new_users, start=number)
    ]
    if config.stats_section:
        added = len(new_users)
        member_cap = number + added - 1
        lines.append(
            "\n# Info:\n\n- 0 users kicked\n- {} users added\n- Membercap: {} (+{})".format(
                added, member_cap, added
            )
        )
    return title, " \n".join(lines)
def replace(old_un, new_un):
    """Swap old_un for new_un in the stored user list and sync Reddit state.

    Persists the edited list, re-flairs and re-adds the replacement user,
    carries over participation credit, and refreshes the sidebar if enabled.
    In testing mode the Reddit calls are skipped and a summary is printed.
    """
    users = helpers.load_data("user_list")
    idx = users.index(old_un)
    users[idx] = new_un
    helpers.write_data("user_list", users)
    reddit = helpers.initialize_reddit()
    if config.testing:
        print(
            "Flaired and removed /u/{}; Flaired and added /u/{}".format(old_un, new_un)
        )
    else:
        try:
            reddit.subreddit(config.target_subreddit).flair.set(
                redditor=old_un, text="Moved to /u/{}".format(new_un)
            )
            reddit.subreddit(config.target_subreddit).contributor.remove(old_un)
        except (praw.exceptions.PRAWException, prawcore.PrawcoreException):
            # Most likely a deleted account; nothing left to clean up.
            pass
        main.flair_users(
            [new_un], reddit, config.flair_normal,
            number_adjustment=users.index(new_un)
        )
        main.add_users([new_un], reddit)
        # Carry over participation credit so the new account isn't purged.
        participated = set(helpers.load_data("participated"))
        if old_un in participated:
            participated.add(new_un)
            helpers.write_data("participated", list(participated))
    if config.update_sidebar:
        updates.update_sidebar(users)
def main():
    """Daily run: record the day's participants and warn about idle regulars.

    Forwards inbox messages (if configured), merges new participants into
    the persisted "participated" set, prints a warning listing regulars who
    have not yet participated, and timestamps the run in "stats".
    """
    if config.forward_user:
        forward_messages.forward(config.forward_user)
    reddit = helpers.initialize_reddit()
    participated = set(helpers.load_data("participated"))
    stats = helpers.load_data("stats")
    participated = participated.union(
        get_participants(reddit, stats["last_full_run"]))
    helpers.write_data("participated", list(participated))
    # Regulars are tracked separately; surface any who are still inactive.
    warn = [name for name in config.regulars_list if name not in participated]
    if warn:
        # Idiom fix: single join instead of a manual separator-accumulator loop.
        print("These regulars haven't participated this week: " + ", ".join(warn))
    # Mods are excluded from the participant headcount.
    print("Amount of participants this week:",
          len(participated - set(config.mods_list)))
    # Back-date by a minute to cover accidental gaps due to execution time.
    stats["last_daily_run"] = time.time() - 60
    helpers.write_data("stats", stats)
def new_sub():
    """Bootstrap a freshly created subreddit from the persisted user list.

    Verifies bot permissions, flairs and adds every stored member, applies
    the optional title/sidebar updates, and resets participation tracking.
    """
    reddit = helpers.initialize_reddit()
    main.check_permissions(reddit)
    members = helpers.load_data("user_list")
    main.flair_users(members, reddit, config.flair_normal)
    if config.change_title:
        updates.change_title()
    if config.update_sidebar:
        updates.update_sidebar(members)
    main.add_users(members, reddit)
    # Fresh sub: nobody has participated yet.
    helpers.write_data("participated", [])
def main():
    """Manually prune a hand-picked set of entries from the candidate pool.

    Indices in `users_to_remove` are 2-based positions into a snapshot of
    the pool (hence the `- 2` offset); removed users are also blacklisted
    so they are not fetched again. The trimmed pool is persisted back.
    """
    potential_adds = helpers.load_data("potential_adds", {
        "users": [],
        "urls": []
    })
    # Snapshot, because we mutate potential_adds while indexing by the
    # original positions.
    snapshot = deepcopy(potential_adds)
    users_to_remove = [
        3, 4, 5, 8, 9, 11, 12, 13, 14, 22, 24, 25, 26, 28, 29, 30, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 44, 49, 55, 56, 57, 58, 59, 60, 64
    ]
    for pos in users_to_remove:
        user = snapshot["users"][pos - 2]
        # BUG FIX: `+=` with a string RHS extends a list with the string's
        # individual *characters*; append the whole username instead.
        # (Assumes redditor_blacklist is a list of names — confirm in config.)
        config.redditor_blacklist.append(user)
        potential_adds["users"].remove(user)
        potential_adds["urls"].remove(snapshot["urls"][pos - 2])
    helpers.write_data("potential_adds", potential_adds)
    print("Amount remaining: " + str(len(potential_adds["users"])))
def acquire():
    """Fetch a fixed-size batch of candidate users and persist it.

    Pulls `batch_size` fresh users (excluding current members), stores them
    in the potential_adds pool, and writes a space-separated "u/<name>"
    mention string for manual review.
    """
    reddit = initialize_reddit()
    current_members = load_data("user_list")
    batch_size = 65
    candidates = {"users": [], "urls": []}
    fresh_users, fresh_urls = get_new_users(reddit, batch_size, current_members)
    print(fresh_users, fresh_urls)
    mentions = ""
    try:
        for idx in range(batch_size):
            candidates["users"].append(fresh_users[idx])
            candidates["urls"].append(fresh_urls[idx])
            mentions += " u/" + fresh_users[idx]
    except Exception as exc:
        # you probably deleted potential_adds.json, it needs to be
        # {"users": [], "urls": []}
        print(vars(exc))
    write_data("potential_adds", candidates)
    helpers.write_data_txt("formatted_users", mentions)
def main():
    """Populate an empty community from the saved potential_adds pool.

    Refuses to run if a user list already exists (override with
    --ignore-active-community), builds and submits the log post, adds and
    flairs the new users, and resets all tracking state.

    Raises:
        ActiveCommunity: when the stored user list is non-empty and the
            override flag is absent.
    """
    user_list = helpers.load_data("user_list")
    reddit = helpers.initialize_reddit()
    stats = helpers.load_data("stats")
    if user_list and ("--ignore-active-community" not in sys.argv):
        # Safety valve: don't clobber an already-populated community.
        msg = "Userlist is non-empty. Exiting. Call with --ignore-active-community to run anyway"
        helpers.write_log_trash("Failed {}".format(helpers.date_string()), msg)
        raise ActiveCommunity(msg)
    # BUG FIX: potential_adds is stored as {"users": [...], "urls": [...]}
    # (see acquire()); tuple-unpacking the dict would bind its *keys*
    # ("users", "urls") instead of the two lists.
    potential_adds = helpers.load_data("potential_adds")
    new_users = potential_adds["users"]
    new_user_urls = potential_adds["urls"]
    helpers.write_log_trash("New users {}".format(helpers.date_string()),
                            new_users)
    post_text_items = [daddy.build_new_text(new_users, 1), "\n"]
    if config.entry_comments:
        post_text_items.append("[Comments for entry]({})".format(
            daddy.build_and_post_gist(new_users, new_user_urls)))
    if config.stats_section:
        post_text_items.append("# Info:\n\n")
        post_text_items.append("- {} users added".format(len(new_users)))
        diff = len(new_users)
        change = "+{}".format(diff) if diff >= 0 else str(diff)
        post_text_items.append("- Membercap: {} ({})".format(
            len(new_users), change))
    post_text = "\n".join(post_text_items)
    title = config.main_log_title
    if config.title_date:
        title = helpers.date_string() + " - " + title
    if config.title_number:
        stats["log_count"] += 1
        title += " #{}".format(stats["log_count"])
    daddy.make_post(title, post_text, reddit)
    if config.change_title:
        updates.change_title()
    daddy.add_users(new_users, reddit)
    daddy.flair_users(new_users, reddit, config.flair_new)
    if config.update_sidebar:
        updates.update_sidebar(new_users)
    stats["last_full_run"] = time.time()
    helpers.write_data("stats", stats)
    helpers.write_data("user_list", new_users)
    helpers.write_data("participated", [])
# --- BeautifulSoup extraction (continuation: `article`, `text`, and
# `page_string` are defined earlier in this script, outside this view) ---
text['pre'] = []
text['imgsrc'] = []
for tag in article.contents:
    # multiple if statements here to make it easier to read
    if tag is not None and tag.name is not None:
        if tag.name == "p":
            text['p'].append(tag.text)
        elif tag.name == 'h1':
            text['h1'].append(tag.text)
        elif tag.name == 'h3':
            text['h3'].append(tag.text)
        elif tag.name == 'pre':
            text['pre'].append(tag.text)
# Collect image sources separately from the text tags.
for tag in article.findAll('img'):
    text['imgsrc'].append(tag['src'])
helpers.write_data('bs', text)

##
## LXML
##
import lxml.html

page = lxml.html.fromstring(page_string)
post = page.find_class('entry-content')[0]  # 0 since only one tag with that class
text = {}
text['p'] = []
text['h1'] = []
text['h3'] = []
text['pre'] = []
text['imgsrc'] = []
# test_content is needed to get all of the text within the tag, not just on the top level
def main():
    """Full weekly cycle: prune non-participants, backfill from saved
    candidates, flair everyone, and publish the log post.

    Exits early (sys.exit(1)) if run less than 23 hours after the previous
    full run unless --override_time is given.
    """
    if config.delay:
        # Randomized start delay of up to max_delay minutes.
        time.sleep(random.randrange(0, config.max_delay * 60))
    daily.main()
    reddit = (helpers.initialize_reddit()
              )  # will exit if Reddit isn't properly initialized
    check_permissions(
        reddit
    )  # will check if bot has all needed permissions; exit on failure
    participated = set(helpers.load_data("participated"))
    stats = helpers.load_data("stats")
    user_list = helpers.load_data("user_list")
    helpers.write_log_trash("User list {}".format(helpers.date_string()),
                            user_list)
    # Guard against accidental double-runs within one ~24h window.
    if stats["last_full_run"] + 23 * 60 * 60 > time.time():
        if "--override_time" not in sys.argv:
            msg = 'Less than 23 hours since last run. Exiting. Run with "--override_time" as an option to disregard'
            print(msg)
            helpers.write_log_trash("Failed {}".format(helpers.date_string()),
                                    msg)
            sys.exit(1)
    # Split current members into keepers and those to remove.
    updated_list, not_participated = segregate_users(user_list, participated)
    helpers.write_log_trash(
        "Not participated {}".format(helpers.date_string()), not_participated)
    flair_and_remove(not_participated, reddit)
    flair_users(updated_list, reddit, config.flair_normal)
    # Re-validate previously saved candidates, dropping duplicates and
    # anyone who is already a member.
    saved_users, saved_urls = check_saved_users()
    valid_users = []
    valid_urls = []
    for i in range(len(saved_users)):
        if (valid_user(saved_users[i], reddit)
                and saved_users[i] not in valid_users
                and saved_users[i] not in updated_list):
            valid_users.append(saved_users[i])
            valid_urls.append(saved_urls[i])
    # helpers.delete_datafile("potential_adds")
    total_needed_users = len(valid_users)
    # NOTE(review): zero still-needed users are requested, so the saved
    # candidates alone fill the cap — confirm this is intentional.
    num_still_needed_users = 0
    new_users, new_user_urls = get_new_users(reddit, num_still_needed_users,
                                             updated_list)
    new_users = valid_users + new_users
    new_user_urls = valid_urls + new_user_urls
    # new_users, new_user_urls = hack_shuffle(new_users, new_user_urls)
    # Cap both lists at the number of validated candidates.
    new_users = new_users[:total_needed_users]
    new_user_urls = new_user_urls[:total_needed_users]
    helpers.write_log_trash("New users {}".format(helpers.date_string()),
                            new_users)
    post_text_lines = [
        build_removed_text(user_list, not_participated),
        "\n",
        build_new_text(new_users, len(updated_list) + 1),
        "\n",
    ]
    if config.entry_comments:
        try:
            post_text_lines.append("\n[Comments for entry]({})".format(
                build_and_post_gist(new_users, new_user_urls)))
        except Exception:  # can fail!
            pass  # lol
    if config.stats_section:
        post_text_lines.append("\n# Info:\n")
        post_text_lines.append("- {} users kicked".format(
            len(not_participated)))
        post_text_lines.append("- {} users added".format(len(new_users)))
        diff = len(new_users) - len(not_participated)
        change = "+{}".format(diff) if diff >= 0 else str(diff)
        post_text_lines.append("- Membercap: {} ({})".format(
            (len(updated_list) + len(new_users)), change))
    post_text = "\n".join(post_text_lines)
    title = config.main_log_title
    if config.title_date:
        title = helpers.date_string() + " - " + title
    if config.title_number:
        stats["log_count"] += 1
        title += " #{}".format(stats["log_count"])
    make_post(title, post_text, reddit)
    if config.change_title:
        updates.change_title()
    add_users(new_users, reddit)
    # New users are numbered after the surviving members.
    flair_users(new_users,
                reddit,
                config.flair_new,
                number_adjustment=len(updated_list))
    updated_list_copy = updated_list[:]
    updated_list_copy.extend(new_users)
    if config.update_sidebar:
        updates.update_sidebar(updated_list_copy)
    stats["last_full_run"] = time.time()
    helpers.write_data("stats", stats)
    helpers.write_data("user_list", updated_list_copy)
    helpers.write_data("participated", [])
# NOTE(review): fragment of a larger per-article sync loop — the `continue`
# below targets an enclosing `for` whose header is outside this view;
# `settings`, `api`, `last_sync`, `src_article`, `dst_article`, and
# `sync_dates` are bound by the surrounding code.
url = '{}/{}/articles/{}/votes.json'.format(settings['src_root'],
                                            settings['locale'], src_article)
votes = api.get_resource_list(url)
if not votes:
    print('- no votes found')
    continue
# Mirror every vote cast since the last sync onto the destination article.
for vote in votes:
    if last_sync < arrow.get(vote['created_at']):
        print('- adding vote {} to article {}'.format(
            vote['id'], dst_article))
        # value == -1 is a downvote; anything else is treated as an upvote.
        if vote['value'] == -1:
            url = '{}/articles/{}/down.json'.format(
                settings['dst_root'], dst_article)
        else:
            url = '{}/articles/{}/up.json'.format(settings['dst_root'],
                                                  dst_article)
        payload = {
            'vote': {
                'user_id': vote['user_id'],
                'created_at': vote['created_at']
            }
        }
        response = api.post_resource(url, payload, status=200)
        if response is False:
            print('Skipping vote {}'.format(vote['id']))
# After all articles have been processed, record the sync timestamp.
utc = arrow.utcnow()
sync_dates['article_votes'] = utc.format()
write_data(sync_dates, 'sync_dates')
def insert_users_to_userlist(new_users):
    """Append `new_users` to the persisted user list and save it."""
    current = helpers.load_data("user_list")
    helpers.write_data("user_list", current + list(new_users))
# NOTE(review): fragment of a larger per-article sync loop — the `continue`
# statements target an enclosing `for` whose header is outside this view.
# New article path: ensure the author exists on the destination, then create.
src_article['author_id'] = verify_author(src_article['author_id'],
                                         settings['team_user'])
url = '{}/{}/sections/{}/articles.json'.format(
    settings['dst_root'], settings['locale'], dst_section)
payload = package_article(src_article)
new_article = api.post_resource(url, payload)
if new_article is False:
    print('Skipping article {}'.format(src_article['id']))
    continue
# Remember the src->dst id mapping for later comment/attachment syncs.
article_map[str(src_article['id'])] = new_article['id']
continue
# Existing article path: push an update only if edited since the last sync.
if last_sync < arrow.get(src_article['edited_at']):
    print('- updating article {} in destination section {}'.format(
        src_article['id'], dst_section))
    dst_article = article_map[str(src_article['id'])]
    url = '{}/articles/{}/translations/{}.json'.format(
        settings['dst_root'], dst_article, settings['locale'])
    payload = package_article(src_article, put=True)
    response = api.put_resource(url, payload)
    if response is False:
        print('Skipping article {}'.format(src_article['id']))
    continue
print('- article {} is up-to-date in destination section {}'.format(
    src_article['id'], dst_section))
# After the loop: persist sync bookkeeping and the redirect map.
utc = arrow.utcnow()
sync_dates['articles'] = utc.format()
write_data(sync_dates, 'sync_dates')
write_data(article_map, 'article_map')
write_js_redirects(article_map)
# Scrape one blog post with Selenium and dump the per-tag text via helpers.
import helpers
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

url = 'https://bigishdata.com/2017/05/11/general-tips-for-web-scraping-with-python/'
# NOTE(review): PhantomJS support is deprecated/removed in recent Selenium
# releases — this presumably relies on an older pinned version; confirm.
driver = webdriver.PhantomJS()
driver.get(url)
# The post body lives in the single 'entry-content' element.
elem = driver.find_element_by_class_name('entry-content')
text = {}
desired_tags = (u'p', u'h1', u'h3', u'pre')
# Collect the text of every occurrence of each desired tag, keyed by tag name.
for tag in desired_tags:
    tags = elem.find_elements_by_tag_name(tag)
    text[tag] = []
    for data in tags:
        text[tag].append(data.text)
helpers.write_data('selenium', text)
# NOTE(review): fragment of a larger per-article sync loop — the first line
# closes a URL `.format(...)` call begun outside this view, and the
# `continue` statements target an enclosing `for`.
src_article)
attachments = api.get_resource_list(url,
                                    list_name='article_attachments',
                                    paginate=False)
if not attachments:
    print('- no attachments found')
    continue
# Copy every attachment created since the last sync to the destination.
for src_attachment in attachments:
    if last_sync < arrow.get(src_attachment['created_at']):
        print('- adding new attachment {} to article {}'.format(
            src_attachment['file_name'], dst_article))
        print(src_attachment)
        url = '{}/articles/{}/attachments.json'.format(
            settings['dst_root'], dst_article)
        new_attachment = api.post_attachment(url, src_attachment)
        if new_attachment is False:
            print('Skipping attachment {}'.format(
                src_attachment['file_name']))
            continue
        # Track src->dst attachment ids and which article each belongs to.
        attachment_map[str(src_attachment['id'])] = new_attachment['id']
        attachment_article_map[str(src_attachment['id'])] = src_article
        continue
    print('- attachment {} is up to date'.format(
        src_attachment['file_name']))
# After the loop: persist sync bookkeeping.
utc = arrow.utcnow()
sync_dates['attachments'] = utc.format()
write_data(sync_dates, 'sync_dates')
write_data(attachment_map, 'attachment_map')
write_data(attachment_article_map, 'attachment_article_map')
def parse(self, response):
    """Scrapy callback: gather text for each desired tag and persist it.

    Fills self.text (tag name -> extracted words via self.words_from_tags),
    writes the result with helpers.write_data, then yields it.
    """
    # Removed dead code: the original constructed
    # `selector = Selector(response=response)` but never used it.
    for tag in self.desired_tags:
        self.text[tag] = self.words_from_tags(tag, response)
    helpers.write_data('scrapy', self.text)
    yield self.text  # how scrapy returns the json object you created
# NOTE(review): fragment of a larger per-comment sync loop — the `continue`
# statements target an enclosing `for` whose header is outside this view.
# New comment path: create it on the destination article.
print('- adding new comment {} to article {}'.format(
    src_comment['id'], dst_article))
url = '{}/articles/{}/comments.json'.format(
    settings['dst_root'], dst_article)
payload = package_comment(src_comment)
new_comment = api.post_resource(url, payload)
if new_comment is False:
    print('Skipping comment {}'.format(src_comment['id']))
    continue
# Track src->dst comment ids and which article each belongs to.
comment_map[str(src_comment['id'])] = new_comment['id']
comment_article_map[str(src_comment['id'])] = src_article
continue
# Existing comment path: push an update only if changed since the last sync.
if last_sync < arrow.get(src_comment['updated_at']):
    print('- updating comment {} in article {}'.format(
        src_comment['id'], dst_article))
    dst_comment = comment_map[str(src_comment['id'])]
    url = '{}/articles/{}/comments/{}.json'.format(
        settings['dst_root'], dst_article, dst_comment)
    payload = package_comment(src_comment, put=True)
    response = api.put_resource(url, payload)
    if response is False:
        print('Skipping comment {}'.format(src_comment['id']))
    continue
print('- comment {} is up to date'.format(src_comment['id']))
# After the loop: persist sync bookkeeping.
utc = arrow.utcnow()
sync_dates['comments'] = utc.format()
write_data(sync_dates, 'sync_dates')
write_data(comment_map, 'comment_map')
write_data(comment_article_map, 'comment_article_map')