from util.extractor import extract_information

# Script: crawl every requested profile with Chrome and dump each profile's
# extracted information to ./profiles/<username>.json.

# Chrome is started without DNS prefetching and without the sandbox, and is
# asked for en-US pages both via the command line and via browser prefs.
chrome_options = Options()
chrome_options.add_argument('--dns-prefetch-disable')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--lang=en-US')
chrome_options.add_experimental_option('prefs', {'intl.accept_languages': 'en-US'})
browser = webdriver.Chrome('./assets/chromedriver', chrome_options=chrome_options)

# makes sure slower connections work as well
browser.implicitly_wait(25)

try:
    usernames = get_all_user_names()

    for username in usernames:
        print('Extracting information from ' + username)
        information = extract_information(browser, username)

        with open('./profiles/' + username + '.json', 'w') as fp:
            json.dump(information, fp)

except KeyboardInterrupt:
    print('Aborted...')

finally:
    # Nested so the driver is always shut down, even when clearing cookies
    # fails (e.g. the browser window is already gone).
    try:
        browser.delete_all_cookies()
    finally:
        # quit() ends the whole WebDriver session, whereas close() only
        # closes the current window and would leave the driver running.
        browser.quit()
from util.cli_helper import get_all_user_names from util.extractor import extract_information from util.settings import Settings display = Display(visible=0, size=(1024, 768)) display.start() browser = webdriver.Firefox() # makes sure slower connections work as well print ("Waiting 10 sec") browser.implicitly_wait(10) try: usernames = get_all_user_names() for username in usernames: print('Extracting information from ' + username) information, user_commented_list = extract_information(browser, username) with open(Settings.profile_location + '/' + username + '.json', 'w') as fp: json.dump(information, fp) print ("Number of users who commented on his/her profile is ", len(user_commented_list),"\n") file = open(Settings.profile_commentors_location + '/' + username + "_commenters.txt","w") for line in user_commented_list: file.write(line) file.write("\n") file.close() print ("\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n")
try: browser = init_chromedriver(chrome_options, capabilities) except Exception as exc: print(exc) sys.exit() try: if len(Settings.login_username) != 0: login(browser, Settings.login_username, Settings.login_password) except Exception as exc: print("Error login user: "******"Error with user " + username) sys.exit(1) Datasaver.save_profile_json(username, information) print("Number of users who commented on their profile is ", len(user_commented_list), "\n")
def slackSendMsg(msg, webhook_url=None, timeout=10):
    """Post ``msg`` to Slack through an incoming webhook.

    Args:
        msg: Text to send; it is prefixed with ``#InstagramCrawl:``.
        webhook_url: Incoming-webhook URL to post to. When omitted, the URL
            that used to be hard-coded in this function is used.
        timeout: Seconds to wait for Slack before ``requests`` gives up, so
            a stalled connection cannot block the crawl indefinitely.

    Raises:
        ValueError: If Slack answers with a status code other than 200.
    """
    if webhook_url is None:
        # NOTE(review): this URL is a credential committed to source control;
        # it should be rotated and supplied from configuration instead.
        webhook_url = 'https://hooks.slack.com/services/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'

    slack_data = {'text': '#InstagramCrawl:\n' + msg}
    response = requests.post(
        webhook_url,
        data=json.dumps(slack_data),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise ValueError(
            'Request to slack returned an error %s, the response is:\n%s'
            % (response.status_code, response.text)
        )


# Script: crawl every requested hashtag, save what was extracted, and forward
# any error message reported by the extractor to Slack.
try:
    # The helper is named for user names, but its results are used as tags.
    tags = get_all_user_names()

    for tag in tags:
        print('Extracting information from #' + tag)
        information, errMsg = extract_tag_information(browser, tag, Settings.limit_amount)
        Datasaver.save_profile_json(tag, information)

        if errMsg is not None and errMsg != '':
            # Send msg to slack
            slackSendMsg(errMsg)

except KeyboardInterrupt:
    print('Aborted...')

except Exception as e:
    # Any other failure is reported to Slack instead of being printed.
    slackSendMsg(str(e))
def slackSendMsg(msg, webhook_url=None, timeout=10):
    """Post ``msg`` to Slack through an incoming webhook.

    Args:
        msg: Text to send; it is prefixed with ``#InstagramCrawl:``.
        webhook_url: Incoming-webhook URL to post to. When omitted, the URL
            that used to be hard-coded in this function is used.
        timeout: Seconds to wait for Slack before ``requests`` gives up, so
            a stalled connection cannot block the crawl indefinitely.

    Raises:
        ValueError: If Slack answers with a status code other than 200.
    """
    if webhook_url is None:
        # NOTE(review): this URL is a credential committed to source control;
        # it should be rotated and supplied from configuration instead.
        webhook_url = 'https://hooks.slack.com/services/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'

    slack_data = {'text': '#InstagramCrawl:\n' + msg}
    response = requests.post(
        webhook_url,
        data=json.dumps(slack_data),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise ValueError(
            'Request to slack returned an error %s, the response is:\n%s'
            % (response.status_code, response.text)
        )


# Script: crawl every requested hashtag, save what was extracted, and forward
# any error message reported by the extractor to Slack.
try:
    # The helper is named for user names, but its results are used as tags.
    tags = get_all_user_names()

    for tag in tags:
        print('Extracting information from #' + tag)
        information, errMsg = extract_tag_information(browser, tag, Settings.limit_amount)
        Datasaver.save_profile_json(tag, information)

        if errMsg is not None and errMsg != '':
            # Send msg to slack
            slackSendMsg(errMsg)

except KeyboardInterrupt:
    print('Aborted...')

except Exception as e:
    # Any other failure is reported to Slack instead of being printed.
    slackSendMsg(str(e))