def get_info(username):
    """Scrape one profile, save it as JSON and return the extracted data.

    A Chrome driver is started through ``init_chromedriver`` using the
    module-level ``chrome_options`` and ``capabilities``. When
    ``Settings.login_username`` is set, ``login`` is called first; the
    profile is then read with ``extract_information`` and handed to
    ``Datasaver.save_profile_json``. Cookies are cleared and the browser is
    closed on every exit path once the driver exists.

    Args:
        username: Name of the profile to scrape.

    Returns:
        The profile information produced by ``extract_information``, or an
        empty list when the run was interrupted from the keyboard before
        extraction finished.

    Raises:
        SystemExit: If the driver cannot be started, or (with status 1) if
            login or extraction fails.
    """
    try:
        browser = init_chromedriver(chrome_options, capabilities)
    except Exception as exc:
        print(exc)
        sys.exit()

    # Bound before the try so the final return can never hit an unbound name.
    information = []
    try:
        try:
            # Truthiness also covers a login name of None, not just "".
            if Settings.login_username:
                login(browser, Settings.login_username,
                      Settings.login_password)
            information, _ = extract_information(
                browser, username, Settings.limit_amount)
        except Exception:
            # Not a bare ``except``: that would also catch KeyboardInterrupt
            # and turn Ctrl-C into "Error with user", bypassing the
            # 'Aborted...' handler below.
            print("Error with user " + username)
            sys.exit(1)

        Datasaver.save_profile_json(username, information)
    except KeyboardInterrupt:
        print('Aborted...')
    finally:
        browser.delete_all_cookies()
        browser.close()

    return information
def crawl_profile(usernames):
    """Extract and save the profile of every name in *usernames*.

    One browser, obtained from ``SetupBrowserEnvironment``, is shared by all
    profiles. For each name the profile is read with ``extract_information``
    (limited by ``Settings.limit_amount``), its ``to_dict()`` form is passed
    to ``Datasaver.save_profile_json``, and the number of commenting users is
    printed.

    Args:
        usernames: Iterable of profile names to crawl.
    """
    with SetupBrowserEnvironment() as browser:
        for profile_name in usernames:
            print('Extracting information from ' + profile_name)

            extracted = extract_information(
                browser, profile_name, Settings.limit_amount)
            profile, commenters = extracted

            Datasaver.save_profile_json(profile_name, profile.to_dict())
            print("Number of users who commented on their profile is ",
                  len(commenters), "\n")
def main(usernames):
    """Scrape every profile in *usernames* with a headless Chrome browser.

    For each name the profile is read with ``extract_information`` (limited
    by ``Settings.limit_amount``), saved through
    ``Datasaver.save_profile_json``, and the list of commenting users is
    saved through ``Datasaver.save_profile_commenters_txt``. A failure on one
    profile is reported and that profile is saved with empty data, so the
    remaining profiles are still processed. Ctrl-C stops the whole run.
    Cookies are cleared and the browser is closed on every exit path.

    Args:
        usernames: Iterable of profile names to scrape.
    """
    chrome_options = Options()
    chrome_options.add_argument('--dns-prefetch-disable')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--lang=en-US')
    chrome_options.add_argument('--headless')
    chrome_options.add_experimental_option(
        'prefs', {'intl.accept_languages': 'en-US'})
    browser = webdriver.Chrome(
        'C:/devl/inst_data/instagram_profilecrawl/assets/chromedriver',
        chrome_options=chrome_options)

    # makes sure slower connections work as well
    print("Waiting 10 sec")
    browser.implicitly_wait(10)

    try:
        for username in usernames:
            print('Extracting information from ' + username)
            # Fallbacks that get saved when extraction fails for this user.
            information = []
            user_commented_list = []
            try:
                information, user_commented_list = extract_information(
                    browser, username, Settings.limit_amount)
            except Exception:
                # Not a bare ``except``: that would also catch
                # KeyboardInterrupt, so Ctrl-C would be reported as a user
                # error and the loop would keep going instead of aborting.
                print("Error with user " + username)

            Datasaver.save_profile_json(username, information)

            print("Number of users who commented on his/her profile is ",
                  len(user_commented_list), "\n")

            Datasaver.save_profile_commenters_txt(username,
                                                  user_commented_list)
            print(
                "\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n"
            )
    except KeyboardInterrupt:
        print('Aborted...')
    finally:
        browser.delete_all_cookies()
        browser.close()
def get_posts_from_username(username, caption, limit_amount):
    """Collect stats for a user's posts whose caption contains *caption*.

    The profile is read with ``extract_information`` inside a browser
    obtained from ``SetupBrowserEnvironment``. Every post whose ``'caption'``
    contains *caption* yields one dict holding the username, post URL, like
    count, views, the caption itself, the check date and time (taken once
    per call, in the 'US/Pacific' zone) and ``still_up`` set to ``True``.

    Args:
        username: Profile whose posts are inspected.
        caption: Substring that a post's caption must contain.
        limit_amount: Limit forwarded to ``extract_information``.

    Returns:
        A list of per-post stat dicts, in the order the posts were returned.
    """
    with SetupBrowserEnvironment() as browser:
        matching_posts = []
        profile, _ = extract_information(browser, username, limit_amount)
        checked_at = arrow.now('US/Pacific')

        for entry in profile.posts:
            entry_caption = entry['caption']
            if caption not in entry_caption:
                continue

            matching_posts.append({
                'username': username,
                'post_url': entry['url'],
                'likes': entry['likes']['count'],
                'views': entry['views'],
                'caption': entry_caption,
                'checked_date': checked_at.format('MM-DD-YYYY'),
                'checked_time': checked_at.format('hh:mm:ss A'),
                'still_up': True,
            })

        return matching_posts
from util.extractor import extract_information

# Chrome is configured for an English-language session.
chrome_options = Options()
for switch in ('--dns-prefetch-disable', '--no-sandbox', '--lang=en-US'):
    chrome_options.add_argument(switch)
chrome_options.add_experimental_option(
    'prefs', {'intl.accept_languages': 'en-US'})

browser = webdriver.Chrome('./assets/chromedriver',
                           chrome_options=chrome_options)

# Long implicit wait so that slower connections work as well.
browser.implicitly_wait(25)

try:
    usernames = get_all_user_names()

    # One JSON file per profile, written under ./profiles/.
    for username in usernames:
        print('Extracting information from ' + username)
        information = extract_information(browser, username)

        target_path = './profiles/' + username + '.json'
        with open(target_path, 'w') as fp:
            json.dump(information, fp)
except KeyboardInterrupt:
    print('Aborted...')
finally:
    # Always leave the browser clean and closed, even after Ctrl-C.
    browser.delete_all_cookies()
    browser.close()
# Firefox is run inside a virtual display so no visible window is needed.
display = Display(visible=0, size=(1024, 768))
display.start()
browser = webdriver.Firefox()

# makes sure slower connections work as well
print("Waiting 10 sec")
browser.implicitly_wait(10)

# NOTE(review): no browser/display cleanup is visible in this chunk; confirm
# that it happens after this block.
try:
    usernames = get_all_user_names()

    for username in usernames:
        print('Extracting information from ' + username)
        information, user_commented_list = extract_information(
            browser, username)

        with open(Settings.profile_location + '/' + username + '.json',
                  'w') as fp:
            json.dump(information, fp)

        print("Number of users who commented on his/her profile is ",
              len(user_commented_list), "\n")

        # A ``with`` block closes the file even if a write fails; it also
        # avoids the name ``file``, which shadows a builtin on Python 2.
        commenters_path = (Settings.profile_commentors_location + '/' +
                           username + "_commenters.txt")
        with open(commenters_path, "w") as commenters_file:
            # One commenter nickname per line.
            commenters_file.writelines(
                line + "\n" for line in user_commented_list)

        print("\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n")
except KeyboardInterrupt:
    print('Aborted...')
except Exception as exc: print(exc) sys.exit() try: usernames = get_all_user_names() for username in usernames: print('Extracting information from ' + username) information = [] user_commented_list = [] try: if len(Settings.login_username) != 0: login(browser, Settings.login_username, Settings.login_password) information, user_commented_list = extract_information( browser, username, Settings.limit_amount) except: print("Error with user " + username) sys.exit(1) # pdb.set_trace() Datasaver.save_profile_json(username, information) # print ("Number of users who commented on their profile is ", len(user_commented_list),"\n") # Datasaver.save_profile_commenters_txt(username,user_commented_list) # print ("\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n") except KeyboardInterrupt: print('Aborted...')
display = Display(visible=0, size=(1024, 768)) display.start() browser = webdriver.Firefox() # makes sure slower connections work as well print("Waiting 10 sec") browser.implicitly_wait(10) try: usernames = get_all_user_names() for username in usernames: print('Extracting information from ' + username) information, user_commented_list = extract_information( browser, username) with open(Settings.profile_location + '/' + username + '.json', 'w') as fp: json.dump(information, fp) print("Number of users who commented on his/her profile is ", len(user_commented_list), "\n") file = open( Settings.profile_commentors_location + '/' + username + "_commenters.txt", "w") for line in user_commented_list: file.write(line) file.write("\n") file.close() print(
try: if len(Settings.login_username) != 0: login(browser, Settings.login_username, Settings.login_password) except Exception as exc: print("Error login user: "******"Error with user " + username) csv_file = './profiles/%s-followers.csv' % (username) write_csv(followers, csv_file) # get all followers and extract info for them
chrome_options=chrome_options) #SETTINGS: #set limit of posts to analyze: limit_amount = 12000 # makes sure slower connections work as well print("Waiting 10 sec") browser.implicitly_wait(10) try: usernames = get_all_user_names() for username in usernames: print('Extracting information from ' + username) information = extract_information(browser, username, limit_amount) with open('./profiles/' + username + '.json', 'w') as fp: fp.write(json.dumps(information, indent=4)) print("\n\nFinished. The json file was saved in profiles directory.\n") print( "____________________________________________________________\n\n") except KeyboardInterrupt: print('Aborted...') finally: browser.delete_all_cookies() browser.close()
from util.extractor import extract_information

# Headless Chrome configured for an English-language session.
chrome_options = Options()
chrome_options.add_argument('--dns-prefetch-disable')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--lang=en-US')
chrome_options.add_argument('--headless')
chrome_options.add_experimental_option('prefs',
                                       {'intl.accept_languages': 'en-US'})
browser = webdriver.Chrome('./assets/chromedriver',
                           chrome_options=chrome_options)

# makes sure slower connections work as well
# The message is built from the configured value; it previously announced
# 10 seconds while the implicit wait was set to 5.
implicit_wait_seconds = 5
print("Waiting {} sec".format(implicit_wait_seconds))
browser.implicitly_wait(implicit_wait_seconds)

# Overrides applied to the shared Settings before the run starts.
Settings.limit_amount = 1
Settings.scrap_posts_infos = False

try:
    usernames = get_all_user_names()

    for username in usernames:
        print('Extracting information from ' + username)
        information, user_commented_list = extract_information(
            browser, username, Settings.limit_amount)
        Datasaver.save_profile_json(username, information)
except KeyboardInterrupt:
    print('Aborted...')
finally:
    # Always leave the browser clean and closed, even after Ctrl-C.
    browser.delete_all_cookies()
    browser.close()