def scrape_users(self, usernames: list):
    """Scrape an Instagram profile for each username and return the Profile objects.

    Each profile is scraped with this instance's headers and webdriver
    before the next one is constructed, preserving request order.
    """
    profiles = []
    for name in usernames:
        profile = Profile(name)
        profile.scrape(headers=self.headers, webdriver=self.driver)
        profiles.append(profile)
    return profiles
def insta():
    """Flask view: scrape the submitted Instagram username's HD profile
    picture URL and render it on index.html.

    Reads the POSTed form field 'name', scrapes only the
    'profile_pic_url_hd' key, and passes the URL to the template as
    ``image_link``.
    """
    form = request.form
    insta_username = str(form['name'])
    profile = Profile(insta_username)
    # Only the profile-picture key is needed, so restrict the scrape to it.
    profile.scrape(keys=['profile_pic_url_hd'])
    # Look the value up once (the original printed it for debugging and
    # then fetched it a second time).
    image_link = profile['profile_pic_url_hd']
    return render_template('index.html', image_link=image_link)
# URL INSTAGRAM WEB SCRAPER FOR NAFNAF
import selenium
import parser_libraries
from selenium import webdriver
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# Path to the local chromedriver binary.
path = "C:/Users/Moni/Downloads/chromedriver.exe"

# Launch a single browser instance. The original script started two Chrome
# drivers (leaking the first) and rebound the name `webdriver`, shadowing
# the selenium.webdriver module imported above.
driver = Chrome(path)
driver.get("https://www.instagram.com/")

# NOTE(review): the sessionid cookie is blank here — a valid Instagram
# session id must be pasted in for authenticated scraping to work.
headers = {"user-agent": "Chrome/87.0.4389.23", "cookie": "sessionid= "}

nafnaf = Profile("nafnafcol")
nafnaf.scrape(headers=headers)

# Collect the 20 most recent posts, logging in through the browser first,
# then scrape each post with a 5-second pause to avoid rate limiting.
posts = nafnaf.get_posts(webdriver=driver, login_first=True, amount=20)
scraped_posts, unscraped_posts = scrape_posts(posts, headers=headers, pause=5, silent=False)
# See this tutorial to find your sessionid:
# http://valvepress.com/how-to-get-instagram-session-cookie/
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# Browser instance that instascrape uses for the login step.
webdriver = Chrome("path/to/chromedriver.exe")

# Paste your own Instagram session cookie here before running.
SESSIONID = 'ENTER_YOUR_SESSION_ID_HERE'
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": f"sessionid={SESSIONID};",
}

# Scrape Joe Biden's profile page.
joe = Profile("joebiden")
joe.scrape(headers=headers)

# Pull his posts (logging in through the browser first) and scrape each
# one, pausing 10 seconds between requests.
posts = joe.get_posts(webdriver=webdriver, login_first=True)
scraped, unscraped = scrape_posts(posts, silent=False, headers=headers, pause=10)
def test_from_soup(self, get_request, page_instance):
    """A Profile built from a BeautifulSoup object reports the same
    follower count as the reference page_instance fixture."""
    soup = BeautifulSoup(get_request.text, features='lxml')
    profile = Profile(soup)
    profile.scrape()
    assert profile.followers == page_instance.followers
def test_from_html(self, get_request, page_instance):
    """A Profile built from raw HTML text reports the same follower
    count as the reference page_instance fixture."""
    raw_html = get_request.text
    profile = Profile(raw_html)
    profile.scrape()
    assert profile.followers == page_instance.followers
def page_instance(self, url, headers):
    """Fixture: build a Profile for *url*, scrape it with *headers*,
    and return the scraped instance."""
    profile = Profile(url)
    profile.scrape(headers=headers)
    return profile
from selenium.webdriver import Chrome
from selenium import webdriver
from instascrape import Profile, scrape_posts, Post

# Target account (fill in a real username).
username = '******'

# Named `browser` so it no longer shadows the selenium.webdriver module
# imported above (the original rebound the name `webdriver`).
browser = Chrome("C:/Users/Tro/Downloads/chromedriver_win32/chromedriver.exe")

headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    # Replace with a valid Instagram session cookie.
    "cookie": "sessionid=YOUR_SESSION_ID",
}

pr = Profile(username)
pr.scrape(headers=headers)

# Gather the profile's posts (logging in through the browser first) and
# scrape each one with a short pause between requests.
posts = pr.get_posts(webdriver=browser, login_first=True)
scraped_posts, unscraped_posts = scrape_posts(posts, headers=headers, pause=0.5, silent=False)

# Save each scraped post, named by its upload timestamp.
# NOTE(review): .png is assumed for every post — video posts may need a
# different extension; confirm against instascrape's download behavior.
for post in scraped_posts:
    fname = post.upload_date.strftime("%Y-%m-%d %Hh%Mm")
    post.download(f"{fname}.png")
from selenium.webdriver import Chrome  # for webdriver
from instascrape import Profile, scrape_posts
import matplotlib.pyplot as plt  # for plots
import pandas as pd  # required by the DataFrame/CSV export below (was missing: pd was undefined)

# Get the session id from your browser (inspect page source -> Application
# -> cookies -> sessionid) and paste it into the cookie header below.
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=sessionid",
}

address = input('Enter Profile Name: ')  # target Instagram account name
webdriver = Chrome("PATH/chromedriver.exe")  # point selenium at chromedriver

profile = Profile(address)  # use Profile() from instascrape
profile.scrape(headers=headers)  # authenticated request via the session cookie

posts = profile.get_posts(webdriver=webdriver, login_first=False)  # opens an Instagram browser window
# If pause is set to a lower value, Instagram might redirect you.
scraped_posts, unscraped_posts = scrape_posts(posts, headers=headers, pause=10, silent=False)

# Turn each scraped post into a dict, collect into a DataFrame, and export.
posts_data = [post.to_dict() for post in scraped_posts]
posts_df = pd.DataFrame(posts_data)
posts_df.to_csv(address + '.csv', index=False)
print(posts_df[['upload_date', 'comments']])