Пример #1
0
 def scrape_users(self, usernames: list):
     """Scrape one ``Profile`` per username and return them in input order.

     Every profile is scraped with this object's ``headers`` and ``driver``.
     """
     def _scraped_profile(name):
         # Build and scrape in one place so the list below stays declarative.
         profile = Profile(name)
         profile.scrape(headers=self.headers, webdriver=self.driver)
         return profile

     return [_scraped_profile(name) for name in usernames]
Пример #2
0
def insta():
    """Render ``index.html`` with the HD profile picture of the submitted account.

    The account name is taken from the ``name`` field of ``request.form``.
    """
    form_data = request.form
    account = str(form_data['name'])

    # Only the HD profile-picture URL is requested from the scrape.
    profile = Profile(account)
    profile.scrape(keys=['profile_pic_url_hd'])
    print(profile['profile_pic_url_hd'])
    picture_url = profile['profile_pic_url_hd']

    # Hand the picture URL to the template as ``image_link``.
    return render_template('index.html', image_link=picture_url)
Пример #3
0
# URL INSTAGRAM WEB SCRAPER FOR NAFNAF

import selenium
import parser_libraries
from selenium import webdriver
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# Location of the chromedriver binary; adjust for your machine.
path = "C:/Users/Moni/Downloads/chromedriver.exe"

# Start ONE browser session and reuse it for everything below.  The instance
# is deliberately not bound to the name ``webdriver``: that name is the
# selenium module imported at the top of this script, and rebinding it would
# make any later ``webdriver.<something>`` lookup hit the driver instead.
driver = webdriver.Chrome(path)
driver.get("https://www.instagram.com/")

# NOTE(review): the sessionid value is blank here; presumably a real session
# cookie has to be filled in before running -- confirm.
headers = {"user-agent": "Chrome/87.0.4389.23", "cookie": "sessionid= "}

# Scrape the profile itself with plain request headers.
nafnaf = Profile("nafnafcol")
nafnaf.scrape(headers=headers)

# Collect up to 20 posts through the browser session, then scrape each post,
# pausing 5 between posts.
posts = nafnaf.get_posts(webdriver=driver, login_first=True, amount=20)
scraped_posts, unscraped_posts = scrape_posts(posts,
                                              headers=headers,
                                              pause=5,
                                              silent=False)
Пример #4
0
# See this tutorial to find your sessionid:
# http://valvepress.com/how-to-get-instagram-session-cookie/

from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# Launch the Chrome instance that will be used to collect the posts
webdriver = Chrome("path/to/chromedriver.exe")

# Request headers: a mobile user agent plus the session cookie
SESSIONID = 'ENTER_YOUR_SESSION_ID_HERE'
USER_AGENT = (
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57"
)
headers = {
    "user-agent": USER_AGENT,
    "cookie": f"sessionid={SESSIONID};",
}

# Scrape the profile itself
joe = Profile("joebiden")
joe.scrape(headers=headers)

# Collect the profile's posts through the browser, then scrape each one
posts = joe.get_posts(webdriver=webdriver, login_first=True)
scraped, unscraped = scrape_posts(
    posts,
    headers=headers,
    pause=10,
    silent=False,
)
Пример #5
0
 def test_from_soup(self, get_request, page_instance):
     """A Profile built from a parsed soup reports the same followers as ``page_instance``."""
     # Parse the response text with lxml and hand the soup straight to Profile.
     soup = BeautifulSoup(get_request.text, features='lxml')
     from_soup = Profile(soup)
     from_soup.scrape()
     assert from_soup.followers == page_instance.followers
Пример #6
0
 def test_from_html(self, get_request, page_instance):
     """A Profile built from the raw response text reports the same followers as ``page_instance``."""
     from_html = Profile(get_request.text)
     from_html.scrape()
     assert from_html.followers == page_instance.followers
Пример #7
0
 def page_instance(self, url, headers):
     """Return a ``Profile`` for ``url`` that has been scraped with ``headers``."""
     scraped_profile = Profile(url)
     scraped_profile.scrape(headers=headers)
     return scraped_profile
Пример #8
0
from selenium.webdriver import Chrome
from selenium import webdriver
from instascrape import Profile, scrape_posts, Post

# Placeholder: replace with the account name to scrape.
username = '******'

# The Chrome instance gets its own name so it does not shadow the
# ``selenium.webdriver`` module imported at the top of this script.
driver = Chrome(
    "C:/Users/Tro/Downloads/chromedriver_win32/chromedriver.exe")

# Request headers: a mobile user agent plus the session cookie.
headers = {
    "user-agent":
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=YOUR_SESSION_ID"
}

# Scrape the profile, collect its posts through the browser, then scrape
# every collected post.
pr = Profile(username)
pr.scrape(headers=headers)
posts = pr.get_posts(webdriver=driver, login_first=True)
scraped_posts, unscraped_posts = scrape_posts(posts,
                                              headers=headers,
                                              pause=0.5,
                                              silent=False)

# Save each scraped post under a name derived from its upload time.
# NOTE(review): the name only has minute resolution, so two posts uploaded in
# the same minute map to the same file name -- confirm whether that matters.
for post in scraped_posts:
    fname = post.upload_date.strftime("%Y-%m-%d %Hh%Mm")
    post.download(f"{fname}.png")
Пример #9
0
import matplotlib.pyplot as plt  #for plots
import pandas as pd  #for the posts DataFrame and CSV export
from instascrape import Profile, scrape_posts
from selenium.webdriver import Chrome  #for webdriver

# Get your session id from the browser (inspect page -> application ->
# session id) and paste it in place of the placeholder after "sessionid=".
headers = {
    "user-agent":
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=sessionid"
}

address = input('Enter Profile Name: ')  # target Instagram account name
webdriver = Chrome(
    "PATH/chromedriver.exe")  # point selenium at the chromedriver binary
profile = Profile(address)
profile.scrape(headers=headers)  # uses the session id from the headers
posts = profile.get_posts(webdriver=webdriver,
                          login_first=False)  # runs in the Chrome window opened above

# If pause is set to a low value, Instagram might redirect you.
scraped_posts, unscraped_posts = scrape_posts(posts,
                                              headers=headers,
                                              pause=10,
                                              silent=False)

# One dict per scraped post -> one DataFrame row per post.
posts_data = [post.to_dict() for post in scraped_posts]
posts_df = pd.DataFrame(posts_data)
posts_df.to_csv(address + '.csv', index=False)  # written as "<account name>.csv"

# With no scraped posts the frame has no columns, so selecting
# 'upload_date' / 'comments' would raise KeyError; report that case instead.
if posts_data:
    print(posts_df[['upload_date', 'comments']])
else:
    print('No posts were scraped for ' + address)