from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup


def add_item(item, n, inlist, p):
    pika = "https://www.pikalytics.com/pokedex/vgc2018/" + p + "/"
    uData = ureq(pika)  # pull html
    html = uData.read()
    page_soup = soup(html, "html.parser")
    # drill down to the n-th held-item entry on the Pokemon's page
    item = page_soup.findAll("div", {"class": "inline-block pokemon-stat-container"})
    item = item[3].findAll("div", {"class": "pokedex-category-wrapper"})
    item = item[0].findAll("div", {"id": "items_wrapper"})
    item = item[0].findAll("div")
    item = item[0].findAll("div", {"class": "pokedex-move-entry-new"})
    item = item[n].findAll("div", {"style": "display:inline-block;"})
    # strip the tag markup to get the bare item name
    item = str(item)
    item = item.split('>')
    item = item[1].split('<')
    item = item[0]
    if item not in inlist:
        inlist.append(item)
        return item
    else:
        # already seen: recurse to the next entry on the page
        n += 1
        return add_item(item, n, inlist, p)
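# A minimal usage sketch; the Pikalytics slug "landorus-therian" and the
# starting index 0 are illustrative assumptions, not from the original code.
seen = []
first_item = add_item(None, 0, seen, "landorus-therian")
print(first_item, seen)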
def department(url):
    my_url = url
    uClient = ureq(my_url)  # open link, grab the page
    page_html = uClient.read()
    uClient.close()
    # html parse
    page_soup = soup(page_html, "html.parser")
    # grabs the course browser block
    containers = page_soup.find("div", {"id": "crsebrowser"})
    a = []
    for string in page_soup.table.find_all("a", href=True):
        a.append(string.get_text())
    # map the URL to the list of link texts found on the page
    courses = {my_url: []}
    for i in a:
        courses[my_url].append(i)
    return courses
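# Usage sketch for department(); the catalog URL below is a placeholder,
# not a page from the original project.
listing = department("https://catalog.example.edu/departments/cs")
print(listing)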
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as ureq
import csv

my_url = 'http://www.espn.com/college-sports/basketball/recruiting/playerrankings/_/class/2021'
client = ureq(my_url)  # opens connection, grabs page
page_html = client.read()
client.close()

# html parsing
page_soup = soup(page_html, "html.parser")

# grabs all item info via html
containers = page_soup.findAll("tr")

# array format: [[player, pos, [hometown, high school], ht, wt], ...]
p_info = []
for c in containers:
    if ("oddrow" in c["class"]) or ("evenrow" in c["class"]):
        s = c.findAll("td")
        name = s[1].div.a.strong.text
        pos = s[2].b.text
        hometown = s[3].text
        ht = s[4].text
        wt = s[5].text
        p_info.append([name, pos, hometown, ht, wt])

# string formatting: split "City, ST High School" into [location, school]
for i in p_info:
    index = i[2].find(",") + 4
    i[2] = [i[2][:index], i[2][index:]]
    print(i[2])
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

url = os.environ.get('URL_VAR')
browser = webdriver.Chrome(r'C:\Users\Leandro\Downloads\chromedriver.exe')
browser.get(url)
search = browser.find_element_by_name('field-keywords')
search.send_keys(os.environ.get('SEND_KEYS'))
search.send_keys(Keys.ENTER)
browser.implicitly_wait(15)

# passing my url from selenium to bs4
new_url = browser.current_url
browser.implicitly_wait(2)
client = ureq(new_url)
html_scrap = client.read()
client.close()

html_soup = soup(html_scrap, "html.parser")
containers = html_soup.findAll("div", {"class": "s-include-content-margin s-border-bottom"})
page = containers[0]
title_tag = page.find("span", {"class": "a-size-medium a-color-base a-text-normal"})
title = title_tag.text
price_tag = page.find("span", {"class": "a-offscreen"})
price = price_tag.text

filename = "portfolio-scraper.csv"
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

my_url = "https://www.newegg.com/Product/ProductList.aspx?Submit=ENE&IsNodeId=1&N=100007709%20600499109"

# opening up a connection and grabbing the page
uclient = ureq(my_url)
page_html = uclient.read()
uclient.close()

# html parsing
page_soup = soup(page_html, "html.parser")

# grabs each product
containers = page_soup.find_all("div", {"class": "item-container"})

filename = "Graphics Cards.csv"
f = open(filename, "w")
headers = "Make, Product_Name, Shipping\n"
f.write(headers)

for container in containers:
    # the brand anchor lives inside each product container, so look it up
    # per container rather than looping over every brand anchor on the page
    abrand = container.find("a", {"class": "item-brand"})
    make = abrand.img["title"]
    title_container = container.find_all("a", {"class": "item-title"})
    product_name = title_container[0].text
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as ureq
from time import sleep

myurl = 'https://www.bestbuy.com/site/arlo-pro-3-6-camera-indoor-outdoor-wire-free-2k-hdr-security-camera-system-white/6364585.p?skuId=6364585'
client = ureq(myurl)
page_html = client.read()
client.close()

# poll the downloaded page up to 20 times, pausing between passes
for i in range(20):
    sleep(3)
    page_soup = soup(page_html, 'html.parser')
    container = page_soup.findAll('div', {'class': 'price-box'})
    # print(container[0])
    # print(len(container))
    containers = container[0]
    print(containers)
    # price = containers.findAll('div', {'class': 'priceView-hero-price'})
    # print(price[0].text)
import csv  # to do operations on CSV
import pandas as pd  # file operations
from bs4 import BeautifulSoup as soup  # scraping tool
from urllib.request import urlopen as ureq  # for requesting data from a link
import numpy as np
import re

url = "http://howstat.com/cricket/Statistics/Players/PlayerList.asp?Group=A"
pagehtml = ureq(url)
page_soup = soup(pagehtml, "html.parser")  # parse the html

table = page_soup.find("table", {"class": "TableLined"})
with open('AZ.csv', 'a', newline='') as csvfile:
    f = csv.writer(csvfile)
    rows = table.find_all('tr')  # find all tr tags (rows)
    for tr in rows:
        data = []
        cols = tr.find_all('td')  # find all td tags (columns)
        for td in cols:
            data.append(td.text.strip())
        f.writerow(data)  # persist the row as well as printing it
        print(data)
import csv
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

filename = 'drive/MyDrive/clgscrap.csv'  # results storage

# read the old csv so previously notified items can be recognized
checker = []
try:
    with open(filename, "r+", encoding="utf-16") as r:
        reader = csv.reader(r, delimiter="\n")
        for i, line in enumerate(reader):
            checker = checker + [line]  # collect the read lines into `checker`
except Exception as e:
    err = e

# connecting the website with the scraper
myurl = 'http://gecskp.ac.in'  # the required URL
ucli = ureq(myurl)  # connect to the webpage using urllib
page_html = ucli.read()  # read the webpage into a variable
ucli.close()  # close the connection, as the webpage can time out
page_soup = soup(page_html, "html.parser")  # parse with BeautifulSoup's html.parser

# the tag holding a brief of the needed data
containers = page_soup.findAll(
    "a", {"style": "font-weight: 600; font-style: normal;"}, href=True)

# scraping and updating the csv
with open(filename, "w+", encoding="utf-16") as f:  # utf-16 to support multiple languages
    headers = "NEWS,LINK\n"
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
import sys

thehylia = "https://anime.thehylia.com/"
music = "https://anime.thehylia.com/soundtracks/browse/all"
animusic = "https://anime.thehylia.com/downloads/browse/all"

program_name = sys.argv[0]
arguments = sys.argv[1:]
count = len(arguments)

for x in arguments:
    # print("x is :", x)
    uclient = ureq(x)
    soup_html = uclient.read()
    uclient.close()
    htmlsoup = soup(soup_html, "html.parser")
    content = htmlsoup.find("table", {"class": "blog"})
    content = content.find("table")
    content = content.findAll("tr")
    for item in content:
        for val in item.findAll("td"):
            # print(val.text)
            if val.a is not None:
                print(val.a["href"])
            # print("\n")

# with open('./database.db', 'w') as currfile:
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as ureq
import re

print("Name the movie")
movie = input()

# build the IMDb search query: whitespace becomes '+'
new_movie = ""
for i in movie:
    if i.isspace():
        new_movie = new_movie + '+'
    else:
        new_movie = new_movie + i
# print(new_movie)

meurl = "https://www.imdb.com/find?q=" + new_movie + "&ref_=nv_sr_sm"
print(meurl)
uclient = ureq(meurl)
page = uclient.read()  # download the page
uclient.close()

pagesoup = soup(page, "html.parser")
main = pagesoup.find("div", {"id": "main"})
# print(main)
# print("type of main is")
# print(type(main))

# the first /title/ link in the results is the best match
links = main.findAll("a", href=re.compile("(/title/)+"))
# print("\nlength of link is ")
# print(len(links))
mainlink = links[0]
mainlink = mainlink['href']
# print(mainlink)
print("\nNow the final link is ")
import json
from urllib.request import urlopen as ureq


def get_json(local):
    # fetch current conditions for an Iowa location from the Wunderground API
    f = ureq('http://api.wunderground.com/api/0bf213c89d5d826c/geolookup/conditions/q/IA/'
             + local + '.json')
    data = json.loads(f.read())
    return data
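# Usage sketch: "Cedar_Rapids" and the response fields are assumptions based
# on the legacy Wunderground conditions API, not taken from the original code.
weather = get_json("Cedar_Rapids")
print(weather["current_observation"]["temp_f"])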
import mysql.connector
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

# assumed connection setup; the credentials below are placeholders, not from
# the original file
mydb = mysql.connector.connect(host="localhost", user="root", password="")
mycursor = mydb.cursor()

"""sql_table = (
    "CREATE TABLE IF NOT EXISTS Moviez ("
    " `mov_no` int(11) NOT NULL AUTO_INCREMENT,"
    " `release_year` varchar(10) NOT NULL,"
    " `mov_name` varchar(100) NOT NULL,"
    " `genre` varchar(16) NOT NULL,"
    " `rating` DECIMAL(2,2) NOT NULL,"
    " `votes` INT(10) NOT NULL,"
    " `gross` DECIMAL(4,2) NOT NULL,"
    " `director` VARCHAR(40) NOT NULL,"
    " PRIMARY KEY (`mov_no`)"
    ") ENGINE=InnoDB")"""

myUrl = 'https://www.imdb.com/search/title?groups=top_250&sort=user_rating'
html_data = ureq(myUrl)
read_html = html_data.read()
html_data.close()

page_soup = soup(read_html, 'html.parser')
# html_doc = print(page_soup.prettify())
my_container = page_soup.find_all('div', {'class': 'lister-item-content'})
# for string in page_soup.stripped_strings:
#     print(string)
# container = my_container[0]
"""mov_name = container.find_all('a')
print(mov_name[0].text)
year = container.find_all('span', {'class': 'lister-item-year text-muted unbold'})
print(year[0].text)
rating = container.find_all('span', {'class': 'global-sprite rating-star imdb-rating'})
def scrap(x):
    x = chr(x)  # to change integer into character
    url = PRETEXT_URL + PLAYERLIST.format(x)
    try:
        print("\n")
        print(url)
        pagehtml = ureq(url)
    except HTTPError as e:
        print(e)
    except URLError as e:
        print("Website Can't be reached")
    else:
        soup = soups(pagehtml, "html.parser")  # parse the html
        table = soup.find("table", {"class": "TableLined"})
        if table is not None:
            # find all tr tags (rows) with the two data-row background colours
            rows = table.find_all('tr', attrs={"bgcolor": ["#FFFFFF", "#E3FBE9"]})
            localCricketData = []
            for tr in rows:
                data = []
                allLinks = tr.find_all('a', {"class": "LinkNormal"})
                if len(allLinks):
                    allLinks = allLinks[1:]
                cols = tr.find_all('td')  # find all td tags (columns)
                i = 0
                dataDist = {}
                isFullNameSet = 0
                for td in cols:
                    if (td.text.strip() not in IGNORE_VALUES
                            and td.text.strip().find('No. of Records') == -1):
                        textValue = td.text.strip()
                        if textValue == '':
                            textValue = 0
                        dataDist = mapData(dataDist, i, textValue)
                        data.append(td.text.strip())
                        i += 1
                for link in allLinks:
                    dataTitle = ''
                    subDataDist = {}
                    subUrl = PRETEXT_URL + link.get('href')
                    if re.search(ODI_URL, subUrl):
                        dataTitle = 'odistats'
                    elif re.search(T20_URL, subUrl):
                        dataTitle = 't20stats'
                    elif re.search(IPL_URL, subUrl):
                        dataTitle = 'iplstats'
                    elif re.search(TEST_URL, subUrl):
                        dataTitle = 'teststats'
                    try:
                        # print("\n")
                        # print(subUrl)
                        subPagehtml = ureq(subUrl)
                    except HTTPError as e:
                        print(e)
                    except URLError as e:
                        print("Website Can't be reached")
                    else:
                        subSoup = soups(subPagehtml, "lxml")  # parse the html
                        # set full name, batting style and bowling style once per player
                        if isFullNameSet == 0:
                            dataDist = mapData(
                                dataDist, 6,
                                subSoup.find('td', text='Full Name:').find_next('td').text.strip())
                            dataDist = mapData(
                                dataDist, 7,
                                subSoup.find('td', text='Bats:').find_next('td').text.strip())
                            dataDist = mapData(
                                dataDist, 8,
                                subSoup.find('td', text='Bowls:').find_next('td').text.strip())
                            if subSoup.find('a', text='IPL Profile & Statistics'):
                                allLinks.append(subSoup.find('a', text='IPL Profile & Statistics'))
                            isFullNameSet = 1
                        if dataTitle == 'iplstats':
                            dataDist['ipl_teams'] = subSoup.find(
                                'td', text='Teams:').find_next('td').text.strip().split(",")
                            dataDist['ipl'] = re.sub(
                                r"\([^()]*\)", "",
                                subSoup.find('td', text='Matches:').find_next('td').text.strip())
                            dataDist['ipl'] = dataDist['ipl'].replace("\u00A0", "")
                        subTables = subSoup.find('table', attrs={"width": ["270"]})
                        content = subTables.find_all('td')
                        mainKey = []
                        headers = subTables.find_all('td', attrs={"colspan": "2"})
                        for heading in headers:
                            if heading.text.strip() != '':
                                mainKey.append(heading.text.strip())
                        dataDist[dataTitle] = extractTdAndMapValues(subDataDist, mainKey, content)
                # print(dataDist)
                if bool(dataDist):
                    localCricketData.append(dataDist)
            return localCricketData
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup


def get_soup(url_string):
    # fetch a page and return it parsed with the html5lib tree builder
    web_client = ureq(url_string)
    page_content = web_client.read()
    web_client.close()
    return soup(page_content, 'html5lib')
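# Usage sketch for get_soup(); the URL is a placeholder.
doc = get_soup("https://example.com")
print(doc.title.text if doc.title else "no <title> found")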
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as ureq

my_url = "https://www.flipkart.com/search?q=mobile%20phone&as=on&as-show=on&marketplace=FLIPKART&otracker=start&as-pos=2_q_mobile%20phone"

file_name = "flipkartmobilerecord.csv"
f = open(file_name, "w", encoding="utf-8")
headers = "product_name,price\n"
f.write(headers)

client = ureq(my_url)
page_html = client.read()
client.close()

page_soup = soup(page_html, "html.parser")
containers = page_soup.findAll("div", {"class": "_1-2Iqu row"})
for container in containers:
    name_container = container.findAll("div", {"class": "_3wU53n"})
    name = name_container[0].text
    price_container = container.findAll("div", {"class": "_1vC4OE _2rQ-NK"})
    price = price_container[0].text
    # print("name :" + name + "price :" + price + "\n")
# TAKES MAL_ID AS AN ARGUMENT TO GET IMAGE AND DESCRIPTION
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
import sqlite3, time, sys

mal = "https://myanimelist.net/"
topanimelist = "https://myanimelist.net/topanime.php?limit=14150"  # 0 to 14150
topmangalist = "https://myanimelist.net/topmanga.php?limit=46100"  # 0 to 46100
searchmal = "https://myanimelist.net/search/prefix.json?type=all&keyword=boku&v=1"
malanime = "https://myanimelist.net/search/prefix.json?type=anime&keyword=boku&v=1"
malmanga = "https://myanimelist.net/search/prefix.json?type=manga&keyword=boku&v=1"
malcharacter = "https://myanimelist.net/search/prefix.json?type=character&keyword=boku&v=1"
maluser = "******"

mal_id = sys.argv[1]  # 30276 for ONE PUNCH MAN
curr_url = "https://myanimelist.net/anime/" + str(mal_id)
uclient = ureq(curr_url)
soup_html = uclient.read()
uclient.close()
htmlsoup = soup(soup_html, "html.parser")
# print(str(htmlsoup.find("meta", {"property": "og:url"})["content"].split("/")[5]))
print(str(htmlsoup.find("table", {"width": "100%"}).div.div.a.img["src"]))
print(str(htmlsoup.find("span", {"itemprop": "description"}).text))
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup


def get_url(page_number):
    # this url searches for 2018-2019 Honda Civics under 10k miles; configure
    # it for whatever car(s) or model(s) you want
    return ('https://www.cars.com/for-sale/searchresults.action/?mdId=20823&mkId=20017&mlgId=28872&page='
            + str(page_number)
            + '&perPage=100&rd=99999&searchSource=GN_REFINEMENT&shippable-dealers-checkbox=true&showMore=false&sort=relevance&yrId=58487&yrId=30031936&yrId=35797618&yrId=36362520&zc=60542&localVehicles=false')


filename = 'cars.csv'  # output filename
f = open(filename, 'w')
headers = 'Price,Color\n'
f.write(headers)

for i in range(1, 51):  # loop through pages of cars
    uClient = ureq(get_url(i))  # opens url
    page_html = uClient.read()  # reads page html
    uClient.close()  # closes url
    page_soup = soup(page_html, 'html.parser')  # parses html
    # finds all of the listing blocks (a sample picture of a listing block is
    # included); each page holds 100 listings per the link above
    containers = page_soup.findAll('div', {'class': 'shop-srp-listings__inner'})
    for container in containers:  # loops through each listing block on the page
        price_container = container.find('div', 'payment-section')
        price = price_container.span.text.strip()
        if price == 'Not Priced':  # skip unpriced listings, which are of no use
            continue
        price = price.replace(',', '')
        price = price.replace('$', '')  # gives price as a readable number
        color_container = container.find('ul',
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

user1 = input("PLEASE MAKE FOLDER NAME AS = database AND IF MADE THEN ENTER Y :-")
user2 = input("PLEASE MAKE ANOTHER FOLDER INSIDE THE database FOLDER NAME AS = images AND IF MADE THEN ENTER Y :- ")
if user1 == 'y' and user2 == 'y':
    my_url = 'https://www.imdb.com/list/ls004440136/'

    # opening csv file for database
    filename = "database/database.csv"
    f = open(filename, "w")

    # headers
    headers = "Image,Actor_name,Profession,personality traits\n"
    f.write(headers)

    # opening up connection, grabbing the page
    uclint = ureq(my_url)
    page_html = uclint.read()
    uclint.close()

    # html parser
    page_soup = soup(page_html, "html.parser")

    # find all divs with class "lister-item mode-detail" on the page
    # and grab each celebrity name
    containers = page_soup.findAll("div", {"class": "lister-item mode-detail"})
    print("\n")
    print(str(len(containers)) + " Records Found And Stored In database.csv File")

    # grabbing only the images of actors
    imgs = page_soup.find_all("div", {"class": "lister-item-image"})
    links = []
    for imge in imgs:
        link = imge.a.img.get('src')
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
"""
from selenium import webdriver

def render_page(url):
    driver = webdriver.Chrome()
    driver.get(url)
    r = driver.page_source
    return r

r = render_page(myurl)
"""

myurl = 'https://ktu.edu.in'
ucli = ureq(myurl)
page_html = ucli.read()
ucli.close()

page_soup = soup(page_html, "html.parser")
containers = page_soup.findAll("div", {"class": "latest-news"})
for container in containers:
    title_container = container.findAll("li", {"style": ""})
    title_time = (title_container[0].label.text[0:10] + " "
                  + title_container[0].label.text[24:28])
    title_content = title_container[0].a.text
    print(title_time + " | " + " Announcement :" + title_content)
"""
filename = 'clgscrap.csv'
f = open(filename, "w+")
        # inside selection(): record the current product
        data_json += [{
            "name": product_name,
            "discription": product_discription,
            "colors": product_colors,
            "price": product_price
        }]
        # if i < len(grides):
        #     data_csv = data_csv + ','
        i += 1


if __name__ == '__main__':
    for x in my_url:
        uclient = ureq(my_url[x])
        page_html = uclient.read()
        uclient.close()
        pagesoup = soup(page_html, "html.parser")
        grides = pagesoup.findAll("div", {"class": "grid-item-info"})
        print('product category =', x)
        print('number of products =', len(grides))
        selection(grides, x)

    # file save
    with open(CSV_FILE, 'w') as csv_f:
        csv_f.write(data_csv)
    with open(JSON_FILE, 'w') as json_f:
        json_f.write(json.dumps(data_json) + '\n')
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as Soup


def game_soup(gameId):
    # fetch and parse the ESPN page for one NFL game
    match_url = 'http://www.espn.com/nfl/game?gameId=' + str(gameId)
    u_client = ureq(match_url)
    page_html = u_client.read()
    u_client.close()
    return Soup(page_html, 'html.parser')
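# Usage sketch; the gameId below is illustrative, not from the original code.
box = game_soup(401030710)
print(box.title.text)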
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
import sqlite3, time

conn = sqlite3.connect('thehyliadatabasesql.db')
c = conn.cursor()

thehylia = "https://anime.thehylia.com/"
music = "https://anime.thehylia.com/soundtracks/browse/all"
animusic = "https://anime.thehylia.com/downloads/browse/all"

# Music first
uclient = ureq(music)
soup_html = uclient.read()
uclient.close()
htmlsoup = soup(soup_html, "html.parser")
content = htmlsoup.find("table", {"class": "blog"})
content = content.find("p", {"align": "left"})
content = content.findAll("a")

c.execute("CREATE TABLE IF NOT EXISTS albums (album_id INT, url TEXT, name TEXT, unix INT)")
album_id = 0  # already module-level, so no global declaration is needed below
for link in content:
    unix = time.time()
# Import Libraries
import pandas as pd
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

# URL
url = 'https://www.optikmelawai.com/frame?category=01&gender=3+5&page=1'

# Opening up connection, grabbing the page
uClient = ureq(url)
page_html = uClient.read()
uClient.close()

# html parsing
page_soup = soup(page_html, "html.parser")

# grabs each product
containers = page_soup.findAll("div", {"class": "pad-box2"})

# Creating dataset
titles = []
brands = []
prices = []
discount_prices = []

# loop each product in page
for container in containers:
    title = container.a["title"]
    titles.append(title)
    brand_container = container.findAll("p", {"class": "jdl-prod"})
# coding: utf-8

# In[3]:

from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as ureq
import pandas as pd

my_url = 'https://www.flipkart.com/search?q=mi&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off'  # link of the website to be scraped
client = ureq(my_url)  # request the link
page_html = client.read()  # read the page html
client.close()

page_soup = soup(page_html, "html.parser")  # parse the html of the page
containers = page_soup.findAll("div", {"class": "_3O0U0u"})  # HTML tag that contains the needed information
print("The number of components is ")
print(len(containers))  # no. of components in the tag
# print(soup.prettify(containers[0]))  # prettify the html and print the specified component

n = int(input("what is the range"))
for i in range(n):
    container = containers[i]
    print(container.div.img["alt"])  # print the needed attribute (name) of the component
    price = container.findAll("div", {"class": "_1vC4OE _2rQ-NK"})  # tag and class containing the price
    print(price[0].text)
# file = "getdata.csv"
# Import statements
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
from urllib.request import Request
import json

# Getting URL
my_url = input("Enter genius URL of artist: ")

# Setting a known browser user agent
req = Request(my_url, headers={'User-Agent': 'Mozilla/5.0'})

# Opening up connection, grabbing the page
u_client = ureq(req)
page_html = u_client.read()
u_client.close()

# Does HTML parsing
page_soup = soup(page_html, "html.parser")

# Instantiating json to store lyrics
song_json = {}
song_json["Lyrics"] = []

# Grabs each song from the artist page
containers = page_soup.findAll("div", {"class": "mini_card_grid-song"})

# For each song, go to its URL and grab the lyrics
for container in containers:
    song_url = container.a["href"]
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

carlist = []
filename = "ricardo_scraping_test_1.csv"
badbrands = (121, 141, 158, 154, 172, 181, 195, 198, 208, 255, 287, 293, 299, 290, 341)

for x in range(1, 401):
    print('BRAND {}'.format(x))
    for z in range(1, 300):
        if x in badbrands:
            break
        print('PAGE {}'.format(z))
        my_url = str("https://auto.ricardo.ch/de/s?make=" + str(x)
                     + "&offer_type=classified&sort_type=registration_date&sort_order=asc&page="
                     + str(z))
        uClient = ureq(my_url)
        myData = uClient.read()
        uClient.close()
        page_soup = soup(myData, "html.parser")
        articles = page_soup.findAll("a", {"class": "ric-article"}, href=True)
        # print(car.markestring, car.modelstring)
        for a in articles:
            if str(a['href'])[0] == '/':
                car = rst.run('https://auto.ricardo.ch' + str(a['href']))
            else:
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

nltk.download('stopwords')

containerlist = []
for i in range(25):  # number of pages plus one
    test11 = "https://www.trustpilot.com/review/www.sonetel.com?page=" + str(i)
    uclient = ureq(test11)
    page_html = uclient.read()
    uclient.close()
    page_soup = BeautifulSoup(page_html, "html.parser")
    container = page_soup.find_all("div", {"class": "review-body"})
    contain = container[0:-1]
    containerlist.extend(contain)
    # print(len(contain))

print(len(containerlist))

labels = ['review1', 'review2', 'review3', 'review4', 'review5', 'review6',
          'review7', 'review8', 'review9', 'review10',
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup

my_url = 'https://accounts.google.com/signin'
uclient = ureq(my_url)  # opening a connection, grabbing the page
page_html = uclient.read()
uclient.close()

page_soup = soup(page_html, "html.parser")  # html parsing
search = page_soup.find_all('input', {'type': 'email'})
for result in search:
    # print(result, '\n')
    input_value = result.get('value')
    print('c', input_value, 'c')
from urllib.request import urlopen as ureq


def download():
    # fetch the page at the module-level `url` and return the raw bytes
    uclient = ureq(url).read()
    return uclient
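# Usage sketch: download() reads the module-level `url`, so define it first
# (the address below is a placeholder).
url = "https://example.com"
print(len(download()), "bytes fetched")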
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
import pandas

filename = 'products.csv'
f = open(filename, 'w')
headers = 'brand, product_name, price, shipping\n'
f.write(headers)

url = "https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38"
uclient = ureq(url)
page_html = uclient.read()
uclient.close()

page_soup = soup(page_html, 'html.parser')
containers = page_soup.findAll('div', {'class': 'item-container'})
for container in containers:
    brand = container.div.div.a.img['title']
    title = container.findAll('a', {'class': 'item-title'})[0].text
    price_class = container.findAll('ul', {'class': 'price'})[0]
    price = price_class.findAll('li', {'class': 'price-current'})[0].strong.text
    shipping = container.findAll('li', {'class': 'price-ship'})[0].text.strip()
    f.write(brand + ',' + title.replace(',', '|') + ',' + price + ',' + shipping + '\n')
f.close()

df_product = pandas.read_csv(filename)
print(df_product)
enter_product = driver.find_element_by_xpath(
    '//*[@id="container"]/div/div[1]/div[1]/div[2]/div[2]/form/div/div/input'
).send_keys("iphone")
init_search = driver.find_element_by_xpath(
    '//*[@id="container"]/div/div[1]/div[1]/div[2]/div[2]/form/div/button')
init_search.click()
driver.set_page_load_timeout(15)  # timeout in seconds (takes a number, not a string)

from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as ureq

myurl = "https://www.flipkart.com/search?q=iphone&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off"
uclient = ureq(myurl)  # opens the connection
req = uclient.read()  # read the complete html page and save it in a variable
uclient.close()  # close the connection

req = soup(req, 'html.parser')  # html5lib would also work here
containers = req.findAll('div', {'class': '_1UoZlX'})
# print(len(containers))
# print(soup.prettify(containers[0]))

container = containers[0]
name = container.findAll('div', {'class': '_3wU53n'})
# print(name[1].text)
price = container.findAll('div', {'class': '_1vC4OE _2rQ-NK'})
# print(price[0].text)