Exemplo n.º 1
0
    # Each hotel on the results page sits in its own "hotelCardListing" <div>.
    all_hotels = soup.find_all("div", {"class": "hotelCardListing"})

    for hotel in all_hotels:
        # One record per hotel. Keys are inserted in a fixed order because the
        # values are later passed positionally to connect.insert_into_table().
        hotel_dict = {}
        hotel_dict["name"] = hotel.find("h3", {"class": "listingHotelDescription_hotelName"}).text
        # "streetAddress" is the schema.org spelling of this itemprop.
        hotel_dict["address"] = hotel.find("span", {"itemprop": "streetAddress"}).text
        hotel_dict["price"] = hotel.find("span", {"class": "listingPrice_finalPrice"}).text

        # find() returns None when the element is absent, so `.text` raises
        # AttributeError; in that case the rating is recorded as None.
        try:
            hotel_dict["rating"] = hotel.find("span", {"class": "hotelRating_ratingSummary"}).text
        except AttributeError:
            hotel_dict["rating"] = None

        parent_amenities_element = hotel.find("div", {"class": "amenityWrapper"})

        amenities_list = []
        # A card without an amenity wrapper simply yields an empty amenities string.
        if parent_amenities_element is not None:
            for amenity in parent_amenities_element.find_all(attrs={"class": "amenityWrapper_amenity"}):
                amenities_list.append(amenity.find("span", {"class": "d-body-sm"}).text.strip())

        # NOTE(review): the last collected entry is deliberately dropped, as in
        # the original code; presumably it is not a real amenity (e.g. a
        # "more" link) -- confirm against the page markup.
        hotel_dict["amenities"] = ', '.join(amenities_list[:-1])
        scraped_info_list.append(hotel_dict)
        connect.insert_into_table(args.dbname, tuple(hotel_dict.values()))

# Build a DataFrame from the list of per-hotel dictionaries collected by the
# scraping loop (the list is named `scraped_info_list` there) and write it out.
dataFrame = pandas.DataFrame(scraped_info_list)
print("Creating csv file....")
dataFrame.to_csv("0yo.csv")
# Hand over to the connect helper for the stored hotel rows.
connect.get_hotel_info(args.dbname)
Exemplo n.º 2
0
    # ("tag",{"attribute":"value"}) 
    # find_all() returns a list of every element that satisfies the given
    # tag / attribute filter.
    all_products = soup.find_all("div", {"class": "thumbnail"})

    # Description fields, in the order they appear in the comma-separated text.
    description_columns = (
        ("Screen-size", 0),
        ("Processor", 1),
        ("RAM", 2),
        ("ROM", 3),
    )

    for product in all_products:
        # Basic fields first: name, price, review.
        product_dic = {
            "name": product.find("a", {"class": "title"}).text,
            "price": product.find("h4", {"class": "price"}).text,
            "review": product.find("p", {"class": "pull-right"}).text,
        }

        # Split the description on commas and store its first four pieces
        # under their own keys.
        description_parts = product.find("p", {"class": "description"}).text.split(',')
        for column, position in description_columns:
            product_dic[column] = description_parts[position]

        # Keep the record for later use and persist it as a row of values.
        scraped_info_list.append(product_dic)
        connect.insert_into_table(args.dbname, tuple(product_dic.values()))

        # print(pro_name,pro_price,pro_desc_list[:4],pro_review) 




# saving_csv()

# Print the scraped data in the terminal by reading it back through the
# connect helper, using the database name given on the command line.
connect.get_product_info(args.dbname)
Exemplo n.º 3
0
import connect

# Command line: --dbname selects the database the scraped rows are stored in.
parser = argparse.ArgumentParser()
parser.add_argument("--dbname", help="Enter the name of db", type=str)
args = parser.parse_args()

flip_url = "https://www.flipkart.com/search?q=mobiles&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
page_num_MAX = 3
scraped_info_list = []
connect.connect(args.dbname)

# NOTE: range() excludes its upper bound, so pages 1 .. page_num_MAX - 1 are
# fetched.
for page_num in range(1, page_num_MAX):
    # Send the page number as its own query parameter. Concatenating it onto
    # the URL would only alter the value of the trailing "as" parameter
    # ("as=off1") and never select a different page.
    req = requests.get(flip_url, params={"page": page_num})
    content = req.content
    soup = BeautifulSoup(content, "html.parser")
    all_phone = soup.find_all("div", {"class": "_3O0U0u"})

    for phone in all_phone:
        # One record per phone card: its name and its "rate" text.
        phone_dict = {}
        phone_dict["name"] = phone.find("div", {"class": "_3wU53n"}).text
        phone_dict["rate"] = phone.find("div", {"class": "_6BWGkk"}).text
        scraped_info_list.append(phone_dict)
        connect.insert_into_table(args.dbname, tuple(phone_dict.values()))

# Write every collected record to CSV, then hand over to the connect helper
# for the stored phone rows.
dataframe = pandas.DataFrame(scraped_info_list)
dataframe.to_csv("flip.csv")
connect.get_phone_info(args.dbname)

# Thus the output is stored in both the database and a CSV file.
Exemplo n.º 4
0
# Ask for the database name interactively and open it via the connect helper.
dbname = input("Enter the db name:")
connect.connect(dbname)

olx_url = "https://www.olx.in/items/q-yamaha-rx100?isSearchCall=true"

scraped_info_list = []

req = requests.get(olx_url)
content = req.content

soup = BeautifulSoup(content, "html.parser")
all_bikes = soup.find_all("div", {"class": "IKo3_"})

for bike in all_bikes:
    bike_dict = {}
    bike_dict["prices"] = bike.find("span", {"class": "_89yzn"}).text

    # The year element is optional. Always store the key (None when absent) so
    # that every record has the same fields in the same order; otherwise the
    # tuple passed to insert_into_table() would be one value short and the
    # remaining values would shift position.
    year_element = bike.find("span", {"class": "_2TVI3"})
    bike_dict["year"] = year_element.text if year_element is not None else None

    bike_dict["title"] = bike.find("span", {"class": "_2tW1I"}).text
    bike_dict["address"] = bike.find("span", {"class": "tjgMj"}).text
    scraped_info_list.append(bike_dict)
    connect.insert_into_table(dbname, tuple(bike_dict.values()))

# Write every collected record to CSV, then hand over to the connect helper
# for the stored bike rows.
dataFrame = pandas.DataFrame(scraped_info_list)
dataFrame.to_csv("olx.csv")
connect.get_bike_info(dbname)
Exemplo n.º 5
0
        # Collect the text of every feature bullet (<li>) found in the
        # feature lists (<ul>) of this mobile's feature container.
        # NOTE(review): feature_list is created outside this snippet; if it is
        # not reset for each mobile, features accumulate across items -- confirm.
        parent_features = mobile.find('div', {'class': 'fMghEO'})
        for feature in parent_features.find_all('ul', {'class': '_1xgFaf'}):
            # extend() with the item texts directly; no temporary named
            # `list`, which would shadow the builtin.
            feature_list.extend(
                item.text for item in feature.find_all('li', {'class': 'rgWa7D'})
            )
        print()

        # A list cannot be stored in a CSV cell, so the features are joined
        # into a single comma-separated string.
        mobile_dict['Features'] = ',  '.join(feature_list)

        scraped_info_list.append(mobile_dict)

        # insert_into_table() is given the record's values as a tuple.
        connect.insert_into_table(args.dbname, tuple(mobile_dict.values()))
        
        # Store all the collected info in a list, because pandas builds its
        # DataFrame from a list of records.
        # To do this, the info collected for each item is first put into a dictionary.


#dataFrame=pandas.DataFrame(scraped_info_list)              #data structure used by pandas lib
#print('creating csv file...')
#dataFrame.to_csv('Flipkart.csv')                #converts ds to csv file

      
# Call the connect helper for the database named on the command line.
# NOTE(review): presumably this reads back / prints the stored rows -- confirm
# against the connect module.
connect.get_info(args.dbname)  


Exemplo n.º 6
0
    except AttributeError:
        product_dict["rating"] = ""
    # Price is read unconditionally.
    price_element = product.find('div', class_='_1vC4OE _2rQ-NK')
    product_dict["price"] = price_element.text

    # Discount is optional: when the element is missing, find() returns None
    # and `.text` raises AttributeError, in which case an empty string is stored.
    try:
        product_dict["discount"] = product.find('div', class_='VGWI6T').text
    except AttributeError:
        product_dict["discount"] = ""

    # Join the text of every feature bullet into one comma-separated string.
    feature_items = product.find('ul', class_='vFw0gD').find_all('li', class_='tVe95H')
    product_dict["features"] = ', '.join([item.text for item in feature_items])

    # Keep the record for the CSV export and persist its values as a row.
    scraped_info_list.append(product_dict)
    row_values = tuple(product_dict.values())
    connect.insert_into_table(row_values)

# Turn the collected records into a DataFrame and write it straight to CSV,
# then call the connect helper for the stored laptop rows.
pandas.DataFrame(scraped_info_list).to_csv("laptops.csv")
connect.get_laptop_info()
'''
Output :-

Table created successfully
('Dell 14 3000 Core i3 7th Gen - (4 GB/1 TB HDD/Linux) inspiron 3481 Laptop', '4.1', '₹25,490', '13% off', 'Intel Core i3 Processor (7th Gen), 4 GB DDR4 RAM, Linux/Ubuntu Operating System, 1 TB HDD, 35.56 cm (14 inch) Display, 1 Year Limited Hardware Warranty, InHome Service After Remote Diagnosis')
('HP 14q Core i3 7th Gen - (8 GB/256 GB SSD/Windows 10 Home) 14q-cs0023TU Thin and Light Laptop', '4.3', '₹32,990', '3% off', 'Intel Core i3 Processor (7th Gen), 8 GB DDR4 RAM, 64 bit Windows 10 Operating System, 256 GB SSD, 35.56 cm (14 inch) Display, 1 Year Onsite Warranty')
('Lenovo Ideapad 130 Core i3 7th Gen - (4 GB/1 TB HDD/Windows 10 Home) 130-15IKB Laptop', '4', '₹27,990', '15% off', 'Intel Core i3 Processor (7th Gen), 4 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty')
('HP 15 Pentium Gold - (4 GB/1 TB HDD/Windows 10 Home) 15-di0001TU Laptop', '4.4', '₹23,490', '8% off', 'Intel Pentium Gold Processor, 4 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty')
('HP 15s Ryzen 3 Dual Core - (4 GB/256 GB SSD/Windows 10 Home) 15s-eq0007AU Thin and Light Laptop', '4.5', '₹30,990', '4% off', 'AMD Ryzen 3 Dual Core Processor, 4 GB DDR4 RAM, 64 bit Windows 10 Operating System, 256 GB SSD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty')
('Lenovo Ideapad L340 Core i5 9th Gen - (8 GB/1 TB HDD/128 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA G...', '4.5', '₹59,990', '39% off', '60 Hz Refresh Rate- It can display upto 60 frames per second., Intel Core i5 Processor (9th Gen), 8 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD|128 GB SSD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty')
('Acer Nitro 5 Core i7 9th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA Geforce ...', '4.6', '₹69,990', '33% off', '60 Hz Refresh Rate- It can display upto 60 frames per second., Intel Core i7 Processor (9th Gen), 8 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD|256 GB SSD, 39.62 cm (15.6 inch) Display, 1 Year International Travelers Warranty (ITW)')