def find_locality(cityName):
    """Scrape commonfloor.com for the locality names of a city.

    Downloads ``/localities/index/city/<cityName>``, takes the text of the
    first ``<tbody>`` element, removes the leading whitespace of every line
    and keeps the lines made up solely of alphabetic characters.

    Args:
        cityName: City name, interpolated into the URL path unchanged.

    Returns:
        A list of locality names, or ``None`` when downloading or parsing
        fails (the failure is reported on stdout).
    """
    try:
        searchurl = "http://www.commonfloor.com/localities/index/city/%s" % (
            cityName)
        response = requests.get(searchurl)
        soup = BeautifulSoup(response.text)

        table_text = typec(soup.find('tbody').text, 'string', 'string')
        table_text = typec(
            sub(r"(?m)^\s+", "", table_text), 'string', 'string')

        # str.isalpha() is False for lines containing spaces, digits or
        # punctuation, so only single-word alphabetic lines are kept.
        localities = [
            line for line in table_text.split('\n') if line.isalpha()
        ]

        # Single-argument print(...) gives the same output as the Python 2
        # print statement used elsewhere in this file.
        print(localities)
        return localities
    except Exception:
        # Deliberately best-effort: report and give up on this city.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        print("Error find_locality")
        print(cityName)
        return None
def find_all_restaurants(loca, cityId):
    """Collect the restaurant identifiers listed for one foodpanda locality.

    Requests the ``location-suggestions`` page for ``cityId`` / ``loca[1]``,
    gathers every ``<div class="vendor__title">`` and extracts the part of
    each contained link that follows ``/restaurant/``.

    Args:
        loca: Indexable locality record; only ``loca[1]`` is used (as the
            ``area`` query value).  Presumably one of the tuples produced by
            ``find_foodpanda_valid_locality`` -- verify against the caller.
        cityId: Value for the ``cityId`` query parameter.

    Returns:
        A list of 1-tuples ``(restaurant_id,)``, or ``None`` when the request
        or the parsing fails (the failure is reported on stdout).
    """
    try:
        searchurl = "https://www.foodpanda.in/location-suggestions?cityId=%s&area=%s" % (
            cityId, loca[1])
        response = requests.get(searchurl)
        soup = BeautifulSoup(response.text)

        # Re-parse only the vendor title blocks so the anchor search below
        # cannot pick up unrelated links from the rest of the page.
        title_blocks = soup.find_all("div", {'class': 'vendor__title'})
        title_soup = BeautifulSoup(typec(title_blocks, 'string', 'string'))

        restaurants = []
        for link in title_soup.find_all("a"):
            match = search('/restaurant/(.+?)">',
                           typec(link, 'string', 'string'))
            if match is None:
                # An anchor without a restaurant URL used to abort the whole
                # listing (None has no .group); skip just that anchor.
                continue
            restaurants.append((typec(match.group(1), 'string', 'string'), ))

        print(restaurants)
        return restaurants
    except Exception:
        # Best-effort scraper: report the inputs and return nothing.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        print("Error in find_all_restaurants")
        print("Loca")
        print(loca)
        print("cityId")
        print(cityId)
        return None
def _fp_locality_matches_prefix(locality, prefix):
    """Return True when a suggested locality name belongs to ``prefix``.

    A name matches when its first two characters, lower-cased, equal
    ``prefix``, or when the text following its first "(" does.
    """
    if locality[0:2].lower() == prefix:
        return True
    pieces = locality.split('(')
    # No "(" in the name means there is no alternative name to test.
    return len(pieces) > 1 and pieces[1][0:2].lower() == prefix


def find_foodpanda_valid_locality(cityId):
    """Discover foodpanda localities of a city by probing name prefixes.

    The prefixes come from the file ``comparison`` in the working directory:
    its first line is skipped, every other line is split on single spaces and
    the first field is used as a prefix when ``int(second field) >= 10``.
    For each prefix the ``location-suggestions`` page is requested; when it
    is a "Suggestions" page, every suggested locality whose name matches the
    prefix (see ``_fp_locality_matches_prefix``) is recorded once per area id.

    Connection errors are printed and appended to the file
    ``locationConnectionError``; the crawl then continues with the next
    prefix.  Any other exception propagates to the caller.

    NOTE(review): further down, this file defines another function with the
    same name and the signature ``(cityId, localities)``.  A later ``def``
    rebinds the name, so this one-argument version is shadowed once the
    module has been fully executed.

    Args:
        cityId: Value for the ``cityId`` query parameter.

    Returns:
        ``(foodpanda_locality, locality_id)`` where ``foodpanda_locality`` is
        a list of ``(area_id, name, cityId, restaurants_url)`` tuples and
        ``locality_id`` the list of their area ids in discovery order.
    """
    bullet = u"\u2022"  # same character as unichr(8226)

    with open("comparison", "r") as handle:
        rows = handle.read().strip().split('\n')
    combinations = []
    for row in rows[1:]:  # rows[0] is skipped, as before
        fields = row.split(" ")
        if int(fields[1]) >= 10:
            combinations.append(fields[0])
    # Single-argument print(...) gives the same output as the Python 2
    # print statement used elsewhere in this file.
    print(len(combinations))

    foodpanda_locality = []
    locality_id = []
    seen_area_ids = set()  # area ids already present in both result lists

    for loca in combinations:
        try:
            count = 0
            fetch_started = time()
            searchurl = "https://www.foodpanda.in/location-suggestions?cityId=%s&area=%s" % (
                cityId, loca)
            response = requests.get(searchurl)
            soup = BeautifulSoup(response.text)
            print(time() - fetch_started)
            parse_started = time()

            # Only a page whose <h1 class="h2"> reads "Suggestions" lists
            # localities; any other page is ignored by this function.
            heading_tag = soup.find('h1', {'class': 'h2'})
            if heading_tag and sub(
                    ":", "", heading_tag.text).strip() == "Suggestions":
                area_ids = [
                    search('area_id=(.+?)">',
                           typec(anchor, 'string', 'string')).group(1)
                    for anchor in soup.find_all(
                        'a', {'class': 'list-group-item'})
                ]

                names = []
                for block in soup.find_all(
                        'div',
                        {'class': 'content-block location-suggestions'}):
                    lines = sub(r"(?m)^\s+", "", block.text).split('\n')
                    # Drop the leading "Suggestions" line and the trailing
                    # blank one; slicing cannot raise on a short list the
                    # way the former pair of pop() calls could.
                    names = lines[1:-1]

                if not area_ids and not names:
                    print("%s %s" % (loca, ":" + "0"))

                for locality, area_id in zip(names, area_ids):
                    if not _fp_locality_matches_prefix(locality, loca):
                        continue
                    count += 1  # counts every match, duplicates included
                    if area_id in seen_area_ids:
                        continue
                    seen_area_ids.add(area_id)
                    foodpanda_locality.append((
                        area_id,
                        typec(locality.replace(bullet, ''), 'string',
                              'string'),
                        typec(cityId, 'string', 'string'),
                        'http://www.foodpanda.in/restaurants?area_id=%s' % (
                            area_id),
                    ))
                    locality_id.append(area_id)

            # NOTE(review): the collapsed source does not show how deeply
            # these two diagnostics were nested; they are emitted once per
            # prefix here.
            print("%s %s %s" % (loca, ' : ', count))
            print(time() - parse_started)
        except requests.exceptions.ConnectionError as e:
            print(e)
            # Record the failed probe so it can be retried later.
            with open("locationConnectionError", "a") as error_log:
                error_log.write("cityId: %s, " % (cityId))
                error_log.write("combination: %s\n" % (loca))

    if None in locality_id:
        locality_id.remove(None)
    return (foodpanda_locality, locality_id)
def restaurant_info(restaurantsData):
    """Scrape the details, menu and cuisines of one foodpanda restaurant.

    Args:
        restaurantsData: Sequence whose first element is the restaurant id
            used in ``https://www.foodpanda.in/restaurant/<id>``.  It is
            extended with ``+=``; presumably a tuple such as the ones built
            by ``find_all_restaurants`` -- verify against the caller.

    Returns:
        ``(restaurantsData, foodData, cuisineData)``:

        * ``restaurantsData`` -- the input followed by name, address, rating,
          delivery fee, delivery time, pickup time, minimum order amount,
          online-payment flag, voucher flag and the page URL.
        * ``foodData`` -- one tuple per menu row: restaurant id, the row's
          text fields and price, then the page URL.
        * ``cuisineData`` -- ``(restaurant_id, cuisine)`` tuples taken from
          the page's JSON-LD ``servesCuisine`` entry.

        ``(None, None, None)`` for the id ``"cj8ta/dominos"``, which is
        skipped on purpose (the reason is not recorded in this file).

    Raises:
        Nothing is caught here: request errors and missing page elements
        (``AttributeError`` / ``KeyError``) propagate to the caller.
    """
    restaurant_id = restaurantsData[0]
    searchurl = "https://www.foodpanda.in/restaurant/%s" % (restaurant_id)

    # Single-argument print(...) gives the same output as the Python 2
    # print statement used elsewhere in this file.
    is_supported = restaurant_id != "cj8ta/dominos"
    print(is_supported)
    if not is_supported:
        # Decide before downloading: the page used to be fetched and parsed
        # even though it was then thrown away.
        print(restaurant_id)
        return (None, None, None)

    non_ascii = r"[^\x00-\x7F]+"
    leading_ws = r"(?m)^\s+"

    response = requests.get(searchurl)
    # The document is parsed once and reused; none of the lookups below
    # modifies the tree.
    soup = BeautifulSoup(response.text)
    metadata = json.loads(
        soup.find('script', {"type": "application/ld+json"}).text)

    # --- Header: name, address, rating -----------------------------------
    restaurantsData += (typec(
        sub(leading_ws, "", sub(non_ascii, "", soup.find('h1').text)),
        'string', 'string').rstrip('\n'), )
    restaurantsData += (typec(
        sub(non_ascii, "", soup.find('address').text), 'string',
        'string').lstrip('\n'), )
    restaurantsData += (typec(metadata["aggregateRating"]["ratingValue"],
                              'string', 'float'), )

    # --- Delivery details: "label" line followed by its value line --------
    details = sub(
        leading_ws, "",
        typec(
            soup.find('ul', {'class': 'cart__empty__elements'}).text,
            'string', 'string')).split('\n')

    def rupees(text):
        # NOTE: "Rs." is used as a regex, so the dot matches any character.
        return typec(sub(",", "", sub("Rs.", "", text)), 'string', 'float')

    deliveryFee = None
    deliveryTime = None
    paymentOption = None
    deliveryMinAmount = None
    Voucher = False
    pickupTime = None
    for index, item in enumerate(details):
        if item == 'Delivery time:':
            deliveryTime = details[index + 1]
        elif item == 'Online payment available':
            paymentOption = True
        elif item == 'Delivery fee':
            deliveryFee = rupees(details[index + 1])
        elif item == 'Delivery min.:':
            deliveryMinAmount = rupees(details[index + 1])
        elif item == 'Accepts Vouchers':
            Voucher = True

    pickup_label = soup.find("dt", {"class": "vendor-pickup-time"})
    if pickup_label is not None:
        pickup_soup = BeautifulSoup(
            typec(pickup_label.findNext("dd"), 'string', 'string'))
        pickup_text = typec(pickup_soup.find("dd").text, 'string', 'string')
        pickupTime = sub(leading_ws, "", pickup_text).split("\n")[0]

    restaurantsData += (
        deliveryFee,
        deliveryTime,
        pickupTime,
        deliveryMinAmount,
        paymentOption,
        Voucher,
        searchurl,
    )

    # --- Menu: flatten every variation into newline separated tokens ------
    menu_parts = []
    for wrapper in soup.find_all('div',
                                 {'class': 'menu-item__content-wrapper'}):
        wrapper_soup = BeautifulSoup(typec(wrapper, 'string', 'string'))
        dish_name = sub(
            leading_ws, "",
            wrapper_soup.find('div', {'class': 'menu-item__title'}).text)
        for variation in wrapper_soup.find_all(
                'article', {'class': 'menu-item__variation'}):
            # The dish name is repeated so each variation yields its own row.
            menu_parts.append(dish_name)
            menu_parts.append(sub(leading_ws, "", variation.text))
    tokens = "".join(menu_parts).strip().split('\n')
    # Build a filtered copy: popping from the list while enumerating it
    # skipped the entry after each removed NBSP-only line.
    tokens = [token for token in tokens if token != u'\xa0']

    # A row ends at the "Add" button text.  itemCount tracks the text fields
    # collected for the current row; a price arriving on an odd count gets a
    # 'None' placeholder inserted before it.
    # NOTE(review): the collapsed source does not show whether the trailing
    # ``itemCount += 1`` applied to text fields only (as here) or to every
    # non-"Add" token; both readings build identical rows whenever a row
    # holds at most one price.
    foodData = []
    foodtuple = (restaurant_id, )
    itemCount = 0
    for token in tokens:
        if token == 'Add':
            foodData.append(foodtuple + (searchurl, ))
            foodtuple = (restaurant_id, )
            itemCount = 0
            continue

        token = token.replace(u'\xa0', '')  # same character as unichr(160)
        ascii_text = typec(sub(non_ascii, "", token), 'string', 'string')
        if "Rs." in ascii_text:
            price = sub("Rs.", "", ascii_text)
            price = sub(",", "", typec(price, 'string', 'string'))
            price = typec(price, 'string', 'float')
            if itemCount % 2 == 1:
                foodtuple += ('None', price, )
                itemCount += 1
            else:
                foodtuple += (price, )
        else:
            foodtuple += (ascii_text, )
            itemCount += 1

    # --- Cuisines ---------------------------------------------------------
    cuisineData = [(
        restaurant_id,
        typec(cuisine, 'string', 'string').lstrip().rstrip(),
    ) for cuisine in metadata["servesCuisine"]]
    print(cuisineData)
    return (restaurantsData, foodData, cuisineData)
def find_foodpanda_valid_locality(cityId, localities):
    """Resolve candidate locality names to foodpanda area ids.

    For every name in ``localities`` the ``location-suggestions`` page is
    requested.  Two kinds of answer are understood:

    * a "Suggestions" page (``<h1 class="h2">`` reading "Suggestions"):
      every listed area id is recorded, and every suggested name that can be
      paired with an id is stored as a locality;
    * any other page is treated as a redirect to a concrete locality: the
      area id is read from the ``og:url`` meta tag and stored together with
      the queried name.  A page without such an id is skipped.

    NOTE(review): the collapsed source does not show which ``if`` the
    original ``else:`` belonged to (missing heading vs. heading other than
    "Suggestions"); both cases are routed to the redirect handling here.

    Args:
        cityId: Value for the ``cityId`` query parameter.
        localities: Iterable of locality names to probe.

    Returns:
        ``(foodpanda_locality, locality_id)`` where ``foodpanda_locality`` is
        a list of ``(area_id, name, cityId, restaurants_url)`` tuples, unique
        per area id, and ``locality_id`` the list of unique area ids seen.
        ``None`` when any step fails (the failure is reported on stdout).
    """
    bullet = u"\u2022"  # same character as unichr(8226)
    try:
        foodpanda_locality = []
        locality_id = []
        # Sets mirror the two lists for O(1) duplicate checks.  They are kept
        # separate because locality_id can hold ids that never received a
        # name (more ids than names on a suggestions page).
        listed_ids = set()
        stored_areas = set()

        def remember_id(area_id):
            if area_id not in listed_ids:
                listed_ids.add(area_id)
                locality_id.append(area_id)

        def store_locality(area_id, name):
            if area_id in stored_areas:
                return
            stored_areas.add(area_id)
            foodpanda_locality.append((
                area_id,
                typec(name.replace(bullet, ''), 'string', 'string'),
                typec(cityId, 'string', 'string'),
                'http://www.foodpanda.in/restaurants?area_id=%s' % (area_id),
            ))

        for count, loca in enumerate(localities, 1):
            # Progress diagnostics.  Single-argument print(...) gives the
            # same output as the Python 2 print statement.
            print(count)
            print(len(foodpanda_locality))
            print(len(locality_id))
            print(locality_id)
            print(loca)
            print(cityId)

            searchurl = "https://www.foodpanda.in/location-suggestions?cityId=%s&area=%s" % (
                cityId, loca)
            response = requests.get(searchurl)
            soup = BeautifulSoup(response.text)

            heading_tag = soup.find('h1', {'class': 'h2'})
            is_suggestions = bool(heading_tag) and sub(
                ":", "", heading_tag.text).strip() == "Suggestions"

            if is_suggestions:
                area_ids = [
                    search('area_id=(.+?)">',
                           typec(anchor, 'string', 'string')).group(1)
                    for anchor in soup.find_all(
                        'a', {'class': 'list-group-item'})
                ]
                for area_id in area_ids:
                    remember_id(area_id)

                names = []
                for block in soup.find_all(
                        'div',
                        {'class': 'content-block location-suggestions'}):
                    lines = sub(r"(?m)^\s+", "", block.text).split('\n')
                    # Drop the leading "Suggestions" line and the trailing
                    # blank one; slicing cannot raise on a short list the
                    # way the former pair of pop() calls could.
                    names = lines[1:-1]

                for locality, area_id in zip(names, area_ids):
                    store_locality(area_id, locality)
            else:
                # The queried name is already a valid foodpanda locality.
                meta = soup.find('meta', {'property': 'og:url'})
                match = search('area_id=(.+?)"',
                               typec(meta, 'string', 'string'))
                if match is None:
                    # Neither a suggestions page nor a locality page: skip
                    # this name instead of aborting the whole crawl.
                    continue
                area_id = match.group(1)
                remember_id(area_id)
                store_locality(area_id, loca)

        if None in locality_id:
            locality_id.remove(None)
        return (foodpanda_locality, locality_id)
    except Exception:
        # Best-effort scraper: report the inputs and return nothing.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        print("Error in locality")
        print("city Id")
        print(cityId)
        print("localities")
        print(localities)
        return None