Exemplo n.º 1
0
def find_locality(cityName):
    """Scrape the commonfloor.com locality index page for a city.

    Fetches the locality listing for ``cityName``, takes the text of the
    first ``<tbody>`` element on the page and keeps every line of it that
    consists solely of alphabetic characters.

    Args:
        cityName: City name; interpolated verbatim into the request URL.

    Returns:
        A list of locality names, or ``None`` when the page could not be
        fetched or parsed (the failure is reported on stdout).
    """
    try:
        searchurl = "http://www.commonfloor.com/localities/index/city/%s" % (
            cityName)
        html = requests.get(searchurl).text
        table_body = BeautifulSoup(html).find('tbody')
        # Remove the leading whitespace of every line; because \s also
        # matches newlines this collapses blank lines as well.
        text = typec(
            sub(r"(?m)^\s+", "", typec(table_body.text, 'string', 'string')),
            'string', 'string')
        # isalpha() rejects empty lines and any line containing spaces,
        # digits or punctuation.
        localities = [line for line in text.split('\n') if line.isalpha()]
        print(localities)
        return localities
    except Exception:
        # Best-effort scraper: report the failing city and carry on.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Error find_locality")
        print(cityName)
        return None
Exemplo n.º 2
0
def find_all_restaurants(loca, cityId):
    """Collect the restaurant identifiers listed for one foodpanda area.

    Requests the location-suggestions page for the given city and area and
    extracts, from every link inside a ``vendor__title`` block, the path
    component that follows ``/restaurant/``.

    Args:
        loca: Indexable locality record; only ``loca[1]`` is used, as the
            ``area`` query parameter.
        cityId: Value of the ``cityId`` query parameter.

    Returns:
        A list of 1-tuples ``(restaurant_id,)``, or ``None`` when the page
        could not be fetched or parsed (the failure is reported on stdout).
    """
    try:
        searchurl = "https://www.foodpanda.in/location-suggestions?cityId=%s&area=%s" % (cityId, loca[1])
        html = requests.get(searchurl).text
        soup = BeautifulSoup(html)
        # Re-parse only the vendor title blocks so the link search below is
        # restricted to them.
        title_blocks = soup.find_all("div", {'class': 'vendor__title'})
        titles = BeautifulSoup(typec(title_blocks, 'string', 'string'))
        restaurants = []
        for link in titles.find_all("a"):
            uniqueId = search('/restaurant/(.+?)">',
                              typec(link, 'string', 'string')).group(1)
            restaurants.append((typec(uniqueId, 'string', 'string'),))
        print(restaurants)
        return restaurants
    except Exception:
        # Best-effort scraper: report the inputs that failed and carry on.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Error in find_all_restaurants")
        print("Loca")
        print(loca)
        print("cityId")
        print(cityId)
        return None
Exemplo n.º 3
0
def _locality_record(area_id, locality, cityId):
    """Build the (area_id, name, city id, restaurants URL) tuple for a locality."""
    searchurl = 'http://www.foodpanda.in/restaurants?area_id=%s' % (area_id)
    return (
        area_id,
        # U+2022 is the bullet character; it is stripped from the name.
        typec(locality.replace(unichr(8226), ''), 'string', 'string'),
        typec(cityId, 'string', 'string'),
        searchurl,
    )


def _locality_matches(locality, loca):
    """Tell whether the name, or its first parenthesised part, starts with ``loca``.

    Only the first two characters are compared, case-insensitively on the
    locality side.
    """
    if locality[0:2].lower() == loca:
        return True
    parts = locality.split('(')
    return len(parts) > 1 and parts[1][0:2].lower() == loca


def find_foodpanda_valid_locality(cityId):
    """Crawl foodpanda location suggestions for every prefix in ``comparison``.

    Reads the file ``comparison`` from the working directory: the first line
    is skipped (presumably a header), every other line is split on single
    spaces, and the first field is kept as a search term when the second
    field, read as an integer, is at least 10.  Each term is sent to the
    location-suggestions endpoint; when the response is a "Suggestions"
    page, every suggested locality whose name (or parenthesised part)
    starts with the term is recorded once per area id.

    Connection errors for a term are printed and appended to the file
    ``locationConnectionError``; the crawl then continues with the next
    term.  Timing information and per-term counts are printed to stdout.

    Args:
        cityId: Value of the ``cityId`` query parameter.

    Returns:
        A tuple ``(foodpanda_locality, locality_id)`` where
        ``foodpanda_locality`` is a list of
        ``(area_id, locality_name, city_id, restaurants_url)`` tuples and
        ``locality_id`` is the list of distinct area ids, in the order they
        were first seen.
    """
    # ``with`` guarantees the handle is closed even if a malformed line
    # makes int() raise below.
    with open("comparison", "r") as combos_file:
        rows = combos_file.read().strip().split('\n')[1:]
    combinations = []
    for row in rows:
        fields = row.split(" ")
        if int(fields[1]) >= 10:
            combinations.append(fields[0])
    print(len(combinations))

    foodpanda_locality = []
    locality_id = []
    # Area ids already recorded; both result lists are always extended
    # together, so one set covers the duplicate check for both.
    seen_area_ids = set()

    for loca in combinations:
        try:
            count = 0

            # Fetch and parse the suggestions page, reporting how long it took.
            fetch_started = time()
            searchurl = "https://www.foodpanda.in/location-suggestions?cityId=%s&area=%s" % (
                cityId, loca)
            html = requests.get(searchurl).text
            soup = BeautifulSoup(html)
            print(time() - fetch_started)
            parse_started = time()

            # Only a page headed "Suggestions" lists candidate localities;
            # anything else (e.g. a redirect to a specific locality) is
            # ignored here.
            heading_tag = soup.find('h1', {'class': 'h2'})
            if heading_tag and sub(":", "",
                                   heading_tag.text).strip() == "Suggestions":
                area_ids = [
                    search('area_id=(.+?)">',
                           typec(anchor, 'string', 'string')).group(1)
                    for anchor in soup.find_all('a',
                                                {'class': 'list-group-item'})
                ]

                names = []
                for block in soup.find_all(
                        'div',
                        {'class': 'content-block location-suggestions'}):
                    names = sub(r"(?m)^\s+", "", block.text).split('\n')
                # Drop the leading "Suggestions" line and the trailing empty
                # line.  Slicing (instead of two pop() calls) keeps an empty
                # or one-element list from raising IndexError.
                names = names[1:-1]

                if not area_ids and not names:
                    print("%s :0" % (loca,))

                for locality, area_id in zip(names, area_ids):
                    if not _locality_matches(locality, loca):
                        continue
                    if area_id not in seen_area_ids:
                        seen_area_ids.add(area_id)
                        foodpanda_locality.append(
                            _locality_record(area_id, locality, cityId))
                        locality_id.append(area_id)
                    # Counts every matching suggestion, duplicates included.
                    count += 1
                print("%s  :  %s" % (loca, count))
            print(time() - parse_started)
        except requests.exceptions.ConnectionError as e:
            print(e)
            # Remember the failed request so it can be retried later.
            with open("locationConnectionError", "a") as error_log:
                error_log.write("cityId: %s, " % (cityId))
                error_log.write("combination: %s\n" % (loca))

    # Defensive: never hand a None area id back to the caller.
    if None in locality_id:
        locality_id.remove(None)
    return (foodpanda_locality, locality_id)
def _rupees_to_float(text):
    """Strip the "Rs." marker and thousands separators, then convert to float."""
    return typec(sub(",", "", sub("Rs.", "", text)), 'string', 'float')


def restaurant_info(restaurantsData):
    """Scrape the detail page of one foodpanda restaurant.

    Args:
        restaurantsData: Sequence whose first element is the restaurant
            identifier used in the ``/restaurant/<id>`` URL.

    Returns:
        ``(None, None, None)`` for the hard-coded identifier
        ``"cj8ta/dominos"``.  Otherwise a tuple
        ``(restaurantsData, foodData, cuisineData)`` where

        * ``restaurantsData`` is the input extended with name, address,
          rating, delivery fee, delivery time, pickup time, minimum order
          amount, online-payment flag, voucher flag and the page URL;
        * ``foodData`` is a list of tuples, one per "Add" marker found in
          the menu text, each starting with the restaurant id and ending
          with the page URL;
        * ``cuisineData`` is a list of ``(restaurant_id, cuisine)`` tuples.
    """
    searchurl = "https://www.foodpanda.in/restaurant/%s" % (restaurantsData[0])
    html = requests.get(searchurl).text
    # The page is parsed once and the tree reused for every lookup below.
    soup = BeautifulSoup(html)
    print(restaurantsData[0] != "cj8ta/dominos")
    if restaurantsData[0] == "cj8ta/dominos":
        # This one restaurant is deliberately skipped.
        print(restaurantsData[0])
        return (None, None, None)

    # --- Basic restaurant data -------------------------------------------
    name = typec(
        sub(r"(?m)^\s+", "", sub(r"[^\x00-\x7F]+", "", soup.find('h1').text)),
        'string', 'string').rstrip('\n')
    address = typec(
        sub(r"[^\x00-\x7F]+", "", soup.find('address').text), 'string',
        'string').lstrip('\n')
    # The embedded JSON-LD block supplies both the rating and the cuisines.
    metadata = json.loads(
        soup.find('script', {"type": "application/ld+json"}).text)
    rating = typec(metadata["aggregateRating"]["ratingValue"], 'string',
                   'float')
    restaurantsData += (name, address, rating)

    # --- Delivery details --------------------------------------------------
    # The details list is flattened to one entry per line; a label line is
    # followed by its value on the next line.
    details = sub(
        r"(?m)^\s+", "",
        typec(
            soup.find('ul', {'class': 'cart__empty__elements'}).text,
            'string', 'string')).split('\n')
    deliveryFee = None
    deliveryTime = None
    paymentOption = None
    deliveryMinAmount = None
    Voucher = False
    pickupTime = None
    for index, item in enumerate(details):
        if item == 'Delivery time:':
            deliveryTime = details[index + 1]
        elif item == 'Online payment available':
            paymentOption = True
        elif item == 'Delivery fee':
            deliveryFee = _rupees_to_float(details[index + 1])
        elif item == 'Delivery min.:':
            deliveryMinAmount = _rupees_to_float(details[index + 1])
        elif item == 'Accepts Vouchers':
            Voucher = True

    pickup_label = soup.find("dt", {"class": "vendor-pickup-time"})
    if pickup_label is not None:
        pickup_soup = BeautifulSoup(
            typec(pickup_label.findNext("dd"), 'string', 'string'))
        pickup_text = pickup_soup.find("dd").text
        # Only the first non-blank line of the <dd> is kept.
        pickupTime = sub(r"(?m)^\s+", "",
                         typec(pickup_text, 'string',
                               'string')).split("\n")[0]
    restaurantsData += (
        deliveryFee,
        deliveryTime,
        pickupTime,
        deliveryMinAmount,
        paymentOption,
        Voucher,
        searchurl,
    )

    # --- Menu --------------------------------------------------------------
    # Every variation contributes the dish title followed by its own text;
    # the pieces are joined once instead of growing a string with +=.
    menu_chunks = []
    for wrapper in soup.find_all('div',
                                 {'class': 'menu-item__content-wrapper'}):
        item_soup = BeautifulSoup(typec(wrapper, 'string', 'string'))
        dish_name = item_soup.find('div', {'class': 'menu-item__title'}).text
        for variation in item_soup.find_all(
                'article', {'class': 'menu-item__variation'}):
            menu_chunks.append(sub(r"(?m)^\s+", "", dish_name))
            menu_chunks.append(sub(r"(?m)^\s+", "", variation.text))
    tokens = "".join(menu_chunks).strip().split('\n')
    # Drop lines that are just a non-breaking space.  Filtering into a new
    # list removes all of them; popping while enumerating skipped the entry
    # that followed each removed one.
    tokens = [token for token in tokens if token != u'\xa0']

    foodData = []
    foodtuple = (restaurantsData[0], )
    # Number of text fields seen since the last "Add" marker.
    itemCount = 0
    for item in tokens:
        if item == 'Add':
            # "Add" closes the current menu entry.
            foodData.append(foodtuple + (searchurl, ))
            foodtuple = (restaurantsData[0], )
            itemCount = 0
            continue

        item = item.replace(unichr(160), '')
        # NOTE(review): converted once and reused; assumes typec is a pure
        # conversion with no side effects -- confirm against its definition.
        ascii_item = typec(sub(r"[^\x00-\x7F]+", "", item), 'string',
                           'string')
        if "Rs." in ascii_item:
            price_text = sub(
                ",", "",
                typec(sub("Rs.", "", ascii_item), 'string', 'string'))
            price = typec(price_text, 'string', 'float')
            if itemCount % 2 == 1:
                # A price after an odd number of text fields gets a 'None'
                # placeholder in front of it (presumably standing in for a
                # missing variation name -- TODO confirm).
                foodtuple += ('None', price)
                itemCount += 1
            else:
                foodtuple += (price, )
        else:
            foodtuple += (ascii_item, )
            itemCount += 1

    # --- Cuisines ----------------------------------------------------------
    cuisineData = [(
        restaurantsData[0],
        typec(cuisine, 'string', 'string').strip(),
    ) for cuisine in metadata["servesCuisine"]]
    print(cuisineData)
    return (restaurantsData, foodData, cuisineData)
Exemplo n.º 5
0
def find_foodpanda_valid_locality(cityId, localities):
    """Resolve candidate locality names to foodpanda area ids.

    Each entry of ``localities`` is sent to the location-suggestions
    endpoint.  A response with an ``<h1 class="h2">`` heading is used only
    when that heading reads "Suggestions": every suggested area id is
    collected and each (name, area id) pair is recorded.  A response
    without that heading is treated as the page of one specific locality
    and its area id is read from the ``og:url`` meta tag.  Progress
    information is printed to stdout for every entry.

    Args:
        cityId: Value of the ``cityId`` query parameter.
        localities: Iterable of locality names to look up.

    Returns:
        A tuple ``(foodpanda_locality, locality_id)`` where
        ``foodpanda_locality`` is a list of
        ``(area_id, locality_name, city_id, restaurants_url)`` tuples with
        one entry per area id and ``locality_id`` is the list of distinct
        area ids in first-seen order.  Returns ``None`` if any request or
        parse step fails (the inputs are reported on stdout).
    """
    foodpanda_locality = []
    locality_id = []
    # Sets mirroring the two result lists, for constant-time duplicate checks.
    recorded_area_ids = set()
    known_area_ids = set()

    def remember_area_id(area_id):
        """Append ``area_id`` to ``locality_id`` unless already present."""
        if area_id not in known_area_ids:
            known_area_ids.add(area_id)
            locality_id.append(area_id)

    def record_locality(area_id, name):
        """Append the full record for ``area_id`` unless already present."""
        if area_id in recorded_area_ids:
            return
        recorded_area_ids.add(area_id)
        restaurants_url = 'http://www.foodpanda.in/restaurants?area_id=%s' % (
            area_id)
        foodpanda_locality.append((
            area_id,
            # U+2022 is the bullet character; it is stripped from the name.
            typec(name.replace(unichr(8226), ''), 'string', 'string'),
            typec(cityId, 'string', 'string'),
            restaurants_url,
        ))

    try:
        for count, loca in enumerate(localities, 1):
            print(count)
            print(len(foodpanda_locality))
            print(len(locality_id))
            print(locality_id)
            print(loca)
            print(cityId)

            searchurl = "https://www.foodpanda.in/location-suggestions?cityId=%s&area=%s" % (
                cityId, loca)
            html = requests.get(searchurl).text
            soup = BeautifulSoup(html)

            heading_tag = soup.find('h1', {'class': 'h2'})
            if heading_tag:
                # A headed page that is not the suggestions list carries
                # nothing to record.
                if sub(":", "", heading_tag.text).strip() != "Suggestions":
                    continue

                area_ids = [
                    search('area_id=(.+?)">',
                           typec(anchor, 'string', 'string')).group(1)
                    for anchor in soup.find_all('a',
                                                {'class': 'list-group-item'})
                ]
                for area_id in area_ids:
                    remember_area_id(area_id)

                names = []
                for block in soup.find_all(
                        'div',
                        {'class': 'content-block location-suggestions'}):
                    names = sub(r"(?m)^\s+", "", block.text).split('\n')
                # Drop the leading "Suggestions" line and the trailing empty
                # line.  Slicing (instead of two pop() calls) keeps an empty
                # list from raising IndexError and aborting the whole run.
                names = names[1:-1]

                for locality, area_id in zip(names, area_ids):
                    record_locality(area_id, locality)
            else:
                # No heading: the searched name itself is taken as the
                # locality name and its area id comes from the og:url tag.
                meta_tag = soup.find('meta', {'property': 'og:url'})
                area_id = search('area_id=(.+?)"',
                                 typec(meta_tag, 'string', 'string')).group(1)
                remember_area_id(area_id)
                record_locality(area_id, loca)

        # Defensive: never hand a None area id back to the caller.
        if None in locality_id:
            locality_id.remove(None)
        return (foodpanda_locality, locality_id)
    except Exception:
        # Best-effort crawl: report the inputs and give up on this city.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Error in locality")
        print("city Id")
        print(cityId)
        print("localities")
        print(localities)
        return None