Exemplo n.º 1
0
 def parseMain(self, response):
     """Keep an ID getter for ``response`` and request each "domicilios-" link.

     A ``RestaurantIDsGetter`` built from ``response`` is stored on the
     spider, then one ``Request`` is yielded per link extracted from the
     page, with ``self.parseRestaurants`` as its callback.
     """
     self.restaurantIDsGetter = RestaurantIDsGetter(response)
     pattern = r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*"
     extractor = LinkExtractor(allow=(pattern))
     for found in extractor.extract_links(response):
         yield Request(found.url, callback=self.parseRestaurants)
Exemplo n.º 2
0
class RestaurantSpider(CrawlSpider):
    """Crawl spider for domiciliosbogota.com.

    The single rule routes the site root to ``parseMain``, which in turn
    schedules ``parseRestaurants`` for every "domicilios-" link it finds.
    """

    name = "RestaurantSpider"
    allowed_domains = ["domiciliosbogota.com"]
    start_urls = ('http://www.domiciliosbogota.com/',)
    productLinkGetter = ProductLinkGetter()
    rules = [
        Rule(
            LinkExtractor(allow=(r"http://www\.domiciliosbogota\.com/$")),
            'parseMain',
        ),
    ]

    def parseMain(self, response):
        """Store an ID getter for ``response`` and yield one request per link."""
        self.restaurantIDsGetter = RestaurantIDsGetter(response)
        restaurantLinks = LinkExtractor(
            allow=(r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*")
        ).extract_links(response)
        for restaurantLink in restaurantLinks:
            yield Request(restaurantLink.url, callback=self.parseRestaurants)

    def parseRestaurants(self, response):
        """Fill a ``Restaurant`` item from ``response`` and return it."""
        selector = RestaurantSelector(response)
        item = Restaurant()
        item["url"] = response.url
        item["name"] = selector.getName()

        # The ID getter stored by parseMain is queried with "/" plus the
        # last path segment of the page URL.
        lastSegment = response.url.split("/")[-1]
        item["id"] = self.restaurantIDsGetter.getID("/" + lastSegment)

        # The remaining fields come straight from the selector; the tuple
        # keeps the original call order.
        selectorFields = (
            ("deliveryTimeInMinutes", selector.getDeliveryTimeInMinutes),
            ("minOrderPrice", selector.getMinOrderPrice),
            ("deliveryCost", selector.getDeliveryCost),
            ("payMethods", selector.getPayMethods),
            ("menu", selector.getMenuCategories),
            ("tagCategories", selector.getTagCategories),
            ("averagePunctuation", selector.getAveragePunctuation),
            ("quantityOfComments", selector.getQuantityOfComments),
        )
        for fieldName, getter in selectorFields:
            item[fieldName] = getter()
        return item
Exemplo n.º 3
0
class RestaurantSpider(CrawlSpider):
    """Spider that starts at the domiciliosbogota.com root page.

    ``parseMain`` handles the root page and fans out to
    ``parseRestaurants``, which builds one ``Restaurant`` item per response.
    """

    name = "RestaurantSpider"
    allowed_domains = ["domiciliosbogota.com"]
    start_urls = ('http://www.domiciliosbogota.com/', )
    productLinkGetter = ProductLinkGetter()

    # Only the exact root URL is routed to parseMain.
    rules = [
        Rule(
            LinkExtractor(allow=(r"http://www\.domiciliosbogota\.com/$")),
            'parseMain',
        ),
    ]

    def parseMain(self, response):
        """Remember an ID getter for this page, then request every matching link."""
        self.restaurantIDsGetter = RestaurantIDsGetter(response)
        allowPattern = r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*"
        for found in LinkExtractor(allow=(allowPattern)).extract_links(response):
            yield Request(found.url, callback=self.parseRestaurants)

    def parseRestaurants(self, response):
        """Return a ``Restaurant`` item populated from ``response``."""
        page = RestaurantSelector(response)
        item = Restaurant()

        item["url"] = response.url
        item["name"] = page.getName()

        # The ID is looked up by "/" followed by the last path segment.
        idKey = "/" + response.url.split("/")[-1]
        item["id"] = self.restaurantIDsGetter.getID(idKey)

        item["deliveryTimeInMinutes"] = page.getDeliveryTimeInMinutes()
        item["minOrderPrice"] = page.getMinOrderPrice()
        item["deliveryCost"] = page.getDeliveryCost()
        item["payMethods"] = page.getPayMethods()
        item["menu"] = page.getMenuCategories()
        item["tagCategories"] = page.getTagCategories()
        item["averagePunctuation"] = page.getAveragePunctuation()
        item["quantityOfComments"] = page.getQuantityOfComments()
        return item
Exemplo n.º 4
0
class RestautantIDsGetterTest(unittest.TestCase):
    """Checks ``RestaurantIDsGetter.getID`` against the "examples/main" response.

    NOTE(review): these tests pass full URLs to ``getID``, whereas the spider
    code calls it with "/" plus the last URL segment -- confirm ``getID``
    is meant to accept both forms.
    """

    def setUp(self):
        # Each test gets a fresh getter built from the same fake response.
        self.response = fakeResponseFromFile("examples/main", None)
        self.restaurantIDsGetter = RestaurantIDsGetter(self.response)

    def testGetIDsFromMainPage(self):
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-pan-pa-ya.html")
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(actualId, "6802")

    def testIfDoNotHaveIDReturnsNoID(self):
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-cali-vea-castilla.html"
        )
        # A URL with no ID is expected to yield the "NoID" marker string.
        self.assertEqual(actualId, "NoID")
class RestautantIDsGetterTest(unittest.TestCase):
    """Exercises ``RestaurantIDsGetter.getID`` using the "examples/main" response.

    NOTE(review): ``getID`` receives full URLs here, while the spider code
    passes "/" plus the last URL segment -- verify both forms are supported.
    """

    def setUp(self):
        # A fresh getter is built from the fake response before every test.
        self.response = fakeResponseFromFile("examples/main", None)
        self.restaurantIDsGetter = RestaurantIDsGetter(self.response)

    def testGetIDsFromMainPage(self):
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-pan-pa-ya.html")
        # Use assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12+.
        self.assertEqual(actualId, "6802")

    def testIfDoNotHaveIDReturnsNoID(self):
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-cali-vea-castilla.html")
        # The getter is expected to answer "NoID" when it has no ID for the URL.
        self.assertEqual(actualId, "NoID")
Exemplo n.º 6
0
 def setUp(self):
     """Load the "examples/main" fake response and build the ID getter from it."""
     fakeMain = fakeResponseFromFile("examples/main", None)
     self.response = fakeMain
     self.restaurantIDsGetter = RestaurantIDsGetter(fakeMain)
 def setUp(self):
     """Prepare the fixture: a fake "examples/main" response and its ID getter."""
     mainResponse = fakeResponseFromFile("examples/main", None)
     self.response = mainResponse
     self.restaurantIDsGetter = RestaurantIDsGetter(mainResponse)
Exemplo n.º 8
0
 def parseMain(self, response):
     """Build the ID getter for ``response`` and follow its "domicilios-" links.

     The getter is stored as ``self.restaurantIDsGetter``; each extracted
     link is yielded as a ``Request`` whose callback is
     ``self.parseRestaurants``.
     """
     self.restaurantIDsGetter = RestaurantIDsGetter(response)
     matchingLinks = LinkExtractor(
         allow=(r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*")
     ).extract_links(response)
     for matchingLink in matchingLinks:
         yield Request(matchingLink.url, callback=self.parseRestaurants)