def parse(self, response):
    """Scrape the page in *response* and chain a request for its dependencies.

    Records ``response.url`` in ``self.seen_urls``, fills an ``MbedLibItem``
    through ``MbedLibLoader`` from fixed XPath locations on the page, and
    returns a request for the page's ``dependencies`` sub-page.  The item
    built so far travels with that request in ``meta['item']``, together
    with the originating URL in ``meta['libpage']``.

    Args:
        response: The response for the page being scraped.

    Returns:
        A ``scrapy.Request`` whose callback is ``self.parse_dependencies``.
    """
    self.seen_urls.append(response.url)

    loader = MbedLibLoader(item=MbedLibItem(), response=response)

    # Absolute, position-based XPaths: these depend on the exact page layout.
    loader.add_xpath(
        'repo_type',
        '/html/body/div[4]/div[2]/div[2]/table/tr[1]/td/text()[2]')
    loader.add_xpath('owner', '/html/body/div[4]/div[1]/div/a[1]/text()[2]')
    loader.add_xpath('name', '/html/body/div[4]/div[1]/div/a[2]/text()[2]')
    loader.add_xpath('repository', '/html/body/div[4]/div[1]/div/a[2]/@href')
    # may need some cleaning up
    loader.add_xpath('description', './/*[@id="mbed-content"]/p[1]/text()')

    # Constant values that do not come from the page.
    loader.add_value('frameworks', 'mbed')
    loader.add_value(
        'platforms',
        ['freescalekinetis', 'nordicnrf51', 'nxplpc', 'ststm32', 'teensy'])

    loader.add_xpath('components', '/html/body/div[4]/div[2]/div[3]//a/@href')

    item = loader.load_item()

    # Normalise the trailing slash so the sub-page URL is well-formed whether
    # or not response.url ends with '/'; plain concatenation would otherwise
    # produce ".../<name>dependencies".
    dependencies_url = response.url.rstrip('/') + '/dependencies'

    return scrapy.Request(
        dependencies_url,
        callback=self.parse_dependencies,
        meta={'libpage': response.url, 'item': item},
    )
def parse_project(self, response):
    """Scrape the page in *response* and chain a request for its dependencies.

    Records ``response.url`` in ``self.seen_urls``, fills an ``MbedLibItem``
    through ``MbedLibLoader`` from fixed XPath locations on the page
    (including the owner's link), and returns a request for the page's
    ``dependencies`` sub-page.  The item built so far travels with that
    request in ``meta['item']``, together with the originating URL in
    ``meta['libpage']``.

    Args:
        response: The response for the page being scraped.

    Returns:
        A ``scrapy.Request`` whose callback is ``self.parse_dependencies``.
    """
    self.seen_urls.append(response.url)

    loader = MbedLibLoader(item=MbedLibItem(), response=response)

    # Absolute, position-based XPaths: these depend on the exact page layout.
    loader.add_xpath(
        'repo_type',
        '/html/body/div[4]/div[2]/div[2]/table/tr[1]/td/text()[2]')
    loader.add_xpath('owner', '/html/body/div[4]/div[1]/div/a[1]/text()[2]')
    loader.add_xpath('ownerurl', '/html/body/div[4]/div[1]/div/a[1]/@href')
    loader.add_xpath('name', '/html/body/div[4]/div[1]/div/a[2]/text()[2]')
    loader.add_xpath('repository', '/html/body/div[4]/div[1]/div/a[2]/@href')
    loader.add_xpath('description', './/*[@id="mbed-content"]/p[1]/text()')

    # Values that do not come from the page.
    loader.add_value('frameworks', 'mbed')
    loader.add_value('platforms', mbed_platforms())

    loader.add_xpath('components', '/html/body/div[4]/div[2]/div[3]//a/@href')

    item = loader.load_item()

    # Normalise the trailing slash so the sub-page URL is well-formed whether
    # or not response.url ends with '/'; plain concatenation would otherwise
    # produce ".../<name>dependencies".
    dependencies_url = response.url.rstrip('/') + '/dependencies'

    return scrapy.Request(
        dependencies_url,
        callback=self.parse_dependencies,
        meta={'libpage': response.url, 'item': item},
    )
def parse_project(self, response):
    """Build an item from a project page, then follow its dependencies page.

    Steps performed:

    1. ``response.url`` is appended to ``self.seen_urls``.
    2. An ``MbedLibLoader`` wrapping a fresh ``MbedLibItem`` collects the
       fields listed below from the response.
    3. A request for the ``dependencies`` sub-page is returned, carrying the
       loaded item (``meta['item']``) and the current page URL
       (``meta['libpage']``) for ``self.parse_dependencies``.

    Args:
        response: The response for the page being scraped.

    Returns:
        A ``scrapy.Request`` whose callback is ``self.parse_dependencies``.
    """
    self.seen_urls.append(response.url)

    loader = MbedLibLoader(item=MbedLibItem(), response=response)

    # Fields extracted from the page.  The XPaths are absolute and
    # position-based, so they depend on the exact page layout.
    xpath_fields = (
        ('repo_type',
         '/html/body/div[4]/div[2]/div[2]/table/tr[1]/td/text()[2]'),
        ('owner', '/html/body/div[4]/div[1]/div/a[1]/text()[2]'),
        ('ownerurl', '/html/body/div[4]/div[1]/div/a[1]/@href'),
        ('name', '/html/body/div[4]/div[1]/div/a[2]/text()[2]'),
        ('repository', '/html/body/div[4]/div[1]/div/a[2]/@href'),
        ('description', './/*[@id="mbed-content"]/p[1]/text()'),
    )
    for field_name, xpath in xpath_fields:
        loader.add_xpath(field_name, xpath)

    # Values that do not come from the page; kept in the original order
    # relative to the XPath fields.
    loader.add_value('frameworks', 'mbed')
    loader.add_value('platforms', mbed_platforms())

    loader.add_xpath('components', '/html/body/div[4]/div[2]/div[3]//a/@href')

    item = loader.load_item()

    # Normalise the trailing slash so the sub-page URL is well-formed whether
    # or not response.url ends with '/'; plain concatenation would otherwise
    # produce ".../<name>dependencies".
    dependencies_url = response.url.rstrip('/') + '/dependencies'

    return scrapy.Request(
        dependencies_url,
        callback=self.parse_dependencies,
        meta={'libpage': response.url, 'item': item},
    )