def parse(self, response):
    """Yield one GithubItem (name, update_time) per repository row.

    Iterates the ``li.col-12`` entries of the listing page; the repo
    name is the first link's text (whitespace-trimmed) and the update
    time is the ``<relative-time>`` element's datetime attribute.
    """
    for entry in response.css('li.col-12'):
        repo_name = entry.xpath('.//a/text()').extract_first().strip()
        updated = entry.xpath('.//relative-time/@datetime').extract_first()
        yield GithubItem({'name': repo_name, 'update_time': updated})
def parse(self, response):
    """Yield one GithubItem (name, update_time) per repository row.

    Fix: the name regex is now a raw string. The original
    ``'\\n\\s*(.*)'`` relied on the invalid escape ``\\s`` in a plain
    string literal, which raises a SyntaxWarning on modern Python.
    """
    for r in response.css('div#user-repositories-list ul li'):
        item = GithubItem({
            # Strip the leading newline/indentation that wraps the link text.
            "name": r.xpath('.//h3/a/text()').re_first(r'\n\s*(.*)'),
            "update_time": r.css('relative-time::attr(datetime)').extract_first(),
        })
        yield item
def parse(self, response):
    """Build a GithubItem per repository row and follow its detail page.

    Each row yields a Request to the repository URL with the partially
    filled item attached via ``request.meta['item']`` for the
    ``parse_details`` callback to complete.

    Fixes:
    - Removed the stray trailing comma after ``re_first(...)`` -- it made
      ``item['name']`` a 1-tuple instead of a string.
    - The regex is now a raw string (invalid ``\\s`` escape otherwise).
    """
    for r in response.css('div#user-repositories-list ul li'):
        item = GithubItem()
        item['name'] = r.xpath('.//h3/a/text()').re_first(r'\n\s*(.*)')
        item['update_time'] = r.css(
            'relative-time::attr(datetime)').extract_first()
        # Resolve the relative href against the current page URL.
        r_url = response.urljoin(r.xpath('.//h3/a/@href').extract_first())
        request = scrapy.Request(r_url, callback=self.parse_details)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Build a GithubItem per repository row and follow its detail page.

    Fix: the name regex is now a raw string -- ``'(\\w+)'`` in a plain
    literal is fine today but the file's other patterns use invalid
    escapes; raw strings are the consistent, warning-free form.
    """
    for content in response.xpath('//*[@id="user-repositories-list"]/ul/li'):
        item = GithubItem({
            # NOTE(review): \w+ stops at '-' or '.', so names like
            # "louplus-python" would be truncated -- confirm repo names
            # on this page are word-characters only.
            "name": content.xpath('.//div[1]/h3/a/text()').re_first(r'(\w+)'),
            "update_time": content.xpath(
                './/div[3]/relative-time/@datetime').extract_first(),
        })
        # Resolve the relative href against the current page URL.
        repository_url = response.urljoin(
            content.xpath('.//div[1]/h3/a/@href').extract_first())
        request = scrapy.Request(repository_url, callback=self.parse_content)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Build a GithubItem per repository row and follow its detail page.

    Fixes:
    - The detail URL is now resolved from the row's own href (the
      approach the original had commented out) instead of hard-coding
      the ``shiyanlou`` account, so the spider works for any user's
      repository listing. For the intended page the resulting URL is
      the same.
    - Regexes are raw strings to avoid invalid-escape warnings.
    """
    for github in response.css('li.col-12'):
        item = GithubItem({
            'name': github.css(
                'div[class="d-inline-block mb-1"] h3 a::text'
            ).re_first(r'(\S+)'),
            'update_time': github.css(
                'div[class="f6 text-gray mt-2"] relative-time::attr(datetime)'
            ).extract_first(),
        })
        github_url = response.urljoin(github.css(
            'div[class="d-inline-block mb-1"] h3 a::attr(href)'
        ).extract_first())
        request = scrapy.Request(github_url, callback=self.parse_other)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield one GithubItem (name, update_time) per repository row.

    Fix: the name regex is now a raw string, keeping regex literals
    warning-free and consistent.
    """
    for github in response.css('li.col-12'):
        item = GithubItem({
            # NOTE(review): \w+ stops at '-' or '.' -- confirm repo
            # names on this page are word-characters only.
            'name': github.css(
                'div[class="d-inline-block mb-1"] h3 a::text'
            ).re_first(r'(\w+)'),
            'update_time': github.css(
                'div[class="f6 text-gray mt-2"] relative-time::attr(datetime)'
            ).extract_first(),
        })
        yield item
def parse(self, response):
    """Build a GithubItem per repository row and follow its detail page.

    Fixes:
    - The href is now read from the current row (``gt``), not from the
      whole ``response`` -- the original resolved EVERY request to the
      first repository's URL.
    - Removed the leftover debug ``print``.
    """
    for gt in response.css('li.col-12'):
        item = GithubItem(
            name=gt.css('h3 a::text').extract_first().strip(),
            update_time=gt.css(
                'relative-time ::attr(datetime)').extract_first())
        hub_url = gt.css('h3 a::attr(href)').extract_first()
        full_hub_url = response.urljoin(hub_url)
        request = scrapy.Request(full_hub_url, self.parse_haha)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield one GithubItem (name, description, update_time) per repo row.

    Fixes:
    - ``description`` is now extracted from the current ``repository``
      selector, not from ``response`` -- the original matched the page's
      first description for every item.
    - Regexes are raw strings to avoid invalid-escape warnings.
    """
    for repository in response.css('li.public'):
        item = GithubItem({
            'name': repository.xpath(
                './/a[@itemprop="name codeRepository"]/text()'
            ).re_first(r"\n\s*(.*)"),
            'description': repository.xpath(
                './/p[@itemprop="description"]/text()'
            ).re_first(r'\n\s*(.*)\s'),
            'update_time': repository.xpath(
                './/relative-time/@datetime').extract_first(),
        })
        yield item
def parse(self, response):
    """Build a GithubItem per repository row and follow its detail page.

    The name and update time are extracted from each repository block;
    the partially filled item rides along in ``request.meta['item']``
    for the ``after_parse`` callback.
    """
    row_xpath = '//div[@class="col-10 col-lg-9 d-inline-block"]'
    for repo in response.xpath(row_xpath):
        item = GithubItem()
        item['name'] = repo.xpath('./div/h3/a/text()').extract_first().strip()
        item['update_time'] = repo.xpath(
            './/div[@class="f6 text-gray mt-2"]/relative-time/@datetime'
        ).extract_first()
        # Resolve the relative href against the current page URL.
        detail_url = response.urljoin(
            repo.xpath('./div/h3/a/@href').extract_first())
        request = scrapy.Request(detail_url, self.after_parse)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield one GithubItem per repository row, parsing the timestamp.

    ``update_time`` is converted from the ISO-8601 attribute value to a
    ``datetime`` object.

    Fix: the name regex is now a raw string (``\\s`` is an invalid
    escape in a plain literal and raises a SyntaxWarning on modern
    Python).
    """
    for course in response.xpath(
            '//div[@id="user-repositories-list"]/ul/li'):
        time_str = course.xpath(
            './/div[@class="f6 text-gray mt-2"]/relative-time/@datetime'
        ).extract_first()
        item = GithubItem({
            'name': course.xpath(
                './/div[@class="d-inline-block mb-1"]/h3/a/text()'
            ).re_first(r'\n\s*(.*)'),
            'update_time': datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%SZ'),
        })
        yield item
def parse(self, response):
    """Build a GithubItem per repository row and follow its branch page.

    Fix: the name regex is now a raw string (invalid ``\\s`` escape in a
    plain literal otherwise).
    """
    for repository in response.css('li.public'):
        item = GithubItem({
            'name': repository.xpath(
                './/a[@itemprop="name codeRepository"]/text()'
            ).re_first(r"\n\s*(.*)"),
            'update_time': repository.xpath(
                './/relative-time/@datetime').extract_first(),
        })
        # The href is site-relative, so prefix the host.
        repository_url = 'https://github.com' + repository.xpath(
            './/div[contains(@class,"mb-1")]/h3/a/@href').extract_first()
        request = scrapy.Request(repository_url, callback=self.parse_branch)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Build a GithubItem per repository row and follow its detail page.

    Fixes:
    - A fresh GithubItem is now created INSIDE the loop. The original
      created one item before the loop and mutated it each iteration;
      because Scrapy handles the yielded requests asynchronously, every
      request ended up sharing the data of the last repository.
    - The name regex is now a raw string (invalid ``\\s`` escape
      otherwise).
    """
    for course in response.xpath(
            '//div[@id="user-repositories-list"]/ul/li'):
        time_str = course.xpath(
            './/div[@class="f6 text-gray mt-2"]/relative-time/@datetime'
        ).extract_first()
        item = GithubItem()
        item['name'] = course.xpath(
            './/div[@class="d-inline-block mb-1"]/h3/a/text()'
        ).re_first(r'\n\s*(.*)')
        item['update_time'] = datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%SZ')
        detail_url = response.urljoin(course.xpath(
            './/div[@class="d-inline-block mb-1"]/h3/a/@href'
        ).extract_first())
        request = scrapy.Request(detail_url, callback=self.parse_other)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield one GithubItem (name, update_time) per repository row.

    Fix: the name regex is now a raw string, keeping regex literals
    warning-free and consistent.
    """
    for content in response.xpath('//*[@id="user-repositories-list"]/ul/li'):
        yield GithubItem({
            # NOTE(review): \w+ stops at '-' or '.' -- confirm repo
            # names on this page are word-characters only.
            "name": content.xpath('.//div[1]/h3/a/text()').re_first(r'(\w+)'),
            "update_time": content.xpath(
                './/div[3]/relative-time/@datetime').extract_first(),
        })