示例#1
0
 def parse(self, response):
     """Yield one ``GithubItem`` per ``li.col-12`` row of the response.

     Each item holds the stripped text of the first ``<a>`` text node found
     in the row (``name``) and the ``datetime`` attribute of the first
     ``<relative-time>`` element in the row (``update_time``).
     """
     rows = response.css('li.col-12')
     for row in rows:
         # Stripping happens before the timestamp lookup, as a missing
         # anchor text raises here and nothing else is evaluated.
         title = row.xpath('.//a/text()').extract_first().strip()
         stamp = row.xpath('.//relative-time/@datetime').extract_first()
         yield GithubItem({'name': title, 'update_time': stamp})
示例#2
0
 def parse(self, response):
     """Yield one ``GithubItem`` per row of the repositories list.

     Rows are the elements matched by ``div#user-repositories-list ul li``.
     ``name`` is the remainder of the line that follows a newline plus any
     whitespace in the row's ``h3/a`` text; ``update_time`` is the
     ``datetime`` attribute of the row's ``<relative-time>`` element. Either
     value is ``None`` when nothing matches.
     """
     for r in response.css('div#user-repositories-list ul li'):
         item = GithubItem({
             # Raw string so the backslashes reach the regex engine as
             # written instead of relying on an invalid string escape.
             "name":
             r.xpath('.//h3/a/text()').re_first(r'\n\s*(.*)'),
             "update_time":
             r.css('relative-time::attr(datetime)').extract_first()
         })
         yield item
示例#3
0
 def parse(self, response):
     """Yield a follow-up request for every row of the repositories list.

     For each element matched by ``div#user-repositories-list ul li`` a
     ``GithubItem`` is filled with ``name`` and ``update_time`` and attached
     to the request as ``meta['item']``. The request targets the row's
     ``h3/a`` link, resolved against the response URL, and is handled by
     ``self.parse_details``.
     """
     for r in response.css('div#user-repositories-list ul li'):
         item = GithubItem()
         # Assign the bare string: a trailing comma on this statement would
         # store a 1-tuple in the field. The pattern is a raw string so the
         # backslashes are passed to the regex engine unchanged.
         item['name'] = r.xpath('.//h3/a/text()').re_first(r'\n\s*(.*)')
         item['update_time'] = r.css(
             'relative-time::attr(datetime)').extract_first()
         r_url = response.urljoin(r.xpath('.//h3/a/@href').extract_first())
         request = scrapy.Request(r_url, callback=self.parse_details)
         request.meta['item'] = item
         yield request
示例#4
0
 def parse(self, response):
     """Yield a follow-up request for every row of the repositories list.

     Rows are matched by ``//*[@id="user-repositories-list"]/ul/li``. Each
     request carries a ``GithubItem`` (``name``, ``update_time``) in
     ``meta['item']``, targets the row's ``div[1]/h3/a`` link resolved
     against the response URL, and is handled by ``self.parse_content``.
     """
     for content in response.xpath('//*[@id="user-repositories-list"]/ul/li'):
         item = GithubItem({
             # Raw string keeps the backslash for the regex engine.
             # NOTE(review): this captures only the first run of word
             # characters, so a name containing '-' or '.' is cut short --
             # confirm that is intended.
             "name": content.xpath('.//div[1]/h3/a/text()').re_first(r'(\w+)'),
             "update_time": content.xpath('.//div[3]/relative-time/@datetime').extract_first()
         })
         repository_url = response.urljoin(content.xpath('.//div[1]/h3/a/@href').extract_first())
         request = scrapy.Request(repository_url, callback=self.parse_content)
         request.meta['item'] = item
         yield request
示例#5
0
 def parse(self, response):
     """Yield a follow-up request for every ``li.col-12`` row.

     Each request carries a ``GithubItem`` (``name``, ``update_time``) in
     ``meta['item']`` and is handled by ``self.parse_other``. The target URL
     is built by appending the extracted name to a fixed base URL.
     """
     for github in response.css('li.col-12'):
         # Extract the name once; it feeds both the item and the URL. The
         # pattern is a raw string so the backslash reaches the regex engine.
         name = github.css(
             'div[class="d-inline-block mb-1"] h3 a::text').re_first(r'(\S+)')
         item = GithubItem({
             'name': name,
             'update_time': github.css(
                 'div[class="f6 text-gray mt-2"] relative-time::attr(datetime)'
             ).extract_first(),
         })
         # NOTE(review): the owner segment of the URL is hard-coded, so only
         # repositories under that account resolve -- confirm intended.
         github_url = 'https://github.com/shiyanlou/' + name
         request = scrapy.Request(github_url, callback=self.parse_other)
         request.meta['item'] = item
         yield request
示例#6
0
 def parse(self, response):
     """Yield one ``GithubItem`` per ``li.col-12`` row of the response.

     ``name`` is the first run of word characters in the row's
     ``div.d-inline-block.mb-1 h3 a`` text; ``update_time`` is the
     ``datetime`` attribute of the ``<relative-time>`` inside the row's
     ``div`` whose class is exactly ``f6 text-gray mt-2``.
     """
     for github in response.css('li.col-12'):
         item = GithubItem({
             # Raw string keeps the backslash for the regex engine.
             # NOTE(review): a name containing '-' or '.' is cut at the
             # first such character -- confirm that is intended.
             'name':
             github.css('div[class="d-inline-block mb-1"] h3 a::text').
             re_first(r'(\w+)'),
             'update_time':
             github.css(
                 'div[class="f6 text-gray mt-2"] relative-time::attr(datetime)'
             ).extract_first()
         })
         yield item
示例#7
0
 def parse(self, response):
     """Yield a follow-up request for every ``li.col-12`` row.

     Each request carries a ``GithubItem`` (``name``, ``update_time``) in
     ``meta['item']``, targets the row's own ``h3 a`` link resolved against
     the response URL, and is handled by ``self.parse_haha``. The item name
     is also printed to stdout for each row.
     """
     for gt in response.css('li.col-12'):
         item = GithubItem(
             name=gt.css('h3 a::text').extract_first().strip(),
             update_time=gt.css(
                 'relative-time ::attr(datetime)').extract_first())
         # Read the link from the current row: querying ``response`` here
         # would return the first link on the page for every row, sending
         # all requests to the same URL.
         hub_url = gt.css('h3 a::attr(href)').extract_first()
         full_hub_url = response.urljoin(hub_url)
         request = scrapy.Request(full_hub_url, self.parse_haha)
         request.meta['item'] = item
         print(item['name'])
         yield request
示例#8
0
 def parse(self, response):
     """Yield one ``GithubItem`` per ``li.public`` row of the response.

     Every field is read relative to the row being processed:

     * ``name`` -- text of the ``a[itemprop="name codeRepository"]`` link,
       taken after its leading newline and whitespace;
     * ``description`` -- text of the ``p[itemprop="description"]``
       paragraph, taken after its leading newline and whitespace;
     * ``update_time`` -- ``datetime`` attribute of ``<relative-time>``.

     A field is ``None`` when its selector or pattern matches nothing.
     """
     for repository in response.css('li.public'):
         item = GithubItem({
             # Patterns are raw strings so the backslashes reach the regex
             # engine unchanged.
             'name':
             repository.xpath('.//a[@itemprop="name codeRepository"]/text()'
                              ).re_first(r"\n\s*(.*)"),
             # Query the row, not the whole response; the latter would give
             # every item the first description found on the page.
             'description':
             repository.xpath('.//p[@itemprop="description"]/text()').
             re_first(r'\n\s*(.*)\s'),
             'update_time':
             repository.xpath('.//relative-time/@datetime').extract_first()
         })
         yield item
示例#9
0
 def parse(self, response):
     """Yield a follow-up request for every repository container ``div``.

     Containers are the ``div`` elements whose class attribute is exactly
     ``col-10 col-lg-9 d-inline-block``. Each request carries a
     ``GithubItem`` with ``name`` (stripped ``./div/h3/a`` text) and
     ``update_time`` (``datetime`` attribute of the nested
     ``<relative-time>``) in ``meta['item']``, targets the container's
     ``./div/h3/a`` link resolved against the response URL, and is handled
     by ``self.after_parse``.
     """
     containers = response.xpath(
         '//div[@class="col-10 col-lg-9 d-inline-block"]')
     for box in containers:
         entry = GithubItem()
         entry['name'] = box.xpath('./div/h3/a/text()').extract_first().strip()
         entry['update_time'] = box.xpath(
             './/div[@class="f6 text-gray mt-2"]/relative-time/@datetime'
         ).extract_first()
         href = box.xpath('./div/h3/a/@href').extract_first()
         # Passing ``meta`` up front is equivalent to assigning
         # ``request.meta['item']`` after construction.
         yield scrapy.Request(
             response.urljoin(href), self.after_parse, meta={'item': entry})
示例#10
0
 def parse(self, response):
     """Yield one ``GithubItem`` per row of the repositories list.

     Rows are matched by ``//div[@id="user-repositories-list"]/ul/li``.
     ``name`` is the row's ``h3/a`` text taken after its leading newline and
     whitespace; ``update_time`` is the row's ``<relative-time>``
     ``datetime`` attribute parsed with the format ``%Y-%m-%dT%H:%M:%SZ``.
     """
     for course in response.xpath(
             '//div[@id="user-repositories-list"]/ul/li'):
         time_str = course.xpath(
             './/div[@class="f6 text-gray mt-2"]/relative-time/@datetime'
         ).extract_first()
         item = GithubItem({
             # Raw string so the backslashes reach the regex engine
             # unchanged instead of relying on an invalid string escape.
             'name':
             course.xpath('.//div[@class="d-inline-block mb-1"]/h3/a/text()'
                          ).re_first(r'\n\s*(.*)'),
             'update_time':
             datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%SZ')
         })
         yield item
示例#11
0
 def parse(self, response):
     """Yield a follow-up request for every ``li.public`` row.

     Each request carries a ``GithubItem`` (``name``, ``update_time``) in
     ``meta['item']`` and is handled by ``self.parse_branch``. The target
     URL is ``https://github.com`` followed by the ``href`` of the ``h3/a``
     link inside the row's ``div`` whose class contains ``mb-1``.
     """
     for repository in response.css('li.public'):
         item = GithubItem({
             # Raw string so the backslashes reach the regex engine
             # unchanged instead of relying on an invalid string escape.
             'name':
             repository.xpath('.//a[@itemprop="name codeRepository"]/text()'
                              ).re_first(r"\n\s*(.*)"),
             'update_time':
             repository.xpath('.//relative-time/@datetime').extract_first()
         })
         repository_url = 'https://github.com' + repository.xpath(
             './/div[contains(@class,"mb-1")]/h3/a/@href').extract_first()
         request = scrapy.Request(repository_url,
                                  callback=self.parse_branch)
         request.meta['item'] = item
         yield request
示例#12
0
文件: github.py 项目: shium/louplus
 def parse(self, response):
     """Yield a follow-up request for every row of the repositories list.

     Rows are matched by ``//div[@id="user-repositories-list"]/ul/li``. Each
     request carries its own ``GithubItem`` in ``meta['item']`` with
     ``name`` (the ``h3/a`` text after its leading newline and whitespace)
     and ``update_time`` (the ``<relative-time>`` ``datetime`` attribute
     parsed with ``%Y-%m-%dT%H:%M:%SZ``). The request targets the row's
     ``h3/a`` link resolved against the response URL and is handled by
     ``self.parse_other``.
     """
     for course in response.xpath(
             '//div[@id="user-repositories-list"]/ul/li'):
         # A fresh item per row: requests are processed after this loop has
         # moved on, so a single shared item would show every callback the
         # values of the last row.
         item = GithubItem()
         time_str = course.xpath(
             './/div[@class="f6 text-gray mt-2"]/relative-time/@datetime'
         ).extract_first()
         # Raw string so the backslashes reach the regex engine unchanged.
         item['name'] = course.xpath(
             './/div[@class="d-inline-block mb-1"]/h3/a/text()').re_first(
                 r'\n\s*(.*)')
         item['update_time'] = datetime.strptime(time_str,
                                                 '%Y-%m-%dT%H:%M:%SZ')
         other_item = response.urljoin(
             course.xpath('.//div[@class="d-inline-block mb-1"]/h3/a/@href'
                          ).extract_first())
         request = scrapy.Request(other_item, callback=self.parse_other)
         request.meta['item'] = item
         yield request
示例#13
0
 def parse(self, response):
     """Yield one ``GithubItem`` per row of the repositories list.

     Rows are matched by ``//*[@id="user-repositories-list"]/ul/li``.
     ``name`` is the first run of word characters in the row's
     ``div[1]/h3/a`` text; ``update_time`` is the ``datetime`` attribute of
     the ``<relative-time>`` under the row's ``div[3]``.
     """
     for content in response.xpath('//*[@id="user-repositories-list"]/ul/li'):
         yield GithubItem({
             # Raw string keeps the backslash for the regex engine.
             # NOTE(review): a name containing '-' or '.' is cut at the
             # first such character -- confirm that is intended.
             "name": content.xpath('.//div[1]/h3/a/text()').re_first(r'(\w+)'),
             "update_time": content.xpath('.//div[3]/relative-time/@datetime').extract_first()
         })