def parse_detail(self, response):
    """Extract a single notice from a detail page and yield it as an item."""
    item = YYItem()
    title = response.xpath('//span[@class="niae2_top"]/text()').extract()[0]
    item['name'] = title
    item['unit'] = '采购单位'
    # NOTE(review): publish time is hard-coded — presumably the page carries
    # no usable timestamp; confirm against the site.
    item['time'] = '2020-01-01 00:00'
    item['sources'] = self.name
    item['address'] = response.url
    yield item
def parse_item(self, response):
    """Yield a mostly-empty placeholder item for this source.

    Fix: removed the leftover debug ``print(response.text)`` that dumped the
    entire response body to stdout on every call; replaced with a lazy,
    low-volume debug log line via the spider's standard logger.
    """
    self.logger.debug('parse_item: %s', response.url)
    item = YYItem()
    item['name'] = ''
    item['unit'] = ''
    item['time'] = '2020-01-01 00:00'
    item['address'] = ''
    item['sources'] = self.name
    yield item
def parse(self, response):
    """Parse a JSON listing response; yield one item per notice row."""
    payload = json.loads(response.text)
    if not payload.get('result'):
        return
    for row in payload.get('rows'):
        item = YYItem()
        item['name'] = row['noticeTitle']
        # drop the last 5 characters of the timestamp — presumably the
        # seconds portion (':SS.x' or similar); confirm against the API
        item['time'] = row['publishTime'][:-5]
        item['unit'] = row['noticeSource']
        item['address'] = self.base_url + row['id']
        item['sources'] = self.title
        yield item
def parse(self, response):
    """Scrape the listing under #con_two_1; yield one item per <li>."""
    for li in response.xpath('//*[@id="con_two_1"]/ul/li'):
        item = YYItem()
        item['name'] = li.xpath('./a/text()').extract()[0]
        # NOTE(review): year is hard-coded, so this breaks outside 2020 —
        # presumably the site shows only month-day; confirm the source format.
        day = li.xpath('.//span[1]/text()').extract()[0]
        item['time'] = '2020-' + day + " 00:00"
        item['unit'] = li.xpath('.//span/a/text()').extract()[0]
        item['address'] = li.xpath('./a/@href').extract()[0]
        item['sources'] = self.title
        yield item
def parse(self, response):
    """Parse a JSON response whose payload sits under 'param1'."""
    body = json.loads(response.text)
    data = body.get('param1')
    if not data:
        return
    for entry in data.get('result'):
        item = YYItem()
        item['name'] = entry['title']
        # trim the last 3 characters of the timestamp — presumably ':SS';
        # confirm against the API
        item['time'] = entry['publishTime'][:-3]
        item['unit'] = ''
        item['address'] = self.base_url + entry['id']
        item['sources'] = self.title
        yield item
def parse(self, response):
    """Parse the #searchResult table; the first two rows are skipped
    (presumably header rows — confirm against the page)."""
    rows = response.xpath('//*[@id="searchResult"]/table/tbody/tr')
    for row in rows[2:]:
        item = YYItem()
        item['name'] = row.xpath('.//td[3]/a/text()').extract()[0]
        posted = row.xpath('.//td[last()]/text()').extract()[0]
        item['time'] = posted + ' 00:00'
        item['unit'] = row.xpath('.//td[1]/text()').extract()[0]
        # the detail path is embedded in the row's onclick JS; [14:-2]
        # assumes a fixed-length wrapper around it — verify on the site
        onclick = row.xpath('.//@onclick').extract()[0]
        item['address'] = self.base_url + onclick[14:-2]
        item['sources'] = self.title
        yield item
def parse(self, response):
    """Yield a name-only item per JSON row, then follow up to 5 index pages.

    NOTE(review): the flattened source does not show whether the pagination
    step was nested under the 'result' check; it is placed at method level
    here — confirm against the original layout.
    """
    payload = json.loads(response.text)
    if payload.get('result'):
        for row in payload.get('rows'):
            item = YYItem()
            item['name'] = row['noticeTitle']
            yield item
    if self.offset < 5:
        self.offset += 1
        next_url = self.base_url + 'index_' + str(self.offset) + '.htm'
        yield scrapy.Request(next_url, callback=self.parse)
def parse(self, response):
    """Walk a JSON array of groups; each group's 'list' holds the notices."""
    groups = json.loads(response.text)
    for group in groups:
        # `x if x else []` rewritten as the equivalent `x or []`
        for entry in group.get('list') or []:
            item = YYItem()
            item['name'] = entry['title']
            # publishtime is an epoch value rendered in local time
            item['time'] = time.strftime(
                "%Y-%m-%d %H:%M", time.localtime(entry['publishtime']))
            item['unit'] = ''
            item['address'] = self.base_url + entry['_id'] + '.html'
            item['sources'] = self.title
            yield item
def parse(self, response):
    """Parse the #div1 table on chinaunicombidding.cn list pages."""
    detail_url = "http://www.chinaunicombidding.cn{}"
    for row in response.xpath('//*[@id="div1"]/table/tr'):
        item = YYItem()
        item['name'] = row.xpath('.//td/span/@title').extract()[0]
        item['time'] = row.xpath('.//td[2]/text()').extract()[0] + ' 00:00'
        item['unit'] = ''
        # the relative detail path is the first double-quoted argument
        # inside the span's onclick JS call
        onclick = row.xpath('.//td/span/@onclick').extract()[0]
        item['address'] = detail_url.format(onclick.split('"')[1])
        item['sources'] = self.title
        yield item
def parse(self, response):
    """Scrape bidding.csg.cn list pages, then follow up to 2 extra pages."""
    for li in response.xpath('//div[@class="W750 Right"]/div/ul/li'):
        item = YYItem()
        item['name'] = li.xpath('.//a/text()').extract()[0]
        item['time'] = li.xpath('.//span/text()').extract()[0] + " 00:00"
        item['unit'] = li.xpath('./text()').extract()[0]
        href = li.xpath('.//a/@href').extract()[0]
        item['address'] = 'http://www.bidding.csg.cn' + href
        item['sources'] = self.title
        yield item
    if self.offset < 2:
        self.offset += 1
        next_url = self.base_url + '_' + str(self.offset) + '.jhtml'
        yield scrapy.Request(next_url, callback=self.parse)
def parse(self, response):
    """Parse the weain.mil.cn JSON listing; follow up to 5 pages.

    NOTE(review): pagination is placed at method level — the flattened
    source does not show whether it was nested under the `if entries` check.
    """
    payload = json.loads(response.text)
    entries = payload.get('list', {}).get('contentList')
    if entries:
        for entry in entries:
            item = YYItem()
            item['name'] = entry['nonSecretTitle']
            # trim the last 3 characters — presumably ':SS'; confirm
            item['time'] = entry['publishTime'][:-3]
            item['unit'] = ''
            item['address'] = 'http://www.weain.mil.cn' + entry['pcUrl']
            item['sources'] = self.title
            yield item
    if self.offset < 5:
        self.offset += 1
        yield scrapy.Request(self.base_url + str(self.offset),
                             callback=self.parse)
def parse(self, response):
    """Scrape huobiao.cn search-result anchors; follow up to 5 pages."""
    items = response.xpath('//a[@class="item-blue"]')
    for each in items:
        item = YYItem()
        item['name'] = each.xpath('.//div/div/div[1]/text()').extract()[0]
        # [1][1:12]: second text node, characters 1..11 — presumably the
        # date plus a trailing space so '00:00' lands after it, giving
        # 'YYYY-MM-DD 00:00'. TODO(review): confirm; sibling spiders append
        # ' 00:00' with an explicit leading space instead.
        item['time'] = each.xpath(
            './/div/div/div[last()]/text()').extract()[1][1:12] + '00:00'
        item['unit'] = ''
        item['address'] = 'http://www.huobiao.cn' + each.xpath(
            './@href').extract()[0]
        item['sources'] = self.title
        yield item
    if self.offset < 5:
        self.offset += 1
        yield scrapy.Request(self.base_url + str(self.offset) + '.html',
                             callback=self.parse)
def parse(self, response):
    """Parse bidcenter.com.cn search results; follow up to 5 pages."""
    for entry in response.xpath('//*[@id="searchcontent"]/ul/li'):
        item = YYItem()
        item['name'] = entry.xpath('.//div/a[last()]/text()').extract()[0]
        # second text node presumably looks like 'YYYY/MM/DD\r…' — keep the
        # part before the CR, switch slashes to dashes, append midnight
        raw = entry.xpath('.//div[@class="s_c_l_right"]/text()').extract()[1]
        item['time'] = raw.split('\r')[0].replace('/', '-') + ' 00:00'
        item['unit'] = ''
        href = entry.xpath('.//div/a[last()]/@href').extract()[0]
        item['address'] = 'https://www.bidcenter.com.cn' + href
        item['sources'] = self.title
        yield item
    if self.offset < 5:
        self.offset += 1
        yield scrapy.Request(self.base_url + str(self.offset) + '.html',
                             callback=self.parse)
def parse(self, response):
    """Parse the chinabidding.cn #list table; follow up to 5 pages.

    Bug fix: the name/address XPaths started with '//', which is absolute —
    inside the per-row loop it searched the WHOLE document, so every yielded
    item carried the first row's title and link. They are now relative
    ('.//'), scoped to the current <tr>.
    """
    items = response.xpath('//*[@id="list"]/tbody/tr')
    for each in items:
        item = YYItem()
        item['name'] = each.xpath(
            './/td[@class="td_1"]/a/text()').extract()[0]
        item['time'] = each.xpath(
            './td[last()]/text()').extract()[0] + ' 00:00'
        item['unit'] = ''
        item['address'] = 'https://www.chinabidding.cn' + each.xpath(
            './/td[@class="td_1"]/a/@href').extract()[0]
        item['sources'] = self.title
        yield item
    if self.offset < 5:
        self.offset += 1
        yield scrapy.Request(self.base_url + str(self.offset) + '.html',
                             callback=self.parse)
def parse(self, response):
    """Parse a vF_detail_relcontent_lst listing; follow up to 5 index pages."""
    rows = response.xpath('//div[@class="vF_detail_relcontent_lst"]/ul/li')
    for row in rows:
        item = YYItem()
        item['name'] = row.xpath('.//a/text()').extract()[0]
        item['time'] = row.xpath('.//em[2]/text()').extract()[0]
        item['unit'] = row.xpath('.//em[last()]/text()').extract()[0]
        # hrefs are written as './path' — strip the leading './' before
        # joining onto the base URL
        href = row.xpath('.//a/@href').extract()[0].replace('./', '')
        item['address'] = self.base_url + href
        item['sources'] = self.title
        yield item
    if self.offset < 5:
        self.offset += 1
        next_url = self.base_url + 'index_' + str(self.offset) + '.htm'
        yield scrapy.Request(next_url, callback=self.parse)
def parse(self, response):
    """Yield name-only items from the #searchResult table.

    The first two rows are skipped — presumably headers; confirm.
    """
    rows = response.xpath('//*[@id="searchResult"]/table/tbody/tr')
    for row in rows[2:]:
        item = YYItem()
        item['name'] = row.xpath('.//td[3]/a/text()').extract()[0]
        yield item