Exemplo n.º 1
0
 def parse_item(self, response):
     """Forward every not-yet-seen record of a JSON listing to ``sendData``.

     The response body is decoded as JSON and each entry under
     ``resultData`` is turned into a dict keyed by Chinese field labels.
     A record is passed to ``sendData`` under the name ``'hainan'`` only
     when ``SpiderData.objects.filter(url=...)`` matches nothing for its
     detail-page URL.
     """
     # (output label, key in the JSON record), listed in output order.
     field_map = (
         (u'企业(商户)名称', 'companyname'),
         (u'注册地址', 'companysite'),
         (u'法定代表人姓名', 'companyman'),
         (u'法定代表人身份证号', 'companymanid'),
         (u'负责人姓名', 'responsible_man'),
         (u'负责人身份证号', 'resp_man_id'),
         (u'直接责任人', 'direct_person'),
         (u'社会信用代码', 'idcode'),
         (u'案件分类', 'toclassify'),
         (u'案件名称', 'losecase'),
         (u'行政处罚决定文书号', 'punish_writ_num'),
         (u'主要违法事实', 'losedetail'),
         (u'处罚依据和内容', 'punishway'),
         (u'处罚机关', 'punishunit'),
         (u'处罚时间', 'punishtime'),
     )
     payload = json.loads(response.body)
     for record in payload['resultData']:
         url = 'http://aj.hifda.gov.cn/web/showContent.jsp?id=' + record['id']
         data = dict((label, record[source]) for label, source in field_map)
         if SpiderData.objects.filter(url=url).count() != 0:
             # Something is already stored for this URL: nothing to send.
             continue
         sendData('hainan', data, url)
Exemplo n.º 2
0
 def parse_detail(self, response):
     """Send the two-column detail table of the page to ``sendData``.

     For every ``<tr>`` of the table under ``#main`` the first text node
     of the first cell becomes a key and the first text node of the
     second cell its value. Either may be ``None`` when the cell has no
     text; a repeated key keeps the value of the last row. The resulting
     dict is passed on under the name ``'shanghai'`` with the response URL.
     """
     rows = response.xpath('//*[@id="main"]/div/div[2]/table//tr')
     data = dict(
         (
             row.xpath('td[1]/text()').extract_first(),
             row.xpath('td[2]/text()').extract_first(),
         )
         for row in rows
     )
     sendData('shanghai', data, response.url)
Exemplo n.º 3
0
    def process_spider_exception(self, response, exception, spider):
        """Report an exception raised while processing ``response``.

        Called when a spider or a ``process_spider_input()`` method (from
        another spider middleware) raises an exception. The hook should
        return either None or an iterable of Response, dict or Item
        objects; this implementation falls off the end and so returns None.

        The exception text and the response URL are passed to ``sendData``
        under the spider's name, with ``True`` as the fourth argument.
        """
        source = spider.name
        report = {
            'error': unicode(exception),
            'url': response.url,
        }
        sendData(source, report, response.url, True)
Exemplo n.º 4
0
 def parse_detail(self, response):
     """Send the header/value rows of the ``#edit`` table to ``sendData``.

     The first ``<tr>`` is ignored. For each remaining row the ``<th>``
     text (with ``:`` and spaces removed) is the key and the ``<td>`` text
     the value; a missing or empty value is stored as ``''``. Rows with
     no ``<th>`` text are dropped. The dict is passed on under the name
     ``'gansu'`` together with the response URL.
     """
     data = {}
     rows = response.xpath('//*[@id="edit"]//tr')
     for index, row in enumerate(rows):
         if index == 0:
             # The first row is never read.
             continue
         label = row.xpath('th/text()').extract_first()
         text = row.xpath('td/text()').extract_first()
         if not label:
             continue
         label = label.replace(':', '').replace(' ', '')
         data[label] = text or ''
     sendData('gansu', data, response.url)
Exemplo n.º 5
0
 def parse_item(self, response):
     """Send a two-column table page to ``sendData`` unless already stored.

     Each ``<tr>`` directly under a ``<table>`` contributes one entry:
     first-cell text as key, second-cell text as value (rows where both
     are empty are skipped). The page counts as already stored when
     ``SpiderData`` has a ``'sfda'`` row whose ``data`` contains the same
     three identifying fields. If those fields are missing from the page,
     or the lookup itself fails, the page is treated as already stored and
     nothing is sent.
     """
     data = {}
     for tr in response.xpath('//table/tr'):
         key = tr.xpath('td[1]/text()').extract_first()
         val = tr.xpath('td[2]/text()').extract_first()
         if key or val:
             data[key] = val

     # Fields that together identify one record for de-duplication.
     identity_fields = (u'被抽样单位名称', u'生产日期/批号', u'抽检项目')
     try:
         identity = dict((name, data[name]) for name in identity_fields)
         already = SpiderData.objects.filter(
             scrapyname='sfda', data__contains=identity).count()
     except Exception:
         # Best effort: a missing field or a failed query means "skip".
         # Catching Exception (not a bare except) lets KeyboardInterrupt,
         # SystemExit and GeneratorExit propagate as they should.
         already = 1

     if not already:
         sendData('sfda', data, response.url)
Exemplo n.º 6
0
    def parse_item(self, response):
        """Send the ``rtab2`` tables of a page to ``sendData`` if the URL is new.

        Every ``<tr>`` of every ``<table class="rtab2">`` contributes one
        entry: the ``<th>`` text up to the first ``:`` is the key and the
        ``<td>`` text (``None`` when absent) is the value. Rows without
        any ``<th>`` text are skipped instead of aborting the whole page.
        The dict is passed on under the name ``'case'`` only when
        ``SpiderData.objects.filter(url=response.url)`` matches nothing.
        """
        data = {}
        for table in response.xpath('//table[@class="rtab2"]'):
            for tr in table.xpath('tr'):
                label = tr.xpath('th/text()').extract_first()
                if label is None:
                    # extract_first() yields None when the row has no
                    # header text; there is no key to store under.
                    continue
                key = label.split(u':')[0]
                data[key] = tr.xpath('td/text()').extract_first()

        if SpiderData.objects.filter(url=response.url).count() == 0:
            sendData('case', data, response.url)