Exemplo n.º 1
0
 def parse(self, response):
     """Practitioner qualification certificates -- company basic info.

     Decodes the JSON array in ``response.text``. For every row it maps the
     fields listed in ``configs1['data']`` into a result dict, then yields
     two form requests (built from ``asc_data(CropRowID)[0]`` and ``[1]``,
     both handled by ``self.cctparse``) followed by a ``SacItem`` carrying
     the row.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     configs = configs1
     search_url = "http://person.sac.net.cn/pages/registration/train-line-register!search.action"

     for json_ in json.loads(response.text):
         result = {}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(json_[config['v']])

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']

         CropRowID = result['CropRowID']
         datas = asc_data(CropRowID)
         headers = {'User-Agent': generate_user_agent()}
         # One request per form payload; both share headers and callback.
         for index in (0, 1):
             yield scrapy.FormRequest(search_url,
                                      formdata=datas[index],
                                      headers=headers,
                                      meta={'CropRowID': CropRowID},
                                      priority=0,
                                      callback=self.cctparse)
         yield item
Exemplo n.º 2
0
 def senior_executiveParse(self, response):
     """Securities company -- senior executive info.

     Decodes the JSON array in ``response.text`` and yields one ``SacItem``
     per row. Each result dict carries the ``orgid`` taken from
     ``response.meta`` plus the fields listed in the local ``configs``.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     orgid = response.meta['orgid']
     # NOTE(review): 'OFFICE_DATE' appears twice in 'keys'; kept verbatim --
     # confirm whether another column was intended.
     configs = {'list': {'v': '', 't': '', 'keys': ['orgid', 'name', 'OFFICE_DATE', 'OFFICE_DATE'], 'db': 'dbo.SAC_executive'},
                'data': [{'n': '现任职务', 'En': 'CURRENT_POSITION', 't': 'json', 'v': 'EI_CURRENT_POSITION', 'dt': ''},
                         {'n': '姓名', 'En': 'name', 't': 'json', 'v': 'EI_NAME', 'dt': ''},
                         {'n': '任职起始时间', 'En': 'OFFICE_DATE', 't': 'json', 'v': 'EI_OFFICE_DATE', 'dt': ''},
                         {'n': '性别', 'En': 'gender', 't': 'json', 'v': 'GC_ID', 'dt': ''},
                         ]
                }

     for js_ in json.loads(response.text):
         result = {'orgid': orgid}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item
Exemplo n.º 3
0
 def otcInfoParse4(self, response):
     """Securities rating agency -- senior executive info.

     Decodes the JSON array in ``response.text`` and yields one ``SacItem``
     per row. Each result dict carries ``orgid`` (read from
     ``response.meta['otcid']``) plus the fields listed in the local
     ``configs``.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     orgid = response.meta['otcid']
     configs = {'list': {'v': '', 't': '', 'keys': ['NAME', 'orgid', 'PRACTITIONERS_START_DATE'], 'db': 'dbo.SAC_otcseniorExecutive'},
                'data': [{'n': '中国注册会计师资格证书号码', 'En': 'ACCOUNTANTS_NO', 't': 'json', 'v': 'EI_ACCOUNTANTS_NO', 'dt': ''},
                         {'n': '现任职务', 'En': 'CURRENT_POSITION', 't': 'json', 'v': 'EI_CURRENT_POSITION', 'dt': ''},
                         {'n': '是否通过证券评级业务高级管理人员资质测试', 'En': 'ISPASS_SENIOR_MANAGEMENT', 't': 'json', 'v': 'EI_ISPASS_SENIOR_MANAGEMENT', 'dt': ''},
                         {'n': '姓名', 'En': 'NAME', 't': 'json', 'v': 'EI_NAME', 'dt': ''},
                         {'n': '任职起始时间', 'En': 'PRACTITIONERS_START_DATE', 't': 'json', 'v': 'EI_PRACTITIONERS_START_DATE', 'dt': ''},
                         {'n': '证券从业人员证书号码', 'En': 'SECURITIES_PROFESSIONALS', 't': 'json', 'v': 'EI_SECURITIES_PROFESSIONALS', 'dt': ''},
                         {'n': '性别', 'En': 'Gender', 't': 'json', 'v': 'GC_ID', 'dt': ''}
                         ]
                }

     for js_ in json.loads(response.text):
         result = {'orgid': orgid}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item
Exemplo n.º 4
0
 def otcInfoParse3(self, response):
     """Securities rating agency -- license images.

     Decodes the JSON array in ``response.text`` and yields one ``SacItem``
     per row. Besides the fields listed in the local ``configs`` and the
     ``orgid`` from ``response.meta``, each result gets a ``url`` built
     from the row's ``ZRNI_PATH`` and its percent-encoded ``ZRNI_NAME``.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     orgid = response.meta['orgid']
     configs = {'list': {'v': '', 't': '', 'keys': ['REG_ID', 'ZRNI_NAME'], 'db': 'dbo.SAC_otclicenseCopy'},
                'data': [{'n': 'REGID', 'En': 'REG_ID', 't': 'json', 'v': 'MRI_REG_ID', 'dt': ''},
                         {'n': '证书ID', 'En': 'ZRNI_ID', 't': 'json', 'v': 'ZRNI_ID', 'dt': ''},
                         {'n': '证书name', 'En': 'ZRNI_NAME', 't': 'json', 'v': 'ZRNI_NAME', 'dt': ''},
                         {'n': '证书path', 'En': 'ZRNI_PATH', 't': 'json', 'v': 'ZRNI_PATH', 'dt': ''},
                         {'n': '证书类型', 'En': 'ZRNI_TYPE', 't': 'json', 'v': 'ZRNI_TYPE', 'dt': ''},
                         ]
                }
     # Loop-invariant download URL template.
     formtxt = 'http://jg.sac.net.cn/pages/publicity/train-line-register!writeFile.action?inputPath={path}&fileName={filename}'

     for js_ in json.loads(response.text):
         result = {'orgid': orgid}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # The file name is percent-encoded twice, exactly as before.
         quoted_once = urllib.parse.quote(result['ZRNI_NAME'].encode('utf-8'))
         filename = urllib.parse.quote(quoted_once.encode('utf-8'))
         result['url'] = formtxt.format(path=result['ZRNI_PATH'], filename=filename)

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item
Exemplo n.º 5
0
 def otcInfoParse2(self, response):
     """Securities rating agency -- basic info (part 2).

     Merges the fields listed in ``otcInfoBaseconfigs2['data']`` from each
     row of the JSON array in ``response.text`` into the result dict passed
     in ``response.meta['result']``. For every row it yields a ``SacItem``
     and then a form request, handled by ``self.otcInfoParse3``, that is
     filtered by the row's ``REG_ID``.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     configs = otcInfoBaseconfigs2
     for js_ in json.loads(response.text):
         # Work on a copy so rows do not overwrite each other through the
         # single dict stored in response.meta.
         result = dict(response.meta['result'])
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item

         referer = 'http://jg.sac.net.cn/pages/publicity/credit_rating_reg.html?aoi_id={orgid}&is_org_search=no'.format(orgid=result['orgid'])
         yield scrapy.FormRequest('http://jg.sac.net.cn/pages/publicity/resource!search.action',
                                  formdata={
                                      'filter_EQS_mri_reg_id': str(result['REG_ID']),
                                      'sqlkey': 'info',
                                      'sqlval': 'GET_FILES_BY_REG_ID'},
                                  callback=self.otcInfoParse3,
                                  meta={'orgid': result['orgid']},
                                  headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux')),
                                           'Referer': referer,
                                           'Content-Type': 'application/x-www-form-urlencoded',
                                           'Connection': 'keep-alive'},
                                  )
Exemplo n.º 6
0
 def Employee_Change(self, response):
     """Securities practitioner qualification -- personal change records.

     Decodes the JSON array in ``response.text`` and yields one ``SacItem``
     per row, holding the fields listed in ``Employee_ChangeConfigs['data']``.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     configs = Employee_ChangeConfigs
     for json_ in json.loads(response.text):
         # Result dict and item are created per row; previously a single dict
         # and a single item were shared, so every yielded item pointed at
         # the same (last-written) data.
         result = {}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(
                 S.select_content(json_, config, response))

         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item
Exemplo n.º 7
0
 def orgInfoParse2(self, response):
     """Securities company info -- fetch the business scope.

     Takes the result dict from ``response.meta['result']``, sets its
     ``orgid`` from ``response.meta`` and its ``ptsc`` to the comma-joined
     ``PTSC_NAME`` values of the JSON array in ``response.text``, then
     yields a single ``SacItem`` for table ``dbo.SAC_securitiesInfo``.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     result = response.meta['result']
     result['orgid'] = response.meta['orgid']

     rows = json.loads(response.text)
     joined_names = ','.join(row['PTSC_NAME'] for row in rows)
     result['ptsc'] = S.replace_invalid_char(joined_names)

     item = SacItem()
     item['result'] = result
     item['keys'] = ['orgid']
     item['db'] = 'dbo.SAC_securitiesInfo'
     yield item
Exemplo n.º 8
0
 def EQS_sacInfoParse2(self, response):
     """Securities investment consulting institution -- basic info (part 2).

     Merges the fields listed in ``EQS_sacInfoParse2Configs['data']`` from
     each row of the JSON array in ``response.text`` into the result dict
     passed in ``response.meta['result']`` and yields one ``SacItem`` per
     row.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     configs = EQS_sacInfoParse2Configs
     for js_ in json.loads(response.text):
         # Work on a copy so rows do not overwrite each other through the
         # single dict stored in response.meta.
         result = dict(response.meta['result'])
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item
Exemplo n.º 9
0
 def Employee_InFo(self, response):
     """Add photo URL and ADI fields to the result carried in response.meta.

     For each row of the JSON array in ``response.text``, sets ``image``
     (photo base URL + ``RPI_PHOTO_PATH``), ``ADI_NAME`` and ``ADI_ID`` on a
     copy of ``response.meta['result']`` and yields a ``SacItem`` using the
     keys/db from ``cctconfigs``. Any error while decoding or reading a row
     is logged together with the response URL and body, and processing of
     this response stops (best effort, no exception propagates).
     """
     # Function-scope import: scrapy.log.msg is deprecated in favour of the
     # standard logging module.
     import logging

     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     try:
         js = json.loads(response.text)
         base_result = response.meta['result']
         for json_ in js:
             # Copy per row so items do not alias one shared dict.
             result = dict(base_result)
             result['image'] = 'http://photo.sac.net.cn/sacmp/images/' + json_['RPI_PHOTO_PATH']
             result['ADI_NAME'] = json_['ADI_NAME']
             result['ADI_ID'] = json_['ADI_ID']

             item = SacItem()
             item['result'] = result
             item['keys'] = cctconfigs['list']['keys']
             item['db'] = cctconfigs['list']['db']
             yield item
     except Exception:
         # ``Exception`` rather than a bare ``except`` so that GeneratorExit
         # (raised at the yield when the generator is closed) and
         # KeyboardInterrupt are not swallowed and logged as parse failures.
         logging.getLogger(__name__).exception('%s%s', response.url, response.text)
Exemplo n.º 10
0
 def BRANCH_OrgParse(self, response):
     """Securities company -- branch info (paginated).

     Reads ``orgid`` and ``page`` from ``response.meta``. The total page
     count comes from the response's ``totalPages`` on page 1 and from
     ``response.meta['totalPage']`` afterwards. Yields one ``SacItem`` per
     entry of ``js['result']`` and, while pages remain, a form request for
     the next page handled by this same callback.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     orgid = response.meta['orgid']
     page = response.meta['page']
     js = json.loads(response.text)
     totalPage = js['totalPages'] if page == 1 else response.meta['totalPage']

     configs = BRANCH_OrgConfigs
     for js_ in js['result']:
         result = {'orgid': orgid}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item

     # Strict ``<``: with ``<=`` the last page scheduled a request for page
     # totalPage + 1, which does not exist (otcInfoParse5 already uses ``<``).
     if page < totalPage:
         page += 1
         yield scrapy.FormRequest('http://jg.sac.net.cn/pages/publicity/resource!list.action',
                                  formdata={'filter_LIKES_mboi_branch_full_name': '',
                                            'filter_LIKES_mboi_off_address': '',
                                            'filter_EQS_aoi_id': str(orgid),
                                            'page.searchFileName': 'publicity',
                                            'page.sqlKey': 'PAG_BRANCH_ORG',
                                            'page.sqlCKey': 'SIZE_BRANCH_ORG',
                                            '_search': 'false',
                                            # millisecond timestamp sent as 'nd'
                                            'nd': str(int(time.time() * 1000)),
                                            'page.pageSize': '15',
                                            'page.pageNo': str(page),
                                            'page.orderBy': 'MATO_UPDATE_DATE',
                                            'page.order': 'desc'},
                                  meta={'orgid': orgid, 'page': page, 'totalPage': totalPage},
                                  callback=self.BRANCH_OrgParse,
                                  headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux'))},)
Exemplo n.º 11
0
 def otcInfoParse5(self, response):
     """Securities rating agency -- practitioner info (paginated).

     Reads ``page`` and ``otcid`` from ``response.meta``. The total page
     count comes from the response's ``totalPages`` on page 1 and from
     ``response.meta['totalPage']`` afterwards. Yields one ``SacItem`` per
     entry of ``js['result']`` and, while ``page < totalPage``, a form
     request for the next page handled by this same callback.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     page = response.meta['page']
     orgid = response.meta['otcid']
     js = json.loads(response.text)
     totalPage = js['totalPages'] if page == 1 else response.meta['totalPage']

     configs = otcInfoConfigs
     for js_ in js['result']:
         result = {'orgid': orgid}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(S.select_content(js_, config))

         # A fresh item per row: re-yielding one mutated item makes every
         # emitted item alias the same object.
         item = SacItem()
         item['result'] = result
         item['keys'] = configs['list']['keys']
         item['db'] = configs['list']['db']
         yield item

     if page < totalPage:
         page += 1
         yield scrapy.FormRequest('http://jg.sac.net.cn/pages/publicity/resource!list.action',
                                  formdata={
                                      'filter_EQS_aoi_id': str(orgid),
                                      'page.searchFileName': 'publicity',
                                      'page.sqlKey': 'PAG_PRACTITIONERS',
                                      # NOTE(review): spelling kept verbatim;
                                      # confirm it matches the server-side key.
                                      'page.sqlCKey': 'SIZE_PRACTITONERS',
                                      '_search': 'false',
                                      # millisecond timestamp sent as 'nd'
                                      'nd': str(int(time.time() * 1000)),
                                      'page.pageSize': '15',
                                      'page.pageNo': str(page),
                                      'page.orderBy': 'MATO_UPDATE_DATE',
                                      'page.order': 'desc'},
                                  callback=self.otcInfoParse5,
                                  meta={'otcid': orgid, 'page': page, 'totalPage': totalPage},
                                  headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux')),
                                           'Connection': 'keep-alive'},)
Exemplo n.º 12
0
 def orgInfoParse1(self, response):
     """Securities company basic info -- the result is passed to orgInfoParse2.

     For each row of the JSON array in ``response.text``, maps the fields
     listed in ``orgInfoparse1configs['data']`` into a result dict and yields
     a form request (``sqlval`` = ``SEARCH_ZQGS_QUALIFATION``) whose meta
     carries that dict and ``orgid`` to ``self.orgInfoParse2``. No item is
     emitted here.
     """
     # This callback is a generator, so ``return request`` would only set the
     # StopIteration value and the request would never be scheduled; whatever
     # checkTimeError hands back (presumably a retry request -- confirm) has
     # to be yielded instead.
     request = checkTimeError(response)
     if request:
         yield request
         return

     orgid = response.meta['orgid']
     configs = orgInfoparse1configs
     for js_ in json.loads(response.text):
         # One dict per row: a single dict shared by every request's meta
         # would be overwritten by later rows before the callbacks run.
         result = {}
         for config in configs['data']:
             result[config['En']] = S.replace_invalid_char(
                 S.select_content(js_, config, response))

         data = {'filter_EQS_aoi_id': str(orgid),
                 'sqlkey': 'publicity',
                 'sqlval': 'SEARCH_ZQGS_QUALIFATION'}
         yield scrapy.FormRequest('http://jg.sac.net.cn/pages/publicity/resource!search.action',
                                  formdata=data,
                                  headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux'))},
                                  callback=self.orgInfoParse2,
                                  meta={'orgid': orgid, 'result': result},
                                  )