Пример #1
0
    def get_contributive_info(self, session, param_dict, data):
        """Fetch every page of the contribution listing and record each one.

        The first page is requested without a ``currPage`` argument; the
        value of its ``#countPage`` element is taken as the page total
        (falling back to 1 when it cannot be read).  Each page that is
        fetched is recorded through ``append_model`` and then handed to
        ``get_contributive_info_detail``.  A failed request is recorded
        with ``STATUS_FAIL`` and stops the crawl; a page total of 0 is
        recorded with ``STATUS_NOT_EXIST``.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param param_dict: mapping that must provide ``'nbxh'`` and ``'qylx'``
        :param data: collector forwarded to ``append_model``
        :return: None; any exception is logged and swallowed
        """
        try:
            first_url = 'http://{host}/gsbaseInfoAction_gdczInfo.action?randomNum={rand}&nbxh={nbxh}&qylx={qylx}&menustring=1'.format(
                host=self.host,
                rand=util.get_random_num(),
                nbxh=param_dict['nbxh'],
                qylx=param_dict['qylx'])
            first_resp = self.task_request(session, session.get, first_url)
            if first_resp is None:
                self.append_model(data, Model.contributive_info, first_url, '',
                                  status=self.STATUS_FAIL)
                return

            # The page total lives in a form field; if it is missing or not
            # numeric, log it and assume a single page.
            try:
                count_node = PyQuery(first_resp.text,
                                     parser='html').find('#countPage')
                total_pages = int(count_node.attr('value'))
            except Exception as parse_err:
                self.log.exception(parse_err)
                total_pages = 1

            if total_pages == 0:
                self.append_model(data, Model.contributive_info, first_url,
                                  first_resp.text,
                                  status=self.STATUS_NOT_EXIST)
                return

            self.append_model(data, Model.contributive_info, first_url,
                              first_resp.text)

            # Fetch the contribution details referenced by the first page.
            self.get_contributive_info_detail(session, first_resp.text, data)

            current = 2
            while current <= total_pages:
                page_url = 'http://{host}/gsbaseInfoAction_gdczInfo.action?randomNum={rand}&nbxh={nbxh}&qylx={qylx}&menustring=1&currPage={page}'.format(
                    host=self.host,
                    rand=util.get_random_num(),
                    nbxh=param_dict['nbxh'],
                    qylx=param_dict['qylx'],
                    page=current)
                page_resp = self.task_request(session, session.get, page_url)
                if page_resp is None:
                    self.append_model(data, Model.contributive_info, page_url,
                                      '', status=self.STATUS_FAIL)
                    return

                self.append_model(data, Model.contributive_info, page_url,
                                  page_resp.text)

                # Fetch the contribution details referenced by this page.
                self.get_contributive_info_detail(session, page_resp.text,
                                                  data)
                current += 1

        except Exception as crawl_err:
            self.log.exception(crawl_err)
Пример #2
0
    def get_shareholder_info(self, session, pri_pid, data):
        """Walk the paginated shareholder endpoint and record every page.

        Each response is expected to be JSON carrying ``page.totalPage``;
        that value (with 0 treated as 1) decides how many pages are
        requested.  A failed request, unparsable JSON, or missing paging
        information is recorded with ``STATUS_FAIL`` and ends the crawl.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param pri_pid: identifier placed in the ``pripid`` query argument
        :param data: collector forwarded to ``append_model``
        """
        current = 1
        last = 1
        while current <= last:
            url = ('http://{host}/ansubcapital/queryAnsubcapitaltrue.do'
                   '?pripid={pripid}&randommath={randommath}&currentPage={page}').format(
                host=self.host,
                pripid=pri_pid,
                randommath=util.get_random_num(),
                page=current)
            resp = self.task_request(session, session.get, url)
            if resp is None:
                self.append_model(data, Model.shareholder_info, url, '',
                                  status=self.STATUS_FAIL)
                return

            body = resp.text
            parsed = util.json_loads(body)
            paging = None if parsed is None else parsed.get('page', None)
            declared = None if paging is None else paging.get('totalPage', None)
            if declared is None:
                # Covers bad JSON, a missing 'page' object and a missing
                # 'totalPage' value alike.
                self.append_model(data, Model.shareholder_info, url, body,
                                  status=self.STATUS_FAIL)
                return

            # A reported total of 0 still means the page just fetched exists.
            last = int(declared) or 1

            self.append_model(data, Model.shareholder_info, url, body)
            current += 1
Пример #3
0
 def get_key_person_info(self, session, pri_pid, data):
     """Request the key-person endpoint once and record the outcome.

     :param session: HTTP session whose ``get`` is passed to ``task_request``
     :param pri_pid: identifier placed in the ``pripid`` query argument
     :param data: collector forwarded to ``append_model``
     """
     nonce = util.get_random_num()
     template = 'http://{host}/epriperson/queryPerson.do?' \
                'pripid={pripid}&randommath={randommath}'
     url = template.format(host=self.host, pripid=pri_pid, randommath=nonce)
     resp = self.task_request(session, session.get, url)
     if resp is not None:
         self.append_model(data, Model.key_person_info, url, resp.text)
     else:
         # No response: record the attempt with an empty body.
         self.append_model(data, Model.key_person_info, url, '',
                           status=self.STATUS_FAIL)
Пример #4
0
 def get_annual_base_info(self, session, pri_pid, data, year):
     """Request the annual-report base info for one year and record it.

     :param session: HTTP session whose ``get`` is passed to ``task_request``
     :param pri_pid: identifier placed in the ``pripid`` query argument
     :param data: collector forwarded to ``append_model``
     :param year: report year, sent in the query and stored with the record
     """
     nonce = util.get_random_num()
     template = 'http://{host}/anbaseinfo/getquerbaseinfo.do' \
                '?pripid={pripid}&year={year}&randommath={randommath}'
     url = template.format(host=self.host, pripid=pri_pid, randommath=nonce,
                           year=year)
     resp = self.task_request(session, session.get, url)
     if resp is not None:
         self.append_model(data, Model.annual_info, url, resp.text,
                           year=year,
                           classify=Model.type_detail)
     else:
         # No response: record the attempt with an empty body.
         self.append_model(data, Model.annual_info, url, '',
                           status=self.STATUS_FAIL,
                           year=year,
                           classify=Model.type_detail)
Пример #5
0
    def get_key_person_info(self, session, param_dict, data):
        """Request the key-person page once and record the outcome.

        Only ``param_dict['nbxh']`` is used: the URL template has no other
        placeholders, so no random number is generated and ``'qylx'`` is
        not required.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param param_dict: mapping that must provide ``'nbxh'``
        :param data: collector forwarded to ``append_model``
        """
        url = 'http://{host}/gsbaseInfoAction_zzryMoreInfo.action?nbxh={nbxh}'.format(
            host=self.host,
            nbxh=param_dict['nbxh'])
        r = self.task_request(session, session.get, url)
        if r is None:
            # No response: record the attempt with an empty body.
            self.append_model(data,
                              Model.key_person_info,
                              url,
                              '',
                              status=self.STATUS_FAIL)
            return

        self.append_model(data, Model.key_person_info, url, r.text)
Пример #6
0
    def get_shareholder_info(self, session, param_dict, data):
        """Request the shareholder page once and record the outcome.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param param_dict: mapping that must provide ``'nbxh'`` and ``'qylx'``
        :param data: collector forwarded to ``append_model``
        """
        template = 'http://{host}/gsbaseInfoAction_gdczGtInfo.action?randomNum={rand}&nbxh={nbxh}&qylx={qylx}&menustring=1'
        url = template.format(host=self.host,
                              rand=util.get_random_num(),
                              nbxh=param_dict['nbxh'],
                              qylx=param_dict['qylx'])
        resp = self.task_request(session, session.get, url)
        if resp is not None:
            self.append_model(data, Model.shareholder_info, url, resp.text)
        else:
            # No response: record the attempt with an empty body.
            self.append_model(data, Model.shareholder_info, url, '',
                              status=self.STATUS_FAIL)
Пример #7
0
 def get_annual_shareholder_info(self, session, pri_pid, data, year):
     """Request the annual-report shareholder list for one year.

     :param session: HTTP session whose ``get`` is passed to ``task_request``
     :param pri_pid: identifier placed in the ``pripid`` query argument
     :param data: collector forwarded to ``append_model``
     :param year: report year, sent in the query and stored with the record
     :return: ``(url, response_text)`` on success, ``(None, None)`` when the
         request produced no response
     """
     nonce = util.get_random_num()
     template = 'http://{host}/ansubcapital/queryAnsubcapital.do' \
                '?pripid={pripid}&year={year}&randommath={randommath}&showCount=100'
     url = template.format(host=self.host, pripid=pri_pid, randommath=nonce,
                           year=year)
     resp = self.task_request(session, session.get, url)
     if resp is None:
         # No response: record the attempt with an empty body.
         self.append_model(data, Model.annual_info, url, '',
                           status=self.STATUS_FAIL,
                           year=year,
                           classify=Model.type_detail)
         return None, None

     body = resp.text
     self.append_model(data, Model.annual_info, url, body,
                       year=year,
                       classify=Model.type_detail)
     return url, body
Пример #8
0
    def get_annual_info(self, session, pri_pid, data):
        """Crawl every annual report listed for an enterprise.

        The report index is paginated.  Each index page is requested with
        its own ``currentPage`` value, and every entry carrying an
        ``ANCHEYEAR`` field triggers the per-year crawlers.  Earlier
        revisions looped ``totalPage`` times over the first page only,
        which repeated the same years and never read later pages, and
        crashed when the paging information was absent.

        A missing, non-numeric or non-positive ``totalPage`` is treated as
        a single page.  A failed request or unparsable JSON ends the crawl
        silently, as before.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param pri_pid: identifier placed in the ``pripid`` query argument
        :param data: collector forwarded to the per-year crawlers
        """
        page = 1
        total_page = 1
        while page <= total_page:
            rand = util.get_random_num()
            url = 'http://{host}/anbaseinfo/queryBaseinfoReport.do' \
                  '?pripid={pripid}&randommath={randommath}&currentPage={page}' \
                .format(host=self.host, pripid=pri_pid, randommath=rand, page=page)
            r = self.task_request(session, session.get, url)
            if r is None:
                return

            result = util.json_loads(r.text)
            if result is None:
                return

            if page == 1:
                # The first response decides how many index pages to walk.
                page_info = result.get('page', None) or {}
                try:
                    total_page = int(page_info.get('totalPage', None))
                except (TypeError, ValueError):
                    total_page = 1
                if total_page <= 0:
                    total_page = 1

            for entry in result.get('data', None) or []:
                try:
                    year = entry['ANCHEYEAR']
                except (KeyError, TypeError):
                    # Entry without a usable year: nothing to crawl for it.
                    continue

                # Annual report: basic information
                self.get_annual_base_info(session, pri_pid, data, year)

                # Annual report: website information
                self.get_annual_website_info(session, pri_pid, data, year)

                # Annual report: shareholder information
                self.get_annual_shareholder_info(session, pri_pid, data, year)

                # Annual report: outbound investment information
                self.get_annual_investment_info(session, pri_pid, data, year)

                # Annual report: guarantees provided to other parties
                self.get_annual_assurance_info(session, pri_pid, data, year)

                # Annual report: equity change information
                self.get_annual_change_info(session, pri_pid, data, year)

                # Annual report: amendment information
                self.get_annual_amendant_info(session, pri_pid, data, year)

                # Annual report: enterprise basic status
                self.get_annual_status_info(session, pri_pid, data, year)

            page += 1
Пример #9
0
    def get_annual_info(self, session, param_dict, data):
        """Crawl the annual-report index and every page each report embeds.

        Steps:

        1. Fetch the index page and extract the ``qynbBase('nbxh','year',
           'qylx')`` calls it contains; return silently when the request
           fails or nothing matches.
        2. For each match, fetch and record the report's base-info page.
        3. For each ``<iframe>`` with a non-empty ``src`` on that page,
           fetch and record the framed page, then follow its server-side
           pagination (``#countPage``) from page 2 onwards.

        Every failed request below the index is recorded with
        ``STATUS_FAIL`` and skipped; it never aborts the remaining work.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param param_dict: mapping that must provide ``'nbxh'`` and ``'qylx'``
        :param data: collector forwarded to ``append_model``
        """
        url = 'http://{host}/gsbaseInfoAction_qynbInfo.action?randomNum={rand}&nbxh={nbxh}&qylx={qylx}&menustring=4'.format(
            host=self.host,
            rand=util.get_random_num(),
            nbxh=param_dict['nbxh'],
            qylx=param_dict['qylx'])
        r = self.task_request(session, session.get, url)
        if r is None:
            return

        # Raw string so the regex escapes are not read as string escapes.
        pattern = r"qynbBase\('(.*?)','(.*?)','(.*?)'\)"
        find_list = re.findall(pattern, r.text)
        if not find_list:
            return

        for nbxh, year, qylx in find_list:
            url = 'http://{host}/gsQynbAction_qynbBaseInfo.action?nbxh={nbxh}&anCheYear={year}&qylxFlag=2&qylx={qylx}'.format(
                host=self.host, nbxh=nbxh, year=year, qylx=qylx)
            report = self.task_request(session, session.get, url)
            if report is None:
                self.append_model(data,
                                  Model.annual_info,
                                  url,
                                  '',
                                  status=self.STATUS_FAIL,
                                  year=year,
                                  classify=Model.type_detail)
                continue

            # Basic information of the report
            self.append_model(data,
                              Model.annual_info,
                              url,
                              report.text,
                              year=year,
                              classify=Model.type_detail)

            # The remaining report sections are embedded as iframes.
            for item in PyQuery(report.text, parser='html').find('iframe').items():
                src = item.attr('src')
                if not src:
                    continue
                url = 'http://{host}{src}'.format(host=self.host, src=src)
                frame = self.task_request(session, session.get, url)
                if frame is None:
                    self.append_model(data,
                                      Model.annual_info,
                                      url,
                                      '',
                                      status=self.STATUS_FAIL,
                                      year=year,
                                      classify=Model.type_detail)
                    continue
                self.append_model(data,
                                  Model.annual_info,
                                  url,
                                  frame.text,
                                  year=year,
                                  classify=Model.type_detail)

                # Server-side pagination: '#countPage' holds the page total
                # as text, so convert it before comparing or looping.
                raw_count = PyQuery(frame.text,
                                    parser='html').find('#countPage').attr('value')
                try:
                    page_count = int(raw_count)
                except (TypeError, ValueError):
                    # No pager (or an unreadable one): only one page exists.
                    continue

                for page in range(2, page_count + 1):
                    url = 'http://{host}{src}&currPage={pagenum}'.format(
                        host=self.host, src=src, pagenum=page)
                    r_item = self.task_request(session, session.get, url)
                    if r_item is None:
                        self.append_model(data,
                                          Model.annual_info,
                                          url,
                                          '',
                                          status=self.STATUS_FAIL,
                                          year=year,
                                          classify=Model.type_detail)
                        # Without a response there is no text to record.
                        continue

                    self.append_model(data,
                                      Model.annual_info,
                                      url,
                                      r_item.text,
                                      year=year,
                                      classify=Model.type_detail)
Пример #10
0
    def get_contributive_info(self, session, pri_pid, data):
        """Walk the paginated contribution list and fetch each entry's detail.

        Every list page is expected to be JSON carrying ``page.totalPage``
        (0 is treated as 1).  A failed request, unparsable JSON, or missing
        paging information is recorded with ``STATUS_FAIL`` under
        ``Model.type_list`` and ends the crawl.  After a list page is
        recorded, each ``data`` entry with an ``INVID`` gets its detail
        page requested and recorded under ``Model.type_detail``; a failed
        detail request is recorded and skipped.  The crawl also ends when
        ``page.showCount`` is absent.

        :param session: HTTP session whose ``get`` is passed to ``task_request``
        :param pri_pid: identifier placed in the ``pripid`` query argument
        :param data: collector forwarded to ``append_model``
        """
        current = 1
        last = 1
        while current <= last:
            list_url = ('http://{host}/einvperson/getqueryeInvPersonService.do'
                        '?pripid={pripid}&randommath={randommath}&currentPage={page}').format(
                host=self.host,
                pripid=pri_pid,
                randommath=util.get_random_num(),
                page=current)
            list_resp = self.task_request(session, session.get, list_url)
            if list_resp is None:
                self.append_model(data, Model.contributive_info, list_url, '',
                                  status=self.STATUS_FAIL,
                                  classify=Model.type_list)
                return

            body = list_resp.text
            parsed = util.json_loads(body)
            paging = None if parsed is None else parsed.get('page', None)
            declared = None if paging is None else paging.get('totalPage', None)
            if declared is None:
                # Covers bad JSON, a missing 'page' object and a missing
                # 'totalPage' value alike.
                self.append_model(data, Model.contributive_info, list_url,
                                  body,
                                  status=self.STATUS_FAIL,
                                  classify=Model.type_list)
                return

            # A reported total of 0 still means the page just fetched exists.
            last = int(declared) or 1

            self.append_model(data, Model.contributive_info, list_url, body,
                              classify=Model.type_list)

            if paging.get('showCount', None) is None:
                return

            # Parse the detail information of every listed entry.
            entries = parsed.get('data', None)
            if entries is None:
                entries = ()
            for entry in entries:
                invid = entry.get('INVID', None)
                if invid is None:
                    continue
                detail_url = 'http://{host}/einvperson/queryInfo?invid={invid}&random={rand}'.format(
                    host=self.host,
                    invid=invid,
                    rand=random.randint(10, 100))
                detail_resp = self.task_request(session, session.get,
                                                detail_url)
                if detail_resp is None:
                    self.append_model(data, Model.contributive_info,
                                      detail_url, '',
                                      status=self.STATUS_FAIL,
                                      classify=Model.type_detail)
                    continue
                self.append_model(data, Model.contributive_info, detail_url,
                                  detail_resp.text,
                                  classify=Model.type_detail)

            current += 1