Exemplo n.º 1
0
 def get_com_id(self):
     """Pick one random `com_info` row whose `status_tm` is still unset.

     The query only considers rows whose `other_id` contains 'ls1000',
     whose LENGTH(`com_id`) is 32, whose `status_tm` IS NULL and whose
     `count_tm` differs from '0'.

     Returns:
         The first row handed back by ``db().selsts`` (com_id, com_name,
         status_tm, count_tm), or a list of four ``None`` values when the
         result is an empty tuple.
     """
     # Selection criteria that were tried earlier and are no longer active:
     #   * `origin` IS NOT NULL combined with `count_patent` != '0'
     #   * a join of `temp_ppp` with `com_info` on `com_name`
     query = """
     SELECT `com_id`,`com_name`,`status_tm`,`count_tm`
     FROM `com_info`
     WHERE `other_id` LIKE '%ls1000%'
     AND LENGTH(`com_id`) = 32
     AND `status_tm` IS NULL
     AND `count_tm` != '0'
     ORDER BY RAND() LIMIT 1;
     """
     rows = db().selsts(query)
     if rows != ():
         return rows[0]
     # Nothing matched: hand back a placeholder of the same width.
     return [None, None, None, None]
Exemplo n.º 2
0
 def upd_status_execued(self, com_id, count):
     """Write the `credit_execued` status and count for one company.

     The status code is derived from ``count``: -1 stays -1, 0 stays 0 and
     every other value is stored as 9.  Both values are written to the
     `com_info` row whose `com_id` matches.

     Args:
         com_id: value matched against `com_info`.`com_id`.
         count: value stored in `count_credit_execued`.
     """
     status = -1 if count == -1 else (0 if count == 0 else 9)
     statement = f"""
     UPDATE 
     `com_info` 
     SET
     `status_credit_execued` = "{status}",`count_credit_execued` = "{count}"
     WHERE 
     `com_id` = "{com_id}" ;
     """
     db().updsts(statement)
Exemplo n.º 3
0
 def __init__(self) -> None:
     """Create the helper objects used by this instance and set the site root URL."""
     # Each helper is built by calling a factory/class that is defined
     # outside this snippet; the notes below reflect how the same names are
     # used by other methods in this file.
     self.db = db()  # database helper (selsts / updsts / inssts elsewhere)
     self.dk = dk()  # presumably provides search_key(com_name) - confirm
     self.gh = gh()  # presumably provides header() for HTTP requests - confirm
     self.tm = tm()  # presumably provides get_localtime() - confirm
     self.gm = gm()  # presumably provides verify(html) - confirm
     self.index_url = 'https://www.qcc.com'  # base URL for page requests
 def upd_status(self, com_id, status_column, count_column, count):
     """Update one status/count column pair of a `com_info` row.

     The status code is derived from ``count``: -1 stays -1, 0 stays 0 and
     any other value becomes 9.

     Args:
         com_id: value matched against `com_info`.`com_id`.
         status_column: name of the column that receives the status code.
         count_column: name of the column that receives ``count``.
         count: record count to store.
     """
     # NOTE(review): column names and values are interpolated straight into
     # the SQL text, so callers must only pass trusted identifiers.
     if count == -1:
         code = -1
     else:
         code = 0 if count == 0 else 9
     status = code
     statement = f"""
         UPDATE 
         `com_info` 
         SET
         `{status_column}` = "{status}",`{count_column}` = "{count}"
         WHERE 
         `com_id` = "{com_id}" ;
         """
     db().updsts(statement)
Exemplo n.º 5
0
 def get_com_id(self):
     """Pick one random `com_info` row whose `status_tm` is still unset.

     Candidate rows have a non-NULL `origin`, LENGTH(`com_id`) > 5,
     `status_tm` IS NULL and `count_patent` != '0'.

     Returns:
         The first row handed back by ``db().selsts`` (com_id, com_name,
         status_tm, count_tm), or a list of four ``None`` values when the
         result is an empty tuple.
     """
     # NOTE(review): the filter tests `count_patent` while the selected
     # columns are the `*_tm` ones - confirm this is intended.
     query = """
     SELECT `com_id`,`com_name`,`status_tm`,`count_tm`
     FROM `com_info` 
     WHERE `origin` 
     IS NOT NULL AND LENGTH(`com_id`) > 5 AND `status_tm` IS NULL AND `count_patent` != '0'
     ORDER BY RAND() LIMIT 1;
     """
     rows = db().selsts(query)
     return [None, None, None, None] if rows == () else rows[0]
    def get_com_id(self):  # randomly fetch one company row that matches the criteria
        """Pick one random `com_info` row with no `status_credit_adm_license` yet.

        Candidate rows have a non-NULL `origin` and LENGTH(`com_id`) > 5.

        Returns:
            The first row handed back by ``db().selsts`` (com_id, com_name),
            or ``[None, None]`` when the result is an empty tuple.
        """
        query = """
        SELECT `com_id`,`com_name`
        FROM `com_info`
        WHERE `origin`
        IS NOT NULL AND LENGTH(`com_id`) > 5 AND `status_credit_adm_license` IS NULL
        ORDER BY RAND() LIMIT 1;
        """

        # Test query (swap in to target one fixed company):
        #   SELECT `com_id`, `com_name` FROM `com_info`
        #   WHERE com_id = '299eee201318f0283f086b4847d69fc7';

        rows = db().selsts(query)
        if rows != ():
            return rows[0]
        return [None, None]
Exemplo n.º 7
0
 def __init__(self, tree) -> None:
     """Store the given tree, create a database helper and set the site root URL.

     Args:
         tree: object kept on the instance as ``self.tree``
             (presumably a parsed HTML tree - confirm against callers).
     """
     self.db = db()  # helper built by db(), which is defined outside this snippet
     self.tree = tree
     self.index_url = 'https://www.qichacha.com'  # base URL for page requests
    def get_page_info(self):  # parse page content
        """Download every licence listing page of one company and store its rows.

        The company (id, name) and the number of pages come from
        ``AdmLicenseBc().get_page_count()``.  For each page the listing is
        requested from qcc.com (tab=run, box=licens); when the response looks
        like a verification redirect, a login redirect or a rate-limit notice
        the method prints a message and waits on ``input`` before moving on
        to the next page.  Otherwise every table row after the first two is
        parsed, printed and inserted into `com_credit_adm_license_bc`, and
        `com_info`.`status_credit_adm_license_bc` is set to 1.

        A row that lacks any of its seven cells is stored as an all-NULL
        record (same as before); only ``IndexError`` from the cell lookups
        triggers that fallback now, so Ctrl-C is no longer swallowed.

        Returns:
            None.
        """
        alb = AdmLicenseBc()
        value = alb.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]
        key = dk().search_key(com_name)
        count = 0  # running number of rows seen across all pages
        for page in range(1, count_page + 1):
            index_url = 'https://www.qcc.com'
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=run&box=licens'
            hds = gh().header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))  # throttle requests
            res = requests.get(page_url, headers=hds).text
            # The "{get_page_info[2]}" suffix in the messages below is a
            # literal location tag; these are plain strings, not f-strings.
            if '<script>window.location.href' in res:
                print('访问频繁,需验证!{get_page_info[2]}')
                input('暂停')
            elif '<script>location.href="/user_login"</script>' in res:
                print('Cookie失效,需更换!{get_page_info[2]}')
                input('程序暂停运行!')
            elif '您的账号访问超频,请稍后访问或联系客服人员' in res:
                print('账号访问超频,请更换账号!{get_page_info[2]}')
                input('程序暂停运行!')
            else:
                tree = etree.HTML(res)
                content_li = tree.xpath(
                    '//table[@class="ntable ntable-odd"]/tr[position()>2]')
                for content in content_li:
                    count += 1
                    try:
                        # Columns 1..7 of the row, in table order.
                        (license_num, license_doc_num, license_doc_name,
                         valid_period_from, valid_period_to, license_office,
                         license_content) = [
                             content.xpath(f'td[{col}]/text()')[0]
                             for col in range(1, 8)
                         ]
                    except IndexError:
                        # At least one cell has no text node: treat the
                        # whole row as empty.
                        license_num = None
                        license_doc_num = None
                        license_doc_name = None
                        valid_period_from = None
                        valid_period_to = None
                        license_office = None
                        license_content = None

                    print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                        '-' * 9, count, page, count_page))
                    localtime = tm().get_localtime()  # current time
                    create_time = localtime
                    print(f'当前时间:{create_time}')
                    print(
                        f'公司ID:{com_id}\n序号:{license_num}\n许可文件编号:{license_doc_num}\n许可文件名称:{license_doc_name}\n有效期自:{valid_period_from}\n'
                        f'有效期至:{valid_period_to}\n许可机关:{license_office}\n许可内容:{license_content}'
                    )
                    if license_num is None:
                        ins = """
                        INSERT INTO
                        `com_credit_adm_license_bc`
                        (`com_id`,`license_num`,`license_doc_num`,`license_doc_name`,`valid_period_from`,
                        `valid_period_to`,`license_office`,`license_content`,`create_time`)
                        VALUES
                        (NULL,NULL,NULL,NULL,NULL,
                        NULL,NULL,NULL,NULL);
                        """
                    else:
                        # NOTE(review): scraped text is interpolated directly
                        # into the SQL; a double quote in any field breaks
                        # the statement.  Prefer bound parameters if the db
                        # helper supports them.
                        ins = f"""
                        INSERT INTO
                        `com_credit_adm_license_bc`
                        (`com_id`,`license_num`,`license_doc_num`,`license_doc_name`,`valid_period_from`,
                        `valid_period_to`,`license_office`,`license_content`,`create_time`)
                        VALUES
                        ("{com_id}","{license_num}","{license_doc_num}","{license_doc_name}","{valid_period_from}",
                        "{valid_period_to}","{license_office}","{license_content}","{create_time}");
                        """
                    db().inssts(ins)

                    upd = f"""
                        UPDATE 
                        `com_info` 
                        SET
                        `status_credit_adm_license_bc` = 1
                        WHERE 
                        `com_id` = "{com_id}" ;
                        """
                    db().updsts(upd)

        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}\n')
        time.sleep(3)
Exemplo n.º 9
0
 def get_com_id(self):
     """Pick one random `com_info` row whose `status_patent` is still unset.

     Candidate rows have `status_patent` IS NULL, `count_patent` != 0 and an
     `other_id` containing 'ls1000'.

     Returns:
         The first row handed back by ``db().selsts`` (com_id, com_name,
         status_patent, count_patent), or a list of four ``None`` values
         when the result is an empty tuple.
     """
     # Selection criteria that were tried earlier and are no longer active:
     #   * `origin` IS NOT NULL with LENGTH(`com_id`) > 8
     #   * a fixed IN (...) list of 32-character `com_id` values
     #   * `origin` equal to one specific batch label, LENGTH(`com_id`) > 5
     #   * a join of `temp_ppp` with `com_info` on `com_name`
     query = """
     SELECT com_id,com_name,status_patent,count_patent 
     FROM com_info WHERE status_patent IS NULL
     AND count_patent != 0
     AND `other_id` LIKE '%ls1000%'
     ORDER BY RAND() LIMIT 1;
     """
     rows = db().selsts(query)
     if rows != ():
         return rows[0]
     return [None, None, None, None]
Exemplo n.º 10
0
    def get_page_info(self):  # fetch page details
        """Download every patent of one company and store the details.

        Company id, company name, page count and site root URL are taken from
        ``PatentInfo().get_page_count()`` (indexes 0-3).  Nothing happens when
        the company id is ``None``.  Otherwise each listing page
        (tab=assets, box=zhuanli) is requested; for every table row after the
        header the linked detail page is requested as well, its fields are
        printed and inserted into `com_patent`, and
        `com_info`.`status_patent` is set to 1.

        When a response looks like a verification redirect, a login redirect
        or a rate-limit notice, a message is printed and the method waits on
        ``input``; the affected page/record is then skipped.

        Fix over the previous version: the fallback for the applicant cell
        used to call ``.strip()`` on an lxml element (AttributeError); it now
        reads the cell's full text through XPath ``string()``.

        Returns:
            None.
        """
        # XPath of the value cell that follows a label cell in the detail table.
        cell_xpath = ('//table[@class="ntable"]/tbody/tr/'
                      'td[contains(text(),"{}")]/following-sibling::td[1]')

        def first_text(tree, label, tail='/text()'):
            # First node selected below the labelled cell, stripped.
            # Raises IndexError when nothing matches.
            return tree.xpath(cell_xpath.format(label) + tail)[0].strip()

        def cell_string(tree, label):
            # Full text content of the labelled cell ('' when absent).
            return tree.xpath(
                'string({})'.format(cell_xpath.format(label))).strip()

        pt = PatentInfo()
        value = pt.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]

        # Temporary override for a one-off backfill run [001]:
        # com_id = 'x697654f34422233895571cf26e42268'
        # com_name = '青岛科技大学'
        # count_page = 500

        if com_id is not None:
            key = dk().search_key(com_name)
            index_url = value[3]
            count = 0  # running number of listing rows seen
            for page in range(1, count_page + 1):
                page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=assets&box=zhuanli'
                hds = gh().header()
                hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
                time.sleep(random.randint(1, 2))  # throttle requests
                res_pg = requests.get(page_url, headers=hds).text
                # The "{get_page_info[n]}" suffixes below are literal
                # location tags; these are plain strings, not f-strings.
                if '<script>window.location.href' in res_pg:
                    print('访问频繁,需验证!{get_page_info[1]}')
                    input('暂停')
                elif '<script>location.href="/user_login"</script>' in res_pg:
                    print('Cookie失效,需更换!{get_page_info[1]}')
                    input('程序暂停运行!')
                elif '您的账号访问超频,请稍后访问或联系客服人员' in res_pg:
                    print('账号访问超频,请更换账号!{get_page_info[1]}')
                    input('程序暂停运行!')
                else:
                    tree_pg = etree.HTML(res_pg)
                    content_li = tree_pg.xpath('//table/tr[position()>1]')
                    for content in content_li:
                        count += 1
                        patent_num = content.xpath('td[1]/text()')[0]
                        patent_type = content.xpath('td[2]/text()')[0]
                        patent_pub_num = content.xpath('td[3]/text()')[0]
                        patent_pub_date = content.xpath('td[4]/text()')[0]
                        patent_name = content.xpath(
                            'td[5]/a/text()')[0].strip()
                        patent_link = content.xpath('td[5]/a/@href')[0]
                        patent_id = patent_link.split('_com_')[1]
                        patent_url = ''.join((index_url, patent_link))
                        time.sleep(random.randint(1, 3))  # throttle requests
                        res_dt = requests.get(patent_url, headers=hds).text
                        if '<script>window.location.href' in res_dt:
                            print('访问频繁,需验证!{get_page_info[2]}')
                            input('暂停')
                        elif '<script>location.href="/user_login"</script>' in res_dt:
                            print('Cookie失效,需更换!{get_page_info[2]}')
                            input('程序暂停运行!')
                        elif '您的账号访问超频,请稍后访问或联系客服人员' in res_dt:
                            print('账号访问超频,请更换账号!{get_page_info[2]}')
                            input('程序暂停运行!')
                        else:
                            tree_dt = etree.HTML(res_dt)
                            app_num = first_text(tree_dt, '申请号')
                            app_date = first_text(tree_dt, '申请日')
                            prio_date = first_text(tree_dt, '优先权日')
                            prio_num = first_text(tree_dt, '优先权号')
                            inventor = first_text(tree_dt, '发明人')
                            try:
                                # Applicant is usually rendered as a link.
                                applicant = first_text(
                                    tree_dt, '申请(专利权)人', '/a/text()')
                            except IndexError:
                                # No link: take the cell's text content.
                                applicant = cell_string(
                                    tree_dt, '申请(专利权)人')
                            try:
                                agency = first_text(
                                    tree_dt, '代理机构', '/a/text()')
                            except IndexError:
                                agency = first_text(tree_dt, '代理机构')
                            agent = first_text(tree_dt, '代理人')
                            ipc = first_text(tree_dt, 'IPC分类号').replace(
                                ' ', '').replace('\n', '')
                            cpc = first_text(tree_dt, 'CPC分类号')
                            app_address = first_text(tree_dt, '申请人地址')
                            app_zip_code = first_text(tree_dt, '申请人邮编')
                            try:
                                abstract = first_text(tree_dt, '摘要')
                            except IndexError:
                                abstract = cell_string(tree_dt, '摘要')
                            try:
                                abstract_photo = first_text(
                                    tree_dt, '摘要附图', '/img/@src')
                            except IndexError:
                                abstract_photo = '-'
                            # Joining an XPath result list cannot fail, so the
                            # former try/except around these two was dead code.
                            # Double quotes are replaced so the claim text does
                            # not terminate the quoted SQL value below.
                            claim = ''.join(
                                tree_dt.xpath(
                                    '//table[@class="ntable"]/tr/td[@class="ea_instructions" and position()=1]/p/text()'
                                )).replace('"', "'")
                            instr_cell = (
                                '//div[@class="tcaption"]/h3[text()="说明书"]'
                                '/parent::div/following-sibling::table[@class="ntable"]'
                                '/tr/td[@class="ea_instructions"]')
                            instructions = ''.join(
                                tree_dt.xpath('|'.join(
                                    f'{instr_cell}/{tag}/text()'
                                    for tag in ('h1', 'h2', 'p'))))
                            print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                                '-' * 9, count, page, count_page))
                            localtime = tm().get_localtime()  # current time
                            create_time = localtime
                            print(f'公司ID:{com_id} 当前时间:{localtime}')
                            print(f'公司名称:{com_name}\n专利ID:{patent_id}')
                            print(
                                f'序号:{patent_num}\n专利类型:{patent_type}\n公开(公告)号:{patent_pub_num}\n公开(公告)日期:{patent_pub_date}\n专利名称:{patent_name}\n'
                                f'专利页URL:{patent_url}\n申请号:{app_num}\n申请日期:{app_date}\n优先权日:{prio_date}\n优先权号:{prio_num}\n'
                                f'发明人:{inventor}\n申请(专利权)人:{applicant}\n代理机构:{agency}\n代理人:{agent}\nIPC分类号:{ipc}\n'
                                f'CPC分类号:{cpc}\n申请人地址:{app_address}\n申请人邮编:{app_zip_code}\n摘要:{abstract}\n摘要附图:{abstract_photo}\n'
                                f'权利要求:{claim}\n说明书:{instructions}\n')
                            # NOTE(review): scraped text is interpolated
                            # directly into the SQL and only `claim` has its
                            # double quotes replaced.  Prefer bound parameters
                            # if the db helper supports them.
                            ins = f"""
                            INSERT INTO  
                            `com_patent`
                            (`com_id`,`patent_num`,`patent_type`,`patent_pub_num`,`patent_pub_date`,
                            `patent_name`,`patent_url`,`app_num`,`app_date`,`prio_date`,
                            `prio_num`,`inventor`,`applicant`,`agency`,`agent`,
                            `ipc`,`cpc`,`app_address`,`app_zip_code`,`abstract`,`abstract_photo`,
                            `claim`,`instructions`,`create_time`,`patent_id`)
                            VALUES 
                            ("{com_id}","{patent_num}","{patent_type}","{patent_pub_num}","{patent_pub_date}",
                            "{patent_name}","{patent_url}","{app_num}","{app_date}","{prio_date}",
                            "{prio_num}","{inventor}","{applicant}","{agency}","{agent}",
                            "{ipc}","{cpc}","{app_address}","{app_zip_code}","{abstract}","{abstract_photo}",
                            "{claim}","{instructions}","{create_time}","{patent_id}");
                            """
                            db().inssts(ins)

                            upd = f"""
                            UPDATE 
                            `com_info` 
                            SET
                            `status_patent` = 1
                            WHERE 
                            `com_id` = "{com_id}" ;
                            """
                            db().updsts(upd)
            localtime = tm().get_localtime()  # current time
            print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
            print(f'当前时间:{localtime}\n')
            time.sleep(3)
Exemplo n.º 11
0
 def __init__(self, arg) -> None:
     """Create a database helper and keep the given argument on the instance.

     Args:
         arg: value stored unchanged as ``self.arg``; its meaning is not
             visible in this snippet.
     """
     self.db = db()  # helper built by db(), which is defined outside this snippet
     self.arg = arg
Exemplo n.º 12
0
    def get_page_info(self):  # parse page content
        """Download every enforcement listing page of one company and store its rows.

        The company (id, name) and the number of pages come from
        ``Credit().get_page_count()``.  Each listing page (tab=susong,
        box=zhixing) is requested; for every table row after the first two
        the related case page is requested too.  The parsed fields are
        printed and inserted into `com_credit_execued`, and
        `com_info`.`status_credit_execued` is set to 1.

        When a response looks like a verification redirect, a login redirect
        or a rate-limit notice, a message is printed and the method waits on
        ``input`` before continuing.

        Fixes over the previous version:
          * the all-NULL INSERT listed ten columns but only nine values;
          * `exec_person` / `occ` were unbound (or stale from the previous
            row) when the case page was blocked - they now default to None
            and are written as SQL NULL;
          * the bare ``except:`` also swallowed KeyboardInterrupt; it is now
            ``except Exception`` so parse and network failures still fall
            back to an empty record.

        Returns:
            None.
        """

        def quoted(value):
            # SQL literal for an optional value: NULL instead of the text "None".
            return 'NULL' if value is None else f'"{value}"'

        cd = Credit()
        value = cd.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]
        key = dk().search_key(com_name)
        count = 0  # running number of rows seen across all pages
        for page in range(1, count_page + 1):
            index_url = 'https://www.qichacha.com'
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=susong&box=zhixing'
            hds = gh().header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))  # throttle requests
            res_pg = requests.get(page_url, headers=hds).text
            # The "{get_page_info}" suffix in the messages below is a literal
            # location tag; these are plain strings, not f-strings.
            if '<script>window.location.href' in res_pg:
                print('访问频繁,需验证!{get_page_info}')
                input('暂停')
            elif '<script>location.href="/user_login"</script>' in res_pg:
                print('Cookie失效,需更换!{get_page_info}')
                input('程序暂停运行!')
            elif '您的账号访问超频,请稍后访问或联系客服人员' in res_pg:
                print('账号访问超频,请更换账号!{get_page_info}')
                input('程序暂停运行!')
            else:
                tree_pg = etree.HTML(res_pg)
                content_li = tree_pg.xpath(
                    '//table[@class="ntable ntable-odd"]/tr[position()>2]')
                for content in content_li:
                    count += 1
                    # Only filled in when the case page loads normally.
                    exec_person = None
                    occ = None
                    try:
                        exec_num = content.xpath('td[1]/text()')[0]
                        case_num = content.xpath('td[2]/a/text()')[0]
                        # The case id is embedded in the link's onclick handler.
                        case_id = content.xpath(
                            'td[2]/a[contains(@onclick,"showRelatModal")]/@onclick'
                        )[0].split('zhixing",')[1].split('"')[1]
                        case_url = 'id='.join(
                            ('https://www.qichacha.com/company_zhixingRelat?',
                             case_id))
                        filing_time = content.xpath('td[3]/text()')[0]
                        court_of_exec = content.xpath('td[4]/text()')[0]
                        exec_obj = content.xpath('td[5]/text()')[0]
                        time.sleep(random.randint(1, 2))  # throttle requests
                        res_info = requests.get(case_url, headers=hds).text
                        if '<script>window.location.href' in res_info:
                            print('访问频繁,需验证!{get_page_info}')
                            input('暂停')
                        elif '<script>location.href="/user_login"</script>' in res_info:
                            print('Cookie失效,需更换!{get_page_info}')
                            input('程序暂停运行!')
                        elif '您的账号访问超频,请稍后访问或联系客服人员' in res_info:
                            print('账号访问超频,请更换账号!{get_page_info}')
                            input('程序暂停运行!')
                        else:
                            tree_info = etree.HTML(res_info)
                            exec_person = tree_info.xpath(
                                '//table/tbody/tr[1]/td[2]/text()')[0]
                            occ = tree_info.xpath(
                                '//table/tbody/tr[1]/td[4]/text()')[0]
                    except Exception:
                        # Best effort: any parse or request failure turns the
                        # row into an empty record.
                        exec_num = None
                        case_num = None
                        case_id = None
                        case_url = None
                        filing_time = None
                        court_of_exec = None
                        exec_obj = None
                        exec_person = None
                        occ = None
                    print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                        '-' * 9, count, page, count_page))
                    localtime = tm().get_localtime()  # current time
                    create_time = localtime
                    print(f'当前时间:{create_time}')
                    print(
                        f'公司ID:{com_id}\n序号:{exec_num}\n案号:{case_num}\n案例ID:{case_id}\n案例链接:{case_url}\n'
                        f'立案时间:{filing_time}\n执行法院:{court_of_exec}\n执行标的:{exec_obj}\n被执行人:{exec_person}\n身份证号/组织机构代码:{occ}\n'
                    )
                    if exec_num is None:
                        # Ten NULLs, one per listed column.
                        ins = """
                        INSERT INTO
                        `com_credit_execued`
                        (`com_id`,`exec_num`,`case_num`,`case_id`,`filing_time`,
                        `court_of_exec`,`exec_obj`,`exec_person`,`occ`,`create_time`)
                        VALUES
                        (NULL,NULL,NULL,NULL,NULL,
                        NULL,NULL,NULL,NULL,NULL);
                        """
                    else:
                        # NOTE(review): scraped text is interpolated directly
                        # into the SQL; a double quote in any field breaks
                        # the statement.  Prefer bound parameters if the db
                        # helper supports them.
                        ins = f"""
                        INSERT INTO 
                        `com_credit_execued`
                        (`com_id`,`exec_num`,`case_num`,`case_id`,`filing_time`,
                        `court_of_exec`,`exec_obj`,`exec_person`,`occ`,`create_time`)
                        VALUES 
                        ("{com_id}","{exec_num}","{case_num}","{case_id}","{filing_time}",
                        "{court_of_exec}","{exec_obj}",{quoted(exec_person)},{quoted(occ)},"{create_time}");
                        """
                    db().inssts(ins)

                    upd = f"""
                    UPDATE 
                    `com_info` 
                    SET
                    `status_credit_execued` = 1
                    WHERE 
                    `com_id` = "{com_id}" ;
                    """
                    db().updsts(upd)

        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}\n')
        time.sleep(3)
Exemplo n.º 13
0
    def get_page_info(self):  # fetch page details
        """Download every website listing page of one company and store its rows.

        Company id, company name, page count and site root URL are taken from
        ``WebSite().get_page_count()`` (indexes 0-3).  Nothing happens when
        the company id is ``None``.  Otherwise each listing page
        (tab=assets, box=website) is requested, passed through
        ``ws.gm.verify`` to obtain a tree, and every table row after the
        header is printed and inserted into `com_web`;
        `com_info`.`status_web` is then set to 1.

        Fix over the previous version: the domain cell is now split into
        non-empty, stripped fragments.  A single fragment is stored as a
        string, several as a list and none as '-'.  Previously a cell without
        a line break raised IndexError and blank fragments could end up in
        the list.

        Returns:
            None.
        """
        ws = WebSite()
        value = ws.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]

        # Temporary override for a one-off backfill run [001]:
        # com_id = 'f1c5372005e04ba99175d5fd3db7b8fc'
        # com_name = '深圳市腾讯计算机系统有限公司'
        # count_page = 45

        if com_id is not None:
            key = ws.dk.search_key(com_name)
            index_url = value[3]
            count = 0  # running number of rows seen across all pages
            for page in range(1, count_page + 1):
                page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=assets&box=website'
                hds = ws.gh.header()
                hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
                time.sleep(random.randint(1, 2))  # throttle requests
                res_pg = requests.get(page_url, headers=hds).text
                tree_pg = ws.gm.verify(res_pg)
                content_li = tree_pg.xpath('//table/tr[position()>1]')
                for content in content_li:
                    count += 1
                    web_num = content.xpath('td[1]/text()')[0]
                    web_name = content.xpath('td[2]/text()')[0]
                    # Zero links -> '-', one link -> its text, several -> list.
                    links = content.xpath('td[3]/a/text()')
                    if not links:
                        web_site = '-'
                    elif len(links) == 1:
                        web_site = links[0]
                    else:
                        web_site = links
                    # The domain cell holds one domain per line, padded with
                    # whitespace.
                    raw_domains = content.xpath('td[4]/text()')[0]
                    domains = [
                        part.strip() for part in raw_domains.split('\n')
                        if part.strip()
                    ]
                    if not domains:
                        domain_name = '-'
                    elif len(domains) == 1:
                        domain_name = domains[0]
                    else:
                        domain_name = domains
                    icp = content.xpath('td[5]/text()')[0].strip()
                    approved_date = content.xpath('td[6]/text()')[0]
                    print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                        '-' * 9, count, page, count_page))
                    localtime = ws.tm.get_localtime()  # current time
                    create_time = localtime
                    print(f'公司ID:{com_id} 当前时间:{localtime}')
                    print(f'公司名称:{com_name}\n序号:{web_num}')
                    print(
                        f'网站名称:{web_name}\n网址:{web_site}\n域名:{domain_name}\n网站备案/许可证号:{icp}\n审核日期:{approved_date}\n'
                    )
                    # NOTE(review): scraped text is interpolated directly
                    # into the SQL; a double quote in any field breaks the
                    # statement.  Prefer bound parameters if the db helper
                    # supports them.
                    ins = f"""
                    INSERT INTO
                    `com_web`
                    (`com_id`,`web_num`,`web_name`,`web_site`,`domain_name`,
                    `icp`,`approved_date`,`create_time`)
                    VALUES
                    ("{com_id}","{web_num}","{web_name}","{web_site}","{domain_name}",
                    "{icp}","{approved_date}","{create_time}");
                    """
                    db().inssts(ins)

                    upd = f"""
                    UPDATE
                    `com_info`
                    SET
                    `status_web` = 1
                    WHERE
                    `com_id` = "{com_id}" ;
                    """
                    db().updsts(upd)
            localtime = ws.tm.get_localtime()  # current time
            print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
            print(f'当前时间:{localtime}\n')
            time.sleep(3)
Exemplo n.º 14
0
 def __init__(self) -> None:
     """Create the helper objects used by this instance."""
     # Each helper is built by calling a factory/class that is defined
     # outside this snippet; the notes below reflect how the same names are
     # used by other methods in this file.
     self.db = db()  # database helper (selsts / updsts / inssts elsewhere)
     self.dk = dk()  # presumably provides search_key(com_name) - confirm
     self.gh = gh()  # presumably provides header() for HTTP requests - confirm
     self.gm = gm()  # presumably provides verify(html) - confirm
     self.tm = tm()  # presumably provides get_localtime() - confirm
Exemplo n.º 15
0
    def get_page_info(self):
        """Scrape one company's trademarks and store them in `com_trademark`.

        The company id, company name, number of list pages and site base URL
        are read, in that order, from ``TradeMarkInfo().get_page_count()``.
        For every list page the trademark table is requested; for every row
        the detail page behind the row's link is requested and parsed, the
        record is printed, inserted into ``com_trademark`` and the company's
        ``com_info.status_tm`` is set to 1.

        Returns ``None``. Does nothing when the company id is ``None``.

        Raises:
            IndexError: when an expected list cell or detail field is absent
                from a fetched page (every lookup takes the first XPath
                match). Only the agency field has a fallback lookup.
        """
        tmi = TradeMarkInfo()
        value = tmi.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]
        if com_id is None:
            return

        def detail_text(tree, label, node='text()'):
            # First XPath match, stripped, taken from the cell that follows
            # the detail-table cell whose text contains `label`.
            return tree.xpath(
                '//table[@class="ntable"]/tbody/tr/'
                f'td[contains(text(),"{label}")]/following-sibling::td[1]/{node}'
            )[0].strip()

        key = tmi.dk.search_key(com_name)
        index_url = value[3]
        count = 0  # running number of trademarks handled across all pages
        for page in range(1, count_page + 1):
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=assets&box=shangbiao'
            hds = tmi.gh.header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))
            res_tmi = requests.get(page_url, headers=hds).text
            tree_tmi = etree.HTML(res_tmi)
            # Skip the first table row (position()>1).
            content_li = tree_tmi.xpath('//table/tr[position()>1]')
            for content in content_li:
                count += 1
                tm_num = content.xpath('td[1]/text()')[0]
                tm_logo_url = content.xpath('td[2]/img/@src')[0]
                tm_name = content.xpath('td[3]/text()')[0]
                tm_status = content.xpath('td[4]/text()')[0]
                app_date = content.xpath('td[5]/text()')[0]
                tm_regno = content.xpath('td[6]/text()')[0]
                tm_int_type = content.xpath('td[7]/text()')[0]
                trademark_link = content.xpath('td[8]/a/@href')[0]
                trademark_url = ''.join((index_url, trademark_link))
                time.sleep(random.randint(1, 3))
                # The detail request reuses the list page's headers.
                res_dt = requests.get(trademark_url, headers=hds).text
                tree_dt = etree.HTML(res_dt)
                sim_groups = detail_text(tree_dt, '类似群')
                app_cn = detail_text(tree_dt, '申请人名称(中文)')
                app_en = detail_text(tree_dt, '申请人名称(英文)')
                app_addr_cn = detail_text(tree_dt, '申请人地址(中文)')
                app_addr_en = detail_text(tree_dt, '申请人地址(英文)')
                first_trial_no = detail_text(tree_dt, '初审公告期号')
                first_trial_date = detail_text(
                    tree_dt, '初审公告日期').replace(' ', '').replace('\n', '')
                reg_not_peri_no = detail_text(tree_dt, '注册公告期号')
                reg_not_peri_date = detail_text(tree_dt, '注册公告日期')
                is_comm_tm = detail_text(tree_dt, '是否共有商标')
                tm_type = detail_text(tree_dt, '商标类型')
                exclu_right_limit = detail_text(tree_dt, '专用权期限')
                tm_form = detail_text(tree_dt, '商标形式')
                int_reg_date = detail_text(tree_dt, '国际注册日期')
                later_scheduled_date = detail_text(tree_dt, '后期指定日期')
                prio_date = detail_text(tree_dt, '优先权日期')
                try:
                    # Prefer the text of a link inside the agency cell.
                    agency = detail_text(tree_dt, '代理/办理机构', 'a/text()')
                except IndexError:
                    # No link in the cell: fall back to its plain text.
                    agency = detail_text(tree_dt, '代理/办理机构')
                service = detail_text(tree_dt, '商品/服务')
                print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                    '-' * 9, count, page, count_page))
                localtime = tm().get_localtime()  # current time
                create_time = localtime
                print(f'当前时间:{localtime}')
                print(f'公司ID:{com_id}\n公司名称:{com_name}')
                print(
                    f'序号:{tm_num}\n商标LOGO URL:{tm_logo_url}\n商标名称:{tm_name}\n商标状态:{tm_status}\n申请时间:{app_date}\n'
                    f'申请/注册号:{tm_regno}\n国际类型:{tm_int_type}\n类似群:{sim_groups}\n申请人名称(中文):{app_cn}\n申请人名称(英文):{app_en}\n'
                    f'申请人地址(中文):{app_addr_cn}\n申请人地址(英文):{app_addr_en}\n初审公告期号:{first_trial_no}\n初审公告日期:{first_trial_date}\n注册公告期号:{reg_not_peri_no}\n'
                    f'注册公告日期:{reg_not_peri_date}\n是否共有商标:{is_comm_tm}\n商标类型:{tm_type}\n专用权期限:{exclu_right_limit}\n商标形式:{tm_form}\n'
                    f'国际注册日期:{int_reg_date}\n后期指定日期:{later_scheduled_date}\n优先权日期:{prio_date}\n代理机构:{agency}\n商品/服务:{service}'
                )
                # NOTE(review): scraped text is interpolated straight into
                # the SQL below, so a value containing a double quote breaks
                # the statement (and allows injection). Switch to a
                # parameterised query once the `db` helper's API is
                # confirmed to support one.
                ins = f"""
                    INSERT INTO  
                    `com_trademark`
                    (`com_id`,`tm_num`,`tm_logo_url`,`tm_name`,`tm_status`,
                    `app_date`,`tm_regno`,`tm_int_type`,`sim_groups`,`app_cn`,
                    `app_en`,`app_addr_cn`,`app_addr_en`,`first_trial_no`,`first_trial_date`,
                    `reg_not_peri_no`,`reg_not_peri_date`,`is_comm_tm`,`tm_type`,`exclu_right_limit`,
                    `tm_form`,`int_reg_date`,`later_scheduled_date`,`prio_date`,`agency`,
                    `service`,`create_time`)
                    VALUES 
                    ("{com_id}","{tm_num}","{tm_logo_url}","{tm_name}","{tm_status}",
                    "{app_date}","{tm_regno}","{tm_int_type}","{sim_groups}","{app_cn}",
                    "{app_en}","{app_addr_cn}","{app_addr_en}","{first_trial_no}","{first_trial_date}",
                    "{reg_not_peri_no}","{reg_not_peri_date}","{is_comm_tm}","{tm_type}","{exclu_right_limit}",
                    "{tm_form}","{int_reg_date}","{later_scheduled_date}","{prio_date}","{agency}",
                    "{service}","{create_time}");
                    """
                db().inssts(ins)

                # Executed after every insert, so the company is marked as
                # soon as its first trademark row has been stored.
                upd = f"""
                    UPDATE 
                    `com_info` 
                    SET
                    `status_tm` = 1
                    WHERE 
                    `com_id` = "{com_id}" ;
                    """
                db().updsts(upd)
        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}')
Exemplo n.º 16
0
 def __init__(self):
     """Create a ``db`` instance and keep it on the object as ``self.db``."""
     self.db = db()
Exemplo n.º 17
0
 def __init__(self):
     self.db = db()
     self.dk = dk()
     self.gh = gh()