class get_json(object):
    """Collect company registration details from xin.baidu.com.

    The class reads company names from a MongoDB collection, can mirror
    them into Redis (`transfer`), and can look a single company up on the
    Baidu enterprise-credit site and store the parsed record
    (`get_companydetails`).

    `MongoDB`, `REDIS`, the Redis connection constants, the HTTP fetcher
    `s` and the target collection `db1` are module-level names defined
    outside this class.
    """

    # (item key, label text of the leading <td>, scope to the basic table?)
    # The order is the order in which keys are written into the record.
    _TABLE_FIELDS = (
        ('credit_code', '统一社会信用代码', False),    # unified social credit code
        ('register_time', '成立日期', True),           # date of establishment
        ('register_money', '注册资本', False),         # registered capital
        ('industry', '所属行业', False),               # industry
        ('business_state', '经营状态', False),         # operating status
        ('organization_code', '组织机构代码', False),  # organization code
        ('register_num', '工商注册号', False),         # business registration no.
        ('legal_man', '法定代表人', False),            # legal representative
        ('regist_organ', '登记机关', False),           # registration authority
        ('confirmtime', '审核/年检日期', True),        # approval / annual check date
        ('business_timeout', '营业期限', True),        # business term
        # NOTE(review): the key says "address" but the label selected is the
        # enterprise type; kept unchanged so the stored schema does not
        # shift -- confirm which one is intended.
        ('register_address', '企业类型', True),
        ('registerAddress', '注册地址', True),         # registered address
    )

    def __init__(self):
        self.db = MongoDB('172.16.74.249:27017', 'db_reptile_company',
                          'company_name')
        self.redis = REDIS(host=RedisHost, port=RedisPort,
                           password=RedisPassword, db=RedisDB)
        # Most recent record built by get_companydetails().
        self.item = {}

    def transfer(self):
        """Copy `_id` and `company_name` of every Mongo document into Redis.

        Each document is serialised with `json.dumps` and passed to
        `self.redis.add` under the key 'coampanylidt'.
        """
        for doc in self.db.mongo_find({}):
            item = {'_id': doc['_id'], 'company_name': doc['company_name']}
            # NOTE(review): the key looks like a typo of "companylist"; it is
            # a runtime identifier, so it is kept as-is -- confirm consumers.
            b = self.redis.add('coampanylidt', json.dumps(item))
            print('存入成功', b, item)

    @staticmethod
    def _cell_xpath(label, scoped=False, tail='/text()'):
        """Build the XPath selecting the cell that follows the `label` cell.

        `scoped` restricts the search to the "zx-detail-basic-table"
        element; `tail` is the final step applied to the value cell.
        """
        prefix = '//*[@class="zx-detail-basic-table"]' if scoped else ''
        return '{}//td[contains(text(),"{}")]/following-sibling::td[1]{}'.format(
            prefix, label, tail)

    def _build_item(self, res, company_name, details_href):
        """Return a fresh record dict parsed from the detail page `res`."""
        item = {}
        for key, label, scoped in self._TABLE_FIELDS:
            item[key] = res.html.xpath(self._cell_xpath(label, scoped),
                                       first=True)
        # Business scope is read from the data-content attribute.
        item['business_scope'] = res.html.xpath(
            self._cell_xpath('经营范围', tail='//@data-content'), first=True)
        # Former name.
        item['usedName'] = res.html.xpath(
            self._cell_xpath('曾用名'), first=True)
        # Source site, queried company name and source URL.
        item['web_source'] = 'https://xin.baidu.com/'
        item['company_name'] = company_name
        item['company_url'] = details_href
        item['_id'] = hashlib.md5(
            company_name.encode(encoding='utf-8')).hexdigest()
        item['web_update_time'] = time.strftime("%Y-%m-%d")
        return item

    def get_companydetails(self, company_name):
        """Look `company_name` up on xin.baidu.com and store its details.

        Returns a status message:
        * no search hit    -> the source document is flagged
          "未找到匹配的公司名" and nothing is stored;
        * name mismatch    -> the source document is flagged "公司名有问题"
          and nothing is stored;
        * otherwise        -> the parsed record is passed to
          `db1.mongo_add` and also kept in `self.item`.
        """
        res1 = s.fetch('https://xin.baidu.com/s?q={}&t=0'.format(
            parse.quote(company_name)))
        doc_id = hashlib.md5(
            company_name.encode(encoding='utf-8')).hexdigest()

        # [^"]+ instead of a greedy \S+: on compact JSON with several results
        # \S+ would run from the first pid to the last '","entName":'.
        pids = re.findall(r'\{"pid":"([^"]+)","entName":', res1.text)
        if not pids:
            self.db.mong_find_one_update({"_id": doc_id},
                                         {"flag": "未找到匹配的公司名"})
            return '未找到匹配的公司名---%s' % company_name

        details_href = 'https://xin.baidu.com//detail//compinfo?pid=' + pids[0]
        # Title of the first search hit, used to verify the match.
        # NOTE(review): restored from a commented-out lookup; confirm the
        # search page still renders this anchor. When it is missing the
        # result is None and the name is treated as unverified below.
        company = res1.html.xpath(
            '//a[@class="zx-list-item-url"]/@title', first=True)
        print(details_href)

        res = s.fetch(details_href)
        # A fresh dict per call so consecutive records never share state.
        self.item = self._build_item(res, company_name, details_href)

        if company_name != company:
            self.db.mong_find_one_update({"_id": self.item['_id']},
                                         {"flag": "公司名有问题"})
            return '公司名有问题 --- %s' % self.item['company_name']

        db1.mongo_add(self.item)
        return '%s 插入成功 !!!!' % self.item['company_name']
if down in rec_list1: down_translate = '撤销复审决定书' elif down in rec_list2: down_translate = '关于撤销连续三年未使用商标的决定' elif down in rec_list3: down_translate = '商标撤销复审答辩通知书' elif down in rec_list4: down_translate = '商标无效宣告答辩通知书' elif down in rec_list5: down_translate = '连续三年不使用撤销申请的结案通知' elif down in rec_list6: down_translate = '商标异议答辩通知书' same_pic_data = db.find_many('pic_url', pic_url) try: if same_pic_data[0]['id'] < same_pic_data[1]['id']: db.mong_find_one_update({'id': same_pic_data[0]['id']}, {"type": up_translate}) db.mong_find_one_update({'id': same_pic_data[1]['id']}, {"type": down_translate}) os.remove(r'G:\after\up.jpg') os.remove(r'G:\after\down.jpg') os.remove(r'G:\before\target.jpg') else: db.mong_find_one_update({'id': same_pic_data[0]['id']}, {"type": down_translate}) db.mong_find_one_update({'id': same_pic_data[1]['id']}, {"type": up_translate}) os.remove(r'G:\after\up.jpg') os.remove(r'G:\after\down.jpg')