def meizu():
    """Crawl Meizu's official repair price list.

    Fetches the device categories from the ``get-cat`` endpoint, then the
    priced repair items of every category from ``get-metter``, and emits
    one record per (category, repair item).

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``
        (price truncated to ``int``).  An empty list is returned when any
        step of the crawl raises, so callers never receive partial data.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://mwx-api.meizu.com/metter-price/get-cat'
    url2 = 'https://mwx-api.meizu.com/metter-price/get-metter'
    try:
        # result='j' presumably asks HhNetworm for the decoded JSON body
        # -- confirm against HhNetworm.getRes.
        js1 = hhnetworm.getRes(url1, result='j')
        for aa in js1['data']:
            # The id is sent as a plain value; the previous code wrapped it
            # in a one-element set literal, which only worked if the HTTP
            # layer happened to flatten iterables.
            js2 = hhnetworm.getRes(url2,
                                   data={'mobile_cat': aa['id']},
                                   result='j')
            for bb in js2['data']:
                dic = {
                    'business': '官修',
                    'brand': '魅族',
                    'type': '手机',
                    'model': aa['name'],
                    'color': '',
                    'malfunction': bb['repair_name'],
                    'plan': '',
                    'price': int(float(bb['price'])),
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='魅族', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report the failure and return nothing.
        # KeyboardInterrupt / SystemExit are intentionally not caught so
        # the crawl can still be interrupted.
        print("----------Wrong: {}".format('魅族'))
        traceback.print_exc()
        return []
    return rt_arr
def xiaomi():
    """Crawl Xiaomi's material price page.

    The page embeds its data as a JavaScript assignment inside the third
    child ``<script>`` of ``<body>``; the text between the first ``=`` and
    ``</script>`` is parsed as JSON.  The resulting tree is walked and each
    leaf is converted with ``dicMaker`` (a helper defined outside this
    function -- presumably it builds the price record; confirm there).

    Returns:
        list: whatever ``dicMaker`` produces for every leaf, or an empty
        list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.mi.com/service/materialprice/'
    try:
        sel1 = hhnetworm.getRes(url1, result='t-s')
        text = sel1.css("body script:nth-child(3)").extract_first()
        js1 = json.loads(text[text.find("=") + 1:text.find("</script>")])
        for aa in js1:
            for bb in aa['child']:
                if 'child' in bb:
                    try:
                        # Usual case: the children of ``bb`` are the leaves.
                        for cc in bb['child']:
                            rt_arr.append(dicMaker(cc))
                    except Exception:
                        # Otherwise assume one more nesting level and use
                        # the grandchildren as leaves.
                        for cc in bb['child']:
                            for dd in cc['child']:
                                rt_arr.append(dicMaker(dd))
                else:
                    try:
                        rt_arr.append(dicMaker(bb))
                    except Exception:
                        # ``bb`` has no 'child' key on this branch, so
                        # printing bb['child'] (as before) raised KeyError
                        # and aborted the whole crawl; show the node itself.
                        print(bb)
        HhTime.costPrinter(st_time, pjName='小米', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing.  BaseException
        # subclasses such as KeyboardInterrupt are left to propagate.
        print("----------Wrong: {}".format('小米'))
        traceback.print_exc()
        return []
    return rt_arr
def vivo():
    """Crawl vivo's official accessory price list.

    Requests the product list, then queries the accessory prices of every
    product and emits one record per (product, accessory).

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``
        (price truncated to ``int``); an empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://www.vivo.com.cn/service/accessory/product/list'
    url2 = "http://www.vivo.com.cn/service/accessory/query"
    try:
        # method='p' / result='j' are HhNetworm options (presumably POST
        # and decoded JSON -- confirm against HhNetworm.getRes).
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['data']:
            for bb in aa['products']:
                js2 = hhnetworm.getRes(url2,
                                       method='p',
                                       data={'productId': bb['id']},
                                       result='j')
                for cc in js2['data']:
                    dic = {
                        'business': '官修',
                        'brand': 'vivo',
                        'type': '手机',
                        'model': bb['name'],
                        'color': '',
                        'malfunction': cc['name'],
                        'plan': '',
                        # str() first so numeric and textual prices are
                        # both accepted by float().
                        'price': int(float(str(cc['price']))),
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='vivo', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('vivo'))
        traceback.print_exc()
        return []
    return rt_arr
def haoyun56():
    """Crawl the licence-plate code table published on haoyun56.com.

    Reads the province links from the index page, opens each province page
    (the last six characters of the link's href are passed as the
    ``province`` query value) and collects one record per table row.

    Returns:
        The result of ``dlData_haoyun56`` applied to the collected records
        (each a dict with ``source``, ``province``, ``city`` and ``code``),
        or an empty list when the crawl fails.  ``dlData_haoyun56`` is
        defined outside this function and runs outside the error handler,
        so its exceptions propagate to the caller.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://www.haoyun56.com/xue/chepaihao/'
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css("#dlProvince tr td div a"):
            pro = aa.css("::text").extract_first().strip()
            province_key = aa.css("::attr(href)").extract_first()[-6:]
            sel2 = hhnetworm.getRes(
                url1 + "?province={}".format(province_key))
            # nth-child(n+2) skips the first row of the table.
            for bb in sel2.css("#div_Provice table tr:nth-child(n+2)"):
                dic = {
                    'source': 'haoyun56',
                    "province": pro,
                    "city": bb.css(
                        "td:nth-child(2) a::text").extract_first().strip(),
                    "code": bb.css(
                        "td:nth-child(3)::text").extract_first().strip()
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='haoyun56')
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('haoyun56'))
        traceback.print_exc()
        return []
    return dlData_haoyun56(rt_arr)
def shunfeng():
    """Crawl the SF Express region tree (province / city / district).

    Starts from region ``A000086000`` and descends through the
    ``subRegions`` endpoint.  A second-level entry whose ``level`` is
    ``"4"`` is recorded directly with an empty district; every other entry
    is expanded once more and each sub-region becomes a district.

    Returns:
        list[dict]: records with the keys ``province``, ``city`` and
        ``district``; an empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://www.sf-express.com/sf-service-owf-web/service/region/A000086000/subRegions"
    url2 = "http://www.sf-express.com/sf-service-owf-web/service/region/%s/subRegions"
    try:
        js1 = hhnetworm.getRes(url1, data={'lang': 'sc'}, result='j')
        for aa in js1:
            province = aa['name']
            js2 = hhnetworm.getRes(url2 % str(aa['code']),
                                   data={'level': 2, 'lang': 'sc'},
                                   result='j')
            for bb in js2:
                city = bb['name']
                if str(bb['level']).strip() == "4":
                    districts = ['']
                else:
                    js3 = hhnetworm.getRes(url2 % str(bb['code']),
                                           data={
                                               'level': 3,
                                               'lang': 'sc',
                                               'region': 'cn'
                                           },
                                           result='j')
                    districts = [cc['name'] for cc in js3]
                for district in districts:
                    dic = {
                        'province': province,
                        'city': city,
                        'district': district
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='顺丰地址库', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('顺丰地址库'))
        traceback.print_exc()
        return []
    return rt_arr
def tcmap():
    """Crawl the licence-plate code list published on tcmap.com.cn.

    Every table under ``#list360`` is one province: the first row carries
    the province name, the remaining rows carry a city and its plate code.

    NOTE(review): this file later defines a second function that is also
    named ``tcmap``; if both live at module level the later one replaces
    this one at import time -- confirm which is intended to be callable.

    Returns:
        The result of ``dlData_tcmap`` applied to the collected records
        (each a dict with ``source``, ``province``, ``city`` and ``code``),
        or an empty list when the crawl fails.  ``dlData_tcmap`` is defined
        outside this function and runs outside the error handler.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://www.tcmap.com.cn/list/car_list.html"
    try:
        # Original note: "except Shanghai, Hainan, Chongqing, Xinjiang,
        # Qinghai" -- presumably these regions are not covered by this
        # page layout; confirm.
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css("#list360 table"):
            pro = aa.css(
                "tr:nth-child(1) td:nth-child(1) strong a::text"
            ).extract_first()
            for bb in aa.css("tr:nth-child(n+2)"):
                dic = {
                    'source': 'tcmap',
                    "province": pro,
                    "city": bb.css("td:nth-child(1) a::text").extract_first(),
                    "code": bb.css("td:nth-child(2)::text").extract_first()
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='tcmap')
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('tcmap'))
        traceback.print_exc()
        return []
    return dlData_tcmap(rt_arr)
def haocc():
    """Crawl mobile-number prefixes and their home region from 51hao.cc.

    The index page lists provinces (``div.fkt`` blocks from the third one
    on) with links to their cities; each city page lists number prefixes.
    Only prefixes that are exactly seven characters long after removing
    spaces are kept; anything else is printed as a bad number.

    Returns:
        list[dict]: records with the keys ``province``, ``city`` and
        ``number``; an empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    try:
        sel1 = hhnetworm.getRes("http://www.51hao.cc/")
        for i, aa in enumerate(sel1.css("div.fkt:nth-child(n+3)"), 1):
            province = aa.css("div.fkbj p a::text").extract_first().replace(
                " ", "")
            print("{0} :{1}".format(i, province))
            city_links = aa.css("div.fklk p a::attr(href)").extract()
            city_names = aa.css("div.fklk p a::text").extract()
            for url, city in zip(city_links, city_names):
                sel2 = hhnetworm.getRes(url)
                for bb in sel2.css("div.all ul:nth-child(n+2)"):
                    for number in bb.css("li a::text").extract():
                        number = str(number).replace(" ", "")
                        if len(number) != 7:
                            print("错误号码 :", number)
                            continue
                        dic = {
                            'province': province,
                            'city': city.replace(" ", ""),
                            'number': number
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='手机号码归属地')
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('手机号码归属地'))
        traceback.print_exc()
        return []
    return rt_arr
def jinli():
    """Crawl Gionee's official spare-part price pages.

    Walks index page -> phone series page -> part page, then reads the
    price table rows (skipping the first two rows and the last one).

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``
        (price kept as a cleaned string); an empty list when the crawl
        fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.gionee.com/id-478_op-productPart.shtml'
    url2 = 'https://www.gionee.com/'
    # Cell values that are not prices.  'one' is what remains of
    # str(None)[1:] when the fallback selector below matches nothing.
    wrong_arr = ['one', '智能机']
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css(".xl_phone a"):
            sel2 = hhnetworm.getRes(url2 +
                                    aa.css("::attr('href')").extract_first())
            for bb in sel2.css(".peijian_list li"):
                sel3 = hhnetworm.getRes(
                    url2 + bb.css("bt a::attr('href')").extract_first())
                for cc in sel3.xpath(
                        "//table[@class='MsoNormalTable']/tbody/tr[position()>2 and position()<last()]"
                ):
                    # Prefer the doubly nested span; otherwise fall back to
                    # the single span and drop its first character.
                    nested = cc.css(
                        "td:nth-child(2) p span span::text").extract_first()
                    if nested:
                        price = str(nested).strip()
                    else:
                        price = str(
                            cc.css("td:nth-child(2) p span::text").
                            extract_first())[1:].strip()
                    price = price.strip().replace('¥', '').replace(',', '')
                    if price and price not in wrong_arr:
                        dic = {
                            'business': '官修',
                            'brand': '金立',
                            'type': '手机',
                            'model': bb.css("bt a::text").extract_first(),
                            'color': '',
                            'malfunction': str(
                                cc.css("td:nth-child(1) p span::text").
                                extract_first()).strip(),
                            'plan': '',
                            'price': price,
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='金立', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('金立'))
        traceback.print_exc()
        return []
    return rt_arr
def baidu_search(wordArr, need_secondNet_title=False):
    """Run Baidu web searches and collect the first-page organic results.

    Args:
        wordArr: iterable of search terms; each one is queried with a fresh
            ``HhNetworm`` instance.
        need_secondNet_title: when true, every result link is fetched as
            well and the text of its ``<title>`` is stored under
            ``secondNet_title``.

    Returns:
        list[dict]: one record per result with the keys ``word`` (search
        term), ``title``, ``brief`` (abstract) and ``href`` (plus
        ``secondNet_title`` when requested); an empty list when any step
        fails.  ``title`` and ``brief`` are assembled by
        ``help_func_baidu``, which is defined outside this function.
    """
    st_time, rt_arr = time.time(), []
    url1 = 'https://www.baidu.com/s'
    try:
        for word in wordArr:
            hhnetworm = HhNetworm()
            # 'tn' is a fixed, required parameter.  The original notes list
            # further fixed parameters that may be omitted: 'ie' ('utf-8'),
            # 'srcqid' and 'sc'.
            data1 = {'word': word, 'tn': '88093251_hao_pg'}
            sel1 = hhnetworm.getRes(url1, data=data1, verify=False)
            for aa in sel1.css("#content_left div.result.c-container"):
                # Highlighted (<em>) and plain text fragments are extracted
                # separately and merged by help_func_baidu.
                title_1 = aa.css("h3 a:nth-child(1) em::text").extract()
                title_2 = aa.css("h3 a:nth-child(1)::text").extract()
                brief_1 = aa.css("div.c-abstract em::text").extract()
                brief_2 = aa.css("div.c-abstract::text").extract()
                href = aa.css("h3 a:nth-child(1)::attr(href)").extract_first()
                dic = {
                    'word': word,
                    'title': help_func_baidu(title_1, title_2),
                    'brief': help_func_baidu(brief_1, brief_2),
                    'href': href
                }
                if need_secondNet_title:
                    sel2 = hhnetworm.getRes(href)
                    # Title of the linked (second-level) page.
                    dic['secondNet_title'] = sel2.css(
                        "head title::text").extract_first()
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='模拟百度搜索')
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('模拟百度搜索'))
        traceback.print_exc()
        return []
    return rt_arr
def samsung(model_arr):
    """Crawl Samsung's official material price list for the given models.

    For every model and every material category returned by
    ``GetCailAjax.aspx`` the search form is submitted and the result table
    is read.  Rows whose name contains '辅料' (auxiliary material) are not
    emitted themselves; their prices are summed and added to the price of
    every other row of the same query.

    Args:
        model_arr: iterable of model names, sent as the ``txtModel`` form
            field.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``
        (``int``); an empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://support-cn.samsung.com/supportcn/support/material_price/GetCailAjax.aspx"
    url2 = 'http://support-cn.samsung.com/supportcn/support/material_price/default.aspx'
    try:
        js1 = hhnetworm.getRes(url1, data={'pid': 1}, result='j')
        for model in model_arr:
            for aa in js1['Items']:
                sel1 = hhnetworm.getRes(url2,
                                        method='p',
                                        data={
                                            '__EVENTTARGET': '',
                                            'btnSearch': '提交',
                                            'ddlCail': aa['PName'],
                                            'ddlProduct': 1,
                                            'txtModel': model
                                        })
                rows = []  # (malfunction, price) pairs of this query
                for bb in sel1.css(
                        "div.table_box:nth-child(4) table:nth-child(1) tr:nth-child(n+2)"
                ):
                    price = int(
                        bb.css(
                            "td:nth-child(2)::text").extract_first().strip())
                    malfunction = bb.css(
                        "td:nth-child(1)::text").extract_first().strip()
                    # Keep only the text after the first space.
                    malfunction = malfunction[malfunction.find(' ') + 1:]
                    rows.append((malfunction, price))
                # Total price of the auxiliary-material rows.
                help_price = sum(price for name, price in rows
                                 if '辅料' in name)
                for name, price in rows:
                    if '辅料' in name:
                        continue
                    dic = {
                        'business': '官修',
                        'brand': '三星',
                        'type': '手机',
                        'model': model,
                        'color': '',
                        'malfunction': name,
                        'plan': '',
                        'price': price + help_price,
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='三星', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('三星'))
        traceback.print_exc()
        return []
    return rt_arr
def jikexiu():
    """Crawl repair quotes from jikexiu.com.

    Walks brand -> device -> colour attributes -> malfunctions ->
    solutions.  All colour names of a device are joined with commas; the
    attribute id and value id of the *last* colour are the ones sent with
    the follow-up requests.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.jikexiu.com/common/brands.json'
    url2 = 'https://www.jikexiu.com/order/selSolution'
    url3 = 'https://www.jikexiu.com/order/getDeviceAttributeList.json'
    url4 = 'https://www.jikexiu.com/order/getDeviceSolution.json'
    url5 = 'https://www.jikexiu.com/order/getSolutionMalfunction.json'
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['brandList']:
            sel2 = hhnetworm.getRes(url2,
                                    data={
                                        'brandId': aa['id'],
                                        'categoryId': 12
                                    })
            for bb in sel2.css("#selectDevice ul li"):
                # Extracted once; every request below needs it.
                device_id = bb.css("::attr(deviceid)").extract_first()
                colors = []
                attributeId, color_id = '', ''
                js3 = hhnetworm.getRes(url3,
                                       method='p',
                                       result='j',
                                       data={'deviceId': device_id})
                for cc in js3['deviceAttributeList']:
                    colors.append(cc['attributeValue'])
                    attributeId, color_id = cc['attributeId'], cc['id']
                color = ",".join(colors)
                js4 = hhnetworm.getRes(url4,
                                       method='p',
                                       result='j',
                                       data={
                                           'attrs[0].attributeId': attributeId,
                                           'attrs[0].valueId': color_id,
                                           'deviceId': device_id
                                       })
                for dd in js4['malfunctionList']:
                    js5 = hhnetworm.getRes(url5,
                                           method='p',
                                           result='j',
                                           data={
                                               'attrs[0].attributeId':
                                               attributeId,
                                               'attrs[0].valueId': color_id,
                                               'deviceId': device_id,
                                               'malfunctionId': dd['id']
                                           })
                    for ee in js5['solutionMalfunctionList']:
                        dic = {
                            'business': '极客修',
                            'brand': aa['name'],
                            'type': '手机',
                            'model': bb.css("::text").extract_first(),
                            'color': color,
                            'malfunction': dd['name'],
                            'plan': ee['method'],
                            'price': ee['price'],
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='极客修', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('极客修'))
        traceback.print_exc()
        return []
    return rt_arr
def huawei():
    """Crawl Huawei's official spare-part price list.

    Requests the product list for product id 4903 and then the part list
    of every product.  Parts whose price renders as the string ``'None'``
    are skipped.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``
        (price kept as a string without the currency sign); an empty list
        when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://consumer.huawei.com/support/services/service/parts/product/list'
    url2 = 'http://consumer.huawei.com/support/services/service/parts/list'
    # str(None) -- i.e. the part has no price.
    wrong_arr = ['None']
    try:
        js1 = hhnetworm.getRes(url1,
                               result='j',
                               data={
                                   'json':
                                   'jQuery111308920180139684155_1508484428667',
                                   'productId': 4903,
                                   '_': 1508484428671,
                                   'siteCode': 'cn'
                               })
        for aa in js1:
            js2 = hhnetworm.getRes(
                url2,
                result='j',
                data={
                    'json': 'jQuery111308920180139684155_1508484428667',
                    'productCode': aa['productCode'],
                    '_': 1508484428672,
                    'siteCode': 'cn'
                })
            for bb in js2:
                price = str(bb['price']).replace("¥", '')
                if price in wrong_arr:
                    continue
                dic = {
                    'business': '官修',
                    'brand': '华为',
                    'type': '手机',
                    'model': aa['productTypeName'],
                    'color': '',
                    'malfunction': bb['partsType'],
                    'plan': '',
                    'price': price,
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='华为', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('华为'))
        traceback.print_exc()
        return []
    return rt_arr
def hiweixiu():
    """Crawl repair quotes from hiweixiu.com.

    Reads the brand list, then the models of every brand, then the detail
    page of every model.  The detail page carries its quotes as JSON in
    the ``value`` attribute of ``input.rp_info``; that JSON is a mapping
    nested three levels deep whose innermost values are the quote entries.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.hiweixiu.com/step/selectInfo'
    url2 = 'https://www.hiweixiu.com/step/getMouldlistsByBrandid'
    url3 = 'https://www.hiweixiu.com/step/detailInfo'
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css("div.brand_list ul li"):
            brand = aa.css("a::text").extract_first().strip()
            js2 = hhnetworm.getRes(
                url2,
                result='j',
                data={'brand_id': aa.css("::attr(data-id)").extract_first()
                      })['data']['mould']
            for moulds in js2.values():
                for bb in moulds:
                    sel3 = hhnetworm.getRes(url3, data={'mid': bb['MouldId']})
                    rp_info = json.loads(
                        sel3.css("input.rp_info::attr(value)").extract_first())
                    # Only the innermost values are needed; the keys of
                    # the three nesting levels are not used.
                    for level1 in rp_info.values():
                        for level2 in level1.values():
                            for data in level2.values():
                                dic = {
                                    'business': 'Hi维修',
                                    'brand': brand,
                                    'type': bb['ProductName'],
                                    'model': bb['MouldName'],
                                    'color':
                                    data['ColorName'].replace("/", ","),
                                    'malfunction':
                                    data['faulttype_detail_name'],
                                    'plan': data['RepairType'],
                                    'price': data['Price'],
                                }
                                rt_arr.append(dic)
                                print(dic)
        HhTime.costPrinter(st_time, pjName='Hi维修', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('Hi维修'))
        traceback.print_exc()
        return []
    return rt_arr
def maoyan100():
    """Crawl the Maoyan board at ``/board/4`` (ten pages of ten films).

    The release-time text is split into a year part and, when it contains
    a parenthesised suffix, a country part.

    Returns:
        list[dict]: records with the keys ``source``, ``name``, ``sorce``
        (score as ``float``; the key spelling is kept because consumers
        may depend on it), ``type``, ``country``, ``year``, ``director``,
        ``actors``, ``pictureUrl`` and ``have_watched``; an empty list
        when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://maoyan.com/board/4'
    try:
        for offset in range(0, 100, 10):
            sel1 = hhnetworm.getRes(url1, data={'offset': offset})
            for aa in sel1.css("#app div div div.main dl dd:nth-child(n+1)"):
                actors = aa.css("p.star::text").extract_first()
                year = aa.css("p.releasetime::text").extract_first()
                # Drop the label in front of the colon.
                year = year[year.find(':') + 1:]
                has_country = (year.find('(') != -1
                               and year.find(')') != -1)
                if has_country:
                    country = year[year.find('(') + 1:year.find(')')]
                    release = year[:year.find('(')]
                else:
                    country, release = '', year
                dic = {
                    'source': '猫眼',  # vendor
                    "name": aa.css("a::attr(title)").extract_first(),
                    # The score is rendered as integer + fraction parts.
                    "sorce": float(
                        aa.css("i.integer::text").extract_first() +
                        aa.css("i.fraction::text").extract_first()),
                    "type": '',  # genre (not available on this page)
                    "country": country,
                    "year": release,
                    "director": '',  # not available on this page
                    "actors": actors[actors.find(':') + 1:].strip(),
                    "pictureUrl":
                    aa.css("a img::attr(data-src)").extract_first(),
                    'have_watched': 'N'  # watched flag, always 'N' here
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='猫眼100', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('猫眼100'))
        traceback.print_exc()
        return []
    return rt_arr
def shanxiuxia():
    """Crawl repair quotes from the shanxiuxia.com API.

    Loads the brand list and the phone list, then for every phone its
    malfunction classes and their details.  A detail row is emitted only
    when the phone's ``brand_id`` matches a brand in the brand list (the
    first match supplies the brand name).

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://api.shanxiuxia.com/api/PhoneType/brand'
    url2 = 'http://api.shanxiuxia.com/api/PhoneType/brandPhone'
    url3 = 'http://api.shanxiuxia.com/api/PhoneType/malclass'
    url4 = 'http://api.shanxiuxia.com/api/PhoneType/maldetails'
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        js2 = hhnetworm.getRes(url2, method='p', result='j')
        for aa in js2['data']:
            js3 = hhnetworm.getRes(url3,
                                   method='p',
                                   result='j',
                                   data={'id': aa['id']})
            for bb in js3['data']['malfunction']:
                js4 = hhnetworm.getRes(url4,
                                       method='p',
                                       result='j',
                                       data={
                                           'id': aa['id'],
                                           'type_id': bb['id']
                                       })
                for cc in js4['data']:
                    # First brand whose id matches this phone, if any.
                    brand = next((dd for dd in js1['data']
                                  if aa['brand_id'] == dd['id']), None)
                    if brand is None:
                        continue
                    dic = {
                        'business': '闪修侠',
                        'brand': brand['name'],
                        'type': aa['category'],
                        'model': aa['name'],
                        'color': aa['color'],
                        'malfunction': cc['malfunction'],
                        'plan': '',
                        'price': cc['price_reference'],
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='闪修侠', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('闪修侠'))
        traceback.print_exc()
        return []
    return rt_arr
def tcl():
    """Crawl repair quotes from the TCL service API.

    Walks brand (skipping '其它') -> model -> colours -> material prices
    and joins every material row with the model's fault list on
    ``faultId``.  The material prices are requested for the *first* colour
    of the model only; all colour names are joined with commas.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/brands'
    url2 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/models/%s'
    url3 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/colours/%s'
    url4 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/material/%s'
    url5 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/fault/%s'
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['data']:
            if aa['brandName'] == '其它':
                continue
            js2 = hhnetworm.getRes(url2 % aa['brandId'], result='j')
            for bb in js2['data']:
                js3 = hhnetworm.getRes(url3 % bb['modelId'],
                                       method='p',
                                       result='j')
                color = ",".join(each['colourName'] for each in js3['data'])
                js4 = hhnetworm.getRes(url4 % js3['data'][0]['colourId'],
                                       method='p',
                                       result='j')
                # The fault list depends only on the model, so it is
                # requested once per model (and only if there is at least
                # one material row) instead of once per material row.
                fault_info = None
                for cc in js4['data']:
                    if fault_info is None:
                        js5 = hhnetworm.getRes(url5 % bb['modelId'],
                                               method='p',
                                               result='j')
                        fault_info = js5['data']['faultInfo']
                    for dd in fault_info:
                        if cc['faultId'] != dd['faultId']:
                            continue
                        dic = {
                            'business': 'TCL',
                            'brand': aa['brandName'],
                            'type': '手机',
                            'model': bb['mobileName'],
                            'color': color,
                            'malfunction': dd['faultName'],
                            'plan': dd['faultPlan'],
                            'price': cc['price'],
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='tcl', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('tcl'))
        traceback.print_exc()
        return []
    return rt_arr
def mashangxiu():
    """Crawl repair quotes from mashangxiu.com.

    Reads the brand ids from the navigation of the repair page, then the
    models of every brand and the material types of every model.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.mashangxiu.com/repair/repair'
    url2 = 'https://www.mashangxiu.com/device/acquireDeviceByBrandName'
    url3 = 'https://www.mashangxiu.com/device/acquireDeviceByBrandAndModel'
    try:
        sel1 = hhnetworm.getRes(url1)
        # The element id of each navigation link doubles as the brand name
        # sent to the API.
        for aa in sel1.css(".mobileNav ul li a::attr('id')").extract():
            js2 = hhnetworm.getRes(url2,
                                   result='j',
                                   data={
                                       'needColor': 'Y',
                                       'productBrand': aa
                                   })
            for bb in js2['modelList']:
                js3 = hhnetworm.getRes(url3,
                                       result='j',
                                       data={
                                           'needColor': 'Y',
                                           'productBrand': aa,
                                           'productModel': bb
                                       })
                for cc in js3['materialTypeList']:
                    dic = {
                        'business': '马上修',
                        'brand': aa,
                        'type': '手机',
                        'model': bb,
                        'color': '',
                        'malfunction': cc['materialName'],
                        'plan': '',
                        'price': cc['outerFee'],
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='马上修', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('马上修'))
        traceback.print_exc()
        return []
    return rt_arr
def apple():
    """Crawl Apple's iPhone screen-repair price table (zh-cn page).

    Each table row lists one or more models separated by '、' in its first
    cell.  The price is read from the second cell and, when that converts
    to a falsy value, from the third cell.  One record is emitted per
    model, always with the malfunction '内屏'.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price`` (as
        converted by ``HhBase.toInt``, defined outside this function); an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://support.apple.com/zh-cn/iphone/repair/service/pricing'
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css(
                "#faq-regular div:nth-child(4) #tableWraper table tr:nth-child(n+2)"
        ):
            # The model cell may consist of several text nodes.
            model = ''.join(
                bb.strip()
                for bb in aa.css("td:nth-child(1)::text").extract())
            price = HhBase.toInt(
                aa.css("td:nth-child(2)::text").extract_first().replace(
                    "RMB ", "").replace(",", ""))
            if not price:
                price = HhBase.toInt(
                    aa.css("td:nth-child(3)::text").extract_first().replace(
                        "RMB ", "").replace(",", ""))
            for bb in model.split('、'):
                dic = {
                    'business': '官修',
                    'brand': '苹果',
                    'type': '手机',
                    'model': bb,
                    'color': '',
                    'malfunction': '内屏',
                    'plan': '',
                    'price': price,
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='苹果', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('苹果'))
        traceback.print_exc()
        return []
    return rt_arr
def oppo():
    """Crawl OPPO's official spare-part price list.

    Reads the model names from the drop-down on the part page and requests
    the product list of every model.  The JSON answer carries an HTML
    fragment under ``data``; part names are paired with the price cells
    after the first one (the first price cell is skipped).

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``
        (truncated to ``int``); an empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.oppo.com/cn/service/part'
    url2 = 'https://www.oppo.com/cn/service/productlist'
    # Price cells that carry no price.
    wrong_arr = [' ']
    try:
        sel1 = hhnetworm.getRes(url1)
        for model in sel1.css(
                "#part-select div.select-dropdown ul li span::text").extract():
            js2 = hhnetworm.getRes(url2,
                                   data={
                                       'isapp': 0,
                                       'mobile': model
                                   },
                                   result='j')
            sel2 = Selector(js2['data'])
            names = sel2.css("div.part-list-name span::text").extract()
            prices = sel2.css("div.part-list-price::text").extract()[1:]
            for name, price in zip(names, prices):
                if price in wrong_arr:
                    continue
                dic = {
                    'business': '官修',
                    'brand': 'oppo',
                    'type': '手机',
                    'model': model,
                    'color': '',
                    'malfunction': name,
                    'plan': '',
                    'price': int(float(price.strip().replace('¥', ''))),
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='oppo', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('oppo'))
        traceback.print_exc()
        return []
    return rt_arr
def douban250():
    """Crawl the Douban Top 250 film list (ten pages of 25 entries).

    The first paragraph of every entry holds two text lines: a credits
    line ("导演: ... 主演: ...") and a "year / country / genre" line.
    Both are stripped of spaces before being split.

    Returns:
        list[dict]: records with the keys ``source``, ``name``, ``sorce``
        (score as ``float``; the key spelling is kept because consumers
        may depend on it), ``type``, ``country``, ``year`` (``int``),
        ``director``, ``actors``, ``pictureUrl`` and ``have_watched``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "https://movie.douban.com/top250?start=%d&filter="
    # The previous code chained two visually identical .replace(" ", "")
    # calls; the second presumably targeted a non-breaking space.  The
    # whitespace characters are spelled out explicitly here (ASCII space,
    # NBSP, ideographic space) -- confirm against the original bytes.
    drop_spaces = {ord(" "): None, ord("\xa0"): None, ord("\u3000"): None}
    director_tag, actors_tag = "导演:", "主演:"
    try:
        for myindex in range(10):
            sel = hhnetworm.getRes(url1 % (myindex * 25))
            for zzz in sel.css(
                    "#content div div.article ol.grid_view li div.item"):
                for aa in zzz.css("div.info"):
                    name = "".join(
                        bb.css("::text").extract_first()
                        for bb in aa.css("div.hd a span"))
                    body = aa.css("div.bd p:nth-child(1)::text").extract()
                    introduction1 = body[0].translate(drop_spaces)
                    introduction2 = body[1].translate(drop_spaces)

                    director_start = (introduction1.find(director_tag) +
                                      len(director_tag))
                    actors_pos = introduction1.find(actors_tag)
                    if actors_pos == -1:
                        # No actors marker on this entry: previously the
                        # -1 was used as a slice bound, which cut the last
                        # character off the director and filled ``actors``
                        # with unrelated text.
                        director = introduction1[director_start:]
                        actors = ""
                    else:
                        director = introduction1[director_start:actors_pos]
                        actors = introduction1[actors_pos + len(actors_tag):]
                    director = director.replace("'", "~")
                    actors = actors.replace("'", "~")

                    # "year/country/genre": consume one field at a time.
                    year = int(introduction2[:introduction2.find("/")].replace(
                        "\n", "")[:4])
                    introduction2 = introduction2[introduction2.find("/") + 1:]
                    country = introduction2[:introduction2.find(
                        "/")].translate(drop_spaces)
                    introduction2 = introduction2[introduction2.find("/") + 1:]
                    # NOTE(review): when no further "/" is left, find()
                    # returns -1 and this slice drops the final character
                    # (presumably the trailing newline) -- confirm.
                    genre = introduction2[:introduction2.find(
                        "/")].translate(drop_spaces)
                    dic = {
                        'source': '豆瓣',  # vendor
                        "name": name.translate(drop_spaces).replace("'", "~"),
                        "sorce": float(
                            aa.css("div.bd div span.rating_num::text").
                            extract_first()),  # score
                        "type": genre,
                        "country": country,
                        "year": year,
                        "director": director,
                        "actors": actors,
                        "pictureUrl":
                        zzz.css("div.pic a img::attr(src)").extract_first(),
                        'have_watched': 'N'  # watched flag, always 'N' here
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='豆瓣250', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('豆瓣250'))
        traceback.print_exc()
        return []
    return rt_arr
def jihouhou():
    """Crawl repair quotes from hohofast.com by replaying its order wizard.

    The requests are issued in the same order as the site's wizard (model
    submit -> part list -> part submit -> appraisement), presumably
    because the server keeps the selection in the session -- confirm.
    All colour names of a model are joined with commas; the uuid of the
    *last* colour is the one submitted.  A record is emitted only when the
    appraisement returns at least one common tech item; its first item
    supplies plan and price.

    Returns:
        list[dict]: records with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; an
        empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://www.hohofast.com/api/web/order/create'
    url2 = "http://www.hohofast.com/api/web/brand/list"
    url3 = "http://www.hohofast.com/api/web/model/list"
    url4 = "http://www.hohofast.com/api/web/model/submit"
    url5 = "http://www.hohofast.com/api/web/order/other"
    url6 = "http://www.hohofast.com/api/web/order/otherSubmit"
    url7 = "http://www.hohofast.com/api/web/brief/appraisement/data"
    try:
        sel1 = hhnetworm.getRes(url1, method='p')
        for aa in sel1.css("#select_brand div"):
            brand_id = aa.css("::attr(data-brand)").extract_first()
            js2 = hhnetworm.getRes(url2,
                                   method='p',
                                   data={'uuid': brand_id},
                                   result='j')
            for bb in js2['items']:
                js3 = hhnetworm.getRes(url3,
                                       data={
                                           'type': brand_id,
                                           'uuid': bb['uuid']
                                       },
                                       result='j')
                for cc in js3['items']:
                    colors, color_id = [], ''
                    for dd in json.loads(cc['info'])['colors']:
                        colors.append(dd['color'])
                        color_id = dd['uuid']
                    color = ",".join(colors)
                    hhnetworm.getRes(url4,
                                     method='p',
                                     data={
                                         'type': brand_id,
                                         'bUuid': bb['uuid'],
                                         'mUuid': cc['uuid'],
                                         'color': color_id
                                     })
                    sel5 = hhnetworm.getRes(url5)
                    for ee in sel5.css(
                            "#select_part div.item.malfunction-item"):
                        hhnetworm.getRes(
                            url6,
                            method='p',
                            data={
                                'part':
                                ee.css("::attr(data-part)").extract_first(),
                                'service': 1
                            })
                        js7 = hhnetworm.getRes(url7, result='j')
                        tech_items = js7['data']['commonTechItems']
                        if not tech_items:
                            continue
                        dic = {
                            'business': '极吼吼',
                            'brand':
                            aa.css("::attr(data-name)").extract_first(),
                            'type': bb['name'],
                            'model': cc['name'],
                            'color': color,
                            'malfunction': ee.css("h5::text").extract_first(),
                            'plan': tech_items[0]['solution']['showname'],
                            'price': tech_items[0]['price'],
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='极吼吼', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('极吼吼'))
        traceback.print_exc()
        return []
    return rt_arr
def wuyixiu():
    """Crawl repair quotes from 51xiu.cc.

    Reads the brand tiles of the repair page, the versions of every brand
    and the colour selection of every version.  Colour names are joined
    with commas; the ``colorid`` of the *last* colour is used to request
    the repair plans.

    Returns:
        The result of ``dl_wuyixiu_data`` applied to the collected records
        (dicts with the keys ``business``, ``brand``, ``type``, ``model``,
        ``color``, ``malfunction``, ``plan`` and ``price``), or an empty
        list when the crawl fails.  ``dl_wuyixiu_data`` is defined outside
        this function and runs outside the error handler.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url2 = 'http://www.51xiu.cc/fi/choosePlans'
    url3 = 'http://www.51xiu.cc/fi/select'
    url4 = 'http://www.51xiu.cc/fi/getPlan'
    try:
        sel1 = hhnetworm.getRes('http://www.51xiu.cc/repair')
        for aa in sel1.css("div.big-box div div:nth-child(2) div div"):
            bId = aa.css("::attr('brandid')").extract_first().strip()
            tId = aa.css("::attr('tid')").extract_first().strip()
            js2 = hhnetworm.getRes(url2,
                                   method='p',
                                   data={
                                       'bId': bId,
                                       'tId': tId
                                   },
                                   result='j')
            for bb in js2['versionList']:
                versionName = bb['versionName']  # model name
                colors, cId = [], ''
                text3 = hhnetworm.getRes(url3,
                                         data={
                                             'bId': bId,
                                             'tId': tId,
                                             'versionName': versionName,
                                             'versionId': bb['id']
                                         },
                                         result='t')
                color_box = BeautifulSoup(text3, 'lxml').find('div',
                                                              class_='clear')
                # Only the children at odd positions are used; presumably
                # the even ones are whitespace text nodes -- confirm.
                for index, each in enumerate(color_box):
                    if index % 2:
                        colors.append(each.text)
                        cId = each['colorid']
                color = ','.join(colors)
                js4 = hhnetworm.getRes(url4,
                                       method='p',
                                       result='j',
                                       data={'cId': cId})
                for cc in js4['result']:
                    dic = {
                        'business': '51修',
                        'brand': aa.css("::text").extract_first().strip(),
                        'type': '手机',
                        'model': versionName,
                        'color': color,
                        'malfunction': cc['detail'],
                        'plan': cc['plan'],
                        'price': cc['price'],
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='51修', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('51修'))
        traceback.print_exc()
        return []
    return dl_wuyixiu_data(rt_arr)
def tcmap():
    """Crawl administrative-division data (ID-card codes) from tcmap.com.cn.

    Walks province list -> province page (one table row per city) ->
    district pages linked from the sixth cell of each city row, and emits
    one record per district table.  District pages are downloaded with
    ``requests`` directly and decoded as gb18030, falling back to utf-8.

    NOTE(review): an earlier function in this file is also named
    ``tcmap``; if both live at module level this later definition is the
    one that remains callable -- confirm that is intended.

    Returns:
        list[dict]: records with the keys ``province``, ``id1``, ``city``,
        ``id2``, ``district``, ``id3``, ``phoneAreaCode``, ``postCode``,
        ``carCode``, ``population`` and ``s_area``; an empty list when the
        crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://www.tcmap.com.cn/list/daima_list.html"
    con_url = "http://www.tcmap.com.cn"
    try:
        # The three page levels get their own names; previously a single
        # ``sel`` variable was re-bound while outer loops were running.
        index_sel = hhnetworm.getRes(url1)
        for i, aa in enumerate(index_sel.css("#list360"), 1):
            province = aa.css("strong a::text").extract_first().replace(
                " ", "")
            print("{0} :{1}".format(i, province))
            province_sel = hhnetworm.getRes(
                con_url + aa.css("strong a::attr(href)").extract_first())
            for bb in province_sel.css(
                    "#page_left table:nth-child(5) tr:nth-child(n+2)"):
                for cc in bb.css("td:nth-child(6) a::attr(href)").extract():
                    res = requests.get(con_url + str(cc)).content
                    # Try the encodings in turn.
                    district_sel = None
                    try:
                        district_sel = Selector(res.decode("gb18030"))
                    except Exception:
                        try:
                            district_sel = Selector(res.decode("utf-8"))
                        except Exception:
                            print("decode all failed!")
                    # Truthiness (not ``is not None``) is kept on purpose:
                    # it is what the previous code tested.
                    if not district_sel:
                        continue
                    for dd in district_sel.css(
                            "#page_left div:nth-child(4) div:nth-child(2) table"
                    ):
                        # The [1:] slices drop the first character of the
                        # cell (presumably a leading separator -- confirm).
                        dic = {
                            'province': province,
                            # ID-card code, level 1
                            'id1': aa.css("::text").extract_first().replace(
                                " ", ""),
                            'city': bb.css("td strong a::text").extract_first(
                            ).replace(" ", ""),
                            # ID-card code, level 2
                            'id2': bb.css("td:nth-child(5)::text").
                            extract_first().replace(" ", ""),
                            'district': dd.css(
                                "tr:nth-child(1) td:nth-child(1)::text"
                            ).extract_first().replace(" ", "")[1:],
                            # ID-card code, level 3
                            'id3': dd.css(
                                "tr:nth-child(2) td:nth-child(2)::text"
                            ).extract_first().replace(" ", "")[1:],
                            # telephone area code
                            'phoneAreaCode': bb.css("td:nth-child(4)::text").
                            extract_first().replace(" ", ""),
                            # postal code
                            'postCode': dd.css(
                                "tr:nth-child(3) td:nth-child(2)::text"
                            ).extract_first().replace(" ", "")[1:],
                            # licence-plate code
                            'carCode': dd.css(
                                "tr:nth-child(4) td:nth-child(1)::text"
                            ).extract_first().replace(" ", "")[1:],
                            # population (normalised by check_1, which is
                            # defined outside this function)
                            'population': check_1(
                                dd.css(
                                    "tr:nth-child(5) td:nth-child(1)::text"
                                ).extract_first()),
                            # area
                            's_area': check_1(
                                dd.css("tr:nth-child(6) td::text").
                                extract_first())
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='身份证户籍编号')
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('身份证户籍编号'))
        traceback.print_exc()
        return []
    return rt_arr
def jingdong():
    """Crawl the JD.com delivery address tree.

    Descends province -> city -> district and asks for a fourth level for
    every district.  When the fourth-level answer reports ``hasNext`` each
    of its entries becomes a record; otherwise a single record with an
    empty ``area`` is emitted for the district.

    Returns:
        list[dict]: records with the keys ``province``, ``city``,
        ``district`` and ``area``; an empty list when the crawl fails.
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://psfw.jd.com/help/front/initArea.do?"
    url2 = "http://psfw.jd.com/help/front/initCity.do"
    url3 = "http://psfw.jd.com/help/front/initArea.do"
    url4 = "http://psfw.jd.com/help/front/initFouth.do"
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['result']['resultList']:
            province, province_id = aa['name'], str(aa['id'])
            js2 = hhnetworm.getRes(url2,
                                   method='p',
                                   data={'provinceId': province_id},
                                   result='j')
            for bb in js2['result']['resultList']:
                city, city_id = bb['name'], str(bb['id'])
                js3 = hhnetworm.getRes(url3,
                                       method='p',
                                       result='j',
                                       data={
                                           'provinceId': province_id,
                                           'cityId': city_id
                                       })
                for cc in js3['result']['resultList']:
                    district, district_id = cc['name'], str(cc['id'])
                    js4 = hhnetworm.getRes(url4,
                                           method='p',
                                           result='j',
                                           data={
                                               'provinceId': province_id,
                                               'cityId': city_id,
                                               'areaId': district_id
                                           })
                    if js4['result']['hasNext']:
                        areas = [
                            dd['name'] for dd in js4['result']['resultList']
                        ]
                    else:
                        areas = ['']
                    for area in areas:
                        dic = {
                            'province': province,
                            'city': city,
                            'district': district,
                            'area': area
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='京东地址库', dataArr=rt_arr)
    except Exception:
        # Best-effort crawler: report and return nothing; interrupts such
        # as KeyboardInterrupt still propagate.
        print("----------Wrong: {}".format('京东地址库'))
        traceback.print_exc()
        return []
    return rt_arr