예제 #1
0
파일: meizu.py 프로젝트: hellohman/netWorm
def meizu():
    """Collect Meizu official repair prices.

    Requests the category list (``get-cat``) and, for each category, its
    price rows (``get-metter``).  Every row becomes one record dict with the
    keys business/brand/type/model/color/malfunction/plan/price, which is
    printed and collected.

    Returns:
        list: every record if the whole crawl succeeded, otherwise ``[]``
        (the failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://mwx-api.meizu.com/metter-price/get-cat'
    url2 = 'https://mwx-api.meizu.com/metter-price/get-metter'
    try:
        js1 = hhnetworm.getRes(url1, result='j')
        for aa in js1['data']:
            # Send the category id as a plain value.  It used to be wrapped
            # in a one-element set literal, which is not a meaningful query
            # value.
            js2 = hhnetworm.getRes(url2,
                                   data={'mobile_cat': aa['id']},
                                   result='j')
            for bb in js2['data']:
                # Prices are truncated to whole numbers.
                price = int(float(bb['price']))
                dic = {
                    'business': '官修',
                    'brand': '魅族',
                    'type': '手机',
                    'model': aa['name'],
                    'color': '',
                    'malfunction': bb['repair_name'],
                    'plan': '',
                    'price': price,
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='魅族', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('魅族'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #2
0
def xiaomi():
    """Collect Xiaomi repair material prices from the service page.

    The price tree is embedded in the third ``<script>`` child of ``<body>``
    as ``<name> = <json>``.  The JSON is cut out, parsed and walked; leaf
    nodes are converted with ``dicMaker`` and collected.

    Returns:
        list: every record if the whole crawl succeeded, otherwise ``[]``
        (the failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.mi.com/service/materialprice/'
    try:
        sel1 = hhnetworm.getRes(url1, result='t-s')
        text = sel1.css("body script:nth-child(3)").extract_first()
        # Payload = everything between the first "=" and the closing tag.
        js1 = json.loads(text[text.find("=") + 1:text.find("</script>")])
        for aa in js1:
            for bb in aa['child']:
                if 'child' in bb:
                    try:
                        for cc in bb['child']:
                            rt_arr.append(dicMaker(cc))
                    except Exception:
                        # dicMaker rejected a child: fall back to treating
                        # the grandchildren as the leaf level.
                        for cc in bb['child']:
                            for dd in cc['child']:
                                rt_arr.append(dicMaker(dd))
                else:
                    try:
                        rt_arr.append(dicMaker(bb))
                    except Exception:
                        # This branch has no 'child' key, so printing
                        # bb['child'] (as before) raised KeyError and aborted
                        # the whole crawl; show the offending node instead.
                        print(bb)
        HhTime.costPrinter(st_time, pjName='小米', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('小米'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #3
0
def vivo():
    """Collect vivo official accessory/repair prices.

    POSTs for the product list, then POSTs one query per product id and
    turns every returned row into a record dict (business/brand/type/model/
    color/malfunction/plan/price) that is printed and collected.

    Returns:
        list: every record if the whole crawl succeeded, otherwise ``[]``
        (the failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://www.vivo.com.cn/service/accessory/product/list'
    url2 = "http://www.vivo.com.cn/service/accessory/query"
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['data']:
            for bb in aa['products']:
                js2 = hhnetworm.getRes(url2,
                                       method='p',
                                       data={'productId': bb['id']},
                                       result='j')
                for cc in js2['data']:
                    # Go through str() first, then truncate to a whole number.
                    price = int(float(str(cc['price'])))
                    dic = {
                        'business': '官修',
                        'brand': 'vivo',
                        'type': '手机',
                        'model': bb['name'],
                        'color': '',
                        'malfunction': cc['name'],
                        'plan': '',
                        'price': price,
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='vivo', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('vivo'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #4
0
def haoyun56():
    """Collect licence-plate codes per province/city from haoyun56.com.

    Reads the province links on the index page, opens each province page
    (the query value is the last six characters of the link's href) and
    records source/province/city/code for every table row after the header.

    Returns:
        The result of ``dlData_haoyun56(records)`` if the whole crawl
        succeeded, otherwise ``[]`` (the failure and its traceback are
        printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://www.haoyun56.com/xue/chepaihao/'
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css("#dlProvince tr td div a"):
            pro = aa.css("::text").extract_first().strip()
            province_key = aa.css("::attr(href)").extract_first()[-6:]

            sel2 = hhnetworm.getRes(url1 + "?province={}".format(province_key))
            for bb in sel2.css("#div_Provice table tr:nth-child(n+2)"):
                city = bb.css("td:nth-child(2) a::text").extract_first()
                code = bb.css("td:nth-child(3)::text").extract_first()
                dic = {
                    'source': 'haoyun56',
                    "province": pro,
                    "city": city.strip(),
                    "code": code.strip(),
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='haoyun56')
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('haoyun56'))
        traceback.print_exc()
        return []
    # Post-processing stays outside the try, as before: its errors propagate.
    return dlData_haoyun56(rt_arr)
예제 #5
0
def shunfeng():
    """Collect the SF Express province/city/district address tree.

    Starts from region ``A000086000``, requests the sub-regions of every
    province and then of every city.  A second-level entry whose ``level``
    is "4" is recorded with an empty district instead of being expanded.

    Returns:
        list: ``{'province', 'city', 'district'}`` dicts if the whole crawl
        succeeded, otherwise ``[]`` (the failure and its traceback are
        printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://www.sf-express.com/sf-service-owf-web/service/region/A000086000/subRegions"
    url2 = "http://www.sf-express.com/sf-service-owf-web/service/region/%s/subRegions"
    try:
        js1 = hhnetworm.getRes(url1, data={'lang': 'sc'}, result='j')
        for aa in js1:
            province = aa['name']

            js2 = hhnetworm.getRes(url2 % str(aa['code']),
                                   data={'level': 2, 'lang': 'sc'},
                                   result='j')
            for bb in js2:
                city = bb['name']

                if str(bb['level']).strip() == "4":
                    # Recorded directly, with no district request.
                    dic = {'province': province, 'city': city, 'district': ''}
                    rt_arr.append(dic)
                    print(dic)
                    continue

                js3 = hhnetworm.getRes(
                    url2 % str(bb['code']),
                    data={'level': 3, 'lang': 'sc', 'region': 'cn'},
                    result='j')
                for cc in js3:
                    dic = {'province': province, 'city': city, 'district': cc['name']}
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='顺丰地址库', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('顺丰地址库'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #6
0
def tcmap():
    """Collect licence-plate codes per province/city from tcmap.com.cn.

    Every table under ``#list360`` is one province: the first row holds the
    province name, the remaining rows hold city name and plate code.

    Returns:
        The result of ``dlData_tcmap(records)`` if the whole crawl
        succeeded, otherwise ``[]`` (the failure and its traceback are
        printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://www.tcmap.com.cn/list/car_list.html"
    try:
        # Original note: "except Shanghai, Hainan, Chongqing, Xinjiang,
        # Qinghai" -- presumably regions this page does not cover in the
        # regular layout; TODO confirm.
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css("#list360 table"):
            pro = aa.css(
                "tr:nth-child(1) td:nth-child(1) strong a::text").extract_first()

            for bb in aa.css("tr:nth-child(n+2)"):
                city = bb.css("td:nth-child(1) a::text").extract_first()
                code = bb.css("td:nth-child(2)::text").extract_first()
                dic = {
                    'source': 'tcmap',
                    "province": pro,
                    "city": city,
                    "code": code,
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='tcmap')
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('tcmap'))
        traceback.print_exc()
        return []
    # Post-processing stays outside the try, as before: its errors propagate.
    return dlData_tcmap(rt_arr)
예제 #7
0
파일: haocc.py 프로젝트: hellohman/netWorm
def haocc():
    """Collect 7-digit mobile number prefixes per province/city from 51hao.cc.

    Walks the province blocks of the home page, follows every city link and
    records each listed number that is exactly seven characters long after
    removing spaces; other values are printed as rejected.

    Returns:
        list: ``{'province', 'city', 'number'}`` dicts if the whole crawl
        succeeded, otherwise ``[]`` (the failure and its traceback are
        printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    try:
        sel1 = hhnetworm.getRes("http://www.51hao.cc/")
        for i, aa in enumerate(sel1.css("div.fkt:nth-child(n+3)"), 1):
            province = aa.css("div.fkbj p a::text").extract_first().replace(" ", "")
            print("{0} :{1}".format(i, province))

            city_urls = aa.css("div.fklk p a::attr(href)").extract()
            city_names = aa.css("div.fklk p a::text").extract()
            for url, city in zip(city_urls, city_names):
                city_name = city.replace(" ", "")

                sel2 = hhnetworm.getRes(url)
                for bb in sel2.css("div.all ul:nth-child(n+2)"):
                    for number in bb.css("li a::text").extract():
                        number = str(number).replace(" ", "")
                        if len(number) != 7:
                            print("错误号码 :", number)
                            continue
                        dic = {
                            'province': province,
                            'city': city_name,
                            'number': number,
                        }
                        rt_arr.append(dic)
                        print(dic)

        HhTime.costPrinter(st_time, pjName='手机号码归属地')
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('手机号码归属地'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #8
0
def jinli():
    """Collect Gionee official spare-part prices.

    Follows phone-series links, then model links, then reads the price
    table of each model page (rows after the second and before the last).
    Rows with an empty or known-bad price text are skipped.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price, price kept as text) if the whole crawl succeeded,
        otherwise ``[]`` (the failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.gionee.com/id-478_op-productPart.shtml'
    url2 = 'https://www.gionee.com/'
    # 'one' is what str(None)[1:] yields when the fallback price cell is
    # missing (see below); it marks "no price", not a real value.
    wrong_arr = ['one', '智能机']
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css(".xl_phone a"):
            sel2 = hhnetworm.getRes(url2 + aa.css("::attr('href')").extract_first())
            for bb in sel2.css(".peijian_list li"):
                sel3 = hhnetworm.getRes(
                    url2 + bb.css("bt a::attr('href')").extract_first())
                rows = sel3.xpath(
                    "//table[@class='MsoNormalTable']/tbody/tr[position()>2 and position()<last()]"
                )
                for cc in rows:
                    # Prefer the doubly nested span.  Otherwise use the
                    # single span and drop its first character.
                    nested = cc.css("td:nth-child(2) p span span::text").extract_first()
                    if nested:
                        price = str(nested).strip()
                    else:
                        outer = cc.css("td:nth-child(2) p span::text").extract_first()
                        price = str(outer)[1:].strip()
                    price = price.strip().replace('¥', '').replace(',', '')
                    if not price or price in wrong_arr:
                        continue

                    fault = cc.css("td:nth-child(1) p span::text").extract_first()
                    dic = {
                        'business': '官修',
                        'brand': '金立',
                        'type': '手机',
                        'model': bb.css("bt a::text").extract_first(),
                        'color': '',
                        'malfunction': str(fault).strip(),
                        'plan': '',
                        'price': price,
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='金立', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('金立'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #9
0
def samsung(model_arr):
    """Collect Samsung official material prices for the given models.

    For every model and every material category (``Items`` of the category
    request) the search form is POSTed and the result table is read.  Rows
    whose name contains '辅料' (auxiliary material) are not emitted
    themselves; their summed price is added to every other row of the same
    result table.

    Args:
        model_arr: iterable of model names submitted as ``txtModel``.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = "http://support-cn.samsung.com/supportcn/support/material_price/GetCailAjax.aspx"
    url2 = 'http://support-cn.samsung.com/supportcn/support/material_price/default.aspx'
    try:
        js1 = hhnetworm.getRes(url1, data={'pid': 1}, result='j')
        for model in model_arr:
            for aa in js1['Items']:
                sel1 = hhnetworm.getRes(url2,
                                        method='p',
                                        data={
                                            '__EVENTTARGET': '',
                                            'btnSearch': '提交',
                                            'ddlCail': aa['PName'],
                                            'ddlProduct': 1,
                                            'txtModel': model
                                        })
                rows = []  # (fault name, price) pairs of this result table
                for bb in sel1.css(
                        "div.table_box:nth-child(4) table:nth-child(1) tr:nth-child(n+2)"
                ):
                    price = int(
                        bb.css("td:nth-child(2)::text").extract_first().strip())
                    malfunction = bb.css(
                        "td:nth-child(1)::text").extract_first().strip()
                    # Keep only what follows the first space.
                    malfunction = malfunction[malfunction.find(' ') + 1:]
                    rows.append((malfunction, price))

                # Total auxiliary-material price, added to every real item.
                help_price = sum(price for name, price in rows if '辅料' in name)
                for name, price in rows:
                    if '辅料' in name:
                        continue
                    dic = {
                        'business': '官修',
                        'brand': '三星',
                        'type': '手机',
                        'model': model,
                        'color': '',
                        'malfunction': name,
                        'plan': '',
                        'price': price + help_price,
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='三星', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('三星'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #10
0
def jikexiu():
    """Collect repair quotes from jikexiu.com.

    Chain of requests: brands -> devices of a brand (category 12) ->
    device attributes -> malfunctions -> solutions per malfunction.  All
    attribute values of a device are joined with "," into ``color``; the
    attribute id / value id of the *last* attribute are the ones sent in the
    follow-up requests.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.jikexiu.com/common/brands.json'
    url2 = 'https://www.jikexiu.com/order/selSolution'
    url3 = 'https://www.jikexiu.com/order/getDeviceAttributeList.json'
    url4 = 'https://www.jikexiu.com/order/getDeviceSolution.json'
    url5 = 'https://www.jikexiu.com/order/getSolutionMalfunction.json'
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['brandList']:

            sel2 = hhnetworm.getRes(url2, data={'brandId': aa['id'], 'categoryId': 12})
            for bb in sel2.css("#selectDevice ul li"):
                device_id = bb.css("::attr(deviceid)").extract_first()
                model = bb.css("::text").extract_first()

                colors = []
                attributeId, color_id = '', ''
                js3 = hhnetworm.getRes(url3, method='p', result='j',
                                       data={'deviceId': device_id})
                for cc in js3['deviceAttributeList']:
                    colors.append(cc['attributeValue'])
                    attributeId, color_id = cc['attributeId'], cc['id']
                color = ",".join(colors)

                js4 = hhnetworm.getRes(url4, method='p', result='j',
                                       data={'attrs[0].attributeId': attributeId,
                                             'attrs[0].valueId': color_id,
                                             'deviceId': device_id})
                for dd in js4['malfunctionList']:

                    js5 = hhnetworm.getRes(url5, method='p', result='j',
                                           data={'attrs[0].attributeId': attributeId,
                                                 'attrs[0].valueId': color_id,
                                                 'deviceId': device_id,
                                                 'malfunctionId': dd['id']})
                    for ee in js5['solutionMalfunctionList']:
                        dic = {
                            'business': '极客修',
                            'brand': aa['name'],
                            'type': '手机',
                            'model': model,
                            'color': color,
                            'malfunction': dd['name'],
                            'plan': ee['method'],
                            'price': ee['price'],
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='极客修', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('极客修'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #11
0
def huawei():
    """Collect Huawei official spare-part prices.

    Requests the product list for product id 4903 and then the parts list
    of every product code.  The "¥" sign is stripped from each price; rows
    whose price is missing (``str(None)`` == 'None') are skipped.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price, price kept as text) if the whole crawl succeeded,
        otherwise ``[]`` (the failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://consumer.huawei.com/support/services/service/parts/product/list'
    url2 = 'http://consumer.huawei.com/support/services/service/parts/list'
    wrong_arr = ['None']
    callback = 'jQuery111308920180139684155_1508484428667'
    try:
        js1 = hhnetworm.getRes(url1,
                               result='j',
                               data={
                                   'json': callback,
                                   'productId': 4903,
                                   '_': 1508484428671,
                                   'siteCode': 'cn'
                               })
        for aa in js1:
            js2 = hhnetworm.getRes(url2,
                                   result='j',
                                   data={
                                       'json': callback,
                                       'productCode': aa['productCode'],
                                       '_': 1508484428672,
                                       'siteCode': 'cn'
                                   })
            for bb in js2:
                price = str(bb['price']).replace("¥", '')
                if price in wrong_arr:
                    continue
                dic = {
                    'business': '官修',
                    'brand': '华为',
                    'type': '手机',
                    'model': aa['productTypeName'],
                    'color': '',
                    'malfunction': bb['partsType'],
                    'plan': '',
                    'price': price,
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='华为', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('华为'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #12
0
def hiweixiu():
    """Collect repair quotes from hiweixiu.com.

    Reads the brand list, requests the models ("moulds") of every brand and
    opens each model's detail page.  The page carries a JSON document in the
    ``value`` of ``input.rp_info``: a mapping nested three levels deep whose
    innermost values describe one quote each.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.hiweixiu.com/step/selectInfo'
    url2 = 'https://www.hiweixiu.com/step/getMouldlistsByBrandid'
    url3 = 'https://www.hiweixiu.com/step/detailInfo'
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css("div.brand_list ul li"):
            brand = aa.css("a::text").extract_first().strip()

            brand_id = aa.css("::attr(data-id)").extract_first()
            js2 = hhnetworm.getRes(url2,
                                   result='j',
                                   data={'brand_id': brand_id})['data']['mould']
            for moulds in js2.values():
                for bb in moulds:

                    sel3 = hhnetworm.getRes(url3, data={'mid': bb['MouldId']})

                    rp_info = json.loads(
                        sel3.css("input.rp_info::attr(value)").extract_first())
                    for level2 in rp_info.values():
                        for level3 in level2.values():
                            for data in level3.values():
                                dic = {
                                    'business': 'Hi维修',
                                    'brand': brand,
                                    'type': bb['ProductName'],
                                    'model': bb['MouldName'],
                                    'color': data['ColorName'].replace("/", ","),
                                    'malfunction': data['faulttype_detail_name'],
                                    'plan': data['RepairType'],
                                    'price': data['Price'],
                                }
                                rt_arr.append(dic)
                                print(dic)
        HhTime.costPrinter(st_time, pjName='Hi维修', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('Hi维修'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #13
0
def maoyan100():
    """Collect the Maoyan top-100 movie board (10 pages of 10 entries).

    For every entry the title, score (integer part + fraction part),
    release text, cast text and poster URL are read.  A release text of the
    form ``<date>(<country>)`` is split into year and country; otherwise the
    whole text is kept as the year and the country is left empty.

    Returns:
        list: one dict per movie if the whole crawl succeeded, otherwise
        ``[]`` (the failure and its traceback are printed).
    """

    def after_label(text):
        # Drop a leading "<label>:" prefix.  The ASCII colon is tried first
        # (original behaviour).  The full-width colon is a fallback because
        # the page presumably uses Chinese punctuation -- TODO confirm.
        pos = text.find(':')
        if pos == -1:
            pos = text.find('：')
        return text[pos + 1:]

    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://maoyan.com/board/4'
    try:
        for offset in range(0, 100, 10):
            sel1 = hhnetworm.getRes(url1, data={'offset': offset})
            for aa in sel1.css("#app div div div.main dl dd:nth-child(n+1)"):
                actors = aa.css("p.star::text").extract_first()
                year = after_label(aa.css("p.releasetime::text").extract_first())

                open_pos, close_pos = year.find('('), year.find(')')
                if open_pos != -1 and close_pos != -1:
                    country = year[open_pos + 1:close_pos]
                    release = year[:open_pos]
                else:
                    country, release = '', year

                score = float(
                    aa.css("i.integer::text").extract_first() +
                    aa.css("i.fraction::text").extract_first())

                dic = {
                    'source': '猫眼',  # provider
                    "name": aa.css("a::attr(title)").extract_first(),  # title
                    "sorce": score,  # rating (key spelling kept for consumers)
                    "type": '',  # genre
                    "country": country,  # country
                    "year": release,  # release date text
                    "director": '',  # director
                    "actors": after_label(actors).strip(),  # cast
                    "pictureUrl":
                    aa.css("a img::attr(data-src)").extract_first(),  # poster URL
                    'have_watched': 'N'  # already watched?
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='猫眼100', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('猫眼100'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #14
0
def shanxiuxia():
    """Collect repair quotes from the shanxiuxia.com API.

    Loads the brand list and the phone list once, then for every phone its
    malfunction classes and, per class, the malfunction details.  A detail
    row is recorded only when the phone's ``brand_id`` matches a brand; the
    first matching brand supplies the brand name.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://api.shanxiuxia.com/api/PhoneType/brand'
    url2 = 'http://api.shanxiuxia.com/api/PhoneType/brandPhone'
    url3 = 'http://api.shanxiuxia.com/api/PhoneType/malclass'
    url4 = 'http://api.shanxiuxia.com/api/PhoneType/maldetails'
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        js2 = hhnetworm.getRes(url2, method='p', result='j')
        for aa in js2['data']:

            js3 = hhnetworm.getRes(url3,
                                   method='p',
                                   result='j',
                                   data={'id': aa['id']})
            for bb in js3['data']['malfunction']:
                js4 = hhnetworm.getRes(url4,
                                       method='p',
                                       result='j',
                                       data={
                                           'id': aa['id'],
                                           'type_id': bb['id']
                                       })
                for cc in js4['data']:
                    # First brand whose id equals the phone's brand_id.
                    brand = next((dd for dd in js1['data']
                                  if aa['brand_id'] == dd['id']), None)
                    if brand is None:
                        continue
                    dic = {
                        'business': '闪修侠',
                        'brand': brand['name'],
                        'type': aa['category'],
                        'model': aa['name'],
                        'color': aa['color'],
                        'malfunction': cc['malfunction'],
                        'plan': '',
                        'price': cc['price_reference'],
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='闪修侠', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('闪修侠'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #15
0
def tcl():
    """Collect repair quotes from the TCL service API.

    Chain of requests: brands (the brand named '其它' is skipped) -> models
    -> colours of a model -> material prices of the model's *first* colour
    -> fault list of the model.  A record is produced for every material
    row / fault pair that share the same ``faultId``.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/brands'
    url2 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/models/%s'
    url3 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/colours/%s'
    url4 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/material/%s'
    url5 = 'http://wechat-api.tclsfdj.com:88/ha-wechat/v1/repair/fault/%s'
    try:
        js1 = hhnetworm.getRes(url1, method='p', result='j')
        for aa in js1['data']:
            if aa['brandName'] == '其它':
                continue

            js2 = hhnetworm.getRes(url2 % aa['brandId'], result='j')
            for bb in js2['data']:
                js3 = hhnetworm.getRes(url3 % bb['modelId'], method='p', result='j')
                color = ",".join(each['colourName'] for each in js3['data'])

                js4 = hhnetworm.getRes(url4 % js3['data'][0]['colourId'], method='p', result='j')
                # The fault list depends only on the model, so it is fetched
                # at most once per model (on first use) instead of once per
                # material row.
                fault_info = None
                for cc in js4['data']:
                    if fault_info is None:
                        js5 = hhnetworm.getRes(url5 % bb['modelId'], method='p', result='j')
                        fault_info = js5['data']['faultInfo']
                    for dd in fault_info:
                        if cc['faultId'] != dd['faultId']:
                            continue
                        dic = {
                            'business': 'TCL',
                            'brand': aa['brandName'],
                            'type': '手机',
                            'model': bb['mobileName'],
                            'color': color,
                            'malfunction': dd['faultName'],
                            'plan': dd['faultPlan'],
                            'price': cc['price'],
                        }
                        rt_arr.append(dic)
                        print(dic)
        HhTime.costPrinter(st_time, pjName='tcl', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('tcl'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #16
0
def mashangxiu():
    """Collect repair quotes from mashangxiu.com.

    Reads the brand ids from the navigation of the repair page, requests the
    model list of each brand and then the material list of each brand/model
    pair; every material becomes one record.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.mashangxiu.com/repair/repair'
    url2 = 'https://www.mashangxiu.com/device/acquireDeviceByBrandName'
    url3 = 'https://www.mashangxiu.com/device/acquireDeviceByBrandAndModel'
    try:
        sel1 = hhnetworm.getRes(url1)
        for aa in sel1.css(".mobileNav ul li a::attr('id')").extract():
            brand_query = {'needColor': 'Y', 'productBrand': aa}
            js2 = hhnetworm.getRes(url2, result='j', data=brand_query)
            for bb in js2['modelList']:
                model_query = {
                    'needColor': 'Y',
                    'productBrand': aa,
                    'productModel': bb,
                }
                js3 = hhnetworm.getRes(url3, result='j', data=model_query)
                for cc in js3['materialTypeList']:
                    dic = {
                        'business': '马上修',
                        'brand': aa,
                        'type': '手机',
                        'model': bb,
                        'color': '',
                        'malfunction': cc['materialName'],
                        'plan': '',
                        'price': cc['outerFee'],
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='马上修', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('马上修'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #17
0
파일: Baidu.py 프로젝트: hellohman/netWorm
def baidu_search(wordArr, need_secondNet_title=False):
    """Run a Baidu search for every word and collect the result entries.

    A fresh ``HhNetworm`` is created per word.  For each result container
    the title and abstract are assembled by ``help_func_baidu`` from the
    highlighted (``<em>``) and plain text fragments, and the link target is
    recorded.

    Args:
        wordArr: iterable of search terms.
        need_secondNet_title: when true, each result link is also fetched
            and its ``<title>`` text stored under ``'secondNet_title'``.

    Returns:
        list: one dict per result (word/title/brief/href and optionally
        secondNet_title) if the whole crawl succeeded, otherwise ``[]``
        (the failure and its traceback are printed).
    """
    st_time, rt_arr = time.time(), []
    url1 = 'https://www.baidu.com/s'
    try:
        for word in wordArr:
            hhnetworm = HhNetworm()
            # 'tn' is a fixed parameter and must be sent.  The original note
            # lists 'ie', 'srcqid' and 'sc' as fixed but optional; they are
            # omitted.
            data1 = {'word': word, 'tn': '88093251_hao_pg'}
            sel1 = hhnetworm.getRes(url1, data=data1, verify=False)
            for aa in sel1.css("#content_left div.result.c-container"):
                title_1 = aa.css("h3 a:nth-child(1) em::text").extract()
                title_2 = aa.css("h3 a:nth-child(1)::text").extract()
                brief_1 = aa.css("div.c-abstract em::text").extract()
                brief_2 = aa.css("div.c-abstract::text").extract()
                href = aa.css("h3 a:nth-child(1)::attr(href)").extract_first()

                dic = {
                    'word': word,  # search term
                    'title': help_func_baidu(title_1, title_2),  # title
                    'brief': help_func_baidu(brief_1, brief_2),  # abstract
                    'href': href,  # link
                }

                if need_secondNet_title:
                    # Title of the page the result links to.
                    sel2 = hhnetworm.getRes(href)
                    dic['secondNet_title'] = sel2.css(
                        "head title::text").extract_first()

                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='模拟百度搜索')
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('模拟百度搜索'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #18
0
def oppo():
    """Collect OPPO official spare-part prices.

    Reads the model names from the drop-down of the parts page and requests
    the part list of each model.  The ``data`` field of that response is
    parsed with ``Selector``; part names are paired with the price cells
    (the first price cell is skipped).  A pair whose price text is a single
    space is ignored.

    Returns:
        list: record dicts (business/brand/type/model/color/malfunction/
        plan/price) if the whole crawl succeeded, otherwise ``[]`` (the
        failure and its traceback are printed).
    """
    st_time, rt_arr, hhnetworm = time.time(), [], HhNetworm()
    url1 = 'https://www.oppo.com/cn/service/part'
    url2 = 'https://www.oppo.com/cn/service/productlist'
    wrong_arr = [' ']
    try:
        sel1 = hhnetworm.getRes(url1)
        models = sel1.css(
            "#part-select div.select-dropdown ul li span::text").extract()
        for model in models:
            js2 = hhnetworm.getRes(url2,
                                   data={'isapp': 0, 'mobile': model},
                                   result='j')
            sel2 = Selector(js2['data'])
            names = sel2.css("div.part-list-name span::text").extract()
            prices = sel2.css("div.part-list-price::text").extract()[1:]
            for name, price in zip(names, prices):
                if price in wrong_arr:
                    continue
                # Strip the currency sign and truncate to a whole number.
                price = int(float(price.strip().replace('¥', '')))
                dic = {
                    'business': '官修',
                    'brand': 'oppo',
                    'type': '手机',
                    'model': model,
                    'color': '',
                    'malfunction': name,
                    'plan': '',
                    'price': price,
                }
                rt_arr.append(dic)
                print(dic)
        HhTime.costPrinter(st_time, pjName='oppo', dataArr=rt_arr)
    except Exception:
        # Exception rather than a bare except (and no return inside finally)
        # so that KeyboardInterrupt / SystemExit still propagate.
        print("----------Wrong: {}".format('oppo'))
        traceback.print_exc()
        return []
    return rt_arr
예제 #19
0
파일: apple.py 프로젝트: hellohman/netWorm
def apple():
    """Collect rows from the pricing table on Apple's iPhone repair page.

    Each table row (from the second ``tr`` on) yields a model string from the
    first column and a price from the second column, falling back to the
    third column when the second one converts to a falsy value.  The model
    string is split on '、' and one record is emitted per piece.

    Returns:
        list: dicts with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``.  If
        anything raises, the traceback is printed and an empty list is
        returned instead of a partial result.
    """
    started_at = time.time()
    rows = []
    finished = False
    crawler = HhNetworm()
    pricing_url = 'https://support.apple.com/zh-cn/iphone/repair/service/pricing'

    def read_price(row, query):
        # Remove the "RMB " prefix and thousands separators before converting.
        cell_text = row.css(query).extract_first()
        return HhBase.toInt(cell_text.replace("RMB ", "").replace(",", ""))

    try:
        page = crawler.getRes(pricing_url)
        table_rows = page.css(
            "#faq-regular div:nth-child(4) #tableWraper table tr:nth-child(n+2)"
        )
        for row in table_rows:
            model = "".join(
                piece.strip()
                for piece in row.css("td:nth-child(1)::text").extract())

            # Second column first; third column only when that one is falsy.
            price = (read_price(row, "td:nth-child(2)::text")
                     or read_price(row, "td:nth-child(3)::text"))

            for single_model in model.split('、'):
                dic = {
                    'business': '官修',
                    'brand': '苹果',
                    'type': '手机',
                    'model': single_model,
                    'color': '',
                    'malfunction': '内屏',
                    'plan': '',
                    'price': price,
                }
                rows.append(dic)
                print(dic)
        HhTime.costPrinter(started_at, pjName='苹果', dataArr=rows)
        finished = True
    except:
        print("----------Wrong: {}".format('苹果'))
        traceback.print_exc()
    finally:
        # Returning from ``finally`` means no exception ever reaches the caller.
        return rows if finished else []
예제 #20
0
def douban250():
    """Collect one record per film from the ten pages of the Douban Top 250.

    For each ``div.item`` the title spans, the two text lines of the first
    ``div.bd p`` (credits line and "year / country / genre" line), the rating
    and the poster URL are extracted.

    Fixes over the previous version:

    * ``str.find`` returns -1 when "主演:" or a "/" separator is absent, and
      that -1 used to be fed straight into slices, which chopped the last
      character off the director and produced a garbage actors field.  The
      fields are now split with ``str.partition``, which yields empty strings
      for missing parts.
    * The space stripping was written as two identical ``replace(" ", "")``
      calls; the second is now an explicit no-break space (``"\\xa0"``), which
      is presumably what was intended.

    Returns:
        list: dicts with the keys ``source``, ``name``, ``sorce`` (rating,
        spelling kept as-is because it is part of the emitted data),
        ``type``, ``country``, ``year``, ``director``, ``actors``,
        ``pictureUrl`` and ``have_watched``.  If anything raises, the
        traceback is printed and an empty list is returned instead of a
        partial result.
    """
    st_time, rt_arr, finish, hhnetworm = time.time(), [], False, HhNetworm()
    url1 = "https://movie.douban.com/top250?start=%d&filter="

    def squeeze(text):
        # Drop plain spaces and no-break spaces (U+00A0).
        return text.replace(" ", "").replace("\xa0", "")

    def split_credits(line):
        # Split an already squeezed "导演:...主演:..." line into
        # (director, actors); a missing marker yields '' for that field.
        head, _, actors = line.partition("主演:")
        director = head.partition("导演:")[2]
        return director.replace("'", "~"), actors.replace("'", "~")

    try:
        for myindex in range(10):
            # 25 films per page, addressed through the ``start`` offset.
            sel = hhnetworm.getRes(url1 % (myindex * 25))
            for zzz in sel.css(
                    "#content div div.article ol.grid_view li div.item"):
                for aa in zzz.css("div.info"):
                    name = "".join(
                        bb.css("::text").extract_first()
                        for bb in aa.css("div.hd a span"))
                    body = aa.css("div.bd p:nth-child(1)::text").extract()

                    director, actors = split_credits(squeeze(body[0]))

                    # Second line: "<year>/<country>/<genre>".
                    year_part, _, rest = squeeze(body[1]).partition("/")
                    year = int(year_part.replace("\n", "")[:4])
                    country, _, rest = rest.partition("/")
                    # The old slice ended at find("/") == -1 for the last
                    # segment, which silently removed its final character
                    # (presumably a trailing newline); strip() keeps that
                    # outcome without eating real text.
                    genre = rest.partition("/")[0].strip()

                    dic = {
                        'source': '豆瓣',  # site the record comes from
                        "name": squeeze(name).replace("'", "~"),  # title
                        "sorce": float(
                            aa.css("div.bd div span.rating_num::text").
                            extract_first()),  # rating
                        "type": genre,  # genre
                        "country": country,  # country
                        "year": year,  # release year
                        "director": director,  # director
                        "actors": actors,  # leading actors
                        "pictureUrl": zzz.css(
                            "div.pic a img::attr(src)").extract_first(),
                        'have_watched': 'N'  # watched flag, always 'N' here
                    }
                    rt_arr.append(dic)
                    print(dic)
        HhTime.costPrinter(st_time, pjName='豆瓣250', dataArr=rt_arr)
        finish = True
    except:
        print("----------Wrong: {}".format('豆瓣250'))
        traceback.print_exc()
    finally:
        # Returning from ``finally`` means no exception ever reaches the caller.
        return rt_arr if finish else []
예제 #21
0
def jihouhou():
    """Collect repair quotes from hohofast.com for every brand/model/part.

    The site is walked in the order brand -> ``items`` of the brand list ->
    ``items`` of the model list -> malfunction part.  For each model the
    model is submitted, and for each part the part is submitted, before the
    appraisement data is read; the responses of both submit calls are
    discarded (presumably they set server-side state for the following
    requests -- TODO confirm).

    Returns:
        list: dicts with the keys ``business``, ``brand``, ``type``,
        ``model``, ``color``, ``malfunction``, ``plan`` and ``price``; only
        parts whose ``commonTechItems`` is non-empty produce a row.  If
        anything raises, the traceback is printed and an empty list is
        returned instead of a partial result.
    """
    started_at = time.time()
    rows = []
    finished = False
    crawler = HhNetworm()
    order_create_url = 'http://www.hohofast.com/api/web/order/create'
    brand_list_url = "http://www.hohofast.com/api/web/brand/list"
    model_list_url = "http://www.hohofast.com/api/web/model/list"
    model_submit_url = "http://www.hohofast.com/api/web/model/submit"
    order_other_url = "http://www.hohofast.com/api/web/order/other"
    other_submit_url = "http://www.hohofast.com/api/web/order/otherSubmit"
    appraisement_url = "http://www.hohofast.com/api/web/brief/appraisement/data"
    try:
        order_page = crawler.getRes(order_create_url, method='p')
        for brand_node in order_page.css("#select_brand div"):
            brand_id = brand_node.css("::attr(data-brand)").extract_first()

            series_json = crawler.getRes(brand_list_url,
                                         method='p',
                                         data={'uuid': brand_id},
                                         result='j')
            for series in series_json['items']:

                models_json = crawler.getRes(model_list_url,
                                             data={
                                                 'type': brand_id,
                                                 'uuid': series['uuid']
                                             },
                                             result='j')
                for model in models_json['items']:

                    # All colour names are reported, comma separated; only
                    # the uuid of the last colour is submitted.
                    color_names, color_id = [], ''
                    for shade in json.loads(model['info'])['colors']:
                        color_names.append(shade['color'])
                        color_id = shade['uuid']
                    color = ",".join(color_names)

                    crawler.getRes(model_submit_url,
                                   method='p',
                                   data={
                                       'type': brand_id,
                                       'bUuid': series['uuid'],
                                       'mUuid': model['uuid'],
                                       'color': color_id
                                   })
                    parts_page = crawler.getRes(order_other_url)

                    for part_node in parts_page.css(
                            "#select_part div.item.malfunction-item"):

                        crawler.getRes(
                            other_submit_url,
                            method='p',
                            data={
                                'part':
                                part_node.css(
                                    "::attr(data-part)").extract_first(),
                                'service': 1
                            })
                        quote = crawler.getRes(appraisement_url, result='j')

                        tech_items = quote['data']['commonTechItems']
                        if not tech_items:
                            continue
                        # Only the first tech item is recorded.
                        dic = {
                            'business': '极吼吼',
                            'brand': brand_node.css(
                                "::attr(data-name)").extract_first(),
                            'type': series['name'],
                            'model': model['name'],
                            'color': color,
                            'malfunction': part_node.css(
                                "h5::text").extract_first(),
                            'plan': tech_items[0]['solution']['showname'],
                            'price': tech_items[0]['price'],
                        }
                        rows.append(dic)
                        print(dic)
        HhTime.costPrinter(started_at, pjName='极吼吼', dataArr=rows)
        finished = True
    except:
        print("----------Wrong: {}".format('极吼吼'))
        traceback.print_exc()
    finally:
        # Returning from ``finally`` means no exception ever reaches the caller.
        return rows if finished else []
예제 #22
0
def wuyixiu():
    """Collect repair plans from 51xiu.cc for every brand and model version.

    Brands come from the repair landing page, versions from ``choosePlans``,
    colours from the HTML returned by ``select`` and the plans themselves
    from ``getPlan`` (queried with the id of the last colour found).

    Returns:
        On success, whatever ``dl_wuyixiu_data`` returns for the collected
        list of dicts (keys ``business``, ``brand``, ``type``, ``model``,
        ``color``, ``malfunction``, ``plan``, ``price``).  If anything raises
        during collection, the traceback is printed and an empty list is
        returned.
    """
    started_at = time.time()
    rows = []
    finished = False
    crawler = HhNetworm()
    plans_url = 'http://www.51xiu.cc/fi/choosePlans'
    select_url = 'http://www.51xiu.cc/fi/select'
    get_plan_url = 'http://www.51xiu.cc/fi/getPlan'
    try:
        repair_page = crawler.getRes('http://www.51xiu.cc/repair')
        for brand_node in repair_page.css(
                "div.big-box div div:nth-child(2) div div"):
            bId = brand_node.css("::attr('brandid')").extract_first().strip()
            tId = brand_node.css("::attr('tid')").extract_first().strip()

            versions = crawler.getRes(plans_url,
                                      method='p',
                                      data={
                                          'bId': bId,
                                          'tId': tId
                                      },
                                      result='j')
            for version in versions['versionList']:
                versionName = version['versionName']  # model name

                color_html = crawler.getRes(select_url,
                                            data={
                                                'bId': bId,
                                                'tId': tId,
                                                'versionName': versionName,
                                                'versionId': version['id']
                                            },
                                            result='t')
                color_box = BeautifulSoup(color_html,
                                          'lxml').find('div', class_='clear')
                # Only the odd-positioned children are read (presumably the
                # even ones are whitespace text nodes -- TODO confirm).
                color_names, cId = [], ''
                for index, each in enumerate(color_box):
                    if index % 2 == 0:
                        continue
                    color_names.append(each.text)
                    cId = each['colorid']
                color = ','.join(color_names)

                plans = crawler.getRes(get_plan_url,
                                       method='p',
                                       result='j',
                                       data={'cId': cId})
                for plan in plans['result']:
                    dic = {
                        'business': '51修',
                        'brand': brand_node.css(
                            "::text").extract_first().strip(),
                        'type': '手机',
                        'model': versionName,
                        'color': color,
                        'malfunction': plan['detail'],
                        'plan': plan['plan'],
                        'price': plan['price'],
                    }
                    rows.append(dic)
                    print(dic)
        HhTime.costPrinter(started_at, pjName='51修', dataArr=rows)
        finished = True
    except:
        print("----------Wrong: {}".format('51修'))
        traceback.print_exc()
    finally:
        # Returning from ``finally`` discards any exception still in flight.
        return dl_wuyixiu_data(rows) if finished else []
예제 #23
0
def jingdong():
    """Build a province/city/district/area table from psfw.jd.com.

    Four nested lookups are made (top-level list, cities of a province,
    districts of a city, fourth level of a district).  When the fourth-level
    answer has a truthy ``hasNext``, one row is emitted per entry of its
    ``resultList``; otherwise a single row with an empty ``area`` is emitted
    for the district.

    Returns:
        list: dicts with the keys ``province``, ``city``, ``district`` and
        ``area``.  If anything raises, the traceback is printed and an empty
        list is returned instead of a partial result.
    """
    started_at = time.time()
    rows = []
    finished = False
    crawler = HhNetworm()
    # NOTE(review): the top-level list is requested from initArea.do (with a
    # trailing '?'), the same endpoint that later serves districts.
    province_url = "http://psfw.jd.com/help/front/initArea.do?"
    city_url = "http://psfw.jd.com/help/front/initCity.do"
    district_url = "http://psfw.jd.com/help/front/initArea.do"
    fourth_url = "http://psfw.jd.com/help/front/initFouth.do"

    def add_row(province, city, district, area):
        dic = {
            'province': province,
            'city': city,
            'district': district,
            'area': area
        }
        rows.append(dic)
        print(dic)

    try:
        provinces = crawler.getRes(province_url, method='p', result='j')
        for prov in provinces['result']['resultList']:
            province, province_id = prov['name'], str(prov['id'])

            cities = crawler.getRes(city_url,
                                    method='p',
                                    data={'provinceId': province_id},
                                    result='j')
            for town in cities['result']['resultList']:
                city, city_id = town['name'], str(town['id'])

                districts = crawler.getRes(district_url,
                                           method='p',
                                           result='j',
                                           data={
                                               'provinceId': province_id,
                                               'cityId': city_id
                                           })
                for dist in districts['result']['resultList']:
                    district, district_id = dist['name'], str(dist['id'])

                    fourth = crawler.getRes(fourth_url,
                                            method='p',
                                            result='j',
                                            data={
                                                'provinceId': province_id,
                                                'cityId': city_id,
                                                'areaId': district_id
                                            })
                    if not fourth['result']['hasNext']:
                        # No fourth level: record the district by itself.
                        add_row(province, city, district, '')
                        continue
                    for entry in fourth['result']['resultList']:
                        add_row(province, city, district, entry['name'])
        HhTime.costPrinter(started_at, pjName='京东地址库', dataArr=rows)
        finished = True
    except:
        print("----------Wrong: {}".format('京东地址库'))
        traceback.print_exc()
    finally:
        # Returning from ``finally`` means no exception ever reaches the caller.
        return rows if finished else []
예제 #24
0
파일: tcmap.py 프로젝트: hellohman/netWorm
def tcmap():
    """Collect district-level code records from tcmap.com.cn.

    The index page lists provinces (``#list360``); each province page lists
    cities in a table; each link in the sixth column of a city row is fetched
    with ``requests`` and decoded as gb18030, then utf-8 as a fallback.  Every
    matching table on that page yields one record.

    Returns:
        list: dicts with the keys ``province``, ``id1``, ``city``, ``id2``,
        ``district``, ``id3``, ``phoneAreaCode``, ``postCode``, ``carCode``,
        ``population`` and ``s_area``.  If anything raises outside the decode
        fallback, the traceback is printed and an empty list is returned
        instead of a partial result.
    """
    started_at = time.time()
    rows = []
    finished = False
    crawler = HhNetworm()
    index_url = "http://www.tcmap.com.cn/list/daima_list.html"
    site_root = "http://www.tcmap.com.cn"

    def compact(node, query):
        # First match of ``query`` under ``node`` with plain spaces removed.
        return node.css(query).extract_first().replace(" ", "")

    try:
        index_page = crawler.getRes(index_url)
        for i, prov_node in enumerate(index_page.css("#list360"), 1):
            province = compact(prov_node, "strong a::text")  # province
            print("{0} :{1}".format(i, province))

            province_page = crawler.getRes(
                site_root +
                prov_node.css("strong a::attr(href)").extract_first())
            for city_row in province_page.css(
                    "#page_left table:nth-child(5) tr:nth-child(n+2)"):
                for href in city_row.css(
                        "td:nth-child(6) a::attr(href)").extract():
                    res = requests.get(site_root + str(href)).content

                    # Try each encoding in turn; keep the first that parses.
                    district_page = False
                    for codec in ("gb18030", "utf-8"):
                        try:
                            district_page = Selector(res.decode(codec))
                        except:
                            continue
                        break
                    else:
                        print("decode all failed!")

                    if not district_page:
                        continue
                    for table in district_page.css(
                            "#page_left div:nth-child(4) div:nth-child(2) table"
                    ):
                        # The [1:] slices drop the first character of the
                        # cell text (presumably a separator -- TODO confirm).
                        dic = {
                            # province
                            'province': province,
                            # ID-card code, level 1
                            'id1': compact(prov_node, "::text"),
                            # city
                            'city': compact(city_row, "td strong a::text"),
                            # ID-card code, level 2
                            'id2': compact(city_row,
                                           "td:nth-child(5)::text"),
                            # district
                            'district': compact(
                                table,
                                "tr:nth-child(1) td:nth-child(1)::text")[1:],
                            # ID-card code, level 3
                            'id3': compact(
                                table,
                                "tr:nth-child(2) td:nth-child(2)::text")[1:],
                            # phone area code
                            'phoneAreaCode': compact(
                                city_row, "td:nth-child(4)::text"),
                            # postal code
                            'postCode': compact(
                                table,
                                "tr:nth-child(3) td:nth-child(2)::text")[1:],
                            # licence-plate code
                            'carCode': compact(
                                table,
                                "tr:nth-child(4) td:nth-child(1)::text")[1:],
                            # population
                            'population': check_1(
                                table.css(
                                    "tr:nth-child(5) td:nth-child(1)::text"
                                ).extract_first()),
                            # area of the district
                            's_area': check_1(
                                table.css("tr:nth-child(6) td::text").
                                extract_first())
                        }
                        rows.append(dic)
                        print(dic)
        HhTime.costPrinter(started_at, pjName='身份证户籍编号')
        finished = True
    except:
        print("----------Wrong: {}".format('身份证户籍编号'))
        traceback.print_exc()
    finally:
        # Returning from ``finally`` means no exception ever reaches the caller.
        return rows if finished else []