Пример #1
0
def crawl(vehicle, verify_code=None):
    """Query the 'beijing_new' traffic-violation site and parse its answer.

    Args:
        vehicle: object exposing ``engine_num`` and ``license_plate_num``.
        verify_code: object exposing ``verify_code`` (the captcha text) and
            ``headers`` (sent along with the query request).

    Returns:
        dict holding ``name`` and ``version`` and then either ``error``
        (``'VEHICLE INFO ERROR'``) or ``violations``: a list with one
        ``dict(Violation)`` per parsed table row, empty when the site
        reports no open records.

    Raises:
        WrongVerifyCode: if no captcha text was supplied or the site
            answers that the captcha is wrong.
    """
    # The parameter defaults to None; treat a missing captcha object the same
    # way as a missing captcha text instead of failing on attribute access.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'beijing_new', 'version': 1}

    # ``sf`` and ``cxlm`` are module-level values that are only read here.
    data = {
        "sf": sf,
        "fdjhhm": vehicle.engine_num.encode("utf-8"),
        "carnono": vehicle.license_plate_num[-6:],  # last six characters of the plate
        "cxlm": cxlm,
        "yzm": verify_code.verify_code,
    }
    if sf == 11:
        data["chfs"] = ""
    else:
        data["hpzllb"] = "02"
    print(data)  # debug output kept from the original implementation
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # "captcha entered incorrectly"
    if u"验证码输入有误" in page:
        raise WrongVerifyCode()
    # plate number / engine number wrong, or not entered completely
    if u"您输入的车牌号或发动机号有误" in page or u"您没有输入完整的车牌号和发动机号" in page:
        result['error'] = 'VEHICLE INFO ERROR'
        return result
    # "you have no unhandled violation records" (two wordings)
    if u"您没有未接受处理的违法记录" in page or u"您没有未接受处理的在京违法记录" in page:
        result['violations'] = []
        return result

    # Every captured row starts at the <td> holding a YYYY-MM-DD date.
    # NOTE(review): ``reg`` is indexed as [match][group] below; presumably it
    # returns a list of group tuples -- confirm against its definition.
    rows = reg(page, r"<tr[\s\S]+?(<td>\d{4}-\d{2}-\d{2}[\s\S]+?)</tr>")
    violations = []
    for row in rows:
        violation = Violation()
        tds = reg(row[0], r"<td.*?>([\s\S]+?)</td>")
        violation.fine = tds[4][0]
        violation.address = tds[1][0]
        violation.time = tds[0][0]
        violation.violation_type = reg(tds[2][0], r"<a.+?>(.+?)</a>")[0][0]
        # A row counts as handled unless its status cell reads "未处理"
        # ("unhandled"); the cell is encoded so it compares against the
        # byte-string literal.
        violation.handled = tds[5][0].encode("utf-8") != "未处理"
        violation.point = tds[3][0]
        violation.agency = ""
        violations.append(dict(violation))

        # debug output kept from the original implementation
        print(violation.time)
        print(violation.address)
        print(violation.violation_type)
        print(violation.fine)
        print(violation.point)
        print(violation.handled)
        print(" -------- ")
    result['violations'] = violations
    return result
Пример #2
0
def crawl(vehicle, verify_code=None):
    """Query the 'dongguan' traffic-violation site and parse its answer.

    Args:
        vehicle: object exposing ``license_plate_num``, ``body_num`` and
            ``engine_num``.
        verify_code: object exposing ``verify_code`` (the captcha text) and
            ``headers`` (sent along with the query request).

    Returns:
        dict holding ``name`` and ``version`` and then either ``error``
        (``'VEHICLE INFO ERROR'``) or ``violations``: a list with one
        ``dict(Violation)`` per parsed row, empty when the site reports
        no violations.

    Raises:
        WrongVerifyCode: if no captcha text was supplied or the site
            answers that the captcha is wrong.
    """
    # The parameter defaults to None; treat a missing captcha object the same
    # way as a missing captcha text instead of failing on attribute access.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'dongguan', 'version': 1}

    # ``gg_value`` is a module-level value that is only read here.
    data = {
        "action": "Illagel",
        "headno": vehicle.license_plate_num[:-6],  # everything before the last six characters
        "no": vehicle.license_plate_num[-6:],
        "back4": vehicle.body_num,
        "fdjh6": vehicle.engine_num,
        "validate": verify_code.verify_code,
        "type": "02",
        "tele": "18607325868",  # hard-coded value sent with every query
        "gg": gg_value,
    }

    # debug output kept from the original implementation
    for k in data:
        print("%s %s" % (k, data[k]))

    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # "captcha wrong"
    if u"验证码错误" in page:
        raise WrongVerifyCode()
    # engine number / frame number / vehicle information wrong
    if any(msg in page for msg in (u"发动机号错误", u"车架号错误", u"车辆信息错误")):
        result['error'] = 'VEHICLE INFO ERROR'
        return result
    # "no violation information"
    if u"没有违章信息" in page:
        result['violations'] = []
        return result

    # NOTE(review): ``reg`` is indexed as [match][group] below; presumably it
    # returns a list of group tuples -- confirm against its definition.
    rows = reg(page, r"<tr><td>(.+?</table></td>.+?)</td></tr>")
    violations = []
    for row in rows:
        violation = Violation()
        violation.fine = reg(row[0], r"<td>(\d{1,6}?)</td><td>")[0][0]
        # The details live in a nested table inside the row.
        sub_table_content = reg(row[0], r'<table class="illegal_table".+?>(.+?)</table>')[0][0]
        sub_info_list = reg(sub_table_content, r'<td[^r]*?>(.+?)</td>')
        violation.address = sub_info_list[2][0]
        # Turn the Chinese date/time unit characters (year, month, day, hour,
        # minute, second) into '-' / ':' separators or drop them.
        violation.time = (
            sub_info_list[0][0]
            .replace("年", "-")
            .replace("月", "-")
            .replace("日", "")
            .replace("时", ":")
            .replace("分", ":")
            .replace("秒", "")
        )
        violation.violation_type = sub_info_list[3][0]
        violation.handled = False
        # The point value is the second character of the second cell; a "7"
        # there is reported as "12" (site-specific mapping kept as-is, the
        # reason is not visible in this function).
        point = sub_info_list[1][0][1]
        if point == "7":
            point = "12"
        violation.point = point
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result
Пример #3
0
def crawl(vehicle, verify_code=None):
    """Query the "huizhou" traffic-violation site and parse its answer.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha text) and
            ``headers`` (sent along with the query request).

    Returns:
        dict holding ``name`` and ``version`` and then either ``error``
        (``"VEHICLE INFO ERROR"``) or ``violations``: a list with one
        ``dict(Violation)`` per parsed row, empty when the site reports
        no violations.

    Raises:
        WrongVerifyCode: if no captcha text was supplied or the site
            answers that the captcha is wrong.
    """
    # The parameter defaults to None; treat a missing captcha object the same
    # way as a missing captcha text instead of failing on attribute access.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {"name": "huizhou", "version": 1}

    data = {
        "action": "Illagel",
        "headno": vehicle.license_plate_num[:-6],  # everything before the last six characters
        "no": vehicle.license_plate_num[-6:],
        "back4": vehicle.body_num,
        "validate": verify_code.verify_code,
        "type": "02",
    }
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # "captcha wrong" marker as emitted by the site (including the "!0" suffix)
    if u"验证码错误!0" in page:
        raise WrongVerifyCode()
    # "frame number wrong"
    if u"车架号错误" in page:
        result["error"] = "VEHICLE INFO ERROR"
        return result
    # "no violation information"
    if u"没有违章信息" in page:
        result["violations"] = []
        return result

    # NOTE(review): ``reg`` is indexed as [match][group] below; presumably it
    # returns a list of group tuples -- confirm against its definition.
    rows = reg(page, r"<tr><td>(.+?</table></td>.+?)</td></tr>")
    violations = []
    for row in rows:
        violation = Violation()
        # The fine is taken from the second all-digit <td> of the row.
        violation.fine = reg(row[0], r"<td>(\d+?)</td>")[1][0]
        # The details live in a nested table inside the row.
        sub_table_content = reg(row[0], r'<table class="illegal_table".+?>(.+?)</table>')[0][0]
        sub_info_list = reg(sub_table_content, r"<td[^3]*?>(.+?)</td>")
        violation.address = sub_info_list[2][0]
        # Turn the Chinese date/time unit characters (year, month, day, hour,
        # minute, second) into '-' / ':' separators or drop them.
        violation.time = (
            sub_info_list[0][0]
            .replace("年", "-")
            .replace("月", "-")
            .replace("日", "")
            .replace("时", ":")
            .replace("分", ":")
            .replace("秒", "")
        )
        violation.violation_type = sub_info_list[3][0]
        violation.handled = False
        # The point value is the second character of the second cell.
        violation.point = sub_info_list[1][0][1]
        violation.agency = ""
        violations.append(dict(violation))
    result["violations"] = violations
    return result
Пример #4
0
def crawl(vehicle, verify_code=None):
    """Query the 'jilin' traffic-violation site and parse its result table.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha text) and
            ``headers`` (sent along with the query request).

    Returns:
        dict holding ``name`` and ``version`` and then either ``error``
        (``'VEHICLE INFO ERROR'``) or ``violations``: a list with one
        ``dict(Violation)`` per ``<tr>`` matched in the result table.

    Raises:
        WrongVerifyCode: if no captcha text was supplied or the site
            answers that the captcha is wrong.
    """
    # The parameter defaults to None; treat a missing captcha object the same
    # way as a missing captcha text instead of failing on attribute access.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'jilin', 'version': 1}

    data = {
        "province": vehicle.license_plate_num[:-6],  # everything before the last six characters
        "hphm": vehicle.license_plate_num[-6:],
        "engine": vehicle.body_num[-4:],  # last four characters of body_num
        "yzm": verify_code.verify_code.strip(),
        "hpzl": "02",
    }
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # "captcha wrong"
    if u"验证码错误" in page:
        raise WrongVerifyCode()
    # "last four characters of the vehicle identification code are wrong"
    if u"车辆识别代号后四位输入有误" in page:
        result['error'] = 'VEHICLE INFO ERROR'
        return result

    # NOTE(review): ``reg`` is indexed as [match][group] below; presumably it
    # returns a list of group tuples -- confirm against its definition.
    # The first match is used unconditionally, so a page without the
    # "wzjl_table" table fails on the indexing here.
    result_table = reg(page, r'<table id="wzjl_table"[\s\S]+?>([\s\S]+?)</table>')
    rows = reg(result_table[0][0], r"<tr>([\s\S]+?)</tr>")
    violations = []
    for row in rows:
        violation = Violation()
        tds = reg(row[0], r"<td[\s\S]+?>([\s\S]+?)</td>")
        violation.fine = tds[4][0].strip()
        violation.address = tds[3][0].strip()
        violation.time = tds[2][0].strip()
        violation.violation_type = tds[6][0].strip()
        violation.handled = False
        violation.point = tds[5][0].strip()
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result
Пример #5
0
def crawl(vehicle, verify_code=None):
    """Query the 'guangdong' violation site and fetch each record's detail page.

    The list page is requested first; for every matched row a second GET
    request is made to the detail link found in that row, and the violation
    fields are parsed from the detail page.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha text) and
            ``headers`` (sent along with every request).

    Returns:
        dict holding ``name`` and ``version`` and then either ``error``
        (``'NETWORK ERROR'``) or ``violations``: a list with one
        ``dict(Violation)`` per list row, empty when the site reports no
        open records.

    Raises:
        WrongVerifyCode: if no captcha text was supplied.
    """
    # The parameter defaults to None; treat a missing captcha object the same
    # way as a missing captcha text instead of failing on attribute access.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'guangdong', 'version': 1}

    data = {
        "province": vehicle.license_plate_num[:-6].encode("gb2312"),
        "hphm": vehicle.license_plate_num[-6:],
        "CJHM": vehicle.body_num[-6:],
        # NOTE(review): "fdjh" is sent the same body_num slice as "CJHM";
        # confirm whether an engine number was intended here.
        "fdjh": vehicle.body_num[-6:],
        "mofei": verify_code.verify_code,
        "hpzl": "02",
        # two random two-digit integers are sent as "x" and "y"
        "x": random.randint(10, 99),
        "y": random.randint(10, 99),
    }

    print(data)  # debug output kept from the original implementation

    # ``proxies`` is a module-level value that is only read here.
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers, proxies=proxies)
    page = second_resp.content
    print(page)  # debug output kept from the original implementation

    # "the vehicle currently has no unhandled violation records"
    if u"车辆目前无未处理的违章记录" in page:
        result['violations'] = []
        return result
    # "you have queried too often" / "system busy, wait 30 seconds and retry"
    if u"您查询的次数过多" in page or u"系统繁忙,请等待30秒后再查" in page:
        result['error'] = 'NETWORK ERROR'
        return result

    # NOTE(review): ``reg`` is indexed as [match][group] below; presumably it
    # returns a list of group tuples -- confirm against its definition.
    rows = reg(page, r'<tr.*?>[\s\S]+?(<td.+\d{4}-\d{2}-\d{2} \d{2}:\d{2}[\s\S]+?)</tr>')
    violations = []
    further_request_url_prefix = "http://www.ttdaiban.com/"
    for row in rows:
        tds = reg(row[0], r'<td.*?>(.*?)</td>')
        # The second cell carries a link of the form '../<path>'; the part
        # after '../' is appended to the site prefix.
        further_request_url_suffix = reg(tds[1][0], r"<a.+'../(.+?)'")[0][0]
        further_resp = fetch_http(
            further_request_url_prefix + further_request_url_suffix,
            "get",
            headers=verify_code.headers,
            proxies=proxies,
        )
        # The detail page is encoded to UTF-8 before matching; the patterns
        # below capture the text after the ':' in each cell.
        sec_tds = reg(further_resp.content.encode("utf-8"), r"<td.*?>.+?:(.+?)</td>")
        violation = Violation()
        violation.fine = sec_tds[6][0].strip()
        violation.address = reg(sec_tds[3][0], r".*】([\S]+)")[0][0].strip()
        violation.time = sec_tds[2][0].strip()
        violation.violation_type = sec_tds[4][0].strip()
        violation.handled = False
        violation.point = reg(sec_tds[5][0], r"\[.*?(\d+?).*?分\]")[0][0].strip()
        violation.agency = ""
        violations.append(dict(violation))

        # debug output kept from the original implementation
        print(violation.time)
        print(violation.address)
        print(violation.violation_type)
        print(violation.fine)
        print(violation.point)
        print(" -------- ")
    result['violations'] = violations
    return result
Пример #6
0
def crawl(vehicle, verify_code=None):
    """Query the 'guangzhou' violation service in three request stages.

    Stage 1 confirms the captcha, stage 2 sends the vehicle data and yields
    a key plus a record count, stage 3 fetches the JSON record list with
    that key. Each stage is attempted up to five times.

    Args:
        vehicle: object exposing ``license_plate_num``, ``engine_num`` and
            ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha value) and
            ``headers`` (sent along with every request).

    Returns:
        dict holding ``name`` and ``version`` and then either ``error``
        (``'VEHICLE INFO ERROR'`` or ``'NETWORK ERROR'``) or ``violations``:
        a list with one ``dict(Violation)`` per element of the JSON
        ``data`` array, empty when the reported count is "0".

    Raises:
        WrongVerifyCode: if no captcha value was supplied or the captcha
            confirmation still answers "fail" after five attempts.
    """
    # The parameter defaults to None; treat a missing captcha object the same
    # way as a missing captcha text instead of failing on attribute access.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'guangzhou', 'version': 1}
    max_attempts = 5

    # --- stage 1: captcha confirmation -----------------------------------
    captcha_data = {"captchaId": verify_code.verify_code}
    for _ in range(max_attempts):
        captcha_resp = fetch_http(captcha_confirm_url, "post", data=captcha_data, headers=verify_code.headers)
        print("captcha_content :  %s" % (captcha_resp.content,))
        if "fail" not in captcha_resp.content:
            break
        print("captcha retry ~!")
    else:
        # every attempt answered "fail"
        raise WrongVerifyCode()

    # --- stage 2: vehicle data, yields key and record count --------------
    pre_data = {
        "hpzl": "02",
        "hphm": vehicle.license_plate_num,
        "fdjh": vehicle.engine_num,
        "clsbdh": vehicle.body_num,
        "captcha": verify_code.verify_code,
    }
    for _ in range(max_attempts):
        pre_resp = fetch_http(pre_request_url, "post", data=pre_data, headers=verify_code.headers)
        print("pre_content %s" % (pre_resp.content,))
        if pre_resp.content != "":
            break
    else:
        # All attempts returned an empty body; report a failure instead of
        # trying to extract the key from nothing.
        result['error'] = 'NETWORK ERROR'
        return result
    if "_error" in pre_resp.content:
        result['error'] = 'VEHICLE INFO ERROR'
        return result
    # The body carries "key:<key>$<count>"; the key is the text between
    # "key:" and the "$<digits>" tail, the count is the digits after "$".
    # NOTE(review): ``reg`` is indexed as [match][group]; presumably it
    # returns a list of group tuples -- confirm against its definition.
    key = reg(pre_resp.content, r"(?<=key:)(.+)(?=\$\d+)")[0][0]
    total = reg(pre_resp.content, r"(?<=\$)(\d+)")[0][0]
    if total == "0":
        result['violations'] = []
        return result

    # --- stage 3: record list --------------------------------------------
    data = {
        "platenumtype": "02",
        "platenum": vehicle.license_plate_num,
        "engineno": vehicle.engine_num,
        "vehicleidnum": vehicle.body_num,
        "key": key,
    }
    for _ in range(max_attempts):
        main_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
        print("main_content :  %s" % (main_resp.content,))
        if '"data":"",' not in main_resp.content:
            break
        print("retry ~!")
    else:
        # The count above was non-zero, so an empty "data" field after all
        # attempts is a failed fetch, not "no violations".
        result['error'] = 'NETWORK ERROR'
        return result

    vio_data = json.loads(main_resp.content)
    violations = []
    for elem in vio_data["data"]:
        violation = Violation()
        violation.fine = elem["FKJE"]
        violation.point = elem["WFJFS"]
        violation.address = elem["WFDZ"]
        violation.time = elem["WFSJ"]
        violation.violation_type = elem["WFXWMC"]
        violation.handled = False
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result