def _get_unicom_bills_sms_captcha(args):
    """Ask China Unicom to send a regular SMS verification code.

    The request headers previously stored in SSDB for this user are replayed
    against Unicom's ``SendMSG`` endpoint and the textual reply is mapped to
    an ok/error payload.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse`` wrapping the payload filled in by
        ``add_ajax_ok_json`` / ``add_ajax_error_json``.
    """
    ret_data = {}
    username = args["username"].strip()
    the_time = get_js_time()

    # randint() must receive real integers: float bounds raise TypeError on
    # Python >= 3.12, and ``1E17 - 1`` rounds back up to 1e17 in floating
    # point, so the old upper bound was one digit too long.
    form_data = {'mobile': username,
                 'req_time': the_time,
                 '_': int(the_time) + 1,
                 'callback': "jQuery1720" + str(randint(10 ** 16, 10 ** 17 - 1)) + "_" + the_time
                 }
    url = "https://uac.10010.com/portal/Service/SendMSG"
    key = username + ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX + args["account_type"]

    try:
        ssdb_conn = get_ssdb_conn()
        headers = ssdb_conn.get(key)
        if headers is not None:
            # NOTE(review): eval() executes whatever text is stored under this
            # key. Presumably only the crawler writes it -- confirm, and
            # prefer a JSON payload so no code evaluation is needed.
            sms_content = http_get(url, headers=eval(headers), params=form_data, verify=False).text
            if 'resultCode:"0000"' in sms_content:
                add_ajax_ok_json(ret_data)
            elif 'resultCode:"7096"' in sms_content:  # codes requested too quickly
                add_ajax_error_json(ret_data, "验证码请求过快,请稍后再试。")
            elif 'resultCode:"7098"' in sms_content:  # request limit reached
                add_ajax_error_json(ret_data, "请求短信验证码达到上限,请明天再试!")
            else:
                add_ajax_error_json(ret_data, "发送失败:" + sms_content)
        else:
            # No stored headers for this user/account type.
            add_ajax_error_json(ret_data, "无法获取短信验证码,请刷新页面重试!")
    except Exception:
        # Best effort: any failure is reported to the client as a retry hint.
        add_ajax_error_json(ret_data, "无法获取短信验证码,请重试。")

    return JsonResponse(ret_data)
def _get_mobile_bills_sms_captcha(args):
    """Ask China Mobile to send the SMS verification code for bill details.

    The request headers previously stored in SSDB for this user are replayed
    against the ``detbillrandomcodejsonp`` endpoint and the reply's
    ``retCode`` is mapped to an ok/error payload.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse`` wrapping the payload filled in by
        ``add_ajax_ok_json`` / ``add_ajax_error_json``.
    """
    ret_data = {}
    username = args["username"].strip()
    # randint() must receive real integers: float bounds raise TypeError on
    # Python >= 3.12, and ``1E17 - 1`` rounds back up to 1e17 in floating
    # point, so the old upper bound was one digit too long.
    form_data = {"callback": "jQuery1830" + str(randint(10 ** 16, 10 ** 17 - 1)) + "_" + get_js_time(),
                 "_": get_js_time(),
                 }
    url = "https://shop.10086.cn/i/v1/fee/detbillrandomcodejsonp/" + username + "?" + urlencode(form_data)

    key = username + ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX + args["account_type"]
    try:
        ssdb_conn = get_ssdb_conn()
        headers = ssdb_conn.get(key)
        if headers is not None:
            # NOTE(review): eval() executes whatever text is stored under this
            # key. Presumably only the crawler writes it -- confirm, and
            # prefer a JSON payload so no code evaluation is needed.
            sms_content = http_get(url, headers=eval(headers), verify=False).content.decode()
            if '"retCode":"000000"' in sms_content:  # success
                add_ajax_ok_json(ret_data)
            elif '"retCode":"570007"' in sms_content:  # system busy
                add_ajax_error_json(ret_data, "系统繁忙,请重试。")
            else:
                # Unknown reply: pass the raw response text back to the client.
                add_ajax_error_json(ret_data, sms_content)
        else:
            # No stored headers for this user/account type.
            add_ajax_error_json(ret_data, "无法获取短信验证码,请刷新页面重试!")
    except Exception:
        # Best effort: any failure is reported to the client as a retry hint.
        add_ajax_error_json(ret_data, "获取短信验证码失败,请重试。")

    return JsonResponse(ret_data)
Пример #3
0
def save_img_file_from_ssdb(uid, website=""):
    """Write the captcha image stored in SSDB for ``uid`` to a file.

    The base64 payload stored under
    ``uid + ACCOUNT_CRAWLING_IMG_DATA_SSDB_SUFFIX`` is decoded and handed to
    ``save_captcha_2_file``. ``uid`` is split at its last dot into a base
    name and an extension.

    :param uid: image identifier of the form ``<name>.<extension>``.
    :param website: passed through unchanged to ``save_captcha_2_file``.
    """
    encoded_image = get_ssdb_conn().get(uid + ACCOUNT_CRAWLING_IMG_DATA_SSDB_SUFFIX)
    base_name, extension = uid.rsplit(".", 1)
    raw_image = b64decode(encoded_image)
    save_captcha_2_file(raw_image, base_name, "." + extension, website)
Пример #4
0
def get_sohu_img_captcha(request):
    """Fetch an image captcha for a Sohu account and return it base64-encoded.

    The cookies and the captcha URL are read from SSDB under keys derived
    from the posted ``username`` and ``account_type``; the image is
    downloaded with those cookies and placed in ``img_data``.

    :param request: request whose POST data provides ``username`` and
        ``account_type``.
    :return: ``JsonResponse`` with ``img_data`` on success, or an error
        payload.
    """
    ret_data = {}
    args = request.POST
    username = args["username"].strip()
    account_type = args["account_type"]

    if not username:
        add_ajax_error_json(ret_data, "用户名为空")
        return JsonResponse(ret_data)

    header_key = username + ACCOUNT_CRAWLING_IMG_HEADERS_SSDB_SUFFIX + account_type
    url_key = username + ACCOUNT_CRAWLING_IMG_URL_SSDB_SUFFIX + account_type
    try:
        ssdb_conn = get_ssdb_conn()
        cookies_dict = ssdb_conn.get(header_key)
        if not cookies_dict:
            add_ajax_error_json(ret_data, "无法获取图片验证码")
            # Return right away: falling through would reach the ``else``
            # clause below, which marks the payload ok and overwrites the
            # error that was just recorded.
            return JsonResponse(ret_data)

        # NOTE(review): eval() executes whatever text is stored under this
        # key. Presumably only the crawler writes it -- confirm, and prefer
        # a JSON payload so no code evaluation is needed.
        cookies_dict = eval(cookies_dict)
        captcha_url = ssdb_conn.get(url_key)
        img_content = http_get(captcha_url, cookies=cookies_dict).content
        ret_data["img_data"] = bytes.decode(b64encode(img_content))
    except Exception:
        add_ajax_error_json(ret_data, "无法获取图片验证码")
    else:
        add_ajax_ok_json(ret_data)

    return JsonResponse(ret_data)
Пример #5
0
def push_data_2_ssbd(username,
                     spider_name,
                     account_type,
                     ssdb_data,
                     crawling_info=""):
    """Reset an account's crawl state in SSDB and enqueue a crawl job.

    Deletes the account's pending-captcha, message and header keys, sets its
    status to ``"crawling"``, stores ``crawling_info`` and appends the
    serialised ``ssdb_data`` to the spider's queue.

    :param username: account name, used as the key prefix.
    :param spider_name: prefix of the queue that receives the job.
    :param account_type: account category, used as the key suffix.
    :param ssdb_data: job payload, serialised with ``data_dumps``.
    :param crawling_info: value stored under the account's info key.
    """
    def account_key(suffix):
        # All per-account keys share the <username><suffix><account_type> layout.
        return username + suffix + account_type

    ssdb_conn = get_ssdb_conn()

    stale_suffixes = (
        ACCOUNT_CRAWLING_NEED_IMG_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_NEED_SMS_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_NEED_IMG_SMS_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_NEED_EXTRA_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_NEED_QRCODE_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_MSG_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_IMG_HEADERS_SSDB_SUFFIX,
        ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX,
    )
    ssdb_conn.multi_del(*[account_key(suffix) for suffix in stale_suffixes])

    status_key = account_key(ACCOUNT_CRAWLING_STATUS_SSDB_SUFFIX)
    ssdb_conn.setx(status_key, "crawling", DATA_EXPIRE_TIME)

    info_key = account_key(ACCOUNT_CRAWLING_INFO_SSDB_SUFFIX)
    ssdb_conn.setx(info_key, crawling_info, DATA_EXPIRE_TIME)

    queue_name = spider_name + ACCOUNT_CRAWLING_QUEUE_SSDB_SUFFIX
    ssdb_conn.qpush_back(queue_name, data_dumps(ssdb_data))
Пример #6
0
def crawl_account(request):
    """Dispatch a crawl request to the handler for its account type.

    If the status stored in SSDB for this account is not in
    ``CHECK_STATUS_LIST``, an ok payload is returned and no handler runs.
    Unknown account types produce ``HttpResponseBadRequest``.

    :param request: request whose POST data provides ``account_type`` and
        ``username``.
    :return: the selected handler's response.
    """
    args = request.POST
    account_type = args["account_type"]
    username = args["username"]

    ssdb_conn = get_ssdb_conn()
    status_key = username + ACCOUNT_CRAWLING_STATUS_SSDB_SUFFIX + account_type
    if ssdb_conn.get(status_key) not in CHECK_STATUS_LIST:
        ok_payload = {}
        add_ajax_ok_json(ok_payload)
        return JsonResponse(ok_payload)

    if account_type == "communications":
        return _handle_communications_crawling(args)
    if account_type in ("housefund", "shebao"):
        return _handle_5xian1jin_crawling(args, account_type)
    if account_type == "xuexin":
        return _handle_xuexin_crawling(args)
    if account_type == "bank":
        return _handle_bank_crawling(args)
    if account_type == "emailbill":
        return _handle_emailbill_crawling(args)
    if account_type == "zhengxin":
        # The zhengxin password comes from the session, not the POST data.
        return _handle_zhengxin_crawling(args, request.session['zhengxin_password'])
    if account_type in ("jingdong", "alipay", "taobao", "yhd"):
        return _handle_ecommerce_crawling(args)

    return HttpResponseBadRequest()
Пример #7
0
def get_img_captcha(request):
    """
    Fetch a login image captcha from authcode.jd.com, base64-encoded.

    The JSON blob stored in SSDB for this user supplies the request headers
    and the ``uuid`` used to build the captcha URL.

    :param request: request whose POST data provides ``username`` and
        ``account_type``.
    :return: ``JsonResponse`` with ``img_data`` on success, or an error
        payload.
    """
    ret_data = {}
    args = request.POST
    username = args["username"].strip()
    account_type = args["account_type"]
    if not username:
        add_ajax_error_json(ret_data, "用户名为空")
        return JsonResponse(ret_data)

    key = username + ACCOUNT_CRAWLING_IMG_HEADERS_SSDB_SUFFIX + account_type
    try:
        ssdb_conn = get_ssdb_conn()
        headers_data = ssdb_conn.get(key)
        if headers_data is None:
            add_ajax_error_json(ret_data, "无法获取验证码")
            # Return right away: falling through would reach the ``else``
            # clause below, which marks the payload ok and overwrites the
            # error that was just recorded.
            return JsonResponse(ret_data)

        headers_data_dic = json_loads(headers_data)
        tmp_headers = headers_data_dic.get("headers")
        uuid = headers_data_dic.get("uuid")
        captcha_url = "https://authcode.jd.com/verify/image?a=1&acid={uuid}&" \
                      "yys={stime}".format(uuid=uuid, stime=get_js_time())
        img_content = http_get(captcha_url, headers=tmp_headers, verify=False).content
        ret_data["img_data"] = bytes.decode(b64encode(img_content))
    except Exception:
        add_ajax_error_json(ret_data, "无法获取验证码")
    else:
        add_ajax_ok_json(ret_data)

    return JsonResponse(ret_data)
Пример #8
0
def _get_zhengxin_crawling_status(args):
    """
    Report the crawl status of a zhengxin account.

    If SSDB holds a pending image-captcha uid for the account, the image is
    saved to a file and an ``img_captcha`` status with its static URL is
    returned. Otherwise the result of ``get_crawling_data`` is returned.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse`` with the status payload.
    """
    username = args["username"]
    account_type = args["account_type"]

    payload = {}
    add_ajax_ok_json(payload)

    conn = get_ssdb_conn()

    # An image captcha is waiting to be solved.
    pending_img_uid = conn.get(username + ACCOUNT_CRAWLING_NEED_IMG_SSDB_SUFFIX + account_type)
    if pending_img_uid is None:
        # Nothing pending: return the crawl status itself.
        payload.update(get_crawling_data(conn, username, account_type, username))
    else:
        save_img_file_from_ssdb(pending_img_uid)
        payload.update({
            "crawling_status": "img_captcha",
            "username": username,
            "img_uid": pending_img_uid,
            "img_src": static("/".join((CAPTCHA_DIR, pending_img_uid))),
        })

    return JsonResponse(payload)
Пример #9
0
    def get_search_request(self):
        """Endlessly yield search requests for companies from ``get_one_company``.

        Companies whose name is already in ``self.name_set`` are skipped.
        When no company is available, a request to ``DO_NOTHING_URL`` is
        yielded instead.
        """
        ssdb_conn = get_ssdb_conn()
        mongo_instance = MongoDB(MONGO_COMPANY_DB, MONGO_COMPANY_COLLECTIONS)
        # Bound once up front, exactly as the loop below expects.
        attach_proxy = self._add_proxy
        on_search_result = self.parse_search
        seen_names = self.name_set
        search_url = "http://app03.szmqs.gov.cn/xyjggs.webui/xyjggs/Ajax/Ajax.ashx"

        while True:
            company = get_one_company(mongo_instance, ssdb_conn)
            if company is None:
                # Nothing to search for right now.
                yield Request(DO_NOTHING_URL,
                              self.do_nothing,
                              errback=self.do_nothing,
                              dont_filter=True)
                continue

            company_name = company["name"]
            if company_name in seen_names:
                continue

            search_request = FormRequest(
                search_url,
                on_search_result,
                dont_filter=True,
                formdata={
                    "action": "getSSDJBList",
                    "keyword": company_name,
                    "PageIndex": "1",
                })
            search_request.meta["company_other_info"] = company
            attach_proxy(search_request)
            yield search_request
def get_img_captcha(request):
    """Fetch an image captcha from shop.10086.cn and return it base64-encoded.

    The request headers stored in SSDB for this user are replayed against
    the ``authImg`` endpoint.

    :param request: request whose POST data provides ``username`` and
        ``account_type``.
    :return: ``JsonResponse`` with ``img_data`` on success, or an error
        payload.
    """
    ret_data = {}
    args = request.POST
    username = args["username"].strip()
    account_type = args["account_type"]
    url = "http://shop.10086.cn/i/authImg?t=" + str(rand_0_1())

    if not username:
        add_ajax_error_json(ret_data, "用户名为空")
        return JsonResponse(ret_data)

    key = username + ACCOUNT_CRAWLING_IMG_HEADERS_SSDB_SUFFIX + account_type
    try:
        ssdb_conn = get_ssdb_conn()
        headers = ssdb_conn.get(key)
        if headers is None:
            add_ajax_error_json(ret_data, "无法获取图片验证码")
            # Return right away: falling through would reach the ``else``
            # clause below, which marks the payload ok and overwrites the
            # error that was just recorded.
            return JsonResponse(ret_data)

        # NOTE(review): eval() executes whatever text is stored under this
        # key. Presumably only the crawler writes it -- confirm, and prefer
        # a JSON payload so no code evaluation is needed.
        img_content = http_get(url, headers=eval(headers)).content
        ret_data["img_data"] = bytes.decode(b64encode(img_content))
    except Exception:
        add_ajax_error_json(ret_data, "无法获取图片验证码")
    else:
        add_ajax_ok_json(ret_data)

    return JsonResponse(ret_data)
def _get_mobile_login_sms_captcha(args):
    """Ask China Mobile to send the login SMS verification code.

    The request headers stored in SSDB for this user are replayed against
    the ``sendRandomCodeAction`` endpoint. A reply of ``'0'`` is treated as
    success and ``'2'`` as the daily limit; anything else is a failure.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse`` with the ok/error payload.
    """
    ret_data = {}
    username = args["username"].strip()
    url = "https://login.10086.cn/sendRandomCodeAction.action"
    form_data = {"userName": username, "type": "01", "channelID": "12003"}
    key = username + ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX + args["account_type"]

    try:
        stored_headers = get_ssdb_conn().get(key)
        if stored_headers is None:
            add_ajax_error_json(ret_data, "无法获取短信验证码,请刷新页面重试!")
        else:
            reply = http_post(url, headers=eval(stored_headers), data=form_data, verify=False).text
            if reply == '0':
                add_ajax_ok_json(ret_data)
            elif reply == '2':
                add_ajax_error_json(ret_data, "当日短信验证码已达上限,请明天再试!")
            else:
                add_ajax_error_json(ret_data, "短信验证码发送失败,请重试!")
    except Exception:
        add_ajax_error_json(ret_data, "无法获取短信验证码,请重试。")

    return JsonResponse(ret_data)
Пример #12
0
def ask_qrcode_status(request):
    """
    Poll Taobao's QR-code login check and report the scan state.

    On code ``10006`` the redirect URL is followed and, if it answers 200,
    the ``.taobao.com`` cookies are stored in SSDB for the crawler.

    :param request: request whose POST data provides ``username``,
        ``account_type`` and optionally ``lg_token``.
    :return: ``JsonResponse``; an ok payload for codes ``10001`` and a
        completed ``10006``, otherwise an error payload that also carries
        ``need_refresh``.
    """
    ret_data = {}
    logged_or_confirmed = False
    refresh_needed = False
    try:
        form = request.POST
        username = form["username"]
        account_type = form["account_type"]
        check_url = ("https://qrlogin.taobao.com/qrcodelogin/qrcodeLoginCheck.do?"
                     "lgToken={lgToken}&defaulturl=https%3A%2F%2Fwww.taobao.com%2F"
                     ).format(lgToken=form.get("lg_token", ""))
        reply = get_response_by_requests(check_url,
                                         headers=DEFAULT_HEADERS).json()

        http_session = req_session()
        msg = "通过扫描二维码登录失败"
        code = reply.get("code")
        if code == "10006":
            landing = http_session.get(reply.get("url"),
                                       headers=DEFAULT_HEADERS,
                                       verify=False)
            if landing.status_code == 200:
                msg = "登录成功"
                cookies_str = json_dumps(
                    http_session.cookies.get_dict(domain='.taobao.com'))
                # Store the logged-in cookies in SSDB for the crawler side.
                ssdb_connect = get_ssdb_conn()
                cookie_key = username + ACCOUNT_CRAWLING_QRCODE_COOKIES_SSDB_SUFFIX + account_type
                ssdb_connect.setx(cookie_key, cookies_str, DATA_EXPIRE_TIME)
                logged_or_confirmed = True
        elif code == "10004":
            msg = "二维码已失效,请重试"
            refresh_needed = True
        elif code == "10001":
            msg = "扫描成功后,请确认登录"
            logged_or_confirmed = True
        elif code == "10000":
            msg = "请先扫描二维码"
        else:
            msg = reply.get("msg", "通过扫描二维码登录失败")
    except Exception:
        msg = "获取扫描二维码状态出错"

    if not logged_or_confirmed:
        ret_data["need_refresh"] = refresh_needed
        add_ajax_error_json(ret_data, msg)
    else:
        add_ajax_ok_json(ret_data)

    return JsonResponse(ret_data)
Пример #13
0
 def __init__(self, *args, item_class, **kwargs):
     """Initialise the parent class, then set up queue, SSDB and retry settings.

     :param item_class: keyword-only; stored as ``self.ItemClass``.
     """
     super().__init__(*args, **kwargs)
     first_start_url = self.start_urls[0]
     self._start_url_ = first_start_url
     # Queue name is the spider name plus the shared queue suffix.
     self.queue_name = "".join((self.name, ACCOUNT_CRAWLING_QUEUE_SSDB_SUFFIX))
     self.ssdb_conn = get_ssdb_conn()
     self.RETRY_TIMES, self.RETRY_SLEEP, self.SSDB_RETRY = 600, 0.5, 2
     self.ItemClass = item_class
Пример #14
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent class, fetch one proxy and open an SSDB connection."""
     super().__init__(*args, **kwargs)
     # NOTE(review): the value starts with "ROXY", not "PROXY". It looks like
     # a typo, but stored keys may already use this prefix -- confirm before
     # changing it.
     self.PROXY_USE_COUNT_SSDB_PREFIX = "ROXY_USE_COUNT-"
     self.builder = None
     # self.proxy_api is not assigned in this method; presumably it is a
     # class attribute or set by the parent initialiser -- confirm.
     self.proxy = self.proxy_api.get_proxy_one()
     self.use_count = 1
     self.ssdb_conn = get_ssdb_conn()
     self.set_ip_proxy_expire_till_tommorrow(1)
     # Log message reads "got a proxy, proxy ip: <proxy>".
     self.logger.info("获取代理,代理ip:%s" % self.proxy)
Пример #15
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent class and set up SSDB, MongoDB, proxy, pid and lock."""
     super().__init__(*args, **kwargs)
     self.ssdb_conn = get_ssdb_conn()
     # MongoDB handle built from the wenshu database and condition-collection constants.
     self.mongo_instance = MongoDB(MONGO_WENSHU_DB,
                                   MONGO_WENSHU_CONDITION_COLLECTIONS)
     self.proxy_api = ProxyApi()
     # Fetch one proxy up front from the API created on the previous line.
     self.proxy = self.proxy_api.get_proxy_one()
     # Record this process's id; it is logged below.
     self.pid = getpid()
     self.lock = Lock()
     self.logger.info("init pid->%d" % self.pid)
def _get_telecom_bills_sms_captcha(args):
    """Ask China Telecom to send a regular SMS verification code.

    The token stored (as JSON) in SSDB for this user is placed in a
    ``getRandomV2`` request, which is converted with ``DXConvertData`` and
    posted to the client XML endpoint. The converted reply is checked for
    result code ``0000``.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse`` with the ok/error payload.
    """
    ret_data = {}
    username = args["username"].strip()
    converter = DXConvertData()

    url = "http://cservice.client.189.cn:8004/map/clientXML?encrypted=true"
    key = username + ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX + args["account_type"]
    try:
        stored = get_ssdb_conn().get(key)
        if stored is None:
            add_ajax_error_json(ret_data, "无法获取短信验证码,请刷新页面重试!")
        else:
            token = json_loads(stored)["token"]
            header_infos = {
                "ClientType": "#6.2.1#channel8#Huawei DUK-AL20#",
                "Source": "110003",
                "SourcePassword": "******",
                "Token": token,
                "UserLoginName": username,
                "Code": "getRandomV2",
                "Timestamp": strftime("%Y%m%d%H%M%S"),
            }
            content = {
                "Attach": "test",
                "FieldData": {
                    "PhoneNbr": username,
                    "SceneType": "7",
                    "Imsi": {},
                },
            }
            request_body = converter.convert_request_data(
                {"Request": {"HeaderInfos": header_infos, "Content": content}})
            reply_text = http_post(url, headers=CSERVICE_HEADERS, data=request_body, verify=False).text

            reply_json = json_dumps(converter.convert_response_data(reply_text),
                                    ensure_ascii=False)
            if '"ResultCode":{"value":"0000"}' in reply_json:
                add_ajax_ok_json(ret_data)
            elif "服务中断" in reply_text:
                # The raw reply mentions a service interruption.
                add_ajax_error_json(ret_data, "电信服务中断,请稍后再试!")
            else:
                add_ajax_error_json(ret_data, "发送失败:" + reply_json)
    except Exception:
        add_ajax_error_json(ret_data, "无法获取短信验证码,请重试。")

    return JsonResponse(ret_data)
Пример #17
0
def _get_captcha_result(uid):
    """Open the captcha page in a browser tab and wait for an answer in SSDB.

    Any existing value under ``uid`` is deleted first. SSDB is then polled
    up to 100 times, sleeping 0.5 s after each miss.

    :param uid: SSDB key under which the answer is expected.
    :return: the stored answer (deleted from SSDB once read), or ``None``
        if none arrived.
    """
    conn = get_ssdb_conn()
    conn.delete(uid)

    open_new_tab(HOST_URL + "captchas_upload/show_captcha/?uid=" + uid)

    attempts_left = 100
    while attempts_left > 0:
        answer = conn.get(uid)
        if answer is not None:
            # Consume the answer so it is not read twice.
            conn.delete(uid)
            return answer
        sleep(0.5)
        attempts_left -= 1

    return None
Пример #18
0
def record_all_zhixing_id():
    """Rebuild the SSDB hash of ``link_id`` values from the zhixing detail collection.

    The hash ``SSDB_ZHIXING_ID_HSET_NAME`` is cleared and then refilled with
    one entry (empty value) per document. A failure on a single item is
    printed and skipped.
    """
    ssdb_conn = get_ssdb_conn()
    try:
        ssdb_conn.hclear(SSDB_ZHIXING_ID_HSET_NAME)

        with MongoDB(MONGO_SHIXIN_DB,
                     MONGO_ZHIXING_DETAIL_COLLECTIONS) as mongo_instance:
            for item in mongo_instance.getAll(fields={"link_id": 1, "_id": 0}):
                try:
                    ssdb_conn.hset(SSDB_ZHIXING_ID_HSET_NAME, item["link_id"], "")
                except Exception:
                    # Best effort per item: report and carry on.
                    print_exc()
    finally:
        # Close the connection even if clearing or the Mongo scan fails.
        ssdb_conn.close()

    print("record_all_zhixing_id done.")
Пример #19
0
def _get_ecommerce_crawling_status(args):
    """
    Report the crawl status of an e-commerce account.

    SSDB is checked, in order, for a pending image captcha, SMS captcha and
    QR-code scan; the first one found determines the response. If none is
    pending, the result of ``get_crawling_data`` is returned.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse`` with the status payload.
    """
    username = args["username"]
    account_type = args["account_type"]

    payload = {}
    add_ajax_ok_json(payload)

    conn = get_ssdb_conn()

    # An image captcha needs solving.
    img_uid = conn.get(username + ACCOUNT_CRAWLING_NEED_IMG_SSDB_SUFFIX + account_type)
    if img_uid is not None:
        save_img_file_from_ssdb(img_uid)
        payload.update({
            "crawling_status": "img_captcha",
            "username": username,
            "img_uid": img_uid,
            "img_src": static("/".join((CAPTCHA_DIR, img_uid))),
        })
        return JsonResponse(payload)

    # An SMS code is required.
    sms_uid = conn.get(username + ACCOUNT_CRAWLING_NEED_SMS_SSDB_SUFFIX + account_type)
    if sms_uid is not None:
        payload.update({
            "crawling_status": "sms_captcha",
            "username": username,
            "sms_uid": sms_uid,
        })
        return JsonResponse(payload)

    # A QR code needs scanning.
    qrcode_uid = conn.get(username + ACCOUNT_CRAWLING_NEED_QRCODE_SSDB_SUFFIX + account_type)
    if qrcode_uid is not None:
        payload.update({
            "crawling_status": "scan_qrcode",
            "username": username,
            "qrcode_uid": qrcode_uid,
            "qrcode_pic": conn.get(qrcode_uid + ACCOUNT_CRAWLING_IMG_DATA_SSDB_SUFFIX),
        })
        return JsonResponse(payload)

    # Nothing pending: return the crawl status itself.
    payload.update(get_crawling_data(conn, username, account_type, username))
    return JsonResponse(payload)
Пример #20
0
def verify_captcha(request):
    """Store a submitted captcha answer in SSDB and on its ``CaptchaList`` row.

    The answer is written to SSDB under ``uid`` with an 1800-second expiry
    before the database row is looked up.

    :param request: request whose POST data provides ``captcha`` and ``uid``.
    :return: ``HttpResponseNotFound`` if no row matches ``uid``, otherwise a
        plain ``HttpResponse``.
    """
    form = request.POST
    answer = form["captcha"]
    uid = form["uid"]

    get_ssdb_conn().setx(uid, answer, 1800)

    try:
        record = CaptchaList.objects.get(uid=uid)
        record.result = answer
        record.save()
    except ObjectDoesNotExist:
        return HttpResponseNotFound(u"验证码不存在")

    return HttpResponse(u"谢谢")
Пример #21
0
def send_sms_code(request):
    """
    Send (or re-send) the login SMS verification code.

    On the first call (``is_first == "true"``) the send URL is read from the
    JSON stored in SSDB and saved in the session together with the current
    time; the send itself is assumed to have succeeded. On later calls the
    function sleeps until ``SMS_SLEEP_TIME + 2`` seconds have passed since
    the recorded time, then requests the saved URL.

    :param request: request with POST data and a session.
    :return: ``JsonResponse`` with the ok/error payload.
    """
    ret_data = {}
    try:
        form = request.POST
        session = request.session
        first_request = form.get("is_first", False) == "true"
        if first_request:
            username = form["username"].strip()
            account_type = form["account_type"]
            key = username + ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX + account_type
            stored = get_ssdb_conn().get(key)
            if not stored:
                add_ajax_error_json(ret_data, "获取短信验证码失败")
                return JsonResponse(ret_data)

            session["send_url"] = json_loads(stored).get("url", "")
            session["last_send_time"] = time()

            # The first code is sent automatically, so treat it as sent.
            reply = {"stat": "ok", "info": {"sent": True}}
        else:
            previous_send = session.get("last_send_time", 0)
            if previous_send:
                wait_seconds = max(previous_send + SMS_SLEEP_TIME + 2 - time(), 0)
            else:
                wait_seconds = 0
            sleep(wait_seconds)

            resend_url = session.get("send_url")
            reply = get_response_by_requests(
                resend_url, headers=DEFAULT_HEADERS).json()

        was_sent = reply.get("stat") == "ok" and reply.get("info", {}).get("sent")
        if was_sent:
            add_ajax_ok_json(ret_data)
        else:
            remote_error = reply.get("info", {}).get("errorMessage")
            add_ajax_error_json(ret_data, remote_error or "发送短信验证码失败")
    except Exception:
        add_ajax_error_json(ret_data, "发送短信验证码出错")

    return JsonResponse(ret_data)
Пример #22
0
def _ask_send_ecommerce_sms_captcha(args):
    """Flag in SSDB that the crawler should send an e-commerce SMS code.

    :param args: mapping providing ``username`` and ``account_type``.
    :return: ``JsonResponse``; an error payload if the username is empty,
        otherwise an ok payload.
    """
    username = args['username'].strip()
    account_type = args['account_type'].strip()
    ret_data = {}

    if username:
        flag_key = username + ACCOUNT_CRAWLING_ASK_SEND_SMS_SSDB_SUFFIX + account_type
        get_ssdb_conn().setx(flag_key, True, DATA_EXPIRE_TIME)
        add_ajax_ok_json(ret_data)
    else:
        add_ajax_error_json(ret_data, "用户名为空")

    return JsonResponse(ret_data)
Пример #23
0
def _get_5xian1jin_crawling_status(args):
    """
    Report the crawl status of a social-insurance / housing-fund account.

    :param args: mapping providing ``username``, ``account_type`` and
        ``city``.
    :return: ``JsonResponse`` with the ok payload merged with the result of
        ``get_crawling_data``, called with ``"<city>:<username>"`` as its
        last argument.
    """
    username = args["username"]
    account_type = args["account_type"]
    city = args["city"]

    payload = {}
    add_ajax_ok_json(payload)

    conn = get_ssdb_conn()

    # Return the crawl status.
    status = get_crawling_data(conn, username, account_type, city + ":" + username)
    payload.update(status)
    return JsonResponse(payload)
Пример #24
0
def record_all_company_name():
    """Rebuild the SSDB hash of company names from the company collection.

    The hash ``SSDB_COMPANY_HSET_NAME`` is cleared and then refilled with
    every name shorter than 60 characters (empty value). A failure on a
    single item is printed and skipped.
    """
    ssdb_conn = get_ssdb_conn()
    try:
        ssdb_conn.hclear(SSDB_COMPANY_HSET_NAME)

        with MongoDB(MONGO_COMPANY_DB,
                     MONGO_COMPANY_COLLECTIONS) as mongo_instance:
            for item in mongo_instance.getAll(fields={"name": 1, "_id": 0}):
                try:
                    # Record already-crawled company names in SSDB so they
                    # are not crawled again.
                    name = item["name"]
                    if len(name) < 60:
                        ssdb_conn.hset(SSDB_COMPANY_HSET_NAME, name, "")
                except Exception:
                    # Best effort per item: report and carry on.
                    print_exc()
    finally:
        # Close the connection even if clearing or the Mongo scan fails.
        ssdb_conn.close()

    print("record_all_company_name done.")
Пример #25
0
def push_all_tianyancha_company_id():
    """Refill the tianyancha SSDB queue with matching company ids.

    The queue ``SSDB_TIANYANCHA_QUEUE_NAME`` is cleared, then the ``_id`` of
    every document whose ``search_url`` starts with the tianyancha company
    URL is pushed, in descending ``_id`` order.
    """
    ssdb_conn = get_ssdb_conn()
    try:
        ssdb_conn.qclear(SSDB_TIANYANCHA_QUEUE_NAME)

        with MongoDB(MONGO_COMPANY_DB,
                     MONGO_COMPANY_DETAIL2_COLLECTIONS) as mongo_instance:
            for item in mongo_instance.getAll(
                    fields={"_id": 1},
                    filter={
                        "search_url":
                        re_compile(r"^http://www\.tianyancha\.com/company/")
                    },
                    sort=[("_id", MONGO_DESCENDING)]):
                ssdb_conn.qpush_back(SSDB_TIANYANCHA_QUEUE_NAME, str(item["_id"]))
    finally:
        # Close the connection even if clearing or the Mongo scan fails.
        ssdb_conn.close()

    print("push_all_tianyancha_company_id done.")
Пример #26
0
def record_all_shixin_id():
    """Rebuild the SSDB hash of ids from the shixin detail collection.

    The hash ``SSDB_SHIXIN_ID_HSET_NAME`` is cleared and then refilled with
    one ``"<from_web>_<link_id>"`` entry (empty value) per document. A
    failure on a single item is printed and skipped.
    """
    ssdb_conn = get_ssdb_conn()
    try:
        ssdb_conn.hclear(SSDB_SHIXIN_ID_HSET_NAME)

        with MongoDB(MONGO_SHIXIN_DB,
                     MONGO_SHIXIN_DETAIL_COLLECTIONS) as mongo_instance:
            for item in mongo_instance.getAll(fields={
                    "from_web": 1,
                    "link_id": 1,
                    "_id": 0
            }):
                try:
                    the_id = item["from_web"] + "_" + str(item["link_id"])
                    ssdb_conn.hset(SSDB_SHIXIN_ID_HSET_NAME, the_id, "")
                except Exception:
                    # Best effort per item: report and carry on.
                    print_exc()
    finally:
        # Close the connection even if clearing or the Mongo scan fails.
        ssdb_conn.close()

    print("record_all_shixin_id done.")
Пример #27
0
def record_all_shixinlist_id():
    """Rebuild the SSDB hash of ids from the shixin list collection.

    The hash ``SSDB_SHIXIN_LIST_ID_HSET_NAME`` is cleared and then refilled
    with one ``name + id`` entry (empty value) per document. A failure on a
    single item is printed and skipped.
    """
    ssdb_conn = get_ssdb_conn()
    try:
        ssdb_conn.hclear(SSDB_SHIXIN_LIST_ID_HSET_NAME)

        with MongoDB(MONGO_SHIXIN_DB,
                     MONGO_SHIXIN_LIST_COLLECTIONS) as mongo_instance:
            for item in mongo_instance.getAll(fields={
                    "id": 1,
                    "name": 1,
                    "_id": 0
            }):
                try:
                    the_id = item["name"] + item["id"]
                    ssdb_conn.hset(SSDB_SHIXIN_LIST_ID_HSET_NAME, the_id, "")
                except Exception:
                    # Best effort per item: report and carry on.
                    print_exc()
    finally:
        # Close the connection even if clearing or the Mongo scan fails.
        ssdb_conn.close()

    print("record_all_shixin_list_id done.")
Пример #28
0
def push_all_company_id():
    """Refill the company SSDB queue with ids of companies not yet crawled.

    Names already present in either detail collection count as finished.
    The queue ``SSDB_COMPANY_QUEUE_NAME`` is cleared, then the ``_id`` of
    every company whose ``area`` is ``"shenzhen"`` or whose name matches the
    regex below is pushed, in descending ``_id`` order, unless its name is
    finished.
    """
    name_only = {"name": 1, "_id": 0}

    with MongoDB(MONGO_COMPANY_DB,
                 MONGO_COMPANY_DETAIL_COLLECTIONS) as mongo_instance:
        finished = {item["name"]
                    for item in mongo_instance.getAll(fields=name_only)}

    with MongoDB(MONGO_COMPANY_DB,
                 MONGO_COMPANY_DETAIL2_COLLECTIONS) as mongo_instance:
        finished.update(item["name"]
                        for item in mongo_instance.getAll(fields=name_only))

    ssdb_conn = get_ssdb_conn()
    try:
        ssdb_conn.qclear(SSDB_COMPANY_QUEUE_NAME)

        with MongoDB(MONGO_COMPANY_DB,
                     MONGO_COMPANY_COLLECTIONS) as mongo_instance:
            for item in mongo_instance.getAll(
                    fields={"name": 1, "_id": 1},
                    filter={
                        "$or": [
                            {"area": "shenzhen"},
                            {"name": re_compile(r".*深圳.*")},
                        ]
                    },
                    sort=[("_id", MONGO_DESCENDING)]):
                if item["name"] not in finished:
                    ssdb_conn.qpush_back(SSDB_COMPANY_QUEUE_NAME, str(item["_id"]))
    finally:
        # Close the connection even if clearing or the Mongo scan fails.
        ssdb_conn.close()

    print("push_all_company_id done.")
def check_sms_timeout(request):
    """Check whether the SMS verification code has timed out.

    The account's stored crawling info is read from SSDB and passed to
    ``get_crawling_data``, whose result is merged into the response.

    :param request: request whose POST data provides ``account_type`` and
        ``username``.
    :return: ``JsonResponse`` with the ok/error payload.
    """
    ret_data = {}
    form = request.POST
    account_type = form["account_type"]
    username = form["username"].strip()

    if not username:
        add_ajax_error_json(ret_data, "用户名为空")
        return JsonResponse(ret_data)

    conn = get_ssdb_conn()
    try:
        info_key = username + ACCOUNT_CRAWLING_INFO_SSDB_SUFFIX + account_type
        stored_info = conn.get(info_key)
        status = get_crawling_data(conn, username, account_type, stored_info)
        ret_data.update(status)
    except Exception:
        add_ajax_error_json(ret_data, "获取用户状态失败")
    else:
        add_ajax_ok_json(ret_data)

    return JsonResponse(ret_data)
Пример #30
0
def submit_captcha_code(request):
    """Store every submitted captcha answer in SSDB for the crawler.

    For each captcha kind whose ``<kind>_captcha`` field is present in the
    POST data, the account's ``need`` key is deleted and the stripped answer
    is stored under the posted ``<kind>_uid``. An image answer also clears
    the account's image+SMS ``need`` key.

    :param request: request whose POST data provides ``account_type``,
        ``username`` and the captcha fields.
    :return: ``JsonResponse`` with an ok payload.
    """
    form = request.POST
    account_type = form["account_type"]
    username = form["username"]

    conn = get_ssdb_conn()
    for kind in ("sms", "img", "extra", "qrcode", "name_idcard_sms"):
        answer_field = kind + "_captcha"
        if answer_field not in form:
            continue

        uid = form[kind + "_uid"]
        answer = form[answer_field].strip()
        need_key = username + "-need_" + kind + "_captcha-" + account_type
        conn.delete(need_key)
        conn.setx(uid, answer, DATA_EXPIRE_TIME)
        if kind == "img":
            conn.delete(username + ACCOUNT_CRAWLING_NEED_IMG_SMS_SSDB_SUFFIX + account_type)

    ok_payload = {}
    add_ajax_ok_json(ok_payload)
    return JsonResponse(ok_payload)