Пример #1
0
 def report(self, cid):
     """
     Report the result of a captcha to the YunDaMa service (flag is always 0).

     Sends a multipart "report" request for ``cid`` and logs whether the
     service answered with ``ret == 0``. Any error raised while sending the
     request or decoding the reply is caught and logged as a failed report,
     so this method does not raise for those cases.

     :param cid: captcha id to report
     :return: None
     """
     post_data = spider.make_post_data(
         {
             "username": self.user_name,
             "password": self.pass_word,
             "appid": self.appid,
             "appkey": self.appkey,
             "cid": cid,
             "flag": 0,
             "method": "report",
         },
         boundary=self.boundary)
     try:
         request = urllib.request.Request(self.base_url,
                                          data=post_data,
                                          headers=self.base_headers)
         request.add_header(
             "Content-Type",
             "multipart/form-data; boundary=%s" % self.boundary)
         # Use the response as a context manager so the connection is
         # closed even when reading or decoding fails.
         with urllib.request.urlopen(request, timeout=10) as response:
             json_data = json.loads(response.read().decode("utf-8"))
         # A valid JSON reply that is not an object is treated as a failure
         # here, so the code below can rely on dict access.
         if not isinstance(json_data, dict):
             raise ValueError("unexpected response: %r" % (json_data,))
     except Exception as excep:
         json_data = {"ret": -1, "errMsg": excep}
     # .get() keeps a reply without a "ret" field from raising KeyError
     # outside the try block; such a reply is logged as failed.
     logging.warning("YunDaMa report %s: %s",
                     "succeed" if json_data.get("ret") == 0 else "failed",
                     json_data)
     return
Пример #2
0
 def upload(self, file_bytes, file_name, file_type, codetype):
     """
     Upload an image file to the YunDaMa service and return its captcha id.

     Sends a multipart "upload" request and logs whether the service
     answered with ``ret == 0``. Any error raised while sending the request
     or decoding the reply is caught and logged as a failed upload.

     :param file_bytes: raw bytes of the image
     :param file_name: file name sent with the image part
     :param file_type: content type sent with the image part
     :param codetype: captcha type code forwarded to the service
     :return: the ``cid`` field of the reply, or "" when the reply has no
         ``cid`` (which includes every caught failure)
     """
     post_data = spider.make_post_data(
         {
             "username": self.user_name,
             "password": self.pass_word,
             "codetype": codetype,
             "appid": self.appid,
             "appkey": self.appkey,
             "timeout": 60,
             "method": "upload",
             "_file_image": [file_bytes, file_name, "file", file_type],
         },
         boundary=self.boundary)
     try:
         request = urllib.request.Request(self.base_url,
                                          data=post_data,
                                          headers=self.base_headers)
         request.add_header(
             "Content-Type",
             "multipart/form-data; boundary=%s" % self.boundary)
         # Use the response as a context manager so the connection is
         # closed even when reading or decoding fails.
         with urllib.request.urlopen(request, timeout=60) as response:
             json_data = json.loads(response.read().decode("utf-8"))
         # A valid JSON reply that is not an object is treated as a failure
         # here, so the .get() calls below are always safe.
         if not isinstance(json_data, dict):
             raise ValueError("unexpected response: %r" % (json_data,))
     except Exception as excep:
         json_data = {"ret": -1, "errMsg": excep}
     # .get() keeps a reply without a "ret" field from raising KeyError
     # outside the try block; such a reply is logged as failed.
     logging.warning("YunDaMa upload %s: %s",
                     "succeed" if json_data.get("ret") == 0 else "failed",
                     json_data)
     return json_data.get("cid", "")
Пример #3
0
    def check_anti_by_captcha(self, html):
        """
        Get past the anti-spider check by solving the captcha found in html.

        Looks up the captcha image (``<img node-type="yzm_img">``) in
        ``html``, downloads it through ``self.opener`` and hands it to
        ``self.yundama.get_captcha`` until a code is returned. The code is
        then posted to the pincode verification URL. When verification
        fails and a captcha id is available, that id is passed to
        ``self.yundama.report``.

        :param html: HTML of the page that contains the captcha image
        :return: None; the outcome is only logged
        """
        soup = bs4.BeautifulSoup(html, "html.parser")

        # The parsed page does not change between retries, so the image
        # lookup is done once. find() returns None when no tag matches.
        captcha_img = soup.find("img", attrs={"node-type": "yzm_img"})
        if captcha_img is None:
            logging.warning(
                "WeiBoSearch anti-spider failed: captcha image not found")
            return
        captcha_url = captcha_img.get("src")

        cid, code = None, None
        # NOTE(review): this retries for as long as no code comes back;
        # there is no upper bound on the number of attempts.
        while not code:
            response = self.opener.open(
                spider.get_url_legal(captcha_url, self.search_url))
            cid, code = self.yundama.get_captcha(response.read(),
                                                 "captcha.jpeg",
                                                 "image/jpeg",
                                                 codetype="1004")

        # The __rnd query parameter is the current time in milliseconds.
        verified_url = "http://s.weibo.com/ajax/pincode/verified?__rnd=%d" % int(
            time.time() * 1000)
        post_data = spider.make_post_data({
            "secode": code,
            "type": "sass",
            "pageid": "weibo",
            "_t": 0
        })
        temp = json.loads(
            spider.get_html_content(
                self.opener.open(verified_url, data=post_data)))
        if temp["code"] == "100000":
            logging.warning("WeiBoSearch anti-spider succeed")
        else:
            logging.warning("WeiBoSearch anti-spider failed")
            # Report the captcha only when an id is available.
            if cid:
                self.yundama.report(cid)
        return
Пример #4
0
    def login(self, user_name, pass_word, proxies=None):
        """
        Log in to weibo.com and report whether the login succeeded.

        On success ``self.user_uniqueid`` and ``self.user_nick`` are filled
        from the server reply; both are reset to None at the start of every
        call. When the server asks for a captcha, the image is written to
        ``captcha.jpeg`` and the code is read from standard input.

        :param user_name: account name, stored on ``self.user_name``
        :param pass_word: account password, stored on ``self.pass_word``
        :param proxies: forwarded to ``spider.make_cookiejar_opener``
        :return: True when both the unique id and the nickname were obtained,
            otherwise False
        """
        # Initialise the per-login state.
        self.user_name = user_name
        self.pass_word = pass_word
        self.user_uniqueid = None
        self.user_nick = None

        # Build the cookie jar and opener (with the given proxies, if any) so
        # the rest of the flow does not have to deal with cookies itself.
        self.cookie_jar, self.opener = spider.make_cookiejar_opener(
            is_cookie=True, proxies=proxies)
        self.opener.addheaders = spider.make_headers(
            user_agent="pc",
            host="weibo.com",
            referer="http://weibo.com/",
            accept=
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            accept_encoding="gzip, deflate",
            accept_language="zh-CN,zh;q=0.8").items()

        # (1) Open weibo.com/login.php first to obtain the required cookies.
        self.opener.open("http://weibo.com/login.php")

        # (2) Get the encrypted form of the user name.
        s_user_name = self.get_username()

        # (3) Use the encrypted user name to fetch further data (JSON).
        json_data = self.get_json_data(su_value=s_user_name)
        if not json_data:
            return False

        # (4) Get the encrypted password from the data fetched in step 3.
        s_pass_word = self.get_password(json_data["servertime"],
                                        json_data["nonce"],
                                        json_data["pubkey"])

        # (5) Build the post data used for the login request.
        post_dict = {
            "entry": "weibo",
            "gateway": "1",
            "from": "",
            "savestate": "7",
            "userticket": "1",
            "vsnf": "1",
            "service": "miniblog",
            "encoding": "UTF-8",
            "pwencode": "rsa2",
            "sr": "1280*800",
            "prelt": "529",
            "url":
            "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
            "rsakv": json_data["rsakv"],
            "servertime": json_data["servertime"],
            "nonce": json_data["nonce"],
            "su": s_user_name,
            "sp": s_pass_word,
            "returntype": "TEXT",
        }

        # (6) If a captcha is required, download it and ask the user for the
        # code. NOTE: the code is entered by hand here; an earlier variant
        # obtained it from self.yundama.get_captcha with codetype="1005".
        if json_data.get("showpin", None) == 1:
            url = "http://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int(
                time.time()), json_data["pcid"])
            with open("captcha.jpeg", "wb") as file_out:
                file_out.write(self.opener.open(url).read())
            code = input("请输入验证码:")
            post_dict["pcid"] = json_data["pcid"]
            post_dict["door"] = code

        # (7) Log in with the post data built above.
        login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int(
            time.time())
        json_data_1 = json.loads(
            spider.get_html_content(
                self.opener.open(login_url_1,
                                 data=spider.make_post_data(post_dict))))
        if json_data_1["retcode"] == "0":
            # A successful login is followed by a redirect; build its query.
            post_dict = {
                "callback": "sinaSSOController.callbackLoginStatus",
                "ticket": json_data_1["ticket"],
                "ssosavestate": int(time.time()),
                "client": "ssologin.js(v1.4.18)",
                "_": int(time.time() * 1000),
            }
            login_url_2 = "https://passport.weibo.com/wbsso/login?" + urllib.parse.urlencode(
                post_dict)
            html_data = spider.get_html_content(self.opener.open(login_url_2),
                                                charset="gbk")

            # The JSON payload is wrapped in "callback(...)". A raw string is
            # used so "\(" is a regex escape, not an invalid string escape.
            matched = re.search(r"\((?P<result>.*)\)", html_data)
            if matched is None:
                logging.warning("WeiBoLogin failed: %s", html_data)
                return False
            json_data_2 = json.loads(matched.group("result"))

            # Check whether the login succeeded and, if so, keep the user's
            # unique id and nickname.
            if json_data_2["result"] is True:
                self.user_uniqueid = json_data_2["userinfo"]["uniqueid"]
                self.user_nick = json_data_2["userinfo"]["displayname"]
                logging.warning("WeiBoLogin succeed: %s", json_data_2)
            else:
                logging.warning("WeiBoLogin failed: %s", json_data_2)
        else:
            logging.warning("WeiBoLogin failed: %s", json_data_1)
        return bool(self.user_uniqueid and self.user_nick)