Пример #1
0
    def parse_captcha(self, site):
        """Fetch the login page and solve its captcha when one is present.

        The login page is requested with ``site.login_headers``. If the page
        contains an ``img`` element inside a ``form``, the first such image is
        downloaded to ``/tmp/cap.png`` and the result of
        ``PuTaoCaptchaParser.analyze`` on that file is returned. Otherwise the
        literal string ``"XxXx"`` is returned.
        """
        login_page = HttpUtils.get("https://pt.sjtu.edu.cn/login.php",
                                   headers=site.login_headers)
        form_images = login_page.select("form img")

        # No captcha image on the page: nothing to analyze.
        if not form_images:
            return "XxXx"

        captcha_path = "/tmp/cap.png"
        captcha_url = "https://pt.sjtu.edu.cn/" + form_images[0]["src"]
        HttpUtils.download_file(captcha_url, captcha_path, over_write=True)
        return PuTaoCaptchaParser.analyze(captcha_path)
Пример #2
0
 def do_process(cls, item):
     """Run one download task and re-queue it if the download fails.

     ``item`` is indexed as ``item[0]`` (destination path), ``item[1]``
     (source URL) and ``item[2]`` (retry counter). While the counter is at
     most 5 the file is downloaded; when ``HttpUtils.download_file`` returns
     a falsy value, the counter stored in ``item`` is incremented, the item
     is put back on ``cls.task_pool`` and ``cls.init_thread()`` is called.
     Once the counter exceeds 5 a message is printed and the task is dropped.
     """
     dest_path, source_url, attempts = item[0], item[1], item[2]

     within_budget = attempts <= 5
     if not within_budget:
         print("Exceed max retry time: " + dest_path)
         return

     downloaded = HttpUtils.download_file(url=source_url, dest_path=dest_path)
     if downloaded:
         return

     # Failed attempt: bump the counter kept inside the item and reschedule.
     item[2] += 1
     cls.task_pool.put(item)
     cls.init_thread()
Пример #3
0
    def login(self, site):
        """Log in to the site unless a login is unnecessary or already done.

        No request is sent when ``self.isLogin`` is already true, when
        ``site.login_needed`` is false, or when ``self.check_login(site)``
        succeeds; in those cases ``self.isLogin`` is set to ``True`` and
        ``True`` is returned.

        Otherwise the login page is fetched, the captcha image (if any) is
        downloaded to ``/tmp/cap.png`` and its analysis result is stored in
        ``site.check_code``, the form built by ``self.build_post_data(site)``
        is posted to ``site.login_page``, and the outcome of a fresh
        ``self.check_login(site)`` is stored in ``self.isLogin`` and returned.
        """
        # Same evaluation order as the original condition: cached flag first,
        # then ``site.login_needed``, then the ``check_login`` call.
        if self.isLogin or not site.login_needed or self.check_login(site):
            self.isLogin = True
            return True

        soup_obj = HttpUtils.get("https://pt.sjtu.edu.cn/login.php",
                                 headers=site.login_headers)

        captcha_images = soup_obj.select("form img")
        if captcha_images:
            # parse captcha image and keep the result for the post data
            image_url = "https://pt.sjtu.edu.cn/" + captcha_images[0]["src"]
            HttpUtils.download_file(image_url, "/tmp/cap.png", over_write=True)
            site.check_code = PuTaoCaptchaParser.analyze("/tmp/cap.png")
        else:
            # Without this guard, indexing the empty selection raised
            # IndexError and aborted the login when the page carried no
            # captcha image. Submit a filler code instead.
            site.check_code = "XxXx"

        # The response itself is not inspected; success is determined by
        # re-running check_login below.
        HttpUtils.post(site.login_page,
                       data=self.build_post_data(site),
                       headers=site.login_headers,
                       returnRaw=True)

        self.isLogin = self.check_login(site)
        return self.isLogin
Пример #4
0
 def convert(cls, text, audio_file_path):
     """Download synthesized speech for ``text`` from Baidu's text2audio URL.

     ``text`` is percent-encoded and placed in the ``tex`` query parameter;
     the response is saved to ``audio_file_path`` via
     ``HttpUtils.download_file``, overwriting any existing file.
     """
     # Encode the text itself rather than the assembled URL. Quoting the
     # whole URL with ``safe=string.printable`` left every ASCII character
     # untouched, so ``&``, ``#``, ``%``, ``+`` or whitespace inside the text
     # corrupted the query string. Non-ASCII text encodes exactly as before.
     encoded_text = quote(str(text), safe="")
     url = 'http://tts.baidu.com/text2audio?idx=1&tex=%s&cuid=baidu_speech_demo&cod=1&lan=zh&ctp=1&pdt=1&spd=4&per=5&vol=5&pit=7' % encoded_text
     HttpUtils.download_file(url, audio_file_path, over_write=True)