Example #1
    def start(cls):

        root_url = "http://www.mangabz.com/manga-list-p%d/"
        page_num = 0

        while True:
            cls.init_thread()

            page_num += 1
            print("Now page " + str(page_num))
            url = root_url % page_num
            resp = HttpUtils.get_with_retry(url, headers=cls.headers)
            if resp is None:
                break

            links = HttpUtils.get_attrs(resp, ".mh-item-detali > .title > a",
                                        "href")
            if len(links) == 0:
                break

            for link in links:
                cls.task_pool.put(link)

        cls.process_thread.join()
        cls.fp.close()
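Most of these examples rely on HttpUtils.get_with_retry returning a parsed document or None once retries are exhausted. The real HttpUtils is not shown here; a minimal sketch of such a wrapper, assuming requests and BeautifulSoup:

    import time

    import requests
    from bs4 import BeautifulSoup

    def get_with_retry(url, headers=None, retries=5, backoff=1.0):
        # hypothetical stand-in for HttpUtils.get_with_retry
        for attempt in range(retries):
            try:
                resp = requests.get(url, headers=headers, timeout=10)
                if resp.status_code == 200:
                    return BeautifulSoup(resp.text, "html.parser")
            except requests.RequestException:
                pass
            time.sleep(backoff * (attempt + 1))  # linear backoff between attempts
        return None  # callers treat None as "give up" (see the break above)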
Example #2
    def parse_lvl_two(cls, info):
        url = info[0]
        index = info[1]

        # create folder once
        folder_name = "output/龙珠/" + str(index)
        os.makedirs(folder_name, exist_ok=True)

        retry = 0
        while True:
            resp = HttpUtils.get(url)
            if resp is not None:
                break
            else:
                retry += 1

            assert retry < 5, "fail to query %s" % url

        links = HttpUtils.get_attrs(resp, ".ListContainer .ItemThumb a",
                                    "style")

        assert links is not None

        for link in links:
            url = re.search(r"background:url\(.*'(.*)'",
                            link).group(1).replace("_thumb.", "")
            file_name = url.split("/")[-1]
            cls.task_pool.put([folder_name + "/" + file_name, url, 0])
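The style attribute parsed above carries the thumbnail URL inline; the full-size URL is then derived by stripping the "_thumb." marker. A quick check of the regex against a made-up attribute value:

    import re

    # hypothetical style value; the real attribute layout is assumed
    style = "background:url('https://example.com/comics/0001_thumb.jpg')"
    m = re.search(r"background:url\(.*'(.*)'", style)
    print(m.group(1))  # https://example.com/comics/0001_thumb.jpg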
Example #3
    def async_sign(self):
        self.site = self.generate_site()
        while True:
            t = time.strftime("%M:%S", time.localtime())
            if t.endswith("59"):
                break
            time.sleep(1)

        print(HttpUtils.get_time_stamp())
        while True:
            t = int(datetime.datetime.now().microsecond / 10000)
            if t >= 90:
                break
            time.sleep(0.001)

        print(HttpUtils.get_time_stamp())
        print("go go go!")
        while True:
            loop = asyncio.get_event_loop()
            loop.run_until_complete(self.run(500))
            print(HttpUtils.get_time_stamp())

            t = int(datetime.datetime.now().microsecond / 10000)
            print(t)
            if t >= 30:
                break
            time.sleep(0.001)
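self.run(500) is not shown; judging by the timing code it fires a burst of concurrent requests right at the minute boundary. A rough aiohttp sketch of such a coroutine (the URL and headers are assumptions):

    import asyncio

    import aiohttp

    async def burst(url, headers, n):
        # fire n GET requests at once and collect the status codes
        async with aiohttp.ClientSession(headers=headers) as session:
            async def hit():
                async with session.get(url) as resp:
                    return resp.status
            return await asyncio.gather(*(hit() for _ in range(n)))

    # usage: asyncio.get_event_loop().run_until_complete(burst(sign_url, headers, 500))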
Example #4
    def parse_lvl_one(cls):
        if cls.book_id is None:
            return

        url = "http://www.js518.net/mohuanmanhua/%s/" % cls.book_id
        retry = 0
        while True:
            resp = HttpUtils.get(url)
            if resp is not None:
                break
            else:
                retry += 1

            assert retry < 5, "fail to query %s" % url

        cls.comic_name = HttpUtils.get_content(resp, "title").strip()
        links = HttpUtils.get_attrs(resp, "#mh-chapter-list-ol-0 a", "href")

        titles = HttpUtils.get_contents(resp, "#mh-chapter-list-ol-0 a")

        assert len(titles) == len(links)

        cls.init_thread()

        for index in range(len(titles)):
            link = links[index]
            title = titles[index].strip()
            cls.parse_lvl_two((link, title))
        cls.process_thread.join()

        # code below should be useless if everything goes well
        while not cls.task_pool.empty():
            print("pool size = " + str(cls.task_pool.qsize()))
            cls.init_thread()
            cls.process_thread.join()
Example #5
    def load_weather_data(cls):
        headers = {
            "User-Agent":
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "d1.weather.com.cn",
            "Referer": "http://www.weather.com.cn/weather1d/%s.shtml" % cls.city_code
        }

        res = HttpUtils.get("http://d1.weather.com.cn/sk_2d/%s.html?_=%d" % (cls.city_code, round(time.time() * 1000)),
                            headers=headers,
                            return_raw=True)
        html = res.content.decode("utf-8")
        data = json.loads(html.replace("var dataSK = ", ""))

        res = HttpUtils.get(
            "http://d1.weather.com.cn/dingzhi/%s.html?_=%d" % (cls.city_code, round(time.time() * 1000)),
            headers=headers,
            return_raw=True)

        html = res.content.decode("utf-8")
        html2 = html.replace("var cityDZ101020100 =", "").replace(";var alarmDZ101020100 ={\"w\":[]}", "")
        data2 = json.loads(html2).get("weatherinfo")

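        # format string reads: "Today %s, high %s, low %s, %s%s, current temperature %s, AQI %s, relative humidity %s"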
        return "今天%s,最高气温%s,最低气温%s,%s%s, 当前气温%s,空气质量指数%s,相对湿度%s" % (
            data2.get("weather"), data2.get("temp"), data2.get("tempn"), data2.get("wd"), data2.get("ws"),
            data.get("temp"), data.get("aqi"), data.get("sd"))
Example #6
    def crawl_sub_category_book(cls, sub_category_meta):
        ku_book_title_list = list()

        category_name = sub_category_meta[0]
        sub_category_name = sub_category_meta[1]
        sub_category_link = cls.amazon_base_url + sub_category_meta[2]
        page_num = int(sub_category_meta[3])

        for page in range(1, page_num + 1):
            print("reading cat=%s,sub-cat=%s,page=%s" %
                  (category_name, sub_category_name, page))
            url = sub_category_link.split("%page=")[0] + "&page=" + str(page)
            soup_obj = HttpUtils.get(url, headers=cls.amazon_headers)

            if soup_obj is None:
                print("blocked?")
                break

            title_list = HttpUtils.get_contents(
                soup_obj,
                "div.s-result-list div.sg-col-inner h2.a-size-mini span.a-size-medium"
            )
            current_page_title_list = list()
            for title in title_list:
                # remove meta info
                title = title.split("(")[0].split("（")[0].split("【")[0]
                ku_book_title_list.append(title)
                current_page_title_list.append(title)

            print(current_page_title_list)
            sleep(random() * 0.5 + 0.5)

        return ku_book_title_list
Example #7
    def get_score(self):
        self.check_in()

        soup = HttpUtils.get("http://www.miui.com/space-uid-2248502469.html")
        assert soup is not None
        score = HttpUtils.get_content(
            soup, "#statistic_content li:nth-of-type(1) a")
        return int(score)
Example #8
    def parse_users(cls, url):
        soup_obj = HttpUtils.get(url)
        if soup_obj is None:
            print(">>>>>> Fail to parse " + url)
            return None

        data_state = HttpUtils.get_attr(soup_obj, "#data", "data-state")
        data_map = json.loads(data_state)
        return data_map['entities']['users']
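The #data element holds a JSON blob in its data-state attribute (a pattern used by server-rendered React pages). A toy illustration with a made-up fragment:

    import json

    # made-up fragment of the embedded state blob
    data_state = '{"entities": {"users": {"alice": {"followerCount": 42}}}}'
    data_map = json.loads(data_state)
    print(data_map["entities"]["users"])  # {'alice': {'followerCount': 42}}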
Example #9
    def parse_lvl_one(cls):
        if cls.book_id is None:
            print(">>>>> ERROR Cannot Parse Comic ID, QUIT! <<<<<")
            return

        resp = HttpUtils.get_with_retry("%s/%s/" % (cls.root_url, cls.book_id),
                                        headers=cls.headers)
        assert resp is not None

        cls.comic_name = HttpUtils.get_content(resp,
                                               ".detail-info-title").strip()
        cls.root_folder = os.path.join("output", cls.comic_name)
        links = HttpUtils.get_attrs(resp, "div.detail-list-form-con a", "href")

        titles = HttpUtils.get_contents(resp, "div.detail-list-form-con a")
        image_numbers = HttpUtils.get_contents(
            resp, "div.detail-list-form-con a span")
        image_numbers = list(
            map(lambda x: re.search(r"(\d+)P", x).group(1), image_numbers))

        assert len(titles) == len(image_numbers)
        assert len(titles) == len(links)

        cnt = 0
        for index in range(len(titles)):
            cls.init_thread()

            link = links[index].replace("/", "").replace("m", "")
            title = titles[index].strip()
            image_number = image_numbers[index]
            if (cls.chapter_mode == 1 and "第" not in title and "话" not in title
                    and "話" not in title) or (cls.chapter_mode == 2
                                              and "卷" not in title
                                              and "第" not in title):
                print("Skip " + title)
                continue

            is_skip = False
            if cls.inclusion_list is not None:
                for inclusion in cls.inclusion_list:
                    if inclusion not in title:
                        is_skip = True
                        break

            if not is_skip and cls.parse_lvl_two((link, title, image_number)):
                cnt += 1

        if cnt > 0:
            cls.process_thread.join()

        # code below should be useless if everything goes well
        while not cls.task_pool.empty():
            print("pool size = " + str(cls.task_pool.qsize()))
            cls.init_thread()
            cls.process_thread.join()
Example #10
    def check_and_notify(cls):
        url = "https://www.flyertea.com/forum.php?mod=forumdisplay&orderby=dateline&sum=226&fid=226&mobile=2"
        soup_obj = HttpUtils.get(url, return_raw=False)
        titles = list(map(lambda title: title.strip(), HttpUtils.get_contents(soup_obj, "div.n5sq_htmk p.n5_htnrbt")))
        readers = list(map(int, HttpUtils.get_contents(soup_obj, "div.n5sq_htmk div.n5_hthfcs")))
        flowers = list(
            map(lambda x: int(x) if x else 0, HttpUtils.get_contents(soup_obj, "div.n5sq_htmk div.n5_htdzcs")))

        print(titles)
        print(readers)
        print(flowers)
Example #11
    def action(self, data):
        vote_url = "https://kp.m-team.cc/vote.php?tid=%s&type=1"
        success_cnt = 0
        for id in data:
            res_obj = HttpUtils.get(url=vote_url % id,
                                    headers=self.site.login_headers)
            msg = HttpUtils.get_content(res_obj, "#outer table h2")
            if msg == "操作成功":
                success_cnt += 1

        print("Vote success: " + str(success_cnt))
Example #12
    def parse_current_seeds(cls, print_log=True):
        seeds = []
        cmd_result = os.popen("transmission-remote -l").read()
        lines = cmd_result.split("\n")[1: -2]  # drop the header line plus the trailing "Sum:" and empty lines

        now = datetime.datetime.now()
        for line in lines:
            seed = TransmissionSeed()
            seeds.append(seed)

            data = line.split()
            seed.id = data[0].replace("*", "")
            cmd_result = os.popen("transmission-remote -t {0} -i".format(seed.id)).read()
            seed_details = cmd_result.split("\n")

            for detail in seed_details:
                if detail.startswith("  Name: "):
                    seed.name = detail.replace("  Name: ", "")
                elif detail.startswith("  State: "):
                    seed.status = detail.replace("  State: ", "")
                elif detail.startswith("  Percent Done:"):
                    seed.done = float(detail.replace("  Percent Done: ", "").replace('%', ''))
                elif detail.startswith("  ETA: "):
                    seed.ETA = detail.replace("  ETA: ", "").replace(" ", "").split("(")[0]
                elif detail.startswith("  Download Speed: "):
                    seed.down = HttpUtils.pretty_format(
                        detail.replace("  Download Speed: ", "").replace(" ", "").split("/s")[0], "KB")
                elif detail.startswith("  Upload Speed: "):
                    seed.up = HttpUtils.pretty_format(
                        detail.replace("  Upload Speed: ", "").replace(" ", "").split("/s")[0], "KB")
                elif detail.startswith("  Total size: "):
                    seed.size = HttpUtils.pretty_format(
                        detail.replace("  Total size: ", "").replace(" ", "").split("(")[0], "MB")
                elif detail.startswith("  Ratio: "):
                    ratio_str = detail.replace("  Ratio: ", "")
                    if ratio_str == "None":
                        seed.ratio = 0.0
                    else:
                        seed.ratio = float(ratio_str)
                elif detail.startswith("  Date added: "):
                    start_time = parser.parse(detail.replace("  Date added: ", "").strip())
                    # use total_seconds(): timedelta.seconds alone wraps every 24h
                    seed.since = int((now - start_time).total_seconds())
                elif detail.startswith("  Downloaded: "):
                    seed.done_size = HttpUtils.pretty_format(
                        detail.replace("  Downloaded: ", ""), "KB")
                elif detail.startswith("  Location: "):
                    seed.location = detail.replace("  Location: ", "")

        if print_log:
            for seed in seeds:
                print(seed)

        return seeds
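The "Date added" field is parsed with dateutil, and the elapsed time must use total_seconds(), since timedelta.seconds wraps every 24 hours. A small demo with a made-up timestamp:

    import datetime

    from dateutil import parser

    # made-up "Date added" value in the format transmission prints
    start_time = parser.parse("Sat Mar  2 10:00:00 2019")
    now = datetime.datetime.now()
    print(int((now - start_time).total_seconds()))  # full span in seconds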
Example #13
    def say_thank(self, id):
        site = self.generate_site()
        assert self.login(site)

        url = "http://hdhome.org/thanks.php"

        form_data = {"id": id}
        HttpUtils.post(url,
                       data=form_data,
                       headers=self.site.login_headers,
                       returnRaw=True)
        print("Say thanks to " + str(id))
Example #14
    def parse_captcha(self, site):
        soup_obj = HttpUtils.get("https://pt.sjtu.edu.cn/login.php",
                                 headers=site.login_headers)

        captcha_image_list = soup_obj.select("form img")

        # if captcha image exists, parse expression and return
        if len(captcha_image_list) > 0:
            image_url = "https://pt.sjtu.edu.cn/" + captcha_image_list[0]["src"]
            HttpUtils.download_file(image_url, "/tmp/cap.png", over_write=True)
            return PuTaoCaptchaParser.analyze("/tmp/cap.png")
        else:
            return "XxXx"
Example #15
    def sign(self):
        self.check_in()

        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        time_start = time.time()
        for i in range(100):
            HttpUtils.get(
                "http://www.miui.com/extra.php?mod=sign/index&op=sign",
                headers=self.site.login_headers,
                return_raw=True)
        time_end = time.time()
        print('time cost', time_end - time_start, 's')
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
Example #16
    def stat(self, unit="GB", update_cache=True):
        self.login_if_not()

        soup_obj = HttpUtils.get(self.site.stat_page,
                                 headers=self.site.login_headers)
        assert soup_obj is not None

        div_list = soup_obj.select(
            "table.mainouter tr td table tr td div[align='center']")
        assert len(div_list) == 1

        content = div_list[0].contents[0]
        m = re.search(r"获取(\d+.\d+)个魔力", content)
        assert m
        mp = float(m.group(1))

        span_list = soup_obj.select("#usermsglink span")
        up = HttpUtils.pretty_format(span_list[1].contents[2], unit)
        down = HttpUtils.pretty_format(span_list[1].contents[4], unit)

        prev_up = Cache().get(self.get_site_name() + "_up")
        prev_down = Cache().get(self.get_site_name() + "_down")

        if prev_up is None:
            prev_up = 0
        else:
            prev_up = float(prev_up.decode())

        if prev_down is None:
            prev_down = 0
        else:
            prev_down = float(prev_down.decode())

        delta_up = round(up - prev_up, 2)
        delta_down = round(down - prev_down, 2)
        if delta_down == 0:
            delta_ratio = "Inf"
        else:
            delta_ratio = round(delta_up / delta_down, 2)

        current_upload = round(up - down, 2)
        print(
            "%s, mp=%s, up=%s, down=%s, current=%s, delta_up=%s, delta_down=%s, delta_ratio=%s"
            % (str(time.strftime("%Y-%m-%d %H:%M:%S")), mp, up, down,
               current_upload, delta_up, delta_down, delta_ratio))

        if update_cache:
            Cache().set(self.get_site_name() + "_up", up)
            Cache().set(self.get_site_name() + "_down", down)

        return mp, up, down
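The Cache used above returns bytes (hence the .decode() calls), which suggests a redis-backed store. A plausible stand-in, with host and port as assumptions:

    import redis

    class Cache:
        # hypothetical stand-in for the Cache used above
        def __init__(self, host="localhost", port=6379):
            self._r = redis.Redis(host=host, port=port)

        def get(self, key):
            return self._r.get(key)  # bytes or None, matching the .decode() above

        def set(self, key, value):
            self._r.set(key, value)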
Example #17
    def login(self, site):
        if not self.isLogin and site.login_needed and not self.check_login(site):
            if site.need_captcha:
                site.login_captcha_value = self.parse_captcha(site)

            # trigger login action
            HttpUtils.post(site.login_page, data=self.build_post_data(site),
                           headers=site.login_headers, returnRaw=True)

            self.isLogin = self.check_login(site)
            return self.isLogin
        else:
            self.isLogin = True
            return True
Example #18
    def check_login(self, site):
        HttpUtils.create_session_if_absent()
        HttpUtils.load_cookie()

        soup_obj = HttpUtils.get(site.home_page, headers=site.login_headers)
        content = HttpUtils.get_content(soup_obj, site.login_verify_css_selector)
        print("Current user is " + str(content))
        result = content is not None and content == site.login_verify_str

        if result:
            HttpUtils.save_cookie()
        else:
            HttpUtils.clear_cookie()

        return result
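The session and cookie helpers are not shown; a minimal sketch on top of requests, with the pickle file location as an assumption:

    import os
    import pickle

    import requests

    _session = None
    COOKIE_FILE = "/tmp/cookies.pkl"  # assumed location

    def create_session_if_absent():
        global _session
        if _session is None:
            _session = requests.Session()

    def load_cookie():
        if os.path.exists(COOKIE_FILE):
            with open(COOKIE_FILE, "rb") as f:
                _session.cookies.update(pickle.load(f))

    def save_cookie():
        with open(COOKIE_FILE, "wb") as f:
            pickle.dump(_session.cookies, f)

    def clear_cookie():
        _session.cookies.clear()
        if os.path.exists(COOKIE_FILE):
            os.remove(COOKIE_FILE)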
Example #19
    def parse_lvl_two(cls, info):
        chapter_url = info[0]
        title = info[1]

        # create folder once
        folder_name = "output/" + cls.comic_name + "/" + title
        os.makedirs(folder_name, exist_ok=True)

        #
        # path_file_number = len(glob.glob(pathname=folder_name + '/*'))
        # if path_file_number == image_number:
        #     print("下载完毕:" + title)
        #     # already downloaded all
        #     return

        print("开始下载: " + title)

        query_url = cls.root_url + chapter_url

        retry = 0
        while True:
            content = HttpUtils.get(query_url, headers=cls.headers)
            if content is not None:
                break
            else:
                retry += 1

            # the assert must sit inside the loop, or a dead site spins forever
            assert retry < 5, "fail to query %s" % query_url

        script_content = HttpUtils.get_contents(content, "script")
        print(script_content[2][1:].replace(";;", ";").replace(";", ";\n"))

        image_url_list = re.search(r"chapterImages.*=.*\[(.*)\];",
                                   script_content[2]).group(1).replace(
                                       "\"", "").split(",")

        path = re.search(r"chapterPath.*?=.*?\"(.*?)\";",
                         script_content[2]).group(1)

        assert len(image_url_list) > 0

        index = 1
        for image_url in image_url_list:
            full_image_url = "http://js1.zzszs.com.cn/" + path + image_url
            file_path = "%s/%03d_%s" % (folder_name, index, image_url)
            cls.task_pool.put([file_path, full_image_url, 0])
            index += 1
Example #20
    def parse_lvl_two(cls, url):
        content = HttpUtils.get(url, return_raw=True)
        assert content is not None
        json_data = json.loads(content.text)
        book = json_data["data"]["animeName"]
        title = json_data["data"]["title"]
        number = json_data["data"]["numberStart"]
        images = json_data["data"]["contentImg"]

        # create folder once
        '''
        folder_name = "%s/%03d_%s" % (book, int(number), title)
        if not os.path.exists(folder_name):
            os.makedirs(folder_name)

        for image in images:
            image_file_name = image["name"]
            image_url = image["url"]
            file_path = "/".join([folder_name, image_file_name])
            cls.task_pool.put([file_path, image_url, 0])
        '''
        folder_name = "%s/%03d_%s" % (book, int(number), title)

        for image in images:
            image_file_name = image["name"]
            image_url = image["url"]
            file_path = "/".join([folder_name, image_file_name])
            cls.task_pool.put([file_path, image_url, 0])
Example #21
    def parse_lvl_one(cls):
        if cls.book_id is None:
            return

        resp = HttpUtils.get(
            "https://api.ishuhui.shop/ver/4e198319/anime/detail?id=%d&type=comics&.json"
            % cls.book_id,
            return_raw=True)
        assert resp is not None

        json_data = json.loads(resp.text)
        cartoons = json_data["data"]["comicsIndexes"]["1"]["nums"]

        cls.init_thread()

        for type in cartoons.keys():
            posts = cartoons[type]
            for index in posts.keys():
                post_id = posts[index][0]["id"]

                final_url = "https://prod-api.ishuhui.com/comics/detail?id=%s" % post_id
                cls.parse_lvl_two(final_url)
        cls.process_thread.join()

        # code below should be useless if everything goes well
        while not cls.task_pool.empty():
            print("pool size = " + str(cls.task_pool.qsize()))
            cls.init_thread()
            cls.process_thread.join()
Example #22
    def parse(self, soup_obj):
        assert soup_obj is not None

        tr_list = soup_obj.select("table.torrents tr")

        seeds = []
        cnt = 0
        for tr in tr_list:
            cnt += 1
            if cnt == 1:
                # skip the caption tr
                continue

            seed = SeedInfo()
            td_list = tr.select("td.rowfollow")
            if len(td_list) < 9:
                # skip embedded contents
                continue

            seed.sticky = len(
                td_list[1].select("table td img[alt=\"Sticky\"]")) > 0
            seed.title = td_list[1].select("table td a")[0]["title"]
            seed.url = td_list[1].select("table td a")[0]['href']
            seed.free = len(td_list[1].select("table font.free")) > 0
            seed.hot = len(td_list[1].select("table font.hot")) > 0
            seed.since = HttpUtils.get_content(td_list[3], "span")
            seed.size = float(self.parse_size(td_list[4]))
            seed.upload_num = int(self.clean_tag(td_list[5]))
            seed.download_num = int(self.clean_tag(td_list[6]))
            seed.finish_num = int(self.clean_tag(td_list[7]))
            seed.id = self.parse_id(seed.url)

            seeds.append(seed)

        return seeds
Example #23
    def parse_lvl_two(cls, url):
        content = HttpUtils.get(url, return_raw=True)
        assert content is not None

        m = re.search(r"chapter: \$\.evalJSON\(\'(.*)\'\),", content.text)
        if not m or m.group(1) == "null":
            m = re.search("chapter: (.*),", content.text)
        assert m
        json_data = json.loads(m.group(1))
        book = json_data["comic_id"]
        number = json_data["chapter_id"]
        title = json_data["name"].strip().replace(" ", "-").replace(
            "（", "(").replace("）", ")")

        # create folder once
        folder_name = "%s/%08d_%s" % (book, int(number), title)
        if not os.path.exists(folder_name):
            os.makedirs(folder_name)

        m = re.search(r"image_list: \$\.evalJSON\(\'(.*)\'\),", content.text)
        if not m or m.group(1) == "null":
            m = re.search("image_list: (.*),", content.text)
        assert m
        json_data = json.loads(m.group(1))

        for index in json_data.keys():
            image_data = json_data[index]
            page = image_data["page"]
            image_url = base64.decodebytes(
                image_data["src"].encode("utf-8")).decode("utf-8")
            format = image_url.split(".")[-1]
            image_file_name = "%03d.%s" % (int(page), format)

            file_path = "/".join([folder_name, image_file_name])
            cls.task_pool.put([file_path, image_url, 0])
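The image src values in this page are base64-encoded, and base64.decodebytes recovers the plain URL. Round trip with a made-up value:

    import base64

    # made-up src value; real pages embed it already encoded
    src = base64.encodebytes("https://example.com/img/003.jpg".encode("utf-8"))
    print(base64.decodebytes(src).decode("utf-8"))  # https://example.com/img/003.jpg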
Example #24
    def parse_lvl_two(cls, url):
        content = HttpUtils.get(url, return_raw=True)
        assert content is not None

        location = os.path.join(os.path.dirname(__file__), "../bin/phantomjs")
        jsFile = os.path.join(os.path.dirname(__file__),
                              "../static/tencent_comic.js")

        print(">>> parsing " + url)
        data = os.popen("%s %s %s" % (location, jsFile, url)).read()
        # retry once (os.popen().read() returns "" on failure, never None)
        if not data:
            data = os.popen("%s %s %s" % (location, jsFile, url)).read()

        assert data, "fail to fetch data from %s" % url
        print("****** data=" + data)
        json_data = json.loads(data)

        book = json_data["title"]
        number = json_data["cid"]
        title = json_data["cTitle"].strip().replace(" ", "-").replace(
            "（", "(").replace("）", ")")

        # create folder once
        folder_name = "%s/%08d_%s" % (book, int(number), title)
        if not os.path.exists(folder_name):
            os.makedirs(folder_name)

        for index in json_data["picture"].keys():
            image_url = json_data["picture"][index]
            format = "png"
            image_file_name = "%03d.%s" % (int(index), format)

            file_path = "/".join([folder_name, image_file_name])
            cls.task_pool.put([file_path, image_url, 0])
Example #25
    def parse_size(self, soup_obj):
        assert soup_obj is not None
        assert len(soup_obj.contents) == 3

        size_num = round(float(soup_obj.contents[0]) * self.size_factor, 2)
        size_unit = soup_obj.contents[2]

        return HttpUtils.pretty_format(str(size_num) + str(size_unit), "MB")
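HttpUtils.pretty_format itself is not shown; from its call sites it takes a size string like "12.5 GB" and returns the numeric value in the requested unit. A minimal sketch under that assumption:

    import re

    UNITS = {"B": 1, "KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3, "TB": 1024 ** 4}

    def pretty_format(size_str, target_unit):
        # hypothetical stand-in for HttpUtils.pretty_format
        m = re.search(r"([\d.]+)\s*([KMGT]?B)", str(size_str), re.IGNORECASE)
        assert m, "cannot parse size: %r" % size_str
        value = float(m.group(1)) * UNITS[m.group(2).upper()]
        return round(value / UNITS[target_unit.upper()], 2)

    print(pretty_format("1.5 TB", "GB"))  # 1536.0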
Example #26
    def check_login(self, site):
        resp = HttpUtils.post(site.home_page, data={}, returnRaw=True).text
        jsonValue = json.loads(resp)
        if jsonValue['errNo'] == 0:
            content = jsonValue['data']['name']
            return content is not None and content == site.login_verify_str
        else:
            return False
Example #27
    def parse(self, soup_obj):
        assert soup_obj is not None

        info_block = soup_obj.select(
            "#info_block table tr td:nth-of-type(1) span")[0]

        prev_info = ""
        upload = 0
        download = 0
        for info in info_block.contents:
            if "上傳量" in prev_info:
                upload = HttpUtils.pretty_format(info, "GB")
            elif "下載量" in prev_info:
                download = HttpUtils.pretty_format(info, "GB")
                break
            prev_info = str(info)

        return upload, download
Example #28
    def read_msg(self, index):
        self.login_if_not()

        soup_obj = HttpUtils.get(self.url + index,
                                 headers=self.site.login_headers)
        assert soup_obj is not None

        tr_list = soup_obj.select("#outer form table tr")

        messages = []
        cnt = 0
        for tr in tr_list:
            cnt += 1
            if cnt == 1:
                # skip the caption tr
                continue

            td_list = tr.select("td.rowfollow")

            if len(td_list) < 4:
                # skip footer
                continue

            msg = Message()
            msg.read = len(td_list[0].select("img[alt=\"Read\"]")) > 0
            msg.title = HttpUtils.get_content(td_list[1], "a")
            msg.from_user = HttpUtils.get_content(td_list[2], "span a b")
            if msg.from_user is None:
                # for ad.
                msg.from_user = td_list[2].contents[0]
            msg.since = HttpUtils.get_content(td_list[3], "span")
            link = HttpUtils.get_attr(td_list[1], "a", "href")
            msg.id = link.split("id=")[1]
            messages.append(msg)

        print("--------------------------------------")
        index = 1
        for msg in messages:
            print("{:<2}|".format(index) + str(msg))
            index += 1
        print("--------------------------------------")

        return messages
Example #29
    def crawl(self):
        site = self.generate_site()
        assert self.login(site)

        for i in range(107, 164):
            soup_obj = HttpUtils.get(site.home_page + "?page=" + str(i),
                                     headers=site.login_headers)
            ids = self.parse(soup_obj)
            ParallelTemplate(150).run(func=self.say_thank, inputs=ids)
            print(">>>>>> finish page " + str(i))
Example #30
    def init_setting(self):
        self.login_if_not()

        # enable adult torrent
        setting_url = "https://kp.m-team.cc/usercp.php"
        lab_data = {
            "action": "laboratory",
            "type": "save",
            "laboratory_adult_mode": "0",
            "laboratory_torrent_page_https": "0"
        }
        res = HttpUtils.post(url=setting_url,
                             data=lab_data,
                             headers=self.site.login_headers,
                             returnRaw=True)
        assert res.status_code == 200

        # do not show picture
        tracker_data = {
            "action": "tracker",
            "type": "save",
            "t_look": "1",  # show pic
            "tooltip": "off",
            "timetype": "timealive",
            "appendsticky": "yes",
            "radio": "icon",
            "smalldescr": "yes",
            "dlicon": "yes",
            "bmicon": "yes",
            "show_hot": "yes",
            "showfb": "yes",
            "showdescription": "yes",
            "showimdb": "yes",
            "showcomment": "yes",
            "appendnew": "yes",
            "appendpicked": "yes",
            "showcomnum": "yes"
        }
        res = HttpUtils.post(url=setting_url,
                             data=tracker_data,
                             headers=self.site.login_headers,
                             returnRaw=True)
        assert res.status_code == 200