Example 1
 def get_access_token(cls, appid, appsecret, code):
     """
     Exchange the OAuth code for a web-authorization access_token.
     :return: dict with keys: access_token, expires_in, refresh_token, openid, scope
     """
     params = {'appid': appid, 'secret': appsecret, 'code':code, 'grant_type':'authorization_code'}
     return HttpUtil.get(cls._ACCESS_TOKEN_URL, params)
Example 2
 def get_user_info(cls, openid, access_token, lang='zh_CN'):
     """
     Fetch the user's profile; this step is not needed for the snsapi_base scope.
     :return: user info dict
     """
     params = {'access_token': access_token, 'openid': openid, 'lang':lang}
     return HttpUtil.get(cls._USERINFO_URL, params)
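A minimal usage sketch that strings the two classmethods above together; the WxOAuth class name and the placeholder credentials are assumptions, not part of the source:

# Hypothetical illustration only: WxOAuth, APPID, APPSECRET and the code value are assumed names.
APPID = 'wx_your_appid'            # placeholder app id
APPSECRET = 'your_app_secret'      # placeholder app secret
code = 'CODE_FROM_REDIRECT_URL'    # the "code" query parameter appended to the OAuth redirect

token = WxOAuth.get_access_token(APPID, APPSECRET, code)
# token is expected to be a dict with access_token, expires_in, refresh_token, openid, scope
user = WxOAuth.get_user_info(token['openid'], token['access_token'])
print(user)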
Example 3
 def refresh_access_token(cls, appid, refresh_token):
     """
     Refresh the auth access_token.
     :return: dict with keys: access_token, expires_in, refresh_token, openid, scope
     """
     params = {'appid':appid, 'grant_type':'refresh_token', 'refresh_token':refresh_token}
     return HttpUtil.get(cls._REFRESH_TOKEN_UEL, params)
Example 4
 def request_access_token(appid, appsecret):
     """
     Request an access_token over the network.
     :param appid:
     :param appsecret:
     :return: {'access_token':'', 'expires_in':3600}
     """
     url = WxApi.BASE_URL + "/cgi-bin/token?grant_type=client_credential"
     params = {'appid': appid, 'secret': appsecret}
     return HttpUtil.get(url, params)
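Because the docstring above reports expires_in in seconds, a caller would normally cache the token rather than request a new one on every call. A rough sketch, assuming request_access_token returns the dict shown in its docstring:

import time

_token_cache = {'value': None, 'expires_at': 0.0}

def get_cached_access_token(appid, appsecret):
    # Reuse the cached token until shortly before it expires (60-second safety margin).
    if _token_cache['value'] is None or time.time() >= _token_cache['expires_at']:
        resp = request_access_token(appid, appsecret)
        _token_cache['value'] = resp['access_token']
        _token_cache['expires_at'] = time.time() + resp['expires_in'] - 60
    return _token_cache['value']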
Example 5
 def check_access_token(cls, openid, access_token):
     """
     Check whether the authorization credential (access_token) is still valid.
     """
     params = {'access_token':access_token, 'openid': openid}
     return HttpUtil.get(cls._CHECK_TOKEN_URL, params)
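A sketch of how check_access_token and refresh_access_token might be combined to keep a stored token usable; the WxOAuth class name and the errcode == 0 convention are assumptions rather than something these examples confirm:

# Hypothetical illustration: assumes the check endpoint returns a dict whose
# 'errcode' field is 0 while the access_token is still valid.
def ensure_valid_token(appid, openid, access_token, refresh_token):
    result = WxOAuth.check_access_token(openid, access_token)
    if result.get('errcode', 0) == 0:
        return access_token
    refreshed = WxOAuth.refresh_access_token(appid, refresh_token)
    return refreshed['access_token']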
Example 6
 def _get(self, url, params=None):
     # Build the final request URL and issue a GET with the given params.
     return HttpUtil.get(self._final_url(url), params)
Example 7
def download_latest_images(page, directory):
    try:
        SystemUtil.restart_process(os.path.abspath(__file__))

        html = BeautifulSoup(
            HttpUtil.get("https://www.pexels.com/zh-cn/new-photos?page=" +
                         str(page)).text,
            features="lxml")
        articles = html.find_all("article")
        pages_html = BeautifulSoup(str(
            html.find("div", {"class": "pagination"})),
                                   features="lxml").find_all("a")
        page_total = int(pages_html[len(pages_html) - 2].text)

        print(page, len(articles), page_total)
        if page > page_total:
            page = 1
            raise ValueError("page is out of range")

        for article in articles:
            # Image id
            image_id = article["data-photo-modal-medium-id"]
            # Original image size
            # image_org_size = article["data-photo-modal-download-value-original"]
            # Image download link
            download_url = article["data-photo-modal-image-download-link"]
            image_name = f"pexels-photo-{image_id}.jpg"

            info_html = BeautifulSoup(
                HttpUtil.get("https://www.pexels.com/zh-cn/photo/" +
                             image_id).text,
                features="lxml")
            tags = info_html.find("meta", {
                "name": "keywords"
            }).attrs["content"]
            if len(tags) > 0 and tags != "":
                # Convert Traditional Chinese tags to Simplified Chinese
                tags = zhconv.convert(tags[:len(tags) - 7], 'zh-cn')
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "",
                              tags).replace(",,", ",")
            s3.execute_commit(f"""
            INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags) 
            VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}',
            '{download_url}','latest','{page}','{tags}')
            """)
            # dl = info_html.find(lambda tag: tag.has_attr('data-id') and tag.has_attr('href')).attrs["href"]
            # dl = info_html.find(lambda tag: tag.has_attr('data-id') and tag.has_attr('data-url')).attrs["data-url"]

            # Skip the download if the file already exists
            if not os.path.exists(os.path.join(directory, image_name)):
                # Download each image in its own thread (thread-pool variant kept for reference)
                # done = ThreadPool.pool.submit(HttpUtil.download_file, download_url, directory, image_name)
                # done.add_done_callback(ThreadPool.thread_call_back)
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory,
                                                image_name))

        global run_count
        run_count += 1

        # If a page count was found, this is not the last page, and we have run at most 10 times
        if page_total > 0 and page <= page_total and run_count <= 10:
            download_latest_images(page + 1, directory)
        else:
            if len(pages_html) > 0 and page <= page_total:
                page += 1
            if page > page_total:
                page = 1
            run_count = 0

    except Exception as e:
        print(e)
    finally:
        print("当前活跃线程数:", threading.activeCount())
        time.sleep(400)
        download_latest_images(page, directory)
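The INSERT above splices values straight into the SQL text, so a tag containing a quote would break the statement. A parameterized variant along the following lines would be safer, assuming the underlying store is plain sqlite3 (the real signature of s3.execute_commit is not shown in these examples):

import sqlite3

def insert_image(conn: sqlite3.Connection, image_id, download_url, page, tags):
    # Placeholders let sqlite3 do the quoting, so tags with apostrophes cannot break the SQL.
    suffix = download_url[download_url.rfind(".") + 1:]
    conn.execute(
        "INSERT OR IGNORE INTO images(image_id, suffix, url, type, page, tags) "
        "VALUES (?, ?, ?, 'latest', ?, ?)",
        (image_id, suffix, download_url, page, tags),
    )
    conn.commit()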
Example 8
def lin_shi_you_xiang_get_mail(prefix, id):
    # Fetch the raw source of a message from the temporary-mailbox API.
    url = Constants.LIN_SHI_YOU_XIANG + "/mailbox/" + prefix + "/" + id + "/source"
    return HttpUtil.get(url=url, data=None).text
Example 9
def download_images(url, page, directory):
    """
    Download images from the listing page.
    :param url: listing URL
    :param page: page number
    :param directory: directory where the downloaded files are stored
    :return:
    """
    try:
        SystemUtil.restart_process(os.path.abspath(__file__))

        html = BeautifulSoup(HttpUtil.get(url + str(page)).text,
                             features="lxml")
        figure = html.find_all("figure")
        # Collect all tags that carry the specified attribute
        page_all = html.find_all(lambda tag: tag.has_attr('original-title'))
        page_total = int(page_all[len(page_all) - 1].text)

        print(page, len(figure), page_total)
        if page > page_total:
            page = 1
            raise ValueError("page is out of range")

        for label in figure:
            image_id = label.attrs["data-wallpaper-id"]

            # Image detail page
            info_html = BeautifulSoup(HttpUtil.get("https://wallhaven.cc/w/" +
                                                   image_id).text,
                                      features="lxml")
            tags_html = info_html.find_all("a", {
                "class": "tagname",
                "rel": "tag"
            })
            # Tags attached to the image
            tags = ",".join([tag_html.text
                             for tag_html in tags_html]).replace("'", "")
            if len(tags) > 0 and tags != "":
                tags = TranslationUtil.translate_google(tags).replace(",", ",")
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "",
                              tags).replace(",,", ",")

            download_url = info_html.find("img", {
                "id": "wallpaper"
            }).attrs["src"]
            if len(download_url) <= 0 or download_url == "":
                raise ConnectionError("获取下载链接失败")

            s3.execute_commit(f"""
            INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags) 
            VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}',
            '{download_url}','latest','{page}','{tags}')
            """)

            image_name = download_url.split("/")
            image_name = image_name[len(image_name) - 1]
            # Skip the download if the file already exists
            # if not os.path.exists(name):
            if not os.path.isfile(os.path.join(directory, image_name)):
                # Download each image in its own thread (thread-pool variant kept for reference)
                # done = ThreadPool.pool.submit(HttpUtil.download_file, download_url, directory, image_name)
                # done.add_done_callback(ThreadPool.thread_call_back)
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory,
                                                image_name))
        global run_count
        run_count += 1

        # If a page count was found, this is not the last page, and we have run at most 10 times
        if len(page_all) > 0 and page <= page_total and run_count <= 10:
            download_images(url, page + 1, directory)
        else:
            if len(page_all) > 0:
                page += 1
            if page > page_total:
                page = 1
            run_count = 0

    except Exception as e:
        print(e)
    finally:
        print("当前活跃线程数:", threading.activeCount())
        time.sleep(400)
        download_images(url, page, directory)
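Both download functions call themselves again from the finally block, so every sleep/retry cycle adds another stack frame and will eventually hit Python's recursion limit. An iterative driver like the sketch below keeps the same sleep-then-continue behaviour without recursion (download_one_page is a hypothetical single-page worker, not a function from these examples):

import time

def run_forever(start_page, directory, interval=400):
    # Iterative driver: one page per cycle, no recursion, so the call stack stays flat.
    page = start_page
    while True:
        try:
            page = download_one_page(page, directory)  # hypothetical: returns the next page to fetch
        except Exception as e:
            print(e)
        time.sleep(interval)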