def __init__(self, proxy, loop, localsession, id=0):
    self.proxy = proxy
    self.loop = loop
    self.session = CloudflareScraper(headers=genHeaders(),
                                     timeout=timeoutConfig,
                                     loop=loop)
    self.localsession = localsession
    self.id = id  # marks which vote attempt this is
    # if proxy:
    self.fingerprint = md5((proxy + 'ChNeWi').encode()).hexdigest()
async def request(self, url: str, kwargs: dict = {}, return_bytes=False, payload=None) -> dict:
    async with CloudflareScraper(
            loop=self._loop,
            headers={
                'authority': 'm.tiktok.com',
                'accept': 'application/json, text/plain, */*',
                'accept-encoding': 'gzip, deflate',
                'accept-language': 'en-US,en;q=0.9',
                'referrer': 'https://m.tiktok.com/',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-site',
                'user-agent': self._user_agent,
                'cookie': ';'.join([
                    f'{key}={value}'
                    for key, value in self.browser.cookies.items()
                ])
            }) as session:
        url = await self._browser.signature(url, kwargs)
        if payload is not None:
            async with session.post(url, json=payload) as response:
                return await response.text()
        async with session.get(url) as response:
            if return_bytes:
                # read() returns the full body as bytes; returning the raw
                # response.content stream would outlive the context manager.
                return await response.read()
            try:
                _json = await response.json(content_type=None)
                code = _json.get('code', -1)
                if code != '10000':
                    return _json
                return await self.captcha(_json, url, kwargs, return_bytes)
            except Exception as e:
                logging.error(e, exc_info=True)
                print(f'Failed on {url}; Converting to json error; '
                      f'Text: {await response.text()}')
                raise Exception('Invalid Response!!!')
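# Usage sketch for request() above. Assumptions: `api` is an instance of the
# (unshown) class that defines request(), and the parameters are purely
# illustrative, not a documented endpoint contract.
async def fetch_signed_json(api, url: str, params: dict) -> dict:
    # request() handles URL signing, Cloudflare challenges and captcha retries.
    return await api.request(url, kwargs=params)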
async def coin_name(self, symbol: str) -> str:
    try:
        async with CloudflareScraper() as session:
            async with session.get('https://liqui.io/Market/Currencies/') as resp:
                currencies = await resp.json()
    except Exception as e:
        raise LiquiPairNamesException(e)
    coin_name = next(
        (i['Name'] for i in currencies if i['Symbol'] == symbol), None)
    if not coin_name:
        raise LiquiPairNamesException(f'cannot find coin {symbol!r}')
    return coin_name
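# Usage sketch for coin_name(). The surrounding class is not shown in this
# snippet, so `client` here is a hypothetical instance exposing the method.
async def print_coin_name(client, symbol: str = 'BTC') -> None:
    name = await client.coin_name(symbol)  # raises LiquiPairNamesException on failure
    print(f'{symbol} -> {name}')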
async def get_election_offices():
    """Starting point of the scraper program. Scrapes BASE_URL for election
    office information and both dumps results to a .json file and returns
    the results as json.
    @return: list of scraped results as json.
    """
    # Get list of county names from registrar to populate form
    # Define coroutine functions (context managers)
    async with CloudflareScraper() as session:
        async with session.get(INFO_URL) as s:
            # ClientResponse.read() is a coroutine function so it must be awaited
            text = await s.read()
        soup = bS(text, "html5lib")
        info_list = soup.find('div', {'class': 'content'}).findAll('li')
        counties = [info.text for info in info_list]
        # Use list of counties and IDs to get county info for each county
        tasks: List[Task] = []
        num_scraped = 0
        master_list = []
        for i in range(len(counties)):
            # Create task for a future asynchronous operation and store it in task list
            tasks.append(
                asyncio.create_task(scrape_one_county(session, counties[i])))
        # Run the coroutines and iterate over the yielded results as they complete
        # (out-of-order). Use asyncio.gather() with a couple code modifications to
        # preserve list order
        future: Future[Tuple[str, str, str, str, str, str]]
        for future in asyncio.as_completed(tasks):
            # Unpack awaited result of scrape_one_county()
            (address, county_website, phone_number, email_address,
             director_name, county_name) = await future
            schema = format_data_into_schema(address, county_website,
                                             phone_number, email_address,
                                             director_name, county_name)
            master_list.append(schema)
            num_scraped += 1
            print(f"[South Carolina] Scraped {county_name} county: "
                  f"#{num_scraped} of {len(counties)} .... "
                  f"[{round((num_scraped / len(counties)) * 100, 2)}%]")
    master_list = sorted(master_list, key=lambda county: county['countyName'])
    with open(
            os.path.join(ROOT_DIR, "scrapers", "south_carolina",
                         "south_carolina.json"), "w") as f:
        json.dump(master_list, f)
    return master_list
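# A minimal runner for get_election_offices() above (the Florida and Georgia
# variants below share the same entry-point shape). asyncio.run() drives the
# top-level coroutine; module constants such as INFO_URL and ROOT_DIR must
# already be defined as in the snippet.
import asyncio

if __name__ == "__main__":
    offices = asyncio.run(get_election_offices())
    print(f"Scraped {len(offices)} county offices")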
async def get_election_offices():
    """Starting point of the scraper program. Scrapes BASE_URL for election
    office information and both dumps results to a .json file and returns
    the results as json.
    @return: list of scraped results as json.
    """
    # Define coroutine functions (context managers)
    async with CloudflareScraper() as session:
        async with session.get(BASE_URL) as s:
            # ClientResponse.read() is a coroutine function so it must be awaited
            text = await s.read()
        soup = bS(text.decode("utf-8"), "html.parser")
        test_county_data = get_county_codes_and_names(soup)
        county_data = sorted(test_county_data, key=lambda k: k["countyName"])
        num_scraped = 0
        master_list = []
        # Create list that will store asyncio tasks
        tasks: List[Task] = []
        for county in county_data:
            code = county["countyCode"]
            name = county["countyName"]
            # Create task for a future asynchronous operation and store it in task list
            tasks.append(asyncio.create_task(scrape_one_county(session, code, name)))
        # Run the coroutines and iterate over the yielded results as they complete
        # (out-of-order). Use asyncio.gather() with a couple code modifications to
        # preserve list order
        future: Future[Tuple[str, str, str, str]]
        for future in asyncio.as_completed(tasks):
            # Unpack awaited result of scrape_one_county()
            cleaned_string, protected_email, _, county_name = await future
            schema = format_data_into_schema(
                cleaned_string, protected_email, county_name
            )
            master_list.append(schema)
            num_scraped += 1
            print(
                f"[Florida] Scraped {county_name} county: "
                f"#{num_scraped} of {len(county_data)} .... "
                f"[{round((num_scraped / len(county_data)) * 100, 2)}%]"
            )
    with open(os.path.join(ROOT_DIR, "scrapers", "florida", "florida.json"), "w") as f:
        json.dump(master_list, f)
    return master_list
async def read_logs():
    result_chat_lines = []
    result_kill_lines = []
    values = ('user', 'password', 'serverid', 'loc', 'folder', 'admin_file',
              'admin_line', 'chat_file', 'chat_line', 'kill_file', 'kill_line',
              'login_file', 'login_line', 'violations_file', 'violations_line')
    try:
        load_configini()
    except Exception:
        global configini
        configini = {}
    for value in values:
        if value not in configini:
            configini[value] = ''
    if configini['folder'] != '':
        if configini['folder'][-1:] != '/' and configini['folder'][-1:] != '\\':
            configini['folder'] = configini['folder'] + '/'
    save_configini()
    URL_LOGIN = '******'.format(configini['loc'])
    URL_LOGS = 'https://www.g-portal.{}/en/scum/logs/{}'.format(
        configini['loc'], configini['serverid'])
    async with CloudflareScraper() as session:
        try:
            log('connecting g-portal...')
            payload = {
                '_method': 'POST',
                'login': configini['user'],
                'password': configini['password'],
                'rememberme': '1'
            }
            async with session.post(URL_LOGIN, data=payload) as raw_response:
                response = await raw_response.text()
            async with session.get(URL_LOGS) as raw_response:
                response = await raw_response.text()
            html = BeautifulSoup(response, 'html.parser')
            select = html.find('div', {'class': 'wrapper logs'})
            loglist = select['data-logs']
            logs = json.loads(loglist)
            for i in range(len(logs)):
                getid = logs["file_" + str(i + 1)]
                id = getid[getid.find('Logs') + 5:]
                type = id.split('_')[0]
                if type in ['chat', 'kill']:
                    if configini[type + '_file'] != '':
                        if id < configini[type + '_file']:
                            continue
                    payload = {
                        '_method': 'POST',
                        'load': 'true',
                        'ExtConfig[config]': getid
                    }
                    async with session.post(URL_LOGS, data=payload) as raw_response:
                        response = await raw_response.text()
                    content = json.loads(response)
                    lines = content["ExtConfig"]["content"].splitlines()
                    found = False
                    writing = False
                    for index, line in enumerate(lines):
                        # Replace "%" with ";" because the .ini state file
                        # cannot store a bare "%" symbol.
                        if "%" in line:
                            lines[index] = line.replace("%", ";")
                        if id == configini[type + '_file'] and not found:
                            if line == configini[type + '_line']:
                                found = True
                                continue
                            # Compare the "%"-converted form too, in case the
                            # stored line was already escaped.
                            elif "%" in line:
                                if line.replace("%", ";") == configini[type + "_line"]:
                                    found = True
                                    continue
                        else:
                            if type == "chat":
                                result_chat_lines.append(line)
                            else:
                                result_kill_lines.append(line)
                            writing = True
                    if writing:
                        if found:
                            log('updating {}'.format(id))
                        else:
                            log('creating {}'.format(id))
                        configini[type + '_file'] = id
                        configini[type + '_line'] = lines[-1]
                        save_configini()
            if not result_chat_lines and not result_kill_lines:
                return [], False
            return [result_chat_lines, result_kill_lines], True
        except Exception:
            print(traceback.format_exc())
            return [], False
        # No explicit session.close() needed: the async with block closes it.
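# Why read_logs() swaps "%" for ";": the state file is a .ini, and (assuming
# save_configini() uses the standard configparser, which the snippet's own
# comment suggests) BasicInterpolation rejects a bare "%" in a value.
# Minimal standard-library demonstration; the key name is illustrative:
import configparser

cp = configparser.ConfigParser()
try:
    cp['state'] = {'chat_line': '100% done'}  # ValueError: invalid interpolation syntax
except ValueError:
    # The same workaround the scraper applies to log lines:
    cp['state'] = {'chat_line': '100% done'.replace('%', ';')}
print(cp['state']['chat_line'])  # -> '100; done'
# configparser.ConfigParser(interpolation=None) would avoid the issue entirely.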
# Fragment: tail of Voter.Vote(); the enclosing try block is not shown.
            future = asyncio.ensure_future(task, loop=self.loop)
            future.add_done_callback(functools.partial(printer))
            task = self.PostFingerprint()
            future = asyncio.ensure_future(task, loop=self.loop)
            future.add_done_callback(functools.partial(doNothing))
            return '%d %s %s' % (self.id, self.proxy, result)
        if 'refresh' in result:  # session expired, among other causes
            print('%d %s %s %s' % (self.id, self.proxy, result,
                                   'retrying the whole voting flow'))
            if random.random() < 0.6:
                await self.Vote()  # try again!
            else:
                # end of this vote
                return '%d %s %s %s' % (self.id, self.proxy, result, 'giving up')
        # if 'An entry' in result:  # this IP has already been used to vote
        return '%d %s %s' % (self.id, self.proxy, result)  # end of this vote
    except RetryExhausted:
        return '%d %s %s' % (self.id, self.proxy, 'consecutive retry limit exceeded')
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return '%d %s %s' % (self.id, self.proxy, 'proxy probably dead, giving up')

def Launch(self):
    vote = self.Vote()
    vote_future = asyncio.ensure_future(vote, loop=self.loop)
    vote_future.add_done_callback(functools.partial(printer))
    ## res = await vote_future
    ## if res == 300:
    ##     vote_future.add_done_callback(functools.partial(self.Launch))

if __name__ == '__main__':
    voter = Voter('192.168.1.1:9999', asyncio.get_event_loop(), CloudflareScraper())
    print(voter)
async def cs_page(url):
    async with CloudflareScraper() as session:
        async with session.get(url) as resp:
            return await resp.text()
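# Usage sketch: drive cs_page() for several URLs concurrently. The URLs are
# placeholders; asyncio.gather() preserves input order.
import asyncio

async def fetch_all(urls):
    return await asyncio.gather(*(cs_page(u) for u in urls))

# pages = asyncio.run(fetch_all(['https://example.com/a', 'https://example.com/b']))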
async def read_logs():
    values = ('user', 'password', 'serverid', 'loc', 'folder', 'admin_file',
              'admin_line', 'chat_file', 'chat_line', 'kill_file', 'kill_line',
              'login_file', 'login_line', 'violations_file', 'violations_line')
    print('scumlogs v1.0, scum server logs downloader from gportal\n'
          'by https://GAMEBotLand.com')
    try:
        load_configini()
    except Exception:
        global configini
        configini = {}
    for value in values:
        if value not in configini:
            configini[value] = ''
    if configini['folder'] != '':
        if configini['folder'][-1:] != '/' and configini['folder'][-1:] != '\\':
            configini['folder'] = configini['folder'] + '/'
    save_configini()
    # Note: `loc` is computed here but unused; the URLs below read
    # configini['loc'] directly.
    if configini['loc'] == 'com':
        loc = 'com'
    else:
        loc = 'us'
    URL_LOGIN = '******'.format(configini['loc'])
    URL_LOGS = 'https://www.g-portal.{}/en/scum/logs/{}'.format(
        configini['loc'], configini['serverid'])
    async with CloudflareScraper() as session:
        try:
            log('connecting g-portal...')
            payload = {
                '_method': 'POST',
                'login': configini['user'],
                'password': configini['password'],
                'rememberme': '1'
            }
            async with session.post(URL_LOGIN, data=payload) as raw_response:
                response = await raw_response.text()
            async with session.get(URL_LOGS) as raw_response:
                response = await raw_response.text()
            html = BeautifulSoup(response, 'html.parser')
            select = html.find('div', {'class': 'wrapper logs'})
            loglist = select['data-logs']
            logs = json.loads(loglist)
            for i in range(len(logs)):
                getid = logs["file_" + str(i + 1)]
                id = getid[getid.find('Logs') + 5:]
                type = id.split('_')[0]
                if configini[type + '_file'] != '':
                    if id < configini[type + '_file']:
                        continue
                payload = {
                    '_method': 'POST',
                    'load': 'true',
                    'ExtConfig[config]': getid
                }
                async with session.post(URL_LOGS, data=payload) as raw_response:
                    response = await raw_response.text()
                content = json.loads(response)
                lines = content["ExtConfig"]["content"].splitlines()
                filename = configini['folder'] + id
                file = open(filename, "a+", encoding='utf-8')
                found = False
                writing = False
                for line in lines:
                    if id == configini[type + '_file'] and not found:
                        # Skip lines until the last previously-saved line is seen.
                        if line == configini[type + '_line']:
                            found = True
                        continue
                    else:
                        file.write(line + '\n')
                        writing = True
                if writing:
                    if found:
                        log('updating {}'.format(id))
                    else:
                        log('creating {}'.format(id))
                    configini[type + '_file'] = id
                    configini[type + '_line'] = lines[-1]
                    save_configini()
                file.close()  # close even when nothing new was written
        except Exception:
            log('error connecting, check connectivity and scumlogs.ini')
            help()
        # No explicit session.close() needed: the async with block closes it.
# Fragment: tail of Voter.Vote(); the enclosing try block is not shown.
            future.add_done_callback(functools.partial(printer))
            task = self.PostFingerprint()
            future = asyncio.ensure_future(task, loop=self.loop)
            future.add_done_callback(functools.partial(printer))
            return '%d %s %s' % (self.id, self.proxy, result)
        if 'refresh' in result:  # session expired, among other causes
            print('%d %s %s %s' % (self.id, self.proxy, result,
                                   'retrying the whole voting flow'))
            await self.Vote()  # try again!
        # if 'An entry' in result:  # this IP has already been used to vote
        return '%d %s %s' % (self.id, self.proxy, result)  # end of this vote
    except RetryExhausted:
        return '%d %s %s' % (self.id, self.proxy, 'consecutive retry limit exceeded')
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return '%d %s %s' % (self.id, self.proxy, 'proxy probably dead, giving up')

def Launch(self):
    vote = self.Vote()
    vote_future = asyncio.ensure_future(vote, loop=self.loop)
    vote_future.add_done_callback(functools.partial(printer))
    ## res = await vote_future
    ## if res == 300:
    ##     vote_future.add_done_callback(functools.partial(self.Launch))

if __name__ == '__main__':
    voter = Voter('192.168.1.1:9999', asyncio.get_event_loop(), CloudflareScraper())
    print(voter)
"Accept-Language": "zh-CN,zh;q=0.8", "Upgrade-Insecure-Requests": "1", "User-Agent": uaGen.random, } ##def printer(future): ## print(future.result()) request_timeout = 50 #单次http请求的默认超时。 #你可以随时暂时覆盖这一设置 captcha_timeout = 60 #单次取验证码的默认超时 timeoutConfig = aiohttp.ClientTimeout(total=request_timeout) captchaTimeoutConfig = aiohttp.ClientTimeout(total=captcha_timeout) localsession = CloudflareScraper(headers=headers, loop=worker_loop, timeout=timeoutConfig) #async def localsession_get(url='https://coinone.co.kr/'):#珂以测试防火墙 async def localsession_get(url="https://www.internationalsaimoe.com"): async with localsession.get(url) as res: text = await res.text() return ('Ignaleo:本地session请求%s,状态码为%d' % (url, res.status)) #print('Ignaleo:本地session请求%s,状态码为%d'%(url,res.status)) #return res.status ## await asyncio.sleep(80) ## async with localsession.post(url,data=b'test',ssl=False) as res: ## text = await res.text()
async def get_election_offices():
    """Starting point of the scraper program. Scrapes BASE_URL for election
    office information and both dumps results to a .json file and returns
    the results as json.
    @return: list of scraped results as json.
    """
    # Get list of county names from registrar to populate form
    # Define coroutine functions (context managers)
    async with CloudflareScraper() as session:
        async with session.get(REGISTRAR_URL) as s:
            # ClientResponse.read() is a coroutine function so it must be awaited
            text = await s.read()
        soup = bS(text, "html5lib")
        county_option_list = soup.findAll(
            attrs={"name": "idTown"})[0].findAll("option")
        id_list = [
            county_option["value"] for county_option in county_option_list
        ]
        county_list = [
            county_option.string for county_option in county_option_list
        ]
        # Use list of counties and IDs to get county info for each county
        tasks: List[Task] = []
        num_scraped = 0
        master_list = []
        for i in range(len(id_list)):
            county_id = id_list[i]
            county_name = county_list[i]
            # Create task for a future asynchronous operation and store it in task list
            tasks.append(
                asyncio.create_task(
                    scrape_one_county(session, county_id, county_name)))
        # Run the coroutines and iterate over the yielded results as they complete
        # (out-of-order). Use asyncio.gather() with a couple code modifications to
        # preserve list order
        future: Future[Tuple[str, str, str, str, str, str]]
        for future in asyncio.as_completed(tasks):
            # Unpack awaited result of scrape_one_county()
            (
                registrar_name,
                phys_address,
                mail_address,
                phone_number,
                email_address,
                county_name,
            ) = await future
            schema = format_data_into_schema(
                registrar_name,
                phys_address,
                mail_address,
                phone_number,
                email_address,
                county_name,
            )
            master_list.append(schema)
            num_scraped += 1
            print(f"[Georgia] Scraped {county_name} county: "
                  f"#{num_scraped} of {len(county_list)} .... "
                  f"[{round((num_scraped / len(county_list)) * 100, 2)}%]")
    with open(os.path.join(ROOT_DIR, "scrapers", "georgia", "georgia.json"),
              "w") as f:
        json.dump(master_list, f)
    return master_list
async def url_2_image(url: str):
    async with CloudflareScraper() as session:
        async with session.get(url) as response:
            return await response.read()
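# Usage sketch: persist the bytes returned by url_2_image(). The file path is
# illustrative; read() yields the full body as bytes, hence binary mode.
import asyncio

async def save_image(url: str, path: str) -> None:
    data = await url_2_image(url)
    with open(path, 'wb') as f:
        f.write(data)

# asyncio.run(save_image('https://example.com/pic.jpg', 'pic.jpg'))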
async def __get_js(self):
    async with CloudflareScraper(loop=self._loop, headers={}) as session:
        async with session.get(
                'https://sf-tb-sg.ibytedtos.com/obj/rc-web-sdk-sg/acrawler.js'
        ) as response:
            return await response.text()