def __init__(self, proxy, loop, localsession, id=0):
    self.proxy = proxy
    self.loop = loop
    self.session = CloudflareScraper(headers=genHeaders(),
                                     timeout=timeoutConfig,
                                     loop=loop)
    self.localsession = localsession
    self.id = id  # marks which vote attempt this is
    # if proxy:
    self.fingerprint = md5((proxy + 'ChNeWi').encode()).hexdigest()
async def request(self, url: str, kwargs: dict = {}, return_bytes=False, payload=None) -> dict:
    async with CloudflareScraper(
            loop=self._loop,
            headers={
                'authority': 'm.tiktok.com',
                'accept': 'application/json, text/plain, */*',
                'accept-encoding': 'gzip, deflate',
                'accept-language': 'en-US,en;q=0.9',
                'referrer': 'https://m.tiktok.com/',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-site',
                'user-agent': self._user_agent,
                'cookie': ';'.join([
                    f'{key}={value}'
                    for key, value in self.browser.cookies.items()
                ])
            }) as session:
        url = await self._browser.signature(url, kwargs)
        if payload is not None:
            async with session.post(url, json=payload) as response:
                return await response.text()
        async with session.get(url) as response:
            if return_bytes:
                # read() returns the full body as bytes; returning the raw
                # response.content stream would outlive the context manager.
                return await response.read()
            try:
                _json = await response.json(content_type=None)
                code = _json.get('code', -1)
                if code != '10000':
                    return _json
                return await self.captcha(_json, url, kwargs, return_bytes)
            except Exception as e:
                logging.error(e, exc_info=True)
                print(f'Failed on {url}; Converting to json error; '
                      f'Text: {await response.text()}')
                raise Exception('Invalid Response!!!')
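# Usage sketch for request() above. Assumptions: `api` is an instance of the
# (unshown) class that defines request(), and the parameters are purely
# illustrative, not a documented endpoint contract.
async def fetch_signed_json(api, url: str, params: dict) -> dict:
    # request() handles URL signing, Cloudflare challenges and captcha retries.
    return await api.request(url, kwargs=params)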
async def coin_name(self, symbol: str) -> str:
    try:
        async with CloudflareScraper() as session:
            async with session.get('https://liqui.io/Market/Currencies/') as resp:
                currencies = await resp.json()
    except Exception as e:
        raise LiquiPairNamesException(e)
    coin_name = next(
        (i['Name'] for i in currencies if i['Symbol'] == symbol), None)
    if not coin_name:
        raise LiquiPairNamesException(f'cannot find coin {symbol!r}')
    return coin_name
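# Usage sketch for coin_name(). The surrounding class is not shown in this
# snippet, so `client` here is a hypothetical instance exposing the method.
async def print_coin_name(client, symbol: str = 'BTC') -> None:
    name = await client.coin_name(symbol)  # raises LiquiPairNamesException on failure
    print(f'{symbol} -> {name}')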
async def get_election_offices():
    """Starting point of the scraper program. Scrapes BASE_URL for election
    office information and both dumps results to a .json file and returns
    the results as json.
    @return: list of scraped results as json.
    """
    # Get list of county names from registrar to populate form
    # Define coroutine functions (context managers)
    async with CloudflareScraper() as session:
        async with session.get(INFO_URL) as s:
            # ClientResponse.read() is a coroutine function so it must be awaited
            text = await s.read()
        soup = bS(text, "html5lib")
        info_list = soup.find('div', {'class': 'content'}).findAll('li')
        counties = [info.text for info in info_list]
        # Use list of counties and IDs to get county info for each county
        tasks: List[Task] = []
        num_scraped = 0
        master_list = []
        for i in range(len(counties)):
            # Create task for a future asynchronous operation and store it in task list
            tasks.append(
                asyncio.create_task(scrape_one_county(session, counties[i])))
        # Run the coroutines and iterate over the yielded results as they complete
        # (out-of-order). Use asyncio.gather() with a couple code modifications to
        # preserve list order
        future: Future[Tuple[str, str, str, str, str, str]]
        for future in asyncio.as_completed(tasks):
            # Unpack awaited result of scrape_one_county()
            (address, county_website, phone_number, email_address,
             director_name, county_name) = await future
            schema = format_data_into_schema(address, county_website,
                                             phone_number, email_address,
                                             director_name, county_name)
            master_list.append(schema)
            num_scraped += 1
            print(f"[South Carolina] Scraped {county_name} county: "
                  f"#{num_scraped} of {len(counties)} .... "
                  f"[{round((num_scraped / len(counties)) * 100, 2)}%]")
    master_list = sorted(master_list, key=lambda county: county['countyName'])
    with open(
            os.path.join(ROOT_DIR, "scrapers", "south_carolina",
                         "south_carolina.json"), "w") as f:
        json.dump(master_list, f)
    return master_list
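# A minimal runner for get_election_offices() above (the Florida and Georgia
# variants below share the same entry-point shape). asyncio.run() drives the
# top-level coroutine; module constants such as INFO_URL and ROOT_DIR must
# already be defined as in the snippet.
import asyncio

if __name__ == "__main__":
    offices = asyncio.run(get_election_offices())
    print(f"Scraped {len(offices)} county offices")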
async def get_election_offices():
    """Starting point of the scraper program. Scrapes BASE_URL for election
    office information and both dumps results to a .json file and returns
    the results as json.
    @return: list of scraped results as json.
    """
    # Define coroutine functions (context managers)
    async with CloudflareScraper() as session:
        async with session.get(BASE_URL) as s:
            # ClientResponse.read() is a coroutine function so it must be awaited
            text = await s.read()
        soup = bS(text.decode("utf-8"), "html.parser")
        test_county_data = get_county_codes_and_names(soup)
        county_data = sorted(test_county_data, key=lambda k: k["countyName"])
        num_scraped = 0
        master_list = []
        # Create list that will store asyncio tasks
        tasks: List[Task] = []
        for county in county_data:
            code = county["countyCode"]
            name = county["countyName"]
            # Create task for a future asynchronous operation and store it in task list
            tasks.append(asyncio.create_task(scrape_one_county(session, code, name)))
        # Run the coroutines and iterate over the yielded results as they complete
        # (out-of-order). Use asyncio.gather() with a couple code modifications to
        # preserve list order
        future: Future[Tuple[str, str, str, str]]
        for future in asyncio.as_completed(tasks):
            # Unpack awaited result of scrape_one_county()
            cleaned_string, protected_email, _, county_name = await future
            schema = format_data_into_schema(
                cleaned_string, protected_email, county_name
            )
            master_list.append(schema)
            num_scraped += 1
            print(
                f"[Florida] Scraped {county_name} county: "
                f"#{num_scraped} of {len(county_data)} .... "
                f"[{round((num_scraped / len(county_data)) * 100, 2)}%]"
            )
    with open(os.path.join(ROOT_DIR, "scrapers", "florida", "florida.json"), "w") as f:
        json.dump(master_list, f)
    return master_list
async def read_logs():
    result_chat_lines = []
    result_kill_lines = []
    values = ('user', 'password', 'serverid', 'loc', 'folder', 'admin_file',
              'admin_line', 'chat_file', 'chat_line', 'kill_file', 'kill_line',
              'login_file', 'login_line', 'violations_file', 'violations_line')
    try:
        load_configini()
    except Exception:
        global configini
        configini = {}
    for value in values:
        if value not in configini:
            configini[value] = ''
    if configini['folder'] != '':
        if configini['folder'][-1:] != '/' and configini['folder'][-1:] != '\\':
            configini['folder'] = configini['folder'] + '/'
    save_configini()
    URL_LOGIN = '******'.format(configini['loc'])
    URL_LOGS = 'https://www.g-portal.{}/en/scum/logs/{}'.format(
        configini['loc'], configini['serverid'])
    async with CloudflareScraper() as session:
        try:
            log('connecting g-portal...')
            payload = {
                '_method': 'POST',
                'login': configini['user'],
                'password': configini['password'],
                'rememberme': '1'
            }
            async with session.post(URL_LOGIN, data=payload) as raw_response:
                response = await raw_response.text()
            async with session.get(URL_LOGS) as raw_response:
                response = await raw_response.text()
            html = BeautifulSoup(response, 'html.parser')
            select = html.find('div', {'class': 'wrapper logs'})
            loglist = select['data-logs']
            logs = json.loads(loglist)
            for i in range(len(logs)):
                getid = logs["file_" + str(i + 1)]
                id = getid[getid.find('Logs') + 5:]
                type = id.split('_')[0]
                if type in ['chat', 'kill']:
                    if configini[type + '_file'] != '':
                        if id < configini[type + '_file']:
                            continue
                    payload = {
                        '_method': 'POST',
                        'load': 'true',
                        'ExtConfig[config]': getid
                    }
                    async with session.post(URL_LOGS, data=payload) as raw_response:
                        response = await raw_response.text()
                    content = json.loads(response)
                    lines = content["ExtConfig"]["content"].splitlines()
                    found = False
                    writing = False
                    for index, line in enumerate(lines):
                        # Replace "%" with ";" because the .ini state file
                        # cannot store a bare "%" symbol.
                        if "%" in line:
                            lines[index] = line.replace("%", ";")
                        if id == configini[type + '_file'] and not found:
                            if line == configini[type + '_line']:
                                found = True
                                continue
                            # Compare the "%"-converted form too, in case the
                            # stored line was already escaped.
                            elif "%" in line:
                                if line.replace("%", ";") == configini[type + "_line"]:
                                    found = True
                                    continue
                        else:
                            if type == "chat":
                                result_chat_lines.append(line)
                            else:
                                result_kill_lines.append(line)
                            writing = True
                    if writing:
                        if found:
                            log('updating {}'.format(id))
                        else:
                            log('creating {}'.format(id))
                        configini[type + '_file'] = id
                        configini[type + '_line'] = lines[-1]
                        save_configini()
            if not result_chat_lines and not result_kill_lines:
                return [], False
            return [result_chat_lines, result_kill_lines], True
        except Exception:
            print(traceback.format_exc())
            return [], False
        # No explicit session.close() needed: the async with block closes it.
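# Why read_logs() swaps "%" for ";": the state file is a .ini, and (assuming
# save_configini() uses the standard configparser, which the snippet's own
# comment suggests) BasicInterpolation rejects a bare "%" in a value.
# Minimal standard-library demonstration; the key name is illustrative:
import configparser

cp = configparser.ConfigParser()
try:
    cp['state'] = {'chat_line': '100% done'}  # ValueError: invalid interpolation syntax
except ValueError:
    # The same workaround the scraper applies to log lines:
    cp['state'] = {'chat_line': '100% done'.replace('%', ';')}
print(cp['state']['chat_line'])  # -> '100; done'
# configparser.ConfigParser(interpolation=None) would avoid the issue entirely.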
# Fragment: tail of Voter.Vote(); the enclosing try block is not shown.
            future = asyncio.ensure_future(task, loop=self.loop)
            future.add_done_callback(functools.partial(printer))
            task = self.PostFingerprint()
            future = asyncio.ensure_future(task, loop=self.loop)
            future.add_done_callback(functools.partial(doNothing))
            return '%d %s %s' % (self.id, self.proxy, result)
        if 'refresh' in result:  # session expired, among other causes
            print('%d %s %s %s' % (self.id, self.proxy, result,
                                   'retrying the whole voting flow'))
            if random.random() < 0.6:
                await self.Vote()  # try again!
            else:
                # end of this vote
                return '%d %s %s %s' % (self.id, self.proxy, result, 'giving up')
        # if 'An entry' in result:  # this IP has already been used to vote
        return '%d %s %s' % (self.id, self.proxy, result)  # end of this vote
    except RetryExhausted:
        return '%d %s %s' % (self.id, self.proxy, 'consecutive retry limit exceeded')
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return '%d %s %s' % (self.id, self.proxy, 'proxy probably dead, giving up')

def Launch(self):
    vote = self.Vote()
    vote_future = asyncio.ensure_future(vote, loop=self.loop)
    vote_future.add_done_callback(functools.partial(printer))
    ## res = await vote_future
    ## if res == 300:
    ##     vote_future.add_done_callback(functools.partial(self.Launch))

if __name__ == '__main__':
    voter = Voter('192.168.1.1:9999', asyncio.get_event_loop(), CloudflareScraper())
    print(voter)
async def cs_page(url):
    async with CloudflareScraper() as session:
        async with session.get(url) as resp:
            return await resp.text()
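# Usage sketch: drive cs_page() for several URLs concurrently. The URLs are
# placeholders; asyncio.gather() preserves input order.
import asyncio

async def fetch_all(urls):
    return await asyncio.gather(*(cs_page(u) for u in urls))

# pages = asyncio.run(fetch_all(['https://example.com/a', 'https://example.com/b']))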
async def read_logs():
    values = ('user', 'password', 'serverid', 'loc', 'folder', 'admin_file',
              'admin_line', 'chat_file', 'chat_line', 'kill_file', 'kill_line',
              'login_file', 'login_line', 'violations_file', 'violations_line')
    print('scumlogs v1.0, scum server logs downloader from gportal\n'
          'by https://GAMEBotLand.com')
    try:
        load_configini()
    except Exception:
        global configini
        configini = {}
    for value in values:
        if value not in configini:
            configini[value] = ''
    if configini['folder'] != '':
        if configini['folder'][-1:] != '/' and configini['folder'][-1:] != '\\':
            configini['folder'] = configini['folder'] + '/'
    save_configini()
    # Note: `loc` is computed here but unused; the URLs below read
    # configini['loc'] directly.
    if configini['loc'] == 'com':
        loc = 'com'
    else:
        loc = 'us'
    URL_LOGIN = '******'.format(configini['loc'])
    URL_LOGS = 'https://www.g-portal.{}/en/scum/logs/{}'.format(
        configini['loc'], configini['serverid'])
    async with CloudflareScraper() as session:
        try:
            log('connecting g-portal...')
            payload = {
                '_method': 'POST',
                'login': configini['user'],
                'password': configini['password'],
                'rememberme': '1'
            }
            async with session.post(URL_LOGIN, data=payload) as raw_response:
                response = await raw_response.text()
            async with session.get(URL_LOGS) as raw_response:
                response = await raw_response.text()
            html = BeautifulSoup(response, 'html.parser')
            select = html.find('div', {'class': 'wrapper logs'})
            loglist = select['data-logs']
            logs = json.loads(loglist)
            for i in range(len(logs)):
                getid = logs["file_" + str(i + 1)]
                id = getid[getid.find('Logs') + 5:]
                type = id.split('_')[0]
                if configini[type + '_file'] != '':
                    if id < configini[type + '_file']:
                        continue
                payload = {
                    '_method': 'POST',
                    'load': 'true',
                    'ExtConfig[config]': getid
                }
                async with session.post(URL_LOGS, data=payload) as raw_response:
                    response = await raw_response.text()
                content = json.loads(response)
                lines = content["ExtConfig"]["content"].splitlines()
                filename = configini['folder'] + id
                file = open(filename, "a+", encoding='utf-8')
                found = False
                writing = False
                for line in lines:
                    if id == configini[type + '_file'] and not found:
                        # Skip lines until the last previously-saved line is seen.
                        if line == configini[type + '_line']:
                            found = True
                        continue
                    else:
                        file.write(line + '\n')
                        writing = True
                if writing:
                    if found:
                        log('updating {}'.format(id))
                    else:
                        log('creating {}'.format(id))
                    configini[type + '_file'] = id
                    configini[type + '_line'] = lines[-1]
                    save_configini()
                file.close()  # close even when nothing new was written
        except Exception:
            log('error connecting, check connectivity and scumlogs.ini')
            help()
        # No explicit session.close() needed: the async with block closes it.
# Fragment: tail of Voter.Vote(); the enclosing try block is not shown.
            future.add_done_callback(functools.partial(printer))
            task = self.PostFingerprint()
            future = asyncio.ensure_future(task, loop=self.loop)
            future.add_done_callback(functools.partial(printer))
            return '%d %s %s' % (self.id, self.proxy, result)
        if 'refresh' in result:  # session expired, among other causes
            print('%d %s %s %s' % (self.id, self.proxy, result,
                                   'retrying the whole voting flow'))
            await self.Vote()  # try again!
        # if 'An entry' in result:  # this IP has already been used to vote
        return '%d %s %s' % (self.id, self.proxy, result)  # end of this vote
    except RetryExhausted:
        return '%d %s %s' % (self.id, self.proxy, 'consecutive retry limit exceeded')
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return '%d %s %s' % (self.id, self.proxy, 'proxy probably dead, giving up')

def Launch(self):
    vote = self.Vote()
    vote_future = asyncio.ensure_future(vote, loop=self.loop)
    vote_future.add_done_callback(functools.partial(printer))
    ## res = await vote_future
    ## if res == 300:
    ##     vote_future.add_done_callback(functools.partial(self.Launch))

if __name__ == '__main__':
    voter = Voter('192.168.1.1:9999', asyncio.get_event_loop(), CloudflareScraper())
    print(voter)
"Accept-Language": "zh-CN,zh;q=0.8", "Upgrade-Insecure-Requests": "1", "User-Agent": uaGen.random, } ##def printer(future): ## print(future.result()) request_timeout = 50 #单次http请求的默认超时。 #你可以随时暂时覆盖这一设置 captcha_timeout = 60 #单次取验证码的默认超时 timeoutConfig = aiohttp.ClientTimeout(total=request_timeout) captchaTimeoutConfig = aiohttp.ClientTimeout(total=captcha_timeout) localsession = CloudflareScraper(headers=headers, loop=worker_loop, timeout=timeoutConfig) #async def localsession_get(url='https://coinone.co.kr/'):#珂以测试防火墙 async def localsession_get(url="https://www.internationalsaimoe.com"): async with localsession.get(url) as res: text = await res.text() return ('Ignaleo:本地session请求%s,状态码为%d' % (url, res.status)) #print('Ignaleo:本地session请求%s,状态码为%d'%(url,res.status)) #return res.status ## await asyncio.sleep(80) ## async with localsession.post(url,data=b'test',ssl=False) as res: ## text = await res.text()
async def get_election_offices():
    """Starting point of the scraper program. Scrapes BASE_URL for election
    office information and both dumps results to a .json file and returns
    the results as json.
    @return: list of scraped results as json.
    """
    # Get list of county names from registrar to populate form
    # Define coroutine functions (context managers)
    async with CloudflareScraper() as session:
        async with session.get(REGISTRAR_URL) as s:
            # ClientResponse.read() is a coroutine function so it must be awaited
            text = await s.read()
        soup = bS(text, "html5lib")
        county_option_list = soup.findAll(
            attrs={"name": "idTown"})[0].findAll("option")
        id_list = [
            county_option["value"] for county_option in county_option_list
        ]
        county_list = [
            county_option.string for county_option in county_option_list
        ]
        # Use list of counties and IDs to get county info for each county
        tasks: List[Task] = []
        num_scraped = 0
        master_list = []
        for i in range(len(id_list)):
            county_id = id_list[i]
            county_name = county_list[i]
            # Create task for a future asynchronous operation and store it in task list
            tasks.append(
                asyncio.create_task(
                    scrape_one_county(session, county_id, county_name)))
        # Run the coroutines and iterate over the yielded results as they complete
        # (out-of-order). Use asyncio.gather() with a couple code modifications to
        # preserve list order
        future: Future[Tuple[str, str, str, str, str, str]]
        for future in asyncio.as_completed(tasks):
            # Unpack awaited result of scrape_one_county()
            (
                registrar_name,
                phys_address,
                mail_address,
                phone_number,
                email_address,
                county_name,
            ) = await future
            schema = format_data_into_schema(
                registrar_name,
                phys_address,
                mail_address,
                phone_number,
                email_address,
                county_name,
            )
            master_list.append(schema)
            num_scraped += 1
            print(f"[Georgia] Scraped {county_name} county: "
                  f"#{num_scraped} of {len(county_list)} .... "
                  f"[{round((num_scraped / len(county_list)) * 100, 2)}%]")
    with open(os.path.join(ROOT_DIR, "scrapers", "georgia", "georgia.json"),
              "w") as f:
        json.dump(master_list, f)
    return master_list
async def url_2_image(url: str):
    async with CloudflareScraper() as session:
        async with session.get(url) as response:
            return await response.read()
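# Usage sketch: persist the bytes returned by url_2_image(). The file path is
# illustrative; read() yields the full body as bytes, hence binary mode.
import asyncio

async def save_image(url: str, path: str) -> None:
    data = await url_2_image(url)
    with open(path, 'wb') as f:
        f.write(data)

# asyncio.run(save_image('https://example.com/pic.jpg', 'pic.jpg'))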
async def __get_js(self):
    async with CloudflareScraper(loop=self._loop, headers={}) as session:
        async with session.get(
                'https://sf-tb-sg.ibytedtos.com/obj/rc-web-sdk-sg/acrawler.js'
        ) as response:
            return await response.text()