def test_anonym_filter(self):
    test1 = FreeProxy()
    cnt1 = len(test1.get_proxy_list())
    test2 = FreeProxy(anonym=True)
    cnt2 = len(test2.get_proxy_list())
    self.assertTrue(cnt2 < cnt1)
def test_empty_proxy_list(self):
    test = FreeProxy()
    test.get_proxy_list = MagicMock(return_value=[])
    with self.assertRaises(RuntimeError):
        test.get()
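# A minimal sketch of the unittest scaffolding the test methods above assume; the
# module layout and the TestFreeProxy class name are hypothetical, only FreeProxy
# and MagicMock are taken from the snippets themselves.
import unittest
from unittest.mock import MagicMock

from fp.fp import FreeProxy


class TestFreeProxy(unittest.TestCase):
    # the test_anonym_filter and test_empty_proxy_list methods above would live here
    pass


if __name__ == '__main__':
    unittest.main()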
from datetime import datetime
import time
import json
import logging
import traceback

import requests as rq
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

import config

logging.basicConfig(filename='SNKRS.log', filemode='a',
                    format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)

proxy_obj = FreeProxy(country_id=config.FREE_PROXY_LOCATION, rand=True)

INSTOCK = []


def scrape_site(headers, proxy):
    """
    Scrapes SNKRS site and adds items to array
    """
    items = []

    # Makes request to site
    anchor = 0
    while anchor < 160:
        url = f'https://api.nike.com/product_feed/threads/v3/?anchor={anchor}&count=50&filter=marketplace%28{config.LOCATION}%29&filter=language%28{config.LANGUAGE}%29&filter=channelId%28010794e5-35fe-4e32-aaff-cd2c74f89d61%29&filter=exclusiveAccess%28true%2Cfalse%29'
        html = rq.get(url=url, timeout=20, verify=False, headers=headers, proxies=proxy)
def update_proxy(self):
    self.proxy = FreeProxy(country_id=["RU"]).get()
    if debug == "true":
        print("[DEBUG] New proxy:", self.proxy)
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import pyfiglet
from os import system
from time import sleep
import sys
from fp.fp import FreeProxy

proxy = FreeProxy(timeout=1, rand=True).get()
print(proxy[7:])
proxy = proxy[7:]  # strip the leading 'http://' scheme

chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
mobile_emulation = {"deviceName": "Nexus 5"}
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
chrome_options.add_argument('--proxy-server=%s' % proxy)

driver = webdriver.Chrome(
    'C:/chromedriver.exe', chrome_options=chrome_options)  # If you use Windows you must add .exe
# driver.set_window_size(480, 590)

i = 0


def loop1():
    global i
    sleep(10)
    try:
import logging

import dotenv
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

logging.basicConfig(filename='Shopifylog.log', filemode='a',
                    format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)

CONFIG = dotenv.dotenv_values()

proxyObject = FreeProxy(country_id=[CONFIG['LOCATION']], rand=True)

INSTOCK = []


def check_url(url):
    """
    Checks whether the supplied URL is valid
    :return: Boolean - True if valid
    """
    return 'products.json' in url


def scrape_site(url, headers, proxy):
    """
    Scrapes the specified Shopify site and adds items to array
from fp.fp import FreeProxy
from requests.exceptions import ProxyError

total_sleeps = 0
latest_query_id = 0
webstyles = {}
sleep_time = 1800
scraped_queries = []

# the data structure for storing queries: {retailer: {30: [queryList], 1: [queryList2], 2: [queryList3]}}
queries = {}

default_header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
}
current_header = {
    "User-Agent": "Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0",
}

first_proxy = FreeProxy(country_id=['US']).get()
current_proxy = {"http": first_proxy, }


class CustomFormatting:
    @staticmethod
    def format(listing):
        return listing


class CustomFilter:
    @staticmethod
    def filter(filtered_listing, unfiltered_listing):
        return filtered_listing
def get_proxy():
    while 1:
        proxy = FreeProxy(country_id=['US'], rand=True).get()
        if proxy is not None:
            break
    return proxy
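# Hedged alternative sketch: depending on the free-proxy version, get() may raise
# (see the RuntimeError-based tests in this section) instead of returning None, so a
# bounded try/except retry loop is another way to write the helper above. The function
# name and the attempts parameter are illustrative, not part of the original project.
from fp.fp import FreeProxy


def get_proxy_with_retries(attempts=3):
    for _ in range(attempts):
        try:
            return FreeProxy(country_id=['US'], rand=True).get()
        except Exception:  # e.g. RuntimeError, depending on the library version
            continue
    return None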
def test_invalid_proxy(self):
    test = FreeProxy()
    test.get_proxy_list = MagicMock(return_value=['111.111.11:2222'])
    self.assertEqual(
        "There are no working proxies at this time.", test.get())
def test_empty_proxy_list(self):
    test = FreeProxy()
    test.get_proxy_list = MagicMock(return_value=[])
    self.assertEqual(
        "There are no working proxies at this time.", test.get())
def get_proxies():
    print('getting proxies...')
    proxy = FreeProxy(country_id='US').get()
    proxies = {'http': proxy}
    print(f'obtained proxies: {proxies}')
    return proxies
def scrape_scholar(query, pages=0, max_proxy_tries=5, log_path=''):
    '''
    Name: scrape_scholar
    Description: Searches Google Scholar using query and returns data for results.
    Input:
        @query: search term
        @pages: number of pages (10 articles per page) to request; 0 means no limit
        @max_proxy_tries: number of retries after Google Scholar blocks a fetch
        @log_path: suffix for the log file created next to the query name
    Output: A pandas DataFrame with one paper per row
    '''
    generator = FreeProxy(rand=True)
    page_size = 10

    # create log file to write errors to
    log = open(f'{query}' + log_path + '.txt', 'w+')

    # initialize list which will contain all article data and be used for DataFrame
    rows = []

    # the number of the current result being pulled from google scholar
    index = 0
    results = str(1)
    num_tries = 0
    while num_tries < max_proxy_tries:
        # try-catch block that allows errors to be written in log file if they occur
        try:
            # proxy = generator.get()
            # print(proxy)
            # pg = ProxyGenerator()
            # pg.SingleProxy(http = "http://157.245.203.17:3128")
            scholarly.use_proxy(None)

            # creates a generator object for results for the query
            results = scholarly.search_pubs(query)  # , start=0)

            # detects whether the limit has been passed, if there is one
            while not pages or index < page_size * pages:
                result = next(results)

                # retrieves the current result's bibliography data
                curr_result = result.bib

                # instantiates current row container
                row = dict()

                # passes link to article
                row['Link'] = curr_result['url'] if 'url' in curr_result else np.nan

                # title of paper, removes quotes at the start and end if there
                row['Title'] = curr_result['title'] if 'title' in curr_result else np.nan

                # True if pdf is available, False otherwise
                # row['Accessible'] = bool(paper['repositoryDocument']['pdfStatus'])

                # page number paper would be on on the website assuming 10 papers per page
                row['Page number'] = index // page_size + 1

                # list of [initials last-name]
                row['Authors'] = curr_result['author'] if 'author' in curr_result else np.nan

                # checks published year
                row['Publish year'] = int(curr_result['year']) if 'year' in curr_result else np.nan

                # number of citations
                row['Citations'] = curr_result['cites'] if 'cites' in curr_result else np.nan

                # links to related articles
                row['Related articles'] = ('https://scholar.google.com/scholar?q=related:'
                                           + result['url_scholarbib'].split(':')[1]
                                           + ':scholar.google.com/&scioq=' + query
                                           + '&hl=en&as_sdt=0,14')

                # checks if publisher is available
                row['Publisher'] = curr_result['venue'] if 'venue' in curr_result else np.nan

                rows.append(row)
                index += 1

            # returns pandas DataFrame where each row is 1 paper
            return pd.DataFrame(rows)

        # write any errors to log file
        except Exception as e:
            # log.write(str(e))
            # print(str(e))
            # traceback.print_exc(file=sys.stdout)
            # log.write('\n')
            if rows:
                return pd.DataFrame(rows)
            if str(e) == "Cannot fetch the page from Google Scholar.":
                num_tries += 1
                continue
            else:
                return pd.DataFrame(rows)

    # returns partially filled DataFrame if failed
    return pd.DataFrame(rows)
from fp.fp import FreeProxy
import urllib.request, socket

proxy = FreeProxy(country_id=['GB', 'US']).get()
prox = proxy[7:]


def is_bad_proxy(pip):
    try:
        proxy_handler = urllib.request.ProxyHandler({'http': pip})
        opener = urllib.request.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib.request.install_opener(opener)
        sock = urllib.request.urlopen('http://www.google.com')
    except urllib.error.HTTPError as e:
        print('Error code: ', e.code)
        return e.code
    except Exception as detail:
        print("ERROR:", detail)
        return 1
    return 0


def check_proxy():
    prox = [proxy[7:]]
    for item in prox:
        if is_bad_proxy(item):
            print("Bad Proxy")
        else:
            print(item)
def get_proxy():
    curl = FreeProxy(country_id=['US', 'RU'], timeout=1).get()
    return {"http": curl, "https": curl}
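# Illustrative caller for the helper above: requests accepts exactly this kind of
# {'http': ..., 'https': ...} mapping through its proxies= parameter. The target URL
# is an arbitrary example and not taken from the original project.
import requests
from fp.fp import FreeProxy

curl = FreeProxy(country_id=['US', 'RU'], timeout=1).get()
response = requests.get('https://httpbin.org/ip',
                        proxies={"http": curl, "https": curl},
                        timeout=10)
print(response.status_code)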
# Parse the author names
file_in = sys.argv[1]
authornames = []
with open(file_in, 'r') as f:
    for line in f:
        line = line.split('\n')[0]
        authornames.append(line)

# Indicate what data to get (see Author class in https://pypi.org/project/scholarly/)
sections = ['basics', 'indices']
max_homonyms = 5

# pip install free-proxy
from fp.fp import FreeProxy
proxy = FreeProxy(rand=True, timeout=1, country_id=['NO']).get()
scholarly.use_proxy(http=proxy, https=proxy)

# Loop through the authors
t0 = time.time()
data = list({})
for i, authname in enumerate(authornames):
    hindices = []
    emails, names, affiliations, citedbys = [], [], [], []
    try:
        search_query = scholarly.search_author(authname)
        for _ in range(max_homonyms):
            try:
                author = next(search_query)
                tmp_data = author.fill(sections=sections)
                hindices.append(tmp_data.hindex)
def get_torrent(self, url):
    print(
        colored('\n[-] CONNECTING TO PROXY SERVER', 'green', attrs=['bold']))
    l = []
    USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
    headers = {"user-agent": USER_AGENT}
    while 1:
        a = FreeProxy(rand=True, timeout=0.3).get().split('://')[1]
        if a in l:
            continue
        l.append(a)
        try:
            response = get(url, proxies={
                "http": a,
                "https": a
            }, headers=headers, timeout=7)
        except:
            print(
                colored('\n[-] ERROR CONNECTING TO PROXY ' + a +
                        '. TRYING WITH NEXT PROXY', 'white', 'on_red',
                        attrs=['bold']))
            continue
        if response.content:
            try:
                s = BeautifulSoup(response.content, 'html.parser')
                torrent = s.find(
                    'a', {'title': 'Download attachment'})['href']
            except:
                print(
                    colored('\n[-] ERROR CONNECTING TO PROXY ' + a +
                            '. TRYING WITH NEXT PROXY', 'white', 'on_red',
                            attrs=['bold']))
                continue
            print(
                colored('\n[-] DOWNLOADING TORRENT FILE', 'green',
                        attrs=['bold']))
            try:
                r = get(torrent, allow_redirects=True, proxies={
                    "http": a,
                    "https": a
                }, headers=headers)
            except:
                continue
            open(self.temp_file, 'wb').write(r.content)
            print(
                colored('\n[-] DOWNLOADED TORRENT FILE', 'green',
                        attrs=['bold']))
            return self.temp_file
        else:
            continue
    return 0
def add_free_proxies(self):
    for proxy in FreeProxy().get_proxy_list():
        self.add(proxy)
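# Minimal stand-in for the class that add_free_proxies() above belongs to, assuming
# only that it keeps a list and exposes add(); the ProxyPool name and its attributes
# are hypothetical, not the original project's class.
from fp.fp import FreeProxy


class ProxyPool:
    def __init__(self):
        self.proxies = []

    def add(self, proxy):
        # keep insertion order but avoid duplicates
        if proxy not in self.proxies:
            self.proxies.append(proxy)

    def add_free_proxies(self):
        for proxy in FreeProxy().get_proxy_list():
            self.add(proxy)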
            proxies={
                'http': проксиадрес,
                'https': проксиадрес
            })  # make the request to the site through the proxy
        запрос.encoding = 'utf-8'  # re-encode the response so Cyrillic text displays correctly
        суп = BeautifulSoup(
            запрос.text, 'html.parser'
        )  # 'lxml'  build the BeautifulSoup object (суп) from the response; the second argument selects the parser
        break  # stop retrying, we have a response
    except:  # an error means the current proxy could not connect
        print("Could not connect with proxy:", проксиадрес, "Continuing.")  # report it
        проксиадрес = FreeProxy(rand=True).get()  # fetch another proxy

проксиадрес = FreeProxy(rand=True).get()  # fetch a working proxy

# PARSING THE ENTRIES
итерация = 1  # page number
while True:  # loop over the pages
    if итерация == 1:
        адрес = "https://funkysouls.org/music/index.html"  # first page
    else:
        адрес = ("https://funkysouls.org/music/page/" + str(итерация) + ".html")  # later pages
    база = json.loads(open('музыка.json', "r", encoding='utf-8').read())  # open the database
    альбомы = [i['а'] for i in база]  # list the records (albums) already stored
    предбаза = []  # start an empty staging list
    парсинг(адрес)  # run the parsing function on the URL to build the soup
    массив = суп.find_all(
from bs4 import BeautifulSoup as scrap
import requests
import time
import schedule
from fp.fp import FreeProxy

proxy = FreeProxy().get()
list = []
counter = 1


def ScrapTorrent():
    global counter
    print(counter)
    counter += 1
    url = 'https://www.oxtorrent.cc/'
    response = requests.get(url, proxies={'http': proxy})
    soup = scrap(response.text, 'lxml')
    table = soup.findAll('table', {'class': 'table table-hover'})
    for i in range(0, 7):
        for item in table[i].findAll('a'):
            list.append(item.text)
        if i == 0:
            print('Films : ')
        elif i == 1:
            print('Series : ')
        elif i == 2:
            print('Musiques : ')
        elif i == 3:
            print('Jeux pc : ')
        elif i == 4:
            print('Jeux consoles : ')
import json
import logging
import traceback

from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

import config

logging.basicConfig(filename='footlocker.log', filemode='a',
                    format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)

proxy_obj = FreeProxy(country_id=[config.FREE_PROXY_LOCATION])

INSTOCK = []


def test_webhook():
    """
    Sends a test Discord webhook notification
    """
    data = {
        "username": config.USERNAME,
        "avatar_url": config.AVATAR_URL,
        "embeds": [{
            "title": "Testing Webhook",
def getVideos(proxyStrategy=ProxyStrategy.NONE.value):
    proxy = None
    if proxyStrategy == ProxyStrategy.FREE_PROXY.value:
        try:
            proxy = FreeProxy(https=True, rand=True).get()
        except Exception as e:
            if len(triedFreeProxyProxies) < numberOfFreeProxyProxiesToTry:
                triedFreeProxyProxies.append(proxy)
                getVideos(ProxyStrategy.FREE_PROXY.value)
                return
            getVideos(ProxyStrategy.PROXYSCRAPE.value)
            return
    if proxyStrategy == ProxyStrategy.PROXYSCRAPE.value:
        try:
            proxy = get_proxy(excluded_proxies=triedProxyScrapeProxies)
        except Exception as e:
            print(
                json.dumps(
                    {'message': 'Couldn\'t find a working Proxy : ' + str(e)}))
            sys.exit()
    videos = []
    try:
        with TikTokApi(proxy=proxy) as api:
            videosGenerator = api.user(username=username).videos(
                count=numberOfVideos)
            for video in videosGenerator:
                videos.append(video)
    except exceptions.CaptchaException:
        if (proxyStrategy == ProxyStrategy.FREE_PROXY.value):
            if len(triedFreeProxyProxies) < numberOfFreeProxyProxiesToTry:
                triedFreeProxyProxies.append(proxy)
                getVideos(ProxyStrategy.FREE_PROXY.value)
                return
        if (proxyStrategy == ProxyStrategy.PROXYSCRAPE.value):
            if len(triedProxyScrapeProxies) < numberOfProxyScrapeProxiesToTry:
                triedProxyScrapeProxies.append(proxy)
                getVideos(ProxyStrategy.PROXYSCRAPE.value)
                return
        if proxyStrategy == lastProxyStrategyIndex:
            print(
                json.dumps(
                    {'message': 'TikTok blocked the request using a Captcha'}))
            sys.exit()
        getVideos(proxyStrategy + 1)
        return
    except exceptions.NotFoundException:
        print(json.dumps({'message': 'User not found'}))
        sys.exit()
    except Exception as e:
        if (proxyStrategy == ProxyStrategy.FREE_PROXY.value):
            if len(triedFreeProxyProxies) < numberOfFreeProxyProxiesToTry:
                triedFreeProxyProxies.append(proxy)
                getVideos(ProxyStrategy.FREE_PROXY.value)
                return
        if (proxyStrategy == ProxyStrategy.PROXYSCRAPE.value):
            if len(triedProxyScrapeProxies) < numberOfProxyScrapeProxiesToTry:
                triedProxyScrapeProxies.append(proxy)
                getVideos(ProxyStrategy.PROXYSCRAPE.value)
                return
        if proxyStrategy == lastProxyStrategyIndex:
            print(json.dumps({'message': str(e)}))
            sys.exit()
        getVideos(proxyStrategy + 1)
        return
    print(json.dumps(videos))
async def __call__(self):
    UserCancel = KeyboardInterrupt

    # region various embed types creation
    def publication_embeds(result) -> discord.Embed:
        embed = discord.Embed(
            title=result["bib"]["title"],
            description=result["bib"]["abstract"],
            url=result["eprint_url"]
            if "eprint_url" in result.keys()
            else result["pub_url"],
        )
        embed.add_field(
            name="Authors",
            value=", ".join(result["bib"]["author"]).strip(),
            inline=True,
        )
        embed.add_field(name="Publisher", value=result["bib"]["venue"], inline=True)
        embed.add_field(
            name="Publication Year", value=result["bib"]["pub_year"], inline=True
        )
        embed.add_field(
            name="Cited By",
            value=result["num_citations"] if "num_citations" in result.keys() else "0",
            inline=True,
        )
        embed.add_field(
            name="Related Articles",
            value=f'https://scholar.google.com{result["url_related_articles"]}',
            inline=True,
        )
        embed.set_footer(text=f"Requested by {self.ctx.author}")
        return embed

    def author_embeds(result) -> discord.Embed:
        embed = discord.Embed(title=result["name"])
        embed.add_field(
            name="Cited By", value=f"{result['citedby']} articles", inline=True
        )
        embed.add_field(name="Scholar ID", value=result["scholar_id"], inline=True)
        embed.add_field(
            name="Affiliation",
            value=result["affiliation"] if "affiliation" in result.keys() else "None",
            inline=True,
        )
        embed.add_field(
            name="Interests",
            value=f"{', '.join(result['interests']) if 'interests' in result.keys() else 'None'}",
            inline=True,
        )
        embed.set_image(url=result["url_picture"])
        embed.set_footer(text=f"Requested by {self.ctx.author}")
        return embed

    def citation_embeds(result) -> discord.Embed:
        embed = discord.Embed(
            title=result["bib"]["title"],
            description=f"```{scholarly.bibtex(result)}```",
            url=result["eprint_url"]
            if "eprint_url" in result.keys()
            else result["pub_url"],
        )
        embed.set_footer(text=f"Requested by {self.ctx.author}")
        return embed
    # endregion

    try:
        # region user flags processing
        pg = ProxyGenerator()
        proxy = FreeProxy(rand=True, timeout=1, country_id=["BR"]).get()
        pg.SingleProxy(http=proxy, https=proxy)
        scholarly.use_proxy(pg)

        # self.args processing
        if self.args is None:
            results = [next(scholarly.search_pubs(self.query)) for _ in range(5)]
            embeds = list(map(publication_embeds, results))
        elif "author" in self.args:
            results = [
                next(scholarly.search_author(self.query)) for _ in range(5)
            ]
            embeds = list(map(author_embeds, results))
        elif "cite" in self.args:
            results = scholarly.search_pubs(self.query)
            results = [results for _ in range(5)]
            embeds = list(map(citation_embeds, results))
        else:
            await self.message.edit(content="Invalid flag")
            return
        # endregion

        # sets the reactions for the search result
        if len(embeds) > 1:
            buttons = [[
                {Button(style=ButtonStyle.grey, label="◀️", custom_id="◀️"): None},
                {Button(style=ButtonStyle.red, label="🗑️", custom_id="🗑️"): None},
                {Button(style=ButtonStyle.grey, label="▶️", custom_id="▶️"): None}
            ]]
        else:
            buttons = [[
                Button(style=ButtonStyle.red, label="🗑️", custom_id="🗑️")
            ]]

        await Sudo.multi_page_system(self.bot, self.ctx, self.message, tuple(embeds), buttons)
        return

    except asyncio.TimeoutError:
        raise
    except (asyncio.CancelledError, discord.errors.NotFound):
        pass
    except scholarly_exceptions._navigator.MaxTriesExceededException:
        await self.message.edit(
            content="Google Scholar is currently blocking our requests. Please try again later"
        )
        Log.append_to_log(self.ctx, f"{self.ctx.command} error", "MaxTriesExceededException")
        return
    except Exception as e:
        await error_handler(self.bot, self.ctx, e, self.query)
    finally:
        return
def test_invalid_proxy(self):
    test = FreeProxy()
    test.get_proxy_list = MagicMock(return_value=['111.111.11:2222'])
    with self.assertRaises(RuntimeError):
        test.get()
import logging

import dotenv
import requests as rq
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

logging.basicConfig(filename='SNKRSlog.log', filemode='a',
                    format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)

CONFIG = dotenv.dotenv_values()

proxyObject = FreeProxy(country_id=['GB'], rand=True)

INSTOCK = []


def scrape_site(headers, proxy):
    """
    Scrapes SNKRS site and adds items to array
    :return: None
    """
    items = []

    anchor = 0
    while anchor < 180:
        url = f'https://api.nike.com/product_feed/threads/v2/?anchor={anchor}&count=60&filter=marketplace%28{CONFIG["LOCATION"]}%29&filter=language%28{CONFIG["LANGUAGE"]}%29&filter=channelId%28010794e5-35fe-4e32-aaff-cd2c74f89d61%29&filter=exclusiveAccess%28true%2Cfalse%29&fields=active%2Cid%2ClastFetchTime%2CproductInfo%2CpublishedContent.nodes%2CpublishedContent.subType%2CpublishedContent.properties.coverCard%2CpublishedContent.properties.productCard%2CpublishedContent.properties.products%2CpublishedContent.properties.publish.collections%2CpublishedContent.properties.relatedThreads%2CpublishedContent.properties.seo%2CpublishedContent.properties.threadType%2CpublishedContent.properties.custom%2CpublishedContent.properties.title'
        try:
            html = rq.get(url=url,
import requests
import time
from fp.fp import FreeProxy
from requests import ConnectionError
from itertools import cycle
import os

proxy_list = cycle(FreeProxy().get_proxy_list())


def get_html_by_url(url, headers=None):
    for proxy in proxy_list:
        try:
            response = requests.get(url, headers=headers, proxies={'http': proxy})
            if response.ok:
                return response.text
        except ConnectionError:
            time.sleep(.5)
            return get_html_by_url(url, headers)


def write_links_to_file(filename, header, links):
    path_to_file = f'{os.getcwd()}/links/{filename}'
    mode = 'a' if os.path.exists(path_to_file) else 'w'
    with open(path_to_file, mode) as file:
        file.write(header + '\n')
        file.write('\n'.join(links) + '\n')