def send(self):
    try:
        if not self.proxy:
            # Direct POST without a proxy.
            r = requests.post(self.url, data=self.data, headers=self.headers)
            body = r.text
        else:
            # Route the same POST through a rotating proxy.
            r = ProxyRequests(self.url)
            r.set_headers(self.headers)
            r.post_with_headers(self.data)
            body = str(r)
        if self.check in body:
            self.success += 1
            sys.stdout.write('\r[+] Voted for %s \r\n' % self.name)
        else:
            sys.stdout.write('\r[!] An error occurred \r\n')
    except Exception:
        sys.stdout.write('\r[!] An error occurred \r\n')
    self.trys += 1
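For reference, a minimal standalone sketch of the two request paths send() switches between; the URL, payload, and header values below are placeholders, not values from the original:

import requests
from proxy_requests import ProxyRequests

url = 'https://example.com/vote'          # placeholder
data = {'choice': '1'}                    # placeholder payload
headers = {'User-Agent': 'Mozilla/5.0'}   # placeholder headers

# Direct path: a plain requests POST, body read from r.text.
direct = requests.post(url, data=data, headers=headers)
body = direct.text

# Proxied path: ProxyRequests sends the same POST through a rotating
# proxy; str(response) yields the response body.
proxied = ProxyRequests(url)
proxied.set_headers(headers)
proxied.post_with_headers(data)
body = str(proxied)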
def test_post_with_headers(henry_post_bucket):
    r = ProxyRequests(henry_post_bucket + '/post')
    r.set_headers({'name': 'rootVIII', 'secret_message': '7Yufs9KIfj33d'})
    r.post_with_headers({'key1': 'value1', 'key2': 'value2'})
    assert r.get_status_code() == 200
    assert 'Thank you' in r.__str__()
    print(r.get_proxy_used())
def gather_info(url):
    list_of_user_agents = [
        'Mozilla/5.0',
        'AppleWebKit/537.36',
        'Chrome/79.0.3945.88',
        'Safari/537.36'
    ]
    stat_code = 0
    tag_info = {'url': url}
    try_count = 0
    # Keep attempting with a fresh proxy and user agent, up to 4 tries.
    for user_agent in list_of_user_agents:
        if stat_code != 200:
            try_count += 1
            headers = {
                'User-Agent': user_agent,
                'Accept': 'text/html,application/xhtml+xml,'
                          'application/xml;q=0.9,image/webp,image/apng,'
                          '*/*;q=0.8',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.9'
            }
            r = ProxyRequests(url)
            r.set_headers(headers)
            r.get_with_headers()
            source = r.get_raw()
            stat_code = r.get_status_code()
            if try_count == len(list_of_user_agents) and stat_code != 200:
                tag_info['num_of_changed_files'] = -1
                tag_info['changed_paths'] = ['ERROR, CANNOT FULFILL REQUEST']
                tag_info['error_found'] = 'ERROR, TOO MANY PROXY ATTEMPTS'
                tag_info['metrics'] = {
                    'num_of_changed_files': 0,
                    'changes': 0,
                    'additions': 0,
                    'deletions': 0
                }
                return tag_info
    # Proxy request succeeded; parse the page.
    if stat_code == 200:
        soup = BeautifulSoup(source, 'lxml')
        metrics = get_changed_files_metrics(soup)
        tag_info['metrics'] = metrics
        count, changed_files = get_changed_files(soup)
        if count == 0:
            tag_info['changed_paths'] = ['NONE FOUND']
        else:
            tag_info['changed_paths'] = changed_files
        if count != tag_info['metrics']['num_of_changed_files']:
            tag_info['error_found'] = 'ERROR, MISMATCH IN COUNT'
        else:
            tag_info['error_found'] = 'NONE'
    return tag_info
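A possible invocation of gather_info; the compare-page URL is a placeholder, and the helpers get_changed_files_metrics and get_changed_files are assumed to be defined alongside it:

info = gather_info('https://example.com/compare/v1.0...v1.1')  # placeholder URL
if info['error_found'] == 'NONE':
    print(info['metrics']['num_of_changed_files'], 'files changed')
else:
    print(info['error_found'])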
def test_get_with_headers():
    h = {'User-Agent': 'NCSA Mosaic/3.0 (Windows 95)'}
    r = ProxyRequests('https://postman-echo.com/headers')
    r.set_headers(h)
    r.get_with_headers()
    assert r.get_status_code() == 200
    assert 'headers' in r.get_json()
    print(r.get_proxy_used())
def test_post_file_with_headers(henry_post_bucket):
    with open('/var/tmp/proxy_requests_testing.txt', 'w') as f_out:
        f_out.write('testing')
    h = {'User-Agent': 'NCSA Mosaic/3.0 (Windows 95)'}
    r = ProxyRequests(henry_post_bucket + '/post')
    r.set_headers(h)
    r.set_file('/var/tmp/proxy_requests_testing.txt')
    r.post_file_with_headers()
    assert r.get_status_code() == 200
    assert 'Thank you' in r.__str__()
    print(henry_post_bucket)
    print(r.get_proxy_used())
def fetch_with_proxy(url, headers):
    r = ProxyRequests(url)
    if headers:
        r.set_headers(headers)
        r.get_with_headers()
    else:
        r.get()
    status_code = r.get_status_code()
    if status_code != 200:
        print(f"{status_code}: {url}")
    return r.get_raw()
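A hedged usage example, reusing the postman-echo endpoint that appears in the tests above; the User-Agent value is an arbitrary placeholder:

raw = fetch_with_proxy('https://postman-echo.com/get',
                       {'User-Agent': 'Mozilla/5.0'})  # placeholder UA
print(raw[:200])  # get_raw() returns the raw response body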
def thread_get_info(url):
    stat_code = 0
    this_tag_info = {}
    this_tag_info['url'] = url
    try_count = 0
    # Keep collecting proxies until one succeeds, up to 10 tries.
    while stat_code != 200:
        try_count += 1
        if try_count > 10:
            this_tag_info['num_changed_files'] = -1
            this_tag_info['changed_paths'] = ['NONE FOUND']
            this_tag_info['error_found'] = 'ERROR, TOO MANY PROXY ATTEMPTS'
            return this_tag_info
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html,application/xhtml+xml,'
                      'application/xml;q=0.9,image/webp,image/apng,'
                      '*/*;q=0.8,application/signed-exchange;v=b3',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9'
        }
        r = ProxyRequests(url)
        r.set_headers(headers)
        r.get_with_headers()
        source = r.get_raw()
        stat_code = r.get_status_code()
        # Proxy request succeeded; parse the page.
        if stat_code == 200:
            soup = bs.BeautifulSoup(source, 'lxml')
            # Collect changed-files info.
            read_count = get_num_changed_files(soup)
            this_tag_info['num_changed_files'] = read_count
            count, changed_files = get_changed_files(soup)
            if count == 0:
                this_tag_info['changed_paths'] = ['NONE FOUND']
            else:
                this_tag_info['changed_paths'] = changed_files
            if count != read_count:
                this_tag_info['error_found'] = 'ERROR, MISMATCH IN COUNT'
            else:
                this_tag_info['error_found'] = 'OK'
    return this_tag_info
def __init__(self, query: str):
    """
    On initialization, the request is made with the headers and its
    JSON response is read, making the remaining properties available.

    :param query: search query
    """
    from urllib.parse import quote
    from proxy_requests import ProxyRequests
    import json

    headers = {"User-Agent": self.user_agent}
    req = ProxyRequests(self.RA_SEARCH.format(quote(
        query.encode("utf-8"))))
    req.set_headers(headers)
    req.get_with_headers()
    self.__response = json.loads(req.get_raw().decode())
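The same request-and-decode pattern, shown standalone so it can be run outside the class; the URL template, query, and User-Agent below are placeholders patterned on the constructor above:

from urllib.parse import quote
from proxy_requests import ProxyRequests
import json

RA_SEARCH = 'https://example.com/api/search/{}'  # placeholder template
query = 'some search term'                       # placeholder query

req = ProxyRequests(RA_SEARCH.format(quote(query.encode('utf-8'))))
req.set_headers({'User-Agent': 'Mozilla/5.0'})   # placeholder UA
req.get_with_headers()
response = json.loads(req.get_raw().decode())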
def bs_scrap_price(self, shop_link, domain, price_tag_name,
                   price_attr_name, price_tag_name_2, price_attr_values,
                   title_tag_name, title_attr_name, title_attr_value):
    n = 3
    while n > 0:
        user_agents = [
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:70.0) '
            'Gecko/20100101 Firefox/70.0',
            'Mozilla/5.0 (X11; Linux x86_64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Ubuntu Chromium/77.0.3865.90 Chrome/77.0.3865.90 '
            'Safari/537.36',
            'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) '
            'Presto/2.12.388 Version/12.16'
        ]
        # Choose a random user agent to hide the bot from the site.
        user_agent = random.choice(user_agents)
        header = {
            'User-Agent': user_agent,
            'Host': domain,
            'Accept': 'text/html,application/xhtml+xml,'
                      'application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Encoding': 'gzip,deflate',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '115',
            'Connection': 'keep-alive'
        }
        try:
            r = ProxyRequests(shop_link)
            r.set_headers(header)
            r.get_with_headers()
            res = str(r)
        except Exception as error:
            return False, error
        if res == '<Response [404]>':
            # Handle a 404 response.
            return False, 'The page was not found'
        # Create a soup object from the page source.
        soup = bs4.BeautifulSoup(res, features='html.parser')
        price = product_title = None
        for price_attr_value in price_attr_values:
            # Look for the price (and title) on the page.
            try:
                if price_tag_name_2 == '':
                    price = str(soup.find(
                        price_tag_name,
                        attrs={price_attr_name: price_attr_value}))
                else:
                    price = str(soup.find(
                        price_tag_name,
                        attrs={price_attr_name: price_attr_value}
                    ).find(price_tag_name_2))
                print(price)
                product_title = soup.find(
                    title_tag_name,
                    {title_attr_name: title_attr_value}).text.lstrip()
            except Exception:
                pass
        # If both price and title were found, break the while loop
        # and return them.
        if price is not None and product_title is not None:
            return price, product_title.lstrip()
        n -= 1
        time.sleep(random.randint(5, 10))
    return False, "Can't find price or product title on the web page"
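A hypothetical call to this scraper method (scraper stands in for an instance of the class it belongs to); every selector argument below is a placeholder, since the real tag and attribute names depend on the target shop:

result, detail = scraper.bs_scrap_price(
    'https://shop.example.com/item/42',  # placeholder product link
    'shop.example.com',                  # Host header value
    'span', 'class',                     # price tag and attribute name
    '',                                  # no nested price tag
    ['price', 'price-current'],          # candidate attribute values
    'h1', 'class', 'product-title')      # title tag, attribute, value
if result is False:
    print('scrape failed:', detail)     # detail holds the error message
else:
    print(result, detail)               # price and product title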
windows_64_or_not_randomize = str(random.randint(0, 1))  # reconstructed: the assignment was truncated to "0, 1)" in the source
fake_rv = random.randint(0, 100)
if windows_64_or_not_randomize == "1":
    windows_x64_or_not_2 = "32"
    windows_x64_or_not = "86"
else:
    windows_x64_or_not = "64"
if '/' in url:  # assumed intent; the original test f'{url}/' in url could never be true
    h = {
        'User-Agent': f'mouseTor/{version1}.{version2}.{version3} '
                      f'(Windows NT {os_Version}.0; Win64; x86; rv:{fake_rv}.0) '
                      'mouseTor Relay/A9H8G88F mouseTor/1.0.0'
    }
    r = ProxyRequests('http://' + host_2[1] + '/' + fileget[1])
    r.set_headers(h)
    r.get_with_headers()
    print(html2text.html2text(str(r)))
    break
else:
    os_Version = random.randint(5, 10)
    h = {
        'User-Agent': f'mouseTor/{version1}.{version2}.{version3} '
                      f'(Windows NT {os_Version}.0; Win64 x86; rv:{fake_rv}.0) '
                      'mouseTor Relay/A9H8G88F mouseTor/1.0.0'
    }
    r = ProxyRequests('http://' + host_2[1])
    r.set_headers(h)
    r.get_with_headers()
    print(html2text.html2text(str(r)))
    break
def get_page(page_links, user_agent):
    r = ProxyRequests(page_links)
    r.set_headers({'User-Agent': user_agent})
    r.get_with_headers()
    return r
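A possible threaded fan-out built on get_page; the URLs and User-Agent string are placeholders:

from concurrent.futures import ThreadPoolExecutor

urls = ['https://example.com/a', 'https://example.com/b']  # placeholders
ua = 'Mozilla/5.0'                                         # placeholder UA
with ThreadPoolExecutor(max_workers=4) as pool:
    responses = list(pool.map(lambda u: get_page(u, ua), urls))
for resp in responses:
    print(resp.get_status_code(), resp.get_proxy_used())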