def send(self):
    try:
        if not self.proxy:
            # Direct POST without a proxy.
            r = requests.post(self.url, data=self.data, headers=self.headers)
            body = r.text
        else:
            # Route the same POST through a rotating proxy.
            r = ProxyRequests(self.url)
            r.set_headers(self.headers)
            r.post_with_headers(self.data)
            body = str(r)
        if self.check in body:
            self.success += 1
            sys.stdout.write('\r[+] Voted for %s \r\n' % self.name)
        else:
            sys.stdout.write('\r[!] An error occurred \r\n')
    except Exception:
        sys.stdout.write('\r[!] An error occurred \r\n')
    self.trys += 1
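For reference, a minimal standalone sketch of the two request paths send() switches between; the URL, payload, and header values below are placeholders, not values from the original:

import requests
from proxy_requests import ProxyRequests

url = 'https://example.com/vote'          # placeholder
data = {'choice': '1'}                    # placeholder payload
headers = {'User-Agent': 'Mozilla/5.0'}   # placeholder headers

# Direct path: a plain requests POST, body read from r.text.
direct = requests.post(url, data=data, headers=headers)
body = direct.text

# Proxied path: ProxyRequests sends the same POST through a rotating
# proxy; str(response) yields the response body.
proxied = ProxyRequests(url)
proxied.set_headers(headers)
proxied.post_with_headers(data)
body = str(proxied)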
def test_post_with_headers(henry_post_bucket):
    r = ProxyRequests(henry_post_bucket + '/post')
    r.set_headers({'name': 'rootVIII', 'secret_message': '7Yufs9KIfj33d'})
    r.post_with_headers({'key1': 'value1', 'key2': 'value2'})
    assert r.get_status_code() == 200
    assert 'Thank you' in r.__str__()
    print(r.get_proxy_used())
def gather_info(url):
    list_of_user_agents = [
        'Mozilla/5.0',
        'AppleWebKit/537.36',
        'Chrome/79.0.3945.88',
        'Safari/537.36'
    ]
    stat_code = 0
    tag_info = {'url': url}
    try_count = 0
    # Keep attempting with a fresh proxy and user agent, up to 4 tries.
    for user_agent in list_of_user_agents:
        if stat_code != 200:
            try_count += 1
            headers = {
                'User-Agent': user_agent,
                'Accept': 'text/html,application/xhtml+xml,'
                          'application/xml;q=0.9,image/webp,image/apng,'
                          '*/*;q=0.8',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.9'
            }
            r = ProxyRequests(url)
            r.set_headers(headers)
            r.get_with_headers()
            source = r.get_raw()
            stat_code = r.get_status_code()
            if try_count == len(list_of_user_agents) and stat_code != 200:
                tag_info['num_of_changed_files'] = -1
                tag_info['changed_paths'] = ['ERROR, CANNOT FULFILL REQUEST']
                tag_info['error_found'] = 'ERROR, TOO MANY PROXY ATTEMPTS'
                tag_info['metrics'] = {
                    'num_of_changed_files': 0,
                    'changes': 0,
                    'additions': 0,
                    'deletions': 0
                }
                return tag_info
    # Proxy request succeeded; parse the page.
    if stat_code == 200:
        soup = BeautifulSoup(source, 'lxml')
        metrics = get_changed_files_metrics(soup)
        tag_info['metrics'] = metrics
        count, changed_files = get_changed_files(soup)
        if count == 0:
            tag_info['changed_paths'] = ['NONE FOUND']
        else:
            tag_info['changed_paths'] = changed_files
        if count != tag_info['metrics']['num_of_changed_files']:
            tag_info['error_found'] = 'ERROR, MISMATCH IN COUNT'
        else:
            tag_info['error_found'] = 'NONE'
    return tag_info
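A possible invocation of gather_info; the compare-page URL is a placeholder, and the helpers get_changed_files_metrics and get_changed_files are assumed to be defined alongside it:

info = gather_info('https://example.com/compare/v1.0...v1.1')  # placeholder URL
if info['error_found'] == 'NONE':
    print(info['metrics']['num_of_changed_files'], 'files changed')
else:
    print(info['error_found'])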
def test_get_with_headers():
    h = {'User-Agent': 'NCSA Mosaic/3.0 (Windows 95)'}
    r = ProxyRequests('https://postman-echo.com/headers')
    r.set_headers(h)
    r.get_with_headers()
    assert r.get_status_code() == 200
    assert 'headers' in r.get_json()
    print(r.get_proxy_used())
def test_post_file_with_headers(henry_post_bucket):
    with open('/var/tmp/proxy_requests_testing.txt', 'w') as f_out:
        f_out.write('testing')
    h = {'User-Agent': 'NCSA Mosaic/3.0 (Windows 95)'}
    r = ProxyRequests(henry_post_bucket + '/post')
    r.set_headers(h)
    r.set_file('/var/tmp/proxy_requests_testing.txt')
    r.post_file_with_headers()
    assert r.get_status_code() == 200
    assert 'Thank you' in r.__str__()
    print(henry_post_bucket)
    print(r.get_proxy_used())
def fetch_with_proxy(url, headers):
    r = ProxyRequests(url)
    if headers:
        r.set_headers(headers)
        r.get_with_headers()
    else:
        r.get()
    status_code = r.get_status_code()
    if status_code != 200:
        print(f"{status_code}: {url}")
    return r.get_raw()
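A hedged usage example, reusing the postman-echo endpoint that appears in the tests above; the User-Agent value is an arbitrary placeholder:

raw = fetch_with_proxy('https://postman-echo.com/get',
                       {'User-Agent': 'Mozilla/5.0'})  # placeholder UA
print(raw[:200])  # get_raw() returns the raw response body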
def thread_get_info(url):
    stat_code = 0
    this_tag_info = {}
    this_tag_info['url'] = url
    try_count = 0
    # Keep collecting proxies until one succeeds, up to 10 tries.
    while stat_code != 200:
        try_count += 1
        if try_count > 10:
            this_tag_info['num_changed_files'] = -1
            this_tag_info['changed_paths'] = ['NONE FOUND']
            this_tag_info['error_found'] = 'ERROR, TOO MANY PROXY ATTEMPTS'
            return this_tag_info
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html,application/xhtml+xml,'
                      'application/xml;q=0.9,image/webp,image/apng,'
                      '*/*;q=0.8,application/signed-exchange;v=b3',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9'
        }
        r = ProxyRequests(url)
        r.set_headers(headers)
        r.get_with_headers()
        source = r.get_raw()
        stat_code = r.get_status_code()
        # Proxy request succeeded; parse the page.
        if stat_code == 200:
            soup = bs.BeautifulSoup(source, 'lxml')
            # Collect changed-files info.
            read_count = get_num_changed_files(soup)
            this_tag_info['num_changed_files'] = read_count
            count, changed_files = get_changed_files(soup)
            if count == 0:
                this_tag_info['changed_paths'] = ['NONE FOUND']
            else:
                this_tag_info['changed_paths'] = changed_files
            if count != read_count:
                this_tag_info['error_found'] = 'ERROR, MISMATCH IN COUNT'
            else:
                this_tag_info['error_found'] = 'OK'
    return this_tag_info
def __init__(self, query: str):
    """
    On initialization, the request is made with the headers and its
    JSON response is read, making the remaining properties available.

    :param query: search query
    """
    from urllib.parse import quote
    from proxy_requests import ProxyRequests
    import json

    headers = {"User-Agent": self.user_agent}
    req = ProxyRequests(self.RA_SEARCH.format(quote(
        query.encode("utf-8"))))
    req.set_headers(headers)
    req.get_with_headers()
    self.__response = json.loads(req.get_raw().decode())
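The same request-and-decode pattern, shown standalone so it can be run outside the class; the URL template, query, and User-Agent below are placeholders patterned on the constructor above:

from urllib.parse import quote
from proxy_requests import ProxyRequests
import json

RA_SEARCH = 'https://example.com/api/search/{}'  # placeholder template
query = 'some search term'                       # placeholder query

req = ProxyRequests(RA_SEARCH.format(quote(query.encode('utf-8'))))
req.set_headers({'User-Agent': 'Mozilla/5.0'})   # placeholder UA
req.get_with_headers()
response = json.loads(req.get_raw().decode())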
def bs_scrap_price(self, shop_link, domain, price_tag_name,
                   price_attr_name, price_tag_name_2, price_attr_values,
                   title_tag_name, title_attr_name, title_attr_value):
    n = 3
    while n > 0:
        user_agents = [
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:70.0) '
            'Gecko/20100101 Firefox/70.0',
            'Mozilla/5.0 (X11; Linux x86_64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Ubuntu Chromium/77.0.3865.90 Chrome/77.0.3865.90 '
            'Safari/537.36',
            'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) '
            'Presto/2.12.388 Version/12.16'
        ]
        # Choose a random user agent to hide the bot from the site.
        user_agent = random.choice(user_agents)
        header = {
            'User-Agent': user_agent,
            'Host': domain,
            'Accept': 'text/html,application/xhtml+xml,'
                      'application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Encoding': 'gzip,deflate',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '115',
            'Connection': 'keep-alive'
        }
        try:
            r = ProxyRequests(shop_link)
            r.set_headers(header)
            r.get_with_headers()
            res = str(r)
        except Exception as error:
            return False, error
        if res == '<Response [404]>':
            # Handle a 404 response.
            return False, 'The page was not found'
        # Create a soup object from the page source.
        soup = bs4.BeautifulSoup(res, features='html.parser')
        price = product_title = None
        for price_attr_value in price_attr_values:
            # Look for the price (and title) on the page.
            try:
                if price_tag_name_2 == '':
                    price = str(soup.find(
                        price_tag_name,
                        attrs={price_attr_name: price_attr_value}))
                else:
                    price = str(soup.find(
                        price_tag_name,
                        attrs={price_attr_name: price_attr_value}
                    ).find(price_tag_name_2))
                print(price)
                product_title = soup.find(
                    title_tag_name,
                    {title_attr_name: title_attr_value}).text.lstrip()
            except Exception:
                pass
        # If both price and title were found, break the while loop
        # and return them.
        if price is not None and product_title is not None:
            return price, product_title.lstrip()
        n -= 1
        time.sleep(random.randint(5, 10))
    return False, "Can't find price or product title on the web page"
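A hypothetical call to this scraper method (scraper stands in for an instance of the class it belongs to); every selector argument below is a placeholder, since the real tag and attribute names depend on the target shop:

result, detail = scraper.bs_scrap_price(
    'https://shop.example.com/item/42',  # placeholder product link
    'shop.example.com',                  # Host header value
    'span', 'class',                     # price tag and attribute name
    '',                                  # no nested price tag
    ['price', 'price-current'],          # candidate attribute values
    'h1', 'class', 'product-title')      # title tag, attribute, value
if result is False:
    print('scrape failed:', detail)     # detail holds the error message
else:
    print(result, detail)               # price and product title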
windows_64_or_not_randomize = str(random.randint(0, 1))  # reconstructed: the assignment was truncated to "0, 1)" in the source
fake_rv = random.randint(0, 100)
if windows_64_or_not_randomize == "1":
    windows_x64_or_not_2 = "32"
    windows_x64_or_not = "86"
else:
    windows_x64_or_not = "64"
if '/' in url:  # assumed intent; the original test f'{url}/' in url could never be true
    h = {
        'User-Agent': f'mouseTor/{version1}.{version2}.{version3} '
                      f'(Windows NT {os_Version}.0; Win64; x86; rv:{fake_rv}.0) '
                      'mouseTor Relay/A9H8G88F mouseTor/1.0.0'
    }
    r = ProxyRequests('http://' + host_2[1] + '/' + fileget[1])
    r.set_headers(h)
    r.get_with_headers()
    print(html2text.html2text(str(r)))
    break
else:
    os_Version = random.randint(5, 10)
    h = {
        'User-Agent': f'mouseTor/{version1}.{version2}.{version3} '
                      f'(Windows NT {os_Version}.0; Win64 x86; rv:{fake_rv}.0) '
                      'mouseTor Relay/A9H8G88F mouseTor/1.0.0'
    }
    r = ProxyRequests('http://' + host_2[1])
    r.set_headers(h)
    r.get_with_headers()
    print(html2text.html2text(str(r)))
    break
def get_page(page_links, user_agent):
    r = ProxyRequests(page_links)
    r.set_headers({'User-Agent': user_agent})
    r.get_with_headers()
    return r
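A possible threaded fan-out built on get_page; the URLs and User-Agent string are placeholders:

from concurrent.futures import ThreadPoolExecutor

urls = ['https://example.com/a', 'https://example.com/b']  # placeholders
ua = 'Mozilla/5.0'                                         # placeholder UA
with ThreadPoolExecutor(max_workers=4) as pool:
    responses = list(pool.map(lambda u: get_page(u, ua), urls))
for resp in responses:
    print(resp.get_status_code(), resp.get_proxy_used())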