Example #1
def Random_agent_generator():
    r_agent = Randomize()
    # returns a random 'Desktop / Windows' user-agent string
    agent = r_agent.random_agent('desktop', 'windows')
    return agent
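A brief usage sketch (not part of the original example): the returned string typically goes into a User-Agent header, as the later examples on this page do.

import requests

# hedged sketch — example.com is a placeholder URL
headers = {"User-Agent": Random_agent_generator()}
resp = requests.get("https://example.com", headers=headers)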
Example #2
    def __init__(self, showWindow=True):
        options = webdriver.ChromeOptions()
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--no-sandbox")
        # route all traffic through a random proxy from the local list
        # (strip the trailing newline that readlines() keeps)
        with open("proxy.txt") as f:
            pr = random.choice(f.readlines()).strip()
        options.add_argument('--proxy-server=%s' % pr)
        # randomize the User-Agent for this session
        r_agent = Randomize()
        useragent = r_agent.random_agent('desktop', 'windows')
        options.add_argument(f'user-agent={useragent}')
        # deny geolocation prompts
        prefs = {"profile.default_content_setting_values.geolocation": 2}
        options.add_experimental_option("prefs", prefs)
        if not showWindow:
            options.set_headless(headless=True)

        # pick the bundled chromedriver binary for the current platform
        if sys.platform in ('linux', 'linux2'):
            driverfilename = 'chrome_linux'
        elif sys.platform == 'win32':
            driverfilename = 'chrome_windows.exe'
        elif sys.platform == 'darwin':
            driverfilename = 'chrome_mac'
        driverpath = os.path.join(os.path.split(__file__)[0], 'drivers', driverfilename)

        os.chmod(driverpath, 0o755)

        self.driver = webdriver.Chrome(executable_path=driverpath, chrome_options=options)
        self.Key = Keys
        self.errors = []

        # re-export common WebDriver methods directly on this wrapper object
        for function in [
                'add_cookie', 'delete_all_cookies', 'delete_cookie',
                'execute_script', 'execute_async_script', 'fullscreen_window',
                'get_cookie', 'get_cookies', 'get_log', 'get_network_conditions',
                'get_screenshot_as_base64', 'get_screenshot_as_file',
                'get_screenshot_as_png', 'get_window_position', 'get_window_rect',
                'get_window_size', 'maximize_window', 'minimize_window',
                'implicitly_wait', 'quit', 'refresh', 'save_screenshot',
                'set_network_conditions', 'set_page_load_timeout',
                'set_script_timeout', 'set_window_position', 'set_window_rect',
                'start_client', 'start_session', 'stop_client', 'switch_to_alert'
        ]:
            setattr(self, function, getattr(self.driver, function))
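A hedged usage sketch; the enclosing class name is not shown in this fragment, so Browser below is hypothetical:

# hypothetical usage — the class name 'Browser' is assumed, not from the original
bot = Browser(showWindow=False)        # headless Chrome behind a random proxy/UA
bot.driver.get("https://example.com")  # placeholder URL
bot.save_screenshot("page.png")        # one of the re-exported driver methods
bot.quit()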
Example #3
    def start_requests(self):
        r_agent = Randomize()
        firstrequest_headers = {
            "X-FORWARDED-FOR": "2.16.167.33",
            "Host": "www.assetmanagement.hsbc.co.uk",
            "User-Agent": r_agent.random_agent('desktop','windows'),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br"
        }
        firsturl = "https://hsbcbankglobal.sc.omtrdc.net/b/ss/hsbc-amg-uk,hsbc-amg-global-rollup/1/JS-2.0.0/s78194187988030?AQB=1&ndh=1&pf=1&t=7%2F10%2F2019%2011%3A45%3A31%204%20-480&sdid=5D631A30BB0D4398-0DF62F7812D9139F&mid=64095394369463805659018752405389641208&ce=UTF-8&ns=hsbcbankglobal&pageName=Global%20High%20Income%20Bond%20-%20HSBC%20Global%20Asset%20Management%20UK&g=https%3A%2F%2Fwww.assetmanagement.hsbc.co.uk%2Fen%2Fintermediary%2Finvestment-expertise%2Ffixed-income%2Fglobal-high-income-bond&cc=USD&server=www.assetmanagement.hsbc.co.uk&events=event27&v1=Global%20High%20Income%20Bond%20-%20HSBC%20Global%20Asset%20Management%20UK&v2=High%20Income%20Bond%20-%20HSBC%20Global%20Asset%20Management%20UK&v3=www.assetmanagement.hsbc.co.uk%2Fen%2Fintermediary%2Finvestment-expertise%2Ffixed-income%2Fglobal-high-income-bond&c6=hsbc-amg-uk%2Chsbc-amg-global-rollup&c7=11%3A45%20AM%7CThursday&c13=accept&v15=11%3A45%20AM%7CThursday&v16=hsbc-amg-uk%2Chsbc-amg-global-rollup&c17=uk-gam&v17=uk-gam&v96=content&v98=Terms%20and%20conditions&v99=accept&pe=lnk_o&pev2=no%20link_name&pid=Intermediary%20%7C%20Investment%20Expertise%20%7C%20Fixed%20Income%20%7C%20Global%20High%20Income%20Bond&pidt=1&oid=https%3A%2F%2Fwww.assetmanagement.hsbc.co.uk%2Fen%2Fintermediary%2Finvestment-expertise%2Ffixed-income%2Fglobal-high&ot=A&s=1920x1080&c=24&j=1.6&v=N&k=Y&bw=1835&bh=634&AQE=1"
        response = requests.get(firsturl, headers=firstrequest_headers)
        self.log("HTTP status, reason: %s, %s" % (response.status_code, response.reason))
        referer = response.request.headers.get('Referer', None)
        # setdefault returns the value, not the dict, so keep the dict itself
        firstrequest_headers.setdefault("Referer", referer)
        headers = firstrequest_headers
        self.log("headers:%s" % headers)
        urls_filepath = os.path.join("./resources", "urls.txt")
        with open(urls_filepath, mode='r') as handler:
            # strip newlines so each URL is usable as-is
            self.start_urls = [line.strip() for line in handler]

        self.subfoldername = "../staging/" + date.today().strftime("%m-%d-%Y")
        subfolderpath = os.path.normpath(os.path.join(os.getcwd(), self.subfoldername))
        self.log("subfolderpath:%s" % subfolderpath)
        if os.path.exists(subfolderpath):
            shutil.rmtree(subfolderpath, ignore_errors=True)
        os.mkdir(subfolderpath)

        for url in self.start_urls:
            yield scrapy.Request(url=url, headers=headers, callback=self.parse)
Example #4
def user_a():
    m = randint(0, 6)
    agents = []
    r_agent = Randomize()

    # Takes no extra arguments; returns ['3:2', '4:3', '5:3', '5:4', '16:9', '16:10'].
    r_agent.get_aspect_ratio_list()

    # Takes 2 arguments (self, aspect_ratio); returns a screen resolution.
    r_agent.random_resolution('3:2')

    # Takes 3 arguments (self, device_type, os).
    agents.append(r_agent.random_agent('desktop', 'linux'))       # 'Desktop / Linux'
    agents.append(r_agent.random_agent('desktop', 'mac'))         # 'Desktop / Macintosh'
    agents.append(r_agent.random_agent('desktop', 'windows'))     # 'Desktop / Windows'

    agents.append(r_agent.random_agent('tablet', 'android'))      # 'Tablet / Android'
    agents.append(r_agent.random_agent('tablet', 'ios'))          # 'Tablet / iOS'

    agents.append(r_agent.random_agent('smartphone', 'android'))  # 'Smartphone / Android'
    agents.append(r_agent.random_agent('smartphone', 'ios'))      # 'Smartphone / iOS'
    return agents[m]
Example #5
class RandomUserAgentMiddleware:
    """
    Random user-agent middleware: sets a random User-Agent on every request.
    """

    def __init__(self):
        self.r_agent = Randomize()
        self.platform = ['windows', 'mac', 'linux']

    def process_request(self, request, spider):
        random_user_agent = self.r_agent.random_agent('desktop', random.choice(self.platform))
        request.headers['User-Agent'] = random_user_agent
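To put this middleware into effect, Scrapy needs it registered in settings.py; a minimal sketch, assuming the class lives in myproject.middlewares (the module path is an assumption):

# settings.py — the module path below is assumed, not from the original
DOWNLOADER_MIDDLEWARES = {
    # disable the built-in middleware so it cannot overwrite the random UA
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'myproject.middlewares.RandomUserAgentMiddleware': 400,
}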
Example #6
class UserAgentDownloaderMiddleware(object):
    """
        User-Agent 自动切换插件
    """
    def __init__(self):
        self.r_agent = Randomize()

    def process_request(self, request, spider):
        # rotate only when the spider opts in and the request does not opt out
        if (getattr(spider, "user_agent_flag", False)
                and not request.meta.get("dont_user_agent", False)):
            request.headers["User-Agent"] = self.r_agent.random_agent(
                'desktop', 'windows')
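The middleware rotates the UA only when the spider sets user_agent_flag, and a single request can still opt out through its meta dict; a hedged spider sketch:

import scrapy

# hypothetical spider showing both switches the middleware checks
class DemoSpider(scrapy.Spider):
    name = "demo"
    user_agent_flag = True  # opt the whole spider in to UA rotation

    def start_requests(self):
        # this one request keeps its original User-Agent despite the flag
        yield scrapy.Request("https://example.com",  # placeholder URL
                             meta={"dont_user_agent": True})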
Example #7
def sayfaalt(variable, vari):
    git = "http://www.pinterest.com/" + variable + "/" + vari + "/"
    adres = request.remote_addr
    try:
        return render_template(variable + "-" + vari + ".html")
    except Exception:
        if adres!="127.0.0.1":
            return render_template("404.html")
        else:    
            r_agent = Randomize()
            useragent = r_agent.random_agent('desktop','windows')
            with open("proxy.txt") as f:
                lines = f.readlines()
                pr = random.choice(lines)	
            http_proxy  = "http://"+chomp(pr)
            https_proxy = "https://"+chomp(pr)
            proxyDict = { "http"  : http_proxy, "https" : https_proxy}
            headers = {'User-Agent': useragent,
            'Accept-Language': 'tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7'}
            r = requests.get(git, headers=headers, proxies=proxyDict)
            soup = BeautifulSoup(r.text, "html.parser")
            for tag in soup.find_all("meta"):
                if tag.get("property", None) == "og:title":
                    titlecek= tag.get("content", None)
                elif tag.get("property", None) == "og:description":
                    descek= tag.get("content", None)
            with open('sablonalt.txt', 'r') as file:
                filedata = file.read()
            filedata = filedata.replace('<title>Clean Blog </title>', '<title>' + titlecek + '</title>')
            filedata = filedata.replace('<meta name="description" content="desc">', '<meta name="description" content="' + descek + '">')
            filedata = filedata.replace('<a target="_blank" href="adresss">title</a></p>', '<a target="_blank" href="' + git + '">' + titlecek + '</a></p>')
            filedata = filedata.replace('<a target="_blank" href="adress"><i class="next">', '<a target="_blank" href="' + git + '"><i class="next"> ')
            with open(app.root_path + '/templates/' + variable + "-" + vari + '.html', 'w') as file:
                file.write(filedata)
            # append the new page to the URL list
            with open("urllist.txt", "a") as liste:
                icerik = titlecek + ":" + "/" + variable + "/" + vari + "/"
                print(icerik, file=liste)
            
        return "ok"+adres
Example #8
import gzip
import json
import re
import requests

from datetime import datetime
from time import strptime
from datetime import date
import dateutil.parser

# specific dependency modules next
from bs4 import BeautifulSoup
from tqdm import tqdm

from random_useragent.random_useragent import Randomize
r_agent_agent = Randomize()
rm_agent = r_agent_agent.random_agent('desktop', 'linux')
agent = {"User-Agent": rm_agent}


class AWS:
    @staticmethod
    def get_all_images(soup):
        # print(soup.prettify())
        meta = soup.find_all('img', attrs={'class': 'card-img-top'})
        # print(meta)
        urls = []
        for data in meta:
            src = data["src"]
            # strip the resize suffix (e.g. '_hu…') to recover the plain .jpg URL
            urls.append(re.sub(r"_hu.*$", '.jpg', src))
        return urls
Example #9
class EventGenerator:
    '''Generates a set of synthetic behavioral events, with timestamps constrained to a particular date.
    '''
    def __init__(self, start_date):
        self.faker = Faker()
        self.faker.add_provider(internet)
        self.start_date = start_date
        self.ua_generator = Randomize()

    def _gen_user_agent(self):
        devices = [
            ('desktop', 'mac'),
            ('desktop', 'windows'),
            ('tablet', 'ios'),
            ('smartphone', 'ios'),
            ('smartphone', 'android'),
        ]
        ua = self.ua_generator.random_agent(*random.choice(devices))
        return ua

    def _gen_event_type(self):
        '''Creates event type like "io.dagster.page_view".
        '''
        event_types = [
            'page_view',
            'button_click',
            'reload',
            'user_create',
            'user_delete',
            'signup',
        ]
        return 'io.dagster.{}'.format(random.choice(event_types))

    def _gen_timestamp(self):
        midnight = datetime.datetime.combine(
            self.start_date, datetime.time.min,
            tzinfo=datetime.timezone.utc).timestamp()
        return midnight + random.randint(0, 86400 - 1)

    def __iter__(self):
        return self

    def __next__(self):
        # pylint: disable=no-member
        return json.dumps({
            'environment': 'production',
            'method': 'GET',
            # Nested dicts
            'cookies': {
                'session': secrets.token_urlsafe(16),
                'persistent': secrets.token_urlsafe(16),
            },
            'run_id': self.faker.uuid4(),
            'type': self._gen_event_type(),
            'user_agent': self._gen_user_agent(),
            'ip_address': self.faker.ipv4_public(),
            'timestamp': self._gen_timestamp(),
            'url': '/' + self.faker.uri_path(),
            # like any good production system, we throw some random PII in our behavioral events
            'name': self.faker.name(),
            'email': self.faker.ascii_email(),
            # Nested lists
            'location': list(self.faker.location_on_land(coords_only=False)),
        })
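Since __next__ never raises StopIteration, the generator is infinite; a hedged sketch of drawing a bounded sample:

import datetime
import itertools

# hedged usage sketch — the date is an arbitrary placeholder
gen = EventGenerator(start_date=datetime.date(2020, 1, 1))
for event_json in itertools.islice(gen, 10):  # take 10 synthetic events
    print(event_json)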
Example #10
def random_pick():
    r_agent = Randomize()
    return r_agent.random_agent(device_type=random.choice(device_types),
                                os=random.choice(os))
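The device_types and os globals are not shown in this snippet; plausible definitions, inferred from the device/os pairs used in the other examples here (an assumption, not the original code):

import random

# assumed module-level globals — note that not every combination is valid
# (e.g. 'tablet' + 'windows'), so this sketch sticks to desktop platforms
device_types = ['desktop']
os = ['windows', 'mac', 'linux']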
Example #11
class SlavesAPI:
    def __init__(self, app_auth: str) -> None:
        self.app_auth = app_auth
        self.user_agent = Randomize()
        self.me: Optional["User"] = None
        self.slaves: Optional[List["User"]] = None

        self._error_handler = ErrorHandler
        self._log = logging.getLogger("vkslaves")

    async def accept_duel(self, id: int,
                          rps_type: RpsTypes) -> DuelAcceptResponse:
        """Accept duel request (rock-paper-scissors game)

        :param int id: Duel request id
        :param RpsTypes rps_type: Your move

        :return DuelAcceptResponse: Game result
        """
        req = await self.request("GET", "acceptDuel", {
            "id": id,
            "rps_type": rps_type
        })
        return DuelAcceptResponse(**req)

    async def buy_fetter(self, slave_id: int) -> User:
        """Buy fetter to your slave

        :param int slave_id: Id of your slave

        :return User: Slave data
        """
        self._log.debug(f"Buying fetter for {slave_id}")
        req = await self.request("POST", "buyFetter", {"slave_id": slave_id})
        return User(**req)

    async def buy_slave(self, slave_id: int) -> User:
        """Buy slave

        :param int slave_id: ID of the user you want to buy

        :return User: Your data
        """
        self._log.debug(f"Buying {slave_id}")
        req = await self.request("POST", "buySlave", {"slave_id": slave_id})
        return User(**req)

    async def create_duel(self, user_id: int, amount: int,
                          rps_type: RpsTypes) -> Duel:
        """Create duel request (rock-paper-scissors game)

        :param int user_id: Opponent id
        :param int amount: Bet
        :param RpsTypes rps_type: Your move

        :return Duel: Game object
        """
        req = await self.request(
            "GET",
            "createDuel",
            {
                "user_id": user_id,
                "amount": amount,
                "rps_type": rps_type
            },
        )
        return Duel(**req)

    async def groups_as_slaves(self) -> List[User]:
        """Doesn't work yet

        :return List[User]: List of users objects
        """
        req = await self.request("GET", "groupAsSlaves")
        return [User(**item) for item in req["slaves"]]

    async def job_slave(self, name: str, slave_id: int) -> User:
        """Give a job for slave

        :param int slave_id: Id of your slave
        :param str name: Job name

        :return User: Slave data
        """
        self._log.debug(f"Setting job {name} for {slave_id}")
        req = await self.request("POST", "jobSlave", {
            "name": name,
            "slave_id": slave_id
        })
        return User(**req["slave"])

    async def reject_duel(self, id: int) -> DuelRejectResponse:
        """Reject duel request (rock-paper-scissors game)

        :param int id: Duel request id

        :return DuelRejectResponse:
        """
        req = await self.request("POST", "rejectDuel", {"id": id})
        return DuelRejectResponse(**req["slave"])

    async def slave_list(self, id: int) -> List[User]:
        """Get a list of user's slaves

        :param int id: User id

        :return List[User]: List of user's slaves
        """
        req = await self.request("GET", "slaveList", {"id": id})
        return [User(**item) for item in req["slaves"]]

    async def sell_slave(self, slave_id: int) -> BalanceResponse:
        """Sell your slave

        :param int slave_id: ID of slave you want to sell

        :return BalanceResponse:
        """
        self._log.debug(f"Selling {slave_id}")
        req = await self.request("POST", "saleSlave", {"slave_id": slave_id})
        return BalanceResponse(**req)

    async def start(self, post=0) -> StartResponse:
        """Start app request

        :param int post: Referral id

        :return StartResponse:
        """
        self._log.debug("Updating data")
        req = StartResponse(
            **(await self.request("GET", "start", {"post": post})))
        self.me = req.me
        self.slaves = req.slaves
        return req

    async def top_friends(self, ids: List[int]) -> List[TopResponseItem]:
        """Get top of your friends

        :param List[int] ids: Your friends ids

        :return List[TopResponseItem]:
        """
        req = await self.request("POST", "topFriends", {"ids": ids})
        return [TopResponseItem(**item) for item in req["list"]]

    async def top_users(self) -> List[TopResponseItem]:
        """Get top of all users

        :return List[TopResponseItem]:
        """
        req = await self.request("GET", "topUsers")
        return [TopResponseItem(**item) for item in req["list"]]

    async def transactions(self) -> List[Transaction]:
        """Get your transactions

        :return List[Transaction]:
        """
        req = await self.request("GET", "transactions")
        return [Transaction(**item) for item in req["list"]]

    async def transfer_money(self, id: int, amount: int) -> BalanceResponse:
        """Give your money to other user

        :param int id: User id
        :param int amount: Amount to transfer

        :return BalanceResponse: Your balance
        """
        req = await self.request("POST", "user", {"id": id, "amount": amount})
        return BalanceResponse(**req)

    async def user(self, id: int) -> User:
        """Get info of user

        :param int id: User id

        :return User: User data
        """
        req = await self.request("GET", "user", {"id": id})
        return User(**req)

    async def users(self, ids: List[int]) -> List[User]:
        """Get info of users (max 5000)

        :param List[int] ids: IDs of users

        :return List[User]: List of users data
        """
        req = await self.request("POST", "user", {"ids": ids})
        return [User(**item) for item in req["users"]]

    async def request(self,
                      method: str,
                      path: str,
                      data: dict = None) -> Optional[dict]:
        params = {"params": data} if method == "GET" else {"json": data}
        headers = {
            "authorization": "Bearer " + self.app_auth,
            "content_type": "application/json",
            "user-agent": self.user_agent.random_agent("desktop", "windows"),
            "origin": PROD_SERVER,
            "referer": PROD_SERVER,
        }
        async with aiohttp.ClientSession(headers=headers) as session:
            async with session.request("OPTIONS", API_URL + path):
                async with session.request(method, API_URL + path,
                                           **params) as response:
                    return self._error_handler.check(await response.text())
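A hedged driver for the client; the token value below is a placeholder:

import asyncio

# hypothetical usage sketch — '<vk-app-token>' stands in for a real token
async def main():
    api = SlavesAPI(app_auth="<vk-app-token>")
    state = await api.start()   # populates api.me and api.slaves
    top = await api.top_users()
    print(state.me, len(top))

asyncio.run(main())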
Example #12
class Scraper(object):
    def __init__(self, proxies):
        self.proxies = proxies
        self.ip_check_urls = [
            "https://wtfismyip.com/json", "https://wtfismyip.com/json"
        ]
        self.r_agent = Randomize()
        self.sessions = self.prepare_sessions()
        return None

    #
    def get_ip_details(self, session):
        for url in self.ip_check_urls:
            check_ip = session.get(url)
            if check_ip.status_code == 200:
                print(check_ip.text)
                break
        return None

    def prepare_sessions(self):
        print("Initializing Scraper and Preparing Sessions")
        sessions = []
        for each_proxy in self.proxies:
            proxy = {
                "http": "http://{}".format(each_proxy),
                "https": "https://{}".format(each_proxy)
            }
            _session = requests.Session()
            _session.headers["User-Agent"] = self.r_agent.random_agent(
                'desktop', 'windows')
            _session.proxies = proxy
            # Requests counter is good for assessing proxy quality.
            _session.requests_count = 0
            # get_best_session() filters on this flag, so set it up front
            _session.active = True
            sessions.append(_session)
        return sessions

    #
    def get_best_session(self):
        # prefer the least-used session among those still marked active
        filtered_sessions = list(filter(lambda x: x.active, self.sessions))
        best_session = min(filtered_sessions,
                           key=lambda session: session.requests_count)
        return best_session

    #
    def make_request(self,
                     url,
                     method="GET",
                     headers=None,
                     data=None,
                     request_error=False):
        # avoid mutable default arguments
        headers = headers or {}
        data = data or {}
        _response = None
        current_session = self.get_best_session()
        print(current_session.requests_count, url)
        current_session.requests_count += 1
        try:
            if method == "GET":
                _response = current_session.get(url,
                                                timeout=5,
                                                headers=headers,
                                                auth=HTTPProxyAuth(
                                                    'bbercaw10', 'RU9EFHLx'))
            elif method == "POST":
                _response = current_session.post(url,
                                                 timeout=5,
                                                 headers=headers,
                                                 auth=HTTPProxyAuth(
                                                     'bbercaw10', 'RU9EFHLx'))
            if _response:
                # Filter out responses
                if _response.status_code == 503:
                    # Rotate the User-Agent and back off before retrying.
                    current_session.headers[
                        "User-Agent"] = self.r_agent.random_agent(
                            'desktop', 'windows')
                    # the original called a custom current_session.pause();
                    # a plain randomized sleep is a stand-in here
                    time.sleep(random.uniform(5, 15))
                    return self.make_request(url, method, headers, data)
                if _response.status_code == 407:
                    # Proxy authentication error, stop bot
                    raise SystemExit
        except ConnectionError as ce:
            if (isinstance(ce.args[0], MaxRetryError)
                    and isinstance(ce.args[0].reason, ProxyError)):
                print(
                    "Could not connect to Proxy, removing the current session")
                self.sessions.remove(current_session)
            return _response
        except Exception as e:
            print("Error occurred")
            print(current_session.proxies, url, e)
            if not request_error:
                print("Retrying request")
                return self.make_request(url,
                                         method,
                                         headers,
                                         data,
                                         request_error=True)
        return _response
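A hedged usage sketch, assuming proxies are passed as host:port strings:

# hypothetical usage — the proxy addresses below are documentation placeholders
scraper = Scraper(proxies=["203.0.113.10:8080", "203.0.113.11:8080"])
response = scraper.make_request("https://wtfismyip.com/json")
if response is not None:
    print(response.status_code)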
Example #13

from zipcodes import l
import time
import requests
from tqdm import tqdm
from random_useragent.random_useragent import Randomize
import random

r_agent = Randomize()
prefix = "https://www.redfin.com"

for zipcode in tqdm(sorted(l)):
    # Setup agent and timer for each iteration
    zipcode_str = str(zipcode).zfill(5)
    ua = r_agent.random_agent('desktop', 'windows')

    # Get webpage
    address = prefix + "/zipcode/" + zipcode_str
    res = requests.get(address, headers={'User-Agent': ua}).content
    time.sleep(1 + random.uniform(1, 3))

    # Parse the page and extract the CSV download link
    webpage = str(res)
    end = webpage.find('" class="downloadLink"')
    begin = webpage.find('/stingray/api/gis-csv?')
    csv_address = prefix + webpage[begin:end]
    # un-escape '&amp;' back to '&' in the query string
    csv_address = csv_address.replace('\n', '').replace('&amp', '').replace(';', '&')

    # Download csv
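    # (hedged completion — the original example ends here; one plausible finish,
    # with the per-zipcode output filename being an assumption)
    csv_res = requests.get(csv_address, headers={'User-Agent': ua})
    with open(zipcode_str + '.csv', 'wb') as f:
        f.write(csv_res.content)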