示例#1
0
class ProxyRefresh():
    """Re-check every known proxy of one type and sync the verdict to Redis.

    The candidate pool is the union of the proxies returned by ``fuzz_all()``
    and the proxies already stored in the Redis table for the chosen type.
    """

    def __init__(self, proxy_type='https'):
        """Bind to the Redis table for ``proxy_type`` and build the pool.

        :param proxy_type: ``'https'`` or ``'http'``.
        :raises ValueError: if ``proxy_type`` is anything else.
        """
        if proxy_type == 'https':
            table_name = 'https_proxy'
        elif proxy_type == 'http':
            table_name = 'http_proxy'
        else:
            # ValueError subclasses Exception, so callers that caught the
            # previous bare Exception keep working.
            raise ValueError('type must be https or http')
        self.redis_handler = RedisClient(table_name)
        self.proxy_type = proxy_type
        # A set removes proxies that appear in both sources.
        self.proxy_pool = {*fuzz_all(), *self.redis_handler.get_all()}

    def refresh(self, pool_num=10):
        """Validate every proxy in the pool using ``pool_num`` worker threads.

        :param pool_num: number of workers handed to ``ThreadPool``.
        """
        pool = ThreadPool(pool_num)
        try:
            pool.map(self.valid_ip, self.proxy_pool)
        finally:
            # Release the workers even when a validation call raises.
            pool.close()
            pool.join()

    def refresh_in_async(self):
        """Validate every proxy in the pool through an ``AsyncTask`` runner."""
        asynctask = AsyncTask()
        for ip in self.proxy_pool:
            asynctask.add_task(self.valid_ip, ip)
        asynctask.run()

    def valid_ip(self, ip):
        """Store ``ip`` in Redis when it is usable, otherwise delete it.

        :param ip: proxy address passed to ``proxy_is_useful``.
        """
        if proxy_is_useful(ip, self.proxy_type):
            self.redis_handler.add(ip)
            print('ok', ip)
        else:
            self.redis_handler.delete(ip)
示例#2
0
class ProxyManage(Resource):
    """HTTP resource exposing the stored http/https proxies.

    Every request must carry a ``type`` argument (``http`` or ``https``).
    NOTE(review): ``Resource`` / ``reqparse`` look like flask-restful; confirm
    against the file's imports.
    """

    def __init__(self):
        """Create one Redis client per proxy type and the base argument parser."""
        self.http_proxy = RedisClient('http_proxy')
        self.https_proxy = RedisClient('https_proxy')
        self.parser = reqparse.RequestParser()
        self.parser.add_argument('type', type=str, required=True,
                                 help='required args of proxy type: http/https, like ?type=http')

    def _client_for(self, proxy_type):
        """Return the Redis client for ``proxy_type``, or ``None`` if unknown."""
        if proxy_type == 'http':
            return self.http_proxy
        if proxy_type == 'https':
            return self.https_proxy
        return None

    @error_handle
    def get(self):
        """Return one proxy, or every proxy when ``all=true`` is passed.

        :raises ParamError: if ``type`` is neither ``http`` nor ``https``.
        """
        self.parser.add_argument('all', type=str, default='false', required=False, help='')
        # Parse once and reuse the result for both arguments.
        args = self.parser.parse_args()
        client = self._client_for(args['type'])
        if client is None:
            raise ParamError(msg='proxy type param error,must be http/https')
        if args['all'] == 'true':
            return client.get_all()
        return client.get_one()

    @error_handle
    def delete(self):
        """Delete the proxy given by the ``ip`` argument.

        For ``type`` values other than ``http``/``https`` the address is
        removed from both stores and nothing is returned.
        """
        self.parser.add_argument('ip', type=str, required=True)

        args = self.parser.parse_args()

        ip = args.get('ip')
        client = self._client_for(args.get('type'))
        if client is not None:
            return client.delete(ip)
        # Unknown type: best-effort removal from both stores.
        self.http_proxy.delete(ip)
        self.https_proxy.delete(ip)
        return
class ProxyManager(object):
    """
    manager of the proxy pool

    Keeps two queues in one RedisClient: ``raw_proxy`` for freshly fetched
    proxies and ``useful_proxy`` for the ones served to callers. Each method
    selects the queue it needs through ``db.changeTable`` before touching it.
    """
    def __init__(self, logger):
        """
        :param logger: logger used to report fetched proxies
        """
        self.db = RedisClient(name='raw_proxy', host='localhost', port=6379)
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy'
        self.log = logger

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """

        for proxy_getter in proxy_getter_methods:
            proxy_set = set()

            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxy_getter)():
                if not proxy:
                    continue
                # strip before the emptiness check so that a whitespace-only
                # entry is dropped instead of being stored as an empty proxy
                proxy = proxy.strip()
                if not proxy:
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=proxy_getter, proxy=proxy))
                proxy_set.add(proxy)

            # store raw proxy, skipping the ones already in the useful queue
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return: a randomly chosen proxy, or None when the useful queue is empty
        """

        self.db.changeTable(self.useful_proxy_queue)
        proxies = self.db.getAll()
        return random.choice(list(proxies.keys())) if proxies else None

    def delete(self, proxy):
        """
        delete the given proxy from proxy pool
        :param proxy:
        :return:
        """

        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from proxy pool
        :return: list
        """

        self.db.changeTable(self.useful_proxy_queue)
        proxies = self.db.getAll()
        return list(proxies.keys()) if proxies else list()

    def getNumber(self):
        """
        get number of the raw and useful proxies
        :return: dict
        """

        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    @staticmethod
    def validUsefulProxy(proxy, logger):
        """
        check whether if the proxy is useful
        if timeout of the proxy over 20s, deprecate it
        :param proxy: proxy address as str or utf8 bytes
        :param logger: logger that receives the "is ok" message
        :return: True when the test request answers 200, otherwise False
        """

        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        proxies = {"http": "http://{proxy}".format(proxy=proxy)}
        try:
            r = requests.get('http://httpbin.org/ip',
                             proxies=proxies,
                             timeout=20,
                             verify=False)
        except Exception:
            # any failure means the proxy is unusable; unlike a bare except
            # this still lets KeyboardInterrupt / SystemExit propagate
            return False
        if r.status_code == 200:
            logger.info('%s is ok' % proxy)
            return True
        # a non-200 answer previously fell through and returned None
        return False
示例#4
0
class Generator():
    """
        log in to the configured website with a headless Chrome browser
        and store the resulting cookies per user in redis
    """

    def __init__(self, hostname):
        """
            read conf/<hostname>_website.json, connect redis to get the
            cookies map and username map and init browser(use selenium)
        """
        # context manager so the config file handle is always closed
        with open("conf/%s_website.json" % hostname, "r") as fd:
            data = json.loads(fd.read())
        self.website = data["website_name"]
        self.login_url = data["login_url"]
        self.cookies_db = RedisClient('cookies', self.website)
        self.users_db = RedisClient('users', self.website)
        # NOTE(review): account credentials are hard-coded in source; kept
        # for behavior compatibility, but they should come from configuration.
        self.users_db.set("15320347357","123456wyq")
        self.users_db.set("15320343017","123456wyq")
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--window-size=1980,1980')
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        self.browser = webdriver.Chrome(chrome_options=chrome_options)
        self.wait = WebDriverWait(self.browser, 20)

    def get_cookie_dict(self, cookie):
        """
            turn a list of cookie records (each with "name" and "value")
            into a single name -> value dict
        """
        return {item["name"]: item["value"] for item in cookie}

    def _password_login(self, username, password, entry_selector, tab_selector,
                        user_locator, password_locator, submit_locator):
        """
            shared login flow: open the login url, click the login entry,
            switch to the password-login tab, fill the form and submit it
        """
        self.browser.get(self.login_url)
        time.sleep(2)
        # click the login entry
        self.browser.find_element(By.CSS_SELECTOR, entry_selector).click()
        time.sleep(2)
        # switch to login by account and password
        self.browser.find_element(By.CSS_SELECTOR, tab_selector).click()
        username_input = self.wait.until(EC.presence_of_element_located(user_locator))
        password_input = self.wait.until(EC.presence_of_element_located(password_locator))
        login_submit = self.wait.until(EC.presence_of_element_located(submit_locator))
        # type the account and password, then submit
        username_input.send_keys(username)
        password_input.send_keys(password)
        login_submit.click()
        time.sleep(3)

    def _cookie_result(self, logged_in):
        """
            build the {"code", "data"} result: code 1 with the browser
            cookies on success, code -1 with "login failed" otherwise
        """
        if logged_in:
            return {"code": 1, "data": self.browser.get_cookies()}
        return {"code": -1, "data": "login failed"}

    def open_lianjia(self, username, password):
        """
            open website,input username and password finally click it
        """
        self._password_login(
            username, password,
            "a.btn-login.bounceIn.actLoginBtn",
            "#con_login_user_tel a.tologin",
            (By.CSS_SELECTOR, "input.the_input.topSpecial.users"),
            (By.CSS_SELECTOR, "input.the_input.password"),
            (By.CSS_SELECTOR, ".li_btn a.login-user-btn"),
        )

    def open_qfang(self, username, password):
        """
            open qfang,input username and password finally click it
        """
        self._password_login(
            username, password,
            "#noLoginUser .nav-link a",
            "#loginTbs a:nth-child(2)",
            (By.ID, "phone"),
            (By.ID, "password"),
            (By.ID, "loginSubmit"),
        )

    def new_cookie_qfang(self, username, password):
        """
            request website,login and get cookie
        """
        self.open_qfang(username, password)
        # confirm the login succeeded by reading the user-name link text
        check = self.browser.find_element(By.CSS_SELECTOR, "#loginOrUserName a.frontUserName")
        return self._cookie_result(check.text == "我的Q房")

    def new_cookie_lianjia(self, username, password):
        """
            request lianjia,login and get cookie
        """
        self.open_lianjia(username, password)
        check = self.browser.find_element(By.CSS_SELECTOR, ".ti-hover .typeShowUser a:link")
        return self._cookie_result("1" in check.text)

    def save_cookies(self):
        """
            log in every user that has no stored cookie yet and save the
            cookie; users whose login fails are deleted from the users db
        """
        all_users = self.users_db.all_users()
        done_users = self.cookies_db.all_users()
        if len(all_users) == len(done_users):
            print("No users can get cookie")
        for user in all_users:
            if user in done_users:
                continue
            pw = self.users_db.get(user)
            print("get cookie user:%s,website:%s..." % (user, self.website) )
            if self.website == "qfang":
                result = self.new_cookie_qfang(user, pw)
            elif self.website == "lianjia":
                result = self.new_cookie_lianjia(user, pw)
            else:
                # the website is the same for every user, so stop here
                # instead of reading a result that was never produced
                print("not support this website")
                return
            if result["code"] == 1:
                cookie = self.get_cookie_dict(result["data"])
                self.cookies_db.set(user, json.dumps(cookie))
                print("save cookie %s succ" % cookie)
            elif result["code"] == -1:
                print(result["data"])
                self.users_db.delete(user)
                print("delete account :%s" % user)
            else:
                print(result["data"])

    def get_cookie(self):
        """
            get_cookie from redis
        """
        # use the instance's cookie store; the bare name was undefined
        return self.cookies_db.get_cookie()

    def close(self):
        """
            quit the browser and kill leftover chromedriver processes
        """
        self.browser.quit()
        os.system('pkill chromedriver')