# Python 2 spider: walk every listing page and keep only verified high-anonymity proxies.
def run(self):
    spider_start_time = str(datetime.now()).split('.')[0]
    print spider_start_time, 'time to spider start!'
    proxy_manager = ProxyManager()
    page = get_html(BASE_URL)
    page = unicode(page, 'GBK').encode('UTF-8')  # source site is GBK-encoded
    page_count = self.get_page_count(page)
    page_count_time = str(datetime.now()).split('.')[0]
    print page_count_time, 'get page count:', page_count
    default_ip = get_default_ip()
    if page_count != 0:
        last_proxy = None
        for i in xrange(1, page_count):
            page = get_html(URL_HEADER + str(i) + URL_END, last_proxy)
            proxy_list = filte(page)
            for proxy in proxy_list:
                if proxy.anonymous_type == '高匿':  # '高匿' = "high anonymity"
                    check_result = check_anonymous(proxy, default_ip)
                    spider_time = str(datetime.now()).split('.')[0]
                    if check_result[0]:
                        proxy.delay_time = check_result[1]
                        proxy.created_time = str(datetime.now()).split('.')[0]
                        proxy.is_in_china = 2
                        proxy_manager.add_proxy(proxy, spider_time)
                        last_proxy = proxy  # reuse a verified proxy for the next fetch
                else:
                    pass  # skip transparent and ordinary anonymous proxies
# Variant of the spider above that crawls a list of source URLs and tags Chinese proxies.
def run(self):
    spider_start_time = str(datetime.now()).split('.')[0]
    print spider_start_time, 'time to spider start!'
    proxy_manager = ProxyManager()
    last_proxy = None
    for url in self.urls:
        page = get_html(url)
        page_count = self.get_page_count(page)
        page_count_time = str(datetime.now()).split('.')[0]
        print page_count_time, 'get page count:', page_count
        default_ip = get_default_ip()
        for i in xrange(1, page_count):
            page = get_html(url + str(i))
            proxy_list = filte(page)
            for proxy in proxy_list:
                if proxy.anonymous_type == '高匿':  # '高匿' = "high anonymity"
                    check_result = check_anonymous(proxy, default_ip)
                    spider_time = str(datetime.now()).split('.')[0]
                    if check_result[0]:
                        proxy.delay_time = check_result[1]
                        proxy.created_time = str(datetime.now()).split('.')[0]
                        proxy.is_in_china = 0
                        if url.endswith(CHINA_ANONYMOUS) or url.endswith(CHINA_NORMAL):
                            proxy.is_in_china = 1
                        proxy_manager.add_proxy(proxy, spider_time)
                        last_proxy = proxy
                else:
                    pass
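# Both spiders above call check_anonymous(proxy, default_ip) and use its result
# as a (passed, delay) pair, but the function itself is not shown. A plausible
# sketch: fetch an IP-echo page through the proxy and confirm the caller's real
# IP does not leak. The proxy.ip / proxy.port fields and the echo URL are
# assumptions, not taken from the snippets.
import time
import requests

def check_anonymous(proxy, default_ip):
    proxy_url = 'http://%s:%s' % (proxy.ip, proxy.port)
    start = time.time()
    try:
        resp = requests.get('https://httpbin.org/ip',
                            proxies={'http': proxy_url, 'https': proxy_url},
                            timeout=10)
    except requests.RequestException:
        return False, None  # unreachable proxy fails the check
    delay = time.time() - start
    # High anonymity only if the echoed address is not our real IP.
    return default_ip not in resp.text, delay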
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]
    self.client_address = client_addr[0]
    self.url_after_split = None
    self.http_status = None
def personal_proxy_example(n_req=10):
    # Set PROXY_LIST and PORT_LIST in definitions.py
    pm = ProxyManager(PROXY_LIST, PORT_LIST)
    for _ in range(n_req):
        pm.set_proxy()
        foo()
def process_client_request(self, data):
    """
    Main algorithm. Note that these are high-level steps, and most of them
    may require further implementation details.
    1. get url and private mode status from client
    2. if private mode, then mask ip address: mask_ip_address method
    3. check if the resource (site) is in cache. If so and not private mode, then:
       3.1 check if site is blocked for this employee
       3.2 check if site requires credentials for this employee
       3.3 if 3.1 or 3.2, then the client needs to send a POST request to the
           proxy with credentials to check 3.1 and 3.2 access
           3.3.1 if credentials are valid, send a HEAD request to the original
                 server to check the last_date_modified parameter. If the cache
                 header for that site is outdated then move to step 4.
                 Otherwise, send a response to the client with the requested
                 site and the appropriate status code.
    4. If the site is not in cache, or last_date_modified is outdated, then
       create a GET request to the original server, and store the server's
       response in the cache.
    :param data:
    :return: VOID
    """
    self.data = data
    url = data["url"]
    privateMode = data["is_private_mode"]
    if privateMode == '1':
        self._mask_ip_adress()
    manager = ProxyManager()
    managerResult = manager.get_cached_resource(data)
    if data["url"] == managerResult["url"]:
        if data["is_private_mode"] == managerResult["is_private_mode"]:
            if manager.is_site_blocked(data):  # blocked sites short-circuit the request
                return True
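# The snippet above stops before steps 3.3.1 and 4 of its own docstring. A
# minimal sketch of the freshness check and cache fill those steps describe,
# using requests for the HEAD/GET calls. manager.update_cache and the cached
# "body"/"last_date_modified" fields are assumed names, not part of the
# snippet above.
import requests

def fetch_with_cache(manager, data):
    cached = manager.get_cached_resource(data)
    url = data["url"]
    if cached:
        head = requests.head(url, timeout=5)
        # Step 3.3.1: serve from cache while the origin copy is unchanged.
        if head.headers.get("Last-Modified") == cached.get("last_date_modified"):
            return cached["body"], 200
    # Step 4: cache miss or stale entry, so refresh from the original server.
    resp = requests.get(url, timeout=5)
    manager.update_cache(url, {
        "body": resp.text,
        "last_date_modified": resp.headers.get("Last-Modified"),
    })
    return resp.text, resp.status_code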
def __init__(self, conn, client_addr, server_ip):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]
    self.client_ip = ""
    self.permission = False  # whether or not user is authenticated
    self.role = "EMPLOYEE"  # permission mode of user: "******", "user", or "EMPLOYEE"
    self.KEEP_ALIVE_TIME = 115  # time to keep idle connection alive (seconds)
    self.server_ip = server_ip
def public_proxy_example(n_req=10):
    proxy_df = get_proxy_list(n_proxy=5, anonymity='elite', https='true')
    proxy_list = proxy_df['IP'].values
    port_list = proxy_df['PORT'].values
    pm = ProxyManager(proxy_list, port_list)
    for _ in range(n_req):
        pm.set_proxy()
        foo()
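# Both proxy examples above call an undefined foo() after each set_proxy(). A
# plausible stand-in, assuming set_proxy() exports the chosen proxy through the
# HTTP_PROXY/HTTPS_PROXY environment variables, which requests honors by
# default. foo's body and the echo URL are illustrative, not from the snippets.
import requests

def foo():
    # One request routed through whichever proxy set_proxy() activated.
    resp = requests.get('https://httpbin.org/ip', timeout=10)
    print(resp.json())  # the echoed address should be the proxy's exit IP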
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]  # get id
    self.client_ip = client_addr[0]  # get ip address
    self.http_version = "1.1"
    if self.DEBUG:
        print("[proxy_thread.py -> __init__] new instance of ProxyThread() class")
def __init__(self, validate=True, **kwargs):
    self.proxy_mgr = ProxyManager()
    self.validate = validate
    try:
        self.address = kwargs['address']
        self.port = kwargs['port']
    except KeyError:
        raise Exception("Missing required attribute(s) address and/or port for Proxy object")
    for req in self.__class__.required_proxy_attrs:
        if not getattr(self, req):
            raise Exception("Attribute %s must evaluate to True" % req)
    for kwarg, default in self.__class__.default_proxy_attrs.items():
        if kwarg in kwargs and kwargs[kwarg]:
            default = kwargs[kwarg]
        setattr(self, kwarg, default)
    if self.validate:
        self.do_proxy_validation()
    self.proxy_id = self.id
    print(self.proxy_id)
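# The constructor above reads two class-level tables that the snippet does not
# show: required_proxy_attrs and default_proxy_attrs. A minimal sketch of what
# they might look like; the attribute names come from the snippet, but the
# concrete entries below are assumptions.
class Proxy:
    # Attributes that must be present and truthy after __init__.
    required_proxy_attrs = ('address', 'port')
    # Optional keyword arguments and the defaults used when a kwarg is absent.
    default_proxy_attrs = {
        'protocol': 'http',
        'username': None,
        'password': None,
    }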
# Proxychecker
from fastapi import FastAPI
import motor.motor_tornado
import uvicorn
from proxy_manager import ProxyManager
import asyncio


def get_db_conn():
    client = motor.motor_tornado.MotorClient("mongodb://18.185.77.185:27017/")
    return client["mar_wit"]


db_conn = get_db_conn()
proxy = ProxyManager(
    db_conn,
    "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt"
)
app = FastAPI()


@app.on_event("startup")
async def boot():
    await asyncio.sleep(5)
    asyncio.create_task(proxy.set())
    asyncio.create_task(proxy.check())


if __name__ == "__main__":
    uvicorn.run("proxychecker:app", port=8000, reload=True)
def getStatus():
    status = ProxyManager(logger).getNumber()
    return jsonify(status)
def delete():
    proxy = request.args.get('proxy')
    ProxyManager().delete(proxy)
    return 'success'
def getAll():
    proxies = ProxyManager(logger).getAll()
    return jsonify(proxies)
from storage_manager import Redis
from proxy_manager import ProxyManager
from autoproxy_config.config import configuration
DESIGNATED_ENDPOINT = configuration.app_config['designated_endpoint']['value']
from IPython import embed
import time

redis = Redis(**configuration.redis_config)
pm = ProxyManager()
embed()
pm = ProxyManager()
for i in range(500):
    proxy = pm.get_proxy(DESIGNATED_ENDPOINT)
    proxy.callback(success=False)
proxy = pm.get_proxy('https://google.com')
proxy.callback(success=True)
pm.storage_mgr.sync_to_db()
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.get_settings()
    self.init_thread(conn)
    self.client_id = client_addr[1]
finally:
    s.close()
if (len(times) + 1) % 25 == 0:  # report liveness every 25 requests
    print('made 25 requests, host still up')


if __name__ == "__main__":
    target = sys.argv[1] if len(sys.argv) - 1 > 0 else 'https://www.google.com'
    port = sys.argv[2] if len(sys.argv) - 1 > 1 else 443
    proxy_queue = Queue()
    abort_q = Queue()
    pman = ProxyManager()
    print('filling proxy stack')
    pman.fill_proxy_stack()
    while not pman.proxy_stack:
        pass  # busy-wait until at least one proxy is available
    num_processes = cpu_count()
    processes = []
    print('spinning processes')
    for i in range(num_processes):
        processes.append(
            Process(target=spin_threads,
                    args=(target, port, proxy_queue, abort_q)))
        processes[i].start()
    try:
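# spin_threads() is launched above but never shown. A minimal sketch of what
# each worker process might run, assuming it pulls proxies from proxy_queue,
# times one request per proxy, and stops once something lands in abort_q.
# Everything except the signature is an assumption; port is unused here because
# target already carries the scheme and host.
import threading
import time
import requests

def spin_threads(target, port, proxy_queue, abort_q, n_threads=4):
    times = []  # response times, matching the `times` list in the fragment

    def worker():
        while abort_q.empty():
            proxy = proxy_queue.get()  # blocks until a proxy is available
            start = time.time()
            try:
                requests.get(target,
                             proxies={'http': proxy, 'https': proxy},
                             timeout=5)
                times.append(time.time() - start)
            except requests.RequestException:
                continue  # dead proxy; move on to the next one

    threads = [threading.Thread(target=worker, daemon=True) for _ in range(n_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()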
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]
from db_classes.saver import Saver
from proxy_manager import ProxyManager
import config as cfg
import my_log

main_logger = my_log.get_logger(__name__)
main_logger.info('program started')
saver = Saver()
proxy_manager = ProxyManager()
def get():
    proxy = ProxyManager(logger).get()
    return proxy if proxy else 'no proxy!'
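# getStatus, delete, getAll, and get above read like Flask view functions with
# their route decorators stripped: they use request.args and jsonify but carry
# no routing. A minimal sketch of how they might be wired up; the route paths,
# port, and logger setup are assumptions, not from the snippets.
import logging
from flask import Flask, request, jsonify

app = Flask(__name__)
logger = logging.getLogger(__name__)

# Flask derives the endpoint name from each view function automatically.
app.add_url_rule('/status', view_func=getStatus)
app.add_url_rule('/delete', view_func=delete)
app.add_url_rule('/all', view_func=getAll)
app.add_url_rule('/get', view_func=get)

if __name__ == '__main__':
    app.run(port=5000)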