Example #1
def personal_proxy_example(n_req=10):
    # Set PROXY_LIST and PORT_LIST in definitions.py
    pm = ProxyManager(PROXY_LIST, PORT_LIST)

    for _ in range(n_req):
        pm.set_proxy()
        foo()
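
Examples #1 and #7 only exercise the constructor and set_proxy(); the sketch below shows a minimal ProxyManager matching that interface. It is an illustration under assumptions (the env-var mechanism and everything except the constructor signature and set_proxy() are invented), not the library behind these examples.

import os
import random

class ProxyManager:
    """Hypothetical minimal ProxyManager matching the calls in Examples #1 and #7."""

    def __init__(self, proxy_list, port_list):
        self.proxies = list(zip(proxy_list, port_list))

    def set_proxy(self):
        # Pick a random ip:port pair and expose it through the standard proxy
        # environment variables, which most HTTP clients honour.
        ip, port = random.choice(self.proxies)
        os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = \
            "http://{}:{}".format(ip, port)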
Example #2
	def run(self):
		spider_start_time = str(datetime.now()).split('.')[0]
		print spider_start_time, 'time to spider start!'
		proxy_manager = ProxyManager()
		page = get_html(BASE_URL)
		page = unicode(page,'GBK').encode('UTF-8')
		page_count = self.get_page_count(page)
		page_count_time = str(datetime.now()).split('.')[0]
		print page_count_time, 'get page count:', page_count
		default_ip = get_default_ip()
		if page_count != 0:
			last_proxy = None
			for i in xrange(1, page_count):
				page = get_html(URL_HEADER + str(i) + URL_END, last_proxy)
				proxy_list = filte(page)
				for proxy in proxy_list:
					if proxy.anonymous_type == '高匿':
						check_result = check_anonymous(proxy, default_ip)
						spider_time = str(datetime.now()).split('.')[0]
						if check_result[0]:
							proxy.delay_time = check_result[1]
							proxy.created_time = str(datetime.now()).split('.')[0]
							proxy.is_in_china = 2
							proxy_manager.add_proxy(proxy, spider_time)
							last_proxy = proxy
						else:
							pass
Example #3
	def run(self):
		spider_start_time = str(datetime.now()).split('.')[0]
		print spider_start_time, 'time to spider start!'
		proxy_manager = ProxyManager()
		last_proxy = None
		for url in self.urls:
			page = get_html(url)
			page_count = self.get_page_count(page)
			page_count_time = str(datetime.now()).split('.')[0]
			print page_count_time, 'get page count:', page_count
			default_ip = get_default_ip()
			for i in xrange(1, page_count):
				page = get_html(url + str(i))
				proxy_list = filte(page)
				for proxy in proxy_list:
					if proxy.anonymous_type == '高匿':
						check_result = check_anonymous(proxy, default_ip)
						spider_time = str(datetime.now()).split('.')[0]
						if check_result[0]:
							proxy.delay_time = check_result[1]
							proxy.created_time = str(datetime.now()).split('.')[0]
							proxy.is_in_china = 0
							if url.endswith(CHINA_ANONYMOUS) or url.endswith(CHINA_NORMAL):
								proxy.is_in_china = 1
							proxy_manager.add_proxy(proxy, spider_time)
							last_proxy = proxy
						else:
							pass
Example #4
 def run(self):
     spider_start_time = str(datetime.now()).split('.')[0]
     print spider_start_time, 'time to spider start!'
     proxy_manager = ProxyManager()
     page = get_html(BASE_URL)
     page = unicode(page, 'GBK').encode('UTF-8')
     page_count = self.get_page_count(page)
     page_count_time = str(datetime.now()).split('.')[0]
     print page_count_time, 'get page count:', page_count
     default_ip = get_default_ip()
     if page_count != 0:
         last_proxy = None
         for i in xrange(1, page_count):
             page = get_html(URL_HEADER + str(i) + URL_END, last_proxy)
             proxy_list = filte(page)
             for proxy in proxy_list:
                 if proxy.anonymous_type == '高匿':
                     check_result = check_anonymous(proxy, default_ip)
                     spider_time = str(datetime.now()).split('.')[0]
                     if check_result[0]:
                         proxy.delay_time = check_result[1]
                         proxy.created_time = str(
                             datetime.now()).split('.')[0]
                         proxy.is_in_china = 2
                         proxy_manager.add_proxy(proxy, spider_time)
                         last_proxy = proxy
                     else:
                         pass
Example #5
 def run(self):
     spider_start_time = str(datetime.now()).split('.')[0]
     print spider_start_time, 'time to spider start!'
     proxy_manager = ProxyManager()
     last_proxy = None
     for url in self.urls:
         page = get_html(url)
         page_count = self.get_page_count(page)
         page_count_time = str(datetime.now()).split('.')[0]
         print page_count_time, 'get page count:', page_count
         default_ip = get_default_ip()
         for i in xrange(1, page_count):
             page = get_html(url + str(i))
             proxy_list = filte(page)
             for proxy in proxy_list:
                 if proxy.anonymous_type == '高匿':
                     check_result = check_anonymous(proxy, default_ip)
                     spider_time = str(datetime.now()).split('.')[0]
                     if check_result[0]:
                         proxy.delay_time = check_result[1]
                         proxy.created_time = str(
                             datetime.now()).split('.')[0]
                         proxy.is_in_china = 0
                         if url.endswith(CHINA_ANONYMOUS) or url.endswith(
                                 CHINA_NORMAL):
                             proxy.is_in_china = 1
                         proxy_manager.add_proxy(proxy, spider_time)
                         last_proxy = proxy
                     else:
                         pass
Example #6
 def __init__(self, conn, client_addr, server_ip):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.client_id = client_addr[1]
     self.client_ip = ""
     self.permission = False  # whether or not user is authenticated
     self.role = "EMPLOYEE"  # permission mode of user: "******", "user", or "EMPLOYEE"
     self.KEEP_ALIVE_TIME = 115  # time to keep idle connection alive(seconds)
     self.server_ip = server_ip
Example #7
def public_proxy_example(n_req=10):
    proxy_df = get_proxy_list(n_proxy=5, anonymity='elite', https='true')
    proxy_list = proxy_df['IP'].values
    port_list = proxy_df['PORT'].values

    pm = ProxyManager(proxy_list, port_list)

    for _ in range(n_req):
        pm.set_proxy()
        foo()
Example #8
    def __init__(self, conn, client_addr):
        self.proxy_manager = ProxyManager()
        self.client = conn
        self.client_id = client_addr[1]  # get id
        self.client_ip = client_addr[0]  # get ip address
        self.http_version = "1.1"

        if self.DEBUG:
            print(
                "[proxy_thread.py -> __init__] new instance of ProxyThread() class "
            )
Example #9
def user_search_start_running():
    global bfs_queue
    bfs_queue = Queue(maxsize=0)
    bfs_queue.put(9149967)

    #start proxy manager
    global proxy_manager
    proxy_manager = ProxyManager()
    proxy_manager.retrieve_new_proxies()
    #start daemon refreshing available proxies
    t = threading.Thread(target=proxy_manager.refresh_proxies)
    t.start()
    tp = threading.Thread(target=periodic_info_backup)
    tp.start()
    retrieve_user_from_queue()
Example #10
 def __init__(self, conn, client_addr):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.client_id = client_addr[1]
     self.client_address = client_addr[0]
     self.url_after_split = None
     self.http_status = None
Example #11
def user_search_start_running():
    global bfs_queue
    bfs_queue = Queue(maxsize=0)
    bfs_queue.put(6546594)

    #start proxy manager
    global proxy_manager
    proxy_manager = ProxyManager()
    proxy_manager.retrieve_new_proxies()
    #restore bitarray from file
    # f = open('bitcopy.txt','rb')
    # user_manager.user_bit_array =  pickle.load(f)
    # f.close()

    tp = threading.Thread(target=periodic_info_backup)
    tp.start()
    retrieve_user_from_queue()
Example #12
 def process_client_request(self, data):
     """
             Main algorithm. Note that these are high-level steps, and most of them may
             require further implementation details
             1. get url and private mode status from client
             2. if private mode, then mask ip address: mask_ip_address method
             3. check if the resource (site) is in cache. If so and not private mode, then:
                 3.1 check if site is blocked for this employee
                 3.2 check if site requires credentials for this employee
                 3.3 if 3.1 or 3.2 then the client needs to send a post request to proxy
                     with credentials to check 3.1 and 3.2 access
                     3.3.1 if credentials are valid, send a HEAD request to the original server
                           to check the last_date_modified parameter. If the cache header for that
                           site is outdated then move to step 4. Otherwise, send a response to the
                           client with the requested site and the appropriate status code.
             4. If site is not in cache, or last_date_modified is outdated, then create a GET request
                 to the original server, and store in cache the response from the server.
             :param data:
             :return: VOID
     """
     self.data = data
     url = data["url"]
     privateMode = data["is_private_mode"]
     if privateMode == '1':
         self._mask_ip_adress()
     manager = ProxyManager()
     managerResult = manager.get_cached_resource(data)
     if data["url"] == managerResult["url"]:
         if data["is_private_mode"] == managerResult["is_private_mode"]:
             if manager.is_site_blocked(data):
                 return True
Example #13
    def __init__(self, validate=True, **kwargs):
        self.proxy_mgr = ProxyManager()
        self.validate = validate
        try:
            self.address = kwargs['address']
            self.port = kwargs['port']
        except KeyError:
            raise Exception("Missing required attribute(s) address and/or port for Proxy object")

        for req in self.__class__.required_proxy_attrs:
            if not getattr(self, req):
                raise Exception("Attribute %s must evaluate to True" % req)

        for kwarg, default in self.__class__.default_proxy_attrs.items():
            if(kwarg in kwargs and kwargs[kwarg]):
                default = kwargs[kwarg]
            setattr(self, kwarg, default)

        if self.validate:
            self.do_proxy_validation()

        self.proxy_id = self.id
        print(self.proxy_id)
Example #14
# Proxychecker

from fastapi import FastAPI
import motor.motor_tornado
import uvicorn
from proxy_manager import ProxyManager
import asyncio


def get_db_conn():
    client = motor.motor_tornado.MotorClient("mongodb://18.185.77.185:27017/")
    return client["mar_wit"]


db_conn = get_db_conn()
proxy = ProxyManager(
    db_conn,
    "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt"
)
app = FastAPI()


@app.on_event("startup")
async def boot():
    await asyncio.sleep(5)
    asyncio.create_task(proxy.set())
    asyncio.create_task(proxy.check())


if __name__ == "__main__":
    uvicorn.run("proxychecker:app", port=8000, reload=True)
Example #15
class Proxy(object):
    default_proxy_attrs = {'location':'Unknown', 'protocol':'http', 'id':None}
    required_proxy_attrs = ('address', 'port')

    def __init__(self, validate=True, **kwargs):
        self.proxy_mgr = ProxyManager()
        self.validate = validate
        try:
            self.address = kwargs['address']
            self.port = kwargs['port']
        except KeyError:
            raise Exception("Missing required attribute(s) address and/or port for Proxy object")

        for req in self.__class__.required_proxy_attrs:
            if not getattr(self, req):
                raise Exception("Attribute %s must evaluate to True" % req)

        for kwarg, default in self.__class__.default_proxy_attrs.items():
            if(kwarg in kwargs and kwargs[kwarg]):
                default = kwargs[kwarg]
            setattr(self, kwarg, default)

        if self.validate:
            self.do_proxy_validation()

        self.proxy_id = self.id
        print(self.proxy_id)

    @property
    def address(self):
        return self._address

    @address.setter
    def address(self, addr):
        adr_re = r'^[\d\.]+$'
        if not re.match(adr_re, addr):
            raise Exception("Invalid address only IPV4 supported.")
        else:
            self._address=addr

    @property
    def port(self):
        return self._port

    @port.setter
    def port(self, port):
        if is_int(port):
            self._port = port
        else:
            raise Exception("value %s is not an int" % port)

    @property
    def protocol(self):
        return self._protocol

    @protocol.setter
    def protocol(self, protocol):
        if protocol not in ('http','https','ssl','socksV4','socksV5','socks_v4','socks_v5','ftp'):
            raise Exception("Protocol must be http, https, ssl, socksv4, socksv5, or ftp")
        else:
            self._protocol = protocol

    @property
    def id(self):
        return self._id

    @id.setter
    def id(self, id):
        if is_int(id) or id is None:
            self._id = id
            self._proxy_id = id
        else:
            raise Exception("Invalid ID, must be an integer")

    @property
    def proxy_id(self):
        return self._proxy_id

    @proxy_id.setter
    def proxy_id(self, pid):
        if pid is None:
            self._proxy_id = None
            return
        if is_int(pid):
            self._id=pid
            self._proxy_id = pid
            return


    @property
    def location(self):
        return self._location

    @location.setter
    def location(self, loc):
        self._location = loc

    @property
    def last_active(self):
        return self._last_active

    @last_active.setter
    def last_active(self, la):
        if isinstance(la, datetime.datetime):
            self._last_active = la
        else:
            raise Exception("last_active time must be instance of datetime.datetime")


    def to_string(self):
        prefix = "http"
        if(re.compile("socks").search(self.protocol)):
            prefix = "socks"
        return "%s://%s:%s" % (prefix, self.address, self.port)

    @classmethod
    def from_string(cls, proxy_str, **kwargs):
        regex = re.compile(r'^(http|socks)?(?:\:\/\/)?(.*):(.*)$')
        res = regex.findall(proxy_str)
        if not res:
            raise Exception("Could not parse proxy string '%s'" % proxy_str)
        else:
            res = list(res[0])
            if 'protocol' not in kwargs:
                kwargs['protocol'] = res[0]
            return cls(address=res[1], port=res[2], **kwargs)

    def clone(self):
        return copy.copy(self)

    def do_proxy_validation(self):
        self.proxy_mgr.get_or_create_proxy_id(self)
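
A brief usage sketch for the class above; the address and port are made up, and is_int() (defined elsewhere in the source module) is assumed to accept numeric strings:

# Hypothetical usage; validate=False skips the ProxyManager round-trip.
p = Proxy.from_string("http://10.0.0.1:8080", validate=False)
print(p.to_string())  # -> http://10.0.0.1:8080
q = p.clone()         # shallow copy via copy.copy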
Example #16
 def __init__(self, conn, client_addr):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.client_id = client_addr[1]
Example #17
class ProxyThread:
    """
    The proxy thread class represents a threaded proxy instance to handle a specific request from a client socket
    """
    BUFFER_SIZE = 4096
    KEEP_ALIVE_REQUESTS = 5  # max number of requests made over connection

    def __init__(self, conn, client_addr, server_ip):
        self.proxy_manager = ProxyManager()
        self.client = conn
        self.client_id = client_addr[1]
        self.client_ip = ""
        self.permission = False  # whether or not user is authenticated
        self.role = "EMPLOYEE"  # permission mode of user: "******", "user", or "EMPLOYEE"
        self.KEEP_ALIVE_TIME = 115  # time to keep idle connection alive(seconds)
        self.server_ip = server_ip

    def get_settings(self):
        return self.proxy_manager

    def is_non_persistent(self, http_request_string):
        """
        Determines whether or not given http_request_string has
        persistent features (Connection: Open, etc., 1.0, etc..)
        """
        if parse_for_field(http_request_string, "http_version") != "1.1":
            return True
        if parse_for_field(http_request_string, "Connection") == "Keep-Alive":
            # set keep alive time.
            try:
                self.KEEP_ALIVE_TIME = min(
                    int(parse_for_field(http_request_string, "Keep-Alive")),
                    self.KEEP_ALIVE_TIME)
            except ParsingError:
                # Keep-Alive field does not exist. Use default timeout
                pass
            return False
        return True

    def init_thread(self):
        """
        this is where you put your thread ready to receive data from the client like in assign #1,
        calling the method self.client.rcv(..) in the appropriate loop
        and then process the request done by the client
        :return: VOID
        """
        try:
            # grab first request
            client_req = self._receive()
            non_persistent = self.is_non_persistent(client_req)
            num_of_requests = 0

            while True:
                # start timer
                tick = time.time()
                self.process_client_request(client_req)
                # increment number of request/responses
                num_of_requests += 1
                # if non_persistent or idle time is up or number of requests exceeded.
                if non_persistent or time.time(
                ) - tick > self.KEEP_ALIVE_TIME or num_of_requests >= self.KEEP_ALIVE_REQUESTS:
                    break
                client_req = self._receive()
        except socket.timeout:
            print("Client {} has timed out.".format(self.client_id))
        except socket.error as sock_error:
            print(f"An HTTPError occurred: {sock_error}")
        except ClientDisconnect:
            print("Client has disconnected")
        except Exception as e:
            print(f"Something went wrong: {e}")
        self.client.close()

    def _mask_ip_address(self):
        """
        When private mode, mask ip address to browse in private
        This is easy if you think in terms of client-server sockets
        :return: VOID
        """
        self.client_ip = self.server_ip

    def respond_ok(self, params):
        """
        Sends 200 response to client
        """
        req_map = params['request_map']
        response = params['response']
        http_version = req_map["http_version"]
        content_length = len(response.text)

        response_header = "HTTP/{} 200 OK\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n".format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            content_length, response.text)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        if "Date" in response.headers:
            response_header += "Date: {}\r\n".format(response.headers["Date"])

        self._send(response_header + content_end)

    def respond_not_modified(self, params):
        """
        Sends 304 response to client
        """
        req_map = params['request_map']
        response = params['response']
        http_version = req_map["http_version"]
        content_length = len(response.text)

        response_header = "HTTP/{} 304 Not Modified\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n".format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            content_length, response.text)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        if "Date" in response.headers:
            response_header += "Date: {}\r\n".format(response.headers["Date"])

        self._send(response_header + content_end)

    def respond_unauthorized(self, params):
        """
        Sends 401 response to client
        """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = "<!DOCTYPE HTML><html><head><title>401</title></head><body><h1>401 Unauthorized:</h1> resource is blocked or not authorized for the current user.</body></html>"

        response_header = 'HTTP/{} 401 Unauthorized\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\nProxy-Authenticate: Basic realm="proxyserver"\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        self._send(response_header + content_end)

    def respond_bad_request(self, params):
        """
        Sends 400 response to client Bad Request 
        """
        req_map = params['request_map']
        http_version = '1.1'  # req_map["http_version"]
        content = "<!DOCTYPE HTML><html><head><title>401</title></head><body><h1>400 Bad Request:</h1> the request is not understood by the original server or the proxy server</body></html>"

        response_header = 'HTTP/{} 400 Bad Request\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        self._send(response_header + content_end)

    def respond_not_found(self, params):
        """
        Sends 404 response to client Not Found
        """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = "<!DOCTYPE HTML><html><head><title>401</title></head><body><h1>404 Not Found:</h1> the original server has not found anything matching the Request URI provided by the proxy server.</body></html>"

        response_header = 'HTTP/{} 404 Not Found\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        self._send(response_header + content_end)

    def respond_need_auth(self, params):
        """
        Sends 407 response to client
        """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = '<!DOCTYPE HTML><html><head><title>407</title></head><body><h1>407 Proxy Authentication Required:</h1> the proxy needs authorization based on client credentials in order to continue with the request.</body></html>'

        response_header = 'HTTP/{} 407 Proxy Authentication Required\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\nProxy-Authenticate: Basic realm="proxyserver"\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        self._send(response_header + content_end)

    def respond_forbidden(self, params):
        """
        Sends 403 response to client
        """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = "<!DOCTYPE HTML><html><head><title>401</title></head><body><h1>403 Forbidden:</h1> the server or the proxy server understood the request but the current user is forbidden to see the content of the resource requested. Authorization won’t work either in this case.</body></html>"

        response_header = 'HTTP/{} 403 Forbidden\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)

        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)

        self._send(response_header + content_end)

    def handle_client_response(self, code, params):
        """
        Handles which response to send to users given code.
        """
        print("Sending: {} response to {}".format(code, self.client_id))
        if code == 200:
            self.respond_ok(params)
        elif code == 304:
            self.respond_not_modified(params)
        elif code == 401:
            self.respond_unauthorized(params)
        elif code == 400:
            self.respond_bad_request(params)
        elif code == 404:
            self.respond_not_found(params)
        elif code == 407:
            self.respond_need_auth(params)
        elif code == 403:
            self.respond_forbidden(params)
        else:
            raise Exception("Code not understood")

    def check_permissions(self):
        """
        Because the user role is known after login, check the proxy manager to see
        if the role is allowed access to the resource.
        :return: Bool if user is able to access resource.
        """
        return self.role == "super"

    def login(self, username, password, mask=False):
        """
        Gets the user role from the proxy manager; returns True if the user is authorized
        """
        # if mask:
        #     self._mask_ip_address()
        # check proxymanager if user/pw is in proxymanager.
        if self.proxy_manager.is_admin(
                username, password) or self.proxy_manager.is_manager(
                    username, password):
            self.role = "super"
            return True
        if self.proxy_manager.is_private_mode_user(username, password):
            self.role = "user"
            return True
        return False

    def handle_auth(self, request_map, response_params, blocked_sites=False):
        """
        Note that execution flows back to calling function if the user is authenticating/authenticated
        :param request_map: used to check request
        :param response_params: used in response
        :return: Bool if to continue execution
        """
        if not self.permission or request_map["method"] == "POST":
            # ask for permissions
            if request_map["method"] != "POST":
                # if not an attempt to login and not logged in, ask to login
                self.handle_client_response(407, response_params)
                return False

            body = request_map["body"]
            post_params = params_to_map(body)
            if "user_name" not in post_params or "password" not in post_params:
                # 403 error: Something is wrong with input params...
                self.handle_client_response(403, response_params)
                return False

            username = post_params["user_name"]
            password = post_params["password"]
            self.permission = self.login(username, password, mask=True)

            if not self.permission:
                # insufficient permissions
                self.handle_client_response(401, response_params)
                return False

        if blocked_sites and not self.check_permissions():
            # unauthorized
            self.handle_client_response(401, response_params)
            return False
        return True

    def handle_cache(self, url, request_map, response_params):
        """
        Handles execution of checking cache if-modified-since date,
        and if not returns execution
        :param url: url to check
        :return: Bool if to continue execution
        """
        # if item is cached...
        if self.proxy_manager.is_cached(url):
            old_response = self.proxy_manager.get_cached_resource(url)
            new_head = self.head_request_to_server(url, request_map)
            # compare
            try:
                if old_response.headers[
                        "If-Modified-Since"] == new_head.headers[
                            "If-Modified-Since"]:
                    response_params['response'] = old_response

                    self.handle_client_response(304, response_params)
                    return False
            except KeyError:
                pass

        return True

    def process_client_request(self, http_request_string):
        """
        Main algorithm. Note that these are high-level steps, and most of them may
        require further implementation details
        1. get url and private mode status from client
        2. if private mode, then mask ip address: mask_ip_address method
        3. check if the resource (site) is in cache. If so and not private mode, then:
            3.1 check if site is blocked for this employee
            3.2 check if site requires credentials for this employee
            3.3 if 3.1 or 3.2 then the client needs to send a post request to proxy
                with credentials to check 3.1 and 3.2 access
                3.3.1 if credentials are valid, send a HEAD request to the original server
                      to check the last_date_modified parameter. If the cache header for that
                      site is outdated then move to step 4. Otherwise, send a response to the
                      client with the requested site and the appropriate status code.
        4. If site is not in cache, or last_date_modified is outdated, then create a GET request
            to the original server, and store in cache the response from the server.
        :param http_request_string:
        :return: VOID
        """
        try:
            # get a mapping of HTTP request string to HTTP request fields
            request_map = parse_for_field(http_request_string)

            url = request_map["url"]
            url, query_params = extract_query_params(url, "is_private_mode")

            query_params = params_to_map(query_params)
            # holds data needed to parse in respond
            response_params = {'request_map': request_map}
            # get client's ip
            self.client_ip = request_map["Host"]

            if int(query_params["is_private_mode"]) == 1:
                # make sure authed
                if not self.handle_auth(request_map, response_params):
                    return
                # user has sufficient permissions at this point.
                response = self.get_request_to_server(url, request_map)
            else:
                # in handle_cache checks if proxy manager has url. if it does, it does a head request,
                # checks if_modified_since date, if it is ok, it sends 300 instead. else execution flows
                # back here.
                if not self.handle_cache(url, request_map, response_params):
                    return

                response = self.get_request_to_server(url, request_map)

            if self.proxy_manager.is_site_blocked(url):
                if not self.handle_auth(
                        request_map, response_params, blocked_sites=True):
                    return

            response_params['response'] = response
            if 200 <= response.status_code < 300 or response.status_code == 304:
                if int(query_params["is_private_mode"]) != 1:
                    self.proxy_manager.update_cache(url, response)
                    self.proxy_manager.add_history(url)

                self.handle_client_response(200, response_params)
            else:
                self.handle_client_response(404, response_params)
        except (ParsingError, KeyError):
            # this is encountered when an error occurs while parsing the headers.
            # In this case, a 400 Bad Request is sent back to the client.
            self.handle_client_response(400, {})

    def _send(self, data):
        """
        Serialize the data and
        send it with the send() method of self.client
        :param data: the response data
        :return: VOID
        """
        data_serialized = pickle.dumps(data)
        self.client.send(data_serialized)

    def _receive(self):
        """
        deserialize the data 
        :return: the deserialized data
        """
        self.client.settimeout(self.KEEP_ALIVE_TIME)
        client_request = self.client.recv(self.BUFFER_SIZE)
        if not client_request:
            raise ClientDisconnect()
        # Deserializes the data.
        client_data = pickle.loads(client_request)
        return client_data

    def head_request_to_server(self, url, param):
        """
        HEAD request does not return the HTML of the site
        :param url:
        :param param: additions to session header
        :return: the headers of the response from the original server
        """
        headers = {}
        # add custom headers
        if "Connection" in param:
            headers["Connection"] = param["Connection"]
        if "Keep-Alive" in param:
            headers["Keep-Alive"] = param["Keep-Alive"]
        response = requests.head(url, headers=headers)
        return response

    def get_request_to_server(self, url, param):
        """
        GET request
        :param url: 
        :param param: additions to session header
        :return: the complete response including the body of the response
        """
        headers = {}
        # add custom headers
        if "Connection" in param:
            headers["Connection"] = param["Connection"]
        if "Keep-Alive" in param:
            headers["Keep-Alive"] = param["Keep-Alive"]
        response = requests.get(url, headers=headers)
        return response
Example #18
 def __init__(self):
     log = LogHandler('refresh_schedule')
     ProxyManager.__init__(self, log)
Example #19
from db_classes.saver import Saver
from proxy_manager import ProxyManager
import config as cfg
import my_log

main_logger = my_log.get_logger(__name__)
main_logger.info('program started')
saver = Saver()
proxy_manager = ProxyManager()
Example #20
def getAll():
    proxies = ProxyManager(logger).getAll()
    return jsonify(proxies)
Example #21
def get():
    proxy = ProxyManager(logger).get()
    return proxy if proxy else 'no proxy!'
Example #22
 def __init__(self):
     log = LogHandler('valid_schedule')
     ProxyManager.__init__(self, log)
     Thread.__init__(self)
Example #23
 def __init__(self, conn, client_addr):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.get_settings()
     self.init_thread(conn)
     self.client_id = client_addr[1]
Example #24
from proxy_manager import ProxyManager
import proxy_manager
import threading
import time

pm = ProxyManager()
pm.retrieve_new_proxies()
while True:
	if len(pm.proxy_list)==0:
		print "wait a minute"
		time.sleep(3)
	else:
		break
print pm.proxy_list
Example #25
class ProxyThread(object):
    """
    The proxy thread class represents a threaded proxy instance to handle a specific request from a client socket
    """
    MAX_DATA_RECV = 1000000000
    DEBUG = False

    def __init__(self, conn, client_addr):
        self.proxy_manager = ProxyManager()
        self.client = conn
        self.client_id = client_addr[1]  # get id
        self.client_ip = client_addr[0]  # get ip address
        self.http_version = "1.1"

        if self.DEBUG:
            print(
                "[proxy_thread.py -> __init__] new instance of ProxyThread() class "
            )

    def get_settings(self):
        if self.DEBUG:
            print("[proxy_thread.py -> get_settings] called ")
        return self.proxy_manager

    def init_thread(self):
        try:
            data = self._receive()
            self.process_client_request(data)
            if self.DEBUG:
                print(
                    "[proxy_thread.py -> init_thread] data received. data: \n"
                    + str(data))
        except socket.error as err:
            print("[proxy_thread -> init_thread] error! " + str(err))

    def client_id(self):
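        # NOTE: the instance attribute client_id assigned in __init__ shadows this method.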
        if self.DEBUG:
            print("[proxy_thread.py -> client_id] called. returned: " +
                  str(self.client_id))
        return self.client_id

    def _mask_ip_adress(self):
        # the proxy server is already masking the user IP
        if self.DEBUG:
            print(
                "[proxy_thread.py -> _mask_ip_address] set self.client_ip to: "
                + str(self.client_ip))

    def process_client_request(self, data):
        req = HttpHelper().convert_http_request_to_dict(data)
        self.http_version = req['http']
        url = req['url']
        is_private_mode = req['header']['is_private_mode']

        username = req['header']['username']  # fetch username
        password = req['header']['password']  # fetch password

        # if private mode then mask ip
        if is_private_mode == 1:
            self._mask_ip_adress()

        if self.proxy_manager.is_site_blocked(url):
            if self.proxy_manager.is_admin(username, password):
                self.check_cache_and_send_to_client(url)
            else:
                self.send_response_to_client(
                    str(407), "", """<!DOCTYPE html>
                <html>
                    <head>
                        <title>407 Proxy Authentication Required</title>
                    </head>
                    <body>
                        <h1>407 Proxy Authentication Required</h1><p> the proxy needs authorization based on client credentials in order to continue with the request. (try logging in as admin)</p>
                    </body>
                </html>""")

        elif self.proxy_manager.is_site_blocked_except_managers(url):
            if self.proxy_manager.is_manager(
                    username, password) or self.proxy_manager.is_admin(
                        username, password):
                self.check_cache_and_send_to_client(url)
            else:
                self.send_response_to_client(
                    str(401), "", """<!DOCTYPE html>
                    <html>
                        <head>
                            <title>401</title>
                        </head>
                        <body>
                            <h1>401 Unauthorized</h1><p> resource is blocked or not authorized for the current user</p>
                        </body>
                    </html>""")
        else:
            self.check_cache_and_send_to_client(url)

    def check_cache_and_send_to_client(self, url):
        if self.proxy_manager.is_cached(
                url) and not self.is_outdated_cache(url):  # <-- crashing here
            cached_site = self.proxy_manager.get_cached_resource(
                url)  # contains url, last_modified, and html
            self.send_response_to_client(str(200),
                                         cached_site['last_modified'],
                                         cached_site['html'])  # response

        else:  # website not cached or might be outdated resource
            res = self.response_from_server({
                'mode': 'GET',
                'url': url,
                'param': []
            })
            if self.DEBUG:
                print("url: " + url + " status code: " + str(res.status_code))
                print("headers: " + str(res.headers))

            # add to cache
            try:
                self.proxy_manager.add_cached_resource(
                    url, res.headers['last-modified'], str(res.content))
            except KeyError as e:
                print("proxy_thread, last-modified header did not exist: " +
                      str(e))
                self.proxy_manager.add_cached_resource(url,
                                                       res.headers['date'],
                                                       str(res.content))

            # send response
            last_modified = ""
            try:
                last_modified = res.headers['last-modified']
            except KeyError:
                last_modified = res.headers['date']
            self.send_response_to_client(res.status_code, last_modified,
                                         str(res.content))

    def is_outdated_cache(self, url):
        cache = self.proxy_manager.get_cached_resource(url)
        headers = self.response_from_server({
            'mode': 'HEAD',
            'url': str(url),
            'param': []
        })
        try:
            return headers['last-modified'] != cache['last_modified']
        except KeyError as err:
            print("outdated cache: " + str(err))
        return False  #returning false to fake success in cache

    def _send(self, data):
        try:
            serialized = pickle.dumps(data)
            self.client.send(serialized)
            if self.DEBUG:
                print("[proxy_thread.py -> _send] sent data to client: " +
                      str(data))
        except socket.error as err:
            print("proxy_thread send failed with error %s" % err)
        return

    def _receive(self):
        while True:
            try:
                serialized = self.client.recv(self.MAX_DATA_RECV)
                data = pickle.loads(serialized)
                if self.DEBUG:
                    print(
                        "[proxy_thread.py -> _receive] received data from Client: \n"
                        + str(data))
                return data
            except socket.error as err:
                print("proxy_thread receive failed with error %s " % err)
            except EOFError:
                # print("[proxy_thread -> _receive] EOFError! (unable to pickle.loads successfully)")
                print("proxy thread EOFError exiting...")
                return "     "
                pass

    def head_request_to_server(self, url):
        session = requests.session()
        session.headers['Connection'] = 'close'
        session.headers['Keep-Alive'] = '0'

        try:
            response = session.head(url)
            if self.DEBUG:
                print("head_request_to_server: " + response)
            return response.headers  # .headers is a dictionary
        except requests.exceptions.MissingSchema:
            # retry logic
            print("request failed. retrying with http:// added to url")
            response = session.head('http://' + url)
            if self.DEBUG:
                print("head_request_to_server: " + response)
            return response.headers  # .headers is a dictionary

    def get_request_to_server(self, url):
        session = requests.session()
        session.headers['Connection'] = 'close'
        session.headers['Keep-Alive'] = '0'

        try:
            response = session.get(url)
            if self.DEBUG:
                print("get_request_to_server: " + str(response))
            return response  # .headers, .content, .json, .status_code
        except requests.exceptions.MissingSchema:
            # retry logic
            print("request failed. retrying with http:// added to url: " +
                  "http://" + url)
            response = session.get('http://' + url)
            if self.DEBUG:
                print("get_request_to_server: " + str(response))
            return response

    def response_from_server(self, request):
        mode = request['mode']
        url = request['url']
        if mode == "GET":
            return self.get_request_to_server(url)
        return self.head_request_to_server(url)

    def send_response_to_client(self, status_code, last_modified, html):
        response_string = HttpHelper().build_http_response(
            self.http_version, str(status_code), last_modified, str(html))
        self._send(response_string)
Example #26
from storage_manager import Redis
from proxy_manager import ProxyManager
from autoproxy_config.config import configuration

DESIGNATED_ENDPOINT = configuration.app_config['designated_endpoint']['value']
from IPython import embed
import time

redis = Redis(**configuration.redis_config)
pm = ProxyManager()

embed()

pm = ProxyManager()
for i in range(500):
    proxy = pm.get_proxy(DESIGNATED_ENDPOINT)
    proxy.callback(success=False)
    proxy = pm.get_proxy('https://google.com')
    proxy.callback(success=True)

pm.storage_mgr.sync_to_db()
Example #27
def getStatus():
    status = ProxyManager(logger).getNumber()
    return jsonify(status)
Example #28
def delete():
    proxy = request.args.get('proxy')
    ProxyManager().delete(proxy)
    return 'success'
Example #29
        finally:
            s.close()

        if (len(times) + 1) % 25 == 0:
            print('made 25 requests, host still up')


if __name__ == "__main__":
    target = sys.argv[1] if len(sys.argv) - 1 > 0 else 'https://www.google.com'
    port = int(sys.argv[2]) if len(sys.argv) - 1 > 1 else 443

    proxy_queue = Queue()
    abort_q = Queue()

    pman = ProxyManager()
    print('filling proxy stack')
    pman.fill_proxy_stack()
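    # Busy-wait until the background fill populates the stack.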
    while not pman.proxy_stack:
        pass

    num_processes = cpu_count()
    processes = []
    print('spinning processes')
    for i in range(num_processes):
        processes.append(
            Process(target=spin_threads,
                    args=(target, port, proxy_queue, abort_q)))
        processes[i].start()

    try: