Example #1
 def run(self):
     spider_start_time = str(datetime.now()).split('.')[0]
     print spider_start_time, 'time to spider start!'
     proxy_manager = ProxyManager()
     page = get_html(BASE_URL)
     page = unicode(page, 'GBK').encode('UTF-8')
     page_count = self.get_page_count(page)
     page_count_time = str(datetime.now()).split('.')[0]
     print page_count_time, 'get page count:', page_count
     default_ip = get_default_ip()
     if page_count != 0:
         last_proxy = None
         for i in xrange(1, page_count):
             page = get_html(URL_HEADER + str(i) + URL_END, last_proxy)
             proxy_list = filte(page)
             for proxy in proxy_list:
                 if proxy.anonymous_type == '高匿':
                     check_result = check_anonymous(proxy, default_ip)
                     spider_time = str(datetime.now()).split('.')[0]
                     if check_result[0]:
                         proxy.delay_time = check_result[1]
                         proxy.created_time = str(
                             datetime.now()).split('.')[0]
                         proxy.is_in_china = 2
                         proxy_manager.add_proxy(proxy, spider_time)
                         last_proxy = proxy
Example #2
 def run(self):
     spider_start_time = str(datetime.now()).split('.')[0]
     print spider_start_time, 'time to spider start!'
     proxy_manager = ProxyManager()
     last_proxy = None
     for url in self.urls:
         page = get_html(url)
         page_count = self.get_page_count(page)
         page_count_time = str(datetime.now()).split('.')[0]
         print page_count_time, 'get page count:', page_count
         default_ip = get_default_ip()
         for i in xrange(1, page_count):
             page = get_html(url + str(i))
             proxy_list = filte(page)
             for proxy in proxy_list:
                 if proxy.anonymous_type == '高匿':
                     check_result = check_anonymous(proxy, default_ip)
                     spider_time = str(datetime.now()).split('.')[0]
                     if check_result[0]:
                         proxy.delay_time = check_result[1]
                         proxy.created_time = str(
                             datetime.now()).split('.')[0]
                         proxy.is_in_china = 0
                         if url.endswith(CHINA_ANONYMOUS) or url.endswith(
                                 CHINA_NORMAL):
                             proxy.is_in_china = 1
                         proxy_manager.add_proxy(proxy, spider_time)
                         last_proxy = proxy
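
Examples #1 and #2 are Python 2 spiders (note the print statements and xrange) that depend on helpers defined elsewhere in their repository: get_html, get_default_ip, filte, and check_anonymous. The string '高匿' they filter on is Chinese for "elite" (highly anonymous). Note also that both loops run xrange(1, page_count), which starts at page 1 and stops before page_count, so whether the last page is fetched depends on how get_page_count counts. A rough sketch of what the two checking helpers might look like, assuming requests, a public IP-echo endpoint, and proxy objects with ip/port attributes; all of it is conjecture, not the original code:

import time

import requests

ECHO_URL = "https://api.ipify.org"  # hypothetical choice of IP-echo service


def get_default_ip():
    # The spider's own outbound IP, used as the baseline for anonymity checks.
    return requests.get(ECHO_URL, timeout=10).text


def check_anonymous(proxy, default_ip):
    # Route a request through the proxy and compare the IP the echo service
    # sees against our real one. Returns (is_anonymous, delay_seconds).
    proxies = {"http": f"http://{proxy.ip}:{proxy.port}",
               "https": f"http://{proxy.ip}:{proxy.port}"}
    start = time.time()
    try:
        seen_ip = requests.get(ECHO_URL, proxies=proxies, timeout=10).text
    except requests.RequestException:
        return (False, None)  # proxy is dead or too slow
    return (seen_ip != default_ip, time.time() - start)
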
Example #3
 def __init__(self, conn, client_addr):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.client_id = client_addr[1]
     self.client_address = client_addr[0]
     self.url_after_split = None
     self.http_status = None
Example #4
def personal_proxy_example(n_req=10):
    # Set PROXY_LIST and PORT_LIST in definitions.py
    pm = ProxyManager(PROXY_LIST, PORT_LIST)

    for _ in range(n_req):
        pm.set_proxy()
        foo()
Example #5
 def process_client_request(self, data):
     """
            Main algorithm. Note that those are high level steps, and most of them may
            require futher implementation details
            1. get url and private mode status from client 
            2. if private mode, then mask ip address: mask_ip_address method
            3. check if the resource (site) is in cache. If so and not private mode, then:
                3.1 check if site is blocked for this employee 
                3.2 check if site require credentials for this employee
                3.3 if 3.1 or 3.2 then then client needs to send a post request to proxy
                    with credentials to check 3.1 and 3.2 access 
                    3.3.1 if credentials are valid, send a HEAD request to the original server
                          to check last_date_modified parameter. If the cache header for that 
                          site is outdated then move to step 4. Otherwise, send a response to the 
                          client with the requested site and the appropiate status code.
             4. If site is not in cache, or last_data_modified is outdated, then create a GET request 
                to the original server, and store in cache the reponse from the server. 
            :param data: 
            :return: VOID
     """
     self.data = data
     url = data["url"]
     privateMode = data["is_private_mode"]
     if privateMode == '1':
         self._mask_ip_adress()
     manager = ProxyManager()
     managerResult = manager.get_cached_resource(data)
     if data["url"] == managerResult["url"]:
         if data["is_private_mode"] == managerResult["is_private_mode"]:
             if manager.is_site_blocked(data):
                 return True
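
The method above implements only a fragment of its own docstring. A minimal sketch of the remaining steps (3.3.1 and 4), assuming requests; the cache_resource helper and the cache-entry fields are assumptions, not part of the original class:

import requests


def fetch_and_cache(manager, data):
    url = data["url"]
    cached = manager.get_cached_resource(data)
    if cached:
        # Step 3.3.1: HEAD request to see whether the cached copy is stale.
        head = requests.head(url, timeout=10)
        if head.headers.get("Last-Modified") == cached.get("last_date_modified"):
            return cached["body"]  # cache still fresh; serve it directly
    # Step 4: cache miss or stale entry; GET from the origin and cache it.
    resp = requests.get(url, timeout=10)
    manager.cache_resource(url, resp.content,
                           resp.headers.get("Last-Modified"))
    return resp.content
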
Example #6
 def __init__(self, conn, client_addr, server_ip):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.client_id = client_addr[1]
     self.client_ip = ""
     self.permission = False  # whether or not user is authenticated
     self.role = "EMPLOYEE"  # permission mode of user: "******", "user", or "EMPLOYEE"
     self.KEEP_ALIVE_TIME = 115  # time to keep idle connection alive(seconds)
     self.server_ip = server_ip
Example #7
def public_proxy_example(n_req=10):
    proxy_df = get_proxy_list(n_proxy=5, anonymity='elite', https='true')
    proxy_list = proxy_df['IP'].values
    port_list = proxy_df['PORT'].values

    pm = ProxyManager(proxy_list, port_list)

    for _ in range(n_req):
        pm.set_proxy()
        foo()
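
Both rotation examples (#4 and #7) assume a ProxyManager whose set_proxy() advances to the next proxy/port pair, and a zero-argument foo() that performs the actual request through it. A hypothetical stand-in consistent with that calling pattern, assuming requests and using HTTP(S)_PROXY environment variables (which requests honors by default) so foo() needs no arguments; the real implementation is not shown in the source:

import itertools
import os

import requests


class RotatingProxyManager:
    # Each set_proxy() call advances to the next (ip, port) pair and
    # exports it so that any later requests call picks it up.
    def __init__(self, proxy_list, port_list):
        self._cycle = itertools.cycle(zip(proxy_list, port_list))

    def set_proxy(self):
        ip, port = next(self._cycle)
        proxy_url = f"http://{ip}:{port}"
        os.environ["HTTP_PROXY"] = proxy_url
        os.environ["HTTPS_PROXY"] = proxy_url


def foo():
    # Goes through whatever proxy is currently exported.
    return requests.get("https://httpbin.org/ip", timeout=10).json()
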
Example #8
    def __init__(self, conn, client_addr):
        self.proxy_manager = ProxyManager()
        self.client = conn
        self.client_id = client_addr[1]  # get id
        self.client_ip = client_addr[0]  # get ip address
        self.http_version = "1.1"

        if self.DEBUG:
            print(
                "[proxy_thread.py -> __init__] new instance of ProxyThread() class "
            )
Example #9
    def __init__(self, validate=True, **kwargs):
        self.proxy_mgr = ProxyManager()
        self.validate = validate
        try:
            self.address = kwargs['address']
            self.port = kwargs['port']
        except KeyError:
            raise Exception("Missing required attribute(s) address and/or port for Proxy object")

        for req in self.__class__.required_proxy_attrs:
            if not getattr(self, req):
                raise Exception("Attribute %s must evaluate to True" % req)

        for kwarg, default in self.__class__.default_proxy_attrs.items():
            if(kwarg in kwargs and kwargs[kwarg]):
                default = kwargs[kwarg]
            setattr(self, kwarg, default)

        if self.validate:
            self.do_proxy_validation()

        self.proxy_id = self.id
        print(self.proxy_id)
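
Example #9 reads two class-level attributes that are not shown: required_proxy_attrs (names that must be truthy after init) and default_proxy_attrs (kwarg defaults). The final self.proxy_id = self.id also presumes an id attribute set somewhere upstream, presumably during do_proxy_validation() or via a default. A guess at how the surrounding class might declare them; the real values live elsewhere in that codebase:

class Proxy:
    required_proxy_attrs = ("address", "port")
    default_proxy_attrs = {
        "scheme": "http",   # hypothetical default
        "username": None,
        "password": None,
        "id": None,         # read later as self.proxy_id = self.id
    }
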
Example #10
# Proxychecker

from fastapi import FastAPI
import motor.motor_tornado
import uvicorn
from proxy_manager import ProxyManager
import asyncio


def get_db_conn():
    client = motor.motor_tornado.MotorClient("mongodb://18.185.77.185:27017/")
    return client["mar_wit"]


db_conn = get_db_conn()
proxy = ProxyManager(
    db_conn,
    "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt"
)
app = FastAPI()


@app.on_event("startup")
async def boot():
    await asyncio.sleep(5)
    asyncio.create_task(proxy.set())
    asyncio.create_task(proxy.check())


if __name__ == "__main__":
    uvicorn.run("proxychecker:app", port=8000, reload=True)
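
Example #10 uses @app.on_event("startup"), which still works but is deprecated in recent FastAPI releases in favor of a lifespan handler. The same boot logic, sketched with the lifespan API:

from contextlib import asynccontextmanager


@asynccontextmanager
async def lifespan(app: FastAPI):
    await asyncio.sleep(5)  # same grace period as the on_event version
    asyncio.create_task(proxy.set())
    asyncio.create_task(proxy.check())
    yield  # application runs; nothing to tear down here


app = FastAPI(lifespan=lifespan)
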
Example #11
def getStatus():
    status = ProxyManager(logger).getNumber()
    return jsonify(status)
Example #12
def delete():
    proxy = request.args.get('proxy')
    ProxyManager().delete(proxy)
    return 'success'
Example #13
def getAll():
    proxies = ProxyManager(logger).getAll()
    return jsonify(proxies)
Example #14
from storage_manager import Redis
from proxy_manager import ProxyManager
from autoproxy_config.config import configuration

DESIGNATED_ENDPOINT = configuration.app_config['designated_endpoint']['value']
from IPython import embed
import time

redis = Redis(**configuration.redis_config)
pm = ProxyManager()

embed()

pm = ProxyManager()
for i in range(500):
    proxy = pm.get_proxy(DESIGNATED_ENDPOINT)
    proxy.callback(success=False)
    proxy = pm.get_proxy('https://google.com')
    proxy.callback(success=True)

pm.storage_mgr.sync_to_db()
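
Example #14 is evidently a REPL smoke test: it drops into IPython via embed(), then exercises the success/failure callback that presumably feeds the manager's proxy scoring before syncing state back to the database with sync_to_db().
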
Example #15
 def __init__(self, conn, client_addr):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.get_settings()
     self.init_thread(conn)
     self.client_id = client_addr[1]
Example #16
        finally:
            s.close()

        if (len(times) + 1) % 25 == 0:
            print('made 25 requests, host still up')


if __name__ == "__main__":
    target = sys.argv[1] if len(sys.argv) - 1 > 0 else 'https://www.google.com'
    port = int(sys.argv[2]) if len(sys.argv) - 1 > 1 else 443

    proxy_queue = Queue()
    abort_q = Queue()

    pman = ProxyManager()
    print('filling proxy stack')
    pman.fill_proxy_stack()
    while not pman.proxy_stack:
        pass

    num_processes = cpu_count()
    processes = []
    print('spinning processes')
    for i in range(num_processes):
        processes.append(
            Process(target=spin_threads,
                    args=(target, port, proxy_queue, abort_q)))
        processes[i].start()

    try:
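
The snippet cuts off inside its final try: block, and the spin_threads worker it launches is not shown. A hypothetical worker consistent with the arguments it receives, assuming requests; none of the names or logic below come from the original implementation:

import queue
import time

import requests


def spin_threads(target, port, proxy_queue, abort_q):
    # Pull proxies from the shared queue and time requests against the
    # target until something lands on abort_q.
    times = []
    while abort_q.empty():
        try:
            proxy = proxy_queue.get(timeout=5)  # assumed "ip:port" string
        except queue.Empty:
            break
        proxies = {"http": f"http://{proxy}", "https": f"http://{proxy}"}
        start = time.monotonic()
        try:
            requests.get(target, proxies=proxies, timeout=10)
            times.append(time.monotonic() - start)
        except requests.RequestException:
            continue  # dead proxy; move on to the next one
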
Example #17
 def __init__(self, conn, client_addr):
     self.proxy_manager = ProxyManager()
     self.client = conn
     self.client_id = client_addr[1]
Example #18
from db_classes.saver import Saver
from proxy_manager import ProxyManager
import config as cfg
import my_log

main_logger = my_log.get_logger(__name__)
main_logger.info('program started')
saver = Saver()
proxy_manager = ProxyManager()
Example #19
def get():
    proxy = ProxyManager(logger).get()
    return proxy if proxy else 'no proxy!'
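
Examples #11, #12, #13, and #19 look like route handlers lifted from a small Flask proxy-pool API, with the decorators, app object, and logger stripped out. One way they might be wired together, assuming Flask and a ProxyManager with these methods; the route paths are guesses:

import logging

from flask import Flask, jsonify, request

from proxy_manager import ProxyManager

app = Flask(__name__)
logger = logging.getLogger(__name__)


@app.route('/get')
def get():
    proxy = ProxyManager(logger).get()
    return proxy if proxy else 'no proxy!'


@app.route('/delete')
def delete():
    ProxyManager().delete(request.args.get('proxy'))
    return 'success'


@app.route('/status')
def getStatus():
    return jsonify(ProxyManager(logger).getNumber())


@app.route('/all')
def getAll():
    return jsonify(ProxyManager(logger).getAll())
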