Example #1
    async def tearDown(self):
        entity.get_session(database_url=self.database_url).close()
        try:
            os.remove(self.db_file_path)
        except OSError:
            # The per-test database file may already be gone.
            pass
        await self.app.close()
        await self.runner.cleanup()
        await self.session.close()
Example #2
    def __init__(self, *args, db=None, loop=None, **kwargs):
        super().__init__(*args, **kwargs)

        self.router.add_get('/list', self.list)
        self.router.add_post('/list', self.list)
        self.router.add_post('/add', self.add)
        self.router.add_post('/remove', self.remove)
        self.router.add_post('/add_check', self.add_check)
        self.router.add_get('/list_check', self.list_check)
        self.router.add_post('/list_check', self.list_check)
        self.router.add_post('/remove_check', self.remove_check)
        self.router.add_post('/add_proxy_check', self.add_proxy_check)
        self.router.add_post('/remove_proxy_check', self.remove_proxy_check)

        self.on_startup.append(self._load_default_checks)

        self.db = db or entity.get_session()
        self._db_semaphore = asyncio.Semaphore()
        entity.create_models()

        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(settings.LOG_LEVEL)
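
This constructor wires routes and a database session into what, judging by self.router, self.on_startup, and the AppRunner usage in Example #4, is a subclass of aiohttp's web.Application. Under that assumption, a minimal sketch of standing the server up on its own (host and port here are arbitrary):

    from aiohttp import web

    app = Server()  # falls back to entity.get_session() when no db is passed
    web.run_app(app, host='127.0.0.1', port=7766)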
Example #3
    def __init__(self):
        self.queue = {}
        self._is_running = False
        self._is_need_to_stop = False
        self.workers = []
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(settings.LOG_LEVEL)

        self.sync_every = 5

        self.session = entity.get_session()
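
This class is driven externally: Examples #5 and #6 append Worker instances to manager.workers, feed it with manager.put(proxy), and then run the start() / stop() / wait_stop() coroutines on an event loop. A condensed sketch of that contract, reusing only calls those examples already make:

    manager = Manager()
    manager.workers.append(Worker(concurent_requests=10))
    for proxy in proxies:
        manager.put(proxy)

    loop = asyncio.get_event_loop()
    asyncio.ensure_future(asyncio.gather(*[w.start() for w in manager.workers]))
    asyncio.ensure_future(manager.start())
    loop.run_until_complete(asyncio.gather(*[w.stop() for w in manager.workers]))
    loop.run_until_complete(manager.stop())
    loop.run_until_complete(manager.wait_stop())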
Example #4
    async def setUp(self):
        self.loop = asyncio.get_event_loop()
        self.db_file_counter += 1
        self.db_file_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'test{}.db'.format(self.db_file_counter))
        self.database_url = entity.get_sqlite_database_url(self.db_file_path)
        entity.create_models(engine=entity.get_engine(
            database_url=self.database_url))

        server.Server._load_default_checks = asynctest.mock.CoroutineMock()
        self.app = server.Server(db=entity.get_session(
            database_url=self.database_url))
        self.app.logger.propagate = False

        self.runner = web.AppRunner(self.app)
        await self.runner.setup()

        self._host = '127.0.0.1'
        self._port = 7766
        await web.TCPSite(self.runner, self._host, self._port).start()

        self.session = aiohttp.ClientSession()
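
With this setUp and the tearDown from Example #1 in place, individual tests can exercise the server through self.session. A hypothetical test method (the /list route comes from Example #2; the method name and asserted status are illustrative, not part of the project):

    async def test_list_returns_200(self):
        url = 'http://{}:{}/list'.format(self._host, self._port)
        async with self.session.get(url) as resp:
            assert resp.status == 200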
Example #5
def main():
    concurent_requests = 50
    workers_count = 5
    timeout = 3

    warnings.simplefilter("ignore", category=sa_exc.SAWarning)

    args, opts = arg_parser.parse_known_args()
    if not len(opts):
        opts.append(args.default_file_path)
    if args.debug:
        settings.enable_debug_mode()
        args.quiet = False

    tmp_database_file_name = 'tmp_database.db'
    tmp_database_file_path = entity.get_sqlite_database_path(
        db_file_name=tmp_database_file_name)
    database_url = entity.get_sqlite_database_url(
        db_path=tmp_database_file_path)
    session = entity.get_session(database_url=database_url, force=True)
    entity.create_models()

    manager = Manager()
    manager.logger.disabled = args.quiet
    logging.getLogger('asyncio').disabled = args.quiet

    file_path = opts[0]
    try:
        with open(file_path, 'r') as f:
            proxies = [line.strip() for line in f]
            proxies = [line for line in proxies if line]
    except FileNotFoundError:
        manager.logger.warning(
            'File does not exist at path: {}'.format(file_path))
        manager.logger.info('Trying to use opts as proxies input')
        proxies = opts

    if not proxies:
        manager.logger.critical('No proxies found')
        return False

    manager.logger.debug('Trying to check these proxies: {}'.format(
        ', '.join(proxies)))

    start_time = time.time()
    checks = []
    checks.append(
        entity.Check(
            'http://google.com',
            status=[200, 301],
            xpath_list=(XPathCheck(
                './/input[contains(@name, "btn") and @type="submit"]'), )))
    checks.append(
        entity.Check(
            'https://www.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=Xiaomi+MI+A1+(64GB%2C+4GB+RAM)&rh=i%3Aaps%2Ck%3AXiaomi+MI+A1+(64GB%5Cc+4GB+RAM)',
            status=200,
            xpath_list=(
                XPathCheck(
                    './/span[contains(text(), "Xiaomi MI A1 (64GB, 4GB RAM")]'
                ),
                BanXPathCheck(
                    './/*[contains(text(), "To discuss automated access to Amazon data please contact")]'
                ),
                BanXPathCheck(
                    './/*[contains(@alt, "Something went wrong on our end. Please go back and")]'
                ),
                BanXPathCheck(
                    './/*[contains(text(), "Type the characters you see in this image")]'
                ),
            )))
    checks.append(
        entity.Check('https://www.olx.ua',
                     status=200,
                     xpath_list=(
                         XPathCheck('.//input[@id="headerSearch"]'),
                         BanXPathCheck('.//img[contains(@src, "failover")]'),
                     )))

    for check in checks:
        check.logger.disabled = args.quiet

    progress_bar_list = []
    for i in range(workers_count):
        if settings.PROGRESS_BAR_ENABLED or args.progress_bar:
            progress_bar = tqdm.tqdm(position=i)
            progress_bar_list.append(progress_bar)
        else:
            progress_bar = None
        worker = Worker(concurent_requests=concurent_requests,
                        progress_bar=progress_bar)
        manager.workers.append(worker)

        worker.logger.disabled = args.quiet

    proxies = [parse_proxy_string(proxy) for proxy in proxies]
    for proxy in proxies[:]:
        if not proxy.protocol:
            for protocol in settings.POSSIBLE_PROTOCOLS:
                if protocol == 'http':
                    continue
                buffer_proxy = proxy.make_proxy_string(protocol=protocol)
                buffer_proxy = parse_proxy_string(buffer_proxy)

                proxies.append(buffer_proxy)

    for proxy in proxies:
        for check in checks:
            proxy.add_check_definition(check)
    session.commit()

    for proxy in proxies:
        manager.put(proxy)

    loop = asyncio.get_event_loop()

    asyncio.ensure_future(asyncio.gather(*[x.start()
                                           for x in manager.workers]))
    asyncio.ensure_future(manager.start())

    loop.run_until_complete(
        asyncio.gather(*[x.stop() for x in manager.workers]))
    loop.run_until_complete(
        asyncio.gather(*[x.wait_stop() for x in manager.workers]))
    loop.run_until_complete(manager.stop())
    loop.run_until_complete(manager.wait_stop())

    loop.close()

    for proxy in proxies:
        session.add(proxy)
    session.commit()

    if progress_bar_list:
        for x in progress_bar_list:
            x.close()
            print('')

    alive = sorted(filter(lambda x: x.is_alive, proxies), key=lambda x: x.time)
    for proxy in alive:
        banned_on = proxy.banned_on
        banned_on = ', banned on: ' + ', '.join(banned_on) if banned_on else ''

        if not args.quiet:
            manager.logger.info('{:0.3f} s, {}{}'.format(
                proxy.time, proxy, banned_on))
        else:
            print(proxy, flush=True)

    delta_time = time.time() - start_time
    manager.logger.info(
        '{}/{} proxies alive. Checked {} proxies in {:0.2f} s. {:0.0f} proxies per second with {} concurrent requests.'
        .format(len(alive), len(proxies), len(proxies), delta_time,
                len(proxies) / delta_time, concurent_requests))

    os.remove(tmp_database_file_path)
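
The shutdown choreography in this example (schedule the long-running coroutines with asyncio.ensure_future, then drive stop() and wait_stop() through run_until_complete) reduces to the self-contained sketch below. Stoppable is an illustrative stand-in, not the project's real Worker or Manager:

    import asyncio

    class Stoppable:
        def __init__(self):
            self._stop = asyncio.Event()
            self._done = asyncio.Event()

        async def start(self):
            # Run until asked to stop, then signal that we have drained.
            while not self._stop.is_set():
                await asyncio.sleep(0.1)  # one unit of work
            self._done.set()

        async def stop(self):
            self._stop.set()

        async def wait_stop(self):
            await self._done.wait()

    worker = Stoppable()
    loop = asyncio.get_event_loop()
    asyncio.ensure_future(worker.start())        # scheduled, not yet running
    loop.run_until_complete(worker.stop())       # loop spins; stop flag is set
    loop.run_until_complete(worker.wait_stop())  # start() notices and finishes
    loop.close()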
Example #6
def main():
    # 'global' must be declared before the import statement rebinds the name.
    global proxies
    from proxies import proxies

    args = arg_parser.parse_args()
    if args.debug:
        settings.enable_debug_mode()

    session = entity.get_session(database_url=entity.get_sqlite_database_url())
    entity.create_models()

    concurent_requests = 50
    workers_count = 5
    timeout = 3

    start_time = time.time()
    checks = []
    checks.append(
        entity.Check(
            'http://google.com',
            status=[200, 301],
            xpath='.//input[contains(@name, "btn") and @type="submit"]'))
    checks.append(
        entity.Check(
            'https://www.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=Xiaomi+MI+A1+(64GB%2C+4GB+RAM)&rh=i%3Aaps%2Ck%3AXiaomi+MI+A1+(64GB%5Cc+4GB+RAM)',
            status=200,
            xpath=(
                XPathCheck(
                    './/span[contains(text(), "Xiaomi MI A1 (64GB, 4GB RAM")]'
                ),
                BanXPathCheck(
                    './/*[contains(text(), "To discuss automated access to Amazon data please contact")]'
                ),
                BanXPathCheck(
                    './/*[contains(@alt, "Something went wrong on our end. Please go back and")]'
                ),
                BanXPathCheck(
                    './/*[contains(text(), "Type the characters you see in this image")]'
                ),
            )))
    checks.append(
        entity.Check('https://www.olx.ua',
                     status=200,
                     xpath=(
                         XPathCheck('.//input[@id="headerSearch"]'),
                         BanXPathCheck('.//img[contains(@src, "failover")]'),
                     )))

    manager = Manager()
    for i in range(workers_count):
        if settings.PROGRESS_BAR_ENABLED:
            progress_bar = tqdm.tqdm(position=i)
        else:
            progress_bar = None
        worker = Worker(concurent_requests=concurent_requests,
                        progress_bar=progress_bar)
        manager.workers.append(worker)

    proxies = [parse_proxy_string(proxy) for proxy in proxies]
    for proxy in proxies[:]:
        if not proxy.protocol:
            for protocol in settings.POSSIBLE_PROTOCOLS:
                if protocol == 'http':
                    continue
                buffer_proxy = proxy.make_proxy_string(protocol=protocol)
                buffer_proxy = parse_proxy_string(buffer_proxy)

                proxies.append(buffer_proxy)

    for proxy in proxies:
        for check in checks:
            proxy.add_check_definition(check)
    session.commit()

    for proxy in proxies:
        manager.put(proxy)

    loop = asyncio.get_event_loop()

    asyncio.ensure_future(asyncio.gather(*[x.start()
                                           for x in manager.workers]))
    asyncio.ensure_future(manager.start())

    loop.run_until_complete(
        asyncio.gather(*[x.stop() for x in manager.workers]))
    loop.run_until_complete(
        asyncio.gather(*[x.wait_stop() for x in manager.workers]))
    loop.run_until_complete(manager.stop())
    loop.run_until_complete(manager.wait_stop())

    loop.close()

    for proxy in proxies:
        session.add(proxy)
    session.commit()

    alive = sorted(filter(lambda x: x.is_alive, proxies), key=lambda x: x.time)
    for proxy in alive:
        banned_on = proxy.banned_on
        banned_on = ', banned on: ' + ', '.join(banned_on) if banned_on else ''
        manager.logger.info('{:0.3f} s, {}{}'.format(proxy.time, proxy,
                                                     banned_on))

    delta_time = time.time() - start_time
    manager.logger.info(
        '{}/{} proxies alive. Checked {} proxies in {:0.2f} s. {:0.0f} proxies per second with {} concurrent requests.'
        .format(len(alive), len(proxies), len(proxies), delta_time,
                len(proxies) / delta_time, concurent_requests))
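
For reference, the proxy objects returned by parse_proxy_string are only ever touched through a handful of members in Examples #5 and #6, so the interface below is inferred from usage rather than taken from documentation: protocol (falsy when the input string carried no scheme), make_proxy_string(protocol=...), add_check_definition(check), and, after checking, is_alive, time, and banned_on.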