async def api_v1_stats(request: Request):
    """Return aggregate proxy statistics (median/mean latency, counts) as JSON."""
    # Median latency: the middle row of valid proxies ordered by latency.
    median_row: ProxyIP = ProxyIP.raw(
        """SELECT latency FROM proxy_ips WHERE is_valid = 1 ORDER BY latency LIMIT 1 OFFSET ( SELECT COUNT(*) FROM proxy_ips WHERE is_valid = 1 ) / 2"""
    ).get()
    # Mean latency over valid proxies, excluding rows with latency >= 9999.
    mean_row: ProxyIP = ProxyIP.raw(
        """SELECT AVG(latency) as latency FROM proxy_ips WHERE is_valid = 1 AND latency < 9999"""
    ).get()
    valid_total = _get_valid_proxies_query().count()
    overall_total = ProxyIP.select().count()
    return json({
        'median': median_row.latency,
        'valid_count': valid_total,
        'total_count': overall_total,
        'mean': mean_row.latency,
    })
def create_test_ip() -> str:
    """Insert a synthetic valid proxy row and return its randomly generated IP."""
    address = _gen_random_ip()
    record = ProxyIP(ip=address, port=3306, latency=200.00, stability=100.0, is_valid=True)
    record.save()
    return address
def validate_proxy_ip(p: ProxyIP):
    """Validate a single proxy and persist the result.

    Skips proxies that the ValidationPolicy rejects; otherwise runs the
    Validator (over HTTPS when the policy says to try it), copies the
    outcome onto a fresh ProxyIP record, and saves it via save_ip().
    """
    # logger.debug('Validating ip: {}'.format(p.ip))
    policy = ValidationPolicy(proxy_ip=p)
    if not policy.should_validate():
        return
    v = Validator(host=p.ip, port=int(p.port), using_https=policy.should_try_https())
    try:
        v.validate()
    except (KeyboardInterrupt, SystemExit):
        logger.info('KeyboardInterrupt terminates validate_proxy_ip: ' + p.ip)
        # Bug fix: the original fell through after the interrupt and saved a
        # half-validated record; bail out instead of persisting stale results.
        return
    meta = v.meta if v.meta else {}
    validated_ip = ProxyIP(ip=p.ip, port=p.port, **meta)  # save valid ip into database
    validated_ip.latency = v.latency
    validated_ip.stability = v.success_rate
    validated_ip.is_valid = v.valid
    validated_ip.is_anonymous = v.anonymous
    # Increase attempts and https_attempts
    validated_ip.attempts = validated_ip.attempts + 1
    if v.using_https:
        validated_ip.https_attempts = validated_ip.https_attempts + 1
    if v.valid:
        validated_ip.is_https = v.using_https
    # logger.debug('Save valid ip into database: \n' + validated_ip.__str__())
    save_ip(validated_ip)
def test_save_ip():
    """save_ip must upsert: saving the same ip twice leaves exactly one row."""
    first = ProxyIP(ip='192.168.0.1', port=443, latency=200, stability=0.5)
    save_ip(first)
    # basically the same ip
    duplicate = ProxyIP(ip='192.168.0.1', port=443, latency=200, stability=0.5)
    save_ip(duplicate)
    assert ProxyIP.select().where(ProxyIP.ip == '192.168.0.1').count() == 1
    # Remove every row so later tests start from a clean table.
    ProxyIP.delete().execute()
def test_create_ip_floor_latency():
    """Saving a record should floor its latency (100.66 -> 100.0)."""
    address = gen_random_ip()
    record = ProxyIP(ip=address, port=3306, latency=100.66, stability=100.0, is_valid=True)
    record.save()
    assert record.latency == 100.0
    delete_test_ip(address)
def save_ip(p: ProxyIP):
    """Create or update the database row for p, keyed by its ip address.

    Improvement: the original issued a count() query and then a separate
    get(); peewee's get_or_none() does the lookup in a single query and
    narrows the lookup/update race window.
    """
    existing_proxy: ProxyIP = ProxyIP.get_or_none(ProxyIP.ip == p.ip)
    if existing_proxy is None:
        # logger.debug('Creating new ip record: ' + p.__str__())
        p.save()
    else:
        # logger.debug('Update an existing ip record: ' + p.__str__())
        existing_proxy.assign_from(p)
        existing_proxy.save()
def test_create_ip():
    """create_test_ip should leave at least one row in the table."""
    address = create_test_ip()
    assert ProxyIP.select().count() > 0
    delete_test_ip(address)
def test_validate_proxy_ip(mocker):
    """validate_proxy_ip runs the Validator once and saves the result once."""
    validate_mock = mocker.patch('scylla.validator.Validator.validate')
    save_mock = mocker.patch('scylla.jobs.save_ip')
    proxy = ProxyIP(ip='127.0.0.1', port=80)
    validate_proxy_ip(proxy)
    validate_mock.assert_called_once()
    save_mock.assert_called_once()
def feed_from_db():
    """Re-queue proxies updated within the last 14 days for re-validation."""
    # TODO: better query (order by attempts)
    cutoff = datetime.now() - timedelta(days=14)
    recent = ProxyIP.select().where(ProxyIP.updated_at > cutoff)
    for proxy in recent:
        scheduler.validator_queue.put(proxy)
    logger.debug('Feed {} proxies from the database for a second time validation'.format(len(recent)))
def get_proxy(https=False) -> ProxyIP:
    """Pick one random proxy that is valid and stable (stability >= 0.9).

    When https is True, only HTTPS-capable proxies are considered. The pool
    is the 63 most recently updated matching rows.
    """
    candidates: [ProxyIP] = ProxyIP.select().where(ProxyIP.is_valid == True).where(ProxyIP.stability >= 0.9)
    if https:
        candidates = candidates.where(ProxyIP.is_https == True)
    candidates = candidates.order_by(ProxyIP.updated_at.desc()).limit(63)
    # NOTE(review): random.choice raises IndexError when no proxy matches —
    # confirm callers expect that rather than a None return.
    return random.choice(candidates)
def parse(self, html: HTML) -> [ProxyIP]:
    """Extract proxies from .proxylist rows; column 1 holds "ip:port" text."""
    results: [ProxyIP] = []
    for row in html.find('.proxylist tbody tr'):
        address, port = row.find('td:nth-child(1)', first=True).text.split(":")
        results.append(ProxyIP(ip=address, port=port))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Regex-extract ip and port from the first two cells of each nested-table row."""
    results: [ProxyIP] = []
    for row in html.find('table table tr'):
        ip_cell = row.find('td:nth-of-type(1)', first=True)
        port_cell = row.find('td:nth-of-type(2)', first=True)
        if not (ip_cell and port_cell):
            continue
        # NOTE(review): .group(0) raises AttributeError if a cell has no match
        # — presumably header rows are excluded by the cell check; confirm.
        address = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_cell.text).group(0)
        port = re.search(r'\d{2,5}', port_cell.text).group(0)
        results.append(ProxyIP(ip=address, port=port))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Parse a JSON body: a list of {ip, port, anonymous} objects."""
    rows = json.loads(html.raw_html.decode('utf-8'))
    return [
        ProxyIP(ip=row['ip'], port=row['port'], is_anonymous=row['anonymous'])
        for row in rows
    ]
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Collect proxies from #list table rows via their data-title cells."""
    results: [ProxyIP] = []
    for row in document.find('#list table tr'):
        ip_cell = row.find('td[data-title="IP"]')
        port_cell = row.find('td[data-title="PORT"]')
        if ip_cell and port_cell:
            results.append(ProxyIP(ip=ip_cell.text(), port=port_cell.text()))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Collect proxies from #ip_list rows (ip in column 2, port in column 3)."""
    results: [ProxyIP] = []
    for row in document.find('#ip_list tr'):
        ip_cell = row.find('td:nth-child(2)')
        port_cell = row.find('td:nth-child(3)')
        if ip_cell and port_cell:
            results.append(ProxyIP(ip=ip_cell.text(), port=port_cell.text()))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Read proxies from the .wlist listing; each .l2 row has ip and port spans."""
    results: [ProxyIP] = []
    for row in html.find('.wlist > ul > li:nth-child(2) .l2'):
        ip_cell = row.find('span:nth-child(1)', first=True)
        port_cell = row.find('span:nth-child(2)', first=True)
        if ip_cell and port_cell:
            results.append(ProxyIP(ip=ip_cell.text, port=port_cell.text))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Split the "ip:port" text of column 1 in each .proxylist body row."""
    results: [ProxyIP] = []
    for raw_row in document.find('.proxylist tbody tr'):
        row: PyQuery = PyQuery(raw_row)
        address, port = row.find('td:nth-child(1)').text().split(":")
        results.append(ProxyIP(ip=address, port=port))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Parse the JSON body: expects {"usproxy": [{"ip": ..., "port": ...}, ...]}.

    Bug fix: the original called json.load() on a string; json.load() takes a
    file-like object and fails on str — json.loads() is the string variant
    (as the sibling HTML-based parser in this project already uses).
    """
    ip_list: [ProxyIP] = []
    text = document.html()
    json_object = json.loads(text)
    # Bail out on an empty body or an unexpected payload shape.
    if not json_object or type(json_object['usproxy']) != list:
        return ip_list
    for ip_port in json_object['usproxy']:
        ip_list.append(ProxyIP(ip=ip_port['ip'], port=ip_port['port']))
    return ip_list
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Read ip (col 1) and port (col 2) from each #proxylisttable body row."""
    results: [ProxyIP] = []
    for row in document.find('#proxylisttable tbody tr'):
        address: str = row.find('td:nth-child(1)').text()
        port: str = row.find('td:nth-child(2)').text()
        results.append(ProxyIP(ip=address, port=port))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Parse {"usproxy": [...]} JSON from the raw response body."""
    results: [ProxyIP] = []
    payload = json.loads(html.raw_html.decode('utf-8'))
    # Bail out on an empty body or an unexpected payload shape.
    if not payload or type(payload['usproxy']) != list:
        return results
    for entry in payload['usproxy']:
        results.append(ProxyIP(ip=entry['ip'], port=entry['port']))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Read ip/port spans from every .l2 row in the .wlist listing."""
    results: [ProxyIP] = []
    for row in document.find('.wlist > ul > li:nth-child(2) .l2'):
        ip_cell = row.find('span:nth-child(1)')
        port_cell = row.find('span:nth-child(2)')
        if ip_cell and port_cell:
            results.append(ProxyIP(ip=ip_cell.text(), port=port_cell.text()))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Collect proxies from #list table rows using the data-title cells."""
    results: [ProxyIP] = []
    for row in html.find('#list table tr'):
        ip_cell = row.find('td[data-title="IP"]', first=True)
        port_cell = row.find('td[data-title="PORT"]', first=True)
        if ip_cell and port_cell:
            results.append(ProxyIP(ip=ip_cell.text, port=port_cell.text))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Regex-extract ip and port from the first two cells of each nested-table row."""
    results: [ProxyIP] = []
    for row in document.find('table table tr'):
        ip_cell = row.find('td:nth-of-type(1)')
        port_cell = row.find('td:nth-of-type(2)')
        if not (ip_cell and port_cell):
            continue
        # NOTE(review): these cells expose .text as an attribute here (no call)
        # — confirm against the sibling HTML-based variant, which does the same.
        address = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_cell.text).group(0)
        port = re.search(r'\d{2,5}', port_cell.text).group(0)
        results.append(ProxyIP(ip=address, port=port))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Read ip/port from #proxylisttable rows, tagging each proxy with this provider."""
    results: [ProxyIP] = []
    for row in html.find('#proxylisttable tbody tr'):
        address = row.find('td:nth-child(1)', first=True).text
        port = row.find('td:nth-child(2)', first=True).text
        results.append(ProxyIP(ip=address, port=port, provider=self.__class__.__name__))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Read ip/port cells, stripping inline document.write(...) noise from the ip."""
    results: [ProxyIP] = []
    for row in document.find('table tr'):
        ip_cell: PyQuery = row.find('td:nth-child(1)')
        port_cell: PyQuery = row.find('td:nth-child(2)')
        if ip_cell and port_cell:
            cleaned = re.sub(r'document\.write\(.+\)', '', ip_cell.text())
            results.append(ProxyIP(ip=cleaned, port=port_cell.text()))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Decode the base64 "ip:port" argument of each row's Proxy('...') script call."""
    results: [ProxyIP] = []
    for entry in html.find('#proxy-table > div.table-wrap ul'):
        script = entry.find('li.proxy script', first=True).text
        encoded_args = re.findall(r"Proxy\('(.+)'\)", script)
        if not encoded_args:
            continue
        decoded = base64.b64decode(encoded_args[0]).decode("utf-8")
        address = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', decoded)[0]
        port = re.findall(r':(\d{2,5})', decoded)[0]
        results.append(ProxyIP(ip=address, port=port))
    return results
def save_ip(p: ProxyIP):
    """Insert p, or refresh latency/stability/validity fields of the existing row."""
    exists = ProxyIP.select().where(ProxyIP.ip == p.ip).count() > 0
    if not exists:
        logger.debug('Creating new ip record: ' + p.__str__())
        p.save()
    else:
        logger.debug('Update an existing ip record: ' + p.__str__())
        (ProxyIP
         .update(latency=p.latency, stability=p.stability, is_valid=p.is_valid,
                 is_anonymous=p.is_anonymous, updated_at=datetime.datetime.now())
         .where(ProxyIP.ip == p.ip)
         .execute())
    logger.debug('Saved: ' + p.__str__())
def parse(self, html: HTML) -> [ProxyIP]:
    """Read ip/port cells from each table row, stripping document.write(...) noise."""
    results: [ProxyIP] = []
    for row in html.find('table tr'):
        ip_cell = row.find('td:nth-child(1)', first=True)
        port_cell = row.find('td:nth-child(2)', first=True)
        if ip_cell and port_cell:
            cleaned = re.sub(r'document\.write\(.+\)', '', ip_cell.text)
            results.append(ProxyIP(ip=cleaned, port=port_cell.text))
    return results
def parse(self, html: HTML) -> [ProxyIP]:
    """Scan the raw body for every "ip:port" occurrence, tagging with this provider."""
    results: [ProxyIP] = []
    body = html.raw_html.decode('utf-8')
    for pair in re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', body):
        address = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', pair).group(0)
        port = re.search(r':(\d{2,5})', pair).group(1)
        if address and port:
            results.append(ProxyIP(ip=address, port=port, provider=self.__class__.__name__))
    return results
def parse(self, document: PyQuery) -> [ProxyIP]:
    """Scan the document markup for every "ip:port" occurrence.

    Bug fix: PyQuery.html() returns str on Python 3, so the original's
    .decode('utf-8') raised AttributeError; the text is now used directly
    (the sibling parser decodes raw_html because that one really is bytes).
    """
    ip_list: [ProxyIP] = []
    text = document.html()
    ip_port_str_list = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', text)
    for ip_port in ip_port_str_list:
        ip = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_port).group(0)
        port = re.search(r':(\d{2,5})', ip_port).group(1)
        if ip and port:
            p = ProxyIP(ip=ip, port=port)
            ip_list.append(p)
    return ip_list