def convertToProxy(self, row):
    """Build a ``Proxy`` from one positional record.

    ``row`` is read by index:

    ===== ==============
    index attribute
    ===== ==============
    0     addr
    1     port
    2     protocol
    3     info
    4     source
    5     testCount
    6     successCount
    7     averageTime
    ===== ==============

    Items 0-4 are decoded as UTF-8, so they are expected to be byte
    strings (presumably what the storage driver hands back -- confirm
    against the caller).  Items 5-7 are copied unchanged.

    The protocol is lower-cased.  A ``None`` protocol is kept as ``None``
    rather than raising ``AttributeError``; proxies without a protocol are
    stored by ``testProxyPool`` in this file, so they need to survive the
    round trip.  Falsy info/source values (``None`` or empty) become
    ``None``.

    Returns the populated ``Proxy``.
    """
    proxy = Proxy()
    proxy.addr = row[0].decode('utf-8')
    proxy.port = row[1].decode('utf-8')

    # Only a real None is special-cased; an empty protocol still decodes to
    # an empty string exactly as before.
    rawProtocol = row[2]
    if rawProtocol is None:
        proxy.protocol = None
    else:
        proxy.protocol = rawProtocol.decode('utf-8').lower()

    proxy.info = row[3].decode('utf-8') if row[3] else None
    proxy.source = row[4].decode('utf-8') if row[4] else None
    proxy.testCount = row[5]
    proxy.successCount = row[6]
    proxy.averageTime = row[7]
    return proxy
def parse(self, html, source):
    """Extract every proxy listed in the first ``<p>`` element of *html*.

    Each stripped text fragment inside that element is handed to
    ``Proxy.parse`` together with *source*; fragments for which it returns
    a falsy value are skipped.  When the page has no ``<p>`` element a
    warning is logged and an empty list is returned.
    """
    # NOTE(review): no parser is named here, so Beautiful Soup chooses one
    # itself -- consider pinning it if results must be reproducible.
    soup = BeautifulSoup(html)
    paragraph = soup.find('p')

    if not paragraph:
        log.warning('Can not find content element.')
        return []

    candidates = (
        Proxy.parse(fragment, source)
        for fragment in paragraph.stripped_strings
    )
    return [candidate for candidate in candidates if candidate]
def _makeTestProxy(addr, port, protocol, info, source, stats=None):
    """Return a ``Proxy`` populated with the given fields.

    ``stats`` is an optional ``(testCount, successCount, averageTime)``
    tuple.  When it is omitted those three attributes are not assigned at
    all, so they keep whatever ``Proxy()`` initialises them to.
    """
    proxy = Proxy()
    proxy.addr = addr
    proxy.port = port
    proxy.protocol = protocol
    proxy.info = info
    proxy.source = source
    if stats is not None:
        proxy.testCount, proxy.successCount, proxy.averageTime = stats
    return proxy


def testProxyPool():
    """Exercise insert, query, update and delete on a test proxy pool.

    Four proxies are inserted; the fourth has no test statistics assigned.
    The test then checks that:

    * ``getAllProxies`` reports all four;
    * ``getProxies(2)`` yields proxy1 then proxy2;
    * ``getProxies(4)`` yields only proxy1, proxy2, proxy3 (proxy4 is
      expected to be left out -- presumably because it has no statistics;
      confirm against ``getProxies``);
    * after proxy3's ``averageTime`` is lowered to 100 and updated, the
      order becomes proxy1, proxy3, proxy2;
    * ``deleteProxy`` and ``deleteAllProxies`` shrink the pool to 3 and 0.

    The pool is closed in a ``finally`` clause so that a failing assertion
    does not leave it open.
    """
    proxy1 = _makeTestProxy(
        u'1.1.1.1', u'1', u'http', u'1234', u'3321', (5, 5, 20))
    # Non-ASCII info/source to cover Unicode round-tripping.
    proxy2 = _makeTestProxy(
        u'2.2.2.2', u'2', u'http', u'信息', u'来源', (5, 5, 200))
    # No protocol, info or source, and a partial success rate.
    proxy3 = _makeTestProxy(
        u'3.3.3.3', u'3', None, None, None, (5, 3, 12000))
    # Statistics deliberately left unassigned.
    proxy4 = _makeTestProxy(u'4.4.4.4', u'4', u'http', None, None)

    pool = createTestProxyPool()
    try:
        assert len(pool.getAllProxies()) == 0

        pool.insertProxy(proxy1)
        pool.insertProxy(proxy2)
        pool.insertProxy(proxy3)
        pool.insertProxy(proxy4)
        assert len(pool.getAllProxies()) == 4

        proxies = pool.getProxies(2)
        assert len(proxies) == 2
        assertProxyEqual(proxies[0], proxy1)
        assertProxyEqual(proxies[1], proxy2)

        proxies = pool.getProxies(4)
        assert len(proxies) == 3
        assertProxyEqual(proxies[0], proxy1)
        assertProxyEqual(proxies[1], proxy2)
        assertProxyEqual(proxies[2], proxy3)

        # Making proxy3 faster than proxy2 should move it ahead of proxy2.
        proxy3.averageTime = 100
        pool.updateProxy(proxy3)
        proxies = pool.getProxies(4)
        assert len(proxies) == 3
        assertProxyEqual(proxies[0], proxy1)
        assertProxyEqual(proxies[1], proxy3)
        assertProxyEqual(proxies[2], proxy2)

        pool.deleteProxy(proxy1)
        assert len(pool.getAllProxies()) == 3

        pool.deleteAllProxies()
        assert len(pool.getAllProxies()) == 0
    finally:
        # Release the pool even when an assertion above fails.
        pool.close()