def show_results_nip(nip):
    """Return a bold HTML line saying whether ``nip`` passes ``check_nip``.

    The text is green when ``Validator.check_nip`` returns a truthy value
    and red otherwise.
    """
    # NOTE(review): ``nip`` is concatenated into the markup unescaped --
    # confirm callers never pass raw user input here.
    is_valid = Validator().check_nip(nip)
    if not is_valid:
        return '<b style="color:red">NIP number ' + nip + ' is incorrect</b>'
    return '<b style="color:green">NIP number ' + nip + ' is correct</b>'
def test_nip_correct(self):
    # A plain 10-digit NIP and a dash-separated one must both be accepted.
    checker = Validator()
    for candidate in ("4178292374", "338-471-29-22"):
        self.assertTrue(checker.check_nip(candidate))
def test_regon_correct(self):
    # Every REGON listed here is expected to pass check_regon.
    checker = Validator()
    valid_regons = ("413498285", "711508900")
    for candidate in valid_regons:
        accepted = checker.check_regon(candidate)
        self.assertTrue(accepted)
def test_pesel_correct(self):
    # Every PESEL listed here is expected to pass check_pesel.
    checker = Validator()
    for candidate in ("84021034288", "74081519098"):
        self.assertTrue(checker.check_pesel(candidate))
def test_regon_incorrect(self):
    # A 7-digit value and one containing letters must both be rejected.
    checker = Validator()
    bad_regons = ("4732314", "4732abc")
    for candidate in bad_regons:
        accepted = checker.check_regon(candidate)
        self.assertFalse(accepted)
def test_pesel_incorrect(self):
    # A 7-digit value, one containing letters and an 11-digit value are
    # all expected to fail check_pesel.
    checker = Validator()
    for candidate in ("5005051", "5005abc", "12345678912"):
        self.assertFalse(checker.check_pesel(candidate))
def show_results_pesel(pesel):
    """Return a bold HTML line saying whether ``pesel`` passes ``check_pesel``.

    The text is green when ``Validator.check_pesel`` returns a truthy value
    and red otherwise.
    """
    # NOTE(review): ``pesel`` is concatenated into the markup unescaped --
    # confirm callers never pass raw user input here.
    is_valid = Validator().check_pesel(pesel)
    if not is_valid:
        return '<b style="color:red">Pesel number ' + pesel + ' is incorrect</b>'
    return '<b style="color:green">Pesel number ' + pesel + ' is correct</b>'
def show_results_regon(regon):
    """Return a bold HTML line saying whether ``regon`` passes ``check_regon``.

    The text is green when ``Validator.check_regon`` returns a truthy value
    and red otherwise.
    """
    # NOTE(review): ``regon`` is concatenated into the markup unescaped --
    # confirm callers never pass raw user input here.
    is_valid = Validator().check_regon(regon)
    if not is_valid:
        return '<b style="color:red">Regon number ' + regon + ' is incorrect</b>'
    return '<b style="color:green">Regon number ' + regon + ' is correct</b>'
def test_nip_incorrect(self):
    # Each NIP below should be reported as incorrect: one is only six
    # digits long, the other contains letters.
    checker = Validator()
    for candidate in ("111233", "111abc"):
        self.assertFalse(checker.check_nip(candidate))
def run(self):
    """Crawl, de-duplicate, validate and store proxies in an endless loop.

    Each cycle maps ``self.crawl`` over ``PROXYWEB_LIST``, flattens and
    de-duplicates the returned proxy dicts, maps ``Validator.validate`` over
    them, hands the non-``None`` results to ``MysqlHelper.batch_save`` and
    then calls ``time.sleep(SLEEP_TIME)``.  The method never returns.
    """
    # One pool serves every cycle.  Building a fresh Pool(THREADNUM) per
    # iteration (as before) left the previous pool's workers unreleased.
    pool = Pool(THREADNUM)
    while True:
        print('*****************Spider Begin****************')
        crawled = pool.map(self.crawl, PROXYWEB_LIST)
        # Flatten the per-source lists into a single list of proxy dicts.
        candidates = [proxy for proxies in crawled for proxy in proxies]
        # Remove duplicates: dicts are unhashable, so their item tuples are
        # put through a set and turned back into dicts.
        candidates = [
            dict(items)
            for items in {tuple(proxy.items()) for proxy in candidates}
        ]
        print('*****************Validator begin****************')
        validator = Validator()
        # TODO: unclear whether sharing one pool between crawling and
        # validating is efficient (question carried over from the author).
        checked = pool.map(validator.validate, candidates)
        usable = [proxy for proxy in checked if proxy is not None]
        dbHelper = MysqlHelper()
        dbHelper.batch_save(usable)
        print('*****************Validator stop****************')
        print('*****************Spider are Pausing****************')
        time.sleep(SLEEP_TIME)
def check():
    """Validate the submitted NIP, REGON and PESEL and render ``form.html``.

    The three values are read from ``request.form``; the template receives
    each raw value together with the result of the matching ``Validator``
    check (``result_nip``, ``result_regon``, ``result_pesel``).
    """
    form = request.form
    nip = form.get('nip')
    regon = form.get('regon')
    pesel = form.get('pesel')

    validator = Validator()
    context = {
        'nip': nip,
        'regon': regon,
        'pesel': pesel,
        'result_nip': validator.check_nip(nip),
        'result_regon': validator.check_regon(regon),
        'result_pesel': validator.check_pesel(pesel),
    }
    return render_template('form.html', **context)
def run(self):
    """Re-validate stored proxies forever, crawling for more when too few remain.

    Each cycle builds a ``MongoHelper`` and a ``Validator`` around it and
    calls ``validator.run_db()``.  When the returned count is below
    ``MINNUM`` the parsers in ``parserList`` are crawled through
    ``self.crawl_pool``, the results are flattened, de-duplicated and passed
    to ``validator.run_list``.  Every surviving proxy either has the
    ``updatetime`` of its existing record refreshed or is written with
    ``sqlHelper.update``.  The cycle ends with ``time.sleep(UPDATE_TIME)``.
    """
    # NOTE(review): the nesting under ``if count < MINNUM`` was reconstructed
    # from a whitespace-collapsed listing -- confirm against the original.
    # Single-argument ``print(...)`` behaves the same as the print statement
    # used elsewhere in this code.
    while True:
        print('spider beginning -------')
        sqlHelper = MongoHelper()
        print('validator beginning -------')
        validator = Validator(sqlHelper)
        count = validator.run_db()
        print('validator end ----count=%s' % count)
        if count < MINNUM:
            # map() returns one list of proxy dicts per parser:
            # [[{}, {}, {}], [{}, {}, {}]]
            crawled = self.crawl_pool.map(self.crawl, parserList)
            # Flatten to [{}, {}, {}, {}, {}, {}].
            proxys = [proxy for batch in crawled for proxy in batch]
            # Was "print '...%s', len(proxys)": the comma printed the
            # placeholder literally instead of formatting the count.
            print('first_proxys--%s' % len(proxys))
            # De-duplicate: dicts are unhashable, so go through item tuples.
            proxys = [
                dict(items)
                for items in {tuple(proxy.items()) for proxy in proxys}
            ]
            print('spider proxys -------%s' % type(proxys))
            # run_list() returns the checked proxies; None entries are dropped.
            proxys = validator.run_list(proxys)
            proxys = [value for value in proxys if value is not None]
            print('end_proxys--%s' % len(proxys))
            for proxy in proxys:
                exist = sqlHelper.selectOne({
                    'ip': proxy['ip'],
                    'port': proxy['port'],
                    'type': proxy['type'],
                })
                if exist:
                    # Known proxy: only refresh its timestamp.
                    sqlHelper.update(
                        exist,
                        {'$set': {'updatetime': datetime.datetime.now()}},
                    )
                else:
                    sqlHelper.update(
                        {'ip': proxy['ip'], 'port': proxy['port']},
                        proxy,
                    )
            print('success ip = %s' % sqlHelper.selectCount())
        print('spider end -------')
        time.sleep(UPDATE_TIME)
def run(self):
    """Re-validate stored proxies forever, crawling for more when too few remain.

    Each cycle opens a ``SqliteHelper``, lets a ``Validator`` re-check the
    stored proxies via ``run_db()`` and, when ``count[0]`` is below
    ``MINNUM``, crawls ``parserList`` through ``self.crawl_pool``, flattens
    and de-duplicates the results, passes them to ``validator.run_list`` and
    batch-inserts the outcome.  The helper is closed at the end of every
    cycle, then the loop calls ``time.sleep(UPDATE_TIME)``.
    """
    # NOTE(review): the nesting under ``if count[0] < MINNUM`` was
    # reconstructed from a whitespace-collapsed listing -- confirm it.
    while True:
        logger.info("Start to run spider")
        sqlHelper = SqliteHelper()
        try:
            logger.info('Start to run validator')
            validator = Validator(sqlHelper)
            count = validator.run_db()
            logger.info('Finished to run validator, count=%s' % count)
            if count[0] < MINNUM:
                # map() returns one list of proxy dicts per parser:
                # [[{}, {}, {}], [{}, {}, {}]]
                crawled = self.crawl_pool.map(self.crawl, parserList)
                # Flatten to [{}, {}, {}, {}, {}, {}].
                proxys = [proxy for batch in crawled for proxy in batch]
                logger.info('first_proxys: %s' % len(proxys))
                # De-duplicate: dicts are unhashable, so go through item tuples.
                proxys = [
                    dict(items)
                    for items in {tuple(proxy.items()) for proxy in proxys}
                ]
                logger.info('end_proxy: %s' % len(proxys))
                logger.info('spider proxys: %s' % type(proxys))
                # run_list() returns the proxies after checking.
                proxys = validator.run_list(proxys)
                sqlHelper.batch_insert(sqlHelper.tableName, proxys)
                logger.info('success ip: %s' % sqlHelper.selectCount())
        finally:
            # Close the helper even when validation, crawling or the insert
            # raises; previously close() was skipped on any error.
            sqlHelper.close()
        logger.info('Finished to run spider')
        time.sleep(UPDATE_TIME)
def run(self):
    """Re-validate stored proxies forever, crawling for more when too few remain.

    Each cycle opens a ``SqliteHelper``, lets a ``Validator`` re-check the
    stored proxies via ``run_db()`` and, when ``count[0]`` is below
    ``MINNUM``, crawls ``parserList`` through ``self.crawl_pool``, flattens
    and de-duplicates the results, passes them to ``validator.run_list`` and
    batch-inserts the outcome.  The helper is closed at the end of every
    cycle, then the loop calls ``time.sleep(UPDATE_TIME)``.
    """
    # NOTE(review): the nesting under ``if count[0] < MINNUM`` was
    # reconstructed from a whitespace-collapsed listing -- confirm it.
    # Single-argument ``print(...)`` behaves the same as the print statement
    # this code was written with.
    while True:
        print('spider beginning -------')
        sqlHelper = SqliteHelper()
        try:
            print('validator beginning -------')
            validator = Validator(sqlHelper)
            count = validator.run_db()
            print('validator end ----count=%s' % count)
            if count[0] < MINNUM:
                # map() returns one list of proxy dicts per parser:
                # [[{}, {}, {}], [{}, {}, {}]]
                crawled = self.crawl_pool.map(self.crawl, parserList)
                # Flatten to [{}, {}, {}, {}, {}, {}].
                proxys = [proxy for batch in crawled for proxy in batch]
                # Was "print '...%s',len(proxys)": the comma printed the
                # placeholder literally instead of formatting the count.
                print('first_proxys--%s' % len(proxys))
                # De-duplicate: dicts are unhashable, so go through item tuples.
                proxys = [
                    dict(items)
                    for items in {tuple(proxy.items()) for proxy in proxys}
                ]
                print('end_proxys--%s' % len(proxys))
                print('spider proxys -------%s' % type(proxys))
                # run_list() returns the proxies after checking.
                proxys = validator.run_list(proxys)
                sqlHelper.batch_insert(sqlHelper.tableName, proxys)
                print('success ip =%s' % sqlHelper.selectCount())
        finally:
            # Close the helper even when validation, crawling or the insert
            # raises; previously close() was skipped on any error.
            sqlHelper.close()
        print('spider end -------')
        time.sleep(UPDATE_TIME)
def test_parseDateString_invalidString_returnFalse(self):
    # A date written without separators is expected to be rejected.
    raw_date = "19940125"
    parsed = Validator.parse_date_string(raw_date)
    self.assertFalse(parsed)
def test_checkCity_digits_returnFalse(self):
    # A city made only of digits is expected to be rejected.
    city = "44"
    verdict = Validator.check_city(city)
    self.assertFalse(verdict)
def test_checkZipCode_validString_returnTrue(self):
    # The four-digit code "4253" is expected to be accepted.
    zip_code = "4253"
    verdict = Validator.check_zip_code(zip_code)
    self.assertTrue(verdict)
def test_isValidMobileNumber_badFormatString_returnFalse(self):
    # A number written with dashes and a slash is expected to be rejected.
    number = "06-20/432-4235"
    verdict = Validator.is_it_valid_mobile_number(number)
    self.assertFalse(verdict)
def test_isValidMobileNumber_emptyString_returnFalse(self):
    # The empty string is expected to be rejected as a mobile number.
    number = ""
    verdict = Validator.is_it_valid_mobile_number(number)
    self.assertFalse(verdict)
def test_isGenderValid_invalidStringOne_returnFalse(self):
    # An arbitrary word is expected to be rejected as a gender.
    gender = "jhfsdjhf"
    verdict = Validator.is_gender_valid(gender)
    self.assertFalse(verdict)
def test_isGenderValid_validStringTwo_returnTrue(self):
    # The value "M" is expected to be accepted as a gender.
    gender = "M"
    verdict = Validator.is_gender_valid(gender)
    self.assertTrue(verdict)
def test_isRealName_emptyString_returnFalse(self):
    # The empty string is expected to be rejected as a name.
    name = ""
    verdict = Validator.is_real_name(name)
    self.assertFalse(verdict)
def test_isRealName_invalidString_returnFalse(self):
    # A name containing a digit is expected to be rejected.
    name = "Valaki1"
    verdict = Validator.is_real_name(name)
    self.assertFalse(verdict)
def test_isRealName_validStringThree_returnTrue(self):
    # A four-part name whose first part is lower-case is expected to pass.
    name = "lel Dlgk Kkff Fgrsi"
    verdict = Validator.is_real_name(name)
    self.assertTrue(verdict)
def test_isRealName_validStringTwo_returnTrue(self):
    # A three-part capitalised name is expected to pass.
    name = "Fjkesj Jjfkds Tklds"
    verdict = Validator.is_real_name(name)
    self.assertTrue(verdict)
def test_isRealName_validStringOne_returnTrue(self):
    # A two-part capitalised name is expected to pass.
    name = "Fkdd Fkgnsri"
    verdict = Validator.is_real_name(name)
    self.assertTrue(verdict)
def test_parseDateString_emptyString_retunFalse(self):
    # The empty string is expected to be rejected as a date.
    raw_date = ""
    parsed = Validator.parse_date_string(raw_date)
    self.assertFalse(parsed)
def test_isValidMobileNumber_validString_returnTrue(self):
    # Both the "+36..." and the "06..." spelling must be accepted.
    # The second call used to be passed as assertTrue's ``msg`` argument,
    # so its result was never actually checked; each number now gets its
    # own assertion.
    for number in ("+36303665666", "06303665666"):
        self.assertTrue(Validator.is_it_valid_mobile_number(number))
def test_isGenderValid_invalidStringTwo_returnFalse(self):
    # A single digit is expected to be rejected as a gender.
    gender = "3"
    verdict = Validator.is_gender_valid(gender)
    self.assertFalse(verdict)
def test_isValidMobileNumber_tooShortString_returnFalse(self):
    # A ten-digit number is expected to be rejected as too short.
    number = "0630366566"
    verdict = Validator.is_it_valid_mobile_number(number)
    self.assertFalse(verdict)
def test_checkAddress_tooLongtring_returnFalse(self):
    # This four-word address (the "too long" case per the test name) is
    # expected to be rejected.
    address = "rgnrjogr ghrigh ierghi erug"
    verdict = Validator.check_address(address)
    self.assertFalse(verdict)
def test_isValidMobileNumber_containsLetterString_returnFalse(self):
    # A number with a letter in the middle is expected to be rejected.
    number = "063053i6435"
    verdict = Validator.is_it_valid_mobile_number(number)
    self.assertFalse(verdict)
def test_getmyip(self):
    # getMyIP() must return something other than None.  assertIsNotNone
    # replaces the former ``assertTrue(myIp != None, ...)`` identity check
    # written as an inequality.
    v = Validator()
    my_ip = v.getMyIP()
    self.assertIsNotNone(my_ip, "get my IP failed! return:%s" % my_ip)
    print(my_ip)
def test_checkAddress_validString_returnTrue(self):
    # A "street name number." style address is expected to be accepted.
    address = "Jfjsf ffe 32."
    verdict = Validator.check_address(address)
    self.assertTrue(verdict)
def test_emailIsValid_invalidString_returnFalse(self):
    # Each address gets its own assertion.  The second call used to be
    # passed as assertFalse's ``msg`` argument, so it was never checked.
    # NOTE(review): both literals are identical and look masked -- confirm
    # which invalid addresses were intended here.
    for address in ("*****@*****.**", "*****@*****.**"):
        self.assertFalse(Validator.email_is_valid(address))
def test_checkCity_emptyString_returnFalse(self):
    # The empty string is expected to be rejected as a city.
    city = ""
    verdict = Validator.check_city(city)
    self.assertFalse(verdict)
def test_emailIsValid_insufficientString_returnFalse(self):
    # An address with nothing before the "@" is expected to be rejected.
    address = "@sefa.con"
    verdict = Validator.email_is_valid(address)
    self.assertFalse(verdict)
def test_checkCity_valueNotInList_returnFalse(self):
    # "Budapest" is expected to be rejected (the "not in list" case per
    # the test name).
    city = "Budapest"
    verdict = Validator.check_city(city)
    self.assertFalse(verdict)
def test_emailIsValid_digits_returnFalse(self):
    # A string of digits only is expected to be rejected as an e-mail.
    address = "435453"
    verdict = Validator.email_is_valid(address)
    self.assertFalse(verdict)
def test_parseDateString_validString_returnTrue(self):
    # A dot-separated date is expected to yield a truthy result.
    raw_date = "1994.12.25"
    parsed = Validator.parse_date_string(raw_date)
    self.assertTrue(parsed)
def test_emailIsValid_emptyString_returnFalse(self):
    # The empty string is expected to be rejected as an e-mail.
    address = ""
    verdict = Validator.email_is_valid(address)
    self.assertFalse(verdict)
def test_isGenderValid_emptyString_returnFalse(self):
    # The empty string is expected to be rejected as a gender.
    gender = ""
    verdict = Validator.is_gender_valid(gender)
    self.assertFalse(verdict)
def test_checkAddress_emptyString_returnFalse(self):
    # The empty string is expected to be rejected as an address.
    address = ""
    verdict = Validator.check_address(address)
    self.assertFalse(verdict)
def test_emailIsValid_validString_returnTrue(self):
    # Each address gets its own assertion.  The second call used to be
    # passed as assertTrue's ``msg`` argument, so it was never checked.
    # NOTE(review): both literals are identical and look masked -- confirm
    # which valid addresses were intended here.
    for address in ("*****@*****.**", "*****@*****.**"):
        self.assertTrue(Validator.email_is_valid(address))
def test_detect_proxy(self):
    # detect_proxy() must hand back a mapping that carries a 'score' key.
    v = Validator()
    p = v.detect_proxy({'ip': '42.81.58.199', 'port': 80})
    # Fail with a clear message instead of an AttributeError when nothing
    # is returned.
    self.assertIsNotNone(p)
    # assertIn names the missing key and the container on failure, unlike
    # assertTrue('score' in p.keys()).
    self.assertIn('score', p)