def get_code():
    """Download the captcha image, clean it up and return the OCR text.

    The raw download is written to ``code.jpg`` and the cleaned image to
    ``newcode.jpg`` in the current working directory.

    Returns:
        Whatever ``image_to_string`` recognizes in the cleaned image.
    """
    url = 'http://www.rongtudai.com/validimg.html'
    # A timeout keeps a stalled server from blocking the caller forever.
    f = requests.get(url, timeout=10)
    print(f)
    with open("code.jpg", "wb") as code:
        code.write(f.content)

    img = Image.open('code.jpg').convert("RGBA")
    pixdata = img.load()
    width, height = img.size
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)

    # One pass that is equivalent to the three sequential sweeps of the
    # original: dark red or dark green pixels become black, any remaining
    # pixel with a non-zero blue channel becomes white, the rest is untouched.
    for y in range(height):
        for x in range(width):
            red, green, blue = pixdata[x, y][:3]
            if red < 90 or green < 136:
                pixdata[x, y] = black
            elif blue > 0:
                pixdata[x, y] = white

    # JPEG has no alpha channel and current Pillow refuses to write an RGBA
    # image as JPEG, so drop the (fully opaque) alpha before saving.
    img = img.convert("RGB")
    img.save('newcode.jpg')

    # OCR the in-memory image; re-reading the JPEG would only add
    # compression artifacts.
    vcode = image_to_string(img)
    return vcode
def get_code(self, url):
    """Fetch the captcha image at ``url`` and return its OCR text.

    The download is stored under ``/codepic/`` using the slice
    ``url[-39:-19]`` as file name; the cleaned image is written to
    ``/codepic/newcode.gif``.

    Args:
        url: Address of the captcha image.

    Returns:
        The recognized text, or the list ``[8080, 80, 8888, 3128, 9999]``
        when any step of the download / clean-up / OCR pipeline fails.
    """
    # url = 'http://www.rongtudai.com/validimg.html'
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)
    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        f = requests.get(url, timeout=10)
        print(f)
        name = url[-39:-19]
        path = '/codepic/' + name
        with open(path, "wb") as code:
            code.write(f.content)

        img = Image.open(path).convert("RGBA")
        pixdata = img.load()
        width, height = img.size
        # One pass equivalent to the original three sweeps: dark red or dark
        # green pixels become black, any remaining pixel with a non-zero
        # blue channel becomes white.
        for y in range(height):
            for x in range(width):
                red, green, blue = pixdata[x, y][:3]
                if red < 90 or green < 136:
                    pixdata[x, y] = black
                elif blue > 0:
                    pixdata[x, y] = white

        img.save('/codepic/' + 'newcode.gif')
        img = Image.open('/codepic/' + 'newcode.gif')
        vcode = image_to_string(img)
    except Exception:
        # Recognition failed, so fall back to trying every default port.
        # ``Exception`` (not a bare ``except``) lets Ctrl-C and SystemExit
        # propagate instead of being silently turned into the fallback.
        vcode = [8080, 80, 8888, 3128, 9999]
    return vcode
def get_code():
    """Download the captcha, binarize it and return the recognized text.

    Side effects: writes the raw download to ``code.jpg`` and the processed
    image to ``newcode.jpg`` in the current working directory, and prints
    the HTTP response object.

    Returns:
        The result of ``image_to_string`` on the processed image.
    """
    url = 'http://www.rongtudai.com/validimg.html'
    # Bound the wait so an unresponsive server cannot hang the caller.
    f = requests.get(url, timeout=10)
    print(f)
    with open("code.jpg", "wb") as code:
        code.write(f.content)

    img = Image.open('code.jpg')
    img = img.convert("RGBA")
    pixdata = img.load()

    # The original ran three whole-image sweeps (red < 90 -> black,
    # green < 136 -> black, blue > 0 -> white). Each pixel is independent,
    # so the same result is produced by deciding every pixel once.
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            pixel = pixdata[x, y]
            if pixel[0] < 90 or pixel[1] < 136:
                pixdata[x, y] = (0, 0, 0, 255)
            elif pixel[2] > 0:
                pixdata[x, y] = (255, 255, 255, 255)

    # Current Pillow raises "cannot write mode RGBA as JPEG"; every pixel is
    # opaque, so converting to RGB loses nothing.
    img = img.convert("RGB")
    img.save('newcode.jpg')

    # Recognize the in-memory image rather than the lossy JPEG copy.
    vcode = image_to_string(img)
    return vcode
def getverify1(name):
    """Binarize the image file ``name`` and return the OCR text.

    Writes two intermediate files into the current working directory: the
    grayscale image as ``'g' + <file name>`` and the binarized image as
    ``'b' + <file name>``, where ``<file name>`` is the part of ``name``
    after the last ``/``.

    Relies on the names ``table`` (lookup table passed to ``Image.point``)
    and ``rep`` (indexed by its own keys to get replacement strings) being
    defined elsewhere in the file.

    Returns:
        The recognized text, stripped, upper-cased and with the ``rep``
        substitutions applied. The text is also printed.
    """
    file_name = name.split('/')[-1]

    # Grayscale copy of the input, saved for inspection.
    grayscale = Image.open(name).convert('L')
    grayscale.save('g' + file_name)

    # Threshold segmentation through the lookup table, also saved.
    binary = grayscale.point(table, '1')
    binary.save('b' + file_name)

    # Recognize, then normalize the raw OCR output.
    recognized = pytesser3.image_to_string(binary).strip().upper()
    for wrong in rep:
        recognized = recognized.replace(wrong, rep[wrong])
    # binary.save(recognized + '.jpg')
    print(recognized)
    return recognized
def recognize_captcha(self):
    """Clean up ``captcha.jpg`` and return the text OCR finds in it.

    Steps: convert to grayscale, binarize around the mean pixel value,
    whiten the one-pixel border, then whiten every interior pixel that has
    more than two white 4-neighbours. The denoising works in place, so
    pixels whitened earlier in the scan influence later ones.

    Returns:
        The stripped OCR result (also printed).
    """
    print('正在识别验证码!!!')
    white, black = 255, 0

    # Grayscale.
    img = Image.open('captcha.jpg').convert('L')
    pixdata = img.load()
    width, height = img.size

    # Binarize: the average pixel value of the image is the threshold;
    # anything at or above it becomes white.
    threshold = sum(img.getdata()) / (width * height)
    for y in range(height):
        for x in range(width):
            pixdata[x, y] = black if pixdata[x, y] < threshold else white

    # Remove the black frame by whitening the outermost rows and columns.
    for y in range(height):
        pixdata[0, y] = white
        pixdata[width - 1, y] = white
    for x in range(width):
        pixdata[x, 0] = white
        pixdata[x, height - 1] = white

    # Denoise: a pixel with more than N white neighbours (up, down, left,
    # right) is treated as noise and set to white.
    N = 2
    for y in range(1, height - 1):
        for x in range(1, width - 1):
            neighbours = (
                pixdata[x, y - 1],  # up
                pixdata[x, y + 1],  # down
                pixdata[x - 1, y],  # left
                pixdata[x + 1, y],  # right
            )
            if neighbours.count(white) > N:
                pixdata[x, y] = white

    captcha_code = pytesser3.image_to_string(img).strip()
    print(captcha_code)
    return captcha_code
def mode_by_num(s, n):
    """Obtain the captcha text using the strategy selected by ``n``.

    Args:
        s: Passed through unchanged as the first argument of ``get_code``.
        n: Mode selector; compared against the strings ``'1'``, ``'2'`` and
            ``'3'``.

    Returns:
        * ``'1'`` (manual): the text the user types after the image has been
          shown in a separate process.
        * ``'2'`` (OCR): the string returned by ``pytesser3``, or the integer
          ``2`` if fetching or recognizing the image fails.
        * ``'3'`` (model): the placeholder ``'0000'``; prediction is not
          implemented yet.
        * anything else: the integer ``0``.
    """
    if n == '1':
        im = get_code(s, codeurl)
        # Show the image in a child process so the prompt is not blocked.
        p_im = multiprocessing.Process(target=im_show, args=(im, ))
        p_im.start()
        code = input("验证码内容为:")
        # Give the viewer at most three seconds to exit.
        p_im.join(3)
        return code

    if n == '2':
        try:
            im = get_code(s, codeurl, False)
            code = pytesser3.image_to_string(im)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
            # SystemExit are not converted into the error code.
            return 2
        print('验证码为:', code)
        return code

    if n == '3':
        # Load the model and predict the result — not implemented yet.
        return '0000'

    return 0
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36", "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9", "Connection": "keep-alive" } # def get_url(url): # content=requests.get(url,headers=headers) def pre_process(): threshold = 0.5 table = [] for i in range(256): if (i < threshold): table.append(0) else: table.append(1) return table link = r"C:\Users\john\Downloads\1.bmp" im = Image.open(link) #打开图片 imagry = im.convert("L") # imagry.show() # table=pre_process() # out=imagry.point(table,"1") # out.show() print(pytesser3.image_to_string(imagry))
def ocr(self):
    """Run OCR on the image referenced by ``self.img`` and print the text."""
    import pytesser3
    from PIL import Image

    picture = Image.open(self.img)
    recognized = pytesser3.image_to_string(picture)
    print(recognized)
def captcha(self, img):
    """Return the text ``pytesser3`` recognizes in ``img``.

    Args:
        img: The picture to recognize, as an ``Image.Image`` object.
    """
    # TODO FileNotFound
    return pytesser3.image_to_string(img)
def scan():
    """Find the page in ``paper6.jpg``, flatten it, OCR it, save the text.

    Pipeline: erode, blur and edge-detect a 500-pixel-high copy of the photo,
    take the largest contour that approximates to four corners, warp the
    full-resolution original to that quadrilateral, binarize it, run
    ``pytesser3`` on it and store the recognized characters whose code point
    is below 126 in ``text.txt``.

    ``bin_open`` and ``original_open`` must be defined elsewhere in the file;
    ``builtins.open`` is temporarily replaced by ``bin_open`` while OCR runs.
    The result is decoded from bytes afterwards, so ``bin_open`` presumably
    makes pytesser3 read its output in binary mode — verify against its
    definition.

    Raises:
        FileNotFoundError: ``paper6.jpg`` cannot be read.
        RuntimeError: no four-cornered contour is found among the five
            largest contours.
        FileExistsError: ``text.txt`` or ``temp.txt`` already exists.
    """
    image = cv2.imread("paper6.jpg")
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        raise FileNotFoundError("paper6.jpg could not be read")

    # Edge detection runs on a 500px-high copy; ``ratio`` maps the detected
    # corners back onto the full-resolution original.
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # MORPH_RECT is the shape constant this call expects; it has the same
    # numeric value as the MORPH_ERODE constant used previously.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    image = cv2.erode(image, kernel, iterations=1)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 30, 175)

    found = cv2.findContours(edged.copy(), cv2.RETR_LIST,
                             cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns (contours, hierarchy) on OpenCV 2.4 and 4.x but
    # (image, contours, hierarchy) on 3.x; choose by length so that 4.x does
    # not end up with the hierarchy array.
    cnts = found[0] if len(found) == 2 else found[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    screenCnt = None
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            screenCnt = approx
            break
    if screenCnt is None:
        raise RuntimeError("no four-cornered contour found in paper6.jpg")

    cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

    # Convert the warped image to grayscale, then threshold it to give it
    # that 'black and white' paper effect.
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    T = threshold_local(warped, 11, offset=10, method="gaussian")
    warped = (warped > T).astype("uint8") * 255
    warped = cv2.threshold(warped, 0, 255,
                           cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # OCR goes through a temporary PNG named after the process id.
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, warped)
    import builtins
    img = Image.open(filename)
    try:
        builtins.open = bin_open
        bts = pytesser3.image_to_string(img)
    finally:
        # Always restore the real ``open`` and drop the temporary image.
        builtins.open = original_open
        os.remove(filename)

    # Write the decoded text, then produce a filtered copy that keeps only
    # characters with code point < 126 and move it over the first file.
    # Mode "x" makes both creations fail if the file already exists.
    with open("text.txt", "x") as raw_file:
        raw_file.write(str(bts, 'cp1252', 'ignore'))
    with open("text.txt", "r") as raw_file, open("temp.txt", "x") as clean_file:
        clean_file.write(
            "".join(ch for ch in raw_file.read() if ord(ch) < 126))
    os.remove("text.txt")
    os.renames("temp.txt", "text.txt")
for y in range(1, h - 1): for x in range(1, w - 1): count = 0 if pixdata[x, y - 1] > 245: count += 1 if pixdata[x, y + 1] > 245: count += 1 if pixdata[x - 1, y] > 245: count += 1 if pixdata[x + 1, y] > 245: count += 1 if count > 3: pixdata[x, y] = 255 return image # # image = Image.open('code.jpg') # im = binaring(image=image) # im.show() # im = depoint(im) # im.show() from pytesser3 import image_to_string image = Image.open('code.jpg') image = binaring(image) image = depoint(image) print(image_to_string(image))
# One login attempt: fetch the home page, download and OCR its captcha,
# then submit the credentials together with the recognized code.
# Uses ``session``, ``url``, ``headers`` and ``count`` defined earlier.
print("666")
print(u"\u5218\u6653\u78ca")
# Load the home page and remember the cookie header it sets.
HomePage_req=session.get(url)
g_cookie=HomePage_req.headers['set-cookie']
print(g_cookie)
HomePage_html=HomePage_req.text
HomePage_soup=BeautifulSoup(HomePage_html, 'html.parser')
# The captcha is the <img id="yw0"> element; its src is appended to ``url``.
checkcodeurl = url+HomePage_soup.find('img', {'id':'yw0'})['src']
checkcode = session.get(url=checkcodeurl, headers=headers).content
with open('./check.png', 'wb') as f:
    f.write(checkcode)
    f.close()
# Display the captcha, then OCR a grayscale copy of it.
image=PIL.Image.open('./check.png')
image.show()
imgry = image.convert('L')
code = pytesser3.image_to_string(imgry).strip()
print(count)
print(code)
# The credentials and the captcha text are sent as query parameters.
params={ 'userName':'******', 'password':'******', 'verifyCode':code }
response = session.get('https://data.cma.cn/user/Login.html', params=params)
result = response.text
print(u"登录消息为:"+result)
print("*"*100)
# Count this attempt.
count=count+1
#print(response.headers)
cookies=response.headers['set-cookie']
#print(cookies)
# -*- coding:utf-8 -*-
# Binarize a sample GIF with a fixed threshold, display it and print the
# text that pytesser3 recognizes in it.
from PIL import Image
import pytesser3

image = Image.open(r'E:\python\queryhb\oriimg\8.gif')
imgry = image.convert('L')

# Lookup table for Image.point: gray levels below the threshold map to 0,
# all others to 1.
threshold = 140
table = [0] * threshold + [1] * (256 - threshold)

out = imgry.point(table, '1')
out.show()

text = pytesser3.image_to_string(out)
#print(pytesser.image_file_to_string(r'E:\python\queryhb\oriimg\1.gif'))
print(text)
def verfyCode(self, driver, ID=None, Class=None, css=None, link_text=None, xpath=None):
    """Screenshot the page, crop the captcha element and return its OCR text.

    The screenshot is saved as ``CreateCaptcha.png`` inside a ``verPic``
    directory next to the parent of the current working directory, then
    overwritten with the cropped captcha region.

    Args:
        driver: Selenium WebDriver showing the page with the captcha.
        ID, Class, css, link_text, xpath: Locators for the captcha element.
            Every locator that is given is looked up, in this order, and the
            last one given determines the element that is cropped.

    Returns:
        The recognized text with spaces and the characters ``" - . ` ;``
        removed. The text is also printed.

    Side effects:
        Sets ``self.imageEle`` (the located element) and ``self.codeRange``
        (the crop box as ``(left, top, right, bottom)``).
    """
    vCodePath = os.path.join(os.path.abspath(".."), 'verPic')
    os.makedirs(vCodePath, exist_ok=True)

    # Save the full-page screenshot. os.path.join keeps the path valid on
    # every platform instead of hard-coding a backslash.
    imagePath = os.path.join(vCodePath, 'CreateCaptcha.png')
    driver.get_screenshot_as_file(imagePath)

    # Locate the captcha element. find_element(by, value) works on old and
    # new Selenium releases, whereas the find_element_by_* helpers were
    # removed in Selenium 4.3. The strings are the values of the By constants.
    locators = (
        ("id", ID),
        ("class name", Class),
        ("css selector", css),
        ("link text", link_text),
        ("xpath", xpath),
    )
    for strategy, value in locators:
        if value is not None:
            self.imageEle = driver.find_element(strategy, str(value))

    # Position and size of the element give the crop box.
    location = self.imageEle.location
    size = self.imageEle.size
    self.codeRange = (
        int(location['x']),
        int(location['y']),
        int(location['x'] + size['width']),
        int(location['y'] + size['height']),
    )

    # Cut the captcha region out of the screenshot and overwrite the file.
    imageTemp = Image.open(imagePath)
    imageFrame = imageTemp.crop(self.codeRange)
    imageFrame.save(imagePath)
    time.sleep(2)

    # Grayscale ('L'), then binarize with a fixed threshold.
    image = Image.open(imagePath).convert('L')
    # NOTE(review): the original created ImageEnhance.Contrast(image) but
    # never called .enhance(), so no contrast change was ever applied. The
    # dead call is dropped; choose a factor and apply it here if wanted.
    threshold = 80
    table = [0 if i < threshold else 1 for i in range(256)]
    # Image.point returns a new image; the original discarded it, so the
    # binarization never reached the OCR step.
    image = image.point(table, '1')

    # Strip characters that OCR noise commonly adds.
    result = pytesser3.image_to_string(image).translate(
        str.maketrans('', '', ' "-.`;'))
    print(u'验证码为:%s' % result)
    return result
pixdata = image.load() # print(pixdata) w, h = image.size for y in range(h): for x in range(w): # print(pixdata[x,y]) pix_l.append(pixdata[x, y]) if pixdata[x, y] < threshold: pixdata[x, y] = 0 else: pixdata[x, y] = 255 return image from pytesser3 import image_to_string # from pytesser3 import image_file_to_string from PIL import Image tesseract_exe_name = 'c:\\Program Files (x86)\\Tesseract-OCR\\tesseract' image = Image.open('db\\qwq.png') pix_l = [] # image.show() # pix_l_set = sorted(list(set(pix_l))) # print(pix_l_set[:len(pix_l_set)//2]) # 求平均数的值 image2 = binaring(image) # 二值化 image3 = depoint(image2) # 降噪 # image3.show() # 识别文字 print('code: ', image_to_string(image3))
except: '' j = j + 1 book_lst = 'http://211.87.177.4/reader/book_lst.php' r = urllib.request.urlopen( urllib.request.Request( url=book_lst, headers=getHeaders(Cookie))).read().decode('utf-8') soup = BeautifulSoup(r, "lxml") info = soup.find_all('li')[10].string #print(info) if (info): verify = "1234" else: img = Image.open("verify.png") verify = pytesser3.image_to_string(img) verify = verify[0:4] #print(verify) post_url = 'http://211.87.177.4/reader/redr_verify.php' post_data = { 'number': '你的学号', 'passwd': '你的图书馆密码', 'captcha': verify, 'select': 'cert_no', 'returnUrl': '' } book_lst = 'http://211.87.177.4/reader/book_lst.php' #print(urllib.request.urlopen(urllib.request.Request(url=post_url, data=urllib.parse.urlencode(post_data).encode('utf-8'),headers=getHeaders(Cookie))).read().decode('utf-8')) s = urllib.request.urlopen( urllib.request.Request(