def get_next_batch(batch_size=64, cnt=0):
    """
    Build one training batch from the labelled images stored under ``root``.

    The labels come from ``root + "mappings.txt"``; sample ``j`` uses line
    ``j`` of that file and the image ``<j zero-padded to 4 digits>.jpg``.

    :param batch_size: number of samples in the batch
    :param cnt: zero-based batch index; the batch covers sample indices
        ``cnt * batch_size`` up to (not including) ``(cnt + 1) * batch_size``
    :return: ``(batch_x, batch_y)`` where ``batch_x`` has shape
        ``[batch_size, IMAGE_HEIGHT * IMAGE_WIDTH]`` (pixel values divided by
        255) and ``batch_y`` has shape
        ``[batch_size, MAX_CAPTCHA * CHAR_SET_LEN]`` (output of ``text2vec``)
    :raises IndexError: if the mapping file has fewer lines than the batch
        needs
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])

    # The context manager closes the mapping file even if reading fails.
    with open(root + "mappings.txt", 'r') as f:
        lines = f.readlines()

    start = cnt * batch_size
    for i, j in enumerate(range(start, start + batch_size)):
        # The label is the last comma-separated field, cut at the first "=".
        text = lines[j].split(",")[-1].split("=")[0]
        image = Image.open(root + str(j).zfill(4) + ".jpg")
        image = convert2gray(image)
        # Scale to [0, 1]; (image.flatten() - 128) / 128 would give zero mean.
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
def batch_hack_captcha(inroad, outroad):
    """
    Recognise every captcha image in a directory and write the results out.

    For each file in ``inroad`` one line ``<name>,<prediction>=<value>`` is
    printed and appended to ``outroad``, where ``<name>`` is the file name up
    to its first ``.``, ``<prediction>`` is what ``hack_function`` returned
    and ``<value>`` is the result of evaluating that prediction.

    :param inroad: directory containing the captcha images
    :param outroad: path of the text file that receives the results
    :return: ``-1`` if any exception occurred, otherwise ``None``
    """
    try:
        # Both the output file and the session are released on every exit path.
        with open(outroad, 'w') as fw, tf.Session() as sess:
            output = crack_captcha_cnn()
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(model_path))
            # Build the prediction op once: creating it per image would add
            # new nodes to the graph on every iteration.
            predict = tf.argmax(
                tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
            for file_name in os.listdir(inroad):
                # Keep the Qt event loop responsive while the batch runs.
                QApplication.processEvents()
                image = Image.open(os.path.join(inroad, file_name))
                image = convert2gray(image)
                image = image.flatten() / 255
                pred = hack_function(sess, predict, image)
                # NOTE(review): eval() executes the predicted text as Python.
                # It is only acceptable while predictions are limited to the
                # model's own character set -- confirm, or replace it with a
                # dedicated arithmetic parser.
                predict_text = eval(pred)
                stem = file_name.split(".")[0]
                line = "{},{}".format(
                    stem, str(pred) + "=" + str(predict_text))
                print(line)
                fw.write(line + "\n")
                # Flush per image so partial results survive a later failure.
                fw.flush()
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still work.
        print("ERROR!")
        return -1
def batch_hack_captcha(imgpath="E:/MyProjects/captcha-tensorflow/vcode1"):
    """
    Recognise every captcha image in a directory and report the accuracy.

    The expected text of each image is its file name with ``.png`` removed.
    Mismatches are printed as they occur; the number of images, elapsed time,
    right/total count and accuracy are printed at the end.

    :param imgpath: directory containing the labelled captcha images;
        defaults to the location that was previously hard-coded
    :return: None
    """
    # Define the prediction graph once, before the session starts.
    output = crack_captcha_cnn()
    predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(model_path))

        stime = time.time()
        imgs = os.listdir(imgpath)
        task_cnt = len(imgs)
        right_cnt = 0
        for file_name in imgs:
            text = file_name.replace('.png', '')
            # Force three channels before handing the array to convert2gray.
            img = Image.open(imgpath + "/" + file_name).convert("RGB")
            image = convert2gray(np.array(img))
            image = image.flatten() / 255
            predict_text = hack_function(sess, predict, image)
            if text == predict_text:
                right_cnt += 1
            else:
                print("标记: {} 预测: {}".format(text, predict_text))

        print('task:', task_cnt, ' cost time:', (time.time() - stime), 's')
        print('right/total-----', right_cnt, '/', task_cnt)
        # An empty directory used to raise ZeroDivisionError here.
        accuracy = right_cnt / task_cnt if task_cnt else 0.0
        print('正确率:', accuracy)
def recognize(self, bytes):
    """
    Run the captcha recogniser on raw image data.

    :param bytes: binary content of the image
    :return: the value returned by ``hack_function`` for the image, or
        ``None`` (after showing a status-bar message) when the data cannot be
        opened as an image
    """
    try:
        picture = Image.open(BytesIO(bytes))
    except OSError:
        self.ui.statusbar.showMessage('输入的数据不是验证码!')
        return None

    # Any other size is resized to 160x60 before recognition.
    target_size = (160, 60)
    if picture.size != target_size:
        picture = picture.resize(target_size)

    pixels = convert2gray(np.array(picture))
    return hack_function(self.sess, self.predict, pixels.flatten() / 255)
def get_next_batch(batch_size=128):
    """
    Build one training batch from freshly generated captchas.

    :param batch_size: number of samples in the batch
    :return: ``(batch_x, batch_y)``; ``batch_x`` is
        ``[batch_size, IMAGE_HEIGHT * IMAGE_WIDTH]`` with pixel values divided
        by 255, ``batch_y`` is ``[batch_size, MAX_CAPTCHA * CHAR_SET_LEN]``
        filled with ``text2vec`` of each caption
    """
    inputs = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    targets = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])

    # Lazy generator: each captcha is produced right before it is consumed.
    samples = (wrap_gen_captcha_text_and_image() for _ in range(batch_size))
    for row, (caption, picture) in enumerate(samples):
        gray = convert2gray(picture)
        # Scale to [0, 1]; (gray.flatten() - 128) / 128 would give zero mean.
        inputs[row, :] = gray.flatten() / 255
        targets[row, :] = text2vec(caption)

    return inputs, targets
def predict_captcha():
    """
    Recognise every captcha under ``config.test_data_path`` and print accuracy.

    The trained parameters are restored from the checkpoint state found in the
    ``model`` directory. For each image the prediction and the inference time
    are printed. A prediction counts as correct when it equals the last
    ``config.MAX_CAPTCHA`` characters of the file name before its extension.

    If no checkpoint is found the function reports that and returns without
    predicting, since scoring freshly initialised weights is meaningless.

    :return: None
    """
    output = cnn_architecture.crack_captcha_cnn()
    # Build the prediction op once; creating it inside the loop would add new
    # nodes to the graph for every image.
    predict = tf.argmax(
        tf.reshape(output, [-1, config.MAX_CAPTCHA, config.CHAR_SET_LEN]), 2)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Locate the trained parameters.
        checkpoint = tf.train.get_checkpoint_state("model")
        if not (checkpoint and checkpoint.model_checkpoint_path):
            print("Could not find CNN network Model.")
            return
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)

        n = 0
        test_image_files = os.listdir(config.test_data_path)
        for f in test_image_files:
            image = Image.open(os.path.join(config.test_data_path, f))
            image = np.array(image)
            image = utils.convert2gray(image)
            image = image.flatten()

            # Timing now covers inference and decoding only.
            time1 = time.time()
            text_list = sess.run(predict,
                                 feed_dict={
                                     cnn_architecture.X: [image],
                                     cnn_architecture.keep_prob: 1
                                 })
            predict_text = utils.vec2text(text_list)
            elapsed = time.time() - time1
            print("{} predict:{} elapsed time: {} ms".format(
                f, predict_text, format(elapsed * 1000, '0.2f')))

            # The label is embedded right before the file extension.
            index = f.rfind(".")
            if predict_text == f[index - config.MAX_CAPTCHA:index]:
                n += 1

        # max(..., 1) avoids dividing by zero for an empty test directory.
        print("ACC {}".format(n / max(len(test_image_files), 1)))
def get_next_batch(batch_size=128,
                   imgpath="D:/gitrepos/captcha-tensorflow/vcode1/"):
    """
    Build one training batch from labelled captcha images on disk.

    Each sample is obtained by calling ``train_data().get_text_img(imgpath)``.

    :param batch_size: number of samples in the batch
    :param imgpath: directory passed to ``get_text_img``; defaults to the
        location that was previously hard-coded
    :return: ``(batch_x, batch_y)``; ``batch_x`` is
        ``[batch_size, IMAGE_HEIGHT * IMAGE_WIDTH]`` with pixel values divided
        by 255, ``batch_y`` is ``[batch_size, MAX_CAPTCHA * CHAR_SET_LEN]``
        filled with ``text2vec`` of each label
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])

    # One data source instance serves the whole batch.
    td = train_data()
    for i in range(batch_size):
        text, image = td.get_text_img(imgpath)
        image = convert2gray(image)
        # Scale to [0, 1]; (image.flatten() - 128) / 128 would give zero mean.
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
def discern(imgFile):
    """
    Recognise the captchas ``image/1.png`` through ``image/100.png``.

    Each image is resized to 160x60, the resized copy is saved to
    ``/tmp/big/<n>.png``, and the recognised text is printed as
    ``<n>:<text>``.

    :param imgFile: not used by the current implementation
        (NOTE(review): confirm whether a single-file mode was intended)
    :return: the text recognised for the last image (``image/100.png``)
    """
    output = crack_captcha_cnn()
    # Build the prediction op once; creating it inside the loop would add new
    # nodes to the graph on each of the 100 iterations.
    predict = tf.argmax(
        tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        for i in range(100):
            image = Image.open("image/%s.png" % (i + 1))
            image = image.resize((160, 60))
            image.save("/tmp/big/%s.png" % (i + 1))
            image = np.array(image)
            image = convert2gray(image)
            image = image.flatten() / 255
            captcha = hack_function(sess, predict, image)
            print("%s:%s" % (i + 1, captcha))
        return captcha
def get_captcha(image_path):
    """
    Recognise a single captcha image from the test-data directory.

    The graph is rebuilt from the ``.meta`` file that sits next to the latest
    checkpoint in the ``model`` directory, and the tensors ``input_x:0``,
    ``out_y:0`` and ``keep_prob:0`` are looked up by name.

    :param image_path: file name relative to ``config.test_data_path``
    :return: the predicted text, or ``None`` when no checkpoint is found
    """
    tf.reset_default_graph()

    full_path = os.path.join(config.test_data_path, image_path)
    pixels = utils.convert2gray(np.array(Image.open(full_path))).flatten()
    # A leading -1 lets numpy infer the batch dimension.
    batch = np.reshape(
        pixels, (-1, config.IMAGE_HEIGHT, config.IMAGE_WIDTH, 1))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Locate the trained parameters.
        state = tf.train.get_checkpoint_state("model")
        if not state or not state.model_checkpoint_path:
            print("Could not find CNN network Model.")
            return None

        model_file = state.model_checkpoint_path
        saver = tf.train.import_meta_graph(model_file + '.meta')
        saver.restore(sess, model_file)
        print("Successfully loaded:", model_file)

        # Fetch the tensors from the imported (now default) graph.
        graph = tf.get_default_graph()
        input_x = graph.get_tensor_by_name("input_x:0")
        out_y = graph.get_tensor_by_name("out_y:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")

        logits = tf.reshape(
            out_y, [-1, config.MAX_CAPTCHA, config.CHAR_SET_LEN])
        predict = tf.argmax(logits, 2)
        text_list = sess.run(predict,
                             feed_dict={input_x: batch, keep_prob: 1})
        return utils.vec2text(text_list)
def run(self):
    """
    Generate ``self.count`` captchas, recognise each and report the results.

    Every mismatch is emitted through ``self.signal`` as it occurs; after the
    loop a ``right/total`` summary with the success percentage is emitted.
    If ``self.status`` becomes falsy the method returns immediately without
    emitting the summary.

    :return: None
    """
    right_cnt = 0
    for i in range(1, self.count + 1):
        # Allows the run to be cancelled from outside.
        if not self.status:
            return
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)
        image = image.flatten() / 255
        predict_text = hack_function(self.sess, self.predict, image)
        if text == predict_text:
            right_cnt += 1
        else:
            self.signal.emit(
                QtGui.QListWidget, 'listWidget_2',
                "===========({}/{})\n标记: {}\n预测: {}".format(
                    i, self.count, text, predict_text))

    # A count of zero used to raise ZeroDivisionError in the summary.
    percent = right_cnt * 100 / self.count if self.count else 0.0
    self.signal.emit(
        QtGui.QListWidget, 'listWidget_2',
        'right/total-----{}/{}({}%)'.format(right_cnt, self.count, percent))
def get_next_batch(batch_size=128):
    """
    Build one training batch from freshly generated captchas.

    :param batch_size: number of samples in the batch
    :return: ``(batch_x, batch_y)``; ``batch_x`` is
        ``[batch_size, IMAGE_HEIGHT * IMAGE_WIDTH]`` with pixel values divided
        by 255, ``batch_y`` is ``[batch_size, MAX_CAPTCHA * CHAR_SET_LEN]``
        filled with ``text2vec`` of each caption
    """
    # One row per sample: flattened image pixels.
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    # One row per sample: the encoded caption returned by text2vec.
    batch_y = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])

    # Iterating a 2-D array yields row views, so writing through ``row[:]``
    # fills the batch arrays in place.
    for pixel_row, label_row in zip(batch_x, batch_y):
        caption, picture = wrap_gen_captcha_text_and_image()
        gray = convert2gray(picture)
        # flatten() returns a 1-D copy in row-major order; dividing by 255
        # scales it to [0, 1] ((gray.flatten() - 128) / 128 would give zero
        # mean instead).
        pixel_row[:] = gray.flatten() / 255
        label_row[:] = text2vec(caption)

    return batch_x, batch_y