def main():
    """Render the form PDF, crop two regions of the page image and print their OCR output.

    The page image path is fixed; presumably it is the file produced by
    ``pdfToImage`` -- confirm against that helper.
    """
    pdfToImage('assets/pdfs/form.pdf')
    page_image = "assets/images/page.jpg"

    # (crop box, destination file) for every field extracted from the page.
    regions = (
        ((95, 634, 1557, 708), 'assets/images/date.jpg'),
        ((99, 776, 1557, 1008), 'assets/images/comments.jpg'),
    )

    # All crops are written first, then OCR'd in the same order.
    for box, target in regions:
        crop(page_image, box, target)
    for _, target in regions:
        print(ocr(target))
def obtener_traducciones(mapa):
    """Run English and French OCR over the 'tra' area of *mapa*.

    Returns a dict with two keys: 'frances' holds element 0 of the French
    OCR result and 'ingles' holds element 1 of the English OCR result.
    """
    area = detectar_area_variantes(mapa)['tra']['im']
    preparada = cvr.preprocesar_texto_otsu(area, 127, 255, (3, 7), 3)

    # Same call order as before: English first, then French.
    resultado_eng = ocr(preparada, 'eng')
    resultado_fra = ocr(preparada, 'fra')

    return {
        'frances': resultado_fra[0],
        'ingles': resultado_eng[1],
    }
def main():
    """Load pic2.jpg, extract the scanned region, OCR it and show the scan in a window."""
    source = cv2.imread("pic2.jpg")
    contour = findContour(edgeDetection(source))
    scanned = scan(contour, source)

    # OCR
    ocr(preprocess(scanned))

    preview = imutils.resize(scanned, height=650)
    cv2.imshow("Scanned", preview)
    # Block until a key is pressed, then tear the window down.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def menu():
    """List stored OCR results; on POST, store an uploaded image, OCR it and persist the result.

    GET (or any non-POST) renders 'menu.html' with all ``FileContents`` rows
    ordered by creation date. POST saves the uploaded 'img' file, runs
    ``ocr`` on it, stores the JSON-encoded result and renders the same page.
    Aborts with 400 when the extension is not in ``UPLOAD_EXTENSIONS``; any
    processing failure returns an HTML error message instead of raising.
    """
    if request.method != 'POST':
        data = FileContents.query.order_by(FileContents.date_created).all()
        return render_template('menu.html', data=data)

    uploaded_file = request.files['img']
    # The filename comes from the client: keep only its last path component
    # so it cannot point outside the upload directory.
    filename = os.path.basename(uploaded_file.filename or '')
    if filename != '':
        file_ext = os.path.splitext(filename)[1]
        if file_ext not in app.config['UPLOAD_EXTENSIONS']:
            abort(400)
        uploaded_file.save(os.path.join(app.config['UPLOAD_PATH'], filename))

    # NOTE(review): the OCR path is hard-coded while the file is saved under
    # app.config['UPLOAD_PATH'] -- confirm both refer to the same directory.
    path = "static/uploads/" + str(filename)
    try:
        content = ocr(path)
        content = json.dumps(content)
        newFile = FileContents(name=filename, path=path, content=content)
        db.session.add(newFile)
        db.session.commit()
        data = FileContents.query.order_by(FileContents.date_created).all()
        return render_template('menu.html', data=data)
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return '<h1>There was a problem processing the receipt, make sure you uploaded the correct file</h1>'
def test_numero_cinco(self):
    # The ASCII-art digit below must be read by ocr() as 5.
    numero_cinco = """ __ |__ __| """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(5, ocr(numero_cinco))
def test_numero_nove(self):
    # The ASCII-art digit below must be read by ocr() as 9.
    numero_nove = """ __ |__| __| """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(9, ocr(numero_nove))
def test_numero_zero(self):
    # The ASCII-art digit below must be read by ocr() as 0.
    numero_zero = """ __ | | |__| """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(0, ocr(numero_zero))
def gameResult(self):
    """Print an end-of-game summary: crown matches, winner banner and the OCR'd cup digits.

    Template images are looked up in ``self.game_params.img`` and matched
    against ``self.imgbg`` via ``self.inHere``; the background image is
    refreshed first when it has not been captured yet.
    """
    print('=' * 20 + 'game reslut:')
    if self.imgbg is None:
        self.updateImgbg()

    # Crown templates, matched with threshold 0.97.
    for key, label in (('royale_red', 'red crown:'),
                       ('royale_blue', 'blue crown:')):
        template = cv2.imread(self.game_params.img[key])
        found, hits = self.inHere(template, self.imgbg, 0.97)
        if found:
            print(label, len(hits), hits)

    # Winner banners, matched with threshold 0.95. ``sign`` records the
    # outcome but is not used further inside this method.
    sign = ''
    for key, banner, mark in (('winner_blue', 'blue win!', '+'),
                              ('winner_red', 'red win!', '-')):
        template = cv2.imread(self.game_params.img[key])
        won, _ = self.inHere(template, self.imgbg, 0.95)
        if won:
            print(banner, end=' ')
            sign = mark

    # Grab the 24x15 screen region holding the cup count and OCR it as digits.
    left = self.game_params.game_area_left + 310
    top = self.game_params.game_area_top + 410
    cups_img = self.grabScreen(left, top, left + 24, top + 15)
    cups_str = ocr(cups_img, type='digits')
    print(cups_str)
    print('=' * 20 + 'game over')
def ocr_image(cache, url, codelang):
    """Return the OCR text for the image at *url*, using *cache* when possible.

    The result is whatever ``ret_val`` builds from a status code and a
    payload: 0 with the text on success (cached or fresh), 1 when the image
    could not be downloaded, 2 when OCR produced no result.
    """
    # This is checked in bot_listening but must be redone here, so that if
    # the OCR for the same page is requested multiple times it runs only once.
    text = get_from_cache(cache, url, codelang)
    if text:
        return ret_val(0, text)

    url = url.encode('utf-8')
    cache_key = image_key(url)
    # Unknown language codes fall back to English.
    lang = ocr.tesseract_languages.get(codelang, 'eng')

    basename = os.path.expanduser('~/tmp') + '/tesseract/image_%s' % cache_key
    image_filename = basename + ".jpg"

    utils.copy_file_from_url(url, image_filename)
    if not os.path.exists(image_filename):
        return ret_val(1, "could not download url: %s" % url)

    try:
        text = ocr.ocr(image_filename, basename, lang)
    finally:
        # Clean up the working files on every path; previously they were
        # left behind whenever OCR failed.
        os.remove(image_filename)
        if os.path.exists(basename + ".txt"):
            os.remove(basename + ".txt")

    if text is None:
        return ret_val(2, "ocr failed")

    cache.set(cache_key, text)
    return ret_val(0, text)
def test_numero_seis(self):
    # The ASCII-art digit below must be read by ocr() as 6.
    numero_seis = """ __ |__ |__| """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(6, ocr(numero_seis))
def test2():
    """Download the name-card image referenced in the request, OCR it and reply with the text.

    The image URL is read from ``action.detailParams.namecard.origin`` of
    the JSON body. The reply is a dict with "version" "2.0" and a single
    "simpleText" output carrying the recognised text with surrounding
    whitespace and all spaces removed.
    """
    payload = request.json
    image_url = payload['action']['detailParams']['namecard']['origin']

    # Store the remote image locally so ocr() can work on a file path.
    with urllib.request.urlopen(image_url) as remote, \
            open('./image.jpeg', 'wb') as local:
        local.write(remote.read())

    result_file = ocr('./image.jpeg')

    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
    recognised = pytesseract.image_to_string(result_file, lang='kor+eng')
    result_string = recognised.strip().replace(' ', '')
    print(result_string)

    simple_text = {"simpleText": {"text": result_string}}
    return {
        "version": "2.0",
        "template": {"outputs": [simple_text]},
    }
def build(self):
    """Fill ``self.data`` with the OCR text found inside each table cell.

    ``self.data`` becomes a list of rows mirroring ``self.cells``; each
    entry is the space-joined text of the OCR items whose centroid falls in
    that cell, or None when the cell received no text.
    """
    if self.cells is None:
        self.find_cells()

    ocr_data = ocr(self.image)

    # One empty bucket per cell, laid out like self.cells.
    buckets = [[[] for _ in row] for row in self.cells]

    # ocr_data must provide iterrows() yielding records with left/width/
    # top/height/text fields (presumably a pandas DataFrame -- confirm).
    for _, word in ocr_data.iterrows():
        centroid = get_centroid(
            word['left'], word['left'] + word['width'],
            word['top'], word['top'] + word['height'],
        )
        cell = self.find_cell_for_point(centroid)
        if cell is not None:
            buckets[cell[0]][cell[1]].append(word['text'])

    self.data = [
        [' '.join(words) if words else None for words in row]
        for row in buckets
    ]
def do_POST(self):
    """Handle an image upload: OCR it, upload the rendered result and reply with its signed URL.

    The 'content' form field is decoded as an image and written to a
    timestamp-named .bmp, which is passed to ``ocr.ocr``. The returned
    array is saved as .png, uploaded through ``storage`` and the signed URL
    is sent back as JSON ``{"url": ...}``.
    """
    # Local import so the cleanup below does not rely on ``shutil.os``.
    import os

    formData = cgi.FieldStorage(self.rfile,
                                headers=self.headers,
                                environ={'REQUEST_METHOD': 'POST'})
    content = formData.getvalue('content')
    img = Image.open(BytesIO(content))

    # Working files are named after the current Unix timestamp (seconds).
    base_name = str(int(time.time()))
    image_name = base_name + '.bmp'
    ocr_name = base_name + '.png'
    try:
        img.save(image_name)
        ocr_image = ocr.ocr(image_name, base_name)
        Image.fromarray(ocr_image).save(ocr_name)
        blob = storage.upload(CONFIG.project_id, CONFIG.buket, ocr_name,
                              service_account_file='service_account.json')
        signed_url = storage.signed_url_for_png(blob)
    finally:
        # Remove the working files even when a step above failed; the .tsv
        # is presumably written by ocr.ocr -- confirm.
        for leftover in (image_name, base_name + '.tsv', ocr_name):
            if os.path.exists(leftover):
                os.remove(leftover)

    self.send_response(HTTPStatus.OK)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    responseBody = json.dumps({'url': signed_url})
    self.wfile.write(responseBody.encode('utf-8'))
def test_numero_tres(self):
    # The ASCII-art digit below must be read by ocr() as 3.
    numero_tres=""" __ __| __| """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(3, ocr(numero_tres))
def solve(path):
    """OCR the equation image at *path* and return the solution (or a message) as a string.

    Arithmetic input yields the evaluated value or "True"/"False" for an
    inequality. Otherwise the equation is dispatched on the number of
    variables reported by ``alg.checknovar`` and, for one variable, on its
    degree. Every branch returns a string; the three-variable and
    "too many variables" branches used to print and return None.
    """
    eq = ocr.ocr(path)

    if detect.isArith(eq):
        if detect.isArithEq(eq):
            return f"\t\t{arith.arithEq(eq)}\n"
        if arith.arithIneq(eq):
            return "\t\tTrue\n"
        return "\t\tFalse\n"

    nterm, var = alg.checknovar(eq)
    if nterm == 1:
        # Renamed from ``pow`` so the builtin is not shadowed.
        degree = alg.checkdegree(eq, var[0])
        if degree == 1:
            return linear(eq, var[0])
        if degree == 2:
            return quadratic(eq, var[0])
        if degree == 3:
            return "Cubic equation\n"
        return "Can solve up to 3rd degree polynomial only!!\n"
    if nterm == 2:
        return simult(eq, var, nterm)
    if nterm == 3:
        return "Simultaneous equation three variables\n"
    return "Can solve equations upto 3 vaiables only!!!\n"
def test_numero_sete(self):
    # The ASCII-art digit below must be read by ocr() as 7.
    numero_sete = """ __ | | """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(7, ocr(numero_sete))
def test_numero_oito(self):
    # The ASCII-art digit below must be read by ocr() as 8.
    numero_oito = """ __ |__| |__| """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(8, ocr(numero_oito))
def do_one_page(opt, page_nr, filename):
    """Extract page *page_nr* of *filename*, OCR it and optionally compress the output.

    Nothing happens when no image could be extracted. The OCR output base
    name is ``opt.out_dir`` + ``page_NNNN``; the expected output file gets
    '.html' for the 'hocr' config and '.txt' otherwise. With
    ``opt.compress`` set, the output is compressed and the uncompressed
    file removed. The extracted image is always removed afterwards.
    """
    tiff_name = extract_image(opt, page_nr, filename)
    if not tiff_name:
        return

    page_base = opt.out_dir + "page_%04d" % page_nr
    suffix = '.html' if opt.config == 'hocr' else '.txt'
    out_filename = page_base + suffix

    ocr.ocr(tiff_name, page_base, opt.lang, opt.config)

    if opt.compress:
        utils.compress_file(out_filename, out_filename, opt.compress)
        os.remove(out_filename)

    os.remove(tiff_name)
def test_numero_dez(self):
    # The two-digit ASCII-art number below must be read by ocr() as 10.
    numero_dez = """ __ | | | | |__| """
    # Parenthesised form prints the same text on Python 2 and is valid Python 3.
    print(repr(numero_dez))
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(10, ocr(numero_dez))
def test_numero_um(self):
    # The ASCII-art digit below must be read by ocr() as 1.
    numero_um = """ | | """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(1, ocr(numero_um))
def test_numero_quatro(self):
    # The ASCII-art digit below must be read by ocr() as 4.
    numero_quatro=""" |__| | """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(4, ocr(numero_quatro))
def obtener_titulo(mapa):
    """Run Spanish OCR over the 'tit' area of *mapa* and split the result into number and entry.

    Returns a dict where 'numero' is the first OCR element with spaces
    removed and 'ententrada' is the remaining elements joined by spaces.
    """
    area = detectar_area_variantes(mapa)['tit']['im']
    preparada = cvr.preprocesar_texto_adapta(area, 255, 71, 30)
    partes = ocr(preparada, 'spa')

    return {
        'numero': partes[0].replace(' ', ''),
        'ententrada': ' '.join(partes[1:]),
    }
def test_numero_dois(self):
    # The ASCII-art digit below must be read by ocr() as 2.
    numero_dois = """ __ __| |__ """
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(2, ocr(numero_dois))
def do_one_page(opt, page_nr, filename):
    """Extract page *page_nr* of *filename*, OCR it and optionally compress the output.

    Nothing happens when no image could be extracted. The OCR output base
    name is ``opt.out_dir`` + ``page_NNNN``; the expected output file gets
    '.hocr' for the 'hocr' config and '.txt' otherwise. With
    ``opt.compress`` set, the output is compressed and the uncompressed
    file removed. The extracted image is always removed afterwards.
    """
    tiff_name = extract_image(opt, page_nr, filename)
    if not tiff_name:
        return

    page_base = opt.out_dir + "page_%04d" % page_nr
    suffix = '.hocr' if opt.config == 'hocr' else '.txt'
    out_filename = page_base + suffix

    ocr.ocr(tiff_name, page_base, opt.lang, opt.config)

    if opt.compress:
        utils.compress_file(out_filename, out_filename, opt.compress)
        os.remove(out_filename)

    os.remove(tiff_name)
def getyzm():
    """Fetch the validation-code image, OCR it and check the code with the server.

    Returns a ``(ok, code)`` tuple: ``ok`` is True when the validation
    response text contains 'success', and ``code`` is the OCR'd string. If
    the validation request fails, ``(False, '')`` is returned.
    """
    print("getyzm")
    fp = io.BytesIO(a.get("https://isisn.nsfc.gov.cn/egrantindex/validatecode.jpg",
                          result=False, o=True).content)
    image = Image.open(fp)
    yzm = ocr(image)
    try:
        reply = a.post("https://isisn.nsfc.gov.cn/egrantindex/funcindex/validate-checkcode",
                       "checkCode=" + yzm)
        return 'success' in reply.text, yzm
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return False, ''
def getPokemonInfo():
    """Take a screenshot, OCR the pokemon name and return its shiny-list x, y and colour.

    The name is lower-cased before it is used for the three ``ds`` lookups.
    """
    toolkit.takeScreenshot()
    screenshot = ds.getScreenshot()
    name = ocr.ocr('pokemon', screenshot).lower()
    return (
        ds.getShinyListX(name),
        ds.getShinyListY(name),
        ds.getShinyListColor(name),
    )
def test_image(self):
    # Runs ocr() on every screenshot listed in ``cases`` and checks the
    # returned (out, err) pair against the expectations for that case.
    for name, expected in cases.items():
        image = os.path.join("screenshots", name)
        out, err = ocr(image)
        if "err" in expected:
            # The case declares the exact stderr content it expects.
            assert err == expected["err"]
        else:
            # No expected error: stderr must be empty bytes.
            assert err == b""
            # NOTE(review): the nesting of this assertion was reconstructed
            # from flattened source -- confirm whether it belongs inside the
            # else branch or applies to every case.
            assert expected["ocr_out"] in out
        # ocrout = os.path.join("screenshots", name) + ".txt"
        # with open(ocrout, encoding='utf-8') as fh:
        #     expectedoutput = fh.read()
        # assert out == expectedoutput
    # Dedicated check for view.jpg: a resolution warning on stderr and
    # whitespace-only output.
    image = os.path.join("screenshots", "view.jpg")
    out, err = ocr(image)
    assert err == b"Warning. Invalid resolution 0 dpi. Using 70 instead.\n"
    assert b' \n\n' == out
def workerThreadFunc():
    # Worker body: runs ocr.ocr() over every path in ocr.file_list, updates
    # the progress bar, then writes ocr.textlist to the already-open file
    # object ``fl``. ``self`` and ``fl`` come from the enclosing scope.
    # NOTE(review): block nesting was reconstructed from flattened source --
    # confirm against the original file.
    self.ongoing = True
    print('==========')
    print('光学字符识别')
    print('启动时间:' + time.asctime(time.localtime(time.time())))
    # One entry is subtracted from the count; the loop below skips list.txt.
    print('共找到文件数:' + str(int(len(ocr.file_list)) - 1))
    Counts = (int(len(ocr.file_list)) - 1)
    time.sleep(0.5)
    self.ui.progressBar.setRange(0, Counts)
    self.ui.progressBar.setValue(0)
    print('==========')
    # The same header lines are queued for the output file.
    ocr.textlist.append('==========')
    ocr.textlist.append('光学字符识别')
    ocr.textlist.append('启动时间:' + time.asctime(time.localtime(time.time())))
    ocr.textlist.append('共找到文件数:' + str(int(len(ocr.file_list) - 1)))
    ocr.textlist.append('==========')
    count = 1
    # The loop target is the attribute ocr.path, so it is reassigned on
    # every iteration.
    for ocr.path in ocr.file_list:
        print(ocr.file_list)
        Counts = int(Counts)
        if ocr.path == "C:\\MonianHello\\list.txt":
            continue
        try:
            # ocr.transimg(ocr.path)
            time.sleep(0.5)
            self.ui.progressBar.setValue(count)
            print('{0} / {1}'.format(str(count), str(Counts)))
            ocr.ocr(ocr.path)
            count += 1
        except:
            # Any failure for one file is reported and the loop continues;
            # count is not advanced for that file.
            print('出现内部错误')
    # Dump the collected text, one entry per line, then an end timestamp.
    for i in ocr.textlist:
        fl.write(i)
        fl.write('\n')
    fl.write('结束时间:' + time.asctime(time.localtime(time.time())))
    fl.close()
    print('写入成功,已将文件写入' + str(ocr.outputtext))
    print('结束时间:' + time.asctime(time.localtime(time.time())))
    print('识别完成,现在将结果写入文件...')
    time.sleep(5)
    self.ongoing = False
def upload_file():
    """On POST, save the uploaded file, OCR it and render the 6 best cosine-similarity matches.

    The OCR output is reduced to its ``\\w+`` tokens joined by single
    spaces before being passed to ``cosine_similarity_T``. For non-POST
    requests nothing is returned (unchanged from the original).
    """
    if request.method == 'POST':
        # Function-scope import: ``os`` is needed only for the sanitising below.
        import os

        f = request.files['file']
        # The filename is client-supplied; keep only its final component so
        # the upload cannot be written outside the working directory.
        file_name = os.path.basename(f.filename or '')
        f.save(file_name)

        text = str(ocr(str(file_name)))
        text = ' '.join(re.findall(r'\w+', text))

        df = cosine_similarity_T(10, text).head(6)
        return render_template('index.html', frame=df)
def __init__(self, image, caption=None):
    """Store *image* and its caption, deriving the caption by OCR when none is given.

    The image is passed through ``alignDimensions`` when its outer length
    is not ``dimX`` or the length of its first row is not ``dimY``.
    """
    self.image = image
    # Photo OCR is used only when no (truthy) caption was supplied.
    self.caption = caption if caption else ocr.ocr(self.image)

    # Make image dimensions uniform.
    already_uniform = (len(self.image) == dimX
                       and len(self.image[0]) == dimY)
    if not already_uniform:
        self.image = alignDimensions.alignDimensions(self.image, dimX, dimY)
def __init__(self): """ 初始化 :return: null """ # 超类初始化 super().__init__() # UI初始化 self.ui = Ui_mainWidget() self.ui.setupUi(self) self.grabKeyboard() self.setMouseTracking(True) self.setWindowFlags(Qt.FramelessWindowHint) self.setWindowIcon(QIcon('OCR.ico')) # 初始化相机 self.camera = QCamera() self.imageCapture = QCameraImageCapture(self.camera) self.viewsetting = QCameraViewfinderSettings() self.initimplement() # 初始化标题栏 self.initTitleBar() # 初始化系统托盘 self.tray = QSystemTrayIcon() self.tray.setIcon(QIcon('OCR.ico')) self.initTray() # OCR识别部分 self.OCR = ocr() self.OCR.setappid('1257206643') self.OCR.setsecretid('AKIDFTddWEg9Ncsz0sE7oOpBNOExdDdeCUJ3') self.OCR.setsecretkey('FQitsgUND8yfrZK0RrBMOJB5tWhCm5Ol') # 初始化登录部分 self.logWidget = QWidget() self.logui = Ui_Form() self.logui.setupUi(self.logWidget) self.logWidget.setWindowFlags(Qt.FramelessWindowHint) self.logWidget.setWindowModality(Qt.ApplicationModal) self.logui.close_btn.clicked.connect(self.logWidget.close) # 初始化变量 self.mousePressd = False self.mousePoint = None self.result = {} self.isFirst = False self.ocrType = ocrType.ocr_general # 默认为印刷体识别 # 初始化字定义信号连接 self.processFinished.connect(self.updateOCRInfo) self.ui.btn_login.clicked.connect(self.logWidget.show) self.ui.comboBox_choose.currentIndexChanged.connect(self.changeocrType)
def getCups(self):
    """Grab the cup-count region of the game area and return its OCR'd digits.

    Returns 0 when OCR yields an empty string; otherwise the OCR result is
    returned unchanged.
    """
    left = self.game_params.game_area_left + 230
    top = self.game_params.game_area_top
    cups_img = self.grabScreen(left, top + 55, left + 50, top + 57 + 16)
    cups_str = ocr(cups_img, type='digits')
    return 0 if cups_str == '' else cups_str
def capture(self):
    """Copy the selected screen region, encode it as PNG and hand the bytes to ``ocr``; then close."""
    # Handle a selection dragged from bottom-right to top-left: swap the
    # corners when the start lies to the right of the end.
    if self.startPos.x() > self.endPos.x():
        self.startPos, self.endPos = self.endPos, self.startPos

    # Copy the selected coordinates out of the snipped screen pixmap.
    selection = QRect(self.startPos, self.endPos)
    region_pixmap = self.snipScreen.copy(selection)

    # QPixmap -> QImage -> binary
    png_bytes = QByteArray()
    sink = QBuffer(png_bytes)
    region_pixmap.toImage().save(sink, 'PNG')

    # OCR
    ocr(sink.data(), self.args.debugmode, self.gcp_eng, self.is_cut_newline)
    self.close()
def ocr_endpoint():
    """On POST, OCR the uploaded file and return the text; otherwise return a greeting.

    Returns 'no file selected' when the upload has an empty filename. The
    upload is stored under ``UPLOAD_FOLDER`` only for the duration of the
    OCR call.
    """
    if request.method != 'POST':
        return "Welcome to ocr"

    file = request.files['file']
    if file.filename == '':
        return 'no file selected'

    filename = secure_filename(file.filename)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)
    try:
        text = ocr(file_path)
    finally:
        # Remove the upload even when OCR raises, so failed requests do not
        # accumulate files in the upload folder.
        os.remove(file_path)
    return text
def make_predict():
    """Predict a stock-price direction from a headline, an article URL or an uploaded file.

    The input source is chosen in this order: the 'headline' form field
    when it is not the empty string, else the 'url' field when it is not
    the empty string, else the uploaded 'file' (OCR'd via its filename).
    A truthy prediction maps to "decrease", otherwise "increase". The
    result page strings are passed through ``translate`` with the
    module-level ``lang``.
    """
    # The headline prediction is always computed up front, as before.
    output = predict(translate(str(request.form.get('headline')), 'en'))
    url = str(request.form.get('url'))

    if request.form.get('headline') != '':
        falls = output
    elif request.form.get("url") != '':
        toi_article = Article(url, language="en")  # en for English
        toi_article.download()
        # NOTE(review): parse() is not called before reading .title --
        # confirm this is intended.
        falls = predict(toi_article.title)
    else:
        upload = request.files['file']
        falls = predict(ocr(upload.filename))

    result = "decrease" if falls else "increase"

    return render_template(
        'index.html',
        prediction_text=translate('Stock Price will {}'.format(result), lang),
        Prediction=translate('Stock Market Prediction', lang),
        Predict=translate('Predict', lang),
        Plot=translate('plot', lang),
    )
def display():
    """OCR ``FILE_NAME``, dump the text and its labels to disk and render them.

    Side effects: rebinds the module-level ``ocrtext``, writes the raw text
    to 'test.txt' and the labelled info (as JSON) to 'test.json'.
    """
    global UPLOAD_FOLDER, FILE_NAME, ocrtext
    ocrtext = ocr(FILE_NAME)

    # Context manager guarantees the handle is closed even if write() fails.
    with open('test.txt', 'w+') as f:
        f.write(ocrtext)

    info = label(ocrtext)
    with open('test.json', 'w+') as g:
        json.dump(info, g)

    return render_template('display.html', ocrtext=info)
def run(self):
    # Capture loop: reads frames from self.device, shows them on
    # self.imgLab and saves a photo when self.paizhao is set to 1. The
    # device is released when the loop exits through an exception.
    # NOTE(review): block nesting below was reconstructed from flattened
    # source -- in particular confirm whether the pixmap update and the
    # ocr()/outcome() calls belong inside the ``paizhao`` branch.
    if self.device.isOpened():
        try:
            while True:
                # ``ret`` is not checked before ``frame`` is used.
                ret, frame = self.device.read()
                height, width, bytesPerComponent = frame.shape
                bytesPerLine = bytesPerComponent * width
                # Convert the colour channel order (BGR -> RGB), writing the
                # result back into ``frame``.
                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB, frame)
                # Convert to a QImage object
                image = QImage(frame.data, width, height, bytesPerLine, QImage.Format_RGB888)
                if self.paizhao == 1:
                    # A photo was requested: save the frame and clear the flag.
                    image.save('./origin/takephoto.png')
                    self.paizhao = 0
                pixmap = QPixmap.fromImage(image)
                pixmap = pixmap.scaled(400, 300, QtCore.Qt.KeepAspectRatio)
                self.imgLab.setPixmap(pixmap)
                ocr()
                outcome('./result/audio/outcome.txt')
        finally:
            self.device.release()
def test():
    """Save the uploaded 'image' as last.jpg in the working directory, OCR it and reply as JSON.

    The response body is ``{'ocr_result': <ocr output>}`` encoded with
    jsonpickle, sent with status 200.
    """
    print(request.files, flush=True)
    upload = request.files['image']

    path = os.getcwd() + '/last.jpg'
    upload.save(path)

    # Encode the response using jsonpickle.
    payload = jsonpickle.encode({'ocr_result': ocr.ocr(path)})
    return Response(response=payload,
                    status=200,
                    mimetype="application/json")
def do_ocr():
    """Decode the request body as an image, OCR it and return text and coordinates as JSON.

    The decoded image is written to a uniquely named PNG under /tmp, which
    is then passed to ``ocr.ocr``. The response is
    ``{"text": ..., "coordinates": ...}`` with status 200.
    """
    # Renamed from ``id`` so the builtin is not shadowed.
    request_id = uuid.uuid4()
    path = "/tmp/" + str(request_id) + ".png"

    # np.fromstring is deprecated for binary input; frombuffer is the
    # documented replacement and avoids copying the request bytes.
    nparr = np.frombuffer(request.data, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    cv2.imwrite(path, img)

    text, coordinates = ocr.ocr(path)
    data = {"text": text, "coordinates": coordinates}
    return Response(response=json.dumps(data),
                    status=200,
                    mimetype="application/json")
def login(username: str, password: str):
    """Log in through the CAS page using an OCR'd captcha and pickle the session cookies.

    On wrong credentials or a wrong captcha a message is printed and the
    function returns without saving anything. On success the session
    cookies are pickled to ``cookies/<username>``. The downloaded captcha
    image is removed on every path.
    """
    session = msession.session
    res = session.get(msession.urls.cas, verify=False)
    # Hidden "lt" token from the login form; it is posted back below.
    lt = re.findall(r'name="lt" value="(.*)"', res.text)

    captcha_url = msession.urls.captcha
    captcha_path = 'captcha.jpg'
    with session.get(captcha_url) as captcha:
        with open(captcha_path, mode='wb') as captcha_jpg:
            captcha_jpg.write(captcha.content)

    # The captcha is read by OCR instead of being typed in manually.
    try:
        captcha = ocr(captcha_path)
    finally:
        # Delete the image right away; it used to stay on disk whenever the
        # login was rejected.
        os.remove(captcha_path)

    login_form = {
        'username': username,
        'password': password,
        'captcha': captcha,
        'warn': 'true',
        'lt': lt[0],
        'execution': 'e1s1',
        '_eventId': 'submit',
        'submit': '登录'
    }
    post_res = session.post(msession.urls.cas, data=login_form)
    if '账号或密码错误' in post_res.text:
        print('账号或密码错误')
        return
    if '验证码不正确' in post_res.text:
        print('验证码不正确')
        return

    session.get(msession.urls.sso, verify=False)
    cookies = session.cookies
    if not os.path.exists('cookies'):
        os.mkdir('cookies')
    if not cookies:
        print('No cookies!')
    else:
        file_name = 'cookies' + os.sep + username
        with open(file_name, mode='wb') as cookies_file:
            pickle.dump(session.cookies, cookies_file)
def handler(event, context):
    """
    download tar.gz, do ocr and upload it to configured cloud service
    (currently AWS S3 or Google Drive)

    Configuration is read from environment variables: UPLOAD_TYPE
    ('gdrive', 's3' or 'discard'), the credentials/bucket that type needs,
    EMPTY_PAGE_THRESHOLD (default 200) and TESSERACT_LANG (default 'eng').
    Raises Exception for an unknown or missing UPLOAD_TYPE and
    AssertionError when a required variable for the chosen type is absent.
    With 'discard' the function returns after the first record without
    deleting the source object.
    """
    upload_type = os.environ.get('UPLOAD_TYPE', None)
    if upload_type == 'gdrive':
        for k in [
                'GDRIVE_CLIENT_ID', 'GDRIVE_CLIENT_SECRET',
                'GDRIVE_REFRESH_TOKEN'
        ]:
            assert k in os.environ, "missing {} in environment vars".format(k)
    elif upload_type == 's3':
        assert 'S3_BUCKET' in os.environ
    elif upload_type == 'discard':
        pass
    else:
        # Use the value already read: indexing os.environ here raised
        # KeyError instead of this message when UPLOAD_TYPE was unset.
        raise Exception('unknown upload type {}'.format(upload_type))

    empty_page_threshold = int(os.environ.get('EMPTY_PAGE_THRESHOLD', 200))
    language = os.environ.get('TESSERACT_LANG', 'eng')

    # Imported only after the configuration has been validated.
    import boto3
    import ocr
    s3 = boto3.client('s3')

    for record in event['Records']:
        src_bucket = record['s3']['bucket']['name']
        src_file = record['s3']['object']['key']
        s3.download_file(src_bucket, src_file, "/tmp/scan.tar.gz")
        pdf_file = ocr.ocr("/tmp/scan.tar.gz", empty_page_threshold, language)
        # Destination name: everything before the first '.' of the key, plus '.pdf'.
        dest_filename = src_file.split('.')[0] + '.pdf'
        if upload_type == 's3':
            bucket = os.environ['S3_BUCKET']
            s3.upload_file(pdf_file, bucket, dest_filename)
        elif upload_type == 'gdrive':
            folder = os.environ.get('GDRIVE_FOLDER', None)
            client_id = os.environ['GDRIVE_CLIENT_ID']
            client_secret = os.environ['GDRIVE_CLIENT_SECRET']
            refresh_token = os.environ['GDRIVE_REFRESH_TOKEN']
            upload_gdrive(pdf_file, dest_filename, client_id, client_secret,
                          refresh_token, folder)
        elif upload_type == 'discard':
            print('all fine, discarding file, but not deleting source file')
            return
        # After a successful upload, drop the source object and the local PDF.
        s3.delete_object(Bucket=src_bucket, Key=src_file)
        os.remove(pdf_file)
def main():
    """Detect and OCR the licence plate in each input image and report the characters.

    Options come from ``processArguments``. With no image path given,
    'car.JPG' is used and the usage text is printed. Results are printed
    per image, or collected and printed once as JSON when the 'output'
    option is 'json'. Unless 'silent' is set, intermediate images are
    shown and the program waits for a key press at the end.
    """
    program_options = processArguments()
    if program_options['help']:
        printUsage()
        exit()

    image_paths = program_options['image_paths']
    if len(image_paths) == 0:
        image_paths.append('car.JPG')
        print('Warning: Missing image path. Will use default image (' +
              image_paths[0] + ')')
        printUsage()

    json_output = []
    for i, image_path in enumerate(program_options['image_paths']):
        plate = detectplate(cv2.imread(image_path))

        if plate.size == 0:
            # No plate found: report it and move on to the next image.
            message = 'Error: cannot detect plate in ' + image_path
            if program_options['output'] == 'json':
                json_output.append({
                    'image_path': image_path,
                    'plates': [],
                    'error': message,
                })
            else:
                print(message)
            continue

        chars = cropChars(plate)
        if not program_options['silent']:
            cv2.imshow('plate' + str(i), plate)
            cv2.imshow('chars' + str(i), chars)

        chars, x_cords = segmentCharacters(chars)
        ordering = sortingCharacters(chars, x_cords)
        # Each character image is inverted before being passed to ocr().
        chars_text = [
            ocr(cv2.bitwise_not(chars[index])) for index, _ in ordering
        ]

        if program_options['output'] == 'json':
            json_output.append({'image_path': image_path,
                                'plates': [chars_text]})
        else:
            print(' '.join(chars_text))

    if program_options['output'] == 'json':
        print(json.dumps(json_output, indent=4))
    if not program_options['silent']:
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def main():
    """Process the latest screenshot, OCR the item names and show their info in a window.

    Taking the screenshot itself is not triggered here; only its location
    is queried.
    """
    # Location of the original screenshot.
    raw_location = screenshot.get_raw_location()

    # Process the screenshot (per the original notes, the result is saved
    # to the cache folder).
    region.region(raw_location)

    # OCR the bar image to obtain the list of item names.
    items = ocr(region.get_img_location())

    # Look up the price list and show it in the pop-up window.
    out_GUI.popwindow(output.get_info(items))
def main():
    """OCR movie frames 150-199 and print the music name, clear rate and full-combo rate of each."""
    for frame_no in range(150, 200):
        image_number = str(frame_no).zfill(2)
        trimmed_path = trim_image(f"movie_frames/0{image_number}.jpg")
        image_texts = ocr(trimmed_path)

        # All three lookups run before anything is printed.
        music_name = search_music_name(image_texts)
        clear_rate = search_clear_rate(image_texts)
        fullcombo_rate = search_fullcombo_rate(image_texts)

        print(image_number)
        print(f"music_name:{music_name}")
        print(f"clear_rate:{clear_rate}")
        print(f"full_combo_rate:{fullcombo_rate}")
        print()
# Small test function: the inner loop scores the network once over all
# test indices, the outer loop repeats that 100 times, and the mean of the
# 100 scores is used as the accuracy for that number of hidden nodes.
def test(dm, dl, indices, nn):
    total = 0
    for _ in xrange(100):
        hits = 0
        for j in indices:
            prediction = nn.predict(dm[j])
            if dl[j] == prediction:
                hits += 1
        # Fraction of correct predictions for this round.
        total += hits / float(len(indices))
    return total / 100


# Open the data sets.
dm = np.loadtxt(open('data.csv', 'rb'), delimiter=',').tolist()
dl = np.loadtxt(open('dataLabels.csv', 'rb')).tolist()

# Try hidden-node counts 5, 15, ..., 95 and print the accuracy of the
# network built for each.
for i in xrange(5, 100, 10):
    nn = ocr(i, indices, dl, dm, False)
    performance = test(dm, dl, indices, nn)
    print(str(i) + " Hidden Nodes --> " + str(performance))
# Scrape the page at ``url`` for the menu image, OCR it and look up the
# beer names found in the text. Python 2 code (urllib2, file()).
# NOTE(review): this looks like a fragment -- ``url`` and the regex names
# used below are defined outside the visible code.
data = urllib2.urlopen(url).read()
soup = BeautifulSoup(data)
images = soup.findAll('img')
# Pick the first <img> whose height attribute is '960'; an <img> without a
# height attribute raises KeyError here, and no match raises IndexError.
want = [i for i in images if i.attrs['height']=='960'][0].attrs['src']
# Sources without 'http' in them are prefixed with the site root.
if 'http' not in want:
    want = 'http://www.klobysbbq.com/' + want
imageData = urllib2.urlopen(want).read()
# The image's base name is passed as the temp-file suffix.
# NOTE(review): tempfile.mktemp is deprecated and race-prone, and the file
# handle opened below is never explicitly closed.
imgFile = tempfile.mktemp(os.path.basename(want))
file(imgFile,'wb+').write(imageData)
ocrData = ocr.ocr(imgFile)
beerNames = []
for line in ocrData.split('\n'):
    # Skip blank lines and any line containing 'Today'.
    if len(line.strip()) == 0:
        continue
    if 'Today' in line:
        continue
    # Clean the line with the externally defined patterns, in this order.
    line = re.sub(percent, "", line)
    line = re.sub(quoteToEnd, "", line)
    line = re.sub(alphaNumSpace, "", line)
    line = re.sub(lowerUpperLower, "\\1 \\2 \\3", line)
    beerNames.append(line)
results = SearchForBeers(beerNames)