def get_cell_digits(x, y, table_struct, img):
    """OCR the table cell at grid position (x, y) and return its digit text.

    Crops the cell from *img* using *table_struct*, then runs Tesseract in
    single-text-line mode (psm 7) with the digits-only language model.
    """
    cell_img = cell_table_image(x, y, table_struct, img)
    return ocr(cell_img, config='--oem 1 --psm 7', lang='digits')
def _get_captcha(session):
    """Request a fresh captcha from eoz.one and return its OCR'd text.

    The refresh endpoint returns JSON containing a relative image URL;
    the image itself is fetched in a second request and passed to OCR.
    """
    refresh_resp = utils.get(session, 'http://eoz.one/user/captcha?refresh=1')
    image_url = 'http://eoz.one' + refresh_resp.json()['url']
    image_resp = utils.get(session, image_url)
    return utils.ocr(image_resp.content)
def add_reminders(img_path):
    """OCR a menu image and register a reminder for each parsed item.

    Only the first four (date, food) pairs extracted from the OCR text are
    used. Returns the list of pairs that reminders were created for.
    """
    text = ocr(open_image(img_path))
    items = text_to_menu_items(text)[:4]
    for day, dish in items:
        add_reminder(day, dish)
    return items
def get_cell_text(x, y, table_struct, img):
    """OCR the table cell at grid position (x, y) as Russian text.

    Crops the cell from *img* via *table_struct*, runs Tesseract in block
    mode (psm 6) with the Russian model, and applies post-OCR error
    correction before returning the text.
    """
    cell_img = cell_table_image(x, y, table_struct, img)
    raw_text = ocr(cell_img, config='--oem 1 --psm 6 -c textord_heavy_nr=true', lang='rus')
    return error_correct(raw_text)
def test_baidu_index(self):
    """Scrape Baidu Index averages for every keyword/date combination.

    Drives the Baidu Index site through a Selenium page object, screenshots
    the average-index widget for Total/PC/Mobile, OCRs each screenshot, and
    accumulates the results into a DataFrame written out as CSV.
    """
    baidu_index_page = page.BaiduIndexPage(self.driver, self.action)
    baidu_index_page.openPage("https://index.baidu.com/")
    # Initial throwaway search to get past the landing page.
    baidu_index_page.input_search_key("start")
    baidu_index_page.click_submit_button()
    baidu_index_page.maxWindows()
    '''
    理想化的函数:
    @param1: 地区:全国, 北京,香港
    @param2: 查询日期:2018-09
    @param3: 查询关键字: 赵丽颖+zhaoliyig
    @output: void 处理过程中将结果转换成csv
    '''
    # (The string above is the author's design sketch for an idealized
    # parameterized version of this routine: region, date range, keyword
    # in; CSV out.)
    df = pd.DataFrame(
        columns=['key_words', 'date_range', 'index_type', 'baidu_index'])
    i = 0  # next row index in df
    for keywords in utils.setAllKeywords():
        for date in utils.setAllDate():
            # try:
            baidu_index_page.input_new_search_key(keywords)
            baidu_index_page.click_new_submit_button()
            time.sleep(1)  # let the results page render
            # Enter the custom date range for the query.
            # NOTE(review): the range is hard-coded to 2011-10 .. 2012-10
            # while the loop variable `date` is only used for the filename
            # and the CSV row — confirm this is intentional.
            self.inputSelfDefineDate(baidu_index_page, Constant.YEAR_2011,
                                     Constant.OCT, Constant.YEAR_2012,
                                     Constant.OCT)
            baidu_index_page.click_index_average()
            for indexType in ["Total", "PC", "Mobile"]:
                filename = keywords + '.' + indexType + '.' + date
                # "Total" is the default view; PC/Mobile need a click.
                if "PC" == indexType:
                    baidu_index_page.click_pc_index_button()
                if "Mobile" == indexType:
                    baidu_index_page.click_mobile_index_button()
                # Hover twice so the average-index tooltip is visible in
                # the screenshot that gets OCR'd below.
                baidu_index_page.hoverOnAvgIndex(3)
                baidu_index_page.hoverOnAvgIndex(4)
                baidu_index_page.saveThePicture(filename)
                time.sleep(1)
                # OCR the saved screenshot; strip thousands separators.
                avg_index = utils.ocr(Constant.IDENTIIFIED_PICTURE_FOLDER +
                                      filename + ".png").replace(",", "")
                df.loc[i] = [keywords, date, indexType, avg_index]
                i += 1
            # except Exception as e:
            #     print(keywords+date+"get failed")
            #     print(e)
            #     time.sleep(60*60)
    df.to_csv(Constant.FINAL_RESULT_DIR + Constant.FINAL_RESULT_FILE_NAME,
              index=False, sep=',')
def classify_image(img_path):
    """Classify a screenshot by its OCR text.

    Returns 'mosaic' if the text mentions mosaicscience, 'food' if more
    than five day-of-week tokens appear (a menu), otherwise 'trustnet'.
    """
    text = ocr(open_image(img_path))
    if 'mosaicscience' in text:
        return 'mosaic'
    day_tokens = [
        word
        for row in text.split('\n')
        for word in row.split(' ')
        if word in DAYS
    ]
    return 'food' if len(day_tokens) > 5 else 'trustnet'
def on_status(self, status, firstcall=True):
    """Reply to a mention with DenseCap captions (plus OCR text) of its images.

    If *status* carries images, caption each one via the DeepAI DenseCap API,
    append any OCR'd text, and tweet the result as a reply (splitting into a
    thread if over 280 chars). If it has no images, recurse once per ancestor
    up the reply chain looking for a tweet that does.
    """
    # Save ID of first tweet with @mention to reply to; recursive calls
    # pass firstcall=False so replies still target the original mention.
    if firstcall:
        self.id = status.id
    # One or multiple captions? Certain phrases ask for more detail.
    opts = ['tell me more', 'what else', 'is that all', 'can you do better']
    if any(substr in status.text.lower() for substr in opts):
        self.more = True
    else:
        self.more = False
    # If tweet has an image, tag this one
    if hasattr(status, 'extended_entities'):
        print(status.text)
        # Setup DenseCap
        headers = {
            'api-key': self.key,
        }
        # Iterate through each image in tweet
        caption = []
        for each in status.extended_entities['media']:
            # Get image URL
            img = each['media_url_https']
            files = {
                'image': img,
            }
            # Call DenseCap API
            response = requests.post('https://api.deepai.org/api/densecap',
                                     headers=headers, files=files).json()
            # Call tesseract for OCR
            text = ocr(img)
            # Construct caption
            if 'output' in response:
                if not self.more:
                    # Take first caption provided
                    cap = response['output']['captions'][0]['caption']
                else:
                    # Take next 3 captions
                    cap = '; '.join([response['output']['captions'][i]['caption']
                                     for i in range(1, 4)])
                # Add OCR text if detected
                # NOTE(review): this checks `text is not None` — verify ocr()
                # returns None (not '') when nothing is detected, otherwise
                # empty OCR output still gets appended.
                if text is not None:
                    cap = 'The image shows ' + cap + ' and it says ' + text
                caption.append(cap)
        if caption:
            # Build tweet: single caption as-is, multiple as a numbered list.
            if len(caption) == 1:
                tweet = caption[0]
            else:
                tweet = ''
                for i, c in enumerate(caption):
                    tweet += f'[{i+1}] {c}\n'
            print(tweet)
            # Tweet (reply) the response
            if len(tweet) <= 280:
                self.api.update_status(tweet, in_reply_to_status_id=self.id,
                                       auto_populate_reply_metadata=True)
            else:
                # Too long for one tweet: split and post as a reply chain,
                # each part replying to the previous one.
                tweets = tweetsplitter(tweet)
                prev = self.id
                for t in tweets:
                    latest = self.api.update_status(
                        t, in_reply_to_status_id=prev,
                        auto_populate_reply_metadata=True)
                    prev = latest.id
    # If tweet has no image, try and find parent tweet with image
    else:
        if status.in_reply_to_status_id is not None:
            parent = self.api.get_status(status.in_reply_to_status_id)
            # Recursive call (firstcall=False keeps self.id pointing at the
            # original mention so the reply threads correctly).
            self.on_status(parent, False)
def read_title(img):
    """Return the first OCR'd paragraph of *img*, treated as its title.

    Paragraphs are assumed to be separated by blank lines in the OCR output.
    """
    paragraphs = ocr(img).split('\n\n')
    return paragraphs[0]
score, pos = _match(img, tp_startup) if score > 0.9: utils.tap((pos[0] + pos[1]) / 2, (pos[2] + pos[3]) / 2) state.chg_state('run') time.sleep(1) else: time.sleep(0.3) elif state.state == 'run': score, pos = _match(img, tp_main) if score > 0.20: simg = img[content_offset:, :, :] print('Call ocr...', end=' ') st = time.time() ocr_rst = utils.ocr(simg, regions) et = time.time() print('Time: %.2f' % (et - st)) print('Question:', ocr_rst) if '' not in ocr_rst: question, opt1, opt2, opt3, opt4 = ocr_rst with env.begin() as txn: ans = txn.get(question.encode()) if ans is None: select = random.randrange(1, 5) else: ans = ans.decode() print('Memory hit!', ans)