def segment_one_page(page_id, image_name, text):
    """Segment one page image into characters and save each one.

    Reads the image at ``image_name``, passes it with ``text`` and
    ``page_id`` to ``process_page`` and saves one ``Character`` row per
    returned entry, printing each row before it is saved.

    Failures are reported on stdout instead of being raised, so a caller
    looping over many pages keeps going:

    * ``IOError``/``OSError`` prints ``missing image file``;
    * any other ``Exception`` prints its traceback, so that database or
      segmentation errors are no longer misreported as a missing image.

    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.

    Args:
        page_id: Identifier stored on every created ``Character``.
        image_name: Path handed to ``io.imread``.
        text: Page text handed to ``process_page``.
    """
    import traceback

    # Debug check kept from the original. ``text1`` is not defined in this
    # function, so it has to exist at module level.
    # NOTE(review): confirm ``text1`` is a module global, otherwise this
    # line raises NameError before any work is done.
    if text == text1:
        print('equal----')

    try:
        # NOTE(review): the second positional argument is presumably the
        # grayscale flag of skimage's imread -- confirm against the import.
        image = io.imread(image_name, 0)
        for ch in process_page(image, text, page_id):
            char_id = ch.char_id.strip()
            character = Character(
                id=char_id,
                page_id=page_id,
                char=ch.char,
                image=char_id + u'.jpg',
                left=ch.left,
                right=ch.right,
                top=ch.top,
                bottom=ch.bottom,
                line_no=ch.line_no,
                char_no=ch.char_no,
                is_correct=False,
            )
            print(character)
            character.save()
    except (IOError, OSError):
        print('missing image file')
    except Exception:
        # Stay best-effort, but show what actually went wrong.
        traceback.print_exc()
def test_file():
    """Segment one hard-coded page and save the resulting characters.

    Picks the first ``*.txt`` file (in sorted order) from the directory of
    volume ``01`` / page ``001``, derives the page id from its name by
    replacing the single character in front of ``.txt`` with ``a``, loads
    the matching ``.jpg`` and ``.txt`` files, runs ``process_page`` once and
    saves one ``Character`` per returned entry, printing each before saving.

    The directory is listed with ``glob`` rather than a shell pipeline: the
    previous ``ls`` call raised ``CalledProcessError`` when nothing matched,
    which made the "no text file" exit path unreachable.

    Raises:
        SystemExit: with status -1 when the directory holds no ``.txt`` file.
    """
    import glob
    import os

    vol_no = u'01'
    page_no = u'001'
    page_dir = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s' % (
        vol_no, page_no)

    # Sorted so the chosen file does not depend on directory order.
    text_files = sorted(glob.glob(os.path.join(page_dir, u'*.txt')))
    if not text_files:
        sys.exit(-1)

    first_name = os.path.basename(text_files[0])
    pos = first_name.find('.txt')
    # Drop the last character before ".txt" and use "a" in its place.
    page_id = first_name[:pos - 1] + 'a'

    image_name = os.path.join(page_dir, u'%s.jpg' % page_id)
    text_name = os.path.join(page_dir, u'%s.txt' % page_id)

    image = io.imread(image_name, 0)
    # Read bytes and decode explicitly; works the same on Python 2 and 3.
    with open(text_name, 'rb') as f:
        text = f.read().decode('utf-8')

    total_char_lst = process_page(image, text, page_id)

    # Django has to be configured before the model module can be imported.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(
            id=char_id,
            page_id=page_id,
            char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left,
            right=ch.right,
            top=ch.top,
            bottom=ch.bottom,
            line_no=ch.line_no,
            char_no=ch.char_no,
            is_correct=False,
        )
        print(character)
        character.save()
def test_file():
    """Run the segmentation pipeline on a single fixed page.

    The page directory for volume ``01`` / page ``001`` is searched for
    ``*.txt`` files; the first one in sorted order determines the page id
    (its name up to, but not including, the character right before
    ``.txt``, with ``a`` appended). The ``.jpg`` and ``.txt`` files for that
    page id are loaded, ``process_page`` is called once, and a ``Character``
    is printed and saved for every entry it returns.

    ``glob`` replaces the earlier ``cd ...; ls *.txt`` shell call, which
    raised ``CalledProcessError`` on an empty match instead of reaching the
    explicit exit below.

    Raises:
        SystemExit: with status -1 when no ``.txt`` file is found.
    """
    import glob
    import os

    vol_no = u'01'
    page_no = u'001'
    page_dir = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s' % (
        vol_no, page_no)

    # Sorted to make the selection deterministic.
    candidates = sorted(glob.glob(os.path.join(page_dir, u'*.txt')))
    if not candidates:
        sys.exit(-1)

    first_name = os.path.basename(candidates[0])
    pos = first_name.find('.txt')
    # The character just before ".txt" is replaced by "a".
    page_id_prefix = first_name[:pos - 1]
    page_id = page_id_prefix + 'a'

    image_name = os.path.join(page_dir, u'%s.jpg' % page_id)
    text_name = os.path.join(page_dir, u'%s.txt' % page_id)

    image = io.imread(image_name, 0)
    # Binary read plus explicit decode behaves the same on Python 2 and 3.
    with open(text_name, 'rb') as f:
        text = f.read().decode('utf-8')

    total_char_lst = process_page(image, text, page_id)

    # The model import only works once Django has been set up.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(
            id=char_id,
            page_id=page_id,
            char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left,
            right=ch.right,
            top=ch.top,
            bottom=ch.bottom,
            line_no=ch.line_no,
            char_no=ch.char_no,
            is_correct=False,
        )
        print(character)
        character.save()
def segment_one_page(page_id, image_name, text):
    """Segment a page image and replace the page's stored characters.

    Loads ``image_name`` from the page image directory, runs
    ``process_page`` on it, lets every returned entry cut its own image out
    of the page into the character image directory, and then replaces all
    ``Character`` rows of ``page_id`` with the newly built ones.

    The delete and the bulk insert run inside one transaction, so a failed
    insert no longer leaves the page without any characters.

    Args:
        page_id: Page identifier; existing rows with this ``page_id`` are
            deleted and the new rows are stored under it.
        image_name: File name relative to the page image directory.
        text: Page text handed to ``process_page``.
    """
    from django.db import transaction

    image_path = u'/home/share/dzj_characters/page_images/%s' % image_name
    # NOTE(review): the second positional argument is presumably the
    # grayscale flag of skimage's imread -- confirm against the import.
    image = io.imread(image_path, 0)
    total_char_lst = process_page(image, text, page_id)

    character_lst = []
    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        path = u'/home/share/dzj_characters/character_images/%s.jpg' % char_id
        ch.cut_char_from_page(image, path)
        character_lst.append(Character(
            id=char_id,
            page_id=page_id,
            char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left,
            right=ch.right,
            top=ch.top,
            bottom=ch.bottom,
            line_no=ch.line_no,
            char_no=ch.char_no,
            is_correct=0,
        ))

    # Swap old rows for new ones atomically.
    with transaction.atomic():
        Character.objects.filter(page_id=page_id).delete()
        Character.objects.bulk_create(character_lst)
def segment_one_page(page_id, image_name, text):
    """Re-segment one page and store its characters, replacing old rows.

    Steps, in order:

    1. read the page image named ``image_name`` from the page image
       directory;
    2. call ``process_page`` with the image, ``text`` and ``page_id``;
    3. for each returned entry, have it cut its image out of the page into
       the character image directory and build a ``Character`` for it;
    4. delete the existing ``Character`` rows of ``page_id`` and bulk-insert
       the new ones.

    Step 4 runs in a single transaction so that an insert failure rolls the
    delete back instead of leaving the page empty.

    Args:
        page_id: Page identifier used both for the delete filter and for
            the new rows.
        image_name: File name relative to the page image directory.
        text: Page text handed to ``process_page``.
    """
    from django.db import transaction

    image_path = u"/home/share/dzj_characters/page_images/%s" % image_name
    # NOTE(review): the second positional argument is presumably the
    # grayscale flag of skimage's imread -- confirm against the import.
    image = io.imread(image_path, 0)
    total_char_lst = process_page(image, text, page_id)

    character_lst = []
    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        path = u"/home/share/dzj_characters/character_images/%s.jpg" % char_id
        ch.cut_char_from_page(image, path)
        character = Character(
            id=char_id,
            page_id=page_id,
            char=ch.char,
            image=char_id + u".jpg",
            left=ch.left,
            right=ch.right,
            top=ch.top,
            bottom=ch.bottom,
            line_no=ch.line_no,
            char_no=ch.char_no,
            is_correct=0,
        )
        character_lst.append(character)

    # Delete and insert together: either both happen or neither does.
    with transaction.atomic():
        Character.objects.filter(page_id=page_id).delete()
        Character.objects.bulk_create(character_lst)