Exemplo n.º 1
0
def run_seg(request, pk):
    """Segment the characters of one page and return them as JSON.

    Fetches the ``Page`` with primary key ``pk`` through
    ``get_object_or_404``, passes its image path together with the
    serialized fields of the page's ``Region`` rows to ``charseg``, saves
    one ``Character`` per returned entry, queues the ``cut_char`` task for
    the page and answers with the raw ``charseg`` output.

    ``request`` is accepted for the view signature but not read.
    """
    page = get_object_or_404(Page, pk=pk)
    img_path = page.get_image_path()
    serialized = serializers.serialize("python",
                                       Region.objects.filter(page_id=pk))
    region_lst = [entry['fields'] for entry in serialized]
    segmented = charseg(img_path, region_lst, pk)

    # Character ids look like <pk>L<line>R<region>C<char>, zero padded.
    id_format = '{0}L{1:02}R{2:02}C{3:03}'
    for item in segmented:
        line_no = item['line_no']
        region_no = item['region_no']
        char_no = item['char_no']
        Character(
            id=id_format.format(pk, line_no, region_no, char_no),
            page_id=pk,
            char=item['char'],
            left=item['left'],
            right=item['right'],
            top=item['top'],
            bottom=item['bottom'],
            line_no=line_no,
            region_no=region_no,
            char_no=char_no,
            is_correct=-9,  # the character image has not been cut yet
        ).save()

    # Cutting the character images is handed to an offline task.
    cut_char.delay(pk)
    return JsonResponse(segmented, safe=False)
def segment_one_page(page_id, image_name, text):
    """Segment one page image and save a ``Character`` row per result.

    Reads the image at ``image_name``, runs ``process_page`` on it with
    ``text`` and ``page_id``, then prints and saves one ``Character`` for
    every returned item.  Failures are reported on stdout instead of being
    raised, and rows saved before a failure are kept.

    Args:
        page_id: Identifier stored on every saved ``Character``.
        image_name: Path of the page image handed to ``io.imread``.
        text: Page text handed to ``process_page``.

    Returns:
        None.
    """
    # NOTE(review): ``text1`` is not defined inside this function; it has to
    # exist at module level or this comparison raises NameError -- confirm.
    if text == text1:
        print('equal----')

    # Only a failed read is reported as a missing image; previously a bare
    # ``except:`` gave that label to every error and also trapped
    # SystemExit / KeyboardInterrupt.
    try:
        image = io.imread(image_name, 0)
    except (IOError, OSError):
        print('missing image file')
        return
    except Exception as exc:
        print('could not read image %r: %r' % (image_name, exc))
        return

    try:
        total_char_lst = process_page(image, text, page_id)
        for ch in total_char_lst:
            char_id = ch.char_id.strip()
            character = Character(id=char_id,
                                  page_id=page_id,
                                  char=ch.char,
                                  image=char_id + u'.jpg',
                                  left=ch.left,
                                  right=ch.right,
                                  top=ch.top,
                                  bottom=ch.bottom,
                                  line_no=ch.line_no,
                                  char_no=ch.char_no,
                                  is_correct=False)
            print(character)
            character.save()
    except Exception as exc:
        # Stay best-effort like the original, but report the real cause.
        print('segmentation failed for page %r: %r' % (page_id, exc))
def test_file():
    """Segment one hard-coded sample page and save its characters.

    Lists the ``*.txt`` files in the directory for volume ``01`` / page
    ``001``, derives the page id from the first listed name (its last
    character before ``.txt`` is replaced by ``a``), reads the matching
    ``.jpg`` and ``.txt`` files, runs ``process_page`` on them and prints
    and saves one ``Character`` per result.

    Exits the interpreter with status -1 when no ``.txt`` file is listed.
    """
    vol_no = u'01'
    page_no = u'001'
    page_dir = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s/' % (
        vol_no, page_no)

    # Run ``ls`` with ``cwd`` rather than ``cd DIR; ls``: with the latter a
    # failed ``cd`` silently listed the current directory instead.
    try:
        output = subprocess.check_output('ls *.txt 2>/dev/null',
                                         shell=True, cwd=page_dir)
    except subprocess.CalledProcessError:
        # ``ls`` exits non-zero when the glob matches nothing; treat that
        # as "no file" so the exit below is actually reached.
        output = ''
    pos = output.find('.txt')
    if pos == -1:
        sys.exit(-1)

    # Drop the last character of the file stem and use the 'a' suffix.
    page_id_prefix = output[:pos - 1]
    page_id = page_id_prefix + 'a'
    image_name = page_dir + u'%s.jpg' % page_id
    text_name = page_dir + u'%s.txt' % page_id

    image = io.imread(image_name, 0)
    with open(text_name, 'r') as f:
        text = f.read().decode('utf-8')
    # The page is processed once; an earlier duplicate call whose result
    # was discarded has been removed.
    total_char_lst = process_page(image, text, page_id)

    # Django is configured only here, just before the model is imported.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(id=char_id,
                              page_id=page_id,
                              char=ch.char,
                              image=char_id + u'.jpg',
                              left=ch.left,
                              right=ch.right,
                              top=ch.top,
                              bottom=ch.bottom,
                              line_no=ch.line_no,
                              char_no=ch.char_no,
                              is_correct=False)
        print(character)
        character.save()
def segment_one_page(page_id, image_name, text):
    """Run character segmentation on a page image and store the results.

    The image at ``image_name`` is read, passed to ``process_page`` along
    with ``text`` and ``page_id``, and each returned item is printed and
    saved as a ``Character``.  Errors are printed rather than raised; any
    rows saved before an error remain saved.

    Args:
        page_id: Identifier written to each saved ``Character``.
        image_name: Path of the page image passed to ``io.imread``.
        text: Page text passed to ``process_page``.

    Returns:
        None.
    """
    # NOTE(review): ``text1`` must be a module-level name for this check to
    # work; it is not assigned in this function -- confirm it exists.
    if text == text1:
        print('equal----')

    # Reading the image is handled on its own so that "missing image file"
    # is only printed for a failed read, not for any later error (the old
    # bare ``except:`` also swallowed SystemExit / KeyboardInterrupt).
    try:
        image = io.imread(image_name, 0)
    except (IOError, OSError):
        print('missing image file')
        return
    except Exception as exc:
        print('could not read image %r: %r' % (image_name, exc))
        return

    try:
        for ch in process_page(image, text, page_id):
            char_id = ch.char_id.strip()
            character = Character(id=char_id, page_id=page_id, char=ch.char,
                                  image=char_id + u'.jpg',
                                  left=ch.left, right=ch.right,
                                  top=ch.top, bottom=ch.bottom,
                                  line_no=ch.line_no, char_no=ch.char_no,
                                  is_correct=False)
            print(character)
            character.save()
    except Exception as exc:
        # Remain best-effort, but surface what actually went wrong.
        print('segmentation failed for page %r: %r' % (page_id, exc))
def test_file():
    """Process the hard-coded sample page and save its characters.

    Finds the first ``*.txt`` file listed in the volume ``01`` / page
    ``001`` directory, builds the page id from its name (last character of
    the stem replaced by ``a``), loads the ``.jpg`` and ``.txt`` files for
    that id, calls ``process_page`` and prints and saves a ``Character``
    for each returned item.

    Exits the interpreter with status -1 when no ``.txt`` file is listed.
    """
    vol_no = u'01'
    page_no = u'001'
    base_dir = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s/' % (vol_no, page_no)

    # ``cwd`` replaces the former ``cd DIR;`` prefix, which fell through to
    # listing the current directory whenever the ``cd`` failed.
    try:
        output = subprocess.check_output('ls *.txt 2>/dev/null', shell=True, cwd=base_dir)
    except subprocess.CalledProcessError:
        # A glob without matches makes ``ls`` exit non-zero; map that to
        # empty output so the "nothing found" exit below can trigger.
        output = ''
    pos = output.find('.txt')
    if pos == -1:
        sys.exit(-1)

    # The stem's final character is dropped and replaced with 'a'.
    page_id = output[:pos - 1] + 'a'
    image_name = base_dir + u'%s.jpg' % page_id
    text_name = base_dir + u'%s.txt' % page_id

    image = io.imread(image_name, 0)
    with open(text_name, 'r') as f:
        text = f.read().decode('utf-8')
    # Single call: the previous extra call discarded its result.
    total_char_lst = process_page(image, text, page_id)

    # Django setup happens right before the model import that needs it.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(id=char_id, page_id=page_id, char=ch.char,
                              image=char_id + u'.jpg',
                              left=ch.left, right=ch.right,
                              top=ch.top, bottom=ch.bottom,
                              line_no=ch.line_no, char_no=ch.char_no,
                              is_correct=False)
        print(character)
        character.save()
Exemplo n.º 6
0
def run_seg(request, pk):
    """Run character segmentation for a page and reply with the result.

    The ``Page`` for ``pk`` is resolved with ``get_object_or_404``; its
    image path and the serialized fields of its ``Region`` rows go to
    ``charseg``.  Every entry that comes back is saved as a ``Character``,
    the ``cut_char`` task is queued for the page, and the ``charseg``
    output is returned as a JSON response.

    ``request`` is part of the view signature but is not used.
    """
    page = get_object_or_404(Page, pk=pk)
    img_path = page.get_image_path()
    regions = Region.objects.filter(page_id=pk)
    region_lst = [row['fields'] for row in serializers.serialize("python", regions)]
    results = charseg(img_path, region_lst, pk)

    # Bounding-box and glyph values copied unchanged onto the model.
    copied_keys = ('char', 'left', 'right', 'top', 'bottom')
    for entry in results:
        line_no, region_no, char_no = entry['line_no'], entry['region_no'], entry['char_no']
        # Id layout: <pk>L<line>R<region>C<char> with zero padding.
        char_id = '{0}L{1:02}R{2:02}C{3:03}'.format(pk, line_no, region_no, char_no)
        copied = dict((key, entry[key]) for key in copied_keys)
        record = Character(
            id=char_id,
            page_id=pk,
            line_no=line_no,
            region_no=region_no,
            char_no=char_no,
            is_correct=-9,  # the character image has not been cut yet
            **copied
        )
        record.save()

    # The character images themselves are cut later by an offline task.
    cut_char.delay(pk)
    return JsonResponse(results, safe=False)