-
Notifications
You must be signed in to change notification settings - Fork 0
/
Hybrid.py
232 lines (182 loc) · 9.17 KB
/
Hybrid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import cv2
import numpy as np
from PIL import Image
from skimage import filters
from tkFileDialog import askopenfilename
from scipy import ndimage as ndi
import math
from tesserocr import PyTessBaseAPI, iterate_level, RIL
from skimage import feature
import matplotlib.image as mpimg
filename = askopenfilename()
print filename # for debugging purposes
img = cv2.imread(filename)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_gs = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
contour_area_min = 350
contour_area_max = int(math.floor(img.shape[0]*img.shape[1]*0.08))
strel_size = 9 # may have some correlation with the min contour area threshold.
img_red = img[:, :, 0]
img_green = img[:, :, 1]
img_blue = img[:, :, 2]
edge_red = feature.canny(img_red, sigma=2) # alternatively, add lower and upper thresholds of 50/100 TBC
edge_green = feature.canny(img_green, sigma=3) # changed from 3
edge_blue = feature.canny(img_blue, sigma=3) #may consider using default sigma value instead
#edge_red = cv2.Canny(img_red, 50, 100)
#edge_green = cv2.Canny(img_green, 50, 100)
# edge_blue = cv2.Canny(img_blue, 50, 100)
edge_assimilated = np.logical_or(edge_red, np.logical_or(edge_green, edge_blue)) # boolean array
strel1 = np.zeros((strel_size, strel_size))
for i in range(0, strel_size):
strel1[int(math.floor(strel_size/2.0))][i] = 1
# horizontal dilation
edge_dim_1 = ndi.binary_dilation(edge_assimilated, structure=strel1)
strel2 = np.zeros((strel_size, strel_size))
for j in range(0, strel_size):
strel2[j][int(math.floor(strel_size/2.0))] = 1
# vertical dilation
edge_dim_2 = ndi.binary_dilation(edge_assimilated, structure=strel2)
# combine the results with OR operator
edges_dil_comb = np.logical_or(edge_dim_1, edge_dim_2)
# all_labels = measure.label(edges_dil_comb, neighbors=8, connectivity=2).astype('uint8') # same dimensions as image
all_labels = edges_dil_comb.astype('uint8')
# the above step may not be necessary? If skip, convert edges_dil_comb to 'uint8'
im2, abac, hierarchy = cv2.findContours(all_labels.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
large_contours = []
for contour in abac:
if cv2.contourArea(contour) > contour_area_min and (cv2.contourArea(contour) < contour_area_max): # Green's theorem for contour area approximation
large_contours.append(contour)
# at this point have, collected large contours only.
# instead of drawing contours, try to select the interior pts of each contour, in grayscale
collect_gs = 255 + np.zeros((img.shape[0], img.shape[1]), dtype='uint8') # want grayscale
black_shell = np.zeros((img.shape[0], img.shape[1]), dtype='uint8')
for i in xrange(0, len(large_contours)):
black_bg = np.zeros((img.shape[0], img.shape[1]), dtype='uint8')
cv2.drawContours(black_bg, large_contours, i, color=255, thickness=-1)
interior_pts = np.where(black_bg == 255) # collection of sets of coordinates corresponding to the interior points of contours
for x in xrange(0, len(interior_pts[0])):
collect_gs[interior_pts[0][x], interior_pts[1][x]] = img_gs[interior_pts[0][x], interior_pts[1][x]]
Image.fromarray(collect_gs).show()
# alternative branch: check the intensities of pixels that were REMOVED. corresponds to white pixels in collect_gs
mask = (collect_gs > 254).astype('uint8')
fig = plt.figure()
a = fig.add_subplot(1, 2, 1)
hist = cv2.calcHist([img_gs], [0], mask, [256], [0, 256])
plt.plot(hist)
print hist
print hist.shape
hist_2 = cv2.calcHist([img_gs], [0], None, [256], [0, 256])
plt.plot(hist_2)
# the histogram of the data
# n, bins, patches = plt.hist(hist, 100, normed=1, facecolor='cyan', alpha=0.82)
# n, bins, patches = plt.hist(x, 50, normed=1, facecolor='blue', alpha=0.75) # alpha: intensity (related to transparency)
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Histogram of Image:')
# plt.axis([0, 256, 0, 300000]) optional, will get best fit if don't use
plt.grid(True) # whether to show gridlines or not. Default: False
# plt.show()
# then perform global threshold from here
img_for_binarization = collect_gs
# obtain the cuts in horizontal dim through histogram analysis:
foreground = collect_gs < 255
# Image.fromarray(foreground.astype('uint8')*255).show()
Image.fromarray(foreground.astype('uint8')*255).save('binaryimg.png')
# compute the histogram of foreground vs background pixels
# use mpimg
binary = mpimg.imread('binaryimg.png') # collection of lists
fig = plt.figure()
a = fig.add_subplot(1, 2, 2)
print binary
print binary.shape # y, x (in dims)
# want to compute the sum along the y-dim
sum1 = np.sum(binary, axis=1)
print "Sum1: "
print sum1
# find the cutting points based on sum1
t_histo = 130 # number of nonzero pixels in -> direction (threshold value)
potential_cuts = []
for x in xrange(1, len(sum1)):
if (sum1[x] < 200) and (sum1[x-1] >=200): # cross downward from the left
potential_cuts.append(x) # then is a potential cutting point
print potential_cuts # collection of y-coordinates corresponding to cuts
print len(potential_cuts)
# get adjustments for the potential cuts: use the next local minimum
# description of local min: f(x) such that f(x-1)>=f(x)<=f(x+1)
adjusted_cuts = []
for value in potential_cuts:
for x in xrange(0, 100): # search for the first local min, with x>value. limit search to 300 vals may have indexerror with this?
try:
if (sum1[value+x] <= sum1[value+x+1]) and (sum1[value+x] <= sum1[value+x-1]): # i.e. if is local min
adjusted_cuts.append(value+x)
break
elif x==99:
adjusted_cuts.append(value)
except IndexError:
adjusted_cuts.append(value)
break
# next, extract the image regions of the cuts obtained above
# img is in rgb
segments = []
coords = []
if len(adjusted_cuts):
for x in xrange(0, len(adjusted_cuts)): # appears to be t
if x > 0:
y_low = adjusted_cuts[x-1]
else:
y_low = 0
if x == len(adjusted_cuts)-1: # to obtain the last (lowest) segment
y_high = img.shape[0]
else:
y_high = adjusted_cuts[x]
segments.append(collect_gs[y_low:y_high, :]) # contains white background, and have grayscale
coords.append([y_low, y_high])
# need to handle the end, i.e. after the last cut
print adjusted_cuts
"""for seg in segments: # want to collect the histograms of each segment
# Image.fromarray(seg).show()
hist_seg = cv2.calcHist([seg], [0], None, [256], [0, 256]) # segment is gs on white background
plt.plot(hist_seg)"""
# use some existing global thresholding method for each of the segments (different T for each segment)
# inputs should be grayscale (single channel)
black_bg = np.zeros((img.shape[0], img.shape[1]), dtype='uint8')
y_cur_min = 0
for seg in zip(segments, coords): # is in order of increasing y-value
threshglobal = filters.threshold_li(seg[0]) # Should be an integer? seg[0] refers to the first element, i.e. the segment and not the adjusted cut
# compute adjustment to the threshold found above based on the ratio of foreground pixels to total pixels in segment
# background_pixels
background_pixels = np.where(seg[0] == 255)
a = len(background_pixels[0])
seg_img = seg[0]
b = (seg_img.shape[0])*(seg_img.shape[1])
adjustment_ratio = 1.0*a/b
adjustment_value = 35*pow(adjustment_ratio, (1.0/2.0)) # gives a^b
print "Adjustment: " + str(adjustment_value)
threshglobal = threshglobal - adjustment_value
# the larger the ratio, the larger the adjustment effect
# threshglobal = int(0.9*threshglobal)
thresholded = seg[0] > threshglobal # need to stack the images on top of each other after thresholding
black_bg[seg[1][0]:seg[1][1], :] = thresholded.astype('uint8')*255 # need to have the starting y-coordinate, which should be equal to ending of previous segment
# may want to make adjustments to the threshold value based on the number of background pixels included in the image
# alternatively, if possible, compute the threshold based on the foreground values only. .. but input should be an NxM array. so have to live with the background pixels
Image.fromarray(black_bg).show()
binary = black_bg
with PyTessBaseAPI(psm=6) as api:
api.SetVariable("tessedit_char_whitelist",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0987654321-.:/()")
api.SetImage(Image.fromarray(binary.astype('uint8')*255).convert('RGB'))
print api.GetUTF8Text()
print filename
# next: display the cutting lines for illustrative purposes
"""black = np.zeros((img.shape[0], img.shape[1], 3), dtype='uint8')
black[0:img.shape[0], 0:img.shape[1], 0] = binary.astype('uint8')*255
Image.fromarray(black).show()"""
# may want to remove the small contours again, after this. But may cause problems for the digits that start shattering / disintegrating
# do thresholding within each of the image segments based on difference between bg and fg (lf peak)
# intuitions: the background pixels should be brighter than the foreground pixels. Should have only 1 color of text within one segment (horizontal cut->)
plt.title("Horizontal Frequencies")
plt.plot(sum1) # this gives the histogram for horizontal direction (->), with y varying
# plt.show()