/
start.py
332 lines (295 loc) · 10.5 KB
/
start.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
#coding=utf-8
import web
from web import form
#!/usr/bin/env python
import cv2
import numpy as np
INDEX_DIR = "IndexFiles.index"
import sys, os, lucene
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.util import Version
from org.apache.lucene.search import BooleanQuery
from org.apache.lucene.search import BooleanClause
from org.apache.lucene.search import highlight
from org.apache.lucene.search.highlight import SimpleHTMLFormatter
from org.apache.lucene.search.highlight import Highlighter
from org.apache.lucene.search.highlight import QueryScorer
from org.apache.lucene.search.highlight import SimpleFragmenter
from org.apache.lucene.search.highlight import Highlighter
import jieba
import re
render = web.template.render(
"template/")
urls = (
'/', 'text_index',
'/res', 'products',
'/img','img',
'/img_res','img_res',
)
login= form.Form(
form.Textbox('keywords'),
form.Button('Search'),
)
class text_index:
def GET(self):
f = login()
return render.text_index(f)
class img:
def GET(self):
web.header("Content-Type","text/html; charset=utf-8")
return render.img()
def POST(self):
return render.img()
class img_res:
def POST(self):
x = web.input(myfile={})
filedir = './tmp'
if 'myfile' in x:
filepath = x.myfile.filename.replace('\\', '/')
# filename = filepath.split('/')[-1]
filename = 'test.jpg'
fout = open(filedir + '/' + filename, 'wb')
fout.write(x.myfile.file.read())
fout.close()
f = img_run()
res=[]
for i in f:
grand = open(unicode(('brands/'+i), "utf8"), 'r')
grand_list = grand.readlines()
x=[]
for k in range(len(grand_list)-7):
x.append(grand_list[k])
res.append(x)
for i in range(len(res)):
for j in range(len(res[i])):
if j == 0 or j > 3 :
res[i][j] = res[i][j].split('\t')
os.remove('tmp/test.jpg')
return render.img_res(res)
def canny(img):
Canny_img = cv2.Canny(img, 50, 150)
Gause_img = cv2.GaussianBlur(img, (3, 3), 0)
img_sobel0 = np.float32(Gause_img)
img_sobel = img_sobel0.copy()
img_sobel1 = img_sobel0.copy()
img_sobel2 = img_sobel0.copy()
x = len(img_sobel)
y = len(img_sobel[0])
def sobel(img1, img2, img3, img4, x, y):
for i in range(1, x - 1):
for j in range(1, y - 1):
gradx = img1[i + 1][j - 1] + 2 * img1[i + 1][j] + img1[i + 1][j + 1] \
- img1[i - 1][j - 1] - 2 * img1[i - 1][j] - img1[i - 1][j + 1]
grady = img1[i - 1][j + 1] + 2 * img1[i][j + 1] + img1[i + 1][j + 1] \
- img1[i - 1][j - 1] - 2 * img1[i][j - 1] - img1[i + 1][j - 1]
img3[i][j] = gradx
img4[i][j] = grady
img2[i][j] = np.sqrt(gradx * gradx + grady * grady)
sobel(img_sobel0, img_sobel, img_sobel1, img_sobel2, x, y)
img_sobell = img_sobel.copy()
for i in range(1, x - 1):
for j in range(1, y - 1):
if img_sobel1[i][j] != 0:
tan = img_sobel2[i][j] / img_sobel1[i][j]
if tan >= 0 and tan < 1:
dtmp1 = img_sobel[i - 1][j] * (1 - tan) + img_sobel[i - 1][j - 1] * tan
dtmp2 = img_sobel[i + 1][j] * (1 - tan) + img_sobel[i + 1][j + 1] * tan
elif tan >= 1:
dtmp1 = img_sobel[i][j - 1] * (1 - 1 / tan) + img_sobel[i - 1][j - 1] / tan
dtmp2 = img_sobel[i][j + 1] * (1 - 1 / tan) + img_sobel[i + 1][j + 1] / tan
elif tan <= -1:
dtmp1 = img_sobel[i][j - 1] * (1 + 1 / tan) - img_sobel[i + 1][j - 1] / tan
dtmp2 = img_sobel[i][j + 1] * (1 + 1 / tan) - img_sobel[i - 1][j + 1] / tan
else:
dtmp1 = img_sobel[i - 1][j] * (1 + tan) - img_sobel[i - 1][j + 1] * tan
dtmp2 = img_sobel[i + 1][j] * (1 + tan) - img_sobel[i + 1][j - 1] * tan
else:
if img_sobel2[i][j] != 0:
dtmp1 = img_sobel[i][j - 1]
dtmp2 = img_sobel[i][j + 1]
else:
dtmp1 = img_sobel[i - 1][j - 1]
dtmp2 = img_sobel[i + 1][j + 1]
if img_sobel[i][j] >= dtmp1 and img_sobel[i][j] >= dtmp2:
img_sobell[i][j] = 128
else:
img_sobell[i][j] = 0
for i in range(0, x):
img_sobell[i][0] = 0
img_sobell[i][y - 1] = 0
for i in range(0, y):
img_sobell[0][i] = 0
img_sobell[x - 1][i] = 0
img_sobell1 = img_sobell.copy()
img_sobell2 = img_sobell.copy()
for i in range(1, x):
for j in range(1, y):
if img_sobel[i][j] >= 90 and img_sobell[i][j] == 128:
img_sobell1[i][j] = 255
img_sobell2[i][j] = 255
elif img_sobel[i][j] >= 30 and img_sobell[i][j] == 128:
img_sobell1[i][j] = 0
img_sobell2[i][j] = 255
elif img_sobell[i][j] == 128:
img_sobell1[i][j] = 0
img_sobell2[i][j] = 0
img_sobell3 = img_sobell1.copy()
def draw(img1, img2, x0, y0):
for i in range(-1, 2):
for j in range(-1, 2):
if img1[x0 + i][y0 + j] == 0 and img2[x0 + i][y0 + j] == 255:
if x0 + i != 0 and x0 + i != x - 1 and y0 + j != 0 and y0 + j != y - 1:
img1[x0 + i][y0 + j] = 255
draw(img1, img2, x0 + i, y0 + j)
for i in range(1, x - 1):
for j in range(1, y - 1):
if img_sobell1[i][j] == 255:
draw(img_sobell3, img_sobell2, i, j)
return img_sobell3
def hu(img):
x = len(img)
y = len(img[0])
m10 = 0
m01 = 0
m00 = 0
for i in range(0, x):
for j in range(0, y):
if img[i][j] != 0:
m00 += img[i][j]
m10 += img[i][j] * (i+1)
m01 += img[i][j] * (j+1)
xbar = m10 / m00
ybar = m01 / m00
u02 = 0
u03 = 0
u11 = 0
u12 = 0
u20 = 0
u21 = 0
u30 = 0
for i in range(0, x):
for j in range(0, y):
if img[i][j] != 0:
p = i + 1 - xbar
q = j + 1 - ybar
value = img[i][j]
u02 += q * q * value
u03 += q * q * q * value
u11 += p * q * value
u12 += p * q * q * value
u20 += p * p * value
u21 += p * p * q * value
u30 += p * p * p * value
u02 /= np.power(m00, 2)
u03 /= np.power(m00, 2.5)
u11 /= np.power(m00, 2)
u12 /= np.power(m00, 2.5)
u20 /= np.power(m00, 2)
u21 /= np.power(m00, 2.5)
u30 /= np.power(m00, 2.5)
tmp1 = u20 - u02
tmp2 = u30 - 3*u12
tmp3 = u03 - 3*u21
tmp4 = u30 + u12
tmp5 = u03 + u21
v1 = u20 + u02
v2 = tmp1*tmp1 + 4*u11*u11
v3 = tmp2*tmp2 + tmp3*tmp3
v4 = tmp4*tmp4 + tmp5*tmp5
v5 = tmp2*tmp4*(tmp4*tmp4-3*tmp5*tmp5) - tmp3*tmp5*(3*tmp4*tmp4- tmp5*tmp5)
v6 = tmp1*(tmp4*tmp4-tmp5*tmp5) + 4*u11*tmp4*tmp5
v7 = - tmp3*tmp1*(tmp4*tmp4-3*tmp5*tmp5) - tmp2*tmp5*(3*tmp4*tmp4-tmp5*tmp5)
vec = [v1, v2, v3, v4, v5, v6, v7]
lenvec = 0
for i in range(0, 7):
lenvec += vec[i] * vec[i]
lenvec = np.sqrt(lenvec)
for i in range(0, 7):
vec[i] /= lenvec
return vec
def match(bian1, bian2):
sim = 0
for i in range(0, 7):
a = np.log(np.absolute(bian1[i])) * np.sign(bian1[i])
b = np.log(np.absolute(bian2[i])) * np.sign(bian2[i])
sim += np.absolute((a-b)/a)
return sim
def img_run():
imgtar = cv2.imread('tmp/test.jpg', cv2.IMREAD_GRAYSCALE)
imgtarca = canny(imgtar)
imgtarhu = hu(imgtarca)
pa = os.getcwd()
directory = os.walk(os.path.join(pa, 'brands'))
listhu = []
for root, dirnames, filenames in directory:
for filename in filenames:
try:
path = os.path.join(root, filename)
file = open(path)
contents =file.read()
contents = contents.split('\n')
huju = []
for i in range(0, 7):
huju.append(float(contents[i - 7]))
file.close()
sim = match(imgtarhu, huju)
pairhu = []
pairhu.append(sim)
pairhu.append(path)
listhu.append(pairhu)
except Exception, e:
print "Failed:", e
listhu.sort()
res=[]
for i in range(0, 10):
res.append(listhu[i][1].split('/')[-1])
file.close()
return res
class products:
def GET(self):
user_data = web.input()
if user_data.keywords=="":
return render.text_index("")
key,grand_list=run(user_data.keywords)
res=[]
for i in range(len(grand_list)):
for j in range(len(grand_list[i])):
if j>3:
grand_list[i][j]=grand_list[i][j].split('\t')
return render.products(grand_list,key)
def run(command):
global vm_env
STORE_DIR = "index"
vm_env = lucene.getVMEnv()
vm_env.attachCurrentThread()
directory = SimpleFSDirectory(File(STORE_DIR))
searcher = IndexSearcher(DirectoryReader.open(directory))
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
query = QueryParser(Version.LUCENE_CURRENT, "contents",
analyzer).parse(command)
scoreDocs = searcher.search(query, 10).scoreDocs
#print "%s total matching documents." % len(scoreDocs)
res = []
for scoreDoc in scoreDocs:
doc = searcher.doc(scoreDoc.doc)
tmp = []
tmp.append([doc.get('name1'),doc.get('name2')])
tmp.append(doc.get("homepage"))
tmp.append(doc.get("intro"))
tmp.append(doc.get('logo'))
a=doc.get('goods')
a=a.split('\n')
for i in a:
tmp.append(i)
res.append(tmp)
return command, res
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
app = web.application(urls, globals())
app.run()
del searcher