/
img_tool.py
190 lines (145 loc) · 5.35 KB
/
img_tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from collections import defaultdict
from scipy.stats import itemfreq
from skimage import feature
from PIL import Image as IMG
import numpy as np
import pandas as pd
import operator
import cv2
import os
from keras.preprocessing import image
from tqdm import tqdm
from glob import glob
def perform_color_analysis(img, flag):
    """Score how dark ('dull') and how white ('bright') an image is.

    Opens the image at path *img*, cuts it into top and bottom halves
    (averaging the two halves avoids biasing the score toward one region),
    and for each half inspects the 25 most frequent colors. A color whose
    first three components are all <= 20 counts as dark; all >= 240 counts
    as light. Each half's dark/light pixel mass is expressed as a percent
    of the inspected mass, and the two halves are averaged.

    Parameters
    ----------
    img : str
        Path to the image file.
    flag : str
        'black' -> dark percentage, 'white' -> light percentage,
        'all' -> (light, dark) tuple; anything else returns None.

    Returns
    -------
    float | tuple[float, float] | None
        None is also returned when analysis fails (e.g. grayscale/paletted
        images whose pixels are ints, or an empty half).
    """
    def color_analysis(half):
        # Build the color histogram of this half.
        palette = defaultdict(int)
        for pixel in half.getdata():
            palette[pixel] += 1
        # Most frequent colors first; only the top `pixel_limit` are scored.
        ranked = sorted(palette.items(), key=operator.itemgetter(1), reverse=True)
        light_shade, dark_shade, shade_count, pixel_limit = 0, 0, 0, 25
        for color, count in ranked[:pixel_limit]:
            if all(c <= 20 for c in color[:3]):  # dull: too much darkness
                dark_shade += count
            if all(c >= 240 for c in color[:3]):  # bright: too much whiteness
                light_shade += count
            shade_count += count
        light_percent = round((float(light_shade) / shade_count) * 100, 2)
        dark_percent = round((float(dark_shade) / shade_count) * 100, 2)
        return light_percent, dark_percent

    # Context manager releases the file handle promptly (the crops below
    # are independent copies, so closing the source is safe).
    with IMG.open(img) as im:  # .convert("RGB")
        width, height = im.size
        # Integer division: crop coordinates are pixel indices, and the
        # original float division (Python 3 `/`) produced non-integral boxes.
        half_h = height // 2
        im1 = im.crop((0, 0, width, half_h))
        im2 = im.crop((0, half_h, width, height))
    try:
        light_percent1, dark_percent1 = color_analysis(im1)
        light_percent2, dark_percent2 = color_analysis(im2)
    except Exception:
        # Best effort: non-RGB pixel formats or a zero-mass half yield no score.
        return None
    light_percent = (light_percent1 + light_percent2) / 2
    dark_percent = (dark_percent1 + dark_percent2) / 2
    if flag == 'black':
        return dark_percent
    elif flag == 'white':
        return light_percent
    elif flag == 'all':
        return light_percent, dark_percent
    else:
        return None
def average_pixel_width(img):
    """Percentage of pixels that lie on a Canny edge (sigma=3).

    A rough proxy for visual complexity: images with more edge pixels
    score higher. *img* is a path to an image file.
    """
    pil_img = IMG.open(img)
    grayscale = np.asarray(pil_img.convert(mode='L'))
    edge_mask = feature.canny(grayscale, sigma=3)
    total_pixels = pil_img.size[0] * pil_img.size[1]
    apw = float(np.sum(edge_mask)) / total_pixels
    return apw * 100
def get_dominant_color(img):
    """Return the dominant color of *img* via k-means quantization.

    Clusters all pixels into 5 colors with OpenCV k-means and returns the
    centroid of the most populated cluster.

    Parameters
    ----------
    img : np.ndarray
        H x W x 3 image array (OpenCV channel order).

    Returns
    -------
    np.ndarray
        uint8 array of 3 channel values.
    """
    arr = np.float32(img)
    pixels = arr.reshape((-1, 3))
    n_colors = 5
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, .1)
    flags = cv2.KMEANS_RANDOM_CENTERS
    _, labels, centroids = cv2.kmeans(pixels, n_colors, None, criteria, 10, flags)
    palette = np.uint8(centroids)
    # scipy.stats.itemfreq was deprecated and removed (scipy >= 1.3);
    # np.bincount gives per-label counts directly, and since labels are
    # 0..n_colors-1 its argmax indexes `palette` exactly as itemfreq did.
    counts = np.bincount(labels.flatten())
    dominant_color = palette[np.argmax(counts)]
    return dominant_color
def get_average_color(img):
    """Mean value of each channel (last axis) of array *img*, as a list."""
    n_channels = img.shape[-1]
    return [img[..., channel].mean() for channel in range(n_channels)]
def getSize(filename):
    """Size of the file at *filename* in bytes."""
    return os.stat(filename).st_size
def getDimensions(filename):
    """(width, height) of the image at *filename*.

    Uses a context manager so the underlying file handle is released
    immediately; the original left the opened Image to the garbage
    collector, leaking a descriptor per call until collection.
    """
    with IMG.open(filename) as img:
        return img.size
def get_blurrness_score(img):
    """Variance of the Laplacian of *img* — low values indicate blur.

    *img* is a BGR image array; it is converted to grayscale before
    the Laplacian is taken.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.Laplacian(gray, cv2.CV_64F).var()
def image_classify(model, pak, img_f):
    """Top-3 predictions for the image file *img_f* using a Keras model.

    *pak* is the keras application module that matches *model* — it must
    supply preprocess_input and decode_predictions. The image is resized
    to 224x224 if needed before prediction.
    """
    pil_img = IMG.open(img_f)
    if pil_img.size != (224, 224):
        pil_img = pil_img.resize((224, 224))
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    batch = pak.preprocess_input(batch)
    preds = model.predict(batch)
    return pak.decode_predictions(preds, top=3)[0]
def img_resize(multi_process):
    """Resize every train/test jpg to (img_size, img_size) into img_path.

    Parameters
    ----------
    multi_process : bool
        When True, split the file list across half the CPU cores and
        delegate to process_worker.img_resize_worker; otherwise resize
        sequentially in this process with OpenCV.
    """
    from config import img_path, img_size
    files = glob('./dataset/tr_img/*.jpg') + glob('./dataset/te_img/*.jpg')
    print(len(files))
    if multi_process:
        from process_worker import img_resize_worker
        import multiprocessing as mlp
        num_cpu = mlp.cpu_count() // 2
        pool = mlp.Pool(num_cpu)
        # Contiguous slice per worker; +1 so the division remainder is covered.
        num_task = 1 + len(files) // num_cpu
        results = []
        for i in range(num_cpu):
            result = pool.apply_async(
                img_resize_worker,
                args=(files[i * num_task:(i + 1) * num_task],))
            results.append(result)
        pool.close()
        pool.join()
        # .get() re-raises any worker exception instead of failing silently.
        for r in results:
            r.get()
    else:
        for f in tqdm(files):
            img = cv2.imread(f)
            if img is None:
                # cv2.imread returns None for missing/corrupt files; skip
                # rather than crash mid-run on a bad image.
                print('skipping unreadable image:', f)
                continue
            # os.path.basename replaces the brittle f[17:] slice, which
            # silently produced wrong names if the source dirs changed.
            cv2.imwrite(os.path.join(img_path, os.path.basename(f)),
                        cv2.resize(img, (img_size,) * 2))
def pack_imgs():
    """Read every train/test image and save them as one packed .npy array.

    Image names come from the 'image' column of train.csv/test.csv;
    missing values become 'unk'. Reading is fanned out across all CPU
    cores via process_worker.read_img, results are concatenated in
    submission order, and the stacked array is written to
    ./dataset/packimg.npy.
    """
    from config import img_path
    train = pd.read_csv("./dataset/train.csv", usecols=['image'])
    test = pd.read_csv("./dataset/test.csv", usecols=['image'])
    # DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat with ignore_index reproduces append().reset_index(drop=True).
    all_samples = pd.concat([train, test], ignore_index=True)
    all_samples['image'].fillna('unk', inplace=True)
    all_samples['image'] = img_path + all_samples['image'] + '.jpg'
    import multiprocessing as mlp
    from process_worker import read_img
    num_cpu = mlp.cpu_count()
    pool = mlp.Pool(num_cpu)
    # Contiguous slice per worker; +1 so the division remainder is covered.
    num_task = 1 + len(all_samples) // num_cpu
    results = []
    for i in range(num_cpu):
        result = pool.apply_async(
            read_img,
            args=(all_samples['image'].values[i * num_task:(i + 1) * num_task],))
        results.append(result)
    pool.close()
    pool.join()
    imgs = []
    for r in results:
        imgs += r.get()  # order of `results` preserves sample order
    print('save')
    imgs = np.array(imgs)
    print(imgs.shape)
    print(imgs[0].shape)
    np.save('./dataset/packimg.npy', imgs)
if __name__ == '__main__':
    # img_resize(True)  # run first if resized images are not yet on disk
    pack_imgs()