def create_candidate_data(candidate: str) -> dict:
    """Compute in-memory image features for *candidate* across every font variant.

    For each (fontfamily, fontsize) combination the candidate string is rendered,
    blurred, and run through SIFT; the results are stored in a flat dict keyed by
    '<fontfamily>/<fontsize>/<feature>' paths.

    Args:
        candidate: Text to render (e.g. a URL or domain name).

    Returns:
        dict with key 'name' -> candidate, plus per-variant keys
        '.../blurred_img', '.../descriptor', '.../keypoints', '.../length'.
    """
    # 'name' is variant-independent — set it once, not on every loop iteration.
    data = {'name': candidate}
    # The SIFT detector is stateless and loop-invariant; construct it once.
    sift = cv2.xfeatures2d.SIFT_create()
    for fontfamily in fontfamilies:
        for fontsize in fontsizes:
            path = os.path.join(fontfamily, str(fontsize))
            img = image.create_image(candidate, fontsize, fontfamily)
            # Per-fontsize blur parameters come from the module-level tables.
            blurred_img = image.blur(img, kernel[fontsize], sigma[fontsize])
            kp, des = sift.detectAndCompute(img, None)
            if des is None:
                # SIFT found no keypoints — keep a consistent array type downstream.
                des = np.array([])
            data[os.path.join(path, 'blurred_img')] = blurred_img
            data[os.path.join(path, 'descriptor')] = des
            data[os.path.join(path, 'keypoints')] = serial.serialize_keypoints(kp)
            # get_text_dim returns (width, height); only the width is kept.
            data[os.path.join(path, 'length')] = image.get_text_dim(candidate, fontsize, fontfamily)[0]
    return data
def create_preprocessing_data(urls: list, name: str = 'alexa') -> None:
    """Precompute and persist image features for each URL under *data_path*.

    For every URL and every (fontfamily, fontsize) variant the text is rendered,
    blurred, and SIFT-processed; the results are written as .npy files under
    data_path/<name>/<url>/<fontfamily>/<fontsize>/.

    Args:
        urls: Texts (e.g. domain names) to preprocess.
        name: Dataset subdirectory under data_path (default 'alexa').

    Side effects:
        Creates directories, writes 'length', 'blurred_img', 'descriptor',
        and 'keypoints' .npy files, and prints the elapsed wall-clock time.
    """
    start_time = time.time()
    # The SIFT detector is stateless and loop-invariant; construct it once.
    sift = cv2.xfeatures2d.SIFT_create()
    for url in urls:
        for fontfamily in fontfamilies:
            for fontsize in fontsizes:
                directory = os.path.join(data_path, name, url, fontfamily, str(fontsize))
                # exist_ok avoids the check-then-create race of the exists()/makedirs pair.
                os.makedirs(directory, exist_ok=True)
                # get_text_dim returns (width, height); only the width is kept.
                length = image.get_text_dim(url, fontsize, fontfamily)[0]
                img = image.create_image(url, fontsize, fontfamily)
                blurred_img = image.blur(img, kernel[fontsize], sigma[fontsize])
                kp, des = sift.detectAndCompute(img, None)
                if des is None:
                    # SIFT found no keypoints — keep a consistent array type on disk.
                    des = np.array([])
                # Reuse the already-built directory path instead of re-joining it.
                np.save(os.path.join(directory, 'length'), np.array(length))
                np.save(os.path.join(directory, 'blurred_img'), blurred_img)
                np.save(os.path.join(directory, 'descriptor'), des)
                np.save(os.path.join(directory, 'keypoints'),
                        np.array(serial.serialize_keypoints(kp)))
    print("--- %s seconds ---" % (time.time() - start_time))