def get_image_size():
    for file in get_all_files(dictionary):
        image = Image.open(file).convert('L')
        changed = resizeimage.resize_cover(image, [300, 300])
        # plt.imshow(changed)
        # print(np.array(image).shape)
        changed.save(file)
def get_text_value_from_generated_images(generated_image_dir, dictionary_dir=None):
    dictionary_dir = dictionary_dir or os.path.join(
        root, 'tokenization', 'gerber_tokenization', 'data', 'dictionary')
    file_counterpart_value = {}
    for file in tqdm(get_all_files(generated_image_dir),
                     total=len(os.listdir(generated_image_dir))):
        try:
            # result = find_similarity_from_hash_dir(file, dictionary_dir)
            result = get_predicate_by_image(file)
            if result:
                left, upper, width, height = get_coordination_from_filename(file)
                char = result
                if char == 'dot':
                    char = '.'
                file_counterpart_value[(left, upper, width, height)] = char
        except OSError:
            pass
    return file_counterpart_value
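# A minimal usage sketch (hypothetical directory name; assumes the helpers above are in
# scope): run the recogniser over a folder of generated character crops and print each
# recovered character next to its bounding box, sorted left-to-right.
# values = get_text_value_from_generated_images('./generated-character-crops')
# for (left, upper, width, height), char in sorted(values.items()):
#     print(left, upper, width, height, '->', char)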
def find_similarity_from_hash_dir(image, dir_name):
    # Cache the hash dictionary under data/, keyed by a SHA-384 of the dictionary file
    # listing, so it is only rebuilt when the dictionary contents change.
    all_files = ''.join(get_all_files(dir_name)).encode()
    hex_dig = hashlib.sha384(all_files)
    hash_path = os.path.join('data', '{}.pickle'.format(hex_dig.hexdigest()))
    if not os.path.exists(hash_path):
        hash_dict = build_dict_hash(dir_name)
        with open(hash_path, 'wb') as f:
            pickle.dump(hash_dict, f)
    else:
        with open(hash_path, 'rb') as f:
            hash_dict = pickle.load(f)
    return find_image_label(image, hash_dict)
def build_dict_hash(dict_path):
    # Map each dictionary glyph's dhash to its character name (taken from the file name).
    char_dict_hash = {}
    for f_path in get_all_files(dict_path):
        if f_path.endswith('png'):
            chr_image = Image.open(f_path).convert('L')
            represent_hash = dhash.dhash_int(chr_image)
            if not represent_hash:
                continue
            chr_name = f_path.split('/')[-1][:-4]
            if represent_hash not in char_dict_hash:
                char_dict_hash[represent_hash] = chr_name
    return char_dict_hash
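# Illustrative sketch only: `find_image_label` (used in find_similarity_from_hash_dir)
# is defined elsewhere in the repo. Assuming it resolves a query image against the hash
# dict built above, a possible lookup looks like this; the 10-bit Hamming threshold is
# an assumption, not the project's actual value.
def find_image_label_sketch(image_path, char_dict_hash, max_bits_different=10):
    query_hash = dhash.dhash_int(Image.open(image_path).convert('L'))
    if query_hash in char_dict_hash:  # exact dhash match
        return char_dict_hash[query_hash]
    if not char_dict_hash:
        return None
    # Otherwise fall back to the nearest stored hash by Hamming distance.
    best_hash = min(char_dict_hash,
                    key=lambda h: dhash.get_num_bits_different(h, query_hash))
    if dhash.get_num_bits_different(best_hash, query_hash) <= max_bits_different:
        return char_dict_hash[best_hash]
    return None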
def find_similarity_from_tree_dir(image, dir_name, diff_threshold=5):
    # Cache the BK-tree under data/, keyed by a SHA-384 of the dictionary file listing.
    all_files = ''.join(get_all_files(dir_name)).encode()
    hex_dig = hashlib.sha384(all_files)
    tree_path = os.path.join('data', '{}.pickle'.format(hex_dig.hexdigest()))
    if not os.path.exists(tree_path):
        _chr_n, _hash_l, _bk_t = build_dict_tree(dir_name)
        with open(tree_path, 'wb') as f:
            pickle.dump((_chr_n, _hash_l, _bk_t), f)
    else:
        with open(tree_path, 'rb') as f:
            _chr_n, _hash_l, _bk_t = pickle.load(f)
    return judge_image_similarity(image, _chr_n, _hash_l, _bk_t, diff_threshold)
def build_dict_tree(dict_path):
    # Collect dhash values and character names, then index the hashes in a BK-tree
    # for nearest-neighbour lookup by Hamming distance.
    hash_list = []
    chr_name = []
    for f_path in get_all_files(dict_path):
        if f_path.endswith('png'):
            chr_image = Image.open(f_path).convert('L')
            represent_hash = dhash.dhash_int(chr_image)
            if not represent_hash:
                continue
            hash_list.append(represent_hash)
            chr_name.append(f_path.split('/')[-1][:-4])
    bk_tree = pybktree.BKTree(pybktree.hamming_distance, hash_list)
    return chr_name, hash_list, bk_tree
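# Illustrative sketch only: `judge_image_similarity` (used in find_similarity_from_tree_dir)
# is defined elsewhere. Assuming it queries the BK-tree for the closest dictionary hash
# within `diff_threshold` differing bits, it could look roughly like this
# (pybktree's BKTree.find returns (distance, item) pairs sorted by distance):
def judge_image_similarity_sketch(image_path, chr_name, hash_list, bk_tree,
                                  diff_threshold=5):
    query_hash = dhash.dhash_int(Image.open(image_path).convert('L'))
    matches = bk_tree.find(query_hash, diff_threshold)
    if not matches:
        return None
    _, matched_hash = matches[0]  # closest hash within the threshold
    return chr_name[hash_list.index(matched_hash)]  # map the hash back to its character name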
if __name__ == '__main__':
    # order_path = '/Users/weilei/project/fastprint/08_data/2019新单/7月/(0.02平米) 07-29 2 4S8RY04IA0.zip'
    # unpack_zip_file_path = unzip_order_list(order_path)
    cust_code = '12 S7CT/'
    month = ''
    order_dir = '/Users/weilei/project/fastprint/08_data/2019新单/' + cust_code + month
    unzip_dir = '/Users/weilei/project/fastprint/08_data/2019新单解压/'
    dest_path = '/Users/weilei/project/fastprint/08_data/2019新单按客户_文件类型分类/'
    dest_path_file_type_order_name = '/Users/weilei/project/fastprint/08_data/2019新单按文件类型_客户分类/'
    unzip_zip_file_path = '/Users/weilei/project/fastprint/08_data/2019新单无法解压的订单/' + cust_code + month
    order_zip_list = file_utilities.get_all_files(order_dir)
    for order_zip_path in order_zip_list:
        if order_zip_path.endswith('.DS_Store'):
            continue
        if file_utilities.is_zip_file(order_zip_path) is False:
            print('error: ' + order_zip_path + ' is not a zip file.')
            continue
        unzip_foler_path = unzip_order_list(order_zip_path, unzip_dir)
        if unzip_foler_path == '':
            print('error: Cannot unzip current zip file, folder is empty. order_zip_path is '
                  + order_zip_path)
def remove_invisible_in_html(html_content):
    return ''.join(x for x in html_content if x.isprintable())


def get_tokenization_from_html(filename):
    encoding = get_file_encoding(filename)
    soup = BeautifulSoup(open(filename, encoding=encoding), 'html.parser')
    lines = []
    content = soup.text
    content = remove_comment_in_html(content)
    for i, line in enumerate(content.split('\n')):
        if line.strip():
            line = remove_invisible_in_html(line)
            lines.append((0, i, line))
    return lines


if __name__ == '__main__':
    from utilities.file_utilities import get_all_files

    with open('extract-html-content-result.txt', 'w', encoding='utf-8') as f:
        for file in get_all_files('tests'):
            for line in get_tokenization_from_html(file):
                f.write(str(line) + '\n')
order_dir = '/Users/weilei/project/fastprint/08_data/2019新单/'
unzip_dir = '/Users/weilei/project/fastprint/08_data/2019新单解压/'
dest_path = '/Users/weilei/project/fastprint/08_data/2019新单按客户_文件类型分类/'
dest_path_file_type_order_name = '/Users/weilei/project/fastprint/08_data/2019新单按文件类型_客户分类/'
unzip_zip_file_path = '/Users/weilei/project/fastprint/08_data/2019新单无法解压的订单/'

# Count the number of zipped orders per customer.
cust_code_list = os.listdir(order_dir)
for cust_code in cust_code_list:
    if cust_code.endswith('.DS_Store'):
        continue
    cust_order_dir = order_dir + cust_code
    number = 0
    all_order_list = file_utilities.get_all_files(cust_order_dir)
    for order_name in all_order_list:
        if order_name.lower().endswith('.zip'):
            number = number + 1
    print(cust_code + ' order num is ' + str(number))

dest_path = '/Users/weilei/project/fastprint/08_data/2019新单按客户_文件类型分类/'
order_folder_list = os.listdir(dest_path)
for order_folder in order_folder_list:
    if order_folder.endswith('.DS_Store'):
        continue
    order_file_list = file_utilities.get_all_files(dest_path + order_folder)
if __name__ == '__main__':
    import os
    import gerber
    from constants.path_manager import root
    from gerber.render import GerberCairoContext

    for i, file in enumerate(
            # 452488-1.0DrillDrawing.gdo
            get_all_files(
                os.path.join(
                    root, 'tokenization/gerber_tokenization/data/452599.gdo'))):
        another_gerber = file  # another_gerber file path
        print('processing:', another_gerber)
        suffix = another_gerber.split('/')[-1].split('.')[0]  # e.g. suffix = 'drill'
        generate_all_group_primitives(another_gerber,
                                      './group-connected-{}'.format(suffix))
    '''
    gm2_dir = os.path.join(root, 'tokenization/gerber_tokenization/data/TZ7.820.1780A.GM2')
    gerber_obj = gerber.read(gm2_dir)
    ctx = GerberCairoContext()
    gerber_obj.render(ctx)
    ctx.dump('test-gerber-{}.png'.format(1))
    print('generate end!')
import os


class GImage:
    def __init__(self, filename):
        self.filename = filename
        self.image = Image.open(filename)

    def __hash__(self):
        return dhash.dhash_int(self.image)

    def __eq__(self, other):
        # Treat two images as duplicates when their dhash values are close.
        return abs(dhash.dhash_int(self.image) - dhash.dhash_int(other.image)) < 30


all_files = list(
    get_all_files(
        '/Users/mqgao/PycharmProjects/auto-pcb-ii/tokenization/gerber_tokenization/group-connected-old'
    ))
ic(len(all_files))
all_unique_files = set(GImage(f) for f in all_files if f.endswith('.png'))
ic(len(all_unique_files))
for file in all_unique_files:
    name = file.filename
    dst = os.path.join(root, 'tokenization', 'gerber_tokenization', 'data',
                       'unlabelled', name.split('/')[-1])
    shutil.copyfile(file.filename, dst)
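# Caveat (sketch, not part of the original pipeline): a set only treats two GImage
# objects as duplicates when their __hash__ values collide, which dhash_int does not
# guarantee for near-identical images. An explicit pairwise check via dhash's
# Hamming-distance helper is more reliable; the 10-bit threshold here is an assumption.
def is_near_duplicate(path_a, path_b, max_bits_different=10):
    hash_a = dhash.dhash_int(Image.open(path_a).convert('L'))
    hash_b = dhash.dhash_int(Image.open(path_b).convert('L'))
    return dhash.get_num_bits_different(hash_a, hash_b) <= max_bits_different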
assert len(get_numerics_str(
    'tests-left_upper-N#N1dot0N#N-N#N0dot11N#N-width-N#N1dot013N#N-height-N#Nminus2dot0N#N.png'
)) == 4
assert get_coordination_from_filename('N#Nminus0dot123N#N') == [-0.123]
assert get_coordination_from_filename(
    'tests-left_upper-N#N1dot0N#N-N#N0dot11N#N-width-N#N1dot013N#N-height-N#Nminus2dot0N#N.png'
) == [1.0, 0.11, 1.013, -2.0]
"""

if __name__ == '__main__':
    from constants.path_manager import root
    from gerber.render import GerberCairoContext
    from gerber_handlers import gerber_path_constants

    r_path = gerber_path_constants.CLIENT_6V29U2G7A0
    for i, file in enumerate(get_all_files(os.path.join(r_path, 'cs'))):
        another_gerber = file  # another_gerber file path
        suffix = another_gerber.split('/')[-1].split('.')[0]  # e.g. suffix = 'drill'
        # output dir is './group-connected-<suffix>', e.g. './group-connected-drill'
        # try:
        generate_all_group_primitives(another_gerber,
                                      './group-connected-{}'.format(suffix), 1)
        # generate_all_group_primitives(another_gerber, format(suffix))
        # another_gerber = '/Users/mqgao/PycharmProjects/auto-pcb-ii/tokenization/gerber_tokenization/data/dirll.art'
        # except Exception as e:
        #     pass
    '''
    root_dir = os.path.join(root, 'tokenization/gerber_tokenization/data/art/')
    dir_list = os.listdir(root_dir)
    for i in range(0, len(dir_list)):
        img_path = os.path.join(root_dir, dir_list[i])