def test_check_dir():
    """wl_checking_misc.check_dir must create a missing directory."""
    path = 'temp'

    # Start from a clean slate: remove any leftover directory from a prior run
    if os.path.exists(path):
        shutil.rmtree(path)

    wl_checking_misc.check_dir(path)

    # The directory must now exist; clean up afterwards
    assert os.path.exists(path)
    os.rmdir(path)
def wl_msg_box_path_not_exist_confirm(main, path):
    """Ask the user whether a non-existent path should be created.

    Shows a Yes/No dialog (defaulting to No); if the user answers Yes,
    the directory is created via wl_checking_misc.check_dir.
    Returns the user's reply so callers can branch on it.
    """
    message = main.tr(f'''
        {main.settings_global['styles']['style_dialog']}
        <body>
            <div>The specified path "{path}" does not exist.</div>
            <div>Do you want to create the directory?</div>
        </body>
    ''')

    reply = QMessageBox.question(
        main,
        main.tr('Path Not Exist'),
        message,
        QMessageBox.Yes | QMessageBox.No,
        QMessageBox.No
    )

    if reply == QMessageBox.Yes:
        wl_checking_misc.check_dir(path)

    return reply
def exp_list(self):
    """Export the list's items to a user-chosen plain-text file.

    Opens a save dialog starting at the configured default path, writes
    one item per line in the configured encoding, shows a confirmation
    dialog, and remembers the chosen directory as the new default.
    """
    settings_exp = self.main.settings_custom['exp'][self.settings]

    # check_dir ensures the default directory exists before the dialog opens
    file_path = QFileDialog.getSaveFileName(
        self.main,
        _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'Export to File'),
        os.path.join(
            wl_checking_misc.check_dir(settings_exp['default_path']),
            self.exp_file_name
        ),
        _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'Text File (*.txt)')
    )[0]

    # An empty path means the user cancelled the dialog
    if file_path:
        encoding = settings_exp['default_encoding']

        with open(file_path, 'w', encoding=encoding) as f:
            for item in self.model().stringList():
                f.write(item + '\n')

        wl_msg_boxes.Wl_Msg_Box_Info(
            self.main,
            title=_tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'Export Completed'),
            text=_tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', '''
                <div>The list has been successfully exported to "{}".</div>
            ''').format(file_path)
        ).open()

        # Modify default path
        settings_exp['default_path'] = os.path.normpath(
            os.path.dirname(file_path))
def confirm_path(self, line_edit):
    """Validate the directory path typed into line_edit.

    Returns True when the path is (or has just been made into) an
    existing directory; otherwise refocuses/selects the line edit and
    returns False. Offers to create a missing directory.
    """
    path = line_edit.text()

    # An existing directory needs no further action
    # (isdir implies exists, so this covers the original happy path)
    if os.path.isdir(path):
        return True

    # Path exists but is not a directory: report and let the user retry
    if os.path.exists(path):
        self.wl_msg_box_path_not_dir(path)

        line_edit.setFocus()
        line_edit.selectAll()

        return False

    # Path does not exist: offer to create it
    reply = QMessageBox.question(
        self.main,
        self.tr('Path Not Exist'),
        self.tr('''
            {}
            <body>
                <div>The specified path "{}" does not exist.</div>
                <div>Do you want to create the directory?</div>
            </body>
        ''').format(self.main.settings_global['styles']['style_dialog'], path),
        QMessageBox.Yes | QMessageBox.No,
        QMessageBox.No
    )

    if reply == QMessageBox.Yes:
        wl_checking_misc.check_dir(path)

        return True

    line_edit.setFocus()
    line_edit.selectAll()

    return False
def open_files(self):
    """Show an open-file dialog and forward the chosen paths to wl_files."""
    dir_custom = self.main.settings_custom['import']['files']['default_path']

    # Fall back to the shipped default when the remembered path is gone
    if os.path.exists(dir_custom):
        default_dir = dir_custom
    else:
        default_dir = self.main.settings_default['import']['files'][
            'default_path']

    file_types = self.main.settings_global['file_types']['files']

    # check_dir guarantees the starting directory exists
    file_paths = QFileDialog.getOpenFileNames(
        self.main,
        self.tr('Open File(s)'),
        wl_checking_misc.check_dir(default_dir),
        ';;'.join(file_types),
        file_types[-1]
    )[0]

    # Empty list means the dialog was cancelled
    if file_paths:
        self.main.wl_files.open_files(file_paths)
def export_list(self, settings):
    """Export the widget's items to a text file chosen by the user.

    Writes one item per line in the configured encoding, shows the
    export-completed message box, and remembers the chosen directory as
    the new default export path for this settings key.
    """
    settings_export = self.main.settings_custom['export'][settings]

    # check_dir ensures the starting directory exists for the dialog
    file_path = QFileDialog.getSaveFileName(
        self.main,
        self.tr('Export to File'),
        wl_checking_misc.check_dir(settings_export['default_path']),
        self.tr('Text File (*.txt)')
    )[0]

    # Empty path means the user cancelled
    if file_path:
        with open(file_path, 'w',
                  encoding=settings_export['default_encoding']) as f:
            for item in self.get_items():
                f.write(item + '\n')

        wl_msg_box.wl_msg_box_export_list(self.main, file_path)

        # Remember the containing directory as the new default
        settings_export['default_path'] = os.path.normpath(
            os.path.dirname(file_path))
def run(self):
    """Worker entry point: convert each picked file to plain text.

    Non-.txt files are converted into the temp-files directory, then every
    resulting path is registered via wl_files._new_file. Emits progress
    messages while running and worker_done with the new file records.
    """
    new_files = []

    if self.file_paths:
        len_file_paths = len(self.file_paths)

        for i, file_path in enumerate(self.file_paths):
            # Per-file progress message for the UI thread
            self.progress_updated.emit(
                self.tr(f'Opening files ... ({i + 1}/{len_file_paths})'))

            # Converted copies are written into the temp-files directory,
            # which check_dir creates on demand
            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']
                ['default_path'])
            default_encoding = self.main.settings_custom['import'][
                'temp_files']['default_encoding']

            file_path = wl_misc.get_normalized_path(file_path)
            file_name, file_ext = os.path.splitext(
                os.path.basename(file_path))
            # Extension matching below is case-insensitive
            file_ext = file_ext.lower()

            # Text files: no conversion required
            if file_ext == '.txt':
                new_files.append(self.main.wl_files._new_file(file_path))
            else:
                if file_ext in ['.docx', '.xlsx', '.xls']:
                    # Converted copy keeps the base name with a .txt suffix;
                    # check_new_path avoids clobbering an existing file
                    new_path = wl_checking_misc.check_new_path(
                        os.path.join(default_dir, f'{file_name}.txt'))

                    # Word documents: paragraphs become lines, table rows
                    # become tab-separated lines
                    if file_ext == '.docx':
                        lines = []  # NOTE(review): assigned but never used

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            doc = docx.Document(file_path)

                            for block in self.iter_block_items(doc):
                                if type(block) == docx.text.paragraph.Paragraph:
                                    f.write(f'{block.text}\n')
                                elif type(block) == docx.table.Table:
                                    for row in self.iter_visual_cells(block):
                                        cells = []

                                        for cell in row:
                                            cells.append(' '.join([
                                                item.text
                                                for item in
                                                self.iter_cell_items(cell)
                                            ]))

                                        f.write('\t'.join(cells) + '\n')
                    # Excel workbooks: one tab-separated line per row,
                    # empty cells written as empty strings
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(
                                file_path, data_only=True)

                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]

                                for row in worksheet.rows:
                                    f.write('\t'.join([(
                                        cell.value
                                        if cell.value != None
                                        else '') for cell in row]) + '\n')
                    # NOTE(review): '.xls' falls through both branches above,
                    # so new_path is never written yet is still handed to
                    # _new_file below — looks like a bug; compare with the
                    # other run() in this file, which handles .xls via xlrd

                    new_paths = [new_path]
                else:
                    # Detect encoding of the source file (the converted copy
                    # is always written in default_encoding)
                    if self.main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wl_detection.detect_encoding(
                            self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom[
                            'auto_detection']['default_settings'][
                            'default_encoding']

                    # CSV files: rows become tab-separated lines
                    if file_ext == '.csv':
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            with open(file_path, 'r', newline='',
                                      encoding=encoding_code) as f_csv:
                                csv_reader = csv.reader(f_csv)

                                for row in csv_reader:
                                    f.write('\t'.join(row) + '\n')

                        new_paths = [new_path]
                    # HTML files: keep only the visible text
                    elif file_ext in ['.htm', '.html']:
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(soup.get_text())

                        new_paths = [new_path]
                    # XML files: re-encoded verbatim, keeping the .xml suffix
                    elif file_ext == '.xml':
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            xml_text = f.read()

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.xml'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(xml_text)

                        new_paths = [new_path]
                    # Translation memory files: each <tu> is split into its
                    # source and target <seg>, written to two separate files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []

                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')

                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())

                        path_src = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_source.txt'))
                        path_target = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_target.txt'))

                        with open(path_src, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')

                        with open(path_target, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')

                        new_paths = [path_src, path_target]

                # Register every converted copy (txt=False: already in the
                # temp-files directory, so no further copying is needed)
                for new_path in new_paths:
                    new_files.append(
                        self.main.wl_files._new_file(new_path, txt=False))

        # Remember the directory of the first picked file as the new default
        self.main.settings_custom['import']['files'][
            'default_path'] = wl_misc.get_normalized_dir(
            self.file_paths[0])

    self.progress_updated.emit(self.tr('Updating table ...'))

    # Brief pause so the final progress message is visible before completion
    time.sleep(0.1)

    self.worker_done.emit(new_files)
def _new_file(self, file_path, txt=True):
    """Build the record dict for a newly opened file.

    Fills in selection state, display name, detected (or default) encoding
    and language, and — when txt is True — copies the file into the
    temp-files directory with configured header tags stripped, updating
    new_file['path'] to the copy. Returns the record dict.
    """
    new_file = {}
    # NOTE(review): these two flags are set but never read or returned —
    # dead code, or the success flags were dropped in a refactor; compare
    # with the other _new_file in this file, which returns them
    detect_pass_encoding = True
    detect_pass_lang = True

    new_file['selected'] = True
    new_file['path'] = file_path

    # Pre-tokenized/pre-tagged status is inferred from the extension;
    # other extensions leave these keys unset
    if new_file['path'].endswith('.txt'):
        new_file['tokenized'] = 'No'
        new_file['tagged'] = 'No'
    elif new_file['path'].endswith('.xml'):
        new_file['tokenized'] = 'Yes'
        new_file['tagged'] = 'Yes'

    new_file['name'], _ = os.path.splitext(
        os.path.basename(new_file['path']))
    new_file['name_old'] = new_file['name']

    # Detect encodings
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_encodings']:
        new_file['encoding'] = wl_detection.detect_encoding(
            self.main, new_file['path'])
    else:
        new_file['encoding'] = self.main.settings_custom['auto_detection'][
            'default_settings']['default_encoding']

    # Detect languages
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_langs']:
        new_file['lang'] = wl_detection.detect_lang(self.main, new_file)
    else:
        new_file['lang'] = self.main.settings_custom['auto_detection'][
            'default_settings']['default_lang']

    if txt:
        # Work on a copy in the temp-files directory so the original file
        # is never modified; check_new_path avoids name collisions
        default_dir = wl_checking_misc.check_dir(
            self.main.settings_custom['import']['temp_files']
            ['default_path'])
        new_file['path'] = os.path.join(default_dir,
                                        re.split(r'[/\\]', file_path)[-1])
        new_file['path'] = wl_checking_misc.check_new_path(
            new_file['path'])

        # Collect CSS selectors for configured header tags
        # (tag_opening like '<teiHeader>' becomes the selector 'teiHeader')
        tags_header = []

        for _, _, tag_opening, _ in self.main.settings_custom['tags'][
                'tags_header']:
            tags_header.append(tag_opening[1:-1])

        text = ''

        with open(file_path, 'r', encoding=new_file['encoding']) as f:
            for line in f:
                text += line

        # The "lxml" parser will add <html><body> to the text, which is
        # undesirable, so the more literal html.parser is used instead
        with open(new_file['path'], 'w', encoding='utf_8') as f:
            soup = bs4.BeautifulSoup(text, features='html.parser')

            # Drop every header element (and its contents) from the copy
            for tag_header in tags_header:
                for header_element in soup.select(tag_header):
                    header_element.decompose()

            f.write(str(soup))

    return new_file
def run(self):
    """Worker entry point: convert each picked file to plain text.

    Like the other run() in this file, but _new_file here returns
    per-file detection-success flags; paths whose encoding/text-type/
    language detection failed are collected into separate lists and
    emitted alongside the new file records via worker_done.
    """
    new_files = []
    files_detection_error_encoding = []
    files_detection_error_text_type = []
    files_detection_error_lang = []

    if self.file_paths:
        len_file_paths = len(self.file_paths)

        for i, file_path in enumerate(self.file_paths):
            # Per-file progress message for the UI thread
            self.progress_updated.emit(
                self.tr(f'Opening files ... ({i + 1}/{len_file_paths})'))

            # Converted copies go into the temp-files directory, which
            # check_dir creates on demand
            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']
                ['default_path'])
            default_encoding = self.main.settings_custom['import'][
                'temp_files']['default_encoding']

            file_path = wl_misc.get_normalized_path(file_path)
            file_name, file_ext = os.path.splitext(
                os.path.basename(file_path))
            # Extension matching below is case-insensitive
            file_ext = file_ext.lower()

            # Text files: no conversion required
            if file_ext == '.txt':
                (new_file,
                 detection_success_encoding,
                 detection_success_text_type,
                 detection_success_lang
                 ) = self.main.wl_files._new_file(file_path)

                new_files.append(new_file)

                # Record which detections failed for this file
                if not detection_success_encoding:
                    files_detection_error_encoding.append(new_file['path'])
                if not detection_success_text_type:
                    files_detection_error_text_type.append(
                        new_file['path'])
                if not detection_success_lang:
                    files_detection_error_lang.append(new_file['path'])
            else:
                if file_ext in ['.docx', '.xlsx', '.xls']:
                    # Converted copy keeps the base name with a .txt suffix;
                    # check_new_path avoids clobbering an existing file
                    new_path = wl_checking_misc.check_new_path(
                        os.path.join(default_dir, f'{file_name}.txt'))

                    # Word documents: paragraphs become lines, table rows
                    # become tab-separated lines
                    if file_ext == '.docx':
                        lines = []  # NOTE(review): assigned but never used

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            doc = docx.Document(file_path)

                            for block in self.iter_block_items(doc):
                                if type(block) == docx.text.paragraph.Paragraph:
                                    f.write(f'{block.text}\n')
                                elif type(block) == docx.table.Table:
                                    for row in self.iter_visual_cells(block):
                                        cells = []

                                        for cell in row:
                                            cells.append(' '.join([
                                                item.text
                                                for item in
                                                self.iter_cell_items(cell)
                                            ]))

                                        f.write('\t'.join(cells) + '\n')
                    # Excel workbooks (.xlsx via openpyxl): one
                    # tab-separated line per row, empty cells as ''
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(
                                file_path, data_only=True)

                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]

                                for row in worksheet.rows:
                                    f.write('\t'.join([(
                                        cell.value
                                        if cell.value != None
                                        else '') for cell in row]) + '\n')
                    # Legacy Excel workbooks (.xls via xlrd)
                    elif file_ext == '.xls':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = xlrd.open_workbook(file_path)

                            for i_sheet in range(workbook.nsheets):
                                worksheet = workbook.sheet_by_index(
                                    i_sheet)

                                for row in range(worksheet.nrows):
                                    f.write('\t'.join([
                                        worksheet.cell_value(row, col)
                                        for col in range(worksheet.ncols)
                                    ]) + '\n')

                    new_paths = [new_path]
                else:
                    # Detect encoding of the source file (the converted
                    # copy is always written in default_encoding)
                    if self.main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wl_detection.detect_encoding(
                            self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom[
                            'auto_detection']['default_settings'][
                            'default_encoding']

                    # CSV files: rows become tab-separated lines
                    if file_ext == '.csv':
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            with open(file_path, 'r', newline='',
                                      encoding=encoding_code) as f_csv:
                                csv_reader = csv.reader(f_csv)

                                for row in csv_reader:
                                    f.write('\t'.join(row) + '\n')

                        new_paths = [new_path]
                    # HTML files: keep only the visible text
                    elif file_ext in ['.htm', '.html']:
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(soup.get_text())

                        new_paths = [new_path]
                    # XML files: re-encoded verbatim (note: written with a
                    # .txt suffix here, unlike the other run() in this file)
                    elif file_ext == '.xml':
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            xml_text = f.read()

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(xml_text)

                        new_paths = [new_path]
                    # Translation memory files: each <tu> is split into its
                    # source and target <seg>, written to two files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []

                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')

                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())

                        path_src = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_source.txt'))
                        path_target = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_target.txt'))

                        with open(path_src, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')

                        with open(path_target, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')

                        new_paths = [path_src, path_target]
                    # Lyrics files: keep only lines carrying a well-formed
                    # [mm:ss.xx] time tag, emitted in tag order
                    elif file_ext == '.lrc':
                        lyrics = {}

                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            for line in f:
                                time_tags = []

                                line = line.strip()

                                # Strip time tags
                                while re.search(r'^\[[^\]]+?\]', line):
                                    time_tags.append(
                                        re.search(r'^\[[^\]]+?\]',
                                                  line).group())

                                    line = line[len(time_tags[-1]):].strip()

                                # Strip word time tags
                                line = re.sub(r'<[^>]+?>', r'', line)
                                line = re.sub(r'\s{2,}', r' ', line).strip()

                                # Only keep tags of the form [mm:ss.xx]
                                for time_tag in time_tags:
                                    if re.search(
                                            r'^\[[0-9]{2}:[0-5][0-9]\.[0-9]{2}\]$',
                                            time_tag):
                                        lyrics[time_tag] = line

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            # NOTE(review): the loop variable shadows the
                            # 'lyrics' dict; harmless since sorted() has
                            # already materialized the items, but fragile
                            for _, lyrics in sorted(lyrics.items()):
                                f.write(f'{lyrics}\n')

                        new_paths = [new_path]

                # Register every converted copy and record failed detections
                for new_path in new_paths:
                    (new_file,
                     detection_success_encoding,
                     detection_success_text_type,
                     detection_success_lang
                     ) = self.main.wl_files._new_file(new_path)

                    new_files.append(new_file)

                    if not detection_success_encoding:
                        files_detection_error_encoding.append(
                            new_file['path'])
                    if not detection_success_text_type:
                        files_detection_error_text_type.append(
                            new_file['path'])
                    if not detection_success_lang:
                        files_detection_error_lang.append(new_file['path'])

        # Remember the directory of the first picked file as the new default
        self.main.settings_custom['import']['files'][
            'default_path'] = wl_misc.get_normalized_dir(
            self.file_paths[0])

    self.progress_updated.emit(self.tr('Updating table ...'))

    # Brief pause so the final progress message is visible before completion
    time.sleep(0.1)

    self.worker_done.emit(new_files,
                          files_detection_error_encoding,
                          files_detection_error_text_type,
                          files_detection_error_lang)
def _new_file(self, file_path, txt=True):
    """Build the record dict for a newly opened file.

    Fills in selection state, display name, and detected (or default)
    encoding, text type, and language. When txt is True, the file is
    copied into the temp-files directory with configured header-tag
    regions stripped line by line, and new_file['path'] points at the
    copy. Returns a 4-tuple: (record dict, encoding-detection success,
    text-type-detection success, language-detection success).
    """
    new_file = {}
    # Detection flags default to True; only overwritten when the
    # corresponding auto-detection actually runs
    detection_success_encoding = True
    detection_success_text_type = True
    detection_success_lang = True

    new_file['selected'] = True
    new_file['path'] = file_path

    new_file['name'], _ = os.path.splitext(
        os.path.basename(new_file['path']))
    new_file['name_old'] = new_file['name']

    # Detect encodings
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_encodings']:
        (new_file['encoding'],
         detection_success_encoding) = wl_detection.detect_encoding(
            self.main, new_file['path'])
    else:
        new_file['encoding'] = self.main.settings_custom['auto_detection'][
            'default_settings']['default_encoding']

    # Detect text types
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_text_types']:
        (new_file['text_type'],
         detection_success_text_type) = wl_detection.detect_text_type(
            self.main, new_file)
    else:
        new_file['text_type'] = self.main.settings_custom[
            'auto_detection']['default_settings']['default_text_type']

    # Detect languages
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_langs']:
        (new_file['lang'],
         detection_success_lang) = wl_detection.detect_lang(
            self.main, new_file)
    else:
        new_file['lang'] = self.main.settings_custom['auto_detection'][
            'default_settings']['default_lang']

    # Remove header tags
    tags_header_opening = []
    tags_header_closing = []

    if txt:
        # Work on a copy in the temp-files directory so the original file
        # is never modified; check_new_path avoids name collisions
        default_dir = wl_checking_misc.check_dir(
            self.main.settings_custom['import']['temp_files']
            ['default_path'])
        # NOTE(review): this rewrite only matches backslash-separated
        # (Windows) paths, and re.escape in the replacement string can
        # misbehave with backslashes — confirm behavior on POSIX paths
        new_file['path'] = re.sub(r'^.+?\\([^\\]+?$)',
                                  fr'{re.escape(default_dir)}\\\1',
                                  file_path)
        new_file['path'] = wl_checking_misc.check_new_path(
            new_file['path'])

        # Build alternation patterns matching text from an opening header
        # tag onward, and text up to and including a closing header tag
        for tag_opening, tag_closing in self.main.settings_custom['tags'][
                'tags_header']:
            tags_header_opening.append(fr"{tag_opening}.+?")
            tags_header_closing.append(fr".+?{tag_closing}")

        tag_header_opening = '|'.join(tags_header_opening)
        tag_header_closing = '|'.join(tags_header_closing)

        with open(file_path, 'r', encoding=new_file['encoding']) as f, open(
                new_file['path'], 'w', encoding='utf_8') as f_temp:
            # State flag: currently inside a header region
            tags_header = False

            for line in f:
                if tags_header:
                    # Inside a header: drop lines until the closing tag,
                    # keeping whatever follows it on that line
                    if re.search(tag_header_closing, line):
                        f_temp.write(re.sub(tag_header_closing, '', line))

                        tags_header = False
                elif re.search(tag_header_opening, line):
                    # Header opens: keep text before the opening tag
                    f_temp.write(re.sub(tag_header_opening, '', line))

                    tags_header = True
                else:
                    f_temp.write(line)

    return (new_file,
            detection_success_encoding,
            detection_success_text_type,
            detection_success_lang)