Exemplo n.º 1
0
def test_check_dir():
    """Check that the path exists after ``check_dir`` is called on it."""
    target = 'temp'

    # Remove any leftover so that the existence assertion below is meaningful
    if os.path.exists(target):
        shutil.rmtree(target)

    wl_checking_misc.check_dir(target)

    assert os.path.exists(target)

    # Clean up what the call under test left behind
    os.rmdir(target)
Exemplo n.º 2
0
def wl_msg_box_path_not_exist_confirm(main, path):
    """Ask whether a missing directory should be created; create it on "Yes".

    Args:
        main: Parent of the dialog.  Also supplies ``tr`` and
            ``settings_global['styles']['style_dialog']``.
        path: Path that is reported to the user as not existing.

    Returns:
        The value returned by ``QMessageBox.question``.
    """
    title = main.tr('Path Not Exist')

    # Translate the constant template first and insert the run-time values
    # afterwards.  A source string that already contains the path can never
    # match a translation entry, so it would always be shown untranslated.
    text = main.tr('''
            {}
            <body>
                <div>The specified path "{}" does not exist.</div>
                <div>Do you want to create the directory?</div>
            </body>
        ''').format(main.settings_global['styles']['style_dialog'], path)

    reply = QMessageBox.question(
        main, title, text,
        QMessageBox.Yes | QMessageBox.No, QMessageBox.No)

    if reply == QMessageBox.Yes:
        wl_checking_misc.check_dir(path)

    return reply
Exemplo n.º 3
0
    def exp_list(self):
        """Export the list items, one per line, to a file picked by the user.

        Nothing is written if the save dialog returns an empty path.  After a
        successful export an information box is opened and the directory of
        the chosen file is stored as the new default export path.
        """
        context = 'Wl_List_Add_Ins_Del_Clr_Imp_Exp'
        start_dir = self.main.settings_custom['exp'][self.settings][
            'default_path']

        caption = _tr(context, 'Export to File')
        suggested_path = os.path.join(
            wl_checking_misc.check_dir(start_dir), self.exp_file_name)
        name_filter = _tr(context, 'Text File (*.txt)')

        file_path = QFileDialog.getSaveFileName(
            self.main, caption, suggested_path, name_filter)[0]

        # An empty path means that no file was chosen
        if not file_path:
            return

        encoding = self.main.settings_custom['exp'][self.settings][
            'default_encoding']

        with open(file_path, 'w', encoding=encoding) as f:
            f.writelines(
                item + '\n' for item in self.model().stringList())

        box_title = _tr(context, 'Export Completed')
        box_text = _tr(context, '''
                    <div>The list has been successfully exported to "{}".</div>
                ''').format(file_path)

        wl_msg_boxes.Wl_Msg_Box_Info(
            self.main, title=box_title, text=box_text).open()

        # Remember the directory of the exported file for the next export
        export_dir = os.path.normpath(os.path.dirname(file_path))
        self.main.settings_custom['exp'][self.settings][
            'default_path'] = export_dir
Exemplo n.º 4
0
    def confirm_path(self, line_edit):
        """Validate the directory path typed into ``line_edit``.

        Args:
            line_edit: Widget whose ``text()`` is the path to validate.

        Returns:
            ``True`` if the path is an existing directory or the user agreed
            to have it created; ``False`` otherwise, in which case the widget
            is focused and its content selected.
        """
        def reject():
            # Hand the field back to the user for correction
            line_edit.setFocus()
            line_edit.selectAll()

            return False

        path = line_edit.text()

        if os.path.exists(path):
            if os.path.isdir(path):
                return True

            # The path exists but is not a directory
            self.wl_msg_box_path_not_dir(path)

            return reject()

        # The path does not exist: offer to create it
        title = self.tr('Path Not Exist')
        message = self.tr('''
                    {}
                    <body>
                        <div>The specified path "{}" does not exist.</div>
                        <div>Do you want to create the directory?</div>
                    </body>
                ''').format(self.main.settings_global['styles']['style_dialog'], path)

        reply = QMessageBox.question(
            self.main, title, message,
            QMessageBox.Yes | QMessageBox.No, QMessageBox.No)

        if reply == QMessageBox.Yes:
            wl_checking_misc.check_dir(path)

            return True

        return reject()
Exemplo n.º 5
0
    def open_files(self):
        """Let the user pick files and pass the selection to ``wl_files``.

        The dialog starts in the custom import directory if that path exists,
        otherwise in the default import directory.  Nothing happens if the
        dialog returns no file paths.
        """
        custom_dir = self.main.settings_custom['import']['files'][
            'default_path']

        # Fall back to the built-in default if the stored path is gone
        default_dir = (
            custom_dir if os.path.exists(custom_dir)
            else self.main.settings_default['import']['files']['default_path'])

        caption = self.tr('Open File(s)')
        start_dir = wl_checking_misc.check_dir(default_dir)
        file_types = self.main.settings_global['file_types']['files']

        # All file types are offered; the last entry is pre-selected
        selection = QFileDialog.getOpenFileNames(
            self.main, caption, start_dir,
            ';;'.join(file_types),
            self.main.settings_global['file_types']['files'][-1])
        file_paths = selection[0]

        if file_paths:
            self.main.wl_files.open_files(file_paths)
Exemplo n.º 6
0
    def export_list(self, settings):
        """Write the items of the list, one per line, to a user-chosen file.

        Args:
            settings: Key under ``settings_custom['export']`` that holds the
                ``default_path`` and ``default_encoding`` to use.
        """
        start_dir = self.main.settings_custom['export'][settings][
            'default_path']

        dialog_result = QFileDialog.getSaveFileName(
            self.main,
            self.tr('Export to File'),
            wl_checking_misc.check_dir(start_dir),
            self.tr('Text File (*.txt)'))
        file_path = dialog_result[0]

        # An empty path means that no file was chosen
        if not file_path:
            return

        encoding = self.main.settings_custom['export'][settings][
            'default_encoding']

        with open(file_path, 'w', encoding=encoding) as f:
            f.writelines(item + '\n' for item in self.get_items())

        wl_msg_box.wl_msg_box_export_list(self.main, file_path)

        # Remember the directory of the exported file for the next export
        export_dir = os.path.normpath(os.path.dirname(file_path))
        self.main.settings_custom['export'][settings][
            'default_path'] = export_dir
Exemplo n.º 7
0
    def run(self):
        """Import ``self.file_paths`` and emit the resulting file entries.

        ``.txt`` files are registered directly through
        ``self.main.wl_files._new_file``.  Word documents, Excel workbooks,
        CSV, HTML, XML and TMX files are first written out into the temporary
        file directory and the written copies are registered instead.  Files
        with any other extension produce no entry.

        Progress messages are emitted through ``progress_updated`` and the
        collected entries through ``worker_done``.  The directory of the
        first file becomes the new default import path.
        """
        new_files = []

        if self.file_paths:
            len_file_paths = len(self.file_paths)

            for i, file_path in enumerate(self.file_paths):
                # Translate the constant template, then insert the counters;
                # a source string with the numbers already in it could never
                # match a translation entry
                self.progress_updated.emit(
                    self.tr('Opening files ... ({}/{})').format(
                        i + 1, len_file_paths))

                default_dir = wl_checking_misc.check_dir(
                    self.main.settings_custom['import']['temp_files']
                    ['default_path'])
                default_encoding = self.main.settings_custom['import'][
                    'temp_files']['default_encoding']

                file_path = wl_misc.get_normalized_path(file_path)
                file_name, file_ext = os.path.splitext(
                    os.path.basename(file_path))
                file_ext = file_ext.lower()

                # Text files
                if file_ext == '.txt':
                    new_files.append(self.main.wl_files._new_file(file_path))
                else:
                    # Reset for every file so that an unsupported extension
                    # neither raises NameError nor re-registers the paths
                    # produced for the previous file
                    new_paths = []

                    if file_ext in ['.docx', '.xlsx', '.xls']:
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        # NOTE(review): '.xls' is accepted here but no branch
                        # below writes new_path for it - confirm whether a
                        # converter is missing.

                        # Word documents
                        if file_ext == '.docx':
                            with open(new_path, 'w',
                                      encoding=default_encoding) as f:
                                doc = docx.Document(file_path)

                                for block in self.iter_block_items(doc):
                                    if isinstance(
                                            block,
                                            docx.text.paragraph.Paragraph):
                                        f.write(f'{block.text}\n')
                                    elif isinstance(block, docx.table.Table):
                                        # One output line per table row,
                                        # cells separated by tabs
                                        for row in self.iter_visual_cells(
                                                block):
                                            cells = [
                                                ' '.join([
                                                    item.text for item in
                                                    self.iter_cell_items(cell)
                                                ])
                                                for cell in row
                                            ]

                                            f.write('\t'.join(cells) + '\n')

                        # Excel workbooks
                        elif file_ext == '.xlsx':
                            with open(new_path, 'w',
                                      encoding=default_encoding) as f:
                                workbook = openpyxl.load_workbook(
                                    file_path, data_only=True)

                                for worksheet_name in workbook.sheetnames:
                                    worksheet = workbook[worksheet_name]

                                    for row in worksheet.rows:
                                        # Cell values are not always strings
                                        # and str.join accepts only strings
                                        f.write('\t'.join([
                                            '' if cell.value is None
                                            else str(cell.value)
                                            for cell in row
                                        ]) + '\n')

                        new_paths = [new_path]
                    else:
                        # Detect encoding
                        if self.main.settings_custom['files'][
                                'auto_detection_settings']['detect_encodings']:
                            encoding_code, _ = wl_detection.detect_encoding(
                                self.main, file_path)
                        else:
                            encoding_code = self.main.settings_custom[
                                'auto_detection']['default_settings'][
                                    'default_encoding']

                        # CSV files
                        if file_ext == '.csv':
                            new_path = wl_checking_misc.check_new_path(
                                os.path.join(default_dir, f'{file_name}.txt'))

                            with open(new_path, 'w',
                                      encoding=default_encoding) as f, open(
                                          file_path,
                                          'r',
                                          newline='',
                                          encoding=encoding_code) as f_csv:
                                # Rewrite every record as a tab-separated line
                                for row in csv.reader(f_csv):
                                    f.write('\t'.join(row) + '\n')

                            new_paths = [new_path]

                        # HTML files
                        elif file_ext in ['.htm', '.html']:
                            with open(file_path, 'r',
                                      encoding=encoding_code) as f:
                                soup = bs4.BeautifulSoup(f.read(), 'lxml')

                            new_path = wl_checking_misc.check_new_path(
                                os.path.join(default_dir, f'{file_name}.txt'))

                            with open(new_path, 'w',
                                      encoding=default_encoding) as f:
                                f.write(soup.get_text())

                            new_paths = [new_path]

                        # XML files
                        elif file_ext == '.xml':
                            with open(file_path, 'r',
                                      encoding=encoding_code) as f:
                                xml_text = f.read()

                            # Copied unchanged, re-encoded to the temp
                            # file encoding
                            new_path = wl_checking_misc.check_new_path(
                                os.path.join(default_dir, f'{file_name}.xml'))

                            with open(new_path, 'w',
                                      encoding=default_encoding) as f:
                                f.write(xml_text)

                            new_paths = [new_path]

                        # Translation memory files
                        elif file_ext == '.tmx':
                            lines_src = []
                            lines_target = []

                            with open(file_path, 'r',
                                      encoding=encoding_code) as f:
                                soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                                # Every <tu> is unpacked into exactly two
                                # <seg> elements: source and target
                                for tu in soup.find_all('tu'):
                                    seg_src, seg_target = tu.find_all('seg')

                                    lines_src.append(seg_src.get_text())
                                    lines_target.append(seg_target.get_text())

                            path_src = wl_checking_misc.check_new_path(
                                os.path.join(default_dir,
                                             f'{file_name}_source.txt'))
                            path_target = wl_checking_misc.check_new_path(
                                os.path.join(default_dir,
                                             f'{file_name}_target.txt'))

                            with open(path_src, 'w',
                                      encoding=default_encoding) as f:
                                f.write('\n'.join(lines_src))
                                f.write('\n')

                            with open(path_target,
                                      'w',
                                      encoding=default_encoding) as f:
                                f.write('\n'.join(lines_target))
                                f.write('\n')

                            new_paths = [path_src, path_target]

                    for new_path in new_paths:
                        new_files.append(
                            self.main.wl_files._new_file(new_path, txt=False))

            self.main.settings_custom['import']['files'][
                'default_path'] = wl_misc.get_normalized_dir(
                    self.file_paths[0])

        self.progress_updated.emit(self.tr('Updating table ...'))

        time.sleep(0.1)

        self.worker_done.emit(new_files)
Exemplo n.º 8
0
    def _new_file(self, file_path, txt=True):
        """Build the entry for a file and write its header-stripped copy.

        Args:
            file_path: Path of the file to read.
            txt: If true, the processed text is written to a new file in the
                temporary-file directory; otherwise it is written back to
                ``file_path``.

        Returns:
            A dict with the keys ``selected``, ``path``, ``name``,
            ``name_old``, ``encoding`` and ``lang``, plus ``tokenized`` and
            ``tagged`` when ``file_path`` ends with ``.txt`` or ``.xml``.
        """
        new_file = {}

        new_file['selected'] = True

        new_file['path'] = file_path

        if new_file['path'].endswith('.txt'):
            new_file['tokenized'] = 'No'
            new_file['tagged'] = 'No'
        elif new_file['path'].endswith('.xml'):
            new_file['tokenized'] = 'Yes'
            new_file['tagged'] = 'Yes'

        new_file['name'], _ = os.path.splitext(
            os.path.basename(new_file['path']))
        new_file['name_old'] = new_file['name']

        # Detect encodings
        # NOTE(review): the result is stored as returned and later passed to
        # open() as the encoding - confirm that detect_encoding returns a
        # plain encoding name here.
        if self.main.settings_custom['files']['auto_detection_settings'][
                'detect_encodings']:
            new_file['encoding'] = wl_detection.detect_encoding(
                self.main, new_file['path'])
        else:
            new_file['encoding'] = self.main.settings_custom['auto_detection'][
                'default_settings']['default_encoding']

        # Detect languages
        if self.main.settings_custom['files']['auto_detection_settings'][
                'detect_langs']:
            new_file['lang'] = wl_detection.detect_lang(self.main, new_file)
        else:
            new_file['lang'] = self.main.settings_custom['auto_detection'][
                'default_settings']['default_lang']

        if txt:
            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']
                ['default_path'])

            # Keep only the file name, whichever separator the path uses
            new_file['path'] = os.path.join(default_dir,
                                            re.split(r'[/\\]', file_path)[-1])
            new_file['path'] = wl_checking_misc.check_new_path(
                new_file['path'])

        # Remove header tags
        # The opening tag without its first and last character is used as
        # the selector of the elements to drop
        tags_header = [
            tag_opening[1:-1]
            for _, _, tag_opening, _ in self.main.settings_custom['tags'][
                'tags_header']
        ]

        with open(file_path, 'r', encoding=new_file['encoding']) as f:
            text = f.read()

        # The "lxml" parser will add <html><body> to the text, which is undesirable
        soup = bs4.BeautifulSoup(text, features='html.parser')

        for tag_header in tags_header:
            for header_element in soup.select(tag_header):
                header_element.decompose()

        # Open the target only once the output is complete: with txt=False it
        # is the source file itself, so a failure while parsing must not
        # leave it truncated
        with open(new_file['path'], 'w', encoding='utf_8') as f:
            f.write(str(soup))

        return new_file
Exemplo n.º 9
0
    def run(self):
        """Import ``self.file_paths`` and emit the resulting file entries.

        ``.txt`` files are registered as they are.  Word documents, Excel
        workbooks (``.xlsx`` and ``.xls``), CSV, HTML, XML, TMX and LRC files
        are first written out as text into the temporary-file directory and
        the written copies are registered instead.  Files with any other
        extension produce no entry.

        For every registered file, the paths for which encoding, text type or
        language detection did not succeed are collected separately.
        Progress messages are emitted through ``progress_updated``; the
        entries and the three lists of paths through ``worker_done``.  The
        directory of the first file becomes the new default import path.
        """
        new_files = []

        files_detection_error_encoding = []
        files_detection_error_text_type = []
        files_detection_error_lang = []

        if self.file_paths:
            len_file_paths = len(self.file_paths)

            for i, file_path in enumerate(self.file_paths):
                # Translate the constant template, then insert the counters;
                # a source string with the numbers already in it could never
                # match a translation entry
                self.progress_updated.emit(
                    self.tr('Opening files ... ({}/{})').format(
                        i + 1, len_file_paths))

                default_dir = wl_checking_misc.check_dir(
                    self.main.settings_custom['import']['temp_files']
                    ['default_path'])
                default_encoding = self.main.settings_custom['import'][
                    'temp_files']['default_encoding']

                file_path = wl_misc.get_normalized_path(file_path)
                file_name, file_ext = os.path.splitext(
                    os.path.basename(file_path))
                file_ext = file_ext.lower()

                # Reset for every file so that an unsupported extension
                # neither raises NameError nor re-registers the paths
                # produced for the previous file
                new_paths = []

                # Text files
                if file_ext == '.txt':
                    new_paths = [file_path]
                elif file_ext in ['.docx', '.xlsx', '.xls']:
                    new_path = wl_checking_misc.check_new_path(
                        os.path.join(default_dir, f'{file_name}.txt'))

                    # Word documents
                    if file_ext == '.docx':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            doc = docx.Document(file_path)

                            for block in self.iter_block_items(doc):
                                if isinstance(block,
                                              docx.text.paragraph.Paragraph):
                                    f.write(f'{block.text}\n')
                                elif isinstance(block, docx.table.Table):
                                    # One output line per table row, cells
                                    # separated by tabs
                                    for row in self.iter_visual_cells(block):
                                        cells = [
                                            ' '.join([
                                                item.text for item in
                                                self.iter_cell_items(cell)
                                            ])
                                            for cell in row
                                        ]

                                        f.write('\t'.join(cells) + '\n')

                    # Excel workbooks
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(
                                file_path, data_only=True)

                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]

                                for row in worksheet.rows:
                                    # Cell values are not always strings and
                                    # str.join accepts only strings
                                    f.write('\t'.join([
                                        '' if cell.value is None
                                        else str(cell.value)
                                        for cell in row
                                    ]) + '\n')
                    elif file_ext == '.xls':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = xlrd.open_workbook(file_path)

                            for i_sheet in range(workbook.nsheets):
                                worksheet = workbook.sheet_by_index(i_sheet)

                                for row in range(worksheet.nrows):
                                    # Same as above: convert every value
                                    # before joining
                                    f.write('\t'.join([
                                        str(worksheet.cell_value(row, col))
                                        for col in range(worksheet.ncols)
                                    ]) + '\n')

                    new_paths = [new_path]
                else:
                    # Detect encoding
                    if self.main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wl_detection.detect_encoding(
                            self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom[
                            'auto_detection']['default_settings'][
                                'default_encoding']

                    # CSV files
                    if file_ext == '.csv':
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f, open(
                                      file_path,
                                      'r',
                                      newline='',
                                      encoding=encoding_code) as f_csv:
                            # Rewrite every record as a tab-separated line
                            for row in csv.reader(f_csv):
                                f.write('\t'.join(row) + '\n')

                        new_paths = [new_path]

                    # HTML files
                    elif file_ext in ['.htm', '.html']:
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(soup.get_text())

                        new_paths = [new_path]

                    # XML files
                    elif file_ext == '.xml':
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            xml_text = f.read()

                        # Copied unchanged into a .txt file, re-encoded to
                        # the temp file encoding
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(xml_text)

                        new_paths = [new_path]

                    # Translation memory files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []

                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                            # Every <tu> is unpacked into exactly two <seg>
                            # elements: source and target
                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')

                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())

                        path_src = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_source.txt'))
                        path_target = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_target.txt'))

                        with open(path_src, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')

                        with open(path_target, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')

                        new_paths = [path_src, path_target]

                    # Lyrics files
                    elif file_ext == '.lrc':
                        lyrics = {}

                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            for line in f:
                                time_tags = []

                                line = line.strip()

                                # Strip time tags
                                while re.search(r'^\[[^\]]+?\]', line):
                                    time_tags.append(
                                        re.search(r'^\[[^\]]+?\]',
                                                  line).group())

                                    line = line[len(time_tags[-1]):].strip()

                                # Strip word time tags
                                line = re.sub(r'<[^>]+?>', r'', line)
                                line = re.sub(r'\s{2,}', r' ', line).strip()

                                # Only tags of the form [mm:ss.xx] are kept;
                                # a line with several of them is stored once
                                # per tag
                                for time_tag in time_tags:
                                    if re.search(
                                            r'^\[[0-9]{2}:[0-5][0-9]\.[0-9]{2}\]$',
                                            time_tag):
                                        lyrics[time_tag] = line

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            # Written in the sort order of the time tags
                            for _, lyrics_line in sorted(lyrics.items()):
                                f.write(f'{lyrics_line}\n')

                        new_paths = [new_path]

                for new_path in new_paths:
                    (new_file, detection_success_encoding,
                     detection_success_text_type, detection_success_lang
                     ) = self.main.wl_files._new_file(new_path)

                    new_files.append(new_file)

                    if not detection_success_encoding:
                        files_detection_error_encoding.append(
                            new_file['path'])

                    if not detection_success_text_type:
                        files_detection_error_text_type.append(
                            new_file['path'])

                    if not detection_success_lang:
                        files_detection_error_lang.append(new_file['path'])

            self.main.settings_custom['import']['files'][
                'default_path'] = wl_misc.get_normalized_dir(
                    self.file_paths[0])

        self.progress_updated.emit(self.tr('Updating table ...'))

        time.sleep(0.1)

        self.worker_done.emit(new_files, files_detection_error_encoding,
                              files_detection_error_text_type,
                              files_detection_error_lang)
Exemplo n.º 10
0
    def _new_file(self, file_path, txt=True):
        new_file = {}

        detection_success_encoding = True
        detection_success_text_type = True
        detection_success_lang = True

        new_file['selected'] = True
        new_file['path'] = file_path
        new_file['name'], _ = os.path.splitext(
            os.path.basename(new_file['path']))
        new_file['name_old'] = new_file['name']

        # Detect encodings
        if self.main.settings_custom['files']['auto_detection_settings'][
                'detect_encodings']:
            (new_file['encoding'],
             detection_success_encoding) = wl_detection.detect_encoding(
                 self.main, new_file['path'])
        else:
            new_file['encoding'] = self.main.settings_custom['auto_detection'][
                'default_settings']['default_encoding']

        # Detect text types
        if self.main.settings_custom['files']['auto_detection_settings'][
                'detect_text_types']:
            (new_file['text_type'],
             detection_success_text_type) = wl_detection.detect_text_type(
                 self.main, new_file)
        else:
            new_file['text_type'] = self.main.settings_custom[
                'auto_detection']['default_settings']['default_text_type']

        # Detect languages
        if self.main.settings_custom['files']['auto_detection_settings'][
                'detect_langs']:
            (new_file['lang'],
             detection_success_lang) = wl_detection.detect_lang(
                 self.main, new_file)
        else:
            new_file['lang'] = self.main.settings_custom['auto_detection'][
                'default_settings']['default_lang']

        # Remove header tags
        tags_header_opening = []
        tags_header_closing = []

        if txt:
            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']
                ['default_path'])
            new_file['path'] = re.sub(r'^.+?\\([^\\]+?$)',
                                      fr'{re.escape(default_dir)}\\\1',
                                      file_path)
            new_file['path'] = wl_checking_misc.check_new_path(
                new_file['path'])

        for tag_opening, tag_closing in self.main.settings_custom['tags'][
                'tags_header']:
            tags_header_opening.append(fr"{tag_opening}.+?")
            tags_header_closing.append(fr".+?{tag_closing}")

        tag_header_opening = '|'.join(tags_header_opening)
        tag_header_closing = '|'.join(tags_header_closing)

        with open(file_path, 'r', encoding=new_file['encoding']) as f, open(
                new_file['path'], 'w', encoding='utf_8') as f_temp:
            tags_header = False

            for line in f:
                if tags_header:
                    if re.search(tag_header_closing, line):
                        f_temp.write(re.sub(tag_header_closing, '', line))

                        tags_header = False
                elif re.search(tag_header_opening, line):
                    f_temp.write(re.sub(tag_header_opening, '', line))

                    tags_header = True
                else:
                    f_temp.write(line)

        return (new_file, detection_success_encoding,
                detection_success_text_type, detection_success_lang)