def main(args):
    """Convert the text passed on the command line and print the result.

    ``args`` is an argv-style list. ``args[0]`` is ignored. When ``args[1]``
    is ``'-t'`` the remaining items form the text and the last element of
    the ``Processor.process_text`` result is printed; otherwise every item
    after ``args[0]`` forms the text and the first element is printed.
    Items are joined with single spaces.

    Returns:
        Always 0, including when the conversion fails (in that case only
        the argument echo is printed).
    """
    print_log = False

    def convert(value):
        # Single place for the fixed delimiter arguments used by both attempts.
        return Processor.process_text(
            value, [u'@', u'{', u'}'], [u'', u'{', u'}'], 1, print_log)

    # NOTE(review): this echo looks like leftover debugging, but it is part
    # of the program's stdout, so it is kept.
    print(args)

    # Guard the flag lookup: an invocation without arguments is treated as
    # empty input instead of raising IndexError.
    show_json = len(args) > 1 and args[1] == '-t'
    text = ' '.join(args[2:] if show_json else args[1:])

    try:
        new_txt = convert(text)
    except Exception:
        # Best-effort fallback: retry with the input decoded from UTF-8.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        try:
            new_txt = convert(text.decode('utf-8'))
        except Exception:
            return 0

    if show_json:
        print(new_txt[-1])
    else:
        print(new_txt[0])
    return 0
def iterate_root_web(cls, temp_filename):
    """Convert the text content of an XML file and return it serialized.

    The file is parsed with ``etree.parse``. For every node yielded by
    ``root.iter()`` the ``text`` and ``tail`` strings that contain at least
    one marker character are passed through ``Processor.process_text`` and
    replaced by its first result.

    Args:
        temp_filename: path of the XML file to parse.

    Returns:
        A pair ``(serialized_document, log)``. ``log`` is the non-empty
        ``changes`` values returned by ``Processor.process_text`` joined
        with newlines.

    Raises:
        Whatever ``etree.parse`` raises for an unreadable or malformed file.
    """
    tree = etree.parse(temp_filename)
    encoding = tree.docinfo.encoding
    root = tree.getroot()
    doctype = tree.docinfo.doctype
    standalone = tree.docinfo.standalone

    # A string is only sent to the processor when it contains one of these.
    markers = (u'i', u'I', u'і', u'ѣ', u'Ѣ', u'ъ', u'Ъ', u'ѣ', u'і')
    log_data = []

    for child in root.iter():
        for attr in ('text', 'tail'):
            try:
                value = getattr(child, attr)
                if value is None or not any(m in value for m in markers):
                    continue
                new_value, changes, _, _ = Processor.process_text(
                    value, 1,
                    META['old_new_delimiters'][
                        META['current_delimiters_xml']],
                    0)
                setattr(child, attr, new_value)
            except Exception:
                # Deliberate best-effort: a node that cannot be converted or
                # assigned is left as it was so the rest of the document is
                # still processed.
                continue
            if changes:
                log_data.append(changes)

    new_text = etree.tostring(root, xml_declaration=True, encoding=encoding,
                              standalone=standalone, doctype=doctype)

    # lxml escapes angle brackets that occur inside text nodes when it
    # serializes, so tags that were stored as text (presumably the delimiters
    # inserted by Processor.process_text) come out entity-escaped. Turn them
    # back into real markup.
    for escaped, tag in (
            ('&lt;choice&gt;', '<choice>'),
            ('&lt;reg&gt;', '<reg>'),
            ('&lt;/choice&gt;', '</choice>'),
            ('&lt;/reg&gt;', '</reg>'),
            ('&lt;orig&gt;', '<orig>'),
            ('&lt;/orig&gt;', '</orig>'),
    ):
        new_text = new_text.replace(escaped, tag)

    return new_text, u'\n'.join(log_data)
def log(self):
    """Convert the entered text and open a ``Show`` window with the changes."""
    delimiters = META['old_new_delimiters'][META['current_delimiters_text']]
    source = self.entered.get('1.0', 'end')
    # Only the second element of the result (the changes) is displayed.
    _converted, changes, _, _ = Processor.process_text(source, 1, delimiters)
    viewer = Show(self.top, changes)
def ok(self):
    """Convert the entered text and display it in the result widget."""
    delimiters = META['old_new_delimiters'][META['current_delimiters_text']]
    source = self.entered.get('1.0', 'end')
    converted, _changes, _, _ = Processor.process_text(source, 1, delimiters)

    # The result widget is enabled only for the duration of the update and
    # is switched back to 'disabled' afterwards.
    output = self.result
    output.config(state='normal')
    output.delete("1.0", "end")
    output.insert("end", converted)
    output.config(state='disabled')
def load(cls, text, root, delimiters):
    """Read META['filename'], convert its contents and pass them on for saving.

    Status messages are written to the ``text`` widget. After the file has
    been read ``Dialog.dialog`` is called; the conversion runs only when
    ``META['flag']`` equals 1 afterwards. ``META['flag']`` is reset to 0
    once the try block completes without an exception.

    Args:
        text: widget used for status messages (``config``/``insert``/``delete``).
        root: parent passed to ``Dialog.dialog`` and ``Error.dialogError``.
        delimiters: passed through to ``Processor.process_text``.

    Raises:
        Any exception other than IOError/ValueError is re-raised after the
        error dialog has been shown.
    """

    def show_status(message, clear=True):
        # The widget is enabled only while its contents are being changed.
        text.config(state='normal')
        if clear:
            text.delete("1.0", "end")
        text.insert("end", message)
        text.config(state='disabled')

    def report(detail):
        # Every error dialog shares the same prefix and closing hint.
        prefix = u"Проблема с чтением файла " + \
            os.path.basename(META['filename'])
        Error.dialogError(prefix + detail + u"\nВыберите другой файл.", root)

    show_status(u"В обработке...\n", clear=False)
    # Evaluated outside the try block, so a problem with META['filename']
    # surfaces here rather than in the handlers below.
    name = os.path.basename(META['filename'])
    try:
        with codecs.open(META['filename'], 'r', 'utf-8') as f:
            data = f.read()  # the file has been read
        show_status(u"Файл загружен\n")
        Dialog.dialog(root, text)
        if META['flag'] != 1:
            show_status(u"Вы не ввели имена выходных файлов\n")
        else:
            check_brackets = 1  # take brackets into account
            new_text, changes, wrong_changes, _ = Processor.process_text(
                data, 1, delimiters, check_brackets)  # transliterated
            SaveText.save_translit_text(text, new_text, changes)
        META['flag'] = 0
    except IOError as e:
        report(u": I/O error({0}): {1}".format(e.errno, e.strerror))
    except ValueError:
        report(u": Неверный формат данных.")
    except:
        # Show the dialog for anything unexpected, then let it propagate.
        report(u": Неизвестная ошибка: " + str(sys.exc_info()[0]))
        raise
def res(self):
    """Convert the entered text and write it to a UTF-8 file.

    The file goes to ``META['default_directory']``. Its name is the value of
    ``self.out`` plus ``.txt``, or ``result.txt`` when that value is empty.
    """
    delimiters = META['old_new_delimiters'][META['current_delimiters_text']]
    source = self.entered.get('1.0', 'end')
    converted, _changes, _, _ = Processor.process_text(source, 1, delimiters)

    res = self.out.get()
    filename = res + '.txt' if res != '' else 'result.txt'
    res_name = META['default_directory'] + filename
    with codecs.open(res_name, 'w', 'utf-8') as ouf:
        ouf.write(converted)
def iterate_root(cls, temp_filename, int_root, one_iteration):
    """Convert the text content of an XML/HTML file and return it serialized.

    For every node yielded by ``root.iter()`` the ``text`` and ``tail``
    strings that contain at least one marker character are passed through
    ``Processor.process_text`` and replaced by its first result.

    Args:
        temp_filename: path of the file to parse with ``etree.parse``.
        int_root: parent passed to ``Error.dialogError`` on a parse failure.
        one_iteration: when truthy, a parse failure is reported through an
            error dialog and signalled by the return value; when falsy, the
            parse exception propagates to the caller.

    Returns:
        ``(serialized_document, log_data, wrong_log)`` on success, where the
        two lists hold the non-empty ``changes`` / ``wrong_changes`` values
        returned by ``Processor.process_text``. On a reported parse failure
        the 2-tuple ``(-1, -1)`` is returned instead (note the different
        length).
    """
    if one_iteration:
        try:
            tree = etree.parse(temp_filename)
        except Exception:
            Error.dialogError(u'Ошибка в структуре xml/html', int_root)
            return -1, -1
    else:
        tree = etree.parse(temp_filename)

    encoding = tree.docinfo.encoding
    root = tree.getroot()
    doctype = tree.docinfo.doctype
    standalone = tree.docinfo.standalone

    log_data = []
    wrong_log = []
    check_brackets = 1
    # A string is only sent to the processor when it contains one of these.
    new_markers = [u']']
    markers = [u'i', u'I', u'і', u'ѣ', u'Ѣ', u'ъ', u'Ъ', u'ѣ', u'і']
    markers += new_markers

    for child in root.iter():
        for attr in ('text', 'tail'):
            try:
                value = getattr(child, attr)
                if value is None or not any(m in value for m in markers):
                    continue
                new_value, changes, wrong_changes, _ = Processor.process_text(
                    value, 1,
                    META['old_new_delimiters'][
                        META['current_delimiters_xml']],
                    check_brackets)
                setattr(child, attr, new_value)
            except Exception:
                # Deliberate best-effort: a node that cannot be converted or
                # assigned is left as it was so the rest of the document is
                # still processed.
                continue
            if changes:
                log_data.append(changes)
            if wrong_changes:
                wrong_log.append(wrong_changes)

    new_text = etree.tostring(root, xml_declaration=True, encoding=encoding,
                              standalone=standalone, doctype=doctype)

    # lxml escapes angle brackets that occur inside text nodes when it
    # serializes, so tags that were stored as text (presumably the delimiters
    # inserted by Processor.process_text) come out entity-escaped. Turn them
    # back into real markup. Order matters: the last pattern relies on
    # '<sic>' having been restored by an earlier step.
    for escaped, tag in (
            ('&lt;choice&gt;', '<choice>'),
            ('&lt;reg&gt;', '<reg>'),
            ('&lt;/choice&gt;', '</choice>'),
            ('&lt;/reg&gt;', '</reg>'),
            ('&lt;orig&gt;', '<orig>'),
            ('&lt;/orig&gt;', '</orig>'),
            ('&lt;sic&gt;', '<sic>'),
            ('&lt;/sic&gt;', '</sic>'),
            ('&lt;corr&gt;', '<corr>'),
            ('&lt;/corr&gt;', '</corr>'),
            ('&lt;choice original_editorial_correction',
             '<choice original_editorial_correction'),
            ("'&gt;<sic>", "'><sic>"),
    ):
        new_text = new_text.replace(escaped, tag)

    return new_text, log_data, wrong_log
def web_converter():
    """Handle the converter page: render the form or return converted files.

    On POST the request is dispatched on the submitted field names:

    * ``go`` - convert the ``inp_text`` field and show the result on the page;
    * ``clean`` - clear both text areas;
    * ``download_txt`` - convert every uploaded ``f_txt`` file (read as
      UTF-8) and return a zip with the results and per-file logs;
    * ``download_xml`` - same for uploaded ``f_xml`` files, converted through
      ``LoadData.iterate_root_web``.

    The second argument of ``Processor.process_text`` is 1 when ``'show'`` is
    among the ``both`` form values and 0 otherwise. Files that fail to
    convert are listed in ``errors.txt`` inside the zip.

    Returns:
        A zip ``Response`` for the two download actions, otherwise the
        rendered ``prereform_to_contemporary.html`` template.
    """
    input_text = ''
    output_text = ''

    if request.method == 'POST':
        if META['tmp_folder'] != '':
            # The folder of the previous download may already be gone;
            # a stale path must not fail the new request.
            shutil.rmtree(META['tmp_folder'], ignore_errors=True)
            META['tmp_folder'] = ''

        input_text = request.values.get('inp_text', '')
        both = request.form.getlist('both')
        show = 1 if 'show' in both else 0

        if 'go' in request.values:
            output_text, _, _, _ = _convert_plain_text(input_text, show)

        if 'clean' in request.values:
            input_text = ''
            output_text = ''

        if 'download_txt' in request.values:
            ftxt = request.files.getlist("f_txt")
            errors = []
            curr_time = time.time()
            clean_store(curr_time)
            tmp_folder = str(curr_time) + '_ptc'
            os.mkdir(tmp_folder)
            # Record the directory that was actually created so that the
            # cleanup at the start of the next POST removes it.
            # NOTE(review): the directory is created relative to the working
            # directory, not under META['tmp_store'] - confirm which
            # location is intended.
            META['tmp_folder'] = tmp_folder
            for el in ftxt:
                try:
                    META['filename'] = secure_filename(el.filename)
                    file_text = el.read().decode('utf-8')
                    new_text, changes, _, _ = _convert_plain_text(
                        file_text, show)
                    # NOTE(review): looks like a leftover debug dump that is
                    # overwritten for every file; confirm nothing reads it
                    # before removing.
                    with codecs.open('log', 'w', 'utf-8') as ou:
                        ou.write(changes)
                    fnpath, lpath = _output_paths(tmp_folder,
                                                  META['filename'])
                    with codecs.open(fnpath, 'w', 'utf-8') as ou1:
                        ou1.write(new_text)
                    with codecs.open(lpath, 'w', 'utf-8') as ou2:
                        ou2.write(changes)
                except Exception:
                    # One bad upload must not abort the whole batch.
                    errors.append(
                        'Error: file ' + secure_filename(el.filename))
            return _zip_response(tmp_folder, errors)

        if 'download_xml' in request.values:
            fxml = request.files.getlist("f_xml")
            errors = []
            tmp_folder = str(time.time()) + '_ptc'
            META['tmp_folder'] = tmp_folder
            os.mkdir(tmp_folder)
            for el in fxml:
                try:
                    META['filename'] = secure_filename(el.filename)
                    temp_filename = LoadData.get_temp_web(tmp_folder, el)
                    try:
                        new_text, changes = LoadData.iterate_root_web(
                            temp_filename)
                    finally:
                        # Remove the temporary copy even when conversion
                        # fails, so it does not end up in the zip.
                        os.remove(temp_filename)
                    fnpath, lpath = _output_paths(tmp_folder,
                                                  META['filename'])
                    # new_text is already serialized, so no codec is given.
                    with codecs.open(fnpath, 'w') as ou1:
                        ou1.write(new_text)
                    if changes != u'':
                        with codecs.open(lpath, 'w', 'utf-8') as ou2:
                            ou2.write(changes)
                except Exception:
                    errors.append(
                        'Error: file ' + secure_filename(el.filename))
            return _zip_response(tmp_folder, errors)

    return render_template("prereform_to_contemporary.html",
                           output_text=output_text,
                           input_text=input_text)


def _convert_plain_text(text, show):
    """Run Processor.process_text with the current plain-text delimiters."""
    delimiters_name = META['current_delimiters_text']
    check_brackets = 1 if 'spell' in delimiters_name else 0
    return Processor.process_text(
        text, show, META['old_new_delimiters'][delimiters_name],
        check_brackets)


def _output_paths(folder, filename):
    """Return the (converted file, log file) paths in folder for filename."""
    name, suffix = os.path.splitext(filename)
    if suffix == '':
        suffix = '.txt'
    new_filename = name + "_transliterated" + suffix
    log_filename = name + '_log.txt'
    return folder + '/' + new_filename, folder + '/' + log_filename


def _zip_response(folder, errors):
    """Write errors.txt when needed and build the zip download response."""
    if errors:
        with codecs.open(folder + '/' + 'errors.txt', 'w', 'utf-8') as ou3:
            ou3.write(u'\n'.join(errors))
    response = Response(generator(folder), mimetype='application/zip')
    response.headers['Content-Disposition'] = \
        'attachment; filename={}'.format('files.zip')
    return response