# -*- coding: utf-8 -*-
"""Run the corpus converter with the ``antconc_ucs8`` conversion.

The compiled ``antconc_ucs8`` conversion is supplied as the ``slav`` entry of
``all_hip_conversions`` and ``'corpus-ucs8'`` is passed as
``converted_corpus_folder``; every other converter argument keeps its default.
"""
from hip2unicode.tools import corpus_converter
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion
from hip2unicode.conversions import antconc_ucs8

# Signature noted by the original author for reference:
#   corpus_converter(path=None, corpus_folder='corpus',
#                    converted_corpus_folder='converted_corpus',
#                    conversions=None)
slav_conversion = compile_conversion(antconc_ucs8.conversion)

corpus_converter.corpus_converter(
    converted_corpus_folder='corpus-ucs8',
    conversions=all_hip_conversions(slav=slav_conversion))
# -*- coding: utf-8 -*-
"""Convert text given on the command line and print the result.

All command-line arguments are joined with single spaces and passed to
``hip2unicode`` together with the mapping returned by
``all_hip_conversions``, whose ``slav`` entry is the compiled
``antconc_ucs8`` conversion.

Exits with status 1 (after printing a usage message) when no arguments are
given.  The script runs unchanged under Python 2 and Python 3.
"""
import sys

from hip2unicode.functions import (
    all_hip_conversions, compile_conversion, hip2unicode)
from hip2unicode.conversions import antconc_ucs8

compiled_conversion = compile_conversion(antconc_ucs8.conversion)
conversions = all_hip_conversions(slav=compiled_conversion)

if len(sys.argv) < 2:
    # Message (Russian): "The text to convert must be passed as arguments."
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('Необходимо передать текст для конвертации в виде аргументов.')
    sys.exit(1)

text = ' '.join(sys.argv[1:])
# Python 2 delivers argv as byte strings that must be decoded; Python 3
# already provides text, where calling .decode() would raise AttributeError.
if isinstance(text, bytes):
    text = text.decode('utf-8')

print(hip2unicode(text, conversions))
# -*- coding: utf-8 -*-
"""Prepare converter arguments for the ``hip_civilrus_accented`` conversion.

``conversions`` maps ``slav`` to the compiled ``hip_civilrus_accented``
conversion and sets ``rus``, ``lat`` and ``grec`` to the string ``'delete'``.
``args`` collects the keyword arguments built from it.

Command-line overrides:
    argv[1] -- stored as ``args['corpus_folder']`` when non-empty.
    argv[2] -- stored as ``args['converted_corpus_folder']`` when non-empty,
               but only if exactly two arguments were given.

NOTE(review): no call that consumes ``args`` is visible in the reviewed
excerpt; presumably it follows -- confirm.
"""
from __future__ import absolute_import

import sys

import corpus_converter
from hip2unicode.conversions import hip_civilrus_accented
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion

conversions = dict(
    slav=compile_conversion(hip_civilrus_accented.conversion),
    rus='delete',
    lat='delete',
    grec='delete')

args = dict(
    converted_corpus_folder='corpus-civilrus',
    conversions=all_hip_conversions(**conversions))

# First positional argument, if any.
corpus_folder = sys.argv[1] if len(sys.argv) > 1 else None
# NOTE(review): the second argument is honoured only when exactly two
# arguments are given; with more it is ignored -- confirm this is intended.
converted_corpus_folder = sys.argv[2] if len(sys.argv) == 3 else None

# Empty strings are treated the same as missing arguments.
if corpus_folder:
    args['corpus_folder'] = corpus_folder
if converted_corpus_folder:
    args['converted_corpus_folder'] = converted_corpus_folder
import re

from hip2unicode.functions import convert
from hip2unicode.functions import compile_conversion
from hip2unicode.conversions import antconc_ucs8
from hip2unicode.conversions import antconc_ucs8_corrupted_antconc
from hip2unicode.conversions import antconc_ucs8_without_aspiration
from hip2unicode.conversions import antconc_civilrus
from hip2unicode.conversions import antconc_antconc_wo_titles

# Each conversion table is compiled once, when the module is imported.
compiled_conversion_antconc_anticorrupt = compile_conversion(
    antconc_ucs8_corrupted_antconc.conversion)
compiled_conversion_civil = compile_conversion(antconc_civilrus.conversion)
compiled_conversion_with_aspiration = compile_conversion(
    antconc_ucs8.conversion)
compiled_conversion_without_aspiration = compile_conversion(
    antconc_ucs8_without_aspiration.conversion)
compiled_conversion_wo_titles = compile_conversion(
    antconc_antconc_wo_titles.conversion)


def html_escape(text):
    """Return *text* with HTML-special characters replaced by entities.

    The ampersand is replaced first so that the ampersands introduced by the
    later replacements are not escaped a second time.

    NOTE(review): the entity literals were unreadable in the reviewed source
    (each one had been collapsed to the character it stands for).  They are
    restored here; the numeric reference is used for the apostrophe because
    the named ``apos`` entity is not defined in HTML 4 -- confirm against the
    upstream file.
    """
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    text = text.replace('"', '&quot;')
    return text.replace("'", '&#39;')


def html_unescape(text):
    # Inverse of html_escape: entities are replaced in the opposite order.
    # NOTE(review): this definition is cut off in the reviewed source after
    # the three replacements below; the remaining statements (and the return)
    # were not visible and are deliberately not reconstructed here.
    text = text.replace('&#39;', "'")
    text = text.replace('&quot;', '"')
    text = text.replace('&gt;', '>')
# -*- coding: UTF-8 -*-
import re

from hip2unicode.functions import convert
from hip2unicode.functions import compile_conversion
from hip2unicode.conversions import antconc_ucs8
from hip2unicode.conversions import antconc_ucs8_without_aspiration
from hip2unicode.conversions import antconc_civilrus
from hip2unicode.conversions import antconc_antconc_wo_titles

# Each conversion table is compiled once, when the module is imported.
compiled_conversion_wo_titles = compile_conversion(
    antconc_antconc_wo_titles.conversion)
compiled_conversion_with_aspiration = compile_conversion(
    antconc_ucs8.conversion)
compiled_conversion_without_aspiration = compile_conversion(
    antconc_ucs8_without_aspiration.conversion)
compiled_conversion_civil = compile_conversion(antconc_civilrus.conversion)


def html_escape(text):
    """Return *text* with HTML-special characters replaced by entities.

    The ampersand is replaced first so that the ampersands introduced by the
    later replacements are not escaped a second time.

    NOTE(review): the entity literals were unreadable in the reviewed source
    (each one had been collapsed to the character it stands for).  They are
    restored here; the numeric reference is used for the apostrophe because
    the named ``apos`` entity is not defined in HTML 4 -- confirm against the
    upstream file.
    """
    text = text.replace(u'&', u'&amp;')
    text = text.replace(u'<', u'&lt;')
    text = text.replace(u'>', u'&gt;')
    text = text.replace(u'"', u'&quot;')
    return text.replace(u"'", u'&#39;')


def html_unescape(text):
    """Return *text* with the entities produced by html_escape decoded.

    Replacements run in the reverse order of html_escape; the ampersand
    entity is decoded last so that an escaped ampersand followed by entity
    text is not decoded twice.
    """
    text = text.replace(u'&#39;', u"'")
    text = text.replace(u'&quot;', u'"')
    text = text.replace(u'&gt;', u'>')
    text = text.replace(u'&lt;', u'<')
    return text.replace(u'&amp;', u'&')
# -*- coding: utf-8 -*-
"""Prepare converter arguments for the ``hip_civilrus`` conversion.

``conversions`` maps ``slav`` to the compiled ``hip_civilrus`` conversion and
sets ``rus``, ``lat`` and ``grec`` to the string ``'delete'``.  ``args``
collects the keyword arguments built from it.

Command-line overrides:
    argv[1] -- stored as ``args['corpus_folder']`` when non-empty.
    argv[2] -- stored as ``args['converted_corpus_folder']`` when non-empty,
               but only if exactly two arguments were given.

NOTE(review): no call that consumes ``args`` is visible in the reviewed
excerpt; presumably it follows -- confirm.
"""
import sys

from hip2unicode.conversions import hip_civilrus
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion
from hip2unicode.tools import corpus_converter

conversions = dict(
    slav=compile_conversion(hip_civilrus.conversion),
    rus='delete',
    lat='delete',
    grec='delete')

args = dict(
    converted_corpus_folder='corpus-civilrus',
    conversions=all_hip_conversions(**conversions))

# First positional argument, if any.
corpus_folder = sys.argv[1] if len(sys.argv) > 1 else None
# NOTE(review): the second argument is honoured only when exactly two
# arguments are given; with more it is ignored -- confirm this is intended.
converted_corpus_folder = sys.argv[2] if len(sys.argv) == 3 else None

# Empty strings are treated the same as missing arguments.
if corpus_folder:
    args['corpus_folder'] = corpus_folder
if converted_corpus_folder:
    args['converted_corpus_folder'] = converted_corpus_folder
# -*- coding: utf-8 -*-
"""Prepare converter arguments for the ``hip_civilrus_accented`` conversion.

``conversions`` maps ``slav`` to the compiled ``hip_civilrus_accented``
conversion and sets ``rus``, ``lat`` and ``grec`` to the string ``'delete'``.
``args`` collects the keyword arguments built from it.

Command-line overrides:
    argv[1] -- stored as ``args['corpus_folder']`` when non-empty.
    argv[2] -- stored as ``args['converted_corpus_folder']`` when non-empty,
               but only if exactly two arguments were given.

NOTE(review): no call that consumes ``args`` is visible in the reviewed
excerpt; presumably it follows -- confirm.
"""
import sys

from hip2unicode.conversions import hip_civilrus_accented
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion
from hip2unicode.tools import corpus_converter

conversions = dict(
    slav=compile_conversion(hip_civilrus_accented.conversion),
    rus='delete',
    lat='delete',
    grec='delete')

args = dict(
    converted_corpus_folder='corpus-civilrus',
    conversions=all_hip_conversions(**conversions))

# First positional argument, if any.
corpus_folder = sys.argv[1] if len(sys.argv) > 1 else None
# NOTE(review): the second argument is honoured only when exactly two
# arguments are given; with more it is ignored -- confirm this is intended.
converted_corpus_folder = sys.argv[2] if len(sys.argv) == 3 else None

# Empty strings are treated the same as missing arguments.
if corpus_folder:
    args['corpus_folder'] = corpus_folder
if converted_corpus_folder:
    args['converted_corpus_folder'] = converted_corpus_folder