def __init__(self, wikt, filter_langs=None):
    """Set up the parser from the translation-related ``self.cfg`` settings.

    Runs the ``ArticleParser`` initializer, stores the comma-separated
    ``translation_prefix`` entries and the integer field indices, then
    calls ``build_trad_re()``.

    Args:
        wikt: Forwarded unchanged to ``ArticleParser.__init__``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    def _as_text(value):
        # Byte strings (Python 2 ``str``) are decoded as UTF-8.  Text values
        # have no ``decode`` method on Python 3, so they pass through as-is
        # instead of raising AttributeError.
        return value.decode('utf8') if isinstance(value, bytes) else value

    ArticleParser.__init__(self, wikt, filter_langs)

    # self.cfg is only read after the base initializer has run
    # (presumably that is where it is populated -- confirm in ArticleParser).
    cfg = self.cfg
    self.tr_prefix_l = [
        _as_text(prefix) for prefix in cfg['translation_prefix'].split(',')
    ]
    self.wc_field = int(cfg['wc_field'])
    self.word_field = int(cfg['word_field'])
    self.line_field = int(cfg['line_field'])
    self.rest_of_fields = int(cfg['rest_of_fields'])

    # Called last, as in the original, so every attribute above is already set.
    self.build_trad_re()
def __init__(self, wikt, filter_langs=None):
    """Set up the parser from the translation-related ``self.cfg`` settings.

    Runs the ``ArticleParser`` initializer, stores the comma-separated
    ``translation_prefix`` entries and the integer field indices, then
    calls ``build_trad_re()``.

    Args:
        wikt: Forwarded unchanged to ``ArticleParser.__init__``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    ArticleParser.__init__(self, wikt, filter_langs)

    # self.cfg is only read after the base initializer has run
    # (presumably that is where it is populated -- confirm in ArticleParser).
    cfg = self.cfg

    # ``split`` already returns a new list; no element-by-element copy needed.
    self.tr_prefix_l = cfg['translation_prefix'].split(',')
    self.wc_field = int(cfg['wc_field'])
    self.word_field = int(cfg['word_field'])
    self.line_field = int(cfg['line_field'])
    self.rest_of_fields = int(cfg['rest_of_fields'])

    # Called last, as in the original, so every attribute above is already set.
    self.build_trad_re()
def __init__(self, wikt, filter_langs=None):
    """Set up the parser from the translation-line ``self.cfg`` settings.

    Runs the ``ArticleParser`` initializer, stores the field indices,
    compiles the ``translation_line`` pattern, stores the entity delimiter
    and the optional list of skip patterns, then calls
    ``read_langname_mapping(self.cfg)``.

    Args:
        wikt: Forwarded unchanged to ``ArticleParser.__init__``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    def _as_text(value):
        # Byte strings (Python 2 ``str``) are decoded as UTF-8.  Text values
        # have no ``decode`` method on Python 3, so they pass through as-is
        # instead of raising AttributeError.
        return value.decode('utf8') if isinstance(value, bytes) else value

    ArticleParser.__init__(self, wikt, filter_langs)

    # self.cfg is only read after the base initializer has run
    # (presumably that is where it is populated -- confirm in ArticleParser).
    cfg = self.cfg
    self.langname_field = int(cfg['language_name_field'])
    self.translation_field = int(cfg['translation_field'])

    # The former ``ur'' +`` prefix only concatenated an empty string and is a
    # syntax error on Python 3; the decoded pattern is compiled directly.
    self.translation_line_re = re.compile(
        _as_text(cfg['translation_line']), re.UNICODE)
    self.entity_delimiter = cfg['translation_entity_delimiter']

    # An empty/falsy setting means "no skip patterns" and is stored as None.
    skip_spec = cfg['skip_translation']
    if skip_spec:
        self.skip_re_l = [_as_text(item) for item in skip_spec.split(',')]
    else:
        self.skip_re_l = None

    self.read_langname_mapping(cfg)
def __init__(self, wikt, filter_langs=None):
    """Set up the parser from the translation-line ``self.cfg`` settings.

    Runs the ``ArticleParser`` initializer, stores the field indices,
    compiles the ``translation_line`` pattern, stores the entity delimiter
    and the optional list of skip patterns, then calls
    ``read_langname_mapping(self.cfg)``.

    Args:
        wikt: Forwarded unchanged to ``ArticleParser.__init__``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    def _as_text(value):
        # Text values have no ``decode`` method on Python 3, so calling it
        # unconditionally raised AttributeError.  Only byte strings are
        # decoded (as UTF-8); anything else passes through unchanged.
        return value.decode('utf8') if isinstance(value, bytes) else value

    ArticleParser.__init__(self, wikt, filter_langs)

    # self.cfg is only read after the base initializer has run
    # (presumably that is where it is populated -- confirm in ArticleParser).
    cfg = self.cfg
    self.langname_field = int(cfg['language_name_field'])
    self.translation_field = int(cfg['translation_field'])

    # The former ``r'' +`` prefix only concatenated an empty string.
    self.translation_line_re = re.compile(
        _as_text(cfg['translation_line']), re.UNICODE)
    self.entity_delimiter = cfg['translation_entity_delimiter']

    # An empty/falsy setting means "no skip patterns" and is stored as None.
    skip_spec = cfg['skip_translation']
    if skip_spec:
        self.skip_re_l = [_as_text(item) for item in skip_spec.split(',')]
    else:
        self.skip_re_l = None

    self.read_langname_mapping(cfg)
def __init__(self, wikt_cfg, parser_cfg, filter_langs=None):
    """Run the ``ArticleParser`` initializer, then ``read_langname_mapping()``.

    Args:
        wikt_cfg: Forwarded unchanged to ``ArticleParser.__init__``.
        parser_cfg: Forwarded unchanged to ``ArticleParser.__init__``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    # Base-class setup comes first; the mapping is read on the initialized
    # instance afterwards.
    ArticleParser.__init__(self, wikt_cfg, parser_cfg, filter_langs)
    self.read_langname_mapping()
def __init__(self, wikt_cfg, parser_cfg, filter_langs=None):
    """Run the ``ArticleParser`` initializer, then ``read_section_langmap()``.

    Args:
        wikt_cfg: Forwarded unchanged to ``ArticleParser.__init__``.
        parser_cfg: Forwarded unchanged to ``ArticleParser.__init__``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    # Base-class setup comes first; the section language map is read on the
    # initialized instance afterwards.
    ArticleParser.__init__(self, wikt_cfg, parser_cfg, filter_langs)
    self.read_section_langmap()
def __init__(self, wikt, filter_langs=None):
    """Initialize the section-based parser.

    After the ``ArticleParser`` initializer, this calls
    ``init_section_parser(wikt)`` and ``build_section_re()``, stores the
    integer ``section_langfield`` setting, and finally calls
    ``read_section_langmap()``.

    Args:
        wikt: Forwarded to ``ArticleParser.__init__`` and to
            ``init_section_parser``.
        filter_langs: Forwarded unchanged to ``ArticleParser.__init__``.
    """
    ArticleParser.__init__(self, wikt, filter_langs)

    # The call order below is kept exactly as in the original; later steps
    # may rely on state set up by earlier ones.
    self.init_section_parser(wikt)
    self.build_section_re()

    raw_langfield = self.cfg['section_langfield']
    self.section_langfield = int(raw_langfield)

    self.read_section_langmap()