示例#1
0
 def __init__(self, wikt, filter_langs=None):
     """Set up the parser from the values found in ``self.cfg``.

     Delegates to ``ArticleParser.__init__`` with ``wikt`` and
     ``filter_langs``, then reads the comma-separated
     ``translation_prefix`` entry (each item decoded as UTF-8) and four
     integer settings, and finally calls ``build_trad_re()``.
     """
     ArticleParser.__init__(self, wikt, filter_langs)
     raw_prefixes = self.cfg['translation_prefix'].split(',')
     self.tr_prefix_l = [prefix.decode('utf8') for prefix in raw_prefixes]
     # Each integer setting is stored under an attribute of the same name
     # as its config key.
     for option in ('wc_field', 'word_field', 'line_field',
                    'rest_of_fields'):
         setattr(self, option, int(self.cfg[option]))
     self.build_trad_re()
示例#2
0
 def __init__(self, wikt, filter_langs=None):
     """Set up the parser from the values found in ``self.cfg``.

     Delegates to ``ArticleParser.__init__`` with ``wikt`` and
     ``filter_langs``, then reads the comma-separated
     ``translation_prefix`` entry and four integer settings, and finally
     calls ``build_trad_re()``.
     """
     ArticleParser.__init__(self, wikt, filter_langs)
     # ``split`` already returns a new list, so no extra copy is needed.
     self.tr_prefix_l = self.cfg['translation_prefix'].split(',')
     self.wc_field = int(self.cfg['wc_field'])
     self.word_field = int(self.cfg['word_field'])
     self.line_field = int(self.cfg['line_field'])
     self.rest_of_fields = int(self.cfg['rest_of_fields'])
     self.build_trad_re()
示例#3
0
 def __init__(self, wikt, filter_langs=None):
     """Set up the parser from the values found in ``self.cfg``.

     Delegates to ``ArticleParser.__init__`` with ``wikt`` and
     ``filter_langs``, then reads two integer settings, compiles the
     ``translation_line`` pattern, stores the entity delimiter and the
     optional list of skip patterns, and finally calls
     ``read_langname_mapping(self.cfg)``.

     The config values that are decoded here are treated as UTF-8 encoded
     byte strings.
     """
     ArticleParser.__init__(self, wikt, filter_langs)
     self.langname_field = int(self.cfg['language_name_field'])
     self.translation_field = int(self.cfg['translation_field'])
     # Compile the configured pattern directly; prepending an empty
     # literal would not change it.
     self.translation_line_re = re.compile(
         self.cfg['translation_line'].decode('utf8'), re.UNICODE)
     self.entity_delimiter = self.cfg['translation_entity_delimiter']
     if self.cfg['skip_translation']:
         # Comma-separated entries, each decoded as UTF-8.
         self.skip_re_l = [i.decode('utf8')
                           for i in self.cfg['skip_translation'].split(',')]
     else:
         # An empty/false setting means no skip list is configured.
         self.skip_re_l = None
     self.read_langname_mapping(self.cfg)
示例#4
0
 def __init__(self, wikt, filter_langs=None):
     """Set up the parser from the values found in ``self.cfg``.

     Delegates to ``ArticleParser.__init__`` with ``wikt`` and
     ``filter_langs``, then reads two integer settings, compiles the
     UTF-8 decoded ``translation_line`` pattern, stores the entity
     delimiter and the optional list of skip patterns, and finally calls
     ``read_langname_mapping(self.cfg)``.
     """
     ArticleParser.__init__(self, wikt, filter_langs)
     self.langname_field = int(self.cfg['language_name_field'])
     self.translation_field = int(self.cfg['translation_field'])
     line_pattern = r'' + self.cfg['translation_line'].decode('utf8')
     self.translation_line_re = re.compile(line_pattern, re.UNICODE)
     self.entity_delimiter = self.cfg['translation_entity_delimiter']
     # A false/empty setting leaves the skip list as None; otherwise it is
     # the comma-separated entries, each decoded as UTF-8.
     skip_spec = self.cfg['skip_translation']
     self.skip_re_l = (
         [piece.decode('utf8') for piece in skip_spec.split(',')]
         if skip_spec else None
     )
     self.read_langname_mapping(self.cfg)
示例#5
0
 def __init__(self, wikt_cfg, parser_cfg, filter_langs=None):
     """Initialize via ``ArticleParser.__init__``, then load the mapping.

     ``wikt_cfg``, ``parser_cfg`` and ``filter_langs`` are forwarded
     unchanged to the base initializer; afterwards
     ``read_langname_mapping()`` is called with no arguments.
     """
     ArticleParser.__init__(self, wikt_cfg, parser_cfg, filter_langs)
     self.read_langname_mapping()
示例#6
0
 def __init__(self, wikt_cfg, parser_cfg, filter_langs=None):
     """Initialize via ``ArticleParser.__init__``, then load the map.

     ``wikt_cfg``, ``parser_cfg`` and ``filter_langs`` are forwarded
     unchanged to the base initializer; afterwards
     ``read_section_langmap()`` is called with no arguments.
     """
     ArticleParser.__init__(self, wikt_cfg, parser_cfg, filter_langs)
     self.read_section_langmap()
示例#7
0
 def __init__(self, wikt, filter_langs=None):
     """Set up the section-based parser.

     Delegates to ``ArticleParser.__init__`` with ``wikt`` and
     ``filter_langs``, then calls ``init_section_parser(wikt)`` and
     ``build_section_re()``, stores the integer ``section_langfield``
     setting from ``self.cfg``, and finally calls
     ``read_section_langmap()``.
     """
     ArticleParser.__init__(self, wikt, filter_langs)
     self.init_section_parser(wikt)
     self.build_section_re()
     # Kept between the two helper calls, as the helpers may depend on
     # this ordering -- NOTE(review): confirm before moving.
     langfield_setting = self.cfg['section_langfield']
     self.section_langfield = int(langfield_setting)
     self.read_section_langmap()