Пример #1
0
 def recv_line(self):
     """Read one multi-line block and return its lines as a list.

     The first line obtained through ``LineByLineTagger.recv_line`` is
     parsed with ``int()`` and taken as the number of lines that follow;
     exactly that many further lines are then read the same way.

     Returns:
         A list with the lines that followed the count line, in the order
         they were received (empty when the count is not positive).

     Raises:
         ValueError: if the count line cannot be parsed by ``int()``.
     """
     expected = int(LineByLineTagger.recv_line(self))
     # The loop variable is irrelevant; only the number of reads matters.
     return [LineByLineTagger.recv_line(self) for _ in xrange(expected)]
Пример #2
0
 def recv_line(self):
     """Receive a counted group of lines.

     A header line is read first via ``LineByLineTagger.recv_line`` and
     converted with ``int()``; it states how many lines come next. Those
     lines are read one by one and collected.

     Returns:
         The collected lines as a list, header line excluded.

     Raises:
         ValueError: if the header line is not something ``int()`` accepts.
     """
     read_one = LineByLineTagger.recv_line
     collected = []
     # Header first, then exactly as many reads as the header announces.
     for _ in xrange(int(read_one(self))):
         collected.append(read_one(self))
     return collected
Пример #3
0
    def __init__(self, params):
        """Set up the tokenizer from a parameter mapping.

        Runs ``SentenceTokenizerWrapper.__init__`` first, registers the
        class-level date and Roman-number patterns in ``self.patterns``,
        then runs ``LineByLineTagger.__init__`` with the path of the
        external ``tokenize`` executable and the chosen encoding.

        Keys read from ``params``:
            hunknown_basedir:  accessed with ``[]``, so it must be present.
            hunknown_conf:     configuration file path; when absent or
                               ``None`` it falls back to ``huntools.conf``
                               directly under the base directory.
            hunknown_encoding: encoding handed to ``LineByLineTagger``;
                               defaults to ``'iso-8859-2'``.

        Afterwards ``self.options`` is a one-element list holding the
        configuration file path.
        """
        SentenceTokenizerWrapper.__init__(self, params)
        own_class = HunknownSentenceTokenizer
        self.patterns.add(own_class._datePattern)
        self.patterns.add(own_class._romanNumberPattern)

        root = params['hunknown_basedir']
        executable = os.path.join(root, 'bin', 'tokenize')
        explicit_conf = params.get('hunknown_conf')
        # An explicit None is treated the same as a missing key.
        conf_path = (explicit_conf if explicit_conf is not None
                     else os.path.join(root, 'huntools.conf'))
        charset = params.get('hunknown_encoding', 'iso-8859-2')

        LineByLineTagger.__init__(self, executable, charset)
        self.options = [conf_path]
Пример #4
0
    def __init__(self, params):
        """Initialise both base classes and resolve the tool's file paths.

        ``params`` is first passed to ``SentenceTokenizerWrapper.__init__``;
        the date and Roman-number patterns defined on
        ``HunknownSentenceTokenizer`` are then added to ``self.patterns``.

        The following entries of ``params`` are consulted:
          * ``'hunknown_basedir'`` -- looked up with ``[]`` (no default);
            the executable is ``<basedir>/bin/tokenize``.
          * ``'hunknown_conf'`` -- optional configuration file; a missing
            or ``None`` value selects ``<basedir>/huntools.conf``.
          * ``'hunknown_encoding'`` -- optional, ``'iso-8859-2'`` if unset.

        ``LineByLineTagger.__init__`` receives the executable path and the
        encoding, and ``self.options`` ends up as ``[<config path>]``.
        """
        SentenceTokenizerWrapper.__init__(self, params)
        date_pattern = HunknownSentenceTokenizer._datePattern
        self.patterns.add(date_pattern)
        roman_pattern = HunknownSentenceTokenizer._romanNumberPattern
        self.patterns.add(roman_pattern)

        install_dir = params['hunknown_basedir']
        tokenizer_bin = os.path.join(install_dir, 'bin', 'tokenize')

        config_file = params.get('hunknown_conf')
        if config_file is None:
            # No configuration given: use the one under the base directory.
            config_file = os.path.join(install_dir, 'huntools.conf')

        LineByLineTagger.__init__(
            self,
            tokenizer_bin,
            params.get('hunknown_encoding', 'iso-8859-2'),
        )
        self.options = [config_file]