示例#1
0
 def __init__(self, corpus_file, encoding, aligned, group_by_sent,
              word_tokenizer, sent_tokenizer, alignedsent_block_reader):
     """
     Record the reader options on the instance, then initialise the
     underlying ``StreamBackedCorpusView``.

     :param corpus_file: Forwarded to the base initialiser.
     :param encoding: Forwarded to the base initialiser as ``encoding``.
     :param aligned: Stored as ``self._aligned``.
     :param group_by_sent: Stored as ``self._group_by_sent``.
     :param word_tokenizer: Stored as ``self._word_tokenizer``.
     :param sent_tokenizer: Stored as ``self._sent_tokenizer``.
     :param alignedsent_block_reader: Stored as
         ``self._alignedsent_block_reader``.
     """
     # All options are stored before the base initialiser runs.
     self._word_tokenizer = word_tokenizer
     self._sent_tokenizer = sent_tokenizer
     self._alignedsent_block_reader = alignedsent_block_reader
     self._aligned = aligned
     self._group_by_sent = group_by_sent
     StreamBackedCorpusView.__init__(
         self,
         corpus_file,
         encoding=encoding,
     )
示例#2
0
文件: corpus.py 项目: ylwctyt/Python3
 def __init__(self, *args, **kwargs):
     """
     Initialise the base view, then move the view's start position past
     the file's heading block.

     All positional and keyword arguments are forwarded unchanged to
     ``StreamBackedCorpusView.__init__``.
     """
     StreamBackedCorpusView.__init__(self, *args, **kwargs)
     # The stream has to be opened before anything can be read from it.
     self._open()
     stream = self._stream
     # Consume (and discard) the heading block.
     read_blankline_block(stream)
     # Wherever the stream now stands becomes the view's first position.
     self._filepos = [stream.tell()]
示例#3
0
	def __init__(self, *args, **kwargs):
		"""
		Set up the base corpus view and skip over the heading block.

		Every argument is passed straight through to
		``StreamBackedCorpusView.__init__``.
		"""
		StreamBackedCorpusView.__init__(self, *args, **kwargs)
		self._open()  # makes self._stream available
		# The heading block is read only so that it can be thrown away.
		read_blankline_block(self._stream)
		# Restart the position table at the current stream offset.
		offset_after_heading = self._stream.tell()
		self._filepos = [offset_after_heading]
示例#4
0
 def __init__(self, corpus_file, encoding, aligned, group_by_sent,
              word_tokenizer, sent_tokenizer, alignedsent_block_reader):
     """
     Keep the supplied options as private attributes and delegate the
     rest of the set-up to ``StreamBackedCorpusView``.

     :param corpus_file: Passed to the base initialiser.
     :param encoding: Passed to the base initialiser as ``encoding``.
     :param aligned: Saved in ``self._aligned``.
     :param group_by_sent: Saved in ``self._group_by_sent``.
     :param word_tokenizer: Saved in ``self._word_tokenizer``.
     :param sent_tokenizer: Saved in ``self._sent_tokenizer``.
     :param alignedsent_block_reader: Saved in
         ``self._alignedsent_block_reader``.
     """
     self._aligned, self._group_by_sent = aligned, group_by_sent
     self._word_tokenizer, self._sent_tokenizer = (
         word_tokenizer, sent_tokenizer)
     self._alignedsent_block_reader = alignedsent_block_reader
     # The base initialiser runs last, once every option is in place.
     StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
示例#5
0
    def __init__(self, fileid, delete_on_gc=False):
        """
        Build a corpus view over the pickle corpus stored in ``fileid``.

        :param fileid: The pickle corpus to read; forwarded to the base
            initialiser.
        :param delete_on_gc: If true, ``fileid`` is to be deleted when
            this object is garbage-collected.  This method only records
            the flag in ``self._delete_on_gc``.
        """
        self._delete_on_gc = delete_on_gc
        # The base view is initialised with ``encoding=None``.
        StreamBackedCorpusView.__init__(
            self,
            fileid,
            encoding=None,
        )
示例#6
0
    def __init__(self, filename, startpos=0, **kwargs):
        """
        Initialise the base view and read the display options from
        ``kwargs``.

        :param filename: Forwarded to ``StreamBackedCorpusView.__init__``.
        :param startpos: Forwarded to ``StreamBackedCorpusView.__init__``.
        :param kwargs: Recognised keys and their defaults: ``tags``
            (True), ``disamb_only`` (True), ``mode``
            (``IPIPANCorpusView.WORDS_MODE``), ``simplify_tags`` (False),
            ``one_tag`` (True), ``append_no_space`` (False),
            ``append_space`` (False), ``replace_xmlentities`` (True).
            Other keys are not consumed here.
        """
        # ``None`` is passed for the base initialiser's second and fourth
        # positional arguments.
        StreamBackedCorpusView.__init__(self, filename, None, startpos, None)
        self.in_sentence, self.position = False, 0

        option = kwargs.pop
        self.show_tags = option('tags', True)
        self.disamb_only = option('disamb_only', True)
        self.mode = option('mode', IPIPANCorpusView.WORDS_MODE)
        self.simplify_tags = option('simplify_tags', False)
        self.one_tag = option('one_tag', True)
        self.append_no_space = option('append_no_space', False)
        self.append_space = option('append_space', False)
        self.replace_xmlentities = option('replace_xmlentities', True)
示例#7
0
    def __init__(self, filename, startpos=0, **kwargs):
        """
        Set up the stream-backed view and populate the option attributes.

        :param filename: Passed on to the base initialiser.
        :param startpos: Passed on to the base initialiser.
        :param kwargs: Optional settings popped by name: ``tags``,
            ``disamb_only``, ``mode``, ``simplify_tags``, ``one_tag``,
            ``append_no_space``, ``append_space`` and
            ``replace_xmlentities``.  A missing key falls back to the
            default given in the code below.
        """
        StreamBackedCorpusView.__init__(self, filename, None, startpos, None)
        self.in_sentence = False
        self.position = 0

        # (attribute name, keyword name, default) -- processed in order.
        for attr, key, default in (
            ('show_tags', 'tags', True),
            ('disamb_only', 'disamb_only', True),
        ):
            setattr(self, attr, kwargs.pop(key, default))

        self.mode = kwargs.pop('mode', IPIPANCorpusView.WORDS_MODE)

        for attr, key, default in (
            ('simplify_tags', 'simplify_tags', False),
            ('one_tag', 'one_tag', True),
            ('append_no_space', 'append_no_space', False),
            ('append_space', 'append_space', False),
            ('replace_xmlentities', 'replace_xmlentities', True),
        ):
            setattr(self, attr, kwargs.pop(key, default))
示例#8
0
    def __init__(self, filename, startpos=0, **kwargs):
        """
        Create the view and copy its configuration out of ``kwargs``.

        :param filename: Given to ``StreamBackedCorpusView.__init__``.
        :param startpos: Given to ``StreamBackedCorpusView.__init__``.
        :param kwargs: Configuration keywords.  ``tags`` is stored as
            ``self.show_tags``; every other recognised keyword is stored
            under its own name.  Unrecognised keywords are left untouched.
        """
        StreamBackedCorpusView.__init__(self, filename, None, startpos, None)
        self.position = 0
        self.in_sentence = False

        # ``tags`` is the only keyword whose attribute name differs.
        self.show_tags = kwargs.pop("tags", True)
        self.disamb_only = kwargs.pop("disamb_only", True)
        self.mode = kwargs.pop("mode", IPIPANCorpusView.WORDS_MODE)

        # The remaining boolean switches share their keyword's name.
        switch_defaults = (
            ("simplify_tags", False),
            ("one_tag", True),
            ("append_no_space", False),
            ("append_space", False),
            ("replace_xmlentities", True),
        )
        for name, default in switch_defaults:
            setattr(self, name, kwargs.pop(name, default))
示例#9
0
 def __init__(self, fileid, block_reader=None, startpos=0, encoding='utf8'):
     """
     Initialise the base view and determine the end-of-file position.

     :param fileid: The file to read; forwarded to the base initialiser.
     :param block_reader: Forwarded to the base initialiser.
     :param startpos: Forwarded to the base initialiser.
     :param encoding: Forwarded to the base initialiser.
     :raise ValueError: If the size of ``fileid`` cannot be determined.
     """
     # Forward the caller's ``startpos`` and ``encoding`` instead of
     # hard-coded literals, so non-default values are honoured.
     StreamBackedCorpusView.__init__(self,
                                     fileid,
                                     block_reader=block_reader,
                                     startpos=startpos,
                                     encoding=encoding)
     try:
         if isinstance(self._fileid, GzipFileSystemPathPointer):
             if re.match(r'.*\.gz$', str(self._fileid)):
                 # Gzip pointer whose name ends in ``.gz``: use the size
                 # reported by ``getuncompressedsize``.
                 self._eofpos = self.getuncompressedsize(self._fileid)
             else:
                 self._eofpos = self._fileid.file_size()
         else:
             # Anything else is sized with ``os.stat``.
             self._eofpos = os.stat(self._fileid).st_size
     except Exception as exc:
         # Any failure while sizing the file is reported as ValueError.
         raise ValueError('Unable to open or access %r -- %s' %
                          (fileid, exc))
示例#10
0
 def __init__(self, *args, **kwargs):
     """
     Initialise the view by delegating to ``StreamBackedCorpusView``.

     All positional and keyword arguments are forwarded unchanged.
     """
     StreamBackedCorpusView.__init__(self, *args, **kwargs)
	def __init__(self, *args, **kwargs):
		"""
		Initialise the base view, open the stream and read past the
		heading block.

		All arguments are forwarded to ``StreamBackedCorpusView.__init__``.
		"""
		StreamBackedCorpusView.__init__(self, *args, **kwargs)
		# open self._stream
		self._open()
		# skip the heading block
		# (this statement is aligned with the rest of the body; a stray
		# leading space would make it an unexpected indent)
		self.read_block(self._stream)
		# NOTE(review): the stream position after this read is not
		# recorded anywhere in this method -- confirm whether
		# ``self._filepos`` should be reset here.
示例#12
0
 def __init__(self, *args, **kwargs):
     """
     Initialise the base view, then start the view after the first block.

     All arguments are forwarded to ``StreamBackedCorpusView.__init__``.
     One block is then read with ``self.read_block`` and discarded, and
     the resulting stream offset becomes the only entry of
     ``self._filepos``.
     """
     StreamBackedCorpusView.__init__(self, *args, **kwargs)
     self._open()
     stream = self._stream
     # The return value is deliberately ignored; only the stream's
     # advance matters.
     self.read_block(stream)
     self._filepos = [stream.tell()]
示例#13
0
 def __init__(self, *args, **kwargs):
     """
     Delegate to ``StreamBackedCorpusView`` and skip the leading block.

     After the base initialiser has run, the stream is opened, a single
     block is consumed through ``self.read_block`` (its result is not
     kept), and ``self._filepos`` is reset to a one-element list holding
     the current stream offset.
     """
     StreamBackedCorpusView.__init__(self, *args, **kwargs)
     # Open the stream so that a block can be read from it.
     self._open()
     # Consume the first block.
     self.read_block(self._stream)
     # Record where the stream stands now.
     offset = self._stream.tell()
     self._filepos = [offset]