def __init__(self, corpus_file, encoding, aligned, group_by_sent,
             word_tokenizer, sent_tokenizer, alignedsent_block_reader):
    """
    Record the reading options on the instance, then initialise the
    underlying stream-backed view.

    :param corpus_file: Forwarded to ``StreamBackedCorpusView.__init__``
        as the file to read.
    :param encoding: Forwarded to ``StreamBackedCorpusView.__init__`` as
        its ``encoding`` keyword.
    :param aligned: Stored as ``self._aligned``.
    :param group_by_sent: Stored as ``self._group_by_sent``.
    :param word_tokenizer: Stored as ``self._word_tokenizer``.
    :param sent_tokenizer: Stored as ``self._sent_tokenizer``.
    :param alignedsent_block_reader: Stored as
        ``self._alignedsent_block_reader``.
    """
    # Output-shaping flags.
    self._aligned = aligned
    self._group_by_sent = group_by_sent

    # Tokenizers and the block reader kept for later use by this view.
    self._word_tokenizer = word_tokenizer
    self._sent_tokenizer = sent_tokenizer
    self._alignedsent_block_reader = alignedsent_block_reader

    # Base-class initialisation happens last, once all options are set.
    StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
def __init__(self, *args, **kwargs):
    """
    Create the view and move its starting position past the heading
    block.

    Every positional and keyword argument is forwarded unchanged to
    ``StreamBackedCorpusView.__init__``.
    """
    StreamBackedCorpusView.__init__(self, *args, **kwargs)

    # Open self._stream so that it can be read from right away.
    self._open()
    stream = self._stream

    # Consume the heading block; its contents are discarded.
    read_blankline_block(stream)

    # The offset reached after the heading becomes the view's first
    # recorded file position.
    self._filepos = [stream.tell()]
def __init__(self, *args, **kwargs):
    """
    Create the view and move its starting position past the heading
    block.

    Every positional and keyword argument is forwarded unchanged to
    ``StreamBackedCorpusView.__init__``.
    """
    StreamBackedCorpusView.__init__(self, *args, **kwargs)

    # Open self._stream so that it can be read from right away.
    self._open()
    stream = self._stream

    # Consume the heading block; its contents are discarded.
    read_blankline_block(stream)

    # The offset reached after the heading becomes the view's first
    # recorded file position.
    self._filepos = [stream.tell()]
def __init__(self, corpus_file, encoding, aligned, group_by_sent,
             word_tokenizer, sent_tokenizer, alignedsent_block_reader):
    """
    Record the reading options on the instance, then initialise the
    underlying stream-backed view.

    :param corpus_file: Forwarded to ``StreamBackedCorpusView.__init__``
        as the file to read.
    :param encoding: Forwarded to ``StreamBackedCorpusView.__init__`` as
        its ``encoding`` keyword.
    :param aligned: Stored as ``self._aligned``.
    :param group_by_sent: Stored as ``self._group_by_sent``.
    :param word_tokenizer: Stored as ``self._word_tokenizer``.
    :param sent_tokenizer: Stored as ``self._sent_tokenizer``.
    :param alignedsent_block_reader: Stored as
        ``self._alignedsent_block_reader``.
    """
    # Output-shaping flags.
    self._aligned = aligned
    self._group_by_sent = group_by_sent

    # Tokenizers and the block reader kept for later use by this view.
    self._word_tokenizer = word_tokenizer
    self._sent_tokenizer = sent_tokenizer
    self._alignedsent_block_reader = alignedsent_block_reader

    # Base-class initialisation happens last, once all options are set.
    StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
def __init__(self, fileid, delete_on_gc=False):
    """
    Build a corpus view over the pickle corpus stored in ``fileid``.

    :param fileid: The pickle corpus to read; forwarded to
        ``StreamBackedCorpusView.__init__``.
    :param delete_on_gc: When true, ``fileid`` is deleted once this
        object has been garbage-collected.
    """
    # The flag is recorded before the base initialiser runs.
    # NOTE(review): presumably so it exists even if base initialisation
    # fails -- confirm against the class's cleanup code.
    self._delete_on_gc = delete_on_gc

    # The base view is explicitly given no encoding.
    StreamBackedCorpusView.__init__(self, fileid, encoding=None)
def __init__(self, filename, startpos=0, **kwargs):
    """
    Initialise the base view for ``filename`` and set up parsing state
    and display options.

    :param filename: Forwarded to ``StreamBackedCorpusView.__init__``.
    :param startpos: Forwarded to ``StreamBackedCorpusView.__init__`` as
        its third positional argument.
    :param kwargs: Optional settings, each removed from ``kwargs`` when
        read: ``tags``, ``disamb_only``, ``mode``, ``simplify_tags``,
        ``one_tag``, ``append_no_space``, ``append_space`` and
        ``replace_xmlentities``.  Missing keys fall back to the
        defaults listed in the table below.
    """
    # NOTE(review): the two ``None`` arguments are positional; presumably
    # they fill the block-reader and encoding slots -- confirm against
    # StreamBackedCorpusView.__init__.
    StreamBackedCorpusView.__init__(self, filename, None, startpos, None)

    # Parsing state.
    self.in_sentence = False
    self.position = 0

    # (attribute name, keyword to pop, default when the keyword is absent)
    option_table = (
        ('show_tags', 'tags', True),
        ('disamb_only', 'disamb_only', True),
        ('mode', 'mode', IPIPANCorpusView.WORDS_MODE),
        ('simplify_tags', 'simplify_tags', False),
        ('one_tag', 'one_tag', True),
        ('append_no_space', 'append_no_space', False),
        ('append_space', 'append_space', False),
        ('replace_xmlentities', 'replace_xmlentities', True),
    )
    for attribute, keyword, fallback in option_table:
        setattr(self, attribute, kwargs.pop(keyword, fallback))
def __init__(self, filename, startpos=0, **kwargs):
    """
    Initialise the base view for ``filename`` and set up parsing state
    and display options.

    :param filename: Forwarded to ``StreamBackedCorpusView.__init__``.
    :param startpos: Forwarded to ``StreamBackedCorpusView.__init__`` as
        its third positional argument.
    :param kwargs: Optional settings, each removed from ``kwargs`` when
        read: ``tags``, ``disamb_only``, ``mode``, ``simplify_tags``,
        ``one_tag``, ``append_no_space``, ``append_space`` and
        ``replace_xmlentities``.
    """
    # NOTE(review): the two ``None`` arguments are positional; presumably
    # they fill the block-reader and encoding slots -- confirm against
    # StreamBackedCorpusView.__init__.
    StreamBackedCorpusView.__init__(self, filename, None, startpos, None)

    # Parsing state.
    self.in_sentence = False
    self.position = 0

    # Each option is taken out of kwargs, falling back to its default.
    take = kwargs.pop
    self.show_tags = take('tags', True)
    self.disamb_only = take('disamb_only', True)
    self.mode = take('mode', IPIPANCorpusView.WORDS_MODE)
    self.simplify_tags = take('simplify_tags', False)
    self.one_tag = take('one_tag', True)
    self.append_no_space = take('append_no_space', False)
    self.append_space = take('append_space', False)
    self.replace_xmlentities = take('replace_xmlentities', True)
def __init__(self, filename, startpos=0, **kwargs):
    """
    Initialise the base view for ``filename`` and set up parsing state
    and display options.

    :param filename: Forwarded to ``StreamBackedCorpusView.__init__``.
    :param startpos: Forwarded to ``StreamBackedCorpusView.__init__`` as
        its third positional argument.
    :param kwargs: Optional settings, each removed from ``kwargs`` when
        read: ``tags``, ``disamb_only``, ``mode``, ``simplify_tags``,
        ``one_tag``, ``append_no_space``, ``append_space`` and
        ``replace_xmlentities``.  Missing keys fall back to the
        defaults listed in the table below.
    """
    # NOTE(review): the two ``None`` arguments are positional; presumably
    # they fill the block-reader and encoding slots -- confirm against
    # StreamBackedCorpusView.__init__.
    StreamBackedCorpusView.__init__(self, filename, None, startpos, None)

    # Parsing state.
    self.in_sentence = False
    self.position = 0

    # (attribute name, keyword to pop, default when the keyword is absent)
    option_table = (
        ("show_tags", "tags", True),
        ("disamb_only", "disamb_only", True),
        ("mode", "mode", IPIPANCorpusView.WORDS_MODE),
        ("simplify_tags", "simplify_tags", False),
        ("one_tag", "one_tag", True),
        ("append_no_space", "append_no_space", False),
        ("append_space", "append_space", False),
        ("replace_xmlentities", "replace_xmlentities", True),
    )
    for attribute, keyword, fallback in option_table:
        setattr(self, attribute, kwargs.pop(keyword, fallback))
def __init__(self, fileid, block_reader=None, startpos=0, encoding='utf8'):
    """
    Create the view and record the size of the underlying data in
    ``self._eofpos``.

    :param fileid: The file to read; forwarded to
        ``StreamBackedCorpusView.__init__``.
    :param block_reader: Forwarded to the base initialiser.
    :param startpos: Forwarded to the base initialiser.
    :param encoding: Forwarded to the base initialiser.
    :raise ValueError: If the size of the file cannot be determined;
        the message includes ``fileid`` and the underlying error.
    """
    # Pass the caller's startpos and encoding through; fixed values here
    # would make those two parameters silently ineffective.
    StreamBackedCorpusView.__init__(
        self,
        fileid,
        block_reader=block_reader,
        startpos=startpos,
        encoding=encoding,
    )

    # NOTE(review): self._fileid is presumably set by the base
    # initialiser -- confirm.
    try:
        if isinstance(self._fileid, GzipFileSystemPathPointer):
            if re.match(r'.*\.gz$', str(self._fileid)):
                # Name ends in ".gz": ask for the uncompressed size.
                self._eofpos = self.getuncompressedsize(self._fileid)
            else:
                self._eofpos = self._fileid.file_size()
        else:
            # Any other fileid: use the size reported by os.stat.
            self._eofpos = os.stat(self._fileid).st_size
    except Exception as exc:
        raise ValueError('Unable to open or access %r -- %s' % (fileid, exc))
def __init__(self, *args, **kwargs):
    """
    Initialise the view by handing every positional and keyword
    argument straight to ``StreamBackedCorpusView.__init__``.

    No additional state is set up here.
    """
    StreamBackedCorpusView.__init__(self, *args, **kwargs)
def __init__(self, *args, **kwargs):
    """
    Create the view and move its starting position past the heading
    block.

    Every positional and keyword argument is forwarded unchanged to
    ``StreamBackedCorpusView.__init__``.
    """
    StreamBackedCorpusView.__init__(self, *args, **kwargs)

    # open self._stream
    self._open()

    # skip the heading block
    self.read_block(self._stream)

    # reset the start position to the current position in the stream;
    # without this the heading that was just read is not excluded from
    # the view's recorded start offset.
    self._filepos = [self._stream.tell()]
def __init__(self, *args, **kwargs):
    """
    Create the view, read one block from the freshly opened stream, and
    record the resulting stream offset as the view's start position.

    Every positional and keyword argument is forwarded unchanged to
    ``StreamBackedCorpusView.__init__``.
    """
    StreamBackedCorpusView.__init__(self, *args, **kwargs)

    self._open()
    stream = self._stream

    # Read a single block up front; the result is discarded.
    # NOTE(review): presumably this is a heading block -- confirm.
    self.read_block(stream)

    # The offset reached after that block becomes the only recorded
    # file position.
    self._filepos = [stream.tell()]
def __init__(self, *args, **kwargs):
    """
    Create the view, read one block from the freshly opened stream, and
    record the resulting stream offset as the view's start position.

    Every positional and keyword argument is forwarded unchanged to
    ``StreamBackedCorpusView.__init__``.
    """
    StreamBackedCorpusView.__init__(self, *args, **kwargs)

    self._open()
    stream = self._stream

    # Read a single block up front; the result is discarded.
    # NOTE(review): presumably this is a heading block -- confirm.
    self.read_block(stream)

    # The offset reached after that block becomes the only recorded
    # file position.
    self._filepos = [stream.tell()]