def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``ann_morphosyntax.xml`` file.

    :param filename: Location handed to ``XML_Tool`` together with the
        annotation file name ``ann_morphosyntax.xml``.
    :param kwargs: An optional ``tags`` entry is popped and stored as
        ``self.tags`` (``None`` when absent); other entries are not
        used by this constructor.
    """
    self.tags = kwargs.pop("tags", None)
    self.tagspec = ".*/seg/fs"
    self.xml_tool = XML_Tool(filename, "ann_morphosyntax.xml")
    # The base view is initialised with whatever the preprocessing step returns.
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, fileid, sent, tag, strip_space, stem):
    """
    Create the view and read the document header once.

    The header fields (``title``, ``author``, ``editor``, ``resps``)
    start out as ``None``. The stream is then opened and
    ``self.handle_header`` is passed to ``read_block`` as the handler for
    elements matching ``.*/teiHeader$``; presumably that handler fills
    the fields in (it is not visible here). The stream is always closed
    afterwards, even if reading the header raises.

    :param fileid: The name of the underlying file.
    :param sent: If true, include sentence bracketing.
    :param tag: The name of the tagset to use, or None for no tags.
    :param strip_space: If true, strip spaces from word tokens.
    :param stem: If true, then substitute stems for words.
    """
    tagspec = ".*/s" if sent else ".*/s/(.*/)?(c|w)"

    self._sent = sent
    self._tag = tag
    self._strip_space = strip_space
    self._stem = stem

    self.title = None  #: Title of the document.
    self.author = None  #: Author of the document.
    self.editor = None  #: Editor
    self.resps = None  #: Statement of responsibility

    XMLCorpusView.__init__(self, fileid, tagspec)

    # Read in a tasty header.
    self._open()
    try:
        self.read_block(self._stream, ".*/teiHeader$", self.handle_header)
    finally:
        # Do not leak the open stream when header parsing fails.
        self.close()

    # Reset tag context.
    self._tag_context = {0: ()}
def __init__(self, fileid, sent, tag, strip_space, stem):
    """
    Create the view and read the document header once.

    The header fields (``title``, ``author``, ``editor``, ``resps``)
    start out as ``None``. The stream is then opened and
    ``self.handle_header`` is passed to ``read_block`` as the handler for
    elements matching ``.*/teiHeader$``; presumably that handler fills
    the fields in (it is not visible here). The stream is always closed
    afterwards, even if reading the header raises.

    :param fileid: The name of the underlying file.
    :param sent: If true, include sentence bracketing.
    :param tag: The name of the tagset to use, or None for no tags.
    :param strip_space: If true, strip spaces from word tokens.
    :param stem: If true, then substitute stems for words.
    """
    tagspec = '.*/s' if sent else '.*/s/(.*/)?(c|w)'

    self._sent = sent
    self._tag = tag
    self._strip_space = strip_space
    self._stem = stem

    self.title = None  #: Title of the document.
    self.author = None  #: Author of the document.
    self.editor = None  #: Editor
    self.resps = None  #: Statement of responsibility

    XMLCorpusView.__init__(self, fileid, tagspec)

    # Read in a tasty header.
    self._open()
    try:
        self.read_block(self._stream, '.*/teiHeader$', self.handle_header)
    finally:
        # Do not leak the open stream when header parsing fails.
        self.close()

    # Reset tag context.
    self._tag_context = {0: ()}
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``ann_morphosyntax.xml`` file.

    :param filename: Location handed to ``XML_Tool`` together with the
        annotation file name ``ann_morphosyntax.xml``.
    :param kwargs: An optional ``tags`` entry is popped and stored as
        ``self.tags`` (``None`` when absent); other entries are not
        used by this constructor.
    """
    self.tags = kwargs.pop("tags", None)
    self.tagspec = ".*/seg/fs"
    self.xml_tool = XML_Tool(filename, "ann_morphosyntax.xml")
    # The base view is initialised with whatever the preprocessing step returns.
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    HEADER_MODE
    A stream backed corpus view specialized for use with
    header.xml files in NKJP corpus.

    :param filename: Prefix to which the literal ``header.xml`` is
        appended to form the file given to the base view.
    :param kwargs: Accepted but not used by this constructor.
    """
    self.tagspec = ".*/sourceDesc$"
    header_path = filename + "header.xml"
    XMLCorpusView.__init__(self, header_path, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``text.xml`` file.

    :param filename: Location handed to ``XML_Tool`` together with the
        file name ``text.xml``.
    :param kwargs: An optional ``mode`` entry is popped and stored as
        ``self.mode`` (``0`` when absent); other entries are not used
        by this constructor.
    """
    self.mode = kwargs.pop("mode", 0)
    self.tagspec = ".*/div/ab"
    self.segm_dict = {}

    # xml preprocessing
    self.xml_tool = XML_Tool(filename, "text.xml")

    # base class init
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    HEADER_MODE
    A stream backed corpus view specialized for use with
    header.xml files in NKJP corpus.

    :param filename: Prefix to which the literal ``header.xml`` is
        appended to form the file given to the base view.
    :param kwargs: Accepted but not used by this constructor.
    """
    self.tagspec = ".*/sourceDesc$"
    header_path = filename + "header.xml"
    XMLCorpusView.__init__(self, header_path, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``text.xml`` file.

    :param filename: Location handed to ``XML_Tool`` together with the
        file name ``text.xml``.
    :param kwargs: An optional ``mode`` entry is popped and stored as
        ``self.mode`` (``0`` when absent); other entries are not used
        by this constructor.
    """
    self.mode = kwargs.pop("mode", 0)
    self.tagspec = ".*/div/ab"
    self.segm_dict = {}

    # xml preprocessing
    self.xml_tool = XML_Tool(filename, "text.xml")

    # base class init
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``ann_segmentation.xml`` file.

    Before the base view is initialised, a companion
    ``NKJPCorpus_Text_View`` in ``SENTS_MODE`` is created for the same
    ``filename`` and its ``handle_query()`` is invoked.

    :param filename: Location handed both to the companion text view and
        to ``XML_Tool`` (with the file name ``ann_segmentation.xml``).
    :param kwargs: Accepted but not used by this constructor.
    """
    self.tagspec = ".*p/.*s"

    # intersperse NKJPCorpus_Text_View
    sents_mode = NKJPCorpus_Text_View.SENTS_MODE
    self.text_view = NKJPCorpus_Text_View(filename, mode=sents_mode)
    self.text_view.handle_query()

    # xml preprocessing
    self.xml_tool = XML_Tool(filename, "ann_segmentation.xml")

    # base class init
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``ann_segmentation.xml`` file.

    Before the base view is initialised, a companion
    ``NKJPCorpus_Text_View`` in ``SENTS_MODE`` is created for the same
    ``filename`` and its ``handle_query()`` is invoked.

    :param filename: Location handed both to the companion text view and
        to ``XML_Tool`` (with the file name ``ann_segmentation.xml``).
    :param kwargs: Accepted but not used by this constructor.
    """
    self.tagspec = ".*p/.*s"

    # intersperse NKJPCorpus_Text_View
    sents_mode = NKJPCorpus_Text_View.SENTS_MODE
    self.text_view = NKJPCorpus_Text_View(filename, mode=sents_mode)
    self.text_view.handle_query()

    # xml preprocessing
    self.xml_tool = XML_Tool(filename, "ann_segmentation.xml")

    # base class init
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag):
    """
    Record the view options and initialise the base XML view.

    The tag specification is ``.*/s`` when sentence bracketing is
    requested and ``.*/s/(punc|wf)`` otherwise.

    :param fileid: The name of the underlying file.
    :param unit: One of `'token'`, `'word'`, or `'chunk'`.
    :param bracket_sent: If true, include sentence bracketing.
    :param pos_tag: Whether to include part-of-speech tags.
    :param sem_tag: Whether to include semantic tags, namely WordNet lemma
        and OOV named entity status.
    """
    tagspec = ".*/s" if bracket_sent else ".*/s/(punc|wf)"

    self._unit = unit
    self._sent = bracket_sent
    self._pos_tag = pos_tag
    self._sem_tag = sem_tag

    XMLCorpusView.__init__(self, fileid, tagspec)
def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag, wordnet):
    """
    Record the view options and initialise the base XML view.

    The tag specification is ``.*/s`` when sentence bracketing is
    requested and ``.*/s/(punc|wf)`` otherwise.

    :param fileid: The name of the underlying file.
    :param unit: One of `'token'`, `'word'`, or `'chunk'`.
    :param bracket_sent: If true, include sentence bracketing.
    :param pos_tag: Whether to include part-of-speech tags.
    :param sem_tag: Whether to include semantic tags, namely WordNet lemma
        and OOV named entity status.
    :param wordnet: Stored as ``self._wordnet``; how it is used is not
        visible in this constructor (presumably the WordNet reader used
        for lemma lookup -- confirm against callers).
    """
    tagspec = ".*/s" if bracket_sent else ".*/s/(punc|wf)"

    self._unit = unit
    self._sent = bracket_sent
    self._pos_tag = pos_tag
    self._sem_tag = sem_tag
    self._wordnet = wordnet

    XMLCorpusView.__init__(self, fileid, tagspec)
def __init__(self, fileid, tagspec, elt_handler=None):
    """
    Initialise the view by delegating directly to ``XMLCorpusView``.

    :param fileid: Forwarded unchanged to ``XMLCorpusView.__init__``.
    :param tagspec: Forwarded unchanged to ``XMLCorpusView.__init__``.
    :param elt_handler: Forwarded unchanged to ``XMLCorpusView.__init__``;
        defaults to ``None``.
    """
    XMLCorpusView.__init__(
        self,
        fileid,
        tagspec,
        elt_handler,
    )
def __init__(self, fileid, tagspec, elt_handler=None):
    """
    Initialise the view by delegating directly to ``XMLCorpusView``.

    :param fileid: Forwarded unchanged to ``XMLCorpusView.__init__``.
    :param tagspec: Forwarded unchanged to ``XMLCorpusView.__init__``.
    :param elt_handler: Forwarded unchanged to ``XMLCorpusView.__init__``;
        defaults to ``None``.
    """
    XMLCorpusView.__init__(
        self,
        fileid,
        tagspec,
        elt_handler,
    )
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``ann_morphosyntax.xml`` file.

    :param filename: Location handed to ``XML_Tool`` together with the
        annotation file name ``ann_morphosyntax.xml``.
    :param kwargs: An optional ``tags`` entry is popped and stored as
        ``self.tags`` (``None`` when absent); other entries are not
        used by this constructor.
    """
    self.tags = kwargs.pop("tags", None)
    self.tagspec = ".*/seg/fs"
    self.xml_tool = XML_Tool(filename, "ann_morphosyntax.xml")
    # The base view is initialised with whatever the preprocessing step returns.
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)
def __init__(self, filename, **kwargs):
    """
    Set up a corpus view over the preprocessed ``ann_named.xml`` file.

    :param filename: Location handed to ``XML_Tool`` together with the
        annotation file name ``ann_named.xml``.
    :param kwargs: Accepted but not used by this constructor.
    """
    self.tagspec = ".*/seg/fs"
    self.xml_tool = XML_Tool(filename, "ann_named.xml")
    # The base view is initialised with whatever the preprocessing step returns.
    preprocessed = self.xml_tool.build_preprocessed_file()
    XMLCorpusView.__init__(self, preprocessed, self.tagspec)