def xml_parse(filename, path=()):
    """Parse *filename* with SAX and return the tree built by the handler.

    filename -- passed straight to the SAX parser's ``parse()``.
    path     -- passed to ``Resolver``; presumably the entity search
                path (confirm against ``Resolver``).

    Returns ``parser.tree`` of the ``parsers.XMLParser`` content handler.
    """
    content_handler = parsers.XMLParser()

    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(content_handler)
    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setEntityResolver(Resolver(path))

    sax_parser.parse(filename)
    return content_handler.parser.tree
def xml_parse(filename, path=()):
    """Parse *filename* with SAX and return the tree built by the handler.

    The file is handed to the parser as a ``file://`` URL built from its
    absolute path rather than as a bare filename.

    filename -- path of the document to parse.
    path     -- passed to ``Resolver``; presumably the entity search
                path (confirm against ``Resolver``).

    Returns ``parser.tree`` of the ``parsers.XMLParser`` content handler.
    """
    absolute = os.path.abspath(filename)
    source = "file://%s" % absolute

    content_handler = parsers.XMLParser()

    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(content_handler)
    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setEntityResolver(Resolver(path))

    sax_parser.parse(source)
    return content_handler.parser.tree
def create_parser(store):
    """Return a namespace-aware SAX parser that feeds a ``TriXHandler``.

    store -- passed to the ``TriXHandler`` constructor.

    The parser is returned configured but not yet run; the caller is
    expected to invoke ``parse()`` on it.
    """
    sax_parser = make_parser()

    # expatreader.py has a bug that shows up when it tries to guess a
    # prefix; declaring the "xml" namespace up front works around it.
    xml_namespace = "http://www.w3.org/XML/1998/namespace"
    sax_parser.start_namespace_decl("xml", xml_namespace)

    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(TriXHandler(store))
    sax_parser.setErrorHandler(ErrorHandler())
    return sax_parser
def create_parser(store):
    """Return a namespace-aware SAX parser that feeds an ``RDFXMLHandler``.

    store -- passed to the ``RDFXMLHandler`` constructor.

    The parser is returned configured but not yet run; the caller is
    expected to invoke ``parse()`` on it.
    """
    sax_parser = make_parser()

    # expatreader.py has a bug that shows up when it tries to guess a
    # prefix; declaring the "xml" namespace up front works around it.
    xml_namespace = "http://www.w3.org/XML/1998/namespace"
    sax_parser.start_namespace_decl("xml", xml_namespace)

    sax_parser.setFeature(handler.feature_namespaces, 1)

    # No document locator is installed on the handler here (a previous
    # version set one: rdfxml.setDocumentLocator(_Locator(self.url, self.parser))).
    sax_parser.setContentHandler(RDFXMLHandler(store))
    sax_parser.setErrorHandler(ErrorHandler())
    return sax_parser
def parseSparqlResults(store, resultString):
    """Parse SPARQL results XML and return what the handler collected.

    store        -- passed to the ``SparqlResultsHandler`` constructor.
    resultString -- handed unchanged to the SAX parser's ``parse()``
                    (NOTE(review): despite the name, ``parse()`` takes a
                    source/stream or system id -- confirm what callers pass).

    Returns the handler's ``results`` attribute after parsing.
    """
    sax_parser = make_parser()

    # expatreader.py has a bug that shows up when it tries to guess a
    # prefix; declaring the "xml" namespace up front works around it.
    xml_namespace = "http://www.w3.org/XML/1998/namespace"
    sax_parser.start_namespace_decl("xml", xml_namespace)

    sax_parser.setFeature(handler.feature_namespaces, 1)

    results_handler = SparqlResultsHandler(store)
    sax_parser.setContentHandler(results_handler)
    sax_parser.setErrorHandler(ErrorHandler())

    # The parser object itself is given to the handler as its locator.
    results_handler.setDocumentLocator(sax_parser)

    sax_parser.parse(resultString)
    return results_handler.results
def xml_parse(filename, path=()):
    """Parse *filename* with SAX and return the tree built by the handler.

    filename -- path of the document to parse.
    path     -- passed to ``Resolver``; presumably the entity search
                path (confirm against ``Resolver``).

    Returns ``parser.tree`` of the ``parsers.XMLParser`` content handler.
    """
    source = filename
    if sys.version_info[:2] == (2, 3):
        # XXX: Python 2.3 only -- there the parser is given a file:// URL
        # built from the absolute path instead of the bare filename.
        source = "file://%s" % os.path.abspath(filename)

    content_handler = parsers.XMLParser()

    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(content_handler)
    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setEntityResolver(Resolver(path))

    sax_parser.parse(source)
    return content_handler.parser.tree
def create_parser(target, store):
    """Return a namespace-aware SAX parser that feeds an ``RDFXMLHandler``.

    target -- installed on the handler via ``setDocumentLocator``.
    store  -- passed to the ``RDFXMLHandler`` constructor.

    The parser is returned configured but not yet run; the caller is
    expected to invoke ``parse()`` on it.
    """
    sax_parser = make_parser()

    xml_namespace = "http://www.w3.org/XML/1998/namespace"
    try:
        # expatreader.py has a bug that shows up when it tries to guess a
        # prefix; declaring the "xml" namespace up front works around it.
        sax_parser.start_namespace_decl("xml", xml_namespace)
    except AttributeError:
        # The method is not present in Jython (at least); skip the workaround.
        pass

    sax_parser.setFeature(handler.feature_namespaces, 1)

    rdf_handler = RDFXMLHandler(store)
    rdf_handler.setDocumentLocator(target)
    # (An alternative locator was once used here:
    #  rdfxml.setDocumentLocator(_Locator(self.url, self.parser)))

    sax_parser.setContentHandler(rdf_handler)
    sax_parser.setErrorHandler(ErrorHandler())
    return sax_parser
def changesets_from_svnlog(log, repository, chunksize=2**15):
    """Incrementally parse ``svn log --xml`` output, yielding Changesets.

    log        -- object with a ``read(size)`` method producing the XML log.
    repository -- supplies ``module`` (the tracked path prefix) and ``log``
                  (used to emit a warning for paths outside that prefix).
    chunksize  -- number of bytes/characters requested per ``read()``.

    This is a generator: the log is fed to a SAX parser chunk by chunk, and
    every ``Changeset`` completed so far is yielded after each chunk.

    NOTE(review): relies on a module-level ``UTC`` tzinfo that is not
    defined in this function, and on Python 2 only constructs
    (``has_key``, ``cmp``, comparator-style ``list.sort``).
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from vcpx.changes import ChangesetEntry, Changeset

    def get_entry_from_path(path, module=repository.module):
        """Return *path* relative to *module*, or None if it lies outside it."""
        # Given the repository url of this wc, say
        #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"

        if not module.endswith('/'):
            module = module + '/'
        if path.startswith(module):
            relative = path[len(module):]
            return relative

        # The path is outside our tracked tree...
        repository.log.warning('Ignoring %r since it is not under %r', path, module)
        return None

    class SvnXMLLogHandler(ContentHandler):
        """SAX handler that turns <logentry> elements into Changeset objects.

        Completed changesets accumulate in ``self.changesets``; the driver
        loop below drains that list after each chunk.
        """
        # Map between svn action and tailor's.
        # NB: 'R', in svn parlance, means REPLACED, something other
        # system may view as a simpler ADD, taking the following as
        # the most common idiom::
        #
        #   # Rename the old file with a better name
        #   $ svn mv somefile nicer-name-scheme.py
        #
        #   # Be nice with lazy users
        #   $ echo "exec nicer-name-scheme.py" > somefile
        #
        #   # Add the wrapper with the old name
        #   $ svn add somefile
        #
        #   $ svn commit -m "Longer name for somefile"

        ACTIONSMAP = { 'R': 'R', # will be ChangesetEntry.ADDED
                       'M': ChangesetEntry.UPDATED,
                       'A': ChangesetEntry.ADDED,
                       'D': ChangesetEntry.DELETED }

        def __init__(self):
            # Finished changesets, drained by the generator's driver loop.
            self.changesets = []
            # Dict describing the <logentry> being parsed, or None between entries.
            self.current = None
            # Text fragments collected by characters() for the current element.
            self.current_field = []
            # old_name -> True for every copy source seen; written here but
            # not read anywhere within this function.
            self.renamed = {}
            # Names of entries in the current logentry that carry copyfrom info.
            self.copies = []

        def startElement(self, name, attributes):
            """Reset per-element state; remember the action of each <path>."""
            if name == 'logentry':
                self.current = {}
                self.current['revision'] = attributes['revision']
                self.current['entries'] = []
                self.copies = []
            elif name in ['author', 'date', 'msg']:
                self.current_field = []
            elif name == 'path':
                self.current_field = []
                # A path with copy information is remembered as a
                # (action, copyfrom-path, copyfrom-rev) tuple, otherwise
                # just as the bare action string.
                if attributes.has_key('copyfrom-path'):
                    self.current_path_action = (attributes['action'],
                                                attributes['copyfrom-path'],
                                                attributes['copyfrom-rev'])
                else:
                    self.current_path_action = attributes['action']

        def endElement(self, name):
            """Finish an element; on </logentry> build and store a Changeset."""
            if name == 'logentry':
                # Sort the paths to make tests easier
                self.current['entries'].sort(lambda a, b: cmp(a.name, b.name))

                # Eliminate "useless" entries: SVN does not have atomic
                # renames, but rather uses a ADD+RM duo.
                #
                # So cycle over all entries of this patch, discarding
                # the deletion of files that were actually renamed, and
                # at the same time change related entry from ADDED to
                # RENAMED.

                # When copying a directory from another location in the
                # repository (outside the tracked tree), SVN will report files
                # below this dir that are not being committed as being
                # removed.

                # We thus need to change the action_kind for all entries
                # that are below a dir that was "copyfrom" from a path
                # outside of this module:
                #  D -> Remove entry completely (it's not going to be in here)
                #  (M,A,R) -> A

                # old_name -> entry, for every ADDED entry that has a copy source.
                mv_or_cp = {}
                for e in self.current['entries']:
                    if e.action_kind == e.ADDED and e.old_name is not None:
                        mv_or_cp[e.old_name] = e

                def parent_was_copied(n):
                    """True if *n* lies below one of this logentry's copied paths."""
                    for p in self.copies:
                        if n.startswith(p + '/'):
                            return True
                    return False

                # Find renames from deleted directories:
                # $ svn mv dir/a.txt a.txt
                # $ svn del dir
                def check_renames_from_dir(name):
                    """Mark as RENAMED every copy whose source was below *name*."""
                    for e in mv_or_cp.values():
                        if e.old_name.startswith(name + '/'):
                            e.action_kind = e.RENAMED

                # entries: kept as-is or turned into ADD; entries2: replaces
                # and deletes, appended after the others further down.
                entries = []
                entries2 = []
                for e in self.current['entries']:
                    if e.action_kind == e.DELETED:
                        if mv_or_cp.has_key(e.name):
                            # Deletion of a copy source: the pair is a rename,
                            # so the deletion itself is dropped.
                            mv_or_cp[e.name].action_kind = e.RENAMED
                        else:
                            check_renames_from_dir(e.name)
                            entries2.append(e)
                    elif e.action_kind == 'R':
                        # In svn parlance, 'R' means Replaced: a typical
                        # scenario is
                        #   $ svn mv a.txt b.txt
                        #   $ touch a.txt
                        #   $ svn add a.txt
                        if mv_or_cp.has_key(e.name):
                            mv_or_cp[e.name].action_kind = e.RENAMED
                        else:
                            check_renames_from_dir(e.name)
                        e.action_kind = e.ADDED
                        entries2.append(e)
                    elif parent_was_copied(e.name):
                        if e.action_kind != e.DELETED:
                            e.action_kind = e.ADDED
                            entries.append(e)
                    else:
                        entries.append(e)

                # Changes sort: first MODIFY|ADD|RENAME, then REPLACE|DELETE
                for e in entries2:
                    entries.append(e)

                svndate = self.current['date']
                # 2004-04-16T17:12:48.000000Z
                y, m, d = map(int, svndate[:10].split('-'))
                hh, mm, ss = map(int, svndate[11:19].split(':'))
                # Digits between the '.' and the trailing character, passed
                # to datetime() as the microsecond argument.
                ms = int(svndate[20:-1])
                timestamp = datetime(y, m, d, hh, mm, ss, ms, UTC)

                # 'author' is fetched with .get(), so a logentry without an
                # <author> element yields None instead of raising.
                changeset = Changeset(self.current['revision'],
                                      timestamp,
                                      self.current.get('author'),
                                      self.current['msg'],
                                      entries)
                self.changesets.append(changeset)
                self.current = None
            elif name in ['author', 'date', 'msg']:
                self.current[name] = ''.join(self.current_field)
            elif name == 'path':
                path = ''.join(self.current_field)
                entrypath = get_entry_from_path(path)
                # Paths outside the tracked module (None) are skipped.
                if entrypath:
                    entry = ChangesetEntry(entrypath)

                    # A tuple means startElement saw copyfrom information.
                    if type(self.current_path_action) == type(()):
                        self.copies.append(entry.name)
                        old = get_entry_from_path(self.current_path_action[1])
                        if old:
                            entry.action_kind = self.ACTIONSMAP[
                                self.current_path_action[0]]
                            entry.old_name = old
                            self.renamed[entry.old_name] = True
                        else:
                            # Copy source is outside the tracked tree: plain add.
                            entry.action_kind = entry.ADDED
                    else:
                        entry.action_kind = self.ACTIONSMAP[
                            self.current_path_action]

                    self.current['entries'].append(entry)

        def characters(self, data):
            """Accumulate character data for the element being parsed."""
            self.current_field.append(data)

    parser = make_parser()
    handler = SvnXMLLogHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    # Feed the log incrementally, handing out changesets as soon as the
    # chunk that completes them has been parsed.
    chunk = log.read(chunksize)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = log.read(chunksize)
    parser.close()
    # close() may complete further changesets; yield whatever is left.
    for cs in handler.changesets:
        yield cs
def parse(self, input, errorHandler=ErrorHandler()):
    """Run a SAX parse of *input*, delivering content events to this object.

    input        -- source handed to ``xml.sax.parse`` unchanged.
    errorHandler -- SAX error handler; the default is a single
                    ``ErrorHandler`` instance created when this function
                    is defined.

    Returns None; results are whatever this handler records while parsing.
    """
    xml.sax.parse(input, handler=self, errorHandler=errorHandler)
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.

    Warning: the tag information in the changesets returned by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it. This is not a valid assumption
    in general--a tag that does not depend on patch P can be pulled in
    from another darcs repo after P. We collect the tag info anyway
    because DarcsWorkingDir._currentTags() can use it safely despite
    this problem. Hopefully the problem will eventually be fixed and
    this function can be renamed changesets_from_darcschanges.

    changes          -- object with a ``read(size)`` method producing the
                        XML change log.
    unidiff, repodir -- when both are true, ``darcs diff --unified`` is run
                        in ``repodir`` for every patch and its output is
                        stored on the changeset as ``unidiff``.
    chunksize        -- amount requested per ``read()``.
    replace_badchars -- optional mapping; each character of every chunk is
                        replaced by ``mapping.get(c, c)`` before parsing.

    This is a generator: changesets are yielded as soon as the chunk that
    completes them has been parsed.

    NOTE(review): relies on ``ExternalCommand``, ``PIPE``, ``UTC``,
    ``ChangesetEntry`` and ``DarcsChangeset`` from the enclosing module.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime

    class DarcsXMLChangesHandler(ContentHandler):
        """SAX handler that turns <patch> elements into DarcsChangeset objects."""

        def __init__(self):
            # Finished changesets, drained by the generator's driver loop.
            self.changesets = []
            # Dict describing the <patch> being parsed, or None between patches.
            self.current = None
            # Text fragments collected by characters() for the current element.
            self.current_field = []
            if unidiff and repodir:
                # "%(patchname)s" is left as a placeholder here; the patch
                # name is supplied as a keyword to execute() in endElement.
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            """Start a new patch record, or reset state for a child element."""
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                from time import strptime
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form Sun Oct 20 20:01:05 EDT 2002
                    # (the slices keep the first 19 and last 5 characters,
                    # dropping the timezone name in between)
                    timestamp = datetime(*strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')[:6])
                timestamp = timestamp.replace(tzinfo=UTC) # not true for the ValueError case, but oh well
                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                self.inverted = (attributes['inverted'] == "True")
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                self.current_field = []
            elif name == 'move':
                # Remembered until the matching endElement('move').
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            """Finish an element; on </patch> build and store the changeset."""
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]),
                                      darcs_hash=self.current['hash'])
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC', stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # Text starting with 'TAG ' is also recorded as a tag
                # (minus that prefix); the full text is still stored below.
                if val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                current_field = ''.join(self.current_field).strip()
                if self.inverted:
                    # the filenames in file modifications are outdated
                    # if there are renames
                    # NOTE(review): replace() substitutes every occurrence
                    # of old_name in the path, not only the leading one.
                    for i in self.current['entries']:
                        if i.action_kind == i.RENAMED and current_field.startswith(i.old_name):
                            current_field = current_field.replace(i.old_name, i.name)
                entry = ChangesetEntry(current_field)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]
                entry.is_directory = name.endswith('directory')
                self.current['entries'].append(entry)

        def characters(self, data):
            """Accumulate character data for the element being parsed."""
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, map):
        """Return *s* with each character replaced via *map*, if one is given."""
        if not map:
            return s

        ret = [map.get(c, c) for c in s]
        return "".join(ret)

    # Feed the log incrementally, handing out changesets as soon as the
    # chunk that completes them has been parsed.
    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    # close() may complete further changesets; yield whatever is left.
    for cs in handler.changesets:
        yield cs
textMethod(content) else: endMethod = self.__getEndMethod() if endMethod: endMethod() self.__pop() self.__content = '' def characters(self, content: str) -> None: if content and not self.__text and not content.isspace(): raise ParseError(f'node {self.__getContext()} is not supposed to ' f'contain text but contains "{content}"') self.__content += content _errorHandler = ErrorHandler() _interningDict: Dict[str, str] = {} # XML tag class: class XMLTag(ABC): tagName: ClassVar[str] = abstract boolProperties: ClassVar[Sequence[str]] = () intProperties: ClassVar[Sequence[str]] = () enumProperties: ClassVar[Mapping[str, Type[Enum]]] = {} @classmethod def _findDeclarations(cls, name: str) -> Iterator[Any]: '''Yields declarations with the given `name` in this class