def process_node(self, session, data):
    # Turn into SAX and process_eventList() for the time being
    handler = SaxContentHandler()
    sax.saxify(data, handler)
    saxl = handler.currentText
    return self.process_eventList(session, saxl)

def parse_document(file_name):
    tree = etree.parse(file_name)
    handler = ErdmanTransformer()
    saxify(tree, handler)
    handler.save_page()  # get the last page
    titles = get_titles(tree)
    return handler, titles

def fuck_dom(page):
    page = UnicodeDammit(page).unicode_markup
    tree = etree.fromstring(page, etree.HTMLParser())
    # tree.docinfo.encoding = "utf-8"
    handler = SAX2DOM()
    sax.saxify(tree, handler)
    return handler.document

def get_sax(self, session):
    if not self.sax:
        handler = SaxContentHandler()
        sax.saxify(self.dom, handler)
        self.sax = handler.currentText
        self.sax.append("9 %r" % handler.elementHash)
    return self.sax

def test_sax_to_pulldom_multiple_namespaces(self):
    tree = self.parse('<a xmlns="blaA" xmlns:a="blaA"></a>')
    handler = pulldom.SAX2DOM()
    sax.saxify(tree, handler)
    dom = handler.document

    # With multiple prefix definitions, the node should keep the one
    # that was actually used, even if the others also are valid.
    self.assertEqual('a', dom.firstChild.localName)
    self.assertEqual('blaA', dom.firstChild.namespaceURI)
    self.assertEqual(None, dom.firstChild.prefix)

    tree = self.parse('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>')
    handler = pulldom.SAX2DOM()
    sax.saxify(tree, handler)
    dom = handler.document
    self.assertEqual('a', dom.firstChild.localName)
    self.assertEqual('blaA', dom.firstChild.namespaceURI)
    self.assertEqual('a', dom.firstChild.prefix)

def flexify(html_root, flex_path):
    """Insert FLEx annotations after every Zapotec word in the HTML root element."""
    with open(flex_path, 'r', encoding='utf-8') as f:
        flex_dict = json.load(f)
    print('{} words in the FLEx dictionary'.format(len(flex_dict)))
    handler = FLExParser(flex_dict)
    sax.saxify(html_root, handler)
    print('Processed {0.total} word(s), missed {0.missed}'.format(handler))
    return handler.etree

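# A minimal usage sketch for flexify() above. The file names are hypothetical;
# FLExParser and the JSON export layout come from the surrounding project.
from lxml import etree

html_root = etree.parse('text.html', etree.HTMLParser()).getroot()  # hypothetical input
annotated = flexify(html_root, 'flex_export.json')                  # hypothetical path
print(etree.tostring(annotated, pretty_print=True, encoding='unicode'))
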
def process_node(self, session, data):
    """Walk a DOM structure, extract and return.

    Turn into SAX and process_eventList() for the time being.
    """
    handler = SaxContentHandler()
    sax.saxify(data, handler)
    saxl = handler.currentText
    return self.process_eventList(session, saxl)

def write(self, result_set):
    logging.info('writing %s' % self.type())
    if self.__file_target__ is None:
        writer = SAXWriter(result_set.source, 2)
    else:
        writer = SAXWriter(self.__file_target__, 2)
    logging.info('write from source: %s' % result_set.source)
    resultset_w = etree.Element('result')
    resultset_w.set("source", result_set.source)
    resultset_w.set('args', result_set.args)
    from html import escape
    for weakness in result_set.iterate_Weaknesses():
        weaknesselement = etree.SubElement(resultset_w, "weakness")
        weaknesselement.set("id", weakness.name)
        for suite in weakness.iterate_Suites():
            suiteelement = etree.SubElement(weaknesselement, "suite")
            suiteelement.set("dir", suite.directory)
            suiteelement.set("tool", suite.compiler)
            suiteelement.set("args", suite.args)
            for file in suite.iterate_Files():
                for function in file.iterate_Functions():
                    for line in function.iterate_Lines():
                        for flaw in line.iterate_Flaws():
                            flawelement = etree.SubElement(suiteelement, "flaw")
                            attrib = flawelement.attrib
                            attrib['file'] = file.filename
                            attrib['function'] = function.function
                            attrib['line'] = str(line.line)
                            attrib['severity'] = str(flaw.severity.name)
                            attrib['description'] = escape(flaw.description)
                        for bug in line.iterate_Bugs():
                            bugelement = etree.SubElement(suiteelement, "bug")
                            attrib = bugelement.attrib
                            attrib['filename'] = file.filename
                            attrib['function'] = function.function
                            attrib['line'] = str(line.line)
                            attrib['type'] = bug.type
                            attrib['message'] = escape(bug.message)
    sax.saxify(resultset_w, writer)
    if self.__file_target__ is None:
        logging.info("Write successful on file: %s" % (result_set.source))
    else:
        logging.info("Write successful on file: %s" % (self.__file_target__))

def strip_html(doc):
    tree = etree.fromstring(doc)
    handler = TextOnlyContentHandler()
    sax.saxify(tree, handler)
    links_list = ""
    for i, link in enumerate(handler.links):
        links_list += "\n[%d] %s" % (i + 1, link)
    text = " ".join(handler.text)
    return wrap(text, 72) + "\n\n----" + links_list

def __init__(self, content):
    ContentHandler.__init__(self)
    self.content = False
    self.tree = []
    self.stack = []
    self.stack_usage = []
    self.strong = False
    self.emphasis = False
    saxify(content, self)

def markdown_souptest(text, nofollow=False, target=None, lang=None):
    if not text:
        return text

    smd = safemarkdown(text, nofollow, target, lang)

    s = StringIO(smd)
    tree = lxml.etree.parse(s)
    handler = SouptestSaxHandler(markdown_ok_tags)
    saxify(tree, handler)

    return smd

def paginate(pseudo_html_root, text_name):
    """Paginate the output of the XSLT transformation.

    This entails removing all <pb/> elements and adding <div class="page">...</div>
    elements to wrap each page. The output of this function is valid HTML.
    """
    # TODO [2019-04-26]: Is text_name necessary for anything? It becomes a CSS class
    # that's on the page <div>'s, so we should check the Ticha website's stylesheets
    # to see if it's ever targeted.
    handler = TEIPager(text_name)
    sax.saxify(pseudo_html_root, handler)
    return handler.etree

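# A minimal usage sketch for paginate() above. The input file and text name are
# hypothetical; TEIPager comes from the surrounding project.
from lxml import etree

pseudo_html_root = etree.parse('transformed.xml').getroot()  # hypothetical XSLT output
paginated = paginate(pseudo_html_root, 'arte')               # hypothetical text name
print(etree.tostring(paginated, pretty_print=True, encoding='unicode'))
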
def test_sax_to_pulldom(self):
    tree = self.parse('<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
    handler = pulldom.SAX2DOM()
    sax.saxify(tree, handler)
    dom = handler.document

    self.assertEqual("a", dom.firstChild.localName)
    self.assertEqual("blaA", dom.firstChild.namespaceURI)

    children = dom.firstChild.childNodes
    self.assertEqual("ab", children[0].nodeValue)
    self.assertEqual("blaB", children[1].namespaceURI)
    self.assertEqual("ba", children[2].nodeValue)

def md2xhtml(infile, outfile):
    pandoc = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'html', '-S'],
                              stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    fold = subprocess.Popen(['fold', '-s', '-w', '72'],
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pandoc.stdin.write(infile.read())
    pandoc.stdin.close()
    tree = etree.parse(pandoc.stdout, etree.HTMLParser(encoding='utf-8'))
    sax.saxify(tree, Html2Xhtml(fold.stdin))
    fold.stdin.close()
    outfile.write(xhtml_head)
    outfile.write(fold.stdout.read())
    outfile.write(xhtml_tail)

def render(self, context):
    tree = self.nameOfTreeLoc.resolve(context)
    language = self.language.resolve(context)
    sourceCodeOrXML = self.isSourceOrXml.resolve(context)
    if isinstance(self.grammarFile, str):
        grammarFileURL = self.grammarFile
    else:
        grammarFileURL = self.grammarFile.resolve(context)
    contentHandler = SyntaxHighlighter(sourceCodeOrXML == "srcML",
                                       language, grammarFileURL)
    lxmlSAX.saxify(ET.fromstring(tree), contentHandler)
    return contentHandler.content

def run_suite(self, suite_api):
    api_code, api_result = self.rest_api_get(suite_api, prefix="")
    if api_result.xpath("count(//TestSet)=0"):
        sax_handler = SaxBase(api_result)
        print(escaped("ERROR",
                      "[ERROR] Legacy suite '{}' failed to execute".format(suite_api)))
        sax_handler.tests = 1
        sax_handler.errors = 1
    else:
        sax_handler = LegacyApiSax(api_result)
        sax.saxify(api_result, sax_handler)
    return sax_handler

def markdown_souptest(text, nofollow=False, target=None, lang=None):
    if not text:
        return text

    smd = safemarkdown(text, nofollow, target, lang)

    # Prepend a DTD reference so we can load up definitions of all the standard
    # XHTML entities (&nbsp;, etc.).
    smd_with_dtd = markdown_dtd + smd

    s = StringIO(smd_with_dtd)
    parser = lxml.etree.XMLParser(load_dtd=True)
    tree = lxml.etree.parse(s, parser)
    handler = SouptestSaxHandler(markdown_ok_tags)
    saxify(tree, handler)

    return smd

def loadFromFile(cls, filename):
    path_file = os.path.abspath(filename)
    if not os.path.isfile(path_file):
        err = "Error: '%s' does not exist or is not a file." % filename
        print(err)
        raise Exception(err)
    # Note: parsing a file directly with dexml/minidom is supposedly slower, so I
    # used the lxml one, but I did not benchmark it.
    tree = etree.parse(path_file)
    handler = SAX2DOM()
    sax.saxify(tree, handler)
    dom = handler.document
    # Alternatively, you can pass the filename to parse() here to skip lxml.
    mdl = cls.parse(dom)
    return mdl

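# A self-contained sketch of the lxml-to-DOM bridge used in loadFromFile()
# above, assuming SAX2DOM there is xml.dom.pulldom.SAX2DOM (as in the pulldom
# tests elsewhere in this file): lxml.sax.saxify() replays the parsed tree as
# SAX events, and SAX2DOM builds a stdlib DOM document from them.
from xml.dom.pulldom import SAX2DOM
from lxml import etree, sax

tree = etree.fromstring('<model><field name="x"/></model>')
handler = SAX2DOM()
sax.saxify(tree, handler)
dom = handler.document          # a stdlib xml.dom Document
print(dom.firstChild.tagName)   # -> model
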
def markdown_souptest(text, nofollow=False, target=None):
    if not text:
        return text

    smd = safemarkdown(text, nofollow=nofollow, target=target)

    # Prepend a DTD reference so we can load up definitions of all the standard
    # XHTML entities (&nbsp;, etc.).
    smd_with_dtd = markdown_dtd + smd

    s = StringIO(smd_with_dtd)
    parser = lxml.etree.XMLParser(load_dtd=True)
    tree = lxml.etree.parse(s, parser)
    handler = SouptestSaxHandler(markdown_ok_tags)
    saxify(tree, handler)

    return smd

def query(self, qstr, interval):
    self._descriptor = qstr
    query_opts = {"output": self._output, "from": '0', "qstr": qstr}
    if interval:
        year_cluster = '"{0}"'.format('" OR "'.join(interval))
    else:
        # Last 3 years
        cur_year = date.today().year
        year_cluster = '"{0}"'.format('" OR "'.join(
            [str(x) for x in range(cur_year - 3, cur_year)]))
    query_opts['interval'] = year_cluster
    query_opts.update(self._options)
    template = Template(self._url)
    query = template.substitute(query_opts)
    logger.debug('Querying base: %s\n', self._options['db'])
    response = requests.get(query)
    root = etree.XML(str.encode(response.text, 'UTF-8'))
    handler = DefaultContentHandler()
    saxify(root, handler)
    return handler.articles

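# For reference, the year-cluster expression above expands like this
# (assuming the current year is 2024):
years = [str(x) for x in range(2024 - 3, 2024)]
print('"{0}"'.format('" OR "'.join(years)))  # -> "2021" OR "2022" OR "2023"
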
def main():
    ugly = False
    if os.sys.platform[0:3] == 'win':
        ugly = True
    response = urllib2.urlopen(sys.argv[1])
    encoding = response.headers.getparam('charset')
    html = response.read().decode(encoding)
    f = StringIO(html)
    parser = etree.HTMLParser()
    # create SAX tree
    tree = etree.parse(f, parser)
    handler = BoilerpipeHTMLContentHandler()
    sax.saxify(tree, handler)
    a = ArticleExtractor()
    # parses our data and creates a TextDocument with TextBlocks
    doc = handler.toTextDocument()
    tw = TextWrapper()
    tw.width = 80
    tw.initial_indent = os.linesep + os.linesep
    parsed_url = urllib2.urlparse.urlparse(sys.argv[1])
    filename = parsed_url.netloc + "-" + "".join(
        [c for c in parsed_url.path if c.isalpha() or c.isdigit() or c == ' ']
    ).rstrip() + '.txt'
    output = []
    for line in a.getText(doc).splitlines():
        output.append(tw.fill(line))
    with codecs.open(filename, 'w', encoding='utf8') as f:
        for line in output:
            if ugly:
                line = line.replace('\n', os.linesep)
            f.write(line)
    print "Article saved. Lines: %s. Filename: %s" % (len(output), filename)

def parse(cls, xml_string, **parser_kwargs):
    """Instantiate an OOXMLtoLatexParser and parse the string given by xml_string.

    :param xml_string: a string containing the xml to be parsed
    :param parser_kwargs: OOXMLtoLatexParser kwargs:
        - math_symbols: list of math symbols, defaults to latex_constants.SYMBOLS
    """
    xml_string = OOXMLtoLatexParser.change_xml_double_open_tag_to_left_arrow(xml_string)
    xml_string = OOXMLtoLatexParser._remove_self_closing_tags(xml_string)
    xml_to_latex_parser = cls(**parser_kwargs)
    if isinstance(xml_string, str):
        element = etree.fromstring(xml_string)
        sax.saxify(element, xml_to_latex_parser)
        return xml_to_latex_parser
    else:
        raise TypeError("xml string parameter must be str or unicode")

def write_datapointset(self, datapointset):
    logging.info('writing %s' % self.type())
    writer = SAXWriter(self.__file_target__, 2)
    datapointset_x = etree.Element('datapointset')
    for (source, args) in datapointset.imports.items():
        xml = etree.SubElement(datapointset_x, 'import')
        xml.set('source', source)
        xml.set('args', args)
    for (source, args) in datapointset.builds.items():
        xml = etree.SubElement(datapointset_x, 'build')
        xml.set('source', source)
        xml.set('args', args)
    for criteria in datapointset.iterate_Criterias():
        criteria_x = etree.SubElement(datapointset_x, 'criteria')
        criteria_x.set('granularity', criteria.granularity.name)
        criteria_x.set('wrong_checker_is_fp', str(criteria.wrong_checker_is_fp))
        criteria_x.set('minimum', str(criteria.minimum))
        for datapoint in criteria.iterate_DataPoints():
            xml = etree.SubElement(criteria_x, 'datapoint')
            xml.set('tp', str(datapoint.tp))
            xml.set('fp', str(datapoint.fp))
            xml.set('fn', str(datapoint.fn))
            xml.set('weakness', datapoint.weakness)
            xml.set('directory', datapoint.directory)
            xml.set('filename', datapoint.filename)
            xml.set('function', datapoint.function)
            xml.set('line', str(datapoint.line))
            xml.set('permutation', datapoint.permutation)
    sax.saxify(datapointset_x, writer)

def run_suite(self, suite_api):
    api_code, api_result = self.rest_api_get(suite_api, prefix="")
    sax_handler = XQSuiteApiSax(api_result)
    sax.saxify(api_result, sax_handler)
    return sax_handler

if qname == 'p':
    self.outfile.write('\n\n')
elif qname in ['em', 'strong']:
    self.outfile.write('}')
elif qname == 'blockquote':
    self.outfile.write('\\end{quote}\n\n')
elif qname == 'br':
    self.outfile.write('\\\\\n')
elif qname == 'h2':
    self.outfile.write('\\mychapter{%s}{%s}{%s}\n\n'
                       % (self.curtitle, self.curauthor, self.curdate))

infile = sys.stdin
outfile = sys.stdout
pandoc = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'html'],
                          stdin=infile, stdout=subprocess.PIPE)
sed = subprocess.Popen(['sed', 's/^[[:space:]]*//'],
                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
coder = utf_8.StreamWriter(sed.stdin)
fold = subprocess.Popen(['fold', '-s', '-w', '72'],
                        stdin=sed.stdout, stdout=outfile)
# pandoc.stdin.write(infile.read())
# pandoc.stdin.close()
tree = etree.parse(pandoc.stdout, etree.HTMLParser(encoding='utf-8'))
sax.saxify(tree, Html2Latex(coder))
sed.stdin.close()
# outfile.write(fold.stdout.read())

def parse(cls, dom):
    """Converts DOM into paragraphs."""
    handler = cls()
    saxify(dom, handler)
    return handler.content

def run(self, startingElement):
    lsax.saxify(startingElement, self)
    result = " ".join(self.text)
    self.text = []
    self.continueReading = True
    return result

def parse_file(self, filename):
    parser = lxml.etree.XMLParser(load_dtd=False)
    f = open(filename, 'rb')
    tree = lxml.etree.parse(f, parser)
    handler = SouptestSaxHandler()
    saxify(tree, handler)

def getChannels(self):
    return self.channels

def getPrograms(self):
    return self.programs

# This code takes an extremely long time to execute and needs to be revised
# anyway. One idea is to mark channels and programs with a special bit, as an
# indication that they will be deleted in the future.
f = urllib2.urlopen("https://tvcom.uz/files/xmltv.xml")
tree = etree.parse(f)
f.close()
h = MyContentHandler()
sax.saxify(tree, h)
cursor = connection.cursor(MySQLdb.cursors.DictCursor)
# Lock the tables so that no inconsistencies will occur.
# When querying for channels and programs one should perform the following queries:
#   SELECT * FROM channels WHERE scheduled_for_insertion = FALSE;
#   SELECT * FROM programs WHERE scheduled_for_insertion = FALSE;
# cursor.execute("LOCK tables channels WRITE, programs WRITE;")
cursor.execute("LOCK tables programs WRITE;")
# Delete tombstones, if any...
# cursor.execute("DELETE FROM channels WHERE scheduled_for_insertion = TRUE")
cursor.execute("DELETE FROM programs WHERE scheduled_for_insertion = TRUE")
# Move old ones...
# cursor.execute("UPDATE channels SET scheduled_for_deletion = TRUE")
cursor.execute("UPDATE programs SET scheduled_for_deletion = TRUE")
cursor.execute("UNLOCK tables;")
connection.commit()

def lxmlparse(f, handler):
    from lxml.etree import parse as lxmlparse
    from lxml.sax import saxify
    etree = lxmlparse(f)
    saxify(etree, handler)

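# A self-contained sketch of the pattern lxmlparse() above wraps:
# lxml.sax.saxify() walks an already-parsed lxml tree and replays it as SAX
# events (startElementNS, characters, endElementNS, ...) on any object that
# implements the xml.sax ContentHandler interface.
from io import BytesIO
from xml.sax.handler import ContentHandler
from lxml import etree, sax

class TextCollector(ContentHandler):
    """Collect all character data seen during the SAX replay."""
    def __init__(self):
        ContentHandler.__init__(self)
        self.chunks = []

    def characters(self, data):
        self.chunks.append(data)

tree = etree.parse(BytesIO(b'<doc><p>hello</p><p>world</p></doc>'))
handler = TextCollector()
sax.saxify(tree, handler)
print(' '.join(handler.chunks))  # -> hello world
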
def normalize_xml(xml, recursively_sort=(), compact=False):
    """Normalizes an XML document.

    The idea is that two semantically equivalent XML documents should be
    normalized into the same canonical representation. Therefore if two
    documents compare equal after normalization, they are semantically
    equivalent.

    The canonical representation used here has nothing to do with W3C
    Canonical XML.

    This function normalizes indentation, whitespace and newlines (except
    inside text nodes), element attribute order, expands character references,
    and expands shorthand notation of empty XML elements ("<br/>" becomes
    "<br></br>").

    If recursively_sort is given, it is a sequence of tags that will have
    test:sort="recursively" automatically appended to their attribute lists
    in the text. Use it when you cannot or do not want to modify the XML
    document itself.

    If compact is True, nodes that only have text (without newlines) will be
    presented more compactly ("<tag>text</tag>").

    Caveats:
     - normalize_xml does not deal well with text nodes
     - normalize_xml does not help when different prefixes are used for the
       same namespace
     - normalize_xml does not handle all XML features (CDATA sections,
       inline DTDs, processing instructions, comments)
    """

    class Document:
        def __init__(self):
            self.children = []
            self.sort_recursively = False

        def render(self, level=0):
            result = []
            for child in self.children:
                result.append(child.render(level))
            return ''.join(result)

    class Element:
        def __init__(self, parent, tag, attrlist, sort=False,
                     sort_recursively=False):
            self.parent = parent
            self.tag = tag
            self.attrlist = attrlist
            self.children = []
            self.sort = sort
            self.sort_recursively = sort_recursively

        def render(self, level):
            result = []
            indent = ' ' * level
            line = '%s<%s' % (indent, self.tag[1])
            for attr in self.attrlist:
                if len(line + attr) < 78:
                    line += attr
                else:
                    result.append(line)
                    result.append('\n')
                    line = '%s %s%s' % (indent, ' ' * len(self.tag[1]), attr)
            if self.children:
                s = ''.join([child.render(level + 1) for child in self.children])
            else:
                s = ''
            if not s:
                result.append('%s/>\n' % line)
            elif (compact and len(self.children) == 1 and '<' not in s
                  and s.count('\n') == 1):
                result.append('%s>%s</%s>\n' % (line, s.strip(), self.tag[1]))
            else:
                result.append('%s>\n' % line)
                result.append(s)
                result.append('%s</%s>\n' % (indent, self.tag[1]))
            return ''.join(result)

        def finalize(self):
            if self.sort:
                self.children.sort(lambda x, y: cmp(x.key, y.key))
            self.key = self.render(0)

    class Text:
        def __init__(self, data):
            self.data = data
            self.key = None

        def render(self, level):
            data = cgi.escape(self.data.strip())
            if data:
                indent = ' ' * level
                return ''.join(['%s%s\n' % (indent, line.strip())
                                for line in data.splitlines()])
            else:
                return ''

    class Handler(ContentHandler):
        def __init__(self):
            self.level = 0
            self.result = []
            self.root = self.cur = Document()
            self.last_text = None
            self._locator = None

        def startElementNS(self, tag, qname, attrs):
            self.startElement(tag, attrs)

        def endElementNS(self, tag, qname):
            self.endElement(tag)

        def startElement(self, tag, attrs):
            sort = sort_recursively = self.cur.sort_recursively
            if attrs:
                if tag in recursively_sort:
                    sort = sort_recursively = True
                attrlist = attrs.items()
                attrlist.sort()
                attrlist = [' %s="%s"' % (k[1], cgi.escape(v, True))
                            for k, v in attrlist]
            else:
                attrlist = []
            child = Element(self.cur, tag, attrlist, sort=sort,
                            sort_recursively=sort_recursively)
            self.cur.children.append(child)
            self.cur = child
            self.last_text = None

        def endElement(self, tag):
            self.cur.finalize()
            self.cur = self.cur.parent
            self.last_text = None

        def characters(self, data):
            if self.last_text is not None:
                self.last_text.data += data
            else:
                self.last_text = Text(data)
                self.cur.children.append(self.last_text)

        def render(self):
            return self.root.render()

    for tag in recursively_sort:
        xml = xml.replace('<%s' % tag, '<%s test:sort="recursively"' % tag)
    handler = Handler()
    tree = etree.XML(xml)
    sax.saxify(tree, handler)
    return ''.join(handler.render())

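# A usage sketch for normalize_xml() above, based only on its docstring:
# documents that differ in attribute order and empty-element shorthand
# should normalize to the same canonical text.
doc_a = '<root b="2" a="1"><leaf/></root>'
doc_b = '<root a="1" b="2"><leaf></leaf></root>'
assert normalize_xml(doc_a) == normalize_xml(doc_b)
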
def process_lxml_tree(self, tree):
    handler = MyContentHandler()
    sax.saxify(tree, handler)
    if handler.max_br > 0:
        print(self.inpname, handler.max_br, "строф")  # Russian: "stanzas"
    return handler.etree