Пример #1
0
Файл: xml.py Проект: plq/spyne
def get_object_as_xml_cloth(inst, cls=None, no_namespace=False, encoding='utf8'):
    """Serialize ``inst`` with :class:`XmlCloth` and return the written bytes.

    :param inst: The instance of the class to be serialized.
    :param cls: The class to be serialized. Defaults to ``inst.__class__``.
    :param no_namespace: When true, the cloth is created with ``use_ns=False``.
        ``None`` resolves to ``True`` if ``cls.get_namespace()`` is ``None``
        and to ``False`` otherwise.
    :param encoding: Encoding handed to ``etree.xmlfile``.
    :returns: The contents of the in-memory buffer the document was
        written to.
    """

    cls = inst.__class__ if cls is None else cls

    # The namespace is always queried, exactly once, before the flag is
    # resolved.
    namespace = cls.get_namespace()
    if no_namespace is None:
        no_namespace = namespace is None

    buf = BytesIO()
    cloth = XmlCloth(use_ns=not no_namespace)
    fake_ctx = FakeContext()

    with etree.xmlfile(buf, encoding=encoding) as xf:
        fake_ctx.outprot_ctx.doctype_written = False
        fake_ctx.protocol.prot_stack = tlist([], ProtocolMixin)
        type_name = cls.get_type_name()
        result = cloth.subserialize(fake_ctx, cls, inst, xf, type_name)

        # This helper does not drive an incremental serializer.
        assert not isgenerator(result)

    return buf.getvalue()
    def _write_header(self):
        """
        Generator that creates the XML file and the sheet header

        Opens ``self.filename`` with ``xmlfile``, writes the ``worksheet``
        root together with ``sheetPr``, the sheet views, the sheet format
        and (when present) the column definitions, then opens ``sheetData``
        and waits for content.

        Every value passed in through ``send()`` is written inside
        ``sheetData``. Closing the generator ends the loop, after which the
        enclosing ``with`` blocks close the open elements and the file.
        """

        # The default (un-prefixed) namespace of the document.
        NSMAP = {None : SHEET_MAIN_NS}

        with xmlfile(self.filename) as xf:
            with xf.element("worksheet", nsmap=NSMAP):
                pr = Element('sheetPr')
                # '%d' renders the two flags as integers in the attributes.
                SubElement(pr, 'outlinePr',
                           {'summaryBelow':
                            '%d' %  (self.show_summary_below),
                            'summaryRight': '%d' % (self.show_summary_right)})
                if self.page_setup.fitToPage:
                    SubElement(pr, 'pageSetUpPr', {'fitToPage': '1'})
                xf.write(pr)
                xf.write(write_sheetviews(self))
                xf.write(write_format(self))

                # write_cols() may return None, in which case nothing is
                # written for the columns.
                cols = write_cols(self)
                if cols is not None:
                    xf.write(cols)

                with xf.element("sheetData"):
                    try:
                        # Receive rows from the caller until it closes us.
                        while True:
                            r = (yield)
                            xf.write(r)
                    except GeneratorExit:
                        # Normal shutdown: fall through so the with-blocks
                        # can close sheetData, worksheet and the file.
                        pass
Пример #3
0
    def write(self, file_or_path, *, encoding='utf-8', pretty_print=True):
        """
        Write the XML tree into a file.

        This method writes each layer successively and discards it afterwards.
        This is more memory efficient than building the whole tree at once.

        :param file_or_path: The target to which to write the XML tree.
        :type file_or_path: A file object or a file path.
        :param encoding: Encoding passed to ``etree.xmlfile``.
        :param pretty_print: Passed to every ``xf.write()`` call that writes
            a layer element.

        """
        with etree.xmlfile(file_or_path, encoding=encoding) as xf:
            xf.write_declaration()
            with xf.element(P_DATA + 'D-Spin', nsmap={None: NS_DATA}):
                xf.write('\n')
                # TODO: Write MetaData.
                with xf.element(P_TEXT + 'TextCorpus', lang=self.lang,
                                nsmap={None: NS_TEXT}):
                    xf.write('\n')
                    # Take the first TextCorpus element found in the
                    # in-memory tree as the source of existing layers.
                    corpus_elem = self._tree.xpath('/data:D-Spin/text:TextCorpus',
                                                  namespaces=NS)[0]
                    # Write layers from the input tree.
                    for layer_elem in corpus_elem:
                        xf.write(layer_elem, pretty_print=pretty_print)
                        # Drop the local reference once the layer is written.
                        layer_elem = None
                    # Write newly added layers.
                    for layer in self.new_layers:
                        # Each entry names an attribute of self whose ``tcf``
                        # attribute is the element to write.
                        layer_elem = getattr(self, layer).tcf
                        xf.write(layer_elem, pretty_print=pretty_print)
                        layer_elem = None
                # Newline after the closed TextCorpus element.
                xf.write('\n')
Пример #4
0
    def __init__(self, path, graph=None, encoding='utf-8', prettyprint=True,
                 infer_numeric_types=False):
        """Open ``path`` for incremental writing and start the document.

        The ``xmlfile`` context and the root ``graphml`` element context are
        entered by hand here; nothing in this method exits them.

        :param path: Target handed to ``lxmletree.xmlfile``.
        :param graph: Optional graph; when given it is passed to
            ``self.add_graph_element`` once the root element is open.
        :param encoding: Encoding for the output file.
        :param prettyprint: Stored on ``self._prettyprint``.
        :param infer_numeric_types: Stored on ``self.infer_numeric_types``.
        """
        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.infer_numeric_types = infer_numeric_types

        # Enter the xmlfile context manually so the writer outlives __init__.
        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # We will store those keys in a plain list, and then after the graph
        # element is closed we will add them to the main graphml element.
        self.xml = []
        # Same list object as self.xml, under a second name.
        self._keys = self.xml
        self._graphml = self._xml.element(
            'graphml',
            {
                'xmlns': self.NS_GRAPHML,
                'xmlns:xsi': self.NS_XSI,
                'xsi:schemaLocation': self.SCHEMALOCATION
            })
        # Open the root element; it stays open after __init__ returns.
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)
Пример #5
0
 def export(self):
     """Write the schedule of sessions and side events to ``self.outfile``.

     When ``self.export_avatars`` is set, a directory named after the output
     file (without its extension) is created first. Events are sorted with
     ``self.event_sorter``, grouped with ``self.day_grouper`` and each group
     is handed to ``self._export_day`` inside a ``schedule`` root element.
     """
     if self.export_avatars:
         self.avatar_dir = os.path.splitext(self.outfile)[0]
         if not os.path.exists(self.avatar_dir):
             os.makedirs(self.avatar_dir)
     # NOTE(review): the file is opened in text mode ('w'); confirm that the
     # xmlfile writer accepts a text-mode file on the Python version in use.
     with open(self.outfile, 'w') as fp:
         with etree.xmlfile(fp) as xf:
             # Released sessions that have both a start and an end time,
             # restricted to the listed columns.
             sessions = models.Session.objects \
                 .select_related('kind', 'audience_level', 'track',
                                 'speaker__user__profile') \
                 .prefetch_related('additional_speakers__user__profile',
                                   'location') \
                 .filter(released=True, start__isnull=False, end__isnull=False) \
                 .order_by('start') \
                 .only('end', 'start', 'title', 'abstract', 'is_global',
                       'kind__name',
                       'audience_level__name',
                       'track__name',
                       'speaker__user__username',
                       'speaker__user__profile__avatar',
                       'speaker__user__profile__full_name',
                       'speaker__user__profile__display_name',
                       'speaker__user__profile__short_info',
                       'speaker__user__profile__user')\
                 .all()
             # Side events that have both a start and an end time.
             side_events = models.SideEvent.objects \
                 .prefetch_related('location') \
                 .filter(start__isnull=False, end__isnull=False) \
                 .order_by('start') \
                 .all()
             # Merge both sources, sort them, then group the sorted
             # sequence by day.
             all_events = sorted(chain(sessions, side_events), key=self.event_sorter)
             all_events = groupby(all_events, self.day_grouper)
             with xf.element('schedule', created=now().isoformat()):
                 for day, events in all_events:
                     self._export_day(fp, xf, day, events)
Пример #6
0
def test_real(jmdict_path, examples_path):
    """Convert a JMdict file into ``jmdict-importable.xml``.

    ``entry`` elements are streamed out of *jmdict_path*, converted with
    ``convert_entry`` (using the examples loaded from *examples_path*) and
    written incrementally while a progress bar is updated from the read
    position of the input file.

    ``errors.txt`` and the output file are now opened in a ``with`` block.
    ``etree.xmlfile`` was not asked to close the file object it was given,
    so the original left both handles open.

    :param jmdict_path: Path of the JMdict source file.
    :param examples_path: Path handed to ``load_examples``.
    """
    # errors.txt is still created/truncated first, as before, although this
    # function never writes to it.
    with open('errors.txt', 'wb'), \
            open('jmdict-importable.xml', 'wb') as out:
        jmdict_total_size = os.path.getsize(jmdict_path)
        # Kept for its side effect only: raises OSError early when the
        # examples file is missing or inaccessible.
        os.path.getsize(examples_path)

        widgets = ['Converting: ', pb.Percentage(), ' ', pb.Bar(),
                   ' ', pb.Timer(), ' ']
        pbar = pb.ProgressBar(widgets=widgets,
                              maxval=jmdict_total_size).start()

        example_dict = load_examples(examples_path)

        with open(jmdict_path, 'rb') as f:
            with etree.xmlfile(out, encoding='utf-8') as xf:
                xf.write_declaration()
                context = etree.iterparse(f, tag='entry',
                                          resolve_entities=False)

                with xf.element(NAMESPACE_PREFIX + 'dictionary', nsmap=NSMAP,
                                attrib={XSI_PREFIX + 'schemaLocation': SCHEMA_LOCATION,
                                        'schema_version': __schema_version__}):
                    xf.write("\n")
                    xml_meta = create_meta(jmdict_path)
                    xf.write(xml_meta, pretty_print=True)

                    for _action, elem in context:
                        xml_entry = convert_entry(elem, example_dict)
                        xf.write(xml_entry, pretty_print=True)
                        pbar.update(f.tell())
                        # Clear the parsed entry once it has been converted.
                        elem.clear()

    pbar.finish()
Пример #7
0
    def write(self, path, encoding='utf-8'):
        """Write ``self._waypoints`` to *path* as a GPX 1.1 document.

        Each waypoint becomes a ``wpt`` element whose ``lon``/``lat``
        attributes come from ``waypoint.to_json(only_defined=True)``; every
        remaining key of that mapping becomes a child element holding
        ``str(value)``.

        :param path: Target handed to ``etree.xmlfile``.
        :param encoding: Encoding of the output file.
        """
        position_keys = ('lon', 'lat')
        root_attributes = {
            'version': '1.1',
            'creator': 'BleauDataBase',
            'xmlns': 'http://www.topografix.com/GPX/1/1',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
            'xsi:schemaLocation': 'http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd',
        }
        writer_options = dict(encoding=encoding, compression=None,
                              close=True, buffered=True)

        with etree.xmlfile(path, **writer_options) as xf:
            xf.write_declaration()
            with xf.element('gpx', **root_attributes):
                for waypoint in self._waypoints:
                    data = waypoint.to_json(only_defined=True)

                    # Read both coordinates first, then drop them from the
                    # mapping so they are not written again as children.
                    position = {}
                    for key in position_keys:
                        position[key] = str(data[key])
                    for key in position_keys:
                        del data[key]

                    with xf.element('wpt', **position):
                        # Fixme: iterate the waypoint's fields directly?
                        for name, value in data.items():
                            with xf.element(name):
                                xf.write(str(value))
            xf.flush()
Пример #8
0
 def __OuterXMLSerialiserCoRoutine(self):
   """Coroutine which performs the actual XML serialisation

   Writes to self.Bridge. Elements passed in with send() are written inside
   the root 'events' element. The first None that is sent opens an
   'eventgroups' element; everything sent afterwards is written inside it
   until the coroutine is closed.
   """
   with etree.xmlfile(self.Bridge, encoding='utf-8') as Writer:
     # flush() is required below; refuse to run without it.
     if not 'flush' in dir(Writer):
       raise Exception('The installed version of lxml is too old. Please install version >= 3.4.')
     Writer.write_declaration()
     with Writer.element('events'):
       Writer.flush()
       try:
         while True:
           Element = (yield)
           if Element is None:
             # Sending None signals the end of the generation of
             # the definitions element, and the beginning of the
             # eventgroups element.
             with Writer.element('eventgroups'):
               Writer.flush()
               # This inner loop has no break: it only ends when an
               # exception (such as GeneratorExit) is raised at the yield.
               while True:
                 Element = (yield)
                 Writer.write(Element, pretty_print=True)
                 Writer.flush()
           Writer.write(Element, pretty_print=True)
           Writer.flush()
       except GeneratorExit:
         # Closing the coroutine lets the with-blocks close the open
         # elements and finish the document.
         pass
Пример #9
0
 def to_file(file_name, test_suites, encoding=None, doHierarchical=False):
     """Write every suite in *test_suites* into one JUnit XML file.

     Each suite serializes itself through ``writeOut(writer, doHierarchical)``
     inside a ``testsuites`` root element.
     """
     with etree.xmlfile(file_name, encoding=encoding) as writer:
         writer.write_declaration(standalone=True)
         root = writer.element("testsuites")
         with root:
             for suite in test_suites:
                 suite.writeOut(writer, doHierarchical)
def lxml_writer(ws=None):
    """Write the rows of *ws* into an in-memory buffer with ``write_rows``.

    A worksheet from ``make_worksheet()`` is used when *ws* is ``None``.
    Nothing is returned; the buffer is discarded.
    """
    from openpyxl.writer.lxml_worksheet import write_rows

    worksheet = make_worksheet() if ws is None else ws

    buf = BytesIO()
    with xmlfile(buf) as xf:
        write_rows(xf, worksheet)
Пример #11
0
    def serialize(self, ctx, message):
        """Uses ``ctx.out_object``, ``ctx.out_header`` or ``ctx.out_error`` to
        set ``ctx.out_body_doc``, ``ctx.out_header_doc`` and
        ``ctx.out_document`` as an ``lxml.etree._Element instance``.

        Not meant to be overridden.

        :param ctx: The context being serialized; ``ctx.out_stream`` is
            created here when it is ``None``.
        :param message: Must be ``self.REQUEST`` or ``self.RESPONSE``; the
            non-error path additionally requires ``self.RESPONSE``.
        :returns: The result of the error serialization, or of
            ``self.incgen(...)`` for a regular response.
        """

        assert message in (self.REQUEST, self.RESPONSE)

        self.event_manager.fire_event('before_serialize', ctx)

        if ctx.out_stream is None:
            ctx.out_stream = StringIO()
            # NOTE(review): looks like leftover debug output -- confirm
            # whether this print should stay.
            print(ctx.out_stream, id(ctx.out_stream))

        if ctx.out_error is not None:
            # All errors at this point must be Fault subclasses.
            inst = ctx.out_error
            cls = inst.__class__
            name = cls.get_type_name()

            ctx.out_document = E.div()
            with etree.xmlfile(ctx.out_stream) as xf:
                # as XmlDocument is not push-ready yet, this is what we do.
                # this is a huge hack, bear with me.
                retval = XmlCloth.HtmlMicroFormat() \
                                            .to_parent(ctx, cls, inst, xf, name)

        else:
            assert message is self.RESPONSE
            result_message_class = ctx.descriptor.out_message

            name = result_message_class.get_type_name()
            if ctx.descriptor.body_style == BODY_STYLE_WRAPPED:
                if self.ignore_wrappers:
                    result_message = ctx.out_object[0]
                    # Unwrap single-member wrapper classes until a
                    # non-wrapper (or multi-member) class is reached.
                    while result_message_class.Attributes._wrapper and \
                                      len(result_message_class._type_info) == 1:
                        result_message_class, = \
                                        result_message_class._type_info.values()

                else:
                    result_message = result_message_class()

                    # Fill the wrapper's members positionally from the
                    # output values.
                    for i, attr_name in enumerate(
                                        result_message_class._type_info.keys()):
                        setattr(result_message, attr_name, ctx.out_object[i])

            else:
                # Exactly one output object is expected here; the unpacking
                # raises otherwise.
                result_message, = ctx.out_object

            retval = self.incgen(ctx, result_message_class, result_message, name)

        self.event_manager.fire_event('after_serialize', ctx)

        return retval
Пример #12
0
 def _writer(doc):
     """Coroutine that writes everything sent to it inside ``sheetData``.

     *doc* is handed to ``xmlfile``. Closing the coroutine ends the loop so
     the ``with`` blocks can close the element and the writer.
     """
     with xmlfile(doc) as xf:
         with xf.element('sheetData'):
             try:
                 while True:
                     # Wait for the next item from send().
                     body = (yield)
                     xf.write(body)
             except GeneratorExit:
                 # Normal shutdown via close().
                 pass
Пример #13
0
def test_write_cell_string(worksheet):
    """Writing a string cell registers its text in the shared strings."""
    from .. lxml_worksheet import write_cell

    worksheet['A1'] = "Hello"

    buf = BytesIO()
    with xmlfile(buf) as xf:
        write_cell(xf, worksheet, worksheet['A1'])

    shared = worksheet.parent.shared_strings
    assert shared == ["Hello"]
Пример #14
0
def write_element(fds):
    """Coroutine that streams elements into a ``testResults`` document.

    *fds* is handed to ``etree.xmlfile`` with buffering disabled. Every
    element received through ``send()`` is written and flushed right away.
    Closing the coroutine lets the ``with`` blocks close the root element.
    """
    with etree.xmlfile(fds, buffered=False, encoding="UTF-8") as xf:
        xf.write_declaration()
        with xf.element('testResults', version="1.2"):
            try:
                while True:
                    # Wait for the next element from send().
                    el = (yield)
                    xf.write(el)
                    xf.flush()
            except GeneratorExit:
                # Normal shutdown via close().
                pass
Пример #15
0
def test_write_sheetdata(worksheet, write_rows):
    """A single numeric cell is written as one row inside ``sheetData``."""
    worksheet['A1'] = 10

    buf = BytesIO()
    with xmlfile(buf) as xf:
        write_rows(xf, worksheet)

    expected = """<sheetData><row r="1" spans="1:1"><c t="n" r="A1"><v>10</v></c></row></sheetData>"""
    diff = compare_xml(buf.getvalue(), expected)
    assert diff is None, diff
Пример #16
0
def test_write_formula(worksheet, write_rows):
    """Check the ``sheetData`` XML written for plain and shared formulae.

    Row 4 holds a shared formula: A4 carries the formula text and the
    ``ref`` range, while B4 and C4 only reference the shared index.
    """
    ws = worksheet

    ws.cell('F1').value = 10
    ws.cell('F2').value = 32
    ws.cell('F3').value = '=F1+F2'
    ws.cell('A4').value = '=A1+A2+A3'
    ws.formula_attributes['A4'] = {'t': 'shared', 'ref': 'A4:C4', 'si': '0'}
    # '=' alone: the expected output below has an empty <f> for these cells.
    ws.cell('B4').value = '='
    ws.formula_attributes['B4'] = {'t': 'shared', 'si': '0'}
    ws.cell('C4').value = '='
    ws.formula_attributes['C4'] = {'t': 'shared', 'si': '0'}

    out = BytesIO()
    with xmlfile(out) as xf:
        write_rows(xf, ws)

    xml = out.getvalue()
    expected = """
    <sheetData>
      <row r="1" spans="1:6">
        <c r="F1" t="n">
          <v>10</v>
        </c>
      </row>
      <row r="2" spans="1:6">
        <c r="F2" t="n">
          <v>32</v>
        </c>
      </row>
      <row r="3" spans="1:6">
        <c r="F3">
          <f>F1+F2</f>
          <v></v>
        </c>
      </row>
      <row r="4" spans="1:6">
        <c r="A4">
          <f ref="A4:C4" si="0" t="shared">A1+A2+A3</f>
          <v></v>
        </c>
        <c r="B4">
          <f si="0" t="shared"></f>
          <v></v>
        </c>
        <c r="C4">
          <f si="0" t="shared"></f>
          <v></v>
        </c>
      </row>
    </sheetData>
    """
    # compare_xml returns None when both documents match; otherwise the
    # returned value is used as the assertion message.
    diff = compare_xml(xml, expected)
    assert diff is None, diff
    def _write_xml(self, eaxs_file, tagged_eaxs_file, tagged_messages, global_id):
        """ Streams tagged messages into a new XML file at @tagged_eaxs_file.

        Args:
            - eaxs_file (str): The filepath for the EAXS file; stored in the root
            element's "SourceEAXS" attribute.
            - tagged_eaxs_file (str): The filepath to which the tagged EAXS document will be
            written.
            - tagged_messages (generator): Yields (message index, message id, tagged message)
            tuples as returned by self._get_tagged_messages().
            - global_id (str): The value of self._get_global_id(@eaxs_file); stored in the
            root element's "GlobalId" attribute.

        Returns:
            list: The return value.
            The message indexes for messages whose tagged message is None.

        Raises:
            - FileExistsError: If @tagged_eaxs_file already exists.
        """

        # refuse to overwrite an existing destination file.
        if os.path.isfile(tagged_eaxs_file):
            err = "Destination file '{}' already exists.".format(tagged_eaxs_file)
            self.logger.error(err)
            raise FileExistsError(err)

        # indexes of messages that came back without a tagged version.
        failed_indexes = []

        with etree.xmlfile(tagged_eaxs_file, encoding=self.charset, close=True,
                buffered=self.buffered) as xfile:

            # XML declaration first, then namespace registration.
            xfile.write_declaration()
            etree.register_namespace(self.ncdcr_prefix, self.ncdcr_uri)

            # root <Account> element wrapping every tagged <Message>.
            root_tag = "{ns}:Account".format(ns=self.ncdcr_prefix)
            root = xfile.element(root_tag, GlobalId=global_id, SourceEAXS=eaxs_file,
                    nsmap=self.ns_map)
            with root:
                for message_index, _message_id, tagged_message in tagged_messages:

                    # untagged message: remember its index and move on.
                    if tagged_message is None:
                        failed_indexes.append(message_index)
                        continue

                    # tagged message: write it, then clear the element.
                    xfile.write(tagged_message)
                    tagged_message.clear()

        return failed_indexes
Пример #18
0
 def __InnerXMLSerialiserCoRoutine(self, EventTypeName, EventSourceId):
   """Coroutine which performs the actual XML serialisation of eventgroups

   Opens an 'eventgroup' element on self.Bridge whose 'event-type' and
   'source-id' attributes are EventTypeName and EventSourceId. Every element
   passed in with send() is written and flushed until the coroutine is
   closed.
   """
   with etree.xmlfile(self.Bridge, encoding='utf-8') as Writer:
     # The attribute names contain hyphens, so they are passed through a
     # dict rather than as keyword arguments.
     with Writer.element('eventgroup', **{'event-type': EventTypeName, 'source-id': EventSourceId}):
       Writer.flush()
       try:
         while True:
           Element = (yield)
           Writer.write(Element, pretty_print=True)
           Writer.flush()
       except GeneratorExit:
         # Normal shutdown via close(); the with-blocks close the element.
         pass
Пример #19
0
    def _run(self, inst, cls=None):
        """Serialize *inst* into ``self.stream`` and return the parsed result.

        *cls* defaults to ``inst.__class__``; its ``__name__`` is used as the
        element name. The parsed document is also pretty-printed to stdout.
        """
        cls = inst.__class__ if cls is None else cls

        with etree.xmlfile(self.stream) as parent:
            cloth = XmlCloth()
            cloth.subserialize(self.ctx, cls, inst, parent, name=cls.__name__)

        parsed = etree.fromstring(self.stream.getvalue())
        pretty = etree.tostring(parsed, pretty_print=True)

        print(pretty)
        return parsed
Пример #20
0
def test_write_cell(worksheet, value, expected):
    """The XML written for cell A1 holding *value* matches *expected*."""
    from .. lxml_worksheet import write_cell

    worksheet['A1'] = value

    buf = BytesIO()
    with xmlfile(buf) as xf:
        write_cell(xf, worksheet, worksheet['A1'])

    diff = compare_xml(buf.getvalue(), expected)
    assert diff is None, diff
Пример #21
0
    def _run(self, inst, spid=None, cloth=None):
        """Serialize *inst* through a cloth and return the parsed result.

        Exactly one of *spid* and *cloth* must be given. With *spid*, a
        minimal ``<a><b spyne_id="..."/></a>`` cloth is built around it;
        otherwise the supplied *cloth* element is used as is.

        :param inst: Instance to serialize; its class drives serialization.
        :param spid: Value for the ``spyne_id`` attribute of the generated
            cloth.
        :param cloth: Ready-made cloth element.
        :returns: The element parsed back from ``self.stream``.
        """
        cls = inst.__class__
        if cloth is None:
            assert spid is not None
            cloth = etree.fromstring("""<a><b spyne_id="%s"></b></a>""" % spid)
        else:
            assert spid is None

        with etree.xmlfile(self.stream) as parent:
            XmlCloth(cloth=cloth).subserialize(self.ctx, cls, inst, parent)
        elt = etree.fromstring(self.stream.getvalue())
        # Function-call form: valid on Python 3 and prints the same single
        # value on Python 2, where the old print statement was used.
        print(etree.tostring(elt, pretty_print=True))
        return elt
Пример #22
0
def incremental_xml_dump(xml_filename, object_dict):
    """
    Incrementally dumps objects from object dict to XML

    Each key becomes an element that wraps the ``to_xml()`` output of every
    object in the list stored under that key.

    Params:
        xml_filename : output XML filename
        object_dict : dictionary of parent tag and list of child objects
    """

    with etree.xmlfile(xml_filename, encoding="utf-8") as xml_fp:
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for key, values in object_dict.items():
            with xml_fp.element(key):
                for value in values:
                    xml_fp.write(value.to_xml())
Пример #23
0
    def write(self, financial_period, path, encoding='iso-8859-1'):
        """Write *financial_period* and its journals to *path* as XML.

        The document is ``comptabilite/exercice``. It holds a ``DateCloture``
        element containing ``financial_period.start_date.isoformat()``,
        followed by whatever ``self._write_journal`` emits for each journal.

        :param financial_period: Object providing ``start_date`` and
            ``journals``.
        :param path: Target handed to ``etree.xmlfile``.
        :param encoding: Encoding of the output file.
        """
        writer_options = dict(encoding=encoding, compression=None,
                              close=True, buffered=True)

        with etree.xmlfile(path, **writer_options) as xf:
            xf.write_declaration()
            with xf.element('comptabilite'), xf.element('exercice'):
                with xf.element('DateCloture'):
                    closing_text = financial_period.start_date.isoformat()
                    xf.write(closing_text)
                for journal in financial_period.journals:
                    self._write_journal(xf, journal)
Пример #24
0
def test_write_formula(worksheet, write_rows):
    """Check the ``sheetData`` XML written for plain and array formulae.

    B4 is given ``formula_attributes`` that mark it as an array formula over
    ``B4:B8``; the expected output carries them on its ``<f>`` element.
    """
    ws = worksheet

    ws['F1'] = 10
    ws['F2'] = 32
    ws['F3'] = '=F1+F2'
    ws['A4'] = '=A1+A2+A3'
    ws['B4'] = "=SUM(A10:A14*B10:B14)"
    ws.formula_attributes['B4'] = {'t': 'array', 'ref': 'B4:B8'}

    out = BytesIO()
    with xmlfile(out) as xf:
        write_rows(xf, ws)

    xml = out.getvalue()
    expected = """
    <sheetData>
      <row r="1" spans="1:6">
        <c r="F1" t="n">
          <v>10</v>
        </c>
      </row>
      <row r="2" spans="1:6">
        <c r="F2" t="n">
          <v>32</v>
        </c>
      </row>
      <row r="3" spans="1:6">
        <c r="F3">
          <f>F1+F2</f>
          <v></v>
        </c>
      </row>
      <row r="4" spans="1:6">
        <c r="A4">
          <f>A1+A2+A3</f>
          <v></v>
        </c>
        <c r="B4">
          <f ref="B4:B8" t="array">SUM(A10:A14*B10:B14)</f>
          <v></v>
        </c>
      </row>
    </sheetData>
    """
    # compare_xml returns None when both documents match; otherwise the
    # returned value is used as the assertion message.
    diff = compare_xml(xml, expected)
    assert diff is None, diff
Пример #25
0
 def export(self):
     """Build the iCalendar-style XML export and return the buffer.

     Released talk/keynote/sponsored sessions and recordable side events
     that have both a start and an end time are written inside
     ``iCalendar/vcalendar`` via ``self._export_session`` and
     ``self._export_side_event``.

     :returns: The ``StringIO`` object the document was written to (the
         buffer itself, not its contents).
     """
     output = StringIO.StringIO()
     with etree.xmlfile(output) as xf:
         sessions = models.Session.objects \
             .select_related('kind', 'audience_level', 'track',
                             'speaker__user__profile') \
             .prefetch_related('additional_speakers__user__profile',
                               'location') \
             .filter(released=True, start__isnull=False, end__isnull=False,
                     kind__slug__in=('talk', 'keynote', 'sponsored')) \
             .order_by('start') \
             .only('end', 'start', 'title', 'abstract', 'description', 'language',
                   'kind__name',
                   'audience_level__name',
                   'track__name',
                   'speaker__user__username',
                   'speaker__user__profile__avatar',
                   'speaker__user__profile__full_name',
                   'speaker__user__profile__display_name',
                   'speaker__user__profile__short_info',
                   'speaker__user__profile__user') \
             .all()
         side_events = models.SideEvent.objects \
             .select_related() \
             .prefetch_related('location') \
             .filter(start__isnull=False, end__isnull=False, is_recordable=True) \
             .order_by('start') \
             .only('end', 'start', 'name') \
             .all()
         # Text form of the current conference, reused in several header
         # fields below.
         self.conference = force_text(conference_models.current_conference())
         # Midnight of today, stored on the instance.
         # NOTE(review): presumably the reference point for duration
         # calculations in the _export_* helpers -- confirm there.
         self._duration_base = datetime.datetime.combine(datetime.date.today(), datetime.time(0, 0, 0))
         with xf.element('iCalendar'):
             with xf.element('vcalendar'):
                 # Calendar header fields.
                 with xf.element('version'):
                     xf.write('2.0')
                 with xf.element('prodid'):
                     xf.write('-//Pentabarf//Schedule %s//EN' % self.conference)
                 with xf.element('x-wr-caldesc'):
                     xf.write(self.conference)
                 with xf.element('x-wr-calname'):
                     xf.write(self.conference)
                 # Sessions first, then side events.
                 for session in sessions:
                     self._export_session(xf, session)
                 for session in side_events:
                     self._export_side_event(xf, session)
     return output
Пример #26
0
def person_feed(out):
    """Generate XML feed of people.

    This is a streaming XML generator for people. Output will be
    written to the provided output destination which can be a file
    or file-like object. The context manager returns a function
    which can be called repeatedly to add a person to the feed::

        with person_feed(sys.stdout) as f:
            f({"MIT_ID": "1234", ...})
            f({"MIT_ID": "5678", ...})

    """
    # NOTE(review): as written this is a plain generator function; the usage
    # shown above presumably relies on a ``contextmanager`` decorator that
    # is not visible here -- confirm.
    with ET.xmlfile(out, encoding='UTF-8') as xf:
        xf.write_declaration()
        with xf.element(ns('records'), nsmap=NSMAP):
            # Hand the caller a callable with the writer already bound as
            # the first argument of _add_person. The root element and the
            # file stay open until the generator is resumed.
            yield partial(_add_person, xf)
Пример #27
0
    def incgen(self, ctx, cls, inst, name):
        """Serialize *inst* incrementally into ``ctx.out_stream``.

        When ``self.subserialize`` returns a generator, every value sent to
        this coroutine is forwarded to it. A ``Break`` raised at the yield is
        thrown into that generator to let it finish.

        :param name: Element name; ``cls.get_type_name()`` when ``None``.
        """
        if name is None:
            name = cls.get_type_name()

        from lxml import etree
        # FIXME: this should be html.htmlfile
        with etree.xmlfile(ctx.out_stream) as xf:
            ret = self.subserialize(ctx, cls, inst, xf, None, name)

            if isgenerator(ret):
                try:
                    # Relay everything we are sent to the inner generator.
                    while True:
                        sv2 = (yield)
                        ret.send(sv2)
                except Break as b:
                    # Pass the same Break instance on; the inner generator
                    # ending in response (StopIteration) is expected.
                    try:
                        ret.throw(b)
                    except StopIteration:
                        pass
Пример #28
0
    def incgen(self, ctx, cls, inst, ns, name=None):
        """Serialize *inst* incrementally into ``ctx.out_stream``.

        When ``self.to_parent`` returns a generator, every value sent to this
        coroutine is forwarded to it. A ``Break`` raised at the yield makes a
        fresh ``Break`` get thrown into that generator. Afterwards
        ``ctx.out_stream.finish()`` is called if the stream has such a
        method.

        :param ns: Passed through to ``self.to_parent``.
        :param name: Element name; ``cls.get_type_name()`` when ``None``.
        """
        if name is None:
            name = cls.get_type_name()
        with etree.xmlfile(ctx.out_stream) as xf:
            ret = self.to_parent(ctx, cls, inst, xf, ns, name)
            if isgenerator(ret):
                try:
                    # Relay everything we are sent to the inner generator.
                    while True:
                        y = (yield) # may throw Break
                        ret.send(y)

                except Break:
                    # A new Break instance is thrown in; the inner generator
                    # ending in response (StopIteration) is expected.
                    try:
                        ret.throw(Break())
                    except StopIteration:
                        pass

        # Runs after the xmlfile context has been closed.
        if hasattr(ctx.out_stream, 'finish'):
            ctx.out_stream.finish()
Пример #29
0
def load_data(start_url, output_file, url_limit):
    """Crawl from *start_url* and stream the page data into *output_file*.

    ``process_page`` writes a page into the open document and returns the
    URLs found on it. Crawling stops once *url_limit* pages were processed
    successfully or no unvisited URL is left. Pages that raise
    ``FetchError`` are reported and do not count towards the limit.
    """
    seen = set()
    pending = {start_url}

    with etree.xmlfile(output_file, encoding='utf-8') as xf:
        xf.write_declaration()

        with xf.element("data"):
            processed = 0
            while processed < url_limit:
                if not pending:
                    print('no urls left after', processed, 'urls been processed')
                    break

                url = pending.pop()
                print('queued:', url, ' ... ', end='')
                try:
                    discovered = process_page(url, xml_file=xf)
                except FetchError:
                    print('failed')
                else:
                    pending |= discovered
                    processed += 1
                    print('processed')

                # Never queue a URL again once it has been attempted.
                seen.add(url)
                pending -= seen
Пример #30
0
    def convert_generator(self):
        """Convert the dump at ``self.dump_path`` to XML, yielding progress.

        The dump is read line by line. Lines are buffered until an entry
        header (``g.ENTRY_HEADER_MATCHER``) is seen. The first header turns
        the buffered lines (minus the last one) into the metadata element;
        each later header makes the buffered entry get parsed and written.
        Entries that fail to parse are counted and logged, together with
        their text, to ``self.error_path``.

        The error log and the output file are opened in ``with`` blocks so
        they are closed when the generator finishes or is closed early; the
        original never closed them.

        :returns: A generator yielding ``f.tell()`` after every entry
            header has been handled.
        """
        errs = 0

        with open(self.error_path, 'wb') as ef, \
                open(self.out_path, 'wb') as out, \
                codecs.open(self.dump_path, 'r', 'utf-8') as f, \
                etree.xmlfile(out, encoding='utf-8') as xf:
            xf.write_declaration()
            with xf.element(NAMESPACE_PREFIX + 'dictionary', nsmap=NSMAP,
                            attrib={XSI_PREFIX + 'schemaLocation': SCHEMA_LOCATION,
                                    'schema_version': __schema_version__}):
                xf.write("\n")
                entry_buffer = list()
                in_metadata = True
                for line in f:

                    is_entry_header = g.ENTRY_HEADER_MATCHER.match(line)
                    if is_entry_header:
                        if in_metadata:
                            # Everything before the first header, except
                            # the last buffered line, is metadata.
                            in_metadata = False
                            xml_meta = self.meta_to_xml(entry_buffer[:-1])
                            xf.write(xml_meta, pretty_print=True)
                        else:
                            entry_lines = ''.join(entry_buffer)

                            try:
                                res = (g.ENTRY_BLOCK + stringEnd).parseString(entry_lines)
                                xml_entry = self.entry_to_xml(res)
                                xf.write(xml_entry, pretty_print=True)
                            except ParseException as e:
                                errs += 1
                                # Function-call form: valid on Python 3 and
                                # prints the same text on Python 2.
                                print("errs = {}".format(errs))
                                ef.write((u"{}\n".format(e)).encode('utf-8', errors='ignore'))
                                ef.write((entry_lines + u"\n").encode('utf-8', errors='ignore'))

                        entry_buffer = list()
                        yield f.tell()
                    entry_buffer.append(line)
                # NOTE(review): lines buffered after the last entry header
                # are not written out here -- confirm this is intended.
Пример #31
0
    def loadModelToFile(self, articles, filepath):
        """Convert *articles* with ``self._toXml`` and write them to *filepath*.

        The output file is UTF-8 encoded.
        """
        document = self._toXml(articles)
        writer_cm = etree.xmlfile(filepath, encoding="utf-8")
        with writer_cm as writer:
            writer.write(document)
Пример #32
0
def serialize_xml(f, root_tag, obj, pretty=False, **kwargs):
    """Dump *obj* as XML into *f* through an incremental writer.

    *kwargs* are forwarded to ``et.xmlfile``; the object serializes itself
    via ``obj._dump_xml(writer, root_tag, 0, pretty)``.
    """
    writer_cm = et.xmlfile(f, **kwargs)
    with writer_cm as writer:
        obj._dump_xml(writer, root_tag, 0, pretty)
Пример #33
0
        xf.write(el)
        with xf.element('text'):
            xf.write(text)
        for entity_id in entity['entityId']:
            with xf.element('infon', key=entity_id['source']):
                xf.write(entity_id['idString'])

        for attr in entity['attribute']:
            with xf.element('infon', key=attr['key']):
                xf.write(attr['value'])


json_file_path, xml_file_path = sys.argv[1], sys.argv[2]

with codecs.open(json_file_path, 'r', 'utf8') as jf, \
        etree.xmlfile(xml_file_path, encoding='utf8') as xf:
    with xf.element('collection'):
        el = etree.Element('source')
        xf.write(el)
        el = etree.Element('date')
        xf.write(el)
        el = etree.Element('key')
        xf.write(el)
        for line in jf:
            doc = json.loads(line.strip())
            # if doc['docId'] != '25789565':
            #     continue

            sentences = [
                e for e in doc['entity'] if e['entityType'] == 'SENTENCE'
            ]
Пример #34
0
    # extract to_unzip to temp/
    to_unzip.extractall(tmp_path)

# Build the list of wanted ids from the previously unzipped id file.
wanted_ids = filtering.get_id_list(tmp_path + unzipped_ids_to_extract)
print('id_list was created')

# NOTE(review): removes everything matching tmp_path + '*' through the shell;
# tmp_path is interpolated unquoted, so confirm it is a trusted, fixed value.
os.system('rm -rf ' + tmp_path + '*')


with ZipFile(data_path + zipped_infile, 'r') as to_unzip:
    # extract to_unzip to temp/
    to_unzip.extractall(tmp_path)

# Number of records written to the output file.
counter = 0
# default way to open a xml-file when using lxml
with etree.xmlfile(data_path + outputfile, encoding='utf-8') as out:
    out.write_declaration()
    # enclose records to be written out in collection-tag
    with out.element('collection'):
        # streaming the records
        for event, record in etree.iterparse(tmp_path + unzipped_infile, tag="{http://www.loc.gov/MARC21/slim}record"):
            
            # Join the id parts and strip a single leading zero.
            dnb_id = re.sub('^0', '', ''.join(decoding.get_id(record)))
            
            # MATCHING
            if dnb_id in wanted_ids:
                out.write(record)
                counter += 1
                print('successfull match:', dnb_id)
            else:
                print('not a match:', dnb_id)
Пример #35
0
def getIdOrName(elem):
    """Return the identifier of an O-DF element.

    For an ``Object`` element this is the text of its ``id`` child, for an
    ``InfoItem`` its ``name`` attribute, and for the ``Objects`` root the
    literal string ``"Objects"``. Tags are compared in the namespace given
    by the module-level ``odfVersion``.

    Any other element is reported through ``eprint`` and the process exits
    with status 4.
    """
    tag = elem.tag
    namespace = "{%s}" % odfVersion

    if tag == namespace + "Object":
        return elem.findtext("./" + namespace + "id")
    if tag == namespace + "InfoItem":
        return elem.get("name")
    if tag == namespace + "Objects":
        return "Objects"

    eprint("ERROR: invalid element:", tag)
    sys.exit(4)
            
# Create the request session with the certificates and headers set; in
# single-file mode the output XML file is opened for incremental writing
# alongside the session.
if args.single_file:
    with Session() as s:
        with etree.xmlfile(fileName, encoding='utf-8', compression=args.compression) as xf: # TODO: confirm the output encoding
            s.cert = cert
            s.headers.update(headers)
            
            # Initial request against the configured URL.
            r = s.get(url)
            
            # Redirect handling: if the response URL differs from the one
            # requested, either continue with the new URL (when `redirect`
            # is set) or report the redirect and exit with status 6.
            if url != r.url:
                if redirect:
                    url = r.url
                else:
                    eprint("Error: Request redirected to {}. Use -L parameter to follow redirections.".format(r.url))
                    sys.exit(6)

            
            #hierarchy tree
Пример #36
0
 def __init__(self, fields, attrs=[], fp=sys.stdout):
     """Set up the slot table and the XML file context manager.

     :param fields: iterable of names; each one is wrapped in a ``Field``.
     :param attrs: list of extra slot objects (each exposing ``.name``)
         appended after the fields. The default list is never mutated here.
     :param fp: output target handed to ``etree.xmlfile``; defaults to
         ``sys.stdout``.
     """
     field_slots = [Field(field_name) for field_name in fields]
     # Slots are indexed by name; an attr sharing a field's name replaces it.
     self.slots = {slot.name: slot for slot in field_slots + attrs}
     # The context manager is only created here, not entered.
     self._xf_cm = etree.xmlfile(fp, encoding='utf-8')
Пример #37
0
def write_worksheet(worksheet, shared_strings):
    """Serialise *worksheet* into a ``worksheet`` XML document.

    The child elements are written incrementally, in a fixed order, into an
    in-memory buffer whose content is returned.

    :param worksheet: the worksheet object to serialise.
    :param shared_strings: accepted for interface compatibility; not used
        by this function.
    :return: the serialised XML as bytes.
    """
    buffer = BytesIO()
    namespaces = {None: SHEET_MAIN_NS}

    with xmlfile(buffer) as xf:

        def emit_if_present(node):
            # Optional sections are returned as None when there is nothing
            # to write.
            if node is not None:
                xf.write(node)

        with xf.element('worksheet', nsmap=namespaces):
            xf.write(write_properties(worksheet, worksheet.vba_code))
            xf.write(Element('dimension',
                             {'ref': '%s' % worksheet.calculate_dimension()}))

            xf.write(write_sheetviews(worksheet))
            xf.write(write_format(worksheet))
            emit_if_present(write_cols(worksheet))
            write_rows(xf, worksheet)

            if worksheet.protection.sheet:
                xf.write(Element('sheetProtection',
                                 dict(worksheet.protection)))

            emit_if_present(write_autofilter(worksheet))
            emit_if_present(write_mergecells(worksheet))

            for rule in write_conditional_formatting(worksheet):
                xf.write(rule)

            emit_if_present(write_datavalidation(worksheet))
            emit_if_present(write_hyperlinks(worksheet))

            print_opts = worksheet.page_setup.options
            if print_opts:
                xf.write(Element('printOptions', print_opts))

            xf.write(Element('pageMargins', dict(worksheet.page_margins)))

            page_setup_attrs = worksheet.page_setup.setup
            if page_setup_attrs:
                xf.write(Element('pageSetup', page_setup_attrs))

            emit_if_present(write_header_footer(worksheet))

            if worksheet._charts or worksheet._images:
                xf.write(Element('drawing', {'{%s}id' % REL_NS: 'rId1'}))

            # If vba is being preserved then add a legacyDrawing element so
            # that any controls can be drawn.
            if worksheet.vba_controls is not None:
                xf.write(Element("{%s}legacyDrawing" % SHEET_MAIN_NS,
                                 {"{%s}id" % REL_NS: worksheet.vba_controls}))

            emit_if_present(write_pagebreaks(worksheet))

            # add a legacyDrawing so that excel can draw comments
            if worksheet._comment_count > 0:
                xf.write(Element('legacyDrawing',
                                 {'{%s}id' % REL_NS: 'commentsvml'}))

    serialised = buffer.getvalue()
    buffer.close()
    return serialised
Пример #38
0
    def invoke(self, packet):
        """Rewrite the GML file named by ``packet.data`` feature by feature.

        Every ``cityObjectMember`` of the input is passed to
        ``self.duplicateFeature`` together with an incremental XML writer
        targeting ``self.temp_file``. The temporary file is then parsed once
        as a sanity check and finally moved over the original file.

        :param packet: packet whose ``data`` attribute is the GML file path.
        :return: the same packet. It is returned untouched when ``data`` is
            ``None`` or when ``self.checkGmlFile`` rejects the file.
        :raises ValueError: if the file named by the packet does not exist.
        """
        gml_path = packet.data
        if gml_path is None:
            return packet

        log.info('In SubFeatureHandler.invoke')

        if not os.path.exists(gml_path):
            msg = "The given XML file doesn't exist"
            log.error(msg)
            raise ValueError(msg)

        if not self.checkGmlFile(gml_path):
            return packet

        nsmap = {None: "http://www.opengis.net/citygml/2.0"}
        city_model_tag = '{http://www.opengis.net/citygml/2.0}CityModel'
        member_tag = '{http://www.opengis.net/citygml/2.0}cityObjectMember'

        try:
            with etree.xmlfile(self.temp_file) as xf:
                with xf.element(city_model_tag, nsmap=nsmap), \
                        open(gml_path, mode='rb') as f:
                    for action, elem in etree.iterparse(f):
                        if action != 'end' or elem.tag != member_tag:
                            continue

                        # Duplicate feature and subfeatures
                        self.duplicateFeature(xf, elem)

                        # Free the processed element and the siblings that
                        # precede it, so the parsed tree does not keep
                        # growing
                        # (https://www.ibm.com/developerworks/xml/library/x-hiperfparse/)
                        elem.clear()
                        while elem.getprevious() is not None:
                            del elem.getparent()[0]

                xf.flush()

        except etree.SerialisationError:
            # When writing large (> 2.1 GB) XML files, the return code, which
            # is the number of bytes written, is being cast to a signed 32 bit
            # integer. Libxml, the underlying library used by lxml, interprets
            # this as an error. However, the XML file is written correctly.
            # This error occurs not only in xmlfile.flush(), but also in
            # xmlfile.__exit__, so it is deliberately ignored here; the parse
            # below validates the result.
            pass

        # Check if the resulting file can be opened successfully
        with open(self.temp_file, mode='rb') as f:
            for _, elem in etree.iterparse(f, tag=member_tag):
                elem.clear()

        log.info('Temporary XML file was written successfully')

        # Delete the old file and rename the new file
        os.remove(packet.data)
        os.rename(self.temp_file, packet.data)

        # Return the original packet, since this contains the name of the GML
        # file which is being loaded
        return packet
Пример #39
0
def _write_transit_stops(xf, schedule, epsg, transformer):
    """Write the ``transitStops`` section with one ``stopFacility`` per stop."""
    with xf.element("transitStops"):
        for stop_facility in schedule.stops():
            transit_stop_attrib = {'id': str(stop_facility.id)}
            if stop_facility.epsg == epsg:
                x, y = stop_facility.x, stop_facility.y
            else:
                # The stop is stored in a different projection than the one
                # requested: project its lon/lat into the target projection.
                x, y = change_proj(x=stop_facility.lon,
                                   y=stop_facility.lat,
                                   crs_transformer=transformer)
            transit_stop_attrib['x'] = str(x)
            transit_stop_attrib['y'] = str(y)
            for k in ADDITIONAL_STOP_FACILITY_ATTRIBUTES:
                if stop_facility.has_attrib(k):
                    transit_stop_attrib[k] = str(
                        stop_facility.additional_attribute(k))
            xf.write(etree.Element("stopFacility", transit_stop_attrib))


def _write_minimal_transfer_times(xf, schedule):
    """Write ``minimalTransferTimes``; each stop pair is written in both directions."""
    with xf.element("minimalTransferTimes"):
        for stop_1_id, val in schedule.minimal_transfer_times.items():
            relation_attribs = {
                'fromStop': str(stop_1_id),
                'toStop': str(val['stop']),
                'transferTime': str(val['transferTime']),
            }
            xf.write(etree.Element("relation", relation_attribs))

            # Same transfer time for the reverse direction.
            relation_attribs['fromStop'] = str(val['stop'])
            relation_attribs['toStop'] = str(stop_1_id)
            xf.write(etree.Element("relation", relation_attribs))


def _write_route_profile(xf, route):
    """Write the ``routeProfile`` section with one ``stop`` per ordered stop."""
    has_offsets = bool(route.departure_offsets and route.arrival_offsets)
    last_index = len(route.ordered_stops) - 1

    with xf.element("routeProfile"):
        # Whether offsets exist does not depend on the individual stop, so
        # the warning is issued once per route rather than once per stop.
        if route.ordered_stops and not has_offsets:
            logging.warning(
                'The stop(s) along your route don\'t have arrival and departure offsets. '
                'This is likely a route with one stop - consider validating your schedule.'
            )

        for j, stop_id in enumerate(route.ordered_stops):
            stop_attribs = {'refId': str(stop_id)}

            if has_offsets:
                # The first stop only departs, the last stop only arrives,
                # every stop in between does both.
                if j == 0:
                    stop_attribs['departureOffset'] = \
                        route.departure_offsets[j]
                elif j == last_index:
                    stop_attribs['arrivalOffset'] = route.arrival_offsets[j]
                else:
                    stop_attribs['departureOffset'] = \
                        route.departure_offsets[j]
                    stop_attribs['arrivalOffset'] = route.arrival_offsets[j]

                if route.await_departure:
                    stop_attribs['awaitDeparture'] = str(
                        route.await_departure[j]).lower()

            xf.write(etree.Element("stop", stop_attribs))


def _write_transit_route(xf, route):
    """Write one ``transitRoute``: mode, route profile, network links and departures."""
    with xf.element("transitRoute", {'id': route.id}):
        rec = etree.Element("transportMode")
        rec.text = route.mode
        xf.write(rec)

        _write_route_profile(xf, route)

        with xf.element("route"):
            if not route.route:
                logging.warning(
                    "Route needs to have a network route composed of a list of network links that "
                    "the vehicle on this route traverses. If read the Schedule from GTFS, the "
                    "resulting Route objects will not have reference to the network route taken."
                )
            for link_id in route.route:
                xf.write(etree.Element("link", {'refId': str(link_id)}))

        with xf.element("departures"):
            for trip_id, trip_dep_time, veh_id in zip(
                    route.trips['trip_id'],
                    route.trips['trip_departure_time'],
                    route.trips['vehicle_id']):
                trip_attribs = {
                    'id': trip_id,
                    'departureTime': trip_dep_time,
                    'vehicleRefId': veh_id,
                }
                xf.write(etree.Element("departure", trip_attribs))


def write_matsim_schedule(output_dir, schedule, epsg=''):
    """Write *schedule* as ``schedule.xml`` inside *output_dir*.

    The document is written incrementally: a declaration with the
    ``transitSchedule_v2`` doctype, then a ``transitSchedule`` element
    containing the stop facilities, the minimal transfer times (only when
    the schedule has any) and one ``transitLine`` per service.

    :param output_dir: directory in which ``schedule.xml`` is created.
    :param schedule: schedule object providing ``stops()``, ``services()``,
        ``minimal_transfer_times`` and ``epsg``.
    :param epsg: target projection for stop coordinates; when empty the
        schedule's own ``epsg`` is used.
    """
    fname = os.path.join(output_dir, "schedule.xml")
    if not epsg:
        epsg = schedule.epsg
    transformer = Transformer.from_proj(Proj('epsg:4326'),
                                        Proj(epsg),
                                        always_xy=True)
    logging.info('Writing {}'.format(fname))

    with open(fname, "wb") as f, etree.xmlfile(f, encoding='utf-8') as xf:
        xf.write_declaration(
            doctype='<!DOCTYPE transitSchedule '
            'SYSTEM "http://www.matsim.org/files/dtd/transitSchedule_v2.dtd">')
        with xf.element("transitSchedule"):
            # transitStops first
            _write_transit_stops(xf, schedule, epsg, transformer)

            # minimalTransferTimes, if present
            if schedule.minimal_transfer_times:
                _write_minimal_transfer_times(xf, schedule)

            # transitLine
            for service in schedule.services():
                transit_line_attribs = {
                    'id': service.id,
                    'name': str(service.name)
                }
                with xf.element("transitLine", transit_line_attribs):
                    for route in service.routes():
                        _write_transit_route(xf, route)
Пример #40
0
def main():
    """Fetch lineups and programs and write them to ``xmltv.xml``.

    Configures the root logger for INFO output, logs in through
    ``SDGrabber`` (backed by a ``PickleStore`` in the current directory) and
    streams a ``tv`` document: first one ``channel`` element per station,
    then one ``programme`` element per schedule entry of every program.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler())

    store = PickleStore(path='.')
    api = SDGrabber(store=store)
    api.login()

    with open('xmltv.xml', 'wb') as f, etree.xmlfile(f) as x:
        attrs = {
            'source-info-url': 'https://www.schedulesdirect.org/',
            'source-info-name': 'Schedules Direct',
            'generator-info-name': 'sdgrabber',
            'generator-info-url': 'https://github.com/btimby/sdgrabber/',
        }
        with x.element('tv', attrs):

            LOGGER.info('Fetching lineups...')
            # Get lineups as a list so we can traverse it and also pass it to
            # api._get_programs(). This saves it making a duplicate call.
            lineups = list(api.get_lineups())
            for lineup in lineups:
                for station in lineup.stations:
                    attrs = {
                        'id': station.id,
                    }
                    with x.element('channel', attrs):
                        with x.element('display-name'):
                            x.write(station.name)
                        if station.logo:
                            # x.element() only creates a context manager and
                            # emits nothing until it is entered, so the icon
                            # is written as a complete element instead.
                            x.write(
                                etree.Element('icon', {'src': station.logo}))

            LOGGER.info('Got %i lineup(s), fetching programs...',
                        len(lineups))

            # Counts schedule entries, i.e. the programme elements written.
            i = 0
            for program in api.get_programs(lineups=lineups):
                for schedule in program.schedules:
                    i += 1
                    attrs = {
                        'start': schedule.airdatetime.strftime('%Y%m%d%H%M%S'),
                        'stop': schedule.enddatetime.strftime('%Y%m%d%H%M%S'),
                        'duration': str(schedule.duration),
                        'channel': schedule.station.id,
                        'program_id': schedule.program.id,
                    }
                    with x.element('programme', attrs):
                        with x.element('title'):
                            x.write(program.title)

                        if program.subtitle:
                            with x.element('sub-title', {'lang': 'en'}):
                                x.write(program.subtitle)

                        if program.description:
                            with x.element('desc', {'lang': 'en'}):
                                x.write(program.description)

                        if program.actors:
                            with x.element('credits'):
                                for actor in program.actors:
                                    with x.element('actor'):
                                        x.write(actor.name)

                        for genre in program.genres:
                            with x.element('category', {'lang': 'en'}):
                                x.write(genre)

                        if program.orig_airdate:
                            with x.element('date'):
                                x.write(
                                    program.orig_airdate.strftime(
                                        '%Y%m%d%H%M%S'))

            # Report the total once, mirroring the lineup summary above,
            # instead of logging a running count after every program.
            LOGGER.info('Got %i programs.', i)
Пример #41
0
 def docfile(self, *args, **kwargs):
     """Return a new ``etree.xmlfile`` object.

     All positional and keyword arguments are forwarded unchanged to
     ``etree.xmlfile``.
     """
     return etree.xmlfile(*args, **kwargs)
Пример #42
0
def writeMzml(specfile,
              msrunContainer,
              outputdir,
              spectrumIds=None,
              chromatogramIds=None,
              writeIndex=True):
    """Write the data of one specfile to ``<outputdir>/<specfile>.mzML``.

    The document is assembled in memory and written to disk at the end. The
    metadata tree stored in ``msrunContainer.rmc[specfile]`` supplies all
    nodes except the spectrum and chromatogram lists, which are regenerated
    from the container's ``smic``/``saic`` and ``cic`` entries.

    :param specfile: key of the ms-run in ``msrunContainer``; also used as
        the stem of the output file name.
    :param msrunContainer: container providing the ``rmc``, ``smic``,
        ``saic`` and ``cic`` mappings for ``specfile``.
    :param outputdir: directory in which the mzML file is created.
    :param spectrumIds: ids of the spectra to write, in output order. When
        ``None``, all ids in ``msrunContainer.smic[specfile]`` are used,
        sorted by their integer value; if the specfile has no ``smic``
        entry, no spectra are written.
    :param chromatogramIds: ids of the chromatograms to write. When
        ``None``, all ids in ``msrunContainer.cic[specfile]`` are used; if
        the specfile has no ``cic`` entry, no chromatograms are written.
    :param writeIndex: when true, the ``mzML`` element is wrapped in an
        ``indexedmzML`` element and followed by the index list, the index
        list offset and the checksum.
    """
    #TODO: maybe change to use aux.openSafeReplace
    outputFile = io.BytesIO()

    #TODO: perform check that specfile is present in msrunContainer and at least
    #   the metadatanode.
    metadataTree = msrunContainer.rmc[specfile]
    #Generate a list of spectrum ids that should be written to mzML
    if spectrumIds is None:
        if specfile in msrunContainer.smic:
            keyTuple = [(int(key), key)
                        for key in viewkeys(msrunContainer.smic[specfile])]
            spectrumIds = [key for _, key in sorted(keyTuple)]
        else:
            # Nothing stored for this specfile: write no spectra instead of
            # failing on len(None) below.
            spectrumIds = []
    spectrumCounts = len(spectrumIds)
    #Generate a list of chromatogram ids that should be written to mzML
    if chromatogramIds is None:
        if specfile in msrunContainer.cic:
            chromatogramIds = [
                cId for cId in viewkeys(msrunContainer.cic[specfile])
            ]
        else:
            # Same fallback as for the spectra.
            chromatogramIds = []
    chromatogramCounts = len(chromatogramIds)

    #(byte offset, id) pairs collected while writing; used for the index
    spectrumIndexList = list()
    chromatogramIndexList = list()

    #The writer and the elements below are entered and exited manually so
    #that the optional indexedmzML wrapper does not require duplicating the
    #whole body.
    #NOTE(review): buffered=False presumably ensures that outputFile.tell()
    #reflects everything written so far when the offsets are recorded.
    xmlFile = ETREE.xmlfile(outputFile, encoding='ISO-8859-1', buffered=False)
    xmlWriter = xmlFile.__enter__()
    xmlWriter.write_declaration()

    nsmap = {
        None: 'http://psi.hupo.org/ms/mzml',
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
    }
    mzmlAttrib = {'{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': \
                    'http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd',
                  'version': '1.1.0', 'id': metadataTree.attrib['id']
                  }

    if writeIndex:
        xmlIndexedMzml = xmlWriter.element('indexedmzML', nsmap=nsmap)
        xmlIndexedMzml.__enter__()
        xmlWriter.write('\n')
    xmlMzml = xmlWriter.element('mzML', mzmlAttrib, nsmap=nsmap)
    xmlMzml.__enter__()
    xmlWriter.write('\n')

    for metadataNode in metadataTree.getchildren():
        if metadataNode.tag != 'run':
            xmlWriter.write(maspy.xml.recCopyElement(metadataNode),
                            pretty_print=True)
        else:
            xmlRun = xmlWriter.element(metadataNode.tag, metadataNode.attrib)
            xmlRun.__enter__()
            xmlWriter.write('\n')
            for runChild in metadataNode.getchildren():
                #The original lists are not copied; only their default
                #data processing references are carried over.
                if runChild.tag == 'spectrumList':
                    specDefaultProcRef = runChild.attrib[
                        'defaultDataProcessingRef']
                elif runChild.tag == 'chromatogramList':
                    chromDefaultProcRef = runChild.attrib[
                        'defaultDataProcessingRef']
                else:
                    #TODO: maybe recCopy?
                    #NOTE(review): xmlRun is the object returned by
                    #xmlWriter.element(); confirm that it supports
                    #append(), otherwise this branch fails for any other
                    #child of the run node.
                    xmlRun.append(runChild)

            #If any spectra should be written, generate the spectrumList Node.
            if spectrumCounts > 0:
                specListAttribs = {
                    'count': str(spectrumCounts),
                    'defaultDataProcessingRef': specDefaultProcRef
                }
                xmlSpectrumList = xmlWriter.element('spectrumList',
                                                    specListAttribs)
                xmlSpectrumList.__enter__()
                xmlWriter.write('\n')

                for index, key in enumerate(spectrumIds):
                    smi = msrunContainer.smic[specfile][key]
                    sai = msrunContainer.saic[specfile][key]
                    #Store the spectrum element offset here
                    spectrumIndexList.append(
                        (outputFile.tell(), smi.attributes['id']))

                    xmlSpectrum = xmlSpectrumFromSmi(index, smi, sai)
                    xmlWriter.write(xmlSpectrum, pretty_print=True)

                xmlSpectrumList.__exit__(None, None, None)
                xmlWriter.write('\n')

            #If any chromatograms should be written, generate the
            #chromatogramList Node.
            if chromatogramCounts > 0:
                chromListAttribs = {
                    'count': str(chromatogramCounts),
                    'defaultDataProcessingRef': chromDefaultProcRef
                }
                xmlChromatogramList = xmlWriter.element(
                    'chromatogramList', chromListAttribs)
                xmlChromatogramList.__enter__()
                xmlWriter.write('\n')
                for index, key in enumerate(chromatogramIds):
                    ci = msrunContainer.cic[specfile][key]
                    #Store the chromatogram element offset here
                    chromatogramIndexList.append((outputFile.tell(), ci.id))

                    xmlChromatogram = xmlChromatogramFromCi(index, ci)
                    xmlWriter.write(xmlChromatogram, pretty_print=True)
                xmlChromatogramList.__exit__(None, None, None)
                xmlWriter.write('\n')

            xmlRun.__exit__(None, None, None)
            xmlWriter.write('\n')

    #Close the mzml node
    xmlMzml.__exit__(None, None, None)
    #Optional: write the indexedMzml nodes and close the indexedMzml node
    if writeIndex:
        xmlWriter.write('\n')
        indexListOffset = outputFile.tell()
        _writeMzmlIndexList(xmlWriter, spectrumIndexList,
                            chromatogramIndexList)
        _writeIndexListOffset(xmlWriter, indexListOffset)
        _writeMzmlChecksum(xmlWriter, outputFile)
        xmlIndexedMzml.__exit__(None, None, None)
    #Close the xml file
    xmlFile.__exit__(None, None, None)
    #Write the output mzML file
    filepath = aux.joinpath(outputdir, specfile + '.mzML')
    with open(filepath, 'wb') as openfile:
        openfile.write(outputFile.getvalue())
Пример #43
0
 def __enter__(self):
     """Open the incremental XML writer and return this object.

     An ASCII-encoded ``xmlfile`` context is created on the stored output
     stream and entered; the resulting writer is kept in ``self._xf`` and
     the depth counter is reset to zero.
     """
     context = xmlfile(self.__ostream, encoding='ascii')
     self.__context = context
     self._xf = context.__enter__()
     self._depth = 0
     return self
Пример #44
0
def root_xml(out):
    """Root element for use when checking that nothing is written.

    Opens an ``xmlfile`` writer on *out*, writes a single ``test`` element
    and returns the writer. The return happens inside the ``with`` block,
    so the writer's context has already been exited when the caller
    receives it.
    """
    with xmlfile(out) as writer:
        root = Element("test")
        writer.write(root)
        return writer
Пример #45
0
 def docfile(self, *args, **kwargs):
     """Log the call arguments at debug level and return a new ``etree.xmlfile``.

     All positional and keyword arguments are forwarded unchanged to
     ``etree.xmlfile``.
     """
     logger.debug("Starting file with %r %r", args, kwargs)
     return etree.xmlfile(*args, **kwargs)