def test_parse_any_element(self):
    """parse_any_element: None for comment nodes; otherwise an AnyElement
    capturing tag, text, tail, attributes and the element's ns_map."""
    # Comment nodes are not real elements -> the parser must return None.
    comment = Comment("foo")
    self.assertIsNone(XmlParser.parse_any_element(comment))
    element = Element("foo")
    element.set("a", "1")
    element.set("b", "2")
    # Set an xsi:type attribute whose value is itself a qualified name.
    element.set(
        QName(Namespace.XSI.uri, "type").text,
        QName(Namespace.XS.uri, "float").text)
    element.text = "yes"
    element.tail = "no"
    actual = XmlParser.parse_any_element(element)
    expected = AnyElement(
        qname=element.tag,
        text="yes",
        tail="no",
        attributes={
            "a": "1",
            "b": "2",
            # xsi:type attribute values are parsed back into QName objects.
            QName(Namespace.XSI.uri, "type"): QName(Namespace.XS.uri, "float"),
        },
        ns_map=element.nsmap,
    )
    self.assertEqual(expected, actual)
    # With qname resolution disabled (second arg False) the result
    # carries no qname.
    actual = XmlParser.parse_any_element(element, False)
    self.assertIsNone(actual.qname)
def move_after(self, a, b, comments=None):
    """Move element *a* so it becomes the sibling immediately after *b*,
    preserving a's tail text in its old position and (in verbose mode)
    inserting explanatory <!-- v2v3: ... --> comments before a.

    Args:
        a: element to move (detached from its current parent).
        b: element after which *a* is inserted.
        comments: optional comment string or list of strings; defaults to
            a single "Moved <tag/> to a new position" note.
    """
    if comments is None:
        # Bug fix: the default comment previously contained a literal
        # '%s' placeholder; interpolate the moved element's tag name.
        comments = ["Moved <%s/> to a new position" % a.tag]
    if not isinstance(comments, list):
        comments = [comments]
    pa = a.getparent()
    pb = b.getparent()
    sa = a.getprevious()
    # Preserve a's tail text at the old location: attach it to the
    # previous sibling's tail, or to the old parent's text, before
    # detaching a.
    if a.tail:
        if sa is not None:
            if sa.tail:
                sa.tail += ' ' + a.tail
            else:
                sa.tail = a.tail
        else:
            if pa.text and pa.text.strip():
                pa.text += ' ' + a.tail
            else:
                if a.tail and a.tail.strip():
                    pa.text = a.tail
    a.tail = None
    i = pb.index(b) + 1
    pb.insert(i, a)
    if self.options.verbose:
        # Inserting at i (before a, which now sits at i) keeps the
        # comments immediately ahead of the moved element.
        for comment in comments:
            c = Comment(" v2v3: %s " % comment.strip())
            pb.insert(i, c)
def element_vspace(self, e, p):
    """Convert a deprecated v2 <vspace/> element.

    Inside a list item (<dd>/<li>) the vspace becomes a paragraph break:
    the content following it is moved into a new sibling <t/>.  Anywhere
    else the element is simply removed with a warning.

    e: the <vspace/> element; p: its parent element.
    """
    t = p
    if t.tag != 't':
        # bare text inside other element -- wrap in t first
        t = self.wrap_content(t)
    l = t.getparent()
    if l.tag in ['dd', 'li', ]:
        i = l.index(t) + 1
        # New <t/> receives the vspace's tail text and all following siblings.
        t2 = self.element('t', line=e.sourceline)
        if e.tail and e.tail.strip():
            t2.text = e.tail
        for s in e.itersiblings():
            t2.append(s)
        t.remove(e)
        l.insert(i, t2)
        if self.options.verbose:
            c = Comment(
                " v2v3: <vspace/> inside list converted to sequence of <t/> "
            )
            t.insert(i, c)
        # Drop the original <t/> if moving everything out emptied it.
        if isempty(t):
            l.remove(t)
    else:
        # No structural conversion available outside list items.
        self.replace(e, None, "<vspace/> deprecated and removed")
        self.warn(
            e,
            "Deprecated <vspace/> element removed, but no good conversion found The output will most likely need fixup."
        )
def encode(self, data, **options) -> FileStream:
    """
    Encodes the data into a XML file-like stream.

    Args:
        data: The data to encode (mapping of table name -> rows mapping)
        **options: The encoding options

    Returns:
        A XML file-like stream

    Raises:
        geodatabr.encoders.EncodeError: If data fails to encode
    """
    try:
        database = Element('database', name=_('dataset_name'))
        # One <table> element per dataset table, preceded by a marker comment.
        for table_name, rows in data.items():
            database.append(Comment(' Table {} '.format(table_name)))
            table = SubElement(database, 'table', name=table_name)
            for row_data in rows.values():
                row = SubElement(table, 'row')
                for column_name, column_value in row_data.items():
                    SubElement(row, 'field', name=column_name).text = \
                        column_value
        xml_data = xml_str(database, **dict(self.options, **options))
        return FileStream(xml_data.decode())
    except Exception as error:
        # Chain the original exception so the root cause isn't lost
        # (the original re-raised EncodeError bare, hiding the cause).
        raise EncodeError from error
def __gen_struct_anno_files(self, top_level_layer):
    """
    A struct annotation file contains node (struct) attributes (of
    non-token nodes). It is e.g. used to annotate the type of a
    syntactic category (NP, VP etc.).

    See also: __gen_hierarchy_file()
    """
    paula_id = '{0}.{1}.{2}_{3}_struct'.format(top_level_layer,
                                               self.corpus_name, self.name,
                                               top_level_layer)
    E, tree = gen_paula_etree(paula_id)
    # The annotations reference the hierarchy file of the same layer.
    base_paula_id = self.paulamap['hierarchy'][top_level_layer]
    mflist = E('multiFeatList', {XMLBASE: base_paula_id+'.xml'})
    for node_id in select_nodes_by_layer(self.dg, top_level_layer):
        if not istoken(self.dg, node_id):
            mfeat = E('multiFeat', {XLINKHREF: '#{0}'.format(node_id)})
            node_dict = self.dg.node[node_id]
            for attr in node_dict:
                if attr not in IGNORED_NODE_ATTRIBS:
                    mfeat.append(
                        E('feat', {'name': attr, 'value': node_dict[attr]}))
            if self.human_readable:  # adds node label as a <!--comment-->
                mfeat.append(Comment(node_dict.get('label')))
            mflist.append(mfeat)
    tree.append(mflist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.multifeat
    return paula_id
def build_xml(podcasts):
    """Build an OPML 2.0 element tree for the given podcast subscriptions.

    Args:
        podcasts: iterable of (xmlUrl, title, text, htmlUrl) tuples; the
            text and htmlUrl entries may be None and are exported as
            empty strings.

    Returns:
        The root <opml> Element.
    """
    created_date = datetime.now(timezone.utc).astimezone()
    # NOTE: the original also called created_date.isoformat() and threw
    # the result away; that dead call is removed.
    opml = Element("opml")
    opml.set("version", "2.0")
    opml.append(Comment("Podcasts exported from Clementine."))
    head = SubElement(opml, "head")
    title = SubElement(head, "title")
    date_created = SubElement(head, "dateCreated")
    title.text = "Podcasts Exported from Clementine."
    date_created.text = str(created_date)
    body = SubElement(opml, "body")
    for podcast in podcasts:
        xml_url = podcast[0]
        # Renamed from _title to avoid shadowing the <title> element above.
        podcast_title = podcast[1]
        text = podcast[2] or ""
        html_url = podcast[3] or ""
        outline = SubElement(body, "outline")
        outline.set("xmlUrl", xml_url)
        outline.set("title", podcast_title)
        outline.set("text", text)
        outline.set("htmlUrl", html_url)
        outline.set("type", "rss")
    return opml
def replace(self, a, b, comments=None):
    """Replace element *a* with *b* (or remove *a* if b is None),
    preserving text/tail/attributes/children and optionally inserting
    <!-- v2v3: ... --> comments in verbose mode.

    b may be given as a tag-name string, in which case a new element is
    created.  Returns the replacement element (or None on removal).
    """
    if isinstance(b, type('')):
        b = self.element(b)
    if comments is None:
        if b is None:
            comments = ['Removed deprecated tag <%s/>' % (a.tag, )]
        else:
            comments = ['Replaced <%s/> with <%s/>' % (a.tag, b.tag)]
    if not isinstance(comments, list):
        comments = [comments]
    p = a.getparent()
    if p != None:
        i = p.index(a)
        c = None
        if self.options.verbose:
            for comment in comments:
                c = Comment(" v2v3: %s " % comment.strip())
                c.tail = ''
                p.insert(i, c)
                i += 1
        if not b is None:
            # Carry over text, tail, source line and attributes, then
            # move all children before swapping a for b in the parent.
            if a.text and a.text.strip():
                b.text = a.text
            if a.tail != None:
                b.tail = a.tail
            if a.sourceline:
                b.sourceline = a.sourceline
            copyattr(a, b)
            for child in a.iterchildren():
                b.append(child)  # moves child from a to b
            p.replace(a, b)
        else:
            if iscomment(a):
                a.text = ''
            # When removing, re-attach a's text/tail either to the parent
            # text or to the trailing inserted comment.
            for text in [a.text, a.tail]:
                if text:
                    if c is None:
                        p.text = p.text + text if p.text else text
                    else:
                        c.tail += text
            p.remove(a)
    if b != None and a.sourceline:
        b.sourceline = a.sourceline
    return b
def __gen_span_markables_file(self, layer, saltnpepper_compatible=True):
    """
    <mark> elements are used to group tokens (continuous or discontinuos
    spans) for further annotation.
    A span markable file (*_seg.xml) contains a list of spans and the type
    of annotation that is applied to them
    (stored in <markList type="annotation type...").
    As a consequence, each span markable file contains only spans of a
    single type (in discoursegraph: spans from a single namespace, e.g.
    syntax categories or entity mentions).

    Note: The annotations themselves are stored in other files, using
    <feat> or <multiFeat> elements.
    """
    paula_id = '{0}.{1}.{2}_{3}_seg'.format(layer, self.corpus_name,
                                            self.name, layer)
    E, tree = gen_paula_etree(paula_id)
    base_paula_id = '{0}.{1}.tok'.format(self.corpus_name, self.name)
    mlist = E('markList', {'type': layer, XMLBASE: base_paula_id+'.xml'})
    # Collect spanning-relation edges grouped by their source node.
    span_dict = defaultdict(lambda: defaultdict(str))
    edges = select_edges_by(self.dg, layer=layer,
                            edge_type=EdgeTypes.spanning_relation,
                            data=True)
    for source_id, target_id, edge_attrs in edges:
        span_dict[source_id][target_id] = edge_attrs
    target_dict = defaultdict(list)
    for source_id in span_dict:
        targets = sorted(span_dict[source_id], key=natural_sort_key)
        if saltnpepper_compatible:  # SNP doesn't like xpointer ranges
            xp = ' '.join('#{0}'.format(target_id)
                          for target_id in targets)
        else:  # PAULA XML 1.1 specification
            xp = '#xpointer(id({0})/range-to(id({1})))'.format(targets[0],
                                                               targets[-1])
        mark = E('mark', {XLINKHREF: xp})
        if self.human_readable:
            # add <!-- comments --> containing the token strings
            mark.append(Comment(tokens2text(self.dg, targets)))
            target_dict[targets[0]].append(mark)
        else:
            mlist.append(mark)
    if self.human_readable:  # order <mark> elements by token ordering
        for target in sorted(target_dict, key=natural_sort_key):
            for mark in target_dict[target]:
                mlist.append(mark)
    tree.append(mlist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.mark
    return paula_id
def attribute_title(self, e, p):
    """Convert a v2 title= attribute on *e* into a v3 <name/> child
    element, then strip the now-redundant attribute."""
    name_el = self.element('name', line=e.sourceline)
    name_el.text = e.get('title').strip()
    # Only insert a <name/> when the title actually held text.
    if name_el.text:
        e.insert(0, name_el)
        if self.options.verbose:
            e.insert(0, Comment(" v2v3: Moved attribute title to <name/> "))
    stripattr(e, ['title'])
def install(project_path, server_xml_location, server_xml, server_name,
            lib_path, lib_name, app_name, mount=None):
    """Register a library in a Moya project's server XML.

    Adds <import>, <install> and (optionally) <mount> tags to the named
    <server> element, each preceded by an "added by moya-pm" comment, and
    writes the file back atomically.

    Returns True if any change was made, False otherwise.
    """
    from lxml.etree import fromstring, ElementTree, parse
    from lxml.etree import XML, Comment
    changes = 0
    with fsopendir(project_path) as project_fs:
        with project_fs.opendir(server_xml_location) as server_fs:
            with server_fs.open(server_xml, 'rb') as server_xml_file:
                root = parse(server_xml_file)
            import_tag = XML('<import location="{lib_path}"/>'.format(lib_path=lib_path))
            import_tag.tail = "\n"
            # Without an app name, install the lib anonymously.
            if app_name is None:
                install_tag = XML('<install lib="{lib_name}" />'.format(lib_name=lib_name))
            else:
                install_tag = XML('<install name="{app_name}" lib="{lib_name}" />'.format(app_name=app_name, lib_name=lib_name))
            install_tag.tail = "\n"

            def has_child(node, tag, **attribs):
                # True if node already has a <tag> child matching all attribs
                # (idempotence guard: avoid inserting duplicates).
                for el in node.findall(tag):
                    if all(el.get(k, None) == v for k, v in attribs.items()):
                        return True
                return False

            server_el = "{{http://moyaproject.com}}server[@docname='{}']".format(server_name)
            for server in root.findall(server_el):
                def get_comment():
                    comment = Comment('added by moya-pm')
                    return comment
                if not has_child(server, "{http://moyaproject.com}import", location=lib_path):
                    server.insert(0, import_tag)
                    server.insert(0, get_comment())
                    changes += 1
                if not has_child(server, "{http://moyaproject.com}install", lib=lib_name):
                    server.append(Comment('added by moya-pm'))
                    server.append(install_tag)
                    changes += 1
                # Mounting requires both a mount point and an app name.
                if mount is not None and app_name is not None:
                    if not has_child(server, "{http://moyaproject.com}mount", app_name=app_name):
                        mount_tag = XML('<mount app="{app_name}" url="{mount}" />'.format(app_name=app_name, mount=mount))
                        mount_tag.tail = '\n'
                        server.append(get_comment())
                        server.append(mount_tag)
                        changes += 1
            # Write back atomically so a failed write can't corrupt the file.
            with open_atomic_write(server_fs, server_xml, 'wb') as server_xml_file:
                root.write(server_xml_file)
    return bool(changes)
def __gen_pointing_anno_file(self, top_level_layer):
    """
    A pointing relation annotation file contains edge (rel)
    attributes. It is e.g. used to annotate the type of a pointing
    relation.

    TODO: merge code with __gen_rel_anno_file() if possible!
    """
    paula_id = '{0}.{1}.{2}_{3}_pointing_multiFeat'.format(
        top_level_layer, self.corpus_name, self.name, top_level_layer)
    E, tree = gen_paula_etree(paula_id)
    pointing_edges = select_edges_by(self.dg, layer=top_level_layer,
                                     edge_type=EdgeTypes.pointing_relation,
                                     data=True)
    # Group edge attribute dicts by (source, target).
    pointing_dict = defaultdict(lambda: defaultdict(str))
    for source_id, target_id, edge_attrs in pointing_edges:
        pointing_dict[source_id][target_id] = edge_attrs
    # Annotations reference the pointing relation file of the same layer.
    base_paula_id = self.paulamap['pointing'][top_level_layer]
    mflist = E('multiFeatList', {XMLBASE: base_paula_id+'.xml'})
    for source_id in pointing_dict:
        for target_id in pointing_dict[source_id]:
            rel_href = '#rel_{0}_{1}'.format(source_id, target_id)
            mfeat = E('multiFeat', {XLINKHREF: rel_href})
            edge_attrs = pointing_dict[source_id][target_id]
            for edge_attr in edge_attrs:
                if edge_attr not in IGNORED_EDGE_ATTRIBS:
                    mfeat.append(E('feat',
                                   {'name': edge_attr,
                                    'value': edge_attrs[edge_attr]}))
            if self.human_readable:  # adds edge label as a <!--comment-->
                source_label = self.dg.node[source_id].get('label')
                target_label = self.dg.node[target_id].get('label')
                mfeat.append(Comment(u'{0} - {1}'.format(source_label,
                                                         target_label)))
            mflist.append(mfeat)
    tree.append(mflist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.multifeat
    return paula_id
def __gen_rel_anno_file(self, top_level_layer):
    """
    A rel annotation file contains edge (rel) attributes. It is e.g. used
    to annotate the type of a dependency relation (subj, obj etc.).

    See also: __gen_hierarchy_file()
    """
    paula_id = '{0}.{1}.{2}_{3}_rel'.format(top_level_layer,
                                            self.corpus_name, self.name,
                                            top_level_layer)
    E, tree = gen_paula_etree(paula_id)
    dominance_edges = select_edges_by(
        self.dg, layer=top_level_layer,
        edge_type=EdgeTypes.dominance_relation, data=True)
    # Group edge attribute dicts by (source, target); the layer's
    # artificial root node is skipped.
    dominance_dict = defaultdict(lambda: defaultdict(str))
    for source_id, target_id, edge_attrs in dominance_edges:
        if source_id != top_level_layer+':root_node':
            dominance_dict[source_id][target_id] = edge_attrs
    # Annotations reference the hierarchy file of the same layer.
    base_paula_id = self.paulamap['hierarchy'][top_level_layer]
    mflist = E('multiFeatList', {XMLBASE: base_paula_id+'.xml'})
    for source_id in dominance_dict:
        for target_id in dominance_dict[source_id]:
            rel_href = '#rel_{0}_{1}'.format(source_id, target_id)
            mfeat = E('multiFeat', {XLINKHREF: rel_href})
            edge_attrs = dominance_dict[source_id][target_id]
            for edge_attr in edge_attrs:
                if edge_attr not in IGNORED_EDGE_ATTRIBS:
                    mfeat.append(E('feat',
                                   {'name': edge_attr,
                                    'value': edge_attrs[edge_attr]}))
            if self.human_readable:  # adds edge label as a <!--comment-->
                source_label = self.dg.node[source_id].get('label')
                target_label = self.dg.node[target_id].get('label')
                mfeat.append(Comment(u'{0} - {1}'.format(source_label,
                                                         target_label)))
            mflist.append(mfeat)
    tree.append(mflist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.multifeat
    return paula_id
def __parse__(self):
    """
    Yielding and internally handling (element, attribute, link, pos),
    where attribute may be None (indicating the link is in the text).
    ``pos`` is the position where the link occurs; often 0, but sometimes
    something else in the case of links in stylesheets or style tags.

    Note: multiple links inside of a single text string or attribute
    value are returned in reversed order.  This makes it possible to
    replace or delete them from the text string value based on their
    reported text positions.  Otherwise, a modification at one text
    position can change the positions of links reported later on.
    """
    assert self.utx is not None, "UrlTranformer not Implemented."  # internal error
    assert self.utx.base_path is not None, "Base Path is not set!"
    assert self.utx.base_url is not None, "Base url is not Set!"
    source = self.get_source()
    assert source is not None, "Source is not Set!"
    assert hasattr(source, 'read'), "File like object is required!"
    parser = HTMLParser(encoding=self.encoding, collect_ids=False,
                        huge_tree=True, recover=False)
    context_tree = etree_parse(source, parser=parser)
    # Free the handles early; only the parsed tree is needed below.
    del source
    del parser
    # The tree generated by the parse is stored in the self.root
    # variable and can be utilised further for any number of use cases
    self.root = context_tree.getroot()
    # WaterMarking :) -- insert a generator comment at the top of the doc.
    self.root.insert(
        0, Comment(MARK.format('', VERSION, self.utx.url, utcnow(), '')))
    # Modify the tree elements
    for el in context_tree.iter():
        self.__handle(el)
def __gen_pointing_file(self, top_level_layer):
    """
    Creates etree representations of PAULA XML files modeling  pointing
    relations. Pointing relations are ahierarchical edges between any
    two nodes (``tok``, ``mark`` or ``struct``). They are used to signal
    pointing relations between tokens (e.g. in a dependency parse tree)
    or the coreference link between anaphora and antecedent.
    """
    paula_id = '{0}.{1}.{2}_{3}_pointing'.format(top_level_layer,
                                                 self.corpus_name,
                                                 self.name, top_level_layer)
    # Remember this file so annotation files can reference it later.
    self.paulamap['pointing'][top_level_layer] = paula_id
    E, tree = gen_paula_etree(paula_id)
    pointing_edges = select_edges_by(self.dg, layer=top_level_layer,
                                     edge_type=EdgeTypes.pointing_relation,
                                     data=True)
    pointing_dict = defaultdict(lambda: defaultdict(str))
    for source_id, target_id, edge_attrs in pointing_edges:
        pointing_dict[source_id][target_id] = edge_attrs
    # NOTE: we don't add a base file here, because the nodes could be
    # tokens or structural nodes
    rlist = E('relList')
    for source_id in pointing_dict:
        for target_id in pointing_dict[source_id]:
            source_href = self.__gen_node_href(top_level_layer, source_id)
            target_href = self.__gen_node_href(top_level_layer, target_id)
            rel = E('rel',
                    {'id': 'rel_{0}_{1}'.format(source_id, target_id),
                     XLINKHREF: source_href,
                     'target': target_href})
            # adds source/target node labels as a <!-- comment -->
            if self.human_readable:
                source_label = self.dg.node[source_id].get('label')
                target_label = self.dg.node[target_id].get('label')
                rel.append(Comment(u'{0} - {1}'.format(source_label,
                                                       target_label)))
            rlist.append(rel)
    tree.append(rlist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.rel
    return paula_id
def promote(self, e, t):
    """Promote element *e* out of its enclosing <t/> *t*: e becomes a
    sibling of t, and any content following e inside t is split off into
    a new <t/> inserted after e."""
    assert t.tag == 't'
    pp = t.getparent()
    i = pp.index(t) + 1
    # New <t/> collects e's tail text and everything that followed e.
    t2 = self.element('t', line=e.sourceline)
    t2.text = e.tail
    e.tail = None
    for s in e.itersiblings():
        t2.append(s)  # removes s from t
    if not isempty(t2):
        pp.insert(i, t2)
    pp.insert(i, e)  # removes e from t
    if self.options.verbose:
        pp.insert(
            i,
            Comment(
                " v2v3: <%s/> promoted to be child of <%s/>, and the enclosing <t/> split. "
                % (e.tag, pp.tag)))
    # Drop the original <t/> if the promotion emptied it.
    if isempty(t):
        pp.remove(t)
def to_xml(self):
    """Serialize this IOC object to a pretty-printed XML document
    (bytes, UTF-8, with XML declaration)."""
    # XML namespaces defined by Mandiant for the OpenIOC schema.
    nsmap = {None: 'http://schemas.mandiant.com/2010/ioc',
             'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
             'xsd': 'http://www.w3.org/2001/XMLSchema'}
    root = Element('ioc', nsmap=nsmap)
    root.append(Comment('Autogenerated by Watson-Detection-Tools by luisgf'))
    root.set('id', self.ioc_id)
    root.set('last-modified', self.fecha)
    # Header metadata, in schema order.
    for tag, value in (('short_description', self.s_descripcion),
                       ('description', self.descripcion),
                       ('keywords', 'host,malware,auto'),
                       ('authored_by', self.autor),
                       ('authored_date', self.fecha)):
        SubElement(root, tag).text = value
    links = SubElement(root, 'links')
    definitions = SubElement(links, 'definition')
    # One <Indicator> per definition, each with its <IndicatorItem>s.
    for indicador in self.definicion:
        indicator = SubElement(definitions, 'Indicator')
        indicator.set('operator', indicador.get_operator())
        indicator.set('id', indicador.ind_id)
        for item in indicador.items:
            xml_item = SubElement(indicator, 'IndicatorItem')
            xml_item.set('id', item.item_id)
            xml_item.set('condition', item.get_condicion())
            context = SubElement(xml_item, 'Context')
            context.set('document', item.documento)
            context.set('search', item.busqueda)
            context.set('type', item.get_tipo_ctx())
            content = SubElement(xml_item, 'Content')
            content.set('type', item.get_tipo_cnt())
            content.text = item.param
    return tostring(root, pretty_print=True, xml_declaration=True,
                    encoding='utf-8')
def __gen_token_anno_file(self, top_level_layer):
    """
    creates an etree representation of a <multiFeat> file that describes
    all the annotations that only span one token (e.g. POS, lemma etc.).

    Note: discoursegraphs will create one token annotation file for each
    top level layer (e.g. conano, tiger etc.).
    """
    base_paula_id = '{0}.{1}.tok'.format(self.corpus_name, self.name)
    paula_id = '{0}.{1}.{2}.tok_multiFeat'.format(top_level_layer,
                                                  self.corpus_name,
                                                  self.name)
    E, tree = gen_paula_etree(paula_id)
    mflist = E('multiFeatList', {XMLBASE: base_paula_id+'.xml'})
    for token_id in self.dg.tokens:
        mfeat = E('multiFeat', {XLINKHREF: '#{0}'.format(token_id)})
        token_dict = self.dg.node[token_id]
        for feature in token_dict:
            # TODO: highly inefficient! refactor!1!!
            # Only export features namespaced under this top level layer.
            if feature not in IGNORED_TOKEN_ATTRIBS \
               and feature.startswith(top_level_layer):
                mfeat.append(E('feat', {'name': feature,
                                        'value': token_dict[feature]}))
        if self.human_readable:  # adds token string as a <!-- comment -->
            mfeat.append(Comment(token_dict[self.dg.ns+':token']))
        mflist.append(mfeat)
    tree.append(mflist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.multifeat
    return paula_id
# BEAST XML tag-name constants.
TRAIT = 'trait'
TREE_MODEL = 'treeModel'
TRUE = 'true'
TUG_OPERATOR = 'tugOperator'
UNIFORM_PRIOR = 'uniformPrior'
UNITS = 'units'
UPPER = 'upper'
VALUE = 'value'
WEIGHT = 'weight'

# Tags that are emitted without a model prefix.
PREFIXLESS_ELEMENT_TAGS = {TAXON, OPERATORS, MCMC, POSTERIOR, PRIOR,
                           LIKELIHOOD, LOG}
TREE_PRIORS = {COALESCENT_LIKELIHOOD, SPECIATION_LIKELIHOOD}


# Comment nodes may only appear once in a tree, so these are factories
# returning a fresh marker each call (was lambda assignment; PEP 8 E731).
def BEGIN_COPHYLOGENY_MODEL_XML():
    """Return a fresh comment marking the start of the cophylogeny model."""
    return Comment(text=' BEGIN COPHYLOGENY MODEL XML ')


def END_COPHYLOGENY_MODEL_XML():
    """Return a fresh comment marking the end of the cophylogeny model."""
    return Comment(text=' END COPHYLOGENY MODEL XML ')


# Mutable module-level state populated while building the BEAST XML.
associations = {}
operators = []
priors = []
file_log = []
host_tree_traits = []
symbiont_tree_traits = []


def parse_associations():
    """Read the tab-separated "symbiont<TAB>host" association file named by
    ``options.assoc_filename`` into the module-level ``associations`` dict.
    """
    # Context manager ensures the file is closed even on error
    # (the original left the handle open).
    with open(options.assoc_filename, 'r') as assoc_file:
        for line in assoc_file:
            symbiont, host = line[:-1].split('\t')
            associations[symbiont] = host
def convert2to3(self):
    """Convert the parsed xml2rfc v2 document tree to v3 in place.

    Runs a fixed sequence of XPath selectors over the tree; each selector
    is mapped (via slugify) to an element_*/attribute_* handler method on
    this converter.  Returns the converted tree.
    """
    # Already v3 -- nothing to do.
    if self.root.get('version') in ['3', ]:
        return self.tree
    log.note(' Converting v2 to v3: %s' % self.xmlrfc.source)
    selectors = [
        # we need to process list before block elements that might get
        # promoted out of their surrounding <t/>, as <list/> uses one <t/>
        # per list item, and if we promote block elements earlier, they
        # will not be picked up as part of the list items
        './/list',                      # 3.4.  <list>
        # './/artwork',                 # 2.5.  <artwork>
                                        # 2.5.4.  "height" Attribute
                                        # 2.5.8.  "width" Attribute
                                        # 2.5.9.  "xml:space" Attribute
        './/back',
        './/code',
        './/date',
        # We need to process preamble and postamble before figure,
        # because artwork or sourcecode within a figure could later be
        # promoted and the figure discarded.
        './/postamble',                 # 3.5.  <postamble>
        './/preamble',                  # 3.6.  <preamble>
        './/figure',                    # 2.25.  <figure>
                                        # 2.25.1.  "align" Attribute
                                        # 2.25.2.  "alt" Attribute
                                        # 2.25.4.  "height" Attribute
                                        # 2.25.5.  "src" Attribute
                                        # 2.25.6.  "suppress-title" Attribute
                                        # 2.25.8.  "width" Attribute
        './/relref',                    # Deprecated after 7991
        './/reference',                 # <reference>
        '.',                            # 2.45.  <rfc>
                                        # 2.45.1.  "category" Attribute
                                        # 2.45.2.  "consensus" Attribute
                                        # 2.45.3.  "docName" Attribute
                                        # 2.45.7.  "number" Attribute
                                        # 2.45.10. "seriesNo" Attribute
        # Disabled
        # './/seriesInfo',              # 2.47.  <seriesInfo>
        './/t',                         # 2.53.  <t>
                                        # 2.53.2.  "hangText" Attribute
        './/xref',                      # 2.66.  <xref>
                                        # 2.66.1.  "format" Attribute
                                        # 2.66.2.  "pageno" Attribute
        './/facsimile',                 # 3.2.  <facsimile>
        './/format',                    # 3.3.  <format>
        './/spanx',                     # 3.7.  <spanx>
        './/texttable',                 # 3.8.  <texttable>
        './/vspace',                    # 3.10.  <vspace>
        # attribute selectors
        './/*[@title]',                 # 2.25.7.  "title" Attribute
                                        # 2.33.2.  "title" Attribute
                                        # 2.42.2.  "title" Attribute
                                        # 2.46.4.  "title" Attribute
        './/*[@anchor]',
        './/xref[@target]',
        '//processing-instruction()',   # 1.3.2
        # handle mixed block/non-block content surrounding all block nodes
        './/*[self::artwork or self::dl or self::figure or self::ol or self::sourcecode or self::t or self::ul]',
        './/*[@*="yes" or @*="no"]',    # convert old attribute false/true
        '.;pretty_print_prep()',
        '.;wrap_non_ascii()',
    ]
    # replace the vocabulary v2 dtd, but keep some entity definitions.
    tree = self.tree
    tree.docinfo.system_url = "rfc2629-xhtml.ent"
    for s in selectors:
        # Derive the handler method name from the selector.
        slug = slugify(
            s.replace('self::', '').replace(' or ', '_').replace(';', '_'))
        if '@' in s:
            func_name = 'attribute_%s' % slug
        elif "()" in s:
            func_name = slug
        else:
            if not slug:
                slug = 'rfc'
            func_name = 'element_%s' % slug
        # get rid of selector annotation
        ss = s.split(';')[0]
        func = getattr(self, func_name, None)
        if func:
            if self.options.debug:
                log.note("Calling %s()" % func_name)
            for e in self.root.xpath(ss):
                func(e, e.getparent())
        else:
            log.warn("No handler for function %s, slug %s" % (
                func_name,
                slug,
            ))
    self.root.set('version', '3')
    # Add a comment about the converter version
    conversion_version = Comment(' xml2rfc v2v3 conversion %s ' %
                                 xml2rfc.__version__)
    conversion_version.tail = '\n '
    self.root.insert(0, conversion_version)
    # This is a workaround for not being able to do anything about
    # namespaces other than when creating an element.  It lets us retain
    # a namespace declaration for xi: in the root element.
    # dummy = self.element('{http://www.w3.org/2001/XInclude}include', nsmap=self.xmlrfc.nsmap)
    # self.root.insert(0, dummy)
    # lxml.etree.cleanup_namespaces(self.root, top_nsmap=self.xmlrfc.nsmap, keep_ns_prefixes='xi')
    # self.root.remove(dummy)
    log.note(' Completed v2 to v3 conversion')
    return self.tree
def begin(self, startnode):
    """Store an auto-generation header comment naming the source file
    and the current timestamp."""
    banner = ("Generated automatically from {source} at "
              "{time:%d %b %Y %H:%M}.").format(
                  source=startnode.sourcefile,
                  time=datetime.datetime.now())
    self.header = Comment(banner)
def build_tree_from_csv_json(csv_input, json_input):
    """
    Build XML config for MATSim Road Pricing from .csv and .json input
    :param csv_input: csv output from `extract_network_id_from_osm_csv` with
    additional columns: `vehicle_type`, `toll_amount`, `start_time` and
    `end_time` for each of the tolls required.
    :param json_input: json output from `extract_network_id_from_osm_csv`
    :return: an 'lxml.etree._Element' object
    """
    roadpricing = Element("roadpricing", type="cordon", name="cordon-toll")
    description = SubElement(roadpricing, "description")
    description.text = "A simple cordon toll scheme"
    links = SubElement(roadpricing, "links")

    # CSV input
    tolled_links_df = pd.read_csv(csv_input, dtype={'osm_ids': str})
    # make sure all links from same toll are grouped together:
    tolled_links_df = tolled_links_df.sort_values(by='osm_refs')
    # remove the links whose osm_id were not matched to network_ids
    # ('network_id' column is boolean)
    tolled_links_df = tolled_links_df[tolled_links_df['network_id']]

    # Time-of-day pricing:
    # links with multiple tolling amounts throughout the day appear as
    # multiple rows in the .csv config; links with uniform pricing
    # throughout the day appear only once in .csv config
    try:
        links_repeat = pd.concat(
            g for _, g in tolled_links_df.groupby('osm_ids') if len(g) > 1)
    except ValueError:
        # pd.concat raises ValueError on an empty generator -> no repeats.
        links_repeat = pd.DataFrame()
    links_no_repeat = tolled_links_df[
        ~tolled_links_df.index.isin(links_repeat.index)]

    # JSON input
    with open(json_input, 'r') as f:
        osm_id_to_network_id_dict = json.load(f)

    # list to keep track of which Toll names we added as comments
    commented_tolls = []

    # links without time-of-day pricing:
    for index, row in links_no_repeat.iterrows():
        if str(row['osm_refs']) not in commented_tolls:
            links.append(Comment(' === ' + str(row['osm_refs']) + ' === '))
            commented_tolls.append(str(row['osm_refs']))
        # from the JSON input, obtain all network_ids that match this
        # row's specific osm_id
        list_of_network_ids = osm_id_to_network_id_dict[row['osm_ids']]
        # network link in list_of_network_ids is matched with 1 row of
        # links_no_repeat
        for net_id in list_of_network_ids:
            link = SubElement(links, "link", id=str(net_id))
            SubElement(link, "cost",
                       start_time=str(row['start_time']),
                       end_time=str(row['end_time']),
                       amount=str(row['toll_amount']))

    # links with time-of-day pricing:
    # get unique ids of these links and iterate through them
    if not links_repeat.empty:
        unique_repeated_ids = links_repeat['osm_ids'].unique()
        for link_id in unique_repeated_ids:
            link_time_of_day_df = links_repeat[
                links_repeat['osm_ids'] == link_id]
            link_ref = link_time_of_day_df['osm_refs'].unique()[0]
            if link_ref not in commented_tolls:
                links.append(Comment(' === ' + str(link_ref) + ' === '))
                commented_tolls.append(str(link_ref))
            # from the JSON input, obtain all network_ids that match this
            # row's specific osm_id
            list_of_network_ids = osm_id_to_network_id_dict[link_id]
            # each network link in list_of_network_ids is now matched with
            # multiple rows of link_time_of_day_df
            for net_id in list_of_network_ids:
                link = SubElement(links, "link", id=str(net_id))
                for index, row in link_time_of_day_df.iterrows():
                    SubElement(link, "cost",
                               start_time=str(row['start_time']),
                               end_time=str(row['end_time']),
                               amount=str(row['toll_amount']))

    return roadpricing
def __gen_hierarchy_file(self, layer):
    """
    Hierarchical structures (<structList> elements) are used to create
    hierarchically nested annotation graphs (e.g. to express consists-of
    relationships or dominance-edges in syntax trees, RST).
    A <struct> element will be created for each hierarchical node (e.g. an
    NP) with edges (<rel> elements) to each dominated element (e.g. tokens,
    other <struct> elements).

    NOTE: The types/labels of these newly create hierarchical nodes and
    edges aren't stored in this file, but in feat/multiFeat files
    referencing this one! See: __gen_struct_anno_files() and
    __gen_rel_anno_file()).

    There will be one hierarchy file for each top level layer.
    TODO: check, if we can omit hierarchy files for layers that don't
          contain dominance edges
    """
    paula_id = '{0}.{1}.{2}_{3}'.format(layer, self.corpus_name,
                                        self.name, layer)
    # Remember this file so annotation files can reference it later.
    self.paulamap['hierarchy'][layer] = paula_id
    E, tree = gen_paula_etree(paula_id)
    dominance_edges = select_edges_by(
        self.dg, layer=layer, edge_type=EdgeTypes.dominance_relation,
        data=True)
    span_edges = select_edges_by(
        self.dg, layer=layer, edge_type=EdgeTypes.spanning_relation,
        data=True)
    # Group dominance edges by source node; skip the layer's artificial root.
    dominance_dict = defaultdict(lambda: defaultdict(str))
    for source_id, target_id, edge_attrs in dominance_edges:
        if source_id != layer+':root_node':
            dominance_dict[source_id][target_id] = edge_attrs
    # in PAULA XML, token spans are also part of the hierarchy
    for source_id, target_id, edge_attrs in span_edges:
        if istoken(self.dg, target_id):
            dominance_dict[source_id][target_id] = edge_attrs
    # NOTE: we don't add a base file here, because the nodes could be
    # tokens or structural nodes
    slist = E('structList', {'type': layer})
    for source_id in dominance_dict:
        struct = E('struct', {'id': str(source_id)})
        if self.human_readable:
            struct.append(Comment(self.dg.node[source_id].get('label')))
        for target_id in dominance_dict[source_id]:
            # Token targets point into the tokenization file; structural
            # targets are document-internal fragment references.
            if istoken(self.dg, target_id):
                href = '{0}.xml#{1}'.format(self.paulamap['tokenization'],
                                            target_id)
            else:
                href = '#{0}'.format(target_id)
            rel = E(
                'rel',
                {'id': 'rel_{0}_{1}'.format(source_id, target_id),
                 'type': dominance_dict[source_id][target_id]['edge_type'],
                 XLINKHREF: href})
            struct.append(rel)
            if self.human_readable:
                struct.append(
                    Comment(self.dg.node[target_id].get('label')))
        slist.append(struct)
    tree.append(slist)
    self.files[paula_id] = tree
    self.file2dtd[paula_id] = PaulaDTDs.struct
    return paula_id
def get_comment():
    """Return a fresh marker comment identifying edits made by moya-pm."""
    return Comment('added by moya-pm')
def start_library(self):
    """Interactive wizard: create a new library inside a Moya project and
    (optionally) register it in the project's server XML.

    Returns 0 on success, -1 if cancelled, False on usage error.
    NOTE(review): block structure reconstructed from a whitespace-mangled
    source; nesting of the final reporting section should be confirmed
    against upstream moya.
    """
    console = self.console
    from ...tools import get_moya_dir
    from os.path import join, abspath
    project_path = None
    # Target directory: explicit -o location, or <project>/local/.
    if self.args.location is not None:
        library_path = self.args.location
    else:
        try:
            project_path = get_moya_dir(self.args.project_location)
        except:
            console.error("Please run 'moya start library' inside your project directory, or specifiy the -o switch")
            return False
        library_path = abspath(join(project_path, './local/'))
    cfg = None
    if not self.args.location and project_path:
        from ... import build
        cfg = build.read_config(project_path, self.get_settings())
    if not self.args.acceptdefaults:
        console.table([[Cell("Moya Library Wizard", bold=True, fg="green", center=True)],
                       ["""This will ask you a few questions, then create a new library in your Moya project based on your answers. Default values are shown in grey (simply hit return to accept defaults). Some defaults may be taken from your ".bashrc" file, if it exists. """]])
    # Gather library metadata interactively (defaults from CLI args).
    author = self.get_author_details()
    library = {}
    library["title"] = LibraryTitle.ask(console, default=self.args.title)
    longname = self.args.longname or make_name(author["organization"], library["title"])
    longname = library["longname"] = LibraryLongName.ask(console, default=longname)
    library["url"] = LibraryURL.ask(console, default="")
    library["namespace"] = LibraryNamespace.ask(console, default="")
    mount = None
    appname = None
    do_mount = DoMount.ask(console, default="yes")
    if do_mount:
        mount = Mount.ask(console, default=self.args.mount or "/{}/".format(make_name(library["title"])))
        appname = AppName.ask(console, default=self.args.name or make_name(library["title"]))
    data = dict(author=author, library=library, timezone=self.get_timezone())
    actions = []
    from ...command.sub import library_template
    from fs.memoryfs import MemoryFS
    from fs.opener import fsopendir
    # Render the library template into memory, then copy to disk.
    memfs = MemoryFS()
    templatebuilder.compile_fs_template(memfs, library_template.template, data=data)
    dest_fs = fsopendir(join(library_path, library["longname"]), create_dir=True, writeable=True)
    continue_overwrite = 'overwrite'
    if not dest_fs.isdirempty('.'):
        if self.args.force:
            continue_overwrite = 'overwrite'
        elif self.args.new:
            continue_overwrite = 'new'
        else:
            continue_overwrite = DirNotEmpty.ask(console, default="cancel")
    if continue_overwrite != 'cancel':
        if continue_overwrite == 'overwrite':
            from fs.utils import copydir
            copydir(memfs, dest_fs)
            actions.append("Written library files to {}".format(dest_fs.getsyspath('.')))
        elif continue_overwrite == 'new':
            # Only copy files that don't already exist, report and stop.
            files_copied = copy_new(memfs, dest_fs)
            table = [[
                Cell("{} new file(s) written".format(len(files_copied)), fg="green", bold=True, center=True),
            ]]
            for path in files_copied:
                table.append([Cell(dest_fs.desc(path), bold=True, fg="black")])
            console.table(table)
            return 0
        if cfg:
            # Register the new library in the project's server XML.
            project_cfg = cfg['project']
            location = project_cfg['location']
            server_name = "main"
            if location:
                with fsopendir(project_path) as project_fs:
                    with project_fs.opendir(location) as server_fs:
                        from lxml.etree import fromstring, ElementTree, parse
                        from lxml.etree import XML, Comment
                        server_xml_path = server_fs.getsyspath(project_cfg['startup'])
                        root = parse(server_xml_path)
                        import_tag = XML('<import location="./local/{longname}" />\n\n'.format(**library))
                        import_tag.tail = "\n"
                        install_tag = None
                        if mount:
                            tag = '<install name="{appname}" lib="{longname}" mount="{mount}" />'
                        else:
                            tag = '<install name="{appname}" lib="{longname}" />'
                        install_tag = XML(tag.format(appname=appname, longname=longname, mount=mount))
                        install_tag.tail = "\n\n"

                        def has_child(node, tag, **attribs):
                            # True if node already has a matching child
                            # (idempotence guard against duplicate tags).
                            for el in node.findall(tag):
                                #items = dict(el.items())
                                if all(el.get(k, None) == v for k, v in attribs.items()):
                                    return True
                            return False

                        for server in root.findall("{{http://moyaproject.com}}server[@docname='{}']".format(server_name)):
                            add_import_tag = not has_child(server, "{http://moyaproject.com}import", location="./local/{}".format(longname))
                            add_install_tag = not has_child(server, "{http://moyaproject.com}install", lib=longname) and install_tag is not None
                            if add_import_tag or add_install_tag:
                                comment = Comment("Added by 'moya start library'")
                                comment.tail = "\n"
                                server.append(comment)
                            if add_import_tag:
                                server.append(import_tag)
                                actions.append("Added <import> tag")
                            if add_install_tag:
                                server.append(install_tag)
                                actions.append("Added <install> tag")
                                if mount:
                                    actions.append("Mounted application on {}".format(mount))
                        root.write(server_xml_path)
        table = [[Cell("Library files written successfully!", fg="green", bold=True, center=True)]]
        actions_text = "\n".join(" * " + action for action in actions)
        table.append([Cell(actions_text, fg="blue", bold=True)])
        table.append(["""A new library has been added to the project, containing some simple example functionality.\nSee http://moyaproject.com/docs/creatinglibraries/ for more information."""])
        console.table(table)
        return 0
    console.text("No project files written.", fg="red", bold=True).nl()
    return -1
def parse(self, parser=None, base_url=None):
    """Parses the underlying html source using the `lxml` library.

    The parsed tree is stored in :attr:`root` of this object,
    which can be used to perform numerous operations.

    Parameters
    ----------
    parser : HTMLParser, optional
        Parser to use. When omitted, a default ``HTMLParser`` is
        created with this object's encoding.
    base_url : str, optional
        Base url handed to lxml for resolving relative references.

    Returns
    -------
    ElementTree
        The root of the parsed tree (also stored in :attr:`root`).

    Raises
    ------
    TypeError
        If a non-None *parser* is not an ``HTMLParser`` instance.
    """
    utx = self._get_utx()

    assert utx is not None, "UrlTransformer not Implemented."  # internal error
    assert utx.base_path is not None, "Base Path is not set!"
    assert utx.base_url is not None, "Base url is not Set!"

    # BUG FIX: the original code built a TypeError but never raised it,
    # and ran the isinstance check before the None fallback — so a
    # default (None) parser would also have failed the intended check.
    # Only reject an explicitly supplied parser of the wrong type.
    if parser is not None and not isinstance(parser, HTMLParser):
        raise TypeError("Expected instance of <%r>, got <%r>" % (HTMLParser, parser))

    if parser is None:
        parser = HTMLParser(encoding=self.encoding, collect_ids=False)

    source = self.get_source()
    assert source is not None, "Source is not Set!"
    assert hasattr(source, 'read'), "File like object is required!"
    # assert self._element_factory is not None
    # assert hasattr(self._element_factory, 'make_element')

    LOGGER.info(
        'Parsing tree with source: <%r> encoding <%s> and parser <%r>'
        % (self._source, self.encoding, parser))

    context_tree = lxml_parse(source, parser=parser, base_url=base_url)
    # The tree generated by the parse is stored in self.root
    # and can be utilised further for any number of use cases.
    self._tree = context_tree
    self.root = context_tree.getroot()

    if self.root is not None:
        # WaterMarking :)
        self.root.insert(
            0, Comment(MARK.format('', __version__, utx.url, utc_now(), '')))

    # There are internal links present on the html page which are files
    # that includes `#` and `javascript:` and 'data:base64;` type links
    # or a simple `/` url referring anchor tag
    # thus these links needs to be left as is.
    factory = getattr(self, 'make_element', None)
    assert callable(factory), "Element generator is not callable!"

    # Modify the tree elements
    for el in context_tree.iter():
        # An element can contain multiple urls
        for pack in self._handle_lxml_elem(el):

            if pack is not None:
                elem, attr, url, pos = pack
            else:  # pragma: no cover
                continue

            if elem is not None:
                o = factory(elem, attr, url, pos)
                if o is not None:
                    self._stack.append(o)

    self._parseComplete = True
    return self.root
def generate_root_element(self, stock_picking):
    """Builds and returns the root ``<WAB>`` XML element for a picking.

    Assembles the YellowCube WAB (outgoing goods order) document:
    ControlReference header, Order header, partner addresses,
    value-added services, order positions and order documents, then
    validates the result against the ``wab`` XSD.

    Parameters
    ----------
    stock_picking : stock.picking record
        The picking to export; must be linked to a sale.order.

    Returns
    -------
    lxml element
        The validated ``<WAB>`` root element.

    Raises
    ------
    Warning
        If no sale.order is linked, basic shipping is missing on the
        delivery method, or XSD validation fails.
    """
    # YellowCube limits DeliveryInstructions to 15 characters.
    DELIVERYINSTRUCTIONS_TAG_MAX_LENGTH = 15
    xml_root = create_element('WAB')
    self.context['yc_customer_order_no'] = stock_picking.yellowcube_customer_order_no
    picking_mode = stock_picking.type
    sale_order = stock_picking.sale_id
    xml_root.append(Comment(_('Sale Order #{0}: {1}').format(sale_order.id, sale_order.name)))
    if not sale_order:
        raise Warning(_("There is no sale.order related to this stock.picking (type={0})").format(picking_mode))
    if not self.context.get('yc_ignore_wab_reports', False):
        sale_order.generate_reports()
    # WAB > ControlReference
    now = datetime.now()
    xml_control_reference = create_element('ControlReference')
    xml_control_reference.append(create_element('Type', text='WAB'))
    xml_control_reference.append(create_element('Sender', text=self.get_param('sender', required=True)))
    xml_control_reference.append(create_element('Receiver', text=self.get_param('receiver', required=True)))
    # BUG FIX: the original passed (hour, hour, minute) into the
    # YYYYMMDDhhmmss format, duplicating the hour and dropping seconds.
    xml_control_reference.append(create_element(
        'Timestamp',
        text='{0:04d}{1:02d}{2:02d}{3:02d}{4:02d}{5:02d}'.format(
            now.year, now.month, now.day, now.hour, now.minute, now.second)
    ))
    xml_control_reference.append(create_element('OperatingMode', text=self.get_param('operating_mode', required=True)))
    xml_control_reference.append(create_element('Version', text='1.0'))
    xml_root.append(xml_control_reference)
    # WAB -> Order
    xml_order = create_element('Order')
    xml_root.append(xml_order)
    # WAB -> OrderHeader
    xml_order_header = create_element('OrderHeader')
    # CONSISTENCY FIX: every sibling call passes the value via `text=`;
    # the original passed it positionally here (attrib slot).
    xml_order_header.append(create_element('DepositorNo', text=self.get_param('depositor_no', required=True)))
    xml_order_header.append(create_element('CustomerOrderNo', text=stock_picking.get_customer_order_no()[stock_picking.id]))
    # Keep only the date part (YYYYMMDD) of the order datetime.
    dateorder = sale_order.date_order.split(' ')[0]
    xml_order_header.append(create_element('CustomerOrderDate', text=dateorder.replace('-', '')))
    xml_order.append(xml_order_header)
    # WAB -> PartnerAddress
    xml_partner_address = create_element('PartnerAddress')
    xml_partner_address.append(self._generate_partner_address_element(sale_order.partner_shipping_id, self.get_param('wab_partner_type_for_shipping_address')))
    if self.get_param('wab_add_invoicing_address'):
        xml_partner_address.append(self._generate_partner_address_element(sale_order.partner_invoice_id, self.get_param('wab_partner_type_for_invoicing_address')))
    xml_order.append(xml_partner_address)
    # WAB -> ValueAddedServices
    xml_value_added_services = create_element('ValueAddedServices')
    xml_additional_service = create_element('AdditionalService')
    if picking_mode in ['out', 'outgoing']:
        # <BasicShippingServices> under ValueAddedServices/AdditionalService
        if stock_picking.carrier_id and stock_picking.carrier_id.yc_basic_shipping:
            xml_additional_service.append(create_element('BasicShippingServices', text=stock_picking.carrier_id.yc_basic_shipping))
        else:
            raise Warning(_('Missing Basic shipping in delivery method'), sale_order.name)
    else:
        # Incoming pickings are returns.
        xml_additional_service.append(create_element('BasicShippingServices', text="RETOURE"))
    # <AdditionalShippingServices> under ValueAddedServices/AdditionalService
    if stock_picking.carrier_id and stock_picking.carrier_id.yc_additional_shipping:
        xml_additional_service.append(create_element('AdditionalShippingServices', text=stock_picking.carrier_id.yc_additional_shipping))
    # <DeliveryInstructions> under ValueAddedServices/AdditionalService
    if stock_picking.carrier_id and stock_picking.carrier_id.pc_delivery_instructions:
        xml_additional_service.append(create_element('DeliveryInstructions', text=stock_picking.carrier_id.pc_delivery_instructions[:DELIVERYINSTRUCTIONS_TAG_MAX_LENGTH]))
    # <FrightShippingFlag> under ValueAddedServices/AdditionalService
    # NOTE: "Fright" spelling is part of the XSD schema — do not "fix" it.
    xml_additional_service.append(create_element('FrightShippingFlag', text=('1' if stock_picking.carrier_id.pc_freight_shipping else '0')))
    # <ShippingInterface> under ValueAddedServices/AdditionalService
    if stock_picking.carrier_id and stock_picking.carrier_id.pc_shipping_interface:
        xml_additional_service.append(create_element('ShippingInterface', text=stock_picking.carrier_id.pc_shipping_interface))
    xml_value_added_services.append(xml_additional_service)
    xml_order.append(xml_value_added_services)
    # WAB -> OrderPositions
    xml_order_positions = create_element('OrderPositions')
    for position in self._generate_order_position_element(stock_picking):
        xml_order_positions.append(position)
    xml_order.append(xml_order_positions)
    # WAB -> OrderDocuments
    xml_order_documents = create_element('OrderDocuments', attrib={'OrderDocumentsFlag': '1'})
    xml_order_doc_filenames = create_element('OrderDocFilenames')
    for filename in self.get_export_files(stock_picking):
        xml_order_doc_filenames.append(create_element('OrderDocFilename', text=filename))
    xml_order_documents.append(xml_order_doc_filenames)
    xml_order.append(xml_order_documents)
    xsd_error = validate_xml("wab", xml_root, print_error=self.print_errors)
    if xsd_error:
        raise Warning(xsd_error)
    if 'yc_customer_order_no' in self.context:
        del self.context['yc_customer_order_no']
    return xml_root
def _write_mark(self, text): """Writes a watermark comment in the parsed html.""" if self.lxml is not None: self.lxml.insert(0, Comment(text))
def build_tree(df_tolls, toll_type='link', toll_scheme_name='simple-toll',
               toll_description='A simple toll scheme'):
    """
    Build XML config for MATSim Road Pricing from tolls DataFrame input
    :param df_tolls: pd.DataFrame(
        columns=[
            'toll_id',  # optional, unique ID of the toll, based off OSM ref if applicable
            'network_link_id',  # network link ID to be charged
            'vehicle_type',  # optional, type of vehicle, does not persist to MATSim road pricing xml file
            'toll_amount',  # cost to travel on that link
            'start_time',  # start time for the toll
            'end_time',  # end time for the toll
            'osm_name',  # optional, if derived from OSM, human readable name of the road
            'notes'  # optional, user notes
        ]
    :param toll_type: default 'link', other supported MATSim toll types: 'distance', 'cordon', 'area',
        more info: https://www.matsim.org/apidocs/core/0.3.0/org/matsim/roadpricing/package-summary.html
    :param toll_scheme_name: name to pass to xml file, useful for identifying multiple toll schemes
    :param toll_description: additional description of the toll to pass to the xml file
    :return: an 'lxml.etree._Element' object
    """
    roadpricing = Element("roadpricing", type=toll_type, name=toll_scheme_name)
    description = SubElement(roadpricing, "description")
    description.text = toll_description
    links = SubElement(roadpricing, "links")

    # BUG FIX: work on a copy so the caller's DataFrame is not mutated
    # (the original added a 'toll_id' column to the passed-in frame).
    df_tolls = df_tolls.copy()
    # make sure all links from same toll are grouped together:
    if 'toll_id' not in df_tolls.columns:
        # if not present just take it link by link
        df_tolls['toll_id'] = df_tolls['network_link_id']
    df_tolls = df_tolls.sort_values(by='toll_id')

    # Time-of-day pricing:
    # links with multiple tolling amounts throughout the day appear as multiple rows in the .csv config
    # links with uniform pricing throughout the day appear only once in .csv config
    try:
        links_repeat = pd.concat(
            g for _, g in df_tolls.groupby('network_link_id') if len(g) > 1)
    except ValueError:
        # no link appears more than once
        links_repeat = pd.DataFrame()

    links_no_repeat = df_tolls[~df_tolls.index.isin(links_repeat.index)]

    # list to keep track of which Toll names we added as comments
    commented_tolls = []

    # links without time-of-day pricing:
    for index, row in links_no_repeat.iterrows():
        if str(row['toll_id']) not in commented_tolls:
            links.append(Comment(' === ' + str(row['toll_id']) + ' === '))
            commented_tolls.append(str(row['toll_id']))

        link = SubElement(links, "link", id=str(row['network_link_id']))
        SubElement(link, "cost",
                   start_time=str(row['start_time']),
                   end_time=str(row['end_time']),
                   amount=str(row['toll_amount']))

    # links with time-of-day pricing:
    # get unique ids of these links and iterate through them
    if not links_repeat.empty:
        unique_repeated_ids = links_repeat['network_link_id'].unique()
        for link_id in unique_repeated_ids:
            link_time_of_day_df = links_repeat[links_repeat['network_link_id'] == link_id]
            link_ref = link_time_of_day_df['toll_id'].unique()[0]
            # BUG FIX: compare the str() form, matching how the list is
            # populated (avoids duplicate comments for non-str toll ids).
            if str(link_ref) not in commented_tolls:
                links.append(Comment(' === ' + str(link_ref) + ' === '))
                commented_tolls.append(str(link_ref))

            link = SubElement(links, "link", id=str(link_id))
            for index, row in link_time_of_day_df.iterrows():
                SubElement(link, "cost",
                           start_time=str(row['start_time']),
                           end_time=str(row['end_time']),
                           amount=str(row['toll_amount']))

    return roadpricing
def get_comment(): comment = Comment("added by moya-pm") return comment