def fill_in_expiration_months(select):
    """Fill in expiration date month values in the given `select` element."""
    # First remove any children since they are just there to
    # help preview templates.
    for child in select.getchildren():
        select.remove(child)
    # Create the empty value option, selected since the form
    # is empty.
    e = Element("option", value="", selected="selected")
    e.text = "(select)"
    select.append(e)
    # Create months.
    for index, month_name in enumerate([
            "January", "February", "March", "April", "May", "June",
            "July", "August", "September", "October", "November", "December"]):
        month_number = index + 1
        e = Element("option", value="%02i" % month_number)
        e.text = "%02i (%s)" % (month_number, month_name)
        select.append(e)
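# A minimal usage sketch for the helper above. It assumes `Element`,
# `SubElement`, and `tostring` come from lxml.etree (the snippet does not
# show its imports); the <select> markup here is hypothetical.
from lxml.etree import Element, SubElement, tostring

select = Element("select", name="expiration_month")
SubElement(select, "option").text = "preview placeholder"  # will be removed

fill_in_expiration_months(select)

print(tostring(select, pretty_print=True).decode())
# First option is "(select)", then "01 (January)" ... "12 (December)".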
def to_xml(self):
    q = Element('dictionary')
    q.attrib["value"] = basename(dirname(self.dct))
    r = SubElement(q, "revision",
                   value=str(self._svn_revision(dirname(self.dct))),
                   timestamp=datetime.utcnow().isoformat(),
                   checksum=self._checksum(open(self.dct, 'rb').read()))
    s = SubElement(r, 'corpus')
    s.attrib["value"] = basename(self.fn)
    s.attrib["checksum"] = self._checksum(open(self.fn, 'rb').read())
    SubElement(r, 'percent').text = "%.2f" % self.get_coverage()
    SubElement(r, 'total').text = str(len(self.get_words()))
    SubElement(r, 'known').text = str(len(self.get_known_words()))
    SubElement(r, 'unknown').text = str(len(self.get_unknown_words()))
    wrx = re.compile(r"\^(.*)/")
    s = SubElement(r, 'top')
    for word, count in self.get_top_unknown_words():
        SubElement(s, 'word', count=str(count)).text = wrx.search(word).group(1)
    s = SubElement(r, 'system')
    SubElement(s, 'time').text = "%.4f" % self.timer
    return ("coverage", etree.tostring(q))
def _add_request(self, doc):
    """Constructs add request using doc dict."""
    node = dict2element(doc)
    root = Element("add")
    root.append(node)
    return tostring(root).encode('utf-8')
def make_tree(html):
    """
    Returns an lxml tree for the given HTML string (either Unicode or
    bytestring). This is better than lxml.html.document_fromstring because
    this takes care of a few known issues.
    """
    # Normalize newlines. Otherwise, "\r" gets converted to an HTML entity
    # by lxml.
    html = re.sub('\r\n', '\n', html)
    # Remove <?xml> declaration in Unicode objects, because it causes an error:
    # "ValueError: Unicode strings with encoding declaration are not supported."
    # Note that the error only occurs if the <?xml> tag has an "encoding"
    # attribute, but we remove it in all cases, as there's no downside to
    # removing it.
    if isinstance(html, unicode):
        html = re.sub(r'^\s*<\?xml\s+.*?\?>', '', html)
    else:
        html = UnicodeDammit(html, isHTML=True).unicode
    html = html.strip()
    if html:
        try:
            return document_fromstring(html)
        except:
            # Fall back to using the (slow) BeautifulSoup parser.
            return lxml.html.soupparser.fromstring(html)
    else:
        root = Element('body')
        root.text = u''
        return ElementTree(root)
def to_xml(self):
    q = Element('config')
    q.attrib["value"] = self.f
    r = SubElement(q, "revision",
                   value=str(self._svn_revision(dirname(self.f))),
                   timestamp=datetime.utcnow().isoformat(),
                   checksum=self._checksum(open(self.f, 'rb').read()))
    s = SubElement(r, 'gen')
    s.attrib["value"] = self.gen
    s.attrib["checksum"] = self._checksum(open(self.gen, 'rb').read())
    s = SubElement(r, 'morph')
    s.attrib["value"] = self.morph
    s.attrib["checksum"] = self._checksum(open(self.morph, 'rb').read())
    SubElement(r, 'total').text = str(self.passes + self.fails)
    SubElement(r, 'passes').text = str(self.passes)
    SubElement(r, 'fails').text = str(self.fails)
    s = SubElement(r, 'tests')
    for k, v in self.count.items():
        t = SubElement(s, 'test')
        t.text = str(k)
        t.attrib['fails'] = str(v["Fail"])
        t.attrib['passes'] = str(v["Pass"])
    s = SubElement(r, "system")
    SubElement(s, "speed").text = "%.4f" % self.timer
    return ("morph", etree.tostring(r))
def run():
    date = strftime("%Y-%m-%d", localtime())
    root = Element("Dataset")
    tree = etree.ElementTree(element=root)
    root.set("Date", date)
    prefix = "http://search.yahoo.com/search?_adv_prop=web&x=op&ei=UTF-8&fr=fp-top&va=link:"
    midfix = "&va_vt=any&vp_vt=any&vo_vt=any&ve_vt=any&vd=all&vst=0&vf=all&vm=i&fl=1&vl="
    suffix = "&n=10"
    urls = parse()
    for l in langs:
        total = 0
        node = etree.SubElement(root, "Trawl")
        node.set("Lang", l)
        for url in urls:
            try:
                results = urllib.urlopen(prefix + url + midfix + l + suffix).read()
                count = re.search('of about <.*?>(\S+?)<', results).group(1)
                count = string.replace(count, ',', '')
                total += int(count)
            except:
                count = '!'
            subnode = etree.SubElement(node, "Search")
            subnode.set("URI", url)
            subnode.text = str(count)
            time.sleep(1)
        totnode = etree.SubElement(node, "Total")
        totnode.text = str(total)
    fileopen = open('/web/teamspace/www/stats/lang.xml', 'w')
    tree.write(fileopen)
def merge_pages(self, replacements):
    """
    Duplicate template page. Creates a copy of the template for each item
    in the list, does a merge, and separates them by page breaks.
    """
    for part in self.parts.values():
        root = part.getroot()
        tag = root.tag
        if tag == '{%(w)s}ftr' % NAMESPACES or tag == '{%(w)s}hdr' % NAMESPACES:
            continue
        children = []
        for child in root:
            root.remove(child)
            children.append(child)
        for i, repl in enumerate(replacements):
            # Add page break in between replacements
            if i > 0:
                pagebreak = Element('{%(w)s}br' % NAMESPACES)
                pagebreak.attrib['{%(w)s}type' % NAMESPACES] = 'page'
                root.append(pagebreak)
            parts = []
            for child in children:
                child_copy = deepcopy(child)
                root.append(child_copy)
                parts.append(child_copy)
            self.merge(parts, **repl)
def _get_payload_c14n(self, method, c14n_algorithm=default_c14n_algorithm):
    self.payload = self.data
    self.sig_root = Element(ds_tag("Signature"), nsmap=self.namespaces)
    if method == methods.enveloped:
        if isinstance(self.data, (str, bytes)):
            raise InvalidInput("When using enveloped signature, **data** must be an XML element")
        signature_placeholders = self._findall(self.data, "Signature[@Id='placeholder']")
        if len(signature_placeholders) == 0:
            self.payload.append(self.sig_root)
        elif len(signature_placeholders) == 1:
            self.sig_root = signature_placeholders[0]
            del self.sig_root.attrib["Id"]
        else:
            raise InvalidInput("Enveloped signature input contains more than one placeholder")
        self._reference_uri = ""
    elif method == methods.detached:
        if self._reference_uri is None:
            self._reference_uri = "#{}".format(self.payload.get("Id", self.payload.get("ID", "object")))
    else:
        self.payload = Element(ds_tag("Object"), nsmap=self.namespaces, Id="object")
        if isinstance(self.data, (str, bytes)):
            self.payload.text = self.data
        else:
            self.payload.append(self.data)
        self._reference_uri = "#object"
    self.payload_c14n = self._c14n(self.payload, algorithm=c14n_algorithm)
    if method == methods.enveloped:
        self.payload_c14n = _get_signature_regex(ns_prefix="ds").sub(b"", self.payload_c14n)
def to_xsd(self, wrap_into_schema=False, type_prefix="", annotate=False):
    xsd_uri = self.nsmap["xs"]
    # Create an xs:element element
    attrib = {"name": self.name}
    if not wrap_into_schema:
        # If not at root, assign occurrence indicators
        attrib.update({
            "minOccurs": str(self.min_occurs),
            "maxOccurs": str(self.max_occurs),
        })
    e = Element(QName(xsd_uri, "element"), attrib=attrib)
    # Append type definition
    e1, e1_tdefs = self._to_xsd_type(type_prefix)
    if isinstance(e1, basestring):
        e.attrib["type"] = str(e1)
    else:
        e.append(e1)
    # Decide what to return depending on the wrap_into_schema flag
    if wrap_into_schema:
        root = self._root_xsd_element()
        for tdef in e1_tdefs.itervalues():
            root.append(tdef)
        root.append(e)
        return root
    else:
        return (e, e1_tdefs)
def solr_update_subjects():
    global subjects_to_update
    print subjects_to_update
    subject_add = Element("add")
    for subject_type, subject_name in subjects_to_update:
        key = subject_type + '/' + subject_name
        count = subject_count(subject_type, subject_name)
        if not subject_need_update(key, count):
            print 'no update needed:', (subject_type, subject_name, count)
            continue
        print 'update needed:', (subject_type, subject_name, count)
        doc = Element("doc")
        add_field(doc, 'key', key)
        add_field(doc, 'name', subject_name)
        add_field(doc, 'type', subject_type)
        add_field(doc, 'count', count)
        subject_add.append(doc)
    if len(subject_add):
        print 'updating subjects'
        add_xml = tostring(subject_add).encode('utf-8')
        solr_update([add_xml], debug=False, index='subjects')
        solr_update(['<commit />'], debug=True, index='subjects')
        subjects_to_update = set()
def _serialize_session_block(self, session_block):
    session = session_block.session
    slot_id = sorted(session_block.session.blocks, key=attrgetter('start_dt')).index(session_block)
    xml = Element('session', self._color_tuple_to_attributes(session.colors))
    SubElement(xml, 'ID').text = str(session.friendly_id)
    SubElement(xml, 'new_id').text = str(session.id)
    SubElement(xml, 'parentProtection').text = self._format_bool(session.is_protected)
    SubElement(xml, 'code').text = 'sess{}-{}'.format(session.friendly_id, slot_id + 1)
    SubElement(xml, 'slotId').text = str(slot_id)
    SubElement(xml, 'sessionTimetableLink').text = self._url_for('sessions.display_session', session)
    title = session.title
    if session_block.title:
        title += ': ' + session_block.title
    SubElement(xml, 'title').text = title
    if session_block.can_manage(self._user):
        SubElement(xml, 'modifyLink').text = self._url_for('timetable.manage_session', session)
    SubElement(xml, 'description').text = session.description.replace('\r\n', '\n')
    xml.append(self._serialize_location(session_block))
    SubElement(xml, 'startDate').text = self._format_date(session_block.start_dt)
    SubElement(xml, 'endDate').text = self._format_date(session_block.end_dt)
    SubElement(xml, 'duration').text = self._format_duration(session_block.duration)
    for entry in session_block.timetable_entry.children:
        xml.append(self._serialize_timetable_entry(entry))
    return xml
def test_etree_to_dict2(self):
    root = Element('div')
    root.append(Element('a', {'href': 'http://aaa.bbb/'}))
    root.append(Element('a', {'href': 'http://ccc.ddd/'}))
    result = util.etree_to_dict2(root)
    self.assertEqual(result['a']['0']['href'], "http://aaa.bbb/")
    self.assertEqual(result['a']['1']['href'], "http://ccc.ddd/")
def test_etree_to_dict3(self):
    root = Element('div')
    cite1 = Element('cite')
    cite1.text = "123"
    root.append(cite1)
    result = util.etree_to_dict2(root)
    self.assertEqual(result['cite'], "123")
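# `util.etree_to_dict2` itself is not shown in this corpus. Below is a
# hedged sketch of a function that would satisfy both tests above -- a
# reconstruction inferred from the assertions, not the project's actual
# implementation.
from lxml.etree import Element

def etree_to_dict2_sketch(root):
    """Children are grouped by tag; a lone text-bearing child collapses to
    its text, repeated children become a dict keyed by stringified index."""
    result = {}
    groups = {}
    for child in root:
        groups.setdefault(child.tag, []).append(child)
    for tag, children in groups.items():
        if len(children) == 1 and children[0].text is not None:
            result[tag] = children[0].text
        else:
            result[tag] = {str(i): dict(c.attrib)
                           for i, c in enumerate(children)}
    return result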
def __merge_field(self, part, field, text):
    for mf in part.findall('.//MergeField[@name="%s"]' % field):
        children = list(mf)
        mf.clear()  # clear away the attributes
        mf.tag = '{%(w)s}r' % NAMESPACES
        mf.extend(children)
        nodes = []
        # preserve new lines in replacement text
        text = text or ''  # text might be None
        text_parts = text.replace('\r', '').split('\n')
        for i, text_part in enumerate(text_parts):
            text_node = Element('{%(w)s}t' % NAMESPACES)
            text_node.text = text_part
            nodes.append(text_node)
            # if not last node add new line node
            if i < (len(text_parts) - 1):
                nodes.append(Element('{%(w)s}br' % NAMESPACES))
        ph = mf.find('MergeText')
        if ph is not None:
            # add text nodes at the exact position where
            # MergeText was found
            index = mf.index(ph)
            for node in reversed(nodes):
                mf.insert(index, node)
            mf.remove(ph)
        else:
            mf.extend(nodes)
def envelope(**kwargs):
    """Create OAI-PMH envelope for response."""
    e_oaipmh = Element(etree.QName(NS_OAIPMH, 'OAI-PMH'), nsmap=NSMAP)
    e_oaipmh.set(etree.QName(NS_XSI, 'schemaLocation'),
                 '{0} {1}'.format(NS_OAIPMH, NS_OAIPMH_XSD))
    e_tree = ElementTree(element=e_oaipmh)
    e_oaipmh.addprevious(etree.ProcessingInstruction(
        'xml-stylesheet', 'type="text/xsl" href="{0}"'.format(url_for(
            'invenio_oaiserver.static', filename='xsl/oai2.v1.0.xsl'))))
    e_responseDate = SubElement(e_oaipmh, etree.QName(NS_OAIPMH, 'responseDate'))
    # date should be first possible moment
    e_responseDate.text = datetime_to_datestamp(datetime.utcnow())
    e_request = SubElement(e_oaipmh, etree.QName(NS_OAIPMH, 'request'))
    for key, value in kwargs.items():
        if key == 'from_' or key == 'until':
            value = datetime_to_datestamp(value)
        elif key == 'resumptionToken':
            value = value['token']
        e_request.set(key, value)
    e_request.text = url_for('invenio_oaiserver.response', _external=True)
    return e_tree, e_oaipmh
def run():
    date = strftime("%Y-%m-%d", localtime())
    root = Element("Dataset")
    tree = etree.ElementTree(element=root)
    root.set("Date", date)
    prefix = "http://search.yahoo.com/search?p=link:"
    midfix = "&vs=."
    urls = parse()
    for l in langs:
        total = 0
        node = etree.SubElement(root, "Trawl")
        node.set("Domain", l)
        for url in urls:
            try:
                results = urllib.urlopen(prefix + url + midfix + l).read()
                count = re.search('of about <.*?>(\S+?)<', results).group(1)
                count = string.replace(count, ',', '')
                total += int(count)
            except:
                count = '!'
            subnode = etree.SubElement(node, "Search")
            subnode.set("URI", url)
            subnode.text = str(count)
            time.sleep(1)
        totnode = etree.SubElement(node, "Total")
        totnode.text = str(total)
    fileopen = open('/web/teamspace/www/stats/domains.xml', 'w')
    tree.write(fileopen)
def addSyncCommand(self, sync_command, gid, data, media_type, more_data):
    """
    Generate the sync command
    XXX media type must be managed by the conduit, not this class
    """
    self._initSyncTag()
    data_node = E.Data()
    # XXX to be removed later, to use only CDATA
    if media_type == 'text/xml':
        if isinstance(data, basestring):
            data_node.append(etree.XML(data, parser=parser))
        elif isinstance(data, etree.CDATA):
            # data could be a Data element if partial XML
            data_node.text = data
        else:
            # XXX Is it supposed to happen?
            data_node.append(data)
    else:
        if isinstance(data, etree.CDATA):
            data_node.text = data
        else:
            cdata = etree.CDATA(data.decode('utf-8'))
            data_node.text = cdata
    main_tag = Element('{%s}%s' % (SYNCML_NAMESPACE, sync_command))
    main_tag.extend((E.CmdID(self._getNextCommandId()),
                     E.Meta(E.Type(media_type)),
                     E.Item(E.Source(E.LocURI(gid)), data_node)))
    if more_data:
        item_node = main_tag.find('{%s}Item' % SYNCML_NAMESPACE)
        item_node.append(E.MoreData())
    self.sync_append(main_tag)
def getSearch(node):
    search = Element('Search', {RESERVED_XML_TYPE: TYPE_TAB}, f='nodata noscroll')
    cols = SubElement(search, 'Colgroup_0', {RESERVED_XML_TYPE: TYPE_COLS},
                      t=UI_TYPE_GROUP, s='orientation')
    lCol = SubElement(cols, 'Col_0', {RESERVED_XML_TYPE: TYPE_COL},
                      t=UI_TYPE_GROUP, s='even')
    rCol = SubElement(cols, 'Col_1', {RESERVED_XML_TYPE: TYPE_COL},
                      t=UI_TYPE_GROUP, s='large')
    term = SubElement(lCol, 'Search_Term', t=UI_TYPE_INPUT)
    btn = SubElement(rCol, 'Search_Button', t=UI_TYPE_BUTTON)
    # Add 'Entity Types' dropdown if there's more than one entity to choose from
    isGuiAndData = lambda e: util.gui.isGuiNode(e) and util.data.isDataElement(e)
    nodes = getTabGroups(node, isGuiAndData, descendantOrSelf=False)
    if len(nodes) > 1:
        SubElement(search, 'Entity_Types', t=UI_TYPE_DROPDOWN)
    SubElement(search, 'Entity_List', t=UI_TYPE_LIST)
    for n in search:
        annotateWithXmlTypes(n)
    search.attrib[RESERVED_XML_TYPE] = TYPE_SEARCH
    btn.text = 'Search'
    return search,
def update_work(w, obj_cache=None, debug=False, resolve_redirects=False):
    if obj_cache is None:
        obj_cache = {}
    wkey = w['key']
    #assert wkey.startswith('/works')
    #assert '/' not in wkey[7:]
    deletes = []
    requests = []
    q = {'type': '/type/redirect', 'location': wkey}
    redirect_keys = [r['key'][7:] for r in query_iter(q)]
    deletes += redirect_keys
    deletes += [wkey[7:]]  # strip /works/ from /works/OL1234W
    # Handle edition records as well.
    # When an edition does not belong to a work, create a fake work and index it.
    if w['type']['key'] == '/type/edition' and w.get('title'):
        edition = w
        w = {
            # Use key as /works/OL1M.
            # In case of single-core-solr, we are using the full path as key,
            # so it is required to be unique across all types of documents.
            # The website takes care of redirecting /works/OL1M to /books/OL1M.
            'key': edition['key'].replace("/books/", "/works/"),
            'type': {'key': '/type/work'},
            'title': edition['title'],
            'editions': [edition]
        }
        # Hack to add subjects when indexing /books/ia:xxx
        if edition.get("subjects"):
            w['subjects'] = edition['subjects']
    if w['type']['key'] == '/type/work' and w.get('title'):
        try:
            d = build_data(w, obj_cache=obj_cache, resolve_redirects=resolve_redirects)
            doc = dict2element(d)
        except:
            logger.error("failed to update work %s", w['key'], exc_info=True)
        else:
            if d is not None:
                # Delete all ia:foobar keys
                # XXX-Anand: The works in the in_library subject were getting
                # wiped off for unknown reasons. I suspect that this might be
                # a cause. Disabling temporarily.
                #if d.get('ia'):
                #    deletes += ["ia:" + iaid for iaid in d['ia']]
                # In single core solr, we use the full path as key, not just the last part
                if is_single_core():
                    deletes = ["/works/" + k for k in deletes]
                requests.append(make_delete_query(deletes))
                add = Element("add")
                add.append(doc)
                add_xml = tostring(add).encode('utf-8')
                requests.append(add_xml)
    return requests
def getAutonum(node):
    # Get the nodes flagged with f="notnull"
    isAutonumbered = lambda e: isFlagged(e, FLAG_AUTONUM, checkAncestors=False)
    autonumbered = xml.getAll(node, keep=isAutonumbered, descendantOrSelf=False)
    # Generate some 'Next_XYZ' fields to replace the <autonum/> tag with
    autonum = []
    for n in autonumbered:
        e = Element(
            'Next_' + n.tag,
            {
                RESERVED_XML_TYPE: TYPE_GUI_DATA,
                ORIGINAL_TAG: node.tag,
                AUTONUM_DEST: getPathString(n),
            },
            b=BIND_DECIMAL,
            f=FLAG_NOTNULL,
            c=CSS_REQUIRED,
            t=UI_TYPE_INPUT,
        )
        e.text = n.text
        autonum.append(e)
    return tuple(autonum)
def process_content(jsonBody, item_dict):
    entry = json.loads(jsonBody)
    content = Xhtml.fromstring(entry['body'])
    # get author
    # print item_dict['json_url']
    try:
        author = content.xpath('//span[@class="author"]/text()')[0].strip()
    except IndexError:
        author = ''
    try:
        bio = content.xpath('//span[@class="bio"]/text()')[0].strip()
    except IndexError:
        bio = ''
    item_dict['author'] = author + bio
    coverelement = Element('img')
    coverelement.set('src', item_dict['cover'])
    content.insert(0, coverelement)
    item_dict['content'] = Xhtml.tostring(content, encoding='unicode')
    # print "++++\tGet zhihu items\t++++"
    print item_dict['cover']
    print item_dict['created']
    print item_dict['title']
    print item_dict['author']
    print item_dict['link']
    return item_dict
def _create_resource_doc(self, record):
    root = Element('div')
    for link in record.get('links', '').split(','):
        root.append(Element('a', href=link))
    return root
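# A short usage sketch for _create_resource_doc (self is unused, so None
# works as a stand-in). Note that when 'links' is absent, ''.split(',')
# still yields [''], so an empty <a href=""/> would be emitted.
from lxml.etree import tostring

record = {'links': 'https://a.example/,https://b.example/'}
doc = _create_resource_doc(None, record)
print(tostring(doc).decode())
# <div><a href="https://a.example/"/><a href="https://b.example/"/></div>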
def addTag(self, t):
    assert type(t) == unicode
    if self.isUnique("tag", t):
        n = Element("tag")
        n.text = t
        self.__node.append(n)
        return TagNode(n)
def _updateInfo(self):
    ln = self.__node.xpath("c")
    assert len(ln) in [0, 1]
    if ln:
        nodeComment = ln[0]
    else:
        nodeComment = None
    comment = None
    file = os.path.join(self.file, FolderNode.commentFile)
    if os.path.isfile(file):
        fid = open(file, "r")
        if fid:
            comment = fid.read().decode("utf_8")
            fid.close()
    if comment:
        if nodeComment is None:
            nodeComment = Element("c")
            nodeComment.text = comment
            self.__node.append(nodeComment)
        else:
            nodeComment.text = comment
    else:
        if nodeComment is not None:
            self.__node.remove(nodeComment)
def mk_elem(parent, tag=None, text=None, empty=False, **kwargs):
    """Add element as a child of parent."""
    # special-case the top-level element
    if tag is None:
        tag = parent
        parent = None
        empty = True
    # don't create empty elements
    if not empty and text is None and \
            all(x is None for x in kwargs.values()):
        return
    # replace namespace identifier with URL
    if ':' in tag:
        ns, name = tag.split(':', 1)
        tag = '{%s}%s' % (namespaces[ns], name)
    if parent is None:
        element = Element(tag, OrderedDict())
    else:
        element = SubElement(parent, tag, OrderedDict())
    # set text of element
    if text is not None:
        element.text = _format(text)
    # set kwargs as attributes
    for k, v in kwargs.items():
        if v is not None:
            element.set(k, _format(v))
    return element
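# A usage sketch for mk_elem. The snippet references module-level
# `namespaces` and `_format` helpers that are not shown; the stand-ins
# below are hypothetical.
from collections import OrderedDict
from lxml.etree import Element, SubElement, tostring

namespaces = {'sw': 'http://example.org/sw'}  # hypothetical prefix/URI map
_format = str  # stand-in for the unshown _format helper

root = mk_elem('sw:software')                    # top-level element
mk_elem(root, 'sw:name', text='demo')            # child with text
mk_elem(root, 'sw:skipped')                      # no text/attrs: not created
mk_elem(root, 'sw:file', name='a.py', size=123)  # kwargs become attributes
print(tostring(root, pretty_print=True).decode())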
def change_service(self, doc):
    """Method handles change_service request

    Args:
        doc (xml): change_service request, choice customer|payer|subscriber

    Returns:
        xml: change_service_response with result true, SOAP fault when service not changed

    Example:

    .. code-block:: xml

        <change_service>
          <service>615</service>
          <customer>1</customer>
          <payer>1</payer>
          <subscriber>1</subscriber>
          <status>active</status>
          <params>
            <entry>
              <key>121</key>
              <value>12345</value>
            </entry>
          </params>
        </change_service>

        <change_service_response>
          <result>true</result>
        </change_service_response>
    """
    self._mh.demsg('htk_on_debug_info',
                   self._mh._trn.msg('te_soap_request', 'change_service', tostring(doc)),
                   self._mh.fromhere())
    # lxml elements without children are falsy, so test against None explicitly
    service = doc.find('service').text if doc.find('service') is not None else None
    customer = doc.find('customer').text if doc.find('customer') is not None else None
    payer = doc.find('payer').text if doc.find('payer') is not None else None
    subscriber = doc.find('subscriber').text if doc.find('subscriber') is not None else None
    status = doc.find('status').text if doc.find('status') is not None else None
    params = {}
    if doc.find('params') is not None:
        for param in doc.findall('params/entry'):
            params[param.find('key').text] = param.find('value').text
    db = self._get_db()
    res = db.change_service(service, customer, payer, subscriber, status, params)
    db.disconnect()
    if res is not None:
        elem = Element('result')
        elem.text = 'true'
        return self._response('change_service', elem)
    else:
        return self._fault('Service not changed')
def get_line_pattern_rules(declarations, dir=None, move_local_files=False):
    """ Given a list of declarations, create Rule elements with a
        LinePatternSymbolizer and return them. Optionally provide an output
        directory for local copies of image files.
    """
    property_map = {'line-pattern-file': 'file',
                    'line-pattern-width': 'width',
                    'line-pattern-height': 'height',
                    'line-pattern-type': 'type'}
    # a place to put rule elements
    rule_els = []
    for (filter, parameter_values) in filtered_property_declarations(declarations, property_map):
        symbolizer_el = Element('LinePatternSymbolizer')
        # collect all the applicable declarations into a symbolizer element
        for (parameter, value) in parameter_values.items():
            symbolizer_el.set(parameter, str(value))
        if symbolizer_el.get('file', False):
            postprocess_symbolizer_image_file(symbolizer_el, dir, 'line-pattern', move_local_files)
        rule_el = make_rule_element(filter, symbolizer_el)
        rule_els.append(rule_el)
    return rule_els
def __call__(self):
    self.request.environ[DISABLE_TRANSFORM_REQUEST_KEY] = True
    cat = getToolByName(self.context, 'portal_catalog')
    root = Element("casestudies")
    for brain in cat.searchResults(portal_type='eea.climateadapt.casestudy',
                                   review_state='published'):
        cs = brain.getObject()
        cs = cs._repr_for_arcgis()
        e_cs = SubElement(root, 'casestudy')
        e_attrs = SubElement(e_cs, 'attributes')
        for k, v in cs['attributes'].items():
            el = Element(k)
            if isinstance(v, str):
                el.text = v.decode('utf-8').strip()
            else:
                el.text = unicode(v).strip()
            e_attrs.append(el)
        e_geo = SubElement(e_cs, 'geometry')
        for k, v in cs['geometry'].items():
            el = Element(k)
            el.text = unicode(v)
            e_geo.append(el)
    res = tostring(root, pretty_print=True)
    return res
def _getcontent(toUserName, fromUserName, iterItem):
    articleElement = Element('Articles')
    item_count = 0
    for item in iterItem:
        item_count += 1
        if item_count >= 10:
            break
        ori_dict = {
            "Title": item.title,
            "Description": item.description,
            "PicUrl": item.picurl,
            "Url": item.url,
        }
        element = dict_to_xml('item', ori_dict=ori_dict)
        articleElement.append(element)
    ori_dict = {
        "ToUserName": toUserName,
        "FromUserName": fromUserName,
        "CreateTime": int(time.time()),
        "MsgType": "news",
        "ArticleCount": item_count,
    }
    element = dict_to_xml('xml', ori_dict, ["CreateTime", "ArticleCount"])
    element.append(articleElement)
    return tostring(element, encoding="unicode")
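# dict_to_xml is not shown in this corpus. Given how it is called above
# (tag name, a dict of children, and an optional list of keys whose values
# stay plain), a plausible sketch follows; the CDATA wrapping mirrors the
# common WeChat-reply convention and is an assumption.
from lxml.etree import CDATA, Element, SubElement

def dict_to_xml_sketch(tag, ori_dict, plain_keys=()):
    """Hypothetical reconstruction: build <tag> with one child per entry;
    values are CDATA-wrapped unless their key is listed in plain_keys."""
    element = Element(tag)
    for key, value in ori_dict.items():
        child = SubElement(element, key)
        if key in plain_keys:
            child.text = str(value)
        else:
            child.text = CDATA(str(value))
    return element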
def _clear_element(e: etree.Element):
    """Clear an XML element and delete the references its parent keeps to
    already-processed siblings, to free up memory."""
    e.clear()
    while e.getprevious() is not None:
        del e.getparent()[0]
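# _clear_element is the standard lxml iterparse memory idiom: clear each
# element once processed, then drop the references its parent still holds
# to earlier siblings. A self-contained usage sketch:
import io
from lxml import etree

xml = io.BytesIO(b"<root>" + b"<item>x</item>" * 100000 + b"</root>")
for _, elem in etree.iterparse(xml, tag="item"):
    # ... process elem here ...
    _clear_element(elem)  # keeps memory flat on very large documents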
def to_etree(self, data, options=None, name=None, depth=0):
    """
    Given some data, converts that data to an ``etree.Element`` suitable
    for use in the XML output.
    """
    if isinstance(data, (list, tuple)):
        element = Element(name or 'objects')
        if name:
            element = Element(name)
            element.set('type', 'list')
        else:
            element = Element('objects')
        for item in data:
            element.append(self.to_etree(item, options, depth=depth + 1))
    elif isinstance(data, dict):
        if depth == 0:
            element = Element(name or 'response')
        else:
            element = Element(name or 'object')
            element.set('type', 'hash')
        for (key, value) in data.iteritems():
            element.append(self.to_etree(value, options, name=key, depth=depth + 1))
    elif isinstance(data, Bundle):
        element = Element(name or 'object')
        for field_name, field_object in data.data.items():
            element.append(self.to_etree(field_object, options, name=field_name, depth=depth + 1))
    elif hasattr(data, 'dehydrated_type'):
        if getattr(data, 'dehydrated_type', None) == 'related' and data.is_m2m == False:
            if data.full:
                return self.to_etree(data.fk_resource, options, name, depth + 1)
            else:
                return self.to_etree(data.value, options, name, depth + 1)
        elif getattr(data, 'dehydrated_type', None) == 'related' and data.is_m2m == True:
            if data.full:
                element = Element(name or 'objects')
                for bundle in data.m2m_bundles:
                    element.append(self.to_etree(bundle, options, bundle.resource_name, depth + 1))
            else:
                element = Element(name or 'objects')
                for value in data.value:
                    element.append(self.to_etree(value, options, name, depth=depth + 1))
        else:
            return self.to_etree(data.value, options, name)
    else:
        element = Element(name or 'value')
        simple_data = self.to_simple(data, options)
        data_type = get_type_string(simple_data)
        if data_type != 'string':
            element.set('type', get_type_string(simple_data))
        if data_type != 'null':
            element.text = force_unicode(simple_data)
    return element
def placemark(row):
    placemark = Element('Placemark', targetId="ID")
    name = Element('name')
    extended_data = Element('ExtendedData')
    streamtitle = Element('Data', name="Rivername")
    streamflow = Element('Data', name="Stream Flow")
    streamtitle.text = "{}".format(row.name)
    streamflow.text = "{}".format(str(row.data[0]))
    extended_data.append(streamtitle)
    extended_data.append(streamflow)
    point = Element('Point')
    # coordinates = SubElement(point, "coordinates")
    coordinates = Element('coordinates')
    coordinates.text = '{},{}'.format(row.long, row.lat)
    point.append(coordinates)
    # placemark.append(name)
    # placemark.append(description)
    placemark.append(extended_data)
    placemark.append(point)
    ## Styling information
    # style = Element("Style", id="ID")
    # iconstyle = Element('IconStyle', id="ID")
    # scale = Element('scale')
    # iconstyle.append(scale)
    # style.append(iconstyle)
    # placemark.append(style)
    # icon = Element("Icon")
    # iconstyle.append(icon)
    # href = Element("href")
    # href.text = "http://maps.google.com/mapfiles/kml/paddle/purple-circle.png"
    # icon.append(href)
    # color = Element('color')
    # iconstyle.append(color)
    # color.text = '50DC783C'
    return placemark
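# A hedged usage sketch for placemark(). The shape of `row` is inferred
# from the attribute accesses above (name, data[0], long, lat); the sample
# values are made up.
from collections import namedtuple
from lxml.etree import tostring

Row = namedtuple("Row", "name data long lat")  # inferred, hypothetical shape
row = Row(name="Chattahoochee", data=[412.0], long=-84.39, lat=33.75)
print(tostring(placemark(row), pretty_print=True).decode())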
def fire(request):
    kml = Element('kml', xmlns="http://www.opengis.net/kml/2.2")
    document = Element("Document")
    kml.append(document)
    camera = Element('LookAt')
    longitude = Element('longitude')
    latitude = Element('latitude')
    altitude = Element('altitude')
    longitude.text = "82.9001"
    latitude.text = "32.1656"
    altitude.text = "2000"
    camera.append(longitude)
    camera.append(latitude)
    camera.append(altitude)
    document.append(camera)
    points = makelists()
    name = Element("name")
    name.text = "Gauging Stations"
    document.append(name)
    # folder = Element("Folder")
    for i in range(0, len(points), 2):
        document.append(placemark(points[i]))
    print tostring(kml, pretty_print=True)
    return HttpResponse(tostring(kml))
def Base_asXML(object, root=None):
    """
    Generate an XML text corresponding to the content of this object
    """
    self = object
    return_as_object = True
    if root is None:
        return_as_object = False
        root = Element('erp5')
    #LOG('asXML', 0, 'Working on: %s' % str(self.getPhysicalPath()))
    object = SubElement(root, 'object',
                        attrib=dict(id=self.getId(),
                                    portal_type=self.getPortalType()))
    # We have to find every property
    for prop_id in set(self.propertyIds()):
        # In most cases, we should not synchronize acquired properties
        if prop_id not in ('uid', 'workflow_history', 'id', 'portal_type',):
            value = self.getProperty(prop_id)
            if value is None:
                prop_type = 'None'
            else:
                prop_type = self.getPropertyType(prop_id)
            sub_object = SubElement(object, prop_id, attrib=dict(type=prop_type))
            if prop_type in ('object',):
                # We may have very long lines, so we should split
                value = aq_base(value)
                value = dumps(value)
                sub_object.text = standard_b64encode(value)
            elif prop_type in ('data',):
                # Create blocks to represent data
                # <data><block>ZERD</block><block>OEJJM</block></data>
                size_block = 60
                if isinstance(value, str):
                    for index in xrange(0, len(value), size_block):
                        content = value[index:index + size_block]
                        data_encoded = standard_b64encode(content)
                        block = SubElement(sub_object, 'block_data')
                        block.text = data_encoded
                else:
                    raise ValueError("XMLExportImport failed, the data is undefined")
            elif prop_type in ('lines', 'tokens',):
                value = [word.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                         for word in value]
                sub_object.append(marshaller(value))
            elif prop_type in ('text', 'string',):
                sub_object.text = unicode(escape(value), 'utf-8')
            elif prop_type != 'None':
                sub_object.text = str(value)
    # We have to describe the workflow history
    if getattr(self, 'workflow_history', None) is not None:
        workflow_list = self.workflow_history
        workflow_list_keys = workflow_list.keys()
        workflow_list_keys.sort()  # Make sure it is sorted
        for workflow_id in workflow_list_keys:
            for workflow_action in workflow_list[workflow_id]:
                workflow_node = SubElement(object, 'workflow_action',
                                           attrib=dict(workflow_id=workflow_id))
                workflow_variable_list = workflow_action.keys()
                workflow_variable_list.sort()
                for workflow_variable in workflow_variable_list:
                    variable_type = "string"
                    # Somewhat bad, should find a better way
                    if workflow_variable.find('time') >= 0:
                        variable_type = "date"
                    if workflow_variable.find('language_revs') >= 0:  # XXX specific to cps
                        variable_type = "dict"
                    if workflow_action[workflow_variable] is None:
                        variable_type = 'None'
                    variable_node = SubElement(workflow_node, workflow_variable,
                                               attrib=dict(type=variable_type))
                    if variable_type != 'None':
                        variable_node_text = str(workflow_action[workflow_variable])
                        variable_node.text = unicode(variable_node_text, 'utf-8')
                    if workflow_variable == 'time':
                        time = variable_node.text
                    elif workflow_variable == 'actor':
                        actor = variable_node.text
                workflow_node.attrib['id'] = sha1(
                    workflow_id + time + str(actor.encode('utf-8'))).hexdigest()
    # We should now describe security settings
    for user_role in self.get_local_roles():
        local_role_node = SubElement(object, 'local_role',
                                     attrib=dict(id=user_role[0], type='tokens'))
        # convert local_roles to strings because marshaller can't do it
        role_list = []
        for role in user_role[1]:
            if isinstance(role, unicode):
                role = role.encode('utf-8')
            role_list.append(role)
        local_role_node.append(marshaller(tuple(role_list)))
    if getattr(self, 'get_local_permissions', None) is not None:
        for user_permission in self.get_local_permissions():
            local_permission_node = SubElement(object, 'local_permission',
                                               attrib=dict(id=user_permission[0],
                                                           type='tokens'))
            local_permission_node.append(marshaller(user_permission[1]))
    # Sometimes there are roles specified for groups, like with CPS
    if getattr(self, 'get_local_group_roles', None) is not None:
        for group_role in self.get_local_group_roles():
            local_group_node = SubElement(object, 'local_group',
                                          attrib=dict(id=group_role[0], type='tokens'))
            local_group_node.append(marshaller(group_role[1]))
    if return_as_object:
        return root
    return etree.tostring(root, encoding='utf-8',
                          xml_declaration=True, pretty_print=True)
def add_annotation(self, span, xml, i):
    annotation = Element('annotation')
    # mention id
    mention = Element('mention', id=f'EHOST_Instance_{i}')
    annotation.append(mention)
    # annotator id
    annotator = Element('annotator', id='medSpaCy')
    annotator.text = 'medSpaCy'
    annotation.append(annotator)
    # span offsets; use a distinct name so the `span` argument is not
    # shadowed before its text is read below
    start = span.start_char
    end = start + len(span.text)
    span_element = Element('span', start=str(start), end=str(end))
    annotation.append(span_element)
    # spannedText
    spanned_text = Element('spannedText')
    spanned_text.text = span.text
    annotation.append(spanned_text)
    # creationDate
    # TODO
    xml.append(annotation)
def add_public_attributes(self, xml):
    # Now add the attributes defined in span_attributes
    attribute_defs = xml.find('attributeDefs')
    for attr in self.span_attributes:
        attribute_def = Element("attributeDef")
        name = Element("Name")
        name.text = attr
        attribute_def.append(name)
        default_value = Element("defaultValue")
        default_value.text = "false"
        attribute_def.append(default_value)
        for value in ["false", "true"]:
            option_def = Element("attributeDefOptionDef")
            option_def.text = value
            attribute_def.append(option_def)
        for name in ["is_Linked_to_UMLS_CUICode_and_CUILabel",
                     "is_Linked_to_UMLS_CUICode",
                     "is_Linked_to_UMLS_CUILabel"]:
            sub_elem = Element(name)
            sub_elem.text = "false"
            attribute_def.append(sub_elem)
        attribute_defs.append(attribute_def)
class Aggregator:
    def __init__(self):
        self.seg = Element(xmlns + "trkseg")
        self.mintime = "9999-99-99T00:00:00Z"
        self.maxtime = "0000-00-00T00:00:00Z"

    def append_tracks(self, tracks):
        for track in tracks:
            for segment in track.findall(xmlns + 'trkseg'):
                self.append_seg(segment)

    def append_seg(self, segment):
        first = segment.xpath('*[1]')
        firsttime = first[0].find(xmlns + 'time').text
        last = segment.xpath('*[last()]')
        lasttime = last[0].find(xmlns + 'time').text
        if firsttime > self.maxtime:
            # whole segment is later than our max time
            self.seg.extend(segment)
            self.maxtime = lasttime
            self.mintime = min(firsttime, self.mintime)
            return
        if lasttime < self.mintime:
            # whole segment is earlier than our min time
            self.seg, segment = segment, self.seg
            self.seg.extend(segment)
            self.maxtime = max(lasttime, self.maxtime)
            self.mintime = firsttime
            return
        # TODO: segments intersection
        sys.stderr.write("time overlaps!\n")

    def add_file(self, filename):
        print('Adding ' + filename)
        data = etree.parse(filename)
        tracks = data.findall(xmlns + "trk")
        self.append_tracks(tracks)

    def separate_days(self, tgt_dir):
        points = list(self.seg)
        prevtime_str = points[0].find(xmlns + 'time').text
        prevtime = datetime.strptime(prevtime_str, TIME_FORMAT)
        stops_count = 0
        day_begin_idx = 0
        for idx, point in enumerate(points):
            curtime_str = point.find(xmlns + 'time').text
            curtime = datetime.strptime(curtime_str, TIME_FORMAT)
            if curtime - prevtime > STOP_TRESHOLD:
                #stops_count += 1
                #self.wpt_from_trkt(prevp, 'Ночёвка ' + str(stops_count))  # "Ночёвка" means "overnight stop"
                tgt_file = tgt_dir + '/' + prevtime_str + '.gpx'
                self.save(points[day_begin_idx:idx], tgt_file)
                day_begin_idx = idx
            prevtime_str = curtime_str
            prevtime = curtime
        tgt_file = tgt_dir + '/' + prevtime_str + '.gpx'
        self.save(points[day_begin_idx:], tgt_file)

    def wpt_from_trkt(self, point, name):
        wpt = Element(xmlns + "wpt")
        wpt.set('lat', point.get('lat'))
        wpt.set('lon', point.get('lon'))
        # use a distinct variable so the `name` argument is not shadowed
        name_element = Element(xmlns + "name")
        name_element.text = name
        wpt.append(name_element)
        prev_ele = point.find(xmlns + 'ele')
        if prev_ele is not None:
            ele = Element(xmlns + 'ele')
            ele.text = prev_ele.text
            wpt.append(ele)
        return wpt

    def save(self, points, filename):
        print('Saving ', filename, ' (exists!)' if os.path.isfile(filename) else '')
        result = etree.parse(
            os.path.dirname(os.path.realpath(__file__)) + '/template.gpx')
        gpx = result.getroot()
        track = Element(xmlns + "trk")
        seg = Element(xmlns + "trkseg")
        gpx.append(track)
        track.append(seg)
        seg.extend(points)
        result.write(filename, xml_declaration=True, encoding='utf-8')
def add_attribute(element: etree.Element, name: str, value: Any) -> None:
    if value is not None:
        element.set(name, str(value))
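# A brief usage sketch for add_attribute; the None check makes None mean
# "omit the attribute entirely".
from lxml import etree

node = etree.Element("metric")
add_attribute(node, "count", 3)     # set: non-None values are stringified
add_attribute(node, "label", None)  # skipped: None means "omit"
assert etree.tostring(node) == b'<metric count="3"/>'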
def add_rgb_tags(self, class_def, rgb):
    # Add RGB values
    for (value, name) in zip(rgb, ('RGB_R', 'RGB_G', 'RGB_B')):
        sub = Element(name)
        sub.text = str(value)
        class_def.append(sub)
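# A small usage sketch; since self is unused in add_rgb_tags, it can be
# exercised as a plain function (None stands in for self).
from lxml.etree import Element, tostring

class_def = Element("classDef")
add_rgb_tags(None, class_def, (255, 128, 0))
print(tostring(class_def).decode())
# <classDef><RGB_R>255</RGB_R><RGB_G>128</RGB_G><RGB_B>0</RGB_B></classDef>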
def inasafe_metadata_fix(layer_id):
    """Attempt to fix problem of InaSAFE metadata.

    This fix is needed to make sure InaSAFE metadata is persisted in
    GeoNode and is used correctly by GeoSAFE. This bug happens because
    InaSAFE metadata implement wrong schema type in supplementalInformation.

    :param layer_id: layer ID
    :type layer_id: int

    :return:
    """
    # Take InaSAFE keywords from xml metadata *file*
    try:
        instance = Layer.objects.get(id=layer_id)
        xml_file = instance.upload_session.layerfile_set.get(name='xml')
        # if xml file exists, check supplementalInformation field
        namespaces = {
            'gmd': 'http://www.isotc211.org/2005/gmd',
            'gco': 'http://www.isotc211.org/2005/gco'
        }
        content = xml_file.file.read()
        root = XML(content)
        supplemental_info = root.xpath(
            '//gmd:supplementalInformation', namespaces=namespaces)[0]
        # Check that it contains InaSAFE metadata
        inasafe_el = supplemental_info.find('inasafe')
        inasafe_provenance_el = supplemental_info.find('inasafe_provenance')
        # Take InaSAFE metadata
        if inasafe_el is None:
            # Do nothing if the InaSAFE tag doesn't exist
            return
        # Take root xml from layer metadata_xml field
        layer_root_xml = XML(instance.metadata_xml)
        layer_sup_info = layer_root_xml.xpath(
            '//gmd:supplementalInformation', namespaces=namespaces)[0]
        char_string_tagname = '{%s}CharacterString' % namespaces['gco']
        layer_sup_info_content = layer_sup_info.find(char_string_tagname)
        if layer_sup_info_content is None:
            # Insert gco:CharacterString value
            el = Element(char_string_tagname)
            layer_sup_info.insert(0, el)
        # put InaSAFE keywords after CharacterString
        layer_inasafe_meta_content = layer_sup_info.find('inasafe')
        if layer_inasafe_meta_content is not None:
            # Clear existing InaSAFE keywords, replace with new one
            layer_sup_info.remove(layer_inasafe_meta_content)
        layer_sup_info.insert(1, inasafe_el)
        # provenance only shows up on impact layers
        layer_inasafe_meta_provenance = layer_sup_info.find('inasafe_provenance')
        if inasafe_provenance_el is not None:
            if layer_inasafe_meta_provenance is not None:
                # Clear existing InaSAFE keywords, replace with new one
                layer_sup_info.remove(layer_inasafe_meta_provenance)
            layer_sup_info.insert(1, inasafe_provenance_el)
        # write back to resource base so the same thing is returned by csw
        resources = ResourceBase.objects.filter(id=instance.resourcebase_ptr.id)
        resources.update(
            metadata_xml=etree.tostring(layer_root_xml, pretty_print=True))
        # update qgis server xml file
        with open(xml_file.file.path, mode='w') as f:
            f.write(etree.tostring(layer_root_xml, pretty_print=True))
        qgis_layer = instance.qgis_layer
        qgis_xml_file = '{prefix}.xml'.format(
            prefix=qgis_layer.qgis_layer_path_prefix)
        with open(qgis_xml_file, mode='w') as f:
            f.write(etree.tostring(layer_root_xml, pretty_print=True))
        # update InaSAFE keywords cache
        metadata, created = Metadata.objects.get_or_create(layer=instance)
        inasafe_metadata_xml = etree.tostring(inasafe_el, pretty_print=True)
        # compare against None: lxml elements without children are falsy
        if inasafe_provenance_el is not None:
            inasafe_metadata_xml += '\n'
            inasafe_metadata_xml += etree.tostring(
                inasafe_provenance_el, pretty_print=True)
        metadata.keywords_xml = inasafe_metadata_xml
        metadata.save()
    except Exception as e:
        LOGGER.debug(e)
        pass
def add_classdef(self, class_defs, label, color, inherit=True):
    class_def = Element('classDef')
    name = Element('Name')
    name.text = label
    class_def.append(name)
    self.add_rgb_tags(class_def, color)
    # Add other tags
    sub = Element('InHerit_Public_Attributes')
    sub.text = str(inherit)
    class_def.append(sub)
    sub = Element('Source')
    sub.text = 'eHOST'
    class_def.append(sub)
    class_defs.append(class_def)
def __init__(self, name):
    super().__init__(Element(name))
def get_type(element: etree.Element, name: str, _type: Callable[[str], T]) -> Optional[T]:
    value = element.get(name)
    if value is not None:
        value = _type(value)
    return value
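# A usage sketch for get_type; any callable that parses a string works as
# _type, pairing naturally with add_attribute above.
from lxml import etree

node = etree.fromstring('<metric count="3"/>')
assert get_type(node, "count", int) == 3
assert get_type(node, "count", float) == 3.0
assert get_type(node, "missing", int) is None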
def inventory_to_kml_string(
        inventory,
        icon_url="https://maps.google.com/mapfiles/kml/shapes/triangle.png",
        icon_size=1.5, label_size=1.0, cmap="Paired", encoding="UTF-8",
        timespans=True, strip_far_future_end_times=True):
    """
    Convert an :class:`~obspy.core.inventory.inventory.Inventory` to a KML
    string representation.

    :type inventory: :class:`~obspy.core.inventory.inventory.Inventory`
    :param inventory: Input station metadata.
    :type icon_url: str
    :param icon_url: Internet URL of icon to use for station (e.g. PNG image).
    :type icon_size: float
    :param icon_size: Icon size.
    :type label_size: float
    :param label_size: Label size.
    :type encoding: str
    :param encoding: Encoding used for XML string.
    :type timespans: bool
    :param timespans: Whether to add timespan information to the single
        station elements in the KML or not. If timespans are used, the
        displayed information in e.g. Google Earth will represent a snapshot
        in time, such that using the time slider different states of the
        inventory in time can be visualized. If timespans are not used, any
        station active at any point in time is always shown.
    :type strip_far_future_end_times: bool
    :param strip_far_future_end_times: Leave out likely fictitious end times
        of stations (more than twenty years after current time). Far future
        end times may produce time sliders with bad overall time span in
        third party applications viewing the KML file.
    :rtype: byte string
    :return: Encoded byte string containing KML information of the station
        metadata.
    """
    twenty_years_from_now = UTCDateTime() + 3600 * 24 * 365 * 20

    # construct the KML file
    kml = Element("kml")
    kml.set("xmlns", "http://www.opengis.net/kml/2.2")
    document = SubElement(kml, "Document")
    SubElement(document, "name").text = "Inventory"

    # style definition
    cmap = get_cmap(name=cmap, lut=len(inventory.networks))
    for i in range(len(inventory.networks)):
        color = _rgba_tuple_to_kml_color_code(cmap(i))
        style = SubElement(document, "Style")
        style.set("id", "station_%i" % i)
        iconstyle = SubElement(style, "IconStyle")
        SubElement(iconstyle, "color").text = color
        SubElement(iconstyle, "scale").text = str(icon_size)
        icon = SubElement(iconstyle, "Icon")
        SubElement(icon, "href").text = icon_url
        hotspot = SubElement(iconstyle, "hotSpot")
        hotspot.set("x", "0.5")
        hotspot.set("y", "0.5")
        hotspot.set("xunits", "fraction")
        hotspot.set("yunits", "fraction")
        labelstyle = SubElement(style, "LabelStyle")
        SubElement(labelstyle, "color").text = color
        SubElement(labelstyle, "scale").text = str(label_size)

    for i, net in enumerate(inventory):
        folder = SubElement(document, "Folder")
        SubElement(folder, "name").text = str(net.code)
        SubElement(folder, "open").text = "1"
        SubElement(folder, "description").text = str(net)
        style = SubElement(folder, "Style")
        liststyle = SubElement(style, "ListStyle")
        SubElement(liststyle, "listItemType").text = "check"
        SubElement(liststyle, "bgColor").text = "00ffff"
        SubElement(liststyle, "maxSnippetLines").text = "5"
        # add one marker per station code
        for sta in net:
            placemark = SubElement(folder, "Placemark")
            SubElement(placemark, "name").text = ".".join((net.code, sta.code))
            SubElement(placemark, "styleUrl").text = "#station_%i" % i
            SubElement(placemark, "color").text = color
            if sta.longitude is not None and sta.latitude is not None:
                point = SubElement(placemark, "Point")
                SubElement(point, "coordinates").text = "%.6f,%.6f,0" % \
                    (sta.longitude, sta.latitude)
            SubElement(placemark, "description").text = str(sta)
            if timespans:
                start = sta.start_date
                end = sta.end_date
                if start is not None or end is not None:
                    timespan = SubElement(placemark, "TimeSpan")
                    if start is not None:
                        SubElement(timespan, "begin").text = str(start)
                    if end is not None:
                        if not strip_far_future_end_times or \
                                end < twenty_years_from_now:
                            SubElement(timespan, "end").text = str(end)
        if timespans:
            start = net.start_date
            end = net.end_date
            if start is not None or end is not None:
                timespan = SubElement(folder, "TimeSpan")
                if start is not None:
                    SubElement(timespan, "begin").text = str(start)
                if end is not None:
                    if not strip_far_future_end_times or \
                            end < twenty_years_from_now:
                        SubElement(timespan, "end").text = str(end)

    # generate and return KML string
    return tostring(kml, pretty_print=True, xml_declaration=True,
                    encoding=encoding)
userName = tree.find('div/div[1]/div[1]/div[1]/div[1]/a')
userName.text = dynamic_data['user_name']
userName.set('href', dynamic_data['user_url'])
publishDate = tree.find('div/div[1]/div[1]/div[1]/span')
publishDate.text = dynamic_data['publish_date']
content = tree.find('div/div[2]')
content.text = dynamic_data['content_text']
for picture in dynamic_data['pictures']:
    resource, mime, hash_hex = insert_img(picture['img_src'])
    note.resources.append(resource)
    contentDivImg = Element('div', attrib_map_div_img)
    contentImg = getattr(E, 'en-media')('', {
        'style': attrib_map_img['style'],
        'type': mime,
        'hash': hash_hex
    })
    contentDivImg.append(contentImg)
    content.append(contentDivImg)
dynamicLink = tree.find('div/div[3]/a')
dynamicLink.set('href', dynamic_data['dynamic_url'])
if dynamic_data['origin']:
    origin = dynamic_data['origin']
    repostUserAvatar = content.find('div/div/p/span[1]/a')
    repostUserAvatar.set('href', origin['user_url'])
def xml_transform(root, classes):
    annopath = join(root, 'classes', '%s.txt')
    _imgpath = join(root, 'images', '%s.jpg')
    new_annopath = join(root, 'classes', '%s.xml')
    mypath = join(root, 'classes')
    ids = list()
    l = os.listdir(mypath)
    ids = [x.split('.')[0] for x in l]
    for i in range(len(ids)):
        img_id = ids[i]
        img = cv2.imread(_imgpath % img_id)
        height, width, channels = img.shape
        node_root = Element('annotation')
        node_folder = SubElement(node_root, 'folder')
        node_folder.text = 'VOC2007'
        img_name = img_id + '.jpg'
        node_filename = SubElement(node_root, 'filename')
        node_filename.text = img_name
        node_source = SubElement(node_root, 'source')
        node_database = SubElement(node_source, 'database')
        node_database.text = 'Coco database'
        node_size = SubElement(node_root, 'size')
        node_width = SubElement(node_size, 'width')
        node_width.text = str(width)
        node_height = SubElement(node_size, 'height')
        node_height.text = str(height)
        node_depth = SubElement(node_size, 'depth')
        node_depth.text = str(channels)
        node_segmented = SubElement(node_root, 'segmented')
        node_segmented.text = '0'
        target = (annopath % img_id)
        if os.path.exists(target):
            label_norm = np.loadtxt(target).reshape(-1, 5)
            for i in range(len(label_norm)):
                labels_conv = label_norm[i]
                new_label = unconvert(labels_conv[0], width, height,
                                      labels_conv[1], labels_conv[2],
                                      labels_conv[3], labels_conv[4])
                node_object = SubElement(node_root, 'object')
                node_name = SubElement(node_object, 'name')
                node_name.text = CLASSES[new_label[0]]
                node_pose = SubElement(node_object, 'pose')
                node_pose.text = 'Unspecified'
                node_truncated = SubElement(node_object, 'truncated')
                node_truncated.text = '0'
                node_difficult = SubElement(node_object, 'difficult')
                node_difficult.text = '0'
                node_bndbox = SubElement(node_object, 'bndbox')
                node_xmin = SubElement(node_bndbox, 'xmin')
                node_xmin.text = str(new_label[1])
                node_ymin = SubElement(node_bndbox, 'ymin')
                node_ymin.text = str(new_label[3])
                node_xmax = SubElement(node_bndbox, 'xmax')
                node_xmax.text = str(new_label[2])
                node_ymax = SubElement(node_bndbox, 'ymax')
                node_ymax.text = str(new_label[4])
        xml = tostring(node_root, pretty_print=True)
        f = open(new_annopath % img_id, "wb")
        os.remove(target)
        f.write(xml)
        f.close()
def run(self):
    world = self.tree.getroot().getchildren()[0]
    # starts, goals = self.scenarios.random_scene()
    # starts, goals = self.scenarios.circle_scene_uniform()
    for item in range(self.num_agents):
        actor = Element("instanced_actor", name="actor" + str(item))
        skin = Element("skin")
        skin_fn = Element("filename")
        skin_fn.text = "/home/tingxfan/lib/gazebo/media/models/Gamechar-male.mesh"
        skin_scale = Element("scale")
        skin_scale.text = "0.15"
        skin.append(skin_fn)
        skin.append(skin_scale)
        actor.append(skin)
        pose = Element("pose")
        # x = str(starts[item][0])
        # y = str(starts[item][1])
        # pose.text = x + " " + y + " " + "1.02 0 0 0"
        pose.text = "0 0 0 0 0 0"
        actor.append(pose)
        animation = Element("animation", name="Walk")
        animate_fn = Element("filename")
        animate_fn.text = "/home/tingxfan/lib/gazebo/media/models/Gamechar-male.skeleton"
        interpolate_x = Element("interpolate_x")
        interpolate_x.text = "true"
        animate_scale = Element("scale")
        animate_scale.text = "1"
        animation.append(animate_fn)
        animation.append(animate_scale)
        animation.append(interpolate_x)
        actor.append(animation)
        # static = Element("static")
        # static.text = "0"
        # actor.append(static)
        # link = Element("link", name="link" + str(item))
        # visual = Element("visual", name="visual" + str(item))
        # geometry = Element("geometry")
        # box = Element("box")
        # box_size = Element("size")
        # box_size.text = "0.2 0.2 0.2"
        # box.append(box_size)
        # geometry.append(box)
        # visual.append(geometry)
        # link.append(visual)
        # actor.append(link)
        plugin = Element("plugin", name="actor_plugin",
                         filename="libInstancedActorPlugin.so")
        target = Element("target")
        # x = str(goals[item][0])
        # y = str(goals[item][1])
        # target.text = x + " " + y + " " + "1.02"
        target.text = "1 1 0"
        target_weight = Element("target_weight")
        target_weight.text = "1.15"
        obstacle_weight = Element("obstacle_weight")
        obstacle_weight.text = "1.8"
        animation_factor = Element("animation_factor")
        animation_factor.text = "5.1"
        # speed = Element("speed")
        # speed.text = "0.6"
        ignore_obstacle = Element("ignore_obstacles")
        model_ground_plane = Element("model")
        model_ground_plane.text = "ground_plane"
        ignore_obstacle.append(model_ground_plane)
        plugin.append(target)
        plugin.append(target_weight)
        plugin.append(obstacle_weight)
        plugin.append(animation_factor)
        # plugin.append(speed)
        plugin.append(ignore_obstacle)
        actor.append(plugin)
        world.append(actor)
    self.tree.write(self.actor_pkg_path + '/worlds/ped_square1.world',
                    pretty_print=True, xml_declaration=True, encoding="utf-8")
def rpc(self, rpc):
    return exec_rpc(self.module, tostring(Element(rpc)))
def catalog_to_kml_string(
        catalog,
        icon_url="https://maps.google.com/mapfiles/kml/shapes/earthquake.png",
        label_func=None, icon_size_func=None, encoding="UTF-8",
        timestamps=True):
    """
    Convert a :class:`~obspy.core.event.Catalog` to a KML string
    representation.

    :type catalog: :class:`~obspy.core.event.Catalog`
    :param catalog: Input catalog data.
    :type icon_url: str
    :param icon_url: Internet URL of icon to use for events (e.g. PNG image).
    :type label_func: func
    :param label_func: Custom function to use for determining each event's
        label. User provided function is supposed to take an
        :class:`~obspy.core.event.Event` object as single argument, e.g. for
        empty labels use `label_func=lambda x: ""`.
    :type icon_size_func: func
    :param icon_size_func: Custom function to use for determining each
        event's icon size. User provided function should take an
        :class:`~obspy.core.event.Event` object as single argument and
        return a float.
    :type encoding: str
    :param encoding: Encoding used for XML string.
    :type timestamps: bool
    :param timestamps: Whether to add timestamp information to the event
        elements in the KML or not. If timestamps are used, the displayed
        information in e.g. Google Earth will represent a snapshot in time,
        such that using the time slider different states of the catalog in
        time can be visualized. If timespans are not used, any event
        happening at any point in time is always shown.
    :rtype: byte string
    :return: Encoded byte string containing KML information of the event
        metadata.
    """
    # default label and size functions
    if not label_func:
        def label_func(event):
            origin = (event.preferred_origin() or
                      event.origins and event.origins[0] or
                      None)
            mag = (event.preferred_magnitude() or
                   event.magnitudes and event.magnitudes[0] or
                   None)
            label = origin.time and str(origin.time.date) or ""
            if mag:
                label += " %.1f" % mag.mag
            return label
    if not icon_size_func:
        def icon_size_func(event):
            mag = (event.preferred_magnitude() or
                   event.magnitudes and event.magnitudes[0] or
                   None)
            if mag:
                try:
                    icon_size = 1.2 * log(1.5 + mag.mag)
                except ValueError:
                    icon_size = 0.1
            else:
                icon_size = 0.5
            return icon_size

    # construct the KML file
    kml = Element("kml")
    kml.set("xmlns", "http://www.opengis.net/kml/2.2")
    document = SubElement(kml, "Document")
    SubElement(document, "name").text = "Catalog"

    # style definitions for earthquakes
    style = SubElement(document, "Style")
    style.set("id", "earthquake")
    iconstyle = SubElement(style, "IconStyle")
    SubElement(iconstyle, "scale").text = "0.5"
    icon = SubElement(iconstyle, "Icon")
    SubElement(icon, "href").text = icon_url
    hotspot = SubElement(iconstyle, "hotSpot")
    hotspot.set("x", "0.5")
    hotspot.set("y", "0.5")
    hotspot.set("xunits", "fraction")
    hotspot.set("yunits", "fraction")
    labelstyle = SubElement(style, "LabelStyle")
    SubElement(labelstyle, "color").text = "ff0000ff"
    SubElement(labelstyle, "scale").text = "0.8"

    folder = SubElement(document, "Folder")
    SubElement(folder, "name").text = "Catalog"
    SubElement(folder, "open").text = "1"
    SubElement(folder, "description").text = str(catalog)
    style = SubElement(folder, "Style")
    liststyle = SubElement(style, "ListStyle")
    SubElement(liststyle, "listItemType").text = "check"
    SubElement(liststyle, "bgColor").text = "00ffffff"
    SubElement(liststyle, "maxSnippetLines").text = "5"

    # add one marker per event
    for event in catalog:
        origin = (event.preferred_origin() or
                  event.origins and event.origins[0] or
                  None)
        placemark = SubElement(folder, "Placemark")
        SubElement(placemark, "name").text = label_func(event)
        SubElement(placemark, "styleUrl").text = "#earthquake"
        style = SubElement(placemark, "Style")
        icon_style = SubElement(style, "IconStyle")
        liststyle = SubElement(style, "ListStyle")
        SubElement(liststyle, "maxSnippetLines").text = "5"
        SubElement(icon_style, "scale").text = "%.5f" % icon_size_func(event)
        if origin:
            if origin.longitude is not None and origin.latitude is not None:
                point = SubElement(placemark, "Point")
                SubElement(point, "coordinates").text = "%.6f,%.6f,0" % \
                    (origin.longitude, origin.latitude)
        SubElement(placemark, "description").text = str(event)
        if timestamps:
            time = _get_event_timestamp(event)
            if time is not None:
                SubElement(placemark, "TimeStamp").text = str(time)

    # generate and return KML string
    return tostring(kml, pretty_print=True, xml_declaration=True,
                    encoding=encoding)
def _dictToEtree(data, name=None, depth=0):
    element = None
    if depth == 0:
        element = Element('cml')
        element.append(_dictToEtree(data, name, depth + 1))
    elif depth == 1:
        element = Element('MDocument')
        element.append(_dictToEtree(data, name, depth + 1))
    elif depth == 2:
        element = Element('MChemicalStruct')
        for mol in data:
            element.append(_dictToEtree(mol, name, depth + 1))
    elif depth == 3:
        molID = data.keys()[0]
        val = data.values()[0]
        element = Element('molecule', molID=molID)
        element.append(_dictToEtree(val['atoms'], 'atoms', depth + 1))
        element.append(_dictToEtree(val['bonds'], 'bonds', depth + 1))
    elif depth == 4:
        if name == 'atoms':
            element = Element('atomArray')
            for atom in data:
                element.append(_dictToEtree(atom, 'atom', depth + 1))
        elif name == 'bonds':
            element = Element('bondArray')
            for bond in data:
                element.append(_dictToEtree(bond, 'bond', depth + 1))
    elif depth == 5:
        if name == 'bond':
            kwargs = {}
            kwargs['atomRefs2'] = ' '.join(data['atomRefs'])
            kwargs['order'] = str(data['order'])
            element = Element('bond', **kwargs)
            stereo = data.get('stereo')
            if stereo:
                element.append(_dictToEtree(stereo, 'bondStereo', depth + 1))
        elif name == 'atom':
            element = Element('atom', **data)
    elif depth == 6:
        element = Element('bondStereo')
        if data == rdkit.Chem.rdchem.BondDir.BEGINWEDGE:
            element.text = 'W'
        elif data == rdkit.Chem.rdchem.BondDir.BEGINDASH:
            element.text = 'H'
        elif data == rdkit.Chem.rdchem.BondDir.UNKNOWN:
            element.attrib["convention"] = "MDL"
            element.attrib["conventionValue"] = "4"
    return element
def main():
    """ main entry point for module execution """
    neighbors_spec = dict(host=dict(), port=dict())

    element_spec = dict(name=dict(),
                        description=dict(),
                        enabled=dict(default=True, type='bool'),
                        speed=dict(),
                        mtu=dict(type='int'),
                        duplex=dict(choices=['full', 'half', 'auto']),
                        tx_rate=dict(),
                        rx_rate=dict(),
                        neighbors=dict(type='list', elements='dict',
                                       options=neighbors_spec),
                        delay=dict(default=10, type='int'),
                        state=dict(default='present',
                                   choices=['present', 'absent', 'up', 'down']),
                        active=dict(default=True, type='bool'))

    aggregate_spec = deepcopy(element_spec)
    aggregate_spec['name'] = dict(required=True)

    # remove default in aggregate spec, to handle common arguments
    remove_default_spec(aggregate_spec)

    argument_spec = dict(aggregate=dict(type='list', elements='dict',
                                        options=aggregate_spec))
    argument_spec.update(element_spec)
    argument_spec.update(junos_argument_spec)

    required_one_of = [['name', 'aggregate']]
    mutually_exclusive = [['name', 'aggregate']]

    module = AnsibleModule(argument_spec=argument_spec,
                           required_one_of=required_one_of,
                           mutually_exclusive=mutually_exclusive,
                           supports_check_mode=True)

    warnings = list()
    result = {'changed': False}

    if warnings:
        result['warnings'] = warnings

    top = 'interfaces/interface'

    param_to_xpath_map = collections.OrderedDict()
    param_to_xpath_map.update([('name', {'xpath': 'name', 'is_key': True}),
                               ('description', 'description'),
                               ('speed', 'speed'),
                               ('mtu', 'mtu'),
                               ('duplex', 'link-mode'),
                               ('disable', {'xpath': 'disable',
                                            'tag_only': True})])

    choice_to_value_map = {
        'link-mode': {'full': 'full-duplex',
                      'half': 'half-duplex',
                      'auto': 'automatic'}
    }

    params = to_param_list(module)

    requests = list()
    for param in params:
        # if key doesn't exist in the item, get it from module.params
        for key in param:
            if param.get(key) is None:
                param[key] = module.params[key]

        item = param.copy()
        state = item.get('state')
        item['disable'] = True if not item.get('enabled') else False

        if state in ('present', 'up', 'down'):
            item['state'] = 'present'

        validate_param_values(module, param_to_xpath_map, param=item)
        want = map_params_to_obj(module, param_to_xpath_map, param=item)
        requests.append(map_obj_to_ele(module, want, top,
                                       value_map=choice_to_value_map,
                                       param=item))

    diff = None
    with locked_config(module):
        for req in requests:
            diff = load_config(module, tostring(req), warnings, action='merge')

        # issue commit after last configuration change is done
        commit = not module.check_mode
        if diff:
            if commit:
                commit_configuration(module)
            else:
                discard_changes(module)
            result['changed'] = True

            if module._diff:
                result['diff'] = {'prepared': diff}

    failed_conditions = []
    neighbors = None
    for item in params:
        state = item.get('state')
        tx_rate = item.get('tx_rate')
        rx_rate = item.get('rx_rate')
        want_neighbors = item.get('neighbors')

        if (state not in ('up', 'down') and tx_rate is None
                and rx_rate is None and want_neighbors is None):
            continue

        element = Element('get-interface-information')
        intf_name = SubElement(element, 'interface-name')
        intf_name.text = item.get('name')

        if result['changed']:
            sleep(item.get('delay'))

        reply = exec_rpc(module, tostring(element), ignore_warning=False)

        if state in ('up', 'down'):
            admin_status = reply.xpath(
                'interface-information/physical-interface/admin-status')
            if not admin_status or not conditional(
                    state, admin_status[0].text.strip()):
                failed_conditions.append('state ' + 'eq(%s)' % state)

        if tx_rate:
            output_bps = reply.xpath(
                'interface-information/physical-interface/traffic-statistics/output-bps')
            if not output_bps or not conditional(
                    tx_rate, output_bps[0].text.strip(), cast=int):
                failed_conditions.append('tx_rate ' + tx_rate)

        if rx_rate:
            input_bps = reply.xpath(
                'interface-information/physical-interface/traffic-statistics/input-bps')
            if not input_bps or not conditional(
                    rx_rate, input_bps[0].text.strip(), cast=int):
                failed_conditions.append('rx_rate ' + rx_rate)

        if want_neighbors:
            if neighbors is None:
                element = Element('get-lldp-interface-neighbors')
                intf_name = SubElement(element, 'interface-device')
                intf_name.text = item.get('name')

                reply = exec_rpc(module, tostring(element),
                                 ignore_warning=False)
                have_host = [item.text for item in reply.xpath(
                    'lldp-neighbors-information/lldp-neighbor-information/lldp-remote-system-name')]
                have_port = [item.text for item in reply.xpath(
                    'lldp-neighbors-information/lldp-neighbor-information/lldp-remote-port-id')]

            for neighbor in want_neighbors:
                host = neighbor.get('host')
                port = neighbor.get('port')
                if host and host not in have_host:
                    failed_conditions.append('host ' + host)
                if port and port not in have_port:
                    failed_conditions.append('port ' + port)

    if failed_conditions:
        msg = 'One or more conditional statements have not been satisfied'
        module.fail_json(msg=msg, failed_conditions=failed_conditions)

    module.exit_json(**result)
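
# For reference (a sketch, not emitted by this file itself): the intent-check
# loop above sends Junos RPCs that serialize roughly as
#
#   <get-interface-information>
#     <interface-name>ge-0/0/1</interface-name>     <!-- item['name'] -->
#   </get-interface-information>
#
# and, for the LLDP neighbour checks,
#
#   <get-lldp-interface-neighbors>
#     <interface-device>ge-0/0/1</interface-device>
#   </get-lldp-interface-neighbors>
#
# 'ge-0/0/1' is a hypothetical interface name used only for illustration; the
# element names come straight from the Element/SubElement calls above.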
def update_author(akey, a=None, handle_redirects=True):
    # http://ia331507.us.archive.org:8984/solr/works/select?indent=on&q=author_key:OL22098A&facet=true&rows=1&sort=edition_count%20desc&fl=title&facet.field=subject_facet&facet.mincount=1
    m = re_author_key.match(akey)
    if not m:
        print 'bad key:', akey
        return
    author_id = m.group(1)
    if not a:
        a = withKey(akey)
    if a['type']['key'] in ('/type/redirect', '/type/delete') or not a.get('name', None):
        return ['<delete><query>key:%s</query></delete>' % author_id]
    try:
        assert a['type']['key'] == '/type/author'
    except AssertionError:
        print a['type']['key']
        raise

    facet_fields = ['subject', 'time', 'person', 'place']
    url = 'http://' + get_solr('works') + \
        '/solr/works/select?wt=json&json.nl=arrarr&q=author_key:%s&sort=edition_count+desc&rows=1&fl=title,subtitle&facet=true&facet.mincount=1' % author_id
    url += ''.join('&facet.field=%s_facet' % f for f in facet_fields)
    reply = json.load(urlopen(url))
    work_count = reply['response']['numFound']
    docs = reply['response'].get('docs', [])
    top_work = None
    if docs:
        top_work = docs[0]['title']
        if docs[0].get('subtitle', None):
            top_work += ': ' + docs[0]['subtitle']
    all_subjects = []
    for f in facet_fields:
        for s, num in reply['facet_counts']['facet_fields'][f + '_facet']:
            all_subjects.append((num, s))
    all_subjects.sort(reverse=True)
    top_subjects = [s for num, s in all_subjects[:10]]

    add = Element("add")
    doc = SubElement(add, "doc")
    add_field(doc, 'key', author_id)
    if a.get('name', None):
        add_field(doc, 'name', a['name'])
    for f in 'birth_date', 'death_date', 'date':
        if a.get(f, None):
            add_field(doc, f, a[f])
    if top_work:
        add_field(doc, 'top_work', top_work)
    add_field(doc, 'work_count', work_count)
    add_field_list(doc, 'top_subjects', top_subjects)

    requests = []
    if handle_redirects:
        q = {'type': '/type/redirect', 'location': akey}
        redirects = ''.join('<id>%s</id>' % re_author_key.match(r['key']).group(1)
                            for r in query_iter(q))
        if redirects:
            requests.append('<delete>' + redirects + '</delete>')

    requests.append(tostring(add).encode('utf-8'))
    return requests
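
# Shape of the Solr requests update_author returns (a sketch; the values are
# hypothetical, and the <field name=...> rendering assumes the conventional
# Solr XML that the unshown add_field/add_field_list helpers would emit):
#
#   <delete><id>OL99999A</id></delete>            # only when redirects exist
#   <add><doc>
#     <field name="key">OL22098A</field>
#     <field name="name">...</field>
#     <field name="top_work">...</field>
#     <field name="work_count">42</field>
#     <field name="top_subjects">...</field>      # repeated, up to 10 entries
#   </doc></add>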
def gen_xml_from_predict(root_path):
    for (root_path, dirnames, _) in walk(root_path):
        for dirname in dirnames:
            sub_path = os.path.join(root_path, dirname)
            for (_, _, filenames) in walk(sub_path):
                for filename in filenames:
                    if filename[-4:] != '.png':
                        continue
                    pic_path = os.path.join(sub_path, filename)
                    print(pic_path)
                    mask = cv2.imdecode(np.fromfile(pic_path, dtype=np.uint8), -1)
                    h = mask.shape[0]
                    w = mask.shape[1]
                    _, mask_Bin = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
                    contours, hierarchy = cv2.findContours(
                        mask_Bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

                    node_root = Element('annotation')
                    node_folder = SubElement(node_root, 'folder')
                    node_folder.text = dirname
                    node_filename = SubElement(node_root, 'filename')
                    node_filename.text = filename[:-4] + '.jpg'
                    node_path = SubElement(node_root, 'path')
                    node_path.text = pic_path
                    node_source = SubElement(node_root, 'source')
                    node_database = SubElement(node_source, 'database')
                    node_database.text = 'tdf'
                    node_size = SubElement(node_root, 'size')
                    node_width = SubElement(node_size, 'width')
                    node_width.text = str(w)
                    node_height = SubElement(node_size, 'height')
                    node_height.text = str(h)
                    node_depth = SubElement(node_size, 'depth')
                    node_depth.text = '3'
                    node_segmented = SubElement(node_root, 'segmented')
                    node_segmented.text = '0'

                    if len(contours) > 0:
                        # keep only contours whose arc length exceeds the
                        # threshold (originally 800)
                        contours_OK = []
                        max_thresh = 0  # 800
                        for j in range(len(contours)):
                            arclen = cv2.arcLength(contours[j], True)
                            if arclen > max_thresh:
                                contours_OK.append(contours[j])
                        contours = contours_OK

                        for c in range(len(contours)):
                            # polygon approximation
                            approx = cv2.approxPolyDP(contours[c], 0.6, True)
                            pt_x = ""
                            pt_y = ""
                            for k in range(len(approx)):
                                # if k % 8 == 0:  # or sample at a regular interval
                                pt = approx[k]
                                pt_x += str(pt[0][0])
                                pt_y += str(pt[0][1])
                                pt_x += ","
                                pt_y += ","

                            node_object = SubElement(node_root, 'object')
                            node_name = SubElement(node_object, 'name')
                            # node_name.text = dirname.split('.', 1)[0]
                            node_name.text = '1'
                            node_pose = SubElement(node_object, 'pose')
                            node_pose.text = 'Unspecified'
                            node_truncated = SubElement(node_object, 'truncated')
                            node_truncated.text = '0'
                            node_polygen = SubElement(node_object, 'polygen')
                            node_points_x = SubElement(node_polygen, 'points_x')
                            node_points_x.text = str(pt_x)
                            node_points_y = SubElement(node_polygen, 'points_y')
                            node_points_y.text = str(pt_y)

                    xml = tostring(node_root, pretty_print=True)
                    dom = parseString(xml)
                    xml_path = pic_path[:-4] + '.xml'
                    with open(xml_path, 'wb') as f:
                        f.write(dom.toprettyxml(indent='\t', encoding='utf-8'))
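
# Sketch of one annotation file the function writes per mask (the values are
# hypothetical; tag names match the SubElement calls above):
#
#   <annotation>
#     <folder>class_dir</folder>
#     <filename>0001.jpg</filename>
#     <path>.../class_dir/0001.png</path>
#     <source><database>tdf</database></source>
#     <size><width>640</width><height>480</height><depth>3</depth></size>
#     <segmented>0</segmented>
#     <object>
#       <name>1</name>
#       <pose>Unspecified</pose>
#       <truncated>0</truncated>
#       <polygen>
#         <points_x>12,14,20,</points_x>
#         <points_y>33,35,41,</points_y>
#       </polygen>
#     </object>
#   </annotation>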
def eprints_description(metadataPolicy, dataPolicy,
                        submissionPolicy=None, content=None):
    """Generate the eprints element for the identify response.

    The eprints container is used by the e-print community to describe
    the content and policies of repositories.  For the full specification
    and schema definition visit:
    http://www.openarchives.org/OAI/2.0/guidelines-eprints.htm
    """
    eprints = Element(etree.QName(NS_EPRINTS[None], 'eprints'),
                      nsmap=NS_EPRINTS)
    eprints.set(etree.QName(ns['xsi'], 'schemaLocation'),
                '{0} {1}'.format(EPRINTS_SCHEMA_LOCATION,
                                 EPRINTS_SCHEMA_LOCATION_XSD))

    if content:
        contentElement = etree.Element('content')
        for key, value in content.items():
            contentElement.append(E(key, value))
        eprints.append(contentElement)

    metadataPolicyElement = etree.Element('metadataPolicy')
    for key, value in metadataPolicy.items():
        metadataPolicyElement.append(E(key, value))
    eprints.append(metadataPolicyElement)

    dataPolicyElement = etree.Element('dataPolicy')
    for key, value in dataPolicy.items():
        dataPolicyElement.append(E(key, value))
    eprints.append(dataPolicyElement)

    if submissionPolicy:
        submissionPolicyElement = etree.Element('submissionPolicy')
        for key, value in submissionPolicy.items():
            submissionPolicyElement.append(E(key, value))
        eprints.append(submissionPolicyElement)

    return etree.tostring(eprints, pretty_print=True)
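
# Usage sketch (the policy texts are hypothetical; NS_EPRINTS, ns, E and the
# schema-location constants are the module-level helpers the function already
# relies on):
def _example_eprints_description():
    return eprints_description(
        metadataPolicy={'text': 'Metadata may be harvested freely.'},
        dataPolicy={'text': 'Full texts are available to all.'},
        submissionPolicy={'text': 'Authors deposit their own e-prints.'})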
def make_xml(path, offset=100, default_bounds=(50, 50), default_radius=3.0,
             convert_mm=False, make=True, use_label=True, rotate=0):
    """
    convert a csv into an xml

    use blank line as a group marker
    circle labels are offset by ``offset*group_id``
    ie. group 0. 1,2,3
        group 1. 101,102,103
    """
    out = '{}_from_csv.xml'.format(os.path.splitext(path)[0])
    if not make:
        return out

    root = Element('root')
    ul = Element('use_label')
    ul.text = 'True' if use_label else 'False'
    root.append(ul)

    outline = Element('outline')
    bb = Element('bounding_box')
    width, height = Element('width'), Element('height')
    width.text, height.text = map(str, default_bounds)
    bb.append(width)
    bb.append(height)
    outline.append(bb)
    root.append(outline)

    circles = Element('circles')
    radius = Element('radius')
    radius.text = str(default_radius)
    circles.append(radius)

    face_color = Element('face_color')
    face_color.text = 'white'
    circles.append(face_color)
    root.append(circles)

    i = 0
    off = 0
    reader = csv.reader(open(path, 'r'), delimiter=',')
    # writer = open(path + 'angles.txt', 'w')
    nwriter = None
    if rotate:
        nwriter = csv.writer(open(path + 'rotated_{}.txt'.format(rotate), 'w'))

    header = reader.next()
    if nwriter:
        nwriter.writerow(header)

    theta = math.radians(rotate)
    for k, row in enumerate(reader):
        # print k, row
        row = map(str.strip, row)
        if row:
            e = Element('point')
            x, y, l = Element('x'), Element('y'), Element('label')

            xx, yy = float(row[0]), float(row[1])
            try:
                r = float(row[2])
                rr = Element('radius')
                if convert_mm:
                    r *= 2.54
                rr.text = str(r)
                e.append(rr)
            except IndexError:
                r = None

            px = math.cos(theta) * xx - math.sin(theta) * yy
            py = math.sin(theta) * xx + math.cos(theta) * yy
            xx, yy = px, py
            if nwriter:
                data = ['{:0.4f}'.format(xx), '{:0.4f}'.format(yy)]
                if r is not None:
                    data.append('{:0.4f}'.format(r))
                nwriter.writerow(data)

            if convert_mm:
                xx = xx * 2.54
                yy = yy * 2.54

            x.text = str(xx)
            y.text = str(yy)
            # a = math.degrees(math.atan2(yy, xx))
            # writer.write('{} {}\n'.format(k + 1, a))
            l.text = str(i + 1 + off)
            e.append(l)
            e.append(x)
            e.append(y)
            circles.append(e)
            i += 1
        else:
            # use blank rows as group markers
            off += offset
            i = 0

    tree = ElementTree(root)
    tree.write(out, xml_declaration=True, method='xml', pretty_print=True)
    return out
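
# Input sketch for make_xml (hypothetical file contents, derived from how the
# reader loop consumes rows): a header line, then x,y[,radius] rows, with a
# blank line starting a new label group whose labels are offset by ``offset``:
#
#   x,y,radius
#   1.0,2.0,3.5
#   4.0,5.0
#
#   6.0,7.0        <- first point of group 1, labelled 101 when offset=100
#
#   make_xml('points.csv', offset=100, rotate=90)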
def oai_identifier_description(scheme, repositoryIdentifier, delimiter,
                               sampleIdentifier):
    """Generate the oai-identifier element for the identify response.

    The OAI identifier format is intended to provide persistent resource
    identifiers for items in repositories that implement OAI-PMH.  For the
    full specification and schema definition visit:
    http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm
    """
    oai_identifier = Element(etree.QName(NS_OAI_IDENTIFIER[None],
                                         'oai_identifier'),
                             nsmap=NS_OAI_IDENTIFIER)
    oai_identifier.set(etree.QName(ns['xsi'], 'schemaLocation'),
                       '{0} {1}'.format(OAI_IDENTIFIER_SCHEMA_LOCATION,
                                        OAI_IDENTIFIER_SCHEMA_LOCATION_XSD))

    oai_identifier.append(E('scheme', scheme))
    oai_identifier.append(E('repositoryIdentifier', repositoryIdentifier))
    oai_identifier.append(E('delimiter', delimiter))
    oai_identifier.append(E('sampleIdentifier', sampleIdentifier))

    return etree.tostring(oai_identifier, pretty_print=True)
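
# Usage sketch (the values follow the conventional oai-identifier examples
# and are used here purely for illustration):
def _example_oai_identifier_description():
    return oai_identifier_description(
        scheme='oai',
        repositoryIdentifier='example.org',
        delimiter=':',
        sampleIdentifier='oai:example.org:12345')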
def build_doc(w, obj_cache={}, resolve_redirects=False):
    wkey = w['key']
    assert w['type']['key'] == '/type/work'
    title = w.get('title', None)
    if not title:
        return

    def get_pub_year(e):
        pub_date = e.get('publish_date', None)
        if pub_date:
            m = re_iso_date.match(pub_date)
            if m:
                return m.group(1)
            m = re_year.search(pub_date)
            if m:
                return m.group(1)

    if 'editions' not in w:
        q = {'type': '/type/edition', 'works': wkey, '*': None}
        w['editions'] = list(query_iter(q))
        print 'editions:', [e['key'] for e in w['editions']]

    editions = []
    for e in w['editions']:
        pub_year = get_pub_year(e)
        if pub_year:
            e['pub_year'] = pub_year
        if 'ocaid' in e:
            collection = get_ia_collection(e['ocaid'])
            #print 'collection:', collection
            e['ia_collection'] = collection
            e['public_scan'] = ('lendinglibrary' not in collection) and \
                               ('printdisabled' not in collection)
        overdrive_id = e.get('identifiers', {}).get('overdrive', None)
        if overdrive_id:
            #print 'overdrive:', overdrive_id
            e['overdrive'] = overdrive_id
        editions.append(e)

    editions.sort(key=lambda e: e.get('pub_year', None))
    #print len(w['editions']), 'editions found'

    #print w['key']
    work_authors = []
    authors = []
    author_keys = []
    for a in w.get('authors', []):
        if 'author' not in a:  # OL Web UI bug
            # http://openlibrary.org/works/OL15365167W.yml?m=edit&v=1
            continue
        akey = a['author']['key']
        m = re_author_key.match(akey)
        if not m:
            print 'invalid author key:', akey
            continue
        work_authors.append(akey)
        author_keys.append(m.group(1))
        if akey in obj_cache and obj_cache[akey]['type']['key'] != '/type/redirect':
            authors.append(obj_cache[akey])
        else:
            authors.append(withKey(akey))
    if any(a['type']['key'] == '/type/redirect' for a in authors):
        if resolve_redirects:
            def resolve(a):
                if a['type']['key'] == '/type/redirect':
                    a = withKey(a['location'])
                return a
            authors = [resolve(a) for a in authors]
        else:
            print
            for a in authors:
                print 'author:', a
            print w['key']
            print
            raise AuthorRedirect
    for a in authors:
        print 'author:', a
    assert all(a['type']['key'] == '/type/author' for a in authors)

    try:
        subjects = four_types(get_work_subjects(w))
    except:
        print 'bad work: ', w['key']
        raise

    field_map = {
        'subjects': 'subject',
        'subject_places': 'place',
        'subject_times': 'time',
        'subject_people': 'person',
    }

    has_fulltext = any(e.get('ocaid', None) or e.get('overdrive', None)
                       for e in editions)
    #print 'has_fulltext:', has_fulltext

    for db_field, solr_field in field_map.iteritems():
        if not w.get(db_field, None):
            continue
        cur = subjects.setdefault(solr_field, {})
        for v in w[db_field]:
            try:
                if isinstance(v, dict):
                    if 'value' not in v:
                        continue
                    v = v['value']
                cur[v] = cur.get(v, 0) + 1
            except:
                print 'v:', v
                raise

    if any(e.get('ocaid', None) for e in editions):
        subjects.setdefault('subject', {})
        subjects['subject']['Accessible book'] = \
            subjects['subject'].get('Accessible book', 0) + 1
        if not has_fulltext:
            subjects['subject']['Protected DAISY'] = \
                subjects['subject'].get('Protected DAISY', 0) + 1
    #print w['key'], subjects['subject']

    doc = Element("doc")

    add_field(doc, 'key', w['key'][7:])
    title = w.get('title', None)
    if title:
        add_field(doc, 'title', title)
        # add_field(doc, 'title_suggest', title)

    add_field(doc, 'has_fulltext', has_fulltext)
    if w.get('subtitle', None):
        add_field(doc, 'subtitle', w['subtitle'])

    alt_titles = set()
    for e in editions:
        if 'title' in e and e['title'] != title:
            alt_titles.add(e['title'])
        for f in 'work_titles', 'other_titles':
            for t in e.get(f, []):
                if t != title:
                    alt_titles.add(t)
    add_field_list(doc, 'alternative_title', alt_titles)

    alt_subtitles = set(e['subtitle'] for e in editions
                        if e.get('subtitle', None)
                        and e['subtitle'] != w.get('subtitle', None))
    add_field_list(doc, 'alternative_subtitle', alt_subtitles)

    add_field(doc, 'edition_count', len(editions))
    for e in editions:
        add_field(doc, 'edition_key', re_edition_key.match(e['key']).group(1))

    cover_edition = pick_cover(w, editions)
    if cover_edition:
        add_field(doc, 'cover_edition_key',
                  re_edition_key.match(cover_edition).group(1))

    k = 'by_statement'
    add_field_list(doc, k, set(e[k] for e in editions if e.get(k, None)))

    k = 'publish_date'
    pub_dates = set(e[k] for e in editions if e.get(k, None))
    add_field_list(doc, k, pub_dates)
    pub_years = set(m.group(1) for m in
                    (re_year.search(i) for i in pub_dates) if m)
    if pub_years:
        add_field_list(doc, 'publish_year', pub_years)
        add_field(doc, 'first_publish_year', min(int(i) for i in pub_years))

    k = 'first_sentence'
    fs = set(e[k]['value'] if isinstance(e[k], dict) else e[k]
             for e in editions if e.get(k, None))
    add_field_list(doc, k, fs)

    publishers = set()
    for e in editions:
        publishers.update('Sine nomine' if is_sine_nomine(i) else i
                          for i in e.get('publishers', []))
    add_field_list(doc, 'publisher', publishers)
    # add_field_list(doc, 'publisher_facet', publishers)

    field_map = [
        ('lccn', 'lccn'),
        ('publish_places', 'publish_place'),
        ('oclc_numbers', 'oclc'),
        ('contributions', 'contributor'),
    ]
    for db_key, search_key in field_map:
        v = set()
        for e in editions:
            if db_key not in e:
                continue
            v.update(e[db_key])
        add_field_list(doc, search_key, v)

    isbn = set()
    for e in editions:
        for f in 'isbn_10', 'isbn_13':
            for v in e.get(f, []):
                isbn.add(v.replace('-', ''))
    add_field_list(doc, 'isbn', isbn)

    lang = set()
    for e in editions:
        for l in e.get('languages', []):
            m = re_lang_key.match(l['key'] if isinstance(l, dict) else l)
            lang.add(m.group(1))
    if lang:
        add_field_list(doc, 'language', lang)

    pub_goog = set()  # google
    pub_nongoog = set()
    nonpub_goog = set()
    nonpub_nongoog = set()

    public_scan = False
    all_collection = set()
    all_overdrive = set()
    lending_edition = None
    in_library_edition = None
    printdisabled = set()
    for e in editions:
        if 'overdrive' in e:
            all_overdrive.update(e['overdrive'])
        if 'ocaid' not in e:
            continue
        if not lending_edition and 'lendinglibrary' in e['ia_collection']:
            lending_edition = re_edition_key.match(e['key']).group(1)
        if not in_library_edition and 'inlibrary' in e['ia_collection']:
            in_library_edition = re_edition_key.match(e['key']).group(1)
        if 'printdisabled' in e['ia_collection']:
            printdisabled.add(re_edition_key.match(e['key']).group(1))
        all_collection.update(e['ia_collection'])
        assert isinstance(e['ocaid'], basestring)
        i = e['ocaid'].strip()
        if e['public_scan']:
            public_scan = True
            if i.endswith('goog'):
                pub_goog.add(i)
            else:
                pub_nongoog.add(i)
        else:
            if i.endswith('goog'):
                nonpub_goog.add(i)
            else:
                nonpub_nongoog.add(i)
    #print 'lending_edition:', lending_edition
    ia_list = list(pub_nongoog) + list(pub_goog) + \
        list(nonpub_nongoog) + list(nonpub_goog)
    add_field_list(doc, 'ia', ia_list)
    if has_fulltext:
        add_field(doc, 'public_scan_b', public_scan)
    if all_collection:
        add_field(doc, 'ia_collection_s', ';'.join(all_collection))
    if all_overdrive:
        add_field(doc, 'overdrive_s', ';'.join(all_overdrive))
    if lending_edition:
        add_field(doc, 'lending_edition_s', lending_edition)
    elif in_library_edition:
        add_field(doc, 'lending_edition_s', in_library_edition)
    if printdisabled:
        add_field(doc, 'printdisabled_s', ';'.join(list(printdisabled)))

    author_keys = [re_author_key.match(a['key']).group(1) for a in authors]
    author_names = [a.get('name', '') for a in authors]
    add_field_list(doc, 'author_key', author_keys)
    add_field_list(doc, 'author_name', author_names)

    alt_names = set()
    for a in authors:
        if 'alternate_names' in a:
            alt_names.update(a['alternate_names'])
    add_field_list(doc, 'author_alternative_name', alt_names)

    add_field_list(doc, 'author_facet',
                   (' '.join(v) for v in zip(author_keys, author_names)))
    #if subjects:
    #    add_field(doc, 'fiction', subjects['fiction'])

    for k in 'person', 'place', 'subject', 'time':
        if k not in subjects:
            continue
        add_field_list(doc, k, subjects[k].keys())
        add_field_list(doc, k + '_facet', subjects[k].keys())
        subject_keys = [str_to_key(s) for s in subjects[k].keys()]
        add_field_list(doc, k + '_key', subject_keys)

    return doc
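
# Usage sketch for build_doc (a hypothetical, heavily trimmed work record;
# real records come from withKey()/query_iter() and carry many more fields):
#
#   w = {
#       'key': '/works/OL123W',
#       'type': {'key': '/type/work'},
#       'title': 'Example Title',
#       'authors': [{'author': {'key': '/authors/OL1A'}}],
#   }
#   doc = build_doc(w)   # an lxml <doc> element ready to wrap in a Solr <add>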