def _writeXml(f, id, metadata):
    """Serialize one identifier and its metadata to `f` as a <record> element.

    The record carries the escaped identifier as an attribute and one
    <element name="..."> child per metadata item. Values stored under the
    "datacite" and "crossref" keys are embedded after only having their XML
    declaration removed; every other value is XML-escaped. Each <element>
    is written UTF-8 encoded; the opening and closing <record> tags are
    written as-is.
    """
    # Keys whose values are embedded via removeXmlDeclaration, not escaped.
    raw_xml_keys = ("datacite", "crossref")

    f.write("<record identifier=\"%s\">" % util.xmlEscape(id.identifier))
    for name, value in metadata.items():
        if name in raw_xml_keys:
            content = util.removeXmlDeclaration(value)
        else:
            content = util.xmlEscape(value)
        element = "<element name=\"%s\">%s</element>" % (
            util.xmlEscape(name), content)
        f.write(element.encode("UTF-8"))
    f.write("</record>")
def mark_paras(txt):
    """
    Turn plain text with one paragraph per line into <p>-wrapped markup.

    Every line is stripped of surrounding whitespace; lines that are empty
    after stripping are dropped. Each remaining line is XML-escaped and
    enclosed in <p>...</p>, and the paragraphs are joined with newlines.
    Falsy input (None, '') yields ''.

    The returned string is already escaped, but nothing marks it as such;
    when emitting it from a jinja2 template, use |safe.
    """
    if not txt:
        return ''

    paragraphs = []
    for raw_line in txt.splitlines():
        stripped = raw_line.strip()
        if stripped:
            paragraphs.append('<p>%s</p>' % util.xmlEscape(stripped))
    return '\n'.join(paragraphs)
def xml_str(val, key=None):
    """Return an XML-safe rendering of `val`.

    - None becomes ''.
    - str / unicode values are escaped with util.xmlEscape; if the escaped
      result is a byte string it is decoded as UTF-8, replacing
      undecodable bytes.
    - list / tuple / set values are rebuilt as the same container type
      with every element converted recursively.
    - long / int / float values are rendered with str().

    Types are matched exactly (no subclass matching), so anything else,
    including bool, raises ValueError. `key` is not used for the
    conversion; it only appears in that error message.
    """
    if val is None:
        return ''

    kind = type(val)
    if kind in (str, unicode):
        escaped = util.xmlEscape(val)
        if type(escaped) == str:
            return escaped.decode('utf8', 'replace')
        return escaped
    if kind in (list, tuple, set):
        return kind(xml_str(item, key) for item in val)
    if kind in (long, int, float):
        return str(val)
    raise ValueError("Can't handle type %r for key %r" % (type(val), key,))
def meta(self, refName): """Generate the OpenMensa XML meta feed for canteen `refName` by filling the template read from `metaTemplateFile`. The canteen is the `self.canteens` entry whose key equals `refName`; when there is none, an empty `<openmensa>` element is returned (the template file is still read first). Placeholders name, address, city, phone, latitude, longitude and source are copied from the canteen entry; `feed` is `self.urlTemplate` formatted with metaOrFeed='feed' and the URL-quoted reference. Opening hours are parsed from `mensa["times"]`: each match is a two-letter day abbreviation, optionally a range of two abbreviations, followed by `H:MM - H:MM Uhr`; a range gives the same hours to every day from the first through the second abbreviation in `weekdays_map` order. For each (short, long) pair in `weekdays_map` the placeholder named `long` becomes `open="HH:MM-HH:MM"` or `closed="true"`. All values go through `xmlEscape` before `template.format`. NOTE(review): a range whose end day precedes its start day in `weekdays_map` is not wrapped around - confirm such input cannot occur.""" with open(metaTemplateFile) as f: template = f.read() for reference, mensa in self.canteens.items(): if refName != reference: continue data = { "name": mensa["name"], "address": mensa["address"], "city": mensa["city"], "phone": mensa['phone'], "latitude": mensa["latitude"], "longitude": mensa["longitude"], "feed": self.urlTemplate.format(metaOrFeed='feed', mensaReference=urllib.parse.quote(reference)), "source": mensa["source"], } openingTimes = {} pattern = re.compile( r"([A-Z][a-z])(\s*-\s*([A-Z][a-z]))?\s*(\d{1,2}):(\d{2})\s*-\s*(\d{1,2}):(\d{2}) Uhr") m = re.findall(pattern, mensa["times"]) for result in m: fromDay, _, toDay, fromTimeH, fromTimeM, toTimeH, toTimeM = result openingTimes[fromDay] = "%02d:%02d-%02d:%02d" % ( int(fromTimeH), int(fromTimeM), int(toTimeH), int(toTimeM)) if toDay: select = False for short, long in weekdays_map: if short == fromDay: select = True elif select: openingTimes[short] = "%02d:%02d-%02d:%02d" % ( int(fromTimeH), int(fromTimeM), int(toTimeH), int(toTimeM)) if short == toDay: select = False for short, long in weekdays_map: if short in openingTimes: data[long] = 'open="%s"' % openingTimes[short] else: data[long] = 'closed="true"' for key in data: data[key] = xmlEscape(data[key]) xml = template.format(**data) return xml return '<openmensa xmlns="http://openmensa.org/open-mensa-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="2.1" xsi:schemaLocation="http://openmensa.org/open-mensa-v2 http://openmensa.org/open-mensa-v2.xsd"/>'
""" debug('processing %s ...' % articleId) conn = cur.connection try: cleartext = dechrome.parseCleartext(html) title = cleartext.split('\n')[0] except Exception, e: raise ProcessingError(e) # Debug: print output to a file for later inspection if 0: try: cur.execute("SELECT url FROM feed_article WHERE id=%s", (articleId,)); url=cur.fetchone()['url'] debug('Article %s has url %s', articleId, url) if cleartext.startswith('\n'): cleartext = '???'+cleartext html_out = ('%s: <a href="%s">%s</a>' % (articleId, url, url)) + ('<pre><span style="font-weight:bold; font-size:20px">'+util.xmlEscape(cleartext)+'</pre>').replace('\n','</span>\n',1) with open('clean_output.html','a') as f: f.write(html_out.encode('utf8')); f.flush() except: exception("Error while producing debug output") if not cleartext: cur.execute("DELETE FROM processed_article WHERE feed_articleid=%s", (articleId,)) cur.execute("UPDATE feed_article_meta SET is_cleartext='e' WHERE id=%s", (articleId,)) raise NoTextFoundError else: try: cur.execute("UPDATE processed_article SET content=%s WHERE feed_articleid=%s AND mode='cleartext'", (cleartext, articleId,)) if cur.rowcount == 0: cur.execute("INSERT INTO processed_article (content, feed_articleid, mode) VALUES (%s, %s, 'cleartext')", (cleartext, articleId,)) cur.execute("UPDATE feed_article_meta SET is_cleartext='1', title=COALESCE(title,%s) WHERE id=%s", (title,articleId,)) except psycopg2.DataError, e:
def meta(self, refName): """Generate the OpenMensa XML meta feed for canteen `refName` by filling the template read from `metaTemplateFile`. The canteen is the `self.canteens` entry whose key equals `refName`; when there is none, an empty `<openmensa>` element is returned (the template file is still read first). Placeholders name, address, city, latitude and longitude are copied from the canteen entry; `feed` is the XML-escaped `self.urlTemplate` formatted with metaOrFeed='feed' and the URL-quoted reference; `source` is the XML-escaped result of `self.build_url(refName, baseUrlMeta)`. `phone` is a `<phone>` element when the entry has a "phone" key, otherwise an empty string. `times` is a `<times type="opening">` element with one child per weekday (monday through sunday) when the entry has a "times" key, otherwise an empty string. Opening hours are parsed from `mensa["times"]`: each match is a two-letter day abbreviation, optionally a range of two abbreviations, followed by `H:MM - H:MM Uhr`; a range gives the same hours to every day from the first through the second abbreviation in `weekdays_map` order, and each weekday attribute becomes `open="HH:MM-HH:MM"` or `closed="true"`. NOTE(review): only `feed` and `source` are escaped; the trailing `data[key] = data[key]` loop is a no-op, so name, address, city and phone reach the template unescaped - confirm the canteen data is XML-safe.""" with open(metaTemplateFile) as f: template = f.read() for ref, mensa in self.canteens.items(): if refName != ref: continue data = { "name": mensa["name"], "address": mensa["address"], "city": mensa["city"], "latitude": mensa["latitude"], "longitude": mensa["longitude"], "feed": xmlEscape( self.urlTemplate.format( metaOrFeed='feed', mensaReference=urllib.parse.quote(ref))), "source": xmlEscape(self.build_url(refName, baseUrlMeta)), } if "phone" in mensa: data["phone"] = f"<phone>{mensa['phone']}</phone>" else: data["phone"] = "" if "times" in mensa: openingTimes = {} pattern = re.compile( r"([A-Z][a-z])(\s*-\s*([A-Z][a-z]))?\s*(\d{1,2}):(\d{2})\s*-\s*(\d{1,2}):(\d{2}) Uhr" ) m = re.findall(pattern, mensa["times"]) for result in m: fromDay, _, toDay, fromTimeH, fromTimeM, toTimeH, toTimeM = result openingTimes[fromDay] = "%02d:%02d-%02d:%02d" % (int( fromTimeH), int(fromTimeM), int(toTimeH), int(toTimeM)) if toDay: select = False for short, long in weekdays_map: if short == fromDay: select = True elif select: openingTimes[short] = "%02d:%02d-%02d:%02d" % ( int(fromTimeH), int(fromTimeM), int(toTimeH), int(toTimeM)) if short == toDay: select = False for short, long in weekdays_map: if short in openingTimes: data[long] = 'open="%s"' % openingTimes[short] else: data[long] = 'closed="true"' data['times'] = f""" <times type="opening"> <monday {data['monday']} /> <tuesday {data['tuesday']} /> <wednesday {data['wednesday']} /> <thursday {data['thursday']} /> <friday {data['friday']} /> <saturday {data['saturday']} /> <sunday {data['sunday']} /> </times>""" else: data['times'] = '' for key in data: data[key] = data[key] xml = template.format(**data) return xml return '<openmensa xmlns="http://openmensa.org/open-mensa-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="2.1" xsi:schemaLocation="http://openmensa.org/open-mensa-v2 
http://openmensa.org/open-mensa-v2.xsd"/>'
def _interpolate(template, *args):
    """Fill the %-style `template` with XML-escaped copies of `args`.

    Each positional argument is passed through util.xmlEscape and the
    escaped values are substituted into `template`, in order, with the
    `%` operator. The template itself is used as given.
    """
    escaped = [util.xmlEscape(arg) for arg in args]
    return template % tuple(escaped)
bottle.request.environ.get('HTTP_AUTHORIZATION'), username, password, acl_tag) traceback.print_exc() return bottle.HTTPResponse("You don't have the permission to access this stream", status=401) else: print 'GRANTED: user %r, acl_tag %r' % (username, acl_tag) after = bottle.request.GET.get('after','0000-00-00T00-00-00Z') fn_template = FILENAME_TEMPLATE.replace('%(acl_tag)s',acl_tag) # hack: partial string interpolation if not fn_template: return bottle.HTTPResponse( "<h1>404</h1>Unknown stream: %s. Check http://newsfeed.ijs.si/ for possible URLs.", status=404) path = get_cached_file(fn_template=fn_template+'.gz', after=after, reverse_order=(after==None)) if path is None: return bottle.HTTPResponse("<h1>404</h1>No gzips created after %s on stream %r yet." % (util.xmlEscape(after), acl_tag), status=404) else: dir, fn = os.path.split(path) return bottle.static_file(fn, root=dir, download=acl_tag+'-'+fn, mimetype='application/x-gzip') if __name__=='__main__': zmqctx = zmq.Context() # (the socket is created in its own thread; context should be created in the main thread) # Debug only: uncomment either of the two below for a single-threaded run #zmq_to_files(); 1/0 #bottle.debug(True); bottle.run(host='0.0.0.0', port=13380); 1/0 # zmq subscriber threading.Thread(target=zmq_to_files).start()
cleartext = dechrome.parseCleartext(html) title = cleartext.split('\n')[0] except Exception, e: raise ProcessingError(e) # Debug: print output to a file for later inspection if 0: try: cur.execute("SELECT url FROM feed_article WHERE id=%s", (articleId, )) url = cur.fetchone()['url'] debug('Article %s has url %s', articleId, url) if cleartext.startswith('\n'): cleartext = '???' + cleartext html_out = ('%s: <a href="%s">%s</a>' % (articleId, url, url)) + ( '<pre><span style="font-weight:bold; font-size:20px">' + util.xmlEscape(cleartext) + '</pre>').replace( '\n', '</span>\n', 1) with open('clean_output.html', 'a') as f: f.write(html_out.encode('utf8')) f.flush() except: exception("Error while producing debug output") if not cleartext: cur.execute("DELETE FROM processed_article WHERE feed_articleid=%s", (articleId, )) cur.execute( "UPDATE feed_article_meta SET is_cleartext='e' WHERE id=%s", (articleId, )) raise NoTextFoundError else: