# Imports needed by the functions below; "fetchdata" is the project's own
# download/cache helper module.
import os
import md5
from datetime import datetime, timedelta

import fetchdata


def parse_doc(path, icao, country, title, category):
    print "Parsing AIP doc"
    icao = icao.upper()
    assert len(icao) == 4
    url = fetchdata.getrawurl(path, country=country)
    ret = dict()
    ret['icao'] = icao
    ret['url'] = url
    ret['title'] = title
    ret['name'] = icao + " - " + title
    ret['category'] = category
    #data,nowdate=fetchdata.getdata(path,country=country,maxcacheage=7200)
    blobname = icao + "_" + category
    tmppath = os.path.join(os.getenv("SWFP_DATADIR"), "aiptext", icao)
    if not os.path.exists(tmppath):
        os.makedirs(tmppath)
    if path.lower().endswith("pdf"):
        # Convert the PDF to HTML with pdftohtml, caching the derived file.
        outpath_inter = os.path.join(tmppath, blobname + ".tmp.html")

        def render(inputfile, outputfile):
            r = "pdftohtml -c -s -i -zoom 2 -noframes -nodrm %s %s" % (
                inputfile, outputfile)
            #-s is not supported on older pdftohtml, and doesn't appear necessary either.
            print "running", r
            assert 0 == os.system(r)

        fetchdata.getcreate_derived_data_raw(
            path, outpath_inter, render, "html", country=country)
        whole = open(outpath_inter).read()
        # Clean up pdftohtml output: force a white background and strip the
        # "Microsoft Word - " prefix from the title.
        fixed = (whole.replace("<BODY bgcolor=\"#A0A0A0\"",
                               "<BODY bgcolor=\"#FFFFFF\"")
                 .replace("<TITLE>Microsoft Word - ", "<TITLE>"))
    else:
        assert path.endswith("html")
        fixed, date = fetchdata.getdata(path, country=country)
    # Store the cleaned HTML under a checksum-stamped filename.
    cksum = md5.md5(fixed).hexdigest()
    outpath = os.path.join(tmppath, blobname + "." + cksum + ".html")
    f = open(outpath, "w")
    f.write(fixed)
    f.close()
    #print "Wrote raw:",out,outpath
    ret['checksum'] = cksum
    ret['date'] = fetchdata.get_filedate(outpath)
    ret['blobname'] = blobname
    return ret
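# Illustrative sketch only, not part of the original module: a hypothetical
# driver showing how parse_doc() might be called. The path, title and
# category values are invented placeholders; only the returned field names
# ('name', 'checksum', 'date') come from the function above.
def _demo_parse_doc():
    doc = parse_doc("aip/es_ad_2_essa_en.pdf", icao="essa", country="se",
                    title="STOCKHOLM/Arlanda", category="airport")
    print doc['name'], doc['checksum'], doc['date']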
def getsvg(path, pagenr, usecache=True):
    assert type(pagenr) == int
    inputfile = fetchdata.getdatafilename(path, country="se", maxcacheage=7200)
    svged = fetchdata.getcachename(path, 'svg')
    if os.path.exists(svged) and usecache:
        cacheddate = fetchdata.get_filedate(svged)
        print "Cached svg version exists, date:", svged, cacheddate
        # On a development machine, or if the cached file is younger than
        # 12 hours, reuse it instead of re-rendering.
        if fetchdata.is_devcomp() or datetime.now() - cacheddate < timedelta(
                0, 86400 / 2):
            print "Using svg cache"
            try:
                return open(svged).read()
            except Exception, cause:
                print "Couldn't read cached svg version", cause
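# Illustrative sketch only, not part of the original module: a hypothetical
# caller of getsvg(). The path is an invented placeholder, and since the
# regeneration branch of getsvg() is not shown above, the caller guards
# against a missing result before writing the SVG text out.
def _demo_getsvg():
    svg = getsvg("aip/es_ad_2_essa_2_1_en.pdf", pagenr=1, usecache=True)
    if svg:
        open("/tmp/essa_page1.svg", "w").write(svg)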