def header(self):
    """Make the header bar of the webpage.

    Produces the fixed page header: plugin title link, filename/function
    info, one pagination link per report, and prev/next arrow buttons.
    """
    # One "#stateN" anchor per report, numbered from 1.
    report_links = [
        E.A(str(n), href="#state{0}".format(n))
        for n in range(1, len(self.data['reports']) + 1)
    ]
    title = E.H1(
        E.A(
            'GCC Python Plugin',
            href='http://gcc-python-plugin.readthedocs.org/',
        ),
    )
    info = E.DIV(
        E.ATTR(id='info'),
        E.SPAN(E.CLASS('label'), 'Filename: '),
        self.data['filename'],
        E.SPAN(E.CLASS('label'), 'Function: '),
        self.data['function']['name'],
    )
    pagination = E.DIV(
        E.ATTR(id='report-pagination'),
        E.SPAN(E.CLASS('label'), 'Report: '),
        *report_links
    )
    # Arrow images are inlined as data URIs so the page is self-contained.
    prev_button = E.DIV(
        E.ATTR(id='prev'),
        E.IMG(src=data_uri('image/png', 'images/arrow-180.png')),
    )
    next_button = E.DIV(
        E.ATTR(id='next'),
        E.IMG(src=data_uri('image/png', 'images/arrow.png')),
    )
    return E.E.header(
        E.ATTR(id='header'),
        E.DIV(
            E.ATTR(id='title'),
            title,
            info,
            pagination,
            prev_button,
            next_button,
        ),
    )
def process_jsonfiles(zipdir):
    """Process all JSON files in the resulting directory.

    Builds one <div> per journal entry (date heading, address, photos,
    rendered Markdown text) and appends each to the body produced by
    ``gen_html``.

    :param zipdir: zipdir name
    :type zipdir: Path | str
    :return: the populated body element
    """
    body = gen_html()
    for jfile in listjsonfiles(str(zipdir)):
        content = load_jsonfile(jfile)
        # Create title
        div = E.DIV(E.H1(content.get("date_journal")))
        # Create date:
        div.append(E.H5(content.get("address")))
        # Create photos — guard against a missing or null 'photos' key,
        # which previously raised TypeError when iterated.
        divimg = E.DIV()
        for image in content.get('photos') or []:
            img = E.IMG(
                src=image,
                width="600",
            )
            divimg.append(img)
        div.append(divimg)
        # Create text: render Markdown to HTML, then parse it so it can
        # be embedded as a real element subtree rather than escaped text.
        text = content["text"] = markdown.markdown(content["text"])
        texthtml = fromstring(text)
        div.append(E.P(texthtml))
        body.append(div)
    return body
def img(self):
    """Still image displayed for the video.

    The <img> element is built lazily on first access and cached on the
    instance so repeated reads return the same object.
    """
    try:
        return self._img
    except AttributeError:
        pass
    thumbnail = builder.IMG(src=self.IMAGE_URL.format(self.youtube_id))
    self._img = thumbnail
    return thumbnail
def parse_detail(self, response):
    """Extract the lead image and paragraph content of an article page.

    Fills ``item['htmlcontent']`` with the serialized lead <img> (when
    present) followed by the filtered body paragraphs, then yields the
    item. (Removed the unused ``now`` timestamp local.)
    """
    item = response.meta['item']
    hxs = scrapy.Selector(response)
    htmlcontent = ''
    # Lead image: first <img> inside the first published post <article>.
    imageUrl = hxs.xpath(
        '//article[contains(@class, "post type-post status-publish")][1]/a[1]/img[1]/@src'
    )
    if imageUrl:
        imageEle = E.IMG(src=imageUrl.extract_first())
        imageEle = lxml.html.tostring(imageEle, encoding=unicode)
        htmlcontent = imageEle
    root = lxml.html.fromstring(response.body)
    # Drop comments, scripts and <head> before selecting paragraphs.
    lxml.etree.strip_elements(root, lxml.etree.Comment, "script", "head")
    # Keep only text-bearing <p> elements that are not bylines,
    # copyright notices or right-aligned footers.
    content = root.xpath(
        '//p[text() and not(contains(@class, "write_author")) and not(contains(@class, "copy")) and not(contains(@class, "text-right"))]'
    )
    for c in content:
        htmlcontent += lxml.html.tostring(c, encoding=unicode)
    item['htmlcontent'] = htmlcontent
    yield item
def parse_detail(self, response): item = response.meta['item'] hxs = scrapy.Selector(response) now = time.strftime('%Y-%m-%d %H:%M:%S') print item['url'] imageUrl = hxs.xpath(""" //img[@itemprop="contentURL"][1]/@src """) imageEle = '' item['imageUrl'] = '' if not imageUrl: print('Phnompenhpost => [' + now + '] No imageUrl') else: imageEle = E.IMG(src=imageUrl.extract_first()) imageEle = lxml.html.tostring(imageEle, encoding=unicode) item['imageUrl'] = imageUrl.extract_first() root = lxml.html.fromstring(response.body) lxml.etree.strip_elements(root, lxml.etree.Comment, "script", "head") htmlcontent = '' for p in root.xpath('//div[@id="ArticleBody"][1]'): htmlcontent = lxml.html.tostring(p, pretty_print=True, encoding=unicode) item['htmlcontent'] = imageEle + htmlcontent yield item
def parse_detail(self, response):
    """Extract article body children, strip advertisement images, and
    sanitize each fragment before appending it to the item's HTML.

    Fixes: the ad-image lookup used the absolute XPath ``//img[...]``,
    which searches the whole document from the root on every child
    iteration instead of within the current child — now ``.//img``.
    Also removed the unused ``hxs`` and ``now`` locals.
    """
    item = response.meta['item']
    root = lxml.html.fromstring(response.body)
    lxml.etree.strip_elements(root, lxml.etree.Comment, "script", "head")
    content = root.xpath(
        '//span[@class="left kh size17_kh dark lineheight26_kh"][1]')[0]
    # Start the content with the lead image captured earlier.
    imageEle = E.IMG(src=item['imageUrl'])
    imageEle = lxml.html.tostring(imageEle, encoding=unicode)
    htmlcontent = imageEle
    for p in content.iterchildren():
        # Relative search: only ads inside this child are dropped.
        imgE = p.xpath('.//img[contains(@src, "advertise")]')
        for im in imgE:
            im.drop_tag()
        # Re-wrap the fragment in a <p>, whitelist-sanitize, minify.
        c = lxml.html.tostring(p, encoding=unicode)
        wrap_p = lxml.html.fragment_fromstring(c, create_parent='p')
        wrap_p_string = lxml.html.tostring(wrap_p, encoding=unicode)
        clean_html = html2safehtml(wrap_p_string, valid_tags=("p", "img"))
        minified_html = minify(clean_html)
        # Collapse stray newlines/escapes and drop empty paragraphs.
        htmlcontent += minified_html.replace('\n', ' ').replace(
            '\r', '').replace('%0A', '').replace('%0D', '').replace('<p> </p>', '')
    item['htmlcontent'] = htmlcontent
    yield item
def header(self):
    """Make the header bar of the webpage.

    Builds the title banner (plugin name + filename) and a nav section
    with the function name, report pagination links, a bug toggle and
    prev/next arrows.
    """
    # One "#stateN" anchor per report, numbered from 1.
    pagination_links = [
        E.A(str(n), href="#state{0}".format(n))
        for n in range(1, len(self.data['reports']) + 1)
    ]
    title_bar = E.DIV(
        E.ATTR(id='title'),
        E.H1('GCC Python Plugin', ),
        E.DIV(
            E.ATTR(id='filename'),
            E.SPAN(E.CLASS('label'), 'Filename: '),
            self.data['filename'],
        ),
    )
    nav_bar = E.E.nav(
        E.ATTR(id='nav'),
        E.DIV(
            E.ATTR(id='function'),
            E.H3('Function'),
            self.data['function']['name'],
        ),
        E.DIV(
            E.ATTR(id='report-pagination'),
            E.H3('Report'),
            *pagination_links
        ),
        E.DIV(
            E.ATTR(id='bug-toggle'),
            E.IMG(src='images/bug.png', ),
            E.H3('Bug'),
            ' [count]',
        ),
        E.DIV(E.ATTR(id='prev'), E.IMG(src='images/arrow-180.png', )),
        E.DIV(E.ATTR(id='next'), E.IMG(src='images/arrow.png', )),
    )
    return E.E.header(E.ATTR(id='header'), title_bar, nav_bar)
def make_emoji_img_elem(emoji_span_elem: lxml.html.HtmlElement) -> Dict[str, Any]:
    """Convert an emoji <span> produced by the renderer into an <img> tag."""
    classes = emoji_span_elem.get("class")
    match = re.search(r"emoji-(?P<emoji_code>\S+)", classes)
    # re.search is capable of returning None, but since the parent
    # function should only be called with a valid css element we assert
    # that it does not.
    assert match is not None
    code = match.group("emoji_code")
    img_elem = E.IMG(
        alt=emoji_span_elem.text,
        src=base_url + f"/static/generated/emoji/images-{emojiset}-64/{code}.png",
        title=emoji_span_elem.get("title"),
        style="height: 20px;",
    )
    # Preserve any trailing text that followed the original span.
    img_elem.tail = emoji_span_elem.tail
    return img_elem
def generate_csrf_img(url, html):
    """
    Create a piece of HTML that contains an image tag with the CSRF values
    """
    # A full document whose body is a single 1x1 tracking-pixel image
    # pointing at the GET-ified form URL.
    pixel = E.IMG(src=form_to_get(url, html), height="1", width="1")
    document = E.HTML(E.HEAD(), E.BODY(pixel))
    return lxml.html.tostring(document, pretty_print=True).decode("utf8")
def main_page_gen(default_style):
    """Render the site's main page and return it as serialized HTML bytes.

    ``default_style`` is injected into the Default-Style meta tag so the
    client script can switch stylesheets.
    """
    head = E.HEAD(
        E.META(**{'http-equiv': "Default-Style", 'content': default_style, 'id': 'stylemetatag'}),
        E.TITLE("U2ch - Main Page"),
        E.SCRIPT(type='text/javascript', src='/mainscript.js'),  # js
        *initiate.style_cache
    )
    slogan = E.DIV(
        E.CLASS("mainslogandiv"),
        E.SPAN("U2CH"),
        E.SPAN("", style="display: inline-block; width: 5em;"),
        E.SPAN("Viewing above imageboards"),
    )
    banner = E.DIV(
        E.CLASS("mainimagediv"),
        E.IMG(src="u-2.jpg", style="width:496px;height:334px;"),
    )
    table_head = E.THEAD(E.TR(E.TD(slogan, banner)), id='header')
    table_body = E.TBODY(
        E.TR(E.TD(
            E.HR(E.CLASS("delimeter")),
            E.DIV(
                E.CLASS("mblcontainer"),
                E.DIV(E.CLASS("mblcentering"), initiate.board_cache_main_page),
            ),
            E.HR(E.CLASS("delimeter")),
        )),
        id='mainpart',
    )
    table_foot = E.TFOOT(
        E.TR(E.TD(
            E.DIV(
                'powered by ',
                E.A(
                    'Farlight Imageboard Engine',
                    href='https://github.com/Alpherie/farlight_board_engine',
                    target='_blank',
                ),
                id='credentials',
            ),
        )),
        id='footer',
    )
    body = E.BODY(
        # Hidden stats cache consumed by the client-side script.
        E.UL(initiate.stats_cache, style="display: none;", id="mblstatscache"),
        E.TABLE(E.CLASS("maintable"), table_head, table_body, table_foot),
        onload='mainpagefunc()',
    )
    return lxml.html.tostring(E.HTML(head, body))
def format_msg(msg, status):
    """Render a log message as an HTML element.

    Password-entry messages are redacted; on FAIL, a message containing
    a link is rendered as a clickable thumbnail of that link target.
    Returns None when the message has no text.
    """
    if msg.text is None:
        return None
    if "password" in msg.text:
        # Strip out the test password, just in case the report gets
        # sent around.
        return E.P("Entering password")
    if status != "FAIL":
        return E.P(msg.text)
    msg_html = html.fromstring(msg.text)
    anchors = msg_html.xpath(".//a")
    if not anchors:
        return E.P(msg.text)
    href = anchors[0].get("href")
    return E.UL(
        E.CLASS("thumbnails"),
        E.LI(
            E.CLASS("span4"),
            E.A(E.CLASS("thumbnail"), E.IMG(src=href), href=href),
        ),
    )
def imgfr(img_name, height=None):
    """Right-floated image; the height attribute is set only when given."""
    attributes = {"src": img_name}
    if height:
        attributes["height"] = height
    attributes["style"] = "float:right"
    return E.IMG(**attributes)
def img(img_name):
    """Image that stretches to fill its container (class="stretch")."""
    return E.IMG(**{"src": img_name, "class": "stretch"})
def imgw(img_name, width="970"):
    """Image with an explicit width attribute (default "970")."""
    element = E.IMG(src=img_name, width=width)
    return element
def imgh(img_name, height="620"):
    """Image with an explicit height attribute (default "620")."""
    element = E.IMG(src=img_name, height=height)
    return element
def mkgeolink(latitude, longitude):
    """Build a geo: URI link decorated with a globe icon.

    Coordinates are stripped of surrounding whitespace before being
    placed in the URI.
    """
    target = 'geo:{},{}'.format(latitude.strip(), longitude.strip())
    icon = E.IMG(E.CLASS('mwscrape2slob-geo-link-icon'), src='~/images/Globe.svg')
    return E.A(icon, href=target)
def img(img_name):
    """Plain image element pointing at the given source path."""
    element = E.IMG(src=img_name)
    return element
"Databases": "Databases (Custom)", "Design": "Design (Custom)", "General": "General (Custom)", "Integration": "Integration (Custom)", "Misc1": "Misc 1 (Custom)", "Misc2": "Misc 2 (Custom)", "Revisions": "Revisions (Custom)", "Shapes": "Shapes (Custom)", "Structure": "Structure (Custom)" } css = """\ img { border: 1px black solid } * { font-family: Arial; } """ paths = glob.glob("d:/Inetpub/wwwroot/images/xmetal/*.jpg") content = B.CENTER(B.H1("XMetaL CDR Icons")) for path in sorted(paths): if "xmetal_cdr" not in path and "Standard" not in path: name = path.replace("\\", "/").split("/")[-1][:-4] content.append(B.H2(names.get(name, name))) content.append(B.IMG(src="/images/xmetal/%s.jpg" % name)) page = B.HTML( B.HEAD( B.TITLE("XMetaL CDR Icons"), B.STYLE(css) ), B.BODY(content) ) xml = etree.tostring(page, pretty_print=True, encoding="unicode") print(f"Content-type: text/html\n\n{xml}")