def Assess_HTML_SIM(generatedHTML, targetHTML):
    """Print three similarity scores for a generated/target HTML pair.

    The scores come from the ``html_similarity`` package and are printed in
    this order, each preceded by a dashed separator line: style similarity,
    structural similarity, and the package's combined ``similarity``.

    Args:
        generatedHTML: HTML markup passed as the first argument to each metric.
        targetHTML: HTML markup passed as the second argument to each metric.

    Returns:
        None. The results are only written to standard output.
    """
    # Imported locally so the dependency is only needed when this is called.
    from html_similarity import similarity, structural_similarity, style_similarity

    separator = "---------------------------"
    metrics = (style_similarity, structural_similarity, similarity)
    for metric in metrics:
        print(separator)
        print(metric(generatedHTML, targetHTML))
def compare(html1, html2):
    """Return the (style, structural) similarity pair for two HTML inputs.

    Args:
        html1: First HTML document.
        html2: Second HTML document.

    Returns:
        A 2-tuple ``(style_rate, struc_rate)``. When the two inputs compare
        equal the result is ``(1, 1)`` and neither similarity function is
        called; otherwise the values come from ``style_similarity`` and
        ``structural_similarity`` respectively.
    """
    identical = html1 == html2
    if identical:
        # Equal inputs: skip the similarity computation entirely.
        return 1, 1

    return style_similarity(html1, html2), structural_similarity(html1, html2)
def _flatten_whitespace(text):
    """Return *text* with every newline and tab replaced by one space."""
    return text.replace("\n", " ").replace("\t", " ")


def _read_text(path):
    """Read the whole file at *path* as text and close the handle."""
    with open(path) as handle:
        return handle.read()


def main(lang1, lang2, url1, url2, file1, file2, enc1, enc2):
    """Load two HTML documents, report their similarity, and emit one record.

    When both ``url1`` and ``url2`` are truthy the documents are downloaded
    with ``requests`` and decoded with ``enc1`` / ``enc2`` (undecodable bytes
    are replaced). Otherwise both documents are read from ``file1`` and
    ``file2`` and placeholder URLs of the form ``http://test.com/<lang>`` are
    used in the output.

    Newlines and tabs in both documents are replaced by spaces; presumably
    this keeps the HTML from adding extra fields or lines to the
    tab-separated record printed at the end.

    Args:
        lang1: Language label of the first document.
        lang2: Language label of the second document.
        url1: URL of the first document, or a falsy value to use ``file1``.
        url2: URL of the second document, or a falsy value to use ``file2``.
        file1: Path of the first document (used when a URL is missing).
        file2: Path of the second document (used when a URL is missing).
        enc1: Encoding used to decode the downloaded first document.
        enc2: Encoding used to decode the downloaded second document.

    Returns:
        None. The three similarity scores are written to standard error and
        a tab-separated record
        ``dummy_key, lang1, url1, html1, lang2, url2, html2`` to standard
        output.
    """
    if url1 and url2:
        html1 = _flatten_whitespace(
            requests.get(url1).content.decode(enc1, "replace"))
        html2 = _flatten_whitespace(
            requests.get(url2).content.decode(enc2, "replace"))
    else:
        # NOTE(review): enc1/enc2 are not applied to local files; they are
        # read with the platform default encoding, as before.
        # Files are read through a context manager so the handles are closed
        # instead of being left for the garbage collector.
        html1 = _flatten_whitespace(_read_text(file1))
        html2 = _flatten_whitespace(_read_text(file2))
        url1 = "http://test.com/{:s}".format(lang1)
        url2 = "http://test.com/{:s}".format(lang2)

    style_score = style_similarity(html1, html2)
    structural_score = structural_similarity(html1, html2)
    overall_score = similarity(html1, html2)
    print("style_similarity={:f} structural_similarity={:f} similarity={:f}".format(
        style_score, structural_score, overall_score), file=sys.stderr)

    print("\t".join(["dummy_key", lang1, url1, html1, lang2, url2, html2]))
def test_no_styles_similarity():
    """Two fragments that carry no CSS classes must have style similarity 1."""
    first_fragment = '<h1>No class here</h1>'
    second_fragment = '<h1>Look no css class here</h1>'

    score = style_similarity(first_fragment, second_fragment)

    assert score == 1
def test_style_similarity():
    """The ``html1`` and ``html2`` fixtures must have style similarity 1.

    NOTE(review): ``html1`` and ``html2`` are not defined in this function;
    presumably they are module-level fixtures defined elsewhere in the file.
    """
    score = style_similarity(html1, html2)

    assert score == 1