def compare_versions():
    """Diff two stored HTML versions named by the submitted form.

    Reads ``v_o_id``/``v_o`` and ``v_t_id``/``v_t`` from ``request.form``,
    loads the HTML of each version through
    ``ScriptData.get_html_for_version`` and returns the output of ``diff``.

    When both documents are shorter than 10000 characters they are diffed
    as-is.  Otherwise the paragraphs that are identical at the start and at
    the end of both documents are set aside, only the differing middle part
    is passed to ``diff``, and the untouched paragraphs are glued back around
    the result (presumably to keep the diff input small -- confirm).

    Returns
    -------
    The string produced by ``diff``, wrapped in the common leading and
    trailing paragraphs when the large-document path is taken.
    """
    resource_id_1 = request.form['v_o_id']
    resource_id_2 = request.form['v_t_id']
    version_1 = request.form['v_o']
    version_2 = request.form['v_t']
    html_1 = ScriptData.get_html_for_version(resource_id_1, version_1)
    html_2 = ScriptData.get_html_for_version(resource_id_2, version_2)
    if len(html_1) < 10000 and len(html_2) < 10000:
        return diff(html_1, html_2)

    # Peel off leading paragraphs (everything up to and including each
    # '</p>') for as long as both documents start with the same text.
    leading_parts = []
    while '</p>' in html_1:
        first_tag = html_1.index('</p>') + 4
        if html_1[:first_tag] != html_2[:first_tag]:
            break
        leading_parts.append(html_1[:first_tag])
        html_1 = html_1[first_tag:]
        html_2 = html_2[first_tag:]

    # Peel off trailing paragraphs (from the last '<p CLASS=' to the end)
    # for as long as both documents end with the same text.
    trailing_parts = []
    while '</p>' in html_1:
        last_tag_start = html_1.rfind('<p CLASS=')
        if last_tag_start == -1:
            # No opening tag left to anchor a trailing paragraph on.
            break
        tail = html_1[last_tag_start:]
        if not html_2.endswith(tail):
            break
        trailing_parts.append(tail)
        html_1 = html_1[:last_tag_start]
        html_2 = html_2[:len(html_2) - len(tail)]

    # Trailing paragraphs were collected last-to-first; restore document
    # order before re-attaching them.
    trailing_parts.reverse()

    return ''.join(leading_parts) + diff(html_1, html_2) + ''.join(trailing_parts)
示例#2
0
def compare_versions():
    """Diff two stored HTML versions named by the submitted form.

    Reads ``v_o_id``/``v_o`` and ``v_t_id``/``v_t`` from ``request.form``,
    loads the HTML of each version through
    ``ScriptData.get_html_for_version`` and returns the output of ``diff``.

    When both documents are shorter than 10000 characters they are diffed
    as-is.  Otherwise the paragraphs that are identical at the start and at
    the end of both documents are set aside, only the differing middle part
    is passed to ``diff``, and the untouched paragraphs are glued back around
    the result (presumably to keep the diff input small -- confirm).

    Returns
    -------
    The string produced by ``diff``, wrapped in the common leading and
    trailing paragraphs when the large-document path is taken.
    """
    resource_id_1 = request.form['v_o_id']
    resource_id_2 = request.form['v_t_id']
    version_1 = request.form['v_o']
    version_2 = request.form['v_t']
    html_1 = ScriptData.get_html_for_version(resource_id_1, version_1)
    html_2 = ScriptData.get_html_for_version(resource_id_2, version_2)
    if len(html_1) < 10000 and len(html_2) < 10000:
        return diff(html_1, html_2)

    # Peel off leading paragraphs (everything up to and including each
    # '</p>') for as long as both documents start with the same text.
    leading_parts = []
    while '</p>' in html_1:
        first_tag = html_1.index('</p>') + 4
        if html_1[:first_tag] != html_2[:first_tag]:
            break
        leading_parts.append(html_1[:first_tag])
        html_1 = html_1[first_tag:]
        html_2 = html_2[first_tag:]

    # Peel off trailing paragraphs (from the last '<p CLASS=' to the end)
    # for as long as both documents end with the same text.
    trailing_parts = []
    while '</p>' in html_1:
        last_tag_start = html_1.rfind('<p CLASS=')
        if last_tag_start == -1:
            # No opening tag left to anchor a trailing paragraph on.
            break
        tail = html_1[last_tag_start:]
        if not html_2.endswith(tail):
            break
        trailing_parts.append(tail)
        html_1 = html_1[:last_tag_start]
        html_2 = html_2[:len(html_2) - len(tail)]

    # Trailing paragraphs were collected last-to-first; restore document
    # order before re-attaching them.
    trailing_parts.reverse()

    return ''.join(leading_parts) + diff(html_1, html_2) + ''.join(trailing_parts)
示例#3
0
def main(argv=None):
    """Print the ``diff`` of two UTF-8 encoded HTML files.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; ``argv[1]`` and ``argv[2]`` are the paths of the
        two files to compare.  ``sys.argv`` is used when this is empty or
        ``None``.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover
    with codecs.open(argv[1], 'r', 'utf-8') as file_a:
        html_a = file_a.read()
    with codecs.open(argv[2], 'r', 'utf-8') as file_b:
        html_b = file_b.read()
    # Parenthesised single-argument print behaves the same as the Python 2
    # print statement and is also valid Python 3.
    print(diff(html_a, html_b, cutoff=0.0, pretty=False))
示例#4
0
def main(argv=None):
    """Print the pretty-printed ``diff`` of two HTML files.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; ``argv[1]`` and ``argv[2]`` are the paths of the
        two files to compare.  ``sys.argv`` is used when this is empty or
        ``None``.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover
    with open(argv[1]) as file_a:
        html_a = file_a.read()
    with open(argv[2]) as file_b:
        html_b = file_b.read()
    # Parenthesised single-argument print behaves the same as the Python 2
    # print statement and is also valid Python 3.
    print(diff(html_a, html_b, cutoff=0.0, pretty=True))
示例#5
0
def diff_html(ref_path, gen_path):
    """Check a generated HTML file against a reference HTML file.

    Both files are read and passed to ``diff``.  The resulting markup is
    written to a scratch file ``result.html`` in the current directory,
    parsed with lxml's HTML parser and searched for ``<ins>`` and ``<del>``
    elements.  When any are found, the markup is saved next to the generated
    file as ``<gen_path>.diff.html`` (with a stylesheet link prepended) and a
    hint is written to stderr.

    Parameters
    ----------
    ref_path : str
        Path of the reference HTML file.
    gen_path : str
        Path of the generated HTML file to check.

    Returns
    -------
    bool
        ``True`` when no ``<ins>``/``<del>`` elements were found,
        ``False`` otherwise.
    """
    with open(ref_path) as ref_file:
        ref_html = ref_file.read()
    with open(gen_path) as gen_file:
        gen_html = gen_file.read()

    result = diff(ref_html, gen_html, cutoff=0.0, pretty=True)

    with open('result.html', 'w') as f:
        f.write(result)

    try:
        parser = etree.HTMLParser()
        tree = etree.parse('result.html', parser=parser)
        root = tree.getroot()

        inserted = root.findall(".//ins")
        deleted = root.findall(".//del")
    finally:
        # Always remove the scratch file, even when parsing fails.
        os.unlink('result.html')

    if not (inserted or deleted):
        return True

    diff_filename = '%s.diff.html' % gen_path
    sys.stderr.write("There's a difference, you can open %s"
                     " with your browser to look at it\n" % diff_filename)
    with open(diff_filename, 'w') as f:
        # The trailing space keeps the type and href attributes separated
        # once the two adjacent literals are concatenated.
        f.write('<head><link rel="stylesheet" type="text/css" '
                'href="../diff_highlight.css"></head>')
        f.write(result)
    return False
示例#6
0
def main(argv=None):
    """Print the pretty-printed ``diff`` of two HTML files.

    ``argv[1]`` and ``argv[2]`` name the files to compare; ``sys.argv`` is
    used when ``argv`` is empty or ``None``.
    """
    args = argv or sys.argv
    with open(args[1]) as handle:
        before = handle.read()
    with open(args[2]) as handle:
        after = handle.read()
    rendered = diff(before, after, cutoff=0.0, pretty=True)
    print(rendered)
示例#7
0
def main(argv=None):
    """Diff two UTF-16 HTML files and save the result to ``output.html``.

    ``argv[1]`` and ``argv[2]`` name the files to compare; ``sys.argv`` is
    used when ``argv`` is empty or ``None``.  The pretty-printed ``diff``
    output is written, UTF-16 encoded, to ``output.html`` in the current
    directory.
    """
    args = argv or sys.argv
    with codecs.open(args[1], 'r', 'utf-16') as source:
        before = source.read()
    with codecs.open(args[2], 'r', 'utf-16') as source:
        after = source.read()
    with codecs.open('output.html', 'w', 'utf-16') as sink:
        sink.write(diff(before, after, cutoff=0.0, pretty=True))
示例#8
0
def main(argv=None):
    """Diff two HTML files into a randomly named file and print its path.

    ``argv[1]`` and ``argv[2]`` name the files to compare; ``sys.argv`` is
    used when ``argv`` is empty or ``None``.  The pretty-printed ``diff``
    output is written to ``tmp/doc_diff/output_<token>.html`` where
    ``<token>`` comes from ``secrets.token_urlsafe(6)``, and that path is
    printed to stdout.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover
    with open(argv[1]) as file_a:
        html_a = file_a.read()
    with open(argv[2]) as file_b:
        html_b = file_b.read()
    # Compute the diff before touching the output file so that a failing
    # diff does not leave an empty file behind.
    rendered = diff(html_a, html_b, cutoff=0.0, pretty=True)
    output_filename = f"tmp/doc_diff/output_{secrets.token_urlsafe(6)}.html"
    # The context manager closes the file even if the write raises.
    with open(output_filename, "w") as output_file:
        output_file.write(rendered)
    print(output_filename)
def html_tree_diff(a_text, b_text):
    """Return the ``htmltreediff`` markup of two documents with diff styling.

    Insertions and deletions are wrapped in ``diffins`` and ``diffdel``
    tags, and a fixed stylesheet colouring those tags is added to the
    result via ``insert_style``.
    """
    stylesheet = """
diffins {text-decoration : none; background-color: #d4fcbc;}
diffdel {text-decoration : none; background-color: #fbb6c2;}
diffins * {text-decoration : none; background-color: #d4fcbc;}
diffdel * {text-decoration : none; background-color: #fbb6c2;}
    """
    diff_options = {
        'ins_tag': 'diffins',
        'del_tag': 'diffdel',
        'pretty': True,
    }
    marked_up = htmltreediff.diff(a_text, b_text, **diff_options)
    return insert_style(marked_up, stylesheet)
def html_tree_diff(a_text, b_text):
    """Return the styled ``htmltreediff`` markup of two documents.

    Insertions and deletions are wrapped in ``diffins`` and ``diffdel``
    tags; their background colours come from the ``differ_insertion`` and
    ``differ_deletion`` entries of ``get_color_palette()``.  The result of
    ``insert_style`` is returned under the ``'diff'`` key of a dict.
    """
    palette = get_color_palette()
    insertion = palette['differ_insertion']
    deletion = palette['differ_deletion']
    stylesheet = f'''
diffins {{text-decoration : none; background-color:
    {insertion};}}
diffdel {{text-decoration : none; background-color:
    {deletion};}}
diffins * {{text-decoration : none; background-color:
    {insertion};}}
diffdel * {{text-decoration : none; background-color:
    {deletion};}}
    '''
    diff_options = {
        'ins_tag': 'diffins',
        'del_tag': 'diffdel',
        'pretty': True,
    }
    marked_up = htmltreediff.diff(a_text, b_text, **diff_options)
    # TODO Count number of changes.
    return {'diff': insert_style(marked_up, stylesheet)}
示例#11
0
def diff(a_text, b_text):
    """
    Wraps the ``htmltreediff`` package with the standard arguments and output
    format used by all diffs in ``web-monitoring-diff``.

    ``htmltreediff`` parses HTML documents into an XML DOM and attempts to diff
    the document *structures*, rather than look at streams of tags & text
    (like ``htmldiffer``) or the readable text content of the HTML (like
    ``web_monitoring_diff.html_render_diff``). Because of this, it can give
    extremely accurate and detailed information for documents that are very
    similar, but its output gets complicated or opaque as the two documents
    diverge in structure. It can also be very slow.

    In practice, we've found that many real-world web pages vary their
    structure enough (over periods as short as a few months) to reduce the
    value of this diff. It's best used for narrowly-defined scenarios like:

    - Comparing versions of a page that are very similar, often at very close
      points in time.

    - Comparing XML structures you can expect to be very similar, like XML API
      responses, RSS documents, etc.

    - Comparing two documents that were generated from the same template with
      differing underlying data. (Assuming the template is fairly rigid, and
      does not leave too much document structure up to the underlying data.)

    ``htmltreediff`` is no longer under active development; we maintain a fork
    with minimal fixes and Python 3 support. It is not available on PyPI, so
    you must install via git::

        $ pip install git+https://github.com/danielballan/htmltreediff@customize

    You can also install all experimental differs with::

        $ pip install -r requirements-experimental.txt

    Parameters
    ----------
    a_text : string
        Source HTML of one document to compare
    b_text : string
        Source HTML of the other document to compare

    Returns
    -------
    dict
        A single-entry mapping whose ``'diff'`` value is the ``htmltreediff``
        output (insertions in ``diffins`` tags, deletions in ``diffdel``
        tags) after ``insert_style`` has added the palette-based stylesheet.
    """
    palette = get_color_palette()
    insertion_color = palette['differ_insertion']
    deletion_color = palette['differ_deletion']
    stylesheet = f'''
diffins {{text-decoration : none; background-color:
    {insertion_color};}}
diffdel {{text-decoration : none; background-color:
    {deletion_color};}}
diffins * {{text-decoration : none; background-color:
    {insertion_color};}}
diffdel * {{text-decoration : none; background-color:
    {deletion_color};}}
    '''
    tree_diff = htmltreediff.diff(
        a_text,
        b_text,
        ins_tag='diffins',
        del_tag='diffdel',
        pretty=True,
    )
    # TODO Count number of changes.
    styled = insert_style(tree_diff, stylesheet)
    return {'diff': styled}
示例#12
0
文件: diff.py 项目: hhroc/WebWatcher
from htmltreediff import diff
from kitchen.text.converters import to_unicode


# Read both sample documents; the context managers close each file even if
# reading fails.
with open('test1.html', 'r') as f1:
    v1 = f1.read()
with open('test2.html', 'r') as f2:
    v2 = f2.read()

# Parenthesised single-argument print behaves the same as the Python 2 print
# statement and is also valid Python 3.
print(diff(to_unicode(v1), to_unicode(v2), pretty=True))
示例#13
0
from htmltreediff import diff
from kitchen.text.converters import to_unicode

# Read both sample documents; the context managers close each file even if
# reading fails.
with open('test1.html', 'r') as f1:
    v1 = f1.read()
with open('test2.html', 'r') as f2:
    v2 = f2.read()

# Parenthesised single-argument print behaves the same as the Python 2 print
# statement and is also valid Python 3.
print(diff(to_unicode(v1), to_unicode(v2), pretty=True))
示例#14
0
from htmltreediff import diff


# Diff two one-line headings.  The second sample previously lacked the '>'
# closing its opening <h1> tag, which looks like a typo rather than a
# deliberate malformed-input case -- NOTE(review): confirm.
# Parenthesised print is valid on both Python 2 and Python 3.
print(diff('<h1>...one...</h1>', '<h1>...two...</h1>', pretty=True))