# Example #1
0
from utils import (
    TemplateData,
    create_result_as_html,
)


if __name__ == '__main__':
    # Demo script: compare two web pages by text and by images, render the
    # comparison into an HTML report, and open that report in a browser.

    # os and webbrowser are used at the end of this script but are not
    # imported at file level; import them here to avoid a NameError.
    import os
    import webbrowser

    # NOTE(review): WebPage is not imported anywhere in the visible file; it
    # has to be brought into scope from the project module that defines it.
    w1 = WebPage(
        url="http://automotive.about.com/b/2012/06/14/how-to-become-a-new-car-dealer.htm"
    )
    w2 = WebPage(
        url="http://ipod.about.com/od/KidsiPhoneiPodTouch/tp/Roadtrips-With-Iphone-And-Apps.htm"
    )

    text_similarity = w1.get_text_similarity(w2)

    # The method name is spelled "similatiry" in the WebPage API used here.
    image_similarity = w1.get_image_similatiry(w2)

    # Bundle everything the report renderer receives into one object.
    template_data = TemplateData(
        template_path="template.html",
        first_web_page=w1,
        second_web_page=w2,
        text_similarity=text_similarity,
        image_similarity=image_similarity,
        html_path="./Output.html")

    create_result_as_html(template_data)

    # NOTE(review): html_path above is relative to the current working
    # directory, while the URL below points at the directory containing this
    # script. Presumably the script is meant to be run from its own
    # directory; otherwise the two locations differ - confirm.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    webbrowser.open_new("file://{}/Output.html".format(current_dir))
# Example #2
0
from utils import print_sorted_dict
from os import path


if __name__ == '__main__':
    # Demo script: print the text and image similarity of two web pages,
    # along with the per-category and per-image-pair details.
    #
    # Every print below is a call with exactly one argument. That form emits
    # the same text under the Python 2 print statement and the Python 3 print
    # function, so the script no longer fails with a SyntaxError on Python 3.

    # NOTE(review): WebPage is not imported anywhere in the visible file; it
    # has to be brought into scope from the project module that defines it.
    w1 = WebPage(
        url="http://automotive.about.com/b/2012/06/14/how-to-become-a-new-car-dealer.htm"
    )
    w2 = WebPage(
        url="http://ipod.about.com/od/KidsiPhoneiPodTouch/tp/Roadtrips-With-Iphone-And-Apps.htm"
    )

    print("Text Similarity for w1 and w2")
    comparison = w1.get_text_similarity(w2)

    # Two spaces after the colon reproduce the output of the former
    # `print "Result: ", value` statement (literal space + separator space).
    print("Result:  {}".format(comparison.similarity_result))
    print("Important categories:")
    for cat in comparison.important_categories:
        print(cat)

    print_sorted_dict(w1.content.categories_membership, "car")
    print_sorted_dict(w2.content.categories_membership, "ipod")

    print("\nMany images:")
    # The method name is spelled "similatiry" in the WebPage API used here.
    comparison = w1.get_image_similatiry(w2)
    print("Result:  {}".format(comparison.similarity_result))
    print("Pictures with each other:")
    for pair, result in comparison.image_pairs_results.items():
        # Each key is unpacked as a pair of paths; show only the file names.
        first, sec = (path.basename(name) for name in pair)
        print("{} : {} is {}".format(first, sec, result))