def test_extract_weheartit(self):
        """Train on one weheartit page and check predictions for two pages.

        The extractor is trained on ``weheartit_1.html`` with four labelled
        values, then ``predict`` is called on that page and on
        ``weheartit_2.html``. After each prediction the ``selector`` of the
        wrapper stored under ``best_wrappers["hearts"]`` is printed.
        """
        labelled_fields = {
            "username": "******",
            "twitter": "@orochimaru_sannin",
            "hearts": "33",
            "image": "https://data.whicdn.com/images/333255393/large.jpg",
        }
        # For the training page the expected output equals the labelled values.
        expected_seen = {
            "username": "******",
            "twitter": "@orochimaru_sannin",
            "hearts": "33",
            "image": "https://data.whicdn.com/images/333255393/large.jpg",
        }
        expected_unseen = {
            "username": "******",
            "twitter": "@soighne",
            "hearts": "68",
            "image": "https://data.whicdn.com/images/333254397/large.jpg",
        }

        with open("tests/samples/weheartit_1.html") as page:
            seen_html = page.read()
        extractor = Extractor()
        extractor.add_train_page(seen_html, labelled_fields)
        seen_prediction = extractor.predict(seen_html)
        print(extractor.best_wrappers["hearts"].selector)

        with open("tests/samples/weheartit_2.html") as page:
            unseen_html = page.read()
        unseen_prediction = extractor.predict(unseen_html)
        print(extractor.best_wrappers["hearts"].selector)

        assert seen_prediction == expected_seen
        assert unseen_prediction == expected_unseen
    def test_extract_escortsexe(self):
        """Train on one escortsexe page and check predictions for two pages.

        Training labels three scalar fields and two image URLs on
        ``escortsexe_1.html``. The expected outputs for that page and for
        ``escortsexe_2.html`` list more image URLs than were labelled.
        """
        labelled_fields = {
            "category": "Escort girls",
            "location": "Montpellier - 34000",
            "phone": "0659853862",
            "images": [
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190725_005216.jpg",
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190723_150822.jpg",
            ],
        }
        expected_seen = {
            "category": "Escort girls",
            "location": "Montpellier - 34000",
            "phone": "0659853862",
            "images": [
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190725_005216.jpg",
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190724_075838.jpg",
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190724_080614.jpg",
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190706_154210.jpg",
                "https://www.escortsexepic.net/classifieds/232404-classifieds-20190723_150822.jpg",
            ],
        }
        expected_unseen = {
            "category": "Massages",
            "location": "Nice - 06000",
            "phone": "0646179220",
            "images": [
                "https://www.escortsexepic.net/classifieds/179451-classifieds-lisa04.JPG",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-lisa.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-a905393_photo1.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-lisa07.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-1.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-86221-classifieds-lisa.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-elise1.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-86221-classifieds-IMG_20160719_180118.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-lisa08.jpg",
                "https://www.escortsexepic.net/classifieds/179451-classifieds-lisa11.jpg",
            ],
        }

        with open("tests/samples/escortsexe_1.html") as page:
            seen_html = page.read()
        extractor = Extractor()
        extractor.add_train_page(seen_html, labelled_fields)

        with open("tests/samples/escortsexe_2.html") as page:
            unseen_html = page.read()

        # Training page first, then the page the extractor was not trained on.
        assert extractor.predict(seen_html) == expected_seen
        assert extractor.predict(unseen_html) == expected_unseen
    def test_extract_pypi(self):
        """Train on one PyPI project page and check predictions for two pages.

        Training labels the project name, a namespace URL, two maintainers
        and their two profile pictures on ``pypi_1.html``. The expected
        outputs for that page and for ``pypi_2.html`` contain longer
        maintainer and picture lists than were labelled.
        """
        labelled_fields = {
            "name": "pip 19.2.1",
            "w3": "http://ogp.me/ns#",
            "maintainers": ["cjerdonek", "dstufft"],
            "maintainers_profile_pictures": [
                "https://warehouse-camo.cmh1.psfhosted.org/697af4520c5134f9d47c5647352f0a1a83bac949/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f39623531336565376363343030633962373337346634363937613165363961643f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/6d0424bff7dd2ff3855b621bf1470d578040d430/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f65626631333233363262363232343233656435626163613239383839313162383f73697a653d3530",
            ],
        }
        expected_seen = {
            "name": "pip 19.2.1",
            "w3": "http://ogp.me/ns#",
            "maintainers": ["cjerdonek", "dstufft", "pf_moore", "pradyunsg"],
            "maintainers_profile_pictures": [
                "https://warehouse-camo.cmh1.psfhosted.org/697af4520c5134f9d47c5647352f0a1a83bac949/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f39623531336565376363343030633962373337346634363937613165363961643f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/6d0424bff7dd2ff3855b621bf1470d578040d430/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f65626631333233363262363232343233656435626163613239383839313162383f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/1d0deb041bb7e8edce368279a37546324366eced/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f64393935623436326139386665613431326566613739643137626133373837613f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/f15785f37e0e3fb85805ffd0760ea2f7ad35cba0/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f64333733306664646430333536643334366666636437653466376137616261643f73697a653d3530",
            ],
        }
        expected_unseen = {
            "name": "Django 2.2.3",
            "w3": "http://ogp.me/ns#",
            "maintainers": [
                "apollo13",
                "carltongibson",
                "felixx",
                "jacobian",
                "Tim.Graham",
                "ubernostrum",
            ],
            "maintainers_profile_pictures": [
                "https://warehouse-camo.cmh1.psfhosted.org/04bfcf7860c8fffd7f686950c3cdcb81d8c61e45/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f31646339636564326637323165346266636239396534356135306231383366323f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/d9cf326b5aeb544a49654e29530691c03bdef3ec/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f64646564323130636631623537326636663832303836393562326663366562333f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/183c060dbdfbe42fe63df08f08badca85dca3bb9/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f36643037376136613161663037386435346631613231353535366537643436333f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/e524204ccab58fe3ec3ca04af176f6dbeaa50c3b/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f32663534363338333263636237363863636234613163613336303763323765663f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/a9e53e05771e61c8079939fe5a6553cd2eb19679/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f66613137303737373237336361376232333562646465376438306539663861363f73697a653d3530",
                "https://warehouse-camo.cmh1.psfhosted.org/095e1b32c90de718c30ab173b05be0cec0bb6ca4/68747470733a2f2f7365637572652e67726176617461722e636f6d2f6176617461722f31303835333466363564386432643764653639393539373363316634393838393f73697a653d3530",
            ],
        }

        extractor = Extractor()
        with open("tests/samples/pypi_1.html") as page:
            seen_html = page.read()
        extractor.add_train_page(seen_html, labelled_fields)

        with open("tests/samples/pypi_2.html") as page:
            unseen_html = page.read()

        # Training page first, then the page the extractor was not trained on.
        assert extractor.predict(seen_html) == expected_seen
        assert extractor.predict(unseen_html) == expected_unseen
    def test_extract_booking(self):
        """Train on one booking page and check predictions for two pages.

        Training labels the title, the address and two image URLs on
        ``booking_1.html``. The expected outputs for that page and for
        ``booking_2.html`` each list four image URLs.
        """
        train_set_booking = {
            "title": "Edgar Suites Expo Paris Porte de Versailles",
            "address": "1 Villa Thoreton, 15th arr., 75015 Paris, France",
            "images": [
                "https://s-ec.bstatic.com/images/hotel/max1024x768/140/140580527.jpg",
                "https://t-ec.bstatic.com/images/hotel/max1024x768/139/139935427.jpg",
            ],
        }
        res1 = {
            "title": "Edgar Suites Expo Paris Porte de Versailles",
            "address": "1 Villa Thoreton, 15th arr., 75015 Paris, France",
            "images": [
                "https://s-ec.bstatic.com/images/hotel/max1024x768/140/140580527.jpg",
                "https://t-ec.bstatic.com/images/hotel/max1024x768/139/139935427.jpg",
                "https://t-ec.bstatic.com/images/hotel/max1024x768/137/137731189.jpg",
                "https://s-ec.bstatic.com/images/hotel/max1024x768/170/170783702.jpg",
            ],
        }
        res2 = {
            "title": "Paseo de Gracia Bas Apartments Barcelona",
            "address": "Diputació, 268, Eixample, 08009 Barcelona, Spain",
            "images": [
                "https://s-ec.bstatic.com/images/hotel/max1024x768/164/164837377.jpg",
                "https://t-ec.bstatic.com/images/hotel/max1024x768/197/197698956.jpg",
                "https://t-ec.bstatic.com/images/hotel/max1024x768/197/197700755.jpg",
                "https://s-ec.bstatic.com/images/hotel/max1024x768/197/197699357.jpg",
            ],
        }

        # The expected address contains a non-ASCII character, so the fixtures
        # are decoded explicitly instead of with the locale's default encoding.
        # NOTE(review): assumes the sample files are stored as UTF-8.
        with open("tests/samples/booking_1.html", encoding="utf-8") as f:
            html1 = f.read()
        extractor_booking = Extractor()
        extractor_booking.add_train_page(html1, train_set_booking)

        with open("tests/samples/booking_2.html", encoding="utf-8") as f:
            html2 = f.read()

        # One assertion per page so a failure identifies the offending page.
        assert extractor_booking.predict(html1) == res1
        assert extractor_booking.predict(html2) == res2
    def test_extract_tunisimmo(self):
        """Train on two tunisimmo pages and check extraction on the first.

        Only the prediction for ``tunisimmo_1.html`` is asserted. A third
        label set and the expected output for ``tunisimmo_2.html`` are kept
        as reference data but are not exercised; see the notes below.
        """
        train_set_tunisimmo_1 = {
            "prix": "290DN",
            "contact": "moncef",
            "localisation": "Monastir",
            "titre": "Coquet studio - 290DN",
            "description": "Studio composé d'un salon, cuisine à l'américaine, chambre à coucher, salle d'eau avec douche wc lavabo, un petit balcon séchoir, et branchement d'une machine à laver. Gaz de ville, climatiseur, connexion ADSL, parabole collective. Au 2ème étage d'un immeuble récent, ascenseur, interphone, accès avec une clé, parking gardé la nuit. L'immeuble Violettes 3 (banefsej 3) est situé derrière l'ENIT, et proche de Skanes kobba. NON meublé, location à l'année, garanties exigées. 290 DT par mois et caution de 2 mois de loyer.",
            "images": [
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_1.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_3.jpg",
            ],
        }
        train_set_tunisimmo_2 = {
            "prix": "900DN",
            "contact": "SAMI",
            "localisation": "Ariana",
            "titre": "Appartement S+3 avec place parking - 900DN",
            "description": "appartement s+3 avec place de parking . résidente gardé avec deux ascenseurs.salon + 3 chambre + salle d'eau + salle de bain . avec 4 climatisseurset 3 balcons.4 ieme etages",
            "images": [
                "https://www.tunisimmo.com/images/2018/11/15/5193/thumb_appartement-s3-avec-place-parking_3.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5193/appartement-s3-avec-place-parking_2.jpg",
            ],
        }
        # NOTE(review): this label set is not used for training. Which sample
        # file holds the matching listing cannot be determined from this test;
        # confirm the fixture before adding a third add_train_page call.
        train_set_tunisimmo_3 = {
            "prix": "1500DN",
            "contact": "Le Jasmin Immobiliere",
            "localisation": "Sousse",
            "titre": "Coquet Bungalow Vue Piscine - 1500DN",
            "description": "LE JASMIN immobilière met en location un joli bungalow S+1, surface de 80 m2, à Marina El Kantaoui. Se compose d’une chambre à coucher , sallon avec balcon vue magnifique sur piscine, une cuisine équipée et d'une salle de bain. Pour connaître plus d'information ou pour organiser un RDV, contacter nous par email ou téléphonez pendant les heures de bureau.",
            "images": [
                "https://www.tunisimmo.com/images/2018/11/15/5254/thumb_coquet-bungalow-vue-piscine_5.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5254/thumb_coquet-bungalow-vue-piscine_2.jpg",
            ],
        }

        # The labelled and expected strings contain accented characters, so
        # the fixtures are decoded explicitly instead of with the locale's
        # default encoding.
        # NOTE(review): assumes the sample files are stored as UTF-8.
        with open("tests/samples/tunisimmo_1.html", encoding="utf-8") as f:
            html1 = f.read()
        # NOTE(review): this page is paired with train_set_tunisimmo_2 although
        # it is read from tunisimmo_3.html; confirm the file name is intended.
        # (The file used to be read a second time into an unused variable;
        # that duplicate read has been dropped.)
        with open("tests/samples/tunisimmo_3.html", encoding="utf-8") as f:
            html2 = f.read()

        extractor_tunisimmo = Extractor()
        extractor_tunisimmo.add_train_page(html1, train_set_tunisimmo_1)
        extractor_tunisimmo.add_train_page(html2, train_set_tunisimmo_2)

        res1 = {
            "prix": "290DN",
            "contact": "moncef",
            "localisation": "Monastir",
            "titre": "Coquet studio  - 290DN",
            "description": "Description\nStudio composé d'un salon, cuisine à l'américaine, chambre à coucher, salle d'eau avec douche wc lavabo, un petit balcon séchoir, et branchement d'une machine à laver. Gaz de ville, climatiseur, connexion ADSL, parabole collective.Au 2ème étage d'un immeuble récent, ascenseur, interphone, accès avec une clé, parking gardé la nuit.L'immeuble Violettes 3 (banefsej 3) est situé derrière l'ENIT, et proche de Skanes kobba.NON meublé, location à l'année, garanties exigées.290 DT par mois et caution de 2 mois de loyer.",
            "images": [
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_1.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_2.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_3.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_4.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/5122/thumb_coquet-studio_5.jpg",
            ],
        }

        # Page that is not part of the training set, with its expected output.
        # NOTE(review): nothing is asserted against res2. Confirm whether
        # predict(html_unseen) == res2 is meant to hold before enabling it.
        with open("tests/samples/tunisimmo_2.html", encoding="utf-8") as f:
            html_unseen = f.read()
        res2 = {
            "prix": "400DN",
            "contact": "casa rayen",
            "localisation": "Nabeul",
            "titre": "location des appartements    - 400DN",
            "description": "Description\nVous désirez d’avoir\nun emplacement stratégique pour logement veuillez nous visiter notre\nagence à kélibia.\n\nCASA RAYEN vous offre\nla location des appartements bien équipés de qualités hautes standing avec une\nfinition chic et moderne.\n\n·        \nPour S+2 : prix mensuelle 500 dt\n\n·        \nPour S+1 : prix \nmensuelle 400 dt \n\nDécouvrez toutes nos\noffres sur notre page Facebook: Casa Rayen \n\nPour plus\nde renseignement contacter nous sur 23 012 582 – 72 219\n729",
            "images": [
                "https://www.tunisimmo.com/images/2018/11/15/6415/thumb_location-des-appartements_1.jpg",
                "https://www.tunisimmo.com/images/2018/11/15/6415/thumb_location-des-appartements_2.jpg",
            ],
        }

        assert extractor_tunisimmo.predict(html1) == res1
    def test_extract_tumblrgallery(self):
        """Train on one tumblrgallery page and check predictions for two pages.

        Training labels the title, the description and two image URLs on
        ``tumblrgallery_1.html``. The expected outputs for that page and for
        ``tumblrgallery_2.html`` list many more image URLs than were labelled.
        """
        labelled_fields = {
            "title": "The Vault Of The Atomic Space Age",
            "description": "Art,fashion,design,technology etc from the atomic space age",
            "images": [
                "https://78.media.tumblr.com/416ea221cb7cf2d2b98ce6ca98a3c354/tumblr_oyn5z3ioJL1qm25fso3_1280.jpg",
                "https://78.media.tumblr.com/6465f5b03b375b7c7213e4faaa9ec38a/tumblr_oyn5z3ioJL1qm25fso2_1280.jpg",
            ],
        }
        expected_seen = {
            "title": "The Vault Of The Atomic Space Age",
            "description": "Art,fashion,design,technology etc from the atomic space age",
            "images": [
                "https://78.media.tumblr.com/416ea221cb7cf2d2b98ce6ca98a3c354/tumblr_oyn5z3ioJL1qm25fso3_1280.jpg",
                "https://78.media.tumblr.com/57d6735ec253ab4f71e335e97661e638/tumblr_oyn5z3ioJL1qm25fso1_1280.jpg",
                "https://78.media.tumblr.com/6465f5b03b375b7c7213e4faaa9ec38a/tumblr_oyn5z3ioJL1qm25fso2_1280.jpg",
                "https://78.media.tumblr.com/d5d06fd0f953bd42ca41a4c73a5fde0f/tumblr_oyn5z3ioJL1qm25fso4_1280.jpg",
                "https://78.media.tumblr.com/02a5e0c6a7859c0087c6c768afa1764f/tumblr_oyn5z3ioJL1qm25fso5_1280.jpg",
                "https://78.media.tumblr.com/3df00869be8677679fd28ba12be4e02a/tumblr_oyn5z3ioJL1qm25fso6_1280.jpg",
                "https://78.media.tumblr.com/42c680d47b5e1d7b15d6279ff33d58b9/tumblr_oyn5z3ioJL1qm25fso7_1280.jpg",
                "https://78.media.tumblr.com/2db768487a39915e1517e81c42549810/tumblr_p4mg0u2HjM1qgggino1_1280.jpg",
                "https://78.media.tumblr.com/68c2bbd23f669a81c9d72034a09967cf/tumblr_p4nujnYWoO1qzpsi6o1_1280.jpg",
                "https://78.media.tumblr.com/bd44267e786927ebe539283872263c2f/tumblr_p3twj7gCeY1w3jzmno1_1280.jpg",
                "https://78.media.tumblr.com/db60329f7f821bccff5a278aaa7359c7/tumblr_ndwz1vedhb1s1447ao1_400.gif",
                "https://78.media.tumblr.com/8e503dc7cc91fc2c146e3456a2058980/tumblr_nwpb2ecbGh1skkfpco1_400.jpg",
                "https://78.media.tumblr.com/32f1b0cd4039486f23f1297138636a0c/tumblr_p4va3bfVvt1skkfpco1_1280.jpg",
                "https://78.media.tumblr.com/e954c1b9f87849fa9eea3cd2116c63c5/tumblr_p4jz15BRE71ug5c9zo1_1280.jpg",
                "https://78.media.tumblr.com/41b25029cdd6cb3d7abdea8b6ea2c9f6/tumblr_p523fcHjLM1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/878195ac5da73e70a298af403b679dff/tumblr_p523ehTxhL1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/acff14951f06927218bfd7c38e233222/tumblr_p523drE12N1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/bbe0325ffd10b1278d7a833d7f01f0e5/tumblr_p523d3YpHF1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/a30b530c64ebe1256c67099887df1ed5/tumblr_p523cdDq0A1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/cffec650bf3a5f5e14851019675357be/tumblr_p523buRkQq1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/d1c61c44c7c45cc58397ee08670f0cf9/tumblr_p523bbVkX21uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/7d8678022006af661993a4f9e06a49ba/tumblr_p523am04H21uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/dc695f2dde353ba26f4b49e5bc5a8a6b/tumblr_p523a2Fb7o1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/f57b2e52ad7fdf0ee7a3f4dae1545dc5/tumblr_p5239jDMeD1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/c0ac9666d18811d8c9fec957ebc062a8/tumblr_p52392jlWF1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/d60fe82e0514d96805837c3e9fd0ccb3/tumblr_p5238j3fbB1uu4f9zo1_1280.jpg",
                "https://78.media.tumblr.com/2c379daff931438dcb4ea64170052458/tumblr_p3as3446l91wyn6mwo1_1280.jpg",
                "https://78.media.tumblr.com/e8b9c4e61532bd3dec42f1b1e4398da1/tumblr_ov67hqsiaj1ue4wwmo1_1280.jpg",
                "https://78.media.tumblr.com/a1fab8b536809b559187c34a42bb9aec/tumblr_p3b7byQsZM1skkfpco2_r1_1280.jpg",
                "https://78.media.tumblr.com/e6c3652c9f5d6d7d7f4cf3f857ffc1a2/tumblr_p37tlhT8m51uu4f9zo1_1280.jpg",
            ],
        }
        expected_unseen = {
            "title": "Cozyhuarique",
            "description": "een knus plekje voor kunst,gedachten,foto's en leuke dingen ...",
            "images": [
                "https://78.media.tumblr.com/ca68fce8194c1d95526de3ca3bb999b5/tumblr_p55b23ZW5G1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/1e88fe7e865792cf4248938e45721cd4/tumblr_p55aiz7Hgh1sgzhgyo1_1280.png",
                "https://78.media.tumblr.com/90301cda91b07a7bfe1964e76c6e173a/tumblr_p55arfRKpo1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/3836d73e0496b39ef15ebac0772f403d/tumblr_p558qf2rnQ1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/ef989667c971e60a8cfca231e1efc170/tumblr_p556r8ZFtO1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/50d25af81261855e44ca1400a370849c/tumblr_p556uljhno1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/2ffa6bd47af1b58d158237b7206ca9c3/tumblr_p556b1cx0J1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/0d5f8ea14b9c1628258eb502d2a12597/tumblr_p551pg28xQ1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/10faccd7bf4d3fe91c5d2ef986c7d3d0/tumblr_p54qryVBst1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/59053b1bb08da87ed675ae0e5e3c581d/tumblr_ms61tzmQdb1qkc8v3o1_1280.png",
                "https://78.media.tumblr.com/1c9b376e909aa91fb11ab2ba30a98abc/tumblr_o8mw6oTW0S1sgzhgyo2_r1_1280.jpg",
                "https://78.media.tumblr.com/79eff9add2393b109e7f6a134491b90f/tumblr_p545djb0Wd1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/9c943da20ebdbde6194fa714e4cefa61/tumblr_ns9rlhaCv81sgzhgyo1_r1_1280.jpg",
                "https://78.media.tumblr.com/1c1261d30245d95c4b92bcd7012e47ed/tumblr_p54rlzDsRp1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/5d32d6bdbd7443846c1dd2ffc149dc39/tumblr_p54qvf2C011sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/c664b43ae5ea11c24462ddf1084ccb88/tumblr_p54hqbK1d11sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/ad583a3bd5b02c7929185a355b7a8ca6/tumblr_p54g6kIP9S1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/87b9275fa15fdb130845b105f6151ba2/tumblr_p54gw6k1yr1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/f7926c92085525d3e4117cb3d2fb1586/tumblr_p54fxfv1In1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/tumblr_m3wvv2f63t1qcwl12o1_1280.gif",
                "https://78.media.tumblr.com/f7bfd8d364dcf3f9df1d9bc3d0794b35/tumblr_o9lwylQzf41sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/5aba1da53cd3c58bb8b7f0bdfcf88565/tumblr_ob6ky7i3my1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/24b8aa43d7c0a2fa729f5e79c1c5605b/tumblr_ntd27k0GlA1upwifco1_1280.gif",
                "https://78.media.tumblr.com/a4a54abd248d5a6ee566fefa28c42fd2/tumblr_ntxrvvA3j61sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/4c87c07d9c7db35b876185253a8ada70/tumblr_nvigluQgOn1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/546f427d86519938851c124f42955334/tumblr_n5eu8eLcAZ1rc2ub3o1_1280.jpg",
                "https://78.media.tumblr.com/a950119fa9451e8d69f5bdc7ca34e8ca/tumblr_oynlmqUsnO1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/a5d4cdf5fa95e534c8ea55cf5b617b03/tumblr_p078lbGV7w1sgzhgyo1_1280.jpg",
                "https://78.media.tumblr.com/fdff0d5fd9d244782edb057466dda654/tumblr_nbemrjFy4e1rod4jeo1_1280.jpg",
                "https://78.media.tumblr.com/f1b20e938c31e98e04734e560125938d/tumblr_mkgeb0UbPA1qd5d9ho1_1280.jpg",
            ],
        }

        with open("tests/samples/tumblrgallery_1.html") as page:
            seen_html = page.read()
        extractor = Extractor()
        extractor.add_train_page(seen_html, labelled_fields)

        with open("tests/samples/tumblrgallery_2.html") as page:
            unseen_html = page.read()

        # Training page first, then the page the extractor was not trained on.
        assert extractor.predict(seen_html) == expected_seen
        assert extractor.predict(unseen_html) == expected_unseen
    def test_extract_escortfish(self):
        """Train on one escortfish page and check predictions for two pages.

        Training labels four scalar fields and two image URLs on
        ``escortfish_1.html``. The expected outputs for that page and for
        ``escortfish_2.html`` list more image URLs than were labelled.
        """
        train_set_escortfish = {
            "title": "Only in town for three days",
            "location": "Cumberland Valley, MD",
            "age": "25",
            "description": "25_38dd Italian Candy looking for someone to enjoy my short stay with. Not many open appointments left. Sexy clean discreet fun. Fetishes okay. Two girls available upon request. No law enforcement. Serious inquiries ONLY",
            "images": [
                "https://cdn.escortfish.ch/images/7b0fbd_thumb_lg.jpg",
                "https://cdn.escortfish.ch/images/z7oLbt.jpg",
            ],
        }
        res1 = {
            "title": "Only in town for three days",
            "location": "Cumberland Valley, MD",
            "age": "25",
            "description": "25_38dd Italian Candy looking for someone to enjoy my short stay with. Not many open appointments left. Sexy clean discreet fun. Fetishes okay. Two girls available upon request. No law enforcement. Serious inquiries ONLY",
            "images": [
                "https://cdn.escortfish.ch/images/7b0fbd_thumb_lg.jpg",
                "https://cdn.escortfish.ch/images/298fb0_thumb_lg.jpg",
                "https://cdn.escortfish.ch/images/z7oLbt_thumb_lg.jpg",
                "https://cdn.escortfish.ch/images/2bd180_thumb_lg.jpg",
                "https://cdn.escortfish.ch/images/7b0fbd.jpg",
                "https://cdn.escortfish.ch/images/298fb0.jpg",
                "https://cdn.escortfish.ch/images/z7oLbt.jpg",
                "https://cdn.escortfish.ch/images/2bd180.jpg",
            ],
        }
        res2 = {
            "title": "🌀🌀🌀HOT NEW GIRLS🌀🌀🌀UPSCALE MASSAGE🌀🌀🌀505-720-3334",
            "location": "Clovis - Portales, NM",
            "age": "24",
            "description": "NO BS HERE GUYS! WE HAVE THE BEST STAFF!!💋💘DON'T BE FOOLED BY IMITATIONS💋💘💋💘THIS IS THE BEST MASSAGE ACCUPRESSURE TREATMENT IN CLOVIS💋💘",
            "images": [
                "https://cdn.escortfish.ch/images/4vCboK_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/KfJYOE_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/ucfilh_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/034ZOq_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/VKxQ6l_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/EWeUWk_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/jqWsIp_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/RXDwJK_thumb_xl.jpg",
                "https://cdn.escortfish.ch/images/4vCboK.jpg",
                "https://cdn.escortfish.ch/images/KfJYOE.jpg",
                "https://cdn.escortfish.ch/images/ucfilh.jpg",
                "https://cdn.escortfish.ch/images/034ZOq.jpg",
                "https://cdn.escortfish.ch/images/VKxQ6l.jpg",
                "https://cdn.escortfish.ch/images/EWeUWk.jpg",
                "https://cdn.escortfish.ch/images/jqWsIp.jpg",
                "https://cdn.escortfish.ch/images/RXDwJK.jpg",
            ],
        }

        # The expected title and description for the second page contain
        # emoji, so the fixtures are decoded explicitly instead of with the
        # locale's default encoding.
        # NOTE(review): assumes the sample files are stored as UTF-8.
        with open("tests/samples/escortfish_1.html", encoding="utf-8") as f:
            html1 = f.read()
        extractor_escortfish = Extractor()
        extractor_escortfish.add_train_page(html1, train_set_escortfish)

        with open("tests/samples/escortfish_2.html", encoding="utf-8") as f:
            html2 = f.read()

        # One assertion per page so a failure identifies the offending page.
        assert extractor_escortfish.predict(html1) == res1
        assert extractor_escortfish.predict(html2) == res2
    def test_extract_lodgis(self):
        """Train on one lodgis page and check predictions for two pages.

        Training labels the price, the surface, the address and two image
        URLs on ``lodgis_1.html``. The expected outputs for that page and for
        ``lodgis_2.html`` list more image URLs than were labelled.
        """
        train_set_lodgis = {
            "price": "€308,900",
            "surface": "34.1",
            "address": "Rue De Belleville, Paris 19°",
            "images": [
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-kitchen-P12.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-bedroom--H12.jpg?v=1562856328",
            ],
        }
        res1 = {
            "price": "€308,900",
            "surface": "34.1",
            "address": "Rue De Belleville, Paris 19°",
            "images": [
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-kitchen-P11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-kitchen-P12.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-bedroom--H11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-bedroom--H12.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-living-room-G11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-living-room-G12.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-living-room-G13.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-living-room-G14.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-living-room-G15.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-entrance-C11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-bathroom-R11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-bathroom-R12.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-toilet-S11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-building-F11.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-building-F12.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-building-F13.jpg?v=1562856328",
                "https://images.lodgis.com/photos/lpa/ap/19849/orange/carousel/g/apartment-paris-19-building-F14.jpg?v=1562856328",
            ],
        }
        res2 = {
            "price": "€234,700",
            "surface": "19.4",
            "address": "Avenue Mac-Mahon, Paris 17°",
            "images": [
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-living-room-G11.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-living-room-G12.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-living-room-G13.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-living-room-G14.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-living-room-G15.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-kitchen-P11.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-bedroom--H11.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-bedroom--H12.jpg?v=1562683837",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-bedroom--H13.jpg?v=1562683838",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-bathroom-R11.jpg?v=1562683838",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-bathroom-R12.jpg?v=1562683838",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-building-F11.jpg?v=1562683838",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-building-F12.jpg?v=1562683838",
                "https://images.lodgis.com/photos/lpa/ap/19848/orange/carousel/g/apartment-paris-17-building-F13.jpg?v=1562683838",
            ],
        }

        # The labelled and expected values contain non-ASCII characters (the
        # euro and degree signs), so the fixtures are decoded explicitly
        # instead of with the locale's default encoding.
        # NOTE(review): assumes the sample files are stored as UTF-8.
        with open("tests/samples/lodgis_1.html", encoding="utf-8") as f:
            html1 = f.read()
        extractor_lodgis = Extractor()
        extractor_lodgis.add_train_page(html1, train_set_lodgis)

        with open("tests/samples/lodgis_2.html", encoding="utf-8") as f:
            html2 = f.read()

        # One assertion per page so a failure identifies the offending page.
        assert extractor_lodgis.predict(html1) == res1
        assert extractor_lodgis.predict(html2) == res2
    def test_extract_bookaflat(self):
        """Train on one bookaflat sample page and check predictions on two.

        The extractor is trained with a single page and a partial example
        (only two of the page's images are given).  The test then checks
        that ``predict`` returns the complete expected record for the
        training page and for a second, unseen sample page.
        """
        # Training example: note that only two image paths are supplied,
        # while the expected prediction for the same page lists many more.
        train_set_bookaflat = {
            "surface": "178",
            'address': 'Rue Saint Marc, Paris 2nd',
            "price": "13000 €",
            "images": [
                "../photo/paris/18150/dsc05491-hdr.jpg",
                "../photo/paris/18150/salon.jpg"
            ]
        }

        # Read the samples with an explicit encoding: the expected values
        # contain non-ASCII text (the euro sign), so relying on the
        # platform default encoding would make the test locale-dependent.
        with open("tests/samples/bookaflat_1.html", 'r',
                  encoding="utf-8") as f:
            html1 = f.read()

        extractor_bookaflat = Extractor()
        extractor_bookaflat.add_train_page(html1, train_set_bookaflat)

        # Expected prediction for the training page.
        res1 = {
            'surface': '178',
            'address': 'Rue Saint Marc, Paris 2nd',
            'price': '13000 €',
            'images': [
                '../photo/paris/18150/dsc05491-hdr.jpg',
                '../photo/paris/18150/dsc05503-hdr.jpg',
                '../photo/paris/18150/salon.jpg',
                '../photo/paris/18150/salon-3.jpg',
                '../photo/paris/18150/salon-4.jpg',
                '../photo/paris/18150/salon-2.jpg',
                '../photo/paris/18150/sam-2.jpg',
                '../photo/paris/18150/sam-3.jpg',
                '../photo/paris/18150/sam-4.jpg',
                '../photo/paris/18150/sam.jpg',
                '../photo/paris/18150/dsc05488-hdr.jpg',
                '../photo/paris/18150/dsc05485-hdr.jpg',
                '../photo/paris/18150/cuisine.jpg',
                '../photo/paris/18150/cuisine-2.jpg',
                '../photo/paris/18150/chambre-3.jpg',
                '../photo/paris/18150/chambre-2.jpg',
                '../photo/paris/18150/sdb-2.jpg',
                '../photo/paris/18150/dsc05467-hdr.jpg',
                '../photo/paris/18150/sdb.jpg',
                '../photo/paris/18150/capture-daei-cran-2019-05-03-ai-17-09-25.jpg',
                '../photo/paris/18150/chambre-1.jpg',
                '../photo/paris/18150/chambre-bis.jpg',
                '../photo/paris/18150/bureau.jpg',
                '../photo/paris/18150/sdb-2-2.jpg',
                '../photo/paris/18150/bureau-5.jpg',
                '../photo/paris/18150/bureau-4.jpg',
                '../photo/paris/18150/bureau-3.jpg',
                '../photo/paris/18150/bureau-2.jpg',
                '../photo/paris/18150/dsc05563.jpg',
                '../photo/paris/18150/opera-comique.jpg',
                '../photo/paris/18150/quartier.jpg'
            ]
        }

        with open("tests/samples/bookaflat_2.html", 'r',
                  encoding="utf-8") as f:
            html2 = f.read()

        # Expected prediction for the second page, which is never used for
        # training.
        # NOTE(review): 'surface' here is the whole 'Studio - 31 M2' label
        # rather than a bare number as on page 1 -- presumably what the
        # learned wrapper yields on this page; confirm this is intended.
        res2 = {
            'surface': 'Studio - 31 M2',
            'address': "Rue d'Assas, Paris 6th",
            'price': '1690 €',
            'images': [
                '../photo/paris/18469/sal5.jpg',
                '../photo/paris/18469/sal4.jpg',
                '../photo/paris/18469/sal3.jpg',
                '../photo/paris/18469/sal2.jpg',
                '../photo/paris/18469/sal1.jpg',
                '../photo/paris/18469/cuisine1.jpg',
                '../photo/paris/18469/cuisine2.jpg',
                '../photo/paris/18469/coin-nuit1.jpg',
                '../photo/paris/18469/vue-chambre.jpg',
                '../photo/paris/18469/coin-nuit3.jpg',
                '../photo/paris/18469/coin-nuit2.jpg',
                '../photo/paris/18469/sdb1.jpg',
                '../photo/paris/18469/sdb2.jpg',
                '../photo/paris/18469/facade.jpg',
                '../photo/paris/18469/port-royal.jpg',
                '../photo/paris/18469/img-9616.jpg'
            ]
        }

        # Separate assertions so a failure identifies the offending page and
        # pytest can show a diff of the mismatching dict.  As before, page 2
        # is only predicted once page 1 has matched.
        assert extractor_bookaflat.predict(html1) == res1
        assert extractor_bookaflat.predict(html2) == res2