Пример #1
0
class TestPlanetaLudicoScrap(unittest.TestCase):
    """Unit tests for PlanetaLudicoScrap backed by a mocked collection.

    All payloads are the canned fixtures exposed by
    MockKimonoPlanetaLudicoAPI (``json`` and ``entry_json``).
    """

    def setUp(self):
        # A fresh mock collection and scraper for every test, so the save()
        # call count verified below never carries over between tests.
        self.mock_collection = mock()
        self.scrap = PlanetaLudicoScrap(self.mock_collection)

    def _read_entries(self, _):
        # Stand-in for the scraper's own _read_entries: ignores its single
        # argument and returns the canned JSON fixture.
        return MockKimonoPlanetaLudicoAPI.json

    def _build_sample_entry(self):
        # Shared arrange step: build an entry from the canned entry fixture.
        return self.scrap._build_entry(MockKimonoPlanetaLudicoAPI.entry_json)

    def test_when_scrapping_saves_one_element_per_entry_in_db(self):
        # Shadow _read_entries on the instance with the bound stub above.
        # NOTE(review): presumably this keeps the test from fetching the URL
        # below over the network -- confirm against scrapListOfURL.
        self.scrap._read_entries = self._read_entries
        self.scrap.scrapListOfURL([URL("http://www.kimonolabs.com/api/6j0yuuni?apikey=4bac761d58acc84da9ccadf9e1ff2d8f", "Planeta Ludico")])
        # save() must have been called exactly twice on the collection.
        # NOTE(review): presumably the canned JSON holds two entries -- confirm.
        verify(self.mock_collection, 2).save(any())

    def test_when_read_an_entry_title_is_title_of_emtry(self):
        # The entry title comes from the fixture's title text.
        entry = self._build_sample_entry()
        self.assertEqual(entry.title,
                         MockKimonoPlanetaLudicoAPI.entry_json['title']['text'])

    def test_when_read_an_entry_date_is_date_of_emtry(self):
        # The entry date is the fixture's date value.
        entry = self._build_sample_entry()
        self.assertEqual(entry.date,
                         MockKimonoPlanetaLudicoAPI.entry_json['date'])

    def test_when_read_an_entry_link_is_link_of_emtry(self):
        # The entry link comes from the fixture's title href.
        entry = self._build_sample_entry()
        self.assertEqual(entry.link,
                         MockKimonoPlanetaLudicoAPI.entry_json['title']['href'])

    def test_when_read_an_entry_source_is_the_blog(self):
        # The 'location' field of the entry's json() is the fixture's
        # source text.
        entry = self._build_sample_entry()
        self.assertEqual(entry.json()['location'],
                         MockKimonoPlanetaLudicoAPI.entry_json['source']['text'])
Пример #2
0
 def setUp(self):
     """Create the mocked collection and the scraper under test."""
     collection = mock()
     self.mock_collection = collection
     self.scrap = PlanetaLudicoScrap(collection)
Пример #3
0
# Script fragment (Python 2 print statements). `threads`, `labsk_urls`,
# `starttime`, `listener`, `db` and `planetaludico_urls` are defined outside
# this excerpt.
threads.setMsgPageLimit(250)  # Never lower this value or you will lose messages; keep it at least the same

# Scrape the labsk URL list, then compute the time elapsed since `starttime`.
threads.scrapListOfURL(labsk_urls)
delta = datetime.now() - starttime

# Summary of the run: elapsed time, the limits in effect and the listener's
# string representation.
print "----------------------------------------------"
print "Total time: ", delta
print "Page limit ", threads.pagelimit, " Msg page limit ", threads.msgpagelimit
print str(listener)

# NOTE(review): presumably merges stored records that share the same 'link'
# -- confirm against db.merge. The result is only printed.
mr = db.merge('link')
print str(mr)


# Second pass: scrape the Planeta Ludico blogs into db.blogs_collection().
# `listener` is rebound to a new BlogListener, so the previous listener object
# is no longer reachable through this name.
print "Scrapping planeta ludico"
blogs = PlanetaLudicoScrap(db.blogs_collection())
listener = BlogListener()
blogs.setListener(listener)
blogs.scrapListOfURL(planetaludico_urls)

print "----------------------------------------------"
print str(listener)

#------------------------------------------------
# Build reports

print "Building reports"


def write(name, html_text):
    path = '../webgui/templates/'