Exemplo n.º 1
0
    def testScraper(self):
        """
        Test the Scraper class by feeding it a sample of html
        files randomly picked from the download directory.

        At most 100 files are sampled from ``../downloads/m-134``
        (relative to this module); files whose name contains
        ``NO_JOBS`` are skipped. Each file name is expected to start
        with a ``YYYY-MM-DD`` date and to contain ``uuid-`` followed by
        seven digits; both values are used to build the stamp attached
        to the parsed document before it is handed to the scraper.
        """

        path_ = dirname(__file__)
        downloads = normpath(join(path_, '../downloads/m-134'))

        files = [name for name in listdir(downloads)
                 if 'NO_JOBS' not in name]
        self.assertTrue(files, 'no html samples found in %s' % downloads)

        # Sample the file names themselves rather than indices: every file
        # (including the first one) is eligible, and capping the sample size
        # keeps random.sample from raising ValueError when the directory
        # holds fewer files than requested.
        size = min(100, len(files))
        soup_items = list()

        for filename in sample(files, size):
            # The context manager closes the file; no explicit close needed.
            with open(join(downloads, filename), 'r') as f:
                html = f.read()

            soup = BeautifulSoup(html)

            uuid_match = search(r'uuid-(\d{7})', filename)
            self.assertIsNotNone(
                uuid_match, 'no uuid in file name: %s' % filename)
            uuid = uuid_match.group(1)

            # match() anchors at the start: the date must prefix the name.
            date_match = match(r'(\d{4}-\d{2}-\d{2})', filename)
            self.assertIsNotNone(
                date_match, 'no leading date in file name: %s' % filename)
            sdate = date_match.group(1)
            date_ = datetime.strptime(sdate, '%Y-%m-%d').date()

            stamp = Stamp(date_, uuid)
            soup_item = Stamped(stamp, soup)
            soup_items.append(soup_item)

        s = Scraper()
        serial_items = s.scrape(soup_items)

        self.assertIsNotNone(serial_items)