def test_segregates_links(self):
    """Check that ``segregate_links`` splits table rows into two buckets.

    The fixture holds three ``js-navigation-item`` rows: one carrying the
    directory icon and two carrying the file icon (a ``.jpg`` and a
    ``.png``). The scraper is created with ``('jpg', )`` as its second
    argument, and the test expects:

    * a result with exactly two entries, ``follow_links`` and
      ``download_links``;
    * one follow link, containing the directory href;
    * one download link, for the ``.jpg`` file only, containing ``raw``
      and no longer containing ``blob``.
    """
    # Adjacent literals are concatenated by the compiler; every piece
    # starts with the single space that separates it from the previous one.
    markup = (
        # Row 1: a directory entry.
        ' <tr class="js-navigation-item">'
        ' <td class="icon">'
        ' <span class="octicon octicon-file-directory"></span>'
        ' </td>'
        ' <td class="content">'
        ' <span>'
        ' <a href="/link/to/dir">DirLink</a>'
        ' </span>'
        ' </td>'
        ' </tr>'
        # Row 2: a file entry with the .jpg extension.
        ' <tr class="js-navigation-item">'
        ' <td class="icon">'
        ' <span class="octicon octicon-file-text"></span>'
        ' </td>'
        ' <td class="content">'
        ' <span>'
        ' <a href="/blob/link/to/file.jpg">FileLink</a>'
        ' </span>'
        ' </td>'
        ' </tr>'
        # Row 3: a file entry with the .png extension.
        ' <tr class="js-navigation-item">'
        ' <td class="icon">'
        ' <span class="octicon octicon-file-text"></span>'
        ' </td>'
        ' <td class="content">'
        ' <span>'
        ' <a href="/blob/link/to/file.png">FileLink</a>'
        ' </span>'
        ' </td>'
        ' </tr>'
        ' '
    )

    # Sanity-check the fixture itself before exercising the scraper.
    row_elements = html.fromstring(markup)
    self.assertEqual(len(row_elements), 3)

    scraper = GitHubLinkScarper('whatever', ('jpg', ))
    segregated = scraper.segregate_links(row_elements)
    self.assertEqual(len(segregated), 2)

    to_follow = segregated['follow_links']
    to_download = segregated['download_links']
    self.assertEqual(len(to_follow), 1)
    self.assertEqual(len(to_download), 1)

    # The directory row is the one to follow.
    self.assertIn('/link/to/dir', to_follow[0])

    # The kept file link must have 'blob' swapped out for 'raw'.
    jpg_link = to_download[0]
    self.assertIn('raw', jpg_link)
    self.assertNotIn('blob', jpg_link)
    self.assertIn('link/to/file.jpg', jpg_link)