def next(self):
    """Return the download descriptor for the next page not yet on disk.

    Lines are read one at a time from ``self.fh``. Blank lines and the
    ``page_title`` header line are skipped. For every other line the
    target directory and file name are derived through ``to3dirs``;
    pages whose file already exists are skipped, otherwise the directory
    is created if needed and a ``DataURLs`` describing the download is
    returned.

    ``self.test_limit`` (when not ``None``) is decremented once per line
    read attempt, and iteration stops as soon as it drops to zero or
    below.

    Raises:
        StopIteration: at end of file, or when ``test_limit`` runs out.
    """
    while True:
        if self.test_limit is not None:
            self.test_limit -= 1
            if self.test_limit <= 0:
                raise StopIteration

        raw_line = self.fh.readline()
        if not raw_line:
            # readline() yields an empty string only at end of file; a
            # blank line still carries its newline, so checking before
            # strip() keeps a stray blank line from ending the iteration.
            raise StopIteration

        line = raw_line.strip()
        if not line or line == "page_title":
            # Nothing to download for blank lines or the header row.
            continue

        basename = line.decode("utf-8").strip()
        path = os.path.join(self.dest_dir, to3dirs.to_path(basename))
        disk_name = os.path.join(path, to3dirs.to_filename(basename))
        if not os.path.exists(disk_name.encode('utf-8')):
            if not os.path.exists(path.encode('utf-8')):
                os.makedirs(path.encode('utf-8'))

            quoted_url = urllib.quote(basename.encode('utf-8'))
            # Skip wikipedia automatic redirect
            wiki = WIKI % dict(lang=self.language)
            url = wiki + "w/index.php?title=%s&redirect=no" % (quoted_url,)
            data = DataURLs(url=url, temp_dir=self.temp_dir,
                            disk_name=disk_name, basename=basename)
            return data
def test_to_path(self):
    # Each case pairs the expected directory components with the page
    # title handed to to_path(); NULL marks a slot the title does not fill.
    # The "Anexo:" cases expect that leading prefix to be ignored, while
    # "Noestoy:Anexo:..." expects the title's own first characters.
    cases = (
        ((u"*", NULL, NULL), u"*/"),
        ((u"a", u"b", u"c"), u"abcdefgh"),
        ((u"á", NULL, NULL), u"á"),
        ((u"á", u"þ", NULL), u"áþ"),
        ((u"$", u"9", NULL), u"$9.99"),
        ((u"a", u"b", u"c"), u"Anexo:abcdefgh"),
        ((u'a', u':', u'b'), u'Anexo:a:blanco'),
        ((u'N', u'o', u'e'), u'Noestoy:Anexo:a:blanco'),
    )
    for components, title in cases:
        expected = os.path.join(*components)
        self.assertEqual(expected, to_path(title))
def _to_complete_path(pagina):
    """Return the directory part and file name of *pagina* joined by '/'."""
    directory = to_path(pagina)
    filename = to_filename(pagina)
    return directory + '/' + filename
def test_encoding(self):
    # The "." in the title is expected to surface as a "%" component,
    # presumably because to_path() percent-quotes it before splitting.
    self.assertEqual(to3dirs.to_path("2.3"), "2/%/2")
def test_short(self):
    # A two-character title leaves the third level as the "_" filler.
    self.assertEqual(to3dirs.to_path("mo"), "m/o/_")
def test_very_short(self):
    # A one-character title leaves both remaining levels as "_" fillers.
    self.assertEqual(to3dirs.to_path("m"), "m/_/_")
def test_simple(self):
    # The first three characters of the title become the three levels,
    # including the non-ASCII one.
    self.assertEqual(to3dirs.to_path("moño"), "m/o/ñ")