def download(self):
    """Download the videos of every paragraph listed in ``self.paragraphs``.

    Each entry of ``self.paragraphs`` is unpacked as
    ``(course_name, i, j, chapter_name, par_name, url)``.  The paragraph page
    is opened through ``self._br`` and every ``seq_contents`` div is scanned
    for a video.  Videos are written below ``DIRECTORY`` as
    ``<course>/<chapter>/<ii>.<jj>.<k> <paragraph> (<video type>).<ext>``.
    A paragraph is skipped when a file matching ``<ii>.<jj>.*`` already
    exists in that directory.
    """
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # One directory string feeds both the "already downloaded" glob and
        # the output template, so the two can never point at different
        # places (the template used to be joined with '\/' instead of '/').
        target_dir = (DIRECTORY
                      + sanitize_filename(course_name) + '/'
                      + sanitize_filename(chapter_name) + '/')
        if glob.glob(target_dir + '%02i.%02i.*' % (i, j)):
            continue

        par = self._br.open(SITE_URL + url)
        par_soup = BeautifulSoup(par.read())
        contents = par_soup.findAll('div', 'seq_contents')
        k = 0  # running number of the videos found in this paragraph
        for content in contents:
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                video_stream = content_soup.find('div', 'video')['data-streams']
                video_id = video_stream.split(':')[1]
                video_url = youtube_url + video_id
                k += 1
                print('[%02i.%02i.%i] %s (%s)' % (i, j, k, par_name, video_type))
                outtmpl = (target_dir
                           + '%02i.%02i.%i ' % (i, j, k)
                           + sanitize_filename('%s (%s)' % (par_name, video_type))
                           + '.%(ext)s')
                self._fd.params['outtmpl'] = outtmpl
                self._fd.download([video_url])
            except Exception:
                # Best effort: any error raised while parsing or downloading
                # this content block is ignored and the next block is tried.
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C and
                # SystemExit working.
                pass
def test_sanitize_ids(self):
    """With ``is_id=True`` the sample ids must come back unchanged."""
    sample_ids = ('_n_cd26wFpw', '_BD_eEpuzXw', 'N0Y__7-UOdI')
    for video_id in sample_ids:
        self.assertEqual(sanitize_filename(video_id, is_id=True), video_id)
def download(self):
    """Fetch every paragraph in ``self.paragraphs`` and download its videos.

    Each entry is a ``(course_name, i, j, chapter_name, par_name, url)``
    tuple.  Files are placed in ``DIRECTORY/<course>/<chapter>/`` and named
    ``<ii>.<jj>.<kk> <paragraph> (<video type>).<ext>``.  A paragraph is
    skipped when a file matching ``<ii>.<jj>.*`` is already present there.
    """
    print("\n-----------------------\nStart downloading\n-----------------------\n")
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Directory shared by the skip check and the output template.
        chapter_dir = os.path.join(
            DIRECTORY,
            sanitize_filename(course_name, replace_space_with_underscore),
            sanitize_filename(chapter_name, replace_space_with_underscore),
        )
        nametmpl = os.path.join(chapter_dir, "%02i.%02i.*" % (i, j))
        if glob.glob(nametmpl):
            print("Processing of %s skipped" % nametmpl)
            continue
        print("Processing %s..." % nametmpl)

        page = self._br.open(base_url + url)
        page_soup = BeautifulSoup(page.read())
        k = 0  # counts the videos found so far in this paragraph
        for content in page_soup.findAll("div", "seq_contents"):
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                video_stream = content_soup.find("div", "video")["data-streams"]
                video_url = youtube_url + video_stream.split(":")[1]
                k += 1
                print("[%02i.%02i.%02i] %s (%s)" % (i, j, k, par_name, video_type))
                video_label = sanitize_filename(
                    "%s (%s)" % (par_name, video_type),
                    replace_space_with_underscore,
                )
                self._fd.params["outtmpl"] = os.path.join(
                    chapter_dir,
                    "%02i.%02i.%02i " % (i, j, k) + video_label + ".%(ext)s",
                )
                self._fd.download([video_url])
            except Exception:
                # Deliberately silent: any exception raised while parsing or
                # downloading this block is ignored and the loop moves on.
                pass
def get_youtube_url(self, youtube_url):
    """Download the media behind *youtube_url* and return its public URL.

    The file is stored in ``self.media_folder`` under a name built from the
    video id, uploader id and title.  The download is only started when no
    file with that name exists in the media folder yet.

    Returns ``u'<self.media_url>/<file name>'``, or an empty string when the
    metadata lookup or the download raises.
    """
    # determine the media file name
    filetmpl = u'%(id)s_%(uploader_id)s_%(title)s.%(ext)s'
    ydl = youtube_dl.YoutubeDL({
        'outtmpl': join(self.media_folder, filetmpl),
        'quiet': True,
        'restrictfilenames': True,
        'noplaylist': True,
        'continuedl': True,
        'nooverwrites': True,
        'retries': 3000,
        'fragment_retries': 3000,
        'ignoreerrors': True
    })
    ydl.add_default_info_extractors()
    try:
        result = ydl.extract_info(youtube_url, download=False)
        media_filename = sanitize_filename(filetmpl % result['entries'][0],
                                           restricted=True)
    except Exception:
        # Covers a missing/empty 'entries' list as well as extractor errors.
        return ''

    # check if a file with this name already exists -- in the media folder,
    # which is where 'outtmpl' puts it (the bare file name would be resolved
    # against the current working directory instead).
    media_path = join(self.media_folder, media_filename)
    if not os.path.isfile(media_path):
        try:
            ydl.extract_info(youtube_url, download=True)
        except Exception:
            return ''
    return u'%s/%s' % (self.media_url, split(media_filename)[1])
def download(self):
    """Walk ``self.paragraphs`` and download the videos of each paragraph.

    Entries are ``(course_name, i, j, chapter_name, par_name, url)`` tuples.
    Output goes to ``DIRECTORY/<course>/<chapter>/`` with file names of the
    form ``<ii>.<jj>.<k> <paragraph> (<video type>).<ext>``.  Paragraphs for
    which a ``<ii>.<jj>.*`` file already exists are skipped.
    """
    print("\n-----------------------\nStart downloading\n-----------------------\n")
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Directory used by both the skip check and the output template.
        chapter_dir = os.path.join(DIRECTORY,
                                   sanitize_filename(course_name),
                                   sanitize_filename(chapter_name))
        nametmpl = os.path.join(chapter_dir, '%02i.%02i.*' % (i, j))
        if glob.glob(nametmpl):
            print("Processing of %s skipped" % nametmpl)
            continue
        print("Processing %s..." % nametmpl)

        page = self._br.open(base_url + url)
        page_soup = BeautifulSoup(page.read())
        k = 0  # number of videos found so far in this paragraph
        for content in page_soup.findAll('div', 'seq_contents'):
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                video_stream = content_soup.find('div', 'video')['data-streams']
                video_url = youtube_url + video_stream.split(':')[1]
                k += 1
                print('[%02i.%02i.%i] %s (%s)' % (i, j, k, par_name, video_type))
                video_label = sanitize_filename('%s (%s)' % (par_name, video_type))
                self._fd.params['outtmpl'] = os.path.join(
                    chapter_dir,
                    '%02i.%02i.%i ' % (i, j, k) + video_label + '.%(ext)s')
                self._fd.download([video_url])
            except Exception:
                # Deliberately silent: any exception raised while parsing or
                # downloading this block is ignored and the loop moves on.
                pass
def test_sanitize_ids(self):
    """Each sample id must survive ``sanitize_filename(..., is_id=True)`` as is."""
    for video_id in ("_n_cd26wFpw", "_BD_eEpuzXw", "N0Y__7-UOdI"):
        sanitized = sanitize_filename(video_id, is_id=True)
        self.assertEqual(sanitized, video_id)
def test_sanitize_filename_restricted(self):
    """Check ``sanitize_filename`` with ``restricted=True`` on sample names."""

    def restricted(name):
        return sanitize_filename(name, restricted=True)

    # These names are expected back unchanged.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(restricted(plain), plain)

    # Path separators and other special characters.
    self.assertEqual("abc_de", restricted("abc/de"))
    self.assertFalse("/" in restricted("abc/de///"))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes_no", "yes? no"),
        ("this_-_that", "this: that"),
    ):
        self.assertEqual(expected, restricted(raw))

    # Non-ASCII input.
    tests = _compat_str("a\xe4b\u4e2d\u56fd\u7684c")
    self.assertEqual(restricted(tests), "a_b_c")
    self.assertTrue(restricted(_compat_str("\xf6")) != "")  # No empty filename

    # No forbidden character may appear in the result for any of them.
    forbidden = "\"\0\\/&!: '\t\n()[]{}$;`^,#"
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in restricted(char))

    # Handle a common case more neatly
    for raw, expected in (
        ("\u5927\u58f0\u5e26 - Song", "Song"),
        ("\u603b\u7edf: Speech", "Speech"),
    ):
        self.assertEqual(restricted(_compat_str(raw)), expected)

    # .. but make sure the file name is never empty
    for lone in ("-", ":"):
        self.assertTrue(restricted(lone) != "")
def test_sanitize_filename(self):
    """Check ``sanitize_filename`` (default mode) on sample names."""
    # These names are expected back unchanged.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(sanitize_filename(plain), plain)

    # Path separators and other special characters.
    self.assertEqual("abc_de", sanitize_filename("abc/de"))
    self.assertFalse("/" in sanitize_filename("abc/de///"))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes no", "yes? no"),
        ("this - that", "this: that"),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersand and non-ASCII text are expected back unchanged.
    self.assertEqual(sanitize_filename("AT&T"), "AT&T")
    for raw in ("\xe4", "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430"):
        text = _compat_str(raw)
        self.assertEqual(sanitize_filename(text), text)

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in sanitize_filename(char))
def test_sanitize_filename(self):
    """Check ``sanitize_filename`` (default mode and ``is_id=True``) on sample names."""
    # These names are expected back unchanged.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(sanitize_filename(plain), plain)

    # Path separators and other special characters.
    self.assertEqual("abc_de", sanitize_filename("abc/de"))
    self.assertFalse("/" in sanitize_filename("abc/de///"))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes no", "yes? no"),
        ("this - that", "this: that"),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersand and non-ASCII text are expected back unchanged.
    for same in ("AT&T", "ä", "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430"):
        self.assertEqual(sanitize_filename(same), same)

    # Colons, leading dashes and leading dots; is_id=True leaves them alone.
    for raw, options, expected in (
        ("New World record at 0:12:34", {}, "New World record at 0_12_34"),
        ("--gasdgf", {}, "_-gasdgf"),
        ("--gasdgf", {"is_id": True}, "--gasdgf"),
        (".gasdgf", {}, "gasdgf"),
        (".gasdgf", {"is_id": True}, ".gasdgf"),
    ):
        self.assertEqual(sanitize_filename(raw, **options), expected)

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in sanitize_filename(char))
def download(self):
    """Download the videos of every paragraph, optionally asking first.

    Entries of ``self.paragraphs`` are
    ``(course_name, i, j, chapter_name, par_name, url)`` tuples.  Output goes
    to ``DIRECTORY/<course>/<chapter>/`` with file names of the form
    ``<ii>.<jj>.<kk> <paragraph> (<video type>).<ext>``; paragraphs that
    already have a ``<ii>.<jj>.*`` file are skipped.

    When ``self._config.pause_mode`` is set, every video is confirmed with a
    y/n prompt.  When ``self._config.resume_mode`` is set, videos are
    prompted for until one is accepted, after which ``resume_mode`` is
    switched off.
    """
    print("\n-----------------------\nStart downloading\n-----------------------\n")
    for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
        # Directory used by both the skip check and the output template.
        chapter_dir = os.path.join(
            DIRECTORY,
            sanitize_filename(course_name, replace_space_with_underscore),
            sanitize_filename(chapter_name, replace_space_with_underscore))
        nametmpl = os.path.join(chapter_dir, '%02i.%02i.*' % (i, j))
        if glob.glob(nametmpl):
            print("Processing of %s skipped" % nametmpl)
            continue
        print("Processing %s..." % nametmpl)

        page = self._br.open(base_url + url)
        page_soup = BeautifulSoup(page.read())
        k = 0  # number of videos found so far in this paragraph
        for content in page_soup.findAll('div', 'seq_contents'):
            content_soup = BeautifulSoup(content.text)
            try:
                video_type = content_soup.h2.text.strip()
                video_stream = content_soup.find('div', 'video')['data-streams']
                video_url = youtube_url + video_stream.split(':')[1]
                k += 1
                print('[%02i.%02i.%02i] %s (%s)' % (i, j, k, par_name, video_type))
                video_label = sanitize_filename(
                    '%s (%s)' % (par_name, video_type),
                    replace_space_with_underscore)
                outtmpl = os.path.join(
                    chapter_dir,
                    '%02i.%02i.%02i ' % (i, j, k) + video_label + '.%(ext)s')

                if self._config.pause_mode:
                    # NOTE(review): the source text has a line break between
                    # "? " and "(y/n)"; it is written as \n here -- confirm.
                    prompt = 'Download this video [%s - %s]? \n(y/n) ' % (chapter_name, outtmpl)
                    answer = raw_input(prompt)
                    if answer.lower() == "n":
                        continue

                if self._config.resume_mode:
                    prompt = 'Download video from this [%s - %s]? (y/n) ' % (chapter_name, outtmpl)
                    answer = raw_input(prompt)
                    if answer.lower() == "n":
                        continue
                    # Accepted: stop asking for the remaining videos.
                    self._config.resume_mode = False

                self._fd.params['outtmpl'] = outtmpl
                self._fd.download([video_url])
            except Exception:
                # Deliberately silent: any exception raised while parsing,
                # prompting or downloading is ignored and the loop moves on.
                pass
def test_sanitize_filename(self):
    """Check ``sanitize_filename`` (default mode and ``is_id=True``) on sample names."""
    # These names are expected back unchanged.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(sanitize_filename(plain), plain)

    # Path separators and other special characters.
    self.assertEqual('abc_de', sanitize_filename('abc/de'))
    self.assertFalse('/' in sanitize_filename('abc/de///'))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes no', 'yes? no'),
        ('this - that', 'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersand and non-ASCII text are expected back unchanged.
    for same in ('AT&T', 'ä', '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'):
        self.assertEqual(sanitize_filename(same), same)

    # Colons, leading dashes and leading dots; is_id=True leaves them alone.
    for raw, options, expected in (
        ('New World record at 0:12:34', {}, 'New World record at 0_12_34'),
        ('--gasdgf', {}, '_-gasdgf'),
        ('--gasdgf', {'is_id': True}, '--gasdgf'),
        ('.gasdgf', {}, 'gasdgf'),
        ('.gasdgf', {'is_id': True}, '.gasdgf'),
    ):
        self.assertEqual(sanitize_filename(raw, **options), expected)

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in sanitize_filename(char))
def test_sanitize_filename_restricted(self):
    """Check ``sanitize_filename`` with ``restricted=True`` on sample names."""

    def restricted(name):
        return sanitize_filename(name, restricted=True)

    # These names are expected back unchanged.
    for plain in (u'abc', u'abc_d-e', u'123'):
        self.assertEqual(restricted(plain), plain)

    # Path separators and other special characters.
    self.assertEqual(u'abc-de', restricted(u'abc/de'))
    self.assertFalse(u'/' in restricted(u'abc/de///'))
    for expected, raw in (
        (u'abc-de', u'abc/<>\\*|de'),
        (u'xxx', u'xxx/<>\\*|'),
        (u'yes_no', u'yes? no'),
        (u'this_-_that', u'this: that'),
    ):
        self.assertEqual(expected, restricted(raw))

    # No forbidden character may appear in the result for any of them; the
    # result for each input character is also printed.
    forbidden = u'"\0\\/&: \'\t\n'
    for char in forbidden:
        print('input: ' + char + ', result: ' + repr(restricted(char)))
        for banned in forbidden:
            self.assertTrue(banned not in restricted(char))
def test_sanitize_filename(self):
    """Check ``sanitize_filename`` (default mode) on sample names."""
    # These names are expected back unchanged.
    for plain in (u'abc', u'abc_d-e', u'123'):
        self.assertEqual(sanitize_filename(plain), plain)

    # Path separators and other special characters.
    self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
    self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
    for expected, raw in (
        (u'abc-de', u'abc/<>\\*|de'),
        (u'xxx', u'xxx/<>\\*|'),
        (u'yes no', u'yes? no'),
        (u'this - that', u'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersand and non-ASCII text are expected back unchanged.
    for same in (u'AT&T', u'ä', u'кириллица'):
        self.assertEqual(sanitize_filename(same), same)

    # No forbidden character may appear in the result for any of them.
    forbidden = u'"\0\\/'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in sanitize_filename(char))
def main(args):
    """Synchronise a local directory with a YouTube playlist.

    ``args`` is unpacked into ``(playlist_url, output_dirpath)``.  The
    function downloads playlist entries whose file is missing from
    ``output_dirpath``, deletes files that the previous ``.ordering`` file
    lists but the playlist no longer contains, and finally rewrites
    ``.ordering`` with the current playlist file names, one per line.

    Exits through ``sys.exit`` when ``output_dirpath`` does not exist or is
    not a directory.  Raises ``ValueError`` when a file is still missing
    right after its download was processed.
    """
    # Parse arguments
    playlist_url, output_dirpath = args
    if not os.path.exists(output_dirpath):
        sys.exit('directory not found: %s' % output_dirpath)
    if not os.path.isdir(output_dirpath):
        sys.exit('not a directory: %s' % output_dirpath)

    # Default settings
    restrictfilenames = False
    # (TODO: Enable again when it plays nicely with 'extract_audio_for_itunes')
    writeinfojson = False
    extract_audio_for_itunes = True

    # Locate all videos already in the filesystem
    # (the '.ordering' file holds one file name per line)
    ordering_filepath = os.path.join(output_dirpath, '.ordering')
    if os.path.exists(ordering_filepath):
        filesystem_filenames = []
        with codecs.open(ordering_filepath, 'rt', 'utf-8') as ordering_file:
            for line in ordering_file:
                filesystem_filenames.append(line.rstrip(u'\r\n'))

        # Ensure all referenced files actually exist
        real_filesystem_filenames = []
        for filename in filesystem_filenames:
            if os.path.exists(os.path.join(output_dirpath, filename)):
                real_filesystem_filenames.append(filename)
            else:
                # Python 2 print statement: the parenthesised string is
                # formatted with ``filename`` before being printed.
                print ('WARNING: Could not locate file "%s" referenced by ' +
                    '".ordering" file. Assuming deleted.') % filename
        filesystem_filenames = real_filesystem_filenames
    else:
        filesystem_filenames = []

    # Prepare downloader
    video_filename_template = u'%(title)s.%(ext)s'
    downloader = youtube_dl.FileDownloader({
        'outtmpl': os.path.join(
            # (Be robust against output_dirpath containing %)
            output_dirpath.replace('%', '%%'),
            video_filename_template),
        'restrictfilenames': restrictfilenames,
        'writeinfojson': writeinfojson,
    })
    if not extract_audio_for_itunes:
        final_filename_template = video_filename_template
    else:
        # The extracted audio file keeps the title but always ends in 'm4a'.
        final_filename_template = video_filename_template.replace(u'%(ext)s', u'm4a')
        downloader.add_post_processor(FFmpegExtractAudioPP(
            preferredcodec='m4a',   # iTunes compatible.
            preferredquality=None,  # default audio quality
            keepvideo=False))

    # Locate all videos in the playlist
    video_infos = extract_youtube_playlist_info(playlist_url)
    playlist_filenames = []
    for cur_info in video_infos:
        cur_filename = sanitize_filename(final_filename_template % cur_info, restrictfilenames)
        playlist_filenames.append(cur_filename)

    # Download videos to filesystem that are missing
    for cur_info in video_infos:
        cur_filename = sanitize_filename(final_filename_template % cur_info, restrictfilenames)
        if not os.path.exists(os.path.join(output_dirpath, cur_filename)):
            # Entries flagged 'deleted' are not downloaded.
            if not cur_info.get('deleted', False):
                # Download (and optionally extract the audio)
                downloader.process_info(cur_info)

                # Verify downloaded
                # NOTE(review): nesting of this check under the 'deleted'
                # guard is assumed from the surrounding logic -- confirm.
                if not os.path.exists(os.path.join(output_dirpath, cur_filename)):
                    raise ValueError('Could not locate downloaded video: %s' % cur_filename)

    # Remove filesystem files not in playlist
    playlist_filename_set = set(playlist_filenames)
    for cur_filename in filesystem_filenames:
        if cur_filename not in playlist_filename_set:
            # Remove video (if present)
            video_filepath = os.path.join(output_dirpath, cur_filename)
            if os.path.exists(video_filepath):
                os.remove(video_filepath)

            # Remove info json (if present)
            # TODO: This is not the correct path for the info json file
            #       if 'extract_audio_for_itunes' is True.
            #       (The info json will be preceded by the *video* extension,
            #       instead of the output audio file extension.)
            infojson_filepath = os.path.join(output_dirpath, cur_filename + u'.info.json')
            if os.path.exists(infojson_filepath):
                os.remove(infojson_filepath)

    # Rewrite the ordering file
    with codecs.open(ordering_filepath, 'wt', 'utf-8') as ordering_file:
        for cur_filename in playlist_filenames:
            ordering_file.write(cur_filename)
            ordering_file.write(u'\n')
def test_sanitize_filename_restricted(self):
    """Check ``sanitize_filename`` with ``restricted=True`` on sample names."""

    def restricted(name):
        return sanitize_filename(name, restricted=True)

    # These names are expected back unchanged.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(restricted(plain), plain)

    # Path separators and other special characters.
    self.assertEqual('abc_de', restricted('abc/de'))
    self.assertFalse('/' in restricted('abc/de///'))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes_no', 'yes? no'),
        ('this_-_that', 'this: that'),
    ):
        self.assertEqual(expected, restricted(raw))

    # Non-ASCII input.
    tests = 'aäb\u4e2d\u56fd\u7684c'
    self.assertEqual(restricted(tests), 'aab_c')
    self.assertTrue(restricted('\xf6') != '')  # No empty filename

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in restricted(char))

    # Handle a common case more neatly
    for raw, expected in (
        ('\u5927\u58f0\u5e26 - Song', 'Song'),
        ('\u603b\u7edf: Speech', 'Speech'),
    ):
        self.assertEqual(restricted(raw), expected)

    # .. but make sure the file name is never empty
    for lone in ('-', ':'):
        self.assertTrue(restricted(lone) != '')

    # Accented Latin letters and their expected ASCII replacements.
    accented = 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ'
    plain_ascii = 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy'
    self.assertEqual(restricted(accented), plain_ascii)
def test_sanitize_filename(self):
    """Check ``sanitize_filename`` (default mode) on sample names."""
    # These names are expected back unchanged.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(sanitize_filename(plain), plain)

    # Path separators and other special characters.
    self.assertEqual('abc_de', sanitize_filename('abc/de'))
    self.assertFalse('/' in sanitize_filename('abc/de///'))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes no', 'yes? no'),
        ('this - that', 'this: that'),
    ):
        self.assertEqual(expected, sanitize_filename(raw))

    # Ampersand and non-ASCII text are expected back unchanged.
    self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
    for raw in ('\xe4', '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'):
        text = _compat_str(raw)
        self.assertEqual(sanitize_filename(text), text)

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in sanitize_filename(char))
def test_sanitize_filename_restricted(self):
    """Check ``sanitize_filename`` with ``restricted=True`` on sample names."""

    def restricted(name):
        return sanitize_filename(name, restricted=True)

    # These names are expected back unchanged.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(restricted(plain), plain)

    # Path separators and other special characters.
    self.assertEqual('abc_de', restricted('abc/de'))
    self.assertFalse('/' in restricted('abc/de///'))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes_no', 'yes? no'),
        ('this_-_that', 'this: that'),
    ):
        self.assertEqual(expected, restricted(raw))

    # Non-ASCII input.
    tests = 'a\xe4b\u4e2d\u56fd\u7684c'
    self.assertEqual(restricted(tests), 'a_b_c')
    self.assertTrue(restricted('\xf6') != '')  # No empty filename

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in restricted(char))

    # Handle a common case more neatly
    for raw, expected in (
        ('\u5927\u58f0\u5e26 - Song', 'Song'),
        ('\u603b\u7edf: Speech', 'Speech'),
    ):
        self.assertEqual(restricted(raw), expected)

    # .. but make sure the file name is never empty
    for lone in ('-', ':'):
        self.assertTrue(restricted(lone) != '')
def test_sanitize_filename_restricted(self):
    """Check ``sanitize_filename`` with ``restricted=True`` on sample names."""

    def restricted(name):
        return sanitize_filename(name, restricted=True)

    # These names are expected back unchanged.
    for plain in ("abc", "abc_d-e", "123"):
        self.assertEqual(restricted(plain), plain)

    # Path separators and other special characters.
    self.assertEqual("abc_de", restricted("abc/de"))
    self.assertFalse("/" in restricted("abc/de///"))
    for expected, raw in (
        ("abc_de", "abc/<>\\*|de"),
        ("xxx", "xxx/<>\\*|"),
        ("yes_no", "yes? no"),
        ("this_-_that", "this: that"),
    ):
        self.assertEqual(expected, restricted(raw))

    # Non-ASCII input.
    tests = "aäb\u4e2d\u56fd\u7684c"
    self.assertEqual(restricted(tests), "aab_c")
    self.assertTrue(restricted("\xf6") != "")  # No empty filename

    # No forbidden character may appear in the result for any of them.
    forbidden = "\"\0\\/&!: '\t\n()[]{}$;`^,#"
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in restricted(char))

    # Handle a common case more neatly
    for raw, expected in (
        ("\u5927\u58f0\u5e26 - Song", "Song"),
        ("\u603b\u7edf: Speech", "Speech"),
    ):
        self.assertEqual(restricted(raw), expected)

    # .. but make sure the file name is never empty
    for lone in ("-", ":"):
        self.assertTrue(restricted(lone) != "")

    # Accented Latin letters and their expected ASCII replacements.
    accented = "ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ"
    plain_ascii = "AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy"
    self.assertEqual(restricted(accented), plain_ascii)
def test_sanitize_filename_restricted(self):
    """Check ``sanitize_filename`` with ``restricted=True`` on sample names."""

    def restricted(name):
        return sanitize_filename(name, restricted=True)

    # These names are expected back unchanged.
    for plain in ('abc', 'abc_d-e', '123'):
        self.assertEqual(restricted(plain), plain)

    # Path separators and other special characters.
    self.assertEqual('abc_de', restricted('abc/de'))
    self.assertFalse('/' in restricted('abc/de///'))
    for expected, raw in (
        ('abc_de', 'abc/<>\\*|de'),
        ('xxx', 'xxx/<>\\*|'),
        ('yes_no', 'yes? no'),
        ('this_-_that', 'this: that'),
    ):
        self.assertEqual(expected, restricted(raw))

    # Non-ASCII input.
    tests = 'aäb\u4e2d\u56fd\u7684c'
    self.assertEqual(restricted(tests), 'aab_c')
    self.assertTrue(restricted('\xf6') != '')  # No empty filename

    # No forbidden character may appear in the result for any of them.
    forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
    for char in forbidden:
        for banned in forbidden:
            self.assertTrue(banned not in restricted(char))

    # Handle a common case more neatly
    for raw, expected in (
        ('\u5927\u58f0\u5e26 - Song', 'Song'),
        ('\u603b\u7edf: Speech', 'Speech'),
    ):
        self.assertEqual(restricted(raw), expected)

    # .. but make sure the file name is never empty
    for lone in ('-', ':'):
        self.assertTrue(restricted(lone) != '')

    # Accented Latin letters and their expected ASCII replacements.
    accented = 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ'
    plain_ascii = 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy'
    self.assertEqual(restricted(accented), plain_ascii)
def test_sanitize_filename(self):
    """Check ``sanitize_filename`` on plain names, path separators and non-ASCII text."""
    # These names are expected back unchanged.
    for plain in (u'abc', u'abc_d-e', u'123'):
        self.assertEqual(sanitize_filename(plain), plain)

    # Forward slash: replaced, while the text after it is kept.
    self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
    self.assertTrue(u'de' in sanitize_filename(u'abc/de'))
    self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))

    # Backslash: same expectation (this equality used to be asserted twice
    # in a row; the redundant copy was dropped).
    self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de'))
    self.assertTrue(u'de' in sanitize_filename(u'abc\\de'))

    # Non-ASCII text is expected back unchanged.
    for text in (u'ä', u'кириллица'):
        self.assertEqual(sanitize_filename(text), text)