def test_info_json(self): expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text. ie = youtube_dl.extractor.YoutubeIE() ydl = YoutubeDL(params) ydl.add_info_extractor(ie) ydl.download([TEST_ID]) self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) annoxml = None with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: annoxml = xml.etree.ElementTree.parse(annof) self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') root = annoxml.getroot() self.assertEqual(root.tag, 'document') annotationsTag = root.find('annotations') self.assertEqual(annotationsTag.tag, 'annotations') annotations = annotationsTag.findall('annotation') #Not all the annotations have TEXT children and the annotations are returned unsorted. for a in annotations: self.assertEqual(a.tag, 'annotation') if a.get('type') == 'text': textTag = a.find('TEXT') text = textTag.text self.assertTrue(text in expected) #assertIn only added in python 2.7 #remove the first occurance, there could be more than one annotation with the same text expected.remove(text) #We should have seen (and removed) all the expected annotation texts. self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
def test_info_json(self): expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text. ie = youtube_dl.extractor.YoutubeIE() ydl = YoutubeDL(params) ydl.add_info_extractor(ie) ydl.download([TEST_ID]) self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) annoxml = None with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: annoxml = xml.etree.ElementTree.parse(annof) self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') root = annoxml.getroot() self.assertEqual(root.tag, 'document') annotationsTag = root.find('annotations') self.assertEqual(annotationsTag.tag, 'annotations') annotations = annotationsTag.findall('annotation') # Not all the annotations have TEXT children and the annotations are returned unsorted. for a in annotations: self.assertEqual(a.tag, 'annotation') if a.get('type') == 'text': textTag = a.find('TEXT') text = textTag.text self.assertTrue(text in expected) # assertIn only added in python 2.7 # remove the first occurrence, there could be more than one annotation with the same text expected.remove(text) # We should have seen (and removed) all the expected annotation texts. self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
def test_info_json(self): ie = youtube_dl.extractor.YoutubeIE() ydl = YoutubeDL(params) ydl.add_info_extractor(ie) ydl.download([TEST_ID]) self.assertTrue(os.path.exists(INFO_JSON_FILE)) with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf: jd = json.load(jsonf) self.assertEqual(jd['upload_date'], u'20121002') self.assertEqual(jd['description'], EXPECTED_DESCRIPTION) self.assertEqual(jd['id'], TEST_ID) self.assertEqual(jd['extractor'], 'youtube') self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''') self.assertEqual(jd['uploader'], 'Philipp Hagemeister') self.assertTrue(os.path.exists(DESCRIPTION_FILE)) with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf: descr = descf.read() self.assertEqual(descr, EXPECTED_DESCRIPTION)
def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) if not ie._WORKING: print('Skipping: IE marked as not _WORKING') return if 'playlist' not in test_case and not test_case['file']: print('Skipping: No output file specified') return if 'skip' in test_case: print('Skipping: {0}'.format(test_case['skip'])) return params = self.parameters.copy() params.update(test_case.get('params', {})) ydl = YoutubeDL(params) for ie in youtube_dl.extractor.gen_extractors(): ydl.add_info_extractor(ie) finished_hook_called = set() def _hook(status): if status['status'] == 'finished': finished_hook_called.add(status['filename']) ydl.fd.add_progress_hook(_hook) test_cases = test_case.get('playlist', [test_case]) for tc in test_cases: _try_rm(tc['file']) _try_rm(tc['file'] + '.part') _try_rm(tc['file'] + '.info.json') try: for retry in range(1, RETRIES + 1): try: ydl.download([test_case['url']]) except (DownloadError, ExtractorError) as err: if retry == RETRIES: raise # Check if the exception is not a network related one if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): raise print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) else: break for tc in test_cases: if not test_case.get('params', {}).get('skip_download', False): self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) self.assertTrue(tc['file'] in finished_hook_called) self.assertTrue(os.path.exists(tc['file'] + '.info.json')) if 'md5' in tc: md5_for_file = _file_md5(tc['file']) self.assertEqual(md5_for_file, tc['md5']) with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: info_dict = json.load(infof) for (info_field, expected) in tc.get('info_dict', {}).items(): if isinstance(expected, compat_str) and expected.startswith('md5:'): self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field))) else: got = info_dict.get(info_field) self.assertEqual( expected, got, u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) # If checkable fields are missing from the test case, print the info_dict test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) for key, value in info_dict.items() if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n') # Check for the presence of mandatory fields for key in ('id', 'url', 'title', 'ext'): self.assertTrue(key in info_dict.keys() and info_dict[key]) finally: for tc in test_cases: _try_rm(tc['file']) _try_rm(tc['file'] + '.part') _try_rm(tc['file'] + '.info.json')