def test_replace_multiple_lines_with_single(self):
    """Two lines of set_1 merged into one line of set_2.

    The first replaced line is paired with the merged line; the second is
    paired with an empty line.
    """
    set_1 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 3000, "Hey 2 and 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result['changed'], True)
    # for both time_changed and text_changed, we calculate them as follows:
    # there are 7 total subs. 4 of those are matches and 2 in set_1 were
    # replaced with 1 in set_2. So the change amount is 3/7.
    self.assertAlmostEqual(result['time_changed'], 3/7.0)
    self.assertAlmostEqual(result['text_changed'], 3/7.0)
    self.assertEqual(len(result['subtitle_data']), 4)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 3)
    # check the lines that were replaced: "Hey 2" lines up with the merged
    # line, "Hey 3" has no counterpart left in set_2
    line1 = result['subtitle_data'][1]
    self.assertEqual(line1['time_changed'], True)
    self.assertEqual(line1['text_changed'], True)
    self.assertEqual(line1['subtitles'][0], set_1[1])
    self.assertEqual(line1['subtitles'][1], set_2[1])
    line2 = result['subtitle_data'][2]
    self.assertEqual(line2['time_changed'], True)
    self.assertEqual(line2['text_changed'], True)
    self.assertEqual(line2['subtitles'][0], set_1[2])
    self.assertEqual(line2['subtitles'][1], self.empty_line())
def test_insert(self):
    """A line present only in set_2 is paired with an empty line on the set_1 side."""
    set_1 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (500, 800, "Hey 1.5"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result['changed'], True)
    # for both time_changed and text_changed, we calculate them as follows:
    # there are 9 total subs. 8 of those are matches and 1 is new in
    # set_2. So the change amount is 1/9
    self.assertAlmostEqual(result['time_changed'], 1/9.0)
    self.assertAlmostEqual(result['text_changed'], 1/9.0)
    self.assertEqual(len(result['subtitle_data']), 5)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3, 4)
    # check the line that was inserted
    insert_sub_data = result['subtitle_data'][1]
    self.assertEqual(insert_sub_data['time_changed'], True)
    self.assertEqual(insert_sub_data['text_changed'], True)
    self.assertEqual(insert_sub_data['subtitles'][0], self.empty_line())
    self.assertEqual(insert_sub_data['subtitles'][1], set_2[1])
def test_simple_replace(self):
    """Rewording one line keeps its timing and flags only its text as changed."""
    set_1 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey New 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result['changed'], True)
    self.assertAlmostEqual(result['time_changed'], 0)
    # for text_changed, we calculate as follows: there are 8 total subs.
    # 6 of those are matches and 1 is different in both sets. So 2/8.0
    # has been changed.
    self.assertAlmostEqual(result['text_changed'], 2/8.0)
    self.assertEqual(len(result['subtitle_data']), 4)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3)
    # check the line whose text was replaced
    replaced_sub_data = result['subtitle_data'][1]
    self.assertEqual(replaced_sub_data['time_changed'], False)
    self.assertEqual(replaced_sub_data['text_changed'], True)
    self.assertEqual(replaced_sub_data['subtitles'][0], set_1[1])
    self.assertEqual(replaced_sub_data['subtitles'][1], set_2[1])
def test_calc_changes(self):
    """calc_changes() on two sets that differ only in the text of one line."""
    original_lines = [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ]
    reworded_lines = [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey New 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ]
    before = SubtitleSet.from_list("en", original_lines)
    after = SubtitleSet.from_list("en", reworded_lines)

    text_changed, time_changed = calc_changes(before, after)

    # timings are identical; one line differs on each side, so 2 of 8 subs
    self.assertAlmostEqual(time_changed, 0)
    self.assertAlmostEqual(text_changed, 2 / 8.0)
def test_data_ordering(self):
    """Each 'subtitles' pair follows the order of the arguments given to diff()."""
    shorter = SubtitleSet.from_list("en", [(0, 1000, "Hey 1")])
    longer = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1200, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
        ],
    )

    third_row = diff(shorter, longer)["subtitle_data"][2]
    first_side, second_side = third_row["subtitles"][0], third_row["subtitles"][1]

    # index 0 belongs to the first argument (which has no third line),
    # index 1 to the second argument
    self.assertEqual(first_side.text, None)
    self.assertEqual(second_side.text, "Hey 3")
def test_unsynced_reflect_time_changes(self):
    """Adding a line with no timing (None, None) counts towards time_changed."""
    synced_only = SubtitleSet.from_list('en', [(0, 1000, "Hey 1")])
    with_unsynced = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (None, None, "Hey 2"),
    ])

    outcome = diff(synced_only, with_unsynced)

    # expected ratio: 1 of 3
    self.assertAlmostEqual(outcome['time_changed'], 1/3.0)
def test_text_changes(self):
    """Changing the text of one line out of four gives text_changed == 1/4."""
    before = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )
    after = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 22"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )

    result = diff(before, after)

    self.assertEqual(result["changed"], True)
    self.assertEqual(result["text_changed"], 1 / 4.0)
    self.assertEqual(result["time_changed"], 0)

    rows = result["subtitle_data"]
    self.assertEqual(len(rows), 4)
    # only the second sub (index 1) should be flagged as text-changed
    for index, row in enumerate(rows):
        expected_flag = index == 1
        self.assertEqual(row["text_changed"], expected_flag)
def test_calc_changes(self):
    """calc_changes() returns (text_changed, time_changed) for a one-line rewording."""
    language = 'en'
    old_set = SubtitleSet.from_list(language, [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    new_set = SubtitleSet.from_list(language, [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey New 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])

    text_changed, time_changed = calc_changes(old_set, new_set)

    # no timing differs; the reworded line counts once per set: 2/8
    self.assertAlmostEqual(time_changed, 0)
    self.assertAlmostEqual(text_changed, 2/8.0)
def test_data_ordering(self):
    """diff() keeps its argument order inside every 'subtitles' pair."""
    one_line = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
    ])
    three_lines = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1200, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
    ])

    result = diff(one_line, three_lines)
    pair = result['subtitle_data'][2]['subtitles']

    # slot 0 is for the first set passed to diff (nothing there for the
    # third row), slot 1 for the second set
    self.assertEqual(pair[0].text, None)
    self.assertEqual(pair[1].text, "Hey 3")
def _stack_version(sv, nsl):
    """Stack the given version onto the given new SL.

    The subtitles of ``sv`` are wrapped in a SubtitleSet and added to the
    video/language of ``nsl`` through the pipeline, carrying over title,
    description, visibility, author and creation time.  ``sv`` is then
    linked to the resulting version and saved with ``needs_sync`` cleared.

    Any error raised while building the set or adding it is reported through
    ``log_subtitle_error`` and re-raised.
    """
    from apps.subtitles import pipeline

    visibility = get_visibility_from_old_version(sv)
    subtitles = _get_subtitles(sv)

    try:
        subtitles = list(subtitles)
        # set subtitle set as the pipeline will pass escaping
        # otherwise and it will break
        sset = SubtitleSet.from_list(nsl.language_code, subtitles)
        nsv = pipeline.add_subtitles(
            nsl.video, nsl.language_code, sset,
            title=sv.title, description=sv.description, parents=[],
            visibility=visibility, author=sv.user,
            created=sv.datetime_started)
    except Exception:
        # Only real errors are subtitle errors; KeyboardInterrupt and
        # SystemExit propagate without being logged as such.
        log_subtitle_error(sv, subtitles)
        raise

    sv.new_subtitle_version = nsv
    sv.needs_sync = False
    sv.save(tern_sync=True)

    log('SubtitleVersion', 'stacked', sv.pk, nsv.pk)
def test_one_set_empty(self):
    """Diffing against an empty set reports everything as changed."""
    populated = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )
    empty = SubtitleSet("en")

    result = diff(populated, empty)

    self.assertEqual(result["changed"], True)
    self.assertEqual(result["text_changed"], 1.0)
    self.assertEqual(result["time_changed"], 1.0)
def test_one_set_empty(self):
    """A four-line set diffed with an empty one is 100% text and time changed."""
    four_lines = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])

    outcome = diff(four_lines, SubtitleSet('en'))

    self.assertEqual(outcome['changed'], True)
    self.assertEqual(outcome['text_changed'], 1.0)
    self.assertEqual(outcome['time_changed'], 1.0)
def test_text_changes(self):
    """Only the reworded line is flagged, and text_changed is 1/4 overall."""
    old_set = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    new_set = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 22"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])

    outcome = diff(old_set, new_set)

    self.assertEqual(outcome['changed'], True)
    self.assertEqual(outcome['text_changed'], 1/4.0)
    self.assertEqual(outcome['time_changed'], 0)
    self.assertEqual(len(outcome['subtitle_data']), 4)

    # only sub #2 (index 1) should have text changed
    position = 0
    for entry in outcome['subtitle_data']:
        self.assertEqual(entry['text_changed'], position == 1)
        position += 1
def test_time_changes(self):
    """Moving the start time of one line flags only that line's timing."""
    before = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    after = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1200, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])

    result = diff(before, after)

    self.assertEqual(result['changed'], True)
    self.assertEqual(result['time_changed'], 1/4.0)
    self.assertEqual(result['text_changed'], 0)

    rows = result['subtitle_data']
    self.assertEqual(len(rows), 4)
    # only sub #2 (index 1) should have its time changed; no text changes
    for index, row in enumerate(rows):
        is_retimed_row = index == 1
        self.assertEqual(row['time_changed'], is_retimed_row)
        self.assertFalse(row['text_changed'])
def test_delete(self):
    """A line present only in set_1 is paired with an empty line on the set_2 side."""
    set_1 = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )
    set_2 = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )
    result = diff(set_1, set_2)
    self.assertEqual(result["changed"], True)
    # for both time_changed and text_changed, we calculate them as follows:
    # there are 7 total subs. 6 of those are matches and 1 was deleted
    # from set_1. So the change amount is 1/7
    self.assertAlmostEqual(result["time_changed"], 1 / 7.0)
    self.assertAlmostEqual(result["text_changed"], 1 / 7.0)
    self.assertEqual(len(result["subtitle_data"]), 4)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3)
    # check the line that was deleted
    delete_sub_data = result["subtitle_data"][1]
    self.assertEqual(delete_sub_data["time_changed"], True)
    self.assertEqual(delete_sub_data["text_changed"], True)
    self.assertEqual(delete_sub_data["subtitles"][1], self.empty_line())
    self.assertEqual(delete_sub_data["subtitles"][0], set_1[1])
def test_replace_single_line_with_multiple(self):
    """One line of set_1 split into two lines of set_2.

    The original line is paired with the first replacement; the second
    replacement is paired with an empty line.
    """
    set_1 = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )
    set_2 = SubtitleSet.from_list(
        "en",
        [
            (0, 1000, "Hey 1"),
            (1000, 1500, "Hey 2.1"),
            (1500, 2000, "Hey 2.2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ],
    )
    result = diff(set_1, set_2)
    self.assertEqual(result["changed"], True)
    # for both time_changed and text_changed, we calculate them as follows:
    # there are 9 total subs. 6 of those are matches and 1 in set 1 was
    # changed to 2 in set 2. So the change amount is 3/9.
    self.assertAlmostEqual(result["time_changed"], 3 / 9.0)
    self.assertAlmostEqual(result["text_changed"], 3 / 9.0)
    self.assertEqual(len(result["subtitle_data"]), 5)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 3, 4)
    # line 1 in set_1 was replaced by lines 1 and 2 in set_2 (0-based)
    line1 = result["subtitle_data"][1]
    self.assertEqual(line1["time_changed"], True)
    self.assertEqual(line1["text_changed"], True)
    self.assertEqual(line1["subtitles"][0], set_1[1])
    self.assertEqual(line1["subtitles"][1], set_2[1])
    line2 = result["subtitle_data"][2]
    self.assertEqual(line2["time_changed"], True)
    self.assertEqual(line2["text_changed"], True)
    self.assertEqual(line2["subtitles"][0], self.empty_line())
    self.assertEqual(line2["subtitles"][1], set_2[2])
def setUp(self):
    """Create two users, an English video owned by user_1, and one 'en' version.

    The mail outbox is emptied after the video is saved and before the
    language and its first version are created.
    """
    self.user_1 = User.objects.create(username='******')
    self.user_2 = User.objects.create(username='******')

    lookup = Video.get_or_create_for_url("http://www.example.com/video.mp4")
    video = lookup[0]
    self.video = video
    video.primary_audio_language_code = 'en'
    video.user = self.user_1
    video.save()

    mail.outbox = []

    self.original_language = SubtitleLanguage.objects.create(
        video=video, language_code='en')
    initial_lines = [
        (1000, 2000, "1"),
        (2000, 3000, "2"),
        (3000, 4000, "3"),
    ]
    self.original_language.add_version(
        subtitles=SubtitleSet.from_list('en', initial_lines))
def setUp(self):
    """Create two non-notified users, a video added by user_1, and one 'en' version.

    The mail outbox is emptied after the video is added and before the
    language and its first version are created.
    """
    self.user_1 = User.objects.create(username='******',
                                      notify_by_email=False)
    self.user_2 = User.objects.create(username='******',
                                      notify_by_email=False)

    # NOTE(review): this callback is defined but never handed to anything
    # in this method, so primary_audio_language_code is not set here --
    # presumably it was meant to be passed to Video.add(); confirm.
    def setup_video(video, video_url):
        video.primary_audio_language_code = 'en'

    added = Video.add("http://www.example.com/video.mp4", self.user_1)
    video = added[0]
    self.video = video

    mail.outbox = []

    self.original_language = SubtitleLanguage.objects.create(
        video=video, language_code='en')
    initial_lines = [
        (1000, 2000, "1"),
        (2000, 3000, "2"),
        (3000, 4000, "3"),
    ]
    self.original_language.add_version(
        subtitles=SubtitleSet.from_list('en', initial_lines))
def test_email_diff_subtitles(self):
    """New-version notification emails list the text and timing differences.

    Two followers are notified about a second version that rewords one
    line, appends a synced line and appends an unsynced line.
    """
    initial_count = len(mail.outbox)

    # a user who can receive notifications; the notified users must differ
    # from the author of the new version, else they won't get notified
    author = User(username='******', email='*****@*****.**',
                  notify_by_email=True, valid_email=True)
    author.save(send_email_confirmation=False)
    # bypass logic from hell
    author.valid_email = True
    author.save()

    # this one is needed for the non_editor template check
    user2 = User(username='******', email='*****@*****.**',
                 notify_by_email=True, valid_email=True)
    user2.save(send_email_confirmation=False)
    # bypass logic from hell
    user2.valid_email = True
    user2.save()

    video, _video_url = Video.add("http://wwww.example.com/video-diff.mp4", None)
    for follower in (author, user2):
        video.followers.add(follower)

    language = SubtitleLanguage(video=video, language_code='en')
    language.save()

    subs_data = [
        [0, 1000, '1'],
        [1000, 2000, '2'],
    ]
    subtitles_1 = SubtitleSet.from_list('en', subs_data)
    language.add_version(subtitles=subtitles_1, author=author)

    # now we change the text on the second sub
    subs_data[1][2] = '2 changed'
    # add a regular sub
    subs_data.append([2000, 3000, 'new sub'])
    # add an unsynced one
    subs_data.append([None, None, 'no sync'])
    subtitles_2 = SubtitleSet.from_list('en', subs_data)
    new_version = language.add_version(subtitles=subtitles_2)

    self.assertTrue(len(video.notification_list()) > 0)
    res = send_new_version_notification(new_version.pk)
    self.assertNotEqual(res, None)

    # we expect two emails, one is the new-edits-non-editor, and
    # the other for mail_notification.html
    self.assertEqual(len(mail.outbox), initial_count + 2)

    expected_old = [u'2', u'', u'']
    expected_new = [u'2 changed', u'new sub', u'no sync']

    for email_number, email_msg in enumerate(mail.outbox):
        # make sure this is the right message
        self.assertIn("New edits to ", email_msg.subject)
        self.assertIn("video-diff.mp4", email_msg.subject)

        html = BeautifulSoup(email_msg.body)
        html_text = "".join(html.body(text=True)).replace("\n", "")

        # NOTE(review): only the two percentage checks are treated as
        # specific to the first email; confirm this nesting.
        if email_number == 0:
            # assert text and timing changes are correct
            self.assertIn('67% of the text', html_text)
            self.assertIn('33% of the timing was changed.', html_text)

        # find the listed text changes to make sure they match; cells
        # alternate old version / new version
        diff_table = html.findAll('table', attrs={'class': 'diffs'})[0]
        old_version_changes = []
        new_version_changes = []
        for i, node in enumerate(diff_table.findAll('td')):
            target = old_version_changes if i % 2 == 0 else new_version_changes
            target.append(node.text)

        self.assertEqual(old_version_changes, expected_old)
        self.assertEqual(new_version_changes, expected_new)
def test_add_subtitles(self):
    """pipeline.add_subtitles() accepts None, tuples, a SubtitleSet or XML.

    Each accepted form creates a new 'en' version whose tip holds the given
    lines; an unsupported value raises TypeError and creates no version.
    """
    def _get_tip_subs():
        language = SubtitleLanguage.objects.get(video=self.video,
                                                language_code='en')
        tip = language.get_tip(full=True)
        return list(tip.get_subtitles().subtitle_items())

    def _add(subs):
        pipeline.add_subtitles(self.video, 'en', subs)

    # Passing nil.
    _add(None)
    self.assertEqual(_get_tip_subs(), [])

    # Passing a list of tuples.
    tuple_list = [
        (100, 200, "foo", {'new_paragraph': True}),
        (300, None, "bar", {'new_paragraph': False}),
    ]
    _add(tuple_list)
    self.assertEqual(_get_tip_subs(), [
        SubtitleLine(100, 200, "foo", {'new_paragraph': True}),
        SubtitleLine(300, None, "bar", {'new_paragraph': False}),
    ])

    # Passing an iterable of tuples.
    iterable = (s for s in [
        (101, 200, "foo", {'new_paragraph': True}),
        (300, None, "bar", {'new_paragraph': False}),
    ])
    # FIXME: passing the generator directly fails because it gets exhausted
    # somewhere along the pipeline; debug that and pass the iterable as is.
    _add(tuple(iterable))
    self.assertEqual(_get_tip_subs(), [
        SubtitleLine(101, 200, "foo", {'new_paragraph': True}),
        SubtitleLine(300, None, "bar", {'new_paragraph': False}),
    ])

    # Passing a SubtitleSet.
    subs = SubtitleSet.from_list('en', [
        SubtitleLine(110, 210, "foo", {'new_paragraph': True}),
        SubtitleLine(310, 410, "bar", {'new_paragraph': False}),
        SubtitleLine(None, None, '"baz"', {'new_paragraph': False}),
    ])
    _add(subs)
    self.assertEqual(_get_tip_subs(), [
        (110, 210, "foo", {'new_paragraph': True}),
        (310, 410, "bar", {'new_paragraph': False}),
        (None, None, '"baz"', {'new_paragraph': False}),
    ])

    # Passing a hunk of XML.
    subs = SubtitleSet.from_list("en", [
        SubtitleLine(10000, 22000, "boots", {}),
        SubtitleLine(23000, 29000, "cats", {}),
    ])
    _add(subs.to_xml())
    self.assertEqual(_get_tip_subs(), [
        SubtitleLine(10000, 22000, "boots", {'new_paragraph': True}),
        SubtitleLine(23000, 29000, "cats", {'new_paragraph': False}),
    ])

    # Passing nonsense should TypeError out.
    with self.assertRaises(TypeError):
        _add(1)

    # Make sure all the versions are there: one per successful _add() above.
    sl = SubtitleLanguage.objects.get(video=self.video, language_code='en')
    self.assertEqual(sl.subtitleversion_set.full().count(), 5)
def test_diffing(self):
    """The diffing page exposes and renders the same data diff() computes.

    Two versions are added to the 'en' language, the 'videos:diffing' view
    is requested for them, and every rendered row is compared with the
    corresponding entry of diff()'s 'subtitle_data'.
    """
    create_langs_and_versions(self.video, ['en'])

    eng = self.video.newsubtitlelanguage_set.get(language_code='en')
    subtitles = SubtitleSet.from_list('en', [
        (10000, 20000, "1 - :D"),
        (20000, 30000, "2 - :D"),
        (30000, 40000, "3 - :D"),
        (40000, 50000, "4 - :D"),
        (50000, 60000, "5 - :D"),
    ])
    subtitles2 = SubtitleSet.from_list('en', [
        (10000, 20000, "1 - :D"),
        (20000, 25000, "2 - :D"),  # time change
        (30000, 40000, "Three - :D"),  # text change
        # multiple lines replaced by a single line
        (40000, 60000, "45 - :D"),
    ])
    first_version = eng.add_version(subtitles=subtitles)
    second_version = eng.add_version(subtitles=subtitles2)

    # Note on the argument order to diff: we always diff the more recent
    # version against the less recent
    diff_result = diff(subtitles2, subtitles)

    response = self._simple_test('videos:diffing',
                                 [first_version.id, second_version.id])
    self.assertEqual(diff_result, response.context['diff_data'])

    diff_sub_data = diff_result['subtitle_data']

    html = BeautifulSoup(response.content)
    diff_list = html.find('ol', {"class": 'subtitles-diff'})
    diff_items = diff_list.findAll('li')
    # check number of lines
    self.assertEqual(len(diff_items), len(diff_sub_data))

    def check_column_data(column, sub_data):
        """Check the HTML of one column against one subtitle from diff()."""
        # special check for empty lines
        if sub_data.text is None:
            self.assertEqual(column.string.strip(), "")
            return
        time_span, text_span = column.findAll('span', recursive=False)
        self.assertEqual(text_span.string.strip(), sub_data.text)
        time_child_spans = time_span.findAll('span',
                                             {'class': 'stamp_text'})
        self.assertEqual(time_child_spans[0].string.strip(),
                         format_sub_time(sub_data.start_time))
        self.assertEqual(time_child_spans[1].string.strip(),
                         format_sub_time(sub_data.end_time))

    for li, diff_sub_data_item in zip(diff_items, diff_sub_data):
        # left_column is checked against ['subtitles'][1] and right_column
        # against ['subtitles'][0], i.e. the columns are in the reverse
        # order of the pairs produced by diff(subtitles2, subtitles).
        # NOTE(review): an earlier comment here said diff() was passed
        # (older_version, newer_version), which does not match the call
        # above -- confirm which version each column is meant to show.
        check_column_data(li.find('div', {'class': 'left_column'}),
                          diff_sub_data_item['subtitles'][1])
        check_column_data(li.find('div', {'class': 'right_column'}),
                          diff_sub_data_item['subtitles'][0])
        # we use the time_change class for either text or time changes.
        time_changes = li.findAll('span', {'class': 'time_change'})
        if (diff_sub_data_item['time_changed'] or
                diff_sub_data_item['text_changed']):
            self.assertNotEqual(len(time_changes), 0)
        else:
            self.assertEqual(len(time_changes), 0)