def test_insert(self):
    """A line present only in set_2 is reported as one fully-changed entry.

    The extra line must be paired with an empty line on the set_1 side,
    and all other lines must be reported as unchanged.
    """
    set_1 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (500, 800, "Hey 1.5"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result['changed'], True)
    # For both time_changed and text_changed we calculate as follows:
    # there are 9 subs in total across the two sets. 8 of those are
    # matches and 1 is new in set_2, so the change amount is 1/9.
    self.assertAlmostEqual(result['time_changed'], 1/9.0)
    self.assertAlmostEqual(result['text_changed'], 1/9.0)
    self.assertEqual(len(result['subtitle_data']), 5)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3, 4)
    # check the line that was inserted: nothing on the set_1 side,
    # the new line on the set_2 side
    insert_sub_data = result['subtitle_data'][1]
    self.assertEqual(insert_sub_data['time_changed'], True)
    self.assertEqual(insert_sub_data['text_changed'], True)
    self.assertEqual(insert_sub_data['subtitles'][0], self.empty_line())
    self.assertEqual(insert_sub_data['subtitles'][1], set_2[1])
def test_simple_replace(self):
    """Changing only the text of one line is a text change, not a time change."""
    set_1 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey New 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result['changed'], True)
    self.assertAlmostEqual(result['time_changed'], 0)
    # For text_changed we calculate as follows: there are 8 subs in total
    # across the two sets. 6 of those are matches and 1 line differs in
    # each set, so 2/8 has been changed.
    self.assertAlmostEqual(result['text_changed'], 2/8.0)
    self.assertEqual(len(result['subtitle_data']), 4)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3)
    # check the line whose text was replaced: same timing, new text,
    # with the set_1 line first and the set_2 line second
    replaced_sub_data = result['subtitle_data'][1]
    self.assertEqual(replaced_sub_data['time_changed'], False)
    self.assertEqual(replaced_sub_data['text_changed'], True)
    self.assertEqual(replaced_sub_data['subtitles'][0], set_1[1])
    self.assertEqual(replaced_sub_data['subtitles'][1], set_2[1])
def test_replace_multiple_lines_with_single(self):
    """Two set_1 lines merged into one set_2 line yield two changed entries.

    The first changed entry pairs the first replaced line with the merged
    line; the second pairs the remaining replaced line with an empty line.
    """
    set_1 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 3000, "Hey 2 and 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result['changed'], True)
    # For both time_changed and text_changed we calculate as follows:
    # there are 7 subs in total across the two sets. 4 of those are
    # matches and 2 in set_1 were replaced with 1 in set_2, so the change
    # amount is 3/7.
    self.assertAlmostEqual(result['time_changed'], 3/7.0)
    self.assertAlmostEqual(result['text_changed'], 3/7.0)
    self.assertEqual(len(result['subtitle_data']), 4)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 3)
    # check the two lines that were replaced by the single merged line
    line1 = result['subtitle_data'][1]
    self.assertEqual(line1['time_changed'], True)
    self.assertEqual(line1['text_changed'], True)
    self.assertEqual(line1['subtitles'][0], set_1[1])
    self.assertEqual(line1['subtitles'][1], set_2[1])
    # the second replaced line has no counterpart left in set_2
    line2 = result['subtitle_data'][2]
    self.assertEqual(line2['time_changed'], True)
    self.assertEqual(line2['text_changed'], True)
    self.assertEqual(line2['subtitles'][0], set_1[2])
    self.assertEqual(line2['subtitles'][1], self.empty_line())
def test_one_set_empty(self):
    """Diffing a populated set against an empty one reports a full change."""
    populated = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    empty = SubtitleSet("en")
    outcome = diff(populated, empty)
    self.assertEqual(outcome["changed"], True)
    # Both ratios are expected to be exactly 1.0.
    for ratio_key in ("text_changed", "time_changed"):
        self.assertEqual(outcome[ratio_key], 1.0)
def test_data_ordering(self):
    """Each 'subtitles' pair follows the argument order given to diff()."""
    shorter = SubtitleSet.from_list("en", [(0, 1000, "Hey 1")])
    longer = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1200, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
    ])
    third_entry = diff(shorter, longer)["subtitle_data"][2]
    pair = third_entry["subtitles"]
    # Index 0 belongs to the first argument, which has no third line,
    # so its text is None; index 1 belongs to the second argument.
    self.assertEqual(pair[0].text, None)
    self.assertEqual(pair[1].text, "Hey 3")
def test_one_set_empty(self):
    """A populated set diffed against an empty set counts as fully changed."""
    four_lines = [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ]
    full_set = SubtitleSet.from_list('en', four_lines)
    diff_data = diff(full_set, SubtitleSet('en'))
    self.assertEqual(diff_data['changed'], True)
    # text and time ratios must both be exactly 1.0
    self.assertEqual(diff_data['text_changed'], 1.0)
    self.assertEqual(diff_data['time_changed'], 1.0)
def test_unsynced_reflect_time_changes(self):
    """An added line without timings is counted in time_changed."""
    synced_line = (0, 1000, "Hey 1")
    unsynced_line = (None, None, "Hey 2")
    before = SubtitleSet.from_list('en', [synced_line])
    after = SubtitleSet.from_list('en', [synced_line, unsynced_line])
    outcome = diff(before, after)
    # expected ratio: one changed line out of three lines in total
    self.assertAlmostEqual(outcome['time_changed'], 1/3.0)
def test_text_changes(self):
    """Editing the text of one line out of four gives text_changed == 1/4."""
    original = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    edited = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 22"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    outcome = diff(original, edited)
    entries = outcome["subtitle_data"]
    self.assertEqual(outcome["changed"], True)
    self.assertEqual(outcome["text_changed"], 1 / 4.0)
    self.assertEqual(outcome["time_changed"], 0)
    self.assertEqual(len(entries), 4)
    # Only the entry at index 1 should be flagged as a text change.
    for position, entry in enumerate(entries):
        expect_changed = position == 1
        self.assertEqual(entry["text_changed"], expect_changed)
def test_data_ordering(self):
    """diff() keeps its argument order inside each 'subtitles' pair."""
    one_line = [(0, 1000, "Hey 1")]
    three_lines = [
        (0, 1000, "Hey 1"),
        (1200, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
    ]
    first_set = SubtitleSet.from_list('en', one_line)
    second_set = SubtitleSet.from_list('en', three_lines)
    diff_data = diff(first_set, second_set)
    last_pair = diff_data['subtitle_data'][2]['subtitles']
    # Slot 0 is the first argument's side (it has no third line, hence
    # text None); slot 1 is the second argument's side.
    self.assertEqual(last_pair[0].text, None)
    self.assertEqual(last_pair[1].text, "Hey 3")
def test_text_changes(self):
    """A single text edit among four lines is the only text change reported."""
    before = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    after = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 22"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    diff_data = diff(before, after)
    self.assertEqual(diff_data['changed'], True)
    self.assertEqual(diff_data['text_changed'], 1/4.0)
    self.assertEqual(diff_data['time_changed'], 0)
    rows = diff_data['subtitle_data']
    self.assertEqual(len(rows), 4)
    # only the row at index 1 should have its text flagged as changed
    for index, row in enumerate(rows):
        self.assertEqual(row['text_changed'], index == 1)
def test_time_changes(self):
    """Shifting one start time among four lines is the only time change."""
    before = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    after = SubtitleSet.from_list('en', [
        (0, 1000, "Hey 1"),
        (1200, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    diff_data = diff(before, after)
    self.assertEqual(diff_data['changed'], True)
    self.assertEqual(diff_data['time_changed'], 1/4.0)
    self.assertEqual(diff_data['text_changed'], 0)
    rows = diff_data['subtitle_data']
    self.assertEqual(len(rows), 4)
    # only the row at index 1 should have its timing flagged as changed,
    # and no row should have its text flagged
    for index, row in enumerate(rows):
        self.assertEqual(row['time_changed'], index == 1)
        self.assertFalse(row['text_changed'])
def test_delete(self):
    """A line missing from set_2 is reported as one fully-changed entry.

    The deleted line must be paired with an empty line on the set_2 side,
    and all other lines must be reported as unchanged.
    """
    set_1 = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result["changed"], True)
    # For both time_changed and text_changed we calculate as follows:
    # there are 7 subs in total across the two sets. 6 of those are
    # matches and 1 exists only in set_1 (it was deleted), so the change
    # amount is 1/7.
    self.assertAlmostEqual(result["time_changed"], 1 / 7.0)
    self.assertAlmostEqual(result["text_changed"], 1 / 7.0)
    self.assertEqual(len(result["subtitle_data"]), 4)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3)
    # check the line that was deleted: the set_1 line on one side,
    # nothing on the set_2 side
    delete_sub_data = result["subtitle_data"][1]
    self.assertEqual(delete_sub_data["time_changed"], True)
    self.assertEqual(delete_sub_data["text_changed"], True)
    self.assertEqual(delete_sub_data["subtitles"][1], self.empty_line())
    self.assertEqual(delete_sub_data["subtitles"][0], set_1[1])
def test_replace_single_line_with_multiple(self):
    """One set_1 line split into two set_2 lines yields two changed entries.

    The first changed entry pairs the replaced line with the first new
    line; the second pairs an empty line with the second new line.
    """
    set_1 = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1000, 2000, "Hey 2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    set_2 = SubtitleSet.from_list("en", [
        (0, 1000, "Hey 1"),
        (1000, 1500, "Hey 2.1"),
        (1500, 2000, "Hey 2.2"),
        (2000, 3000, "Hey 3"),
        (3000, 4000, "Hey 4"),
    ])
    result = diff(set_1, set_2)
    self.assertEqual(result["changed"], True)
    # For both time_changed and text_changed we calculate as follows:
    # there are 9 subs in total across the two sets. 6 of those are
    # matches and 1 in set_1 was changed to 2 in set_2, so the change
    # amount is 3/9.
    self.assertAlmostEqual(result["time_changed"], 3 / 9.0)
    self.assertAlmostEqual(result["text_changed"], 3 / 9.0)
    self.assertEqual(len(result["subtitle_data"]), 5)
    # check the lines that haven't changed
    self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 3, 4)
    # set_1[1] was replaced by set_2[1] and set_2[2]
    line1 = result["subtitle_data"][1]
    self.assertEqual(line1["time_changed"], True)
    self.assertEqual(line1["text_changed"], True)
    self.assertEqual(line1["subtitles"][0], set_1[1])
    self.assertEqual(line1["subtitles"][1], set_2[1])
    # the second new line has no counterpart in set_1
    line2 = result["subtitle_data"][2]
    self.assertEqual(line2["time_changed"], True)
    self.assertEqual(line2["text_changed"], True)
    self.assertEqual(line2["subtitles"][0], self.empty_line())
    self.assertEqual(line2["subtitles"][1], set_2[2])
def test_empty_subs(self):
    """Two empty subtitle sets diff as completely unchanged."""
    left = SubtitleSet('en')
    right = SubtitleSet('en')
    diff_data = diff(left, right)
    self.assertEqual(diff_data['changed'], False)
    # neither ratio may register any change
    for ratio_key in ('text_changed', 'time_changed'):
        self.assertEqual(diff_data[ratio_key], 0)
    # and there are no per-line entries at all
    self.assertEqual(len(diff_data['subtitle_data']), 0)
def test_empty_subs(self):
    """Diffing an empty set against another empty set reports no change."""
    outcome = diff(SubtitleSet("en"), SubtitleSet("en"))
    entries = outcome["subtitle_data"]
    self.assertEqual(outcome["changed"], False)
    self.assertEqual(outcome["text_changed"], 0)
    self.assertEqual(outcome["time_changed"], 0)
    # no per-line data is produced for two empty sets
    self.assertEqual(len(entries), 0)
def _get_new_version_for_save(self, subtitles, language, session, user,
                              new_title, new_description, new_metadata,
                              save_for_later=None):
    """Return a new subtitle version for this save, or None if not needed.

    A new version is created when the subtitles, title, description or
    metadata differ from the language's current tip
    (``language.get_tip(public=False)``). A title, description or metadata
    value of ``None`` is never counted as a change.

    ``subtitles`` may be a string (parsed into a ``SubtitleSet`` using
    ``language.language_code``) or a ``SubtitleSet``. Any other value is
    treated as "no subtitles" for change detection, but ``subtitles`` is
    still handed to ``self._create_version`` untouched.

    When a version is created and it is synced, ``save_for_later`` is
    falsy, and ``_create_version`` asked for a task, a review/approve task
    is created as well.

    Returns the version produced by ``self._create_version``, or ``None``
    when nothing changed.
    """
    new_version = None
    previous_version = language.get_tip(public=False)

    if previous_version:
        title_changed = (new_title is not None
                         and new_title != previous_version.title)
        desc_changed = (new_description is not None
                        and new_description != previous_version.description)
        metadata_changed = (
            new_metadata is not None
            and new_metadata != previous_version.get_metadata())
    else:
        # Nothing to compare against: any supplied value is a change.
        title_changed = new_title is not None
        desc_changed = new_description is not None
        metadata_changed = new_metadata is not None

    # Normalize the incoming subtitles to a SubtitleSet (or None) so they
    # can be compared with the previous version.
    subtitle_set = None
    if isinstance(subtitles, basestring):
        subtitle_set = SubtitleSet(language.language_code, subtitles)
    elif isinstance(subtitles, SubtitleSet):
        subtitle_set = subtitles

    # Subtitles have changed if only one side is missing/empty, or if the
    # two sets themselves differ.
    if not previous_version and not subtitle_set:
        subtitles_changed = False
    elif not previous_version or not subtitle_set:
        subtitles_changed = True
    else:
        subtitles_changed = diff(previous_version.get_subtitles(),
                                 subtitle_set)['changed']

    should_create_new_version = (subtitles_changed or title_changed
                                 or desc_changed or metadata_changed)

    if should_create_new_version:
        # NOTE(review): the version is created against session.language,
        # not the `language` argument used above -- presumably the same
        # object; confirm against callers.
        new_version, should_create_task = self._create_version(
            session.language, user,
            new_title=new_title,
            new_description=new_description,
            new_metadata=new_metadata,
            subtitles=subtitles,
            session=session)

        incomplete = not new_version.is_synced() or save_for_later

        # Record the origin of this set of subtitles.
        #
        # We need to record it *before* creating review/approve tasks (if
        # any) because that means these subs were from a post-publish edit
        # or something similar. If we record the origin after creating the
        # review task it'll be marked as originating from review, which
        # isn't right because these subs had to come from something else.
        record_workflow_origin(new_version,
                               new_version.video.get_team_video())

        if (not incomplete) and should_create_task:
            self._create_review_or_approve_task(new_version)

    return new_version
def test_diffing(self):
    """The diffing view exposes diff() data and renders one row per entry.

    Checks that the view's ``diff_data`` context equals the result of
    ``diff(subtitles2, subtitles)``, that the rendered list has one ``li``
    per diff entry, that both columns of each row show the expected text
    and timings, and that changed rows (and only those) carry the
    ``time_change`` marker.
    """
    create_langs_and_versions(self.video, ['en'])
    eng = self.video.newsubtitlelanguage_set.get(language_code='en')
    subtitles = SubtitleSet.from_list('en', [
        (10000, 20000, "1 - :D"),
        (20000, 30000, "2 - :D"),
        (30000, 40000, "3 - :D"),
        (40000, 50000, "4 - :D"),
        (50000, 60000, "5 - :D"),
    ])
    subtitles2 = SubtitleSet.from_list('en', [
        (10000, 20000, "1 - :D"),
        (20000, 25000, "2 - :D"),  # time change
        (30000, 40000, "Three - :D"),  # text change
        # multiple lines replaced by a single line
        (40000, 60000, "45 - :D"),
    ])
    first_version = eng.add_version(subtitles=subtitles)
    second_version = eng.add_version(subtitles=subtitles2)
    # Note on the argument order to diff: the more recent subtitles are
    # passed first and the less recent ones second.
    diff_result = diff(subtitles2, subtitles)
    response = self._simple_test('videos:diffing',
                                 [first_version.id, second_version.id])
    self.assertEqual(diff_result, response.context['diff_data'])
    diff_sub_data = diff_result['subtitle_data']
    html = BeautifulSoup(response.content)
    diff_list = html.find('ol', {"class": 'subtitles-diff'})
    diff_items = diff_list.findAll('li')
    # check number of lines
    self.assertEqual(len(diff_items), len(diff_sub_data))

    def check_column_data(column, sub_data):
        """Check the HTML of one column against one subtitle from diff()."""
        # special check for empty lines
        if sub_data.text is None:
            self.assertEqual(column.string.strip(), "")
            return
        time_span, text_span = column.findAll('span', recursive=False)
        self.assertEqual(text_span.string.strip(), sub_data.text)
        time_child_spans = time_span.findAll('span',
                                             {'class': 'stamp_text'})
        self.assertEqual(time_child_spans[0].string.strip(),
                         format_sub_time(sub_data.start_time))
        self.assertEqual(time_child_spans[1].string.strip(),
                         format_sub_time(sub_data.end_time))

    for li, diff_sub_data_item in zip(diff_items, diff_sub_data):
        # Intuitively, left_column should be compared against
        # ['subtitles'][0], but we do the opposite: the left column is
        # checked against ['subtitles'][1] and the right column against
        # ['subtitles'][0].
        # NOTE(review): diff() above is called as
        # diff(subtitles2, subtitles), i.e. with the newer subtitles
        # first. An earlier version of this comment said diff() received
        # (older, newer) and that the HTML shows the newer version on the
        # left -- confirm which version the template puts in each column.
        check_column_data(li.find('div', {'class': 'left_column'}),
                          diff_sub_data_item['subtitles'][1])
        check_column_data(li.find('div', {'class': 'right_column'}),
                          diff_sub_data_item['subtitles'][0])
        # we use the time_change class for either text or time changes.
        time_changes = li.findAll('span', {'class': 'time_change'})
        if (diff_sub_data_item['time_changed'] or
                diff_sub_data_item['text_changed']):
            self.assertNotEqual(len(time_changes), 0)
        else:
            self.assertEqual(len(time_changes), 0)