def test_read_gold_standard(self):
    """Check read_gold_standard output for each encoding-named fixture.

    For every fixture name below, the first element of the result must be
    the shared content lines followed by that fixture's special character,
    and the second element must be the shared comments text.
    """
    # The listing is not used by the assertions below.
    # NOTE(review): presumably kept as a smoke check of the listing
    # helper -- confirm, or drop it.
    corrected_files = data_processing.get_list_all_corrected_files(FIXTURES)
    corrected_files.sort()

    # Fixture name -> text expected on the last content line.
    # NOTE(review): names look like the encodings of the fixture files --
    # verify against the fixtures directory.
    special_chars = {
        'ascii': u'ascii yo!',
        'iso-8859-1': u'\xd3',
        'utf-8': u'\xae',
        'utf-16': u'\xae',
    }
    expected_comments = '\nsome comments\n'
    content_prefix = u"Content here\nmore content\n"

    for fixture_name, special in special_chars.items():
        result = data_processing.read_gold_standard(FIXTURES, fixture_name)
        expected_content = content_prefix + special + u"\n"
        self.assertEqual(result[0], expected_content)
        self.assertEqual(result[1], expected_comments)
def test_utf8(self):
    """Check the 'utf-8_chinese' fixture against the stored expected text.

    The pieces returned by read_gold_standard are joined with a single
    space and compared to Testread_gold_standard.actual_chinese_content.
    """
    parts = data_processing.read_gold_standard(FIXTURES, 'utf-8_chinese')
    joined = ' '.join(parts)
    expected = Testread_gold_standard.actual_chinese_content
    self.assertEqual(joined, expected)