def test_apache_parser(self):
    """Run the Apache mail syncer/parser pipeline and check its output.

    The channel is created against the hc-httpclient-users mod_mbox
    archive, its table of contents is refreshed and partially downloaded,
    and the channel is parsed. The test then checks the number of parsed
    messages, the snippet languages and code references of two specific
    messages, and the thread structure returned by post-processing.

    NOTE(review): this appears to fetch from the live archive URL, so it
    presumably needs network access -- confirm.
    """
    pname = 'project1'
    cname = 'coreforum'
    syncer = 'channel.syncer.common_syncers.ApacheMailSyncer'
    parser = 'channel.parser.common_parsers.ApacheMailParser'
    url = 'http://mail-archives.apache.org/mod_mbox/hc-httpclient-users/'

    create_channel_db(pname, 'cf', cname, syncer, parser, url)
    create_channel_local(pname, cname, syncer, url)

    toc_refresh(pname, cname)
    toc_download_section(pname, cname, start=0, end=1)
    toc_download_entries(pname, cname, 9, 99)
    parse_channel(pname, cname, True)

    self.assertEqual(8, Message.objects.all().count())

    # Dump every parsed message to make failures easier to diagnose.
    messages = list(Message.objects.all())
    for message in messages:
        print('{0} by {1} on {2} (wc: {3})'.format(
            message.title, message.author, message.msg_date,
            message.word_count))
        print(' {0} snippets and {1} references'.format(
            message.code_snippets.count(),
            message.code_references.count()))
        print(' Snippets:')
        for code_snippet in message.code_snippets.all():
            print(' {0}'.format(code_snippet.language))
        for ref in message.code_references.all():
            print(' {0}: {1}'.format(ref.kind_hint.kind, ref.content))

    # Test Snippets
    # Evaluate the queryset once so both assertions inspect the same rows.
    third_to_last = messages[-3]
    snippets = list(third_to_last.code_snippets.all())
    self.assertEqual('l', snippets[0].language)
    self.assertEqual('jx', snippets[1].language)

    # Test Refs
    fourth_to_last = messages[-4]
    refs = [ref.content.strip()
            for ref in fourth_to_last.code_references.all()]
    self.assertEqual(4, len(refs))
    # assertIn reports the container contents when the check fails.
    self.assertIn('EasySSLProtocolSocketFactory', refs)
    self.assertIn('SSL', refs)

    # Test Post-Processing!
    channel = post_process_channel(pname, cname)
    self.assertEqual(4, channel.threads.count())

    second_thread = channel.threads.all()[1]
    # Single evaluation: count, indexes and titles all use one result set.
    thread_messages = list(second_thread.messages.all())
    self.assertEqual(3, len(thread_messages))

    indexes = [msg.index for msg in thread_messages]
    self.assertEqual([0, 1, 2], indexes)

    # The first message's title must not start with "re"; the second must.
    self.assertFalse(thread_messages[0].title.lower().startswith('re'))
    self.assertTrue(thread_messages[1].title.lower().startswith('re'))
def test_phpbb_parser(self):
    """Run the phpBB forum syncer/parser pipeline and check its output.

    The channel is created against the Hibernate forum (viewforum f=1),
    its table of contents is refreshed and partially downloaded, and the
    channel is parsed. The test then checks the number of parsed messages,
    the first message's snippet language, author and date, and the code
    references of the second message.

    NOTE(review): this appears to fetch from the live forum URL, so it
    presumably needs network access -- confirm.
    """
    pname = 'project1'
    cname = 'coreforum'
    syncer = 'channel.syncer.common_syncers.PHPBBForumSyncer'
    parser = 'channel.parser.common_parsers.PHPBBForumParser'
    url = 'https://forum.hibernate.org/viewforum.php?f=1'

    create_channel_db(pname, 'cf', cname, syncer, parser, url)
    create_channel_local(pname, cname, syncer, url)

    toc_refresh(pname, cname)
    toc_download_section(pname, cname, start=0, end=2)
    toc_download_entries(pname, cname, 1023, 1025)
    parse_channel(pname, cname, True)

    self.assertEqual(23, Message.objects.all().count())

    # Dump every parsed message to make failures easier to diagnose.
    messages = list(Message.objects.all())
    for message in messages:
        print(message.url)
        print('{0} by {1} on {2} (wc: {3})'.format(
            message.title, message.author, message.msg_date,
            message.word_count))
        print(' {0} snippets and {1} references'.format(
            message.code_snippets.count(),
            message.code_references.count()))
        print(' Snippets:')
        for code_snippet in message.code_snippets.all():
            print(' {0}'.format(code_snippet.language))
        for ref in message.code_references.all():
            print(' {0}: {1}'.format(ref.kind_hint.kind, ref.content))

    first_message = messages[0]

    # Test Snippets
    self.assertEqual('x', first_message.code_snippets.all()[0].language)

    # Test Author
    self.assertEqual(first_message.author.nickname, 'mhellkamp')

    # Test Date
    self.assertEqual(first_message.msg_date,
                     datetime(2003, 8, 29, 10, 16))

    # Test Refs
    second_message = messages[1]
    refs = [ref.content.strip()
            for ref in second_message.code_references.all()]
    # Because of the title 4 + 1 = 5
    self.assertEqual(5, len(refs))
    # assertIn reports the container contents when the check fails.
    self.assertIn('DBCP', refs)
    self.assertIn('C3P0', refs)
def test_fudeclipse_parser(self):
    """Run the FUD Eclipse forum syncer/parser pipeline and check it.

    The channel is created against the Eclipse forum (thread list 59),
    its table of contents is refreshed and partially downloaded, and the
    channel is parsed. The test then checks the number of parsed messages
    and the first message's title, author, date and code references.

    NOTE(review): this appears to fetch from the live forum URL, so it
    presumably needs network access -- confirm.
    """
    pname = 'project1'
    cname = 'coreforum'
    syncer = 'channel.syncer.common_syncers.FUDEclipseForumSyncer'
    parser = 'channel.parser.common_parsers.FUDEclipseForumParser'
    url = 'http://www.eclipse.org/forums/index.php/sf/thread/59/'

    create_channel_db(pname, 'cf', cname, syncer, parser, url)
    create_channel_local(pname, cname, syncer, url)

    toc_refresh(pname, cname)
    toc_download_section(pname, cname, start=0, end=2)
    toc_download_entries(pname, cname, 0, 6)
    parse_channel(pname, cname, True)

    self.assertEqual(18, Message.objects.all().count())

    # Dump every parsed message to make failures easier to diagnose.
    messages = list(Message.objects.all())
    for message in messages:
        print('{0} by {1} on {2} (wc: {3})'.format(
            message.title, message.author, message.msg_date,
            message.word_count))
        print(' {0} snippets and {1} references'.format(
            message.code_snippets.count(),
            message.code_references.count()))
        print(' Snippets:')
        for code_snippet in message.code_snippets.all():
            print(' {0}'.format(code_snippet.language))
        for ref in message.code_references.all():
            print(' {0}: {1}'.format(ref.kind_hint.kind, ref.content))

    first_message = messages[0]

    # Test Title
    self.assertEqual(first_message.title,
                     'looping back to a previous step')

    # Test Author
    self.assertEqual(first_message.author.nickname, 'No real name')

    # Test Date
    self.assertEqual(first_message.msg_date,
                     datetime(2010, 8, 26, 7, 50))

    # Test Refs
    refs = [ref.content.strip()
            for ref in first_message.code_references.all()]
    self.assertEqual(3, len(refs))
    # assertIn reports the container contents when the check fails.
    self.assertIn('ActivityElements', refs)
def handle_noargs(self, **options):
    """Parse the channel named by the ``pname`` and ``cname`` options.

    Both option values are passed through ``smart_decode`` before use.
    The third argument given to ``parse_channel`` is the negation of the
    ``skip_refs`` option, so a missing or falsy ``skip_refs`` yields True.
    """
    project_name, channel_name = [
        smart_decode(options.get(key)) for key in ('pname', 'cname')
    ]
    # presumably this flag enables code-reference parsing -- TODO confirm
    parse_refs = not options.get('skip_refs')
    parse_channel(project_name, channel_name, parse_refs)