def debug_channel(pname, cname, parse_refs=True, entry_url=None):
    """Run the generic parser in debug mode over one support channel.

    Loads the persisted channel model, fetches the matching
    SupportChannel row, and delegates to generic_parser.debug_channel.
    The model is NOT dumped back (debug only).  Returns the
    SupportChannel row.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    db_channel = (SupportChannel.objects
                  .filter(project__dir_name=pname)
                  .get(dir_name=cname))
    monitor = CLIProgressMonitor()
    generic_parser.debug_channel(
            db_channel, channel_model,
            progress_monitor=monitor,
            parse_refs=parse_refs,
            entry_url=entry_url)
    return db_channel
def sync_doc(pname, dname, release):
    """Download one documentation release and record its pages.

    The syncer class named in the model is instantiated with the
    model's input URL and the local doc path; its sync() result is
    stored on the model, which is then persisted.
    """
    key = dname + release
    local_path = get_doc_path(pname, dname, release)
    doc_model = load_model(pname, DOC_PATH, key)
    syncer_cls = import_clazz(doc_model.syncer)
    doc_model.pages = syncer_cls(doc_model.input_url, local_path).sync()
    dump_model(doc_model, pname, DOC_PATH, key)
def toc_refresh(pname, cname):
    """Refresh a channel's table of contents and persist the model.

    Failures are logged rather than re-raised so one bad channel does
    not abort a batch refresh.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    try:
        toc_syncer = import_clazz(channel_model.syncer_clazz)()
        toc_syncer.toc_refresh(channel_model)
        dump_model(channel_model, pname, STHREAD_PATH, cname)
    except Exception:
        logger.exception('Error while refreshing toc')
def test_phpbb_syncer(self):
    """End-to-end check of the PHPBB forum syncer (hits the live forum)."""
    pname = 'project1'
    cname = 'coreforum'
    url = 'https://forum.hibernate.org/viewforum.php?f=1'
    create_channel_db(pname, 'cf', cname,
            'channel.syncer.common_syncers.PHPBBForumSyncer',
            'foo.parser', url)
    create_channel_local(pname, cname,
            'channel.syncer.common_syncers.PHPBBForumSyncer', url)

    # Refresh the ToC and verify section ordering and state.
    toc_refresh(pname, cname)
    model = load_model(pname, STHREAD_PATH, cname)
    self.assertEqual(
        'https://forum.hibernate.org/viewforum.php?f=1&sd=a&start=0',
        model.toc_sections[0].url)
    self.assertFalse(model.toc_sections[0].downloaded)
    self.assertTrue(len(model.toc_sections) > 2349)
    for i in xrange(0, 2349):
        self.assertEqual(i, model.toc_sections[i].index)

    # Download the first four sections only.
    toc_download_section(pname, cname, start=0, end=4)
    model = load_model(pname, STHREAD_PATH, cname)
    for i in xrange(0, 4):
        self.assertTrue(model.toc_sections[i].downloaded)
    self.assertFalse(model.toc_sections[4].downloaded)
    self.assertEqual(100, len(model.entries))
    self.assertEqual(0, model.entries[0].index)
    self.assertFalse(model.entries[0].downloaded)
    self.assertEqual(1000, model.entries[25].index)
    self.assertEqual(1001, model.entries[26].index)
    self.assertTrue(model.entries[26].url.find('t=59') > -1)

    # Download a single entry and verify its files landed on disk.
    toc_download_entries(pname, cname, 1024, 1025)
    model = load_model(pname, STHREAD_PATH, cname)
    self.assertTrue(model.entries[49].downloaded)
    self.assertFalse(model.entries[50].downloaded)
    for local_path in model.entries[49].local_paths[:2]:
        full_path = os.path.join(settings.PROJECT_FS_ROOT, local_path)
        self.assertTrue(os.path.exists(full_path))
def test_fudeclipse_syncer(self):
    """End-to-end check of the FUDforum (Eclipse) syncer (hits the live forum)."""
    pname = 'project1'
    cname = 'coreforum'
    url = 'http://www.eclipse.org/forums/index.php/sf/thread/13/'
    create_channel_db(pname, 'cf', cname,
            'channel.syncer.common_syncers.FUDEclipseForumSyncer',
            'foo.parser', url)
    create_channel_local(pname, cname,
            'channel.syncer.common_syncers.FUDEclipseForumSyncer', url)

    # Refresh the ToC and verify section ordering and state.
    toc_refresh(pname, cname)
    model = load_model(pname, STHREAD_PATH, cname)
    self.assertEqual(
        'http://www.eclipse.org/forums/index.php/sf/thread/13/1/0/',
        model.toc_sections[0].url)
    self.assertFalse(model.toc_sections[0].downloaded)
    self.assertTrue(len(model.toc_sections) >= 247)
    for i in xrange(0, 247):
        self.assertEqual(i, model.toc_sections[i].index)

    # Download the first four sections only.
    toc_download_section(pname, cname, start=0, end=4)
    model = load_model(pname, STHREAD_PATH, cname)
    for i in xrange(0, 4):
        self.assertTrue(model.toc_sections[i].downloaded)
    self.assertFalse(model.toc_sections[4].downloaded)
    self.assertEqual(160, len(model.entries))
    self.assertEqual(0, model.entries[0].index)
    self.assertFalse(model.entries[0].downloaded)
    self.assertEqual(1000, model.entries[40].index)
    self.assertEqual(1001, model.entries[41].index)
    # NOTE(review): unlike the PHPBB test, no URL-content assertion is
    # made here (the original had it commented out).

    # Download a single entry and verify its file landed on disk.
    toc_download_entries(pname, cname, 1039, 1040)
    model = load_model(pname, STHREAD_PATH, cname)
    self.assertTrue(model.entries[79].downloaded)
    self.assertFalse(model.entries[80].downloaded)
    full_path = os.path.join(settings.PROJECT_FS_ROOT,
            model.entries[79].local_paths[0])
    self.assertTrue(os.path.exists(full_path))
def parse_channel(pname, cname, parse_refs=True):
    """Parse a channel's downloaded entries and persist the model.

    Unlike debug_channel, the updated model IS dumped back to disk.
    Returns the SupportChannel row that was parsed.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    db_channel = (SupportChannel.objects
                  .filter(project__dir_name=pname)
                  .get(dir_name=cname))
    generic_parser.parse_channel(
            db_channel, channel_model,
            progress_monitor=CLIProgressMonitor(),
            parse_refs=parse_refs)
    dump_model(channel_model, pname, STHREAD_PATH, cname)
    return db_channel
def test_apache_syncer(self):
    """End-to-end check of the Apache mod_mbox mail syncer (hits the live archive)."""
    pname = 'project1'
    cname = 'coreforum'
    url = 'http://mail-archives.apache.org/mod_mbox/hc-httpclient-users/'
    create_channel_db(pname, 'cf', cname,
            'channel.syncer.common_syncers.ApacheMailSyncer',
            'foo.parser', url)
    create_channel_local(pname, cname,
            'channel.syncer.common_syncers.ApacheMailSyncer', url)

    # Refresh the ToC and verify section ordering and state.
    toc_refresh(pname, cname)
    model = load_model(pname, STHREAD_PATH, cname)
    self.assertEqual(
        'http://mail-archives.apache.org/mod_mbox/hc-httpclient-users/200410.mbox/date',
        model.toc_sections[0].url)
    self.assertFalse(model.toc_sections[0].downloaded)
    self.assertTrue(len(model.toc_sections) >= 79)
    for i in xrange(0, 79):
        self.assertEqual(i, model.toc_sections[i].index)

    # Download the first four sections only.
    toc_download_section(pname, cname, start=0, end=4)
    model = load_model(pname, STHREAD_PATH, cname)
    for i in xrange(0, 4):
        self.assertTrue(model.toc_sections[i].downloaded)
    self.assertFalse(model.toc_sections[4].downloaded)
    self.assertEqual(316, len(model.entries))
    self.assertEqual(0, model.entries[0].index)
    self.assertFalse(model.entries[0].downloaded)
    self.assertEqual(1000, model.entries[17].index)
    self.assertEqual(1001, model.entries[18].index)
    self.assertTrue(model.entries[18].url.find('xbox.localdomain') > -1)

    # Download a single entry and verify its file landed on disk.
    toc_download_entries(pname, cname, 0, 1)
    model = load_model(pname, STHREAD_PATH, cname)
    self.assertTrue(model.entries[0].downloaded)
    self.assertFalse(model.entries[1].downloaded)
    full_path = os.path.join(settings.PROJECT_FS_ROOT,
            model.entries[0].local_paths[0])
    self.assertTrue(os.path.exists(full_path))
def parse_doc(pname, dname, release, parse_refs=True):
    """Parse the synced pages of one documentation release.

    Looks up the first matching ProjectRelease/Document pair, loads the
    page model synced earlier, and runs the parser over it.  Returns
    the Document row that was parsed.
    """
    project_release = ProjectRelease.objects.filter(
            project__dir_name=pname).filter(release=release)[0]
    document = Document.objects.filter(
            project_release=project_release).filter(title=dname)[0]
    doc_model = load_model(pname, DOC_PATH, dname + release)
    parse(document, doc_model.pages, parse_refs, CLIProgressMonitor())
    return document
def test_sync_doc_remote(self):
    """Sync the remote HttpClient tutorial and verify every page is on disk."""
    pname = 'project1'
    release = '3.0'
    dname = 'manual'
    create_doc_local(pname, dname, release,
            'doc.syncer.generic_syncer.SingleURLSyncer',
            'http://hc.apache.org/httpcomponents-client-ga/tutorial/html/index.html')
    sync_doc(pname, dname, release)
    model = load_model(pname, DOC_PATH, dname + release)
    self.assertEqual(9, len(model.pages))
    # Each page key is a file:// URL; its path component must exist.
    for page_key in model.pages:
        self.assertTrue(os.path.exists(urlparse(page_key).path))
def clear_channel_elements(pname, cname):
    """Reset parse state on the model and delete all parsed DB rows.

    Marks every entry as unparsed, persists the model, then removes the
    channel's messages (with their code references/snippets) and its
    support threads from the database.
    """
    channel_model = load_model(pname, STHREAD_PATH, cname)
    for entry in channel_model.entries:
        entry.parsed = False
    dump_model(channel_model, pname, STHREAD_PATH, cname)

    db_channel = (SupportChannel.objects
                  .filter(project__dir_name=pname)
                  .get(dir_name=cname))
    messages = Message.objects.filter(sthread__channel=db_channel)
    print('Deleting {0} messages'.format(messages.count()))
    # Delete dependents first, then the message itself.
    for message in messages.all():
        message.code_references.all().delete()
        message.code_snippets.all().delete()
        message.delete()
    SupportThread.objects.filter(channel=db_channel).delete()
def toc_view_entries(pname, cname):
    """Print download statistics for a channel's ToC entries."""
    model = load_model(pname, STHREAD_PATH, cname)
    total = len(model.entries)
    done = sum(1 for entry in model.entries if entry.downloaded)
    # Index of the last entry in the leading run of downloaded entries.
    last_index = -1
    for entry in model.entries:
        if not entry.downloaded:
            break
        last_index = entry.index
    print('Table of Content Entries Status for {0}'.format(cname))
    print('Number of entries: {0}'.format(total))
    print('Number of downloaded entries: {0}'.format(done))
    print('Last downloaded entry index: {0}'.format(last_index))
def test_sync_doc_remote(self):
    """Sync the remote HttpClient tutorial and check all pages exist locally."""
    pname = 'project1'
    release = '3.0'
    dname = 'manual'
    tutorial_url = ('http://hc.apache.org/httpcomponents-client-ga/'
                    'tutorial/html/index.html')
    create_doc_local(
        pname, dname, release,
        'doc.syncer.generic_syncer.SingleURLSyncer',
        tutorial_url)
    sync_doc(pname, dname, release)
    model = load_model(pname, DOC_PATH, dname + release)
    self.assertEqual(9, len(model.pages))
    # Each page key is a file:// URL; its path component must exist.
    for page_key in model.pages:
        local_path = urlparse(page_key).path
        self.assertTrue(os.path.exists(local_path))
def toc_view(pname, cname):
    """Print download statistics for a channel's ToC sections."""
    model = load_model(pname, STHREAD_PATH, cname)
    total = len(model.toc_sections)
    done = sum(1 for section in model.toc_sections if section.downloaded)
    # Index of the last section in the leading run of downloaded sections.
    last_index = -1
    for section in model.toc_sections:
        if not section.downloaded:
            break
        last_index = section.index
    print('Table of Content Status for {0}'.format(cname))
    print('Number of sections: {0}'.format(total))
    print('Number of downloaded sections: {0}'.format(done))
    print('Last downloaded section index: {0}'.format(last_index))
def toc_view_entries(pname, cname):
    """Print download statistics for a channel's ToC entries."""
    model = load_model(pname, STHREAD_PATH, cname)
    entries = model.entries
    downloaded_count = sum(1 for e in entries if e.downloaded)
    # Walk the leading run of downloaded entries to find the last index.
    last_index = -1
    for e in entries:
        if not e.downloaded:
            break
        last_index = e.index
    print('Table of Content Entries Status for {0}'.format(cname))
    print('Number of entries: {0}'.format(len(entries)))
    print('Number of downloaded entries: {0}'.format(downloaded_count))
    print('Last downloaded entry index: {0}'.format(last_index))
def test_sync_doc_local(self):
    """Sync a documentation release from a local file:// URL and verify pages."""
    pname = 'project1'
    release = '3.0'
    dname = 'manual'
    index_path = os.path.normpath(os.path.join(
        settings.TESTDATA, 'httpclient402doc', 'index.html'))
    create_doc_local(pname, dname, release,
            'doc.syncer.generic_syncer.SingleURLSyncer',
            'file://' + index_path)
    sync_doc(pname, dname, release)
    model = load_model(pname, DOC_PATH, dname + release)
    self.assertEqual(8, len(model.pages))
    # Each page key is a file:// URL; its path component must exist.
    for page_key in model.pages:
        self.assertTrue(os.path.exists(urlparse(page_key).path))
def toc_download_section(pname, cname, start=None, end=None, force=False):
    """Download ToC sections whose index falls in [start, end).

    Sections already downloaded are skipped unless force is True.  The
    model is persisted after every successful section so progress
    survives a crash; errors are logged and the loop continues.
    """
    model = load_model(pname, STHREAD_PATH, cname)
    syncer = import_clazz(model.syncer_clazz)()
    for section in model.toc_sections:
        idx = section.index
        in_range = ((start is None or start <= idx) and
                    (end is None or idx < end))
        if not in_range or (section.downloaded and not force):
            continue
        try:
            syncer.toc_download_section(model, section)
            dump_model(model, pname, STHREAD_PATH, cname)
            print('Downloaded section {0}'.format(section.index))
        except Exception:
            logger.exception('Error while downloading toc section')
def toc_download_entries(pname, cname, start=None, end=None, force=False):
    """Download ToC entries whose index falls in [start, end).

    Entries already downloaded are skipped unless force is True.  The
    model is persisted after every successful entry so progress
    survives a crash; errors are logged and the loop continues.
    """
    model = load_model(pname, STHREAD_PATH, cname)
    channel_path = get_channel_path(pname, cname)
    syncer = import_clazz(model.syncer_clazz)()
    for entry in model.entries:
        idx = entry.index
        in_range = ((start is None or start <= idx) and
                    (end is None or idx < end))
        if not in_range or (entry.downloaded and not force):
            continue
        try:
            syncer.download_entry(entry, channel_path)
            dump_model(model, pname, STHREAD_PATH, cname)
            print('Downloaded {0}'.format(entry.url))
        except Exception:
            logger.exception('Error while downloading entry')