def test_fetch(self, mock_nntp):
    """Check that a full fetch returns the valid articles with their metadata"""

    mock_nntp.return_value = MockNNTPLib()

    backend = NNTP(NNTP_SERVER, NNTP_GROUP)
    articles = list(backend.fetch(offset=None))

    # One tuple per expected article: (message id, offset, uuid, updated_on)
    expected = [
        ('<*****@*****.**>', 1, 'd088688545d7c2f3733993e215503b367193a26d', 1458039948.0),
        ('<*****@*****.**>', 2, '8a20c77405349f442dad8e3ee8e60d392cc75ae7', 1458076496.0),
    ]
    expected_origin = '-'.join((NNTP_SERVER, NNTP_GROUP))

    # The server offers 4 messages, but only two of them are valid
    self.assertEqual(len(articles), 2)

    # The length check above guarantees both sequences are walked in full
    for article, (message_id, offset, uuid, updated_on) in zip(articles, expected):
        self.assertEqual(article['data']['message_id'], message_id)
        self.assertEqual(article['offset'], offset)
        self.assertEqual(article['uuid'], uuid)
        self.assertEqual(article['origin'], expected_origin)
        self.assertEqual(article['updated_on'], updated_on)
        self.assertEqual(article['category'], 'article')
        self.assertEqual(article['tag'], expected_origin)
def test_fetch_empty(self, mock_nntp):
    """Check that fetching from offset 3 yields no articles"""

    mock_nntp.return_value = MockNNTPLib()

    backend = NNTP(NNTP_SERVER, NNTP_GROUP)
    fetched = list(backend.fetch(offset=3))

    self.assertEqual(len(fetched), 0)
def test_search_fields(self, mock_nntp):
    """Check that 'search_fields' mirrors the item id and the newsgroups"""

    mock_nntp.return_value = MockNNTPLib()

    backend = NNTP(NNTP_SERVER, NNTP_GROUP)
    articles = list(backend.fetch(offset=None))

    # Newsgroups header expected for the first and the second article
    expected_newsgroups = (
        'example.dev.project-link',
        'mozilla.dev.project-link',
    )

    for position, newsgroups in enumerate(expected_newsgroups):
        article = articles[position]
        data = article['data']
        search_fields = article['search_fields']

        self.assertEqual(backend.metadata_id(article['data']), search_fields['item_id'])
        self.assertEqual(data['Newsgroups'], newsgroups)
        self.assertEqual(data['Newsgroups'], search_fields['newsgroups'])
def test_fetch_from_cache(self, mock_nntp):
    """Check that articles read back from the cache match the fetched ones"""

    mock_nntp.return_value = MockNNTPLib()

    # Step 1: fetch from the (mocked) server so the cache gets populated
    cache = Cache(self.tmp_path)
    backend = NNTP(NNTP_SERVER, NNTP_GROUP, cache=cache)

    articles = list(backend.fetch())
    self.assertEqual(len(articles), 2)

    # Step 2: read the same articles back, this time from the cache
    cached_articles = list(backend.fetch_from_cache())
    self.assertEqual(len(cached_articles), len(articles))

    # One tuple per expected article: (message id, offset, uuid, updated_on)
    expected = [
        ('<*****@*****.**>', 1, 'd088688545d7c2f3733993e215503b367193a26d', 1458039948.0),
        ('<*****@*****.**>', 2, '8a20c77405349f442dad8e3ee8e60d392cc75ae7', 1458076496.0),
    ]
    expected_origin = '-'.join((NNTP_SERVER, NNTP_GROUP))

    self.assertEqual(len(cached_articles), len(expected))

    # All three sequences have the same length at this point, so zip()
    # visits every cached article exactly once
    for cached, fetched, (message_id, offset, uuid, updated_on) in zip(cached_articles, articles, expected):
        self.assertEqual(cached['data']['message_id'], message_id)
        self.assertEqual(cached['offset'], offset)
        self.assertEqual(cached['uuid'], uuid)
        self.assertEqual(cached['origin'], expected_origin)
        self.assertEqual(cached['updated_on'], updated_on)
        self.assertEqual(cached['category'], 'article')
        self.assertEqual(cached['tag'], expected_origin)

        # The cached payload must equal the originally fetched payload
        self.assertDictEqual(cached['data'], fetched['data'])
def test_fetch_from_offset(self, mock_nntp):
    """Check that fetching from offset 2 returns only the second article"""

    mock_nntp.return_value = MockNNTPLib()

    backend = NNTP(NNTP_SERVER, NNTP_GROUP)
    articles = list(backend.fetch(offset=2))

    # Expected values for the single article returned
    message_id, offset, uuid, updated_on = (
        '<*****@*****.**>',
        2,
        '8a20c77405349f442dad8e3ee8e60d392cc75ae7',
        1458076496.0,
    )
    expected_origin = '-'.join((NNTP_SERVER, NNTP_GROUP))

    self.assertEqual(len(articles), 1)

    article = articles[0]
    self.assertEqual(article['data']['message_id'], message_id)
    self.assertEqual(article['offset'], offset)
    self.assertEqual(article['uuid'], uuid)
    self.assertEqual(article['origin'], expected_origin)
    self.assertEqual(article['updated_on'], updated_on)
    self.assertEqual(article['category'], 'article')
    self.assertEqual(article['tag'], expected_origin)
class NntpImporter(MailImporter):
    """Mailing list importer using NNTP"""

    # Matches "__" followed by one or more non-"$" characters and the literal
    # "$gmane$org"; get_messages() strips every match from the Message-ID.
    # NOTE(review): presumably undoes an address mangling applied by Gmane --
    # confirm against real Gmane Message-IDs.
    gmane_mangler_regex = re.compile(r'__[^\$]+\$gmane\$org')

    def __init__(self, mailing_list):
        """
        mailing_list: Mailing list object; See Importer.__init__

        This constructs an instance of the NntpImporter to import messages
        from a given group on an NNTP server.

        The connection details are not parameters of this constructor; they
        are parsed by `_parse_url' from the ``archive_url'' attribute of
        `self.object' (presumably set from ``mailing_list'' by the parent
        constructor), which must use the ``nntp'' scheme. `_parse_url' reads
        the hostname, port, user name and password from the URL and takes
        the group from its path.
        """
        super().__init__(mailing_list)
        self._parse_url()
        # Only the server and the group are handed to the backend here;
        # the parsed port, username and password are stored on the instance
        # but not used by this class.
        self.backend = NNTP(self.server, self.group)

    def _parse_url(self):
        """
        Parse URL from `self.object' and populate the following fields:
         - self.server
         - self.port (defaults to 119 when the URL has no port)
         - self.username
         - self.password
         - self.group

        Raises AssertionError if the URL scheme is not ``nntp''.
        """
        url = urllib.parse.urlparse(self.object.archive_url)
        # NOTE: kept as an assert so callers still see AssertionError; be
        # aware that this check is stripped when Python runs with -O.
        assert url.scheme == 'nntp', 'archive URL scheme must be nntp'

        # The group is the URL path without its leading slash(es)
        self.group = url.path.lstrip('/')
        self.username = url.username
        self.password = url.password
        self.server = url.hostname
        self.port = url.port if url.port else 119

    def get_messages(self):
        """
        Iterate through all messages in the NNTP group

        Yields one Message per article returned by the backend. Articles
        for which building the Message fails are logged as warnings and
        skipped. The 'Message-ID' entry of each article's data is rewritten
        in place with the Gmane mangling removed.
        """
        articles = self.backend.fetch()
        for article in articles:
            data = article['data']
            data['Message-ID'] = self.gmane_mangler_regex.sub('', data['Message-ID'])

            try:
                msg = Message(data['From'],
                              data['Date'],
                              data['Subject'],
                              data['Message-ID'],
                              data['References'])
            except Exception:
                # Catch Exception rather than using a bare except, so that
                # KeyboardInterrupt and SystemExit still stop the import
                # instead of being reported as a malformed message.
                logger.warning('Malformed message found, skipping:\n%s',
                               article['updated_on'],
                               exc_info=True)
                msg = None

            # yield outside the try block to avoid capturing exceptions
            # that should terminate the loop instead
            if msg is not None:
                yield msg