Example No. 1
 def setLinkDead(self, url, error, page, day):
     """
     Adds the fact that the link was found dead to the .dat file.
     """
     self.semaphore.acquire()
     now = time.time()
     if url in self.historyDict:
         timeSinceFirstFound = now - self.historyDict[url][0][1]
         timeSinceLastFound = now - self.historyDict[url][-1][1]
         # if the last time we found this dead link is less than an hour
         # ago, we won't save it in the history this time.
         if timeSinceLastFound > 60 * 60:
             self.historyDict[url].append((page.title(), now, error))
         # if the first time we found this dead link was more than x
         # days ago (default is a week), the link should probably be
         # fixed or removed. We'll list it in a file so that it can be
         # removed manually.
         if timeSinceFirstFound > 60 * 60 * 24 * day:
             # search for archived page
             archiveURL = weblib.getInternetArchiveURL(url)
             if archiveURL is None:
                 archiveURL = weblib.getWebCitationURL(url)
             self.log(url, error, page, archiveURL)
     else:
         self.historyDict[url] = [(page.title(), now, error)]
     self.semaphore.release()
Example No. 2
 def setLinkDead(self, url, error, page, weblink_dead_days):
     """Add the fact that the link was found dead to the .dat file."""
     with self.semaphore:
         now = time.time()
         if url in self.historyDict:
             timeSinceFirstFound = now - self.historyDict[url][0][1]
             timeSinceLastFound = now - self.historyDict[url][-1][1]
             # if the last time we found this dead link is less than an hour
             # ago, we won't save it in the history this time.
             if timeSinceLastFound > 60 * 60:
                 self.historyDict[url].append((page.title(), now, error))
             # if the first time we found this dead link was more than
             # x days ago (default is a week), the link should probably
             # be fixed or removed. We'll list it in a file so that it
             # can be removed manually.
             if timeSinceFirstFound > 60 * 60 * 24 * weblink_dead_days:
                 # search for archived page
                 try:
                     archiveURL = get_archive_url(url)
                 except Exception as e:
                     pywikibot.warning(
                         'get_closest_memento_url({0}) failed: {1}'.format(
                             url, e))
                     archiveURL = None
                 if archiveURL is None:
                     archiveURL = weblib.getInternetArchiveURL(url)
                 if archiveURL is None:
                     archiveURL = weblib.getWebCitationURL(url)
                 self.log(url, error, page, archiveURL)
         else:
             self.historyDict[url] = [(page.title(), now, error)]
Example No. 3
 def setLinkDead(self, url, error, page, day):
     """Add the fact that the link was found dead to the .dat file."""
     self.semaphore.acquire()
     now = time.time()
     if url in self.historyDict:
         timeSinceFirstFound = now - self.historyDict[url][0][1]
         timeSinceLastFound = now - self.historyDict[url][-1][1]
         # if the last time we found this dead link is less than an hour
         # ago, we won't save it in the history this time.
         if timeSinceLastFound > 60 * 60:
             self.historyDict[url].append((page.title(), now, error))
         # if the first time we found this dead link was more than x
         # days ago (default is a week), the link should probably be
         # fixed or removed. We'll list it in a file so that it can be
         # removed manually.
         if timeSinceFirstFound > 60 * 60 * 24 * day:
             # search for archived page
             try:
                 archiveURL = get_archive_url(url)
             except Exception as e:
                 pywikibot.warning(
                     'get_closest_memento_url({0}) failed: {1}'.format(
                         url, e))
                 archiveURL = None
             if archiveURL is None:
                 archiveURL = weblib.getInternetArchiveURL(url)
             if archiveURL is None:
                 archiveURL = weblib.getWebCitationURL(url)
             self.log(url, error, page, archiveURL)
     else:
         self.historyDict[url] = [(page.title(), now, error)]
     self.semaphore.release()
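All three setLinkDead variants above rely on state supplied by their containing class: self.semaphore, self.historyDict and self.log. Below is a minimal stand-in sketch of that state, just enough to exercise the method; the class name DeadLinkHistory and the body of log are assumptions for illustration, not pywikibot's actual implementation.

 import threading

 class DeadLinkHistory:
     """Hypothetical container providing the attributes setLinkDead uses."""

     def __init__(self):
         # Guards historyDict against concurrent link-checker threads.
         self.semaphore = threading.Semaphore()
         # Maps url -> list of (page_title, unix_time, error) tuples,
         # oldest entry first, matching the historyDict[url][0][1]
         # indexing in the examples above.
         self.historyDict = {}

     def log(self, url, error, page, archiveURL):
         # The real bot writes dead links to a report file; printing is
         # enough for this sketch.
         print('* %s (%s), archive: %s' % (url, error, archiveURL))

Attaching any of the setLinkDead variants to this class and calling it with a stub page object (anything exposing a title() method) will populate historyDict as shown above.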
Example No. 4
 def testWebCiteOlder(self):
     archivedversion = weblib.getWebCitationURL('https://google.com',
                                                '20130101')
     self.assertEqual(archivedversion,
                      'http://www.webcitation.org/6DHSeh2L0')
Example No. 5
 def testWebCiteOlder(self):
     archivedversion = weblib.getWebCitationURL("https://google.com", "20130101")
     self.assertEqual(archivedversion, "http://www.webcitation.org/6DHSeh2L0")
Example No. 6
 def _get_archive_url(self, url, date_string=None):
     archivedversion = weblib.getWebCitationURL(url, date_string)
     self.assertOneDeprecation()
     return archivedversion
Example No. 7
 def testWebCiteOlder(self):
     """Test WebCite for https://google.com as of January 2013."""
     archivedversion = weblib.getWebCitationURL('https://google.com', '20130101')
     self.assertEqual(archivedversion, 'http://www.webcitation.org/6DHSeh2L0')
Example No. 8
 def testWebCiteOlder(self):
     archivedversion = weblib.getWebCitationURL('http://google.com', '20130101')
     self.assertEqual(archivedversion, 'http://www.webcitation.org/6DHSeh2L0')
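The tests above pin WebCite's answer for a fixed snapshot date, and Example No. 6 shows that these helpers trip a deprecation warning. Here is a hedged sketch of a direct query, falling back from the Internet Archive to WebCite in the same order as the setLinkDead examples; note that weblib is deprecated in current pywikibot and both helpers perform live network requests, so the result depends on the archiving services being reachable.

 from pywikibot import weblib

 url = 'https://google.com'
 # Ask for a snapshot close to 1 January 2013 (timestamp format yyyymmdd).
 archived = weblib.getInternetArchiveURL(url, '20130101')
 if archived is None:
     # Fall back to WebCite, as the setLinkDead examples do.
     archived = weblib.getWebCitationURL(url, '20130101')
 print(archived)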