def open_wayback_machine_url(url, **kwargs):
    """
    Accepts an URL from the Internet Archive's Wayback Machine
    and returns an ArchivedURL object.

    Any keyword arguments are handed through unchanged to
    storytracker.archive, which retrieves the page.
    """
    # Extract the archived page's URL and the capture timestamp from the url
    archive_url, timestamp = storytracker.reverse_wayback_machine_url(url)
    # Modify the standard Wayback Machine URL to be one that returns the raw
    # HTML without any of the chrome and navigation tools inserted by
    # the archive. The flag is looked for directly in front of the archived
    # URL rather than anywhere in the string, so an archived address that
    # merely contains "id_" (e.g. in a query string) is still rewritten.
    raw_marker = "id_/%s" % archive_url
    if raw_marker not in url:
        # Only the first match separates the timestamp from the archived URL
        url = url.replace("/%s" % archive_url, raw_marker, 1)
    # Retrieve the raw HTML
    html = storytracker.archive(url, **kwargs).html
    # Pass it all back
    return ArchivedURL(archive_url, timestamp, html)
def open_wayback_machine_url(url, **kwargs):
    """
    Accepts an URL from the Internet Archive's Wayback Machine
    and returns an ArchivedURL object.

    Extra keyword arguments are forwarded as-is to storytracker.archive
    when the page is downloaded.
    """
    # Split the Wayback Machine url into the archived address and timestamp
    archive_url, timestamp = storytracker.reverse_wayback_machine_url(url)
    # Switch to the "id_" form of the url, which returns the raw HTML
    # without any of the chrome and navigation tools inserted by the
    # archive. Checking for "id_" anywhere in the url would wrongly skip
    # archived addresses that contain those characters themselves, so the
    # check is anchored to the spot right before the archived address.
    plain_prefix = "/%s" % archive_url
    raw_prefix = "id_/%s" % archive_url
    if raw_prefix not in url:
        url = url.replace(
            plain_prefix,
            raw_prefix,
            1  # touch only the separator after the timestamp
        )
    # Retrieve the raw HTML
    html = storytracker.archive(url, **kwargs).html
    # Pass it all back
    return ArchivedURL(archive_url, timestamp, html)
def test_url_reverse(self):
    """
    Reversing a Wayback Machine URL should yield the archived address
    as a string and the capture time as a datetime.
    """
    archive_url, timestamp = storytracker.reverse_wayback_machine_url(
        self.url
    )
    # assertIsInstance reports the offending value and expected type on
    # failure, instead of the bare "False is not true" from assertTrue
    self.assertIsInstance(archive_url, str)
    self.assertIsInstance(timestamp, datetime)