Example #1
    def test_filenaming(self):
        # Round-trip a URL and timestamp through the archive filename helpers
        now = datetime.now()
        filename = storytracker.create_archive_filename(self.url, now)
        url, then = storytracker.reverse_archive_filename(filename)
        self.assertEqual(self.url, url)
        self.assertEqual(now, then)
        # A filename not produced by storytracker should raise an error
        with self.assertRaises(storytracker.ArchiveFileNameError):
            storytracker.reverse_archive_filename("foo.bar")
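
The same round trip can be sketched outside the test harness. A minimal sketch, assuming storytracker is installed; the URL below is a hypothetical placeholder:

from datetime import datetime

import storytracker

# Hypothetical URL; any URL accepted by storytracker works here
url = "http://www.example.com/"
now = datetime.now()
filename = storytracker.create_archive_filename(url, now)
recovered_url, recovered_time = storytracker.reverse_archive_filename(filename)
# The two helpers are expected to be exact inverses of each other
assert (recovered_url, recovered_time) == (url, now)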
Example #2
import gzip
import os

import storytracker
# ArchivedURL is assumed to be importable from the package's analysis module
from storytracker.analysis import ArchivedURL


def open_archive_filepath(path):
    """
    Accepts a file path and returns an ArchivedURL object
    """
    # Split the file extension from the name
    name = os.path.basename(path)
    name, ext = os.path.splitext(name)
    # Extract the URL and timestamp from the file name
    url, timestamp = storytracker.reverse_archive_filename(name)
    # If it is gzipped, then open it that way
    if ext == '.gz':
        obj = gzip.open(path)
        return ArchivedURL(
            url,
            timestamp,
            obj.read().decode("utf-8"),
            gzip_archive_path=path
        )
    # Otherwise handle it normally
    else:
        obj = open(path, "rb")
        return ArchivedURL(
            url,
            timestamp,
            obj.read().decode("utf-8"),
            html_archive_path=path
        )
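
A round-trip sketch of the function above, assuming storytracker is installed: build a valid archive file name, write gzipped HTML to it, then read it back with open_archive_filepath(). The output directory and HTML payload are hypothetical:

import gzip
import os
from datetime import datetime

import storytracker

url = "http://www.example.com/"  # hypothetical page URL
filename = storytracker.create_archive_filename(url, datetime.now())
# Hypothetical output directory; the ".gz" suffix matches the gzip branch above
path = os.path.join("/tmp", filename + ".gz")
with gzip.open(path, "wb") as f:
    f.write(b"<html><body>Hello</body></html>")  # hypothetical payload

archived = open_archive_filepath(path)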
Example #3
import gzip
from io import BytesIO

import requests

import storytracker
# ArchivedURL is assumed to be importable from the package's analysis module
from storytracker.analysis import ArchivedURL


def open_pastpages_url(url, **kwargs):
    """
    Accepts a URL from PastPages and returns an ArchivedURL object if
    there is an HTML archive
    """
    # Break out the unique ID for the page from the URL
    id_ = url.split("http://www.pastpages.org/screenshot/")[1].replace("/", "")
    # Use that to request the HTML archive URL from the API
    html_url = requests.get(
        "http://www.pastpages.org/api/beta/screenshots/%s/" %
        id_).json()['html']
    # Extract the URL and timestamp from the archive file name
    html_filename = html_url.split("/html/")[1].replace(".gz", "")
    archive_url, timestamp = storytracker.reverse_archive_filename(
        html_filename)
    # Get the archived HTML data
    gzipped = requests.get(html_url).content
    html = gzip.GzipFile(fileobj=BytesIO(gzipped)).read().decode("utf-8")
    # Pass it all back
    return ArchivedURL(archive_url, timestamp, html)
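
A brief usage sketch; the screenshot ID in the URL is hypothetical, and a live connection to pastpages.org with an HTML archive for that screenshot is assumed:

# Hypothetical PastPages screenshot URL; substitute a real screenshot ID
archived = open_pastpages_url("http://www.pastpages.org/screenshot/12345/")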