Exemplo n.º 1
0
def rename_file(pdf, bibitem):
    """Attempt to rename pdf according to bibitem.

    The new file name is built as ``<author><yy><word>.pdf`` where
    ``author`` is the text before the first comma of the "author" entry with
    spaces removed, ``yy`` is the last two characters of the "year" entry and
    ``word`` is the first whitespace-separated word of the "title" entry.
    The result is passed through ``secure_filename``.

    The user is shown the old and new paths and asked to confirm; the file
    is only renamed when the answer is exactly ``y``.

    :param pdf: path of the file to rename.
    :param bibitem: bibliography entry handed to ``_get_bib_element``.
    :return: None.
    """
    # Any of the lookups may come back empty (the author lookup was already
    # guarded for that); fall back to '' so a missing field neither crashes
    # nor puts the literal text "None" into the file name.
    year = _get_bib_element(bibitem, "year") or ''
    author = _get_bib_element(bibitem, "author")
    if author:
        author = author.split(",")[0].replace(' ', '')
    else:
        author = ''
    title = _get_bib_element(bibitem, "title") or ''

    # pick one word for the title (an empty title yields an empty word)
    words = title.split()
    title = words[0] if words else ''

    filename = secure_filename('%s%s%s.pdf' % (author, year[-2:], title))

    # Build the new path from the directory part only.  A plain
    # str.replace() of the basename would also rewrite any directory
    # component that happens to contain the same text.
    newfile = os.path.join(os.path.dirname(pdf), filename)

    # One print of the joined lines produces the same output as the
    # original sequence of print statements.
    print("\n".join([
        "",
        "Will rename:",
        "",
        "  %s" % pdf,
        "",
        "to",
        "",
        "  %s" % newfile,
        "",
        "Proceed? [y/N]",
    ]))
    answer = raw_input()
    if answer == 'y':
        print("Renaming %s to %s" % (pdf, newfile))
        os.rename(pdf, newfile)
    else:
        print("Aborting.")
Exemplo n.º 2
0
def download(url, usecache=True, cached=None, cachedir='cache~/', cachedonly=False, **opts):
    """
    Download (or cache) ``url`` to file. On success: return file name of stored
    contents. Upon failure: return None.

    ``cached`` is the file the contents are stored in. When it is not given
    it is derived as ``cachedir`` joined with ``secure_filename(url)``, and
    ``cachedir`` is created via ``mkdir``.

    If ``usecache`` is enabled and the cached file already exists, it is
    returned without downloading.

    If ``cachedonly`` is enabled, this function will not download anything. It
    will simply return the cached filename if it exists, and None otherwise.

    URLs starting with ``ftp`` are fetched with ``wget``; URLs starting with
    ``http`` are fetched with ``robust_download``, which receives the extra
    keyword options ``opts`` (presumably ``tries``, ``pause`` and
    ``timeout`` -- confirm against ``robust_download``). Any other URL
    yields None.
    """

    if not cached:
        if cachedir:
            mkdir(cachedir)
            cached = os.path.join(cachedir, secure_filename(url))
        else:
            assert not usecache, 'must specify cachedir'

    # ``cached`` can still be unset here (no cachedir and usecache disabled);
    # guard so os.path.exists() is never handed None.
    have_copy = bool(cached) and os.path.exists(cached)

    # cachedonly must never reach the network, whatever ``usecache`` says.
    if cachedonly:
        return cached if have_copy else None

    if usecache and have_copy:
        return cached

    # use wget for ftp files
    if url.startswith('ftp'):
        return wget(url, cached)

    if url.startswith('http'):
        return robust_download(url, cached, **opts)

    # unsupported scheme: nothing was downloaded
    return None
Exemplo n.º 3
0
def cache_document(src, dest=None):
    """Place ``src`` in the cache and return the path of the cached file.

    ``src`` is wrapped in ``Path``. The destination is ``CACHE / dest`` when
    ``dest`` is given; otherwise it is ``CACHE / secure_filename(src)`` for a
    URL (as decided by ``is_url``) and ``CACHE / src.basename()`` for
    anything else.

    A URL is stored with ``cache_url``; an existing path is copied with
    ``copy2`` and the copy is reported on stdout.

    Raises ``SkidFileExists`` when the destination already exists and
    ``SkidError`` when ``src`` is neither a URL nor an existing path.
    """

    # TODO: use a staging area in case something breaks in the middle of adding.
    src = Path(src)

    # Work out where the document will live inside the cache.
    if dest is not None:
        target = CACHE / dest
    elif is_url(src):
        target = CACHE / secure_filename(src)
    else:
        target = CACHE / src.basename()

    if target.exists():
        # TODO: Suggest update methods or renaming the file
        raise SkidFileExists(target)

    # Remote source: let cache_url fetch it.
    if is_url(src):
        cache_url(src, target)
        return target

    # Local source: it has to be present on disk.
    if not src.exists():
        message = (
            "cache_document doesn't know what to do with source %r\n"
            "Trying to add a nonexistent file?"
        )
        raise SkidError(message % str(src))

    src.copy2(target)
    print('copy:', src, '->', target)
    return target
Exemplo n.º 4
0
def cache_document(src, dest=None):
    "Cache a document, return filename of the dest file."

    # TODO: use a staging area in case something breaks in the middle of adding.
    src = path(src)

    if dest is None:
        # Find a reasonable filename if dest isn't specified
        if is_url(src):
            dest = CACHE / secure_filename(src)
        else:
            dest = CACHE / src.basename()
    else:
        dest = CACHE / dest

    if dest.exists():
        # TODO: Suggest update methods or renaming the file
        raise SkidFileExists(dest)

    if is_url(src):
        cache_url(src, dest)

    elif src.exists():   # is this something on disk?
        src.copy2(dest)
        print 'copy:', src, '->', dest

    else:
        raise SkidError("cache_document doesn't know what to do with source %r\n"
                        "Trying to add a nonexistent file?" % str(src))

    return dest
Exemplo n.º 5
0
def rename_file(pdf, bibitem):
    """Attempt to rename pdf according to bibitem.

    The target name is ``<author><yy><word>.pdf``: the "author" entry up to
    its first comma with spaces stripped out, the last two characters of the
    "year" entry, and the first word of the "title" entry, all run through
    ``secure_filename``.

    Both paths are printed and the user is prompted; only the exact answer
    ``y`` triggers the rename, anything else aborts.

    :param pdf: path of the file to rename.
    :param bibitem: bibliography entry handed to ``_get_bib_element``.
    :return: None.
    """
    # Treat a missing field as an empty string: the author lookup was already
    # guarded against empty results, and without the same care a missing
    # year/title crashes and a missing author becomes the text "None".
    year = _get_bib_element(bibitem, "year") or ''
    author = _get_bib_element(bibitem, "author")
    author = author.split(",")[0].replace(' ', '') if author else ''
    title = _get_bib_element(bibitem, "title") or ''

    # pick one word for the title; split() already ignores outer whitespace
    title_words = title.split()
    title = title_words[0] if title_words else ''

    filename = secure_filename('%s%s%s.pdf' % (author, year[-2:], title))

    # Swap only the final path component.  Replacing the basename text with
    # str.replace() would also alter matching text in the directory part.
    newfile = os.path.join(os.path.dirname(pdf), filename)

    # Same output as the original run of print statements, emitted at once.
    prompt_lines = [
        "",
        "Will rename:",
        "",
        "  %s" % pdf,
        "",
        "to",
        "",
        "  %s" % newfile,
        "",
        "Proceed? [y/N]",
    ]
    print("\n".join(prompt_lines))
    answer = raw_input()
    if answer == 'y':
        print("Renaming %s to %s" % (pdf, newfile))
        os.rename(pdf, newfile)
    else:
        print("Aborting.")
Exemplo n.º 6
0
def cache_url(url):
    """
    Download url into the cache and return the path of the stored contents.

    The contents are stored at ``CACHE / secure_filename(url)``; that file
    must not exist yet (checked with an assertion). ``download`` is called
    with a 60 second timeout and with cache lookup disabled. An ``Exception``
    is raised when ``download`` returns a falsy value.
    """
    target = CACHE / secure_filename(url)

    assert not target.exists(), 'File %s already exists.' % target

    # TODO: we should tell download where to store stuff explicitly... right now
    # we just both have the same convention.
    fetched = download(url, timeout=60, usecache=False, cached=target)
    if fetched:
        return target

    raise Exception('Failed to download %s.' % url)
Exemplo n.º 7
0
def download(url,
             usecache=True,
             cached=None,
             cachedir='cache~/',
             cachedonly=False,
             **opts):
    """
    Download (or cache) ``url`` to file. On success: return file name of stored
    contents. Upon failure: return None.

    ``cached`` names the file that holds the contents. If it is not supplied
    it becomes ``cachedir`` joined with ``secure_filename(url)``, and
    ``cachedir`` is created via ``mkdir``.

    If ``usecache`` is enabled and the cached file already exists, that file
    name is returned and nothing is downloaded.

    If ``cachedonly`` is enabled, this function will not download anything. It
    will simply return the cached filename if it exists, and None otherwise.

    ``ftp`` URLs go through ``wget``; ``http`` URLs go through
    ``robust_download`` together with the extra keyword options ``opts``
    (presumably ``tries``, ``pause`` and ``timeout`` -- confirm against
    ``robust_download``). Other URLs yield None.
    """

    if not cached:
        if cachedir:
            mkdir(cachedir)
            cached = os.path.join(cachedir, secure_filename(url))
        else:
            assert not usecache, 'must specify cachedir'

    # ``cached`` may still be unset (no cachedir, usecache disabled), so do
    # not pass it to os.path.exists() blindly.
    already_stored = bool(cached) and os.path.exists(cached)

    # only return something for cached files -- and never download, even
    # when ``usecache`` is disabled
    if cachedonly:
        if already_stored:
            return cached
        return None

    if usecache and already_stored:
        return cached

    # use wget for ftp files
    if url.startswith('ftp'):
        return wget(url, cached)

    if url.startswith('http'):
        return robust_download(url, cached, **opts)

    # unsupported scheme: nothing was downloaded
    return None