This repository has been archived by the owner on Sep 20, 2021. It is now read-only.
forked from tuxor1337/springerdownload
/
springer_download.py
executable file
·97 lines (79 loc) · 3.3 KB
/
springer_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
import sys, string
from gettext import gettext as _
from springerdl import meta, util, download, merge
from springerdl.pyPdf_ext import PdfFileReader_ext as PdfFileReader
from springerdl.const import *
def main(argv=sys.argv):
    """Select the command-line or graphical front end and run it.

    The command-line front end is chosen when file descriptor 0 is a
    terminal or at least one argument follows the program name, unless
    ``--gui`` is among those arguments. In every other case the graphical
    front end is chosen. The selected front end is called with
    ``springer_fetch`` as its only argument.

    Always returns 0.
    """
    from os import isatty

    gui_requested = "--gui" in argv[1:]
    cli_possible = isatty(0) or len(argv) > 1

    # Only the module of the selected front end gets imported.
    if cli_possible and not gui_requested:
        from springerdl.interface.cli import cli_main as interface
    else:
        from springerdl.interface.gui import gui_main as interface

    interface(springer_fetch)
    return 0
def springer_fetch(interface):
    """Fetch a Springer book chapter by chapter and merge it into one PDF.

    ``interface`` is the front-end object. Settings are read through
    ``interface.option(name)``; status is reported through its ``doing``,
    ``done``, ``out``, ``err`` and ``progress`` methods.

    Returns True once the merge step has been run. Returns False when
    ``util.getSoup`` yields None for the book URL, or when the list of
    accessible chapters is empty.
    """
    util.setupOpener(interface.option("proxy"), interface.option("user-agent"))
    springer_key = util.parseSpringerURL(interface.option("springer_name"))
    book_url = '%s/book/10.1007/%s' % (SPRINGER_URL, springer_key)

    if interface.option('verbose'):
        # A single parenthesised argument prints identically on
        # Python 2 and Python 3.
        print("ImageMagick: %s" % IM_BIN)
        print("Ghostscript: %s" % GS_BIN)
        print("PDF Toolkit: %s" % PDFTK_BIN)

    # --- Book metadata ---------------------------------------------------
    interface.doing(_("Fetching book info"))
    soup = util.getSoup(book_url)
    if soup is None:
        interface.err(_("The specified identifier doesn't point to an existing Springer book resource"))
        return False
    info = meta.fetchBookInfo(soup)
    interface.done()

    interface.out(", ".join(info['authors']))
    bookinfo = info['title']
    if info['subtitle'] is not None:
        bookinfo += ": %s" % (info['subtitle'])
    bookinfo += " (%d chapters)" % (info['chapter_cnt'])
    interface.out(bookinfo)

    # --- Table of contents -----------------------------------------------
    interface.doing(_("Fetching chapter data"))
    toc = meta.fetchToc(soup, book_url)
    if interface.option('sorted'):
        # Order entries by the first element of their page range.
        toc = sorted(toc, key=lambda el: el['page_range'][0])
    accessible_toc = util.getAccessibleToc(toc)
    interface.done()

    if not accessible_toc:
        # The page size below is read from the first accessible entry, so
        # an empty list would otherwise end in an IndexError.
        interface.err(_("No accessible chapters found for this book."))
        return False

    if interface.option('verbose'):
        util.printToc(accessible_toc)

    # --- Chapter download --------------------------------------------------
    download.pdf_files(accessible_toc, interface.progress(""),
                       interface.option('pause'), info['chapter_cnt'])

    # Take the page size from the media box of the first page of the first
    # accessible entry's PDF (elements 2 and 3 of the box).
    inputPDF = PdfFileReader(accessible_toc[0]['pdf_file'])
    tmp_box = inputPDF.pages[0].mediaBox
    info['pagesize'] = (tmp_box[2], tmp_box[3])

    # --- Optional cover ----------------------------------------------------
    if interface.option('cover'):
        if IM_BIN is None:
            interface.err(_("Skipping cover due to missing ImageMagick binary."))
        else:
            interface.doing(_("Fetching book cover"))
            cover = meta.fetchCover(info['print_isbn'], info['pagesize'])
            if cover:
                # The cover becomes the first entry handed to the merge.
                accessible_toc.insert(0, {
                    'pdf_file': cover,
                    'children': [],
                    'page_range': [0, 0],
                    'title': "Cover",
                })
                interface.done()
            else:
                interface.done(_("not available"))

    # --- Output file name ----------------------------------------------------
    outf = interface.option('output-file')
    if outf is None:
        if interface.option('autotitle'):
            outf = "%s - %s.pdf" % (", ".join(info['authors']), info['title'])
        else:
            outf = info['online_isbn'] + ".pdf"
        # Only the generated name is sanitised: every character outside the
        # whitelist becomes "_". A name given via 'output-file' is passed
        # through unchanged, so path separators in it are kept.
        # NOTE(review): confirm this scoping is the intended behaviour.
        valid_chars = "-_.,() %s%s" % (string.ascii_letters, string.digits)
        outf = "".join(c if c in valid_chars else "_" for c in outf)

    merge.merge_by_toc(accessible_toc, info, outf, interface)
    return True
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)