/
utils.py
executable file
·79 lines (57 loc) · 1.84 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import urllib
import urllib2
import urlparse
from logbook import debug, info, warn, error
from logbook import FileHandler
def remove_extn(string):
    """ Strip the extension (the final suffix) from a filename or path. """
    base, _extn = os.path.splitext(string)
    return base
def slugify(string):
    """ Normalize a string into a url-friendly slug.

    Trims surrounding whitespace, lowercases, and collapses every run
    of whitespace, underscores, or hyphens into a single '-'.
    """
    import re
    separators = re.compile(r'[\s_-]+')
    return separators.sub('-', string.strip().lower())
def headify(string):
    """ Turn a filename into a title-cased heading.

    Drops the extension, replaces each run of whitespace, punctuation,
    underscores, or hyphens with a single space, then title-cases the
    result.
    """
    import re
    base = os.path.splitext(string)[0]
    return re.sub(r'[\s\W_-]+', ' ', base).title()
def exists(url):
    """ Check whether the url exists.

    Issues a request and treats any successful response as existence;
    an HTTPError (4xx/5xx status) means the url does not exist.

    :param url: the url to probe.
    :returns: True if the request succeeds, False on HTTPError.

    NOTE(review): other failures (e.g. urllib2.URLError on DNS or
    connection problems) propagate to the caller unchanged — presumably
    intentional, so they are not swallowed here either.
    """
    try:
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            debug(url + " already exists.")
            return True
        finally:
            # Fix: close the response so the underlying HTTP connection
            # is released instead of leaked (the original never closed it).
            response.close()
    except urllib2.HTTPError:
        debug(url + " does not exist")
        return False
def get_filename(url):
    """ Return the last component of the url's path. """
    parsed = urlparse.urlparse(url)
    return parsed.path.rsplit('/', 1)[-1]
def get_slugified_filename(url):
    """ Return the last path component of the url, slugified. """
    filename = get_filename(url)
    return slugify(filename)
def download(url, progress=False):
    """ Download the document pointed to by url into the current directory.

    The local filename is the url's last path component. If a file with
    that name already exists in cwd, nothing is downloaded.

    :param url: the document's url.
    :param progress: when True, use urlgrabber to display a textual
        progress meter; otherwise fall back to plain urllib.urlretrieve.
    :returns: the local filename (whether freshly downloaded or not).
    """
    filename = get_filename(url)
    if not os.path.exists(filename):
        debug("Downloading " + url)
        if progress:
            # Imported lazily so urlgrabber is only required when a
            # progress meter was actually requested.
            import urlgrabber
            from urlgrabber.progress import text_progress_meter
            urlgrabber.urlgrab(url=url,
                               filename=filename,
                               progress_obj=text_progress_meter())
        else:
            urllib.urlretrieve(url=url, filename=filename)
        debug("Finished Downloading " + filename)
    else:
        info(filename + " already exists in cwd. Not downloading. ")
    return filename
def start_logging(filename):
    """ Route logbook records for this script to /var/log/<name>.log.

    Pushes a bubbling FileHandler onto the application's handler stack,
    so records are written to the file while still propagating onward.

    :param filename: typically __file__ of the calling script; only its
        basename (minus extension) is used to build the log file name.
    """
    script_name = remove_extn(os.path.basename(filename))
    handler = FileHandler('/var/log/' + script_name + '.log', bubble=True)
    handler.push_application()