forked from vivekdurai/judis-re
-
Notifications
You must be signed in to change notification settings - Fork 0
/
jharkhand.py
77 lines (58 loc) · 2.55 KB
/
jharkhand.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import utils
import urllib
import os
import re
class Jharkhand(utils.BaseCourt):
    """Court scraper that pulls per-day document listings from jhr.nic.in.

    The methods below use ``self.download_url``, ``self.logger``,
    ``self.rawdir``, ``self.metadir`` and ``self.updateMeta`` without
    defining them here; they are presumably supplied by
    ``utils.BaseCourt`` (verify against that class).
    """

    def __init__(self, name, rawdir, metadir, statsdir, updateMeta = False):
        """Forward all arguments to ``utils.BaseCourt`` and set the site root."""
        utils.BaseCourt.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
        self.baseurl = 'http://jhr.nic.in'

    def get_meta_info(self, title, dateobj):
        """Build the metadata dict for one listed document.

        ``title`` is the link text from the listing page. Everything from
        the first occurrence of the literal ``Dated`` onwards is dropped and
        the remainder is stored as the case number (not stripped, so any
        surrounding whitespace in the link text is preserved).

        Returns a dict with keys ``date`` (``utils.date_to_xml(dateobj)``)
        and ``caseno``.
        """
        metainfo = {'date': utils.date_to_xml(dateobj)}

        # partition() returns the whole title as element 0 when 'Dated' is
        # absent, which matches the previous regex-and-slice behaviour.
        metainfo['caseno'] = title.partition('Dated')[0]
        return metainfo

    def download_oneday(self, relpath, dateobj):
        """Download every document linked from the listing page of one date.

        ``relpath`` is the directory, relative to both ``self.rawdir`` and
        ``self.metadir``, under which this date's files are stored.
        ``dateobj`` must expose ``day``, ``month`` and ``year`` attributes.

        For each ``<a>`` on the listing page the target is saved under
        ``rawdir`` unless a file of that name already exists, and a metadata
        file is written under ``metadir`` when it is missing or when
        ``self.updateMeta`` is set.

        Returns the list of ``relpath/filename`` entries that were newly
        saved by this call; an empty list if the listing page could not be
        fetched or parsed.
        """
        dateurl = urllib.basejoin(self.baseurl, '/hcjudge/date_output.php')

        # d1/m1/y1 and d2/m2/y2 are all filled from the same date
        # (presumably the form's from/to range, collapsed to a single day).
        postdata = [('d1', dateobj.day), ('m1', dateobj.month),
                    ('y1', dateobj.year), ('d2', dateobj.day),
                    ('m2', dateobj.month), ('y2', dateobj.year),
                    ('button', 'Submit')]

        webpage = self.download_url(dateurl, postdata = postdata)
        if not webpage:
            self.logger.warning(u'No webpage for %s date: %s' % \
                                (dateurl, dateobj))
            return []

        d = utils.parse_webpage(webpage)
        if not d:
            self.logger.error(u'HTML parsing failed for date: %s' % dateobj)
            return []

        newdls = []
        for link in d.findAll('a'):
            href = link.get('href')
            title = utils.get_tag_contents(link)
            if (not href) or (not title):
                self.logger.warning(u'Could not process %s' % link)
                continue

            # The last path component of the href names the local file.
            filename = href.split('/')[-1]
            if not filename:
                # An href ending in '/' would make filepath/metapath point
                # at the date directory itself rather than at a file.
                self.logger.warning(u'Could not process %s' % link)
                continue

            url = urllib.basejoin(dateurl, href)
            self.logger.info(u'link: %s title: %s' % (href, title))

            relurl = os.path.join(relpath, filename)
            filepath = os.path.join(self.rawdir, relurl)
            metapath = os.path.join(self.metadir, relurl)

            if not os.path.exists(filepath):
                doc = self.download_url(url)
                if not doc:
                    self.logger.warning(u'No webpage %s' % url)
                else:
                    utils.save_file(filepath, doc)
                    self.logger.info(u'Saved %s' % url)
                    newdls.append(relurl)

            # Re-check existence: the download above may have failed, and
            # metadata is only written for documents that are on disk.
            if os.path.exists(filepath) and \
                    (self.updateMeta or not os.path.exists(metapath)):
                metainfo = self.get_meta_info(title, dateobj)
                if metainfo:
                    utils.print_tag_file(metapath, metainfo)

        return newdls