forked from sushant354/judis-re
-
Notifications
You must be signed in to change notification settings - Fork 0
/
greentribunal.py
82 lines (74 loc) · 2.8 KB
/
greentribunal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import re
import urllib
from courtlisting import CourtListing
import utils
class GreenTribunal(CourtListing):
    """Listing scraper for the judgment pages of greentribunal.in.

    Walks the paginated judgment listing, turning each table row that
    carries a link and a parseable date into a metadata dictionary.
    The metadata keys (self.CASENO, self.PETITIONER, self.RESPONDENT,
    self.DATE), self.logger and self.download_url are not defined in this
    class; presumably they are provided by CourtListing -- confirm there.
    """

    def __init__(self, name, rawdir, metadir, statsdir, updateMeta=False):
        """Forward all arguments to CourtListing and set site constants."""
        CourtListing.__init__(self, name, rawdir, metadir, statsdir,
                              updateMeta)
        self.hostname = u'greentribunal.in'
        self.mainurls = [u'http://www.greentribunal.in/judgement.php']
        # NOTE(review): presumably tells the base class that the site does
        # not list entries in a guaranteed order -- confirm in CourtListing.
        self.INFO_ORDERED = False

    def get_next_page(self, d, baseurl):
        """Return the absolute URL of the "Next" pagination link, or None.

        d       -- parsed page object exposing findAll('a')
        baseurl -- URL that relative hrefs are resolved against

        Every anchor is inspected; when several match, the last one in
        document order is returned.
        """
        # urljoin is the function Python 2's urllib.basejoin aliases;
        # importing it directly keeps the method usable on Python 3 too.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        nextPage = None
        for link in d.findAll('a'):
            value = utils.get_tag_contents(link)
            href = link.get('href')
            if href and value and re.search(r'\s*Next', value):
                nextPage = urljoin(baseurl, href)
        return nextPage

    def get_meta_info(self, tr):
        """Extract the metadata of one listing row.

        Returns an empty dict when the row contains no anchor with an
        href.  Otherwise the dict holds 'href' plus whichever of CASENO,
        PETITIONER, RESPONDENT and DATE could be read from the cells.
        """
        metainfo = {}

        # The first anchor that actually has an href is the document link.
        for link in tr.findAll('a'):
            href = link.get('href')
            if href:
                metainfo['href'] = href
                break

        if 'href' not in metainfo:
            return {}

        # Fields are assigned by position among the NON-EMPTY cells: the
        # counter only advances when a cell yields some content.
        position = 0
        for td in tr.findAll('td'):
            value = utils.get_tag_contents(td)
            if not value:
                continue

            if position == 0:
                metainfo[self.CASENO] = value
            elif position == 1:
                pet, res = utils.get_petitioner_respondent(value)
                if pet:
                    metainfo[self.PETITIONER] = pet
                if res:
                    metainfo[self.RESPONDENT] = res
            elif position == 2:
                dateobj = utils.datestr_to_obj(value)
                if dateobj:
                    metainfo[self.DATE] = dateobj
            position += 1

        return metainfo

    def download_info_page(self, url):
        """Download one listing page and return (entries, next_page_url).

        entries is a list of metadata dicts (see get_meta_info), limited
        to rows that have a date and sorted newest first.  next_page_url
        is None when no "Next" link is found.  ([], None) is returned
        when the page cannot be downloaded or parsed.
        """
        webpage = self.download_url(url)
        if not webpage:
            return [], None

        d = utils.parse_webpage(webpage)
        if not d:
            self.logger.error(u'Could not parse the date search page')
            return [], None

        nextPage = self.get_next_page(d, url)

        # The listing is taken to be the table with the most rows.  Row
        # COUNTS are compared (the original compared the row list itself
        # against an integer), and the rows of the winning table are kept
        # so the loop variable of this search is never reused afterwards.
        mainRows = None
        for table in d.findAll('table'):
            rows = table.findAll('tr')
            if mainRows is None or len(rows) > len(mainRows):
                mainRows = rows

        dls = []
        for tr in mainRows or []:
            metainfo = self.get_meta_info(tr)
            if metainfo and self.DATE in metainfo:
                self.logger.debug(u'metainfo: %s' % metainfo)
                dls.append(metainfo)

        # Newest first; a key function orders identically to the former
        # cmp-based call and is accepted by both Python 2 and Python 3.
        dls.sort(key=lambda info: info[self.DATE], reverse=True)
        return dls, nextPage