Exemplo n.º 1
0
    def parse(self):
        """Build an item for every "Due Date" text node found in the soup.

        For each match, the closest preceding ``<tr>`` is searched for the
        title, author and due-date markers, and the text belonging to each
        marker is obtained by passing the marker's parent to
        ``self.findcontent``. Finished items are stored in ``self.itemsOut``
        keyed by their cleaned title.
        """
        due_pattern = re.compile("Due Date")
        title_pattern = re.compile("Title")
        alt_title_pattern = re.compile("Print the title")
        author_pattern = re.compile("Author")

        for due_node in self.soup.findAll(text=due_pattern):
            row = due_node.findPrevious('tr')
            entry = models.Item()

            # Fall back to the "Print the title" marker when the row has no "Title" text.
            title_marker = row.find(text=title_pattern)
            if title_marker is None:
                title_marker = row.find(text=alt_title_pattern)
            raw_title = self.findcontent(title_marker.parent)
            entry.title = util.stripNonAscii(util.unescape(raw_title))

            author_marker = row.find(text=author_pattern)
            raw_author = self.findcontent(author_marker.parent)
            # Keep at most the first two comma-separated pieces of the author text.
            short_author = ','.join(raw_author.split(',')[:2])
            entry.author = util.stripNonAscii(util.unescape(short_author))

            due_marker = row.find(text=due_pattern)
            due_text = self.findcontent(due_marker.parent)
            # Everything after the first comma (the time) is discarded.
            entry.dueDate = util.toDatetime(due_text.split(',')[0])

            self.itemsOut[entry.title] = entry
Exemplo n.º 2
0
    def parse_itemlisting_style(self):
        """Parse items from ``<td>`` cells of class ``itemlisting``/``itemlisting2``.

        For each matching cell, the closest preceding ``<tr>`` is searched for
        the "Print the title", "Print the author" and "Print the date due"
        marker text nodes. The title and author are read from the node that
        follows the marker; the due date is read from the ``<strong>`` element
        inside the marker's parent. Items are stored in ``self.itemsOut`` keyed
        by their cleaned title.

        The resulting mapping is reported through the ``logging`` module at
        DEBUG level instead of being printed to stdout.
        """
        import logging

        # Compile the marker patterns once rather than on every iteration.
        title_pattern = re.compile("Print the title")
        author_pattern = re.compile("Print the author")
        due_pattern = re.compile("Print the date due")

        item_tds = self.soup.findAll('td', {'class' : ('itemlisting', 'itemlisting2')})
        for td in item_tds:
            tr = td.findPrevious('tr')
            item = models.Item()

            marker = tr.find(text=title_pattern)
            title = util.unescape(marker.nextSibling.strip())
            item.title = util.stripNonAscii(title)

            marker = tr.find(text=author_pattern)
            if marker is None or marker.nextSibling is None:
                # No author information in this row.
                author = ''
            else:
                author = marker.nextSibling.strip().strip('.')
            # Keep at most the first two comma-separated pieces of the author text.
            author = ','.join(author.split(',')[0:2])
            author = util.unescape(author)
            item.author = util.stripNonAscii(author)

            marker = tr.find(text=due_pattern)
            # <td>Due <!--Print the date due--> <strong>12/10/2011,....
            dueDate = marker.parent.find('strong').string.strip()
            dueDate = dueDate.split(',')[0]  # strip time
            item.dueDate = util.toDatetime(dueDate)
            self.itemsOut[item.title] = item

        logging.getLogger(__name__).debug("parsed items: %s", self.itemsOut)
Exemplo n.º 3
0
    def parse(self):
        """Read per-title renewal results from the ``panelMessage`` div.

        Raises ``PendingFineException`` when a ``panelVerifyCharges`` div is
        present. Otherwise every ``<i>`` element inside ``panelMessage`` is
        treated as a title; the text that follows it decides whether the
        renewal succeeded. Results are stored in ``self.renewalItems`` keyed by
        the cleaned title.
        """
        # look for pending fine
        if self.soup.find('div', {'id':'panelVerifyCharges'}) is not None:
            raise PendingFineException

        message_panel = self.soup.find('div', {'id':'panelMessage'})

        for title_tag in message_panel.findAll('i'):
            outcome = models.Item()

            status_text = title_tag.nextSibling.strip()
            if status_text != 'is renewed.':
                outcome.renewed = False
                failure_list = title_tag.findNextSibling('ul')
                if failure_list is not None:
                    # Use the first list entry as the error text.
                    outcome.renewalError = failure_list.li.string
                else:
                    outcome.renewalError = 'Renewal failed'
            else:
                outcome.renewed = True
                outcome.renewalError = None

            raw_title = title_tag.contents[0].strip()
            key = util.stripNonAscii(util.unescape(raw_title))
            self.renewalItems[key] = outcome
Exemplo n.º 4
0
	def parse(self):
		"""Parse checked-out items from the table holding the ``HASNOW`` inputs.

		Stores the form named ``hasnow`` in ``self.form``. If the soup has no
		``<input name="HASNOW">`` at all, returns without touching
		``self.itemsOut``. Otherwise every ``<tr>`` of the closest preceding
		``<table>`` that contains such an input yields one item: the title comes
		from the first ``globaltext`` div in the row and the due date from the
		fifth. Items are stored in ``self.itemsOut`` keyed by cleaned title.
		"""
		self.form = self.soup.find("form", {"name" : "hasnow"})
		row = self.soup.find('input', {'name' : 'HASNOW'})
		if row is None:
			return

		table = row.findPrevious('table')
		for itemrow in table.findAll('tr'):
			# ignore the header row -- we know it's a header if there isn't a renewal checkbox next to it
			if itemrow.find('input', {'name':'HASNOW'}) is None:
				continue
			item = models.Item()

			divs = itemrow.findAll('div', {'id' : 'globaltext'})
			title = divs[0].string.strip()
			title = util.unescape(title)
			item.title = util.stripNonAscii(title)

			dueDate = divs[4].string.strip()
			dueDate = dueDate.split(',')[0]  # strip time
			item.dueDate = util.toDatetime(dueDate)
			self.itemsOut[item.title] = item
Exemplo n.º 5
0
 def parse_title(self, td, item):
     """Fill ``item.title`` (and ``item.author`` when available) from ``td``.

     The title is the text of the first ``<a>`` in ``td`` with leading and
     trailing spaces, colons, slashes and periods stripped. When ``td`` has a
     ``<span>`` whose text is not ``None``, that text (stripped the same way)
     becomes the author. Returns ``item``.
     """
     anchor = td.find('a')
     cleaned = anchor.text.strip(' :/.')
     item.title = util.stripNonAscii(util.unescape(cleaned))

     author_span = td.find('span')
     if author_span is None or author_span.text is None:
         # No usable author markup; leave item.author as it was.
         return item
     item.author = author_span.text.strip(' :/.')
     return item
Exemplo n.º 6
0
    def parseTitle(self, td, item):
        """Fill ``item.title`` and ``item.author`` from the links inside ``td``.

        Only ``<a>`` elements whose class is not ``boldRedFont1`` are
        considered: the first supplies the title and the second the author.
        Returns ``item``.
        """
        def not_bold_red(css_class):
            return css_class != "boldRedFont1"

        anchors = td.findAll("a", {"class": not_bold_red})

        # for some reason many title links have a superfluous ' /' at the end -- remove this
        raw_title = anchors[0].string.rstrip(" /")
        item.title = util.stripNonAscii(util.unescape(raw_title))

        byline = anchors[1].string.rstrip(".")
        if byline.startswith("by "):
            # Drop the leading "by " prefix.
            byline = byline[len("by "):]
        # sometimes there is extraneous information after the author's name, ex: Dylan, Bob, 1941-
        byline = ",".join(byline.split(",")[:2])
        item.author = util.stripNonAscii(util.unescape(byline))

        return item
Exemplo n.º 7
0
 def parseTitle(self, td, item):
     """Fill ``item.title`` from the first ``<span>`` inside ``td``.

     When the span contains an ``<a>``, the title is the link's first child;
     otherwise it is the span's own first child. Returns ``item``.
     """
     span = td.find('span')
     anchor = span.find('a')
     # Prefer the link's content when the title is wrapped in one.
     holder = span if anchor is None else anchor
     raw_title = holder.contents[0].strip()
     item.title = util.stripNonAscii(util.unescape(raw_title))
     return item
Exemplo n.º 8
0
    def parse(self):
        """Collect renewable items from the checkboxes of the ``renewitems`` form.

        Stores the form in ``self.form``. Each checkbox input yields one item
        whose ``renewitemkey`` is the checkbox's ``name`` attribute and whose
        title is the third child of the ``<label>`` in the next ``<td>``. Items
        are stored in ``self.renewalitems`` keyed by cleaned title.
        """
        self.form = self.soup.find('form', {'name' : 'renewitems'})

        for box in self.form.findAll('input', {'type' : 'checkbox'}):
            entry = models.Item()
            entry.renewitemkey = box['name']

            label = box.findNext('td').label
            raw_title = label.contents[2].strip()
            entry.title = util.stripNonAscii(util.unescape(raw_title))

            self.renewalitems[entry.title] = entry
Exemplo n.º 9
0
    def parse(self):
        """Read renewal results from every ``<dd>`` element in the soup.

        For each ``<dd>``, the closest preceding ``<strong>`` supplies the
        result text (via ``util.inner_text``). The text ``'Item renewed'``
        marks success; any other text is stored as the item's
        ``renewalError``. The title is the first child of the ``<dd>``.
        Results are stored in ``self.renewalItems`` keyed by cleaned title.

        Diagnostic output goes through the ``logging`` module at DEBUG level
        instead of being printed to stdout.
        """
        import logging
        log = logging.getLogger(__name__)

        dds = self.soup.findAll('dd')

        for dd in dds:
            item = models.Item()

            reasonSoup = dd.findPrevious('strong')
            log.debug("reason markup: %s", reasonSoup)
            reason = util.inner_text(reasonSoup)
            log.debug("reason=%s", reason)
            if reason == 'Item renewed':
                item.renewed = True
                item.renewalError = None
            else:
                item.renewed = False
                item.renewalError = reason

            title = dd.contents[0].strip()
            title = util.unescape(title)
            title = util.stripNonAscii(title)
            self.renewalItems[title] = item