def extractItems(self, response):
    """Build partially filled ``DliMetaItem`` objects from a listing page.

    The response body is parsed with BeautifulSoup and the first ``<table>``
    whose ``width`` attribute is ``'90%'`` is scanned row by row. Each row
    whose first ``<td>`` holds at least two text nodes yields one item:

    * ``title``   -- the first text node of the cell.
    * ``barcode`` -- the second text node with leading commas and spaces
      removed.
    * ``pages``   -- the second-to-last ``'.'``-separated piece of the
      second text node, stripped of surrounding whitespace.
    * ``metadataLink`` -- the ``href`` of the first ``<a>`` in the cell,
      set only when the anchor exposes a non-empty attribute map.

    Rows without a ``<td>``, or with fewer than two text nodes, are skipped.

    :param response: object whose ``body`` attribute holds the page markup.
    :returns: dict mapping each item's barcode to its own item; empty when
        the expected table is not present.
    :raises IndexError: if a row's second text node contains no ``'.'``.
    """
    # we keep this dictionary for partially filled items.
    temp_items = {}

    # Earlier revisions stored results on ``self.temp_items`` while returning
    # the (empty) local dict. The local dict is now the source of truth; the
    # instance attribute is still kept in sync when it exists so code that
    # reads it keeps working.
    shared_items = getattr(self, 'temp_items', None)

    soup = BeautifulSoup.BeautifulSoup(response.body)
    table = soup.find('table', width='90%')
    if table is None:
        # Nothing to extract: the page has no matching table.
        return temp_items

    for row in table.findAll('tr'):
        cell = row.find('td')
        if not cell:
            continue

        metaText = cell.findAll(text=True)
        if len(metaText) < 2:
            # Title and details are both required to build an item.
            continue

        # A fresh item per row; reusing one instance would make every
        # dictionary entry point at the same (last-row) data.
        item = DliMetaItem()

        anchorTag = cell.find('a')
        # NOTE(review): ``attrMap`` looks like BeautifulSoup 3's internal
        # attribute cache -- confirm it is populated at this point, and
        # confirm DliMetaItem allows attribute-style assignment (Scrapy
        # items normally expect ``item['field'] = value``).
        if anchorTag and anchorTag.attrMap:
            item.metadataLink = anchorTag.attrMap['href']

        details = metaText[1]
        # -1 is empty since there is a dot at the end.
        item['pages'] = details.split('.')[-2].strip('\n \t')
        item['title'] = metaText[0]
        item['barcode'] = details.lstrip(', ')

        temp_items[item['barcode']] = item
        if shared_items is not None:
            shared_items[item['barcode']] = item

    return temp_items