def authors(self):
    """Returns the authors names, co-writers or manufacturers for every item.

    >>> authors = wishlist.authors()
    """
    ret = []
    # byline prefixes used by the localized stores; each is stripped together
    # with its trailing space.  NOTE(review): the previous code sliced a fixed
    # 3 characters, which only handled the 4-char 'von ' prefix by accident of
    # the trailing strip(); stripping len(prefix) is robust for any length.
    prefixes = ('de ', 'di ', 'by ', 'von ')
    for title_div in self.page.xpath("//div[@class='pTitle']"):
        subtree = helpers.tostring(title_div, encoding='unicode',
                                   method='html', pretty_print=True)
        if 'span' in subtree:
            # re-parse the fragment so we can xpath into the byline span
            parser = helpers.etree.HTMLParser()
            div = helpers.etree.fromstring(subtree, parser)
            res = div.xpath("//span[@class='small itemByline']//text()")
            for raw in res:
                author = raw.replace('~', '').strip()
                for prefix in prefixes:
                    if author.startswith(prefix):
                        author = author[len(prefix):].strip()
                        break
                ret.append(helpers._stripper(author))
        else:
            # item has no byline at all; keep the list aligned with titles()
            ret.append(u'')
    # media-type markers sometimes leak into the byline column; drop them all
    dirt = ['DVD', 'VHS']
    for item in dirt:
        while item in ret:
            ret.remove(item)
    return ret
def wishlists_details(self):
    """
    Returns a tuple with lists, the first with all wishlists codes and the
    second with their total number of items (i.e. wishlist size).

    >>> details = person.wishlists_details()
    """
    # list ids carry a 'regListsList' prefix that must be peeled off
    retcodes = [
        helpers._stripper(code.replace('regListsList', ''))
        for code in self.page.xpath(
            "//div[@id='profile']/div[@id='regListpublicBlock']/div/@id")
    ]
    retsizes = [
        helpers._stripper(size)
        for size in self.page.xpath(
            "//div[@id='profile']/div[@id='regListpublicBlock']/div/div/span[1]//text()")
    ]
    return retcodes, retsizes
def basic_info(self):
    """
    Returns the name of the wishlist owner and, if available, the address of
    its profile picture.

    >>> info = person.basic_info()
    """
    # wishlists always show a first name, so the name query never comes up empty
    ret = [
        helpers._stripper(name)
        for name in self.page.xpath("//td[@id='profile-name-Field']//text()")
    ]
    photo = self.page.xpath("//div[@id='profile']/div/img/@src")
    if photo:
        # drop the next-to-last dotted segment of the image URL
        # (presumably Amazon's thumbnail size modifier — TODO confirm)
        parts = photo[0].split('.')
        full = '.'.join(parts[:-2]) + '.' + parts[-1]
        ret.append(helpers._stripper(full))
    return ret
def titles(self):
    """
    Returns items titles, even if they are pretty long ones (like academic
    books or journals).

    >>> titles = wishlist.titles()
    """
    nodes = self.page.xpath("//div[@class='pTitle']/strong//text()")
    return [helpers._stripper(title) for title in nodes]
def covers(self):
    """Returns the addresses of items pictures (e.g. book covers, albums
    pictures).

    >>> covers = wishlist.covers()
    """
    sources = self.page.xpath(
        "//div/form/table/tbody[*]/tr[*]/td[*]/div[@class='pImage']/img/@src"
    )
    ret = []
    for src in sources:
        # remove the next-to-last dotted segment of the image URL
        # (presumably the thumbnail size modifier — TODO confirm)
        parts = src.split('.')
        ret.append(helpers._stripper('.'.join(parts[:-2]) + '.' + parts[-1]))
    return ret
def via(self):
    """
    Returns the sorted original web pages from which the wished item was
    pulled, only for Universal items not sold by Amazon directly.

    >>> via = wishlist.via()
    """
    urls = self.page.xpath(
        "//div/form/table/tbody[*]/tr[*]/td[*]/strong[2]//text()")
    # dedupe, then return in sorted order
    return sorted(set(helpers._stripper(url) for url in urls))
def ideas(self): """Returns a list of ideas to shop for later, as reminders >>> ideas = wishlist.ideas() """ ret = [] ideas = [ ur'Idea', ur'Idee', ur'Id\xc3\xa9e', ur'\xe8\xa7\x82\xe5\xbf\xb5', ur'Id\xc3\xa9ia' ] for row in zip(self.titles(), self.prices()): if row[1] in ideas: ret.append(helpers._stripper(row[0])) return ret
def list(self):
    """
    Returns a list with tuples containing all matching usernames and their main
    wishlist ID, with which you can get secondary lists via the Wishlist()
    class.

    >>> wishlists = search.list()
    >>> for row in wishlists:
    >>>     print row
    """
    # before the pipe: a page full of usernames; after it: single exact matches
    hrefs = self.page.xpath(
        "//td/span/a//@href | //div[@id='sortbarDisplay']/form//@action")
    names = [
        helpers._stripper(n)
        for n in self.page.xpath(
            "//td/span/a//text() | //h1[@class='visitor']//text()")
    ]
    codes = [helpers._stripper(href.split('/')[3]) for href in hrefs]
    # FIXME: hack not to return empty search results,
    # whose only anchor text is not english
    if 'tg' not in codes:
        return zip(names, codes)
def urls(self):
    """Returns the page address of a given item in the wishlist, with its full
    details.

    >>> urls = wishlist.urls()
    """
    prefix = 'http://www.amazon' + self.domain + '/dp/'
    ret = []
    for name in self.page.xpath("//tbody[@class='itemWrapper']//@name"):
        if 'item' not in name:
            continue
        # the item code is the fourth dot-separated field of the @name attribute
        code = name.split('.')[3]
        ret.append(helpers._stripper(prefix + code if code else ''))
    return ret
def prices(self):
    """Returns the price tags for every item in a wishlist.

    >>> prices = wishlist.prices()
    """
    nodes = self.page.xpath(
        "//td[@class='pPrice'][not(text()) and not(strong)] | //td[@class='pPrice']/strong[3] | //td[@class='pPrice']/strong[1] | //td[@class='Price']/span/strong//text()"
    )
    # cleanups, every store has different price tag extras
    if 'EUR' in self.currency:
        dust = 'EUR'
    elif 'CDN' in self.currency:
        dust = 'CDN%s' % self.symbol
    elif 'INR' in self.currency:
        dust = 'Rs. '
    elif 'CNY' in self.currency:
        dust = u'\xa5'
    elif 'BRL' in self.currency:
        dust = 'R%s ' % self.symbol
    elif 'JPY' in self.currency:
        dust = u'\x81\x8f'
    else:
        dust = self.symbol
    # continental stores use dots for thousands and a comma as decimal mark
    continental = 'EUR' in self.currency or 'BRL' in self.currency
    ret = []
    for node in nodes:
        text = helpers.tostring(node, encoding='unicode', method='text',
                                pretty_print=True).strip()
        if 'At' in text:
            # availability blurb, not a price — skip it entirely
            continue
        # TODO: how would it work out for non-english stores? quite a huge bug ahead...
        if 'Click' in text:
            text = ''
        text = text.replace(dust, '')
        if continental:
            text = text.replace('.', '').replace(',', '.')
        else:
            text = text.replace(',', '')
        ret.append(helpers._stripper(text))
    return ret