def services_incoming(request):
    """Render the add-item form for an incoming item (probably from the bookmarklet).

    Reads the ``loc`` query parameter, extracts a 10-character ASIN from the
    URL path, looks the item up through the Amazon API and renders
    ``add-item.html`` with the author and title that were found.  Anything
    that cannot be determined is rendered as ``u'Unknown'``.

    :param request: incoming request; only ``request.GET`` and
        ``request.user`` are used.
    :return: whatever ``render_to_response`` returns for ``add-item.html``.
    """
    #TODO: this is nothing more than a test now. cleanup.
    # Defaults are set up front so the template always gets all three values,
    # even when no lookup happens or the lookup yields nothing.
    author = u'Unknown'
    title = u'Unknown'
    isbn = u'Unknown'  # never filled in by the lookup below

    # 'loc' may be missing, or may not contain an ASIN at all; skip the
    # lookup in both cases instead of failing on None.
    url = request.GET.get('loc', None)
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url) if url else None
    if matches is not None:
        asin = matches.group(1)
        api = API(AMZ.KEY, AMZ.SECRET_KEY, 'us')
        lookup = api.item_lookup(asin, IdType='ASIN',
                                 AssociateTag=AMZ.ASSOCIATE_TAG)
        for root in lookup:
            nspace = root.nsmap.get(None, '')
            # The XPath is absolute, so it selects the items of the whole
            # response document regardless of which node `root` is.
            amazon_items = root.xpath('//aws:Items/aws:Item',
                                      namespaces={'aws': nspace})
            for amazon_item in amazon_items:
                attributes = amazon_item.ItemAttributes
                if hasattr(attributes, 'Author'):
                    author = unicode(attributes.Author)
                if hasattr(attributes, 'Title'):
                    title = unicode(attributes.Title)

    return render_to_response('add-item.html',
                              {'user': request.user,
                               'creator': author,
                               'title': title,
                               'isbn': isbn})
def show_product(locale, asin):
    """Look up *asin* in the given Amazon *locale* and render ``product.html``.

    The fields of every returned item are copied onto a single ``Product``
    object, so when several items come back the last one wins.  New/used
    price fields are optional: they are left unset when the response does
    not carry them.

    :param locale: locale string passed to ``API``.
    :param asin: item id passed to ``item_lookup``.
    :return: the result of ``render_template('product.html', ...)``.
    """
    api = API(locale=locale)
    result = api.item_lookup(
        asin,
        ResponseGroup="ItemIds, ItemAttributes, Images, OfferSummary, Offers")

    niceProduct = Product()
    for product in result.Items.Item:
        niceProduct.title = product.ItemAttributes.Title
        niceProduct.ASIN = product.ASIN.text
        niceProduct.imageUrl = product.MediumImage.URL

        # Same three fields for the "new" and the "used" offer; each group is
        # best-effort and independent of the other.
        for node_name, prefix in (('LowestNewPrice', 'new'),
                                  ('LowestUsedPrice', 'used')):
            try:
                lowest = getattr(product.OfferSummary, node_name)
                # Amount is divided by 100 to get the price in currency units.
                setattr(niceProduct, prefix + 'Price',
                        float(lowest.Amount) / 100)
                setattr(niceProduct, prefix + 'FormattedPrice',
                        lowest.FormattedPrice)
                setattr(niceProduct, prefix + 'PriceCurrency',
                        lowest.CurrencyCode)
            except (AttributeError, TypeError, ValueError):
                # Missing or non-numeric price data: leave the fields unset.
                pass

        niceProduct.type = product.ItemAttributes.ProductGroup
        niceProduct.region = getRegionFromUrl(product.DetailPageURL.text).upper() #product.ItemAttributes.RegionCode
        niceProduct.model = product.ItemAttributes.Model

    return render_template('product.html', product=niceProduct)
def scrape_wish_list_items(list_id):
    """ Populate wish_list_items with data from wishlist

    Every item id of the wishlist is looked up through the Amazon API
    (``ResponseGroup="Large"``).  Ids whose lookup or parsing fails are
    reported on stdout and skipped.

    :param list_id: id handed to ``Wishlist``.
    :return: list of dicts with the keys ``title``, ``price`` and ``amazonid``.
    """
    print("Scraping wishlist...")
    wish = Wishlist(list_id)
    item_ids = wish.get_list_items()

    wishlist_items = []
    api = API(locale='us')
    for item_id in item_ids:
        try:
            result = api.item_lookup(item_id, ResponseGroup="Large")
            for item in result.Items.Item:
                wishlist_items.append({
                    "title": item.ItemAttributes.Title,
                    "price": item.Offers.Offer.OfferListing.Price.FormattedPrice,
                    "amazonid": item.ASIN,
                })
        except Exception:
            # Best effort: one bad item must not abort the scrape.  Catching
            # Exception (not everything) keeps Ctrl-C working.
            print("!!! Failed getting " + str(item_id))

    print("Completed scraping.")
    return wishlist_items
def search_on_amazon(asin, album, artist):
    '''
    Tries to locate the url of album by artist on amazon.

    When an ASIN is given it is looked up first and the detail page of a
    'Music' item is returned.  Otherwise, or when that lookup finds nothing
    or fails, an 'MP3Downloads' keyword search for "<album> <artist>" is
    tried and the first 'Digital Music Album' whose creator and title match
    is returned.

    Returns '' if it can't be found (including any API failure, and when the
    Amazon credentials are not configured).
    '''
    from amazonproduct import API

    if not AMAZON_KEY or not AMAZON_SECRET or not AMAZON_ASSOCIATE_TAG:
        return ''

    api = API(AMAZON_KEY, AMAZON_SECRET, 'us')

    if asin:
        try:
            node = api.item_lookup(asin, AssociateTag=AMAZON_ASSOCIATE_TAG)
            for item in node.Items:
                attributes = item.Item.ItemAttributes
                if attributes.ProductGroup == 'Music':
                    url = item.Item.DetailPageURL
                    if url:
                        return url.text
        except Exception:
            # A failed ASIN lookup must not prevent the keyword search below.
            pass

    try:
        node = api.item_search('MP3Downloads',
                               Keywords=album + ' ' + artist,
                               AssociateTag=AMAZON_ASSOCIATE_TAG)
        for item in node.Items:
            attributes = item.Item.ItemAttributes
            if matching.match(artist, str(attributes.Creator)) \
                    and matching.match(album, str(attributes.Title)) \
                    and attributes.ProductGroup == 'Digital Music Album':
                url = item.Item.DetailPageURL
                if url:
                    return url.text
    except Exception:
        # Best effort: any failure simply means "not found".
        pass

    return ''
def pullItemInfoFromAmazon(job_id):
    """Pull item data from Amazon for the ASINs recorded under *job_id*.

    Only the first 10 ASINs of the job are processed.  Each one is looked up
    (retrying on any error), then handed to ``insert_item_info`` and
    ``process_image``.  ASINs failing a stage are logged and collected in
    the matching ``*_fail_list``.

    :param job_id: value used to filter ``Asin`` objects.
    """
    logger = logging.getLogger('tst')
    asin_obj_list = Asin.objects.filter(job_id=job_id)
    asin_list = [x.asin for x in asin_obj_list]
    pull_fail_list = []
    insert_fail_list = []
    image_fail_list = []
    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')

    # Number of lookup attempts per ASIN; the final log message is derived
    # from it so the two can no longer disagree.
    max_attempts = 2

    for asin in asin_list[:10]:
        asin = asin.strip()
        result = None  # None means "no successful lookup yet"
        for attempt in range(1, max_attempts + 1):
            try:
                result = api.item_lookup(
                    asin,
                    ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',
                    MerchantId='Amazon',
                    Condition='New')
            except Exception:
                logger.info('ASIN: %s -- %d time -- Fail' % (asin, attempt))
            else:
                logger.info('ASIN: %s -- %d time -- Success' % (asin, attempt))
                break

        if result is None:
            logger.info('ASIN: %s Fail after %d times' % (asin, max_attempts))
            pull_fail_list.append(asin)
            continue
        if not insert_item_info(result, asin):
            logger.error('Insert item info for %s fail' % asin)
            insert_fail_list.append(asin)
            continue
        if not process_image(asin):
            logger.error('Processing Image for %s fail' % asin)
            image_fail_list.append(asin)
class AmazonUtil:
    """Helper that wraps ``API.item_lookup`` with a retry loop."""

    def __init__(self):
        #self.associate_tag = settings.ASSOCIATE_TAG
        #self.access_key_id = settings.ACCESS_KEY_ID
        #self.secret_access_key = settings.SECRET_ACCESS_KEY
        # No client yet; item_lookup() builds one on every call.
        self.api = None

    def item_lookup(self,asin,locale,retry=3,time_interval=10,ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',MerchantId=None,Condition=None):
        """Look up *asin* in *locale*, trying up to *retry* times.

        A new ``API`` client is built from ``settings`` on every call and
        stored on ``self.api``.  Network errors (``urllib2.URLError``,
        ``socket.timeout``) trigger another attempt; an
        ``InvalidParameterValue`` stops immediately.

        NOTE(review): in the code visible here ``time_interval``,
        ``MerchantId`` and ``Condition`` are never used, and neither
        ``result`` nor ``status`` is returned -- the method may be cut off;
        confirm against the full file.
        """
        self.api = API(access_key_id = settings.ACCESS_KEY_ID, secret_access_key = settings.SECRET_ACCESS_KEY, associate_tag = settings.ASSOCIATE_TAG, locale=locale)
        result = ''
        #status
        #0 -- Success
        #1 -- Socket Timeout
        #2 -- Invalid ASIN
        #-1 -- Fail
        status = -1
        for i in range(0,retry):
            try:
                #result = self.api.item_lookup(asin,ResponseGroup=ResponseGroup,MerchantId = MerchantId,Condition=Condition)
                result = self.api.item_lookup(asin,ResponseGroup=ResponseGroup)
                status = 0
                break
            except urllib2.URLError,e:
                # Network-level failure: record it and try again.
                status = 1
                continue
            except socket.timeout,e:
                status = 1
                continue
            except InvalidParameterValue,e:
                # Retrying cannot help for an invalid id, so give up now.
                status = 2
                break
def lookup(asin):
    """Fetch *asin* from the 'jp' locale and debug-log the item's title.

    The lookup uses ``ResponseGroup='Large'``.  Nothing is returned.
    """
    response = API(locale='jp').item_lookup(asin, ResponseGroup='Large')
    title = response.Items.Item.ItemAttributes.Title
    logging.debug(title)
def lookup(asin):
    """Debug-log the title of the item *asin* as found in the 'jp' locale.

    Performs one ``item_lookup`` with ``ResponseGroup="Large"`` and returns
    nothing.
    """
    client = API(locale="jp")
    found = client.item_lookup(asin, ResponseGroup="Large")
    attributes = found.Items.Item.ItemAttributes
    logging.debug(attributes.Title)
def get_product_details(cls, asin, product_renderer, locale='us'):
    """Fetch the product node and the image node for *asin*.

    Both lookups are retried with an exponentially growing pause whenever
    Amazon answers ``TooManyRequests``.  On ``AWSError`` or
    ``DownloadError`` the problem is logged and the function returns
    ``None`` early.

    NOTE(review): nothing after the retry loop is visible in this excerpt;
    ``product_renderer``, ``product_node`` and ``image_node`` are presumably
    consumed by code that follows -- confirm against the full file.

    :param asin: item id to look up.
    :param product_renderer: not used in the visible code.
    :param locale: locale string passed to ``API``.
    """
    logging.info('AmazonProductFetcher.get_product_details called, asin: %s, locale: %s' % (asin, locale))
    api = API(AWS_KEY, SECRET_KEY, locale)
    timeout_ms = 100
    while True:
        try:
            product_node = api.item_lookup(id=asin)  # title, product group
            image_node = api.item_lookup(id=asin, ResponseGroup='Images')  # images
            break
        except amazonproduct.TooManyRequests:
            # time.sleep() takes seconds; the back-off is kept in
            # milliseconds, so convert (previously this slept 100 s, 200 s, ...).
            time.sleep(timeout_ms / 1000.0)
            timeout_ms *= 2
        except AWSError:
            logging.error('Could not retrieve info for product %s' % asin)
            return
        except DownloadError as e:
            logging.error('%s retrieving URLfor product: %s in RPC' % (e, asin))
            return  # Early quit
def request_amzn(isbn):
    r"""
    Get the book title and ASIN nodes from Amazon ('jp' locale).

    :param isbn: ISBN string
    :return: tuple (title nodes, ASIN nodes, XML root node); the first two
        are the lists returned by ``root.xpath`` and are empty when the
        response contains no item.
    """
    api = API(locale='jp')
    # One request is enough; the response is reused for both XPath queries.
    root = api.item_lookup(isbn, SearchIndex='Books', IdType='ISBN')
    # The response uses a default namespace, which XPath needs under a prefix.
    namespaces = {"aws": root.nsmap.get(None, '')}
    book = root.xpath(
        "//aws:Items/aws:Item/aws:ItemAttributes/aws:Title",
        namespaces=namespaces
    )
    asin = root.xpath(
        "//aws:Items/aws:Item/aws:ASIN",
        namespaces=namespaces
    )
    return (book, asin, root)
class Amazon():
    """Read-only view of one Amazon item identified by a product URL.

    The ASIN is extracted from the URL and looked up once in ``__init__``.
    ``asin``, ``result`` and ``item`` are ``None`` when no ASIN could be
    found or the response contains no item; the properties then return
    ``None`` as well.
    """

    def __init__(self, url, locale='us'):
        """
        :param url: product URL containing a 10-character ASIN.
        :param locale: locale string passed to ``API``.
        """
        self.api = API(locale=locale)
        self.asin = self.__get_product_id(url)
        self.result = None
        self.item = None
        if self.asin is None:
            # Nothing to look up; avoid sending a request for "None".
            return
        self.result = self.api.item_lookup(
            self.asin,
            ResponseGroup='ItemAttributes,Offers,Images'
        )
        try:
            self.item = self.result.Items.Item[0]
        except (IndexError, AttributeError):
            # A response without any Item surfaces as either of these.
            self.item = None

    def __get_product_id(self, url):
        """ get a amazon asin number from url, or None if there is none """
        search_result = re.search("([A-Z0-9]{10})", url or "")
        if search_result is None:
            return None
        return search_result.group(0)

    @property
    def title(self):
        """ return title of Item, or None when unavailable """
        try:
            return self.item.ItemAttributes.Title.text
        except AttributeError:
            return None

    @property
    def price(self):
        """ return lowest price of Amazon Item """
        try:
            return self.item.OfferSummary.LowestNewPrice.FormattedPrice.text
        except AttributeError:
            return None

    @property
    def currency(self):
        """ return currency of Amazon Item

        NOTE(review): unlike ``price`` this returns the node itself, not its
        ``.text``; kept as is because callers may rely on it.
        """
        try:
            return self.item.OfferSummary.LowestNewPrice.CurrencyCode
        except AttributeError:
            return None

    @property
    def photo(self):
        """ return small Image of Amazon Item """
        try:
            return self.item.SmallImage.URL.text
        except AttributeError:
            return None
class Amazon:
    """Builds an Amazon cart from the active items of a list.

    ``items`` maps ASIN strings to quantities and is the payload handed to
    ``cart_create``.
    """

    def __init__(self):
        self.api = API(locale='us')
        self.cart_exists = False
        self.items = {}

    def get_asin_from_url(self, url):
        """Return the last path segment of *url* that looks like an ASIN.

        A segment qualifies when it is exactly 10 alphanumeric characters.
        Returns ``None`` when no segment qualifies.
        """
        asin_pattern = re.compile("^([A-Za-z0-9]{10})$")
        # Scan from the end of the path, as the ASIN is searched last-first.
        for segment in reversed(urlparse(url).path.split("/")):
            if asin_pattern.match(segment):
                return segment
        return None

    def get_item_by_asin(self, asin):
        """Return the raw ``item_lookup`` response for *asin*."""
        return self.api.item_lookup(asin)

    def get_items(self, list):
        """Record asin -> quantity for every active ``Item`` of *list*."""
        for item in Item.objects.filter(active=True, list=list):
            self.items["%s" % item.asin] = item.quantity

    def get_cart(self):
        """Create a cart from ``self.items``, print its URL and subtotal,
        and return the ``cart_create`` response."""
        cart = self.api.cart_create(self.items)
        print(cart.Cart.PurchaseURL)
        print(cart.Cart.SubTotal.FormattedPrice)
        # May need this at some point?
        #     for item in cart.Cart.CartItems:
        #         print dir(item.CartItem)
        return cart

    def main(self, list=None):
        """Collect the items of *list* and create the cart.

        :raises TypeError: when *list* is not given.  Calling ``main()``
            without it already failed with ``TypeError``, because
            ``get_items`` requires the list.
        """
        if list is None:
            raise TypeError("main() requires the list whose items go into the cart")
        self.get_items(list)
        self.get_cart()
class Amazon:
    """Builds an Amazon cart from the active items of a list.

    ``items`` maps ASIN strings to quantities and is passed to
    ``cart_create`` by ``get_cart``.
    """

    def __init__(self):
        self.api = API(locale='us')
        self.cart_exists = False
        self.items = {}

    def get_asin_from_url(self, url):
        """Return the ASIN found in the path of *url*, or ``None``.

        Path segments are checked from last to first; the first one made of
        exactly 10 alphanumeric characters is returned.
        """
        asin_pattern = re.compile("^([A-Za-z0-9]{10})$")
        path_parts = urlparse(url).path.split("/")
        for part in reversed(path_parts):
            if asin_pattern.match(part):
                return part
        return None

    def get_item_by_asin(self, asin):
        """Return the raw ``item_lookup`` response for *asin*."""
        return self.api.item_lookup(asin)

    def get_items(self, list):
        """Store asin -> quantity for every active ``Item`` of *list*."""
        for item in Item.objects.filter(active=True, list=list):
            self.items["%s" % item.asin] = item.quantity

    def get_cart(self):
        """Create a cart from ``self.items``; print its purchase URL and
        formatted subtotal, then return the ``cart_create`` response."""
        cart = self.api.cart_create(self.items)
        print(cart.Cart.PurchaseURL)
        print(cart.Cart.SubTotal.FormattedPrice)
        # May need this at some point?
        #     for item in cart.Cart.CartItems:
        #         print dir(item.CartItem)
        return cart

    def main(self, list=None):
        """Collect the items of *list* and create the cart.

        :raises TypeError: when *list* is omitted.  ``main()`` without an
            argument raised ``TypeError`` before as well, since
            ``get_items`` cannot run without the list.
        """
        if list is None:
            raise TypeError("main() requires the list whose items go into the cart")
        self.get_items(list)
        self.get_cart()
def amazon_lookup(asin):
    """Look up *asin* on Amazon ('us' locale) and summarize the item.

    :param asin: item id passed to ``item_lookup``.
    :return: dict with the keys ``asin``, ``title``, ``link`` and ``price``;
        ``price`` is the string ``"no price available"`` when the response
        has no lowest new price.
    """
    api = API(locale='us')
    result = api.item_lookup(asin, ResponseGroup="ItemAttributes, OfferSummary", paginate=False)
    it = result.Items.Item

    try:
        price = it.OfferSummary.LowestNewPrice.FormattedPrice
    except AttributeError:
        # Only a missing node means "no price"; anything else should surface.
        price = "no price available"

    return {
        'asin': it.ASIN,
        'title': it.ItemAttributes.Title,
        'link': it.DetailPageURL,
        'price': price,
    }
def pullItemInfoFromAmazon(job_id):
    """Pull item data from Amazon for the ASINs recorded under *job_id*.

    Only the first 10 ASINs of the job are processed.  Each one is looked up
    (retrying on any error), then passed to ``insert_item_info`` and
    ``process_image``.  ASINs failing a stage are logged and collected in
    the matching ``*_fail_list``.

    :param job_id: value used to filter ``Asin`` objects.
    """
    logger = logging.getLogger('tst')
    asin_obj_list = Asin.objects.filter(job_id=job_id)
    asin_list = [x.asin for x in asin_obj_list]
    pull_fail_list = []
    insert_fail_list = []
    image_fail_list = []
    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')

    # Attempts per ASIN; the "Fail after N times" message is built from this
    # value so it always reports the real number of tries.
    max_attempts = 2

    for asin in asin_list[:10]:
        asin = asin.strip()
        result = None  # stays None until a lookup succeeds
        for attempt in range(1, max_attempts + 1):
            try:
                result = api.item_lookup(
                    asin,
                    ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',
                    MerchantId='Amazon',
                    Condition='New')
            except Exception:
                logger.info('ASIN: %s -- %d time -- Fail' % (asin, attempt))
            else:
                logger.info('ASIN: %s -- %d time -- Success' % (asin, attempt))
                break

        if result is None:
            logger.info('ASIN: %s Fail after %d times' % (asin, max_attempts))
            pull_fail_list.append(asin)
            continue
        if not insert_item_info(result, asin):
            logger.error('Insert item info for %s fail' % asin)
            insert_fail_list.append(asin)
            continue
        if not process_image(asin):
            logger.error('Processing Image for %s fail' % asin)
            image_fail_list.append(asin)
def _wish_amount(item):
    """Return the item's price in currency units, or 0.0 when none is listed.

    The lowest new offer is preferred; the list price is the fallback.
    """
    price_paths = (
        ('OfferSummary', 'LowestNewPrice', 'Amount'),
        ('ItemAttributes', 'ListPrice', 'Amount'),
    )
    for path in price_paths:
        node = item
        try:
            for name in path:
                node = getattr(node, name)
            # float() first: plain "/ 100" is integer division on Python 2
            # and would drop the cents.
            return float(node) / 100
        except (AttributeError, TypeError, ValueError):
            continue
    return 0.0


def create_wish_from_url(user, url):
    """Create and save a ``Wish`` for the Amazon product behind *url*.

    The ASIN is taken from the URL, the item is looked up in the 'us'
    locale, and title, detail-page URL, price and large image URL are
    stored on the new wish.

    :param user: owner stored on the wish.
    :param url: amazon.com product URL (``/dp/<asin>`` or
        ``/gp/product/<asin>``).
    :return: the saved ``Wish``.
    :raises KeyError: when one of the ``AWS_KEY``, ``AWS_SECRET_KEY`` or
        ``ASSOCIATE_TAG`` environment variables is missing.
    :raises ValueError: when *url* is not a recognized product URL.
    """
    AWS_KEY = os.environ['AWS_KEY']
    AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY']
    ASSOCIATE_TAG = os.environ['ASSOCIATE_TAG']
    ASIN_MATCH = 'http://www.amazon.com/([\\w-]+/)?(dp|gp/product)/(\\w+/)?(\\w{10})'

    match = re.match(ASIN_MATCH, url)
    if match is None:
        raise ValueError('Not a recognized Amazon product URL: %r' % (url,))
    asin = match.groups()[-1]  # the last group is the 10-character ASIN

    api = API(locale='us', associate_tag=ASSOCIATE_TAG,
              access_key_id=AWS_KEY, secret_access_key=AWS_SECRET_KEY)
    result = api.item_lookup(asin, ResponseGroup='ItemAttributes, OfferFull, Images')
    item = result.Items.Item[0]

    wish = Wish(
        user=user,
        asin=asin,
        title=item.ItemAttributes.Title,
        amount=_wish_amount(item),
        is_credit=False,
        url=item.DetailPageURL,
        image_url=item.LargeImage.URL
    )
    wish.save()
    return wish
def get_upc(upc):
    """Print the ASIN of every item the 'us' locale returns for *upc*.

    The lookup is done with ``SearchIndex="Blended"`` and ``IdType="UPC"``.
    Nothing is returned.
    """
    lookup = API(locale='us').item_lookup(upc, SearchIndex="Blended", IdType="UPC")
    for match in lookup.Items.Item:
        print(match.ASIN)
from amazonproduct import API

api = API(locale='cn')

# Earlier experiment, kept for reference: list author and title of all
# books of one publisher.
# for book in api.item_search('Books', Publisher='Galileo Press'):
#     print '%s: "%s"' % (book.ItemAttributes.Author,
#                         book.ItemAttributes.Title)

# Look up a single item and print title and ASIN of everything returned.
result = api.item_lookup('B00OUE9KOO')
#result = api.item_lookup('B006H3MIV8')
for item in result.Items.Item:
    print('(%s) in group %s' % (item.ItemAttributes.Title, item.ASIN))
class Crawler():
    """Builds one XML product feed per Amazon locale.

    Settings are read from an ini file, ASINs from one text file per locale,
    product data from the Amazon API, and the result is written as
    ``www.amazon.<locale>.xml``.

    NOTE(review): this class was reconstructed from source that had lost its
    indentation.  The placement of ``time.sleep(2)`` in ``main`` and of
    ``return productData`` in ``gatherData`` is the most plausible reading;
    confirm against the original file.
    """

    def __init__(self):
        # Overwrites the class-level defaults below with the configured values.
        self.parseConfigFile()

    # Class-level defaults; parseConfigFile() and main() replace them per instance.
    asinTxtPath = ''
    locales = []
    feedPath = ''
    api = ''
    log = logger.createLogger("AmazonLogger", "Amazon")

    def parseConfigFile(self):
        """Load the ASIN path, the locale list and the feed path from the
        [Amazon] section of boris.ini."""
        parser = SafeConfigParser()
        parser.read('C:/BorderSoftware/Boris/settings/boris.ini')
        self.asinTxtPath = parser.get('Amazon', 'asintxtpath')
        self.locales = parser.get('Amazon', 'locales').split(',')
        self.feedPath = parser.get('Amazon', 'feedpath')

    def main(self):
        """For every configured locale: load its products, gather their data
        and write the locale's XML feed."""
        # Amazon consists of multiple webshops from different countries.
        for locale in self.locales:
            productDataList = []
            self.api = API(locale=locale)
            products = self.loadProducts(locale)
            for product in products:
                # Skip blank lines and lines starting with '#' (comments).
                if product != '' and product is not None and product[0] != '#':
                    # Product contains two elements: The ASIN and the shipping cost, divided by `:`.
                    product = product.split(':')
                    ASIN = product[0]
                    productData = self.gatherData(ASIN, locale)
                    # None means something went wrong retrieving the data.
                    if productData is not None:
                        productData["shipping_cost"] = product[1]
                        # Add the product data to a list so we can convert the list to xml once all products are parsed.
                        productDataList.append(productData)
                    # Pause between products (presumably to stay below the
                    # API rate limit).
                    time.sleep(2)
            self.writeXML(productDataList, locale)

    # This procedure loads products from the .txt file corresponding with the locale.
    def loadProducts(self, locale):
        """Return the lines of ``<asinTxtPath><locale>.txt`` as a list."""
        f = open(self.asinTxtPath + locale + '.txt')
        products = f.read().splitlines()
        f.close()
        return products

    # This procedure makes the API call and retrieves all necessary data from the response
    def gatherData(self, ASIN, locale):
        """Look up *ASIN* and return its data as a dict.

        Returns ``None`` when the id is invalid for the locale, when the
        product is not accessible through the API, or after 20 failed
        attempts.
        """
        productData = dict()
        tries = 0
        while True:
            try:
                result = self.api.item_lookup(ASIN, ResponseGroup='Large')
                break
            except InvalidParameterValue:
                # ID doesn't exist for this locale
                return
            except AWSError:
                # Product not accessible through API
                self.log.info('Not accessible through API: ' + ASIN + ' - Locale: ' + locale)
                return
            except Exception as e:
                # Any other failure is treated as a timeout: wait and retry,
                # giving up after 20 attempts.
                print 'Amazon timed out'
                print e
                tries += 1
                time.sleep(7)
                if tries == 20:
                    return
        for item in result.Items.Item:
            productData["asin"] = item.ASIN.text
            productData["deep_link"] = item.DetailPageURL.text
            productData["image_large"] = item.ImageSets.ImageSet.LargeImage.URL.text
            productData["image_medium"] = item.ImageSets.ImageSet.MediumImage.URL.text
            productData["image_small"] = item.ImageSets.ImageSet.SmallImage.URL.text
            productData["ean"] = item.ItemAttributes.EAN.text
            productData["category"] = item.ItemAttributes.Binding.text
            productData["title"] = item.ItemAttributes.Title.text
            try:
                productData["currency"] = item.Offers.Offer.OfferListing.Price.CurrencyCode.text
            except AttributeError:
                self.log.info("No offer data available for: " + ASIN + ' - Locale: ' + locale)
                # Stops processing items; price, color and brand stay unset.
                break
            productData["price"] = item.Offers.Offer.OfferListing.Price.FormattedPrice.text
            try:
                productData["color"] = item.ItemAttributes.Color.text
            except AttributeError:
                # Some locales don't contain color.
                productData["color"] = ''
            try:
                productData["brand"] = item.ItemAttributes.Brand.text
            except AttributeError:
                # Some products, like dvd's, don't have a brand.
                productData["brand"] = ''
        return productData

    # This procedure converts the product data to a xml file.
    def writeXML(self, productDataList, locale):
        """Write *productDataList* as ``<feedPath>www.amazon.<locale>.xml``.

        Every dict becomes a ``Product`` element with one child per key.
        """
        root = Element('Products')
        for productData in productDataList:
            product = SubElement(root, 'Product')
            for data in productData:
                child = SubElement(product, data)
                child.text = productData[data]
        # The 'uk' locale is written under the 'co.uk' domain name.
        if locale == 'uk':
            locale = 'co.uk'
        ElementTree(root).write(self.feedPath + 'www.amazon.' + locale + '.xml', encoding='UTF-8')
        print 'Wrote XML for www.amazon.' + locale
amazon_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE']) walmart_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE']) ebay_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE']) result = api.browse_node_lookup(1000) for child1 in result.BrowseNodes.BrowseNode.Children.BrowseNode: if limit_reached: break result1 = api.browse_node_lookup(child1.BrowseNodeId) for child in result1.BrowseNodes.BrowseNode.Children.BrowseNode: if limit_reached: break for book in api.item_search('Books', BrowseNode=child.BrowseNodeId): try: detail = api.item_lookup(str(book.ASIN), ResponseGroup='OfferSummary').Items[0] temp_rdd = sc.parallelize([ str(book.ASIN) + '+' + book.ItemAttributes.Title + '+' + book.ItemAttributes.Author + '+' + book.DetailPageURL + '+' + str(detail.Item.OfferSummary.LowestNewPrice.Amount) ]) amazon_rdd = amazon_rdd.union(temp_rdd) #print '%s,%s,%s,%s,%s' % (book.ASIN,book.ItemAttributes.Title,book.ItemAttributes.Author,book.DetailPageURL,detail.Item.OfferSummary.LowestNewPrice.Amount) amazon_price = int( detail.Item.OfferSummary.LowestNewPrice.Amount) amazon_url = str(book.DetailPageURL) walmart_url = amazon_url.replace("amazon", "walmart") walmart_price = getPrice(amazon_price) temp_rdd = sc.parallelize([ str(book.ASIN) + '+' + book.ItemAttributes.Title + '+' +
new_im.paste(im, (offset_w, offset_h)) new_im.save(imageinfo.image_file.path) return True except: return False if __name__ == '__main__': #print startNewItemJob('20150114205955') asin = 'B00001P4ZH' api = API(access_key_id=ACCESS_KEY_ID, secret_access_key=SECRET_ACCESS_KEY, associate_tag=ASSOCIATE_TAG, locale='us') result = api.item_lookup( asin, ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes', MerchantId='Amazon', Condition='New') #print insert_item_info(result,asin) #print result.Items.Item.Offers.TotalOffers #for offer in result.Items.Item.Offers: #print offer.Offer.__dict__ #print offer.Merchant.Name # print offer.Offer.OfferListing.Price.FormattedPrice print insert_item_info(result, asin) #print price_conv('$34.90') #print upc_conv('2129914776')
class CWMovieCollectionParsingManager:
    """Builds a ``CWMovieCollectionItem`` for an EAN from Amazon and OFDb data.

    ``Parse`` is the entry point.  Every ``Get*`` method is best-effort: on
    failure it returns an empty string or list and, when ``DEBUG`` is on,
    prints a message plus the exception info.
    """

    def __init__(self):
        self.api = API(locale='de')

    def initAmazonApi(self, ean):
        """Look up *ean* on Amazon and keep the item node in ``self.amazon``."""
        self.amazon = self.api.item_lookup(ItemId=ean, IdType='EAN', SearchIndex='All', ResponseGroup='Large').Items.Item

    def initOfdbApi(self, ean):
        """Load the OFDb movie entry for *ean* into ``self.ofdb``.

        The EAN search is fetched first; the film id of its first entry is
        then used to fetch the movie document.
        """
        xmlUrl = 'http://ofdbgw.geeksphere.de/searchean/'+ean
        self.ofdb = ET.parse(urllib.urlopen(xmlUrl)).getroot()
        xmlUrl = 'http://ofdbgw.home-of-root.de/movie/'+self.ofdb.findall('resultat')[0].findall('eintrag')[0].findall('filmid')[0].text
        self.ofdb = ET.parse(urllib.urlopen(xmlUrl)).getroot()

    def Parse(self, ean):
        """Fetch both sources for *ean* and return the populated item."""
        self.initAmazonApi(ean)
        self.initOfdbApi(ean)
        MCItem = CWMovieCollectionItem()
        MCItem.ean = ean
        MCItem.actors = self.GetActors()
        MCItem.directors = self.GetDirectors()
        MCItem.manufacturer = self.GetManufacturer()
        MCItem.productGroup = self.GetProductGroup()
        MCItem.title = self.GetTitle()
        MCItem.price = self.GetPrice()
        MCItem.amazonUrl = self.GetAmazonUrl()
        MCItem.asin = self.GetAsin()
        MCItem.studio = self.GetStudio()
        MCItem.audienceRating = self.GetAudienceRating()
        MCItem.imageUrl = self.GetImageUrl(MCItem.asin)
        MCItem.summary = self.GetSummary()
        MCItem.languages = self.GetLanguage()
        MCItem.subtitles = self.GetSubtitles()
        MCItem.audioFormats = self.GetAudioFormat()
        MCItem.publicationDate = self.GetPublicationDate()
        MCItem.runningTime = self.GetRunningTime()
        MCItem.rating = CWMovieCollectionRating(MCItem.title, self.GetOfdbStars())
        MCItem.rental.append(CWMovieCollectionRental())
        return MCItem

    # -- shared helpers -----------------------------------------------------

    def _report_failure(self, message, with_exc_info=True):
        """Print *message* (and the active exception info) when DEBUG is on."""
        if DEBUG == True:
            print(message)
            if with_exc_info:
                print(sys.exc_info())

    def _amazon_text(self, path, failure_message):
        """Return ``str()`` of the Amazon node reached via *path*, or ''."""
        try:
            node = self.amazon
            for name in path:
                node = getattr(node, name)
            return str(node)
        except Exception:
            self._report_failure(failure_message)
            return ''

    def _ofdb_text(self, tags, failure_message):
        """Return the text of the first OFDb node reached via *tags*, or ''."""
        try:
            node = self.ofdb
            for tag in tags:
                node = node.findall(tag)[0]
            return node.text
        except Exception:
            self._report_failure(failure_message)
            return ''

    def _amazon_names(self, attribute, failure_message):
        """Return ``str()`` of every ``ItemAttributes.<attribute>`` node.

        A missing attribute yields an empty list, like the other getters,
        instead of raising.
        """
        names = []
        try:
            nodes = getattr(self.amazon.ItemAttributes, attribute)
        except Exception:
            self._report_failure(failure_message)
            return names
        for node in nodes:
            try:
                names.append(str(node))
            except Exception:
                self._report_failure(failure_message)
        return names

    # -- getters ------------------------------------------------------------

    def GetOfdbStars(self):
        """Return the OFDb rating text, or ''."""
        return self._ofdb_text(('resultat', 'bewertung', 'note'), 'Error parsing ofdbstars')

    def GetRunningTime(self):
        """Return the running time as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'RunningTime'), 'Error parsing runningtime')

    def GetPublicationDate(self):
        """Return the publication date as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'PublicationDate'), 'Error parsing publicationdate')

    def GetAudioFormat(self):
        """Return the audio format of every language entry that has one."""
        audioFormats = []
        try:
            for audioFormat in self.amazon.ItemAttributes.Languages.Language:
                try:
                    audioFormats.append(str(audioFormat.AudioFormat))
                except Exception:
                    # Entries without an audio format are simply skipped.
                    self._report_failure('No audio format found', with_exc_info=False)
        except Exception:
            self._report_failure('Error parsing audioformat')
        return audioFormats

    def GetSubtitles(self):
        """Return the names of all language entries of type 'Subtitled'."""
        subtitles = []
        try:
            for subtitle in self.amazon.ItemAttributes.Languages.Language:
                if subtitle.Type == 'Subtitled':
                    subtitles.append(str(subtitle.Name))
        except Exception:
            self._report_failure('Error parsing subtitle')
        return subtitles

    def GetLanguage(self):
        """Return the names of all language entries not of type 'Original'."""
        languages = []
        try:
            for language in self.amazon.ItemAttributes.Languages.Language:
                if language.Type != 'Original':
                    languages.append(str(language.Name))
        except Exception:
            self._report_failure('Error parsing language')
        return languages

    def GetSummary(self):
        """Return the OFDb description text, or ''."""
        return self._ofdb_text(('resultat', 'beschreibung'), 'Error parsing summary')

    def GetImageUrl(self, asin):
        """Download the large image to ``Images/<asin>.jpg`` if not present.

        Returns the local file name (not the remote URL), whether or not
        the download succeeded.
        """
        imageUrl = ''
        imageName = "Images/" + asin + ".jpg"
        print(imageName)
        try:
            imageUrl = str(self.amazon.LargeImage.URL)
            if imageUrl != '' and os.path.isfile(imageName) == False:
                urllib.urlretrieve(imageUrl, imageName)
        except Exception:
            self._report_failure('Error parsing imageurl')
        return imageName

    def GetAudienceRating(self):
        """Return the audience rating as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'AudienceRating'), 'Error parsing audiencerating')

    def GetStudio(self):
        """Return the studio as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'Studio'), 'Error parsing studio: ')

    def GetAsin(self):
        """Return the ASIN as a string, or ''."""
        return self._amazon_text(('ASIN',), 'Error parsing asin: ')

    def GetAmazonUrl(self):
        """Return the detail page URL as a string, or ''."""
        return self._amazon_text(('DetailPageURL',), 'Error parsing detailpageurl: ')

    def GetPrice(self):
        """Return the formatted price of the offer listing, or ''."""
        return self._amazon_text(('Offers', 'Offer', 'OfferListing', 'Price', 'FormattedPrice'), 'Error parsing price: ')

    def GetTitle(self):
        """Return the title as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'Title'), 'Error parsing title: ')

    def GetProductGroup(self):
        """Return the product group as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'ProductGroup'), 'Error parsing productgroup: ')

    def GetManufacturer(self):
        """Return the manufacturer as a string, or ''."""
        return self._amazon_text(('ItemAttributes', 'Manufacturer'), 'Error parsing manufacturer: ')

    def GetDirectors(self):
        """Return the list of director names (empty when there are none)."""
        return self._amazon_names('Director', 'Error parsing directors: ')

    def GetActors(self):
        """Return the list of actor names (empty when there are none)."""
        return self._amazon_names('Actor', 'Error parsing actors: ')
class AmazonPipeline(object):
    """Item pipeline that attaches matching Amazon ASINs to scraped items.

    Items get an ``asin`` list derived from their UPC, else their EAN, else
    a keyword search on MPN + brand whose results are confirmed by
    comparing the item description with Amazon's editorial review.
    """

    def __init__(self):
        self.api = API(locale='us')
        self.datatxt = DataTXT(app_id=settings['DANDELION_APP_ID'],
                               app_key=settings['DANDELION_KEY'])

    def process_item(self, item, spider):
        """Set ``item['asin']`` and return the item.

        Items from 'ebay_spider' and 'amazon_spider' are returned untouched.
        Only the first identifier key present is considered: an item that
        has an (empty) ``upc`` key is not tried by EAN or MPN.
        """
        if spider.name in ['ebay_spider', 'amazon_spider']:
            return item

        item['asin'] = []
        if 'upc' in item:
            if item['upc']:
                item['asin'] = self.get_upc(item['upc'])
        elif 'ean' in item:
            if item['ean']:
                item['asin'] = self.get_ean(item['ean'])
        elif 'mpn' in item and 'brand' in item:
            if item['mpn'] and item['brand']:
                item['asin'] = self.search(
                    "%s+%s" % (item['mpn'], item['brand']),
                    item['description'])
        return item

    def _lookup_asins(self, code, id_type):
        """Return the ASINs of all items found for *code* of kind *id_type*."""
        response = self.api.item_lookup(code, SearchIndex="Blended", IdType=id_type)
        return [unicode(amazon_item.ASIN.text, 'utf-8')
                for amazon_item in response.Items.Item]

    def get_upc(self, upc):
        """Return the list of ASINs matching the given UPC."""
        return self._lookup_asins(upc, "UPC")

    def get_ean(self, ean):
        """Return the list of ASINs matching the given EAN."""
        return self._lookup_asins(ean, "EAN")

    def search(self, keyword, description):
        """Return the ASINs of search hits whose editorial review matches.

        A hit counts when ``find_match`` scores *description* against its
        editorial review above 70.  Returns an empty list when the search
        has no exact matches.
        """
        asin = list()
        try:
            response = self.api.item_search("Blended", Keywords=keyword,
                                            ResponseGroup="EditorialReview")
        except NoExactMatchesFound:
            return asin

        for amazon_item in response:
            # start matching the editorial review
            if hasattr(amazon_item, "EditorialReviews") and hasattr(
                    amazon_item.EditorialReviews, "EditorialReview"):
                match = self.find_match(
                    description,
                    amazon_item.EditorialReviews.EditorialReview.Content.text)
                if float(match) > 70.00:
                    asin.append(unicode(amazon_item.ASIN.text, 'utf-8'))
        return asin

    def find_match(self, source, dest):
        """Return the best similarity between *source* paragraphs and *dest*.

        Only the first five lines of *source* longer than 20 characters are
        compared.  Returns 0.00 when there is no such line or when the
        similarity service fails.
        """
        paragraphs = [line for line in source.splitlines() if len(line) > 20][0:5]
        match = list()
        try:
            for p in paragraphs:
                response = self.datatxt.sim(p, dest)
                match.append(response.similarity)
        except DandelionException:
            return 0.00
        if not match:
            # Nothing qualified for comparison, so there is no best score.
            return 0.00
        return max(match)
imageinfo.save() im = Image.open(imageinfo.image_file.path) (w,h) = im.size offset_w = (800 - w)/2 offset_h = (800 - h)/2 new_im = Image.new('RGBA',(800,800),(255,255,255,0)) new_im.paste(im,(offset_w,offset_h)) new_im.save(imageinfo.image_file.path) return True except: return False if __name__ == '__main__': #print startNewItemJob('20150114205955') asin = 'B00001P4ZH' api = API(access_key_id = ACCESS_KEY_ID, secret_access_key = SECRET_ACCESS_KEY, associate_tag = ASSOCIATE_TAG, locale='us') result = api.item_lookup(asin,ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',MerchantId = 'Amazon',Condition='New') #print insert_item_info(result,asin) #print result.Items.Item.Offers.TotalOffers #for offer in result.Items.Item.Offers: #print offer.Offer.__dict__ #print offer.Merchant.Name # print offer.Offer.OfferListing.Price.FormattedPrice print insert_item_info(result,asin) #print price_conv('$34.90') #print upc_conv('2129914776')
if __name__ == "__main__":
    api = API(locale='de')

    # Walks the root directory, whose sub-directory names are movie IDs, and
    # writes a file with one "<title>|<movie ID>" line per item found.
    rootdir = "data/"
    # `with` guarantees the output file is flushed and closed, even when the
    # run is interrupted.
    with open("NameIDs.txt", "w") as output:
        # Loops through the directory
        for subdir, dirs, files in os.walk(rootdir):
            for dirNum in dirs:
                # Looks up the product; IDs that cannot be looked up are skipped.
                try:
                    product = api.item_lookup(ItemId=dirNum)
                except Exception:
                    continue

                # Obtains the name of the movie
                for item in product.Items.Item:
                    try:
                        title = item.ItemAttributes.Title.__str__()
                    except Exception:
                        continue

                    # Outputs the title and the movie ID
                    print(title + "|" + dirNum + "\n")
                    output.write(title.encode('UTF-8') + "|" + dirNum + "\n")
b = nltk.word_tokenize(a) c = nltk.pos_tag(b) print c d = filter(lambda (a,b): b == 'NNP' or b == 'NN', c) print d[0][0] """ f = open("recommendations.txt", "a") count = 0 g = open("prices.txt", "a") for item in items: a = item.ASIN result = api.item_lookup(str(a)) #for i in result.Items.Item: #print '%s (%s) in group' % (i.ItemAttributes.Title, i.ASIN) try: result = api.similarity_lookup(str(a)) for b in result.Items.Item: # print '%s (%s)' % (b.ItemAttributes.Title, b.ASIN) if count >= 20: break image = api.item_lookup(str(b.ASIN), ResponseGroup = "Images") price = api.item_lookup(str(b.ASIN), ResponseGroup = "Offers") for i in image.Items.Item: # print '%s' % i.LargeImage.URL if(i.LargeImage.URL != None): f.write("%s $ %s\n" % (b.ItemAttributes.Title, b.DetailPageURL))
from bs4 import BeautifulSoup
import hashlib
import hmac

# Classifies every ASIN in the first column of the CSV: its product group
# when the lookup works, otherwise "no access", "invalid" or "error".
count_csv = 0
data = []
counter = 0
filename = "amazon_no_access.csv"

# One client is enough for the whole run; it does not depend on the row.
api = API(locale='us')

with open(filename, 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        counter = counter + 1
        if counter % 100 == 0:
            print(counter)  # progress indicator

        try:
            result = api.item_lookup(row[0])
        except Exception as e:
            print(str(e))
            if str(e) == "AWS.ECommerceService.ItemNotAccessible: This item is not accessible through the Product Advertising API.":
                data.append((row[0], "no access"))
            else:
                data.append((row[0], "invalid"))
            # Back off briefly after a failed request before the next row.
            time.sleep(1)
            continue

        try:
            for item in result.Items.Item:
                category = item.ItemAttributes.ProductGroup
                data.append((row[0], category))
        except Exception:
            # The response could not be read as expected.
            data.append((row[0], "error"))
from xml.dom.minidom import parse


def minidom_response_parser(fp):
    """
    Custom response parser using xml.dom.minidom.parse
    instead of lxml.objectify.

    Returns the parsed document.  If the document contains an ``Error``
    element, ``AWSError`` is raised with the code and message of the first
    one.
    """
    document = parse(fp)

    def child_text(node, tag):
        # Text of the first <tag> element below `node`.
        return node.getElementsByTagName(tag)[0].firstChild.nodeValue

    # parse errors
    errors = document.getElementsByTagName('Error')
    if errors:
        first_error = errors[0]
        code = child_text(first_error, 'Code')
        msg = child_text(first_error, 'Message')
        raise AWSError(code, msg)

    return document


if __name__ == '__main__':
    api = API(AWS_KEY, SECRET_KEY, 'us', processor=minidom_response_parser)
    root = api.item_lookup('0718155157')
    print(root.toprettyxml())
    # ...
    # now do something with it!
class Amazon_Api:
    """Pull title, product link, image and price for recommended books
    from the Amazon Product API.

    ``some_dict`` maps a book number to a record whose element 0 is the
    ISBN (a float when the ISBN is missing), element 1 the title and
    element 2 the author.  ``list_of_numbers`` is the list of recommended
    book numbers; numbers absent from ``some_dict`` are ignored.
    ``number_of_recs`` caps how many entries ``generate_images`` collects.
    """

    def __init__(self, some_dict, list_of_numbers, number_of_recs):
        self.api = API(locale='us', )
        self.image_url = []
        self.book_numbers = [
            book for book in list_of_numbers if book in some_dict
        ]
        # Left-pad string ISBNs with zeros to 10 characters; float values
        # (missing ISBNs) are kept untouched so they can be detected later.
        self.isbns = [
            raw if isinstance(raw, float) else raw.rjust(10, '0')
            for raw in (some_dict[number][0] for number in self.book_numbers)
        ]
        self.some_dict = some_dict
        self.list_of_numbers = list_of_numbers
        self.number_of_recs = number_of_recs

    def format_response(self, resp):
        """Append the first fully populated item of ``resp`` to
        ``self.image_url``.

        Items lacking a title, page URL, large image or lowest new price
        are skipped; at most one entry is added per response.
        """
        for item in resp.Items.Item:
            try:
                self.image_url.append({
                    'title': item.ItemAttributes.Title,
                    'page_url': item.DetailPageURL,
                    'image_url': item.LargeImage.URL,
                    'price': item.OfferSummary.LowestNewPrice.FormattedPrice
                })
            except AttributeError:
                continue
            # Several items/images may come back; one entry is enough.
            break

    # This function handles values where ISBN is missing
    def handle_null_isbn(self, book_number):
        """Find the book by title and author, then record its details.

        Best effort: any failure is swallowed so that one bad book does not
        abort the whole batch.  Always returns ``'ok'``.
        """
        try:
            record = self.some_dict[book_number]
            response = self.api.item_search(
                'Books', Title=record[1], Author=record[2], Limit=1)
            current_asin = None
            for hit in response:
                # Only the first search hit is used.
                current_asin = str(hit.ASIN)
                break
            if current_asin is not None:
                resp = self.api.item_lookup(
                    ItemId=current_asin,
                    ResponseGroup='Images,OfferSummary,Small',
                    IdType='ASIN')
                # Must go through ``self``: a bare ``format_response`` is a
                # NameError, which the handler below would silently hide.
                self.format_response(resp)
        except Exception:
            # Deliberately best effort (see docstring).
            pass
        return 'ok'

    def generate_images(self):
        """Collect up to ``number_of_recs`` entries and return the list.

        Each book is tried once: by ISBN when one is present, falling back
        to a title/author search when the ISBN is missing or the ISBN
        lookup fails.
        """
        print(self.isbns)
        for book_number, isbn in zip(self.book_numbers, self.isbns):
            if len(self.image_url) >= self.number_of_recs:
                break
            record = self.some_dict[book_number]
            if isinstance(record[0], float):  # handle nulls
                print('null is %s' % (book_number,))
                self.handle_null_isbn(book_number)
                continue
            try:
                print('isbn is %s book is %s' % (isbn, record[1]))
                response = self.api.item_lookup(
                    ItemId=isbn,
                    ResponseGroup='Images,OfferSummary,Small',
                    IdType='ISBN',
                    SearchIndex='Books')
                self.format_response(response)
            except Exception:
                # run this if ISBN doesn't work
                self.handle_null_isbn(book_number)
        return self.image_url
def _json_response(payload):
    """Serialize ``payload`` and wrap it in an application/json HttpResponse."""
    return HttpResponse(json.dumps(payload), content_type='application/json')


def newByBarcode(request):
    """Create a Food or Equipment item from a scanned barcode.

    Expects a POST with ``barcode``, ``barcode_type`` and ``item_type``
    (``'food'`` or ``'equipment'``).  The barcode is resolved through the
    Amazon product API and the resulting ASIN and title are stored on a new
    item owned by the requesting user.

    Always answers with JSON:
      * ``{'status': 'success', 'id': ...}`` when an item was created;
      * ``{'status': 'error', 'exists': ...}`` when an item with that ASIN
        is already stored;
      * ``{'status': 'error', 'errors': {...}}`` for missing/invalid fields;
      * ``{'status': 'error', 'custom_errors': [...]}`` for an anonymous
        user or a failed Amazon lookup.

    Raises Http404 for any method other than POST.
    """
    if request.method != "POST":
        raise Http404

    if not request.user.is_authenticated():
        return _json_response(
            {'status': 'error',
             'custom_errors': [{'message': 'Login required'}]})

    # Fields are checked in this order so the first missing one is reported.
    for field in ('barcode', 'barcode_type', 'item_type'):
        if not request.POST.get(field):
            return _json_response(
                {'status': 'error',
                 'errors': {field: ['This item is required']}})

    barcode = request.POST['barcode']
    barcode_type = request.POST['barcode_type']
    if barcode_type == "UPCA":
        # The lookup is issued with IdType "UPC" for UPC-A scans.
        barcode_type = "UPC"
    item_type = request.POST['item_type']

    # item_type -> (model class, Amazon SearchIndex)
    supported = {
        'food': (Food, 'Grocery'),
        'equipment': (Equipment, 'HomeGarden'),
    }
    if item_type not in supported:
        return _json_response(
            {'status': 'error',
             'errors': {'item_type': [
                 'Barcode scanning only works for Food and Equipment']}})
    model, search_index = supported[item_type]

    api = API(locale='us')
    try:
        results = api.item_lookup(
            barcode,
            IdType=barcode_type,
            SearchIndex=search_index,
            ResponseGroup="ItemAttributes, OfferSummary, Images",
            paginate=False)
        # Reading the fields inside the ``try`` turns a response without
        # the expected nodes into the same user-facing lookup error.
        result = results.Items.Item
        asin = result.ASIN
        name = result.ItemAttributes.Title
    except Exception:
        return _json_response(
            {'status': 'error',
             'custom_errors': [{
                 'message': 'Amazon database lookup error. Please create by name.'}]})

    # Check that doesn't already exist.  A filtered query (instead of
    # ``get`` inside a catch-all) means only a genuinely absent ASIN leads
    # to an insert; database errors are no longer mistaken for "not found".
    existing = model.objects.filter(asin=asin)[:1]
    if existing:
        return _json_response({'status': 'error', 'exists': existing[0].id})

    item = model(user=request.user, name=name, asin=asin)
    item.stars = 0
    item.save()
    return _json_response({'status': 'success', 'id': item.id})
__author__ = 'rs2bd'

from amazonproduct import API
import csv

# One-off check: look up a single hard-coded ASIN and print the product
# group of every item in the response.  A CSV-driven variant reading
# "amazon_category_item_id.csv" was sketched here but is disabled.

api = API(locale='us')

try:
    lookup = api.item_lookup("B009A17D6O")
    for entry in lookup.Items.Item:
        product_group = entry.ItemAttributes.ProductGroup
        print(product_group)
except Exception as err:
    # Any failure (lookup or missing attribute) is reported, not raised.
    print(str(err))
# Sentinel returned when no usable offer was found; also the upper bound
# an offer must beat to be considered at all.
_NO_OFFER_PRICE = 999.00
_UNAVAILABLE_MARKER = "non disponibile"
_UNAVAILABLE_SUFFIX = "(Attualmente non disponibile)"


def _scan_offers(api, items, state, unavailable_only):
    """Scan the first five ``items`` for the cheapest matching offer.

    ``state`` is a dict with keys ``'best'`` (lowest price so far) and
    ``'unavailable'`` (count of unavailable offers skipped); it is updated
    in place so progress survives an exception raised part-way through.

    With ``unavailable_only`` false, only offers NOT marked
    "non disponibile" are considered and the others are counted.  With it
    true, only offers marked "non disponibile" are considered, the lookup
    additionally passes ``Availability='Available'`` and the stored title
    gets the "(Attualmente non disponibile)" suffix.

    The first accepted offer fixes the (author, title) pair; later offers
    can only lower the price if they belong to a book with the same pair.
    Each accepted offer writes ``session['titoloAMZ']`` and
    ``session['urlAMZ']``.
    """
    lookup_args = dict(ResponseGroup='Offers', Condition='New',
                       MerchantId='Amazon', AssociateTag=Pk)
    suffix = ""
    if unavailable_only:
        lookup_args['Availability'] = 'Available'
        suffix = _UNAVAILABLE_SUFFIX

    chosen = None  # (author, title) of the first accepted offer
    for position, book in enumerate(items, 1):
        if position == 6:
            break
        asin = book.ASIN
        try:
            # NOTE(review): under Python 2, str() on non-ASCII text would
            # raise and the book would be skipped here -- confirm intended.
            author = str(book.ItemAttributes.Author)
            title = str(book.ItemAttributes.Title)
            url = str(book.DetailPageURL)
        except Exception:
            continue
        asin_text = str(asin)
        try:
            node = api.item_lookup(ItemId=asin_text, **lookup_args)
        except AWSError:
            continue
        try:
            for offer in node.Items.Item.Offers.Offer:
                availability = str(offer.OfferListing.Availability).lower()
                unavailable = _UNAVAILABLE_MARKER in availability
                if unavailable != unavailable_only:
                    if unavailable:
                        state['unavailable'] += 1
                    continue
                formatted = str(offer.OfferListing.Price.FormattedPrice)
                # "EUR 12,90" -> 12.9
                price = float(
                    formatted.replace("EUR ", "").replace(",", "."))
                if price >= state['best']:
                    continue
                if chosen is None:
                    chosen = (author, title)
                elif chosen != (author, title):
                    continue
                state['best'] = price
                session['titoloAMZ'] = title + suffix
                session['urlAMZ'] = url
        except Exception:
            # Missing offer nodes or an unparsable price: give up on this
            # book and move to the next one.
            continue


def price_offers(xxx):
    """Return the lowest new Amazon.it price for books matching ``xxx``.

    Searches the 'Books' index with ``xxx`` as keywords and inspects the
    offers of the first five results.  If no available offer is found but
    at least one unavailable offer was seen, a second pass considers the
    unavailable offers instead.  The title and URL of the winning book are
    stored in ``session['titoloAMZ']`` / ``session['urlAMZ']``.

    Returns 999.00 when the search fails or nothing usable is found.
    Errors after the search never propagate; the best price found so far
    is returned instead.
    """
    api = API(Pp, Pl, 'it')
    try:
        items = api.item_search('Books', Keywords=xxx, AssociateTag=Pk)
    except Exception:
        return _NO_OFFER_PRICE

    state = {'best': _NO_OFFER_PRICE, 'unavailable': 0}
    try:
        _scan_offers(api, items, state, unavailable_only=False)
        if state['best'] == _NO_OFFER_PRICE and state['unavailable'] >= 1:
            _scan_offers(api, items, state, unavailable_only=True)
    except Exception:
        # Best effort: report whatever was found before the failure.
        return state['best']
    return state['best']
def _register_new_user(username, keywords, ratings, asins_by_keyword):
    """Store ``username``'s ratings in ``users_data`` and add it to ``userids``.

    For each keyword, one ASIN common to the Amazon results and
    ``movies_list`` is rated with the matching entry of ``ratings``.
    Keywords whose results share nothing with ``movies_list`` are skipped.
    """
    userids[len(userids)] = username
    known_movies = set(movies_list)  # built once, not once per keyword
    rated = {}
    for keyword, rating in zip(keywords, ratings):
        common = set(asins_by_keyword[keyword]) & known_movies
        if len(common) == 0:
            continue
        rated[list(common)[0]] = rating
    users_data[username] = rated


def Recommend():
    """Turn five rated movie titles into three Amazon recommendations.

    Reads ``movie1``..``movie5`` and ``rate1``..``rate5`` from the submitted
    form, resolves each title to ASINs through an Amazon 'DVD' search,
    registers the ratings under the user 'newuser1', runs ``recommend`` and
    writes title, link and image URL of the first three recommended movies
    to ``static/js/data.json``.  Returns the rendered ``index.html``.
    """
    keywords = [str(request.form['movie%d' % n]) for n in range(1, 6)]
    # Each rating is read into its own slot; previously rate4 and rate5
    # overwrote rate3 and movies 4 and 5 were scored with rate2 and rate1.
    ratings = [int(request.form['rate%d' % n]) for n in range(1, 6)]

    # SECURITY: live API credentials are hard-coded in source.  They should
    # be rotated and loaded from configuration or the environment instead.
    api = API("AKIAIKFQCWRMAQBAIGDQ",
              "V3URxyjcNbnRgak1CnWSoNqze2OFo2xkzxhYgYbg",
              "us",
              "chenlji-20")

    # keywords to ASIN
    ASIN = {}
    for keyword in keywords:
        ASIN[keyword] = [item.ASIN
                         for item in api.item_search('DVD', Title=keyword)]

    _register_new_user('newuser1', keywords, ratings, ASIN)
    print(users_data['newuser1'])

    testrun = recommend('newuser1', userids, users_data)
    print(testrun)

    # movie ASIN -> [title, link URL, large image URL, ...]
    movies = {}
    for movie in testrun:
        details = movies[movie] = []
        for item in api.item_lookup(str(movie)).Items.Item:
            title = item.ItemAttributes.Title
            URL = item.ItemLinks.ItemLink.URL
            details.append(str(title))
            details.append(str(URL))
        image_lookup = api.item_lookup(str(movie), ResponseGroup='Images')
        for item in image_lookup.Items.Item:
            imageURL = item.ImageSets.ImageSet.LargeImage.URL
            details.append(str(imageURL))

    # Exactly the first three recommendations are exported; fewer than
    # three raises IndexError, as before.
    row = {}
    for slot in range(3):
        details = movies[testrun[slot]]
        row["title%d" % (slot + 1)] = details[0]
        row["url%d" % (slot + 1)] = details[1]
        row["imgUrl%d" % (slot + 1)] = details[2]
    data = [row]
    print(data)

    # Writing JSON data
    # NOTE(review): this file is shared by every request; concurrent users
    # would overwrite each other's results -- confirm this is acceptable.
    with open('static/js/data.json', 'w') as f:
        json.dump(data, f, ensure_ascii=False, encoding='utf-8')

    return render_template('index.html')
for game in cmn_games: for plt in ['PC', 'Xbox One', 'Playstation 4']: game_dict = dict() game_dict['Title'] = game game_dict['Platform'] = plt try: print game + ' ' + plt + '\n' ress = api.item_search('VideoGames', Keywords=game + ' ' + plt) for res in ress: root = res # xml_new = etree.tostring(root, pretty_print=True) # print xml_new game_dict['Amzn_Title'] = root.ItemAttributes.Title asin = root.ASIN.text sleep(2) result0 = api.item_lookup(asin,ResponseGroup='OfferSummary') game_dict['Price'] = result0.Items.Item.OfferSummary.LowestNewPrice.FormattedPrice.text sleep(2) result = api.item_lookup(asin,ResponseGroup='Reviews', TruncateReviewsAt=10) review_link = result.Items.Item.CustomerReviews.IFrameURL.text response = urllib2.urlopen(review_link).read() soup = BeautifulSoup(response) try: game_dict['Rating'] = soup.find_all("div","crIFrameNumCustReviews")[0].find('img').get('title') no_reviews = soup.find_all("div","crIFrameNumCustReviews")[0].span.text for ch in ['\n','(',')']: no_reviews = no_reviews.replace(ch,'') game_dict['Reviews'] = no_reviews print game_dict game_list.append(game_dict) except: