def _price_history(data, key):
    """Return a forward-filled TIMESTAMP/PRICE frame for one Keepa series.

    :param data: mapping holding the series under ``key`` and its timestamps
        under ``key + '_time'`` (the ``product['data']`` dict of a query result)
    :param key: series name, e.g. ``'AMAZON'`` or ``'NEW'``
    :return: DataFrame with columns ``TIMESTAMP`` and ``PRICE``; gaps are
        forward-filled and rows that are still incomplete are dropped
    """
    prices = pd.DataFrame(data[key])
    timestamps = pd.DataFrame(data[key + '_time'])
    prices.columns = ["PRICE"]
    timestamps.columns = ["TIMESTAMP"]
    history = pd.concat([timestamps, prices], axis=1)
    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    history = history.ffill()
    # Leading rows have nothing to fill from and are removed here.
    history.dropna(inplace=True)
    return history


def Create_Data_Product(entrada, place, api=None):
    """Fetch the Amazon and third-party "new" price histories of one product.

    :param entrada: product identifier passed to ``api.query`` (single ASIN)
    :param place: Amazon domain passed as ``domain=`` to the query. One of
        RESERVED, US, GB, DE, FR, JP, CA, CN, IT, ES, IN, MX (e.g. ``'ES'``
        for Spain)
    :param api: optional, already constructed Keepa client. When omitted a
        client is created with the built-in access key, as before
    :return: tuple ``(Data_amazon, Data_new, name)`` where the first two are
        DataFrames with columns ``TIMESTAMP`` and ``PRICE`` and ``name`` is
        the product title
    """
    if api is None:
        # NOTE(review): credential committed to source; it is kept only so
        # existing callers keep working. Prefer passing `api` or loading the
        # key from configuration.
        accesskey = '5grcrbpkkeo30reskiljkn0mbn4sn26bqsmoen1u8m973sh6lgdvjmb3k8lracth'  # enter real access key here
        api = keepa.Keepa(accesskey)

    # Single ASIN query; returns a list of product data.
    products = api.query(entrada, domain=place)
    product = products[0]

    # Plot result (requires matplotlib)
    # keepa.plot_product(products[0])

    name = product['title']
    Data_amazon = _price_history(product['data'], 'AMAZON')  # Amazon as seller
    Data_new = _price_history(product['data'], 'NEW')  # external sellers

    print("\n")
    return Data_amazon, Data_new, name
def __init__(self):
    """Create the Keepa client and initialise the instance's bookkeeping.

    The Keepa access key is read from the ``keepa_key`` entry of
    ``access_keys.json`` in the current working directory.
    """
    with open('access_keys.json') as key_file:
        credentials = json.load(key_file)

    self.keepa_api = keepa.Keepa(credentials['keepa_key'])

    # Output file base name and a flag recording whether it exists yet.
    self.saved_file_name = 'price_info'
    self.is_file_created = False

    self.bing_search = BingSearch()
def keepa_data(ASIN,COUNTRY):
    """Query Keepa for one product and insert a summary document into MongoDB.

    The first product returned by ``api.query(ASIN, domain=COUNTRY)`` is
    turned into a dict holding its sales-rank history, Amazon price history
    and a selection of product fields. The dict is printed and inserted into
    the ``keepa_data`` collection of the ``operate_2020`` database on the
    MongoDB server at 127.0.0.1:27017.

    :param ASIN: product identifier passed to ``api.query``
    :param COUNTRY: value passed as ``domain=`` to ``api.query``
    :return: None
    """
    #https://keepa.com/#!api  API for Keepa data
    accesskey = 'change yours' # enter real access key here
    api = keepa.Keepa(accesskey)
    products = api.query(ASIN,domain=COUNTRY) #offers=[20-100]
    product = products[0]
    # keepa.plot_product(product)
    ### Write all of the data directly into MongoDB.
    import pymongo
    # Create the MongoDB connection
    mongocli = pymongo.MongoClient(host = "127.0.0.1", port = 27017)
    # Select the MongoDB database by name
    dbname = mongocli["operate_2020"]
    # keepa_data: documents are stored together with the nowtime at which they were fetched
    sheetname = dbname["keepa_data"]
    #category
    # The bare string below is a section marker meaning "rank changes".
    '''排名变化'''
    SALES_time=product["data"]["SALES_time"].astype(str)
    SALES=product["data"]["SALES"]
    # Negative ranks are replaced with 0. This assigns into the object stored
    # in product["data"]["SALES"] itself (presumably a NumPy array - confirm).
    SALES[SALES<0]=0
    SALES=SALES.astype(str)
    # The bare string below is a section marker meaning "price changes".
    '''价格变化'''
    # AMAZON_time=product['data']['AMAZON_time'].astype(str)  # dates of the prices
    # AMAZON=product["data"]['AMAZON'].astype(str),  # price changes
    # now_time = datetime.now()  # date on which the program runs
    # NOTE(review): `now_time` is used below but not assigned in this function
    # as visible here; presumably it is a module-level name - confirm.
    listed_date =keepa_minutes_to_time(product['listedSince']) # date the product was listed
    data_insert={
        'SALES_time':list(SALES_time), # timestamps
        'SALES':list(SALES) ,# sales rank
        'nowtime':str(now_time.date()), # date the data was written
        'categoryTree':product["categoryTree"],# category tree
        'salesRanks':product["salesRanks"],# rank per category
        'variationCSV':product["variationCSV"], # variations
        'variations':product["variations"], # variation attributes: variation names and colours
        'asin':product["asin"], # the ASIN that was searched
        'parentAsin':product["parentAsin"] ,# parent ASIN
        'imagesCSV':product["imagesCSV"] ,# images
        'title':product["title"] ,# title
        'brand':product["brand"] ,# brand name
        'frequentlyBoughtTogether':product["frequentlyBoughtTogether"] ,# frequently bought together
        'productGroup':product["productGroup"] ,# product group
        'partNumber':product["partNumber"] ,# part number 2L
        'listed_date':listed_date , # listing date
        'binding':product["binding"] ,# binding category
        'AMAZON_time':list(product['data']['AMAZON_time'].astype(str)) ,# dates of the prices
        'AMAZON':list(product["data"]['AMAZON'].astype(str) ),# price changes
    }
    print(data_insert)
    sheetname.insert_one(data_insert)
def test_throttling():
    """Check that a query issued with no tokens left waits before returning.

    ``keepa.interface.REQLIM`` is raised to 20 for the duration of the test
    and is always reset to 2 afterwards, even when a query raises or the
    timing assertion fails, so the changed limit cannot leak into other tests.
    """
    api = keepa.Keepa(WEAKTESTINGKEY)
    keepa.interface.REQLIM = 20
    try:
        # exhaust tokens
        while api.tokens_left > 0:
            api.query(PRODUCT_ASINS[:5])

        # this must trigger a wait...
        t_start = time.time()
        api.query(PRODUCT_ASINS)
        assert (time.time() - t_start) > 1
    finally:
        keepa.interface.REQLIM = 2
def getProductsForCategory(rootCategory=172282, number_of_products=50):
    """Return the product finder result for the first page of a root category.

    :param rootCategory: value sent as ``rootCategory`` to the product finder
    :param number_of_products: value sent as ``perPage``
        (page size in range of [50, 10000])
    :return: whatever ``api.product_finder`` returns (a list of all the
        product ASINs, per the original comment)
    """
    client = keepa.Keepa(getAccessKey())
    finder_query = dict(
        rootCategory=rootCategory,
        page=0,
        perPage=number_of_products,
    )
    return client.product_finder(finder_query)
def current_used_price():
    """Return a JSON string with the latest used price for the posted ISBN.

    The ISBN is read from the ``isbn`` field of the request's JSON body and
    looked up through Keepa using the key in the ``KEEPA_KEY`` environment
    variable. The returned packet always carries the keys ``id``, ``isbn``,
    ``price``, ``title``, ``date``, ``msg`` and ``status``:

    * ``status == "500"`` - reading the request or querying Keepa raised;
      ``msg`` holds the exception text.
    * ``status == "401"`` - no product, or no used-price history, was found.
    * ``status == "200"`` - ``price`` is the last entry of the USED series.

    :return: the packet serialised with ``json.dumps``
    """
    # Read the ISBN once, inside the try, so that a malformed request is
    # reported as a "500" packet instead of raising again in the handler.
    isbn = None
    try:
        isbn = request.get_json()['isbn']
        accesskey = os.environ['KEEPA_KEY']
        api = keepa.Keepa(accesskey)
        products = api.query(isbn, product_code_is_asin=False)
    except Exception as msg:
        packet = {
            # uuid.UUID is not serialisable by the stdlib json encoder.
            "id": str(uuid.uuid4()),
            "isbn": isbn,
            "price": "none",
            "title": "none",
            "date": "none",
            "msg": str(msg),
            "status": "500"
        }
        return json.dumps(packet)

    found = False
    if products:
        try:
            used_price = products[0]['data']['USED'][-1]
            title = products[0]['title']
            found = True
        except (KeyError, IndexError):
            # A product without a (non-empty) USED series has no price to report.
            found = False

    if not found:
        packet = {
            "id": str(uuid.uuid4()),
            "price": "none",
            "title": "none",
            "date": "none",
            "isbn": isbn,
            "msg": str("Found no price for sku #" + str(isbn)),
            "status": "401"
        }
    else:
        packet = {
            "id": str(uuid.uuid4()),
            "price": str(used_price),
            "title": str(title),
            "date": date.today().strftime("%m/%d/%y"),
            "isbn": isbn,
            "msg": "none",
            "status": "200"
        }
    return json.dumps(packet)
def save_bestsellers_from_cat(cat, filename):
    '''Query the best sellers of a category and store them with ``np.save``.

    :param cat: Amazon category ID (non-empty)
    :type cat: str
    :param filename: file the best sellers are saved to (non-empty)
    :type filename: str
    :return: the best sellers returned by the query
    :rtype: list of str
    '''
    assert isinstance(cat, str)
    assert isinstance(filename, str)
    # Both strings must be non-empty.
    assert cat and filename

    client = keepa.Keepa(
        'e6ihvarndmd2iee2bgeg60afm06gru9242g310tb4tv1kji72u57uon4us908d5h'
    )

    # Obtain all bestsellers from category
    ranked_asins = client.best_sellers_query(cat)
    np.save(filename, ranked_asins)

    report = "Best Sellers for Category {0} saved to {1}".format(cat, filename)
    print(report)
    return ranked_asins
def save_products(product_ids, filename, ratings=True):
    '''Query product dicts for the given ids and store them with ``np.save``.

    :param product_ids: product IDs; at most 150 when ``ratings`` is true,
        at most 300 otherwise, and never empty
    :type product_ids: list or numpy.ndarray of str
    :param filename: file the product dictionaries are saved to (non-empty)
    :type filename: str
    :param ratings: passed to the query as ``rating=``
    :type ratings: bool
    :return: product dicts, or None when the query or save was interrupted
    :rtype: list of dict
    '''
    assert isinstance(product_ids, (list, np.ndarray))
    assert all(isinstance(pid, str) for pid in product_ids)
    assert isinstance(filename, str)
    assert len(filename) > 0
    assert isinstance(ratings, bool)

    # Keepa API limit: query for ratings and reviews consumes more tokens
    max_ids = 150 if ratings else 300
    assert 0 < len(product_ids) <= max_ids

    client = keepa.Keepa(
        'e6ihvarndmd2iee2bgeg60afm06gru9242g310tb4tv1kji72u57uon4us908d5h'
    )

    try:
        # Obtain product dicts
        fetched = client.query(product_ids, rating=ratings)
        np.save(filename, fetched)
        print("Product Dictionaries saved to {0}".format(filename))
        return fetched
    except BaseException:
        # Also reached on a manual exit from the program due to lack of
        # tokens, hence the catch-all.
        print("Out of tokens...")
        print('Time to refill tokens: ', client.time_to_refill, '\n')
        print('Token Status: ', client.update_status())
    return None
print('Length of UPC segment 1:\t\t' + str(len(upcs1))) print('Length of SKU segment 1:\t\t' + str(len(skus1))) print('Length of UPC segment 2:\t\t' + str(len(upcs2))) print('Length of SKU segment 2:\t\t' + str(len(skus2))) print("\nFirst Half: \n") for x in range(len(upcs1)): print(str(x) + '\t' + str(upcs1[x]) + '\t' + str(skus1[x])) print("\nSecond Half: \n") for x in range(len(upcs2)): print(str(x) + '\t' + str(upcs2[x]) + '\t' + str(skus2[x])) # End of DEBUGGING PRINT STATEMENTS for the INPUT LIST #=========================================================================== # starting Keepa and passing UPC array into api query accesskey = 'b9pms3n5aile7ihpmgvigtigf1cs2eu04ms13ikvbn92l9q559neucdidho9511f' # enter real access key here api = keepa.Keepa(accesskey) if os.path.exists("output1.csv"): os.remove("output1.csv") else: print("The file does not exist") with open('output1.csv', "w") as output_file: output_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL, lineterminator='\n') output_writer.writerow([ 'SKU', 'UPC', 'ASIN', 'Title', 'Description', 'Bullet Point 1', 'Bullet Point 2', 'Bullet Point 3', 'Bullet Point 4', 'Bullet Point 5', 'color', 'size', 'Current Sales Rank', 'Current Price', 'Average 30', 'Average 90', 'Average 180', 'Category', 'Category 2', 'Item Type',
def get_data_for_product(asin, plot=False):
    """Return a per-day history DataFrame for a product, cached on disk.

    If the file at ``get_data_path(asin)`` exists it is unpickled and
    returned. Otherwise the product is queried from Keepa and every available
    series is resampled onto a daily grid running from the day of the first
    ``NEW`` sample up to (excluding) the day of the last ``NEW`` sample. Each
    day takes the most recent value recorded before the end of that day; days
    that precede a series' first sample get NaN. The result is pickled to the
    cache path and returned.

    :param asin: product identifier passed to ``api.query``
    :param plot: when true, show one matplotlib figure per series
    :return: DataFrame with one column per available series plus ``Time``
    """
    path = get_data_path(asin)
    if os.path.isfile(path):
        # NOTE(review): unpickling is only safe for files this program wrote.
        return pd.read_pickle(path)

    accesskey = getAccessKey()
    api = keepa.Keepa(accesskey)
    products = api.query(asin)
    history = products[0]['data']
    data = {}
    # keepa.plot_product(products[0])

    base_key = 'NEW'  # Whatever key we're using to set the time
    one_day = datetime.timedelta(days=1)
    start_time = history[base_key + '_time'][0].replace(
        microsecond=0, second=0, minute=0, hour=0)
    end_time = history[base_key + '_time'][-1].replace(
        microsecond=0, second=0, minute=0, hour=0)

    potential_keys = [
        'AMAZON', 'NEW', 'USED', 'SALES', 'COUNT_NEW', 'COUNT_USED',
        'LISTPRICE', 'RATING', 'COUNT_REVIEWS'
    ]
    for key in potential_keys:
        if key not in history:
            continue

        values = history[key]
        stamps = history[key + '_time']
        day = start_time
        furthest_time_index = 0
        data[key] = []
        data[key + '_time'] = []
        while day < end_time:
            # Find latest price update for this day, and set that as the
            # price for this day.
            while (furthest_time_index < len(stamps)
                   and stamps[furthest_time_index] < day + one_day):
                furthest_time_index += 1
            if furthest_time_index > 0:
                data[key].append(values[furthest_time_index - 1])
            else:
                # No sample yet for this series: index -1 would wrap around
                # to the final (future) value, so record a gap instead.
                data[key].append(float('nan'))
            data[key + '_time'].append(day)
            day += one_day

        if plot:
            plt.figure(figsize=(16, 8))
            plt.xlabel('Time')
            plt.ylabel(key)
            plt.title(key + ' over Time')
            plt.plot(data[key + '_time'], data[key])
            plt.show()

    df = pd.DataFrame()
    for key in potential_keys:
        if key in data:
            df[key] = data[key]
            # Every series shares the same daily grid, so any of them can
            # supply the Time column.
            df['Time'] = data[key + '_time']

    date_path = '../Data/{}'.format(get_now_string())
    # makedirs also creates a missing parent and tolerates an existing folder.
    os.makedirs(date_path, exist_ok=True)
    df.to_pickle(path)
    return df
'B01CUJMT3E', 'B01A5ZIXKI', 'B00KQPBF1W', 'B000J3UZ58', 'B00196LLDO', 'B002VWK2EE', 'B00E2I3BPM', 'B004FRSUO2', 'B00CM1TJ1G', 'B00VS4514C', 'B075G1B1PK', 'B00R9EAH8U', 'B004L2JKTU', 'B008SIDW2E', 'B078XL8CCW', 'B000VXII46', 'B07D1CJ8CK', 'B07B5HZ7D9', 'B002VWK2EO', 'B000VXII5A', 'B004N1AA5W', 'B002VWKP3W', 'B00CM9OM0G', 'B002VWKP4G', 'B004N18JDC', 'B07MDHF4CP', 'B002VWKP3C', 'B07FTVSNL2', 'B002VWKP5A', 'B002O0LBFW', 'B07BM1Q64Q', 'B004N18JM8', 'B004N1AA02', 'B002VWK2EY'] # open connection to keepa API = keepa.Keepa(TESTINGKEY) assert API.tokens_left assert API.time_to_refill >= 0 def test_invalidkey(): with pytest.raises(Exception): keepa.Api('thisisnotavalidkey') def test_deadkey(): with pytest.raises(Exception): keepa.Api(DEADKEY)
import datetime
import numpy as np
import pandas as pd
import time
from dateutil.parser import parse
import keepa

# NOTE(review): access key is committed in source; consider loading it from
# configuration instead.
ACCESSKEY = '7sommorss711l4ci5f3n97ftgvcq8jm7tak3316h3a61jkifqq3qh3keebkm9rsl'
# Module-level Keepa client shared by the functions below; it is created at
# import time.
k_api = keepa.Keepa(ACCESSKEY)


def get_info(items):
    """Collect selected fields for each product returned by a US-domain query.

    For every product from ``k_api.query(items, domain='US')`` a dict is
    built with the top-level fields named in ``check_basic`` (``None`` when
    absent) and the history series named in ``check_date`` taken from the
    product's ``data`` mapping (an empty array when absent). The dicts are
    appended to ``info_list``.

    NOTE(review): no ``return`` is visible after the loop in this view, so as
    shown the function returns None - confirm whether the body continues.

    :param items: value passed as the first argument to ``k_api.query``
    """
    # NOTE(review): 'packageLength' appears twice in this list; since the
    # other item*/package* dimensions are paired, 'itemLength' may have been
    # intended for the first occurrence - confirm.
    check_basic = [
        'asin', 'title', 'imagesCSV', 'categories', 'categoryTree', 'brand',
        'color', 'size', 'packageLength', 'itemWidth', 'itemHeight',
        'itemWeight', 'packageLength', 'packageWidth', 'packageHeight',
        'packageWeight', 'frequentlyBoughtTogether'
    ]
    # Series read from each product's 'data' mapping.
    check_date = ['NEW_time', 'NEW', 'SALES_time', 'SALES']
    info_list = []
    for each in k_api.query(items, domain='US'):
        info_each = {}
        for item in check_basic:
            info_each[item] = each.get(item, None)
        for date_item in check_date:
            # A missing 'data' mapping or series falls back to an empty array.
            info_each[date_item] = each.get('data', {}).get(date_item, np.array([]))
        info_list.append(info_each)
def api():
    """Return a Keepa client built with TESTINGKEY after basic sanity checks.

    The client must report a truthy ``tokens_left`` and a non-negative
    ``time_to_refill``; otherwise an AssertionError is raised.
    """
    client = keepa.Keepa(TESTINGKEY)
    assert client.tokens_left
    assert client.time_to_refill >= 0
    return client
import keepa
from getAccessKey import getAccessKey

# Query Keepa for one hard-coded ASIN and print two fields of the first result.
secureKey = getAccessKey()
api = keepa.Keepa(secureKey)
product = api.query("B0775451TT")

first_product = product[0]
first_product.keys()  # pulls all the keys in the data object (result unused)

manufacturer_line = "Manufacturer: " + first_product["manufacturer"]
title_line = "Title:" + first_product["title"]
print(manufacturer_line)
print(title_line)

# only works if matplotlib is installed
#keepa.plot_product(product[0])