#pprint.pprint(vars(api.browse_node_lookup(281407).BrowseNodes.BrowseNode.Children.BrowseNode))


def explore_browse_sub_node(node, parent_id, root_id, sub_root_id):
    """Record ``node`` in SUB_NODES and walk down into its children.

    node        -- BrowseNode element exposing ``BrowseNodeId`` and ``Name``
    parent_id   -- id of the node this one was found under
    root_id     -- id of the base node the walk started from
    sub_root_id -- id of the first-level node below the base node; pass 0
                   when ``node`` itself is that first-level node

    The node is stored as ``SUB_NODES[id] = (name, parent, root, sub_root)``.
    A first-level node is stored with sub_root 0 and its own id is handed
    down to its descendants as their sub_root.
    """
    node_id = int(node.BrowseNodeId)
    SUB_NODES[node_id] = (str(node.Name), parent_id, root_id, sub_root_id)
    if sub_root_id == 0:
        sub_root_id = node_id
    print('Found Sub BrowseNode {0} with id {1} and parent {2}'.format(node.Name, node.BrowseNodeId, parent_id))

    # Look the node up by id so the Children of the *response* can be walked,
    # using the same response shape as the base-node loop below.  The earlier
    # version dereferenced ``node.BrowseNodes`` on the child element itself and
    # threw away the result of the lookup it performed.
    # NOTE(review): assumes a node without children simply lacks the
    # ``Children`` element in the response -- confirm against the API.
    try:
        children = api.browse_node_lookup(node_id).BrowseNodes.BrowseNode.Children.BrowseNode
    except AttributeError:
        print('Reached leaf node')
        return

    for sub_node in children:
        # pass the plain int id as parent so it can be bound as a SQL parameter
        explore_browse_sub_node(sub_node, node_id, root_id, sub_root_id)


# Iterate over a snapshot of the keys: the loop body writes back into BASE_NODES.
for base_id in list(BASE_NODES):
    base_node = api.browse_node_lookup(base_id).BrowseNodes.BrowseNode
    # replace the placeholder name with the one reported by the API
    BASE_NODES[int(base_node.BrowseNodeId)] = str(base_node.Name)
    print('Found Base BrowseNode {0} with id {1}'.format(base_node.Name, base_node.BrowseNodeId))
    for browse_sub_node in base_node.Children.BrowseNode:
        # the base node is both parent and root of its direct children
        explore_browse_sub_node(browse_sub_node, base_id, base_id, 0)


def sub_node_generator():
    """Yield one ``(id, name, parent, root, sub_root)`` tuple per SUB_NODES entry."""
    for node_id, (name, parent, root, sub_root) in SUB_NODES.items():
        yield (node_id, name, parent, root, sub_root)


conn = sqlite3.connect('tracker.db')
print('Connected to tracker.db')
c = conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS old_categories (id, name, parent, root, sub_root)')
# base nodes only have an id and a name; the remaining columns stay NULL
c.executemany('INSERT INTO old_categories (id, name) VALUES (?,?)', BASE_NODES.items())
c.executemany('INSERT INTO old_categories (id, name, parent, root, sub_root) VALUES (?, ?, ?, ?, ?)', sub_node_generator())
# The INSERTs above run inside an implicit transaction; commit explicitly so
# the rows are persisted rather than rolled back when the connection goes away.
conn.commit()
return ','.join(str(d) for d in data) def getPrice(price): return price + randint(-100, 100) count = 0 limit_reached = False sc = SparkContext(appName="AskMeMP") amazon_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE']) walmart_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE']) ebay_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE']) result = api.browse_node_lookup(1000) for child1 in result.BrowseNodes.BrowseNode.Children.BrowseNode: if limit_reached: break result1 = api.browse_node_lookup(child1.BrowseNodeId) for child in result1.BrowseNodes.BrowseNode.Children.BrowseNode: if limit_reached: break for book in api.item_search('Books', BrowseNode=child.BrowseNodeId): try: detail = api.item_lookup(str(book.ASIN), ResponseGroup='OfferSummary').Items[0] temp_rdd = sc.parallelize([ str(book.ASIN) + '+' + book.ItemAttributes.Title + '+' + book.ItemAttributes.Author + '+' + book.DetailPageURL + '+' + str(detail.Item.OfferSummary.LowestNewPrice.Amount)
class BrowseNodeExplorer (gtk.Window):

    """
    Gtk explorer for Amazon BrowseNodes.

    The window shows a two-column tree (node id, node name) that starts out
    with the root nodes listed in ``NODE_IDS`` for the chosen locale.
    Double-clicking a row fetches that node from Amazon and adds its children
    below it.
    """

    def on_delete(self, widget, event, data=None):
        """Handle the window's ``delete_event``: quit the Gtk main loop."""
        # closes the window and quits.
        gtk.main_quit()
        return False

    def on_tree_click(self, widget, event, data=None):
        """Fetch the selected node when the tree view is double-clicked."""
        # only double clicks trigger a fetch
        if event.type != gtk.gdk._2BUTTON_PRESS:
            return

        # get data from highlighted selection
        treeselection = self.treeview.get_selection()
        model, selected = treeselection.get_selected()
        if selected is None:
            # double click without a selected row (e.g. on empty space)
            return

        # column 0 holds the node id; fetch the selected node
        node_id = self.treestore.get_value(selected, 0)
        self.fetch_nodes(node_id)

    def __init__(self, locale='de'):
        """Build the window and fill the tree with the locale's root nodes.

        locale -- key into ``NODE_IDS`` and locale passed to the Amazon API
        """
        gtk.Window.__init__(self, gtk.WINDOW_TOPLEVEL)

        self.set_title("BrowseNode Explorer")
        self.set_size_request(400, 200)
        self.connect("delete_event", self.on_delete)

        self.locale = locale
        self.api = API(AWS_KEY, SECRET_KEY, self.locale)

        # create a TreeStore with an int column (node id) and a string
        # column (node name) to use as the model
        self.treestore = gtk.TreeStore(int, str)

        # create the TreeView using treestore
        self.treeview = gtk.TreeView(self.treestore)

        # add column id
        renderer = gtk.CellRendererText()
        column = gtk.TreeViewColumn('id', renderer, text=0)
        self.treeview.append_column(column)

        # add column name
        renderer = gtk.CellRendererText()
        column = gtk.TreeViewColumn('name', renderer, text=1)
        column.set_sort_column_id(1)  # Allow sorting on the column
        self.treeview.append_column(column)

        # make it clickable
        self.treeview.add_events(gtk.gdk.BUTTON_PRESS_MASK)
        self.treeview.connect('button_press_event', self.on_tree_click)

        scrolled = gtk.ScrolledWindow()
        scrolled.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        scrolled.add(self.treeview)

        self.add(scrolled)
        self.show_all()

        # populate with root nodes
        # but avoid duplicated node ids: several names may map to one id,
        # only the first name encountered for an id is shown
        seen_ids = set()
        for name, node_id in NODE_IDS[self.locale].items():
            if node_id not in seen_ids:
                seen_ids.add(node_id)
                self.treestore.append(None, [node_id, name])

    def _find_row(self, node_id):
        """Return the first tree row whose id column equals ``node_id``.

        Rows are searched depth-first; ``None`` is returned if no row matches.
        """
        def match_func(row, data):
            # data is a tuple containing column number, key
            column, key = data
            return row[column] == key

        def search(rows, func, data):
            if not rows:
                return None
            for row in rows:
                if func(row, data):
                    return row
                result = search(row.iterchildren(), func, data)
                if result:
                    return result
            return None

        return search(self.treestore, match_func, (0, node_id))

    def fetch_nodes(self, node_id):
        """
        Fetches a BrowseNode from Amazon.

        The children reported for ``node_id`` are appended below the matching
        tree row and the row is expanded.  Children that are already present
        below the row are not added again, so fetching the same node twice
        does not duplicate entries.  Nothing happens if ``node_id`` is not in
        the tree.
        """
        # fetch matching row from treestore
        row = self._find_row(node_id)
        if row is None:
            return

        # fetch Amazon data
        node = self.api.browse_node_lookup(node_id).BrowseNodes.BrowseNode
        is_root = hasattr(node, 'IsCategoryRoot') and node.IsCategoryRoot.pyval == 1

        # NOTE: ancestors are not added to the tree, only direct children are.

        # replace node name: a category root is shown under its ancestor's name
        if is_root:
            try:
                row[1] = node.Ancestors.BrowseNode.Name.text
            except AttributeError:
                # root without ancestors in the response: keep the current name
                pass

        try:
            children = [(child.BrowseNodeId.pyval, child.Name.pyval)
                        for child in node.Children.BrowseNode]
        except AttributeError:
            # no Children element: nothing to add
            children = []

        # ids already listed below this row (from an earlier fetch)
        known_ids = set(child_row[0] for child_row in row.iterchildren())
        for child_id, child_name in children:
            if child_id in known_ids:
                continue
            known_ids.add(child_id)
            self.treestore.append(row.iter, [child_id, child_name])

        # expand nodes of just added
        self.treeview.expand_row(tuple(row.path), True)

    def main(self):
        """Run the Gtk main loop."""
        gtk.main()