def handle_data(self, text):
    """Record the net value parsed from ``text`` for a pending product.

    Does nothing when ``self.atag_title_stack`` is empty. Otherwise the most
    recently pushed entry is popped and used as the key into
    ``self.products``; that record's ``'net'`` field is set to the tuple form
    of whatever ``extract_data(text)`` returns, or to the string
    ``"not found"`` when the result is falsy.
    """
    if not self.atag_title_stack:
        return

    product_key = self.atag_title_stack.pop()
    parsed = extract_data(text)
    # NOTE(review): assumes a record for product_key already exists in
    # self.products; a missing key raises KeyError here -- confirm with caller.
    self.products[product_key]['net'] = tuple(parsed) if parsed else "not found"
def test_run():
    """Parse ``yhd.html`` and print a per-500-unit price for each product.

    Feeds the file contents to a ``ProdInfoParser``, prints the parser's
    output mapping, then for every product whose title yields a truthy
    ``extract_data`` result prints the product key, its title and the price
    scaled to 500 units of the extracted net amount.

    Prints use the single-argument parenthesised form, which produces the
    same output under Python 2 and is also valid Python 3.
    """
    parser = ProdInfoParser()

    # The context manager closes the file even if reading fails.
    with open('yhd.html') as page:
        content = page.read()
    parser.feed(content)

    products = parser.output()
    print(products)

    for key, info in products.items():
        result = extract_data(info['title'])
        if not result:
            continue
        # Reuse the result instead of parsing the same title a second time.
        # presumably a (net amount, unit) pair -- verify against extract_data.
        net, unit = result
        unit_price = float(info['yhdprice']) / net * 500
        print(key)
        print(info['title'])
        print("Unit price: ¥%.2f/500%s" % (unit_price, unit))
def start_a(self, attrs):
    """Handle an opening ``<a>`` tag and record the product's net value.

    ``attrs`` is anything ``dict()`` accepts (e.g. a list of
    ``(name, value)`` pairs). Only anchors with non-empty ``id``, ``pmid``
    and ``title`` attributes are processed. The product key is the second
    ``'_'``-separated field of ``id``; anchors whose ``id`` contains no
    underscore are skipped instead of raising ``IndexError``.

    For a processed anchor, ``self.products[key]['net']`` is set to the tuple
    form of ``extract_data(title)``, or to the string ``"not found"`` when
    that result is falsy. The title itself is not stored on the record here.
    """
    attrs = dict(attrs)
    anchor_id = attrs.get('id')
    title = attrs.get('title')
    if not (anchor_id and attrs.get('pmid') and title):
        return

    id_fields = anchor_id.split('_')
    if len(id_fields) < 2:
        # No '_' in the id: there is no product key to extract, so ignore
        # this anchor rather than abort the whole parse.
        return
    product_id = id_fields[1]

    # Start a fresh record when none exists yet (or the existing one is empty).
    if not self.products.get(product_id):
        self.products[product_id] = {}

    result = extract_data(title)
    if result:
        self.products[product_id]['net'] = tuple(result)
    else:
        self.products[product_id]['net'] = "not found"