class GetTaokDetailOld(object):
    """Callable task that fetches one Taobao item and publishes its details.

    Calling an instance with an item URL performs these steps:

    1. Return immediately when ``site.real_path(local_url)`` already exists
       as a file.
    2. Fetch the item through ``self.taobao.taobao_item_get``.
    3. If the fetch fails with a ``TaobaoException`` whose ``sub_code``
       contains ``'isv.'``, post the item id to the site's
       ``/queue/expired_taoke/`` URL and return.
    4. Otherwise attach the comments and the processed main/description
       image lists to the item, post the item as JSON to the
       ``imported_taoke`` queue, post it to ``url`` as well, and call
       ``save_topic_data``.

    The helpers ``get_comments``, ``save_image_to_oss``,
    ``_parse_image_from_desc`` and ``save_topic_data`` are not defined in
    this block and must be provided elsewhere.
    """

    # NOTE(review): the API keys/secrets in ``__init__`` are hard-coded in
    # source; consider moving them to configuration.

    # Item fields requested from ``taobao_item_get``.
    _ITEM_FIELDS = (
        'detail_url,num_iid,title,nick,type,cid,seller_cids,props,'
        'input_pids,input_str,desc,pic_url,num,valid_thru,list_time,'
        'delist_time,stuff_status,location,price,post_fee,express_fee,'
        'ems_fee,has_discount,freight_payer,has_invoice,has_warranty,'
        'has_showcase,modified,increment,approve_status,postage_id,'
        'product_id,auction_point,property_alias,item_img,prop_img,sku,'
        'video,outer_id,is_virtual'
    )

    def __init__(self, ):
        self.logger = logging.getLogger("taoke")
        self.taobao = Taobao('12570801', 'fbab4f2ded890ae889e876ae0eee90b9')
        self.oss = OssAPI("storage.aliyun.com", "dcixul0kll5ubeisualf3q1d",
                          "1fMUf01mRTfjXe/Ub4qEmLiu7tU=")

    def __call__(self, site, http, next_task, url, local_url, *args):
        """Process the item referenced by ``url``.

        Args:
            site: object providing ``real_path(path)`` and ``hostname``.
            http: client providing ``post_data`` and ``post``; it is also
                installed on ``self.taobao`` as its ``http`` attribute.
            next_task: accepted but not used by this method.
            url: item URL; must contain ``/<7 or more digits>/``.
            local_url: path resolved with ``site.real_path`` to decide
                whether the item was already processed.
            *args: ignored.

        Returns:
            None.

        Raises:
            AttributeError: if ``url`` contains no ``/<digits>/`` segment.
            Exception: any error from ``taobao_item_get`` other than an
                ``'isv.'`` ``TaobaoException`` is re-raised unchanged.
        """
        local_abs_path = site.real_path(local_url)
        if os.path.isfile(local_abs_path):
            self.logger.info("the topic is exist in local:%s, local:%s" % (url, local_abs_path))
            return

        self.taobao.http = http
        num_iid = re.search(r"/(\d{7,})/", url).group(1)
        self.logger.info("start fetch taoke details id:%s" % num_iid)

        try:
            data = self.taobao.taobao_item_get(fields=self._ITEM_FIELDS,
                                               num_iid=num_iid)
            data = data.get('item_get_response')
        except Exception as e:
            # The exception is matched by class name. A missing or empty
            # ``sub_code`` must not raise inside this handler, otherwise the
            # original API error would be hidden by a TypeError.
            sub_code = getattr(e, 'sub_code', None) or ''
            if e.__class__.__name__ == 'TaobaoException' and 'isv.' in sub_code:
                self.logger.info("expired taoke id:%s" % num_iid)
                remove_url = "http://%s/queue/expired_taoke/" % site.hostname
                http.post_data(remove_url, {'num_iid': num_iid})
                return
            raise

        item = data['item']
        item['traderates'] = self.get_comments(num_iid, item['nick'])

        # ``index`` is threaded through both save_image_to_oss calls, which
        # return the processed list together with the updated index.
        self.logger.info("start fetch main images...")
        index = 0
        main_image = [img['url'] for img in item['item_imgs']['item_img']]
        (main_image, index) = self.save_image_to_oss(main_image, index, http, site, num_iid)

        self.logger.info("start fetch desc images...")
        desc_images = self._parse_image_from_desc(item['desc'])
        (desc_images, index) = self.save_image_to_oss(desc_images, index, http, site, num_iid)

        item['main_images'] = main_image
        item['desc_images'] = desc_images

        # Serialize once; the same payload goes to the queue and to ``url``.
        details_json = json.dumps(item)

        # When HUDSON_URL is set in the environment the local queue endpoint
        # is used instead of the remote one.
        if os.environ.get('HUDSON_URL'):
            queue_url = "http://127.0.0.1:8924/queue/q/imported_taoke?format=json"
        else:
            queue_url = "http://data.deonwu84.com/queue/q/imported_taoke?format=json"
        http.post_data(queue_url, {'details': details_json, 'num_iid': num_iid}, {})

        # The log path is sharded by the last digit of the item id.
        http.post(url, site.real_path("log/%s/%s.txt" % (num_iid[-1:], num_iid)),
                  {'data': details_json})
        self.save_topic_data(data, local_abs_path)
        self.logger.info("done process taoke, id:%s" % num_iid)
class GetTaokDetail(object):
    """Callable task that fetches one Taobao item and enriches its details.

    Calling an instance with an item URL performs these steps:

    1. Fetch the item through ``self.taobao.taobao_item_get``.
    2. If the fetch fails with a ``TaobaoException`` whose ``sub_code``
       contains ``'isv.'``, post the item id to the site's
       ``/queue/expired_taoke/`` URL and return.
    3. Otherwise call ``load_cid_props`` with the item's category id, store
       the result of ``convert_props_tostr`` under ``props_str``, and try
       to attach the comments returned by ``get_comments``.

    The helpers ``load_cid_props``, ``convert_props_tostr`` and
    ``get_comments`` are not defined in this block and must be provided
    elsewhere.
    """

    # NOTE(review): the API keys/secrets in ``__init__`` are hard-coded in
    # source; consider moving them to configuration.

    # Item fields requested from ``taobao_item_get``.
    _ITEM_FIELDS = (
        'detail_url,num_iid,title,nick,type,cid,seller_cids,props,'
        'input_pids,input_str,desc,pic_url,num,valid_thru,list_time,'
        'delist_time,stuff_status,location,price,post_fee,express_fee,'
        'ems_fee,has_discount,freight_payer,has_invoice,has_warranty,'
        'has_showcase,modified,increment,approve_status,postage_id,'
        'product_id,auction_point,property_alias,item_img,prop_img,sku,'
        'video,outer_id,is_virtual'
    )

    def __init__(self, ):
        # Imported at construction time. The explicit name replaces
        # ``from oss.oss_api import *``: a wildcard import inside a function
        # is a SyntaxError on Python 3 and only OssAPI is used here.
        from taobao import Taobao
        from oss.oss_api import OssAPI

        self.logger = logging.getLogger("taoke")
        self.taobao = Taobao('12570801', 'fbab4f2ded890ae889e876ae0eee90b9')
        self.oss = OssAPI("storage.aliyun.com", "dcixul0kll5ubeisualf3q1d",
                          "1fMUf01mRTfjXe/Ub4qEmLiu7tU=")

    def __call__(self, site, http, next_task, url, local_url, *args):
        """Process the item referenced by ``url``.

        Args:
            site: object providing ``real_path(path)`` and ``hostname``.
            http: client providing ``post_data``; it is also installed on
                ``self.taobao`` as its ``http`` attribute.
            next_task: accepted but not used by this method.
            url: item URL; must contain ``/<7 or more digits>/``.
            local_url: path resolved with ``site.real_path``.
            *args: ignored.

        Returns:
            None.

        Raises:
            AttributeError: if ``url`` contains no ``/<digits>/`` segment.
            Exception: any error from ``taobao_item_get`` other than an
                ``'isv.'`` ``TaobaoException`` is re-raised unchanged.
        """
        # The resolved path is not used in this method body; the call is
        # kept in case ``real_path`` has side effects (TODO confirm).
        local_abs_path = site.real_path(local_url)

        self.taobao.http = http
        num_iid = re.search(r"/(\d{7,})/", url).group(1)
        self.logger.info("start fetch taoke details id:%s" % num_iid)

        try:
            data = self.taobao.taobao_item_get(fields=self._ITEM_FIELDS,
                                               num_iid=num_iid)
            data = data.get('item_get_response')
        except Exception as e:
            # The exception is matched by class name. A missing or empty
            # ``sub_code`` must not raise inside this handler, otherwise the
            # original API error would be hidden by a TypeError.
            sub_code = getattr(e, 'sub_code', None) or ''
            if e.__class__.__name__ == 'TaobaoException' and 'isv.' in sub_code:
                self.logger.info("expired taoke id:%s" % num_iid)
                remove_url = "http://%s/queue/expired_taoke/" % site.hostname
                http.post_data(remove_url, {'num_iid': num_iid})
                return
            raise

        item = data['item']
        self.load_cid_props(unicode(item['cid']))
        item['props_str'] = self.convert_props_tostr(item['props'])
        # Logged through the task logger like every other message here
        # (previously went to the root logger).
        self.logger.info(u"prpos:%s" % item['props_str'])

        # Comments are best-effort: a failure is logged and the item is
        # left without the comment fields that were not yet assigned.
        try:
            traderates = self.get_comments(num_iid, item['nick'])
            item['traderates'] = traderates.get("trade_rates", {}).get('trade_rate', [])
            item['traderates_count'] = traderates.get('total_results', 0)
            self.logger.info("traderates_count:%s" % item['traderates_count'])
        except Exception as e:
            self.logger.info("failed to get comments:%s" % e)
from taobao import Taobao


def test_get_detail(num_iid):
    """Placeholder; takes an item id and currently does nothing."""
    pass


def test_get_cate_list(pid):
    """Placeholder; takes a parent id and currently does nothing."""
    pass


if __name__ == '__main__':
    # Manual check: fetch one fixed item through the API and print the
    # raw response.
    api = Taobao('12395385', '53697d99eccd670191af0603d7256f77')
    #data = api.taobao_itemcats_get(fields='cid,parent_cid,name,is_parent', parent_cid=0)
    # Adjacent literals are joined at compile time into the single
    # comma-separated field list passed to the API.
    wanted_fields = (
        'detail_url,num_iid,title,nick,type,cid,seller_cids,props,'
        'input_pids,input_str,desc,pic_url,num,valid_thru,list_time,'
        'delist_time,stuff_status,location,price,post_fee,express_fee,'
        'ems_fee,has_discount,freight_payer,has_invoice,has_warranty,'
        'has_showcase,modified,increment,approve_status,postage_id,'
        'product_id,auction_point,property_alias,item_img,prop_img,sku,'
        'video,outer_id,is_virtual'
    )
    data = api.taobao_item_get(fields=wanted_fields, num_iid='4735623930')
    print(data)
class GetTaokDetailOld(object):
    """Callable task that fetches one Taobao item and publishes its details.

    Calling an instance with an item URL performs these steps:

    1. Return immediately when ``site.real_path(local_url)`` already exists
       as a file.
    2. Fetch the item through ``self.taobao.taobao_item_get``.
    3. If the fetch fails with a ``TaobaoException`` whose ``sub_code``
       contains ``'isv.'``, post the item id to the site's
       ``/queue/expired_taoke/`` URL and return.
    4. Otherwise attach the comments and the processed main/description
       image lists to the item, post the item as JSON to the
       ``imported_taoke`` queue, post it to ``url`` as well, and call
       ``save_topic_data``.

    The helpers ``get_comments``, ``save_image_to_oss``,
    ``_parse_image_from_desc`` and ``save_topic_data`` are not defined in
    this block and must be provided elsewhere.
    """

    # NOTE(review): the API keys/secrets in ``__init__`` are hard-coded in
    # source; consider moving them to configuration.

    # Item fields requested from ``taobao_item_get``.
    _ITEM_FIELDS = (
        'detail_url,num_iid,title,nick,type,cid,seller_cids,props,'
        'input_pids,input_str,desc,pic_url,num,valid_thru,list_time,'
        'delist_time,stuff_status,location,price,post_fee,express_fee,'
        'ems_fee,has_discount,freight_payer,has_invoice,has_warranty,'
        'has_showcase,modified,increment,approve_status,postage_id,'
        'product_id,auction_point,property_alias,item_img,prop_img,sku,'
        'video,outer_id,is_virtual'
    )

    def __init__(self, ):
        self.logger = logging.getLogger("taoke")
        self.taobao = Taobao('12570801', 'fbab4f2ded890ae889e876ae0eee90b9')
        self.oss = OssAPI("storage.aliyun.com", "dcixul0kll5ubeisualf3q1d",
                          "1fMUf01mRTfjXe/Ub4qEmLiu7tU=")

    def __call__(self, site, http, next_task, url, local_url, *args):
        """Process the item referenced by ``url``.

        Args:
            site: object providing ``real_path(path)`` and ``hostname``.
            http: client providing ``post_data`` and ``post``; it is also
                installed on ``self.taobao`` as its ``http`` attribute.
            next_task: accepted but not used by this method.
            url: item URL; must contain ``/<7 or more digits>/``.
            local_url: path resolved with ``site.real_path`` to decide
                whether the item was already processed.
            *args: ignored.

        Returns:
            None.

        Raises:
            AttributeError: if ``url`` contains no ``/<digits>/`` segment.
            Exception: any error from ``taobao_item_get`` other than an
                ``'isv.'`` ``TaobaoException`` is re-raised unchanged.
        """
        local_abs_path = site.real_path(local_url)
        if os.path.isfile(local_abs_path):
            self.logger.info("the topic is exist in local:%s, local:%s" % (url, local_abs_path))
            return

        self.taobao.http = http
        num_iid = re.search(r"/(\d{7,})/", url).group(1)
        self.logger.info("start fetch taoke details id:%s" % num_iid)

        try:
            data = self.taobao.taobao_item_get(fields=self._ITEM_FIELDS,
                                               num_iid=num_iid)
            data = data.get('item_get_response')
        except Exception as e:
            # The exception is matched by class name. A missing or empty
            # ``sub_code`` must not raise inside this handler, otherwise the
            # original API error would be hidden by a TypeError.
            sub_code = getattr(e, 'sub_code', None) or ''
            if e.__class__.__name__ == 'TaobaoException' and 'isv.' in sub_code:
                self.logger.info("expired taoke id:%s" % num_iid)
                remove_url = "http://%s/queue/expired_taoke/" % site.hostname
                http.post_data(remove_url, {'num_iid': num_iid})
                return
            raise

        item = data['item']
        item['traderates'] = self.get_comments(num_iid, item['nick'])

        # ``index`` is threaded through both save_image_to_oss calls, which
        # return the processed list together with the updated index.
        self.logger.info("start fetch main images...")
        index = 0
        main_image = [img['url'] for img in item['item_imgs']['item_img']]
        (main_image, index) = self.save_image_to_oss(main_image, index, http, site, num_iid)

        self.logger.info("start fetch desc images...")
        desc_images = self._parse_image_from_desc(item['desc'])
        (desc_images, index) = self.save_image_to_oss(desc_images, index, http, site, num_iid)

        item['main_images'] = main_image
        item['desc_images'] = desc_images

        # Serialize once; the same payload goes to the queue and to ``url``.
        details_json = json.dumps(item)

        # When HUDSON_URL is set in the environment the local queue endpoint
        # is used instead of the remote one.
        if os.environ.get('HUDSON_URL'):
            queue_url = "http://127.0.0.1:8924/queue/q/imported_taoke?format=json"
        else:
            queue_url = "http://data.deonwu84.com/queue/q/imported_taoke?format=json"
        http.post_data(queue_url, {'details': details_json, 'num_iid': num_iid}, {})

        # The log path is sharded by the last digit of the item id.
        http.post(url, site.real_path("log/%s/%s.txt" % (num_iid[-1:], num_iid)),
                  {'data': details_json})
        self.save_topic_data(data, local_abs_path)
        self.logger.info("done process taoke, id:%s" % num_iid)
from taobao import Taobao


def test_get_detail(num_iid):
    """Placeholder; takes an item id and currently does nothing."""
    pass


def test_get_cate_list(pid):
    """Placeholder; takes a parent id and currently does nothing."""
    pass


if __name__ == '__main__':
    # Manual check: fetch one fixed item through the API and print the
    # raw response.
    api = Taobao('12395385', '53697d99eccd670191af0603d7256f77')
    #data = api.taobao_itemcats_get(fields='cid,parent_cid,name,is_parent', parent_cid=0)
    # Adjacent literals are joined at compile time into the single
    # comma-separated field list passed to the API.
    wanted_fields = (
        'detail_url,num_iid,title,nick,type,cid,seller_cids,props,'
        'input_pids,input_str,desc,pic_url,num,valid_thru,list_time,'
        'delist_time,stuff_status,location,price,post_fee,express_fee,'
        'ems_fee,has_discount,freight_payer,has_invoice,has_warranty,'
        'has_showcase,modified,increment,approve_status,postage_id,'
        'product_id,auction_point,property_alias,item_img,prop_img,sku,'
        'video,outer_id,is_virtual'
    )
    data = api.taobao_item_get(fields=wanted_fields, num_iid='4735623930')
    print(data)