def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product-list element and register it.

    Category, image, sold-out flag and name/URL are delegated to the
    ``set_product_*`` helpers of this object; the prices are parsed here
    from the ``<li>`` items found inside ``product_ctx``.

    Args:
        page_url: URL of the listing page; forwarded to the category helper.
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product. Must provide
            ``find_all`` (presumably a BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__`` and
        swallowed.
    """
    try:
        product_data = ProductData()

        # Product category.
        self.set_product_category_second(page_url, product_data, soup)

        # Product image.
        self.set_product_image_second(product_data, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product_data, product_ctx)

        # Product name / URL: first attempt with ('p', 'name'), then fall
        # back to ('strong', 'name') when no URL came back.
        crw_post_url = self.set_product_name_url_fourth(
            product_data, product_ctx, 'p', 'name')
        if crw_post_url == '':
            crw_post_url = self.set_product_name_url_fourth(
                product_data, product_ctx, 'strong', 'name')

        # Prices. Example markup:
        #   <ul class="xans-element- xans-product xans-product-listitem">
        #     <li class=" xans-record-"><strong class="title displaynone">
        #       <span></span> :</strong><span>₩15,000</span> ...</li>
        #   </ul>
        # The first <li> carries the regular price, the second the sale price.
        for li_num, li_ctx in enumerate(product_ctx.find_all('li'), start=1):
            split_list = li_ctx.get_text().strip().split(':')
            if len(split_list) < 2:
                # No "label : value" separator in this item. Skip it instead
                # of raising IndexError and losing the whole product; the
                # item still counts towards the position numbering.
                continue
            # Drop any parenthesised suffix after the amount.
            price_str = split_list[1].split('(')[0].strip()
            if li_num == 1:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(price_str))
            elif li_num == 2:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_str))

        # Register the product only when a detail URL was found.
        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. The sale price is read from ``<div rel="판매가">``
    elements inside ``product_ctx``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_first(item, soup)

        # Product image, e.g.
        #   <img src="//ai-doggi.com/web/product/medium/.../xxx.jpg" class="thumb">
        self.set_product_image_second(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL.
        post_url = self.set_product_name_url_second(
            item, product_ctx, 'div', '-name')

        # Price. Example markup:
        #   <div rel="판매가" class=" xans-record-">
        #     <span class="title displaynone"><span>판매가</span> :</span>
        #     <span>39,000원</span><span id="span_product_tax_type_text"> </span>
        #   </div>
        # Label spans (containing "판매가" or ':') and empty spans are ignored.
        for price_div in product_ctx.find_all('div', {'rel': '판매가'}):
            for span in price_div.find_all('span'):
                text = span.get_text().strip()
                if text == '' or '판매가' in text or ':' in text:
                    continue
                item.crw_price_sale = int(__UTIL__.get_only_digit(text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image and name/URL come from helper methods of this object.
    Sold-out status and the sale price are read directly from
    ``product_ctx``.

    Args:
        page_url: URL of the listing page; forwarded to the category helper.
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_second(page_url, item, soup)

        # Product image.
        self.set_product_image_second(item, product_ctx)

        # Sold-out status: the product is treated as sold out when the
        # "add to cart" icon is missing, e.g.
        #   <img src="/web/upload/icon_....png" alt="장바구니 담기"
        #        class="ec-admin-icon cart">
        if product_ctx.find('img', class_='ec-admin-icon cart') is None:
            item.crw_is_soldout = 1

        # Product name / URL: ('p', 'name') first, ('strong', 'name') as
        # the fallback.
        post_url = self.set_product_name_url_fourth(
            item, product_ctx, 'p', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_fourth(
                item, product_ctx, 'strong', 'name')

        # Price: <p class="prices"> ... <span class="price normal">.
        price_box = product_ctx.find('p', class_='prices')
        if price_box is not None:
            price_span = price_box.find('span', class_='price normal')
            if price_span is not None:
                price_text = price_span.get_text().strip()
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. The sale price is read from the first
    ``<span class="price normal">`` inside ``product_ctx``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_first(item, soup)

        # Product image.
        self.set_product_image_third(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL.
        post_url = self.set_product_name_url_fourth(
            item, product_ctx, 'div', 'name')

        # Price. Example markup:
        #   <p><span class="info displaynone"> / </span>
        #      <span class="price normal">19,900 won</span>
        #      <span class="price normal displaynone"></span></p>
        price_span = product_ctx.find('span', class_='price normal')
        if price_span is not None:
            price_text = price_span.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. The sale price is taken from the first ``<span>`` of
    every ``<li>`` that holds more than one span; the last such ``<li>``
    wins.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_first(item, soup)

        # Product image.
        self.set_product_image_third(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: ('strong', 'name') first, ('p', 'name') as
        # the fallback.
        post_url = self.set_product_name_url_second(
            item, product_ctx, 'strong', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_second(
                item, product_ctx, 'p', 'name')

        # Price.
        for li in product_ctx.find_all('li'):
            spans = li.find_all('span')
            if len(spans) <= 1:
                continue
            # Keep only the part before any parenthesised suffix.
            amount_text = spans[0].get_text().strip().split('(')[0].strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(amount_text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. The sale price is read from the first
    ``<p class="price">`` inside ``product_ctx``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_first(item, soup)

        # Product image.
        self.set_product_image_second(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: ('p', 'name') first, ('strong', 'name') as
        # the fallback.
        post_url = self.set_product_name_url_fourth(
            item, product_ctx, 'p', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_fourth(
                item, product_ctx, 'strong', 'name')

        # Price, e.g. <p class="price">KRW 46,000</p>
        price_tag = product_ctx.find('p', class_='price')
        if price_tag is not None:
            price_text = price_tag.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. The sale price is read from the first
    ``<strong class="price">`` inside ``product_ctx``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_third(item, soup)

        # Product image.
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL. Example markup:
        #   <p class="name"><a href="/product/detail.html?product_no=286&...">
        #     <strong class="title displaynone"><span>상품명</span> :</strong>
        #     <span>Frill Neck Sleeve Blouse Lavender [20%SALE]</span></a></p>
        # ('strong', 'name') is tried first, ('p', 'name') as the fallback.
        post_url = self.set_product_name_url_second(
            item, product_ctx, 'strong', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_second(
                item, product_ctx, 'p', 'name')

        # Price, e.g.
        #   <p><strike class="displaynone"></strike>
        #      <strong class="price">25,000원</strong></p>
        price_tag = product_ctx.find('strong', class_='price')
        if price_tag is not None:
            price_text = price_tag.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. The sale price is read from the first
    ``<p class="price">`` inside ``product_ctx``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_third(item, soup)

        # Product image. Example markup:
        #   <a name="anchorBoxName_741" href="/product/detail.html?product_no=741&..."
        #      class="prdImg"><img src="//babiana.co.kr/web/product/medium/.../xxx.jpg"
        #      class="borderEffect"></a>
        self.set_product_image_first(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL. Example markup:
        #   <p class="name"><a href="/product/detail.html?product_no=286&...">
        #     <strong class="title displaynone"><span>상품명</span> :</strong>
        #     <span>Frill Neck Sleeve Blouse Lavender [20%SALE]</span></a></p>
        post_url = self.set_product_name_url_first(
            item, product_ctx, 'p', 'name')

        # Price from <p class="price">.
        price_tag = product_ctx.find('p', class_='price')
        if price_tag is not None:
            price_text = price_tag.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product-list element and register it.

    Category, image, sold-out flag and name/URL are delegated to helper
    methods of this object. Brand, origin and prices are parsed here from
    the ``<p rel="...">`` elements inside ``product_ctx``; the ``rel``
    attribute names the field and the text after the first ':' is its value.

    Args:
        page_url: URL of the listing page; forwarded to the category helper.
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__`` and
        swallowed.
    """
    try:
        product_data = ProductData()

        # Product category.
        self.set_product_category_second(page_url, product_data, soup)

        # Product image.
        self.set_product_image_fourth(product_data, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product_data, product_ctx)

        # Product name / URL.
        crw_post_url = self.set_product_name_url_first(
            product_data, product_ctx, 'div', '-name')

        # Price / brand. Example markup:
        #   <div class="xans-element- xans-product xans-product-listitem -detail">
        #     <p rel="판매가" class=" xans-record-">
        #       <span class="title displaynone"><span>판매가</span> :</span>
        #       <span class="-real"><span>8,000원</span> ...</span></p>
        #     <p rel="원산지" class=" xans-record-">
        #       <span class="title displaynone"><span>원산지</span> :</span>
        #       <span class="-real"><span>중국 yolan oem</span></span></p>
        #   </div>
        for p_ctx in product_ctx.find_all('p'):
            if 'rel' not in p_ctx.attrs:
                continue
            title_name = p_ctx.attrs['rel']
            split_list = p_ctx.get_text().strip().split(':')
            if len(split_list) < 2:
                # No "label : value" separator. Skip this element instead of
                # raising IndexError and losing the whole product.
                continue
            # Value = text after the first ':' with any "(...)" suffix removed.
            value_str = split_list[1].strip().split('(')[0].strip()

            if title_name.startswith('브랜드'):        # brand
                product_data.crw_brand1 = value_str
            elif title_name.startswith('원산지'):      # country of origin
                product_data.crw_brand2 = value_str
            elif title_name.startswith('소비자가'):    # consumer (list) price
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(value_str))
            elif title_name.startswith('판매가'):      # selling price
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(value_str))

        # Register the product only when a detail URL was found.
        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, img_ctx, name_ctx, price_ctx):
    """Collect the data of one listed product and hand it to the product API.

    This variant receives the image, name and price elements of a product
    separately. It fills a ``ProductData`` and, when a detail URL was
    found, calls ``set_product_data_sub`` followed by
    ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page (not used by this variant).
        img_ctx: Element containing the product ``<img>`` tags.
        name_ctx: Element containing the product name and its ``<a>`` link.
        price_ctx: Element containing the price / sold-out markup.
            All three are presumably BeautifulSoup tags -- confirm.

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        # Product image. Example markup:
        #   <a href="/shop/shopdetail.html?branduid=624477&..."><img
        #      class="MS_prod_img_s" src="/shopimages/lovespet/0320020000533.gif?1590117644"></a>
        # The query string is dropped and the first non-empty src is used.
        # NOTE(review): source indentation was lost; the `break` is assumed
        # to belong to the non-empty-src branch -- confirm.
        for img_tag in img_ctx.find_all('img'):
            if 'src' not in img_tag.attrs:
                continue
            src = img_tag.attrs['src'].strip().split('?')[0].strip()
            if src != '':
                full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
                item.product_img = self.get_hangul_url_convert(full_src)
                break

        # Product name and link. Example markup:
        #   <strong class="name"><a href="/shop/shopdetail.html?branduid=624477&...">
        #     도기스타 쿨하네스 ( S ~ XL )</a></strong>
        item.crw_name = name_ctx.get_text().strip()
        anchor = name_ctx.find('a')
        if anchor is not None and 'href' in anchor.attrs:
            post_url = self.get_crw_post_url(anchor, 'href')
            if post_url != '':
                # Goods code and category are taken from the product URL.
                self.get_crw_goods_code(item, post_url)
                self.get_category_value(item, post_url)

        # Price / sold-out. Example markup:
        #   <li class="price"><p class="price02"><strike>₩24,000</strike></p>
        #                     <p class="price03">₩24,000</p></li>
        # When sold out:
        #   <li class="price"><div class="sold">[품절상품]</div></li>
        sale_tag = price_ctx.find('p', class_='price03')
        list_tag = price_ctx.find('p', class_='price02')
        sold_tag = price_ctx.find('div', class_='sold')

        if sold_tag is not None:
            item.crw_is_soldout = 1
        if list_tag is not None:
            item.crw_price = int(
                __UTIL__.get_only_digit(list_tag.get_text().strip()))
        if sale_tag is not None:
            item.crw_price_sale = int(
                __UTIL__.get_only_digit(sale_tag.get_text().strip()))

        # Hand over only when a detail URL was found.
        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. Regular and discounted prices are parsed from the
    ``<li>`` items inside ``product_ctx``.

    Args:
        page_url: URL of the listing page; forwarded to the category helper.
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_second(page_url, item, soup)

        # Product image, e.g.
        #   <img src="//ai-doggi.com/web/product/medium/.../xxx.jpg" class="thumb">
        self.set_product_image_second(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: ('p', 'name') first, ('strong', 'name') as
        # the fallback.
        post_url = self.set_product_name_url_first(
            item, product_ctx, 'p', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_first(
                item, product_ctx, 'strong', 'name')

        # Prices. Example markup:
        #   <li class=" xans-record-">
        #     <strong class="title displaynone"><span>판매가</span> :</strong>
        #     <span style="...line-through;">39,000원</span> ...</li>
        #   <li class=" xans-record-">
        #     <strong class="title "><span></span> :</strong>
        #     <span>37,050원 <span>(1,950원 할인)</span></span></li>
        for li in product_ctx.find_all('li'):
            label_tag = li.find('strong')
            spans = li.find_all('span')
            if label_tag is None or len(spans) <= 1:
                continue

            label = label_tag.get_text().strip()
            second_text = spans[1].get_text().strip()
            # Amount = text of the second span up to the first '('.
            amount = second_text.split('(')[0].strip()

            if label.startswith('판매가') and label.find(':') > 0:
                # Labelled "selling price": stored as the regular price.
                item.crw_price = int(__UTIL__.get_only_digit(amount))
            elif label.find(':') >= 0:
                # Other labelled row: taken as the sale price only when
                # the text mentions a discount ("할인") or event ("이벤트").
                if '할인' in second_text or '이벤트' in second_text:
                    item.crw_price_sale = int(
                        __UTIL__.get_only_digit(amount))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and hand it to the product API.

    Image, link, name, brand, prices and sold-out flag are all parsed here
    from ``product_ctx``. When a detail URL was found,
    ``set_product_data_sub`` and then ``process_product_api`` are called.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        # Image, product link, goods code and category. Example markup:
        #   <div class="thumb salebox">
        #     <a href="/shop/shopdetail.html?branduid=3356611&..."><img
        #        class="MS_prod_img_m" src="/shopimages/petnoriter/0020050000022.jpg?1590140914"></a>
        #     <input type="hidden" name="custom_price" value="49900">
        #     <input type="hidden" name="product_price" value="28900">
        #     <span class="sale_text">42%</span></div>
        # NOTE(review): source indentation was lost; both `break`s are
        # assumed to sit in the "value found" branches -- confirm.
        for thumb_div in product_ctx.find_all('div', class_='thumb salebox'):
            anchors = thumb_div.find_all('a')

            # First image with a non-empty src (query string removed).
            for img_tag in thumb_div.find_all('img'):
                if 'src' not in img_tag.attrs:
                    continue
                src = img_tag.attrs['src'].strip().split('?')[0].strip()
                if src != '':
                    full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
                    item.product_img = self.get_hangul_url_convert(full_src)
                    break

            # First link that yields a product URL.
            for anchor in anchors:
                if 'href' not in anchor.attrs:
                    continue
                post_url = self.get_crw_post_url(anchor, 'href')
                if post_url != '':
                    self.get_crw_goods_code(item, post_url)
                    self.get_category_value(item, post_url)
                    break

        # Product name and brand, e.g.
        #   <li class="dsc">논슬립 항균 배변 매트 원형/사각</li>
        for name_tag in product_ctx.find_all('li', class_='dsc'):
            item.crw_name = name_tag.get_text().strip()
            # A leading "[brand]" prefix in the name is used as the brand,
            # e.g. "[스텔라&츄이] 츄이스 치킨 디너패티".
            if item.crw_name.startswith('['):
                item.crw_brand1 = item.crw_name.split(']')[0][1:].strip()

        # Price / sold-out. Example markup:
        #   <ul class="info">
        #     <li class="dsc">...</li>
        #     <li class="price">28,900원</li>
        #     <li class="consumer">49,900원</li>
        #     ...
        #   </ul>
        for ul_tag in product_ctx.find_all('ul'):
            sale_tag = ul_tag.find('li', class_='price')
            list_tag = ul_tag.find('li', class_='consumer')
            sold_tag = ul_tag.find('li', class_='soldout')

            if sold_tag is not None:
                item.crw_is_soldout = 1
            if list_tag is not None:
                item.crw_price = int(
                    __UTIL__.get_only_digit(list_tag.get_text().strip()))
            if sale_tag is not None:
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(sale_tag.get_text().strip()))

        # Hand over only when a detail URL was found.
        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image and sold-out flag come from helper methods of this
    object. Name, URL and goods code are parsed from the links inside
    ``<div class="sp-product__title">``; prices and brand come from
    ``<div rel="...">`` elements.

    Args:
        page_url: URL of the listing page; forwarded to the category helper.
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        # Product category.
        self.set_product_category_second(page_url, item, soup)

        # Product image.
        self.set_product_image_first(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL / goods code.
        # NOTE(review): source indentation was lost; the nesting below is
        # the most plausible reading -- confirm against the original file.
        for title_div in product_ctx.find_all('div', class_='sp-product__title'):
            for anchor in title_div.find_all('a'):
                if 'href' not in anchor.attrs:
                    continue

                # Name: any span that does not start with the "상품명"
                # label or with ':'; the last such span wins.
                for span in anchor.find_all('span'):
                    candidate = span.get_text().strip()
                    if not candidate.startswith(('상품명', ':')):
                        item.crw_name = candidate

                # Make the link absolute unless it already starts with "http".
                link = anchor.attrs['href'].strip()
                if not link.startswith('http'):
                    link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                     anchor.attrs['href'].strip())

                post_url = link
                if self.C_PRODUCT_STRIP_STR != '':
                    post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')

                # Path segments 4 and 5 of the URL are used as the fallback
                # name and the goods code respectively.
                segments = post_url.split('/')
                if item.crw_name == '':
                    item.crw_name = segments[4].strip()
                item.crw_goods_code = segments[5].strip()

        # Prices and brand: the rel attribute names the field
        # ("판매가" = price, "할인가" = discounted price, "브랜드" = brand);
        # label spans and empty spans are ignored.
        for div in product_ctx.find_all('div'):
            if 'rel' not in div.attrs:
                continue
            field = div.attrs['rel']
            for span in div.find_all('span'):
                text = span.get_text().strip()
                if text in ('브랜드', '판매가', '할인가', ''):
                    continue
                if field == '판매가':
                    item.crw_price = int(__UTIL__.get_only_digit(text))
                elif field == '할인가':
                    item.crw_price_sale = int(__UTIL__.get_only_digit(text))
                elif field == '브랜드':
                    item.crw_brand1 = text

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. Prices are read from the spans inside
    ``<li class="price_all">``.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_first(item, soup)

        # Product image.
        self.set_product_image_second(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: ('strong', 'name') first, ('p', 'name') as
        # the fallback.
        post_url = self.set_product_name_url_second(
            item, product_ctx, 'strong', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_second(
                item, product_ctx, 'p', 'name')

        # Prices: span class -> ProductData field, processed in this order.
        # NOTE(review): "strike" overwrites the sale price set from "pri";
        # a struck-through amount usually looks like a list price -- confirm
        # this is intended.
        price_all = product_ctx.find('li', class_='price_all')
        if price_all is not None:
            span_to_field = (
                ('custom', 'crw_price'),
                ('pri', 'crw_price_sale'),
                ('strike', 'crw_price_sale'),
            )
            for span_class, field in span_to_field:
                for span in price_all.find_all('span', class_=span_class):
                    amount = int(
                        __UTIL__.get_only_digit(span.get_text().strip()))
                    setattr(item, field, amount)

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image and name/URL come from helper methods of this object.
    Brand, manufacturer, prices and the sold-out flag are parsed from the
    ``<li>`` items inside ``product_ctx``.

    Args:
        page_url: URL of the listing page; forwarded to the category helper.
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_second(page_url, item, soup)

        # Product image.
        self.set_product_image_fourth(item, product_ctx)

        # Product name / URL: ('p', 'name') first, ('strong', 'name') as
        # the fallback.
        post_url = self.set_product_name_url_first(
            item, product_ctx, 'p', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_first(
                item, product_ctx, 'strong', 'name')

        # Prices / brand / sold-out. Example markup:
        #   <li class=" xans-record-">
        #     <strong class="title displaynone"><span>소비자가</span> :</strong>
        #     <span style="...line-through;">72,800원</span></li>
        #   <li class=" xans-record-">
        #     <strong class="title displaynone"><span>판매가</span> :</strong>
        #     <span>품절</span></li>
        for li in product_ctx.find_all('li'):
            label_tag = li.find('strong')
            spans = li.find_all('span')
            if label_tag is None or len(spans) <= 1:
                continue

            label = label_tag.get_text().strip()
            # Value = text of the second span up to the first '('.
            value = spans[1].get_text().strip().split('(')[0].strip()

            if label.startswith('브랜드'):          # brand
                item.crw_brand1 = value
            elif label.startswith('제조사'):        # manufacturer
                item.crw_brand2 = value
            elif label.startswith('소비자가'):      # consumer (list) price
                item.crw_price = int(__UTIL__.get_only_digit(value))
            elif label.startswith('판매가'):        # selling price
                if value == '품절':
                    # The price slot shows "sold out": flag the product
                    # and reuse the list price as the sale price.
                    item.crw_is_soldout = 1
                    item.crw_price_sale = item.crw_price
                else:
                    item.crw_price_sale = int(
                        __UTIL__.get_only_digit(value))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product element and submit it.

    Image, link, name, brand, prices and sold-out flag are parsed here from
    ``product_ctx``. When a detail URL was found, ``set_product_data_sub``
    and then ``process_product_api`` are called.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__`` and
        swallowed.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        # Product image. Example markup:
        #   <dd class="prd-img"><img class="MS_prod_img_s"
        #       src="/shopimages/ecofoam/0450010000053.jpg?1527467204"></dd>
        # The query string is dropped and the first non-empty src is used.
        # NOTE(review): source indentation was lost; the `break` is assumed
        # to belong to the non-empty-src branch -- confirm.
        for img_div_ctx in product_ctx.find_all('dd', class_='prd-img'):
            for img_ctx in img_div_ctx.find_all('img'):
                if 'src' not in img_ctx.attrs:
                    continue
                img_src = img_ctx.attrs['src'].strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

        # Product link, goods code and category. Example markup:
        #   <dl class="item grid-item opa70" ...>
        #     <a href="/shop/shopdetail.html?branduid=841206&xcode=046&...">
        product_link_ctx = product_ctx.find('a')
        if product_link_ctx is not None and 'href' in product_link_ctx.attrs:
            crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
            if crw_post_url != '':
                self.get_crw_goods_code(product_data, crw_post_url)
                self.get_category_value(product_data, crw_post_url)

        # Product name. Example markup:
        #   <span class="prd-name ft_eb">도그자리 플랫<br></span>
        # When sold out the name carries a "(품절)" marker:
        #   <span class="prd-name ft_eb">맘편한매트 소프트W<br>8세트(품절)</span>
        name_ctx = product_ctx.find('span', class_='prd-name ft_eb')
        if name_ctx is not None:
            crw_name = name_ctx.get_text().strip()
            # Containment test instead of `0 < find(...)`, so a marker at
            # the very start of the name is recognised as well.
            if '(품절)' in crw_name:
                product_data.crw_is_soldout = 1
                crw_name = crw_name.replace('(품절)', '').strip()
            product_data.crw_name = crw_name

        # Brand, e.g. <span class="prd-brand">도그자리</span>
        brand_ctx = product_ctx.find('span', class_='prd-brand')
        if brand_ctx is not None:
            product_data.crw_brand1 = brand_ctx.get_text().strip()

        # Prices. Example markup:
        #   <span class="prd-price-discount"><del>75,000</del></span>
        #   <span class="prd-discount ft_eb">52,000 원</span>
        for div_ctx in product_ctx.find_all('div', class_='prd-sub'):
            sell_ctx = div_ctx.find('span', class_='prd-discount ft_eb')
            consumer_ctx = div_ctx.find('span', class_='prd-price-discount')

            if consumer_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(consumer_ctx.get_text().strip()))
            if sell_ctx is not None:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

        # Submit the product only when a detail URL was found.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the data of one listed product and register it by URL.

    Category, image, sold-out flag and name/URL come from helper methods
    of this object. Prices are read from ``<p class="price">`` elements:
    ``<strong>`` holds the sale price and ``<strike>`` the regular price.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page; forwarded to the category helper.
        product_ctx: Parsed element for a single product (presumably a
            BeautifulSoup tag -- confirm).

    Returns:
        Always ``True``; exceptions are logged via ``__LOG__`` and swallowed.
    """
    try:
        item = ProductData()

        # Product category.
        self.set_product_category_first(item, soup)

        # Product image, e.g.
        #   <img src="//ai-doggi.com/web/product/medium/.../xxx.jpg" class="thumb">
        self.set_product_image_second(item, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL. Example markup:
        #   <p class="name"><a href="/product/detail.html?product_no=286&...">
        #     <strong class="title displaynone"><span>상품명</span> :</strong>
        #     <span>Frill Neck Sleeve Blouse Lavender [20%SALE]</span></a></p>
        # ('strong', 'name') is tried first, ('p', 'name') as the fallback.
        post_url = self.set_product_name_url_first(
            item, product_ctx, 'strong', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_first(
                item, product_ctx, 'p', 'name')

        # Prices. Example markup:
        #   <p class="price">
        #     <span> <strike>33,000원</strike> >
        #       <span> <strong>17,000원</strong></span></span>
        #     <span class="discount_rate" data-prod-custom="33000"
        #           data-prod-price="17000">48%</span></p>
        for price_p in product_ctx.find_all('p', class_='price'):
            sale_tag = price_p.find('strong')
            list_tag = price_p.find('strike')

            if sale_tag is not None:
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(sale_tag.get_text().strip()))
            if list_tag is not None:
                item.crw_price = int(
                    __UTIL__.get_only_digit(list_tag.get_text().strip()))

        # Register only when a detail URL was found.
        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one listing tile (brand, image, link, name, prices) and submit it.

    Args:
        page_url: URL of the listing page, forwarded to get_category_value.
        soup: Parsed listing page, forwarded to get_category_value.
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        self.reset_product_category(item)
        self.get_category_value(item, page_url, soup)

        # Brand: <div class="brand">SALLYS LAW</div> (the last match wins).
        for brand_node in product_ctx.find_all('div', class_='brand'):
            item.crw_brand1 = brand_node.get_text().strip()

        # Image:
        # <div class="img"><img src="//image.../300972496.jpg?RS=300" alt=""></div>
        # 'data-original' is preferred over 'src'; the query string is dropped.
        # NOTE(review): the query-string strip is assumed to apply to both
        # attributes - confirm against the original indentation.
        for wrapper in product_ctx.find_all('div', class_='img'):
            for img_node in wrapper.find_all('img'):
                attrs = img_node.attrs
                if 'data-original' in attrs:
                    raw_src = attrs['data-original'].strip()
                elif 'src' in attrs:
                    raw_src = attrs['src'].strip()
                else:
                    raw_src = ''
                img_src = raw_src.split('?')[0].strip()
                if img_src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                item.product_img = self.get_hangul_url_convert(img_link)

        # Sold-out detection is not performed here (a former implementation
        # based on <div class="item_icon_box"> images was left disabled).

        # Product link and goods code: <a href="/Product/300972496">
        anchor = product_ctx.find('a')
        if anchor is not None and 'href' in anchor.attrs:
            link = anchor.attrs['href'].strip()
            if not link.startswith('http'):
                # Relative link: prefix the site's product base URL.
                link = '%s%s' % (self.BASIC_PRODUCT_URL, link)
            post_url = link
            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            # The goods code is the fifth '/'-separated piece of the URL.
            item.crw_goods_code = post_url.split('/')[4].strip()

        for name_node in product_ctx.find_all(
                'div', class_='product ellipsis multiline'):
            item.crw_name = name_node.get_text().strip()

        # Prices:
        # <div class="price">
        #   <span class="discount_price">74,400</span>
        #   <span class="base_price">93,000</span>
        # </div>
        price_fields = {
            'base_price': 'crw_price',
            'discount_price': 'crw_price_sale',
        }
        for price_box in product_ctx.find_all('div', class_='price'):
            for span_node in price_box.find_all('span'):
                if 'class' not in span_node.attrs:
                    continue
                field = price_fields.get(span_node.attrs['class'][0])
                if field is not None:
                    setattr(item, field, int(
                        __UTIL__.get_only_digit(span_node.get_text().strip())))

        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data_second(self, page_url, soup):
    """Build a ProductData record from a product detail page and register it.

    The goods code is taken from the ``?product_no=`` query parameter of
    ``page_url``; category levels 2 and 3 come from the '|'-separated value
    stored in ``self.PAGE_URL_HASH[page_url]``.

    Args:
        page_url: URL of the detail page; also used as the product URL.
        soup: Parsed detail page.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = page_url

        # Goods code: text between '?product_no=' and the next '&'.
        after_key = post_url.split('?product_no=')[1].strip()
        item.crw_goods_code = after_key.split('&')[0].strip()

        # Categories: level 1 is fixed, levels 2/3 come from the page hash.
        item.crw_category1 = 'PRODUCT'
        category_parts = self.PAGE_URL_HASH[page_url].split('|')
        item.crw_category2 = category_parts[0].strip()
        if len(category_parts) > 1:
            item.crw_category3 = category_parts[1].strip()

        # Image: the first non-empty <img class="BigImage"> source wins.
        for img_node in soup.find_all('img', class_='BigImage'):
            if 'src' not in img_node.attrs:
                continue
            img_src = img_node.attrs['src'].strip()
            if img_src == '':
                continue
            img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
            if item.product_img == '':
                item.product_img = self.get_hangul_url_convert(img_link)

        # Name: the first <p class="prd_model"> fills an empty name.
        for name_node in soup.find_all('p', class_='prd_model'):
            if item.crw_name == '':
                item.crw_name = name_node.get_text().strip()

        # Sold-out: the item counts as sold out when <span id="btnReserve">
        # is present and its first class is not 'displaynone', or when it
        # has no class attribute at all.
        # NOTE(review): the 'else' is assumed to pair with the class-attribute
        # check - confirm against the original indentation.
        reserve_node = soup.find('span', {'id': 'btnReserve'})
        if reserve_node is not None:
            if 'class' in reserve_node.attrs:
                if 'displaynone' != reserve_node.attrs['class'][0]:
                    item.crw_is_soldout = 1
            else:
                item.crw_is_soldout = 1

        # Prices: span.sell is the selling price, span.customer the list price.
        for price_box in soup.find_all('div', class_='info_price'):
            sell_node = price_box.find('span', class_='sell')
            customer_node = price_box.find('span', class_='customer')
            if sell_node is not None:
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_node.get_text().strip()))
            if customer_node is not None:
                item.crw_price = int(
                    __UTIL__.get_only_digit(customer_node.get_text().strip()))

        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        # Log under this method's own name so failures are attributed correctly.
        __LOG__.Error('에러 : set_product_data_second')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one listing tile (image, sold-out, link, name, prices) and submit it.

    Args:
        page_url: URL of the listing page; key into self.PAGE_URL_HASH.
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''
        self.reset_product_category(item)

        # Categories: up to three '|'-separated levels from the page hash,
        # read only when a detail category value is configured.
        if self.C_DETAIL_CATEGORY_VALUE.strip() != '':
            levels = ('crw_category1', 'crw_category2', 'crw_category3')
            category_parts = self.PAGE_URL_HASH[page_url].split('|')
            for field, value in zip(levels, category_parts):
                setattr(item, field, value)

        # Brand extraction is not performed here (a former implementation
        # based on <span class="item_brand"> was left disabled).

        # Image: the first <img> inside each <div class="prdimg">.
        for wrapper in product_ctx.find_all('div', class_='prdimg'):
            img_node = wrapper.find('img')
            if img_node is None:
                continue
            img_src = img_node.attrs['src'].strip() if 'src' in img_node.attrs else ''
            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                item.product_img = self.get_hangul_url_convert(img_link)

        # Sold-out: flagged when the tile's first class is 'soldout'.
        if 'class' in product_ctx.attrs:
            if product_ctx.attrs['class'][0] == 'soldout':
                item.crw_is_soldout = 1

        # Link, name and goods code:
        # <p class="name"><a href="https://.../detail.php?pno=BEED...&rURL=...">NAME</a></p>
        for name_node in product_ctx.find_all('p', class_='name'):
            anchor = name_node.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue
            item.crw_name = anchor.get_text().strip()
            link = anchor.attrs['href'].strip()
            if not link.startswith('http'):
                # Relative link: prefix the site's product base URL.
                link = '%s%s' % (self.BASIC_PRODUCT_URL, link)
            post_url = link
            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            # Goods code: text between '?pno=' and the next '&'.
            item.crw_goods_code = post_url.split('?pno=')[1].split('&')[0].strip()

        # Prices:
        # <div class="price">
        #   <p class="consumer consumer">KRW 24,000</p>
        #   <p class="sell sell"><strong>KRW 22,800 </strong></p>
        # </div>
        for price_box in product_ctx.find_all('div', class_='price'):
            sell_node = price_box.find('p', class_='sell')
            if sell_node is not None:
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_node.get_text().strip()))
            consumer_node = price_box.find('p', class_='consumer')
            if consumer_node is not None:
                item.crw_price = int(
                    __UTIL__.get_only_digit(consumer_node.get_text().strip()))

        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product node and register it.

    Prices are read from the first <span> of each <li>, told apart by the
    span's inline style ('line-through;' for the list price, 'bold;' for
    the selling price).

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page, passed to the category helper.
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()

        self.set_product_category_first(item, soup)
        self.set_product_image_third(item, product_ctx)
        self.set_product_soldout_first(item, product_ctx)

        # Name / detail URL: <strong class="name"> first, then <p class="name">.
        post_url = self.set_product_name_url_second(
            item, product_ctx, 'strong', 'name')
        if post_url == '':
            post_url = self.set_product_name_url_second(
                item, product_ctx, 'p', 'name')

        # Prices, e.g.
        # <li><span style="font-size:15px;color:#000000;font-weight:bold;">4,900원</span></li>
        for li_node in product_ctx.find_all('li'):
            span_node = li_node.find('span')
            if span_node is None:
                continue
            text = span_node.get_text().strip()
            # Only styled spans whose text contains the won sign are prices.
            if 'style' not in span_node.attrs or '원' not in text:
                continue
            style = span_node.attrs['style']
            if style.find('line-through;') > 0:
                item.crw_price = int(__UTIL__.get_only_digit(text))
            elif style.find('bold;') > 0:
                item.crw_price_sale = int(__UTIL__.get_only_digit(text))

        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one listing tile (image, link, name/brand, prices) and submit it.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        # Image, product link, goods code and category, e.g.
        # <div class="thumb salebox">
        #   <a href="/shop/shopdetail.html?branduid=3534594&xcode=003...">
        #     <img class="MS_prod_img_m" src="/shopimages/.../0030010000152.jpg?1581790516">
        #   </a> ...
        # </div>
        # NOTE(review): each 'break' is assumed to sit inside the innermost
        # non-empty check (first usable image / link wins) - confirm.
        for thumb_node in product_ctx.find_all('div', class_='thumb salebox'):
            anchors = thumb_node.find_all('a')
            images = thumb_node.find_all('img')

            for img_node in images:
                img_src = ''
                if 'src' in img_node.attrs:
                    # Drop the cache-busting query string.
                    img_src = img_node.attrs['src'].strip().split('?')[0].strip()
                if img_src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                item.product_img = self.get_hangul_url_convert(img_link)
                break

            for anchor in anchors:
                if 'href' not in anchor.attrs:
                    continue
                post_url = self.get_crw_post_url(anchor, 'href')
                if post_url != '':
                    self.get_crw_goods_code(item, post_url)
                    self.get_category_value(item, post_url)
                    break

        # Name and brand: <li class="dsc">NAME</li>
        # A leading "[BRAND]" in the name is also stored as the brand.
        for name_node in product_ctx.find_all('li', class_='dsc'):
            item.crw_name = name_node.get_text().strip()
            if item.crw_name.startswith('['):
                bracket_part = item.crw_name.split(']')[0]
                item.crw_brand1 = bracket_part[1:].strip()

        # Prices / sold-out:
        # <ul class="info">
        #   <li class="consumer">26,000원</li> <li class="price">13,000원</li>
        # </ul>
        # When sold out the list holds <li class="soldout">SOLD OUT</li> instead.
        for info_node in product_ctx.find_all('ul'):
            sell_node = info_node.find('li', class_='price')
            consumer_node = info_node.find('li', class_='consumer')
            soldout_node = info_node.find('li', class_='soldout')
            if soldout_node is not None:
                item.crw_is_soldout = 1
            if consumer_node is not None:
                item.crw_price = int(
                    __UTIL__.get_only_digit(consumer_node.get_text().strip()))
            if sell_node is not None:
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_node.get_text().strip()))

        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one listing tile (image, sold-out, link, name, prices) and submit it.

    Args:
        page_url: URL of the listing page; key into self.PAGE_URL_HASH.
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''
        self.reset_product_category(item)

        # Categories: up to three '|'-separated levels from the page hash.
        category_path = self.PAGE_URL_HASH[page_url]
        __LOG__.Trace(category_path)
        levels = ('crw_category1', 'crw_category2', 'crw_category3')
        for field, value in zip(levels, category_path.split('|')):
            setattr(item, field, value)

        # Page-heading category and brand extraction are not performed here
        # (former implementations were left disabled).

        # Image:
        # <div class="prdimg"><a href="..."><img src="https://.../d0dc....jpg"></a></div>
        for wrapper in product_ctx.find_all('div', class_='prdimg'):
            img_node = wrapper.find('img')
            if img_node is None:
                continue
            img_src = img_node.attrs['src'].strip() if 'src' in img_node.attrs else ''
            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                item.product_img = self.get_hangul_url_convert(img_link)

        # Sold-out: the tile is rendered as <div class="box out">, i.e. it has
        # exactly two classes and the second one is 'out'.
        if 'class' in product_ctx.attrs:
            tile_classes = product_ctx.attrs['class']
            if len(tile_classes) == 2 and tile_classes[1] == 'out':
                item.crw_is_soldout = 1

        # Link, name and goods code:
        # <div class="name"><a href="https://.../detail.php?pno=41AE...&rURL=...">NAME</a> ...</div>
        for name_node in product_ctx.find_all('div', class_='name'):
            anchor = name_node.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue
            item.crw_name = anchor.get_text().strip()
            link = anchor.attrs['href'].strip()
            if not link.startswith('http'):
                # Relative link: prefix the site's product base URL.
                link = '%s%s' % (self.BASIC_PRODUCT_URL, link)
            post_url = link
            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            # Goods code: text between '?pno=' and the next '&'.
            item.crw_goods_code = post_url.split('?pno=')[1].split('&')[0].strip()

        # Prices:
        # <div class="price"><span class="sell"><span class="font">98,000</span></span></div>
        for price_box in product_ctx.find_all('div', class_='price'):
            sell_node = price_box.find('span', class_='sell')
            consumer_node = price_box.find('span', class_='consumer')
            if consumer_node is not None:
                item.crw_price = int(
                    __UTIL__.get_only_digit(consumer_node.get_text().strip()))
            if sell_node is not None:
                # During a time sale extra text follows the price on later
                # lines; only the first line is the price.
                first_line = sell_node.get_text().strip().split('\n')[0]
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(first_line.strip()))

        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product node and register it.

    The list price comes from <div class="price">; the selling price from
    the <div class="saleprice"> that contains no nested <div>.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page, passed to the category helper.
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()

        self.set_product_category_third(item, soup)
        self.set_product_image_fourth(item, product_ctx)
        self.set_product_soldout_first(item, product_ctx)

        post_url = self.set_product_name_url_first(
            item, product_ctx, 'div', 'name')

        # Prices, e.g.
        # <div class="price xans-record-"><span style="...line-through;">73,000원</span></div>
        # <div class="saleprice xans-record-"><span>65,700원 <span>(7,300원 할인)</span></span></div>
        # Further "saleprice" rows wrap a nested <div> (discount period,
        # colour chips) and are not prices.
        for price_node in product_ctx.find_all('div', class_='price'):
            item.crw_price = int(
                __UTIL__.get_only_digit(price_node.get_text().strip()))

        for sale_node in product_ctx.find_all('div', class_='saleprice'):
            if sale_node.find('div') is not None:
                continue
            # Keep only the text before '(' so the "(... 할인)" amount is
            # not mixed into the price digits.
            before_paren = sale_node.get_text().strip().split('(')[0]
            item.crw_price_sale = int(
                __UTIL__.get_only_digit(before_paren.strip()))

        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one listing tile (image, link, name/brand, price) and submit it.

    Args:
        page_url: URL of the listing page (not used by this variant).
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        # Image, product link, goods code and category, e.g.
        # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=803964&xcode=035...">
        #   <img class="MS_prod_img_s" src="/shopimages/.../0330290001733.jpg?1581494094"></a></dt>
        # NOTE(review): each 'break' is assumed to sit inside the innermost
        # non-empty check (first usable image / link wins) - confirm.
        for thumb_node in product_ctx.find_all('dt', class_='thumb'):
            anchors = thumb_node.find_all('a')
            images = thumb_node.find_all('img')

            for img_node in images:
                img_src = ''
                if 'src' in img_node.attrs:
                    # Drop the cache-busting query string.
                    img_src = img_node.attrs['src'].strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    item.product_img = self.get_hangul_url_convert(img_link)
                    break

            for anchor in anchors:
                if 'href' in anchor.attrs:
                    post_url = self.get_crw_post_url(anchor, 'href')
                    if post_url != '':
                        self.get_crw_goods_code(item, post_url)
                        self.get_category_value(item, post_url)
                        break

        # Name and brand:
        # <li class="prd-name">[BRAND] NAME <span class="MK-product-icons"></span></li>
        # A leading "[BRAND]" in the name is also stored as the brand.
        for name_node in product_ctx.find_all('li', class_='prd-name'):
            item.crw_name = name_node.get_text().strip()
            if item.crw_name.startswith('['):
                item.crw_brand1 = item.crw_name.split(']')[0][1:].strip()

        # Price / sold-out:
        #   <li class="prd-price">74,800원</li>
        # When sold out the cell holds no digits:
        #   <li class="prd-price"><span class="fc-red">품절</span></li>
        for info_node in product_ctx.find_all('ul'):
            price_node = info_node.find('li', class_='prd-price')
            soldout_node = info_node.find('span', class_='fc-red')

            # Set the sold-out flag first so a digit-less price cell cannot
            # prevent it from being recorded.
            if soldout_node is not None:
                item.crw_is_soldout = 1

            if price_node is not None:
                price_text = price_node.get_text().strip()
                try:
                    price = int(__UTIL__.get_only_digit(price_text))
                except (TypeError, ValueError):
                    # presumably get_only_digit yields '' for text without
                    # digits, which int() rejects - verify against __UTIL__.
                    # That is expected for a sold-out cell; for an in-stock
                    # item keep the previous behaviour and abort this product.
                    if soldout_node is None:
                        raise
                else:
                    item.crw_price_sale = price

        if post_url != '':
            self.set_product_data_sub(item, post_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product node and register it.

    Prices are read from <li> rows that hold at least three <span>s: the
    second span is the row title and the third one the amount.

    Args:
        page_url: URL of the listing page, passed to the category helper.
        soup: Parsed listing page, passed to the category helper.
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()

        self.set_product_category_second(page_url, item, soup)
        self.set_product_image_fourth(item, product_ctx)
        self.set_product_soldout_first(item, product_ctx)

        post_url = self.set_product_name_url_fifth(
            item, product_ctx, 'div', 'item_name')

        # Prices, e.g.
        # <li><span class="title displaynone"><span>소비자가</span> :</span>
        #     <span style="...line-through;">159,000원</span></li>
        # <li><span class="title displaynone"><span>판매가</span> :</span>
        #     <span style="...bold;">127,000원</span>...</li>
        for li_node in product_ctx.find_all('li'):
            spans = li_node.find_all('span')
            if len(spans) <= 2:
                continue
            title = spans[1].get_text().strip()
            # Keep only the text before '(' of the amount span.
            amount = spans[2].get_text().strip().split('(')[0].strip()
            if title.startswith('소비자가'):
                item.crw_price = int(__UTIL__.get_only_digit(amount))
            elif title.startswith('판매가'):
                item.crw_price_sale = int(__UTIL__.get_only_digit(amount))

        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a ProductData record from one product node and register it.

    Categories come from the '|'-separated value in
    ``self.PAGE_URL_HASH[page_url]`` and are mirrored onto
    ``self.CRW_CATEGORY_1..3``.

    Args:
        page_url: URL of the listing page; key into self.PAGE_URL_HASH.
        soup: Parsed listing page (not used by this variant).
        product_ctx: Parsed node of a single product tile.

    Returns:
        Always True; any exception is logged and swallowed.
    """
    try:
        item = ProductData()
        post_url = ''

        # Categories: clear the cached values first so they stay empty if
        # the page-hash lookup below fails.
        self.CRW_CATEGORY_1 = ''
        self.CRW_CATEGORY_2 = ''
        self.CRW_CATEGORY_3 = ''

        levels = ('crw_category1', 'crw_category2', 'crw_category3')
        category_parts = self.PAGE_URL_HASH[page_url].split('|')
        for field, value in zip(levels, category_parts):
            setattr(item, field, value)

        self.CRW_CATEGORY_1 = item.crw_category1
        self.CRW_CATEGORY_2 = item.crw_category2
        self.CRW_CATEGORY_3 = item.crw_category3

        self.set_product_image_fourth(item, product_ctx)
        self.set_product_soldout_first(item, product_ctx)

        # Product link and goods code, taken from the first <a> of the
        # image wrapper.
        for wrapper in product_ctx.find_all(
                'div', class_=self.C_PRODUCT_IMG_SELECTOR_CLASSNAME):
            anchor = wrapper.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue
            post_url = self.get_crw_post_url(anchor, 'href')
            if post_url != '':
                url_parts = post_url.split('/')
                # The goods code is the sixth '/'-separated piece, if present.
                if len(url_parts) > 5:
                    item.crw_goods_code = url_parts[5].strip()

        for desc_node in product_ctx.find_all('div', class_='description'):
            # Name: any non-empty span under <strong class="name"><a> whose
            # text does not start with the '상품명' label or with ':'.
            for strong_node in desc_node.find_all('strong', class_='name'):
                for anchor in strong_node.find_all('a'):
                    for span_node in anchor.find_all('span'):
                        name_value = span_node.get_text().strip()
                        if (name_value != ''
                                and not name_value.startswith('상품명')
                                and not name_value.startswith(':')):
                            item.crw_name = name_value

            # Prices: a styled span with 'text-decoration' in its style is the
            # list price; any other styled span is the selling price.
            # NOTE(review): the 'else' is assumed to pair with the
            # text-decoration test - confirm against the original indentation.
            for li_node in desc_node.find_all('li'):
                for span_node in li_node.find_all('span'):
                    price_value = span_node.get_text().strip()
                    if price_value == '' or price_value == ':':
                        continue
                    if 'style' not in span_node.attrs:
                        continue
                    # Membership test rather than `0 < find(...)`: a style
                    # that starts with 'text-decoration' is at index 0 and
                    # would otherwise be stored as the selling price.
                    if 'text-decoration' in span_node.attrs['style']:
                        item.crw_price = int(
                            __UTIL__.get_only_digit(price_value))
                    else:
                        item.crw_price_sale = int(
                            __UTIL__.get_only_digit(price_value))

        if post_url != '':
            self.set_product_url_hash(item, post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry (thumb / prd-info / price-info layout).

    Fills a fresh ``ProductData`` from ``product_ctx`` - image, product URL
    (plus goods code and category via helper methods), name, brand,
    sold-out flag and prices - and, when a product URL was found, passes it
    to ``set_product_data_sub`` followed by ``process_product_api``.

    Args:
        page_url: Not used by this implementation.
        soup: Not used by this implementation.
        product_ctx: Element wrapping a single product; it is queried with
            ``find_all``.

    Returns:
        Always ``True``; exceptions are logged through ``__LOG__`` and
        suppressed.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Product image, product link, goods code and category
        #
        # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=10163894&xcode=001&mcode=005&scode=003&type=X&sort=manual&cur_code=001&GfDT=bml9W1w%3D"><img class="MS_prod_img_m" src="/shopimages/dermadog/0010050000192.jpg?1591754112" alt="상품 섬네일" title="상품 섬네일"></a></dt>
        ####################################
        for thumb_ctx in product_ctx.find_all('dt', class_='thumb'):
            for img_ctx in thumb_ctx.find_all('img'):
                # Drop the query string (everything from the first '?').
                img_src = ''
                if 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip().partition('?')[0].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(img_link)
                    # The first usable image is enough.
                    break

            anchor_ctx = thumb_ctx.find('a')
            if anchor_ctx is not None and 'href' in anchor_ctx.attrs:
                crw_post_url = self.get_crw_post_url(anchor_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

        ####################################
        # Product name and brand
        #
        # <dd class="prd-info">
        #   <ul>
        #     <li class="prd-brand"><span class="MK-product-icons"><img src="/shopimages/dermadog/prod_icons/4154?1591753540" class="MK-product-icon-2"></span></li>
        #     <li class="prd-name"><a href="/shop/shopdetail.html?branduid=10163894&xcode=001&mcode=005&scode=003&type=X&sort=manual&cur_code=001&GfDT=bml9W1w%3D">연어/스킨 헬스츄 15g</a></li>
        #   </ul>
        # </dd>
        ####################################
        for info_ctx in product_ctx.find_all('dd', class_='prd-info'):
            name_item = info_ctx.find('li', class_='prd-name')
            if name_item is not None:
                product_data.crw_name = name_item.get_text().strip()

            brand_item = info_ctx.find('li', class_='prd-brand')
            if brand_item is not None:
                product_data.crw_brand1 = brand_item.get_text().strip()

        ####################################
        # Prices / sold-out state
        #
        # <p class="price-info">
        #   <strike>10,000</strike><br>
        #   <span class="won">₩</span><span class="price">9,000</span>
        # </p>
        #
        # ---- when sold out ----
        #
        # <p class="price-info">
        #   Sold Out
        # </p>
        ####################################
        for price_ctx in product_ctx.find_all('p', class_='price-info'):
            if 'Out' in price_ctx.get_text().strip():
                product_data.crw_is_soldout = 1

            sale_tag = price_ctx.find('span', class_='price')
            list_tag = price_ctx.find('strike')

            # <strike> carries the list price, span.price the selling price.
            if list_tag is not None:
                list_text = list_tag.get_text().strip()
                product_data.crw_price = int(__UTIL__.get_only_digit(list_text))
            if sale_tag is not None:
                sale_text = sale_tag.get_text().strip()
                product_data.crw_price_sale = int(__UTIL__.get_only_digit(sale_text))

        if crw_post_url != '':
            # NOTE: a duplicate check against self.PRODUCT_URL_HASH used to
            # guard these calls; it is disabled, so every product with a URL
            # is processed.
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry (prdImg / strong.name / li layout).

    Category, image, sold-out state and name/URL extraction are delegated
    to the ``set_product_*`` helper methods.  Brand and selling price are
    read from the ``<li>`` rows of ``product_ctx``, whose text has the form
    ``<label> : <value>``.  When the name helper returned a non-empty URL
    the product is handed to ``self.set_product_url_hash``.

    Args:
        page_url: Listing page URL, forwarded to
            ``set_product_category_second``.
        soup: Parsed page, forwarded to ``set_product_category_second``.
        product_ctx: Element wrapping a single product; it is queried with
            ``find_all``.

    Returns:
        Always ``True``; exceptions are logged through ``__LOG__`` and
        suppressed.
    """
    try:
        product_data = ProductData()

        # Product category
        self.set_product_category_second(page_url, product_data, soup)

        ###########################
        # Product image
        #
        # <div class="prdImg">
        #   <a href="/product/chicken-terrine/304/category/137/display/1/" name="anchorBoxName_304"><img src="//bymona.co.kr/web/product/medium/202008/35319c8b46eba6ca86653a26193b993d.jpg" id="eListPrdImage304_1" alt="chicken terrine"></a>
        # </div>
        ###########################
        self.set_product_image_fourth(product_data, product_ctx)

        # Sold-out state
        self.set_product_soldout_first(product_data, product_ctx)

        ###########################
        # Product name and URL
        #
        # <strong class="name"><a href="/product/chicken-terrine/304/category/137/display/1/" class=""><span class="title displaynone"><span style="font-size:12px;color:#555555;">상품명</span> :</span> <span style="font-size:12px;color:#555555;">chicken terrine</span></a></strong>
        ###########################
        crw_post_url = self.set_product_name_url_second(
            product_data, product_ctx, 'strong', 'name')

        ############################
        # Brand and selling price
        #
        # <ul class="xans-element- xans-product xans-product-listitem left"><li class=" xans-record-">
        # <span class="title displaynone"><span style="font-size:12px;color:#000000;font-weight:bold;">판매가</span> :</span> <span style="font-size:12px;color:#000000;font-weight:bold;">4,000 won</span><span id="span_product_tax_type_text" style=""> </span></li>
        # </ul>
        ############################
        for row_ctx in product_ctx.find_all('li'):
            row_text = row_ctx.get_text().strip()

            # Only rows with a ':' somewhere after the first character are
            # label/value rows.
            if row_text.find(':') <= 0:
                continue

            fields = row_text.split(':')
            if '브랜드' in row_text:
                product_data.crw_brand1 = fields[1].strip()
            elif '판매가' in row_text:
                # Ignore anything from the first '(' onwards.
                amount_text = fields[1].split('(')[0].strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(amount_text))

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry (picture / name / line_np / line_sp layout).

    Categories come from the page breadcrumb in ``soup`` ('>'-separated;
    the 2nd to 4th parts are used).  Brand, product URL, goods code, image,
    name, sold-out flag and prices are read from ``product_ctx``.  When a
    product URL was found the result is passed to ``set_product_data_sub``
    and then ``process_product_api``.

    Args:
        page_url: Not used by this implementation.
        soup: Parsed listing page; queried with ``select`` for the
            breadcrumb.
        product_ctx: Element wrapping a single product; it is queried with
            ``find_all``.

    Returns:
        Always ``True``.  Any exception raised while parsing (including a
        product link without a ``?pd_code=`` parameter) is logged via
        ``__LOG__`` and suppressed.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        # Breadcrumb text split on '>': part 1 is skipped, parts 2-4 become
        # category levels 1-3.
        category_ctx_list = soup.select(
            'body > div.body_wrap > div.content_wrap > div.section_tit > div.close'
        )
        for category_ctx in category_ctx_list:
            crumbs = category_ctx.get_text().strip().split('>')
            for position, crumb in enumerate(crumbs, start=1):
                category_name = crumb.strip()
                if position == 2:
                    product_data.crw_category1 = category_name
                elif position == 3:
                    product_data.crw_category2 = category_name
                elif position == 4:
                    product_data.crw_category3 = category_name

        ####################################
        # Brand
        #
        # <div class="line_sub">
        # 한국산 </div>
        ####################################
        for brand_ctx in product_ctx.find_all('div', class_='line_sub'):
            product_data.crw_brand1 = brand_ctx.get_text().strip()

        ####################################
        # Product image / product link / goods code
        #
        # <div class="picture"><a href="./product.html?pd_code=A010489&event_type=%C3%CA%C6%AF%B0%A1"><img src="http://queenpuppy.co.kr/shop/pd_img/A01/489/A010489_2.jpg"></a></div>
        ####################################
        for picture_ctx in product_ctx.find_all('div', class_='picture'):
            product_link_ctx = picture_ctx.find('a')
            if product_link_ctx is None:
                continue

            if 'href' in product_link_ctx.attrs:
                href = product_link_ctx.attrs['href'].strip()
                if href.startswith('http'):
                    product_link = href
                else:
                    product_link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                # Fall back to the unmodified link when no strip string is
                # configured; otherwise the URL would stay empty and the
                # product would be dropped.
                if self.C_PRODUCT_STRIP_STR != '':
                    product_link = product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # The canonical URL ends before the '&event_type=' parameter.
                crw_post_url = product_link.split('&event_type=')[0].strip()

                # Goods code = value of the pd_code query parameter.  A link
                # without '?pd_code=' raises IndexError here, which the
                # handler below logs.
                code_part = crw_post_url.split('?pd_code=')[1].strip()
                product_data.crw_goods_code = code_part.split('&')[0]

            for img_ctx in product_link_ctx.find_all('img'):
                # Prefer 'data-original' over 'src' when both are present.
                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        ####################################
        # Product name
        #
        # <div class="name">
        #   <div style="color:#fd705f; font-weight: bold; valign:top; height: 15px; padding-bottom: 3px;"></div>
        #   <a href="./product.html?pd_code=A010489&event_type=%C3%CA%C6%AF%B0%A1">
        #   건국유업 프로젝트 닥터케이 펫밀크 10개입 </a>
        # </div>
        ####################################
        for name_div_ctx in product_ctx.find_all('div', class_='name'):
            name_link_ctx = name_div_ctx.find('a')
            if name_link_ctx is None:
                continue

            crw_name = name_link_ctx.get_text().strip()
            product_data.crw_name = crw_name
            # '[품절]' marks a sold-out product.  Use a containment test so
            # the marker is also detected (and removed) when it is the very
            # first thing in the name (index 0).
            if '[품절]' in crw_name:
                product_data.crw_is_soldout = 1
                product_data.crw_name = crw_name.replace('[품절]', '').strip()

        ####################################
        # Prices
        #
        # <div class="line_np">20,000원</div>
        # <div class="line_sp">
        #   12,000원
        #   <span style="font-size: 0.8em; color: #666; vertical-align:bottom;">40%↓</span> </div>
        ####################################
        for list_price_ctx in product_ctx.find_all('div', class_='line_np'):
            price_str = list_price_ctx.get_text().strip()
            product_data.crw_price = int(__UTIL__.get_only_digit(price_str))

        for sale_price_ctx in product_ctx.find_all('div', class_='line_sp'):
            price_str = sale_price_ctx.get_text().strip()

            # The first nested span's text must not leak into the price, so
            # only the text in front of it is used.
            suffix_ctx = sale_price_ctx.find('span')
            suffix_str = ''
            if suffix_ctx is not None:
                suffix_str = suffix_ctx.get_text().strip()
            if suffix_str != '':
                price_str = price_str.split(suffix_str)[0].strip()

            product_data.crw_price_sale = int(
                __UTIL__.get_only_digit(price_str))

        if crw_post_url != '':
            # NOTE: a duplicate check against self.PRODUCT_URL_HASH used to
            # guard these calls; it is disabled, so every product with a URL
            # is processed.
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best-effort per product: log and carry on with the next entry.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True