def _runMergeQuery(con, query):
    """Execute one SQL statement on a fresh cursor and commit it.

    The cursor is closed even when execute/commit raises, so a failing row
    does not leave an open cursor behind on the shared connection.
    """
    cursor = con.cursor()
    try:
        cursor.execute(query)
        con.commit()
    finally:
        cursor.close()


def _airCodeBeforeGif(product):
    """Return the two characters that precede the first '.gif' in the line.

    NOTE(review): when the line has no '.gif', find() returns -1 and this
    yields product[-3:-1], exactly as the original inline slice did.
    """
    gifPos = product.find('.gif')
    return product[gifPos - 2:gifPos]


def _fillFromFnPrice(productCls, product, tourkind, productcode, productName,
                     productDetailUrl):
    """Populate productCls from the quoted arguments of a fn_price('...') call."""
    productSplit = product.split('fn_price')[1].split("'")
    productCls.productCode = productSplit[1]
    productCls.dDay = productSplit[3]
    productCls.code = productSplit[5]
    if tourkind == 'W' or tourkind == 'G':
        productCls.airCode = _airCodeBeforeGif(product)
    else:
        # Original note: this field is a Korean airport code, but a
        # two-letter English code is needed; the 'F' and 'D' paths overwrite
        # it later from a '.gif' file name.
        productCls.airCode = productSplit[7]
    productCls.price = productSplit[9].replace(',', '')
    # Original note on the raw value: blank = bookable, 03 = closing soon,
    # 05 = closed.
    productCls.status = codes.getStatus('naeiltour', productSplit[11])
    productCls.url = productDetailUrl + productcode + '&sel_day=' + productCls.dDay
    productCls.productname = productName
    productCls.dTime = ''
    productCls.aDay = ''
    productCls.aTime = ''


def _fillNightAndPeriod(productCls, product):
    """Set productCls.night / productCls.period from a cp949-encoded line.

    The line is split on u'박'; the last number before it becomes ``night``
    and the first number after it becomes ``period``.  Both are '0' when the
    marker is absent.  Raises IndexError when the marker is present but a
    number is missing (the caller logs that and moves on).
    """
    splitText = product.decode('cp949').split(u'박')
    if len(splitText) > 1:
        # NOTE(review): the character class also admits a literal '^';
        # kept as in the original pattern.
        numbers = re.findall(u'[\\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[0]))
        productCls.night = numbers[-1].encode('utf-8')
        numbers = re.findall(u'[\\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[1]))
        productCls.period = numbers[0].encode('utf-8')
    else:
        productCls.night = '0'
        productCls.period = '0'


def _mergeDetailRow(con, productcode, productCls, departCity, night):
    """Build the naeiltour detail merge query for productCls and run it."""
    query = savefilegethtml.getDetailMergeQueryTest1(
        'naeiltour', productcode, productCls.code, productCls.productname,
        '20' + productCls.dDay, '', productCls.period, departCity, '',
        productCls.airCode, productCls.status, productCls.url,
        productCls.price, '0', '0', '0', '', night)
    _runMergeQuery(con, query)


def searchProduct(filename, productcode, productName, period, targetUrl, listUrl, productDetailUrl, departCity, tourkind, dmst_div, country='', city='', comment=''):
    """Scrape one naeiltour product and merge its master/detail rows into Oracle.

    Steps: fetch ``targetUrl`` and collect every fn_goodDetail('...') value;
    write the master row; then, for each collected value, fetch
    ``listUrl + productcode + '&sel_day=' + value`` and walk the page line by
    line, merging one detail row whenever a line carries the marker that
    belongs to ``tourkind``.

    Parameters:
        filename: despite the name, an open writable stream; progress and
            error lines are written to it.
        productcode: product code; appended to the URLs and passed to the
            query builders.
        productName: passed to codes.getCityCode and stored on every row.
        period: when non-empty (tourkind 'F' only) it is used as each row's
            period; when '' night/period are parsed from the listing line.
        targetUrl: page scanned for fn_goodDetail('...') values.
        listUrl, productDetailUrl: URL prefixes for the listing page and for
            each row's stored URL.
        departCity: passed to the detail query; for tourkind 'D' it is
            replaced per row by 'PUS' or 'ICN'.
        tourkind: 'F', 'W', 'G' or 'D' selects the parsing rules; any other
            value writes only the master row.
        dmst_div, country, city, comment: forwarded to the master query
            builder (country/city/comment also to codes.getCityCode).

    Returns None.  Failures inside the database section are written to
    ``filename`` and not raised; the initial fetch of ``targetUrl`` is not
    guarded.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation was lost; the marker checks are assumed to run for every
    line (not only fn_price lines) because the parsed product is carried
    from line to line — confirm against the deployed file.
    NOTE(review): the connection string is hard-coded in source; consider
    moving credentials to configuration.
    """
    # write() is used instead of the print statement so the block parses on
    # both Python 2 and 3; the emitted text is unchanged.
    detailHtml = savefilegethtml.getHtml(targetUrl, '', '', 'naeiltourDetailHtml.txt')
    filename.write('TargetUrl :  %s\n' % (targetUrl,))

    departDayList = [
        line.split("fn_goodDetail('")[1].split("'")[0]
        for line in detailHtml
        if line.find("fn_goodDetail('") > -1
    ]

    # Bound before the try so that a failed connect no longer turns into a
    # NameError inside the finally clause.
    con = None
    try:
        con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")
        codeList = codes.getCityCode(productName, city, comment, country)
        cityList = codeList[0]
        nationList = codeList[1]
        query = savefilegethtml.getMasterMergeQueryTest1(
            'naeiltour', productcode, '', country, city, productName,
            tourkind, dmst_div, comment, '', nationList, cityList)
        _runMergeQuery(con, query)

        # Carried across lines: a fn_price line starts a new product and
        # later lines complete and store it.
        productCls = clsProduct()
        for dayInfo in departDayList:
            try:
                productListUrl = listUrl + productcode + '&sel_day=' + dayInfo
                sys.stdout.write('ProductListUrl : ' + productListUrl + '\n')
                productListHtml = savefilegethtml.getHtml(
                    productListUrl, '', '', 'naeiltourproductListHtml.txt')
                filename.write('ProductListUrl : ' + productListUrl + '\n')

                for product in productListHtml:
                    try:
                        if product.find("fn_price('") > -1:
                            productCls = clsProduct()
                            _fillFromFnPrice(productCls, product, tourkind,
                                             productcode, productName,
                                             productDetailUrl)

                        if tourkind == 'F' and period != '':
                            if product.find('<td width="134">') > -1:
                                productCls.period = period
                                productCls.airCode = _airCodeBeforeGif(product)
                                _mergeDetailRow(con, productcode, productCls,
                                                departCity, '')
                        elif period == '' and tourkind in ('F', 'W', 'G', 'D'):
                            if tourkind in ('F', 'D'):
                                if (product.find('<td class="FRIDAYSPACING" >') > -1
                                        and product.find('.gif') > -1):
                                    productCls.airCode = _airCodeBeforeGif(product)
                                rowMarker = 'idth="220">'
                            else:
                                rowMarker = 'valign="middle"'

                            if product.find(rowMarker) > -1:
                                _fillNightAndPeriod(productCls, product)
                                if tourkind == 'D':
                                    if product.find('COLOR=BLUE>') > -1:
                                        departCity = 'PUS'
                                    else:
                                        departCity = 'ICN'
                                _mergeDetailRow(con, productcode, productCls,
                                                departCity, productCls.night)
                    except cx_Oracle.DatabaseError as err1:
                        filename.write('%s\n' % (err1,))
                    except Exception:
                        # Best effort per line: log and continue, but let
                        # KeyboardInterrupt/SystemExit propagate.
                        filename.write('Depth3 Error: %s\n' % (sys.exc_info()[0],))
            except Exception:
                filename.write('Depth2 Error: %s\n' % (sys.exc_info()[0],))
    except Exception:
        filename.write('Depth1 Error: %s\n' % (sys.exc_info()[0],))
    finally:
        if con is not None:
            con.close()
detailProductCls.proc_cd = detailInfo.split('/web/tour/')[1].split('?')[0] #print 'productname : ', detailProductCls.productName.decode('utf-8') #print 'url : ', detailProductCls.url #print 'proc_cd : ', detailProductCls.proc_cd elif detailInfo.find('class="wons"') > -1: chkPrice = True elif chkPrice: chkPrice = False numArray = tourUtil.getNumArray(detailInfo) detailProductCls.price = '' for num in numArray: detailProductCls.price += num #print 'Price : ', detailProductCls.price elif detailInfo.find('class="reservation"') > -1: if detailInfo.find('예약마감') > -1: detailProductCls.status = codes.getStatus('onlinetour', 'Finish') elif detailInfo.find('예약가능') > -1: detailProductCls.status = codes.getStatus('onlinetour', 'Avail') elif detailInfo.find('출발가능') > -1: detailProductCls.status = codes.getStatus('onlinetour', 'Confirm') else: detailProductCls.status = 'Etc' #print 'status : ', detailProductCls.status #print >> exceptFile, 'status : ', detailProductCls.status query = tourQuery.getDetailMergeQuery(tourAgency, productCls.productCode, detailProductCls.proc_cd, detailProductCls.productName.decode('utf-8'), detailProductCls.dDay + detailProductCls.dTime, detailProductCls.aDay+detailProductCls.aTime, productCls.period, mainMenu.departCity, '', detailProductCls.airCode, detailProductCls.status, detailProductCls.url, detailProductCls.price, '0', '0', '0', '', productCls.night) #print query cursor = con.cursor() cursor.execute(query) con.commit()
'#text'].replace("'", "") air_cd = t['SAirCode'][:2] st_dt = t['SPriceDay']['#text'] st_time = t[ 'SstartTime'].replace( ':', '') arr_day = t['SArrivalDay'][ '#text'] arr_time = t[ 'SArrivalTime'].replace( ':', '') tr_term = t['SDay'] tr_div = themeCode prd_fee = t['SPrice']['#text'] prd_status = codes.getStatus( 'modetour', t['SDetailState']['#text']) prd_code = t['SPriceNum'][ '#text'] flynum = t['SstartAir'] #period = t['SNight'] #기간이 아니라... 잠자는 횟수임.. 1박2일이면.. 1 airline = t['SAirName'] prd_url = 'http://www.modetour.com/Package/Itinerary.aspx?startLocation=' + sublist.startLocation + '&location=' + sublist.location + '&location1=' + sublist.location1 + '&theme=' + sublist.Theme + '&theme1=' + sublist.Theme1 + '&MLoc=' + sublist.MLoc + '&Pnum=' + prd_code #print 'product url:' + prd_url query = tourQuery.getDetailMergeQuery( tourAgency, productCode, prd_code, prd_nm, st_dt + st_time, arr_day + arr_time, tr_term, sublist.startLocation, '',
for product in productListHtml: if product.find("fn_price('") > -1: productCls = clsProduct() productSplit = product.split('fn_price')[1].split("'") productCls.productCode = productSplit[1] productCls.dDay = productSplit[3] productCls.code = productSplit[5] if tourkind == 'W' or tourkind == 'G': productCls.airCode = product[product.find('.gif') - 2:product.find('.gif')] else: productCls.airCode = productSplit[ 7] # 한글 공항코드... but 우리는 영문2자리 공항코드가 필요하다... productCls.price = productSplit[9].replace(',', '') #print productSplit[11] productCls.status = codes.getStatus( 'naeiltour', productSplit[11]) # 공백 : 예약가능, 03 : 마감임박, 05 : 마감 #if tourkind == 'W': #productCls.city = productSplit[13] productCls.url = 'URL' productCls.productname = 'productName' productCls.dTime = '' productCls.aDay = '' productCls.aTime = '' if period != '' and tourkind == 'F': if product.find('<td width="134">') > -1: productCls.period = period #print productCls.toString() productCls.airCode = product[product.find('.gif') - 2:product.find('.gif')] query = savefilegethtml.getDetailMergeQueryTest1(
'대기예약'.encode( 'cp949') ) > -1: waitSeat = True elif detailProduct.find( '<td class="reservation">' ) > -1: detailProductCls.url = homepageUrl + detailProduct.split( "location.href='" )[1].split("'")[0] if detailProduct.find( '예약마감'.encode( 'cp949') ) > -1: detailProductCls.status = codes.getStatus( 'tourbaksa', '예약마감') elif detailProduct.find( '바로예약'.encode( 'cp949') ) > -1: detailProductCls.status = codes.getStatus( 'tourbaksa', '바로예약') elif waitSeat and detailProduct.find( '예약접수'.encode( 'cp949') ) > -1: detailProductCls.status = codes.getStatus( 'tourbaksa', '대기예약')
detailProduct, 'grade":"', '","gname') detailClass.gname = valueParcing( detailProduct, 'gname":"', '","pname').replace( "'", "").decode('utf-8') detailClass.pname = valueParcing( detailProduct, 'pname":"', '","amt').replace( "'", "").decode('utf-8') detailClass.amt = valueParcing( detailProduct, 'amt":"', '","lminute') detailClass.lminute = codes.getStatus( 'hanatour', valueParcing( detailProduct, 'lminute":"', '"}')) detailClass.url = 'http://www.hanatour.com/asp/booking/productPackage/pk-12000.asp?pkg_code=' + detailClass.pcode #print detailClass.toString() #print idx #idx += 1 # 2014. 6. 29. 정규식으로 이름에서 국가, 도시 코드 빼오도록.. query = tourQuery.getDetailMergeQuery( tourAgency, productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday + detailClass.dtime,
productCls.url = 'URL' #http://www.verygoodtour.com/Product/Package/PackageDetail?ProCode=APP5099-140612LJ&MenuCode=1010201 tmp = len(product.split('</td>')[0].split('>')) #print >> exceptFile, product.split('</td>')[0].split('>')[tmp - 1] if product.find('출발확정') > -1: departConfirm = True productCls.name = product.split('</td>')[0].split('>')[ tmp - 1].decode('utf-8') elif product.find('pro_price') > -1: productCls.price = product.split('원')[0].split( '>')[1].replace(',', '') elif product.find('class="pro_condition"') > -1: #print >> exceptFile, product.split('title="')[1].split('"')[0] if product.find('예약마감') > -1: productCls.booked = codes.getStatus( 'verygoodtour', '예약마감') elif product.find('대기예약') > -1: productCls.booked = codes.getStatus( 'verygoodtour', '대기예약') elif departConfirm: productCls.booked = codes.getStatus( 'verygoodtour', '출발확정') elif product.find('예약하기') > -1 or product.find('석') > -1: productCls.booked = codes.getStatus( 'verygoodtour', '예약하기') else: productCls.booked = codes.getStatus( 'verygoodtour', 'None') elif product.find('</tr>') > -1: if productCls.code.strip() == '':
clsProduct.price = spliter[ 1].split('원')[ 0].replace( ',', '') #print >> ybtourproductfile, 'Price:' + clsProduct.price elif parcer.find( '출발확정' ) > -1 or parcer.find( '예약마감' ) > -1 or parcer.find( '예약가능') > -1: spliter = parcer.strip( ).split('>') #print codes.getStatus('ybtour', spliter[1].split('<')[0]) clsProduct.status = codes.getStatus( 'ybtour', spliter[1].split( '<')[0]) #print >> ybtourproductfile, 'Status:' + clsProduct.status elif parcer.strip( ) == '</tr>': flag = False # 2014. 06. 29. 여행상품명에서 국가, 도시코드 가져오는 부분으로 적용.. query = tourQuery.getDetailMergeQuery( tourAgency, detailProduct, clsProduct. detailcode, clsProduct. productName, targetYear +
def _runMergeQuery(con, query):
    """Execute one SQL statement on a fresh cursor and commit it.

    The cursor is closed even when execute/commit raises, so a failing row
    does not leave an open cursor behind on the shared connection.
    """
    cursor = con.cursor()
    try:
        cursor.execute(query)
        con.commit()
    finally:
        cursor.close()


def _airCodeBeforeGif(product):
    """Return the two characters that precede the first '.gif' in the line.

    NOTE(review): when the line has no '.gif', find() returns -1 and this
    yields product[-3:-1], exactly as the original inline slice did.
    """
    gifPos = product.find('.gif')
    return product[gifPos - 2:gifPos]


def _fillFromFnPrice(productCls, product, tourkind, productcode, productName,
                     productDetailUrl):
    """Populate productCls from the quoted arguments of a fn_price('...') call."""
    productSplit = product.split('fn_price')[1].split("'")
    productCls.productCode = productSplit[1]
    productCls.dDay = productSplit[3]
    productCls.code = productSplit[5]
    if tourkind == 'W' or tourkind == 'G':
        productCls.airCode = _airCodeBeforeGif(product)
    else:
        # Original note: this field is a Korean airport code, but a
        # two-letter English code is needed; the 'F' and 'D' paths overwrite
        # it later from a '.gif' file name.
        productCls.airCode = productSplit[7]
    productCls.price = productSplit[9].replace(',', '')
    # Original note on the raw value: blank = bookable, 03 = closing soon,
    # 05 = closed.
    productCls.status = codes.getStatus('naeiltour', productSplit[11])
    productCls.url = productDetailUrl + productcode + '&sel_day=' + productCls.dDay
    productCls.productname = productName
    productCls.dTime = ''
    productCls.aDay = ''
    productCls.aTime = ''


def _fillNightAndPeriod(productCls, product):
    """Set productCls.night / productCls.period from a cp949-encoded line.

    The line is split on u'박'; the last number before it becomes ``night``
    and the first number after it becomes ``period``.  Both are '0' when the
    marker is absent.  Raises IndexError when the marker is present but a
    number is missing (the caller logs that and moves on).
    """
    splitText = product.decode('cp949').split(u'박')
    if len(splitText) > 1:
        # NOTE(review): the character class also admits a literal '^';
        # kept as in the original pattern.
        numbers = re.findall(u'[\\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[0]))
        productCls.night = numbers[-1].encode('utf-8')
        numbers = re.findall(u'[\\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[1]))
        productCls.period = numbers[0].encode('utf-8')
    else:
        productCls.night = '0'
        productCls.period = '0'


def _mergeDetailRow(con, productcode, productCls, departCity, night):
    """Build the naeiltour detail merge query for productCls and run it."""
    query = savefilegethtml.getDetailMergeQueryTest1(
        'naeiltour', productcode, productCls.code, productCls.productname,
        '20' + productCls.dDay, '', productCls.period, departCity, '',
        productCls.airCode, productCls.status, productCls.url,
        productCls.price, '0', '0', '0', '', night)
    _runMergeQuery(con, query)


def searchProduct(filename, productcode, productName, period, targetUrl, listUrl, productDetailUrl, departCity, tourkind, dmst_div, country='', city='', comment=''):
    """Scrape one naeiltour product and merge its master/detail rows into Oracle.

    Steps: fetch ``targetUrl`` and collect every fn_goodDetail('...') value;
    write the master row; then, for each collected value, fetch
    ``listUrl + productcode + '&sel_day=' + value`` and walk the page line by
    line, merging one detail row whenever a line carries the marker that
    belongs to ``tourkind``.

    Parameters:
        filename: despite the name, an open writable stream; progress and
            error lines are written to it.
        productcode: product code; appended to the URLs and passed to the
            query builders.
        productName: passed to codes.getCityCode and stored on every row.
        period: when non-empty (tourkind 'F' only) it is used as each row's
            period; when '' night/period are parsed from the listing line.
        targetUrl: page scanned for fn_goodDetail('...') values.
        listUrl, productDetailUrl: URL prefixes for the listing page and for
            each row's stored URL.
        departCity: passed to the detail query; for tourkind 'D' it is
            replaced per row by 'PUS' or 'ICN'.
        tourkind: 'F', 'W', 'G' or 'D' selects the parsing rules; any other
            value writes only the master row.
        dmst_div, country, city, comment: forwarded to the master query
            builder (country/city/comment also to codes.getCityCode).

    Returns None.  Failures inside the database section are written to
    ``filename`` and not raised; the initial fetch of ``targetUrl`` is not
    guarded.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation was lost; the marker checks are assumed to run for every
    line (not only fn_price lines) because the parsed product is carried
    from line to line — confirm against the deployed file.
    NOTE(review): the connection string is hard-coded in source; consider
    moving credentials to configuration.
    """
    # write() is used instead of the print statement so the block parses on
    # both Python 2 and 3; the emitted text is unchanged.
    detailHtml = savefilegethtml.getHtml(targetUrl, '', '', 'naeiltourDetailHtml.txt')
    filename.write('TargetUrl :  %s\n' % (targetUrl,))

    departDayList = [
        line.split("fn_goodDetail('")[1].split("'")[0]
        for line in detailHtml
        if line.find("fn_goodDetail('") > -1
    ]

    # Bound before the try so that a failed connect no longer turns into a
    # NameError inside the finally clause.
    con = None
    try:
        con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")
        codeList = codes.getCityCode(productName, city, comment, country)
        cityList = codeList[0]
        nationList = codeList[1]
        query = savefilegethtml.getMasterMergeQueryTest1(
            'naeiltour', productcode, '', country, city, productName,
            tourkind, dmst_div, comment, '', nationList, cityList)
        _runMergeQuery(con, query)

        # Carried across lines: a fn_price line starts a new product and
        # later lines complete and store it.
        productCls = clsProduct()
        for dayInfo in departDayList:
            try:
                productListUrl = listUrl + productcode + '&sel_day=' + dayInfo
                sys.stdout.write('ProductListUrl : ' + productListUrl + '\n')
                productListHtml = savefilegethtml.getHtml(
                    productListUrl, '', '', 'naeiltourproductListHtml.txt')
                filename.write('ProductListUrl : ' + productListUrl + '\n')

                for product in productListHtml:
                    try:
                        if product.find("fn_price('") > -1:
                            productCls = clsProduct()
                            _fillFromFnPrice(productCls, product, tourkind,
                                             productcode, productName,
                                             productDetailUrl)

                        if tourkind == 'F' and period != '':
                            if product.find('<td width="134">') > -1:
                                productCls.period = period
                                productCls.airCode = _airCodeBeforeGif(product)
                                _mergeDetailRow(con, productcode, productCls,
                                                departCity, '')
                        elif period == '' and tourkind in ('F', 'W', 'G', 'D'):
                            if tourkind in ('F', 'D'):
                                if (product.find('<td class="FRIDAYSPACING" >') > -1
                                        and product.find('.gif') > -1):
                                    productCls.airCode = _airCodeBeforeGif(product)
                                rowMarker = 'idth="220">'
                            else:
                                rowMarker = 'valign="middle"'

                            if product.find(rowMarker) > -1:
                                _fillNightAndPeriod(productCls, product)
                                if tourkind == 'D':
                                    if product.find('COLOR=BLUE>') > -1:
                                        departCity = 'PUS'
                                    else:
                                        departCity = 'ICN'
                                _mergeDetailRow(con, productcode, productCls,
                                                departCity, productCls.night)
                    except cx_Oracle.DatabaseError as err1:
                        filename.write('%s\n' % (err1,))
                    except Exception:
                        # Best effort per line: log and continue, but let
                        # KeyboardInterrupt/SystemExit propagate.
                        filename.write('Depth3 Error: %s\n' % (sys.exc_info()[0],))
            except Exception:
                filename.write('Depth2 Error: %s\n' % (sys.exc_info()[0],))
    except Exception:
        filename.write('Depth1 Error: %s\n' % (sys.exc_info()[0],))
    finally:
        if con is not None:
            con.close()
detailClass = clsDetailProduct() detailClass.pcode = valueParcing(detailProduct, 'pcode":"', '","sdate') sDate = valueParcing(detailProduct, 'sdate":"', '","adate') aDate = valueParcing(detailProduct, 'adate":"', '","acode') detailClass.dday = targetYear + sDate.split('(')[0].strip().replace('/', '') detailClass.dtime = sDate.split(')')[1].strip().replace(':', '') detailClass.aday = targetYear + aDate.split('(')[0].strip().replace('/', '') detailClass.atime = aDate.split(')')[1].strip().replace(':', '') detailClass.acode = valueParcing(detailProduct, 'acode":"', '","aline') detailClass.aline = valueParcing(detailProduct, 'aline":"', '","tday') detailClass.tday = valueParcing(detailProduct, 'tday":"', '","grade') detailClass.grade = valueParcing(detailProduct, 'grade":"', '","gname') detailClass.gname = valueParcing(detailProduct, 'gname":"', '","pname').replace("'", "").decode('utf-8') detailClass.pname = valueParcing(detailProduct, 'pname":"', '","amt').replace("'", "").decode('utf-8') detailClass.amt = valueParcing(detailProduct, 'amt":"', '","lminute') detailClass.lminute = codes.getStatus('hanatour', valueParcing(detailProduct, 'lminute":"', '"}')) detailClass.url = 'http://www.hanatour.com/asp/booking/productPackage/pk-12000.asp?pkg_code=' + detailClass.pcode #print detailClass.toString() #print idx #idx += 1 # 2014. 6. 29. 정규식으로 이름에서 국가, 도시 코드 빼오도록.. 
query = tourQuery.getDetailMergeQuery(tourAgency, productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday+detailClass.dtime, detailClass.aday+detailClass.atime, detailClass.tday, departCity, '', detailClass.acode, detailClass.lminute, detailClass.url, detailClass.amt, '0', '0', '0', '') #query = savefilegethtml.getDetailMergeQuery('hanatour', productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday+detailClass.dtime, detailClass.aday+detailClass.atime, detailClass.tday, departCity, '', detailClass.acode, detailClass.lminute, detailClass.url, detailClass.amt, '0', '0', '0', '') #print >> exceptFile ,query cursor = con.cursor() cursor.execute(query) con.commit() #break except cx_Oracle.DatabaseError as dberr:
spliter = parcer.strip().split(' ') clsProduct.url = 'http://www.ybtour.co.kr' + spliter[2].split('"')[1] spliter = parcer.strip().split('>') clsProduct.productName = spliter[2].split('<')[0].replace("'", '').decode('utf-8') #print >> ybtourproductfile, 'URL:' + clsProduct.url + ', Name:' + clsProduct.productName elif parcer.find('박') > -1 and len(parcer) < 9: clsProduct.period = parcer.strip()[:1] #print >> ybtourproductfile, 'Period:' + clsProduct.period elif parcer.find('<td class="blue">') > -1 and parcer.find('원') > -1: spliter = parcer.strip().split('>') clsProduct.price = spliter[1].split('원')[0].replace(',', '') #print >> ybtourproductfile, 'Price:' + clsProduct.price elif parcer.find('출발확정') > -1 or parcer.find('예약마감') > -1 or parcer.find('예약가능') > -1: spliter = parcer.strip().split('>') #print codes.getStatus('ybtour', spliter[1].split('<')[0]) clsProduct.status = codes.getStatus('ybtour', spliter[1].split('<')[0]) #print >> ybtourproductfile, 'Status:' + clsProduct.status elif parcer.strip() == '</tr>': flag = False # 2014. 06. 29. 여행상품명에서 국가, 도시코드 가져오는 부분으로 적용.. query = tourQuery.getDetailMergeQuery(tourAgency, detailProduct, clsProduct.detailcode, clsProduct.productName, targetYear+clsProduct.departDay+clsProduct.departTime, targetYear+clsProduct.arriveDay+clsProduct.arriveTime, clsProduct.period, departCity, '', clsProduct.airCode, clsProduct.status, clsProduct.url, clsProduct.price, '0', '0', '0', '') #query = savefilegethtml.getDetailMergeQuery('ybtour', detailProduct, clsProduct.detailcode, clsProduct.productName, targetYear+clsProduct.departDay+clsProduct.departTime, targetYear+clsProduct.arriveDay+clsProduct.arriveTime, clsProduct.period, departCity, '', clsProduct.airCode, clsProduct.status, clsProduct.url, clsProduct.price, '0', '0', '0', '') #print query con.commit() cursor = con.cursor() cursor.execute(query) con.commit() #break except: print "ML5 Parcing Error:", sys.exc_info()[0]
def _runMergeQuery(con, query):
    """Execute one SQL statement on a fresh cursor and commit it.

    The cursor is closed even when execute/commit raises, so a failing row
    does not leave an open cursor behind on the shared connection.
    """
    cursor = con.cursor()
    try:
        cursor.execute(query)
        con.commit()
    finally:
        cursor.close()


def _lookupRegionCodes(encodedName):
    """Decode a UTF-8 name and return (cityList, nationList, continentList, siteList)."""
    codeList = codes.getCityCode(encodedName.decode('utf-8'))
    return codeList[0], codeList[1], codeList[2], codeList[3]


def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div):
    """Store one tour2000 master product, its region codes and its detail rows.

    The master row is merged first, then region codes derived from the
    product name (falling back to ``detailUrl.name`` when the product name
    yields no city, nation or continent) are stored, and finally the page at
    ``productCls.url`` is walked line by line; each line contributes one
    field of the current detail row, and the second '<p class="pl10">' line
    of a pair triggers the detail merge.

    Parameters:
        productCls: master product; its url, name, code, period and night
            attributes are read.
        detailUrl: object whose ``name`` is the fallback text for region
            lookup.
        regionUrl: unused in this function.
        tourAgency: agency key forwarded to the query builders.
        kind: unused in this function.
            NOTE(review): the master query reads the module-level
            ``menu.kind`` instead — confirm that is intended.
        dmst_div: forwarded to the master query builder.

    Reads module-level ``con``, ``exceptFile``, ``menu``, ``targetYear``,
    ``targetMonth`` and ``mainUrl``.  Returns None.  Errors while handling a
    detail line are written to ``exceptFile`` and the loop continues; errors
    before the loop propagate.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation was lost — in particular that ``remainSeat`` is set for
    every status and that the merge runs only on the second pl10 line;
    confirm against the deployed file.
    """
    # write() is used instead of the print statement so the block parses on
    # both Python 2 and 3; the emitted text is unchanged.
    urlLine = 'Product Url :  %s\n' % (productCls.url,)
    sys.stdout.write(urlLine)
    exceptFile.write(urlLine)

    # Original note (2014-07-23): the category's country is deliberately not
    # used for the lookup; only the product name is.
    cityList, nationList, continentList, siteList = _lookupRegionCodes(productCls.name)
    if not cityList and not nationList and not continentList:
        cityList, nationList, continentList, siteList = _lookupRegionCodes(detailUrl.name)

    # Insert the master product.
    query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code, productCls.name.decode('utf-8'), menu.kind, dmst_div, '', '')
    _runMergeQuery(con, query)

    # NOTE(review): the original comment here said "delete region data"
    # although the call is insertRegionData — confirm what it does.
    codes.insertRegionData(tourAgency, productCls.code, cityList, nationList, continentList, siteList)

    # CSS class on the booking cell -> status label handed to codes.getStatus;
    # first match wins, in this order.
    statusByCssClass = (
        ('text_pink', '예약가능'),
        ('text_blau', '출발가능'),
        ('text_green', '대기예약'),
        ('text_grayLightSmall', '예약마감'),
    )

    detailProductHtml = savefilegethtml.getHtml(productCls.url, '', '', 'tour2000DetailHtml' + targetMonth + '.txt')
    # pl10 lines come in pairs: 0 = next one is the first of a pair.
    pl10Idx = 0
    for detailProduct in detailProductHtml:
        try:
            if detailProduct.find('<span class="text_pink">') > -1 and detailProduct.find('<a href=') < 0:
                # Start of a new detail row: departure/arrival numbers.
                detailCls = clsProductDetail()
                numArray = tourUtil.getNumArray(detailProduct)
                if len(numArray) > 7:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = numArray[2] + numArray[3]
                    detailCls.aDay = targetYear + numArray[4] + numArray[5]
                    detailCls.aTime = numArray[6] + numArray[7]
                elif len(numArray) == 4:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = ''
                    detailCls.aDay = targetYear + numArray[2] + numArray[3]
                    detailCls.aTime = ''
            elif detailProduct.find('onError') > -1:
                # Characters 4..3 before '.gif' are taken as the air code.
                gifPos = detailProduct.find('.gif')
                detailCls.airCode = detailProduct[gifPos - 4:gifPos - 2]
            elif detailProduct.find('text_redB') > -1:
                numArray = tourUtil.getNumArray(tourUtil.getRemovedHtmlTag(detailProduct))
                for num in numArray:
                    detailCls.price += num
            elif detailProduct.find('</a></td>') > -1:
                for cssClass, statusLabel in statusByCssClass:
                    if detailProduct.find(cssClass) > -1:
                        detailCls.status = codes.getStatus('tour2000', statusLabel)
                        break
                detailCls.remainSeat = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip()
            elif detailProduct.find('<p class="pl10">') > -1:
                if pl10Idx == 0:
                    pl10Idx = 1
                    detailCls.productName = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip()
                    detailCls.url = mainUrl + tourUtil.getTagAttr(detailProduct, 'a', 'href')
                    detailCls.productSeq = (detailProduct.split('ev_ym=')[1].split('&')[0]
                                            + detailProduct.split('ev_seq=')[1].split('&')[0])
                else:
                    pl10Idx = 0
                    if detailCls.productName.find('부산출발') > -1:
                        departCity = 'PUS'
                    else:
                        departCity = 'ICN'
                    query = tourQuery.getDetailMergeQuery(
                        tourAgency, productCls.code, detailCls.productSeq,
                        detailCls.productName.decode('utf-8'),
                        detailCls.dDay + detailCls.dTime,
                        detailCls.aDay + detailCls.aTime,
                        productCls.period, departCity, '', detailCls.airCode,
                        detailCls.status, detailCls.url, detailCls.price,
                        '0', '0', '0', '', productCls.night)
                    _runMergeQuery(con, query)
        except Exception:
            # Best effort per line: log and continue, but let
            # KeyboardInterrupt/SystemExit propagate.
            exceptFile.write('detail parcing Error :  %s\n' % (sys.exc_info()[0],))
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div):
    """Store one tour2000 master product, its region rows and its departures.

    Steps, in order:
      1. Look up city / nation / continent / site codes from the product
         name with ``codes.getCityCode``; when city, nation and continent
         all come back empty, repeat the lookup with ``detailUrl.name``.
      2. Execute and commit the master merge query.
      3. Pass the resolved code lists to ``codes.insertRegionData``.
      4. Fetch ``productCls.url`` and scan it line by line, collecting the
         fields of each departure row and executing + committing a detail
         merge query when the second ``<p class="pl10">`` line of a row is
         reached.

    Args:
        productCls: master product; ``url``, ``name``, ``code``, ``period``
            and ``night`` are read.
        detailUrl: object whose ``name`` is the fallback text for the code
            lookup.
        regionUrl: not used by the current code (kept for callers).
        tourAgency: agency identifier forwarded to the query builders and
            to ``codes.insertRegionData``.
        kind: not used by the current code; the master query reads the
            module-level ``menu.kind`` instead.
            NOTE(review): confirm whether ``kind`` was meant here.
        dmst_div: forwarded to the master merge query.

    Module-level names read: ``con``, ``exceptFile``, ``menu``, ``mainUrl``,
    ``targetYear``, ``targetMonth``.

    Errors raised while handling a single page line are logged to
    ``exceptFile`` and the scan continues; errors from steps 1-3 propagate.

    NOTE(review): this block was stored with all line breaks and indentation
    collapsed.  The nesting below is a reconstruction; the two spots that
    could not be determined from the text alone are marked inline.
    """
    # Echo the page being processed to the console and to the crawl log.
    print >> sys.stdout, 'Product Url : ', productCls.url
    print >> exceptFile, 'Product Url : ', productCls.url

    # 2014-07-23: it was decided not to feed the category's country into the
    # lookup any more; only the product name is used.
    codeList = codes.getCityCode(productCls.name.decode('utf-8'))
    if (len(codeList[0]) == 0 and len(codeList[1]) == 0
            and len(codeList[2]) == 0):
        # Nothing recognised in the product name: retry with detailUrl.name.
        codeList = codes.getCityCode(detailUrl.name.decode('utf-8'))
    cityList = codeList[0]
    nationList = codeList[1]
    continentList = codeList[2]
    siteList = codeList[3]  # 2014-08-03: site added

    # Master product.
    query = tourQuery.getMasterMergeQuery(
        tourAgency, productCls.code, productCls.name.decode('utf-8'),
        menu.kind, dmst_div, '', '')

    # (css marker, label handed to codes.getStatus), checked in this order.
    statusLabels = (
        ('text_pink', '예약가능'),
        ('text_blau', '출발가능'),
        ('text_green', '대기예약'),
        ('text_grayLightSmall', '예약마감'),
    )

    # One cursor serves every statement of this call and is always closed;
    # previously a new cursor was opened per statement and never released.
    cursor = con.cursor()
    try:
        cursor.execute(query)
        con.commit()

        # Region data (the original note here said "delete", but the call
        # made is insertRegionData).
        codes.insertRegionData(tourAgency, productCls.code, cityList,
                               nationList, continentList, siteList)

        detailProductHtml = savefilegethtml.getHtml(
            productCls.url, '', '',
            'tour2000DetailHtml' + targetMonth + '.txt')

        # 0: the next pl10 line opens a row, 1: the next pl10 line closes it.
        pl10Idx = 0
        for detailProduct in detailProductHtml:
            try:
                if ('<span class="text_pink">' in detailProduct
                        and '<a href=' not in detailProduct):
                    # Date line: start collecting a new departure.
                    detailCls = clsProductDetail()
                    numArray = tourUtil.getNumArray(detailProduct)
                    if len(numArray) > 7:
                        # Date pair + time pair for departure, then arrival.
                        detailCls.dDay = targetYear + numArray[0] + numArray[1]
                        detailCls.dTime = numArray[2] + numArray[3]
                        detailCls.aDay = targetYear + numArray[4] + numArray[5]
                        detailCls.aTime = numArray[6] + numArray[7]
                    elif len(numArray) == 4:
                        # Dates only, no times.
                        detailCls.dDay = targetYear + numArray[0] + numArray[1]
                        detailCls.dTime = ''
                        detailCls.aDay = targetYear + numArray[2] + numArray[3]
                        detailCls.aTime = ''
                elif 'onError' in detailProduct:
                    # Two characters ending two positions before '.gif'.
                    gifIdx = detailProduct.find('.gif')
                    detailCls.airCode = detailProduct[gifIdx - 4:gifIdx - 2]
                elif 'text_redB' in detailProduct:
                    # Price: concatenate every number found in the cell text.
                    for num in tourUtil.getNumArray(
                            tourUtil.getRemovedHtmlTag(detailProduct)):
                        detailCls.price += num
                elif '</a></td>' in detailProduct:
                    for cssClass, label in statusLabels:
                        if cssClass in detailProduct:
                            detailCls.status = codes.getStatus('tour2000',
                                                               label)
                            break
                    # NOTE(review): could also have belonged only to the
                    # last status branch; remainSeat is not used below.
                    detailCls.remainSeat = tourUtil.getRemovedHtmlTag(
                        detailProduct).replace("'", "").strip()
                elif '<p class="pl10">' in detailProduct:
                    if pl10Idx == 0:
                        pl10Idx = 1
                        detailCls.productName = tourUtil.getRemovedHtmlTag(
                            detailProduct).replace("'", "").strip()
                        detailCls.url = mainUrl + tourUtil.getTagAttr(
                            detailProduct, 'a', 'href')
                        detailCls.productSeq = (
                            detailProduct.split('ev_ym=')[1].split('&')[0]
                            + detailProduct.split('ev_seq=')[1].split('&')[0])
                    else:
                        # NOTE(review): the merge is placed in this branch
                        # (once per row); the collapsed source would also
                        # allow it to run after both pl10 lines.
                        pl10Idx = 0
                        if '부산출발' in detailCls.productName:
                            departCity = 'PUS'
                        else:
                            departCity = 'ICN'
                        query = tourQuery.getDetailMergeQuery(
                            tourAgency, productCls.code, detailCls.productSeq,
                            detailCls.productName.decode('utf-8'),
                            detailCls.dDay + detailCls.dTime,
                            detailCls.aDay + detailCls.aTime,
                            productCls.period, departCity, '',
                            detailCls.airCode, detailCls.status,
                            detailCls.url, detailCls.price,
                            '0', '0', '0', '', productCls.night)
                        cursor.execute(query)
                        con.commit()
            except Exception:
                # Best effort per line: log and keep scanning.  Narrowed
                # from a bare except so Ctrl-C / SystemExit still stop the
                # crawl.
                print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0]
    finally:
        cursor.close()
# Excerpt of a modetour detail loader, stored on one physical line.
# NOTE(review): line breaks and indentation were lost, so statement
# boundaries and nesting cannot be read from whitespace, and as stored the
# first bare '#' comments out the rest of the line. Restore the line breaks
# before running this.
# Visible behaviour: `continue`s when tree['ModeSangPum'] has no 'SangList'
# key (so an enclosing loop outside this excerpt is implied); for entries t
# of tree['ModeSangPum']['SangList'] it builds a query with
# tourQuery.getDetailMergeQuery, executes it on a new cursor from `con`,
# commits and increments normalCnt (presumably once per entry).
# reg_div, tr_div, flynum and airline are assigned but not used in the
# visible text.
if not tree['ModeSangPum'].has_key('SangList'): continue for t in tree['ModeSangPum']['SangList']: reg_div = anCode prd_nm = t['SName']['#text'].replace("'", "") air_cd = t['SAirCode'][:2] st_dt = t['SPriceDay']['#text'] st_time = t['SstartTime'].replace(':', '') arr_day = t['SArrivalDay']['#text'] arr_time = t['SArrivalTime'].replace(':', '') tr_term = t['SDay'] tr_div = themeCode prd_fee = t['SPrice']['#text'] prd_status = codes.getStatus('modetour', t['SDetailState']['#text']) prd_code = t['SPriceNum']['#text'] flynum = t['SstartAir'] #period = t['SNight'] #not the trip length... it is the number of nights.. for 1 night / 2 days it is.. 1 airline = t['SAirName'] prd_url = 'http://www.modetour.com/Package/Itinerary.aspx?startLocation='+sublist.startLocation+'&location='+sublist.location+'&location1='+sublist.location1+'&theme='+sublist.Theme+'&theme1='+sublist.Theme1+'&MLoc='+sublist.MLoc+'&Pnum='+prd_code #print 'product url:' + prd_url query = tourQuery.getDetailMergeQuery(tourAgency, productCode, prd_code, prd_nm, st_dt+st_time, arr_day+arr_time, tr_term, sublist.startLocation, '', air_cd, prd_status, prd_url, prd_fee, '0', '0', '0', '') #print >> exceptFile, query #print query cursor = con.cursor() cursor.execute(query) con.commit() normalCnt += 1 #break
# Excerpt of a lottetour detail loader, stored on one physical line.
# NOTE(review): line breaks and indentation were lost; because the line
# starts with '#', the whole line is a comment as stored. Restore the line
# breaks before running this.
# Visible behaviour: logs detailListUrl to exceptFile, downloads it with
# urllib2.urlopen(...).read(), parses the body with json.loads and, for the
# parsed entries, fills a detailProductCls from the entry's keys. departCity
# is set to 'ICN', 'PUS' or 'GMP' depending on which place-name literal
# detail['meetPlace'] contains. The excerpt ends on a dangling `else:`, and
# the `except` for the visible `try:` is outside it.
# Find flight times, prices, etc. from the Detail List Url # 03: booking closed, 01: bookable, 04: waitlist booking print >> exceptFile, 'Detail Url : ' + detailListUrl #print 'Detail Url : ' + detailListUrl detailList = urllib2.urlopen(detailListUrl).read() #print detailList json_loaded = json.loads(detailList) for detail in json_loaded: try: clsDetail = detailProductCls() clsDetail.name = detail['name'] clsDetail.night = str(detail['night']) clsDetail.period = str(detail['days']) clsDetail.airCode = detail['airMark'] clsDetail.status = codes.getStatus('lottetour', detail['status']) clsDetail.seq = detail['id'] clsDetail.price = str(detail['sellingPrice']) clsDetail.dDay = detail['departureAir']['departureDate'].strip() clsDetail.dTime = detail['departureAir']['departureTime'].strip() clsDetail.aDay = detail['entryAir']['arrivalDate'].strip() clsDetail.aTime = detail['entryAir']['arrivalTime'].strip() clsDetail.url = mainUrl + '/evt/' + clsDetail.seq #http://www.lottetour.com/evt/A140721565?menu=558 if detail['meetPlace'].find('인천'.decode('utf-8')) > -1: clsDetail.departCity = 'ICN' elif detail['meetPlace'].find('김해'.decode('utf-8')) > -1: clsDetail.departCity = 'PUS' elif detail['meetPlace'].find('김포'.decode('utf-8')) > -1: clsDetail.departCity = 'GMP' else:
# Excerpt of a verygoodtour list parser, stored on one physical line.
# NOTE(review): it starts on an `elif`, so the opening `if` of the chain and
# the loop the `continue` belongs to are outside this excerpt; line breaks
# and indentation were lost and the first bare '#' comments out the rest of
# the line as stored.
# Visible branches on the current `product` line:
#   'class="pro_detail tl"' - code taken from the DetailPage('...') argument,
#       url set to the literal 'URL', departConfirm set when the
#       departure-confirmed marker is present, name taken from the last
#       '>'-separated piece before the first '</td>'.
#   'pro_price'             - price text with commas removed.
#   'class="pro_condition"' - booked status via codes.getStatus, tested in
#       the order: closed marker, waitlist marker, departConfirm flag,
#       reserve/seat markers, otherwise 'None'.
#   '</tr>'                 - `continue` when the code is blank, otherwise a
#       query is built with savefilegethtml.getDetailMergeQueryTest1; its
#       execution is not part of the visible text.
elif product.find('class="pro_detail tl"') > -1: productCls.code = product.split("DetailPage('")[1].split("'")[0] productCls.url = 'URL' #http://www.verygoodtour.com/Product/Package/PackageDetail?ProCode=APP5099-140612LJ&MenuCode=1010201 tmp = len(product.split('</td>')[0].split('>')) #print >> exceptFile, product.split('</td>')[0].split('>')[tmp - 1] if product.find('출발확정') > -1: departConfirm = True productCls.name = product.split('</td>')[0].split('>')[tmp - 1].replace("'", "").decode('utf-8') elif product.find('pro_price') > -1: productCls.price = product.split('원')[0].split('>')[1].replace(',', '') elif product.find('class="pro_condition"') > -1: #print >> exceptFile, product.split('title="')[1].split('"')[0] if product.find('예약마감') > -1: productCls.booked = codes.getStatus('verygoodtour', '예약마감') elif product.find('대기예약') > -1: productCls.booked = codes.getStatus('verygoodtour', '대기예약') elif departConfirm: productCls.booked = codes.getStatus('verygoodtour', '출발확정') elif product.find('예약하기') > -1 or product.find('석') > -1: productCls.booked = codes.getStatus('verygoodtour', '예약하기') else: productCls.booked = codes.getStatus('verygoodtour', 'None') elif product.find('</tr>') > -1: if productCls.code.strip() == '': continue query = savefilegethtml.getDetailMergeQueryTest1('vgtour', mastercode, productCls.code, productCls.name, productCls.sDay+productCls.sTime, productCls.aDay+productCls.aTime, productCls.period, 'ICN', '', productCls.aCode, productCls.booked, productCls.url, productCls.price, '0', '0', '0', '') #print query
# Excerpt of a tourbaksa detail parser, stored on one physical line.
# NOTE(review): line breaks and indentation were lost and the first bare '#'
# comments out the rest of the line as stored; the excerpt stops right after
# `cursor = con.cursor()`, so executing the query is not part of it.
# Visible branches on each line of detailProductHtml:
#   'startDate">'              - new clsDetailProduct, waitSeat reset to
#       False, dDay = targetYear + the date text with dots removed.
#   'price">'                  - price = first run of digits after commas
#       are removed.
#   'status'                   - waitSeat set when the cp949-encoded
#       waitlist marker is present.
#   '<td class="reservation">' - url from the location.href target, status
#       via codes.getStatus chosen by the cp949-encoded markers (the
#       waitlist status needs waitSeat plus the "accepted" marker),
#       detailCode = EV_YM value + EV_SEQ value, then a query built with
#       tourQuery.getDetailMergeQuery.
for detailProduct in detailProductHtml: #print 'Detail Product : ' + detailProduct if detailProduct.find('startDate">') > -1: detailProductCls = clsDetailProduct() waitSeat = False detailProductCls.dDay = targetYear + detailProduct.split('startDate">')[1].split('(')[0].replace('.', '') #20140611 elif detailProduct.find('price">') > -1: #detailProductCls.price = detailProduct.split('price">')[1].split('원')[0].replace(',', '') #130000 detailProductCls.price = re.findall(r'\d+', detailProduct.split('price">')[1].replace(',', ''))[0] elif detailProduct.find('status') > -1: if detailProduct.find('대기예약'.encode('cp949')) > -1: waitSeat = True elif detailProduct.find('<td class="reservation">') > -1: detailProductCls.url = homepageUrl + detailProduct.split("location.href='")[1].split("'")[0] if detailProduct.find('예약마감'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '예약마감') elif detailProduct.find('바로예약'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '바로예약') elif waitSeat and detailProduct.find('예약접수'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '대기예약') elif detailProduct.find('예약접수'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '예약접수') else: detailProductCls.status = codes.getStatus('tourbaksa', 'None') #detailProductCls.status = detailProduct.split('</button>')[0].split('>')[2] detailProductCls.detailCode = detailProduct.split('EV_YM=')[1].split('&')[0] + detailProduct.split('EV_SEQ=')[1].split('&')[0] #print detailProductCls.toString() query = tourQuery.getDetailMergeQuery(tourAgency, productList.productCode, detailProductCls.detailCode, productList.productname, detailProductCls.dDay, '', productList.period, level1.departCity, '', productList.airCode, detailProductCls.status, detailProductCls.url, detailProductCls.price, '0', '0', '0', '', productList.night) #print 'Query : ' + query cursor = con.cursor()
# Excerpt of a naeiltour product-list parser, stored on one physical line.
# NOTE(review): line breaks and indentation were lost and the first bare '#'
# comments out the rest of the line as stored; the excerpt stops after the
# query is built, so executing it is not part of the visible text.
# Visible behaviour: fetches productListUrl through savefilegethtml.getHtml,
# prints the url, and for lines containing "fn_price('" fills a clsProduct
# from the quote-separated pieces after 'fn_price' (indexes 1, 3, 5, 7, 9,
# 11). For tourkind 'W' or 'G' airCode is instead the two characters before
# '.gif'. url, productname and the product code passed to the query are the
# literals 'URL', 'productName' and 'productcode' here. When period is
# non-empty and tourkind is 'F', a line containing '<td width="134">' sets
# period and airCode and a query is built with
# savefilegethtml.getDetailMergeQueryTest1.
productListHtml = savefilegethtml.getHtml(productListUrl, '', '', 'naeiltourproductListHtml.txt') print 'ProductListUrl : ' + productListUrl for product in productListHtml: if product.find("fn_price('") > -1: productCls = clsProduct() productSplit = product.split('fn_price')[1].split("'") productCls.productCode = productSplit[1] productCls.dDay = productSplit[3] productCls.code = productSplit[5] if tourkind == 'W' or tourkind == 'G': productCls.airCode = product[product.find('.gif') - 2:product.find('.gif')] else: productCls.airCode = productSplit[7] # Korean airport code... but we need the 2-letter English airport code... productCls.price = productSplit[9].replace(',', '') #print productSplit[11] productCls.status = codes.getStatus('naeiltour', productSplit[11]) # blank : bookable, 03 : closing soon, 05 : closed #if tourkind == 'W': #productCls.city = productSplit[13] productCls.url = 'URL' productCls.productname = 'productName' productCls.dTime = '' productCls.aDay = '' productCls.aTime = '' if period != '' and tourkind == 'F': if product.find('<td width="134">') > -1: productCls.period = period #print productCls.toString() productCls.airCode = product[product.find('.gif') - 2:product.find('.gif')] query = savefilegethtml.getDetailMergeQueryTest1('naeiltour', 'productcode', productCls.code, productCls.productname, '20' + productCls.dDay, '', 'ICN', '', productCls.airCode, productCls.status, productCls.url, productCls.price, '0', '0', '0', '', '') #print query