示例#1
0
def pusanUrl(name, url):
    """Build the sub-menu entry for one region page and collect its region links.

    A clsSubMenuUrls is created with the given name and url, the page at
    that url is fetched through savefilegethtml.getHtml, and every fetched
    line that contains both '<li' and '<a' becomes a clsDetailRegionUrls
    appended to the entry's detailRegionList.

    Lines containing '전체' are skipped, unless the url contains 'D50',
    'D60' or 'D70', in which case they are kept as well.

    Returns the populated clsSubMenuUrls instance.
    """
    menu = clsSubMenuUrls()
    menu.name = name
    menu.url = url

    print(menu.name.decode('utf-8') + ' : ' + menu.url)

    # The url does not change while scanning, so decide once whether this
    # region page keeps its '전체' entry.
    keepAllEntry = any(menu.url.find(code) > -1 for code in ('D50', 'D60', 'D70'))

    pageLines = savefilegethtml.getHtml(
        menu.url,
        'class="container',
        '<!-- end .ot_tab_style1 -->',
        'onlinetourSubPagePusan.txt')

    for line in pageLines:
        # Only list items that carry a link are region entries.
        if '<li' not in line or '<a' not in line:
            continue
        if '전체' in line and not keepAllEntry:
            continue

        region = clsDetailRegionUrls()
        region.name = tourUtil.getRemovedHtmlTag(line).strip()
        region.url = mainUrl + tourUtil.getTagAttr(line, 'a', 'href')
        menu.detailRegionList.append(region)

        print(region.name.decode('utf-8') + ' : ' + region.url)

    return menu
# Walk every entry of backpackMenuList, fetch its product-list page and scan
# the returned lines for the product name, period, tour route and product
# code. The except/finally clause for this try is not visible in this excerpt.
try:
    for menu in backpackMenuList:
        # tit_position2: search condition for Busan departures..
        print menu.url
        productListHtml = savefilegethtml.getHtml(menu.url,
                                                  '<div id="sub_box2">',
                                                  'function btn(ckbtn){',
                                                  'productListHtml.txt')

        for each_line in productListHtml:
            if each_line.find('<h1 class="bic_h">') > -1:
                # NOTE(review): index [2] requires the 'bic_h">' marker to
                # occur at least twice on the line; confirm [1] is not meant.
                productName = each_line.split('bic_h">')[2].split('<')[0]
                #productNameSplit = productName.split(' ')
                #period = productNameSplit[len(productNameSplit)-1].replace('일', '')
                # The period is the last run matched in the tag-stripped line.
                # NOTE(review): the class [\^0-9] also matches a literal '^'.
                productNameSplit = re.findall(
                    '[\^0-9]+', tourUtil.getRemovedHtmlTag(each_line))
                period = productNameSplit[len(productNameSplit) - 1]

            #if each_line.find('<span class="goods_text">') > -1:            # The travel description is well written, but.. for Naeiltour we should probably take the one that has country info as a supplement..
            #tourRoute = each_line.split('px;">')[1].split('<')[0]
            if each_line.find('<span class="travel_box">') > -1:
                tourRoute = tourUtil.getRemovedHtmlTag(each_line)

            if each_line.find("sview('") > -1:
                # The quoted arguments after sview( are split on "'":
                # index 0 is the first argument, index 2 the second.
                productCode = each_line.split("sview('")[1].split("'")[0]
                code2 = each_line.split("sview('")[1].split("'")[2]
                detailUrl = 'http://www.naeiltour.co.kr/backpack/program_include_list.asp?good_cd=' + productCode + '&sel_ym=' + targetYear + targetMonth
                print >> exceptFile, 'DetailUrl : ', detailUrl

                # URL prefixes; nothing is appended to them within this excerpt.
                listUrl = 'http://www.naeiltour.co.kr/backpack/program_include_list.asp?good_cd='
                productDetailUrl = 'http://www.naeiltour.co.kr/backpack/show.asp?good_cd='
示例#3
0
# Fetch the onlinetour home page (between the two marker strings) and build a
# clsMenuUrls entry for each top-level menu link found in the returned lines.
mainpageHtml = savefilegethtml.getHtml('http://www.onlinetour.co.kr/web/home', '<li id="n_pack">', '<!--}} ot_navi-->', 'onlinetourMainPage.txt')

# Section that stuffs the URLs in...
mainMenuList = list()
mainMenuUrls = clsMenuUrls()
subMenuUrls = clsSubMenuUrls()
detailRegionUrls = clsDetailRegionUrls()
chkFree = False
chkDomestic = False
for menuList in mainpageHtml:
    # The except clause for this try is not visible in this excerpt.
    try:
        #print menuList
        # A line with a link but no '<li>' starts a new main menu entry.
        if menuList.find('<a href=') > -1 and menuList.find('<li>') < 0:
            mainMenuUrls = clsMenuUrls()
            mainMenuUrls.name = tourUtil.getRemovedHtmlTag(menuList).strip()
            mainMenuUrls.url = tourUtil.getTagAttr(menuList, 'a', 'href')
            mainMenuUrls.dmst_div = 'A'  # presumably 'A' = abroad; confirm
            if menuList.find('부산출발') > -1:
                mainMenuUrls.departCity = 'PUS'
                # For Busan.. the detailed region URLs are not exposed directly.. so they are forced in by hand..
                mainMenuUrls.subMenuList.append(pusanUrl('동남아', 'http://www.onlinetour.co.kr/web/tour?region_cd=D10'))
                mainMenuUrls.subMenuList.append(pusanUrl('일본', 'http://www.onlinetour.co.kr/web/tour?region_cd=D20'))
                mainMenuUrls.subMenuList.append(pusanUrl('중국', 'http://www.onlinetour.co.kr/web/tour?region_cd=D30'))
                mainMenuUrls.subMenuList.append(pusanUrl('괌/사이판', 'http://www.onlinetour.co.kr/web/tour?region_cd=D40'))
                mainMenuUrls.subMenuList.append(pusanUrl('남태평양', 'http://www.onlinetour.co.kr/web/tour?region_cd=D50'))
                mainMenuUrls.subMenuList.append(pusanUrl('유럽/특수', 'http://www.onlinetour.co.kr/web/tour?region_cd=D60'))
                mainMenuUrls.subMenuList.append(pusanUrl('미주/특수', 'http://www.onlinetour.co.kr/web/tour?region_cd=D70'))
            else:
                # Every menu that is not the Busan-departure one uses 'ICN'.
                mainMenuUrls.departCity = 'ICN'
            
 #query = savefilegethtml.getMasterMergeQuery('vgtour', mastercode, '', '', productGroupCls.name, productName, tourType, region, productComment, '')  # A : 해외(Abroad)
 #print query
 productCls = clsProduct()
 #productListHtml = open('productListHtml.txt')
 departConfirm = False
 
 
 for product in productListHtml:
     #print 'product : ' + product
     if product.find('pro_date') > -1:
         productCls = clsProduct()
         departConfirm = False
         #<td class="pro_date">07/07 (월) 16:15<br/><span>07/09 (수) 21:05</span></td>
         #<td class="pro_date">07/07 (월) <br/><span></span></td>
         #<td class="pro_date">07/28 (월) 09:10<br/><span>08/17 (<span style="color:red;margin-bottom:0;">일</span>) 05:50</span></td>
         daySplit = tourUtil.getNumArray(tourUtil.getRemovedHtmlTag(product))
         productCls.sDay = ''
         productCls.sTime = ''
         productCls.aDay = ''
         productCls.aTime = ''
         
         if len(daySplit) > 1:
             productCls.sDay = '2014' + daySplit[0] + daySplit[1]
         if len(daySplit) > 3:
             productCls.sTime = daySplit[2] + daySplit[3]
         if len(daySplit) > 5:
             productCls.aDay = '2014' + daySplit[4] + daySplit[5]
         if len(daySplit) > 7:
             productCls.aTime = daySplit[6] + daySplit[7]
         #productCls.sDay = '2014' + product.split('pro_date">')[1].split('(')[0].strip().replace('/', '')
         #productCls.sTime = product.split('<br/>')[0].split(')')[1].strip().replace(':', '')
        if period == '' and tourkind == 'F':
            if product.find('<td class="FRIDAYSPACING" >'
                            ) > -1 and product.find('.gif') > -1:
                productCls.airCode = product[product.find('.gif') -
                                             2:product.find('.gif')]
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">569,000원</FONT>2박3일 도미인 아사쿠사 호텔(12박 13일)</td>
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">369,000원</FONT> 2박3일<BR>신주쿠 워싱톤 호텔(더블룸)</td>
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">569,000원</FONT>(2박3일) 도미인 아사쿠사 호텔</td>

            if product.find('idth="220">') > -1:
                print product
                print type(product)
                splitText = product.split('박'.decode('utf-8'))
                tmpText = re.findall('[\^0-9]+',
                                     tourUtil.getRemovedHtmlTag(splitText[0]))
                print 'Night : ', tmpText[len(tmpText) - 1]

                tmpText = re.findall('[\^0-9]+',
                                     tourUtil.getRemovedHtmlTag(splitText[1]))
                print 'day : ', tmpText[0]
                #if product.find('(') > -1:
                #productCls.night = re.findall(r"\d", product.split('(')[1])[0]
                #productCls.period = re.findall(r"\d", product.split('(')[1])[1]
                #elif product.find('[') > -1:
                #productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                #productCls.period = re.findall(r"\d", product.split('[')[1])[1]
                #print productCls.toString()
                query = savefilegethtml.getDetailMergeQueryTest1(
                    'naeiltour', 'productcode', productCls.code,
                    productCls.productname, '20' + productCls.dDay, '', 'ICN',
示例#6
0
                query = savefilegethtml.getDetailMergeQueryTest1('naeiltour', 'productcode', productCls.code, productCls.productname, '20' + productCls.dDay, '', 'ICN', '', productCls.airCode, productCls.status, productCls.url, productCls.price, '0', '0', '0', '', '') 
                #print query
                #break
        
        if period == '' and tourkind == 'F':
            if product.find('<td class="FRIDAYSPACING" >') > -1 and product.find('.gif') > -1:
                productCls.airCode = product[product.find('.gif') - 2:product.find('.gif')]
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">569,000원</FONT>2박3일 도미인 아사쿠사 호텔(12박 13일)</td>
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">369,000원</FONT> 2박3일<BR>신주쿠 워싱톤 호텔(더블룸)</td>
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">569,000원</FONT>(2박3일) 도미인 아사쿠사 호텔</td>

            if product.find('idth="220">') > -1:
                print product
                print type(product)
                splitText = product.split('박'.decode('utf-8'))
                tmpText = re.findall('[\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[0]))
                print 'Night : ', tmpText[len(tmpText)-1]
                
                tmpText = re.findall('[\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[1]))
                print 'day : ', tmpText[0]
                #if product.find('(') > -1:
                    #productCls.night = re.findall(r"\d", product.split('(')[1])[0]
                    #productCls.period = re.findall(r"\d", product.split('(')[1])[1]
                #elif product.find('[') > -1:
                    #productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                    #productCls.period = re.findall(r"\d", product.split('[')[1])[1]
                #print productCls.toString()
                query = savefilegethtml.getDetailMergeQueryTest1('naeiltour', 'productcode', productCls.code, productCls.productname, '20' + productCls.dDay, '', 'ICN', '', productCls.airCode, productCls.status, productCls.url, productCls.price, '0', '0', '0', '', productCls.night) 
                #print 'Query : ' + query
                #break
        
def _parseNightPeriod(product):
    """Extract (night, period) digit strings from one cp949-encoded HTML row.

    The row is decoded and split on the Korean "nights" suffix. The last
    digit run before the first suffix is the night count; the first digit run
    following it is the period. When the suffix is absent both are '0'.

    Returns a (night, period) tuple of UTF-8 encoded strings.

    Raises IndexError when the suffix is present but one side holds no
    digits, and UnicodeDecodeError when the row is not valid cp949.
    """
    parts = product.decode('cp949').split(u'박')
    if len(parts) < 2:
        return '0', '0'
    # Plain digit runs only: the earlier class [\^0-9] also accepted a
    # literal '^', which could end up stored as a night/period value.
    nightRuns = re.findall(u'[0-9]+', tourUtil.getRemovedHtmlTag(parts[0]))
    periodRuns = re.findall(u'[0-9]+', tourUtil.getRemovedHtmlTag(parts[1]))
    return nightRuns[-1].encode('utf-8'), periodRuns[0].encode('utf-8')


def _mergeDetail(con, cursor, productcode, productCls, departCity, night):
    """Build the detail merge query for productCls, execute it and commit."""
    query = savefilegethtml.getDetailMergeQueryTest1(
        'naeiltour', productcode, productCls.code, productCls.productname,
        '20' + productCls.dDay, '', productCls.period, departCity, '',
        productCls.airCode, productCls.status, productCls.url,
        productCls.price, '0', '0', '0', '', night)
    cursor.execute(query)
    con.commit()


def searchProduct(filename, productcode, productName, period, targetUrl, listUrl, productDetailUrl, departCity, tourkind, dmst_div, country='', city='', comment=''):
    """Scrape one product's departure list and merge it into the database.

    The page at targetUrl is scanned for fn_goodDetail('...') tokens, one per
    departure day. A master merge query is executed for the product, then for
    every departure day the list page (listUrl + productcode + '&sel_day=' +
    day) is fetched and scanned line by line: a fn_price('...') line starts a
    new clsProduct, and a later layout-specific line triggers the detail
    merge for it.

    Parameters:
        filename: open file-like object; log and error lines are written to
            it with ``print >>``.
        productcode: product code appended to listUrl / productDetailUrl and
            passed to the merge-query builders.
        productName: passed to codes.getCityCode and stored on each product.
        period: when non-empty it is used as the period for tourkind 'F';
            when '' night and period are parsed from the row text.
        targetUrl: page scanned for departure-day tokens.
        listUrl: prefix of the per-day product list URL.
        productDetailUrl: prefix of the URL stored on each product.
        departCity: departure city passed to the detail merge; for tourkind
            'D' it is replaced per row by 'PUS' (row contains 'COLOR=BLUE>')
            or 'ICN'.
        tourkind: 'F', 'W', 'G' or 'D'; selects which row layout is parsed.
        dmst_div, country, city, comment: forwarded to codes.getCityCode
            and/or the master merge-query builder.

    Returns None. Exceptions derived from Exception are logged to filename
    and swallowed at the line, day or function level; KeyboardInterrupt and
    SystemExit propagate.
    """
    detailHtml = savefilegethtml.getHtml(targetUrl, '', '', 'naeiltourDetailHtml.txt')
    print >> filename, 'TargetUrl : ', targetUrl
    departDayList = [
        detailLine.split("fn_goodDetail('")[1].split("'")[0]
        for detailLine in detailHtml
        if detailLine.find("fn_goodDetail('") > -1
    ]

    # For each available departure date, look up the airline / product rows.
    # `con` is pre-bound so that the finally clause cannot fail with an
    # unbound name when the connect call itself raises.
    con = None
    try:
        # NOTE(review): credentials are hard-coded, and the literal looks as
        # if it was mangled by e-mail obfuscation; it should come from
        # configuration. Left byte-for-byte unchanged here.
        con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")

        codeList = codes.getCityCode(productName, city, comment, country)
        cityList = codeList[0]
        nationList = codeList[1]

        query = savefilegethtml.getMasterMergeQueryTest1('naeiltour', productcode, '', country, city, productName, tourkind, dmst_div, comment, '', nationList, cityList)
        # One cursor is reused for every statement instead of opening a new,
        # never-closed cursor per merge.
        cursor = con.cursor()
        cursor.execute(query)
        con.commit()

        productCls = clsProduct()

        for dayInfo in departDayList:
            try:
                productListUrl = listUrl + productcode + '&sel_day=' + dayInfo
                print('ProductListUrl : ' + productListUrl)
                productListHtml = savefilegethtml.getHtml(productListUrl, '', '', 'naeiltourproductListHtml.txt')
                print >> filename, 'ProductListUrl : ' + productListUrl
                for product in productListHtml:
                    try:
                        gifPos = product.find('.gif')

                        if product.find("fn_price('") > -1:
                            # A fn_price('...') line starts a new product;
                            # its quoted arguments sit at the odd indexes.
                            productCls = clsProduct()
                            productSplit = product.split('fn_price')[1].split("'")
                            productCls.productCode = productSplit[1]
                            productCls.dDay = productSplit[3]
                            productCls.code = productSplit[5]
                            if tourkind == 'W' or tourkind == 'G':
                                # Two characters right before '.gif'.
                                productCls.airCode = product[gifPos - 2:gifPos]
                            else:
                                # Korean airport code... but we need the
                                # two-letter English code.
                                productCls.airCode = productSplit[7]
                            productCls.price = productSplit[9].replace(',', '')
                            # blank: bookable, 03: closing soon, 05: closed
                            productCls.status = codes.getStatus('naeiltour', productSplit[11])
                            productCls.url = productDetailUrl + productcode + '&sel_day=' + productCls.dDay
                            productCls.productname = productName
                            productCls.dTime = ''
                            productCls.aDay = ''
                            productCls.aTime = ''

                        if period != '':
                            # A known period is only handled for tourkind 'F'.
                            if tourkind == 'F' and product.find('<td width="134">') > -1:
                                productCls.period = period
                                productCls.airCode = product[gifPos - 2:gifPos]
                                _mergeDetail(con, cursor, productcode, productCls, departCity, '')
                        elif tourkind == 'F' or tourkind == 'D':
                            if product.find('<td class="FRIDAYSPACING" >') > -1 and gifPos > -1:
                                productCls.airCode = product[gifPos - 2:gifPos]

                            if product.find('idth="220">') > -1:
                                productCls.night, productCls.period = _parseNightPeriod(product)
                                rowDepartCity = departCity
                                if tourkind == 'D':
                                    # Rows marked with COLOR=BLUE> are stored as 'PUS'.
                                    if product.find('COLOR=BLUE>') > -1:
                                        rowDepartCity = 'PUS'
                                    else:
                                        rowDepartCity = 'ICN'
                                _mergeDetail(con, cursor, productcode, productCls, rowDepartCity, productCls.night)
                        elif tourkind == 'W' or tourkind == 'G':
                            if product.find('valign="middle"') > -1:
                                productCls.night, productCls.period = _parseNightPeriod(product)
                                _mergeDetail(con, cursor, productcode, productCls, departCity, productCls.night)

                    except cx_Oracle.DatabaseError as err1:
                        print >> filename, err1
                    except Exception:
                        # Best effort: log and continue with the next line.
                        print >> filename, "Depth3 Error:", sys.exc_info()[0]
            except Exception:
                # Best effort: log and continue with the next departure day.
                print >> filename, "Depth2 Error:", sys.exc_info()[0]

    except Exception:
        print >> filename, "Depth1 Error:", sys.exc_info()[0]
    finally:
        if con is not None:
            con.close()
                            productNameHtml = savefilegethtml.htmlToList(
                                productNameHtml, 'xxx.txt')
                            for pdName in productNameHtml:
                                if pdName.find('height="110" alt="') > 0:
                                    productNameList.append(
                                        pdName.split(
                                            'alt="')[1].split('"')[0].replace(
                                                "'",
                                                "").strip().decode('utf-8'))
                                # description을.. 다른놈으로 가져가야 할듯.. route로..
                                #if pdName.find('<p class="desc">') > 0:
                                #productCommentList.append(pdName.split('desc">')[1].split('<')[0].replace("'", "").strip().decode('utf-8'))
                                if pdName.find('<p class="route">') > 0:
                                    productCommentList.append(
                                        tourUtil.getRemovedHtmlTag(
                                            pdName).strip().replace(
                                                "'", "").decode('utf-8'))

                            #today = today.replace(month = today.month + 1)
                            codeIdx = 0

                            for pcode in codeList:
                                detailProduct = pcode.split('s')[1]

                                detailProductUrl = ''
                                if not (
                                        package.menuCode == 'A03'
                                        or package.menuCode == 'A06'
                                ):  # 출발일정 눌렀을때 List가 펼쳐지는 경우랑, 페이지가 이동하는 경우 나눔..
                                    detailProductUrl = ''
                                    #if package.menuCode == 'A01':
示例#9
0
        if period == '' and tourkind == 'F':
            if product.find('<td class="FRIDAYSPACING" >'
                            ) > -1 and product.find('.gif') > -1:
                productCls.airCode = product[product.find('.gif') -
                                             2:product.find('.gif')]
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">569,000원</FONT>2박3일 도미인 아사쿠사 호텔(12박 13일)</td>
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">369,000원</FONT> 2박3일<BR>신주쿠 워싱톤 호텔(더블룸)</td>
            #<td class="FRIDAYSPACING" width="220"><B><FONT COLOR="RED">569,000원</FONT>(2박3일) 도미인 아사쿠사 호텔</td>

            if product.find('idth="220">') > -1:
                print product
                print type(product)
                splitText = product.split('박'.decode('utf-8'))
                tmpText = re.findall('[\^0-9]+',
                                     tourUtil.getRemovedHtmlTag(splitText[0]))
                print 'Night : ', tmpText[len(tmpText) - 1]

                tmpText = re.findall('[\^0-9]+',
                                     tourUtil.getRemovedHtmlTag(splitText[1]))
                print 'day : ', tmpText[0]
                #if product.find('(') > -1:
                #productCls.night = re.findall(r"\d", product.split('(')[1])[0]
                #productCls.period = re.findall(r"\d", product.split('(')[1])[1]
                #elif product.find('[') > -1:
                #productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                #productCls.period = re.findall(r"\d", product.split('[')[1])[1]
                #print productCls.toString()
                query = savefilegethtml.getDetailMergeQueryTest1(
                    'naeiltour', 'productcode', productCls.code,
                    productCls.productname, '20' + productCls.dDay, '', 'ICN',
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div):
    print 'Product Url : ', productCls.url
    print >> exceptFile, 'Product Url : ', productCls.url
    
     # 2014. 7. 23. 카테고리의 국가는 넣지 않기로 함...
    #codeList = codes.getCityCode(productCls.name.decode('utf-8'), detailUrl.name.decode('utf-8'), regionUrl.name.decode('utf-8'))
    codeList = codes.getCityCode(productCls.name.decode('utf-8'))
    cityList = codeList[0]
    nationList = codeList[1]
    continentList = codeList[2]
    siteList = codeList[3]              # 2014. 8. 3. site 추가
    
    if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0:
        codeList = codes.getCityCode(detailUrl.name.decode('utf-8'))
        cityList = codeList[0]
        nationList = codeList[1]
        continentList = codeList[2]
        siteList = codeList[3]              # 2014. 8. 3. site 추가
    
    # Master 상품 입력
    query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code, productCls.name.decode('utf-8'), menu.kind, dmst_div, '', '')
    #print query
    cursor = con.cursor()
    cursor.execute(query)
    con.commit()
    # Region Data 삭제
    codes.insertRegionData(tourAgency, productCls.code, cityList, nationList, continentList, siteList)
    
    detailProductHtml = savefilegethtml.getHtml(productCls.url, '', '', 'tour2000DetailHtml'+targetMonth+'.txt')
    pl10Idx = 0
    for detailProduct in detailProductHtml:
        try:
            if detailProduct.find('<span class="text_pink">') > -1 and detailProduct.find('<a href=') < 0:
                detailCls = clsProductDetail()
                numArray = tourUtil.getNumArray(detailProduct)
                if len(numArray) > 7:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = numArray[2] + numArray[3]
                    detailCls.aDay = targetYear + numArray[4] + numArray[5]
                    detailCls.aTime = numArray[6] + numArray[7]
                elif len(numArray) == 4:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = ''
                    detailCls.aDay = targetYear + numArray[2] + numArray[3]
                    detailCls.aTime = ''
            elif detailProduct.find('onError') > -1:
                detailCls.airCode = detailProduct[detailProduct.find('.gif') - 4:detailProduct.find('.gif') - 2]
            elif detailProduct.find('text_redB') > -1:
                numArray = tourUtil.getNumArray(tourUtil.getRemovedHtmlTag(detailProduct))
                for num in numArray:
                    detailCls.price += num
            elif detailProduct.find('</a></td>') > -1:
                if detailProduct.find('text_pink') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약가능')
                elif detailProduct.find('text_blau') > -1:
                    detailCls.status = codes.getStatus('tour2000', '출발가능')
                elif detailProduct.find('text_green') > -1:
                    detailCls.status = codes.getStatus('tour2000', '대기예약')
                elif detailProduct.find('text_grayLightSmall') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약마감')
                    
                detailCls.remainSeat = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip()
            elif detailProduct.find('<p class="pl10">') > -1:
                if pl10Idx == 0:
                    pl10Idx = 1
                    detailCls.productName = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip()
                    detailCls.url = mainUrl + tourUtil.getTagAttr(detailProduct, 'a', 'href')
                    detailCls.productSeq = detailProduct.split('ev_ym=')[1].split('&')[0] + detailProduct.split('ev_seq=')[1].split('&')[0]
                else:
                    pl10Idx = 0
                
                if detailCls.productName.find('부산출발') > -1:
                    departCity = 'PUS'
                else:
                    departCity = 'ICN'
                
                query = tourQuery.getDetailMergeQuery(tourAgency, productCls.code, detailCls.productSeq, detailCls.productName.decode('utf-8'), detailCls.dDay+detailCls.dTime, detailCls.aDay+detailCls.aTime, productCls.period, departCity, '', detailCls.airCode, detailCls.status, detailCls.url, detailCls.price, '0', '0', '0', '', productCls.night)
                #print >> exceptFile, query
                #print query
                cursor = con.cursor()
                cursor.execute(query)
                con.commit()
                #break
        except:
            print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0]
            pass
                         print >> exceptFile, 'List URL : ', defaultproductListUrl
                         productList = urllib2.urlopen(defaultproductListUrl).read()
                         codeList = re.findall(r"goodFocus\w*", productList)
                         
                         productNameList = list()
                         productCommentList = list()
                         productNameHtml = productList[productList.find('travel_top_section'):productList.find('frmGD')]
                         productNameHtml = savefilegethtml.htmlToList(productNameHtml, 'xxx.txt')
                         for pdName in productNameHtml:
                             if pdName.find('height="110" alt="') > 0:
                                 productNameList.append(pdName.split('alt="')[1].split('"')[0].replace("'", "").strip().decode('utf-8'))
                             # description을.. 다른놈으로 가져가야 할듯.. route로..
                             #if pdName.find('<p class="desc">') > 0:
                                 #productCommentList.append(pdName.split('desc">')[1].split('<')[0].replace("'", "").strip().decode('utf-8'))
                             if pdName.find('<p class="route">') > 0:
                                 productCommentList.append(tourUtil.getRemovedHtmlTag(pdName).strip().replace("'", "").decode('utf-8'))
                                 
                         #today = today.replace(month = today.month + 1)
                         codeIdx = 0
 
                         for pcode in codeList:
                             detailProduct = pcode.split('s')[1]
                             
                             detailProductUrl = ''
                             if not (package.menuCode == 'A03' or package.menuCode == 'A06'):       # 출발일정 눌렀을때 List가 펼쳐지는 경우랑, 페이지가 이동하는 경우 나눔..
                                 detailProductUrl = ''
                                 #if package.menuCode == 'A01':
                                 detailProductUrl = 'http://www.ybtour.co.kr/Goods/' + urlMap[package.menuCode] + '/inc_evList_ajax.asp?goodCD=' + detailProduct + '&startDT=' + targetYear + targetMonth
                                 
                                 #detailProductUrl = 'http://www.ybtour.co.kr/Goods/overseas/inc_evList_ajax.asp?goodCD=150201119&startDT=201408'
                                 
# Per-run log file; the start time is its first line.
exceptFileName = 'tour2000Exception' + scrappingStartTime + '.txt'
exceptFile = open(exceptFileName, 'w')
print >> exceptFile, "Start : %s" % time.ctime()

mainUrl = 'http://www.tour2000.co.kr'

# Lines of the site-wide navigation menu taken from the index page.
mainHtml = savefilegethtml.getHtml(
    'http://www.tour2000.co.kr/index.asp',
    '<div class="navi_wholeMenu_box">',
    '<!-- navi_wholeMenu_wrapper// -->',
    'tour2000mainHtml.txt')

startMainUrl = False      # True while inside a menu section being collected
menuList = []
MenuUrlCls = clsMenuUrls()
for each_line in mainHtml:
    # A 'text_pinkB14' line opens a new menu section.
    if 'text_pinkB14' in each_line:
        MenuUrlCls = clsMenuUrls()
        MenuUrlCls.kind = codes.getTourKind(
            tourAgency, tourUtil.getRemovedHtmlTag(each_line).strip())
        startMainUrl = True

    # Original note: for now everything except overseas packages, honeymoon,
    # golf and domestic (Jeju) is passed over.  These kinds are skipped.
    if MenuUrlCls.kind in ('A', 'F', 'H', 'No'):
        continue

    # Lines outside an open section carry nothing to collect.
    if not startMainUrl:
        continue

    if '<li>' in each_line:
        # One sub-menu entry: visible text plus absolute link.
        SubMenuCls = clsSubMenu()
        SubMenuCls.name = tourUtil.getRemovedHtmlTag(each_line).strip()
        SubMenuCls.url = mainUrl + tourUtil.getTagAttr(each_line, 'a', 'href')
        MenuUrlCls.subMenuList.append(SubMenuCls)

    if '</div>' in each_line:
        # Section closed: keep it and wait for the next section header.
        startMainUrl = False
        menuList.append(MenuUrlCls)
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div):
    print 'Product Url : ', productCls.url
    print >> exceptFile, 'Product Url : ', productCls.url

    # 2014. 7. 23. 카테고리의 국가는 넣지 않기로 함...
    #codeList = codes.getCityCode(productCls.name.decode('utf-8'), detailUrl.name.decode('utf-8'), regionUrl.name.decode('utf-8'))
    codeList = codes.getCityCode(productCls.name.decode('utf-8'))
    cityList = codeList[0]
    nationList = codeList[1]
    continentList = codeList[2]
    siteList = codeList[3]  # 2014. 8. 3. site 추가

    if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0:
        codeList = codes.getCityCode(detailUrl.name.decode('utf-8'))
        cityList = codeList[0]
        nationList = codeList[1]
        continentList = codeList[2]
        siteList = codeList[3]  # 2014. 8. 3. site 추가

    # Master 상품 입력
    query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code,
                                          productCls.name.decode('utf-8'),
                                          menu.kind, dmst_div, '', '')
    #print query
    cursor = con.cursor()
    cursor.execute(query)
    con.commit()
    # Region Data 삭제
    codes.insertRegionData(tourAgency, productCls.code, cityList, nationList,
                           continentList, siteList)

    detailProductHtml = savefilegethtml.getHtml(
        productCls.url, '', '', 'tour2000DetailHtml' + targetMonth + '.txt')
    pl10Idx = 0
    for detailProduct in detailProductHtml:
        try:
            if detailProduct.find(
                    '<span class="text_pink">') > -1 and detailProduct.find(
                        '<a href=') < 0:
                detailCls = clsProductDetail()
                numArray = tourUtil.getNumArray(detailProduct)
                if len(numArray) > 7:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = numArray[2] + numArray[3]
                    detailCls.aDay = targetYear + numArray[4] + numArray[5]
                    detailCls.aTime = numArray[6] + numArray[7]
                elif len(numArray) == 4:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = ''
                    detailCls.aDay = targetYear + numArray[2] + numArray[3]
                    detailCls.aTime = ''
            elif detailProduct.find('onError') > -1:
                detailCls.airCode = detailProduct[detailProduct.find('.gif') -
                                                  4:detailProduct.find('.gif'
                                                                       ) - 2]
            elif detailProduct.find('text_redB') > -1:
                numArray = tourUtil.getNumArray(
                    tourUtil.getRemovedHtmlTag(detailProduct))
                for num in numArray:
                    detailCls.price += num
            elif detailProduct.find('</a></td>') > -1:
                if detailProduct.find('text_pink') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약가능')
                elif detailProduct.find('text_blau') > -1:
                    detailCls.status = codes.getStatus('tour2000', '출발가능')
                elif detailProduct.find('text_green') > -1:
                    detailCls.status = codes.getStatus('tour2000', '대기예약')
                elif detailProduct.find('text_grayLightSmall') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약마감')

                detailCls.remainSeat = tourUtil.getRemovedHtmlTag(
                    detailProduct).replace("'", "").strip()
            elif detailProduct.find('<p class="pl10">') > -1:
                if pl10Idx == 0:
                    pl10Idx = 1
                    detailCls.productName = tourUtil.getRemovedHtmlTag(
                        detailProduct).replace("'", "").strip()
                    detailCls.url = mainUrl + tourUtil.getTagAttr(
                        detailProduct, 'a', 'href')
                    detailCls.productSeq = detailProduct.split(
                        'ev_ym=')[1].split('&')[0] + detailProduct.split(
                            'ev_seq=')[1].split('&')[0]
                else:
                    pl10Idx = 0

                if detailCls.productName.find('부산출발') > -1:
                    departCity = 'PUS'
                else:
                    departCity = 'ICN'

                query = tourQuery.getDetailMergeQuery(
                    tourAgency, productCls.code, detailCls.productSeq,
                    detailCls.productName.decode('utf-8'),
                    detailCls.dDay + detailCls.dTime,
                    detailCls.aDay + detailCls.aTime, productCls.period,
                    departCity, '', detailCls.airCode, detailCls.status,
                    detailCls.url, detailCls.price, '0', '0', '0', '',
                    productCls.night)
                #print >> exceptFile, query
                #print query
                cursor = con.cursor()
                cursor.execute(query)
                con.commit()
                #break
        except:
            print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0]
            pass
mainUrl = 'http://www.tour2000.co.kr'

# Lines of the site-wide navigation menu taken from the index page.
mainHtml = savefilegethtml.getHtml(
    'http://www.tour2000.co.kr/index.asp',
    '<div class="navi_wholeMenu_box">',
    '<!-- navi_wholeMenu_wrapper// -->',
    'tour2000mainHtml.txt',
)

startMainUrl = False      # True while inside a menu section being collected
menuList = []
MenuUrlCls = clsMenuUrls()
for each_line in mainHtml:
    # A 'text_pinkB14' line opens a new menu section.
    if 'text_pinkB14' in each_line:
        MenuUrlCls = clsMenuUrls()
        MenuUrlCls.kind = codes.getTourKind(
            tourAgency, tourUtil.getRemovedHtmlTag(each_line).strip())
        startMainUrl = True

    # Original note: for now everything except overseas packages, honeymoon,
    # golf and domestic (Jeju) is passed over.  These kinds are skipped.
    if MenuUrlCls.kind in ('A', 'F', 'H', 'No'):
        continue

    # Lines outside an open section carry nothing to collect.
    if not startMainUrl:
        continue

    if '<li>' in each_line:
        # One sub-menu entry: visible text plus absolute link.
        SubMenuCls = clsSubMenu()
        SubMenuCls.name = tourUtil.getRemovedHtmlTag(each_line).strip()
        SubMenuCls.url = mainUrl + tourUtil.getTagAttr(each_line, 'a', 'href')
        MenuUrlCls.subMenuList.append(SubMenuCls)

    if '</div>' in each_line:
        # Section closed: keep it and wait for the next section header.
        startMainUrl = False
        menuList.append(MenuUrlCls)
def searchProduct(filename,
                  productcode,
                  productName,
                  period,
                  targetUrl,
                  listUrl,
                  productDetailUrl,
                  departCity,
                  tourkind,
                  dmst_div,
                  country='',
                  city='',
                  comment=''):
    detailHtml = savefilegethtml.getHtml(targetUrl, '', '',
                                         'naeiltourDetailHtml.txt')
    print >> filename, 'TargetUrl : ', targetUrl
    departDayList = list()
    for detail_each_line in detailHtml:
        if detail_each_line.find("fn_goodDetail('") > -1:
            departDayList.append(
                detail_each_line.split("fn_goodDetail('")[1].split("'")[0])

    # 출발 가능 날짜에 항공사 찾아오는 부분
    try:
        con = cx_Oracle.connect(
            "bigtour/[email protected]:1521/ora11g")

        codeList = codes.getCityCode(productName, city, comment, country)
        cityList = codeList[0]
        nationList = codeList[1]

        #print nationList
        #print cityList

        #print nationList
        #print cityList
        query = savefilegethtml.getMasterMergeQueryTest1(
            'naeiltour', productcode, '', country, city, productName, tourkind,
            dmst_div, comment, '', nationList, cityList)  # A : 해외(Abroad)
        #query = savefilegethtml.getMasterMergeQuery('naeiltour', productcode, '', country, city, productName, tourkind, dmst_div, comment, '', nationList, cityList)  # A : 해외(Abroad)
        #print query
        cursor = con.cursor()
        cursor.execute(query)
        con.commit()

        productCls = clsProduct()

        for dayInfo in departDayList:
            try:
                productListUrl = listUrl + productcode + '&sel_day=' + dayInfo
                print 'ProductListUrl : ' + productListUrl
                productListHtml = savefilegethtml.getHtml(
                    productListUrl, '', '', 'naeiltourproductListHtml.txt')
                print >> filename, 'ProductListUrl : ' + productListUrl
                for product in productListHtml:
                    try:
                        if product.find("fn_price('") > -1:
                            productCls = clsProduct()
                            productSplit = product.split('fn_price')[1].split(
                                "'")
                            productCls.productCode = productSplit[1]
                            productCls.dDay = productSplit[3]
                            productCls.code = productSplit[5]
                            if tourkind == 'W' or tourkind == 'G':
                                productCls.airCode = product[
                                    product.find('.gif') -
                                    2:product.find('.gif')]
                            else:
                                productCls.airCode = productSplit[
                                    7]  # 한글 공항코드... but 우리는 영문2자리 공항코드가 필요하다...
                            productCls.price = productSplit[9].replace(',', '')
                            #print productSplit[11]
                            productCls.status = codes.getStatus(
                                'naeiltour', productSplit[11]
                            )  # 공백 : 예약가능, 03 : 마감임박, 05 : 마감
                            #if tourkind == 'W':
                            #productCls.city = productSplit[13]
                            productCls.url = productDetailUrl + productcode + '&sel_day=' + productCls.dDay
                            productCls.productname = productName
                            productCls.dTime = ''
                            productCls.aDay = ''
                            productCls.aTime = ''

                        if period != '' and tourkind == 'F':
                            if product.find('<td width="134">') > -1:
                                productCls.period = period
                                #print productCls.toString()
                                productCls.airCode = product[
                                    product.find('.gif') -
                                    2:product.find('.gif')]
                                query = savefilegethtml.getDetailMergeQueryTest1(
                                    'naeiltour', productcode, productCls.code,
                                    productCls.productname,
                                    '20' + productCls.dDay, '',
                                    productCls.period, departCity, '',
                                    productCls.airCode, productCls.status,
                                    productCls.url, productCls.price, '0', '0',
                                    '0', '', '')
                                #query = savefilegethtml.getDetailMergeQuery('naeiltour', productcode, productCls.code, productCls.productname, '20' + productCls.dDay, '', productCls.period, departCity, '', productCls.airCode, productCls.status, productCls.url, productCls.price, '0', '0', '0', '', '')
                                #print query
                                cursor = con.cursor()
                                cursor.execute(query)
                                con.commit()
                                #break

                        if period == '' and tourkind == 'F':
                            if product.find('<td class="FRIDAYSPACING" >'
                                            ) > -1 and product.find(
                                                '.gif') > -1:
                                productCls.airCode = product[
                                    product.find('.gif') -
                                    2:product.find('.gif')]

                            if product.find('idth="220">') > -1:
                                """
                                # 날짜 가져오는 부분... 종류가 너무 많아서 좀 수정
                                if product.find('(') > -1:
                                    productCls.night = re.findall(r"\d", product.split('(')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('(')[1])[1]
                                elif product.find('[') > -1:
                                    productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('[')[1])[1]
                                """
                                splitText = product.decode('cp949').split(u'박')
                                if len(splitText) > 1:
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[0]))
                                    productCls.night = tmpText[
                                        len(tmpText) - 1].encode('utf-8')
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[1]))
                                    productCls.period = tmpText[0].encode(
                                        'utf-8')
                                else:
                                    productCls.night = '0'
                                    productCls.period = '0'
                                #############################################################################################
                                #print productCls.toString()
                                query = savefilegethtml.getDetailMergeQueryTest1(
                                    'naeiltour', productcode, productCls.code,
                                    productCls.productname,
                                    '20' + productCls.dDay, '',
                                    productCls.period, departCity, '',
                                    productCls.airCode, productCls.status,
                                    productCls.url, productCls.price, '0', '0',
                                    '0', '', productCls.night)
                                #print 'Query : ' + query
                                cursor = con.cursor()
                                cursor.execute(query)
                                con.commit()

                        if period == '' and tourkind == 'W':
                            if product.find('valign="middle"') > -1:
                                """
                                if product.find('(') > -1:
                                    productCls.night = re.findall(r"\d", product.split('(')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('(')[1])[1]
                                elif product.find('[') > -1:
                                    productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('[')[1])[1]
                                """
                                splitText = product.decode('cp949').split(u'박')
                                if len(splitText) > 1:
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[0]))
                                    productCls.night = tmpText[
                                        len(tmpText) - 1].encode('utf-8')
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[1]))
                                    productCls.period = tmpText[0].encode(
                                        'utf-8')
                                else:
                                    productCls.night = '0'
                                    productCls.period = '0'
                                #############################################################################################
                                #print productCls.toString()
                                query = savefilegethtml.getDetailMergeQueryTest1(
                                    'naeiltour', productcode, productCls.code,
                                    productCls.productname,
                                    '20' + productCls.dDay, '',
                                    productCls.period, departCity, '',
                                    productCls.airCode, productCls.status,
                                    productCls.url, productCls.price, '0', '0',
                                    '0', '', productCls.night)
                                #print 'Query : ' + query
                                cursor = con.cursor()
                                cursor.execute(query)
                                con.commit()
                                #break

                        if period == '' and tourkind == 'G':
                            if product.find('valign="middle"') > -1:
                                """
                                if product.find('(') > -1:
                                    productCls.night = re.findall(r"\d", product.split('(')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('(')[1])[1]
                                elif product.find('[') > -1:
                                    productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('[')[1])[1]
                                """
                                splitText = product.decode('cp949').split(u'박')
                                if len(splitText) > 1:
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[0]))
                                    productCls.night = tmpText[
                                        len(tmpText) - 1].encode('utf-8')
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[1]))
                                    productCls.period = tmpText[0].encode(
                                        'utf-8')
                                else:
                                    productCls.night = '0'
                                    productCls.period = '0'
                                #############################################################################################
                                #print productCls.toString()
                                query = savefilegethtml.getDetailMergeQueryTest1(
                                    'naeiltour', productcode, productCls.code,
                                    productCls.productname,
                                    '20' + productCls.dDay, '',
                                    productCls.period, departCity, '',
                                    productCls.airCode, productCls.status,
                                    productCls.url, productCls.price, '0', '0',
                                    '0', '', productCls.night)
                                #print 'Query : ' + query
                                cursor = con.cursor()
                                cursor.execute(query)
                                con.commit()
                                #break

                        if period == '' and tourkind == 'D':
                            if product.find('<td class="FRIDAYSPACING" >'
                                            ) > -1 and product.find(
                                                '.gif') > -1:
                                productCls.airCode = product[
                                    product.find('.gif') -
                                    2:product.find('.gif')]

                            if product.find('idth="220">') > -1:
                                """
                                if product.find('(') > -1:
                                    productCls.night = re.findall(r"\d", product.split('[')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('')[1])[1]
                                else:
                                    productCls.night = re.findall(r"\d", product.split('COLOR=#FF7A73>')[1])[0]
                                    productCls.period = re.findall(r"\d", product.split('COLOR=#FF7A73>')[1])[1]
                                """
                                splitText = product.decode('cp949').split(u'박')
                                if len(splitText) > 1:
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[0]))
                                    productCls.night = tmpText[
                                        len(tmpText) - 1].encode('utf-8')
                                    tmpText = re.findall(
                                        u'[\^0-9]+',
                                        tourUtil.getRemovedHtmlTag(
                                            splitText[1]))
                                    productCls.period = tmpText[0].encode(
                                        'utf-8')
                                else:
                                    productCls.night = '0'
                                    productCls.period = '0'
                                #############################################################################################
                                if product.find('COLOR=BLUE>') > -1:
                                    departCity = 'PUS'
                                else:
                                    departCity = 'ICN'

                                #print productCls.toString()
                                query = savefilegethtml.getDetailMergeQueryTest1(
                                    'naeiltour', productcode, productCls.code,
                                    productCls.productname,
                                    '20' + productCls.dDay, '',
                                    productCls.period, departCity, '',
                                    productCls.airCode, productCls.status,
                                    productCls.url, productCls.price, '0', '0',
                                    '0', '', productCls.night)
                                #print 'Query : ' + query
                                cursor = con.cursor()
                                cursor.execute(query)
                                con.commit()
                                #break

                    except cx_Oracle.DatabaseError as err1:
                        print >> filename, err1
                        pass
                    except:
                        print >> filename, "Depth3 Error:", sys.exc_info()[0]
                        pass
                #break
            except:
                print >> filename, "Depth2 Error:", sys.exc_info()[0]
                pass

    except:
        print >> filename, "Depth1 Error:", sys.exc_info()[0]
        pass
    finally:
        con.close()
except:
    print >> exceptFile, "backpack :", sys.exc_info()[0]
    pass

# Walk every entry of backpackMenuList, fetch its product-list page and pull
# per-product fields (name, period, route, product code) out of the HTML lines.
# NOTE(review): this `try:` has no except/finally in the visible text and the
# body stops right after two URL constants -- the fragment looks truncated.
try:
    for menu in backpackMenuList:
        # tit_position2: search condition for Busan departures..
        print menu.url
        # The result is iterated line by line below; presumably getHtml returns the
        # lines found between the two marker strings and caches them in the named
        # file -- TODO confirm against savefilegethtml.
        productListHtml = savefilegethtml.getHtml(menu.url, '<div id="sub_box2">', 'function btn(ckbtn){', 'productListHtml.txt')
        
        for each_line in productListHtml:
            if each_line.find('<h1 class="bic_h">') > -1:
                # Text after the SECOND 'bic_h">' occurrence, up to the next '<'.
                # Raises IndexError when the marker appears only once on the line.
                # NOTE(review): productName is not read again in the visible part.
                productName = each_line.split('bic_h">')[2].split('<')[0]
                # Earlier approach (disabled): split the name on spaces and strip the
                # Korean "day" suffix from the last token to get the period.
                #productNameSplit = productName.split(' ')
                # Every run of digits (or literal '^' characters) in the tag-stripped
                # line; the last run is taken as the period.
                productNameSplit = re.findall('[\^0-9]+', tourUtil.getRemovedHtmlTag(each_line))
                period = productNameSplit[len(productNameSplit)-1]
                
            # Disabled: the 'goods_text' span has a well-written trip description, but
            # for Naeiltour it seems better to use the element carrying country
            # information as the supplementary source..
            #if each_line.find('<span class="goods_text">') > -1:
                #tourRoute = each_line.split('px;">')[1].split('<')[0]
            if each_line.find('<span class="travel_box">') > -1:
                # Route text is the whole line with its HTML tags removed.
                tourRoute = tourUtil.getRemovedHtmlTag(each_line)
            
            if each_line.find("sview('") > -1:
                # First quoted token after "sview('" is the product code.
                productCode = each_line.split("sview('")[1].split("'")[0]
                # Token at index 2 after splitting on "'" -- presumably the second
                # quoted argument of sview(...); verify against the page markup.
                code2 = each_line.split("sview('")[1].split("'")[2]
                # Listing URL for this product in the target year+month.
                detailUrl = 'http://www.naeiltour.co.kr/backpack/program_include_list.asp?good_cd='+ productCode + '&sel_ym=' + targetYear + targetMonth
                print >> exceptFile, 'DetailUrl : ', detailUrl
                
                # Base URLs (good_cd value not yet appended); not used in the visible part.
                listUrl = 'http://www.naeiltour.co.kr/backpack/program_include_list.asp?good_cd='
                productDetailUrl = 'http://www.naeiltour.co.kr/backpack/show.asp?good_cd='
# Build the menu tree from the main page markup: each mainCls groups the
# subMenuCls links that follow it, and finished groups are pushed onto mainList
# whenever a new tour-kind <li class="..."> marker line is reached.
for each_line in mainpageHtml:
    # Track whether we are inside an HTML comment so commented-out links are skipped.
    # NOTE(review): a line holding both '<!--' and '-->' only raises the flag; it
    # stays raised until a later line has '-->' without '<!--' -- confirm intended.
    if '<!--' in each_line:
        startComment = True
    elif '-->' in each_line:
        startComment = False

    if firstOversea and '해외패키지' in each_line:
        # First line mentioning the overseas-package label: open the 'package' group.
        clsMain = mainCls()
        clsMain.name = codes.getTourKind('lottetour', 'package')
        firstOversea = False
    elif not startComment and '<li' in each_line and '<a href=' in each_line:
        # A menu link: the URL is on this line, the name may be on this line or a later one.
        clsSubMenu = subMenuCls()
        clsSubMenu.url = mainUrl + tourUtil.getTagAttr(each_line, 'a', 'href')
        if 'title' not in each_line:
            # Name not on this line; wait for a following 'title=' line.
            subMenu = True
        else:
            clsSubMenu.name = tourUtil.getRemovedHtmlTag(each_line).strip()
            clsMain.subMenuList.append(clsSubMenu)
    elif not startComment and subMenu and 'title=' in each_line:
        # Deferred name for the pending link: text between the first '>' and the next '<'.
        clsSubMenu.name = each_line.split('>')[1].split('<')[0]
        clsMain.subMenuList.append(clsSubMenu)
        subMenu = False
    elif 'sub_depth0' in each_line:
        # Drop the most recently collected sub menu, if there is one.
        if clsMain.subMenuList:
            clsMain.subMenuList.pop()
    elif any(marker in each_line for marker in (
            'class="fit"',
            'class="honeymoon _open"',
            'class="golf"',
            'class="cruise line"',
            'class="air line"')):
        # A new tour-kind section starts: store the finished group and open the next,
        # named after the <li> class attribute of this line.
        mainList.append(clsMain)
        clsMain = mainCls()
        clsMain.name = codes.getTourKind('lottetour', tourUtil.getTagAttr(each_line, 'li', 'class'))
        
示例#18
0
                                #최종 상품들 잡아넣자..
                                try:
                                    productCls = clsProduct()
                                    #productListHtml = open('productListHtml.txt')
                                    departConfirm = False
                                    for product in productListHtml:
                                        #print 'product : ' + product
                                        if product.find('pro_date') > -1:
                                            productCls = clsProduct()
                                            departConfirm = False
                                            #productCls.sDay = targetYear + product.split('pro_date">')[1].split('(')[0].strip().replace('/', '')
                                            #productCls.sTime = product.split('<br/>')[0].split(')')[1].strip().replace(':', '')
                                            #productCls.aDay = targetYear + product.split('<span>')[1].split('(')[0].strip().replace('/', '')
                                            #productCls.aTime = product.split('<span>')[1].split(')')[1].split('<')[0].strip().replace(':', '')
                                            daySplit = tourUtil.getNumArray(
                                                tourUtil.getRemovedHtmlTag(
                                                    product))
                                            productCls.sDay = ''
                                            productCls.sTime = ''
                                            productCls.aDay = ''
                                            productCls.aTime = ''

                                            if len(daySplit) > 1:
                                                productCls.sDay = targetYear + daySplit[
                                                    0] + daySplit[1]
                                            if len(daySplit) > 3:
                                                productCls.sTime = daySplit[
                                                    2] + daySplit[3]
                                            if len(daySplit) > 5:
                                                productCls.aDay = targetYear + daySplit[
                                                    4] + daySplit[5]
                                            if len(daySplit) > 7: