Пример #1
0
def GetData(tripType, orig, dest, deptDate, retDate):
    """Submit the bookryanair.com SkySales search form and return the reply.

    The request is attempted up to three times. Every attempt asks
    ``get_proxy`` for a proxy; when an attempt yields no content, that
    proxy is reported through ``invalid_proxy`` and the next attempt is made
    with a newly requested one.

    Args:
        tripType: Value sent as the ``SearchInput$TripType`` form field.
        orig: Value sent as the ``SearchInput$Orig`` form field.
        dest: Value sent as the ``SearchInput$Dest`` form field.
        deptDate: Value sent as the ``SearchInput$DeptDate`` form field.
        retDate: Value sent as the ``SearchInput$RetDate`` form field.

    Returns:
        The first non-empty result of ``request_post_data``. If all three
        attempts fail, the (``None`` or empty) result of the last attempt.
    """
    search_url = "https://www.bookryanair.com/SkySales/Search.aspx"
    referer_url = ("https://www.bookryanair.com/SkySales/booking.aspx"
                   "?culture=en-gb&lc=en-gb&cmpid2=Google")

    form = {
        "fromaction": "Search.aspx",
        "SearchInput$TripType": tripType,
        "SearchInput$Orig": orig,
        "SearchInput$Dest": dest,
        "SearchInput$DeptDate": deptDate,
        "SearchInput$RetDate": retDate,
        "SearchInput$IsFlexible": "on",
        "SearchInput$PaxTypeADT": 1,
        "SearchInput$PaxTypeCHD": 0,
        "SearchInput$PaxTypeINFANT": 0,
        "SearchInput$AcceptTerms": "on",
        "__EVENTTARGET": "SearchInput$ButtonSubmit",
    }

    # If the fetch fails, switch to another proxy and try again.
    resp = None
    for _ in range(3):
        proxy = get_proxy()
        resp = request_post_data(search_url, form, referer=referer_url,
                                 proxy=proxy)
        if resp is not None and len(resp) > 0:
            return resp
        # Nothing came back through this proxy: report it before retrying.
        invalid_proxy(proxy)
    return resp
Пример #2
0
def GetData(tripType, orig, dest, deptDate, retDate):
    """Submit the bookryanair.com SkySales search form and return the reply.

    A single attempt is made through the proxy returned by
    ``get_proxy(type='f')``. There is no retry and the proxy is never
    reported as invalid.

    Args:
        tripType: Value sent as the ``SearchInput$TripType`` form field.
        orig: Value sent as the ``SearchInput$Orig`` form field.
        dest: Value sent as the ``SearchInput$Dest`` form field.
        deptDate: Value sent as the ``SearchInput$DeptDate`` form field.
        retDate: Value sent as the ``SearchInput$RetDate`` form field.

    Returns:
        Whatever ``request_post_data`` returns, unchanged. Callers must
        handle a ``None`` or empty result themselves.
    """
    search_url = "https://www.bookryanair.com/SkySales/Search.aspx"
    referer_url = ("https://www.bookryanair.com/SkySales/booking.aspx"
                   "?culture=en-gb&lc=en-gb&cmpid2=Google")

    form = {
        "fromaction": "Search.aspx",
        "SearchInput$TripType": tripType,
        "SearchInput$Orig": orig,
        "SearchInput$Dest": dest,
        "SearchInput$DeptDate": deptDate,
        "SearchInput$RetDate": retDate,
        "SearchInput$IsFlexible": "on",
        "SearchInput$PaxTypeADT": 1,
        "SearchInput$PaxTypeCHD": 0,
        "SearchInput$PaxTypeINFANT": 0,
        "SearchInput$AcceptTerms": "on",
        "__EVENTTARGET": "SearchInput$ButtonSubmit",
    }

    proxy = get_proxy(type='f')
    # The previous empty-response check had no effect (its failure branch was
    # a bare ``pass`` and both branches returned the response), so the
    # response is returned directly.
    return request_post_data(search_url, form, referer=referer_url,
                             proxy=proxy)
Пример #3
0
def GetData(tripType, orig, dest, deptDate, retDate):
    """Submit the bookryanair.com SkySales search form and return the reply.

    A single attempt is made with an explicit ``Accept`` header. There is no
    retry and the proxy is never reported as invalid.

    Args:
        tripType: Value sent as the ``SearchInput$TripType`` form field.
        orig: Value sent as the ``SearchInput$Orig`` form field.
        dest: Value sent as the ``SearchInput$Dest`` form field.
        deptDate: Value sent as the ``SearchInput$DeptDate`` form field.
        retDate: Value sent as the ``SearchInput$RetDate`` form field.

    Returns:
        Whatever ``request_post_data`` returns, unchanged. Callers must
        handle a ``None`` or empty result themselves.
    """
    search_url = "https://www.bookryanair.com/SkySales/Search.aspx"
    referer_url = ("https://www.bookryanair.com/SkySales/booking.aspx"
                   "?culture=en-gb&lc=en-gb&cmpid2=Google")
    accept = ("text/html,application/xhtml+xml,application/xml;q=0.9,"
              "image/webp,*/*;q=0.8")

    form = {
        "fromaction": "Search.aspx",
        "SearchInput$TripType": tripType,
        "SearchInput$Orig": orig,
        "SearchInput$Dest": dest,
        "SearchInput$DeptDate": deptDate,
        "SearchInput$RetDate": retDate,
        "SearchInput$IsFlexible": "on",
        "SearchInput$PaxTypeADT": 1,
        "SearchInput$PaxTypeCHD": 0,
        "SearchInput$PaxTypeINFANT": 0,
        "SearchInput$AcceptTerms": "on",
        "__EVENTTARGET": "SearchInput$ButtonSubmit",
    }

    # NOTE(review): the proxy obtained from get_proxy() is overwritten on the
    # very next line by a hard-coded address, so every request goes through
    # that one host. This looks like a debugging leftover -- confirm whether
    # the override should be dropped. The get_proxy() call is kept because it
    # may have side effects that are not visible from here.
    proxy = get_proxy()
    proxy = '221.181.104.11:8080'

    # The previous empty-response check had no effect (its failure branch was
    # a bare ``pass`` and both branches returned the response), so the
    # response is returned directly.
    return request_post_data(search_url, form, referer=referer_url,
                             proxy=proxy, Accept=accept)
Пример #4
0
        req_dept_time = dept_date + 'T' + dept_hour + ':00'

    except Exception, e:
        logger.error('Parse taskcontent failed!' + str(e))
        return -1
    postdata = getPostData(dept_date, dept_id, dest_id)

    #获取代理
    #p = '116.228.55.217:8000'

    p = get_proxy()

    url = 'http://tickets.vueling.com/ScheduleSelect.aspx'
    Referer = 'http://tickets.vueling.com/ScheduleSelect.aspx'

    content = request_post_data(url,postdata,referer=Referer,proxy=p,\
            Accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")

    result = -1

    if content != '' and len(content) > 100:
        result = vuelingparser(content, flight_no, req_dept_time)
    else:
        invalid_proxy(p)
        logger.error('Get web content failed!')

    return result


def vuelingparser(content, flight_no, req_dept_time):
    #allinfos = []
    #get flight num
Пример #5
0
                     taskcontent)
        result['error'] = TASK_ERROR
        return result

    if hotel_id_temp == '0':
        result['error'] = TASK_ERROR
        return result

    p = get_proxy(source='elongHotel')
    if p == None:
        result['error'] = PROXY_NONE
        return result

    post_data = get_post_data(hotel_id_temp, check_in, check_out)

    page = request_post_data(request_url, data=post_data, proxy=p)
    if page == None or page == '':
        invalid_proxy(proxy=p, source='elongHotel')
        result['error'] = PROXY_INVALID
        return result

    room_list = parseRoom(page, hotel_name, city_name_zh, check_in, check_out,
                          hotel_id)

    if room_list != []:
        result['para'] = room_list
        return result
    else:
        result['error'] = DATA_NONE

    return result
Пример #6
0
        url = get_url(dept_city_zh, dest_city_zh, dept_day, dest_day)

        i = 0
        content_len = 0
        while i < 3 and content_len < CONTENT_LEN:
            page = crawl_single_page(url, proxy=p, referer=first_url, n=1)
            content_len = len(page)
            i += 1

        if page != '' and page != None and len(page) > CONTENT_LEN:
            post_data = get_post_data(page, dept_day, dest_day)
            price_url = PRICE_URL % str(time.time() * 1000)
            i = 0
            content_len = 0
            while i < 3 and content_len < CONTENT_LEN:
                price_page = request_post_data(url=price_url, data=post_data, referer=first_url, \
                                               n=1, proxy=p)
                content_len = len(price_page)
                i += 1

            price_dict = parsePrice(price_page)

            flights = parse_page(page, price_dict)
            result['para'] = flights
        else:
            result['error'] = PROXY_INVALID
            return result
    else:
        result['error'] = DATA_NONE
        return result

    return result
Пример #7
0
        logger.error('elongHotelParser: Wrong Content Format with %s'%taskcontent)
        result['error'] = TASK_ERROR
        return result
        
    if hotel_id_temp == '0':
        result['error'] = TASK_ERROR
        return result

    p = get_proxy(source='elongHotel')
    if p == None:
        result['error'] = PROXY_NONE
        return result

    post_data = get_post_data(hotel_id_temp, check_in, check_out)

    page = request_post_data(request_url,data=post_data,proxy=p)
    if page == None or page == '':
        invalid_proxy(proxy=p, source='elongHotel')
        result['error'] = PROXY_INVALID
        return result

    room_list = parseRoom(page,hotel_name,city_name_zh,check_in,check_out,hotel_id)

    if room_list != []:
        result['para'] = room_list
        return result
    else:
        result['error'] = DATA_NONE

    return result
Пример #8
0
    for flight_content in flights_content:
        try:
            flight_adding_id = flight_to_add_state_pat.findall(flight_content)[0]
        except:
            continue
        post_data = {
                'flightToAddState':flight_adding_id,
                'flightSearchSession':search_session,
                'basketOptions':backet_option,
                'flightOptionsState':'Visible',
                '__BasketState':backet_state
                }
        i = 0
        content_len = 0
        while i < 3 and content_len < CONTENT_LEN:
            content = request_post_data(url=REQUEST_URL, data=post_data, proxy=proxy, n=1)
            content_len = len(content)
            i += 1

        if len(content) < 100 or content == '' or content == None:
            continue

        para = parseFlightAndTicket(content, time_zone_A, time_zone_B)
        if para['flight'] != {}:
            flights.update(para['flight'])
        if para['ticket'] != []:
            tickets += para['ticket']

    result = {'flight':flights, 'ticket':tickets}
    return result
Пример #9
0
        req_dept_time = dept_date + 'T' + dept_hour + ':00'

    except Exception,e:
        logger.error('Parse taskcontent failed!' + str(e))
        return -1
    postdata = getPostData(dept_date,dept_id,dest_id)

    #获取代理
    #p = '116.228.55.217:8000'

    p = get_proxy()

    url = 'http://tickets.vueling.com/ScheduleSelect.aspx'
    Referer = 'http://tickets.vueling.com/ScheduleSelect.aspx'

    content = request_post_data(url,postdata,referer=Referer,proxy=p,\
            Accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")

    result = -1

    if content != '' and len(content) > 100:
        result = vuelingparser(content,flight_no,req_dept_time)
    else:
        invalid_proxy(p)
        logger.error('Get web content failed!')

    return result


def vuelingparser(content,flight_no,req_dept_time):
    #allinfos = []
    #get flight num
Пример #10
0
    check_in = check_in_temp[:4] + '-' + check_in_temp[4:6] + '-' + \
            check_in_temp[6:]
    check_out_temp = datetime.datetime(int(check_in_temp[:4]), int(check_in_temp[4:6]), \
            int(check_in_temp[6:]))
    check_out = str(check_out_temp + datetime.timedelta(days=1))[:10]
    hotel_id_temp = hotel_id.split('_')[1]
    if hotel_id_temp == '0':
        return room_list

    i = 0
    content_len = 0
    while i < 3 and content_len < 100:
        p = get_proxy()
        logger.info('Proxy: ' + p)
        post_data = get_post_data(hotel_id_temp, check_in, check_out)
        content = request_post_data(request_url, data=post_data, proxy=p)
        content_len = len(content)
        i += 1
    room_list = parseRoom(content, hotel_name, city_name_zh, check_in,
                          check_out, hotel_id)

    return room_list


def parseRoom(content, hotel_name, city_name_zh, check_in, check_out,
              hotel_id):
    room_list = []
    if content == '' or len(content) < 100:
        return room_list

    try:
Пример #11
0
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

    resp = crawl_single_page(first_url,proxy=p, \
         Accept='text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', n = 1)

    if resp.find('404错误') < 0:
        url = get_url(dept_city_zh,dest_city_zh,dept_day)
        page = crawl_single_page(url, proxy = p, referer = first_url)

        if page != '' and len(page) > 300:
            post_data = get_post_data(page, dept_day)
            price_url = PRICE_URL%str(time.time()*1000)

            price_page = request_post_data(price_url, data=post_data, referer=first_url, \
                                           n=1, proxy=p)
            price_dict = parsePrice(price_page)
            #print price_dict
            time.sleep(5)
            flights = parse_page(page, price_dict)
            result['para'] = flights
        else:
            result['error'] = PROXY_INVALID
            return result
    else:
        result['error'] = DATA_NONE
        return result

    return result

Пример #12
0
            flight_adding_id = flight_to_add_state_pat.findall(
                flight_content)[0]
        except:
            continue
        post_data = {
            'flightToAddState': flight_adding_id,
            'flightSearchSession': search_session,
            'basketOptions': backet_option,
            'flightOptionsState': 'Visible',
            '__BasketState': backet_state
        }
        i = 0
        content_len = 0
        while i < 3 and content_len < CONTENT_LEN:
            content = request_post_data(url=REQUEST_URL,
                                        data=post_data,
                                        proxy=proxy,
                                        n=1)
            content_len = len(content)
            i += 1

        if len(content) < 100 or content == '' or content == None:
            continue

        para = parseFlightAndTicket(content, time_zone_A, time_zone_B)
        if para['flight'] != {}:
            flights.update(para['flight'])
        if para['ticket'] != []:
            tickets += para['ticket']

    result = {'flight': flights, 'ticket': tickets}
    return result
Пример #13
0
    check_in = check_in_temp[:4] + '-' + check_in_temp[4:6] + '-' + \
            check_in_temp[6:]
    check_out_temp = datetime.datetime(int(check_in_temp[:4]), int(check_in_temp[4:6]), \
            int(check_in_temp[6:]))
    check_out = str(check_out_temp + datetime.timedelta(days=1))[:10]
    hotel_id_temp = hotel_id.split('_')[1]
    if hotel_id_temp == '0':
        return room_list

    i = 0
    content_len = 0
    while i < 3 and content_len < 100:
        p = get_proxy()
        logger.info('Proxy: ' + p)
        post_data = get_post_data(hotel_id_temp, check_in, check_out)
        content = request_post_data(request_url,data=post_data,proxy=p)
        content_len = len(content)
        i += 1
    room_list = parseRoom(content,hotel_name,city_name_zh,check_in,check_out,hotel_id)

    return room_list
    

def parseRoom(content,hotel_name,city_name_zh,check_in,check_out,hotel_id):
    room_list = []
    if content == '' or len(content) < 100:
        return room_list

    try:
        content_json = json.loads(content)['value']['hotelRoomList']
    except Exception, e: