Example #1
def crawling_page(window_arrow, is_yesterday=False):
    # Fetch the CJ Mall weekly schedule page for the requested date.
    start_date = window_arrow.format('YYYYMMDD')
    url = "http://www.cjmall.com/etv/broad/schedule_list_week_iframe.jsp?start_date=" + start_date
    page_source = build_soup(url)
    table_source = page_source.findAll('tr')

    # The first row holds one <td> per weekday; find the column whose
    # header contains the requested date (formatted as YYYY/MM/DD).
    table_date = window_arrow.format('YYYY/MM/DD')
    week_days = table_source[0].findAll('td')
    index_for_the_day = 0
    for week_day in week_days:
        if table_date in str(week_day):
            break
        index_for_the_day += 1

    product_list = []
    for row in table_source[1:]:
        # Each schedule row starts with the broadcast hour in a <th>.
        hour = int(row.findAll('th')[0].text)
        # Keep only the 00-05 hour block when crawling yesterday's page,
        # and everything outside that block otherwise.
        if not is_yesterday and 0 <= hour <= 5:
            continue
        if is_yesterday and hour > 5:
            continue

        item_for_week_days = row.findAll('td')
        item = item_for_week_days[index_for_the_day]
        for category, the_time, parsed_name in parsing_item(item):
            product_list.append(
                ProductInfo(
                    name=parsed_name,
                    start_time=the_time,
                    category=category,
                ))
    return product_list
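
All of these examples rely on a build_soup helper that is not shown in this listing. A minimal sketch, assuming it simply downloads the page with requests and parses the HTML with BeautifulSoup; the project's real helper may set headers, timeouts, or the response encoding differently:

import requests
from bs4 import BeautifulSoup


def build_soup(url):
    # Fetch the page and hand the HTML to BeautifulSoup.
    # This is an assumed stand-in for the project's own helper.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')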
Example #2
def home_and_shopping(window_arrow):
    print('HNSMALL')
    # The from_date query parameter expects a URL-encoded YYYY/MM/DD date.
    url = "http://www.hnsmall.com/display/tvtable.do?from_date={0}".format(
        urllib.parse.quote(window_arrow.format('YYYY/MM/DD'), safe=''))

    soup = build_soup(url)
    # Hand every row of the schedule table body to the parse_table helper,
    # which yields the parsed product entries.
    rows = soup.find('table').find('tbody').find_all('tr')
    product_list = []
    for prod_info in parse_table(rows, window_arrow):
        product_list.append(prod_info)

    return product_list
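
Example #2 URL-encodes the slash-separated date before placing it in the from_date query parameter; with safe='' even the '/' characters are percent-encoded. A small illustration of what urllib.parse.quote produces here:

import urllib.parse

# safe='' means '/' is also percent-encoded, so the whole date survives
# as a single query-string value.
print(urllib.parse.quote('2017/04/09', safe=''))  # prints 2017%2F04%2F09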
Example #3
def hyundai_home_shopping(window_arrow):
    print('H  MALL')
    url = 'http://www.hyundaihmall.com/front/bmc/brodPordPbdv.do?cnt=0&date={0}'.format(
        window_arrow.format('YYYYMMDD'))
    soup = build_soup(url)
    # Same pattern as the HNS Mall crawler: collect every row of the
    # schedule table body and let parse_table build the product entries.
    table = soup.find('table')
    rows = table.find('tbody').find_all('tr')
    product_list = []
    for product_info in parse_table(rows):
        product_list.append(product_info)
    return product_list


# for prod in hyundai_home_shopping(arrow.get('2017-04-09')):
#     print(prod)
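
The commented-out call above shows the intended entry point; a runnable variant of the same usage, assuming the arrow library is available:

import arrow

# Crawl the Hyundai Hmall schedule for 2017-04-09 and print every product.
for prod in hyundai_home_shopping(arrow.get('2017-04-09')):
    print(prod)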
Example #4
def gs_shop(window_arrow):
    print('GS MALL')
    url = 'http://with.gsshop.com/tv/tvScheduleMain.gs?lseq=397357&selectDate={0}'.format(
        window_arrow.format('YYYYMMDD'))
    soup = build_soup(url)
    tables = soup.findAll('table')
    product_list = []
    for table in tables:
        rows = table.findAll('tr')
        the_time = ''
        for row in rows:
            column_times = row.find_all('td', {'class': 'time'})
            column_descs = row.find_all('td', {'class': 'desc'})
            column_prices = row.find_all('td', {'class': 'price'})
            column_pics = row.find_all('td', {'class': 'pic'})
            # Rows without their own time cell reuse the most recently
            # parsed time value.
            the_time = parsing_td_time(column_times) or the_time
            category, product = parsing_td_desc(column_descs)
            price = parsing_td_price(column_prices)
            the_id = parsing_td_pic(column_pics)
            # Skip rows without a usable description or product id.
            if not category or not product or not the_id:
                continue
            image_url = get_image_url_by_prod_id(the_id)
            prod_detail_url = get_product_detail_url_by_prod_id(the_id)

            # the_time is a 'start-end' range; split it into the two fields.
            product_list.append(
                ProductInfo(
                    name=product,
                    start_time=the_time.split('-')[0],
                    end_time=the_time.split('-')[1],
                    shop_code='7',   # hard-coded shop code for GS Shop
                    ch_no='8',       # hard-coded channel number for GS Shop
                    category=category,
                    price=price,
                    image=image_url,
                    product_id='000711' + the_id,
                    shop_prod_id=the_id,
                    detail_product_url=prod_detail_url,
                ))
    return product_list
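
None of the examples define the ProductInfo type they construct. A minimal sketch, assuming a plain dataclass whose field names are taken from the keyword arguments used above; the optional empty-string defaults are an assumption, not the project's actual definition:

from dataclasses import dataclass


@dataclass
class ProductInfo:
    # Field names mirror the keyword arguments used in the examples above.
    name: str = ''
    start_time: str = ''
    end_time: str = ''
    category: str = ''
    price: str = ''
    image: str = ''
    shop_code: str = ''
    ch_no: str = ''
    product_id: str = ''
    shop_prod_id: str = ''
    detail_product_url: str = ''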