示例#1
0
def iqc_parse_commodity(barcode, ret=None):
    """Fetch both IQC detail pages for ``barcode`` and parse them into ``ret``.

    Two URLs are built from ``barcode`` (``/Commodities/Detail/<barcode>``
    and ``/Commodity/Detail/<barcode>``).  Each one is fetched through
    ``jagabee_pycurl.pycurl_wrapper_fetch`` together with the local file
    path returned by ``iqc_url_to_local_file``, and any non-empty response
    is handed to the matching ``iqc_parse_pages`` parser along with ``ret``.

    Args:
        barcode: Identifier appended to the detail URLs (string
            concatenation, so it must be a ``str``).
        ret: Dict passed to both parsers; presumably they fill it in
            place -- confirm against ``iqc_parse_pages``.  When omitted,
            a new empty dict is created for this call.

    Returns:
        The ``ret`` dict (the caller's object when one was supplied).
    """
    # Create the dict per call.  The previous ``ret={}`` default was a single
    # object shared by every call, so data parsed for one barcode leaked into
    # the result returned for the next one.
    if ret is None:
        ret = {}

    printf("\n===> Start to dump barcode %s ...", barcode)

    # Example URLs:
    #   http://iqc.com.tw/Commodities/Detail/176725
    #   http://iqc.com.tw/Commodity/Detail/176725
    page_parsers = (
        ('http://iqc.com.tw/Commodities/Detail/',
         iqc_parse_pages.parse_commodities_page),
        ('http://iqc.com.tw/Commodity/Detail/',
         iqc_parse_pages.parse_commodity_page),
    )

    for base_url, parse_page in page_parsers:
        url = base_url + barcode
        target_file = iqc_url_to_local_file(url)
        html_text = jagabee_pycurl.pycurl_wrapper_fetch(url, target_file)

        # Truthiness also skips a ``None`` result instead of raising in len().
        if html_text:
            parse_page(html_text, ret)

    return ret
示例#2
0
def sixlucky_dump_list_page(url, main_cat="main_cat", sub_cat="sub_cat", index=0):
    """Fetch a sixlucky list page and return the fetched HTML text.

    The local target path is obtained from ``sixlucky_url_to_local_file``
    with ``page_type="list"``; the page is then fetched with
    ``jagabee_pycurl.pycurl_wrapper_fetch(url, <target path>)`` and that
    call's result is returned unchanged.

    Args:
        url: Address of the list page.
        main_cat: Main category label forwarded to the path helper.
        sub_cat: Sub category label forwarded to the path helper.
        index: Page index forwarded to the path helper.
    """
    printf("sixlucky_dump_list_page: url = %s, main_cat = %s, sub_cat = %s " , url, main_cat, sub_cat)

    local_path = sixlucky_url_to_local_file(
        url,
        main_cat=main_cat,
        sub_cat=sub_cat,
        page_type="list",
        index=index,
    )
    printf('dump %s to target_file: %s ', url, local_path)

    # Hand the fetch result straight back to the caller.
    return jagabee_pycurl.pycurl_wrapper_fetch(url, local_path)
示例#3
0
def iqc_dump_list_page(url, referer=''):
    """Fetch an IQC list page and return the fetched HTML text.

    The local target path comes from ``iqc_url_to_local_file(url)``; the
    page is fetched with ``jagabee_pycurl.pycurl_wrapper_fetch`` and that
    call's result is returned unchanged.

    Args:
        url: Address of the list page.
        referer: Value passed as the third argument of the fetch wrapper
            (presumably sent as the HTTP Referer -- confirm in
            ``jagabee_pycurl``).
    """
    printf("iqc_dump_list_page: url = %s " ,url)

    local_path = iqc_url_to_local_file(url)
    printf('dump %s to target_file: %s , referer: %s', url, local_path, referer)

    # Hand the fetch result straight back to the caller.
    return jagabee_pycurl.pycurl_wrapper_fetch(url, local_path, referer)
示例#4
0
def sixlucky_dump_and_parse_commodity(url, main_cat="main_cat", sub_cat="sub_cat", index=-1, ret=None):
    """Fetch one sixlucky commodity page and parse it into ``ret``.

    The local target path is obtained from ``sixlucky_url_to_local_file``
    for page type ``"commodity"``; the page is fetched with
    ``jagabee_pycurl.pycurl_wrapper_fetch`` and a non-empty response is
    passed to ``sixlucky_parse_pages.parse_commodity_page`` with ``ret``.

    Args:
        url: Address of the commodity page.
        main_cat: Main category label forwarded to the path helper.
        sub_cat: Sub category label forwarded to the path helper.
        index: Item index forwarded to the path helper.
        ret: Dict passed to the parser; presumably it is filled in
            place -- confirm against ``sixlucky_parse_pages``.  When
            omitted, a new empty dict is created for this call.

    Returns:
        The ``ret`` dict (the caller's object when one was supplied).
    """
    # Create the dict per call.  The previous ``ret={}`` default was a single
    # object shared by every call, so fields parsed from one commodity leaked
    # into the result returned for the next one.
    if ret is None:
        ret = {}

    target_file = sixlucky_url_to_local_file(url, main_cat, sub_cat, "commodity", index)

    printf("\n===> Start to dump commodity page url: %s to target_file: %s ...", url, target_file)

    html_text = jagabee_pycurl.pycurl_wrapper_fetch(url, target_file)

    # Truthiness also skips a ``None`` result instead of raising in len().
    if html_text:
        sixlucky_parse_pages.parse_commodity_page(html_text, ret)

    return ret