Exemplo n.º 1
0
def pager(task, rule):
    """Expand one listing page into the URLs of all of its pages.

    The HTML in ``task["text"]`` is parsed, a count is read from it with
    the XPath expression ``rule["normal"]``, and the module-level
    ``normal_base`` template is formatted once for every page number
    from 1 up to that count.

    Args:
        task: dict holding the fetched document under "text" and its
            address under "url"; an optional truthy "limit" caps the
            number of URLs generated.
        rule: dict whose "normal" entry is the XPath for the count.

    Returns:
        A list of formatted URLs, or None when the document cannot be
        parsed, the XPath matches nothing, or the URL contains no
        ``=<digits,commas>`` group.
    """
    try:
        tree = etree.HTML(task["text"])
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit must still be able to stop the crawler.
        traceback.print_exc()
        return

    found = tree.xpath(rule["normal"])
    if not found:
        log_with_time("pager, no count: %s" % task["url"])
        return
    count = int(found[0])
    if count > 450:
        # Only reported; the URLs are still generated below.
        log_with_time("need split page: %s" % task["url"])

    url = task["url"]
    cats = re.findall("=([0-9,]+)", url)
    if not cats:
        log_with_time("no cats in url: %s" % url)
        return

    # Carry an "ev=...%40" fragment from the source URL over to every page.
    price_range = re.findall("(ev=.*%40)", url)
    if price_range:
        base = "%s&%s" % (normal_base, price_range[0])
    else:
        base = normal_base

    limit = task.get("limit")
    if limit:
        count = min(limit, count)

    # The quoted category string is the same for every page.
    quoted_cats = async_http.quote(cats[0])
    return [base.format(quoted_cats, page) for page in range(1, count + 1)]
Exemplo n.º 2
0
def pager(task, rule):
    """Generate the per-page URLs for the listing described by ``task``.

    Parses ``task["text"]`` as HTML, evaluates the XPath in
    ``rule["normal"]`` to obtain a count, and formats the module-level
    ``normal_base`` template with the quoted category string and each
    page number in ``1..count``.

    Args:
        task: dict with "text" (document body) and "url"; a truthy
            "limit" entry, if present, caps the count.
        rule: dict providing the count XPath under "normal".

    Returns:
        list of URL strings, or None if parsing fails, the XPath yields
        nothing, or no ``=<digits,commas>`` group is found in the URL.
    """
    try:
        tree = etree.HTML(task["text"])
    except Exception:
        # Narrower than a bare except so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        traceback.print_exc()
        return

    matches = tree.xpath(rule["normal"])
    if not matches:
        log_with_time("pager, no count: %s" % task["url"])
        return
    count = int(matches[0])
    if count > 450:
        # Informational only: generation continues with the full count.
        log_with_time("need split page: %s" % task["url"])

    url = task["url"]
    cats = re.findall("=([0-9,]+)", url)
    if not cats:
        log_with_time("no cats in url: %s" % url)
        return

    # Re-attach an "ev=...%40" fragment of the source URL when it has one.
    price_range = re.findall("(ev=.*%40)", url)
    base = normal_base
    if price_range:
        base = "%s&%s" % (normal_base, price_range[0])

    limit = task.get("limit")
    if limit:
        count = min(limit, count)

    # Quote once; the category part does not change between pages.
    category = async_http.quote(cats[0])
    return [base.format(category, number) for number in range(1, count + 1)]
Exemplo n.º 3
0
def rt_parser(items):
    """Fetch current price and stock state for the products in ``items``.

    Product ids are extracted with ``get_pids``; one request is made to
    the price endpoint (ids prefixed with "J_") and one to the stock
    endpoint, both through ``simple_http.get``. The two JSONP responses
    are combined into ``(pid, price, in_stock)`` tuples.

    Args:
        items: input handed unchanged to ``get_pids``.

    Returns:
        The result of ``format_price`` applied to the list of tuples, or
        None when no ids are found, either response status is not 200,
        or a response body cannot be decoded/parsed.

    Raises:
        KeyError: if a priced product id is absent from the stock response.
    """
    pids = get_pids(items)
    if not pids:
        # Report the input we were given; wrapped in a tuple so a tuple
        # argument cannot break the % formatting.
        log_with_time("got nothing: %s" % (items,))
        return

    purl = price_url % (
        ",".join(["J_" + pid for pid in pids]),
        random.randint(1000000, 10000000),
        int(time.time() * 1000),
    )
    surl = stock_url % (
        async_http.quote(",".join(pids)),
        random.randint(1000000, 10000000),
        int(time.time() * 1000),
    )

    price_res = simple_http.get(purl)
    stock_res = simple_http.get(surl)
    if price_res["status"] != 200 or stock_res["status"] != 200:
        # Log both statuses: either request may be the one that failed.
        log_with_time("not200: price=%s stock=%s"
                      % (price_res["status"], stock_res["status"]))
        return

    try:
        price_json = jsonp_json(price_res["text"])
        # Only the stock body is decoded from GBK before parsing.
        stock_json = jsonp_json(stock_res["text"].decode("gbk"))
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        traceback.print_exc()
        return

    # Price ids look like "<prefix>_<pid>"; key the map by the pid part.
    prices = {}
    for entry in price_json:
        prices[entry["id"].split("_")[1]] = entry["p"]

    # A product counts as in stock when its state name contains either marker.
    stocks = {}
    for pid, info in stock_json.items():
        state = info["StockStateName"]
        stocks[pid] = 1 if (u"有货" in state or u"现货" in state) else 0

    ret = [(str(pid), str(price), stocks[pid])
           for pid, price in prices.items()]
    return format_price(ret)
Exemplo n.º 4
0
def stock_filter(items):
    """Build the stock-lookup request descriptor for ``items``.

    The keys of ``items`` are comma-joined, quoted with
    ``async_http.quote`` and substituted into ``stock_url`` together
    with a random integer and the current time in milliseconds.

    Args:
        items: mapping whose keys are sent in the stock query
            (presumably product ids mapped to prices - confirm with caller).

    Returns:
        dict with the request address under "url" and the untouched
        ``items`` mapping under "price".
    """
    quoted_keys = async_http.quote(",".join(items.keys()))
    random_part = random.randint(1000000, 10000000)
    now_ms = int(time.time() * 1000)
    request = {"url": stock_url % (quoted_keys, random_part, now_ms)}
    request["price"] = items
    return request
Exemplo n.º 5
0
def stock_filter(items):
    """Return the stock request for the keys of ``items``.

    Args:
        items: mapping; its keys are comma-joined and quoted into the
            stock URL (presumably product ids - verify against caller).

    Returns:
        ``{"url": <formatted stock_url>, "price": items}``.
    """
    # Template arguments in order: quoted key list, random integer,
    # current time in milliseconds.
    url_args = (
        async_http.quote(",".join(items.keys())),
        random.randint(1000000, 10000000),
        int(time.time() * 1000),
    )
    return dict([("url", stock_url % url_args), ("price", items)])
Exemplo n.º 6
0
def rt_parser(items):
    """Look up live prices and stock flags for the products in ``items``.

    ``get_pids`` supplies the product ids. The price endpoint is queried
    with the ids prefixed by "J_" and the stock endpoint with the quoted,
    comma-joined ids; both calls go through ``simple_http.get``. The
    parsed JSONP payloads are merged into ``(pid, price, in_stock)``
    tuples, with ``in_stock`` being 1 or 0.

    Args:
        items: input passed as-is to ``get_pids``.

    Returns:
        ``format_price`` applied to the merged tuples, or None when there
        are no ids, a response status differs from 200, or parsing a
        response fails.

    Raises:
        KeyError: if the stock response has no entry for a priced id.
    """
    pids = get_pids(items)
    if not pids:
        # Log the actual argument; the tuple wrapper keeps % formatting
        # safe even if ``items`` is itself a tuple.
        log_with_time("got nothing: %s" % (items,))
        return

    price_ids = ",".join(["J_" + pid for pid in pids])
    purl = price_url % (price_ids, random.randint(1000000, 10000000),
                        int(time.time() * 1000))
    stock_ids = async_http.quote(",".join(pids))
    surl = stock_url % (stock_ids, random.randint(1000000, 10000000),
                        int(time.time() * 1000))

    price_res = simple_http.get(purl)
    stock_res = simple_http.get(surl)
    if price_res["status"] != 200 or stock_res["status"] != 200:
        # Either request may have failed, so report both statuses.
        log_with_time("not200: price=%s stock=%s"
                      % (price_res["status"], stock_res["status"]))
        return

    try:
        price_json = jsonp_json(price_res["text"])
        # The stock body alone is decoded from GBK before parsing.
        stock_json = jsonp_json(stock_res["text"].decode("gbk"))
    except Exception:
        # Not a bare except: interpreter-exit signals must pass through.
        traceback.print_exc()
        return

    # Key prices by the part of the id after the first underscore.
    prices = {}
    for record in price_json:
        prices[record["id"].split("_")[1]] = record["p"]

    # In stock when the state name contains either of the two markers.
    stocks = {}
    for pid, detail in stock_json.items():
        state_name = detail["StockStateName"]
        if u"有货" in state_name or u"现货" in state_name:
            stocks[pid] = 1
        else:
            stocks[pid] = 0

    ret = []
    for pid, price in prices.items():
        ret.append((str(pid), str(price), stocks[pid]))
    return format_price(ret)