示例#1
0
def html_cdc(conp, f, headless=True):
    """Crawl the hrefs of ``<schema>.gg`` selected by the incremental query.

    Queries the ``href`` values of ``<schema>.gg`` that are not returned by a
    subquery over ``<schema>.gg_html`` and hands them to ``page().write``
    with ``tb="gg_html"`` and ``num=5``.  When the query returns no rows, a
    notice is printed and nothing is written.

    Args:
        conp: Connection parameters, forwarded to ``db_query`` and to
            ``page().write``.  ``conp[4]`` is interpolated into the SQL as
            the schema name.
        f: Forwarded unchanged to ``page().write`` as ``f``.
        headless: Forwarded unchanged to ``page().write`` as ``headless``.

    Returns:
        None.
    """
    m = page()
    # NOTE(review): the ``hreftype`` filter sits inside the NOT IN subquery,
    # so it restricts the gg_html rows rather than the gg rows -- confirm
    # that this is intended.
    sql = "select href from %s.gg where href not in(select href from %s.gg_html where not coalesce(info,'{}')::jsonb?'hreftype' or coalesce(info,'{}')::jsonb->>'hreftype'='可抓网页')" % (conp[4], conp[4])

    df = db_query(sql, dbtype="postgresql", conp=conp)
    arr = df["href"].values
    # ``arr == []`` is an element-wise comparison when ``arr`` is an array
    # (presumably a NumPy array coming from pandas -- TODO confirm), so it
    # never evaluated to a plain True for an empty result.  Check the length.
    if len(arr) == 0:
        print("无href更新")  # message reads "no href updates"
        return None

    setting = {
        "num": 5,
        "arr": arr,
        "f": f,
        "conp": conp,
        "tb": "gg_html",
        "headless": headless,
    }
    m.write(**setting)
示例#2
0
def html_work(conp, f, size=None, headless=True):
    """Crawl hrefs from ``<schema>.gg`` and write them to ``gg_html``.

    Selects the ``href`` column of ``<schema>.gg`` for rows whose ``info``
    JSON either has no ``hreftype`` key or has ``hreftype`` equal to
    '可抓网页', optionally capped by ``size``, and hands the values to
    ``page().write`` with ``num=20`` and ``tb="gg_html"``.

    Args:
        conp: Connection parameters, forwarded to ``db_query`` and to
            ``page().write``.  ``conp[4]`` is interpolated as the schema name.
        f: Forwarded unchanged to ``page().write`` as ``f``.
        size: When not None, appended to the query as ``limit <size>``.
        headless: Forwarded unchanged to ``page().write`` as ``headless``.
    """
    crawler = page()
    schema = conp[4]

    # Shared query text; the trailing space is part of the original string.
    base_query = (
        "select href from %s.gg where not coalesce(info,'{}')::jsonb?'hreftype' "
        "or coalesce(info,'{}')::jsonb->>'hreftype'='可抓网页' "
    )
    if size is None:
        sql = base_query % schema
    else:
        sql = (base_query + "limit %d") % (schema, size)

    hrefs = db_query(sql, dbtype="postgresql", conp=conp)["href"].values
    print(hrefs[:3])

    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="gg_html",
        headless=headless,
    )
    crawler.write(**options)
示例#3
0
def bujiu(tb):
    """Crawl the hrefs of ``wuhan.<tb>`` that are missing from ``wuhan.<tb>_html``.

    Queries ``href`` values present in ``wuhan.<tb>`` but not in
    ``wuhan.<tb>_html`` and hands them to ``page().write`` with ``num=20``
    and ``tb="<tb>_html"``.

    Args:
        tb: Table name, interpolated into the SQL and used to build the
            target table name ``<tb>_html``.
    """
    crawler = page()
    query = (
        "select href from wuhan.%s where href not in("
        "select href from wuhan.%s_html)"
    ) % (tb, tb)

    # NOTE(review): connection parameters (including the password) are
    # hard-coded here; consider loading them from configuration.
    conp = ["postgres", "since2015", "192.168.3.171", "scrapy4", "wuhan"]
    hrefs = db_query(query, dbtype="postgresql", conp=conp)["href"].values

    # ``f`` is not a parameter of this function; it is resolved from the
    # enclosing (module) scope at call time.
    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="%s_html" % tb,
    )
    crawler.write(**options)
示例#4
0
def template(tb, conp, size=None):
    """Crawl every href of ``<schema>.<tb>`` and write to ``<tb>_html``.

    Selects the ``href`` column of ``<schema>.<tb>``, optionally capped by
    ``size``, and hands the values to ``page().write`` with ``num=20`` and
    ``tb="<tb>_html"``.

    Args:
        tb: Source table name; also used to build the target ``<tb>_html``.
        conp: Connection parameters, forwarded to ``db_query`` and to
            ``page().write``.  ``conp[4]`` is interpolated as the schema name.
        size: When not None, appended to the query as ``limit <size>``.
    """
    crawler = page()
    schema = conp[4]

    # Shared query text; the trailing space is part of the original string.
    base_query = "select href from %s.%s "
    if size is None:
        sql = base_query % (schema, tb)
    else:
        sql = (base_query + "limit %d") % (schema, tb, size)

    hrefs = db_query(sql, dbtype="postgresql", conp=conp)["href"].values

    # ``f`` is not a parameter of this function; it is resolved from the
    # enclosing (module) scope at call time.
    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="%s_html" % tb,
    )
    crawler.write(**options)
示例#5
0
def html_template(tb, size=None):
    """Crawl every href of ``wuhan.<tb>`` and write to ``<tb>_html``.

    Selects the ``href`` column of ``wuhan.<tb>``, optionally capped by
    ``size``, and hands the values to ``page().write`` with ``num=20`` and
    ``tb="<tb>_html"``.

    Args:
        tb: Source table name; also used to build the target ``<tb>_html``.
        size: When not None, appended to the query as ``limit <size>``.
    """
    crawler = page()

    # Shared query text; the trailing space is part of the original string.
    base_query = "select href from wuhan.%s "
    if size is None:
        sql = base_query % tb
    else:
        sql = (base_query + "limit %d") % (tb, size)

    # NOTE(review): connection parameters (including the password) are
    # hard-coded here; consider loading them from configuration.
    conp = ["postgres", "since2015", "192.168.3.171", "scrapy4", "wuhan"]
    hrefs = db_query(sql, dbtype="postgresql", conp=conp)["href"].values

    # ``f`` is not a parameter of this function; it is resolved from the
    # enclosing (module) scope at call time.
    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="%s_html" % tb,
    )
    crawler.write(**options)