def html_cdc(conp, f, headless=True):
    """Crawl the hrefs of ``<schema>.gg`` that the ``gg_html`` subquery does not return.

    The schema name is taken from ``conp[4]``.  The query selects every
    ``href`` from ``gg`` that is not returned by a subquery over ``gg_html``;
    that subquery keeps rows whose ``info`` JSON has no ``hreftype`` key or
    whose ``hreftype`` equals '可抓网页' (roughly "crawlable web page").

    Args:
        conp: Connection parameters forwarded to ``db_query`` and to
            ``page().write``; index 4 is interpolated as the schema name.
        f: Passed through unchanged to ``page().write`` as ``f``
            (presumably the per-page fetch callback -- confirm against
            ``page.write``).
        headless: Passed through unchanged to ``page().write``.

    Returns:
        None.  Returns early, after printing a notice, when the query
        yields no hrefs.
    """
    m = page()
    # NOTE(review): the hreftype filter sits inside the subquery, so it is
    # applied to gg_html rows rather than to gg rows -- confirm this is intended.
    sql = "select href from %s.gg where href not in(select href from %s.gg_html where not coalesce(info,'{}')::jsonb?'hreftype' or coalesce(info,'{}')::jsonb->>'hreftype'='可抓网页')" % (conp[4], conp[4])
    df = db_query(sql, dbtype="postgresql", conp=conp)
    arr = df["href"].values

    # The previous check ``arr == []`` compared an array with a list, which
    # yields an (empty) element-wise result rather than a reliable boolean,
    # so the early return never triggered as intended.  Test the length.
    if len(arr) == 0:
        print("无href更新")  # message reads "no href updates"
        return None

    setting = {
        "num": 5,
        "arr": arr,
        "f": f,
        "conp": conp,
        "tb": "gg_html",
        "headless": headless,
    }
    m.write(**setting)
def html_work(conp, f, size=None, headless=True):
    """Crawl hrefs from ``<schema>.gg`` and hand them to ``page().write``.

    The schema name is ``conp[4]``.  Only rows whose ``info`` JSON lacks a
    ``hreftype`` key, or whose ``hreftype`` equals '可抓网页' (roughly
    "crawlable web page"), are selected.

    Args:
        conp: Connection parameters forwarded to ``db_query`` and to
            ``page().write``; index 4 is interpolated as the schema name.
        f: Passed through unchanged to ``page().write`` as ``f``.
        size: When not ``None``, appended to the query as a ``limit``.
        headless: Passed through unchanged to ``page().write``.
    """
    crawler = page()

    if size is None:
        query = "select href from %s.gg where not coalesce(info,'{}')::jsonb?'hreftype' or coalesce(info,'{}')::jsonb->>'hreftype'='可抓网页' " % (conp[4])
    else:
        query = "select href from %s.gg where not coalesce(info,'{}')::jsonb?'hreftype' or coalesce(info,'{}')::jsonb->>'hreftype'='可抓网页' limit %d" % (conp[4], size)

    frame = db_query(query, dbtype="postgresql", conp=conp)
    hrefs = frame["href"].values
    # Show the first few hrefs as a quick sanity check of what was selected.
    print(hrefs[:3])

    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="gg_html",
        headless=headless,
    )
    crawler.write(**options)
def bujiu(tb):
    """Crawl hrefs of ``wuhan.<tb>`` that have no row in ``wuhan.<tb>_html``.

    The selected hrefs are passed to ``page().write`` with the target table
    ``<tb>_html``.

    Args:
        tb: Base table name; interpolated into the SQL and used to build the
            ``<tb>_html`` target table name.
    """
    crawler = page()
    query = "select href from wuhan.%s where href not in(select href from wuhan.%s_html)" % (tb, tb)

    # NOTE(review): connection parameters (including the password) are
    # hard-coded here rather than supplied by the caller.
    conp = ["postgres", "since2015", "192.168.3.171", "scrapy4", "wuhan"]

    frame = db_query(query, dbtype="postgresql", conp=conp)
    hrefs = frame["href"].values

    # NOTE(review): ``f`` is not a parameter of this function; it is resolved
    # from the enclosing (module) scope at call time -- confirm it is defined.
    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="%s_html" % tb,
    )
    crawler.write(**options)
def template(tb, conp, size=None):
    """Crawl every href of ``<schema>.<tb>`` into ``<tb>_html``.

    The schema name is ``conp[4]``.

    Args:
        tb: Source table name; also used to build the ``<tb>_html`` target
            table name passed to ``page().write``.
        conp: Connection parameters forwarded to ``db_query`` and to
            ``page().write``; index 4 is interpolated as the schema name.
        size: When not ``None``, appended to the query as a ``limit``.
    """
    crawler = page()

    if size is None:
        query = "select href from %s.%s " % (conp[4], tb)
    else:
        query = "select href from %s.%s limit %d" % (conp[4], tb, size)

    frame = db_query(query, dbtype="postgresql", conp=conp)
    hrefs = frame["href"].values

    # NOTE(review): ``f`` is not a parameter of this function; it is resolved
    # from the enclosing (module) scope at call time -- confirm it is defined.
    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="%s_html" % tb,
    )
    crawler.write(**options)
def html_template(tb, size=None):
    """Crawl every href of ``wuhan.<tb>`` into ``<tb>_html``.

    Uses a fixed connection whose schema entry is ``wuhan``.

    Args:
        tb: Source table name; also used to build the ``<tb>_html`` target
            table name passed to ``page().write``.
        size: When not ``None``, appended to the query as a ``limit``.
    """
    crawler = page()

    if size is None:
        query = "select href from wuhan.%s " % tb
    else:
        query = "select href from wuhan.%s limit %d" % (tb, size)

    # NOTE(review): connection parameters (including the password) are
    # hard-coded here rather than supplied by the caller.
    conp = ["postgres", "since2015", "192.168.3.171", "scrapy4", "wuhan"]

    frame = db_query(query, dbtype="postgresql", conp=conp)
    hrefs = frame["href"].values

    # NOTE(review): ``f`` is not a parameter of this function; it is resolved
    # from the enclosing (module) scope at call time -- confirm it is defined.
    options = dict(
        num=20,
        arr=hrefs,
        f=f,
        conp=conp,
        tb="%s_html" % tb,
    )
    crawler.write(**options)