def query(qry):
    """Search crypto writers whose field t[1] contains the text t[2].

    `qry` is parsed by util.getQuery() into a tuple t of
    (record-kind, field-name, search-text).  Matching writers are
    rendered into `items` via printWriter(); `items` is returned
    (empty when t[0] does not select this record kind).
    """
    t = util.getQuery(qry)
    dbg(t)
    items = []
    # NOTE(review): `type` is the Python builtin unless a module-level
    # variable shadows it elsewhere in the file -- the sibling query()
    # functions compare against string tags like "ico"/"startup".
    # TODO confirm the intended tag.
    if t[0] == type:
        # Renamed from `list`: don't shadow the builtin.
        writers = getCryptoWriterList()
        cand = []
        count = 0
        for writer in writers:
            field = writer.getByField(t[1])
            if field == "NA":
                continue
            # Case-insensitive substring match on the requested field.
            if field.lower().find(t[2].lower()) < 0:
                continue
            count += 1
            printWriter(writer, items)
            cand.append(writer)
        # print() function: the file already uses Python 3 features
        # (f-strings), so the original `print ...` statement was a
        # syntax error under Python 3.
        print("------------Total:", count)
    return items
def query(qry):
    """Search crypto ICOs whose field t[1] contains the text t[2].

    `qry` is parsed by util.getQuery() into a tuple t of
    (record-kind, field-name, search-text).  Matching ICOs are
    rendered into `items` via printObject(); `items` is returned
    (empty when t[0] != "ico").
    """
    t = util.getQuery(qry)
    dbg(t)
    items = []
    if t[0] == "ico":
        # Renamed from `list`: don't shadow the builtin.
        icos = getCryptoICOList()
        cand = []
        count = 0
        for ico in icos:
            field = ico.getByField(t[1])
            dbg(field + ":" + t[2])
            if field == "NA":
                continue
            # Case-insensitive substring match on the requested field.
            if field.lower().find(t[2].lower()) < 0:
                continue
            count += 1
            printObject(ico, items)
            cand.append(ico)
        # print() function: the file already uses Python 3 features
        # (f-strings), so the original `print ...` statement was a
        # syntax error under Python 3.
        print("------------Total:", count)
    return items
def query(qry):
    """Search crypto startups whose field t[1] contains the text t[2].

    `qry` is parsed by util.getQuery() into a tuple t of
    (record-kind, field-name, search-text).  Matching companies are
    rendered into `items` via printCompany(); `items` is returned
    (empty when t[0] != "startup").
    """
    t = util.getQuery(qry)
    dbg(t)
    items = []
    if t[0] == "startup":
        # Renamed from `list`: don't shadow the builtin.
        companies = getCryptoCompanyList()
        cand = []
        count = 0
        for company in companies:
            field = company.getByField(t[1])
            if field == "NA":
                continue
            # Case-insensitive substring match on the requested field.
            if field.lower().find(t[2].lower()) < 0:
                continue
            count += 1
            printCompany(company, items)
            cand.append(company)
        # print() function: the file already uses Python 3 features
        # (f-strings), so the original `print ...` statement was a
        # syntax error under Python 3.
        print("------------Total:", count)
    return items
def main(): path.append(".") queryobj = util.getQuery() # Retrieve query parameters packagepath = "data.packages." + queryobj["package"] + ".code." + queryobj[ "module"] module = import_module(packagepath) # Find and execute the function with the necessary parameters return getattr(module, queryobj["action"])(queryobj["query"], queryobj["entities"])
def create(string, entities):
    """Upload the replied-to message content to the paste service.

    Returns a util.output() "end" event: "empty_paste" when there is no
    reply content, otherwise "paste_made" carrying the paste URL.
    """
    # Hoist the query lookup: the original called util.getQuery() twice.
    extra = util.getQuery()["extra"]
    content = None
    if "reply" in extra:
        content = extra["reply"]["content"]
    if not content:
        return util.output("end", "empty_paste",
                           util.translate("empty_paste"))
    query = {"apikey": util.config("apikey")}
    payload = {
        "text": content,
        "title": "Automatic Upload",
        "name": "Guinevere",
    }
    # `timeout` keeps a stalled paste service from hanging the caller
    # forever; renamed `request` -> `response` (requests.post returns a
    # Response object).
    response = requests.post(
        "https://paste.gemwire.uk/api/create",
        params=query,
        data=payload,
        timeout=30,
    )
    url = response.text.strip()
    return util.output("end", "paste_made",
                       util.translate("paste_made", {"paste": url}))
def query(qry):
    """Search crypto VCs whose field t[1] contains the text t[2].

    `qry` is parsed by util.getQuery() into a tuple t of
    (record-kind, field-name, search-text).  Matching VCs are rendered
    into `items` via printVC(); `items` is returned (empty when t[0]
    does not select this record kind).
    """
    t = util.getQuery(qry)
    items = []
    # NOTE(review): `type` is the Python builtin unless a module-level
    # variable shadows it elsewhere in the file -- the sibling query()
    # functions compare against string tags like "ico"/"startup".
    # TODO confirm the intended tag.
    if t[0] == type:
        # Renamed from `list`: don't shadow the builtin.
        vcs = getCryptoVCList()
        cand = []
        count = 0
        for vc in vcs:
            field = vc.getByField(t[1])
            if field == "NA":
                continue
            # Case-insensitive substring match on the requested field.
            if field.lower().find(t[2].lower()) < 0:
                continue
            count += 1
            printVC(vc, items)
            cand.append(vc)
        # print() function: the file already uses Python 3 features
        # (f-strings), so the original `print ...` statement was a
        # syntax error under Python 3.
        print("------------Total:", count)
    return items
def main(count, path, url, headers):
    """Load the query at `path`, execute it against `url`, print the report.

    Bug fix: the `headers` parameter was accepted but ignored -- the
    original passed the module-level HEADERS constant to execute()
    instead.  NOTE(review): if the global was intentional, drop the
    unused parameter instead; confirm with the callers.
    """
    query = getQuery(path)
    report = execute(query, url, headers, count)
    print_result(report)
def parse(self, response):
    """Save a search-results page and queue requests for remaining pages.

    (Translated from the original Chinese docstring:) start_requests has
    already fetched the page -- this method defines how to extract what
    we want from it.  Nothing is really extracted here yet; the page is
    simply saved to disk.  Extraction with xpath / regex / css comes
    later.  The scrapy flow is: 1) define the links; 2) fetch (download)
    the pages through them; 3) define rules and extract the data.
    That's the whole flow -- simple, isn't it?
    """
    # Break the request URL back into its query parameters.
    qu = util.getQuery(response.url)
    # NOTE(review): dict.setdefault(None) only inserts a None -> None
    # entry when the key None is absent; its purpose is unclear --
    # confirm this is intentional.
    qu.setdefault(None)
    if (qu.get('version')):
        version = qu['version'][0]
    else:
        version = None
    if (qu.get('search')):
        search = qu['search'][0]
        # Page key includes the search term when one is present.
        page = "%s-%s-%s-%s" % (version, qu['start'][0], qu['rows'][0],
                                qu['search'][0])
    else:
        page = "%s-%s-%s" % (version, qu['start'][0], qu['rows'][0])
        search = None
    if (qu.get('start')):
        start = int(qu.get('start')[0])
    else:
        start = 0
    # Save the raw response body for this page key.
    save_file_name = f"{myconfig.output_file}/resp/{page}.json"
    util.savefile(self, save_file_name, response.body)
    json_response = util.parse_json(response.body)
    numFoud = json_response["ret"]["numFound"]
    # Total pages at 50 results per page, rounding up.
    if numFoud % 50 > 0:
        p = int(numFoud / 50) + 1
    else:
        p = int(numFoud / 50)
    ''' p 为总页数'''  # (bare string kept as-is: "p is the total page count")
    # Queue the remaining pages; note this loop rebinds `start`, so the
    # write below sees the last loop value -- TODO confirm intended.
    for start in range(1, p):
        if not search:
            search_p = ""
        else:
            search_p = "&search=%s" % search
        if (version):
            version_p = "&version=%s" % version
        else:
            version_p = ""
        url = self.format_url(search_p, version_p, 50 * start)
        print("request url=%s" % url)
        # Each fetched page is handed back to this parse() method.
        yield scrapy.Request(
            url=url,
            callback=self.parse,
            headers=myconfig.header,
            errback=self.errback)
    # Extract the useful numbers from the JSON: sum the per-issue counts.
    numFoud = json_response["ret"]["numFound"]
    pagetotal = 0
    for i in json_response["ret"]["issueList"]:
        pagetotal = pagetotal + int(i["count"])
    # Append one record per response: version, search, start, pagetotal
    # (dash-separated).
    with open("%s/result" % myconfig.output_file, "a") as f:
        f.write("%s-%s-%s-%s\n" % (version, search, start, pagetotal))