# -*- coding: utf-8 -*-
import codecs
import os

from bs4 import BeautifulSoup

# commons (a small HTTP helper module), the parse_*_text functions, and the
# *_URL / *_HOST constants are defined elsewhere in this project.


def download_bd_info(name, dst):
    """Fetch the Baidu Baike entry for `name` and save it under `dst`."""
    ofile = os.path.join(dst, u"{}_bd.txt".format(name))
    if os.path.exists(ofile):
        print(u'Baidu Skip {}'.format(name))
        return ofile
    bd_url = BD_URL.format(name)
    bd_text = None
    r = commons.get(bd_url, encoding='utf-8', allow_redirects=False)
    if r.status_code == 200:
        bd_text = parse_bd_text(r.text)
    elif r.status_code == 302:
        location = r.headers.get('Location')
        if location and location.startswith('/item/'):
            # Follow the redirect to the canonical /item/ page manually.
            bd_url = "{}{}".format(BD_HOST, location)
            r = commons.get(bd_url, encoding='utf-8', allow_redirects=False)
            if r.status_code == 200:
                bd_text = parse_bd_text(r.text)
        else:
            # No item page behind the redirect; retry with the trailing
            # u'鱼' ("fish") character dropped from the name.
            if name.endswith(u'鱼'):
                bd_url = BD_URL.format(name[:-1])
                r = commons.get(bd_url, encoding='utf-8', allow_redirects=False)
                if r.status_code == 200:
                    bd_text = parse_bd_text(r.text)
    if bd_text:
        with codecs.open(ofile, 'w', 'utf-8') as f:
            print(u'Baidu Found {}'.format(name))
            f.write(bd_url)
            f.write('\n\n')
            f.write(bd_text)
        return ofile
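# A minimal sketch (not from the source) of the commons.get helper assumed
# above: a thin wrapper over requests that pins the response encoding before
# handing the response back. The project's real commons module may differ.
import requests


def _commons_get_sketch(url, encoding=None, allow_redirects=True, **kwargs):
    r = requests.get(url, allow_redirects=allow_redirects, **kwargs)
    if encoding:
        # requests guesses the charset from headers; the scrapers above
        # force utf-8 so the Chinese page text decodes correctly.
        r.encoding = encoding
    return r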
def download_fish_list(list_file, dst=None):
    """Download an info page for every fish name listed in `list_file`."""
    if not dst:
        dst = os.path.dirname(list_file)
    with codecs.open(list_file, 'r', encoding='utf-8') as lf:
        names = lf.read().splitlines()
    for name in names:
        url = LS_URL.format(name)
        r = commons.get(url, encoding='utf-8', allow_redirects=False)
        if r.status_code != 200 or not r.text:
            print(u'No match {}'.format(name))
            continue
        url = LS_INFO_URL.format(r.text)
        # print(url)
        r = commons.get(url, encoding='utf-8', allow_redirects=False)
        if r.status_code != 200:
            continue
        title, content = parse_ls_text(r.text)
        if title and content:
            ofile = os.path.join(dst, u'{}.txt'.format(title))
            if os.path.exists(ofile):
                print(u'Skip {}'.format(title))
                continue
            with codecs.open(ofile, 'w', 'utf-8') as f:
                print(u'Saved {}'.format(title))
                f.write(content)
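# Hypothetical invocation of download_fish_list (file and directory names
# are placeholders, not from the source):
#
#     download_fish_list(u'fish_names.txt', dst=u'fish_pages')
#
# fish_names.txt holds one fish name per line; each matched entry is saved
# to fish_pages/<title>.txt.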
def download_hd_info(name, dst):
    ofile = os.path.join(dst, u"{}_hd.txt".format(name))
    if os.path.exists(ofile):
        print(u'Hudong Skip {}'.format(name))
        return ofile
    hd_url = HD_URL.format(name)
    hd_text = None
    r = commons.get(hd_url, encoding='utf-8', allow_redirects=False)
    if r.status_code == 200:
        hd_text = parse_hd_text(r.text)
    if hd_text:
        with codecs.open(ofile, 'w', 'utf-8') as f:
            print(u'Hudong Found {}'.format(name))
            f.write(hd_url)
            f.write('\n\n')
            f.write(hd_text)
        return ofile
def download_csdb_info(name, dst):
    ofile = os.path.join(dst, u"{}_csdb.txt".format(name))
    if os.path.exists(ofile):
        print(u'CSDB Skip {}'.format(name))
        return ofile
    csdb_url = get_csdb_url(name)
    csdb_text = None
    if csdb_url:
        r = commons.get(csdb_url, encoding='utf-8', allow_redirects=False)
        if r.status_code == 200:
            csdb_text = parse_csdb_text(r.text)
    if csdb_text:
        with codecs.open(ofile, 'w', 'utf-8') as f:
            print(u'CSDB Found {}'.format(name))
            f.write(csdb_url)
            f.write('\n\n')
            f.write(csdb_text)
        return ofile
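# A sketch of how the three per-source downloaders above might be driven for
# a list of names. This driver is an assumption for illustration, not part of
# the source:
def _download_all_sources_sketch(names, dst):
    for name in names:
        download_bd_info(name, dst)
        download_hd_info(name, dst)
        download_csdb_info(name, dst)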
def get_csdb_url(name):
    """Search CSDB and return the detail-page URL matching `name`, if any."""
    # Skip taxonomy-level and non-fish hits: genus, family, order,
    # mosquito, moth, insect.
    exclude = [u'属', u'科', u'目', u'蚊', u'蛾', u'虫']
    r = commons.get(CSDB_URL.format(name), encoding='utf-8', allow_redirects=False)
    if r.status_code == 200:
        soup = BeautifulSoup(r.text, "lxml")
        result = None
        for s in soup.find_all(csdb_info_link):
            if any(e in s.text for e in exclude):
                continue
            if name in s.text:
                result = s['href']
                break
        if result:
            return CSDB_HOST + result
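# csdb_info_link is referenced above but not shown. BeautifulSoup's find_all
# accepts a predicate over tags, so one plausible shape (an assumption, not
# the source's definition) is a filter keeping only <a> tags that point at
# CSDB detail pages:
def _csdb_info_link_sketch(tag):
    return (tag.name == 'a'
            and tag.has_attr('href')
            and '/species/' in tag['href'])  # hypothetical path fragment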
# Tests for the /bill_type endpoints. The duplicated test_* names below
# suggest these stubs live in separate test modules in the original project.
def test_index():
    url = 'http://127.0.0.1:5000/bill_type'
    get(url)


def test_detail():
    url = 'http://127.0.0.1:5000/bill_type/detail/1'
    get(url)


def test_list():
    url = 'http://127.0.0.1:5000/bill_type/list'
    get(url)


# Tests for the /bill endpoints.
def test_detail():
    url = 'http://127.0.0.1:5000/bill/detail/1'
    get(url)


def test_index():
    url = 'http://127.0.0.1:5000/bill'
    get(url)


def test_list():
    url = 'http://127.0.0.1:5000/bill/list'
    get(url)


# Tests for the /user endpoints.
def test_index():
    url = 'http://127.0.0.1:5000/user'
    get(url)


def test_detail():
    url = 'http://127.0.0.1:5000/user/detail/1'
    get(url)


def test_list():
    url = 'http://127.0.0.1:5000/user/list'
    get(url)
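# The bare get() used by the tests above is not shown. A minimal sketch,
# assuming it simply issues the request and echoes the response for manual
# inspection against the local Flask server (the real helper may differ):
import requests


def _test_get_sketch(url):
    r = requests.get(url)
    print(r.status_code, r.text)
    return r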
def detail(poll_id):
    """Fetch one poll and convert its JSON string keys back to ints."""
    raw = get("polls/" + str(poll_id) + "/")
    final = dict()
    for x in raw:
        final[int(x)] = raw[x]
    return final
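# Hypothetical round trip for detail(), assuming get() returns decoded JSON
# with string keys, e.g. {"1": 42, "2": 7} for poll 1's tally:
#
#     >>> detail(1)
#     {1: 42, 2: 7}
#
# Casting the keys back to int restores the choice ids that JSON encoding
# turned into strings.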