def tycFromPage():
    """Open the Tianyancha home page in a newly obtained web driver.

    Obtains an object from ``getBloom()`` and a driver from
    ``getWebDriver()``, then navigates the driver to the site's home URL.
    Nothing is returned.
    """
    # NOTE(review): the getBloom() result is not used in the visible code;
    # the call is kept because it may have side effects -- confirm.
    seen_ids = getBloom()
    browser = getWebDriver()
    browser.get('http://www.tianyancha.com/')
def getQichachaInvestDigests():
    """Return a bloom filter populated with every ``uid`` in ``com_invest``.

    A fresh filter is obtained from ``getBloom()``, all ``uid`` values are
    read through the cursor returned by ``getConnCsor()``, and the first
    column of each row is added to the filter.

    Returns:
        The populated object returned by ``getBloom()``.
    """
    idbloom = getBloom()
    _conn, csor = getConnCsor()
    csor.execute('select uid from com_invest')
    # A plain loop rather than a list comprehension: the comprehension was
    # used only for its side effect and built a throwaway list of None.
    for row in csor.fetchall():
        idbloom.add(row[0])
    print('load exists ids ok')
    return idbloom
def getQichachaDigests():
    """Return a bloom filter of the ids in ``com_base_copy``.

    First tries ``loadBloomFromFile('qichachaUIDs')``. If that yields a
    truthy filter it is returned as-is. Otherwise a new filter is created,
    filled with the first column of every row of
    ``select id from com_base_copy``, written out with ``dumpBloomToFile``
    under the same name, and returned.

    Returns:
        The loaded or freshly built filter object.
    """
    dump_name = 'qichachaUIDs'

    idbloom = loadBloomFromFile(dump_name)
    if idbloom:
        print('load bloom from file succ, no need load from db')
        return idbloom

    print('no dump bloom file, load from db')
    # presumably the argument is the filter capacity (20 million) -- confirm
    # against getBloom's definition.
    idbloom = getBloom(2000 * 10000)
    _conn, csor = getConnCsor()
    csor.execute('select id from com_base_copy')
    # A plain loop rather than a list comprehension: the comprehension was
    # used only for its side effect and built a throwaway list with one
    # None per row.
    for row in csor.fetchall():
        idbloom.add(row[0])
    print('load exists ids ok, generate dump bloom file')
    dumpBloomToFile(idbloom, fileName=dump_name)
    return idbloom