def get_img():
    """Fill in the ``fpath`` field of every document that lacks one.

    For each document returned by the ``{'fpath': {'$exists': False}}``
    query, ``get_png.check_path('static', url)`` is consulted first:

    * if it reports the file as present, the returned path is stored in
      ``fpath`` and the document is skipped;
    * otherwise ``get_png.save_png`` is called, and ``fpath`` is set to the
      path from ``check_path`` when the save result is truthy, or to the
      literal ``'NotFound'`` when it is falsy.

    The driver obtained from ``get_png.factory()`` is always closed, even
    when an exception interrupts the loop.
    """
    mp = MongoOp('localhost', 'twitter', 'user_kabutoyama_taro')
    driver = get_png.factory()
    try:
        # `k` is only used in the progress log below; an earlier version of
        # the filter additionally required {k: {'$gt': 50_000}}.
        k = 'max_fav'
        cond = {'fpath': {'$exists': False}}
        # NOTE(review): if mp.col is a PyMongo collection, count() is
        # deprecated (removed in PyMongo 4); count_documents(cond) is the
        # replacement - confirm the installed version before switching.
        cnt = mp.col.count(cond)
        ql = mp.col.find(cond)
        for i, a in enumerate(ql):
            url = a['url']
            # Defensive: the query already excludes documents with 'fpath'.
            if 'fpath' in a:
                continue
            (r, path) = get_png.check_path('static', url)
            if r:
                # File is already on disk: just record its path.
                up_elm = {'$set': {'fpath': path}}
                mp.col.update_one({'_id': a['_id']}, up_elm)
                msg = 'file exists={}'.format(path)
                logging.info(msg)
                continue
            result = get_png.save_png(driver, url, 'static', logging)
            msg = 'k={} cnt={} i={} url={} fpath={}'.format(k, cnt, i, url, result)
            logging.info(msg)
            up_elm = {'$set': {'fpath': path}}
            if not result:
                # Mark the failure so the document is not retried by this query.
                up_elm = {'$set': {'fpath': 'NotFound'}}
            mp.col.update_one({'_id': a['_id']}, up_elm)
    finally:
        # Release the driver even if a lookup, save or update raised.
        get_png.close(driver)
def get_img():
    """Fill in ``fpath`` for ``fav_tweet``/``hage`` documents that lack one.

    Documents matching ``{'fpath': {'$exists': False}}`` are processed in
    descending ``created_at`` order. For each one a status URL is built from
    ``user.screen_name`` (``'_'`` when that is missing) and ``tweet_id``,
    then:

    * if ``get_png.check_path('static', url)`` reports the file as present,
      the returned path is stored in ``fpath`` and the document is skipped;
    * otherwise ``get_png.save_png`` is called, ``fpath`` is set to the path
      from ``check_path`` when the save result is truthy or to the literal
      ``'NotFound'`` when it is falsy, and the loop sleeps for 5 seconds.

    The driver obtained from ``get_png.factory()`` is always closed, even
    when an exception interrupts the loop.
    """
    mp = MongoOp('localhost', db='fav_tweet', col='hage')
    driver = get_png.factory()
    try:
        cond = {'fpath': {'$exists': False}}
        # NOTE(review): if mp.col is a PyMongo collection, count() is
        # deprecated (removed in PyMongo 4); count_documents(cond) is the
        # replacement - confirm the installed version before switching.
        cnt = mp.col.count(cond)
        k = 'created_at'
        ql = mp.col.find(cond).sort([(k, pymongo.DESCENDING)])
        # Example of the kind of URL handled below:
        # https://twitter.com/terrakei07/status/1211085215696732160
        # Materialise the result set up front; presumably so the cursor is
        # not held open across the slow, throttled loop - TODO confirm.
        ql = list(ql)
        for i, a in enumerate(ql):
            if 'user' in a and 'screen_name' in a['user']:
                screen_name = a['user']['screen_name']
            else:
                # Placeholder used when the author is not recorded.
                screen_name = '_'
            tid = a['tweet_id']
            url = 'http://twitter.com/{}/status/{}'.format(screen_name, tid)
            # Defensive: the query already excludes documents with 'fpath'.
            if 'fpath' in a:
                continue
            (r, path) = get_png.check_path('static', url)
            if r:
                # File is already on disk: just record its path.
                up_elm = {'$set': {'fpath': path}}
                mp.col.update_one({'_id': a['_id']}, up_elm)
                msg = 'file exists={}'.format(path)
                logging.info(msg)
                continue
            result = get_png.save_png(driver, url, 'static', logging)
            msg = 'k={} cnt={} i={} url={} fpath={}'.format(k, cnt, i, url, result)
            logging.info(msg)
            up_elm = {'$set': {'fpath': path}}
            if not result:
                # Mark the failure so the document is not retried by this query.
                up_elm = {'$set': {'fpath': 'NotFound'}}
            mp.col.update_one({'_id': a['_id']}, up_elm)
            # Pause between real fetches (the early `continue` paths skip it).
            time.sleep(5)
    finally:
        # Release the driver even if a lookup, save or update raised.
        get_png.close(driver)
def get_tw():
    """Run a ``FetchTweet`` with a fixed query and a fixed ``until`` date.

    A ``MongoOp`` built from the module-level connection settings is handed
    to ``FetchTweet``; the ``until`` date is logged, then the term and query
    are set and ``invoke_fetch()`` is called.
    """
    search_query = u'"猿" OR "雉"'
    cutoff = datetime(2020, 1, 21)

    # NOTE(review): COLLECTION is passed positionally and DATABASE by
    # keyword, exactly as before - confirm this matches MongoOp's signature.
    store = MongoOp(MONGO_HOST, COLLECTION, db=DATABASE)
    fetcher = FetchTweet(store)

    logging.info(u"until={}".format(cutoff))

    fetcher.term(cutoff)
    fetcher.query(search_query)
    fetcher.invoke_fetch()
def add_fpath():
    """Set ``fpath`` on every document whose ``max_rt`` exceeds 50,000.

    Documents are visited in descending ``max_rt`` order. For each one,
    ``get_png.check_path('static', url)`` is called; ``fpath`` becomes the
    returned path when the check is truthy and the literal ``'NotFound'``
    otherwise. The update result is logged for every document.
    """
    store = MongoOp('localhost',)
    k = 'max_rt'
    over_threshold = {k: {'$gt': 50_000}}

    cnt = store.col.count(over_threshold)
    cursor = store.col.find(over_threshold).sort([(k, pymongo.DESCENDING)])

    for i, doc in enumerate(cursor):
        url = doc['url']
        found, path = get_png.check_path('static', url)
        # Store the real path only when the check succeeded.
        fpath_value = path if found else 'NotFound'
        col_r = store.col.update_one(
            {'_id': doc['_id']},
            {'$set': {'fpath': fpath_value}},
        )
        logging.info(
            'k={} cnt={} i={} url={} path={} col_r={}'.format(
                k, cnt, i, url, path, col_r
            )
        )
def connect():
    """Log the Mongo host and database, then store a ``MongoOp`` on ``g.client``."""
    banner = 'MONGO_HOST={} DATABASE={}'.format(MONGO_HOST, DATABASE)
    app.logger.info(banner)

    client = MongoOp(MONGO_HOST, COLLECTION, DATABASE)
    g.client = client
def main():
    """Create the ``MongoOp``, run ``execute_sql()`` and pass both to ``insert_mp``."""
    store = MongoOp('localhost', 'fav_tweet', 'hage')
    # The store is created first, then the SQL is executed, as before.
    insert_mp(store, execute_sql())
def mongo_hook():
    """Attach a ``MongoOp('localhost')`` instance to ``web.ctx.mongo``."""
    connection = MongoOp('localhost')
    web.ctx.mongo = connection
def before():
    """Store a ``MongoOp`` for ``fav_tweet`` / ``2019-12-07`` on ``g.mp``."""
    store = MongoOp(
        'localhost',
        db='fav_tweet',
        col='2019-12-07',
    )
    g.mp = store