class ServiceEvent(object):
    """Service-side worker that fetches Tumblr images and reports to the GUI.

    Every result or status update is delivered by putting a dict of the form
    ``{'type_': ..., 'event_': ..., 'data_': ...}`` on ``GuiRecvMsg``.
    """

    def __init__(self, cfg, _GuiRecvMsg):
        """Store the configuration and create the download helper.

        Args:
            cfg: Mapping read for the keys 'tumblr', 'proxies', 'imgTemp'
                and 'imgSave'.
            _GuiRecvMsg: Object with a ``put`` method that receives the
                messages addressed to the GUI.
        """
        self.GuiRecvMsg = _GuiRecvMsg
        self.cfg = cfg['tumblr']
        self.proxies = cfg['proxies']
        self.imgTemp = cfg['imgTemp']
        self.imgSave = cfg['imgSave']
        # Image descriptors fetched from the dashboard but not yet shown.
        self.imgList = []
        self.working = 0
        # Placeholder markup for one list item; %s receives the placeholder id.
        self.liHtml = ''' <li.loading imgid=%s> <footer .li-footer></footer> <div.imgtype></div> </li> '''
        # NOTE(review): the loop is only created here; nothing in this class
        # starts it, so presumably another thread runs it - confirm.
        self.new_loop = asyncio.new_event_loop()
        self.dld = AsyncioDownload(self.new_loop, self.GuiRecvMsg,
                                   self.proxies)

    def __run_coroutine_threadsafe(self, data, _GuiRecvMsgDict, _Timeout):
        """Schedule ``self.dld.stream_download`` on the private event loop."""
        asyncio.run_coroutine_threadsafe(
            self.dld.stream_download(data, _GuiRecvMsgDict, _Timeout),
            self.new_loop)

    def __fetch_unless_cached(self, img_id, url, file_path, done_msg,
                              timeout_msg):
        """Report ``done_msg`` if ``file_path`` exists, else download it."""
        if osPath.isfile(file_path):
            self.GuiRecvMsg.put(done_msg)
            return
        self.__run_coroutine_threadsafe(
            {
                'id': img_id,
                'http': url,
                'fpath': file_path
            }, done_msg, timeout_msg)

    def tumblr__init(self, data_=None):
        """Create the Tumblr client and load the first page of images.

        Credentials are read from 'tumblr_credentials.json' in the current
        working directory. ``data_`` is accepted but not used.
        """
        print('initTumblr')
        with open('tumblr_credentials.json', 'r') as f:
            self.tumblr_key = JLoad(f)
        self.tumblr = Tumblpy(self.tumblr_key['consumer_key'],
                              self.tumblr_key['consumer_secret'],
                              self.tumblr_key['oauth_token'],
                              self.tumblr_key['oauth_token_secret'],
                              proxies={
                                  "http": self.proxies,
                                  "https": self.proxies
                              })
        self.__putGui('tumblr', 'statusBar', {'text': '获取图片列表'})
        self.__tumblr__getImgList()
        return self.tumblr__getDashboards()

    def tumblr__downloadImg(self, d):
        """Save the image described by ``d`` into the ``imgSave`` folder.

        Args:
            d: Mapping with 'id' (str) and 'download' (URL) entries.
        """
        file_name = d['id'] + '_' + d['download'].split("_")[-1]
        file_path = osPath.join(self.imgSave, file_name)
        _GuiRecvMsgDict = {
            'type_': 'tumblr',
            'event_': 'downloaded',
            'data_': {
                'id': d['id'],
                'fpath': file_path,
                'module': '"'.join(('#tumblr .list li[imgid=', d['id'], ']'))
            }
        }
        _Timeout = {
            'type_': 'tumblr',
            'event_': 'statusBar',
            'data_': {
                'text': d['id'] + '下载失败!'
            }
        }
        self.__fetch_unless_cached(d['id'], d['download'], file_path,
                                   _GuiRecvMsgDict, _Timeout)

    def tumblr__getDashboards(self, data_=None):
        """Show the next batch of images and prefetch more when running low.

        ``data_`` is accepted but not used.
        """
        print('getDashboards')
        imgid_list = self.__tumblr__imgPretreatment()
        limit = int(self.cfg['dashboard_param']['limit'])
        self.__tumblr__setImgList(imgid_list)
        self.__putGui('tumblr', 'setImgIdOver')
        # Refill once fewer than two batches remain queued.
        if len(self.imgList) < limit * 2:
            self.__tumblr__getImgList()

    def tumblr__getPreviewSize(self, d):
        """Fetch the large preview image.

        If the original-size file already exists in ``imgSave`` it is used
        directly; otherwise the preview-size file in ``imgTemp`` is used and
        downloaded when missing.

        Args:
            d: Mapping with 'id', 'original_size' and 'preview_size' entries.
        """
        print('getPreviewSize')
        file_name = d['id'] + '_' + d['original_size'].split("_")[-1]
        file_path = osPath.join(self.imgSave, file_name)
        if not osPath.isfile(file_path):
            file_name = d['id'] + '_' + d['preview_size'].split("_")[-1]
            file_path = osPath.join(self.imgTemp, file_name)
        _GuiRecvMsgDict = {
            'type_': 'tumblr',
            'event_': 'setPreview',
            'data_': {
                'id': d['id'],
                'fpath': file_path
            }
        }
        _Timeout = {
            'type_': 'tumblr',
            'event_': 'timeout',
            'data_': {
                'id': d['id'],
                'http': d['preview_size'],
                'module': '"'.join(('#tumblr .list li[imgid=', d['id'], ']'))
            }
        }
        self.__fetch_unless_cached(d['id'], d['preview_size'], file_path,
                                   _GuiRecvMsgDict, _Timeout)

    def tumblr__refreshTimeoutImg(self, d):
        """Re-download a thumbnail that failed to load.

        Unlike the other fetchers this always schedules a download, even if
        a file already exists at the target path.

        Args:
            d: Mapping with 'id' and 'alt_size' (URL) entries.
        """
        print('refreshTimeoutImg')
        file_name = d['id'] + '_' + d['alt_size'].split("_")[-1]
        file_path = osPath.join(self.imgTemp, file_name)
        _GuiRecvMsgDict = {
            'type_': 'tumblr',
            'event_': 'setImgBg',
            'data_': {
                'id': d['id'],
                'fpath': file_path
            }
        }
        _Timeout = {
            'type_': 'tumblr',
            'event_': 'timeout',
            'data_': {
                'id': d['id'],
                'http': d['alt_size'],
                'module': '"'.join(('#tumblr .view li[imgid=', d['id'], ']'))
            }
        }
        self.__run_coroutine_threadsafe(
            {
                'id': d['id'],
                'http': d['alt_size'],
                'fpath': file_path
            }, _GuiRecvMsgDict, _Timeout)

    def __tumblr__getImgList(self):
        """Fetch the next dashboard page and queue its images.

        Expected format of the queued entries:
        [{
            'id': '0',
            'link_url': 'xx',
            'source_url': '',
            'original_size': 'xx',
            'preview_size': 'x',
            'alt_sizes': 'x'
        }]
        """
        print('getImgList')
        # Ask for five batches at once; work on a copy so the configured
        # limit itself is left untouched.
        p = self.cfg['dashboard_param'].copy()
        p['limit'] *= 5
        try:
            dashboard = self.tumblr.dashboard(p)
        except Exception as e:
            # Best effort: keep the service alive, but report what failed.
            print('err dashboard', e)
            return
        self.cfg['dashboard_param']['offset'] += p['limit']
        self.imgList.extend(
            self.__tumblr__mkDict(dashboard, self.cfg['preview_size'],
                                  self.cfg['alt_sizes']))

    def __tumblr__imgPretreatment(self):
        """Send one batch of placeholder items to the GUI.

        Returns:
            The list of generated placeholder ids, one per placeholder.
        """
        limit = int(self.cfg['dashboard_param']['limit'])
        # Index plus timestamp gives each placeholder a distinct id.
        imgid = ['-'.join((str(i), str(time()))) for i in range(limit)]
        html = ''.join(self.liHtml % placeholder for placeholder in imgid)
        self.__putGui('tumblr', 'appendImg', html)
        return imgid

    def __tumblr__mkDict(self, d, preview_size, alt_sizes):
        """Flatten a dashboard response into one dict per photo.

        Args:
            d: Dashboard response; its 'posts' entry is iterated.
            preview_size: Index into each photo's 'alt_sizes' used for the
                preview URL.
            alt_sizes: Index into each photo's 'alt_sizes' used for the
                thumbnail URL.

        Returns:
            A list of dicts with the keys 'link_url', 'source_url', 'id',
            'original_size', 'preview_size', 'alt_sizes' and 'type'.
        """
        print('mkDict')
        data = []
        posts = d.get('posts', ()) if d else ()
        for v in posts:
            if v['type'] == 'video':
                continue
            # Only photo posts carry 'photos'; other post types are skipped
            # instead of raising KeyError.
            for index, photo in enumerate(v.get('photos', ()), 1):
                thumb_url = gets(
                    photo, '.'.join(('alt_sizes', str(alt_sizes), 'url')), '')
                data.append({
                    'link_url': v.get('link_url', ''),
                    'source_url': v.get('source_url', ''),
                    'id': ''.join((str(v['id']), '[', str(index), ']')),
                    'original_size': gets(photo, 'original_size.url', ''),
                    'preview_size': gets(
                        photo,
                        '.'.join(('alt_sizes', str(preview_size), 'url')),
                        ''),
                    'alt_sizes': thumb_url,
                    'type': thumb_url.split(".")[-1]
                })
        return data

    def __tumblr__setImgList(self, imgid_list):
        """Bind queued images to placeholders and load their thumbnails.

        Args:
            imgid_list: Placeholder ids as returned by the placeholder
                generator; one queued image is consumed per id.
        """
        print('setImgList')
        for imgid in imgid_list:
            if not self.imgList:
                # Fewer images than placeholders (e.g. the dashboard fetch
                # failed); stop instead of raising IndexError.
                print('setImgList: no more images queued')
                break
            d = self.imgList.pop(0)
            self.__putGui(
                'tumblr', 'setImgId', {
                    'id': d['id'],
                    'imgid': imgid,
                    'type': d['type'],
                    'preview': d['preview_size'],
                    'download': d['original_size']
                })
            file_name = d['id'] + '_' + d['alt_sizes'].split("_")[-1]
            file_path = osPath.join(self.imgTemp, file_name)
            _GuiRecvMsgDict = {
                'type_': 'tumblr',
                'event_': 'setImgBg',
                'data_': {
                    'id': d['id'],
                    'fpath': file_path
                }
            }
            _Timeout = {
                'type_': 'tumblr',
                'event_': 'timeout',
                'data_': {
                    'id': d['id'],
                    'http': d['alt_sizes'],
                    'module': '"'.join(
                        ('#tumblr .view li[imgid=', d['id'], ']'))
                }
            }
            self.__fetch_unless_cached(d['id'], d['alt_sizes'], file_path,
                                       _GuiRecvMsgDict, _Timeout)

    def __putGui(self, t, e, d=None):
        """Put a ``{'type_', 'event_', 'data_'}`` message on the GUI queue."""
        self.GuiRecvMsg.put({'type_': t, 'event_': e, 'data_': d})
# NOTE(review): the three values below look like sample post/file ids kept
# from a debugging session - confirm before relying on them.
# 162826769394
# 162826780134_250
# 162824270739_250

# Scratch script: dump the dashboard and collect one image record per post.
with open('tumblr_credentials.json', 'r') as f:
    tumblr_key = json.load(f)

t = Tumblpy(
    tumblr_key['consumer_key'],
    tumblr_key['consumer_secret'],
    tumblr_key['oauth_token'],
    tumblr_key['oauth_token_secret'],
    proxies=cfg['proxies']
)
dashboard = t.dashboard(cfg['dashboard_param'])
print('Here are some posts this blog has made:',
      json.dumps(dashboard, indent=4))

data = []
distId = []
for v in dashboard["posts"]:
    distId.append(v['id'])
    photos = v.get('photos')
    if not photos:
        # Posts without photos have nothing to record; skipping them avoids
        # the KeyError the unconditional v['photos'][0] lookup would raise.
        continue
    first_photo = photos[0]
    data.append({
        'id': v['id'],
        'source_url': v.get('source_url', ''),
        'original_size': first_photo['original_size']['url'],
        'alt_sizes': first_photo['alt_sizes'][cfg['alt_sizes']]['url']
    })
distId.sort()
DBSession = sessionmaker(bind=engine)
session = DBSession()

################
import json
from tumblpy import Tumblpy
from time import time

# Authenticate via OAuth.
# SECURITY: the consumer key/secret and OAuth tokens used to be hard-coded
# here. They are now read from 'tumblr_credentials.json', the same file the
# other Tumblr clients in this file load. The values that were committed
# should be treated as leaked and revoked.
with open('tumblr_credentials.json', 'r') as f:
    tumblr_key = json.load(f)

client = Tumblpy(tumblr_key['consumer_key'],
                 tumblr_key['consumer_secret'],
                 tumblr_key['oauth_token'],
                 tumblr_key['oauth_token_secret'])

dashboard = client.dashboard()

# Collect the downloadable resource URLs of each photo or video post.
for post in dashboard['posts']:
    if post['type'] == 'photo':
        resource_urls = [
            photo['original_size']['url']
            for photo in post.get('photos', ())
        ]
    elif post['type'] == 'video':
        resource_urls = [post['video_url']]
    else:
        # Other post types carry no media handled here.
        continue
class TumblrCtrl(object):
    """Controller that loads Tumblr images and pushes them into the UI frame.

    Thumbnails and previews are cached in ``<cwd>/imgTemp``; originals are
    saved to ``<cwd>/download``. Downloads run on a thread pool.
    """

    def __init__(self, frame):
        """Load configuration and credentials, create folders and pools.

        Args:
            frame: UI frame object; ``call_function`` and ``data_ready`` are
                called on it by the download workers.
        """
        super(TumblrCtrl, self).__init__()
        self.frame = frame
        self.popup = None
        # Defaults; top-level keys are overridden by the contents of
        # data.json (shallow update).
        # NOTE(review): 'preview_size' is read from cfg elsewhere in this
        # class but has no default here - presumably data.json supplies it.
        self.cfg = {
            "alt_sizes": -3,
            "dashboard_param": {
                "limit": 20,
                "offset": 0
            },
            "posts_param": {
                "limit": 20,
                "offset": 0
            },
            "proxies": {}
        }
        with open('data.json', 'r') as f:
            self.cfg.update(json.load(f))
        with open('tumblr_credentials.json', 'r') as f:
            self.tumblr_key = json.load(f)
        self.proxies = self.cfg['proxies']
        self.current_folder = os.getcwd()
        self.target_folder = os.path.join(self.current_folder, 'imgTemp')
        self.download_folder = os.path.join(self.current_folder, 'download')
        for folder in (self.target_folder, self.download_folder):
            # exist_ok avoids the check-then-create race of isdir + mkdir.
            os.makedirs(folder, exist_ok=True)
        # Thread pool for downloads.
        self.tpool = TPool(max_workers=20)
        # Process pool for the dashboard query.
        self.ppool = PPool(max_workers=2)
        self.tumblr = Tumblpy(self.tumblr_key['consumer_key'],
                              self.tumblr_key['consumer_secret'],
                              self.tumblr_key['oauth_token'],
                              self.tumblr_key['oauth_token_secret'],
                              proxies=self.proxies)

    @staticmethod
    def _cache_name(img_id, url):
        """Build the cache file name '<id>_<last "_" segment of url>'."""
        # str() because ids taken straight from the API are integers.
        return '{}_{}'.format(img_id, url.split("_")[-1])

    def _fetch_to_file(self, url, file_path):
        """Stream ``url`` into ``file_path`` without leaving partial files.

        The body is written to a sibling '.part' file and moved into place
        only after a complete, successful (non-error status) download, so a
        failed transfer is never mistaken for a cached image later.
        """
        tmp_path = file_path + '.part'
        req = requests.get(url, proxies=self.proxies, stream=True)
        try:
            req.raise_for_status()
            with open(tmp_path, 'wb') as fh:
                for chunk in req.iter_content(chunk_size=1024):
                    fh.write(chunk)
            os.replace(tmp_path, file_path)
        except Exception:
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)
            raise
        finally:
            req.close()

    def _show_preview(self, data, file_path):
        """Point the popup element at the preview stored in ``file_path``."""
        self.popup.set_style_attribute("background-image",
                                       "url(" + file_path + ")")
        self.popup.set_attribute("imgid", data['id'])
        self.popup.set_attribute("original", data['original'])

    def myOnLoadDatas(self, uri):
        """Queue an in-memory download of ``uri``; always returns True."""
        self.tpool.submit(self._downloadInRAM, "photo", uri)
        return True

    def loadPreviewImg(self, data):
        """Show the preview for ``data``, downloading it first if needed.

        Args:
            data: Mapping with 'id', 'preview_size' (URL) and 'original'.
        """
        fileName = self._cache_name(data['id'], data['preview_size'])
        print(fileName)
        file_path = os.path.join(self.target_folder, fileName)
        if not os.path.isfile(file_path):
            self.tpool.submit(self._downloadPrev, "photo", data, file_path)
        else:
            self._show_preview(data, file_path)

    def loadImgList(self):
        """Fetch the image list and append every image to the UI.

        Each entry is expected to look like:
        {
            "id": 0,
            "source_url": "",
            "original_size": "https://*_1280.jpg",
            "alt_sizes": "https://*_100.jpg"
        }
        """
        print('获取图片列表')
        future_tasks = [
            self.ppool.submit(getDashboards, self.tumblr, self.cfg)
        ]
        for f in future_tasks:
            if f.running():
                print('is running')
        for f in as_completed(future_tasks):
            try:
                # Ask for the result first so a failed query does not
                # advance the offset and silently skip a page.
                posts = f.result()
                self.cfg['dashboard_param']['offset'] += self.cfg[
                    'dashboard_param']['limit']
                for x in posts:
                    fileName = self._cache_name(x['id'], x['alt_sizes'])
                    file_path = os.path.join(self.target_folder, fileName)
                    if not os.path.isfile(file_path):
                        self.tpool.submit(self._download, "photo", x,
                                          file_path)
                    else:
                        html = htmlTemplate.format(x['id'], file_path,
                                                   x['original_size'],
                                                   x['preview_size'])
                        self.frame.call_function('appendImgList', html)
            except Exception as e:
                # Still best effort, but no longer silent.
                print('loadImgList failed:', e)
                return

    def _getTumblrList(self):
        """Return the next page of dashboard images."""
        print('_getTumblrList')
        return self.getDashboard()

    def _downloadPrev(self, medium_type, data, file_path):
        """Download a preview image and show it in the popup.

        Only ``medium_type == "photo"`` is handled; anything else is a no-op.
        """
        if medium_type != "photo":
            return
        print('_download photo')
        self._fetch_to_file(data['preview_size'], file_path)
        self._show_preview(data, file_path)

    def downloadOriginal(self, id, url):
        """Queue a download of the original image unless already saved."""
        fileName = self._cache_name(id, url)
        file_path = os.path.join(self.download_folder, fileName)
        if not os.path.isfile(file_path):
            self.tpool.submit(self._downloadOriginal, url, file_path)

    def _downloadOriginal(self, url, file_path):
        """Download ``url`` into ``file_path``."""
        self._fetch_to_file(url, file_path)

    def _download(self, medium_type, x, file_path):
        """Download a thumbnail and append its list item to the UI.

        Only ``medium_type == "photo"`` is handled; anything else is a no-op.
        """
        if medium_type != "photo":
            return
        print('_download photo')
        self._fetch_to_file(x['alt_sizes'], file_path)
        html = htmlTemplate.format(x['id'], file_path, x['original_size'],
                                   x['preview_size'])
        self.frame.call_function('appendImgList', html)

    def _downloadInRAM(self, medium_type, uri):
        """Download ``uri`` into memory and hand the bytes to the frame.

        Only ``medium_type == "photo"`` is handled. Exceptions propagate to
        the caller (the future, when run on the pool).
        """
        print("_downloadInRAM", uri)
        if medium_type == "photo":
            req = requests.get(uri, proxies=self.proxies)
            self.frame.data_ready(uri, req.content)

    def _mkMainDict(self, d):
        """Build one image record per post from an API response.

        Only the first photo of each post is used; missing fields fall back
        to the defaults passed to ``gets``. A falsy ``d`` yields an empty
        list.
        """
        if not d:
            return []
        preview_key = 'photos.0.alt_sizes.' + str(
            self.cfg['preview_size']) + '.url'
        thumb_key = 'photos.0.alt_sizes.' + str(
            self.cfg['alt_sizes']) + '.url'
        return [{
            'id': gets(v, 'id', 0),
            'link_url': gets(v, 'link_url', ''),
            'original_size': gets(v, 'photos.0.original_size.url', ''),
            'preview_size': gets(v, preview_key, ''),
            'alt_sizes': gets(v, thumb_key, '')
        } for v in d["posts"]]

    def getDashboard(self):
        """Fetch the next dashboard page and advance the stored offset."""
        dashboard = self.tumblr.dashboard(self.cfg['dashboard_param'])
        if dashboard:
            self.cfg['dashboard_param']['offset'] += self.cfg[
                'dashboard_param']['limit']
        return self._mkMainDict(dashboard)

    def getBloggers(self):
        """Fetch the next page of posts of the configured blogger."""
        all_posts = self.tumblr.posts('kuvshinov-ilya.tumblr.com', None,
                                      self.cfg['posts_param'])
        if all_posts:
            self.cfg['posts_param']['offset'] += self.cfg['posts_param'][
                'limit']
        return self._mkMainDict(all_posts)