def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    offset = remark.get('offset')

    html = tools.get_html_by_webdirver(root_url)
    headers = tools.get_tag(html, 'div', {'class': 'result'}, find_all=True)
    if not headers:
        base_parser.update_url('BAIDU_NEWS_urls', root_url, Constance.DONE)
        return

    for header in headers:
        # "查看更多相关新闻" (view more related news) link
        regex = ' <span class="c-info"><a.*?href="(.*?)".*?查看更多相关新闻'
        more_news_url = tools.get_info(str(header), regex, fetch_one=True)
        if more_news_url:
            more_news_url = tools.get_full_url('http://news.baidu.com', more_news_url)
            more_news_url = more_news_url.replace('amp;', '')
            base_parser.add_url('BAIDU_NEWS_urls', SITE_ID, more_news_url, depth=1, remark={'offset': 0})

        url = header.h3.a['href']
        article_extractor = ArticleExtractor(url)
        content = title = release_time = author = website_domain = ''
        content = article_extractor.get_content()
        if content:
            title = article_extractor.get_title()
            release_time = article_extractor.get_release_time()
            author = article_extractor.get_author()
            website_domain = tools.get_domain(url)
            uuid = tools.get_uuid(title, website_domain)
            website_name = ''
            website_position = None

            log.debug('''
                uuid         %s
                title        %s
                author       %s
                release_time %s
                domain       %s
                url          %s
                content      %s
                ''' % (uuid, title, author, release_time, website_domain, url, '...'))

            # Save to database
            if tools.is_have_chinese(content):
                is_continue = self_base_parser.add_news_acticle(uuid, title, author, release_time, website_name,
                                                                website_domain, website_position, url, content)
                if not is_continue:
                    break
    else:
        # for-else: the loop finished without break, so every article on this page was saved; crawl the next page
        offset += 50
        url = tools.replace_str(root_url, r'pn=\d*', 'pn=%d' % offset)
        base_parser.add_url('BAIDU_NEWS_urls', SITE_ID, url, depth=0, remark={'offset': offset})

    base_parser.update_url('BAIDU_NEWS_urls', root_url, Constance.DONE)
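# A minimal sketch of the url_info record this parser expects, inferred from the field accesses
# above; the URL and ids below are hypothetical examples, not values taken from the source.
# example_url_info = {
#     '_id': '5a1b2c3d4e5f',                             # stringified before logging
#     'url': 'http://news.baidu.com/ns?word=test&pn=0',  # hypothetical Baidu news search URL
#     'depth': 0,
#     'site_id': 1,
#     'remark': {'offset': 0},
# }
# parser(example_url_info)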
def save_article_dynamic(data):
    log.debug(tools.dumps_json(data))

    sql = tools.make_insert_sql('wechat_article_dynamic', data, insert_ignore=True)
    db.add(sql)
def __init__(self, ip_ports=IP_PORTS, db=DB, user_pass=USER_PASS):
    # super(RedisDB, self).__init__()
    if not hasattr(self, '_redis'):
        self._is_redis_cluster = False
        try:
            if len(ip_ports) > 1:
                # More than one address: connect to a Redis cluster
                startup_nodes = []
                for ip_port in ip_ports:
                    ip, port = ip_port.split(':')
                    startup_nodes.append({"host": ip, "port": port})

                self._redis = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
                self._pipe = self._redis.pipeline(transaction=False)
                self._is_redis_cluster = True
            else:
                # Single node; the default Redis port is 6379
                ip, port = ip_ports[0].split(':')
                self._redis = redis.Redis(host=ip, port=port, db=db,
                                          password=user_pass, decode_responses=True)
                # By default redis-py checks a connection out of the pool and returns it for every
                # command. A pipeline batches several commands into one request, and by default a
                # pipeline executes as an atomic operation.
                self._pipe = self._redis.pipeline(transaction=True)
        except Exception as e:
            raise
        else:
            log.info('连接到redis数据库 %s' % (tools.dumps_json(ip_ports)))
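# A hedged usage sketch, assuming the enclosing class is named RedisDB and that IP_PORTS / DB /
# USER_PASS are module-level defaults as the signature suggests; the addresses are hypothetical.
# single_node = RedisDB(ip_ports=['127.0.0.1:6379'], db=0, user_pass=None)
# cluster = RedisDB(ip_ports=['10.0.0.1:7000', '10.0.0.2:7000'])  # more than one node takes the cluster branch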
def add_account_info(self, account_info):
    log.debug('''
    -----公众号信息-----
    %s''' % tools.dumps_json(account_info))

    WechatService._es.add('wechat_account', account_info, account_info.get('__biz'))
def deal_request(self, name):
    web.header('Content-Type', 'text/html;charset=UTF-8')

    data = json.loads(json.dumps(web.input()))
    print(name)
    print(data)

    if name == 'get_task':
        tasks = self.task_service.get_task()
        return tools.dumps_json(tasks)

    elif name == 'update_task':
        tasks = eval(data.get('tasks', '[]'))  # 'tasks' arrives as the text of a Python list
        status = data.get('status')
        self.task_service.update_task_status(tasks, status)
        return tools.dumps_json('{"status":1}')
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
def parser_program(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']

    # Parse the listing page
    html, request = tools.get_html_by_requests(root_url)
    if not html:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.EXCEPTION)
        return

    regex = '<li class="v-item-v5.*?">(.*?)</li>'
    video_blocks = tools.get_info(html, regex)
    for video_block in video_blocks:
        regex = '<a class="u-video" href="(.*?)"'
        program_url = tools.get_info(video_block, regex, fetch_one=True)
        program_id = program_url[program_url.find('b/') + 2: program_url.rfind('/')]
        program_url = 'http://www.mgtv.com/h/%s.html' % program_id

        regex = '<img class="u-image" src="(.*?)"'
        image_url = tools.get_info(video_block, regex, fetch_one=True)

        regex = '<em class="u-time">(.*?)</em>'
        episode = tools.get_info(video_block, regex, fetch_one=True)

        regex = '<a class="u-title".*?>(.*?)</a>'
        title = tools.get_info(video_block, regex, fetch_one=True)

        regex = '<span class="u-desc">(.*?)</span>'
        actors_block = tools.get_info(video_block, regex, fetch_one=True)
        regex = '<a .*?>(.*?)</a>'
        actors = tools.get_info(actors_block, regex)
        actors = '/'.join(actors) if actors else '暂无'

        detail_html, r = tools.get_html_by_requests(program_url)
        regex = '<em class="label">简介.*?<span>(.*?)</span>'
        summary = tools.get_info(detail_html, regex, fetch_one=True) if detail_html else ''

        log.debug('''
            program_url %s
            image_url   %s
            episode     %s
            title       %s
            actors      %s
            summary     %s
            ''' % (program_url, image_url, episode, title, actors, summary))

        program_mongo_id = base_parser.add_program_info('PROGRAM_info', site_id, title, program_url, image_url,
                                                        episode, directors='', actors=actors,
                                                        summary=summary, release_time='')

        # Episode-list URL; without a month parameter the API returns the most recent month's data
        episode_detail_url = 'http://pcweb.api.mgtv.com/variety/showlist?collection_id=' + program_id
        base_parser.add_url('PROGRAM_urls', SITE_ID, episode_detail_url, depth=1,
                            remark={'program_mongo_id': program_mongo_id, 'program_id': program_id})

    base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    source_url = url_info['url']
    depth = url_info['depth']
    website_id = url_info['site_id']
    description = url_info['remark']

    html = tools.get_html_by_urllib(source_url)
    if html is None:
        base_parser.update_url('article_urls', source_url, Constance.EXCEPTION)
        return

    # Skip pages that contain no Chinese text
    regex = '[\u4e00-\u9fa5]+'
    chinese_word = tools.get_info(html, regex)
    if not chinese_word:
        base_parser.update_url('article_urls', source_url, Constance.EXCEPTION)
        return

    urls = tools.get_urls(html)
    fit_url = tools.fit_url(urls, FIT_URLS)
    for url in fit_url:
        # log.debug('url = ' + url)
        base_parser.add_url('article_urls', website_id, url, depth + 1)

    # Extract the article on the current page
    # Title
    regexs = '<h1.*?>(.*?)</h1>'
    title = tools.get_info(html, regexs)
    title = title and title[0] or ''
    title = tools.del_html_tag(title)

    # Content
    regexs = ['<div id="content">(.*?)<div class="clear"></div>',
              '<div class="article">(.*?)<!--文章操作-->',
              '<div id="video_area">(.*?)<!--文章操作-->',
              '<div class="content">(.*?)<div id="article_edit">']
    content = tools.get_info(html, regexs)
    content = content and content[0] or ''
    content = tools.del_html_tag(content)

    log.debug('''
        depth   = %d
        url     = %s
        title   = %s
        content = %s
        ''' % (depth + 1, source_url, title, content))

    if content and title:
        base_parser.add_article_info('article_text_info', website_id, source_url, title, content)

    # Mark source_url as done
    base_parser.update_url('article_urls', source_url, Constance.DONE)
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']

    # Fetch the URLs matched by the search keyword
    start = 0
    while True:
        urls = mg.search_url(query=root_url, num=50, start=start,
                             pause=random.randint(MIN_SLEEP_TIME, MAX_SLEEP_TIME))
        if not urls:
            break

        for url in urls:
            url = url.replace('amp;', '')
            article_extractor = ArticleExtractor(url)
            content = title = release_time = author = website_domain = ''
            content = article_extractor.get_content()
            if content:
                title = article_extractor.get_title()
                release_time = article_extractor.get_release_time()
                author = article_extractor.get_author()
                website_domain = tools.get_domain(url)
                uuid = tools.get_uuid(title, website_domain)
                website_name = ''
                website_position = 35  # overseas

                log.debug('''
                    uuid         %s
                    title        %s
                    author       %s
                    release_time %s
                    domain       %s
                    url          %s
                    content      %s
                    ''' % (uuid, title, author, release_time, website_domain, url, '...'))

                # Save to database
                if tools.is_have_chinese(content):
                    is_continue = self_base_parser.add_news_acticle(
                        uuid, title, author, release_time, website_name,
                        website_domain, website_position, url, content)
                    if not is_continue:
                        break
        else:
            # for-else: the loop finished without break, so every article on this page was saved;
            # move on to the next page of results
            start += 50
            continue

        # add_news_acticle signalled there is no need to continue, so stop paging as well
        break

    base_parser.update_url('google_news_urls', root_url, Constance.DONE)
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']

    html = tools.get_html_by_urllib(root_url)

    title = '<tr height="25"><td><a href=".*?" title="(.*?)"'
    video_url = ['<tr height="25"><td><a href="(.*?)"']
    author = ['<a href="user-.*?.html" target="_blank">(.*?)</a>']
    watched_count = ['浏览次数: </span>(.*?) ']
    file_size = ['资料大小: </span>(.*?) ']
    download_count = ['下载次数: </span>(.*?) ']

    titles = tools.get_info(html, title, allow_repeat=True)
    video_urls = tools.get_info(html, video_url, allow_repeat=True)
    authors = tools.get_info(html, author, allow_repeat=True)
    watched_counts = tools.get_info(html, watched_count, allow_repeat=True)
    file_sizes = tools.get_info(html, file_size, allow_repeat=True)
    download_counts = tools.get_info(html, download_count, allow_repeat=True)

    for i in range(len(titles)):
        title = titles[i]
        title = tools.del_html_tag(title)

        video_url = video_urls[i]
        video_url = tools.get_full_url('http://www.sobaidupan.com', video_url)

        author = authors[i]
        watched_count = watched_counts[i]
        file_size = file_sizes[i]
        download_count = download_counts[i]

        log.debug('''
            标题:     %s
            视频地址: %s
            作者:     %s
            观看数    %s
            资料大小  %s
            下载次数  %s
            ''' % (title, video_url, author, watched_count, file_size, download_count))

        contained_key, contained_key_count = base_parser.get_contained_key(
            title, '', remark['search_keyword1'], remark['search_keyword2'], remark['search_keyword3'])
        if not contained_key:
            continue

        base_parser.add_content_info('VA_content_info', SITE_ID, video_url, title,
                                     file_size=file_size, file_name=title, author=author,
                                     watched_count=watched_count, download_count=download_count,
                                     search_type=search_type, keyword=contained_key,
                                     keyword_count=contained_key_count, task_id=remark['task_id'])

    base_parser.update_url('VA_urls', root_url, Constance.DONE)
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    source_url = url_info['url']
    depth = url_info['depth']
    website_id = url_info['site_id']
    description = url_info['remark']

    html = tools.get_html_by_urllib(source_url)
    if html is None:
        base_parser.update_url('article_urls', source_url, Constance.EXCEPTION)
        return

    # Skip pages that contain no Chinese text
    regex = '[\u4e00-\u9fa5]+'
    chinese_word = tools.get_info(html, regex)
    if not chinese_word:
        base_parser.update_url('article_urls', source_url, Constance.EXCEPTION)
        return

    urls = tools.get_urls(html, STOP_URLS)
    urls = tools.fit_url(urls, "cctv.com")
    for url in urls:
        # log.debug('url = ' + url)
        base_parser.add_url('article_urls', website_id, url, depth + 1)

    # Extract the article on the current page
    # Title
    regexs = '<h1><!--repaste.title.begin-->(.*?)<!--repaste.title.end-->'
    title = tools.get_info(html, regexs)
    title = title and title[0] or ''
    title = tools.del_html_tag(title)

    # Content
    regexs = ['<!--repaste.body.begin-->(.*?)<!--repaste.body.end-->']
    content = tools.get_info(html, regexs)
    content = content and content[0] or ''
    content = tools.del_html_tag(content)

    log.debug('''
        depth   = %d
        url     = %s
        title   = %s
        content = %s
        ''' % (depth + 1, source_url, title, content))

    if content and title:
        base_parser.add_article_info('article_text_info', website_id, source_url, title, content)

    # Mark source_url as done
    base_parser.update_url('article_urls', source_url, Constance.DONE)
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']

    if depth == 0:
        parser_video_info(root_url, depth, site_id, remark)
def main():
    '''
    @summary:
    ---------
    @param :
    ---------
    @result:
    '''
    clues_json = get_clues()
    clues_json = tools.dumps_json(clues_json)
def add_department(self, name):
    url = 'https://qyapi.weixin.qq.com/cgi-bin/department/create?access_token=%s' % self._sync_user_access_token
    data = {
        "name": name,
        "parentid": 1,
    }
    data = tools.dumps_json(data).encode('utf-8')

    result = tools.get_json_by_requests(url, headers=HEADER, data=data)  # {'errcode': 0, 'id': 4, 'errmsg': 'created'}
    return result.get("id")
def get_article(self):
    '''
    @summary: Currently queries by record_time to guarantee there is data;
              it should really query by release_time. TODO
    ---------
    ---------
    @result:
    '''
    per_record_time = self.get_per_record_time()
    if per_record_time:
        body = {
            "size": 200,
            "query": {
                "filtered": {
                    "filter": {
                        "range": {
                            "record_time": {
                                "gt": per_record_time
                            }
                        }
                    }
                }
            },
            "sort": [{"record_time": "asc"}]
        }
    else:
        body = {
            # "query": {
            #     "filtered": {
            #         "filter": {
            #             "range": {
            #                 "release_time": {
            #                     "gte": today_time + ' 00:00:00',  # today
            #                     "lte": today_time + ' 23:59:59'   # today
            #                 }
            #             }
            #         }
            #     }
            # },
            "size": 200,
            "sort": [{"record_time": "asc"}]
        }

    log.debug(self._table + " => " + tools.dumps_json(body))

    article = self._data_pool_es.search(self._table, body)
    return article.get('hits', {}).get('hits', [])
def parser_episode_info(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    program_id = remark['program_id']
    program_mongo_id = remark['program_mongo_id']

    episode_json = tools.get_json_by_requests(root_url)
    if not episode_json:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
        return

    code = episode_json.get('code')
    if code != 200:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
        return

    episode_data = episode_json.get('data', {})
    episode_info = episode_data.get('info', {})
    name = episode_info.get('title', '')
    url = episode_info.get('url', '')
    image_url = episode_info.get('thumb', '')
    episode_num = episode_info.get('series', '')
    summary = episode_info.get('desc', '')
    time_length = episode_info.get('duration', '')

    episode_download_url = episode_data.get('stream', [{'url': ''}])[0].get('url')
    episode_download_url = 'http://disp.titan.mgtv.com' + episode_download_url
    episode_download_info = tools.get_json_by_requests(episode_download_url)
    if episode_download_info:
        episode_download_url = episode_download_info.get('info', '')
    else:
        episode_download_url = ''

    log.debug('''
        program_mongo_id     %s
        name                 %s
        url                  %s
        image_url            %s
        episode_num          %s
        summary              %s
        time_length          %s
        episode_download_url %s
        ''' % (program_mongo_id, name, url, image_url, episode_num, summary, time_length, episode_download_url))

    base_parser.add_program_episode_info('PROGRAM_EPISODE_info', site_id, program_mongo_id,
                                         episode_num=episode_num, time_length=time_length,
                                         episode_name=name, download_status='',
                                         download_url=episode_download_url, episode_url=url,
                                         summary=summary, image_url=image_url, sto_path='')

    base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
def deal_request(self, name):
    web.header('Content-Type', 'text/html;charset=UTF-8')

    data = json.loads(json.dumps(web.input()))
    client_ip = web.ctx.ip

    if name == 'get_task':
        tasks = self.task_service.get_task()
        tasks = tools.dumps_json(tasks)
        log.info('''
        客户端 ip: %s
        取任务 : %s''' % (client_ip, tasks))
        return tasks

    elif name == 'update_task':
        tasks = eval(data.get('tasks', '[]'))  # 'tasks' arrives as the text of a Python list
        status = data.get('status')
        self.task_service.update_task_status(tasks, status)
        return tools.dumps_json('{"status":1}')
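# A hedged client-side sketch of how these two endpoints might be exercised, assuming the handler
# is mapped so that the trailing path segment becomes `name`. The host, port, path and status value
# are hypothetical; only the 'tasks' and 'status' field names come from the handler above.
# import requests
# tasks = requests.get('http://127.0.0.1:8080/get_task').json()
# requests.post('http://127.0.0.1:8080/update_task', data={'tasks': str(tasks), 'status': 3})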
def parser_url_info(url_info):
    log.info('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    remark = url_info['remark']

    website_name = remark['website_name']
    website_position = remark['website_position']
    website_url = remark['website_url']
    website_domain = remark['website_domain']
    spider_depth = remark['spider_depth']

    return root_url, depth, remark, website_name, website_position, website_url, website_domain, spider_depth
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']

    if depth == 0:
        parser_content_info(url_info)
    elif depth == 1:
        pass
def add_article_info(self, article_info):
    '''
    @summary:
    ---------
    @param article_info:
    ---------
    @result:
    '''
    log.debug('''
    -----文章信息-----
    %s''' % tools.dumps_json(article_info))

    self._es.add('wechat_article', article_info, article_info.get('article_id'))
def save_article_list(datas: list):
    log.debug(tools.dumps_json(datas))

    sql, articles = tools.make_batch_sql('wechat_article_list', datas)
    db.add_batch(sql, articles)

    # Save the per-article fetch tasks
    article_task = [{
        "sn": article.get('sn'),
        "article_url": article.get('url'),
        "__biz": article.get('__biz')
    } for article in datas]
    sql, article_task = tools.make_batch_sql('wechat_article_task', article_task)
    db.add_batch(sql, article_task)
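# A minimal sketch of the `datas` argument save_article_list expects: a list of article dicts that
# at least carry sn / url / __biz, matching the task records built above. The values are hypothetical
# and real records would also carry the remaining wechat_article_list columns.
# example_datas = [{
#     'sn': '0123abcd',
#     'url': 'https://mp.weixin.qq.com/s?__biz=MzA5MDEx&sn=0123abcd',
#     '__biz': 'MzA5MDEx',
# }]
# save_article_list(example_datas)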
def save_baidu_info(release_time='', content='', url='', author='', title='', is_debug=False):
    domain = tools.get_domain(url)

    content_info = {
        'domain': domain,
        'title': title,
        'author': author,
        'url': url,
        'content': content,
        'release_time': release_time,
    }

    log.debug(tools.dumps_json(content_info))
def main():
    db = OracleDB()
    sql = 'select t.id clues_id,to_char(t.keyword1),to_char(t.keyword2),to_char(t.keyword3),t.zero_id from TAB_IOPM_CLUES t'
    results = db.find(sql)

    clues_json = {
        "message": "查询成功",
        "status": 1,
        "data": [{
            "clues_id": 104,
            "keyword1": "",
            "keyword2": "",
            "keyword3": "",
            "zero_id": 2
        }]
    }

    clues_json['data'] = []
    for result in results:
        # Clean up the keyword strings: replace/remove double quotes and 、, and strip a trailing comma
        data = {
            "clues_id": result[0] if result[0] else "",
            "keyword1": "%s" % (result[1].replace('"', '“').replace('、', '')[:-1]
                                if result[1][-1] == ',' else result[1].replace('"', '')) if result[1] else "",
            "keyword2": "%s" % (result[2].replace('"', '“').replace('、', '')[:-1]
                                if result[2][-1] == ',' else result[2].replace('"', '')) if result[2] else "",
            "keyword3": "%s" % (result[3].replace('"', '“').replace('、', '')[:-1]
                                if result[3][-1] == ',' else result[3].replace('"', '')) if result[3] else "",
            "zero_id": result[4] if result[4] else ""
        }
        clues_json["data"].append(data)

    clues_json = tools.dumps_json(clues_json)
    print(clues_json)
    tools.write_file('./clues.txt', clues_json)
def deal_request(self):
    web.header('Content-Type', 'text/html;charset=UTF-8')
    print(str(web.input()))
    data = json.loads(json.dumps(web.input()))

    # Article parameters
    article_id = data.get('article_id')
    may_invalid = data.get('may_invalid') or 0

    # Hot-topic parameters
    hot_id = data.get('hot_id')
    hot_value = data.get('hot_value') or 0

    # Common parameters
    clues_ids = data.get('clues_ids') or ''
    article_count = data.get('article_count') or 0
    vip_count = data.get('vip_count') or 0
    negative_emotion_count = data.get('negative_emotion_count') or 0
    zero_ids = data.get('zero_ids') or ''

    status = 0  # 0 = failed, 1 = succeeded
    weight = -1

    try:
        if hot_id:
            status, weight = RelatedSortAction._related_sort_service.deal_hot(
                hot_id, float(hot_value), clues_ids, zero_ids,
                int(article_count), int(vip_count), int(negative_emotion_count))
        elif article_id:
            status, weight = RelatedSortAction._related_sort_service.deal_article(
                article_id, clues_ids, zero_ids, int(may_invalid),
                int(vip_count), int(negative_emotion_count))
    except Exception as e:
        log.error(e)

    result = {
        "status": 1 if status else 0,
        "message": "处理成功" if status else "处理失败",
        "id": hot_id or article_id,
        "weight": weight
    }

    return tools.dumps_json(result)
def __invite_user(self, user_id):
    '''
    @summary: Invite a member
    ---------
    @param user_id:
    ---------
    @result:
    '''
    url = 'https://qyapi.weixin.qq.com/cgi-bin/batch/invite?access_token=' + self._sync_user_access_token
    data = {
        "user": [user_id],
    }
    data = tools.dumps_json(data).encode('utf-8')

    result = tools.get_json_by_requests(url, headers=HEADER, data=data)
    return result
def main():
    '''
    @summary:
    ---------
    @param :
    ---------
    @result:
    '''
    clues_json = get_clues()
    clues_count = len(clues_json['data'])
    clues_json = tools.dumps_json(clues_json)
    print(clues_json)
    # save_clues_to_file(clues_json)

    keys = 'pattek.com.cn'
    prpcrypt = Prpcrypt(keys)
    encrypt_text = prpcrypt.encrypt(clues_json)

    data = {'info': encrypt_text}

    # Sync to the intranet
    url = 'http://192.168.60.38:8002/datasync_al/interface/cluesConfSync?'
    json = tools.get_json_by_requests(url, data=data)

    # Record the sync result
    result = record_sync_status(clues_count, json.get("status"), json.get('message'), json.get('data'), 0)
    print(result)
    log.debug('''
    ------ 同步线索到内网 -----
    %s
    记录到数据库 %d
    ''' % (json, result))

    # Sync to the extranet
    url = 'http://124.205.229.232:8005/gdyq/datasync_al/interface/cluesConfSync'
    json = tools.get_json_by_requests(url, data=data)

    # Record the sync result
    result = record_sync_status(clues_count, json.get("status"), json.get('message'), json.get('data'), 1)
    log.debug('''
    ------ 同步线索到外网 -----
    %s
    记录到数据库 %d
    ''' % (json, result))
def parser_episode_detail_url(url_info):
    url_info['_id'] = str(url_info['_id'])
    log.debug('处理 \n' + tools.dumps_json(url_info))

    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    program_id = remark['program_id']
    program_mongo_id = remark['program_mongo_id']

    episode_json = tools.get_json_by_requests(root_url)
    if not episode_json:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
        return

    code = episode_json.get('code')
    if code != 200:
        base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
        return

    episode_data = episode_json.get('data', {})

    # Parse the detail-page URL of every episode
    episode_list = episode_data.get('list', [])
    for episode in episode_list:
        episode_id = episode['video_id']
        episode_detail_url = 'http://pcweb.api.mgtv.com/player/video?video_id=' + episode_id
        base_parser.add_url('PROGRAM_urls', SITE_ID, episode_detail_url, depth=2,
                            remark={'program_mongo_id': program_mongo_id, 'program_id': program_id})

    # Parse the episode-list URLs of the other years and months
    episode_years = episode_data.get('tab_y', [])
    episode_months = episode_data.get('tab_m', [])
    for episode_year in episode_years:
        # year = episode_year['t']
        temp_program_id = episode_year['id']
        episode_list_url = 'http://pcweb.api.mgtv.com/variety/showlist?collection_id=%s' % temp_program_id
        # Add the URL; without a month parameter the API returns the most recent month's data
        base_parser.add_url('PROGRAM_urls', SITE_ID, episode_list_url, depth=1,
                            remark={'program_mongo_id': program_mongo_id, 'program_id': temp_program_id})

    for episode_month in episode_months[1:]:  # skip the most recent month, which is already covered above
        episode_month = episode_month['m']
        episode_list_url = 'http://pcweb.api.mgtv.com/variety/showlist?collection_id=%s&month=%s' % (
            program_id, episode_month)
        # Add the URL
        base_parser.add_url('PROGRAM_urls', SITE_ID, episode_list_url, depth=1,
                            remark={'program_mongo_id': program_mongo_id, 'program_id': program_id})

    base_parser.update_url('PROGRAM_urls', root_url, Constance.DONE)
def update_user(self, user_id, user_name='', mobile='', email='', enable=1):
    url = 'https://qyapi.weixin.qq.com/cgi-bin/user/update?access_token=' + self._sync_user_access_token
    data = {
        "userid": user_id,
        "name": user_name,
        "mobile": mobile,
        "email": email,
        "enable": enable,
    }
    data = tools.dumps_json(data).encode('utf-8')

    result = tools.get_json_by_requests(url, headers=HEADER, data=data)
    return result
def add_user(self, user_name, mobile, email='', user_id='', enable=1):
    '''
    @summary: Add a user. The access_token must be generated with the contact-sync secret
              from the management tool.
    ---------
    @param user_name:
    @param mobile:
    @param email:
    @param user_id:
    @param enable: enable the member: 0 disabled, 1 enabled
    ---------
    @result:
    '''
    user_id = user_id if user_id else tools.get_uuid()

    # Shape of the returned data
    return_json = {"errcode": 0, "errmsg": "created", 'user_id': user_id}

    url = 'https://qyapi.weixin.qq.com/cgi-bin/user/create?access_token=' + self._sync_user_access_token
    data = {
        "userid": user_id,
        "name": user_name,
        "mobile": mobile,
        "department": [WechatService._depertment_id],
        "email": email,
        'enable': enable,
        # Whether to invite the member to use WeChat Work (the invitation is delivered via WeChat
        # service notice, SMS or email, once a day for at most 3 working days). Defaults to true.
        'to_invite': False
    }
    data = tools.dumps_json(data).encode('utf-8')

    result = tools.get_json_by_requests(url, headers=HEADER, data=data)
    if result.get('errcode') == 0:
        result = self.__invite_user(user_id)

    if result.get('errcode'):
        return_json['errcode'] = result.get('errcode')
        return_json['errmsg'] = result.get('errmsg')

    return return_json
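# A hedged usage sketch, assuming the enclosing class is WechatService (as the class-attribute
# references above suggest); the user name and mobile number are made up.
# service = WechatService()
# result = service.add_user(user_name='test_user', mobile='13800000000')
# if result.get('errcode') == 0:
#     print('created user', result['user_id'])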
def save_video_info(release_time='', content='', url='', author='', title='', image_url='', site_name='',
                    play_count=None, comment_count=None, praise_count=None, summary='', time_length=None):
    domain = tools.get_domain(url)
    uuid = tools.get_uuid(title, domain)

    if es.get('video_news', uuid):
        log.debug(title + ' 已存在')
        return False

    content_info = {
        'domain': domain,
        'uuid': uuid,
        'site_name': site_name,
        'image_url': image_url,
        'title': title,
        'author': author,
        'url': url,
        'content': content,
        'release_time': tools.format_date(release_time),
        'play_count': play_count,
        'comment_count': comment_count,
        'praise_count': praise_count,
        'time_length': time_length,
        'record_time': tools.get_current_date(),
        'summary': summary
    }

    log.debug(tools.dumps_json(content_info))

    es.add('video_news', content_info, content_info['uuid'])
    return True
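# A minimal usage sketch (all values hypothetical). save_video_info returns False when a document
# with the same title+domain uuid already exists in the video_news index, and True after indexing.
# saved = save_video_info(
#     release_time='2018-01-01 12:00:00',
#     title='Example video',
#     url='http://example.com/video/1',
#     site_name='example',
#     content='...',
# )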