class TencentAIBase(object):
    """Base client for the Tencent AI open platform (api.ai.qq.com).

    Provides request signing plus GET/POST helpers that prepend the
    fcgi-bin base url and attach the computed signature.
    """

    def __init__(self, appid, appsec):
        self.appid = appid
        self.appsec = appsec
        self.http_client = HttpClient()
        self.base_url = 'https://api.ai.qq.com/fcgi-bin'

    def get_sign(self, data):
        """Fill the common request fields and return the MD5 signature.

        NOTE: mutates *data* in place (adds app_id / time_stamp / nonce_str).
        """
        data['app_id'] = self.appid
        data['time_stamp'] = int(time.time())
        data['nonce_str'] = random_str(16)
        # Tencent's signing scheme: sorted key=value pairs, then the app key.
        pairs = [u'{}={}'.format(key, quote(str(data[key])))
                 for key in sorted(data)]
        sign_str = u'&'.join(pairs) + u'&app_key={}'.format(self.appsec)
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest().upper()

    def do_get(self, url, data):
        """Sign *data* and GET base_url + *url* with it as query params."""
        data['sign'] = self.get_sign(data)
        return self.http_client.get(self.base_url + url, params=data)

    def do_post(self, url, data):
        """Sign *data* and POST it to base_url + *url*."""
        data['sign'] = self.get_sign(data)
        # 'data_fromat' (sic) matches the HttpClient keyword spelling.
        return self.http_client.post(self.base_url + url, data=data,
                                     data_fromat='str')
def crawler_one_page(link, table, mid):
    """Crawl one goods-list page and persist every item found on it.

    The link's domain must have a DATA_FIELD entry describing where the
    result list and the goods id live in the response; *table* selects
    the TbkGoods storage table and *mid* is attached to each record.
    """
    domain = urlparse(link).netloc
    config = DATA_FIELD.get(domain)
    if not config:
        LOG.info("domain: %s not config", domain)
        return
    res_data_field = config.get("res_data")
    id_field = config.get("id")
    started = time.time()
    response = HttpClient().get(link)
    # normalise the table name once instead of per item (idempotent anyway)
    if isinstance(table, unicode):
        table = table.encode("utf-8")
    for goods in response.get(res_data_field, []):
        record = _ship_goods(goods.get(id_field))
        if not record:
            continue
        record.update({'mid': mid})
        record.update({'table': table})
        searcher.update_index(record)
        goods_obj = TbkGoods(**record)
        goods_obj.__table__ = table
        goods_obj.save()
    LOG.info("link: %s takes: %s", link, time.time() - started)
def get_detail_url(detail_url, author_id):
    """Download one poem detail page and parse it into a dict.

    Returns {} when the page could not be downloaded. Translation
    ("fanyi") and appreciation ("shangxi") texts are fetched through
    their AJAX endpoints when the page links to them, otherwise scraped
    inline or left empty.
    """
    page_content = HttpClient().get(detail_url)
    if not page_content:
        LOG.error("download url: %s, error", detail_url)
        return {}
    dom = fromstring(page_content)
    cont_xpath = ('//div[@class="main3"]/div[@class="left"]/'
                  'div[@class="sons"][1]')
    title = dom.xpath("//h1/text()")
    dynasty = dom.xpath(cont_xpath + '/div[@class="cont"]/p/a[1]/text()')
    author = dom.xpath(cont_xpath + '/div[@class="cont"]/p/a[2]/text()')
    content = dom.xpath(cont_xpath +
                        '/div[@class="cont"]/div[@class="contson"]')
    content = split_content(content[0])
    keywords = '&'.join(dom.xpath(cont_xpath + '/div[@class="tag"]/a/text()'))
    likes = dom.xpath(cont_xpath + '//div[@class="good"]/a/span/text()')
    likes = match_number(likes[0]) if likes else 0
    # translation: prefer the AJAX endpoint keyed by the fanyi div id
    fanyi = dom.xpath("//div[starts-with(@id, 'fanyi')][1]/@id")
    if fanyi:
        fanyi_con = get_fanyi_content(match_number(fanyi[0]))
    else:
        inline = dom.xpath("//div[@class='left']/div[@class='sons'][2]/"
                           "div[@class='contyishang']/p/text()")
        fanyi_con = '\n'.join(inline) if inline else ''
    # appreciation: only available through the AJAX endpoint
    shangxi = dom.xpath("//div[starts-with(@id, 'shangxi')][1]/@id")
    shangxi_con = get_shangxi_content(match_number(shangxi[0])) if shangxi else ''
    if not shangxi_con:
        LOG.info("url: %s no shangxi", detail_url)
    if not fanyi_con:
        LOG.info("url: %s no fanyi", detail_url)
    return {
        'title': title[0],
        'dynasty': dynasty[0],
        'author': author[0],
        'content': content,
        'tags': keywords,
        'likes': likes,
        'author_id': author_id,
        'translate': fanyi_con,
        'shangxi': shangxi_con,
        'plink': detail_url,
    }
def get_fanyi_content(fanyi_id):
    """Fetch the translation ("fanyi") text of a poem via the AJAX endpoint.

    Sleeps 10s first to throttle crawling. Returns the concatenated
    translation text ('' when the page could not be downloaded).
    """
    url = 'https://so.gushiwen.org/shiwen2017/ajaxfanyi.aspx'
    params = {'id': fanyi_id}
    time.sleep(10)
    client = HttpClient()
    page_content = client.get(url, params=params)
    fanyi = ''
    if not page_content:
        LOG.info("down page error: %s, params: %s", url, params)
        return fanyi
    page_content = unicode(page_content, 'utf-8')
    dom = fromstring(page_content)
    elements = dom.xpath("//div[@class='contyishang']/p")
    for element in elements:
        for sub in element:
            # BUG FIX: reset per child. Previously `tmp` carried over from
            # the prior iteration, so an unexpected tag re-appended stale
            # text, and the very first child raised UnboundLocalError when
            # it was neither <strong>, <a> nor <br>.
            tmp = None
            tag = sub.tag
            if tag == 'strong':
                continue
            elif tag == 'a':
                # trim the previously appended trailing character
                # (presumably the '\n' added after a <br> — TODO confirm)
                fanyi = fanyi[:-1]
                tmp = sub.text
            elif tag == 'br':
                tmp = sub.tail
                if tmp is None:
                    continue
                tmp += '\n'
            if tmp:
                tmp = tmp.replace(u"▲", "")
                fanyi += tmp
    return fanyi
def get_image_data(self):
    """Download the picture referenced by the current message, upload it
    to our file server, and return its hosted url plus dimensions.
    """
    pic_url = self.message_instance.pic_url
    image_data = HttpClient().get(pic_url)
    height, width = self.get_image_size(image_data)
    hosted_url = upload_file_to_server(image_data)
    return {'content': hosted_url, 'height': height, 'width': width}
class HeFengWeather(object):
    """Thin client for the HeWeather free S6 API.

    Every request goes through sign_wrapper (request signing) and
    convert_res (response normalisation).
    """

    API_BASE = 'https://free-api.heweather.net/s6'

    def __init__(self, user_name, api_key):
        self.user_name = user_name
        self.api_key = api_key
        self.client = HttpClient()

    @sign_wrapper
    @convert_res
    def get(self, url, **kwargs):
        # NOTE(review): TLS verification is deliberately disabled for every
        # call — confirm this is intended.
        kwargs['verify'] = False
        return self.client.get(url, **kwargs)

    @sign_wrapper
    @convert_res
    def post(self, url, **kwargs):
        kwargs['verify'] = False
        return self.client.post(url, **kwargs)

    def forecast_weather(self, city):
        """Return the forecast for *city* via /weather/forecast."""
        return self.get('/weather/forecast', params={'location': city})
def check_img_risk(image_path):
    """Run a local image through WeChat's img_sec_check risk endpoint.

    Returns the raw API response.
    """
    client = WechatClient(XCX_APPID, XCX_APPSEC, session=data_cache)
    url = 'https://api.weixin.qq.com/wxa/img_sec_check?'\
          'access_token={}'.format(client.access_token)
    _http = HttpClient()
    # BUG FIX: the file handle was previously left open; close it
    # deterministically once the upload has been sent.
    with open(image_path, 'rb') as img:
        res = _http.post(url, files={'media': img})
    return res
def get_sentence(date_str=None):
    """Fetch the daily sentence for *date_str* ('YYYY-MM-DD', default today)."""
    if date_str is None:
        date_str = datetime.now().strftime('%Y-%m-%d')
    res = HttpClient().get(URL.format(date_str))
    data = ship_kingdata(res)
    data['date_str'] = date_str
    return data
def download_page(url, **kwargs):
    """Download *url* via the shared requests session.

    Returns the page body, or None on an empty response or a soft-404
    page (body containing "404 Not Found").
    """
    content = HttpClient().get(url, **kwargs)
    if not content:
        return None
    if "404 Not Found" in content:
        return None
    return content
def down_page(url):
    """GET *url*, retrying up to three times; return '' when all fail."""
    client = HttpClient()
    for _attempt in range(3):
        res = client.get(url)
        if res:
            return res
    return ''
def download_page(c, t=None):
    """Query START_URL with category *c* and optional type *t*.

    Both values are UTF-8 encoded and URL-quoted before being sent.
    """
    params = {'p': 1, "c": quote(c.encode('utf-8'))}
    if t:
        params['t'] = quote(t.encode("utf-8"))
    return HttpClient().get(START_URL, params=params)
def check_msg_risk(content):
    """Run *content* through WeChat's msg_sec_check text-risk endpoint.

    Returns the raw API response.
    """
    client = WechatClient(XCX_APPID, XCX_APPSEC, session=data_cache)
    url = 'https://api.weixin.qq.com/wxa/msg_sec_check?'\
          'access_token={}'.format(client.access_token)
    _http = HttpClient()
    if isinstance(content, unicode):
        content = content.encode("utf-8")
    # keep non-ASCII text as-is; the endpoint expects raw UTF-8 JSON
    payload = json.dumps({'content': content}, ensure_ascii=False)
    return _http.post(url, data=payload)
def upload_qiniu_storage(url):
    """Download *url* and re-upload the bytes to qiniu storage.

    Returns '' when the download raises or comes back empty.
    """
    _http = HttpClient()
    try:
        res = _http.get(url)
    except Exception as ex:
        LOGGER.error("url: %s, ex: %s", url, ex, exc_info=True)
        return ''
    if not res:
        return ''
    return upload_image_qiniu(res)
class LunarDate(object):
    """Convert Gregorian dates to lunar-calendar info, with caching.

    Conversion is done locally via lunar_converter; get_lunar_api is a
    fallback against the juhe.cn calendar API.
    """

    API_URL = 'http://v.juhe.cn/calendar/day'
    # NOTE(review): hard-coded API key — consider moving to configuration.
    API_KEY = '53fb358b92abe89bf5c4edb3db993829'
    _HTTP_CLIENT = None

    def __init__(self):
        self._HTTP_CLIENT = HttpClient()

    def get_lunar(self, date_str):
        """Return lunar info for *date_str* ('YYYY-MM-DD').

        Serves from weather_cache when possible, otherwise converts
        locally and stores the result.
        """
        cached = weather_cache.get_lunar_date(date_str)
        if cached:
            return cached
        parts = date_str.split('-')
        lunar_info = lunar_converter.get_lunar_info(parts[0], parts[1],
                                                    parts[2])
        weather_cache.cache_lunar_date(date_str, lunar_info)
        return lunar_info

    def get_lunar_api(self, date_str):
        """Query the juhe.cn calendar API for *date_str*."""
        params = {'date': date_str, 'key': self.API_KEY}
        resp = self._HTTP_CLIENT.get(url=self.API_URL, params=params)
        return resp.get("result")
def share_tbk_image(goods_id, params=None): from mini_goods import miniapp_goods_detail # goods = get_goods_info_by_id(goods_id) # if not goods: # goods_instance = TbkGoods(num_id=goods_id) # goods = goods_instance.find_goods_by_id() # if not goods: # return False, u"找不到该商品" if params: mid = params.get("mid") data = miniapp_goods_detail(goods_id, mid) else: data = miniapp_goods_detail(goods_id) if data['errcode'] != 0: return False, data.get("errmsg", "") goods = data['data'] scene = "id={}&t=1".format(goods_id) if params: user_config = params['user_config'] qr_data = create_qrcode( scene, page='pages/goods/detail/index', appid=user_config['appid'], appsec=user_config['appsec']) else: qr_data = create_qrcode(scene) local_qr_path = qr_data['local_path'] banner = goods['pic_url'] client = HttpClient() data = client.get(banner) path = os.path.join(TBK_PATH, datetime.now().strftime("%Y-%m-%d")) if not os.path.exists(path): os.makedirs(path) banner_path = os.path.join(path, uuid.uuid4().hex) with open(banner_path, 'w') as _file: _file.write(data) sales = goods['sales'] coupon_amount = goods['coupon_amount'] coupon_fee = float(goods['price']) - float(coupon_amount) if sales < 10000: sales = str(sales) else: sales = str(round(float(sales) / 10000.0, 2)) + u'万' path = draw_tbk_share( local_qr_path, banner_path, goods['title'], goods['price'], round(coupon_fee, 2), sales) return True, get_url_from_path(path)
class TuringClient(object):
    """Client for the Turing (tuling123) chat-bot HTTP API v2."""

    API_KEY = ['14c218cb6cb23bb27b7dc89e18eb9689']
    _HTTP_CLIENT = None
    URL = 'http://openapi.tuling123.com/openapi/api/v2'
    DEFAULT_UID = '14c218cb6cb23bb27b7dc89e18eb9689'

    def __init__(self):
        self._HTTP_CLIENT = HttpClient()
        self.DEFAULT_REPLY = u"我还不太懂呢,我去学习啦:)"

    def _handle_result(self, resp):
        """Flatten an API response dict into a single reply string.

        Falls back to DEFAULT_REPLY on missing intent or error codes.
        """
        intent = resp.get("intent")
        if not intent:
            return self.DEFAULT_REPLY
        resp_code = intent.get("code")
        if resp_code in (5000, 6000):
            return self.DEFAULT_REPLY
        elif int(resp_code) / 1000 == 4:
            print("error resp: %s", resp)
            return self.DEFAULT_REPLY
        pieces = []
        for result in resp.get("results", []):
            rtype = result.get('resultType', 'text')
            if rtype == 'text':
                # text answers are placed first in the reply
                pieces.insert(0, result['values']['text'])
            elif rtype == 'url':
                pieces.append(result['values']['url'])
            elif rtype == 'news':
                for news in result['values']['news']:
                    pieces.append(news['info'] + '\n' +
                                  news['detailurl'] + '\n')
        return '\n'.join(pieces)

    def query_text(self, content, uid=None, location=None):
        """Send *content* to the bot and return the flattened reply.

        :param uid: end-user id (defaults to DEFAULT_UID).
        :param location: optional dict like
            {'city': ..., 'province': ..., 'street': ...}.
        """
        if isinstance(content, unicode):
            content = content.encode("utf-8")
        perception = {"inputText": {'text': content}}
        if location:
            perception['selfInfo'] = {'location': location}
        req_data = {
            "reqType": 0,
            'perception': perception,
            'userInfo': {
                'apiKey': random.choice(self.API_KEY),
                'userId': uid if uid else self.DEFAULT_UID,
            },
        }
        resp = self._HTTP_CLIENT.post(self.URL, json=req_data)
        LOGGER.info("turling query: %s, res: %s", content, resp)
        return self._handle_result(resp)
def get_headimg_path(headimg_url, openid):
    """Return a local path to the user's avatar, downloading at most once
    per day.

    Files live under HEADIMG_TMP/<YYYY-MM-DD>/<openid>.<suffix>; an
    already-cached file for today is reused.
    """
    date_dir = os.path.join(HEADIMG_TMP,
                            datetime.now().strftime("%Y-%m-%d"))
    if not os.path.exists(date_dir):
        os.makedirs(date_dir)
    # reuse today's newest cached avatar when one exists
    listing = os.popen("ls %s/%s* -t" % (date_dir, openid)).read()
    if listing:
        return listing.split('\n')[0]
    pic_content = HttpClient().get(headimg_url)
    filename = openid + '.' + get_image_suffix(pic_content)
    abpath = os.path.join(date_dir, filename)
    with open(abpath, 'w') as head_file:
        head_file.write(pic_content)
    return abpath
class BaiduAIBase(object):
    """Base client for Baidu AI services (tsn.baidu.com).

    Manages the OAuth access token (cached in wx_cache with its TTL) and
    provides GET/POST helpers that prepend base_url to relative paths.
    """

    def __init__(self, appid, appsec):
        self.appid = appid
        self.appsec = appsec
        self.http_client = HttpClient()
        self.base_url = 'http://tsn.baidu.com'

    @property
    def token_key(self):
        """Cache key under which this app's token is stored."""
        return 'bdtoken_{}'.format(self.appid)

    def refresh_new_token(self):
        """Fetch a fresh OAuth token, cache it with its TTL, return it.

        Raises Exception when the OAuth endpoint reports an error.
        """
        token_url = 'https://openapi.baidu.com/oauth/2.0/token'
        res = self.do_get(token_url, {
            'grant_type': 'client_credentials',
            'client_id': self.appid,
            'client_secret': self.appsec,
        })
        if 'error' in res:
            raise Exception('get token {} error {}'.format(
                token_url, res.get('error_description', '')))
        token = res['access_token']
        wx_cache.set(self.token_key, token)
        wx_cache.expire(self.token_key, int(res.get('expires_in', 3600)))
        return token

    @property
    def access_token(self):
        """Cached token when present, otherwise a freshly fetched one."""
        cached = wx_cache.get(self.token_key)
        return cached if cached else self.refresh_new_token()

    def do_get(self, url, data):
        """GET *url* (absolute, or relative to base_url) with *data* params."""
        if not url.startswith(('http', 'https')):
            url = self.base_url + url
        return self.http_client.get(url, params=data)

    def do_post(self, url, data):
        """POST *data* to *url* (absolute, or relative to base_url)."""
        if not url.startswith(('http', 'https')):
            url = self.base_url + url
        return self.http_client.post(url, data=data, data_fromat='str')
def get_shangxi_content(shangxi_id):
    """Fetch the appreciation ("shangxi") text of a poem via the AJAX
    endpoint.

    Sleeps 10s first to throttle crawling. Returns the paragraph texts
    joined with newlines, with '▲' markers stripped; '' on download
    failure.
    """
    url = 'https://so.gushiwen.org/shiwen2017/ajaxshangxi.aspx'
    params = {'id': shangxi_id}
    time.sleep(10)
    page_content = HttpClient().get(url, params=params)
    if not page_content:
        LOG.debug("down page error: %s, params: %s", url, params)
        return ''
    dom = fromstring(unicode(page_content, 'utf-8'))
    paragraphs = []
    for element in dom.xpath("//div[@class='contyishang']/p"):
        text = element.xpath("string(.)").replace(u"▲", "")
        paragraphs.append(text + '\n')
    return ''.join(paragraphs)
class IPData(object):
    """Look up a user's location by IP and the weather for it, with caching."""

    def __init__(self):
        self.http_client = HttpClient()

    @cache_wrapper('location', 86400)
    def get_ip_location(self, ip):
        """Return the city/province name for *ip* (cached for a day).

        Resolution is delegated to the taobao IP service.
        """
        return self._query_ip_from_ali(ip)

    @cache_wrapper('weather', 3600)
    def get_ip_weather(self, ip):
        """Return a short forecast message for *ip*'s area (cached 1h).

        '' when the weather lookup fails or reports a non-ok status.
        """
        res = weather_client.forecast_weather(ip)
        if res['errcode'] != 0:
            return ''
        resp_data = res['data']
        if resp_data['status'].lower() != 'ok':
            LOGGER.info(resp_data)
            return ''
        today = resp_data['daily_forecast'][0]
        day_wea = today['cond_txt_d']
        night_wea = today['cond_txt_n']
        if day_wea != night_wea:
            weather = day_wea + u'转' + night_wea
        else:
            weather = day_wea
        wind = today['wind_dir'] + today['wind_sc'] + u'级'
        return u"{weather}\n{wind}\n气温{low}℃/{high}℃".format(
            weather=weather, wind=wind,
            low=today['tmp_min'], high=today['tmp_max'])

    def _query_ip_from_ali(self, ip):
        """Resolve *ip* via taobao's getIpInfo service; '' on failure."""
        url = 'http://ip.taobao.com/service/getIpInfo.php?ip={}'.format(ip)
        resp = self.http_client.get(url)
        if 'data' not in resp:
            return ''
        regin = resp['data'].get('region')
        city = resp['data'].get('city')
        if regin == city:
            # e.g. municipalities where region == city
            return regin + u'市'
        return u'{}省{}市'.format(regin, city)

    def _query_ip_from_baidu(self, ip):
        """Legacy fallback: scrape the address for *ip* from a Baidu SERP."""
        url = 'http://www.baidu.com/s?wd=%s' % ip
        ip_xpath = '//div[@id="content_left"]//div[@class="c-border"]'\
                   '//table/tr/td'
        content = self.http_client.get(url)
        dom = fromstring(content)
        ip_content = dom.xpath(ip_xpath)
        if ip_content:
            text = ip_content[0].xpath("string()").strip()
            text = text.replace("\n", '').replace("\t", '')
            address = self._match_address(text)
            # if address:
            #     weather_cache.cache_ip_location(ip, address)
        else:
            address = ''
        LOGGER.info("ip: %s, addr: %s", ip, address)
        return address

    def _match_address(self, ip_content):
        """Extract the first run of CJK characters after the 'IP地址:' label.

        NOTE(review): this literal was line-broken in the original source;
        reconstructed as a single pattern — confirm against upstream.
        """
        pattern = re.compile(
            u'IP地址: [^\u4E00-\u9FA5]*(?P<address>[\u4E00-\u9FA5]+)[ ]?')
        match = pattern.search(ip_content)
        return match.group("address") if match else ''
def __init__(self):
    """Set up the HTTP client and the fallback reply text."""
    # reply used whenever the bot API yields nothing usable
    self.DEFAULT_REPLY = u"我还不太懂呢,我去学习啦:)"
    self._HTTP_CLIENT = HttpClient()
def __init__(self, appid, appsec):
    """Store the Baidu app credentials and prepare the HTTP client."""
    self.appid = appid
    self.appsec = appsec
    self.base_url = 'http://tsn.baidu.com'
    self.http_client = HttpClient()
def __init__(self):
    """Create the per-instance HTTP client used for API calls."""
    self._HTTP_CLIENT = HttpClient()
def __init__(self):
    """Create the HTTP client used for IP lookups."""
    self.http_client = HttpClient()
def __init__(self, appid, appsec):
    """Store the Tencent AI credentials and the API base url."""
    self.appid = appid
    self.appsec = appsec
    self.base_url = 'https://api.ai.qq.com/fcgi-bin'
    self.http_client = HttpClient()
def __init__(self, user_name, api_key):
    """Store HeWeather credentials and prepare the HTTP client."""
    self.api_key = api_key
    self.user_name = user_name
    self.client = HttpClient()