Exemplo n.º 1
0
def get_fanyi_content(fanyi_id):
    """Fetch and extract the translation (fanyi) text for a poem.

    :param fanyi_id: numeric id used by gushiwen.org's ajax fanyi endpoint.
    :returns: plain translation text, or '' when the download fails.
    """
    url = 'https://so.gushiwen.org/shiwen2017/ajaxfanyi.aspx'
    params = {'id': fanyi_id}
    # Throttle every request to stay polite to the remote site.
    time.sleep(10)
    client = HttpClient()
    page_content = client.get(url, params=params)
    fanyi = ''
    if not page_content:
        LOG.info("down page error: %s, params: %s", url, params)
        return fanyi
    page_content = unicode(page_content, 'utf-8')
    dom = fromstring(page_content)
    elements = dom.xpath("//div[@class='contyishang']/p")
    for element in elements:
        for sub in element:
            # Reset per child: previously `tmp` could be unbound
            # (NameError on an unexpected first tag) or carry stale
            # text from an earlier sibling when an unknown tag appeared.
            tmp = None
            tag = sub.tag
            if tag == 'strong':
                continue
            elif tag == 'a':
                # Drop the trailing newline before inline link text.
                fanyi = fanyi[:-1]
                tmp = sub.text
            elif tag == 'br':
                tmp = sub.tail
                if tmp is None:
                    continue
                tmp += '\n'
            if tmp:
                tmp = tmp.replace(u"▲", "")
                fanyi += tmp
    return fanyi
Exemplo n.º 2
0
class HeFengWeather(object):
    """Thin client for the HeWeather S6 free API."""

    API_BASE = 'https://free-api.heweather.net/s6'

    def __init__(self, user_name, api_key):
        # Credentials consumed by the sign_wrapper decorator when signing.
        self.user_name = user_name
        self.api_key = api_key
        self.client = HttpClient()

    @sign_wrapper
    @convert_res
    def get(self, url, **kwargs):
        # Certificate verification is disabled for this endpoint.
        kwargs.update(verify=False)
        return self.client.get(url, **kwargs)

    @sign_wrapper
    @convert_res
    def post(self, url, **kwargs):
        kwargs.update(verify=False)
        return self.client.post(url, **kwargs)

    def forecast_weather(self, city):
        """Return the forecast payload for *city*."""
        return self.get('/weather/forecast', params={'location': city})
Exemplo n.º 3
0
class LunarDate(object):
    """Look up lunar-calendar info for a Gregorian date, cache-first."""

    API_URL = 'http://v.juhe.cn/calendar/day'
    API_KEY = '53fb358b92abe89bf5c4edb3db993829'
    _HTTP_CLIENT = None

    def __init__(self):
        self._HTTP_CLIENT = HttpClient()

    def get_lunar(self, date_str):
        """Return lunar info for ``date_str`` ('YYYY-MM-DD'), using the cache."""
        cached = weather_cache.get_lunar_date(date_str)
        if cached:
            return cached
        parts = date_str.split('-')
        # Convert locally instead of hitting the Juhe API on every miss.
        info = lunar_converter.get_lunar_info(parts[0], parts[1], parts[2])
        weather_cache.cache_lunar_date(date_str, info)
        return info

    def get_lunar_api(self, date_str):
        """Query the Juhe calendar API directly (no caching)."""
        query = {'date': date_str, 'key': self.API_KEY}
        response = self._HTTP_CLIENT.get(url=self.API_URL, params=query)
        return response.get("result")
Exemplo n.º 4
0
 def get_image_data(self):
     """Download the message's picture, measure it, and re-host it.

     Returns a dict with the re-hosted ``content`` URL plus the image
     ``height`` and ``width``.
     """
     url = self.message_instance.pic_url
     client = HttpClient()
     image_data = client.get(url)
     # get_image_size returns (height, width) for the raw image bytes.
     height, width = self.get_image_size(image_data)
     # Re-upload to our own server and expose that URL instead of the origin.
     url = upload_file_to_server(image_data)
     return {'content': url, 'height': height, 'width': width}
Exemplo n.º 5
0
class TencentAIBase(object):
    """Base client for the Tencent AI open platform (api.ai.qq.com)."""

    def __init__(self, appid, appsec):
        self.appid = appid
        self.appsec = appsec
        self.http_client = HttpClient()
        self.base_url = 'https://api.ai.qq.com/fcgi-bin'

    def get_sign(self, data):
        """Fill in common request fields and return the MD5 signature.

        NOTE: mutates ``data`` in place (app_id/time_stamp/nonce_str);
        do_get/do_post rely on that mutation.
        """
        data['app_id'] = self.appid
        data['time_stamp'] = int(time.time())
        data['nonce_str'] = random_str(16)
        pairs = [u'{}={}'.format(key, quote(str(data[key])))
                 for key in sorted(data)]
        sign_str = u'&'.join(pairs) + u'&app_key={}'.format(self.appsec)
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest().upper()

    def do_get(self, url, data):
        """GET ``base_url + url`` with ``data`` signed into the params."""
        data['sign'] = self.get_sign(data)
        return self.http_client.get(self.base_url + url, params=data)

    def do_post(self, url, data):
        """POST ``data`` (signed) to ``base_url + url``."""
        data['sign'] = self.get_sign(data)
        # `data_fromat` is the HttpClient keyword's actual (misspelled) name.
        return self.http_client.post(self.base_url + url, data=data,
                                     data_fromat='str')
Exemplo n.º 6
0
def get_detail_url(detail_url, author_id):
    """Download one poem's detail page on gushiwen.org and parse it.

    :param detail_url: full URL of the poem detail page.
    :param author_id: id of the author record this poem belongs to.
    :returns: dict with title/dynasty/author/content/tags/likes/author_id/
        translate/shangxi/plink, or {} when the download fails.
    """
    client = HttpClient()
    page_content = client.get(detail_url)
    if page_content:
        dom = fromstring(page_content)
        # First "sons" box under the left column holds the poem itself.
        cont_xpath = '//div[@class="main3"]/div[@class="left"]/'\
            'div[@class="sons"][1]'
        title = dom.xpath("//h1/text()")
        dynasty = dom.xpath(cont_xpath + '/div[@class="cont"]/p/a[1]/text()')
        author = dom.xpath(cont_xpath + '/div[@class="cont"]/p/a[2]/text()')
        content = dom.xpath(cont_xpath +
                            '/div[@class="cont"]/div[@class="contson"]')
        content = split_content(content[0])
        keywords = dom.xpath(cont_xpath + '/div[@class="tag"]/a/text()')
        keywords = '&'.join(keywords)
        likes = dom.xpath(cont_xpath + '//div[@class="good"]/a/span/text()')
        if len(likes) >= 1:
            likes = match_number(likes[0])
        else:
            likes = 0
        # Translation is usually ajax-loaded; the div id embeds the numeric
        # id that get_fanyi_content needs.
        fanyi = dom.xpath("//div[starts-with(@id, 'fanyi')][1]/@id")
        if fanyi:
            fanyi_id = match_number(fanyi[0])
            fanyi_con = get_fanyi_content(fanyi_id)
        else:
            # Some pages inline the translation instead of using ajax.
            fanyi_xpath = "//div[@class='left']/div[@class='sons'][2]/div[@class='contyishang']/p/text()"
            fanyi_con = dom.xpath(fanyi_xpath)
            if fanyi_con:
                fanyi_con = '\n'.join(fanyi_con)
            else:
                fanyi_con = ''
        # Same ajax-id trick for the appreciation (shangxi) section.
        shangxi = dom.xpath("//div[starts-with(@id, 'shangxi')][1]/@id")
        if shangxi:
            shangxi_id = match_number(shangxi[0])
            shangxi_con = get_shangxi_content(shangxi_id)
        else:
            shangxi_con = ''

        if not shangxi_con:
            LOG.info("url: %s no shangxi", detail_url)
        if not fanyi_con:
            LOG.info("url: %s no fanyi", detail_url)

        poetry_data = {
            'title': title[0],
            'dynasty': dynasty[0],
            'author': author[0],
            'content': content,
            'tags': keywords,
            'likes': likes,
            'author_id': author_id,
            'translate': fanyi_con,
            'shangxi': shangxi_con,
            'plink': detail_url
        }
        return poetry_data
    else:
        LOG.error("download url: %s, error", detail_url)
        return {}
Exemplo n.º 7
0
def crawler_one_page(link, table, mid):
    """Crawl one goods-list page and index/persist every goods on it."""
    domain = urlparse(link).netloc
    config = DATA_FIELD.get(domain)
    if not config:
        LOG.info("domain: %s not config", domain)
        return
    res_data_field = config.get("res_data")
    id_field = config.get("id")
    started = time.time()
    # The table name is used as a bytes key downstream (Python 2).
    if isinstance(table, unicode):
        table = table.encode("utf-8")
    response = HttpClient().get(link)
    for goods in response.get(res_data_field, []):
        shipped = _ship_goods(goods.get(id_field))
        if not shipped:
            continue
        shipped['mid'] = mid
        shipped['table'] = table
        searcher.update_index(shipped)
        goods_obj = TbkGoods(**shipped)
        goods_obj.__table__ = table
        goods_obj.save()
    LOG.info("link: %s takes: %s", link, time.time() - started)
Exemplo n.º 8
0
def get_sentence(date_str=None):
    """Fetch and ship the daily sentence for ``date_str`` (today if None)."""
    if date_str is None:
        date_str = datetime.now().strftime('%Y-%m-%d')
    response = HttpClient().get(URL.format(date_str))
    payload = ship_kingdata(response)
    payload['date_str'] = date_str
    return payload
Exemplo n.º 9
0
def down_page(url):
    """GET ``url`` with up to three attempts; return '' if all fail."""
    client = HttpClient()
    for _ in range(3):
        content = client.get(url)
        if content:
            return content
    return ''
Exemplo n.º 10
0
def download_page(url, **kwargs):
    """Download a page by the shared requests session.

    :param url: page URL; extra keyword args are forwarded to the client.
    :returns: the page body, or None when the download failed or the
        server answered with a soft "404 Not Found" error body.
    """
    client = HttpClient()
    content = client.get(url, **kwargs)
    if not content:
        return None
    # Some upstreams return HTTP 200 with an error page in the body;
    # `in` replaces the unidiomatic `find(...) != -1`.
    if "404 Not Found" in content:
        return None
    return content
Exemplo n.º 11
0
def download_page(c, t=None):
    """Query START_URL for category ``c`` and optional type ``t``."""
    query = {
        'p': 1,
        "c": quote(c.encode('utf-8')),
    }
    if t:
        query['t'] = quote(t.encode("utf-8"))
    return HttpClient().get(START_URL, params=query)
Exemplo n.º 12
0
def upload_qiniu_storage(url):
    """Mirror the resource at ``url`` into qiniu storage; '' on failure."""
    client = HttpClient()
    try:
        body = client.get(url)
    except Exception as ex:
        # Best effort: log the failure and fall back to an empty URL.
        LOGGER.error("url: %s, ex: %s", url, ex, exc_info=True)
        return ''
    return upload_image_qiniu(body) if body else ''
Exemplo n.º 13
0
def share_tbk_image(goods_id, params=None):
    """Render a shareable promo image (mini-app QR code + banner) for a goods.

    :param goods_id: numeric taobao goods id.
    :param params: optional dict carrying 'mid' and a 'user_config'
        ({'appid', 'appsec'}) for per-tenant mini-app QR codes.
    :returns: (True, image_url) on success, (False, error_message) otherwise.
    """
    from mini_goods import miniapp_goods_detail
    if params:
        mid = params.get("mid")
        data = miniapp_goods_detail(goods_id, mid)
    else:
        data = miniapp_goods_detail(goods_id)
    if data['errcode'] != 0:
        return False, data.get("errmsg", "")
    goods = data['data']
    scene = "id={}&t=1".format(goods_id)
    if params:
        user_config = params['user_config']
        qr_data = create_qrcode(
                scene,
                page='pages/goods/detail/index',
                appid=user_config['appid'],
                appsec=user_config['appsec'])
    else:
        qr_data = create_qrcode(scene)
    local_qr_path = qr_data['local_path']
    banner = goods['pic_url']
    client = HttpClient()
    data = client.get(banner)
    path = os.path.join(TBK_PATH, datetime.now().strftime("%Y-%m-%d"))
    if not os.path.exists(path):
        os.makedirs(path)
    banner_path = os.path.join(path, uuid.uuid4().hex)
    # The banner payload is binary image bytes: write in 'wb' — text-mode
    # 'w' corrupts the data on Windows and breaks under Python 3.
    with open(banner_path, 'wb') as _file:
        _file.write(data)
    sales = goods['sales']
    coupon_amount = goods['coupon_amount']
    coupon_fee = float(goods['price']) - float(coupon_amount)
    # Humanize the sales figure: 10000+ rendered as N.NN万.
    if sales < 10000:
        sales = str(sales)
    else:
        sales = str(round(float(sales) / 10000.0, 2)) + u'万'
    path = draw_tbk_share(
            local_qr_path, banner_path, goods['title'],
            goods['price'], round(coupon_fee, 2), sales)
    return True, get_url_from_path(path)
Exemplo n.º 14
0
def get_headimg_path(headimg_url, openid):
    """Return a local path for a user's head image, downloading at most once.

    The file is cached under HEADIMG_TMP/<date>/<openid>.<suffix>; an
    already-downloaded copy for today is reused.

    :param headimg_url: remote avatar URL.
    :param openid: user id; becomes the cached file's basename.
    :returns: path to the cached image file.
    """
    import glob
    today = datetime.now()
    date_str = today.strftime("%Y-%m-%d")
    dir_path = os.path.join(HEADIMG_TMP, date_str)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    # Reuse today's newest cached copy. glob replaces the old
    # os.popen("ls %s/%s* -t") call, which spawned a shell with `openid`
    # interpolated straight into the command line.
    cached = glob.glob(os.path.join(dir_path, openid + '*'))
    if cached:
        return max(cached, key=os.path.getmtime)
    client = HttpClient()
    pic_content = client.get(headimg_url)
    suffix = get_image_suffix(pic_content)
    filename = openid + '.' + suffix
    abpath = os.path.join(dir_path, filename)
    # Binary image bytes: write with 'wb', not text mode.
    with open(abpath, 'wb') as head_file:
        head_file.write(pic_content)
    return abpath
Exemplo n.º 15
0
class BaiduAIBase(object):
    """Base client for Baidu TSN AI APIs, with cached OAuth tokens."""

    def __init__(self, appid, appsec):
        self.appid = appid
        self.appsec = appsec
        self.http_client = HttpClient()
        self.base_url = 'http://tsn.baidu.com'

    @property
    def token_key(self):
        # Cache key is per-appid so multiple clients do not collide.
        return 'bdtoken_{}'.format(self.appid)

    def refresh_new_token(self):
        """Fetch a fresh OAuth token and cache it until expiry."""
        token_url = 'https://openapi.baidu.com/oauth/2.0/token'
        res = self.do_get(token_url, {
            'grant_type': 'client_credentials',
            'client_id': self.appid,
            'client_secret': self.appsec,
        })
        if 'error' in res:
            raise Exception('get token {} error {}'.format(
                token_url, res.get('error_description', '')))
        token = res['access_token']
        wx_cache.set(self.token_key, token)
        wx_cache.expire(self.token_key, int(res.get('expires_in', 3600)))
        return token

    @property
    def access_token(self):
        """Return the cached token, refreshing it when absent."""
        cached = wx_cache.get(self.token_key)
        return cached if cached else self.refresh_new_token()

    def do_get(self, url, data):
        """GET a relative path (prefixed with base_url) or an absolute URL."""
        if not url.startswith(('http', 'https')):
            url = self.base_url + url
        return self.http_client.get(url, params=data)

    def do_post(self, url, data):
        """POST to a relative path (prefixed with base_url) or absolute URL."""
        if not url.startswith(('http', 'https')):
            url = self.base_url + url
        return self.http_client.post(url, data=data, data_fromat='str')
Exemplo n.º 16
0
def get_shangxi_content(shangxi_id):
    """Fetch the appreciation (shangxi) text for a poem; '' on failure."""
    url = 'https://so.gushiwen.org/shiwen2017/ajaxshangxi.aspx'
    params = {'id': shangxi_id}
    # Throttle every request to stay polite to the remote site.
    time.sleep(10)
    page_content = HttpClient().get(url, params=params)
    if not page_content:
        LOG.debug("down page error: %s, params: %s", url, params)
        return ''
    dom = fromstring(unicode(page_content, 'utf-8'))
    paragraphs = []
    for node in dom.xpath("//div[@class='contyishang']/p"):
        # string(.) flattens the paragraph's mixed text/element children.
        text = node.xpath("string(.)").replace(u"▲", "")
        paragraphs.append(text + '\n')
    return ''.join(paragraphs)
Exemplo n.º 17
0
class IPData(object):
    """Resolve an IP address to a location name and a weather summary."""

    def __init__(self):
        self.http_client = HttpClient()

    @cache_wrapper('location', 86400)
    def get_ip_location(self, ip):
        """Return the user's city name for ``ip`` (cached for a day)."""
        return self._query_ip_from_ali(ip)

    @cache_wrapper('weather', 3600)
    def get_ip_weather(self, ip):
        """Return a short forecast string for ``ip`` (cached for an hour)."""
        res = weather_client.forecast_weather(ip)
        if res['errcode'] != 0:
            return ''
        resp_data = res['data']
        if resp_data['status'].lower() != 'ok':
            LOGGER.info(resp_data)
            return ''
        today = resp_data['daily_forecast'][0]
        day_wea = today['cond_txt_d']
        night_wea = today['cond_txt_n']
        if day_wea != night_wea:
            weather = day_wea + u'转' + night_wea
        else:
            weather = day_wea
        wind = today['wind_dir'] + today['wind_sc'] + u'级'
        return u"{weather}\n{wind}\n气温{low}℃/{high}℃".format(
            weather=weather,
            wind=wind,
            low=today['tmp_min'],
            high=today['tmp_max'])

    def _query_ip_from_ali(self, ip):
        """Look ``ip`` up on taobao's IP service; '' when unavailable."""
        url = 'http://ip.taobao.com/service/getIpInfo.php?ip={}'.format(ip)
        resp = self.http_client.get(url)
        if 'data' not in resp:
            return ''
        regin = resp['data'].get('region')
        city = resp['data'].get('city')
        if regin == city:
            # Municipalities report region == city; avoid "X省X市".
            return regin + u'市'
        return u'{}省{}市'.format(regin, city)

    def _query_ip_from_baidu(self, ip):
        """Fallback: scrape the IP's location from a baidu search result."""
        url = 'http://www.baidu.com/s?wd=%s' % ip
        ip_xpath = '//div[@id="content_left"]//div[@class="c-border"]'\
            '//table/tr/td'
        dom = fromstring(self.http_client.get(url))
        nodes = dom.xpath(ip_xpath)
        if nodes:
            text = nodes[0].xpath("string()").strip()
            text = text.replace("\n", '').replace("\t", '')
            address = self._match_address(text)
        else:
            address = ''
        LOGGER.info("ip: %s, addr: %s", ip, address)
        return address

    def _match_address(self, ip_content):
        """Pull the first CJK run after 'IP地址: ' out of ``ip_content``."""
        pattern = u'IP地址: [^\u4E00-\u9FA5]*(?P<address>[\u4E00-\u9FA5]+)[ ]?'
        match = re.compile(pattern).search(ip_content)
        return match.group("address") if match else ''