def login_morning_star(self, cookie_str=None):
    """Log the shared Chrome driver in to morningstar.cn.

    Two login modes are supported:
        1. ``cookie_str`` is given: the cookies of an already logged-in
           session are installed through ``set_cookies``.
        2. otherwise: an interactive login (account, password, captcha) is
           performed through ``login_site``. Per the original author's note,
           captcha recognition succeeds roughly 30% of the time and is
           retried.

    The Chrome driver is created lazily on first use and kept on
    ``self._chrome_driver``. In mode 2 the driver's current cookies are
    stored on ``self._morning_cookies``.

    Args:
        cookie_str: Cookie string of an already logged-in session, or a
            falsy value to trigger the interactive login.

    Raises:
        SystemExit: if the interactive login fails.
    """
    login_url = 'https://www.morningstar.cn/membership/signin.aspx'
    if self._chrome_driver is None:
        from selenium import webdriver
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--no-sandbox")
        self._chrome_driver = webdriver.Chrome(options=chrome_options)
        self._chrome_driver.set_page_load_timeout(12000)

    if cookie_str:
        set_cookies(self._chrome_driver, login_url, cookie_str)
        return

    if self._morning_cookies is None:
        # No cached cookies yet: go through the interactive login first.
        if not login_site(self._chrome_driver, login_url):
            print('login fail')
            # Same effect as the builtin exit() (exit status 0), without
            # depending on the site module having installed it.
            raise SystemExit
        print('login success')
        sleep(3)
    # Store the site cookies on the instance. The original assigned them to
    # a throwaway local after a fresh login, so they were never cached.
    self._morning_cookies = self._chrome_driver.get_cookies()
Exemplo n.º 2
0
def handle_logout(request):
    """Log the caller out and blank the session cookies.

    When the request carries the refresh-token cookie, that token is posted
    to the provider's end-session endpoint first. In every case the auth and
    refresh cookies are then set to empty strings on a redirect to ``/``.

    Args:
        request: Mapping with a ``"headers"`` entry that ``get_cookies``
            can parse.

    Returns:
        The redirect response produced by ``redirect`` after ``set_cookies``
        has been applied to it.
    """
    logger.info("Starting handle_logout")
    cookies = get_cookies(request["headers"])

    # The end-session call needs the refresh token; skip it when absent.
    refresh_cookie_name = CONFIG["REFRESH_COOKIE"]
    if refresh_cookie_name in cookies:
        logger.info("Posting to logout URL")
        logout_params = {
            "url": wkc_data["end_session_endpoint"],
            "client_id": CONFIG["CLIENT_ID"],
            "client_secret": CONFIG["CLIENT_SECRET"],
            "refresh_token": cookies[refresh_cookie_name],
        }
        resp = post_to_url(**logout_params)
        logger.info("Back from post to logout URL, resp={r}".format(r=resp))

    # Blank both session cookies and send the client back to the root.
    response = redirect("/")
    for cookie_name in (CONFIG["AUTH_COOKIE"], refresh_cookie_name):
        cookies[cookie_name] = ""
    response = set_cookies(response=response, cookies=cookies)
    logger.info("Returning response to client")
    return response
Exemplo n.º 3
0
def get_fund_list(cookie_str=None):
    """Crawl the Morningstar China fund selector and dump every page to CSV.

    Two ways of authenticating are supported:
        1. ``cookie_str`` is given: the cookies of an already logged-in
           session are installed through ``set_cookies``.
        2. otherwise: an interactive login (account, password, captcha) is
           performed through ``login_site``. Per the original author's note,
           captcha recognition succeeds roughly 30% of the time and is
           retried.

    Each result page is parsed with BeautifulSoup and its rows are appended
    to ``../output/fund_morning_star.csv``. The file is truncated and its
    header rewritten at the start of every run. The browser is always shut
    down, even when the crawl fails part-way.

    Args:
        cookie_str: Cookie string of an already logged-in session, or a
            falsy value to trigger the interactive login.

    Raises:
        SystemExit: if the interactive login fails.
    """
    from selenium import webdriver

    options = webdriver.ChromeOptions()
    options.add_argument("--no-sandbox")
    # NOTE(review): the positional driver path, ``chrome_options=`` and
    # ``find_element_by_xpath`` below are Selenium 3 style APIs - confirm the
    # pinned Selenium version before upgrading.
    chrome_driver = webdriver.Chrome('./chromedriver/chromedriver.exe',
                                     chrome_options=options)
    try:
        # Keep a page load from hanging forever.
        chrome_driver.set_page_load_timeout(12000)

        morning_fund_selector_url = "https://www.morningstar.cn/fundselect/default.aspx"
        # "https://cn.morningstar.com/quickrank/default.aspx"
        if cookie_str:
            set_cookies(chrome_driver, morning_fund_selector_url, cookie_str)
        else:
            if not login_site(chrome_driver, morning_fund_selector_url):
                print('login fail')
                # Same effect as the builtin exit() (exit status 0); the
                # ``finally`` below still shuts the browser down.
                raise SystemExit
            print('login success')
            sleep(3)

        # Rows shown per result page; used to derive the number of pages.
        page_count = 25
        total_text = chrome_driver.find_element_by_xpath(
            '/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text
        page_num_total = math.ceil(int(total_text) / page_count)

        result_dir = '../output/'
        csv_path = result_dir + 'fund_morning_star.csv'
        # NOTE(review): this header names 7 columns while every data row
        # below carries 8 fields (a leading id) - confirm which is intended
        # before changing either, since readers may rely on the current
        # layout.
        output_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
            '类型' + ',' + '三年评级' + ',' + '五年评级' + ',' + '今年回报率' + '\n'
        # Start the output file afresh with the header line.
        with open(csv_path, 'w+') as csv_file:
            csv_file.write(output_head)

        # Size of the last, partial group of ten pages (loop-invariant).
        remainder = page_num_total % 10
        # One id generator for the whole run.
        # NOTE(review): assumes IdWorker is a stateful generator meant to be
        # reused; the original built a new one for every row - confirm.
        id_worker = IdWorker()
        # The data rows carry one of these two CSS classes.
        class_list = ['gridItem', 'gridAlternateItem']

        for page_num in range(1, page_num_total + 1):
            # Index of the pager link to click to advance: 12 normally,
            # ``remainder + 2`` once inside the last partial group of pages.
            num = (remainder +
                   2) if page_num > (page_num_total - remainder) else 12
            xpath_str = '/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/a[%s]' % (
                num)
            print('page_num', page_num)
            # Wait until the highlighted (current) page number in the pager
            # equals page_num.
            WebDriverWait(chrome_driver, timeout=600).until(
                text_to_be_present_in_element(
                    "/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/span[@style='margin-right:5px;font-weight:Bold;color:red;']",
                    str(page_num), xpath_str))
            sleep(1)

            # Per-column buffers for the rows scraped from this page.
            id_list = []  # snowflake id
            code_list = []  # fund code
            morning_star_code_list = []  # Morningstar-specific code
            name_list = []  # fund name
            fund_cat = []  # fund category
            fund_rating_3 = []  # Morningstar rating (3 years)
            fund_rating_5 = []  # Morningstar rating (5 years)
            rate_of_return = []  # year-to-date return (%)

            # Parse the current page source.
            bs = BeautifulSoup(chrome_driver.page_source, 'lxml')
            for css_class in class_list:
                for tr in bs.find_all('tr', {'class': css_class}):
                    id_list.append(id_worker.get_id())
                    tds_text = tr.find_all('td', {'class': "msDataText"})
                    tds_nume = tr.find_all('td', {'class': "msDataNumeric"})
                    # Fund code
                    code_a_element = tds_text[0].find_all('a')[0]
                    code_list.append(code_a_element.string)
                    # The Morningstar-specific code is the last path segment
                    # of the link's href.
                    current_morning_code = re.findall(
                        r'(?<=/quicktake/)(\w+)$',
                        code_a_element.get('href')).pop(0)
                    morning_star_code_list.append(current_morning_code)
                    name_list.append(tds_text[1].find_all('a')[0].string)
                    # Fund category
                    fund_cat.append(tds_text[2].string)
                    # 3-year rating
                    fund_rating_3.append(
                        get_star_count(tds_text[3].find_all('img')[0]['src']))
                    # 5-year rating
                    fund_rating_5.append(
                        get_star_count(tds_text[4].find_all('img')[0]['src']))
                    # Year-to-date return (%); '-' means no value.
                    return_text = tds_nume[3].string
                    rate_of_return.append(
                        return_text if return_text != '-' else None)

            print('数据准备完毕')
            fund_df = pd.DataFrame({
                'id': id_list,
                'fund_code': code_list,
                'morning_star_code': morning_star_code_list,
                'fund_name': name_list,
                'fund_cat': fund_cat,
                'fund_rating_3': fund_rating_3,
                'fund_rating_5': fund_rating_5,
                'rate_of_return': rate_of_return
            })
            fund_list = fund_df.values.tolist()
            print('fund_list', fund_list)
            with open(csv_path, 'a') as csv_file:
                csv_file.writelines(
                    ', '.join(str(x) for x in fund_item) + '\n'
                    for fund_item in fund_list)

            # Advance to the next page; there is nothing to click after the
            # last one.
            if page_num < page_num_total:
                chrome_driver.find_element_by_xpath(xpath_str).click()
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        chrome_driver.quit()
    print('end')
Exemplo n.º 4
0
def handle_login(request):
    """Handle the redirect back from the identity provider after login.

    Steps:
        1. If a ``state`` query parameter is present, decode it (base64 then
           JSON). When it carries a ``source_url``, verify its ``hash``
           against ``sha256(STATE_SECRET + source_url)`` and use that URL as
           the redirect target; otherwise the target stays ``/``.
        2. Exchange the ``code`` query parameter for tokens at the token
           endpoint.
        3. If ``CONFIG["ALLOWED_GROUP"]`` is set to a non-empty value,
           validate the access token and require that group in its
           ``groups`` claim.
        4. Redirect to the target with the auth and refresh cookies set.

    Args:
        request: Mapping with a ``"querystring"`` entry.

    Returns:
        The redirect response on success; the result of
        ``return_bad_request`` when the state is malformed or fails
        validation, or when a required key is missing; the result of
        ``forbidden`` when the group check fails.
    """
    import hmac

    logger.info("Starting handle_login")
    args = parse_qs(request["querystring"])
    redirect_to = "/"
    # check if state is present
    if "state" in args:
        raw_state = args["state"][0]
        logger.info("Got state of: {state}".format(state=raw_state))
        # The state comes straight from the query string, so every decoding
        # step can fail on hostile or truncated input. binascii.Error,
        # UnicodeDecodeError and json.JSONDecodeError are all ValueErrors.
        try:
            decoded_state = base64.b64decode(raw_state).decode("utf-8")
            logger.info("Decoded state: {state}".format(state=decoded_state))
            state = json.loads(decoded_state)
        except ValueError:
            logger.info("State parameter could not be decoded")
            return return_bad_request("Error validating state")
        if isinstance(state, dict) and "source_url" in state:
            state_secret = CONFIG["STATE_SECRET"]
            source_url = state["source_url"]
            try:
                expected_hash = sha256(
                    (state_secret + source_url).encode("utf-8")).hexdigest()
                # Constant-time comparison. A missing, non-string or
                # non-ASCII hash raises TypeError and counts as a mismatch.
                hash_matches = hmac.compare_digest(expected_hash,
                                                   state.get("hash"))
            except (TypeError, ValueError):
                hash_matches = False
            if not hash_matches:
                return return_bad_request("Error validating state")
            redirect_to = source_url
            logger.info(
                "Got source url from state of {url}".format(url=redirect_to))
    try:
        # NOTE(review): parse_qs yields a list here, unlike the ``[0]`` used
        # for state above - confirm post_to_url expects the list form.
        auth_code = args["code"]
        logger.info(
            "Got auth code from query string: {code}".format(code=auth_code))
        # swap the authorisation code for tokens
        resp = post_to_url(url=wkc_data["token_endpoint"],
                           grant_type="authorization_code",
                           client_id=CONFIG["CLIENT_ID"],
                           client_secret=CONFIG["CLIENT_SECRET"],
                           code=auth_code,
                           redirect_uri=CONFIG["REDIRECT_URI"])
        resp = json.loads(resp)
        logger.info("Exchanged authorisation code for tokens")
        access_token = resp["access_token"]
        refresh_token = resp["refresh_token"]
        # if there is an allowed group set, we need to check the token has
        # the claim for that group
        if "ALLOWED_GROUP" in CONFIG and CONFIG["ALLOWED_GROUP"] != "":
            allowed_group = CONFIG["ALLOWED_GROUP"]
            logger.info("Need to check groups")
            decoded_token = validate_jwt(api=CONFIG["VAL_API_URL"],
                                         token=access_token,
                                         key_set=keys,
                                         aud=CONFIG["CLIENT_ID"])
            decoded_token = json.loads(decoded_token.decode("utf-8"))
            if "groups" not in decoded_token:
                logger.info("no groups claim in token")
                return forbidden(message=CONFIG["ACCESS_DENIED_MESSAGE"])
            if allowed_group not in decoded_token["groups"]:
                logger.info(
                    f"user is missing expected group of {allowed_group}")
                return forbidden(message=CONFIG["ACCESS_DENIED_MESSAGE"])
            logger.info(f"user has expected group of {allowed_group}")
        # prepare response
        r = redirect(redirect_to)
        cookies = {
            CONFIG["AUTH_COOKIE"]: access_token,
            CONFIG["REFRESH_COOKIE"]: refresh_token,
        }
        r = set_cookies(response=r,
                        cookies=cookies,
                        max_age=CONFIG.get("MAX_AGE", "10"))
        logger.info("Returning response to client")
        return r
    except KeyError:
        return return_bad_request("Bad request missing parameter")
Exemplo n.º 5
0
def check_session(request):
    """Let a request through if its session is valid, else start a login.

    The original URL (path plus query string) is packed into a base64 JSON
    ``state`` value together with ``sha256(STATE_SECRET + url)`` and a random
    nonce, and appended to the authorisation URL used for login redirects.

    Outcomes:
        * valid access-token cookie: the request is returned unchanged;
        * expired access token and a refresh-token cookie: the token is
          refreshed and the client is redirected to ``/`` with the new
          access token set as a cookie;
        * no access token, no refresh token, or a failed refresh: the client
          is redirected to the authorisation URL carrying the state.

    Only ``ExpiredSignatureError`` from ``validate_jwt`` is handled here;
    any other exception it raises propagates to the caller.

    Args:
        request: Mapping with ``"headers"``, ``"uri"`` and ``"querystring"``
            entries.

    Returns:
        ``request`` itself when the session is valid, otherwise a redirect
        response.
    """
    logger.info("Starting check_session")
    cookies = get_cookies(request["headers"])
    # update aurl with state data
    source_url = request["uri"]
    if request["querystring"]:
        source_url = source_url + "?{qs}".format(qs=request["querystring"])
    logger.info("Determined source_url is {s}".format(s=source_url))
    source_url_secret = CONFIG["STATE_SECRET"] + source_url
    state = {
        "source_url": source_url,
        "hash": sha256(source_url_secret.encode("utf-8")).hexdigest(),
        "nonce": get_rand_string(10)
    }
    state = base64.b64encode(json.dumps(state).encode("utf-8")).decode("utf-8")
    logger.info("Encoded state: {state}".format(state=state))
    aurl_with_state = "{aurl}&state={state}".format(aurl=aurl, state=state)

    # check for auth token
    if CONFIG["AUTH_COOKIE"] not in cookies:
        logger.info("No access token present, so need to log in")
        return redirect(aurl_with_state)

    logger.info("Got access token")
    # try to validate the token
    try:
        validate_jwt(api=CONFIG["VAL_API_URL"],
                     token=cookies[CONFIG["AUTH_COOKIE"]],
                     key_set=keys,
                     aud=CONFIG["CLIENT_ID"])
    except ExpiredSignatureError:
        logger.info(
            "Access token has expired, so going to attempt to refresh")
    else:
        logger.info("Access token is valid")
        return request

    if CONFIG["REFRESH_COOKIE"] not in cookies:
        # return a 302 redirect as we don't have a refresh token
        logger.info("No refresh token present, so need to log in again")
        return redirect(aurl_with_state)

    # we have a refresh token
    logger.info("Refreshing token")
    resp = post_to_url(url=wkc_data["token_endpoint"],
                       grant_type="refresh_token",
                       client_id=CONFIG["CLIENT_ID"],
                       client_secret=CONFIG["CLIENT_SECRET"],
                       refresh_token=cookies[CONFIG["REFRESH_COOKIE"]])
    logger.info("Called to refresh token")
    resp = json.loads(resp)
    if "error" in resp or "access_token" not in resp:
        logger.info(
            "There was an error refreshing the token, so need to log in again"
        )
        # Carry the state like the other login redirects, so the user lands
        # back on the page they asked for (the original used the bare aurl).
        return redirect(aurl_with_state)

    logger.info("Got new access token, returning to client")
    # NOTE(review): this sends the client to "/" and not to source_url -
    # confirm that losing the original path after a refresh is intended.
    r = redirect("/")
    cookies[CONFIG["AUTH_COOKIE"]] = resp["access_token"]
    r = set_cookies(response=r,
                    cookies=cookies,
                    max_age=CONFIG.get("MAX_AGE", "10"))
    return r