Example #1
def _newstocks(data, pageNo, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            html = lxml.html.parse(rv.NEW_STOCKS_URL%(ct.P_TYPE['http'],ct.DOMAINS['vsf'],
                         ct.PAGES['newstock'], pageNo))
            res = html.xpath('//table[@id=\"NewStockTable\"]/tr')
            if len(res) == 0:
                return data
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = sarr.replace('<font color="red">*</font>', '')
            sarr = '<table>%s</table>'%sarr
            df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0]
            df = df.drop([df.columns[idx] for idx in [12, 13, 14]], axis=1)
            df.columns = rv.NEW_STOCKS_COLS
            df['code'] = df['code'].map(lambda x : str(x).zfill(6))
            df['xcode'] = df['xcode'].map(lambda x : str(x).zfill(6))
            res = html.xpath('//table[@class=\"table2\"]/tr[1]/td[1]/a/text()')
            tag = '下一页' if ct.PY3 else unicode('下一页', 'utf-8')
            hasNext = tag in res
            data = data.append(df, ignore_index=True)
            pageNo += 1
            if hasNext:
                data = _newstocks(data, pageNo, retry_count, pause)
        except Exception as ex:
            print(ex)
        else:
            return data 
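A note on how this paginator is presumably driven: a thin public wrapper seeds it with an empty frame and page 1. The sketch below is a hedged reconstruction modeled on the other examples, not the library's actual wrapper; the _write_head() progress call is an assumption.

def new_stocks(retry_count=3, pause=0.001):
    # hypothetical driver for _newstocks, assuming the tushare-style ct module is in scope
    ct._write_head()    # progress banner, as in the other examples
    return _newstocks(pd.DataFrame(), 1, retry_count, pause)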
Example #2
def _get_cashflow_data(year, quarter, pageNo, dataArr,
                       retry_count=3, pause=0.001):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.CASHFLOW_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                                    ct.PAGES['fd'], year,
                                                    quarter, pageNo, ct.PAGE_NUM[1]))
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            text = text.replace('--', '')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath("//table[@class=\"list_table\"]/tr")
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            df = pd.read_html(sarr)[0]
            df.columns = ct.CASHFLOW_COLS
            dataArr = dataArr.append(df, ignore_index=True)
            nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
            if len(nextPage)>0:
                pageNo = re.findall(r'\d+', nextPage[0])[0]
                return _get_cashflow_data(year, quarter, pageNo, dataArr)
            else:
                return dataArr
        except Exception as e:
            pass
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
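One subtlety in the example above: the recursive next-page call passes only four positional arguments, so retry_count and pause silently fall back to their defaults (3 and 0.001) even when the caller supplied other values. A hypothetical fix, not part of the original code, would propagate them explicitly:

# hypothetical fix, not in the original: propagate the caller's retry settings
return _get_cashflow_data(year, quarter, pageNo, dataArr,
                          retry_count=retry_count, pause=pause)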
Example #3
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            html = lxml.html.parse(ct.TODAY_TICKS_URL % (ct.P_TYPE['http'],
                                                         ct.DOMAINS['vsf'], ct.PAGES['t_ticks'],
                                                         symbol, tdate, pageNo
                                ))  
            res = html.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            sarr = sarr.replace('--', '0')
            df = pd.read_html(StringIO(sarr), parse_dates=False)[0]
            df.columns = ct.TODAY_TICK_COLUMNS
            df['pchange'] = df['pchange'].map(lambda x : x.replace('%', ''))
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #4
def _get_debtpaying_data(year, quarter, pageNo, dataArr):
    ct._write_console()
    try:
        request = Request(ct.DEBTPAYING_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                                  ct.PAGES['fd'], year,
                                                  quarter, pageNo, ct.PAGE_NUM[1]))
        text = urlopen(request, timeout=ct.HTTP_TIMEOUT).read()
        text = text.decode('GBK')
        html = lxml.html.parse(StringIO(text))
        res = html.xpath("//table[@class=\"list_table\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = '<table>%s</table>'%sarr
        df = pd.read_html(sarr)[0]
        df.columns = ct.DEBTPAYING_COLS
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_debtpaying_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except Exception as e:
        print(e)
Example #5
def _get_forecast_data(year, quarter, pageNo, dataArr):
    url = ct.FORECAST_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                           quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]//span/a/text()')[0]
            name = trs.xpath('td[2]/span/a/text()')[0]
            ftype = trs.xpath('td[3]/a/text()')
            ftype = ftype[0] if len(ftype) > 0 else trs.xpath('td[3]/text()')[0]
            report_date = trs.xpath('td[4]/text()')[0]
            pre_eps = trs.xpath('td[7]/text()')[0]
            pre_eps = '0' if pre_eps == '--' else pre_eps
            frange = trs.xpath('td[8]/text()')[0]
            dataArr.append([code, name, ftype, report_date, pre_eps, frange])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+',nextPage[0])[0]
            return _get_forecast_data(year,quarter,pageNo,dataArr)
        else:
            return dataArr
    except:
        pass
Example #6
def _inst_tops(last=5, pageNo=1, retry_count=3, pause=0.001, dataArr=pd.DataFrame()):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(
                rv.LHB_SINA_URL % (ct.P_TYPE["http"], ct.DOMAINS["vsf"], rv.LHB_KINDS[2], ct.PAGES["fd"], last, pageNo)
            )
            text = urlopen(request, timeout=10).read()
            text = text.decode("GBK")
            html = lxml.html.parse(StringIO(text))
            res = html.xpath('//table[@id="dataTable"]/tr')
            if ct.PY3:
                sarr = [etree.tostring(node).decode("utf-8") for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = "".join(sarr)
            sarr = "<table>%s</table>" % sarr
            df = pd.read_html(sarr)[0]
            df = df.drop([2, 3], axis=1)
            df.columns = rv.LHB_JGZZ_COLS
            dataArr = dataArr.append(df, ignore_index=True)
            nextPage = html.xpath('//div[@class="pages"]/a[last()]/@onclick')
            if len(nextPage) > 0:
                pageNo = re.findall(r"\d+", nextPage[0])[0]
                return _inst_tops(last, pageNo, retry_count, pause, dataArr)
            else:
                return dataArr
        except Exception as e:
            print(e)
Example #7
def _get_detail(tag, retry_count=3, pause=10):
    dfc = pd.DataFrame()
    p = 0
    num_limit = 100
    while True:
        p = p+1
        for _ in range(retry_count):
            time.sleep(pause)
            try:
                ct._write_console()
                request = Request(ct.SINA_DATA_DETAIL_URL%(ct.P_TYPE['http'],
                                                                   ct.DOMAINS['vsf'], ct.PAGES['jv'],
                                                                   p,tag))
                text = urlopen(request, timeout=10).read()
                text = text.decode('gbk')
            except _network_error_classes:
                pass
            else:
                break
        reg = re.compile(r'\,(.*?)\:')
        text = reg.sub(r',"\1":', text)
        text = text.replace('"{symbol', '{"symbol')
        text = text.replace('{symbol', '{"symbol"')
        jstr = json.dumps(text)
        js = json.loads(jstr)
        df = pd.DataFrame(pd.read_json(js, dtype={'code':object}), columns=ct.THE_FIELDS)
        df = df[ct.FOR_CLASSIFY_B_COLS]
        dfc = pd.concat([dfc, df])
        if df.shape[0] < num_limit:
            return dfc
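The regex-and-replace dance above converts Sina's JavaScript object literals (bare, unquoted keys) into valid JSON. A standalone demo with an invented two-record payload shaped like that response:

import json
import re

raw = '[{symbol:"sh600000",code:"600000"},{symbol:"sz000001",code:"000001"}]'
raw = re.sub(r'\,(.*?)\:', r',"\1":', raw)   # quote every key that follows a comma
raw = raw.replace('"{symbol', '{"symbol')    # un-mangle ',{symbol' caught by the regex
raw = raw.replace('{symbol', '{"symbol"')    # quote the very first key
print(json.loads(raw)[1]['code'])            # -> 000001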
Example #8
def _dist_cotent(year, pageNo, retry_count, pause):
    url = rv.DP_163_URL%(ct.P_TYPE['http'], ct.DOMAINS['163'],
                     ct.PAGES['163dp'], year, pageNo)
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            if pageNo > 0:
                ct._write_console()
            html = lxml.html.parse(url)  
            res = html.xpath('//div[@class=\"fn_rp_list\"]/table')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            df = pd.read_html(sarr, skiprows=[0])[0]
            df = df.drop(df.columns[0], axis=1)
            df.columns = rv.DP_163_COLS
            df['divi'] = df['plan'].map(_fun_divi)
            df['shares'] = df['plan'].map(_fun_into)
            df = df.drop('plan', axis=1)
            df['code'] = df['code'].astype(object)
            df['code'] = df['code'].map(lambda x : str(x).zfill(6))
            if pageNo == 0:
                page = html.xpath('//div[@class=\"mod_pages\"]/a')
                asr = page[len(page)-2]
                pages = asr.xpath('text()')
        except _network_error_classes:
            pass
        else:
            if pageNo == 0:
                return df, pages[0]
            else:
                return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)    
Example #9
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            html = lxml.html.parse(ct.TODAY_TICKS_URL % (ct.P_TYPE['http'],
                                                         ct.DOMAINS['vsf'], ct.PAGES['t_ticks'],
                                                         symbol, tdate, pageNo
                                ))  
            res = html.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            sarr = sarr.replace('--', '0')
            df = pd.read_html(StringIO(sarr), parse_dates=False)[0]
            df.columns = list(ct.TODAY_TICK_COLUMNS)
            df['Pchange'] = df['Pchange'].map(lambda x: x.replace('%', ''))
            df.insert(0, 'Date', tdate)
            cols = ['Date'] + list(ct.TODAY_TICK_COLUMNS)
            df = df[cols]
        except _network_error_classes:
            pass
        else:
            return df
    raise IOError("获取失败,请检查网络" )
Example #10
def _newcbonds(pageNo, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        if pageNo != 1:
            ct._write_console()
        try:
            html = lxml.html.parse(rv.NEW_CBONDS_URL%(ct.P_TYPE['http'],ct.DOMAINS['sstar'],
                         pageNo))
            res = html.xpath('//table/tr')
            if len(res) == 0:
                return None
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            df = pd.read_html(StringIO(sarr), skiprows=[0])
            if len(df) < 1:
                return None
            df = df[0]
            df = df.drop([df.columns[14], df.columns[15]], axis=1)
            df.columns = rv.NEW_CBONDS_COLS
            df['scode'] = df['scode'].map(lambda x: str(x).zfill(6))
            df['xcode'] = df['xcode'].map(lambda x: str(x).zfill(6))
        except Exception as ex:
            print(ex)
        else:
            return df 
Example #11
def _parsing_dayprice_json(pageNum=1):
    """
           处理当日行情分页数据,格式为json
     Parameters
     ------
        pageNum:页码
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
    """
    ct._write_console()
    request = Request(ct.SINA_DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                 ct.PAGES['jv'], pageNum))
    text = urlopen(request, timeout=10).read()
    if text == 'null':
        return None
    reg = re.compile(r'\,(.*?)\:') 
    text = reg.sub(r',"\1":', text.decode('gbk') if ct.PY3 else text) 
    text = text.replace('"{symbol', '{"symbol')
    text = text.replace('{symbol', '{"symbol"')
    if ct.PY3:
        jstr = json.dumps(text)
    else:
        jstr = json.dumps(text, encoding='GBK')
    js = json.loads(jstr)
    df = pd.DataFrame(pd.read_json(js, dtype={'code':object}),
                      columns=ct.DAY_TRADING_COLUMNS)
    df = df.drop('symbol', axis=1)
    df = df[df.volume > 0]
    return df
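Worth noting: json.dumps(text) immediately followed by json.loads(jstr) returns the original string unchanged, so the pair is an identity and the real parsing happens inside pd.read_json. Assuming the repaired text is valid JSON at that point, an equivalent sketch without the no-op would be:

records = json.loads(text)    # list of per-stock dicts
df = pd.DataFrame(records, columns=ct.DAY_TRADING_COLUMNS)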
Example #12
def _get_forecast_data(year, quarter, pageNo, dataArr):
    ct._write_console()
    try:
        html = lxml.html.parse(
            ct.FORECAST_URL
            % (ct.P_TYPE["http"], ct.DOMAINS["vsf"], ct.PAGES["fd"], year, quarter, pageNo, ct.PAGE_NUM[1])
        )
        res = html.xpath('//table[@class="list_table"]/tr')
        if ct.PY3:
            sarr = [etree.tostring(node).decode("utf-8") for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = "".join(sarr)
        sarr = sarr.replace("--", "0")
        sarr = "<table>%s</table>" % sarr
        df = pd.read_html(sarr)[0]
        df = df.drop([4, 5, 8], axis=1)
        df.columns = ct.FORECAST_COLS
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@class="pages"]/a[last()]/@onclick')
        if len(nextPage) > 0:
            pageNo = re.findall(r"\d+", nextPage[0])[0]
            return _get_forecast_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
Example #13
def _dist_cotent(year, pageNo, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            if pageNo > 0:
                ct._write_console()
            html = lxml.html.parse(
                rv.DP_163_URL % (ct.P_TYPE["http"], ct.DOMAINS["163"], ct.PAGES["163dp"], year, pageNo)
            )
            res = html.xpath('//div[@class="fn_rp_list"]/table')
            if ct.PY3:
                sarr = [etree.tostring(node).decode("utf-8") for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = "".join(sarr)
            df = pd.read_html(sarr, skiprows=[0])[0]
            df = df.drop(df.columns[0], axis=1)
            df.columns = rv.DP_163_COLS
            df["divi"] = df["plan"].map(_fun_divi)
            df["shares"] = df["plan"].map(_fun_into)
            df = df.drop("plan", axis=1)
            df["code"] = df["code"].astype(object)
            df["code"] = df["code"].map(lambda x: str(x).zfill(6))
            if pageNo == 0:
                page = html.xpath('//div[@class="mod_pages"]/a')
                asr = page[len(page) - 2]
                pages = asr.xpath("text()")
        except _network_error_classes:
            pass
        else:
            if pageNo == 0:
                return df, pages[0]
            else:
                return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #14
def _get_forecast_data(year, quarter, pageNo, dataArr):
    ct._write_console()
    try:
        gparser = etree.HTMLParser(encoding='GBK')
        html = lxml.html.parse(ct.FORECAST_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], 
                                                ct.PAGES['fd'], year, quarter, pageNo,
                                                ct.PAGE_NUM[1]),
                               parser=gparser)
        res = html.xpath("//table[@class=\"list_table\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = sarr.replace('--', '0')
        sarr = '<table>%s</table>'%sarr
        df = pd.read_html(sarr)[0]
        df = df.drop([4, 5, 8], axis=1)
        df.columns = ct.FORECAST_COLS
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+',nextPage[0])[0]
            return _get_forecast_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except Exception as e:
        print(e)
Example #15
def _inst_detail(pageNo=1, retry_count=3, pause=0.001, dataArr=pd.DataFrame()):   
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.LHB_SINA_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], rv.LHB_KINDS[3],
                                               ct.PAGES['fd'], '', pageNo))
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath("//table[@id=\"dataTable\"]/tr")
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            df = pd.read_html(sarr)[0]
            df.columns = rv.LHB_JGMX_COLS
            dataArr = dataArr.append(df, ignore_index=True)
            nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
            if len(nextPage)>0:
                pageNo = re.findall(r'\d+', nextPage[0])[0]
                return _inst_detail(pageNo, retry_count, pause, dataArr)
            else:
                return dataArr
        except Exception as e:
            print(e)
Example #16
def _get_report_data(year, quarter, pageNo, dataArr, orderby):
    ct._write_console()
    try:
        request = Request(ct.REPORT_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'],
                                           year, quarter, pageNo, ct.PAGE_NUM[1], orderby))
        # the default sort order returned duplicate and missing rows, so the orderby parameter was added
        text = urlopen(request, timeout=10).read()
        text = text.decode('GBK')
        text = text.replace('--', '')
        html = lxml.html.parse(StringIO(text))
        res = html.xpath("//table[@class=\"list_table\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = '<table>%s</table>' % sarr
        df = pd.read_html(sarr)[0]
        df = df.drop(11, axis=1)
        df.columns = ct.REPORT_COLS
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
        if len(nextPage) > 0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_report_data(year, quarter, pageNo, dataArr, orderby)
        else:
            return dataArr
    except Exception as e:
        print(e)
Example #17
def _get_debtpaying_data(year, quarter, pageNo, dataArr):
    url = ct.DEBTPAYING_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                             quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]/a/text()')[0]
            name = trs.xpath('td[2]/a/text()')[0]
            currentratio = trs.xpath('td[3]/text()')[0]
            currentratio = '0' if currentratio == '--' else currentratio
            quickratio = trs.xpath('td[4]/text()')[0] 
            quickratio = '0' if quickratio == '--' else quickratio
            cashratio = trs.xpath('td[5]/text()')[0] 
            cashratio = '0' if cashratio == '--' else cashratio
            icratio = trs.xpath('td[6]/text()')[0] 
            icratio = '0' if icratio == '--' else icratio
            sheqratio = trs.xpath('td[7]/text()')[0] 
            sheqratio = '0' if sheqratio == '--' else sheqratio
            adratio = trs.xpath('td[8]/text()')[0] 
            adratio = '0' if adratio == '--' else adratio
            dataArr.append([code, name, currentratio, quickratio, cashratio,
                            icratio, sheqratio, adratio])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_debtpaying_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
Example #18
def _get_cashflow_data(year, quarter, pageNo, dataArr):
    url = ct.CASHFLOW_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                           quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]/a/text()')[0]
            name = trs.xpath('td[2]/a/text()')[0]
            cf_sales = trs.xpath('td[3]/text()')[0]
            cf_sales = '0' if cf_sales == '--' else cf_sales
            rateofreturn = trs.xpath('td[4]/text()')[0] 
            rateofreturn = '0' if rateofreturn == '--' else rateofreturn
            cf_nm = trs.xpath('td[5]/text()')[0] 
            cf_nm = '0' if cf_nm == '--' else cf_nm
            cf_liabilities = trs.xpath('td[6]/text()')[0] 
            cf_liabilities = '0' if cf_liabilities == '--' else cf_liabilities
            cashflowratio = trs.xpath('td[7]/text()')[0] 
            cashflowratio = '0' if cashflowratio == '--' else cashflowratio
            dataArr.append([code, name, cf_sales, rateofreturn, cf_nm,
                            cf_liabilities, cashflowratio])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_cashflow_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
Example #19
def _get_operation_data(year, quarter, pageNo, dataArr):
    url = ct.OPERATION_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                            quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]/a/text()')[0]
            name = trs.xpath('td[2]/a/text()')[0]
            arturnover = trs.xpath('td[3]/text()')[0]
            arturnover = '0' if arturnover == '--' else arturnover
            arturndays = trs.xpath('td[4]/text()')[0] 
            arturndays = '0' if arturndays == '--' else arturndays
            inventory_turnover = trs.xpath('td[5]/text()')[0] 
            inventory_turnover = '0' if inventory_turnover == '--' else inventory_turnover
            inventory_days = trs.xpath('td[6]/text()')[0] 
            inventory_days = '0' if inventory_days == '--' else inventory_days
            currentasset_turnover = trs.xpath('td[7]/text()')[0] 
            currentasset_turnover = '0' if currentasset_turnover == '--' else currentasset_turnover
            currentasset_days = trs.xpath('td[8]/text()')[0] 
            currentasset_days = '0' if currentasset_days == '--' else currentasset_days
            dataArr.append([code, name, arturnover, arturndays, inventory_turnover,
                            inventory_days, currentasset_turnover, currentasset_days])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_operation_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
Example #20
def _get_growth_data(year, quarter, pageNo, dataArr):
    url = ct.GROWTH_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                         quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]/a/text()')[0]
            name = trs.xpath('td[2]/a/text()')[0]
            mbrg = trs.xpath('td[3]/text()')[0]
            mbrg = '0' if mbrg == '--' else mbrg
            nprg = trs.xpath('td[4]/text()')[0] 
            nprg = '0' if nprg == '--' else nprg
            nav = trs.xpath('td[5]/text()')[0] 
            nav = '0' if nav == '--' else nav
            targ = trs.xpath('td[6]/text()')[0] 
            targ = '0' if targ == '--' else targ
            epsg = trs.xpath('td[7]/text()')[0] 
            epsg = '0' if epsg == '--' else epsg
            seg = trs.xpath('td[8]/text()')[0] 
            seg = '0' if seg == '--' else seg
            dataArr.append([code, name, mbrg, nprg, nav, targ, epsg, seg])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_growth_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
Example #21
def _get_profit_data(year, quarter, pageNo, dataArr):
    url = ct.PROFIT_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                         quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]/a/text()')[0]
            name = trs.xpath('td[2]/a/text()')[0]
            roe = trs.xpath('td[3]/text()')[0]
            roe = '0' if roe == '--' else roe
            net_profit_ratio = trs.xpath('td[4]/text()')[0] 
            net_profit_ratio = '0' if net_profit_ratio == '--' else net_profit_ratio
            gross_profit_rate = trs.xpath('td[5]/text()')[0] 
            gross_profit_rate = '0' if gross_profit_rate == '--' else gross_profit_rate
            net_profits = trs.xpath('td[6]/text()')[0] 
            net_profits = '0' if net_profits == '--' else net_profits
            eps = trs.xpath('td[7]/text()')[0] 
            eps = '0' if eps == '--' else eps
            business_income = trs.xpath('td[8]/text()')[0] 
            business_income = '0' if business_income == '--' else business_income
            bips = trs.xpath('td[9]/text()')[0] 
            bips = '0' if bips == '--' else bips
            dataArr.append([code, name, roe, net_profit_ratio, gross_profit_rate,
                            net_profits, eps, business_income, bips])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_profit_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
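Examples #17 through #21 repeat the same cell-extraction idiom: read td[i]/text() and map the '--' placeholder to '0'. A hedged refactoring sketch (not part of tushare) that those five row loops could share:

def _td_text(tr, idx, default='0'):
    # text of the idx-th <td>, with the '--' placeholder mapped to default
    val = tr.xpath('td[%d]/text()' % idx)[0]
    return default if val == '--' else val

With it, the profit loop above would reduce to lines like roe = _td_text(trs, 3).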
Example #22
def _get_report_data(year, quarter, pageNo, dataArr):
    url = ct.REPORT_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'],
                         year, quarter, pageNo, ct.PAGE_NUM[1])
    ct._write_console()
    try:
        html = lxml.html.parse(url)
        xtrs = html.xpath("//table[@class=\"list_table\"]/tr")
        for trs in xtrs:
            code = trs.xpath('td[1]//span/a/text()')[0]
            name = trs.xpath('td[2]/span/a/text()')[0]
            eps = trs.xpath('td[3]/text()')[0] # earnings per share (yuan)
            eps_yoy = trs.xpath('td[4]/text()')[0] # EPS year-on-year (%)
            bvps = trs.xpath('td[5]/text()')[0] # book value per share (yuan)
            bvps = '0' if bvps == '--' else bvps
            roe = trs.xpath('td[6]/text()')[0] # return on equity (%)
            roe = '0' if roe == '--' else roe
            epcf = trs.xpath('td[7]/text()')[0] # cash flow per share (yuan)
            epcf = '0' if epcf == '--' else epcf
            net_profits = trs.xpath('td[8]/text()')[0] # net profit (10k yuan)
            profits_yoy = trs.xpath('td[9]/text()')[0] # net profit year-on-year (%)
            distrib = trs.xpath('td[10]/text()')[0] # distribution plan
            report_date = trs.xpath('td[11]/text()')[0] # announcement date
            dataArr.append([code, name, eps, eps_yoy, bvps, roe,
                            epcf, net_profits, profits_yoy, distrib,
                            report_date])
        nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') # get the next-page link
        if len(nextPage)>0:
            pageNo = re.findall(r'\d+', nextPage[0])[0]
            return _get_report_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except:
        pass
Example #23
def _get_report_data(year, quarter, pageNo, dataArr):
    ct._write_console()
    try:
        request = Request(
            ct.REPORT_URL
            % (ct.P_TYPE["http"], ct.DOMAINS["vsf"], ct.PAGES["fd"], year, quarter, pageNo, ct.PAGE_NUM[1])
        )
        text = urlopen(request, timeout=10).read()
        text = text.decode("GBK")
        text = text.replace("--", "")
        html = lxml.html.parse(StringIO(text))
        res = html.xpath('//table[@class="list_table"]/tr')
        if ct.PY3:
            sarr = [etree.tostring(node).decode("utf-8") for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = "".join(sarr)
        sarr = "<table>%s</table>" % sarr
        df = pd.read_html(sarr)[0]
        df = df.drop(11, axis=1)
        df.columns = ct.REPORT_COLS
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@class="pages"]/a[last()]/@onclick')
        if len(nextPage) > 0:
            pageNo = re.findall(r"\d+", nextPage[0])[0]
            return _get_report_data(year, quarter, pageNo, dataArr)
        else:
            return dataArr
    except Exception as e:
        print(e)
Example #24
def _sh_mx(data, date="", start="", end="", symbol="", pageNo="", beginPage="", endPage="", retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            tail = "&pageHelp.pageNo=%s&pageHelp.beginPage=%s&pageHelp.endPage=%s" % (pageNo, beginPage, endPage)
            if pageNo == "":
                pageNo = 6
                tail = ""
            else:
                pageNo += 5
            beginPage = pageNo
            endPage = pageNo + 4
            ref = rv.MAR_SH_HZ_REF_URL % (ct.P_TYPE["http"], ct.DOMAINS["sse"])
            clt = Client(
                rv.MAR_SH_MX_URL
                % (
                    ct.P_TYPE["http"],
                    ct.DOMAINS["sseq"],
                    ct.PAGES["qmd"],
                    _random(5),
                    date,
                    symbol,
                    start,
                    end,
                    tail,
                    _random(),
                ),
                ref=ref,
                cookie=rv.MAR_SH_COOKIESTR,
            )
            lines = clt.gvalue()
            lines = lines.decode("utf-8") if ct.PY3 else lines
            lines = lines[19:-1]
            lines = json.loads(lines)
            pagecount = int(lines["pageHelp"].get("pageCount"))
            datapage = int(pagecount / 5 + 1 if pagecount % 5 > 0 else pagecount / 5)
            if pagecount == 0:
                return data
            if pageNo == 6:
                ct._write_tips(lines["pageHelp"].get("total"))
            df = pd.DataFrame(lines["result"], columns=rv.MAR_SH_MX_COLS)
            df["opDate"] = df["opDate"].map(lambda x: "%s-%s-%s" % (x[0:4], x[4:6], x[6:8]))
            data = data.append(df, ignore_index=True)
            if beginPage < datapage * 5:
                data = _sh_mx(
                    data,
                    start=start,
                    end=end,
                    pageNo=pageNo,
                    beginPage=beginPage,
                    endPage=endPage,
                    retry_count=retry_count,
                    pause=pause,
                )
        except _network_error_classes:
            pass
        else:
            return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #25
def _holding_cotent(start, end, pageNo, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        if pageNo > 0:
            ct._write_console()
        try:
            request = Request(
                rv.FUND_HOLDS_URL
                % (
                    ct.P_TYPE["http"],
                    ct.DOMAINS["163"],
                    ct.PAGES["163fh"],
                    ct.PAGES["163fh"],
                    pageNo,
                    start,
                    end,
                    _random(5),
                )
            )
            lines = urlopen(request, timeout=10).read()
            lines = lines.decode("utf-8") if ct.PY3 else lines
            lines = lines.replace("--", "0")
            lines = json.loads(lines)
            data = lines["list"]
            df = pd.DataFrame(data)
            df = df.drop(
                ["CODE", "ESYMBOL", "EXCHANGE", "NAME", "RN", "SHANGQIGUSHU", "SHANGQISHIZHI", "SHANGQISHULIANG"],
                axis=1,
            )
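            # Hedged reading of the 163 payload: the share and market-value
            # fields appear to arrive in raw units and are rescaled below to
            # units of 10,000, while SCSTC27 is rescaled to a percentage.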
            for col in ["GUSHU", "GUSHUBIJIAO", "SHIZHI", "SCSTC27"]:
                df[col] = df[col].astype(float)
            df["SCSTC27"] = df["SCSTC27"] * 100
            df["GUSHU"] = df["GUSHU"] / 10000
            df["GUSHUBIJIAO"] = df["GUSHUBIJIAO"] / 10000
            df["SHIZHI"] = df["SHIZHI"] / 10000
            df["GUSHU"] = df["GUSHU"].map(ct.FORMAT)
            df["GUSHUBIJIAO"] = df["GUSHUBIJIAO"].map(ct.FORMAT)
            df["SHIZHI"] = df["SHIZHI"].map(ct.FORMAT)
            df["SCSTC27"] = df["SCSTC27"].map(ct.FORMAT)
            df.columns = rv.FUND_HOLDS_COLS
            df = df[["code", "name", "date", "nums", "nlast", "count", "clast", "amount", "ratio"]]
        except _network_error_classes:
            pass
        else:
            if pageNo == 0:
                return df, int(lines["pagecount"])
            else:
                return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #26
def _sh_mx(data, date='', start='', end='', 
           symbol='',
           pageNo='', beginPage='',
           endPage='',
           retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            tail = '&pageHelp.pageNo=%s&pageHelp.beginPage=%s&pageHelp.endPage=%s'%(pageNo,
                                    beginPage, endPage)
            if pageNo == '':
                pageNo = 6
                tail = ''
            else:
                pageNo += 5
            beginPage = pageNo
            endPage = pageNo + 4
            url = rv.MAR_SH_MX_URL%(ct.P_TYPE['http'], ct.DOMAINS['sseq'],
                                    ct.PAGES['qmd'], _random(5), date, 
                                    symbol, start, end, tail,
                                    _random())
            ref = rv.MAR_SH_HZ_REF_URL%(ct.P_TYPE['http'], ct.DOMAINS['sse'])
            clt = Client(url, ref=ref, cookie=rv.MAR_SH_COOKIESTR)
            lines = clt.gvalue()
            lines = lines.decode('utf-8') if ct.PY3 else lines
            lines = lines[19:-1]
            lines = json.loads(lines)
            pagecount = int(lines['pageHelp'].get('pageCount'))
            datapage = int(pagecount/5+1 if pagecount%5>0 else pagecount/5)
            if pagecount == 0:
                return data
            if pageNo == 6:
                ct._write_tips(lines['pageHelp'].get('total'))
            df = pd.DataFrame(lines['result'], columns=rv.MAR_SH_MX_COLS)
            df['opDate'] = df['opDate'].map(lambda x: '%s-%s-%s'%(x[0:4], x[4:6], x[6:8]))
            data = data.append(df, ignore_index=True)
            if beginPage < datapage*5:
                data = _sh_mx(data, start=start, end=end, pageNo=pageNo, 
                       beginPage=beginPage, endPage=endPage, 
                       retry_count=retry_count, pause=pause)
        except _network_error_classes:
            pass
        else:
            return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
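The datapage expression above is a ceiling division (results arrive five pages per request). An equivalent, arguably clearer form:

import math
datapage = math.ceil(pagecount / 5)   # same value as the conditional expression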
Example #27
def _day_cinema(date=None, pNo=1, retry_count=3, pause=0.001):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.BOXOFFICE_CBD%(ct.P_TYPE['http'], ct.DOMAINS['mbox'],
                              ct.BOX, pNo, date))
            lines = urlopen(request, timeout=10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            df = pd.DataFrame(js['data1'])
            df = df.drop(['CinemaID'], axis=1)
            return df
Example #28
def _sz_hz(date="", retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            request = Request(rv.MAR_SZ_HZ_URL % (ct.P_TYPE["http"], ct.DOMAINS["szse"], ct.PAGES["szsefc"], date))
            lines = urlopen(request, timeout=10).read()
            if len(lines) <= 200:
                return pd.DataFrame()
            df = pd.read_html(lines, skiprows=[0])[0]
            df.columns = rv.MAR_SZ_HZ_COLS
            df["opDate"] = date
        except:
            pass
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #29
def _profit_divis(pageNo, dataArr, nextPage):
    ct._write_console()
    html = lxml.html.parse('%sdata.cfi.cn/%s'%(ct.P_TYPE['http'], nextPage))
    res = html.xpath("//table[@class=\"table_data\"]/tr")
    if ct.PY3:
        sarr = [etree.tostring(node).decode('utf-8') for node in res]
    else:
        sarr = [etree.tostring(node) for node in res]
    sarr = ''.join(sarr)
    sarr = sarr.replace('--', '0')
    sarr = '<table>%s</table>'%sarr
    df = pd.read_html(sarr, skiprows=[0])[0]
    dataArr = dataArr.append(df, ignore_index=True)
    nextPage = html.xpath('//div[@id=\"content\"]/div[2]/a[last()]/@href')[0]
    np = nextPage.split('&')[2].split('=')[1]
    if pageNo < int(np):
        return _profit_divis(int(np), dataArr, nextPage)
    else:
        return dataArr
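The positional split on the next-page href above breaks if data.cfi.cn ever reorders its query string. A more defensive sketch; the 'page' parameter name is a guess for illustration, and the real key on that site may differ:

from urllib.parse import parse_qs, urlparse

href = 'quote.aspx?contenttype=data&sortfield=0&page=3'    # invented sample href
np = parse_qs(urlparse(href).query).get('page', ['1'])[0]  # -> '3'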
Example #30
def _broker_tops_detail(last=5, pageNo=1, retry_count=3, pause=0.001, dataArr=pd.DataFrame()):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.LHB_SINA_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], rv.LHB_KINDS[1],
                                               ct.PAGES['fd'], last, pageNo))
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath("//table[@id=\"dataTable\"]/tr/td/a")
            urls = [a.attrib['href'] for a in res]
            if len(urls) <= 0:
                return dataArr
            for url in urls:
                ct._write_console()
                time.sleep(pause)
                request2 = Request(url)
                text2 = urlopen(request2, timeout=10).read()
                html2 = lxml.html.parse(StringIO(text2.decode('GBK')))
                res2 = html2.xpath("//table[@id=\"dataTable\"]/tr")
                broker = html2.xpath("//div[@class=\"page_config\"]")[0].text_content().strip().split(':')[1].strip()
                ct._write_msg(broker + "\r\n")
                if ct.PY3:
                    sarr = [etree.tostring(node).decode('utf-8') for node in res2]
                else:
                    sarr = [etree.tostring(node) for node in res2]
                sarr = ''.join(sarr)
                sarr = '<table>%s</table>'%sarr
                df = pd.read_html(sarr)[0]
                df.columns = rv.LHB_YYMX_COLS
                df['broker'] = pd.Series([broker]*df.index.size, index=list(df.index))
                dataArr = dataArr.append(df, ignore_index=True)
            nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
            if len(nextPage)>0:
                pageNo = re.findall(r'\d+', nextPage[0])[0]
                return _broker_tops_detail(last, pageNo, retry_count, pause, dataArr)
            else:
                return dataArr
        except Exception as e:
            print(e)
Example #31
def get_h_data(code,
               start=None,
               end=None,
               autype='qfq',
               retry_count=3,
               pause=0.001):
    '''
    Get price-adjusted historical data.
    Parameters
    ------
      code:string
                  stock code, e.g. 600848
      start:string
                  start date, format YYYY-MM-DD; defaults to one year ago today
      end:string
                  end date, format YYYY-MM-DD; defaults to today
      autype:string
                  adjustment type: qfq = forward-adjusted, hfq = backward-adjusted,
                  None = unadjusted; defaults to qfq
      retry_count : int, default 3
                 number of retries on network errors
      pause : int, default 0.001
                seconds to pause between repeated requests, to avoid problems
                caused by requests arriving too close together
    return
    -------
      DataFrame
          date   trade date (index)
          open   opening price
          high   highest price
          close  closing price
          low    lowest price
          volume trading volume
          amount turnover
    '''

    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    qt = qs[0]
    ct._write_head()
    data = _parse_fq_data(
        ct.HIST_FQ_URL %
        (ct.P_TYPE['http'], ct.DOMAINS['vsf'], code, qt[0], qt[1]),
        retry_count, pause)
    if len(qs) > 1:
        for d in range(1, len(qs)):
            qt = qs[d]
            ct._write_console()
            url = ct.HIST_FQ_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], code,
                                    qt[0], qt[1])
            df = _parse_fq_data(url, retry_count, pause)
            data = data.append(df, ignore_index=True)
    data = data.drop_duplicates('date')
    if start is not None:
        data = data[data.date >= start]
    if end is not None:
        data = data[data.date <= end]
    if autype == 'hfq':
        data = data.drop('factor', axis=1)
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT)
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    else:
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label] / data['factor']
        data = data.drop('factor', axis=1)
        if autype == 'qfq':
            df = _parase_fq_factor(code, start, end)
            df = df.drop_duplicates('date')
            df = df[df.date >= start]
            df = df[df.date <= end]
            df = pd.merge(data, df)
            df = df.sort_values('date', ascending=False)
            frow = df.head(1)
            rate = float(frow['close']) / float(frow['factor'])
            df['close_temp'] = df['close']
            df['close'] = rate * df['factor']
            for label in ['open', 'high', 'low']:
                df[label] = df[label] * (df['close'] / df['close_temp'])
                df[label] = df[label].map(ct.FORMAT)
            df = df.drop(['factor', 'close_temp'], axis=1)
            df['close'] = df['close'].map(ct.FORMAT)
            df = df.set_index('date')
            df = df.sort_index(ascending=False)
            df = df.astype(float)
            return df
        else:
            for label in ['open', 'high', 'close', 'low']:
                data[label] = data[label].map(ct.FORMAT)
            data = data.set_index('date')
            data = data.sort_index(ascending=False)
            data = data.astype(float)
            return data
Example #32
def get_h_data(code, start=None, end=None, autype='qfq',
               index=False, retry_count=3, pause=0.001, drop_factor=True):
    '''
    Get price-adjusted historical data.
    Parameters
    ------
      code:string
                  stock code, e.g. 600848
      start:string
                  start date, format YYYY-MM-DD; defaults to one year ago today
      end:string
                  end date, format YYYY-MM-DD; defaults to today
      autype:string
                  adjustment type: qfq = forward-adjusted, hfq = backward-adjusted,
                  None = unadjusted; defaults to qfq
      retry_count : int, default 3
                 number of retries on network errors
      pause : int, default 0.001
                seconds to pause between repeated requests, to avoid problems
                caused by requests arriving too close together
      drop_factor : bool, default True
                whether to drop the adjustment-factor column; the factor is rarely
                needed during analysis, but keeping it is more flexible if the data
                is stored in a database first and analyzed later
    return
    -------
      DataFrame
          date   trade date (index)
          open   opening price
          high   highest price
          close  closing price
          low    lowest price
          volume trading volume
          amount turnover
    '''
    print("本接口即将停止更新,请尽快使用Pro版接口:https://waditu.com/document/2")
    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    qt = qs[0]
    ct._write_head()
    data = _parse_fq_data(_get_index_url(index, code, qt), index,
                          retry_count, pause)
    if data is None:
        data = pd.DataFrame()
    if len(qs)>1:
        for d in range(1, len(qs)):
            qt = qs[d]
            ct._write_console()
            df = _parse_fq_data(_get_index_url(index, code, qt), index,
                                retry_count, pause)
            if df is None:  # df may be empty; stop paging
                break
            else:
                data = data.append(df, ignore_index = True)
    if len(data) == 0 or len(data[(data.date >= start) & (data.date <= end)]) == 0:
        return pd.DataFrame()
    data = data.drop_duplicates('date')
    if index:
        data = data[(data.date >= start) & (data.date <= end)]
        data = data.set_index('date')
        data = data.sort_index(ascending = False)
        return data
    if autype == 'hfq':
        if drop_factor:
            data = data.drop('factor', axis=1)
        data = data[(data.date >= start) & (data.date <= end)]
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT)
            data[label] = data[label].astype(float)
        data = data.set_index('date')
        data = data.sort_index(ascending = False)
        return data
    else:
        if autype == 'qfq':
            if drop_factor:
                data = data.drop('factor', axis = 1)
            df = _parase_fq_factor(code, start, end)
            df = df.drop_duplicates('date')
            df = df.sort_values('date', ascending = False)
            firstDate = data.head(1)['date']
            frow = df[df.date == firstDate[0]]
            rt = get_realtime_quotes(code)
            if rt is None:
                return pd.DataFrame()
            if ((float(rt['high']) == 0) & (float(rt['low']) == 0)):
                preClose = float(rt['pre_close'])
            else:
                if du.is_holiday(du.today()):
                    preClose = float(rt['price'])
                else:
                    if (du.get_hour() > 9) & (du.get_hour() < 18):
                        preClose = float(rt['pre_close'])
                    else:
                        preClose = float(rt['price'])
            
            rate = float(frow['factor']) / preClose
            data = data[(data.date >= start) & (data.date <= end)]
            for label in ['open', 'high', 'low', 'close']:
                data[label] = data[label] / rate
                data[label] = data[label].map(ct.FORMAT)
                data[label] = data[label].astype(float)
            data = data.set_index('date')
            data = data.sort_index(ascending = False)
            return data
        else:
            for label in ['open', 'high', 'close', 'low']:
                data[label] = data[label] / data['factor']
            if drop_factor:
                data = data.drop('factor', axis=1)
            data = data[(data.date >= start) & (data.date <= end)]
            for label in ['open', 'high', 'close', 'low']:
                data[label] = data[label].map(ct.FORMAT)
            data = data.set_index('date')
            data = data.sort_index(ascending = False)
            data = data.astype(float)
            return data
Example #33
def get_h_data(code, start=None, end=None, autype='qfq',
               index=False, retry_count=3, pause=0.001):
    '''
    Get price-adjusted historical data.
    Parameters
    ------
      code:string
                  stock code, e.g. 600848
      start:string
                  start date, format YYYY-MM-DD; defaults to one year ago today
      end:string
                  end date, format YYYY-MM-DD; defaults to today
      autype:string
                  adjustment type: qfq = forward-adjusted, hfq = backward-adjusted,
                  None = unadjusted; defaults to qfq
      retry_count : int, default 3
                 number of retries on network errors
      pause : int, default 0.001
                seconds to pause between repeated requests, to avoid problems
                caused by requests arriving too close together
    return
    -------
      DataFrame
          date   trade date (index)
          open   opening price
          high   highest price
          close  closing price
          low    lowest price
          volume trading volume
          amount turnover
    '''
    
    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    qt = qs[0]
    ct._write_head()
    data = _parse_fq_data(_get_index_url(index, code, qt), index,
                          retry_count, pause)
    if len(qs)>1:
        for d in range(1, len(qs)):
            qt = qs[d]
            ct._write_console()
            df = _parse_fq_data(_get_index_url(index, code, qt), index,
                                retry_count, pause)
            data = data.append(df, ignore_index=True)
    if len(data) == 0 or len(data[(data.date>=start)&(data.date<=end)]) == 0:
        return None
    data = data.drop_duplicates('date')
    if index:
        data = data[(data.date>=start) & (data.date<=end)]
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    if autype == 'hfq':
        data = data.drop('factor', axis=1)
        data = data[(data.date>=start) & (data.date<=end)]
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT)
            data[label] = data[label].astype(float)
        data = data.set_index('date')
        data = data.sort_index(ascending = False)
        return data
    else:
        if autype == 'qfq':
            data = data.drop('factor', axis=1)
            df = _parase_fq_factor(code, start, end)
            df = df.drop_duplicates('date')
            df = df.sort_values('date', ascending=False)
            frow = df.head(1)
            rt = get_realtime_quotes(code)
            if rt is None:
                return None
            if ((float(rt['high']) == 0) & (float(rt['low']) == 0)):
                preClose = float(rt['pre_close'])
            else:
                if du.is_holiday(du.today()):
                    preClose = float(rt['price'])
                else:
                    if (du.get_hour() > 9) & (du.get_hour() < 18):
                        preClose = float(rt['pre_close'])
                    else:
                        preClose = float(rt['price'])
            
            rate = float(frow['factor']) / preClose
            data = data[(data.date >= start) & (data.date <= end)]
            for label in ['open', 'high', 'low', 'close']:
                data[label] = data[label] / rate
                data[label] = data[label].map(ct.FORMAT)
                data[label] = data[label].astype(float)
            data = data.set_index('date')
            data = data.sort_index(ascending = False)
            return data
        else:
            for label in ['open', 'high', 'close', 'low']:
                data[label] = data[label] / data['factor']
            data = data.drop('factor', axis=1)
            data = data[(data.date>=start) & (data.date<=end)]
            for label in ['open', 'high', 'close', 'low']:
                data[label] = data[label].map(ct.FORMAT)
            data = data.set_index('date')
            data = data.sort_index(ascending=False)
            data = data.astype(float)
            return data
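For reference, the forward-adjustment (qfq) arithmetic shared by the three get_h_data variants reduces to rescaling the backward-adjusted series so that its latest close lines up with the current pre-close. A worked example with invented numbers:

hfq_close = 105.0        # backward-adjusted close from the history page
latest_factor = 10.5     # adjustment factor on the most recent trading day
pre_close = 10.0         # pre-close from the realtime quote
rate = latest_factor / pre_close    # 1.05
qfq_close = hfq_close / rate        # 100.0, the forward-adjusted price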