示例#1
0
def read_equity_by_date(date='2018-02-23', code='600420'):
    """Scrape one day's valuation table for ``code`` and upsert it into ``Equity``.

    The page at ``csi_domain`` is requested with ``date`` and ``code`` as query
    parameters, the first table matching the expected CSS class is parsed with
    pandas, and every row of that table is written to the ``Equity`` document
    keyed by ``(code, date)``.

    Args:
        date: Trading day as a string, parsed with the module-level
            ``date_format``. Weekend dates are skipped without any request.
        code: Value sent as ``csrc_code`` in the URL and used as the ``Equity``
            lookup key.

    Returns:
        None. A failure message is printed when the page could not be parsed
        or does not contain the expected table.
    """
    def _as_float(raw):
        """Convert a scraped cell to float; cells that cannot be parsed become 0."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            # Only conversion failures are tolerated; a bare ``except`` would
            # also swallow KeyboardInterrupt / SystemExit.
            return 0

    day = arrow.get(date, date_format).date()
    # weekday() is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    if day.weekday() in (5, 6):
        return

    url = '{}industry-price-earnings-ratio-detail?date={}&class=2&search=1&csrc_code={}'.format(
        csi_domain, date, code)
    print(url)
    page = parse(url).getroot()
    xpath = '//table[@class="table table-bg p_table table-border "]'
    tables = page.xpath(xpath) if page is not None else []
    if not tables:
        # Covers both "no document" and "document without the expected table";
        # the latter used to surface as an IndexError on ``r[0]``.
        print("fail to download:{}".format(code))
        return

    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)
    for _, row in df.iterrows():
        # Individual stock data. Cells are addressed by position; ``iloc``
        # makes that explicit instead of relying on the deprecated positional
        # fallback of ``Series.__getitem__``.
        cell = row.iloc
        name = cell[2]
        code1 = str(cell[3])
        code2 = str(cell[5])
        code3 = str(cell[7])
        code4 = str(cell[9])
        pe = _as_float(cell[11])
        pe_ttm = _as_float(cell[12])
        pb = _as_float(cell[13])
        dyr = _as_float(cell[14])

        print(Equity.objects(code=code, date=day))
        Equity.objects(code=code, date=day).update_one(
            code=code, date=day, name=name,
            code1=code1, code2=code2, code3=code3, code4=code4,
            pe=pe, pe_ttm=pe_ttm, pb=pb,
            dividend_yield_ratio=dyr, upsert=True)
示例#2
0
def finance_report(year=2018, quarter=2):
    """Fetch a quarterly report table and upsert every row into ``FinanceReport``.

    The table comes from ``ts.get_report_data(year, quarter)``. Each row is
    written to the ``FinanceReport`` document keyed by
    ``(code, year, quarter)`` with its name, report date, ROE and EPS.

    Args:
        year: Report year passed to ``ts.get_report_data``.
        quarter: Report quarter passed to ``ts.get_report_data``.

    Returns:
        None.
    """
    # The previous version first looked up the newest ``Equity`` document and
    # read its ``date``; the value was never used and the lookup raised
    # AttributeError on an empty collection, so it has been dropped together
    # with an unused function-local import.
    df = ts.get_report_data(year, quarter)
    print(df)
    if df is None:
        # NOTE(review): the data source appears to return None when the
        # download fails - confirm against the tushare version in use.
        return

    data = df.to_dict('index')
    print(data)
    print(len(data))
    # Rows are processed in ascending index order.
    for _, value in sorted(data.items()):
        FinanceReport.objects(code=value['code'], year=year, quarter=quarter).update_one(
            code=value['code'], name=value['name'],
            year=year, quarter=quarter,
            report_date=value['report_date'],
            roe=value['roe'], eps=value['eps'], upsert=True)
示例#3
0
def read_history(code='600036', begin_date=None, end_date=None):
    """Download daily bars for ``code`` and upsert them into ``Equity``.

    A chart request is sent to ``api_home`` for the period between
    ``begin_date`` and ``end_date``. For every entry of the response's
    ``chartlist`` the percent change, open, high, low, close and volume are
    written to the ``Equity`` document keyed by ``(code, date)``.

    Args:
        code: Stock code. An exchange prefix is prepended for the request
            according to the rules below; the unprefixed ``code`` is what is
            stored.
        begin_date: Start of the period, anything ``arrow.get`` accepts.
            Defaults to 2014-01-01.
        end_date: End of the period, anything ``arrow.get`` accepts.
            Defaults to now.

    Returns:
        An empty list. Nothing is ever appended to it; the return value is
        kept only so existing callers continue to receive a list.
    """
    begin = arrow.get('2014-01-01') if begin_date is None else arrow.get(begin_date)
    end = arrow.now() if end_date is None else arrow.get(end_date)

    # Build the request symbol. The order of the checks matters: the '60'/'51'
    # prefix test runs before the length tests.
    symbol = code
    if len(code) == 8:
        # presumably already carries an exchange prefix - verify against callers
        pass
    elif code.startswith(('60', '51')):
        symbol = 'SH' + code
    elif len(code) == 5:
        symbol = 'HK' + code
    elif len(code) == 6:
        symbol = 'SZ' + code

    url = '{}/stock/forchartk/stocklist.json?symbol={}&period=1day&type=before&begin={}&end={}'
    # NOTE(review): ``.timestamp`` is used as an attribute here; newer arrow
    # releases expose it as a method - confirm the installed version.
    url = url.format(api_home, symbol, begin.timestamp * 1000, end.timestamp * 1000)
    payload = {'access_token': xq_a_token}

    r = requests.get(url, params=payload, headers=headers)
    body = r.json()  # parse once; the old code decoded the response twice
    print(body)
    # A response without 'chartlist' used to make the loop fail with TypeError.
    data_list = body.get('chartlist') or []

    result = []
    for data in data_list:
        print(data)
        stamp = arrow.get(data.get('time'), 'ddd MMM DD HH:mm:ss Z YYYY')
        date = stamp.format('YYYY-MM-DD')
        Equity.objects(code=code, date=date).update_one(
            percent=data.get('percent'),
            open=data.get('open'), high=data.get('high'),
            low=data.get('low'),
            close=data.get('close'), volume=data.get('volume'), upsert=True)

    # The turnover/volume min/max/mean statistics that used to be computed
    # here were never used, and indexing the 'turnrate' column raised KeyError
    # for an empty response or a payload without that field (a removed note
    # said indices have no turnover rate), so they have been dropped.
    return result
示例#4
0
def read_history(code='600036', begin_date=None, end_date=None):
    """Download daily bars for ``code`` and upsert them into ``Equity``.

    A chart request is sent to ``api_home`` for the period between
    ``begin_date`` and ``end_date``. For every entry of the response's
    ``chartlist`` the percent change, open, high, low, close and volume are
    written to the ``Equity`` document keyed by ``(code, date)``.

    Args:
        code: Stock code. An exchange prefix is prepended for the request
            according to the rules below; the unprefixed ``code`` is what is
            stored.
        begin_date: Start of the period, anything ``arrow.get`` accepts.
            Defaults to 2014-01-01.
        end_date: End of the period, anything ``arrow.get`` accepts.
            Defaults to now.

    Returns:
        An empty list. Nothing is ever appended to it; the return value is
        kept only so existing callers continue to receive a list.
    """
    begin = arrow.get('2014-01-01') if begin_date is None else arrow.get(begin_date)
    end = arrow.now() if end_date is None else arrow.get(end_date)

    # Build the request symbol. The order of the checks matters: the '60'/'51'
    # prefix test runs before the length tests.
    symbol = code
    if len(code) == 8:
        # presumably already carries an exchange prefix - verify against callers
        pass
    elif code.startswith(('60', '51')):
        symbol = 'SH' + code
    elif len(code) == 5:
        symbol = 'HK' + code
    elif len(code) == 6:
        symbol = 'SZ' + code

    url = '{}/stock/forchartk/stocklist.json?symbol={}&period=1day&type=before&begin={}&end={}'
    # NOTE(review): ``.timestamp`` is used as an attribute here; newer arrow
    # releases expose it as a method - confirm the installed version.
    url = url.format(api_home, symbol, begin.timestamp * 1000,
                     end.timestamp * 1000)
    payload = {'access_token': xq_a_token}

    r = requests.get(url, params=payload, headers=headers)
    body = r.json()  # parse once; the old code decoded the response twice
    print(body)
    # A response without 'chartlist' used to make the loop fail with TypeError.
    data_list = body.get('chartlist') or []

    result = []
    for data in data_list:
        print(data)
        stamp = arrow.get(data.get('time'), 'ddd MMM DD HH:mm:ss Z YYYY')
        date = stamp.format('YYYY-MM-DD')
        Equity.objects(code=code, date=date).update_one(
            percent=data.get('percent'),
            open=data.get('open'),
            high=data.get('high'),
            low=data.get('low'),
            close=data.get('close'),
            volume=data.get('volume'),
            upsert=True)

    # The turnover/volume min/max/mean statistics that used to be computed
    # here were never used, and indexing the 'turnrate' column raised KeyError
    # for an empty response or a payload without that field (a removed note
    # said indices have no turnover rate), so they have been dropped.
    return result
示例#5
0
def read_equity_by_date(date='2018-02-23', code='600420'):
    """Scrape one day's valuation table for ``code`` and upsert it into ``Equity``.

    The page at ``csi_domain`` is requested with ``date`` and ``code`` as query
    parameters, the first table matching the expected CSS class is parsed with
    pandas, and every row of that table is written to the ``Equity`` document
    keyed by ``(code, date)``.

    Args:
        date: Trading day as a string, parsed with the module-level
            ``date_format``. Weekend dates are skipped without any request.
        code: Value sent as ``csrc_code`` in the URL and used as the ``Equity``
            lookup key.

    Returns:
        None. A failure message is printed when the page could not be parsed
        or does not contain the expected table.
    """
    def _as_float(raw):
        """Convert a scraped cell to float; cells that cannot be parsed become 0."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            # Only conversion failures are tolerated; a bare ``except`` would
            # also swallow KeyboardInterrupt / SystemExit.
            return 0

    day = arrow.get(date, date_format).date()
    # weekday() is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    if day.weekday() in (5, 6):
        return

    url = '{}industry-price-earnings-ratio-detail?date={}&class=2&search=1&csrc_code={}'.format(
        csi_domain, date, code)
    print(url)
    page = parse(url).getroot()
    xpath = '//table[@class="table table-bg p_table table-border "]'
    tables = page.xpath(xpath) if page is not None else []
    if not tables:
        # Covers both "no document" and "document without the expected table";
        # the latter used to surface as an IndexError on ``r[0]``.
        print("fail to download:{}".format(code))
        return

    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)
    for _, row in df.iterrows():
        # Individual stock data. Cells are addressed by position; ``iloc``
        # makes that explicit instead of relying on the deprecated positional
        # fallback of ``Series.__getitem__``.
        cell = row.iloc
        name = cell[2]
        code1 = str(cell[3])
        code2 = str(cell[5])
        code3 = str(cell[7])
        code4 = str(cell[9])
        pe = _as_float(cell[11])
        pe_ttm = _as_float(cell[12])
        pb = _as_float(cell[13])
        dyr = _as_float(cell[14])

        print(Equity.objects(code=code, date=day))
        Equity.objects(code=code,
                       date=day).update_one(code=code,
                                            date=day,
                                            name=name,
                                            code1=code1,
                                            code2=code2,
                                            code3=code3,
                                            code4=code4,
                                            pe=pe,
                                            pe_ttm=pe_ttm,
                                            pb=pb,
                                            dividend_yield_ratio=dyr,
                                            upsert=True)
示例#6
0
def csi_industry(date='20180212'):
    """Download one day's CSI valuation archive and upsert its contents.

    The zip at ``http://115.29.204.48/syl/csi<date>.zip`` is fetched and
    unpacked in memory. Every non-empty member is opened as a workbook and its
    sheets are processed by index:

    * sheet 0 - industry static P/E      -> ``Industry.pe`` (also sets name)
    * sheet 1 - industry trailing P/E    -> ``Industry.pe_ttm``
    * sheet 2 - industry P/B             -> ``Industry.pb``
    * sheet 3 - industry dividend yield  -> ``Industry.dividend_yield_ratio``
    * sheet 4 - individual stock data    -> ``Equity``

    Args:
        date: Day in ``YYYYMMDD`` form. Weekend dates and dates whose archive
            returns HTTP 404 are skipped silently.

    Returns:
        None.
    """
    def _as_float(raw):
        """Convert a cell value to float; values that cannot be parsed become 0."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            # Only conversion failures are tolerated; a bare ``except`` would
            # also swallow KeyboardInterrupt / SystemExit.
            return 0

    # Sheets 1-3 each carry a single Industry field.
    industry_field_by_sheet = {
        1: 'pe_ttm',
        2: 'pb',
        3: 'dividend_yield_ratio',
    }

    day = arrow.get(date, 'YYYYMMDD').date()
    # weekday() is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    if day.weekday() in (5, 6):
        return

    url = 'http://115.29.204.48/syl/csi' + date + '.zip'
    r = requests.get(url)
    if r.status_code == 404:
        return

    # Unpack entirely in memory; nothing is written to disk here.
    z = zipfile.ZipFile(io.BytesIO(r.content))
    memory_unzip_files = extract_zip(z)
    # ``file_name`` is kept distinct from the per-row ``name`` below, which
    # used to overwrite the loop variable.
    for file_name in memory_unzip_files.keys():
        file_contents = memory_unzip_files.get(file_name)
        if len(file_contents) == 0:
            # Record the date of an empty member and move on.
            db.log.insert({'date': date})
            continue

        book = xlrd.open_workbook(file_contents=file_contents,
                                  encoding_override="gbk")
        # The old format string mixed "{0}" with "{}" (ValueError at runtime)
        # and passed ``date`` to print() instead of format().
        print("The number of worksheets is {0} for date {1}".format(
            book.nsheets, date))
        for sheet in range(book.nsheets):
            sh = book.sheet_by_index(sheet)
            print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
            for rx in range(sh.nrows):
                row = sh.row(rx)
                code = row[0].value
                name = row[1].value
                value = row[2].value
                # Only rows whose third cell is a plain unsigned decimal
                # string are processed.
                # NOTE(review): this assumes the cell is text; a numeric cell
                # would not have ``replace`` - confirm against real files.
                if not value.replace('.', '', 1).isdigit():
                    continue

                if sheet == 0:
                    # Industry static P/E
                    Industry.objects(code=code, date=day).update_one(
                        code=code, date=day, name=name, pe=value, upsert=True)
                elif sheet in industry_field_by_sheet:
                    # Industry trailing P/E, P/B or dividend yield
                    Industry.objects(code=code, date=day).update_one(
                        code=code, upsert=True,
                        **{industry_field_by_sheet[sheet]: value})
                elif sheet == 4:
                    # Individual stock data
                    Equity.objects(name=name, date=day).update_one(
                        code=code,
                        date=day,
                        name=name,
                        code1=row[2].value,
                        code2=row[4].value,
                        code3=row[6].value,
                        code4=row[8].value,
                        pe=_as_float(row[10].value),
                        pe_ttm=_as_float(row[11].value),
                        pb=_as_float(row[12].value),
                        dividend_yield_ratio=_as_float(row[13].value),
                        upsert=True)
示例#7
0
def csi_industry(date='20180212'):
    """Download one day's CSI valuation archive and upsert its contents.

    The zip at ``http://115.29.204.48/syl/csi<date>.zip`` is fetched and
    unpacked in memory. Every non-empty member is opened as a workbook and its
    sheets are processed by index:

    * sheet 0 - industry static P/E      -> ``Industry.pe`` (also sets name)
    * sheet 1 - industry trailing P/E    -> ``Industry.pe_ttm``
    * sheet 2 - industry P/B             -> ``Industry.pb``
    * sheet 3 - industry dividend yield  -> ``Industry.dividend_yield_ratio``
    * sheet 4 - individual stock data    -> ``Equity``

    Args:
        date: Day in ``YYYYMMDD`` form. Weekend dates and dates whose archive
            returns HTTP 404 are skipped silently.

    Returns:
        None.
    """
    def _as_float(raw):
        """Convert a cell value to float; values that cannot be parsed become 0."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            # Only conversion failures are tolerated; a bare ``except`` would
            # also swallow KeyboardInterrupt / SystemExit.
            return 0

    # Sheets 1-3 each carry a single Industry field.
    industry_field_by_sheet = {
        1: 'pe_ttm',
        2: 'pb',
        3: 'dividend_yield_ratio',
    }

    day = arrow.get(date, 'YYYYMMDD').date()
    # weekday() is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    if day.weekday() in (5, 6):
        return

    url = 'http://115.29.204.48/syl/csi'+date+'.zip'
    r = requests.get(url)
    if r.status_code == 404:
        return

    # Unpack entirely in memory; nothing is written to disk here.
    z = zipfile.ZipFile(io.BytesIO(r.content))
    memory_unzip_files = extract_zip(z)
    # ``file_name`` is kept distinct from the per-row ``name`` below, which
    # used to overwrite the loop variable.
    for file_name in memory_unzip_files.keys():
        file_contents = memory_unzip_files.get(file_name)
        if len(file_contents) == 0:
            # Record the date of an empty member and move on.
            db.log.insert({'date': date})
            continue

        book = xlrd.open_workbook(file_contents=file_contents, encoding_override="gbk")
        # The old format string mixed "{0}" with "{}" (ValueError at runtime)
        # and passed ``date`` to print() instead of format().
        print("The number of worksheets is {0} for date {1}".format(book.nsheets, date))
        for sheet in range(book.nsheets):
            sh = book.sheet_by_index(sheet)
            print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
            for rx in range(sh.nrows):
                row = sh.row(rx)
                code = row[0].value
                name = row[1].value
                value = row[2].value
                # Only rows whose third cell is a plain unsigned decimal
                # string are processed.
                # NOTE(review): this assumes the cell is text; a numeric cell
                # would not have ``replace`` - confirm against real files.
                if not value.replace('.', '', 1).isdigit():
                    continue

                if sheet == 0:
                    # Industry static P/E
                    Industry.objects(code=code, date=day).update_one(
                        code=code, date=day, name=name, pe=value, upsert=True)
                elif sheet in industry_field_by_sheet:
                    # Industry trailing P/E, P/B or dividend yield
                    Industry.objects(code=code, date=day).update_one(
                        code=code, upsert=True,
                        **{industry_field_by_sheet[sheet]: value})
                elif sheet == 4:
                    # Individual stock data
                    Equity.objects(name=name, date=day).update_one(
                        code=code, date=day, name=name,
                        code1=row[2].value, code2=row[4].value,
                        code3=row[6].value, code4=row[8].value,
                        pe=_as_float(row[10].value),
                        pe_ttm=_as_float(row[11].value),
                        pb=_as_float(row[12].value),
                        dividend_yield_ratio=_as_float(row[13].value),
                        upsert=True)