def _to_float(value, default=0):
    """Return *value* converted to ``float``, or *default* if it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def read_equity_by_date(date='2018-02-23', code='600420'):
    """Scrape the valuation table for one code on one day and upsert it into ``Equity``.

    The page at ``csi_domain`` is requested with *date* and *code* as query
    arguments, the result table is parsed with pandas, and every row of that
    table is written with an upsert keyed on ``(code, date)``.

    Args:
        date: Day to query, parsed with the module-level ``date_format``.
        code: Code sent as ``csrc_code`` and used as the upsert key.

    Returns:
        None. Nothing is fetched for Saturdays and Sundays. A failure message
        is printed when the page or its result table cannot be obtained.
    """
    day = arrow.get(date, date_format).date()
    # weekday() is 5 for Saturday and 6 for Sunday: ignore weekends
    if day.weekday() >= 5:
        return

    url = '{}industry-price-earnings-ratio-detail?date={}&class=2&search=1&csrc_code={}'.format(
        csi_domain, date, code)
    print(url)
    page = parse(url).getroot()
    if page is None:
        print("fail to download:{}".format(code))
        return

    tables = page.xpath('//table[@class="table table-bg p_table table-border "]')
    if not tables:
        # The page loaded but carries no result table; indexing the empty
        # xpath result used to raise IndexError here.
        print("fail to download:{}".format(code))
        return

    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)

    for _, row in df.iterrows():
        # Per-equity data. Cells are addressed by position (.iloc) so the
        # lookup does not depend on how pandas labelled the columns.
        # NOTE: the upsert key is the `code` argument, not a table column.
        cell = row.iloc
        name = cell[2]
        code1 = str(cell[3])
        code2 = str(cell[5])
        code3 = str(cell[7])
        code4 = str(cell[9])
        # Non-numeric cells are stored as 0
        pe = _to_float(cell[11])
        pe_ttm = _to_float(cell[12])
        pb = _to_float(cell[13])
        dyr = _to_float(cell[14])

        print(Equity.objects(code=code, date=day))
        Equity.objects(code=code, date=day).update_one(
            code=code, date=day, name=name,
            code1=code1, code2=code2, code3=code3, code4=code4,
            pe=pe, pe_ttm=pe_ttm, pb=pb, dividend_yield_ratio=dyr,
            upsert=True)
def finance_report(year=2018, quarter=2):
    """Fetch report data for a quarter and upsert it into ``FinanceReport``.

    Rows come from ``ts.get_report_data(year, quarter)``. Each row is written
    with an upsert keyed on ``(code, year, quarter)`` and stores ``name``,
    ``report_date``, ``roe`` and ``eps``.

    Args:
        year: Report year passed to the data source.
        quarter: Report quarter passed to the data source.

    Returns:
        None.
    """
    # The previous version first looked up the latest Equity document only to
    # read an unused ``date``; that raised AttributeError on an empty
    # collection, so the lookup was dropped.
    df = ts.get_report_data(year, quarter)
    print(df)
    if df is None:
        # Guard in case the data source hands back nothing for this period
        return

    data = df.to_dict('index')
    print(data)
    print(len(data))

    # Iterate in index order, as before
    for _, value in sorted(data.items()):
        code = value['code']
        FinanceReport.objects(code=code, year=year, quarter=quarter).update_one(
            code=code, name=value['name'], year=year, quarter=quarter,
            report_date=value['report_date'], roe=value['roe'], eps=value['eps'],
            upsert=True)
def read_history(code='600036', begin_date=None, end_date=None):
    """Download daily chart data for *code* and upsert it into ``Equity``.

    One request is sent to ``api_home`` for the period between *begin_date*
    and *end_date*. Every entry of the response's ``chartlist`` is upserted
    into ``Equity`` keyed on ``(code, date)``, storing ``percent``, ``open``,
    ``high``, ``low``, ``close`` and ``volume``.

    Args:
        code: Security code. An 8-character value is sent as is; otherwise an
            ``SH``/``HK``/``SZ`` prefix is added (see below).
        begin_date: Start of the period; defaults to ``'2014-01-01'``.
        end_date: End of the period; defaults to now.

    Returns:
        An empty list (no history objects are built; kept so existing callers
        still receive a list).
    """
    begin = arrow.get('2014-01-01') if begin_date is None else arrow.get(begin_date)
    end = arrow.now() if end_date is None else arrow.get(end_date)

    # Build the request symbol. Branch order matters: the '60'/'51' prefix
    # test wins over the length tests.
    if len(code) == 8:
        symbol = code
    elif code.startswith(('60', '51')):
        symbol = 'SH' + code
    elif len(code) == 5:
        symbol = 'HK' + code
    elif len(code) == 6:
        symbol = 'SZ' + code
    else:
        symbol = code

    url = '{}/stock/forchartk/stocklist.json?symbol={}&period=1day&type=before&begin={}&end={}'
    # NOTE(review): `.timestamp` is read as a property; newer arrow releases
    # reportedly expose it as a method - confirm against the installed version.
    url = url.format(api_home, symbol, begin.timestamp * 1000, end.timestamp * 1000)

    response = requests.get(url, params={'access_token': xq_a_token}, headers=headers)
    body = response.json()  # parse the response once
    print(body)

    # A response without 'chartlist' is treated as "no data", not a TypeError
    data_list = body.get('chartlist') or []
    for data in data_list:
        print(data)
        day = arrow.get(data.get('time'), 'ddd MMM DD HH:mm:ss Z YYYY').format('YYYY-MM-DD')
        Equity.objects(code=code, date=day).update_one(
            percent=data.get('percent'),
            open=data.get('open'), high=data.get('high'), low=data.get('low'),
            close=data.get('close'), volume=data.get('volume'),
            upsert=True)

    # The unused turnover/volume statistics that used to follow were removed:
    # they raised KeyError when 'turnrate' was absent (an earlier comment
    # notes that indexes have no turnover rate) or when the list was empty.
    return []
def read_history(code='600036', begin_date=None, end_date=None):
    """Download daily chart data for *code* and upsert it into ``Equity``.

    One request is sent to ``api_home`` for the period between *begin_date*
    and *end_date*. Every entry of the response's ``chartlist`` is upserted
    into ``Equity`` keyed on ``(code, date)``, storing ``percent``, ``open``,
    ``high``, ``low``, ``close`` and ``volume``.

    Args:
        code: Security code. An 8-character value is sent as is; otherwise an
            ``SH``/``HK``/``SZ`` prefix is added (see below).
        begin_date: Start of the period; defaults to ``'2014-01-01'``.
        end_date: End of the period; defaults to now.

    Returns:
        An empty list (no history objects are built; kept so existing callers
        still receive a list).
    """
    begin = arrow.get('2014-01-01') if begin_date is None else arrow.get(begin_date)
    end = arrow.now() if end_date is None else arrow.get(end_date)

    # Build the request symbol. Branch order matters: the '60'/'51' prefix
    # test wins over the length tests.
    if len(code) == 8:
        symbol = code
    elif code.startswith(('60', '51')):
        symbol = 'SH' + code
    elif len(code) == 5:
        symbol = 'HK' + code
    elif len(code) == 6:
        symbol = 'SZ' + code
    else:
        symbol = code

    url = '{}/stock/forchartk/stocklist.json?symbol={}&period=1day&type=before&begin={}&end={}'
    # NOTE(review): `.timestamp` is read as a property; newer arrow releases
    # reportedly expose it as a method - confirm against the installed version.
    url = url.format(api_home, symbol, begin.timestamp * 1000, end.timestamp * 1000)

    response = requests.get(url, params={'access_token': xq_a_token}, headers=headers)
    body = response.json()  # parse the response once
    print(body)

    # A response without 'chartlist' is treated as "no data", not a TypeError
    data_list = body.get('chartlist') or []
    for data in data_list:
        print(data)
        day = arrow.get(data.get('time'), 'ddd MMM DD HH:mm:ss Z YYYY').format('YYYY-MM-DD')
        Equity.objects(code=code, date=day).update_one(
            percent=data.get('percent'),
            open=data.get('open'), high=data.get('high'), low=data.get('low'),
            close=data.get('close'), volume=data.get('volume'),
            upsert=True)

    # The unused turnover/volume statistics that used to follow were removed:
    # they raised KeyError when 'turnrate' was absent (an earlier comment
    # notes that indexes have no turnover rate) or when the list was empty.
    return []
def _to_float(value, default=0):
    """Return *value* converted to ``float``, or *default* if it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def read_equity_by_date(date='2018-02-23', code='600420'):
    """Scrape the valuation table for one code on one day and upsert it into ``Equity``.

    The page at ``csi_domain`` is requested with *date* and *code* as query
    arguments, the result table is parsed with pandas, and every row of that
    table is written with an upsert keyed on ``(code, date)``.

    Args:
        date: Day to query, parsed with the module-level ``date_format``.
        code: Code sent as ``csrc_code`` and used as the upsert key.

    Returns:
        None. Nothing is fetched for Saturdays and Sundays. A failure message
        is printed when the page or its result table cannot be obtained.
    """
    day = arrow.get(date, date_format).date()
    # weekday() is 5 for Saturday and 6 for Sunday: ignore weekends
    if day.weekday() >= 5:
        return

    url = '{}industry-price-earnings-ratio-detail?date={}&class=2&search=1&csrc_code={}'.format(
        csi_domain, date, code)
    print(url)
    page = parse(url).getroot()
    if page is None:
        print("fail to download:{}".format(code))
        return

    tables = page.xpath('//table[@class="table table-bg p_table table-border "]')
    if not tables:
        # The page loaded but carries no result table; indexing the empty
        # xpath result used to raise IndexError here.
        print("fail to download:{}".format(code))
        return

    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)

    for _, row in df.iterrows():
        # Per-equity data. Cells are addressed by position (.iloc) so the
        # lookup does not depend on how pandas labelled the columns.
        # NOTE: the upsert key is the `code` argument, not a table column.
        cell = row.iloc
        name = cell[2]
        code1 = str(cell[3])
        code2 = str(cell[5])
        code3 = str(cell[7])
        code4 = str(cell[9])
        # Non-numeric cells are stored as 0
        pe = _to_float(cell[11])
        pe_ttm = _to_float(cell[12])
        pb = _to_float(cell[13])
        dyr = _to_float(cell[14])

        print(Equity.objects(code=code, date=day))
        Equity.objects(code=code, date=day).update_one(
            code=code, date=day, name=name,
            code1=code1, code2=code2, code3=code3, code4=code4,
            pe=pe, pe_ttm=pe_ttm, pb=pb, dividend_yield_ratio=dyr,
            upsert=True)
def _to_float(value, default=0):
    """Return *value* converted to ``float``, or *default* if it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def csi_industry(date='20180212'):
    """Download the daily ``csi<date>.zip`` workbook and upsert its contents.

    The archive is fetched from http://115.29.204.48/syl/ (for example
    ``csi20180212.zip``), unpacked in memory with ``extract_zip`` and every
    workbook inside is read with ``xlrd``. Sheets 0-3 update ``Industry``
    (pe, pe_ttm, pb, dividend_yield_ratio respectively) and sheet 4 updates
    ``Equity``. Other sheets are ignored.

    Args:
        date: Day to download, formatted ``YYYYMMDD``.

    Returns:
        None. Nothing is downloaded for Saturdays and Sundays, and the
        function returns early when the server answers 404.
    """
    # Sheet index -> Industry field holding that sheet's value (sheet 0 is
    # handled separately because it also stores date and name).
    industry_fields = {1: 'pe_ttm', 2: 'pb', 3: 'dividend_yield_ratio'}

    day = arrow.get(date, 'YYYYMMDD').date()
    # weekday() is 5 for Saturday and 6 for Sunday: ignore weekends
    if day.weekday() >= 5:
        return

    url = 'http://115.29.204.48/syl/csi' + date + '.zip'
    r = requests.get(url)
    if r.status_code == 404:
        return

    # Wrap the payload in a memory file; nothing is extracted to disk
    z = zipfile.ZipFile(io.BytesIO(r.content))
    memory_unzip_files = extract_zip(z)

    for file_contents in memory_unzip_files.values():
        if not file_contents:
            # Empty archive member: record the date in the log collection
            db.log.insert({'date': date})
            continue

        book = xlrd.open_workbook(file_contents=file_contents, encoding_override="gbk")
        # Previously "{0} ... {}" with a single argument: mixing manual and
        # automatic numbering raised ValueError, and `date` never reached
        # format().
        print("The number of worksheets is {0} for date {1}".format(book.nsheets, date))

        for sheet in range(book.nsheets):
            sh = book.sheet_by_index(sheet)
            print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
            for rx in range(sh.nrows):
                row = sh.row(rx)
                code = row[0].value
                name = row[1].value
                value = row[2].value
                # Only rows whose third cell looks like a non-negative number
                # are processed. str() keeps the test from failing on cells
                # that xlrd returns as numbers instead of text.
                if not str(value).replace('.', '', 1).isdigit():
                    continue

                if sheet == 0:
                    # Industry static P/E ratio
                    Industry.objects(code=code, date=day).update_one(
                        code=code, date=day, name=name, pe=value, upsert=True)
                elif sheet in industry_fields:
                    # Industry rolling P/E, P/B and dividend yield ratio
                    Industry.objects(code=code, date=day).update_one(
                        code=code, upsert=True, **{industry_fields[sheet]: value})
                elif sheet == 4:
                    # Per-equity data; non-numeric ratio cells are stored as 0
                    # NOTE(review): the upsert is keyed on name, not code -
                    # confirm this is intended.
                    Equity.objects(name=name, date=day).update_one(
                        code=code, date=day, name=name,
                        code1=row[2].value, code2=row[4].value,
                        code3=row[6].value, code4=row[8].value,
                        pe=_to_float(row[10].value),
                        pe_ttm=_to_float(row[11].value),
                        pb=_to_float(row[12].value),
                        dividend_yield_ratio=_to_float(row[13].value),
                        upsert=True)
def _to_float(value, default=0):
    """Return *value* converted to ``float``, or *default* if it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def csi_industry(date='20180212'):
    """Download the daily ``csi<date>.zip`` workbook and upsert its contents.

    The archive is fetched from http://115.29.204.48/syl/ (for example
    ``csi20180212.zip``), unpacked in memory with ``extract_zip`` and every
    workbook inside is read with ``xlrd``. Sheets 0-3 update ``Industry``
    (pe, pe_ttm, pb, dividend_yield_ratio respectively) and sheet 4 updates
    ``Equity``. Other sheets are ignored.

    Args:
        date: Day to download, formatted ``YYYYMMDD``.

    Returns:
        None. Nothing is downloaded for Saturdays and Sundays, and the
        function returns early when the server answers 404.
    """
    # Sheet index -> Industry field holding that sheet's value (sheet 0 is
    # handled separately because it also stores date and name).
    industry_fields = {1: 'pe_ttm', 2: 'pb', 3: 'dividend_yield_ratio'}

    day = arrow.get(date, 'YYYYMMDD').date()
    # weekday() is 5 for Saturday and 6 for Sunday: ignore weekends
    if day.weekday() >= 5:
        return

    url = 'http://115.29.204.48/syl/csi' + date + '.zip'
    r = requests.get(url)
    if r.status_code == 404:
        return

    # Wrap the payload in a memory file; nothing is extracted to disk
    z = zipfile.ZipFile(io.BytesIO(r.content))
    memory_unzip_files = extract_zip(z)

    for file_contents in memory_unzip_files.values():
        if not file_contents:
            # Empty archive member: record the date in the log collection
            db.log.insert({'date': date})
            continue

        book = xlrd.open_workbook(file_contents=file_contents, encoding_override="gbk")
        # Previously "{0} ... {}" with a single argument: mixing manual and
        # automatic numbering raised ValueError, and `date` never reached
        # format().
        print("The number of worksheets is {0} for date {1}".format(book.nsheets, date))

        for sheet in range(book.nsheets):
            sh = book.sheet_by_index(sheet)
            print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
            for rx in range(sh.nrows):
                row = sh.row(rx)
                code = row[0].value
                name = row[1].value
                value = row[2].value
                # Only rows whose third cell looks like a non-negative number
                # are processed. str() keeps the test from failing on cells
                # that xlrd returns as numbers instead of text.
                if not str(value).replace('.', '', 1).isdigit():
                    continue

                if sheet == 0:
                    # Industry static P/E ratio
                    Industry.objects(code=code, date=day).update_one(
                        code=code, date=day, name=name, pe=value, upsert=True)
                elif sheet in industry_fields:
                    # Industry rolling P/E, P/B and dividend yield ratio
                    Industry.objects(code=code, date=day).update_one(
                        code=code, upsert=True, **{industry_fields[sheet]: value})
                elif sheet == 4:
                    # Per-equity data; non-numeric ratio cells are stored as 0
                    # NOTE(review): the upsert is keyed on name, not code -
                    # confirm this is intended.
                    Equity.objects(name=name, date=day).update_one(
                        code=code, date=day, name=name,
                        code1=row[2].value, code2=row[4].value,
                        code3=row[6].value, code4=row[8].value,
                        pe=_to_float(row[10].value),
                        pe_ttm=_to_float(row[11].value),
                        pb=_to_float(row[12].value),
                        dividend_yield_ratio=_to_float(row[13].value),
                        upsert=True)