def edit_index_and_fill_na(df: pd.DataFrame) -> pd.DataFrame:
    """Relabel columns with full stock codes and forward-fill onto trade dates.

    The columns of ``df`` are matched against the first six characters of the
    ``stock`` values stored in the ``stock_ipo_date`` collection: the frame is
    reindexed to that full set of short codes and each column is renamed to
    the complete code.  A row dated 1990-01-01 holding ``1`` in every column
    is added as the starting value for forward filling, and the result is
    reindexed to the dates stored in the ``stock_trade_date`` collection.

    Args:
        df: Frame whose index holds dates (or date strings) and whose columns
            are six-character stock codes.

    Returns:
        The relabelled frame, indexed by the stored trade dates, with gaps
        forward-filled.
    """
    m = MongoDB_io()
    m.set_db('stock_daily_data')

    # region Description: edit columns and index
    m.set_collection('stock_ipo_date')
    ipo_df = m.read_data_to_get_dataframe()
    ipo_df['stock_short_name'] = ipo_df.stock.str[:6]
    map_series = ipo_df.set_index('stock_short_name').stock
    df = df.reindex(map_series.index, axis=1)
    df.columns = df.columns.map(lambda code: map_series[code])

    # Convert to real timestamps *before* adding the sentinel row and
    # sorting, so ordering is chronological rather than lexicographic and
    # the sentinel never has to be parsed together with the other labels.
    df.index = pd.to_datetime(df.index)
    df.loc[pd.Timestamp('1990-01-01'), :] = 1
    df = df.sort_index()
    # endregion

    # region Description: forward-fill and align to the trade calendar
    # DataFrame.ffill replaces the deprecated fillna(method='ffill').
    df = df.ffill()
    m.set_collection('stock_trade_date')
    trade_date = m.read_data_to_get_dataframe()
    df = df.reindex(trade_date.date.tolist(), axis=0)
    # Trade dates that were absent from the input are NaN after the reindex.
    df = df.ffill()
    # endregion

    return df
Пример #2
0
def get_sw_industry():
    """Download the 'sw_l1', 'sw_l2' and 'sw_l3' industry lists into MongoDB.

    The three lists returned by ``get_industries`` are stacked into a single
    frame, the index is exposed as an ``industry_code`` column, and the
    ``sw_industry_code`` collection of ``stock_daily_data`` is replaced with
    the result.
    """
    logging_joinquant()
    # pd.concat replaces the chained DataFrame.append calls; append was
    # removed in pandas 2.0.
    df = pd.concat(
        [get_industries(name=level) for level in ('sw_l1', 'sw_l2', 'sw_l3')]
    )
    df.index.name = 'industry_code'
    df = df.reset_index()

    # Insert into the database.
    m = MongoDB_io()
    m.set_db('stock_daily_data')
    m.set_collection('sw_industry_code')
    m.remove_all_documents_from_mongodb()
    m.insert_dataframe_to_mongodb(df)
def insert_index_data():
    """Refresh the ``index_info`` collection with the available index list.

    The securities returned by ``get_all_securities(types='index')`` are
    written to ``index_daily_data.index_info`` with the security code in an
    ``index`` column and ``start_date`` / ``end_date`` converted to
    timestamps.
    """
    # Download first: the existing documents are only cleared once the
    # replacement data is in hand, so a failed login or request does not
    # leave the collection empty.
    logging_joinquant()
    df = get_all_securities(types='index', date=None)
    df.index.name = 'index'
    df = df.reset_index()
    for column in ('start_date', 'end_date'):
        df[column] = pd.to_datetime(df[column])

    # Insert into the database.
    m = MongoDB_io()
    m.set_db('index_daily_data')
    m.set_collection('index_info')
    m.delete_document_include_condition()
    m.insert_huge_dataframe_by_block_to_mongodb(df)
def insert_zz500_data(start_date='2005-01-01', end_date='2019-09-25'):
    """Reload the daily bars of index 000905.XSHG into MongoDB.

    Args:
        start_date: First day to request, passed to ``get_price``.
        end_date: Last day to request, passed to ``get_price``.

    Rows containing missing values are dropped, the index is exposed as a
    ``date`` timestamp column, and the ``000905_XSHG`` collection of
    ``index_daily_data`` is replaced with the result.
    """
    # Download first: the existing documents are only cleared once the
    # replacement data is in hand, so a failed login or request does not
    # leave the collection empty.
    logging_joinquant()
    # Indices have no price adjustment, hence fq=None.
    df = get_price('000905.XSHG',
                   start_date=start_date,
                   end_date=end_date,
                   fq=None,
                   frequency='daily')
    df = df.dropna()
    df.index.name = 'date'
    df = df.reset_index()
    df['date'] = pd.to_datetime(df['date'])

    # Insert into the database.
    m = MongoDB_io()
    m.set_db('index_daily_data')
    m.set_collection('000905_XSHG')
    m.delete_document_include_condition()
    m.insert_huge_dataframe_by_block_to_mongodb(df)
Пример #5
0
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io


# NOTE(review): the account credentials are hard-coded; consider loading them
# from the environment or from a config file kept out of version control.
auth('15915765128','87662638qjf')

# Industry-code table from MongoDB.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
sw_indus = m.read_data_to_get_dataframe()

# Trade calendar from start_date onwards, requested once.
start_date = '2010-01-01'
trade_date_list = get_trade_days(start_date=start_date, end_date=None, count=None)
group_day_num = 1000
group_num = 1

weight_df = pd.DataFrame()

# Stock universe: every code stored in the IPO-date collection.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_ipo_date')
ipo_df = m.read_data_to_get_dataframe()
stock_list = ipo_df.stock.tolist()

for date in trade_date_list:
    print(date)
    panel = get_price(stock_list, start_date=date, end_date=date, frequency='daily', fields=None, skip_paused=False, fq='none', count=None)
    # Take the single requested day and expose the row labels as a 'stock'
    # column.
    df = panel.iloc[:, 0, :].reset_index().rename(columns={'index': 'stock'})
Пример #6
0
from data_base.mongodb import MongoDB_io
import pandas as pd

# Build a stock x day table marking which days already have minute data in
# each collection of 'stock_min_data', and save it as CSV.
m = MongoDB_io()
m.set_db('stock_min_data')
collection_list = m.list_collection_names()
m.close_MongoDB_connection()
collection_list.sort()

# One boolean Series per collection; they are combined into a frame once at
# the end (DataFrame.append was removed in pandas 2.0 and copied the whole
# frame on every iteration).
date_rows = []
for position, stock in enumerate(collection_list):
    print(stock, position)
    m.set_db('stock_min_data')
    m.set_collection(stock)
    df = m.read_data_to_get_field(field={'DATETIME': 1})
    # The first ten characters of the timestamp string are the calendar day.
    inserted_days = df.DATETIME.astype(str).str[:10].drop_duplicates().tolist()
    date_rows.append(pd.Series(True, index=inserted_days, name=stock))
    m.close_MongoDB_connection()

# Rows are collections, columns are days; missing combinations stay NaN.
insert_date_df = pd.DataFrame(date_rows)

path = r'D:\code\factor_module\download_stock_min_data\stock_insert_situation.csv'
insert_date_df.to_csv(path)
class get_min_price_class(object):
    def __init__(self):
        """Open the 'stock_min_data' database and cache its collection names."""
        handle = MongoDB_io()
        self.m = handle
        handle.set_db('stock_min_data')
        self.nothing = ''
        # Snapshot taken once at construction time.
        self.collection_list = handle.list_collection_names()

    def download_and_insert(self, stock, start_date, end_date):
        """Fetch minute bars for ``stock`` and insert them into its collection.

        Args:
            stock: Security code; its first six characters name the target
                collection.
            start_date: First day requested from ``get_price``.
            end_date: Last day requested from ``get_price``.

        Nothing is inserted when the download comes back with no rows.
        """
        print(stock, start_date, end_date)
        bars: pd.DataFrame = get_price(stock,
                                       start_date=start_date,
                                       end_date=end_date,
                                       frequency='minute',
                                       fields=None,
                                       skip_paused=False,
                                       fq=None,
                                       count=None)
        row_count = bars.shape[0]
        if row_count == 0:
            print(stock, ' is empty')
            return

        # Expose the timestamp index as a column, upper-case every column
        # name, and store the traded value under 'AMOUNT'.
        bars.index.name = 'datetime'
        bars.reset_index(inplace=True)
        bars.columns = bars.columns.str.upper()
        bars.rename(columns={'MONEY': 'AMOUNT'}, inplace=True)
        bars['DATETIME'] = bars['DATETIME'].astype(str)

        store = self.m
        store.set_collection(stock[:6])
        store.insert_dataframe_to_mongodb(bars)

    def check_stock_is_in_collection(self, stock):
        """Return True when the six-character prefix of ``stock`` is a known collection."""
        return stock[:6] in self.collection_list

    def get_collection_insert_date(self, collection):
        """Return the sorted, distinct days present in ``collection``.

        Each day is the first ten characters of the string form of a
        ``DATETIME`` value read from the collection.
        """
        store = self.m
        store.set_collection(collection)
        frame = store.read_data_to_get_field(field=['DATETIME'])
        days = frame.DATETIME.astype(str).str[:10]
        return sorted(set(days.tolist()))

    # def multi_process_insert_min_data(stock_code_list,start_date,end_date,trade_date_list):
    #     process_list=[]
    #     sem=Semaphore(4)
    #     for stock in stock_code_list:
    #         print(stock)
    #         p=MyProcess(target=inserting_one_stock, args=(stock, start_date, end_date, trade_date_list), kwargs={'sem':sem})
    #         p.daemon=True
    #         p.start()
    #         process_list.append(p)
    #         pass
    #     for proc in process_list:
    #         proc.join()
    #         pass
    #     pass

    def single_process_insert_min_data(self, stock_code_list, trade_date_list):
        """Process every stock in ``stock_code_list`` one after another.

        Args:
            stock_code_list: Iterable of security codes.
            trade_date_list: Trade dates handed to ``inserting_one_stock``
                for each code.
        """
        # enumerate gives the running position directly; looking it up with
        # list.index() rescans the list for every stock and reports the first
        # occurrence for repeated codes.
        for position, stock in enumerate(stock_code_list):
            print(stock, position)
            self.inserting_one_stock(stock, trade_date_list)

    def inserting_one_stock(self, stock, trade_date_list):
        """Download the minute bars that ``stock`` is still missing.

        A stock without a collection is downloaded for the whole range of
        ``trade_date_list``.  Otherwise the download resumes on the trade
        date following the last day already stored.

        Args:
            stock: Security code.
            trade_date_list: Ordered list of trade-date strings.

        Nothing is downloaded when ``trade_date_list`` is empty, when the
        last stored day is not one of the given trade dates, or when the
        last stored day is already the final trade date.
        """
        if len(trade_date_list) == 0:
            # No dates requested, so there is no range to download.
            return
        first_date = trade_date_list[0]
        final_date = trade_date_list[-1]

        if not self.check_stock_is_in_collection(stock):
            self.download_and_insert(stock, first_date, final_date)
            return

        date_list = self.get_collection_insert_date(stock[:6])
        if not date_list:
            # The collection exists but holds no dated rows: handle it like
            # a stock that has never been inserted.
            self.download_and_insert(stock, first_date, final_date)
            return

        last_insert_date = date_list[-1]
        if last_insert_date not in trade_date_list:
            return
        next_position = trade_date_list.index(last_insert_date) + 1
        if next_position >= len(trade_date_list):
            # Already up to date: there is no trade date after the last
            # stored day.
            return
        self.download_and_insert(stock, trade_date_list[next_position],
                                 final_date)

    def insert_stock_min_data(self, batch_start=200, batch_stop=500):
        """Download minute bars for one slice of the sorted stock list.

        The codes from ``get_stock_code_list`` are sorted and the slice
        ``[batch_start:batch_stop]`` is processed over the trade dates
        between the configured start and end dates.

        Args:
            batch_start: Position (inclusive) in the sorted code list at
                which this run starts.
            batch_stop: Position (exclusive) at which this run stops;
                ``None`` runs to the end of the list.
        """
        logging_joinquant()
        # sorted() leaves the list returned by the helper untouched.
        stock_list = sorted(get_stock_code_list())[batch_start:batch_stop]

        dic = get_setting_start_end_date()
        start_date = dic['start_date']
        end_date = dic['end_date']
        # Trade dates are compared against stored day strings downstream, so
        # they are converted to strings here.
        trade_date_series: pd.Series = pd.Series(
            get_trade_date_list(start_date, end_date)).astype(str)
        trade_date_list = trade_date_series.tolist()
        self.single_process_insert_min_data(stock_list, trade_date_list)