Пример #1
0
def _lookup_trade_date(date_path, dt):
    """Return the latest trading day on or before ``dt`` found in the
    calendar CSV at ``date_path`` (as a ``str``), or ``None`` when the walk
    leaves the range covered by the calendar."""
    trade_dates = pd.read_csv(date_path, header=0)
    trade_date_dict = trade_dates.set_index(
        "calendar_date")['is_trading_day'].to_dict()
    day = dt
    key = str(day)
    # Walk backwards one calendar day at a time until a trading day is hit
    # or the date is no longer present in the calendar.
    while key in trade_date_dict:
        if trade_date_dict[key] == 1:
            return key
        day = day - datetime.timedelta(days=1)
        key = str(day)
    return None


def get_recently_trade_date(dt=None):
    """Return the most recent trading day on or before ``dt``.

    Args:
        dt: a ``datetime.date``; defaults to *today at call time*. (The
            previous ``dt=datetime.date.today()`` default was evaluated once
            at import, so a long-running process kept using a stale date.)

    Returns:
        The trading day as the string form used in the ``calendar_date``
        column, or ``None`` when the calendar file is unavailable or does
        not cover ``dt``.

    If the calendar file is missing it is rebuilt through
    ``init_trade_date()`` and the lookup is then attempted once.
    """
    if dt is None:
        dt = datetime.date.today()

    date_path = ConfigUtils.get_stock("STOCKS_DATE")
    print(date_path)

    if not (date_path and os.path.exists(date_path)):
        print("Date Is Not Exist, Reloading Trade Dates. ")
        init_trade_date()
        print("Date Loading Finish. ")
        # The reload may still have failed; only continue if the file exists.
        if not (date_path and os.path.exists(date_path)):
            return None

    recent = _lookup_trade_date(date_path, dt)
    if recent is None:
        print("Date Is Not Exist !!!, Reload Trade Dates. ")
    return recent
Пример #2
0
    def __init__(self, trn, trn_label, val, val_label, name):
        """Store the train/validation splits and set up the model location.

        Args:
            trn: training features.
            trn_label: training labels.
            val: validation features.
            val_label: validation labels.
            name: file name of the model inside the configured MODELPATH.
        """
        self.trn = trn
        self.val = val
        self.trn_label = trn_label
        self.val_label = val_label

        model_dir = ConfigUtils.get_model("MODELPATH")
        self.path = os.path.join(model_dir, name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(model_dir, exist_ok=True)

        self.param = {
            'num_leaves': 60,
            # NOTE(review): the key is spelled 'n_estimatores'. The other
            # keys look like LightGBM parameters; if `n_estimators` was
            # intended this entry is presumably ignored. Left unchanged
            # because correcting it would change the number of boosting
            # rounds -- confirm against the training call.
            'n_estimatores': 3000,
            'min_data_in_leaf': 30,
            'objective': 'multiclass',
            'num_class': 21,
            'lambda_l1': 0.1,
            'lambda_l2': 0.2,
            'max_depth': 5,
            'learning_rate': 0.01,
            "min_child_samples": 20,
            "boosting": "gbdt",
            # "feature_fraction": 0.45,
            "bagging_freq": 1,
            "bagging_fraction": 0.8,
            "bagging_seed": 11,
            "nthread": 30,
            'metric': 'multi_logloss',
            "random_state": 1111,
            "verbosity": -1
        }
Пример #3
0
def _monthly_bar(month, group):
    """Collapse one month of date-sorted daily rows into one monthly bar."""
    first_open = group['open'].iloc[0]
    preclose = group['preclose'].iloc[0]
    close = group['close'].iloc[-1]
    # When the previous close is missing, measure the change against the
    # month's opening price. (The original divided by the NaN preclose in
    # that branch, so the fallback always produced NaN.)
    base = first_open if pd.isna(preclose) else preclose
    return {
        'date': month,
        'code': group['code'].iloc[0],
        'open': first_open,
        'preclose': preclose,
        'close': close,
        'high': group['high'].max(),
        'low': group['low'].min(),
        'volume': group['volume'].sum(),
        'turn': group['turn'].sum(),
        'amount': group['amount'].sum(),
        'pctChg': (close - base) / base,
    }


def period_trades():
    """Aggregate every daily K-line CSV in DATA_DIR into monthly bars.

    For each file in DATA_DIR a CSV with the same name is written to
    DATA_M_DIR, one row per calendar month. Files that are empty or lack an
    expected column are reported on stdout and skipped.
    """
    columns = [
        'date', 'code', 'open', 'preclose', 'close', 'high', 'low',
        'volume', 'turn', 'amount', 'pctChg'
    ]
    pth = os.path.join(ConfigUtils.get_stock("DATA_DIR"))
    m_pth = os.path.join(ConfigUtils.get_stock("DATA_M_DIR"))
    os.makedirs(m_pth, exist_ok=True)

    for fname in os.listdir(pth):
        try:
            df = pd.read_csv(os.path.join(pth, fname))
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date').sort_index(ascending=True)
            # Converting the index to monthly periods lets groupby('date')
            # bucket the rows by calendar month.
            grouped = df.to_period('M').groupby('date')

            # Collect plain dicts and build the frame once:
            # DataFrame.append was removed in pandas 2.0 and copied the
            # whole frame on every call.
            rows = [_monthly_bar(month, group) for month, group in grouped]
            results = pd.DataFrame(rows, columns=columns)
            results.to_csv(os.path.join(m_pth, fname), index=False)
            print("{} Month K done, {}".format(
                fname,
                datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        except pd.errors.EmptyDataError:
            print("Empty file:", fname)
        except KeyError:
            print("KeyError file:", fname)
Пример #4
0
def prepare():
    """Make sure DATA_DIR and DB_DIR exist and start from an empty DATA_DIR.

    The original loop returned as soon as it met the first existing
    directory, so DB_DIR was never created when DATA_DIR was already there.
    Both directories are now always ensured; files are cleaned only when
    DATA_DIR already existed (a freshly created one is empty anyway).
    """
    data_dir = ConfigUtils.get_stock("DATA_DIR")
    db_dir = ConfigUtils.get_stock("DB_DIR")

    data_dir_existed = os.path.exists(data_dir)
    for path in (data_dir, db_dir):
        os.makedirs(path, exist_ok=True)

    if data_dir_existed:
        clean_files()
Пример #5
0
def clean_files():
    """Remove every regular file located directly inside DATA_DIR.

    Sub-directories are left alone. Any error raised while handling an
    entry is printed and the sweep continues with the next entry.
    """
    data_dir = ConfigUtils.get_stock("DATA_DIR")
    for entry in os.listdir(data_dir):
        target = os.path.join(data_dir, entry)
        try:
            if not os.path.isfile(target):
                continue
            os.unlink(target)
        except Exception as err:
            print(err)
Пример #6
0
def persist(strategy, results):
    """Write strategy results to ``<OUTPUT>/<strategy>.txt``.

    Each element of ``results`` is indexed as ``e[0]``, ``e[1]``, ``e[2]``
    and becomes one line of the form ``e[0]-e[1]-str(e[2])``. The OUTPUT
    directory is created when it does not exist; an existing file is
    overwritten.
    """
    out_dir = ConfigUtils.get_stock("OUTPUT")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    target = out_dir + "/" + strategy + ".txt"
    with open(target, 'w') as wf:
        for item in results:
            fields = (item[0], item[1], str(item[2]))
            wf.write("-".join(fields) + '\n')
Пример #7
0
def get_trade_dates(start='2010-01-01', end='2020-12-30'):
    """Return the trading days between ``start`` and ``end`` (inclusive).

    Reads the calendar CSV configured as STOCKS_DATE and returns the
    ``calendar_date`` values of rows whose ``is_trading_day`` equals 1, as
    the array given by ``Series.values``.
    """
    date_path = ConfigUtils.get_stock("STOCKS_DATE")
    print(date_path)

    calendar = pd.read_csv(date_path, header=0)
    day = calendar['calendar_date']
    not_before_start = day >= start
    not_after_end = day <= end
    is_open = calendar['is_trading_day'] == 1

    selected = calendar[not_before_start & not_after_end & is_open]
    return selected['calendar_date'].values
Пример #8
0
def read_data(code_name, root=None):
    """Load the K-line CSV of one stock with its numeric columns as floats.

    Args:
        code_name: pair ``(code, name)``; the file read is
            ``<root>/<code>_<name>.csv``.
        root: directory holding the CSV files. Defaults to the configured
            DATA_DIR, looked up when the function is called. (The previous
            default was evaluated once at import time, which froze the
            directory and required the config to be readable on import.)

    Returns:
        The DataFrame, or ``None`` when the file is missing, empty, or has
        no data rows.

    Raises:
        KeyError: if one of the expected numeric columns is absent.
    """
    if root is None:
        root = ConfigUtils.get_stock("DATA_DIR")

    code, name = code_name[0], code_name[1]
    file_path = os.path.join(root, str(code) + '_' + str(name) + '.csv')
    if not os.path.exists(file_path):
        return None

    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file is treated the same as a missing one.
        return None
    if df.empty:
        return None

    numeric_columns = ("open", "high", "low", "close", "preclose", "volume",
                       "amount", "turn", "pctChg")
    for column in numeric_columns:
        df[column] = df[column].astype(float)
    return df
Пример #9
0
def reptile_dynamic_wz():
    """Fetch the payload from the Handler6 endpoint via ``post_wzcg`` and,
    when it is non-empty, pretty-print it and overwrite ``wz_top_20.csv``
    in the configured DATA directory with it."""
    target_csv = os.path.join(ConfigUtils.get_stock("DATA"), "wz_top_20.csv")
    endpoint = 'http://www.waizichigu.com/Handler/Handler6.ashx'

    payload = post_wzcg(None, url=endpoint)
    if not payload:
        return

    pprint.pprint(payload)
    frame = pd.DataFrame(payload)
    frame.to_csv(target_csv, index=None, mode='w+')
Пример #10
0
def reptile_by_date(
        trade_date=None,
        size=50,
):
    """Download the records for one trade date, page by page, to
    ``<WZ_DIR>/wz_<trade_date>.csv``.

    Args:
        trade_date: date to fetch; defaults to yesterday, computed when the
            function is called. (The previous default expression was
            evaluated once at import, so a long-running process kept
            fetching the same day.)
        size: number of rows requested per page.

    Nothing is written when ``get_foreign_num`` reports no rows.
    """
    if trade_date is None:
        trade_date = date.today() + timedelta(-1)

    wz_dir = ConfigUtils.get_stock("WZ_DIR")
    os.makedirs(wz_dir, exist_ok=True)
    wz_path = os.path.join(wz_dir, "wz_{}.csv".format(str(trade_date)))

    total = get_foreign_num(trade_date)
    print("total:", total)
    if total > 0:
        # Integer ceiling division for the page count.
        page_count = int((total + size - 1) // size)
        pages = []
        for page in range(1, page_count + 1):
            outJson = post_wzcg(trade_date, page=page, rows=size)
            print(outJson)
            if outJson and "rows" in outJson:
                pages.append(pd.DataFrame(outJson['rows']))
                # Pause between successful requests.
                time.sleep(1)
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        df = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()
        df.to_csv(wz_path, index=None, mode='w+')
Пример #11
0
def parse_position_change(code='600600'):
    """Scrape the position table for ``code`` and append it to
    ``<POS_DIR>/position_<code>.csv``.

    The page is fetched with ``reptile_position`` and the first
    ``table.tb0td1`` element is parsed: the first ``tr[height]`` row gives
    the column names, the remaining rows the data.

    The UTF-8 BOM and the header row are written only when the CSV is new
    or empty. The original appended a BOM and a header on every call, so a
    re-run left stray BOM characters and repeated headers in the middle of
    the file. Nothing is written when the page or the table is missing.
    """
    pos_dir = ConfigUtils.get_stock("POS_DIR")
    os.makedirs(pos_dir, exist_ok=True)
    pos_path = os.path.join(pos_dir, "position_{}.csv".format(code))

    html = reptile_position(code=code)
    if not html:
        return
    html = html.replace('<br>', '').replace('<br/>', '')

    soup = BeautifulSoup(html, 'lxml')  # 'html.parser' would also work
    inner = soup.select('table[class="tb0td1"]')
    if not inner:
        return
    tr_rows = inner[0].select('tr[height]')
    if not tr_rows:
        return

    heads = [ct.text for ct in tr_rows[0].select('td')]

    content = []
    for row in tr_rows[1:]:
        cells = []
        for ct in row.select('td'):
            # Drop links and line breaks so only the cell text remains.
            for tag in ct(['a', 'br']):
                tag.extract()
            cells.append(ct.text.replace('\n', '').replace('\r', ''))
        content.append(cells)

    is_new_file = (not os.path.exists(pos_path)
                   or os.path.getsize(pos_path) == 0)
    # 'utf-8-sig' emits the BOM at the start of a new file.
    encoding = 'utf-8-sig' if is_new_file else 'utf-8'
    with open(pos_path, 'a', newline='', encoding=encoding) as csvfile:
        writer = csv.writer(csvfile)
        if is_new_file:
            # Column names first ...
            writer.writerow(heads)
        # ... then all data rows at once.
        writer.writerows(content)
Пример #12
0
def scan_stocks():
    """Print stocks whose recent price-change pattern resembles that of
    many recorded limit-up stocks.

    For every candidate stock with at least 60 rows of data, the
    standardised ``pctChg`` of the last five rows (excluding the final row)
    is compared with the same feature stored in each 'limit' record of
    2020-12-15. A stock is printed when more than five records have a
    ``cosine_dist`` value above 0.9.
    """
    pd_names = pd.read_csv(ConfigUtils.get_stock("STOCK_NAME"))
    # Boards fetched from the database: 'zxb', 'hsb' and 'ssb'.
    board_codes = [
        s.code for mark in ('zxb', 'hsb', 'ssb')
        for s in get_mark_stocks(mark=mark)
    ]
    print("total stock size:", len(board_codes))
    # Set membership keeps the per-row test O(1).
    candidate_stocks = set(board_codes)

    records = get_records(dt='2020-12-15', direction='limit', period='d1')
    print("涨停数量:", len(records))
    # Parse each record's change sequence once instead of once per stock.
    # The volume-based feature the original also computed was overwritten
    # immediately and never used, so it is not computed here.
    record_feats = [
        standardization(json.loads(record.extra)['chgs'][:-1])
        for record in records
    ]

    for index, row in tqdm(pd_names.iterrows()):
        code = row['code']
        name = row['code_name']
        if stock_utils.is_jiucaiban(code):
            continue
        if code[3:] not in candidate_stocks:
            continue

        df = stock_utils.read_data((code, name))
        # read_data returns None for missing or empty files.
        if df is None or len(df) < 60:
            continue

        df = df.tail(n=5).reset_index(drop=True)
        chg = df.iloc[-1]['pctChg']
        chgs = list(df['pctChg'])  # price-change sequence
        feat = standardization(chgs[:-1])

        similar = sum(
            1 for record_feat in record_feats
            if cosine_dist(feat, record_feat) > 0.9)
        if similar > 5:
            print(code, name, chg)
Пример #13
0
def init_trade_date():
    """Download the trading calendar between the configured START_DATE and
    END_DATE and save it as CSV at the configured STOCKS_DATE path.

    The session is logged out even when the query or the write fails.
    """
    # Log in and show the login response.
    lg = bs.login()
    print('login respond error_code:' + lg.error_code)
    print('login respond  error_msg:' + lg.error_msg)

    try:
        # Query the trading-day information.
        st = ConfigUtils.get_stock("START_DATE")
        et = ConfigUtils.get_stock("END_DATE")
        print(st, et)
        rs = bs.query_trade_dates(start_date=st, end_date=et)
        print('query_trade_dates respond error_code:' + rs.error_code)
        print('query_trade_dates respond  error_msg:' + rs.error_msg)

        # Collect the result set row by row. `and` short-circuits, so
        # next() is not called once an error code is reported.
        data_list = []
        while rs.error_code == '0' and rs.next():
            data_list.append(rs.get_row_data())
        result = pd.DataFrame(data_list, columns=rs.fields)
        # Write the result set to the CSV file.
        result.to_csv(ConfigUtils.get_stock("STOCKS_DATE"), index=False)
    finally:
        bs.logout()
Пример #14
0
def get_stocks(config=None):
    """Return a list of ``(code, name)`` pairs.

    Args:
        config: optional path of an Excel workbook. When given, codes are
            read from the first column and names from the second column of
            the first sheet. Otherwise the pairs are derived from the file
            names in DATA_DIR, expected to look like ``<code>-<name>.<ext>``.

    Files in DATA_DIR whose name has no ``-`` separator (hidden files, for
    example) are skipped; the original raised ``IndexError`` on them.
    """
    if config:
        data = xlrd.open_workbook(config)
        table = data.sheets()[0]
        rows_count = table.nrows
        # Skips the first row and the last row.
        # NOTE(review): dropping the last row looks like either a footer
        # skip or an off-by-one -- confirm against the workbook layout.
        codes = table.col_values(0)[1:rows_count - 1]
        names = table.col_values(1)[1:rows_count - 1]
        return list(zip(codes, names))

    stocks = []
    for file in os.listdir(ConfigUtils.get_stock('DATA_DIR')):
        # splitext strips only the final extension, so a stem that itself
        # contains a dot is no longer cut at its first dot.
        stem = os.path.splitext(file)[0]
        code, sep, name = stem.partition("-")
        if not sep:
            continue
        stocks.append((code, name))
    return stocks
Пример #15
0
def get_all_stock_names(day='2020-08-03'):
    """Download the list of all stocks for ``day`` and save it as CSV at
    the configured STOCK_NAME path.

    Args:
        day: date passed to ``bs.query_all_stock``. The default keeps the
            date that was previously hard-coded in the body; pass e.g.
            ``stock_utils.get_recently_trade_date()`` for a current list.
            (The original computed that value and then immediately
            overwrote it with the hard-coded date, so the lookup was dead
            code and has been removed.)

    The session is logged out even when the query or the write fails.
    """
    # Log in and show the login response.
    lg = bs.login()
    print('login respond error_code:' + lg.error_code + ', error_msg:' +
          lg.error_msg)
    try:
        k_rs = bs.query_all_stock(day=day)
        print(k_rs)
        # Collect the result set row by row; `and` short-circuits so
        # next() is not called once an error code is reported.
        data_list = []
        while k_rs.error_code == '0' and k_rs.next():
            data_list.append(k_rs.get_row_data())
        result = pd.DataFrame(data_list, columns=k_rs.fields)
        print(result.tail())
        result.to_csv(ConfigUtils.get_stock("STOCK_NAME"), index=False)
        print("init all stock names")
    finally:
        bs.logout()
Пример #16
0
def get_all_stock_industries(query_date='2020-08-01'):
    """Download the industry classification and save it as CSV at the
    configured STOCK_INDUSTRY path.

    Args:
        query_date: date passed to ``bs.query_stock_industry``; the default
            is the value that was previously hard-coded in the body.

    The session is logged out even when the query or the write fails.
    """
    lg = bs.login()
    print('login respond error_code:' + lg.error_code)
    print('login respond  error_msg:' + lg.error_msg)

    try:
        # Fetch the industry classification data.
        rs = bs.query_stock_industry(date=query_date)
        print('query_stock_industry error_code:' + rs.error_code)
        print('query_stock_industry respond  error_msg:' + rs.error_msg)

        # Collect the result set row by row; `and` short-circuits so
        # next() is not called once an error code is reported.
        industry_list = []
        while rs.error_code == '0' and rs.next():
            industry_list.append(rs.get_row_data())
        result = pd.DataFrame(industry_list, columns=rs.fields)
        # Write the result set to the CSV file.
        result.to_csv(ConfigUtils.get_stock("STOCK_INDUSTRY"), index=False)
        print(result)
    finally:
        # Log out.
        bs.logout()
Пример #17
0
def hs300_stocks():
    """Download the HS300 constituent list and save it as CSV at the
    configured HS_300_STOCK_NAME path.

    The original never logged out of the session; logout now always runs,
    also when the query or the write fails.
    """
    # Log in and show the login response.
    lg = bs.login()
    print('login respond error_code:' + lg.error_code)
    print('login respond  error_msg:' + lg.error_msg)

    try:
        rs = bs.query_hs300_stocks()
        print('query_hs300 error_code:' + rs.error_code)
        print('query_hs300  error_msg:' + rs.error_msg)

        # Collect the result set row by row; `and` short-circuits so
        # next() is not called once an error code is reported.
        rows = []
        while rs.error_code == '0' and rs.next():
            rows.append(rs.get_row_data())
        result = pd.DataFrame(rows, columns=rs.fields)
        # Write the result set to the CSV file.
        result.to_csv(ConfigUtils.get_stock("HS_300_STOCK_NAME"), index=False)
        print(result.tail())
        print("init all hs300 stock names")
    finally:
        bs.logout()
Пример #18
0
def update_all_trades():
    """Download the K-line history of every stock listed in the STOCK_NAME
    CSV and write one ``<code>_<name>.csv`` per stock into DATA_DIR.

    The range runs from the configured START_DATE to the most recent
    trading day. ``IOError`` is reported on stdout instead of propagating.
    The session is logged out even when a download or write fails.
    """
    try:
        et = stock_utils.get_recently_trade_date()
        st = ConfigUtils.get_stock("START_DATE")
        print(st, et)
        # Log in and show the login response.
        lg = bs.login()
        print('login respond error_code:' + lg.error_code + ', error_msg:' +
              lg.error_msg)
        try:
            print("stock—name path:", ConfigUtils.get_stock("STOCK_NAME"))
            pd_names = pd.read_csv(ConfigUtils.get_stock("STOCK_NAME"))

            # Resolve and create the output directory once, not per stock.
            data_dir = ConfigUtils.get_stock("DATA_DIR")
            os.makedirs(data_dir, exist_ok=True)
            fields = ConfigUtils.get_stock("STOCK_FIELDS")

            for index, row in pd_names.iterrows():
                code = row['code']
                name = row['code_name']
                k_rs = bs.query_history_k_data_plus(code,
                                                    fields,
                                                    start_date=st,
                                                    end_date=et)
                # Collect the result set row by row; `and` short-circuits
                # so next() is not called once an error code is reported.
                data_list = []
                while k_rs.error_code == '0' and k_rs.next():
                    data_list.append(k_rs.get_row_data())
                result = pd.DataFrame(data_list, columns=k_rs.fields)
                print(result.tail())
                result.to_csv(os.path.join(
                    data_dir,
                    str(code) + "_" + str(name) + ".csv"),
                              index=False)
                print("Downloading :" + code + " , name :" + name + ", " +
                      datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        finally:
            bs.logout()
    except IOError as e:
        print("Update Data Error ", e)
Пример #19
0
def gen_records(day_ago=0):
    """Build new-high / new-low / limit-up records for candidate stocks.

    Args:
        day_ago: number of most recent rows to drop before analysing, i.e.
            evaluate the signals as of that many rows ago.

    Returns:
        A list of ``Record`` objects:
        * for each look-back window of 500/120/60/20/10/5 rows, a record
          with direction 'up' when the latest close is the window maximum,
          or 'down' when it is the window minimum (the 'down' case is
          skipped for names containing 'ST');
        * a 'limit' record (period "d1") when the latest ``pctChg`` is at
          least 9, carrying the last five rows' change and volume-ratio
          sequences as JSON.

    Stocks with no data or fewer than 60 rows are skipped. Window averages
    are divided by the number of rows actually in the window; the original
    divided by the nominal period even when fewer rows were available.
    """
    records = []

    pd_names = pd.read_csv(ConfigUtils.get_stock("STOCK_NAME"))
    # Boards fetched from the database: 'zxb', 'hsb' and 'ssb'.
    board_codes = [
        s.code for mark in ('zxb', 'hsb', 'ssb')
        for s in get_mark_stocks(mark=mark)
    ]
    print("total stock size:", len(board_codes))
    # Set membership keeps the per-row test O(1).
    candidate_stocks = set(board_codes)

    for index, row in tqdm(pd_names.iterrows()):
        code = row['code']
        name = row['code_name']
        if stock_utils.is_jiucaiban(code):
            continue
        if code[3:] not in candidate_stocks:
            continue

        df = stock_utils.read_data((code, name))
        # read_data returns None for missing or empty files.
        if df is None:
            continue
        df = df.head(n=len(df) - day_ago)  # evaluate as of `day_ago` rows back
        df.reset_index(drop=True, inplace=True)
        if len(df) < 60:
            continue

        # The latest row is the same for every look-back window.
        last_date = df.iloc[-1]['date']
        last_close = df.iloc[-1]['close']
        last_volume = df.iloc[-1]['volume']
        record_date = datetime.datetime.strptime(last_date, '%Y-%m-%d').date()

        for period in [500, 120, 60, 20, 10, 5]:
            window = df.tail(n=period).reset_index(drop=True)
            span = len(window)  # may be shorter than `period`
            date_max = window.iloc[window['close'].idxmax(axis=0)]['date']
            date_min = window.iloc[window['close'].idxmin(axis=0)]['date']
            first_close = window.iloc[0]['close']
            d_period_inc = (last_close - first_close) / first_close

            direction = None
            if last_date == date_max:
                direction = 'up'
            # A new low takes precedence over a new high when both match.
            if last_date == date_min and 'ST' not in name:
                direction = 'down'
            if direction is None:
                continue

            extraJson = {
                'volume_avg': window['volume'].sum() / span,
                'amount_avg': window['amount'].sum() / span,
                'inc_avg': d_period_inc / span
            }
            r = Record(name, code, record_date, last_close,
                       "d{}".format(period), last_volume, d_period_inc)
            r.set_extra(json.dumps(extraJson))
            r.set_direction(direction)
            records.append(r)

        # Limit-up analysis on the last five rows.
        recent = df.tail(n=5).reset_index(drop=True)
        chg = recent.iloc[-1]['pctChg']
        if float(chg) >= 9:
            prev_volume = recent.iloc[-2]['volume']
            base_volume = recent.iloc[0]['volume']
            extraJson = {
                'volume_rate': (last_volume - prev_volume) / prev_volume,
                'chgs': list(recent['pctChg']),  # price-change sequence
                # volume relative to the first row of the window
                'vols': [v / base_volume for v in recent['volume']]
            }
            r = Record(name, code, record_date, last_close, "d1", last_volume,
                       chg)
            r.set_direction(direction='limit')
            r.set_extra(json.dumps(extraJson))
            records.append(r)
    return records
Пример #20
0
def generate_feature():
    """Build the feature/label dataset for the stocks listed in the
    HS_300_STOCK_NAME CSV and write it to the STOCKS_DATESET CSV.

    Steps: map trading days since ``start_dt`` to integer ids, load each
    stock's daily CSV from DATA_DIR, derive price-ratio features and
    shifted-close features, derive a label from shifted high/low values,
    and save the resulting frame.
    """
    start_dt = '2019-01-01'
    date_info = pd.read_csv(ConfigUtils.get_stock("STOCKS_DATE"),
                            encoding='utf-8')
    company_info = pd.read_csv(ConfigUtils.get_stock("HS_300_STOCK_NAME"),
                               encoding='utf-8')

    # Time-series handling: collect trading days in [start_dt, today).
    dt = datetime.date.today()
    tmp_list = sorted([
        row['calendar_date']
        for idx, row in date_info.iterrows() if row['calendar_date'] < str(dt)
        and row['calendar_date'] >= start_dt and row['is_trading_day'] == 1
    ],
                      reverse=True)
    # The list is sorted newest-first, so the newest day gets id 0 and ids
    # grow towards the past.
    date_map = dict(zip(tmp_list, range(len(tmp_list))))
    # Read the per-stock trading data.
    remove_stock = []
    tmp_list = []

    for i, row in tqdm.tqdm(company_info.iterrows()):
        code, name = row["code"], row["code_name"]
        path = os.path.join(ConfigUtils.get_stock("DATA_DIR"),
                            code + "_" + name + ".csv")
        if not os.path.exists(path):
            continue
        tmp_df = pd.read_csv(path)
        tmp_df = tmp_df[tmp_df.date >= start_dt]

        # Stocks rejected by filter_stock are recorded and left out.
        if filter_stock(tmp_df, code, name):
            remove_stock.append(code)
            continue
        # reset_index() without drop keeps the old index as an 'index' column.
        tmp_df = tmp_df.sort_values('date', ascending=True).reset_index()
        tmp_list.append(tmp_df)

    # NOTE(review): pd.concat raises if no stock survived the loop above.
    stock_info = pd.concat(tmp_list)
    ts_code_map = dict(
        zip(stock_info['code'].unique(), range(stock_info['code'].nunique())))
    # An 'index' column already exists, so this second reset_index()
    # presumably stores the old index as 'level_0' (dropped further below).
    stock_info = stock_info.reset_index()
    stock_info['ts_code_id'] = stock_info['code'].map(ts_code_map)
    stock_info.drop('index', axis=1, inplace=True)
    stock_info['trade_date_id'] = stock_info['date'].map(date_map)
    # Composite key: (10000 + stock id) in the high digits, date id in the
    # low four digits.
    stock_info['ts_date_id'] = (
        10000 + stock_info['ts_code_id']) * 10000 + stock_info['trade_date_id']

    # Feature engineering: each price relative to the previous close.
    col = ['close', 'open', 'high', 'low']
    feature_col = []
    for tmp_col in col:
        stock_info[tmp_col + '_' +
                   'transform'] = (stock_info[tmp_col] - stock_info['preclose']
                                   ) / stock_info['preclose']
        feature_col.append(tmp_col + '_' + 'transform')

    print('stock_info 占据内存约: {:.2f} GB'.format(
        sys.getsizeof(stock_info) / (1024**3)))
    # Attach the close of the row whose date id is (i + 1) smaller, for
    # offsets 1-5, 10, 20, ..., 250, by shifting the key and merging.
    # NOTE(review): the original comment says "previous days", but since ids
    # grow towards the past (see date_map), key + (i + 1) appears to align
    # each row with a LATER trading day -- confirm the intended direction.
    # NOTE(review): the key is cast to float32 here; ts_date_id values are
    # >= 100,000,000, above the 2**24 limit up to which float32 represents
    # every integer exactly, so keys may collide or miss. The
    # drop_duplicates below may be compensating for that -- confirm.
    for i in [
            0, 1, 2, 3, 4, 9, 19, 29, 39, 49, 59, 79, 99, 119, 149, 179, 199,
            249
    ]:
        tmp_df = pd.DataFrame(stock_info,
                              columns=['ts_date_id', 'close'],
                              dtype='float32')
        tmp_df = tmp_df.rename(
            columns={'close': 'close_shift_{}'.format(i + 1)})
        feature_col.append('close_shift_{}'.format(i + 1))
        tmp_df['ts_date_id'] = tmp_df['ts_date_id'] + i + 1

        stock_info = pd.merge(stock_info, tmp_df, how='left', on='ts_date_id')
        stock_info.drop_duplicates(subset=['ts_date_id'],
                                   keep='last',
                                   inplace=True)

    stock_info.drop('level_0', axis=1, inplace=True)
    # Turn each shifted close into a relative difference versus the row's
    # own close.
    for i in [
            0, 1, 2, 3, 4, 9, 19, 29, 39, 49, 59, 79, 99, 119, 149, 179, 199,
            249
    ]:
        stock_info['close_shift_{}'.format(i + 1)] = (
            stock_info['close'] - stock_info['close_shift_{}'.format(i + 1)]
        ) / stock_info['close_shift_{}'.format(i + 1)]

    # print(stock_info)
    # stock_info.dropna(inplace=True)
    # Label construction.

    # make_label: attach high/low of the rows whose date id is 1..3 larger.
    # NOTE(review): the original comment says "rise over the next 2 days",
    # but with ids growing towards the past, key - (i + 1) appears to align
    # each row with EARLIER trading days, and the loop covers three offsets
    # -- confirm.
    use_col = []
    for i in range(3):
        tmp_df = stock_info[['ts_date_id', 'high', 'low']]
        tmp_df = tmp_df.rename(
            columns={
                'high': 'high_shift_{}'.format(i + 1),
                'low': 'low_shift_{}'.format(i + 1)
            })
        use_col.append('high_shift_{}'.format(i + 1))
        use_col.append('low_shift_{}'.format(i + 1))
        tmp_df['ts_date_id'] = tmp_df['ts_date_id'] - i - 1
        stock_info = stock_info.merge(tmp_df, how='left', on='ts_date_id')

    # stock_info.dropna(inplace=True)
    # Express the shifted highs/lows relative to the row's own close.
    for i in range(3):
        stock_info['high_shift_{}'.format(i + 1)] = (stock_info['high_shift_{}'.format(i + 1)] - stock_info['close']) / \
                                                    stock_info['close']
        stock_info['low_shift_{}'.format(i + 1)] = (stock_info['low_shift_{}'.format(i + 1)] - stock_info['close']) / \
                                                   stock_info['close']

    # Row-wise extremes over the six shifted columns; np.max/np.min
    # propagate NaN, so rows with any unmatched shift end up NaN here.
    tmp_array = stock_info[use_col].values
    max_increse = np.max(tmp_array, axis=1)
    min_increse = np.min(tmp_array, axis=1)
    stock_info['label_max'] = max_increse
    stock_info['label_min'] = min_increse
    stock_info['change'] = (stock_info['high'] -
                            stock_info['low']) / stock_info['preclose']
    # stock_info['label_final'] = (stock_info['label_max'] > 0.06) & (stock_info['label_min'] > -0.03)
    # Label: spread between the extremes, in percent.
    stock_info['label_final'] = (stock_info['label_max'] -
                                 stock_info['label_min']) * 100
    stock_info = stock_info.dropna(subset=['label_final'])

    def label_fun(x):
        # Cap the label at 20.
        if x >= 20:
            return 20
        else:
            return x

    stock_info['label_final'] = stock_info['label_final'].apply(
        lambda x: label_fun(x))

    # Truncate to an integer class id.
    stock_info['label_final'] = stock_info['label_final'].apply(
        lambda x: int(x))
    print("正负样本:", stock_info['label_final'].value_counts())
    # print(stock_info[stock_info.date == '2020-08-21'])
    # print(stock_info[stock_info.label_final == 1])
    # stock_info = stock_info.reset_index()
    stock_info = stock_info.reset_index()
    stock_info.drop('index', axis=1, inplace=True)
    stock_info.to_csv(ConfigUtils.get_stock("STOCKS_DATESET"), index=False)
Пример #21
0
def reptile_dc():
    """Run ``parse_position_change`` once for every distinct stock code in
    the STOCK_NAME CSV, using the code with its first three characters
    removed."""
    name_table = pd.read_csv(ConfigUtils.get_stock("STOCK_NAME"))
    short_codes = set()
    for _, record in name_table.iterrows():
        short_codes.add(record['code'][3:])
    for short_code in short_codes:
        parse_position_change(short_code)
Пример #22
0
def get_m_candidates():
    """Select monthly breakout and new-high candidates.

    For every candidate stock the last 12 rows of its monthly CSV (from
    DATA_M_DIR) are ranked by close:

    * break_stocks: the current month holds the second-highest close and
      the highest close lies more than two rows earlier;
    * high_stocks: the current month holds the highest close.

    Returns:
        ``(break_stocks, high_stocks)`` -- two lists of ``Record`` objects
        sorted by ``volume`` descending and filtered by an upper bound.
    """
    pd_names = pd.read_csv(ConfigUtils.get_stock("STOCK_NAME"))
    # Boards fetched from the database: 'zxb', 'hsb' and 'ssb'.
    board_codes = [
        s.code for mark in ('zxb', 'hsb', 'ssb')
        for s in get_mark_stocks(mark=mark)
    ]
    print("total stock size:", len(board_codes))
    # Set membership keeps the per-row test O(1).
    candidate_stocks = set(board_codes)

    m_dir = ConfigUtils.get_stock("DATA_M_DIR")
    t_date = datetime.date.today().strftime('%Y-%m')
    break_stocks = []
    high_stocks = []
    for index, row in tqdm(pd_names.iterrows()):
        code = row['code']
        name = row['code_name']
        if stock_utils.is_jiucaiban(code):
            continue
        if code[3:] not in candidate_stocks:
            continue

        df = stock_utils.read_data((code, name), root=m_dir)
        if df is None or len(df) < 2:
            continue
        df = df.tail(n=12).reset_index(drop=True)

        # After reset_index the labels equal row positions, so the labels
        # of the two highest closes also tell how far apart the rows are.
        by_close = df.sort_values(by='close', ascending=False)
        idx_top, idx_second = by_close.index[0], by_close.index[1]
        top, second = df.iloc[idx_top], df.iloc[idx_second]

        # NOTE(review): the monthly amount is passed as the Record's sixth
        # argument and the sort/filter below read `r.volume` -- presumably
        # the same field; confirm against Record.
        # Current month is about to overtake a high set more than two rows
        # earlier.
        if t_date == second['date'] and (idx_second - idx_top) > 2:
            break_stocks.append(
                Record(name, code, t_date, second['close'], 'm',
                       second['amount'], second['pctChg']))
        # Current month is itself the highest close.
        if t_date == top['date']:
            high_stocks.append(
                Record(name, code, t_date, top['close'], 'm', top['amount'],
                       top['pctChg']))

    break_stocks = sorted(break_stocks, key=lambda x: x.volume, reverse=True)
    high_stocks = sorted(high_stocks, key=lambda x: x.volume, reverse=True)

    break_stocks = [r for r in break_stocks if r.volume < 5e11]
    high_stocks = [r for r in high_stocks if r.volume < 90081936793]

    return break_stocks, high_stocks
Пример #23
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020/11/29 5:09 下午
# @Author  : zhangzhen12
# @Site    :
# @File    : wzcc_dynamic.py
# @Software: PyCharm
import codecs
import csv
import os
import pandas as pd
import tqdm
from common.config import ConfigUtils

wz_path = ConfigUtils.get_stock("WZ_DIR")

names = list(os.listdir(wz_path))
dates = sorted([name[3:-4] for name in names])
dates = dates[-100:]

stock_percent_dict = {}
stock_amount_dict = {}
stocks = set()

for name in tqdm.tqdm(names):
    date = name[3:-4]
    df = pd.read_csv(os.path.join(wz_path, name))

    # print(list(df.iterrows())[0])
    for index, row in df.iterrows():
        stock_name = row['name']