model = load_model('dnn.model') else: T1 = time.time() if (T1 - T0 > 3600): # 3600sec = 1 hour T0 = T1 model = load_model('dnn.model') predict_test = model.predict(x_test, batch_size=200) # predict_test = model_predict(x_test) print("predict_result:", predict_test) rate_now = VEC[-1][-1] rate_next = predict_test[0][0] time_now = VEC[-1][0] if re.match('(.+) 23:(.+)', time_now): time_now = CommonUtil.get_datetime_from_string(time_now) time_next = time_now + datetime.timedelta(hours=10) else: time_now = CommonUtil.get_datetime_from_string(time_now) time_next = time_now + datetime.timedelta(hours=1) # Display_predict = [] # Display_predict.append(time_now) # Display_predict.append(rate_now) # Display_predict.append(time_next) # Display_predict.append(rate_next) # print("Display_predict:", Display_predict) # updata the data # 展示预测结果 判断是否重复 更新而不是单纯写入 res = data_os.sql_exe('select count(*) from RMB_rate_predict')
def generate_feature_vector():
    """Build news-influence feature vectors per price sample and write them to CSV.

    Steps, in order:

    1. Call ``prepare_feature()`` and append a header row (the keys of
       ``featureDict`` followed by ``'TARGET'``) to ``featureVectorList``.
    2. Load the news features from ``NEWS_FEATURE_PATH`` and the processed
       prices from ``PROCESSED_PRICE_PATH + '_' + PRICE_SAMPLE_MINUTE + suffix``
       into the module globals ``newsFeatureList`` / ``processedPriceList``.
    3. Rewrite every news timestamp through ``CommonUtil.reset_news_time``.
    4. For every price whose time lies in ``[PRICE_START_TIME, PRICE_END_TIME)``,
       walk minute by minute from the previous price to the current one and
       accumulate ``decay_influence``-weighted news feature values.  A row
       (accumulated features + scaled price delta) is appended only when at
       least one news item influenced the interval.
    5. Write ``featureVectorList`` to
       ``FEATURE_VECTOR_PATH + '_' + PRICE_SAMPLE_MINUTE + suffix``.

    The function takes no arguments and returns ``None``; its results are the
    mutated module-level lists and the CSV file written at the end.
    """
    global newsFeatureList
    global processedPriceList

    logger.info("In Generate Feature Vector...")
    prepare_feature()

    # Header row: one column per feature plus the prediction target.
    title_list = list(featureDict.keys())
    title_list.append('TARGET')
    featureVectorList.append(title_list)
    feature_size = len(featureDict.keys())

    newsFeatureList = CommonUtil.read_csv(NEWS_FEATURE_PATH)
    file_path = PROCESSED_PRICE_PATH + '_' + str(
        PRICE_SAMPLE_MINUTE) + CSV_FILE_SUFFIX
    processedPriceList = CommonUtil.read_csv(file_path)

    # News runs from 2016-06-30 to 2017-12-29, prices from 2016-07-01 to
    # 2017-12-29 (note carried over from the original author).
    last_news_begin = 0
    news_feature_begin_index = last_news_begin
    pre_price_item = [PRICE_START_TIME, 0]
    price_start_time = CommonUtil.get_datetime_from_string(PRICE_START_TIME)
    price_end_time = CommonUtil.get_datetime_from_string(PRICE_END_TIME)

    # News published while the market is closed is treated as having happened
    # NEWS_INFLUENCE_MOST minutes before the market opens.
    for news_feature in newsFeatureList:
        news_feature[0] = CommonUtil.reset_news_time(
            news_feature[0], NEWS_INFLUENCE_MOST,
            MARKET_OPEN_TIME, MARKET_CLOSE_TIME)

    news_count = len(newsFeatureList)
    # Loop-invariant: width of the influence window in seconds.
    influence_window_seconds = NEWS_INFLUENCE_DACAY_THRESHOLD * 60

    for current_price_item in processedPriceList:
        current_price_time = CommonUtil.get_datetime_from_string(
            current_price_item[0])
        if price_start_time <= current_price_time < price_end_time:
            # Scaled price change relative to the previous sample.
            price_delta = round(
                (float(current_price_item[1]) - float(pre_price_item[1]))
                * FEATURE_VECTOR_SCALE,
                CURRENCY_PAIR_PRECISION)
            pre_price_time = CommonUtil.get_datetime_from_string(
                pre_price_item[0])
            logger.debug(current_price_time)

            # Sum the effect of the news between pre_price_time and
            # current_price_time, one evaluation per elapsed minute.
            last_interval_minutes = int(
                CommonUtil.get_interval_seconds(current_price_time,
                                                pre_price_time) / 60)
            influence_feature_vector = [0.0] * feature_size
            is_influenced_price = False

            for minute_i in range(last_interval_minutes):
                # Instant being evaluated: each minute after pre_price_time,
                # up to and including current_price_time.
                time_i = CommonUtil.get_minute_changed(pre_price_time,
                                                       minute_i + 1)

                # Find the first news item that can influence this instant.
                for news_feature_begin_index in range(last_news_begin,
                                                      news_count):
                    interval_seconds = CommonUtil.get_interval_seconds(
                        time_i,
                        CommonUtil.get_datetime_from_string(
                            newsFeatureList[news_feature_begin_index][0]))

                    if interval_seconds < 0:
                        # This news item is later than time_i; stop scanning.
                        break
                    if interval_seconds > influence_window_seconds:
                        # Too old to matter for this instant; try the next.
                        continue

                    # A news item lies inside the influence window.  Locate
                    # the exclusive end: the first item later than time_i.
                    for news_feature_end_index in range(
                            news_feature_begin_index, news_count):
                        if CommonUtil.get_datetime_from_string(
                                newsFeatureList[news_feature_end_index][0]) \
                                > time_i:
                            break
                    else:
                        # No later item exists, so every remaining item
                        # counts.  Without this the loop variable would stop
                        # at the last index and that final news item would
                        # be excluded from the range below.
                        news_feature_end_index = news_count

                    str_begin_end = str(minute_i + 1) + ': news->' + str(
                        news_feature_begin_index) + ' : ' + str(
                        news_feature_end_index - 1)
                    logger.debug(str_begin_end)

                    for news_feature_index in range(
                            news_feature_begin_index,
                            news_feature_end_index):
                        current_news_feature = newsFeatureList[
                            news_feature_index]
                        influence_score = decay_influence(
                            CommonUtil.get_datetime_from_string(
                                current_news_feature[0]), time_i)
                        # Column 0 is the news time; feature values start
                        # at column 1.
                        for value_i in range(feature_size):
                            influence_feature_vector[value_i] += \
                                float(current_news_feature[value_i + 1]) \
                                * influence_score
                    is_influenced_price = True
                    break

                # Resume the next minute's scan where this one stopped.
                last_news_begin = news_feature_begin_index

            if is_influenced_price:
                influence_feature_vector.append(price_delta)
                featureVectorList.append(influence_feature_vector)
            pre_price_item = current_price_item

    file_path = FEATURE_VECTOR_PATH + '_' + str(
        PRICE_SAMPLE_MINUTE) + CSV_FILE_SUFFIX
    CommonUtil.write_csv(file_path, featureVectorList)
    logger.info("Generate Feature Vector Done!")
from dataprocess import RawDataProcessor
from util import CommonUtil

# Scratch script: print the sample time that follows `time1`, given the
# market opening and closing times below.

# Market hours used for the check.
market_open_time = '09:30:00'
market_close_time = '23:30:00'

# Earlier ad-hoc checks, kept for reference:
# print(CrawlerUtil.get_sample_time_list(market_open_time, market_close_time, 1))
# print(CrawlerUtil.get_interval_seconds(CrawlerUtil.get_datetime_from_string(time1), CrawlerUtil.get_datetime_from_string(time2)))
# dt_time = CrawlerUtil.get_datetime_from_string(time)
# dt_time_date = dt_time.date()
# print(str(dt_time_date))
# cur_time = str(CrawlerUtil.get_sample_time_list(market_open_time, market_close_time, 1)[0])

# Timestamps to experiment with; only time1 feeds the live call below.
time1 = '2016/06/06 23:30:00'
time2 = '2016/06/05 09:30:00'

# NOTE(review): the literal 1 sits where other callers pass
# PRICE_SAMPLE_MINUTE, so it is presumably a 1-minute interval -- confirm.
start_datetime = CommonUtil.get_datetime_from_string(time1)
next_sample_time = CommonUtil.get_next_sample_time(
    start_datetime, 1, market_open_time, market_close_time)
print(next_sample_time)
def _append_sample_average(sample_datetime, sample_price_list):
    """Append ``[sample time string, mean price]`` to ``processedPriceList``.

    Does nothing when ``sample_price_list`` is empty, so sample points
    without any price produce no output row.  The mean is rounded to
    ``CURRENCY_PAIR_PRECISION + 2`` decimal places.
    """
    if not sample_price_list:
        return
    average_price = round(sum(sample_price_list) / len(sample_price_list),
                          CURRENCY_PAIR_PRECISION + 2)
    sample_datetime_str = CommonUtil.get_string_from_datetime(sample_datetime)
    processedPriceList.append([sample_datetime_str, average_price])


def process_original_price():
    """Resample the raw prices onto the sampling grid and write them to CSV.

    Reads the rows of ``ORIGINAL_PRICE_PATH`` into the module global
    ``originalPriceList`` (column 0: time string, column 1: price).  Starting
    at ``PRICE_START_TIME`` and advancing with
    ``CommonUtil.get_next_sample_time``, each price is assigned to the sample
    point whose half-open window ``[-half, +half)`` contains it, where
    ``half`` is ``PRICE_SAMPLE_MINUTE * 60 / 2`` seconds.  The mean price of
    every non-empty sample point is appended to ``processedPriceList``, which
    is finally written to
    ``PROCESSED_PRICE_PATH + '_' + PRICE_SAMPLE_MINUTE + suffix``.

    Processing stops once the sample point moves past ``PRICE_END_TIME``.
    The function takes no arguments and returns ``None``.
    """
    global originalPriceList

    logger.info("In Process Original Price...")
    originalPriceList = CommonUtil.read_csv(ORIGINAL_PRICE_PATH)

    # Loop invariants, computed once instead of once per price row.
    half_window_seconds = PRICE_SAMPLE_MINUTE * 60 / 2
    price_end_time = CommonUtil.get_datetime_from_string(PRICE_END_TIME)
    sample_datetime = CommonUtil.get_datetime_from_string(PRICE_START_TIME)

    sample_price_list = []
    for original_price in originalPriceList:
        logger.debug('price time: ' + original_price[0])
        price_datetime = CommonUtil.get_datetime_from_string(original_price[0])
        price_value = float(original_price[1])

        time_interval = CommonUtil.get_interval_seconds(
            price_datetime, sample_datetime)

        # The price is well before the current sample point: skip it.
        if time_interval < -half_window_seconds:
            continue

        # The price is past the current sample window: flush the current
        # sample point, then advance until a window contains the price.
        while time_interval >= half_window_seconds:
            _append_sample_average(sample_datetime, sample_price_list)
            sample_price_list = []

            sample_datetime = CommonUtil.get_next_sample_time(
                sample_datetime, PRICE_SAMPLE_MINUTE,
                MARKET_OPEN_TIME, MARKET_CLOSE_TIME)
            time_interval = CommonUtil.get_interval_seconds(
                price_datetime, sample_datetime)
            logger.debug('sample datetime:'
                         + CommonUtil.get_string_from_datetime(sample_datetime))

        # The sample point has moved beyond the configured end time.
        if sample_datetime > price_end_time:
            break

        # The price belongs to the current sample point (window is [ , ) ).
        sample_price_list.append(price_value)

    # Flush whatever was collected for the final sample point.
    _append_sample_average(sample_datetime, sample_price_list)

    file_path = PROCESSED_PRICE_PATH + '_' + str(
        PRICE_SAMPLE_MINUTE) + CSV_FILE_SUFFIX
    CommonUtil.write_csv(file_path, processedPriceList)
    logger.info("Process Original Price Done!")