Пример #1
0
def extract_selected_id_name():
    """
    Walk the directory of selected pictures and print what it contains.

    The training CSV is loaded and its "timestamp" column is converted with
    ``millisec_to_str``; the resulting frame is not used any further here.
    For every directory visited by ``os.walk`` a 1-based counter, the
    directory path, its sub-directories and its files are printed.

    :return: always ``None`` at present. The original intent was to return
        the id names of the selected pictures, but nothing is collected yet.
    """
    root_dir = get_user_data_dir()
    original_path = join(
        root_dir,
        "706_dnm_tmp_3ZFwT#sum_iUserNum_300#20190620_16Days_valid6D_pipeline_test_input_node_train_data.csv"
    )
    df = pd.read_csv(original_path)
    # NOTE(review): gen_pic reads a "timestamps" column from a CSV with the
    # same file name -- confirm which column name is the correct one.
    df["timestamp"] = df["timestamp"].map(millisec_to_str)

    pic_path = join("/Users/xumiaochun/jiawei", "tmp/pic_valid_select/")

    for index, (root, dirs, files) in enumerate(os.walk(pic_path), start=1):
        print(index)
        print(root)  # path of the directory currently being visited
        print(dirs)  # sub-directories directly under that path
        print(files)  # non-directory files directly under that path

    return None
Пример #2
0
def gen_pic():
    """
    Plot each ``line_id`` series of the training CSV and hand it to ``savePNG``.

    The day count is taken from the text following the last "valid" marker
    in the line id (with any "D" removed); ids whose day count is below 10
    are skipped. Each remaining slice is plotted with ``plot_hist`` and saved
    under the picture directory as ``<line_id>.png``.

    :return: None
    """
    data_root = get_user_data_dir()
    csv_name = "706_dnm_tmp_3ZFwT#sum_iUserNum_300#20190620_16Days_valid6D_pipeline_test_input_node_train_data.csv"
    frame = pd.read_csv(join(data_root, csv_name))

    # NOTE(review): extract_selected_id_name reads a "timestamp" column from a
    # CSV with the same file name -- confirm which column name is correct.
    frame["timestamps"] = frame["timestamps"].map(millisec_to_str)

    output_dir = join("/Users/xumiaochun/jiawei", "tmp/pic_valid/result_train/")

    for line_id in np.unique(frame["line_id"]):
        # Everything after the last "valid" marker, minus "D", is the day count.
        day_text = line_id.rpartition("valid")[2].replace("D", "")
        if int(day_text) >= 10:
            line_frame = frame[frame["line_id"] == line_id].copy()
            print(line_frame.shape)
            figure = plot_hist(
                line_frame,
                detect_days=2,
                plot_day_index=[1, 7],
                anom_col="label",
                value_col="point",
                freq=300,
            )
            target_path = join(output_dir, "%s.png" % line_id)
            savePNG(figure, targetDir=target_path)
Пример #3
0
# -*- coding: utf-8 -*-
# Standard library
from os.path import join
from os import listdir
# Third Party Library
import pandas as pd
import numpy as np

# My Library
from common.path_helper import split_dir, saveDF
from settings import Config_json, get_user_data_dir
from step1_dataIO import load_level_df

# Module-level configuration, resolved once at import time.
config_json = Config_json()
# Base directory that the input and output directories below are joined onto.
root_dir = get_user_data_dir()
# root_dir joined with the configured "original_data" value.
input_dir = join(root_dir, config_json.get_config("original_data"))
# root_dir joined with the configured "STEP1_DATA_SUBDIR" value.
output_dir = join(root_dir, config_json.get_config("STEP1_DATA_SUBDIR"))
# Global parameters for the model, read from the configuration.
DELTA_T2_POINTS = config_json.get_config("DELTA_T2_POINTS")
DELTA_T1_POINTS = config_json.get_config("DELTA_T1_POINTS")
# Configured "PREDICT_LENGTH_POINTS" value; per the original note it is used
# when making the spread_crossing labels.
delta_Events = config_json.get_config("PREDICT_LENGTH_POINTS")


def spread_crossing(a):
    """
    :param P1: price object contains bid/ask
    :param P2: price object contains bid/ask
    :return: spread_crossing of p1, p2,i.e,p2 - p1
    """
    P1_ask, P1_bid, P2_ask, P2_bid = a