def main():
    """Load a pre-trained model, predict on an Excel feature file, save a CSV.

    Command-line arguments (read from ``sys.argv``):
        1: path to the Excel feature file.
        2: path to a label file; accepted positionally but not used here.
        3: directory that contains the pre-trained model file ``test.pkl``.
        4: output location. ``<timestamp>_results.csv`` is written inside it
           when it is an existing directory; otherwise the file name is
           appended to it as a plain string prefix.

    Raises:
        IndexError: if fewer than four command-line arguments are given.
        ValueError: if the model file or the feature file does not exist.
    """
    feature_file_path = sys.argv[1]
    input_model = sys.argv[3]
    output_results = sys.argv[4]

    timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    # Build the model path once so the file that is checked is the file that
    # is loaded, whether or not the directory has a trailing separator.
    model_path = os.path.join(input_model, "test.pkl")
    if not os.path.exists(model_path):
        raise ValueError("The model file does not exist.")
    # NOTE(security): joblib.load unpickles the file and can therefore run
    # arbitrary code; only load model files that come from a trusted source.
    Mshift = joblib.load(model_path)  # call the pre-trained model
    print(Mshift)

    # get data
    print("Read file.........")
    print("Feature file:{}".format(feature_file_path))

    if not os.path.exists(feature_file_path):
        raise ValueError("The feature file does not exist.")

    # Missing cells are replaced with 0, then every cell is converted to a
    # string with its spaces removed. No null value can remain after this,
    # so no separate missing-data check is performed.
    X = pd.read_excel(feature_file_path).fillna(0)
    for col in X.columns:
        X[col] = X[col].apply(lambda x: str(x).replace(" ", ""))

    # data pre-process
    X_ = data_process(X, CHOICE)

    # data prediction and results output
    y_pred = Mshift.predict(X_)
    y_pred = pd.DataFrame(y_pred, columns=['predict'])

    df = pd.concat([X, y_pred], axis=1)

    results_name = "{}_results.csv".format(timestamp)
    if os.path.isdir(output_results):
        # An existing directory: place the file inside it even when the
        # argument was given without a trailing separator.
        results_path = os.path.join(output_results, results_name)
    else:
        # Not a directory: keep treating the argument as a path prefix.
        results_path = output_results + results_name
    df.to_csv(results_path, index=False)
Exemplo n.º 2
0
def main():
    """Load a pre-trained model, predict on an Excel feature file, save a CSV.

    Command-line arguments (read from ``sys.argv``):
        1: path to the Excel feature file.
        2: path to a label file; accepted positionally but not used here.
        3: directory that contains the pre-trained model file ``test.pkl``.
        4: output location. ``<timestamp>_results.csv`` is written inside it
           when it is an existing directory; otherwise the file name is
           appended to it as a plain string prefix.

    Raises:
        IndexError: if fewer than four command-line arguments are given.
        ValueError: if the model file or the feature file does not exist.
    """
    feature_file_path = sys.argv[1]
    input_model = sys.argv[3]
    output_results = sys.argv[4]

    timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    # Build the model path once so the file that is checked is the file that
    # is loaded, whether or not the directory has a trailing separator.
    model_path = os.path.join(input_model, "test.pkl")
    if not os.path.exists(model_path):
        raise ValueError("模型文件不存在")
    # NOTE(security): joblib.load unpickles the file and can therefore run
    # arbitrary code; only load model files that come from a trusted source.
    kmeans = joblib.load(model_path)  # call the pre-trained model
    print(kmeans)

    # get data
    print("读取文件.........")
    print("特征文件:{}".format(feature_file_path))

    if not os.path.exists(feature_file_path):
        raise ValueError("特征文件不存在")

    # Missing cells are replaced with 0, then every cell is converted to a
    # string with its spaces removed. No null value can remain after this,
    # so no separate missing-data check is performed.
    X = pd.read_excel(feature_file_path).fillna(0)
    for col in X.columns:
        X[col] = X[col].apply(lambda x: str(x).replace(" ", ""))

    # data pre-processing
    X_ = data_process(X, CHOICE)

    # model prediction and results output
    y_pred = kmeans.predict(X_)
    y_pred = pd.DataFrame(y_pred, columns=['predict'])

    df = pd.concat([X, y_pred], axis=1)

    results_name = "{}_results.csv".format(timestamp)
    if os.path.isdir(output_results):
        # An existing directory: place the file inside it even when the
        # argument was given without a trailing separator.
        results_path = os.path.join(output_results, results_name)
    else:
        # Not a directory: keep treating the argument as a path prefix.
        results_path = output_results + results_name
    df.to_csv(results_path, index=False)