# Example no. 1
    # Build one feature row per time step i, starting at i=11 so that the
    # 10-step lookback window (i-10 .. i-1) and the label comparison at
    # i-11 are both in range.  `min_count`, `dic_list`, `tsm_dic`, and
    # `process_dic` are defined above this fragment (outside this view).
    for i in range(11, min_count):
        doc = list()
        # Row starts with the timestamp of step i taken from the first series.
        doc.append(dic_list[0][i]['timestamp'])
        # Append the previous 10 'close' values from every series in
        # dic_list, series-major (all 10 closes of series 0, then series 1, ...).
        for dic in dic_list:
            for j in range(i - 10, i):
                doc.append(dic[j]['close'])
        # Binary label from tsm_dic: 0 if the close at step i-10 rose
        # versus step i-11, else 1.
        # NOTE(review): 0 = up-move, 1 = down/flat looks inverted from the
        # usual convention — confirm it matches the model's training labels.
        if tsm_dic[i - 10]['close'] > tsm_dic[i - 11]['close']:
            doc.append(0)
        else:
            doc.append(1)

        process_dic.append(doc)

    # Schema: 1 string timestamp + 150 double feature columns named
    # "0".."149" + 1 integer 'result' column.
    # NOTE(review): 150 features implies len(dic_list) == 15 (15 series x
    # 10 closes each) — can't verify from this fragment; confirm upstream.
    final_schema = [StructField('timestamp', StringType(), True)]
    for i in range(0, 150):
        final_schema.append(StructField(str(i), DoubleType(), True))
    final_schema.append(StructField('result', IntegerType(), True))
    final_schema = StructType(final_schema)

    # Parallelize the assembled rows and bind them to the schema.
    final_rdd = sc.parallelize(process_dic)
    final_df = sqlContext.createDataFrame(final_rdd, final_schema)

    # final_dict = map(lambda row: row.asDict(), final_df.collect())
    # final_dict = list(final_dict)

    # Model directory is selected by the first CLI argument; `modeldir`
    # is defined outside this fragment.
    model_dir = os.path.join(modeldir, sys.argv[1])

    # Load the persisted pipeline (unwrapping any custom Python stages),
    # score the feature DataFrame, and collect predictions as plain dicts.
    p = PysparkPipelineWrapper.unwrap(PipelineModel.load(model_dir))
    predictions = p.transform(final_df)
    predictions_dict = list(
        map(lambda row: row.asDict(), predictions.collect()))