# Example #1
0
class FeatureSelection:
    """Assemble a feature vector from the 'bus' stream and rank its features.

    ``select_feature`` cleans the incoming messages and assembles the
    ``features`` column; ``random_forests`` fits a random forest on it and
    reports the resulting feature importances.
    """

    def __init__(self):
        """Open both consumers for the 'bus' topic on 'localhost'."""
        self.stream = Consumer('bus', 'localhost').get_stream()
        self.kafka_stream = ConsumerKafka('bus', 'localhost')

    @staticmethod
    def _is_numeric(message):
        """Return True when ``round(float(message))`` would succeed.

        Used as the stream filter predicate.  The previous predicate was
        ``float(message)`` itself, so its truthiness decided the outcome:
        a reading of exactly 0 was dropped, and a non-numeric message
        raised inside the filter instead of being skipped.  NaN and
        infinities are rejected as well because ``round`` raises on them
        in the map step that follows the filter.
        """
        try:
            round(float(message))
        except (TypeError, ValueError, OverflowError):
            return False
        return True

    # kafka_stream and stream are both interchangeable
    def select_feature(self):
        """Filter, round and sort the stream, then assemble the feature vector.

        Returns:
            The result of ``VectorAssembler.transform`` with the columns
            'stop_id', 'delay', 'route_id' and 'temperature' combined into
            the 'features' column.
        """
        # Keep only messages that can be parsed and rounded (zero included).
        numeric = self.stream.filter(self._is_numeric)
        rounded = numeric.map(lambda message: round(float(message)))
        # NOTE(review): sortByKey is normally defined for key/value pair
        # RDDs, while the map step above yields bare rounded numbers --
        # confirm against what Consumer.get_stream() actually produces.
        rdd = rounded.transform(lambda batch: batch.sortByKey())
        assembler = VectorAssembler(
            inputCols=['stop_id', 'delay', 'route_id', 'temperature'],
            outputCol='features')
        # NOTE(review): VectorAssembler.transform usually takes a DataFrame
        # holding the input columns; verify that `rdd` satisfies that here.
        return assembler.transform(rdd)

    def random_forests(self):
        """Fit a random forest on the assembled features.

        Prints and returns ``featureImportances`` of the fitted model.
        """
        features = self.select_feature()
        # NOTE(review): 'temperature' is the label here and is also one of
        # the assembled input columns in select_feature -- confirm that the
        # label is meant to be part of the feature vector.
        rf = RandomForestClassifier(labelCol='temperature',
                                    featuresCol='features')
        final_df = features.select('features', 'temperature')
        rf_model = rf.fit(final_df)
        print(rf_model.featureImportances)
        return rf_model.featureImportances