def test_convert_predict_list_of_array(self):
    """Smoke-test convert_predict_to_dataframe on two prediction layouts.

    Layout 1: every prediction is a list of ndarrays (multi-output model).
    Layout 2: every prediction is a single 0-d ndarray (scalar output).
    The test only prints the schema and first row; it passes as long as the
    conversion does not raise.
    """
    tf.reset_default_graph()
    spark_context = init_nncontext()
    # NOTE(review): the SQLContext is never read afterwards — presumably kept
    # for its side effect of initializing the SQL session; confirm before removing.
    sql_context = SQLContext(spark_context)

    source_df = spark_context.parallelize(
        [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
    ).toDF(["feature", "label", "c"])

    # Layout 1: list of ndarrays per row.
    predictions = source_df.rdd.map(lambda row: [np.array([1, 2]), np.array(0)])
    converted = convert_predict_to_dataframe(source_df, predictions)
    converted.printSchema()
    print(converted.collect()[0])

    # Layout 2: one scalar ndarray per row.
    predictions = source_df.rdd.map(lambda row: np.array(1))
    converted = convert_predict_to_dataframe(source_df, predictions)
    converted.printSchema()
    print(converted.collect()[0])
def predict(self, data, batch_size=4, feature_cols=None, hard_code_batch_size=False):
    """Run inference with ``self.model`` on *data*.

    :param data: input samples; may be a Spark ``DataFrame`` or any other
        input accepted by ``to_dataset``.
    :param batch_size: per-thread batch size used during prediction.
    :param feature_cols: names of the feature columns; required (non-None)
        when *data* is a ``DataFrame``.
    :param hard_code_batch_size: forwarded to ``to_dataset`` unchanged.
    :return: a ``DataFrame`` with predictions when *data* was a ``DataFrame``,
        otherwise the raw prediction RDD.
    """
    came_from_dataframe = isinstance(data, DataFrame)
    if came_from_dataframe:
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"

    # batch_size=-1 with batch_per_thread set is the inference configuration;
    # sequential_order/shuffle keep predictions aligned with input rows.
    dataset = to_dataset(data, batch_size=-1, batch_per_thread=batch_size,
                         validation_data=None,
                         feature_cols=feature_cols, labels_cols=None,
                         hard_code_batch_size=hard_code_batch_size,
                         sequential_order=True, shuffle=False)

    predictions = self.model.predict(dataset, batch_size)
    if came_from_dataframe:
        return convert_predict_to_dataframe(data, predictions)
    return predictions
def predict(self, data, batch_size=4, feature_cols=None, hard_code_batch_size=False):
    """Run inference by wrapping the current TF session in a ``TFNet``.

    :param data: input samples; may be a Spark ``DataFrame`` or any other
        input accepted by ``to_dataset``.
    :param batch_size: per-thread batch size used during prediction.
    :param feature_cols: names of the feature columns; required (non-None)
        when *data* is a ``DataFrame``.
    :param hard_code_batch_size: forwarded to ``to_dataset`` unchanged.
    :return: a ``DataFrame`` with predictions when *data* was a ``DataFrame``,
        otherwise the raw prediction RDD.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"

    came_from_dataframe = isinstance(data, DataFrame)
    if came_from_dataframe:
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"

    # batch_size=-1 with batch_per_thread set is the inference configuration;
    # sequential_order/shuffle keep predictions aligned with input rows.
    dataset = to_dataset(data, batch_size=-1, batch_per_thread=batch_size,
                         validation_data=None,
                         feature_cols=feature_cols, labels_cols=None,
                         hard_code_batch_size=hard_code_batch_size,
                         sequential_order=True, shuffle=False)

    # TFNet needs flat tensor lists; inputs/outputs may be arbitrary nests.
    tfnet = TFNet.from_session(sess=self.sess,
                               inputs=nest.flatten(self.inputs),
                               outputs=nest.flatten(self.outputs))
    predictions = tfnet.predict(dataset)

    if came_from_dataframe:
        return convert_predict_to_dataframe(data, predictions)
    return predictions