def predict(self, X, yst_dict):
    """Predict per-category probabilities for every row of ``X``.

    For each row the closest intersection is looked up with
    ``self._find_closest_intersection``; the vectors returned by that
    intersection's ``get_yst_prob_freq`` are summed over every column of
    ``yst_dict`` and the sum is normalised so the row adds up to 1.

    Args:
        X: 2-D array of coordinates. Its columns are reversed before use;
            per the original comment this produces (lat, lng) order, so the
            input is presumably (lng, lat) -- confirm against the caller.
        yst_dict: mapping whose values are indexable by row position, i.e.
            ``value[i]`` is the feature passed to ``get_yst_prob_freq`` for
            row ``i`` of ``X``.

    Returns:
        pandas.DataFrame with an ``Id`` column holding the row position,
        followed by one column per entry of ``get_all_categories()``.
    """
    start = time()
    # change column order to lat, lng
    X = np.fliplr(X)
    nrows = X.shape[0]
    categories = get_all_categories()
    ncols = len(categories)
    pred_X = np.zeros((nrows, ncols))

    # The set of columns is the same for every row, so fetch it once.
    # ``values()`` (rather than ``itervalues()``) works on Python 2 and 3.
    yst_columns = list(yst_dict.values())

    for i, row in enumerate(X):
        if i % 100000 == 0:
            # Single formatted string: valid on Python 2 and 3, and prints
            # the same text as the former ``print i, elapsed, "minutes"``.
            print("%s %s minutes" % (i, (time() - start) / 60))

        closest_cnn, _ = self._find_closest_intersection(row, 1)
        node = self.graph_dict[closest_cnn[0]]

        # Sized from the category list instead of a hard-coded 39 so the
        # accumulator always matches the width of ``pred_X``.
        frequencies = np.zeros(ncols)
        for yst_col_vals in yst_columns:
            frequencies += np.asarray(node.get_yst_prob_freq(yst_col_vals[i]))

        # NOTE: a zero total (e.g. an empty ``yst_dict``) makes this 0/0 and
        # leaves NaN in the row, exactly as before.
        pred_X[i] = frequencies / np.sum(frequencies)

    predict_df = pd.DataFrame(pred_X, columns=categories)
    predict_df.index.name = 'Id'
    predict_df = predict_df.reset_index()
    return predict_df
def predict(self, X):
    """Predict per-category probabilities for every row of ``X``.

    Each row is assigned the vector returned by ``get_probabilities()`` of
    the closest intersection, as found by
    ``self._find_closest_intersection``.

    Args:
        X: 2-D array of coordinates. Its columns are reversed before use;
            per the original comment this produces (lat, lng) order, so the
            input is presumably (lng, lat) -- confirm against the caller.

    Returns:
        pandas.DataFrame with an ``Id`` column holding the row position,
        followed by one column per entry of ``get_all_categories()``.
    """
    start = time()
    # change column order to lat, lng
    X = np.fliplr(X)
    categories = get_all_categories()
    pred_X = np.zeros((X.shape[0], len(categories)))

    for i, row in enumerate(X):
        if i % 200000 == 0:
            # Single formatted string: valid on Python 2 and 3, and prints
            # the same text as the former ``print i, elapsed, "minutes"``.
            print("%s %s minutes" % (i, (time() - start) / 60))

        # Only the nearest intersection is requested (second argument 1);
        # the returned distances are not needed here.
        closest_cnn, _ = self._find_closest_intersection(row, 1)
        pred_X[i] = self.graph_dict[closest_cnn[0]].get_probabilities()

    predict_df = pd.DataFrame(pred_X, columns=categories)
    predict_df.index.name = 'Id'
    return predict_df.reset_index()