class VADEstimator(BaseEstimator):
    """Regressor that delegates to a kernel ridge model with an RBF kernel.

    The fitted model is stored on ``self.model``. ``predict`` also accepts a
    Keras ``Sequential`` model there, in case one is assigned from outside
    ``fit``.
    """

    def fit(self, x, y, size=1):
        """Fit an RBF kernel ridge regressor on the given data.

        Previously this method also assembled a three-layer Keras network and
        then replaced it with the kernel ridge model before any training took
        place; that unused construction has been dropped.

        Args:
            x: Training inputs, forwarded unchanged to ``KernelRidge.fit``.
            y: Training targets, forwarded unchanged to ``KernelRidge.fit``.
            size: Unused. Kept so that callers passing it keep working (it
                only sized the output layer of the discarded network).

        Returns:
            This estimator, following the scikit-learn ``fit`` convention.
        """
        self.model = KernelRidge(kernel='rbf')
        self.model.fit(x, y)
        return self

    def predict(self, x):
        """Return predictions for ``x`` from the model stored in ``self.model``.

        Args:
            x: Inputs, forwarded unchanged to the underlying ``predict``.

        Returns:
            For a Keras ``Sequential`` model, only the first element of its
            prediction output; otherwise the underlying model's full output.
        """
        if isinstance(self.model, Sequential):
            # fit() never stores a Sequential; this branch only serves a
            # network assigned to self.model by other code.
            return self.model.predict(x, verbose=0)[0]
        return self.model.predict(x)
class VADEstimator(BaseEstimator):
    """Regressor that delegates to a degree-4 polynomial kernel ridge model.

    The fitted model is stored on ``self.model``. ``predict`` also accepts a
    Keras ``Sequential`` model there, in case one is assigned from outside
    ``fit``.
    """

    def fit(self, x, y, size=1):
        """Fit a polynomial (degree 4) kernel ridge regressor on the data.

        Previously this method also assembled a three-layer Keras network and
        then replaced it with the kernel ridge model before any training took
        place; that unused construction has been dropped.

        Args:
            x: Training inputs, forwarded unchanged to ``KernelRidge.fit``.
            y: Training targets, forwarded unchanged to ``KernelRidge.fit``.
            size: Unused. Kept so that callers passing it keep working (it
                only sized the output layer of the discarded network).

        Returns:
            This estimator, following the scikit-learn ``fit`` convention.
        """
        self.model = KernelRidge(kernel='poly', degree=4)
        self.model.fit(x, y)
        return self

    def predict(self, x):
        """Return predictions for ``x`` from the model stored in ``self.model``.

        Args:
            x: Inputs, forwarded unchanged to the underlying ``predict``.

        Returns:
            For a Keras ``Sequential`` model, only the first element of its
            prediction output; otherwise the underlying model's full output.
        """
        if isinstance(self.model, Sequential):
            # fit() never stores a Sequential; this branch only serves a
            # network assigned to self.model by other code.
            return self.model.predict(x, verbose=0)[0]
        return self.model.predict(x)
# --- Experiment: kernel ridge regression (linear kernel) on bag-of-words ---
# Fits on train_matrix / train_labels and scores on test_matrix / test_labels,
# all of which are prepared earlier in the script.
print("Method = Linear ridge regression with bag-of-words features")
model = KernelRidge(kernel='linear')
model.fit(train_matrix, train_labels)
results = model.predict(test_matrix)
if not is_geocoding:
    print("RMSE = " + repr(np.sqrt(mean_squared_error(test_labels, results))))
    print("MAE = " + repr(mean_absolute_error(test_labels, results)))
else:
    # Evaluate every prediction/target pair once and reuse the distances for
    # both summary statistics instead of recomputing the whole list.
    geo_errors = [
        geodistance(results[i], test_labels[i])
        for i in range(results.shape[0])
    ]
    print("Mean error = " + repr(np.mean(geo_errors)))
    print("Median error = " + repr(np.median(geo_errors)))
print("")

# --- Experiment: feed-forward network on the same bag-of-words features ---
print("Method = MLP with bag-of-words features")
np.random.seed(0)  # reseed NumPy's global RNG before building the network
model = Sequential()
# Two ReLU hidden layers of width embeddings_dim, each followed by 25% dropout.
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1],
                init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
# Sigmoid output keeps every predicted dimension inside (0, 1).
# NOTE(review): presumably the targets are scaled to that range -- confirm.
model.add(Dense(reg_dimensions, activation='sigmoid'))
if not is_geocoding:
    model.compile(loss='mean_absolute_error', optimizer='adam')
else:
    # geoloss is a custom loss supplied elsewhere in the script.
    model.compile(loss=geoloss, optimizer='adam')
model.fit(train_matrix, train_labels, nb_epoch=30, batch_size=16)
results = model.predict(test_matrix)
if not is_geocoding:
    print("RMSE = " + repr(np.sqrt(mean_squared_error(test_labels, results))))
    print("MAE = " + repr(mean_absolute_error(test_labels, results)))
else:
    # Same single-pass distance computation as for the ridge baseline.
    geo_errors = [
        geodistance(results[i], test_labels[i])
        for i in range(results.shape[0])
    ]
    print("Mean error = " + repr(np.mean(geo_errors)))
    print("Median error = " + repr(np.median(geo_errors)))
# --- Tail of the preceding experiment ---
# `model`, `encoder`, `train_labels_aux` and the *2 matrices are set up before
# this point in the script.
model.fit(train_matrix2, train_labels_aux)
# Decode predicted class indices back to the original cell codes, then turn
# each code into a coordinate via healpix2latlon (defined elsewhere).
results = encoder.inverse_transform(model.predict(test_matrix2))
results = np.array([healpix2latlon(code, resolution) for code in results])
# Compute each prediction/target distance once; both statistics reuse it.
geo_errors = [
    geodistance(results[i], test_labels2[i])
    for i in range(results.shape[0])
]
print("Mean error = " + repr(np.mean(geo_errors)))
print("Median error = " + repr(np.median(geo_errors)))
print("")

# --- Experiment: MLP classifier over discretised cells, default features ---
np.random.seed(0)  # reseed NumPy's global RNG before building the network
print("Method = MLP classification - Default features")
encoder = preprocessing.LabelEncoder()
# Replace each (lat, lon) training label with the code from latlon2healpix.
train_labels_aux = np.array(
    [latlon2healpix(lat, lon, resolution) for (lat, lon) in train_labels1])
# Map the codes to integer class indices, then to one-hot rows.
train_labels_aux = np.array(encoder.fit_transform(train_labels_aux))
num_classes = len(set(train_labels_aux))
train_labels_aux = np_utils.to_categorical(train_labels_aux, num_classes)
model = Sequential()
model.add(Dense(hidden_dim, input_dim=train_matrix1.shape[1],
                init='uniform', activation='sigmoid'))
model.add(Dropout(0.25))
# Floor division keeps the layer width an integer under Python 3, where `/`
# would produce a float; for integer hidden_dim the Python 2 result is the same.
model.add(Dense(hidden_dim // 2, activation='sigmoid', init='uniform'))
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation='softmax', init='uniform'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_matrix1, train_labels_aux, nb_epoch=1500, batch_size=16,
          verbose=1)
# Class probabilities -> class index -> original cell code -> coordinate.
results = encoder.inverse_transform(
    np_utils.categorical_probas_to_classes(model.predict(test_matrix1)))
results = np.array([healpix2latlon(code, resolution) for code in results])
geo_errors = [
    geodistance(results[i], test_labels1[i])
    for i in range(results.shape[0])
]
print("Mean error = " + repr(np.mean(geo_errors)))
print("Median error = " + repr(np.median(geo_errors)))

# --- Experiment: same classifier with chromatic features added ---
# Only the label preparation is visible here; the rest of this experiment
# follows these lines.
print("Method = MLP classification - Default features + chromatic features")
np.random.seed(0)
encoder = preprocessing.LabelEncoder()
train_labels_aux = np.array(
    [latlon2healpix(lat, lon, resolution) for (lat, lon) in train_labels1])
train_labels_aux = np.array(encoder.fit_transform(train_labels_aux))
geodistance(results[i], test_labels[i]) for i in range(results.shape[0]) ])))
    # NOTE(review): the line above finishes a statement that begins before
    # this fragment; its opening is not visible here, so it is left exactly as
    # found. Presumably it and the next print sit inside an `else:` branch of
    # a geocoding check -- confirm against the preceding lines.
    # Median of the per-sample geodistance values between predictions and targets.
    print("Median error = " + repr(
        np.median([
            geodistance(results[i], test_labels[i])
            for i in range(results.shape[0])
        ])))
print("")

# --- Experiment: feed-forward network on bag-of-words features ---
print("Method = MLP with bag-of-words features")
np.random.seed(0)  # reseed NumPy's global RNG before building the network
model = Sequential()
# Two ReLU hidden layers of width embeddings_dim, each followed by 25% dropout.
model.add(
    Dense(embeddings_dim,
          input_dim=train_matrix.shape[1],
          init='uniform',
          activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
# Sigmoid output keeps every predicted dimension inside (0, 1).
# NOTE(review): presumably the targets are scaled to that range -- confirm.
model.add(Dense(reg_dimensions, activation='sigmoid'))
# Loss is mean absolute error, or the custom geoloss (defined elsewhere) when
# is_geocoding is set.
if not (is_geocoding):
    model.compile(loss='mean_absolute_error', optimizer='adam')
else:
    model.compile(loss=geoloss, optimizer='adam')
model.fit(train_matrix, train_labels, nb_epoch=30, batch_size=16)
results = model.predict(test_matrix)
# Report RMSE and MAE on the test set when not geocoding; any geocoding branch
# would follow after the end of this fragment.
if not (is_geocoding):
    print("RMSE = " + repr(np.sqrt(mean_squared_error(test_labels, results))))
    print("MAE = " + repr(mean_absolute_error(test_labels, results)))