def test_home_rentals(self): lightwood.config.config.CONFIG.USE_CUDA = USE_CUDA lightwood.config.config.CONFIG.PLINEAR = PLINEAR config = { 'input_features': [ {'name': 'sqft', 'type': 'numeric'}, {'name': 'days_on_market', 'type': 'numeric'}, {'name': 'neighborhood', 'type': 'categorical', 'dropout': 0.4} ], 'output_features': [ {'name': 'number_of_rooms', 'type': 'categorical', 'weights': {'0': 0.8, '1': 0.6, '2': 0.5, '3': 0.7, '4': 1}}, {'name': 'number_of_bathrooms', 'type': 'categorical', 'weights': {'0': 0.8, '1': 0.6, '2': 4}}, {'name': 'rental_price', 'type': 'numeric'}, {'name': 'location', 'type': 'categorical'} ], 'data_source': {'cache_transformed_data': CACHE_ENCODED_DATA}, 'mixer': { 'class': NnMixer, 'kwargs': { 'selfaware': SELFAWARE, 'eval_every_x_epochs': 4, 'stop_training_after_seconds': 80 } } } df = pd.read_csv('https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv') predictor = Predictor(config) predictor.learn(from_data=df) df = df.drop([x['name'] for x in config['output_features']], axis=1) predictor.predict(when_data=df) predictor.save('test.pkl') predictor = Predictor(load_from_path='test.pkl') for j in range(100): pred = predictor.predict(when={'sqft': round(j * 10)})['number_of_rooms']['predictions'][0] assert isinstance(pred, (str, int))
def run_full_test(USE_CUDA, CACHE_ENCODED_DATA, SELFAWARE, PLINEAR): ''' Run full test example with home_rentals dataset ''' lightwood.config.config.CONFIG.USE_CUDA = USE_CUDA lightwood.config.config.CONFIG.PLINEAR = PLINEAR config = { 'input_features': [{ 'name': 'number_of_bathrooms', 'type': 'numeric' }, { 'name': 'sqft', 'type': 'numeric' }, { 'name': 'location', 'type': 'categorical' }, { 'name': 'days_on_market', 'type': 'numeric' }, { 'name': 'neighborhood', 'type': 'categorical', 'dropout': 0.4 }, { 'name': 'rental_price', 'type': 'numeric' }], 'output_features': [{ 'name': 'number_of_rooms', 'type': 'categorical', # 'weights':{ # '0': 0.8, # '1': 0.6, # '2': 0.5, # '3': 0.7, # '4': 1, # } }], 'data_source': { 'cache_transformed_data': CACHE_ENCODED_DATA }, 'mixer': { 'class': lightwood.BUILTIN_MIXERS.NnMixer, 'selfaware': SELFAWARE } } df = pd.read_csv( "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv" ) def iter_function(epoch, error, test_error, test_error_gradient, test_accuracy): print( 'epoch: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}' .format(iter=epoch, error=error, test_error=test_error, test_error_gradient=test_error_gradient, accuracy=predictor.train_accuracy, test_accuracy=test_accuracy)) predictor = Predictor(config) # stop_training_after_seconds given in order to not get timeouts in travis predictor.learn(from_data=df, callback_on_iter=iter_function, eval_every_x_epochs=4, stop_training_after_seconds=40) df = df.drop([x['name'] for x in config['output_features']], axis=1) predictor.predict(when_data=df) predictor.save('test.pkl') predictor = Predictor(load_from_path='test.pkl') preds = {} for j in range(100): pred = predictor.predict( when={'sqft': round(j * 10)})['number_of_rooms']['predictions'][0] if pred not in preds: preds[pred] = 0 preds[pred] += 1
def iter_function(epoch, error, test_error, test_error_gradient, test_accuracy): print( 'epoch: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}'.format( iter=epoch, error=error, test_error=test_error, test_error_gradient=test_error_gradient, accuracy=predictor.train_accuracy, test_accuracy=test_accuracy)) predictor.learn(from_data=data_frame, callback_on_iter=iter_function) print('accuracy') print(predictor.train_accuracy) print('accuracy over all dataset') print(predictor.calculate_accuracy(from_data=data_frame)) when = {'x': [1], 'y': [0]} print('- multiply when. {when}'.format(when=when)) print(predictor.predict(when=when)) # saving the predictor predictor.save('ok.pkl') # loading the predictor predictor2 = Predictor(load_from_path='ok.pkl') when = {'x': [0, 0, 1, -1, 1], 'y': [0, 1, -1, -1, 1]} print('- multiply when. {when}'.format(when=when)) print(predictor2.predict(when_data=pandas.DataFrame(when))) when = {'x': [0, 3, 1, -5, 1], 'y': [0, 1, -5, -4, 7]} print('- multiply when. {when}'.format(when=when)) print(predictor2.predict(when_data=pandas.DataFrame(when)))
def feedback(iter, error, test_error, test_error_gradient): # predictor.stop_training() print( 'iteration: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, accuracy: {accuracy}'.format( iter=iter, error=error, test_error=test_error, test_error_gradient=test_error_gradient, accuracy=predictor.train_accuracy)) predictor.learn(from_data=data_frame, callback_on_iter=feedback) print('accuracy') print(predictor.train_accuracy) print('accuracy over all dataset') print(predictor.calculate_accuracy(from_data=data_frame)) when = {'x': [1], 'y': [0]} print('- multiply when. {when}'.format(when=when)) print(predictor.predict(when=when)) # saving the predictor predictor.save('/tmp/ok.pkl') # loading the predictor predictor2 = Predictor(load_from_path='/tmp/ok.pkl') when = {'x': [0, 0, 1, -1, 1], 'y': [0, 1, -1, -1, 1]} print('- multiply when. {when}'.format(when=when)) print(predictor2.predict(when_data=pandas.DataFrame(when))) when = {'x': [0, 3, 1, -5, 1], 'y': [0, 1, -5, -4, 7]} print('- multiply when. {when}'.format(when=when)) print(predictor2.predict(when_data=pandas.DataFrame(when)))
def run_test(USE_CUDA, CACHE_ENCODED_DATA, SELFAWARE, PLINEAR): lightwood.config.config.CONFIG.USE_CUDA = USE_CUDA lightwood.config.config.CONFIG.CACHE_ENCODED_DATA = CACHE_ENCODED_DATA lightwood.config.config.CONFIG.SELFAWARE = SELFAWARE lightwood.config.config.CONFIG.PLINEAR = PLINEAR #################### config = { 'input_features': [{ 'name': 'number_of_bathrooms', 'type': 'numeric' }, { 'name': 'sqft', 'type': 'numeric' }, { 'name': 'location', 'type': 'categorical' }, { 'name': 'days_on_market', 'type': 'numeric' }, { 'name': 'neighborhood', 'type': 'categorical', 'dropout': 0.4 }, { 'name': 'rental_price', 'type': 'numeric' }], 'output_features': [{ 'name': 'number_of_rooms', 'type': 'categorical', # 'weights':{ # '0': 0.8, # '1': 0.6, # '2': 0.5, # '3': 0.7, # '4': 1, # } }], 'mixer': { 'class': lightwood.BUILTIN_MIXERS.NnMixer } } # AX doesn't seem to work on the travis version of windows, so don't test it there as of now if sys.platform not in ['win32', 'cygwin', 'windows']: pass #config['optimizer'] = lightwood.model_building.BasicAxOptimizer df = pd.read_csv( "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv" ) def iter_function(epoch, error, test_error, test_error_gradient, test_accuracy): print( 'epoch: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}' .format(iter=epoch, error=error, test_error=test_error, test_error_gradient=test_error_gradient, accuracy=predictor.train_accuracy, test_accuracy=test_accuracy)) predictor = Predictor(config) # stop_training_after_seconds given in order to not get timeouts in travis predictor.learn(from_data=df, callback_on_iter=iter_function, eval_every_x_epochs=1, stop_training_after_seconds=1) predictor.save('test.pkl') predictor = Predictor(load_from_path='test.pkl') df = df.drop([x['name'] for x in config['output_features']], axis=1) predictor.predict(when_data=df) predictor.save('test.pkl') predictor = Predictor(load_from_path='test.pkl') preds = {} for j in range(100): pred = predictor.predict( when={'sqft': round(j * 10)})['number_of_rooms']['predictions'][0] if pred not in preds: preds[pred] = 0 preds[pred] += 1