def run(sample=False):
    """Train the 'german_data' predictor and score it on the test split.

    Learns to predict ``class`` from ``processed_data/train.csv`` with the
    lightwood backend, predicts on ``processed_data/test.csv`` and prints the
    confusion matrix of real versus predicted values.

    Args:
        sample: Not read anywhere in this function.

    Returns:
        dict with the keys ``accuracy`` (balanced accuracy score),
        ``accuracy_function``, ``backend`` and ``single_row_predictions``
        (the ``explanation`` of every prediction row).
    """
    backend = 'lightwood'
    test_csv = 'processed_data/test.csv'

    mdb = Predictor(name='german_data')
    mdb.learn(
        to_predict='class',
        from_data='processed_data/train.csv',
        backend=backend,
    )
    predictions = mdb.predict(when_data=test_csv)

    predicted_val = []
    for row in predictions:
        predicted_val.append(row.explanation['class']['predicted_value'])
    real_val = list(pd.read_csv(test_csv)['class'])

    accuracy = balanced_accuracy_score(real_val, predicted_val)
    cm = confusion_matrix(real_val, predicted_val)
    print(cm)

    # Keep the full explanation of every row so callers can inspect it.
    additional_info = []
    for row in predictions:
        additional_info.append(row.explanation)

    report = {}
    report['accuracy'] = accuracy
    report['accuracy_function'] = 'balanced_accuracy_score'
    report['backend'] = backend
    report['single_row_predictions'] = additional_info
    return report
def test_clickhouse_ds():
    """Check that ClickhouseDS loads a query result and can be analysed.

    Recreates ``test.mock`` over the ClickHouse HTTP interface on
    localhost:8123, inserts three rows, loads the two rows with the largest
    ``col2`` through ``ClickhouseDS`` and validates the resulting frame before
    handing the data source to ``Predictor.analyse_dataset``.
    """
    HOST = 'localhost'
    PORT = 8123
    clickhouse_url = f'http://{HOST}:{PORT}'

    # Statements are sent one per request, in this exact order.
    setup_statements = (
        'CREATE DATABASE IF NOT EXISTS test',
        'DROP TABLE IF EXISTS test.mock',
        """CREATE TABLE test.mock( col1 String ,col2 Int64 ,col3 Array(UInt8) ) ENGINE=Memory""",
        """INSERT INTO test.mock VALUES ('a',1,[1,2,3])""",
        """INSERT INTO test.mock VALUES ('b',2,[2,3,1])""",
        """INSERT INTO test.mock VALUES ('c',3,[3,1,2])""",
    )
    for statement in setup_statements:
        requests.post(clickhouse_url, data=statement)

    clickhouse_ds = ClickhouseDS(
        'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2',
        host=HOST,
        port=PORT,
    )

    # LIMIT 2 on a descending sort keeps col2 == 3 and col2 == 2.
    assert len(clickhouse_ds.df) == 2
    assert sum(int(value) for value in clickhouse_ds.df['col2']) == 5
    assert len(list(clickhouse_ds.df['col3'][1])) == 3
    assert set(clickhouse_ds.df.columns) == {'col1', 'col2', 'col3'}

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=clickhouse_ds)
def test_mysql_ds():
    """Check that MySqlDS loads a whole table and can be analysed.

    Recreates ``test_mindsdb`` on the local MySQL server, fills it with 200
    rows, loads the table through ``MySqlDS`` and passes the data source to
    ``Predictor.analyse_dataset``.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 3306

    # The same connection settings are used for the fixture and the data source.
    connection_args = dict(
        host=HOST,
        port=PORT,
        user=USER,
        password=PASSWORD,
        database=DATABASE,
    )

    con = mysql.connector.connect(**connection_args)
    cur = con.cursor()
    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 BIGINT, col_3 BOOL)')
    for i in range(200):
        insert_sql = f'INSERT INTO test_mindsdb VALUES ("This is string number {i}", {i}, {i % 2 == 0})'
        cur.execute(insert_sql)
    con.commit()
    con.close()

    mysql_ds = MySqlDS(table='test_mindsdb', **connection_args)
    assert len(mysql_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=mysql_ds)
class Robotics:
    """Train and evaluate the 'human_activity' predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='human_activity')

    def train(self):
        """Learn ``target`` from ``train.csv``.

        Rows are ordered by ``time`` and grouped by ``id``, with a window
        size of 128; optional analysis is disabled.
        """
        print("model training started")
        self.mindsDb.learn(
            from_data="train.csv",
            to_predict=['target'],
            order_by=['time'],
            window_size=128,
            group_by='id',
            disable_optional_analysis=True,
        )
        print("model training completed")

    def predict_test(self):
        """Predict on ``test.csv`` and report accuracy.

        Writes the predictions to ``test_pred.csv`` and the
        predicted-versus-real cross tabulation to ``test_final_cm.csv``, and
        prints the table and the accuracy rounded to a whole percentage.
        """
        print("test prediction started")

        y_real = list(pd.read_csv("test.csv")["target"])
        results = self.mindsDb.predict(when_data="test.csv")
        y_pred = [row['target'] for row in results]

        predictions = pd.DataFrame(y_pred)
        predictions.to_csv(index=False, header=True,
                           path_or_buf="test_pred.csv")

        acc_score = accuracy_score(y_real, y_pred, normalize=True)
        acc_pct = round(acc_score * 100)

        # Build the cross tabulation once and reuse it for both the console
        # output and the CSV file (rows: predicted, columns: real).
        test_cm = pd.crosstab(pd.Series(y_pred), pd.Series(y_real))
        print(test_cm)
        test_cm.to_csv('test_final_cm.csv', header=True, index=True)

        print(f'Accuracy of : {acc_pct}%')
        print("test prediction completed")
class Insurance:
    """Wrapper around the 'insurance' predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='insurance')

    def insurance_train(self):
        """Learn ``PolicyStatus`` from ``insu_replicate.csv``.

        The rows are ordered by the date columns listed below and
        ``window_size_samples`` is set to 4.
        """
        date_columns = [
            'DateRequested',
            'DateRqmtLastFollowed1',
            'DateRqmtLastFollowed2',
            'DateRqmtLastFollowedF',
            'DateRqmtLastFollowed3',
            'DateSignedOff',
        ]
        self.mindsDb.learn(
            to_predict='PolicyStatus',
            from_data="insu_replicate.csv",
            order_by=date_columns,
            window_size_samples=4,
        )
def basic_test(backend='ludwig', use_gpu=True, ignore_columns=[], run_extra=False):
    """Smoke-test the learn / predict / inspect cycle on the home rentals data.

    Args:
        backend: Backend passed to ``Predictor.learn``.
        use_gpu: Forwarded to both ``learn`` and ``predict``.
        ignore_columns: Neither read nor mutated in this function.
        run_extra: When true, first runs every ``.py`` file found in
            ``../functional_testing`` with ``python3`` and also exercises
            ``rename_model`` before predicting.

    Raises:
        AssertionError: If the first evaluation is not a
            ``ProbabilityEvaluation`` or the model data has five keys or fewer.
    """
    if run_extra:
        # Match on the file extension only; a substring test would also pick
        # up names such as ``foo.pyc`` or ``foo.py.bak``.
        scripts = [
            name for name in os.listdir('../functional_testing')
            if name.endswith('.py')
        ]
        for py_file in scripts:
            os.system(f'python3 ../functional_testing/{py_file}')

    # Create & Learn
    mdb = Predictor(name='home_rentals_price')
    mdb.learn(
        to_predict='rental_price',
        from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
        backend=backend,
        stop_training_in_x_seconds=20,
        use_gpu=use_gpu,
    )

    # Reload & Predict
    model_name = 'home_rentals_price'
    if run_extra:
        mdb.rename_model('home_rentals_price', 'home_rentals_price_renamed')
        model_name = 'home_rentals_price_renamed'

    mdb = Predictor(name=model_name)
    prediction = mdb.predict(when={'sqft': 300}, use_gpu=use_gpu)

    # Test all different forms of output.
    # The prints are only for debugging; the point is to see whether the
    # interface crashes or not.
    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)

    print(type(list(prediction.evaluations.values())[0][0]))
    assert 'ProbabilityEvaluation' in str(
        type(list(prediction.evaluations.values())[0][0]))

    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted metadata
    amd = mdb.get_model_data(model_name)
    # Make some simple assertions about it
    assert 5 < len(list(amd.keys()))
class Electricity:
    """Train and evaluate the 'demand_30' predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='demand_30')

    def train(self):
        """Learn ``power_consumed`` from ``dataset/mdb_train.csv``.

        Rows are ordered by ``TimeStamp`` and grouped by ``customer``, with a
        window size of 84; optional analysis is disabled.
        """
        self.mindsDb.learn(
            to_predict='power_consumed',
            from_data='dataset/mdb_train.csv',
            window_size=84,
            order_by=['TimeStamp'],
            group_by=['customer'],
            disable_optional_analysis=True,
        )

    def test_predict(self):
        """Predict on the test file and print the R^2 score."""
        # Ground truth and predictions must come from the same file for the
        # score to be meaningful, so both reads share one path (the one the
        # predictor is given).
        test_path = "dataset/mdb_test.csv"

        y_real = list(pd.read_csv(test_path)["power_consumed"])
        results = self.mindsDb.predict(when_data=test_path)
        y_pred = [row['power_consumed'] for row in results]

        print(r2_score(y_real, y_pred))
class Temperature:
    """Train and evaluate the 'temperature' predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='temperature')

    def temp_train(self):
        """Learn ``temperature`` from ``train.csv``, ordered by ``index``, window size 20."""
        self.mindsDb.learn(
            to_predict='temperature',
            from_data='train.csv',
            window_size=20,
            order_by='index',
        )

    def temp_predict(self):
        """Predict on ``test.csv``, save the predictions and print the R^2 score.

        The predictions are written to ``test_pred.csv``.
        """
        y_real = pd.read_csv("test.csv")
        results = self.mindsDb.predict(when_data="test.csv")

        y_pred = [row['temperature'] for row in results]

        predictions = pd.DataFrame(y_pred)
        predictions.to_csv(path_or_buf="test_pred.csv", index=False, header=True)

        expected = y_real['temperature'].tolist()
        predicted = pd.Series(y_pred).tolist()
        print(r2_score(expected, predicted))
class Insurance:
    """Train and evaluate the 'insurance1' predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='insurance1')

    def insurance_train(self):
        """Learn ``PolicyStatus`` from ``insu_train_indep_dep.csv``."""
        self.mindsDb.learn(
            to_predict='PolicyStatus',
            from_data='insu_train_indep_dep.csv',
        )

    def insurance_predict(self):
        """Predict on ``insu_test_indep_dep.csv`` and print the accuracy.

        The accuracy is printed as a percentage rounded to a whole number.
        """
        df = pd.read_csv('insu_test_indep_dep.csv')
        y_real = list(df['PolicyStatus'])

        results = self.mindsDb.predict(when_data="insu_test_indep_dep.csv")
        y_pred = [row['PolicyStatus'] for row in results]

        acc_score = accuracy_score(y_real, y_pred, normalize=True)
        acc_pct = round(acc_score * 100)
        print(f'Accuracy of : {acc_pct}%')
def test_postgres_ds():
    """Check that PostgresDS loads a whole table and can be analysed.

    Recreates ``test_mindsdb`` on the local PostgreSQL server via pg8000,
    fills it with 200 rows (text, int, boolean, date and int-array columns),
    loads the table through ``PostgresDS`` and passes the data source to
    ``Predictor.analyse_dataset``.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DBNAME = 'postgres'
    PORT = 5432

    con = pg8000.connect(
        database=DBNAME,
        user=USER,
        password=PASSWORD,
        host=HOST,
        port=PORT,
    )
    cur = con.cursor()
    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean, col_4 Date, col_5 Int [])'
    )

    for i in range(200):
        # Row i carries the date i days before the current time.
        dt = datetime.datetime.now() - datetime.timedelta(days=i)
        dt_str = dt.strftime('%Y-%m-%d')
        insert_sql = f'INSERT INTO test_mindsdb VALUES (\'String {i}\', {i}, {i % 2 == 0}, \'{dt_str}\', ARRAY [1, 2, {i}])'
        cur.execute(insert_sql)

    con.commit()
    con.close()

    postgres_ds = PostgresDS(
        table='test_mindsdb',
        host=HOST,
        user=USER,
        password=PASSWORD,
        database=DBNAME,
        port=PORT,
    )
    assert len(postgres_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=postgres_ds)
def basic_test(backend='ludwig', use_gpu=True, ignore_columns=[]):
    """Smoke-test the learn / predict / inspect cycle on the home rentals data.

    Args:
        backend: Backend passed to ``Predictor.learn``.
        use_gpu: Not read anywhere in this function.
        ignore_columns: Not read anywhere in this function.

    Raises:
        AssertionError: If the first evaluation is not a
            ``ProbabilityEvaluation`` or the model data has five keys or fewer.
    """
    data_url = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

    # Create & Learn
    mdb = Predictor(name='home_rentals_price')
    mdb.learn(to_predict='rental_price', from_data=data_url, backend=backend)

    # Reload & Predict
    mdb = Predictor(name='home_rentals_price')
    prediction = mdb.predict(when={'sqft': 300})

    # Test all different forms of output.
    # The prints are only for debugging; the point is to see whether the
    # interface crashes or not.
    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)

    print(type(list(prediction.evaluations.values())[0][0]))
    evaluation_type_name = str(type(list(prediction.evaluations.values())[0][0]))
    assert 'ProbabilityEvaluation' in evaluation_type_name

    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted metadata
    amd = mdb.get_model_data('home_rentals_price')
    # Make some simple assertions about it
    key_count = len(list(amd.keys()))
    assert 5 < key_count
# Script: trains the 'test_predictor' model to predict 'neighborhood' from the
# hosted home_rentals.csv with the lightwood backend, runs a single-row
# prediction and prints its explanation, then predicts on the whole file and
# walks the resulting rows calling epitomize(), as_dict(), as_list() and
# raw_predictions().
# NOTE(review): the statements below are collapsed onto one line, so where each
# `for p in p_arr:` body ends (and whether the second single-row predict and the
# trailing prints sit inside a loop) cannot be determined from here. Restore the
# original line breaks and indentation before changing the code.
from mindsdb import Predictor import sys import pandas as pd import json import time mdb = Predictor(name='test_predictor') #'rental_price', mdb.learn(to_predict=['neighborhood'],from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",use_gpu=False,stop_training_in_x_seconds=3000, backend='lightwood', unstable_parameters_dict={'use_selfaware_model':True}) p = mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True, use_gpu=True) e = p[0].explanation print(e) p_arr = mdb.predict(when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv', use_gpu=True) for p in p_arr: e = p.explanation p = mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True, use_gpu=True) for p in p_arr: exp_s = p.epitomize() exp = p.explanation print(exp_s) print(p.as_dict()) print(p.as_list()) print(p.raw_predictions())
PASSWORD = '' DBNAME = 'postgres' PORT = 5432 con = psycopg2.connect(dbname=DBNAME, user=USER, password=PASSWORD, host=HOST, port=PORT) cur = con.cursor() cur.execute('DROP TABLE IF EXISTS test_mindsdb') cur.execute( 'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean)') for i in range(0, 200): cur.execute( f'INSERT INTO test_mindsdb VALUES (\'This is tring number {i}\', {i}, {i % 2 == 0})' ) con.commit() con.close() mysql_ds = PostgresDS(table='test_mindsdb', host=HOST, user=USER, password=PASSWORD, database=DBNAME, port=PORT) assert (len(mysql_ds._df) == 200) mdb = Predictor(name='analyse_dataset_test_predictor') mdb.analyse_dataset(from_data=mysql_ds)
from mindsdb import Predictor

# Ask the already-trained 'player-stats' model about a single fixture.
fixture = {
    'home_team': 'Scotland',
    'away_team': 'England',
    'tournament': 'Friendly',
    'country': 'Scotland',
}
result = Predictor(name='player-stats').predict(when=fixture)

print(result[0])
from mindsdb import Predictor

print("learning...")

# Tell mindsDB what we want to learn and from what data.
Predictor(name='home_rentals_price').learn(
    # The column we want to learn to predict given all the data in the file.
    to_predict='rental_price',
    # The path to the file we learn from (note: can be a url).
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    # Timing notes:
    #   use_gpu=False: 25 seconds using powershell: Measure-Command {python .\main.py}
    #                  29 seconds on iMac
    #   use_gpu=True:  25 seconds also (much less CPU used though)
    use_gpu=False,
)

# Use the model to make predictions.
listing = {
    'number_of_rooms': 2,
    'number_of_bathrooms': 1,
    'sqft': 1190,
}
result = Predictor(name='home_rentals_price').predict(when=listing)

# You can now print the results.
message = 'The predicted price is ${price} with {conf} confidence'.format(
    price=result[0]['rental_price'],
    conf=result[0]['rental_price_confidence'],
)
print(message)

print("done")
def __init__(self):
    """Create the 'demand_30' predictor and store it on ``self.mindsDb``."""
    predictor = Predictor(name='demand_30')
    self.mindsDb = predictor
from mindsdb import Predictor

# We tell mindsDB what we want to learn and from what data.
mdb = Predictor(name='home_rentals_price')
mdb.learn(
    # The column we want to learn to predict given all the data in the file.
    to_predict='rental_price',
    # The path to the file we learn from (note: can be a url).
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
)

# Predict for a single row and show it.
prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])

# Fetch and show the stored model data.
amd = mdb.get_model_data('home_rentals_price')
print(amd)
from mindsdb import Predictor
import sys

# The backend comes from the first command-line argument, else 'ludwig'.
backend = sys.argv[1] if len(sys.argv) > 1 else 'ludwig'

mdb = Predictor(name='home_rentals_price')
mdb.learn(
    to_predict='rental_price',
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    backend=backend,
)
#mdb.learn(to_predict='rental_price',from_data="docs/examples/basic/home_rentals.csv",backend=backend)

prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])
# Predicted rental prices of every row, truncated to int.
print([int(row['rental_price']) for row in prediction])

amd = mdb.get_model_data('home_rentals_price')
#print(amd)
from mindsdb import Predictor

# Run the dataset analysis on the hosted home rentals CSV.
mdb = Predictor(name='analyse_dataset_test_predictor')
mdb.analyse_dataset(
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
)
from mindsdb import Predictor

# Ask the already-trained 'btc-price' model about a single set of metrics.
metrics = {
    'txVolume(USD)': 6739584540.73,
    'adjustedTxVolume(USD)': 3868097401.91,
    'txCount': 204913,
    'exchangeVolume(USD)': 7394019840,
    'generatedCoins': 1875,
    'fees': 35.900,
    'blockCount': 150,
}
result = Predictor(name='btc-price').predict(when=metrics)

print(result[0])
data='CREATE DATABASE IF NOT EXISTS test') requests.post('http://localhost:8123', data='DROP TABLE IF EXISTS test.mock') requests.post('http://localhost:8123', data="""CREATE TABLE test.mock( col1 String ,col2 Int64 ,col3 Array(UInt8) ) ENGINE=Memory""") requests.post('http://localhost:8123', data="""INSERT INTO test.mock VALUES ('a',1,[1,2,3])""") requests.post('http://localhost:8123', data="""INSERT INTO test.mock VALUES ('b',2,[2,3,1])""") requests.post('http://localhost:8123', data="""INSERT INTO test.mock VALUES ('c',3,[3,1,2])""") log.info('Querying data') clickhouse_ds = ClickhouseDS( 'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2') log.info('Validating data integrity') assert (len(clickhouse_ds.df) == 2) assert (sum(map(int, clickhouse_ds.df['col2'])) == 5) assert (len(list(clickhouse_ds.df['col3'][1])) == 3) assert (set(clickhouse_ds.df.columns) == set(['col1', 'col2', 'col3'])) mdb = Predictor(name='analyse_dataset_test_predictor') mdb.analyse_dataset(from_data=clickhouse_ds) log.info('Finished running ClickhouseDS tests successfully !')
from mindsdb import Predictor, MySqlDS

# Get data: load the heart disease columns from the local MySQL server.
mysql_ds = MySqlDS(
    query="SELECT age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, target FROM sys.heartdisease",
    user="******",
    password="******",
    port=3306,
    host="localhost",
    table="heartdisease",
    database="sys",
)

# Train model
mdb = Predictor(name="heart-disease")
mdb.learn(from_data=mysql_ds, to_predict="target")

# Get prediction
patient = {
    "age": "40",
    "sex": 0,
    "chol": 180,
    "fbs": 0,
    "thal": 3,
    "exang": 0,
}
prediction = mdb.predict(when=patient)
print(prediction[0].explanation)
df = self.transaction.input_data.validation_df elif mode == 'test': df = self.transaction.input_data.test_df X = [] for col in self.input_columns: X.append(self.le_arr[col].transform(df[col])) X = np.swapaxes(X, 1, 0) predictions = self.clf.predict(X) formated_predictions = {self.output_columns[0]: predictions} return formated_predictions predictor = Predictor(name='custom_model_test_predictor') dt_model = CustomDTModel() predictor.learn( to_predict='rental_price', from_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=dt_model) predictions = predictor.predict( when_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=dt_model)
# Script: trains the 'sensor123' predictor to predict 'rental_price' from the
# hosted home_rentals.csv (use_gpu=True, stop_training_in_x_seconds=15),
# predicts on the same file, calls epitomize() on the prediction rows and
# prints the model data stored under 'sensor123'.
# NOTE(review): the statements below are collapsed onto one line, so whether
# `print(exp_s)` belongs to the `for p in p_arr:` body cannot be determined from
# here. Restore the original line breaks and indentation before changing the
# code.
from mindsdb import Predictor import sys import pandas as pd mdb = Predictor(name='sensor123') mdb.learn( to_predict='rental_price', from_data= "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv", use_gpu=True, stop_training_in_x_seconds=15) p_arr = mdb.predict( when_data= 'https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv') for p in p_arr: exp_s = p.epitomize() #exp = p.explain() #print(exp) print(exp_s) print(mdb.get_model_data('sensor123'))
from mindsdb import Predictor
import pprint

# Numeric log levels accepted by Predictor(log_level=...).
DEBUG_LOG_LEVEL = 10
INFO_LOG_LEVEL = 20
WARNING_LOG_LEVEL = 30
ERROR_LOG_LEVEL = 40
NO_LOGS_LOG_LEVEL = 50

ANDY_LOGLEVEL = INFO_LOG_LEVEL

print("training")

# Tell mindsDB what we want to learn and from what data.
Predictor(name='spam_test', log_level=ANDY_LOGLEVEL).learn(
    # The column we want to learn to predict given all the data in the file.
    to_predict='answer',
    # The path to the file we learn from (note: can be a url).
    from_data="spam_small.csv",
    use_gpu=False,
    stop_training_in_x_seconds=10,
)

# Use the model to make predictions; every entry starts without a verdict.
tests = [
    {'text': text, 'is_spam': None, 'confidence': 0}
    for text in (
        'how are you going, what have you been doing today',
        'ready to buy a new dvd today?',
        'WINNER!!',
    )
]

for test in tests:
    print("predicting...")
    result = Predictor(name='spam_test', log_level=ANDY_LOGLEVEL).predict(
        when={'conversation': test['text']},
    )
    # Store the prediction and its confidence back on the test entry.
    test['is_spam'] = result[0]['answer']
    test['confidence'] = result[0]['answer_confidence']
# Script: trains the 'sensor123' predictor to predict 'output' from the hosted
# sensor_data.csv (use_gpu=False, stop_training_in_x_seconds=40), predicts on
# the same file and on one hand-written row (which is printed), then walks the
# file predictions calling epitomize() and explain().
# NOTE(review): the statements below are collapsed onto one line, so how
# `if len(exp['output']) > 0:`, `print(exp)` and `print(exp_s)` nest inside the
# `for p in p_arr:` loop cannot be determined from here. Restore the original
# line breaks and indentation before changing the code.
# NOTE(review): the single-row input uses the key 'sensor4' next to 'sensor 1',
# 'sensor 2' and 'sensor 3' — confirm against the CSV column names.
from mindsdb import Predictor import sys mdb = Predictor(name='sensor123') mdb.learn( to_predict='output', from_data= "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv", use_gpu=False, stop_training_in_x_seconds=40) p_arr = mdb.predict( when_data= 'https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv') pdct = mdb.predict(when={ 'sensor 1': 0.5, 'sensor 2': 2, 'sensor 3': 0, 'sensor4': 5 }) print(pdct) for p in p_arr: exp_s = p.epitomize() exp = p.explain() if len(exp['output']) > 0: print(exp) print(exp_s)
def __init__(self):
    """Create the 'human_activity' predictor and store it on ``self.mindsDb``."""
    predictor = Predictor(name='human_activity')
    self.mindsDb = predictor
def __init__(self):
    """Create the 'temperature' predictor and store it on ``self.mindsDb``."""
    predictor = Predictor(name='temperature')
    self.mindsDb = predictor
"""Predict with the 'fuel' model on fuel_predict.csv and print every result row."""

from mindsdb import Predictor

# Here we use the model to make predictions
# (NOTE: You need to run train.py first).
fuel_predictor = Predictor(name='fuel')
result = fuel_predictor.predict(when_data='fuel_predict.csv')

# You can now print the results.
print('The predicted main engine fuel consumption')
for row in result:
    print(row)
from mindsdb import Predictor
import sys
import pandas as pd
import json
import time

# Train 'test_predictor' on two target columns with the lightwood backend.
mdb = Predictor(name='test_predictor')
mdb.learn(
    to_predict=['rental_price', 'location'],
    from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",
    use_gpu=True,
    stop_training_in_x_seconds=30,
    backend='lightwood',
)

# Predict on the training file itself and summarise every row.
p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv',
)

for p in p_arr:
    exp_s = p.epitomize()
    #exp = p.explain()
    #print(exp)
    #print(exp_s)

# The string below is a bare expression statement and has no effect at runtime.
''' print(mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True)[0].explain()) '''
#print(json.dumps(mdb.get_model_data('test_predictor')))