def page():
    """Train a fresh model and return its prediction for the `x`/`y` query args.

    Returns a JSON payload of the form {"data": "<prediction>"}.
    Missing query parameters raise a KeyError (Flask turns this into a 400).
    """
    raw_x = request.args['x']
    raw_y = request.args['y']
    # NOTE(review): a new model is trained on every request -- presumably cheap;
    # confirm against Model.train() before caching at module level.
    predictor = Model()
    predictor.train()
    prediction = predictor.predict([int(raw_x), int(raw_y)])
    return jsonify({'data': str(prediction)})
def __init__(self, user_id):
    """Bind the bot to a single VK user and resolve the user's display name."""
    print(f"Создан объект бота для пользователя {user_id}!")
    self._USER_ID = user_id
    # Resolving the name performs an HTTP request to vk.com.
    self._USERNAME = self._get_user_name_from_vk_id(user_id)
    # Chat commands the bot recognizes (compared in uppercase).
    self._COMMANDS = [
        "ПРИВЕТ",
        "ПОГОДА",
        "ВРЕМЯ",
        "ПОКА",
        "КОНСУЛЬТАЦИЯ",
        "НАЧАТЬ",
    ]
    # User ids that have already been greeted.
    self._REGISTED_ID = []
    self.model = Model()
def test_model_loading(self):
    """A model loaded from a local file exposes a non-null _model and target."""
    source = get_model_source("fs", get_testfile("example_model.pkl"))
    loaded = Model(source)
    assert hasattr(loaded, '_model')
    assert hasattr(loaded, 'target')
    assert loaded._model is not None
    assert loaded.target is not None
def test_s3_to_fs_copy(self):
    """Copying a model from S3 to the local filesystem yields a loadable file."""
    remote = get_model_source("s3", "s3://netlyt.com/example_model.pkl", True)
    local_path = get_testfile("example_model_tmp.pkl")
    local = get_model_source("fs", local_path)
    # The copy must succeed and the copied file must open as a valid model.
    assert remote.copy_to(local)
    Model(local)
    # Clean up the temporary copy.
    assert local.delete()
def test_simple_linear_dataset_scaled_cv(self):
    """SVR with a linear kernel, feature scaling and CV over a C grid."""
    c_grid = [0.001, 0.01, 0.1, 1, 10, 100]
    svr = Model.create_model(model_type=Model.MODEL_TYPE_SVR,
                             cross_validation=True,
                             feature_scaling=True,
                             C_range=c_grid,
                             kernel=Model.KERNEL_LINEAR)
    train_dataset, test_dataset = test_datasets.get_simple_linear_datasets()
    self._test_dataset(svr, train_dataset, test_dataset, 0,
                       title="SVR scaled CV on linear dataset")
def test_simple_poly_dataset_scaled_cv(self):
    """SVR with a polynomial kernel, feature scaling and CV on poly data."""
    c_grid = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]
    svr = Model.create_model(model_type=Model.MODEL_TYPE_SVR,
                             cross_validation=True,
                             feature_scaling=True,
                             C_range=c_grid,
                             kernel=Model.KERNEL_POLYNOMIAL)
    train_dataset, test_dataset = \
        test_datasets.get_simple_polynomial_datasets(n=1000)
    self._test_dataset(
        svr, train_dataset, test_dataset, 0,
        title="SVR with polynomial kernel, scaled CV on poly dataset")
def test_simple_linear_dataset_scaled(self):
    """SVR with a linear kernel and scaling, fixed C (no cross-validation)."""
    svr = Model.create_model(model_type=Model.MODEL_TYPE_SVR,
                             cross_validation=False,
                             feature_scaling=True,
                             C=1,
                             kernel=Model.KERNEL_LINEAR)
    train_dataset, test_dataset = test_datasets.get_simple_linear_datasets()
    self._test_dataset(svr, train_dataset, test_dataset, 0,
                       title="SVR scaled on linear dataset")
def main(self):
    """Run the Avazu pipeline: load/sample data, engineer features, train the
    model, then either write predictions to output.csv (when a validation
    file was given) or print the log-loss score.
    """
    # Step 1 --> Get and sample data from the original dataset
    print("[AVAZU]\tCreating Data Handler for train and test files...")
    if self.validation_file is None:
        data = DataHandler(self.train_file, self.train_file)
    else:
        data = DataHandler(self.train_file, self.validation_file)
    # Train and test splits are then available as data._train / data._test.
    data.create_train_and_test(self.sampling)
    print("[AVAZU]\tData read and split successfully.")

    # Step 2 --> Feature Engineering
    feat_eng = FeatureEngineering()
    # NOTE(review): `need_feateng` is not defined in this scope -- it looks
    # like it should be `self.need_feateng`; confirm before enabling this path.
    if need_feateng:
        print("[AVAZU]\tStarting feature engineering operations...")
        #data.transform_data(feat_eng.append_hours,feat_eng.append_counters_uniques)
        #data.transform_data(feat_eng.append_days,feat_eng.)
        print("[AVAZU]\tFinished feature engineering operations.")
    # Drop the target/id from the training frame and low-value raw columns.
    data._train.drop(['click', 'id'], axis=1, inplace=True)
    data.drop(['C1', 'device_conn_type', 'device_type', 'banner_pos', 'C15',
               'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'device_model',
               'site_category', 'site_domain', 'site_id', 'app_category',
               'app_domain', 'app_id', 'hours', 'device_id', 'device_ip'])
    print("[AVAZU]\tSelecting K-Best Features...")
    idxs = feat_eng.select_best_k_features(data._train, data._ytrain, 5)
    print(idxs)
    print("[AVAZU]\tFeatures selected.")

    # Step 3 --> Train model and test it
    print("Training and testing the model...")
    m = Model(idxs)
    m.train(data)
    m.predict(data)
    if self.validation_file is not None:
        print(type(data._test['id'].ravel()))
        df = pd.DataFrame({'id': data._test['id'].ravel(),
                           'click': m._ypred},
                          columns=['id', 'click'])
        df.to_csv('output.csv', index=False)
        print("Output in the file: output.csv")
    else:
        print("Log Loss Result: " + str(m.log_loss(data)))


def print_help():
    """Print the command-line usage for avazu.py."""
    print("python avazu.py <args>\n\n")
    print("------------------------------------------------------------------")
    print("List of arguments:")
    # Fixed: "30\%" printed a literal backslash; "inproved" -> "improved".
    print("\t -t: Specifies a training file to read from a csv file\t[REQUIRED]")
    print("\t -v: Specifies a validation/test file to read from a csv file. "
          "If the file is not specified, 30% of the training samples will be "
          "used to build the validation dataset (Default: Not specified)")
    print("\t -s: Specifies a sampling ratio to be performed over the training "
          "data. If none is specified, it will use the whole dataset. "
          "(Default: Entire dataset)")
    print("\t -f: (To be improved) Specifies if one desires to apply feature "
          "engineering over the dataset or not. (Default: No transformation "
          "applied)")
    print("\t -h: Used to print this menu")
    print("------------------------------------------------------------------")
def test_fs_to_s3_copy_authed(self):
    """Copying a local model into an authenticated S3 bucket round-trips."""
    # Imported for their side effects / availability in this test environment.
    import settings
    import boto3
    import os
    local = get_model_source("fs", get_testfile("example_model.pkl"))
    remote = get_model_source("s3",
                              "s3://data.ml.netlyt.com/example_model.pkl")
    # The upload must succeed and the uploaded object must load as a model.
    assert local.copy_to(remote)
    Model(remote)
    # Clean up the remote object.
    assert remote.delete()
def test_simple_poly_dataset_scaled_cv(self):
    """SVR with an RBF kernel, feature scaling and CV on a polynomial dataset.

    Also prints scaled feature means/stds of both splits: the scaler is fit
    on the training split only, so the test-split statistics show how well
    the scaling generalizes.
    """
    model = Model.create_model(
        model_type=Model.MODEL_TYPE_SVR,
        cross_validation=True,
        feature_scaling=True,
        C_range=[0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
        kernel=Model.KERNEL_RBF)
    train_dataset, test_dataset = \
        test_datasets.get_simple_polynomial_datasets(n=1000)
    scaler = StandardScaler()
    scaler.fit(train_dataset.data)
    print("Train mean: " + str(scaler.transform(train_dataset.data).mean(axis=0)))
    print("Test mean: " + str(scaler.transform(test_dataset.data).mean(axis=0)))
    print("Train std: " + str(scaler.transform(train_dataset.data).std(axis=0)))
    # Fixed label typo: this line previously printed "Test str:".
    print("Test std: " + str(scaler.transform(test_dataset.data).std(axis=0)))
    self._test_dataset(
        model, train_dataset, test_dataset, 0,
        title="SVR with RBF kernel, scaled CV on poly dataset")
def plot_validation_curve(model_type, train_dataset, feature_scaling,
                          polynomial_degree, kernel, svr_degree, svr_epsilon,
                          svr_gamma, svr_coef0, sparse, score_attr=None, cv=5,
                          alpha=None, C=None, n_jobs=-1, save=False,
                          display=True, filename="validation_curve"):
    """Compute and plot a validation curve for a ridge-regression or SVR model.

    The curve sweeps the model's regularization parameter (``alpha`` for ridge
    regression, ``C`` for SVR) and plots mean +/- std of the training and
    cross-validation scores on a log-x axis.

    :param model_type: one of Model.MODEL_TYPE_RIDREG / Model.MODEL_TYPE_SVR
                       (case-insensitive); other types are skipped with a warning.
    :param alpha: list of alpha values to sweep (required for ridge regression).
    :param C: list of C values to sweep (required for SVR).
    :param save: write the figure to ``<filename>_<timestamp>.png`` at 400 dpi.
    :param display: show the figure interactively.
    :return: None. Returns early (with a warning) on bad/missing param ranges.
    """
    if not save and not display:
        # Nothing to do -- skip the (expensive) curve computation entirely.
        return
    estimator = Model.create_model(model_type,
                                   feature_scaling=feature_scaling,
                                   polynomial_degree=polynomial_degree,
                                   cross_validation=False,
                                   kernel=kernel,
                                   svr_degree=svr_degree,
                                   svr_epsilon=svr_epsilon,
                                   svr_gamma=svr_gamma,
                                   svr_coef0=svr_coef0,
                                   sparse=sparse)
    model_type = model_type.upper()
    if model_type == Model.MODEL_TYPE_RIDREG:
        if alpha is None or not isinstance(alpha, list) or len(alpha) == 0:
            logging.warning(
                "Validation curve cannot be drawn for %s when no alpha range is specified."
                % model_type)
            return
        param_name = "RIDGE_REGRESSION__alpha"
        param_range = sorted(alpha)
    elif model_type == Model.MODEL_TYPE_SVR:
        if C is None or not isinstance(C, list) or len(C) == 0:
            logging.warning(
                "Validation curve cannot be drawn for %s when no C range is specified."
                % model_type)
            return
        param_name = "SVR__C"
        param_range = sorted(C)
    else:
        logging.warning(
            "Validation curve is not applicable to Model type %s." % model_type)
        return
    logging.info("Calculating validation curve")
    train_scores, valid_scores = validation_curve(estimator=estimator,
                                                  X=train_dataset.data,
                                                  y=train_dataset.target,
                                                  param_name=param_name,
                                                  param_range=param_range,
                                                  cv=cv,
                                                  scoring=score_attr,
                                                  n_jobs=n_jobs)
    # Aggregate the per-fold scores for plotting mean +/- std bands.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    valid_scores_mean = np.mean(valid_scores, axis=1)
    valid_scores_std = np.std(valid_scores, axis=1)
    logging.debug("Plotting validation curve")
    plt.title("Validation curve")
    plt.xlabel(param_name)
    # Fixed precedence bug: the original
    #   score_attr.upper() if score_attr else "" + "Score"
    # dropped the "Score" suffix whenever score_attr was set.
    plt.ylabel((score_attr.upper() + " " if score_attr else "") + "Score")
    plt.semilogx(param_range, train_scores_mean, 'o-',
                 label="Training score", color="r")
    plt.fill_between(param_range,
                     train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std,
                     alpha=0.2, color="r")
    plt.semilogx(param_range, valid_scores_mean, 'o-',
                 label="Cross-Validation score", color="g")
    plt.fill_between(param_range,
                     valid_scores_mean - valid_scores_std,
                     valid_scores_mean + valid_scores_std,
                     alpha=0.2, color="g")
    plt.legend(loc="best")
    if display:
        plt.show()
    if save:
        plt.savefig(filename + "_" +
                    datetime.now().strftime("%Y_%m_%d_%H_%M") + ".png",
                    dpi=400)
    plt.clf()
# -*- coding: utf-8 -*- import telebot import config as cfg import cherrypy import requests import shutil import os from ml import Model from threading import Timer bot = telebot.TeleBot(cfg.token) model = Model() users = {} rybkin_id = 122358697 env_var = { 'last_theme': None, 'get_response': None, 'expected': 'query', 'timer': None, 'timer_desc': '', 'try_count': 0, 'context': None } ''' class WebhookServer(object): @cherrypy.expose
class VkBot:
    """Simple VK chat bot: answers a fixed set of commands (greeting, weather,
    time, goodbye, consultation) and otherwise treats the message as a product
    id for the recommendation model.

    NOTE(review): relies on module-level `requests`, `bs4` and `Model`
    imports from elsewhere in the file.
    """

    def __init__(self, user_id):
        # Bind the bot to one VK user; resolving the name hits vk.com over HTTP.
        print(f"Создан объект бота для пользователя {user_id}!")
        self._USER_ID = user_id
        self._USERNAME = self._get_user_name_from_vk_id(user_id)
        # Recognized chat commands, compared in uppercase.
        self._COMMANDS = [
            "ПРИВЕТ", "ПОГОДА", "ВРЕМЯ", "ПОКА", "КОНСУЛЬТАЦИЯ", "НАЧАТЬ"
        ]
        # User ids that have already been greeted.
        self._REGISTED_ID = []
        self.model = Model()

    def _get_user_name_from_vk_id(self, user_id):
        """Scrape the user's first name from the title of their VK profile page."""
        request = requests.get("https://vk.com/id" + str(user_id))
        bs = bs4.BeautifulSoup(request.text, "html.parser")
        user_name = self._clean_all_tag_from_str(bs.findAll("title")[0])
        return user_name.split()[0]

    def new_message(self, message):
        """Dispatch an incoming message to a command handler and return the reply text."""
        # "Привет" / "Начать" -- greet and remember the user.
        if message.upper() == self._COMMANDS[0] or message.upper(
        ) == self._COMMANDS[5]:
            if self._USER_ID not in self._REGISTED_ID:
                self._REGISTED_ID.append(self._USER_ID)
            return f"Привет-привет, {self._USERNAME}!"

        # "Погода" -- weather report.
        elif message.upper() == self._COMMANDS[1]:
            return self._get_weather()

        # "Время" -- current time.
        elif message.upper() == self._COMMANDS[2]:
            return self._get_time()

        # "Пока" -- goodbye.
        elif message.upper() == self._COMMANDS[3]:
            return f"Пока-пока, {self._USERNAME}!"

        # "Консультация" -- consultation.
        elif message.upper() == self._COMMANDS[4]:
            return f"Готов к консультации, {self._USERNAME}!"

        # Anything else: treat the message as a product id and query the
        # recommendation model for a similar and a complementary product.
        else:
            try:
                res1 = "Похожий товар: " + \
                    str(self.model.most_similar_id(int(message))) + "\n"
            except Exception as e:
                res1 = "Похожий товар не найден " + str(e) + "\n"
            try:
                res2 = "Дополнительный товар: " + \
                    str(self.model.predict_output_id(int(message)))
            except Exception as e:
                res2 = "Дополнительный товар не найден " + str(e) + "\n"
            return res1 + res2

    def _get_time(self):
        """Scrape the current time from my-calend.ru."""
        request = requests.get("https://my-calend.ru/date-and-time-today")
        b = bs4.BeautifulSoup(request.text, "html.parser")
        return self._clean_all_tag_from_str(
            str(b.select(".page")[0].findAll("h2")[1])).split()[1]

    @staticmethod
    def _clean_all_tag_from_str(string_line):
        """
        Strip all tag markup (everything between '<' and '>') from the string.

        :param string_line: string to clean
        :return: the cleaned string
        """
        result = ""
        not_skip = True
        for i in list(string_line):
            if not_skip:
                if i == "<":
                    not_skip = False
                else:
                    result += i
            else:
                if i == ">":
                    not_skip = True
        return result

    @staticmethod
    def _get_weather(city: str = "москва") -> str:
        """Scrape a morning/afternoon weather summary for *city* from sinoptik.com.ru.

        Returns a multi-line string (annotation fixed: the original said
        ``-> list`` but the method builds and returns a str).
        """
        request = requests.get("https://sinoptik.com.ru/погода-" + city)
        b = bs4.BeautifulSoup(request.text, "html.parser")
        p3 = b.select('.temperature .p3')
        weather1 = p3[0].getText()
        p4 = b.select('.temperature .p4')
        weather2 = p4[0].getText()
        p5 = b.select('.temperature .p5')
        weather3 = p5[0].getText()
        p6 = b.select('.temperature .p6')
        weather4 = p6[0].getText()
        result = ''
        result = result + ('Утром :' + weather1 + ' ' + weather2) + '\n'
        result = result + ('Днём :' + weather3 + ' ' + weather4) + '\n'
        temp = b.select('.rSide .description')
        weather = temp[0].getText()
        result = result + weather.strip()
        return result

    @staticmethod
    def ml_func():
        # Placeholder -- intentionally does nothing.
        return
from flask import Flask, render_template, request

app = Flask(__name__)

from ml import Model

# Dictionary model, loaded once at import time for all requests.
model = Model('15_june_2020_v1')


@app.route('/')
def home():
    # With a ?query= parameter, render the lookup results; otherwise the
    # plain landing page.
    query = request.args.get('query')
    if query:
        res = resML(query)
        return render_template("result.html", res=res, query=query)
    return render_template("home.html")


def resML(query):
    """
    This takes in the query from the user and returns the top 5 words and meanings.

    INPUT: string query
    OUTPUT: list of 5 tuples [('word', 'meaning')]
    """
    words = model.get_words(query)
    result = model.get_meanings(words)
    return result


# NOTE(review): this handler's body continues beyond this chunk of the file.
@app.route('/chess')
def chess():
# Route INFO-level logging to stdout with a timestamped format.
# NOTE(review): `root` is obtained earlier in the file (not visible here).
root.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)

# ML MODEL
from ml import Model

# Load the LSTM model from disk (hard-coded absolute paths); train it only
# when no saved model exists yet.
model = Model('/home/vbermudez/test/python_ai_test/csv/data.csv',
              '/home/vbermudez/test/python_ai_test/model/lstm_model.h5')
if not model.exists:
    model.train()
else:
    model.prepare_model()

# Log basic dataset statistics and a few sample predictions.
row_count, tip_avg = model.get_stats()
logging.info(f'LINEAS={row_count}')
logging.info(f'AVERAGE tip_amout={tip_avg}')
values = [[1.7, 1.5], [6, 0.5], [1, 0]]
logging.info(f'Predictions:\n{model.predict( values )}')

# In memory MongoDB (MontyDB)
def main():
    """End-to-end ML pipeline: read config and DB, build train/test datasets,
    train a model, predict (plus naive baselines), and emit scoreboard
    rankings, textual reports and charts per the reporting configuration.

    NOTE(review): `add_to_report` and `report_str` are defined elsewhere in
    this file; `add_to_report` apparently appends to the module-level
    `report_str` buffer -- confirm against the full file.
    """
    cli_args = parse_arguments()
    try:
        Config.read_config(cli_args.config_file)
    except ConfigError:
        die("Config File %s could not be read correctly! " %
            cli_args.config_file)
    init_logging()
    logging.info("Starting ML Pipeline!")
    logging.info("Initializing Database")
    try:
        DB.init_db()
    except DBError:
        die("DB Model could not be created!")

    # --- Datasets ---------------------------------------------------------
    logging.info("Reading training dataset")
    train_dataset = Dataset.get_dataset(
        Config.repository_name, Config.dataset_train_start,
        Config.dataset_train_end, Config.dataset_features,
        Config.dataset_target,
        ngram_sizes=Config.dataset_ngram_sizes,
        ngram_levels=Config.dataset_ngram_levels, label="Training",
        cache=Config.dataset_cache, eager_load=Config.database_eager_load,
        sparse=Config.dataset_sparse)
    if train_dataset is None:
        die("Training Dataset could not be created!")
    if Config.ml_log_transform_target:
        # Targets are trained in log space and exp-transformed back before
        # reporting (below).
        train_dataset.target = LogTransform.log_transform(
            train_dataset.target, base=Config.ml_log_transform_base)
    logging.info("Reading test dataset")
    test_dataset = Dataset.get_dataset(
        Config.repository_name, Config.dataset_test_start,
        Config.dataset_test_end, Config.dataset_features,
        Config.dataset_target,
        ngram_sizes=Config.dataset_ngram_sizes,
        ngram_levels=Config.dataset_ngram_levels, label="Test",
        cache=Config.dataset_cache, eager_load=Config.database_eager_load,
        sparse=Config.dataset_sparse)
    if test_dataset is None:
        die("Test Dataset could not be created!")
    if Config.ml_log_transform_target:
        test_dataset.target = LogTransform.log_transform(
            test_dataset.target, base=Config.ml_log_transform_base)

    # --- Model ------------------------------------------------------------
    logging.info("Creating and training model with training dataset")
    model = Model.create_model(Config.ml_model,
                               feature_scaling=Config.ml_feature_scaling,
                               polynomial_degree=Config.ml_polynomial_degree,
                               cross_validation=Config.ml_cross_validation,
                               alpha=Config.ml_alpha, C=Config.ml_C,
                               kernel=Config.ml_kernel,
                               svr_degree=Config.ml_svr_degree,
                               svr_epsilon=Config.ml_svr_epsilon,
                               svr_gamma=Config.ml_svr_gamma,
                               svr_coef0=Config.ml_svr_coef0,
                               sparse=Config.dataset_sparse)
    Model.train_model(model, train_dataset)
    logging.info("Model successfully trained.")

    # --- Predictions: trained model plus three naive baselines -------------
    logging.debug("Creating predictions...")
    baseline_mean_prediction = Predict.predict_mean(
        train_dataset, test_dataset.target.shape[0])
    baseline_med_prediction = Predict.predict_median(
        train_dataset, test_dataset.target.shape[0])
    baseline_wr_prediction = Predict.predict_weighted_random(
        train_dataset, test_dataset.target.shape[0])
    training_prediction = Predict.predict_with_model(train_dataset, model)
    test_prediction = Predict.predict_with_model(test_dataset, model)

    logging.debug("Creating reports from predictions")
    train_target = train_dataset.target
    test_target = test_dataset.target
    if Config.ml_log_transform_target:
        # Undo the log transform so all reports use the original target scale.
        train_target = LogTransform.exp_transform(
            train_target, Config.ml_log_transform_base)
        training_prediction = LogTransform.exp_transform(
            training_prediction, Config.ml_log_transform_base)
        test_target = LogTransform.exp_transform(
            test_target, Config.ml_log_transform_base)
        test_prediction = LogTransform.exp_transform(
            test_prediction, Config.ml_log_transform_base)
        baseline_mean_prediction = LogTransform.exp_transform(
            baseline_mean_prediction, Config.ml_log_transform_base)
        baseline_med_prediction = LogTransform.exp_transform(
            baseline_med_prediction, Config.ml_log_transform_base)
        baseline_wr_prediction = LogTransform.exp_transform(
            baseline_wr_prediction, Config.ml_log_transform_base)
    baseline_mean_report = Reporting.Report(test_target,
                                            baseline_mean_prediction,
                                            "Mean Baseline")
    baseline_med_report = Reporting.Report(test_target,
                                           baseline_med_prediction,
                                           "Median Baseline")
    baseline_wr_report = Reporting.Report(test_target, baseline_wr_prediction,
                                          "Weighted Random Baseline")
    training_report = Reporting.Report(train_target, training_prediction,
                                       "Training")
    test_report = Reporting.Report(test_target, test_prediction, "Test")

    # --- Scoreboard: rank this run against previous ones --------------------
    base_entry = Scoreboard.create_entry_from_config(baseline_wr_report)
    test_entry = Scoreboard.create_entry_from_config(test_report)
    Scoreboard.add_entry(base_entry)
    Scoreboard.add_entry(test_entry)
    Scoreboard.write_entries()
    base_ranking = Scoreboard.get_ranking(base_entry,
                                          Scoreboard.RATING_ATTRIBUTE_R2S)
    test_ranking = Scoreboard.get_ranking(test_entry,
                                          Scoreboard.RATING_ATTRIBUTE_R2S)

    # --- Textual report -----------------------------------------------------
    if Config.reporting_display or Config.reporting_save:
        config_table = Reporting.get_config_table()
        add_to_report(config_table.table)
        add_to_report(baseline_mean_report)
        add_to_report(baseline_med_report)
        add_to_report(baseline_wr_report)
        add_to_report(training_report)
        add_to_report(test_report)
        comparisation_table = Reporting.get_report_comparisation_table(
            [baseline_wr_report, training_report, test_report],
            [Reporting.SCORE_R2S, Reporting.SCORE_MAE, Reporting.SCORE_MDE])
        add_to_report(comparisation_table.table)
        category_table = Reporting.get_category_table(
            train_target, training_prediction, label="Training prediction")
        add_to_report(category_table.table)
        category_table = Reporting.get_category_table(
            test_target, test_prediction, label="Test prediction")
        add_to_report(category_table.table)
        confusion_matrix_table, classification_report = Reporting.get_confusion_matrix(
            train_target, training_prediction, label="Training prediction")
        add_to_report(confusion_matrix_table.table)
        add_to_report(classification_report)
        confusion_matrix_table, classification_report = Reporting.get_confusion_matrix(
            test_target, test_prediction, label="Test prediction")
        add_to_report(confusion_matrix_table.table)
        add_to_report(classification_report)
        if Config.ml_polynomial_degree == 1:
            # Determining top features only makes sense without polynomial features.
            top_features_table = Reporting.get_top_features_table(
                model, train_dataset.feature_list, 10)
            if top_features_table is not None:
                add_to_report(top_features_table.table)
        add_to_report("Base ranking: %i" % base_ranking)
        add_to_report("Test ranking: %i" % test_ranking)
        if test_ranking == 0:
            add_to_report("Congratulations! Best one so far!")
        elif base_ranking > test_ranking:
            add_to_report("Hey, at least better than the baseline!")
        else:
            add_to_report("Do you even learn?")
    if Config.reporting_display:
        print(report_str)
    if Config.reporting_save:
        Reporting.save_report_file(report_str, filename=Config.reporting_file)

    # --- Charts -------------------------------------------------------------
    if Config.reporting_target_histogram:
        Reporting.plot_target_histogram(
            train_dataset,
            display=Config.reporting_display_charts,
            save=Config.reporting_save_charts,
        )
    if Config.reporting_validation_curve and Config.ml_cross_validation:
        Reporting.plot_validation_curve(
            model_type=Config.ml_model,
            train_dataset=train_dataset,
            alpha=Config.ml_alpha,
            C=Config.ml_C,
            feature_scaling=Config.ml_feature_scaling,
            polynomial_degree=Config.ml_polynomial_degree,
            kernel=Config.ml_kernel,
            svr_degree=Config.ml_svr_degree,
            svr_epsilon=Config.ml_svr_epsilon,
            svr_gamma=Config.ml_svr_gamma,
            svr_coef0=Config.ml_svr_coef0,
            sparse=Config.dataset_sparse,
            display=Config.reporting_display_charts,
            save=Config.reporting_save_charts)
    if Config.reporting_learning_curve:
        Reporting.plot_learning_curve(
            train_dataset=train_dataset,
            estimator=model,
            display=Config.reporting_display_charts,
            save=Config.reporting_save_charts)
    if Config.reporting_confusion_matrix_chart:
        Reporting.plot_confusion_matrix(
            ground_truth=train_target,
            predicted=training_prediction,
            label="Training",
            display=Config.reporting_display_charts,
            save=Config.reporting_save_charts)
        Reporting.plot_confusion_matrix(
            ground_truth=test_target,
            predicted=test_prediction,
            label="Test",
            display=Config.reporting_display_charts,
            save=Config.reporting_save_charts)
    logging.info("All done. Exiting ML Pipeline")
def main(self):
    """Run the Avazu pipeline: load/sample data, engineer features, train the
    model, then either write predictions to output.csv (when a validation
    file was given) or print the log-loss score.
    """
    # Step 1 --> Get and sample data from the original dataset
    print("[AVAZU]\tCreating Data Handler for train and test files...")
    if self.validation_file is None:
        data = DataHandler(self.train_file, self.train_file)
    else:
        data = DataHandler(self.train_file, self.validation_file)
    # Train and test splits are then available as data._train / data._test.
    data.create_train_and_test(self.sampling)
    print("[AVAZU]\tData read and split successfully.")

    # Step 2 --> Feature Engineering
    feat_eng = FeatureEngineering()
    # NOTE(review): `need_feateng` is not defined in this scope -- it looks
    # like it should be `self.need_feateng`; confirm before enabling this path.
    if need_feateng:
        print("[AVAZU]\tStarting feature engineering operations...")
        #data.transform_data(feat_eng.append_hours,feat_eng.append_counters_uniques)
        #data.transform_data(feat_eng.append_days,feat_eng.)
        print("[AVAZU]\tFinished feature engineering operations.")
    # Drop the target/id from the training frame and low-value raw columns.
    data._train.drop(['click', 'id'], axis=1, inplace=True)
    data.drop([
        'C1', 'device_conn_type', 'device_type', 'banner_pos', 'C15', 'C16',
        'C17', 'C18', 'C19', 'C20', 'C21', 'device_model', 'site_category',
        'site_domain', 'site_id', 'app_category', 'app_domain', 'app_id',
        'hours', 'device_id', 'device_ip'
    ])
    print("[AVAZU]\tSelecting K-Best Features...")
    idxs = feat_eng.select_best_k_features(data._train, data._ytrain, 5)
    print(idxs)
    print("[AVAZU]\tFeatures selected.")

    # Step 3 --> Train model and test it
    print("Training and testing the model...")
    m = Model(idxs)
    m.train(data)
    m.predict(data)
    if self.validation_file is not None:
        print(type(data._test['id'].ravel()))
        df = pd.DataFrame(
            {
                'id': data._test['id'].ravel(),
                'click': m._ypred
            },
            columns=['id', 'click'])
        df.to_csv('output.csv', index=False)
        print("Output in the file: output.csv")
    else:
        print("Log Loss Result: " + str(m.log_loss(data)))


def print_help():
    """Print the command-line usage for avazu.py."""
    print("python avazu.py <args>\n\n")
    print(
        "------------------------------------------------------------------"
    )
    print("List of arguments:")
    # Fixed: "30\%" printed a literal backslash; "inproved" -> "improved".
    print(
        "\t -t: Specifies a training file to read from a csv file\t[REQUIRED]"
    )
    print(
        "\t -v: Specifies a validation/test file to read from a csv file. If the file is not specified, 30% of the training samples will be used to build the validation dataset (Default: Not specified)"
    )
    print(
        "\t -s: Specifies a sampling ratio to be performed over the training data. If none is specified, it will use the whole dataset. (Default: Entire dataset)"
    )
    print(
        "\t -f: (To be improved) Specifies if one desires to apply feature engineering over the dataset or not. (Default: No transformation applied)"
    )
    print("\t -h: Used to print this menu")
    print(
        "------------------------------------------------------------------"
    )
def teste():
    """Train a fresh model and predict on the 'logicals' field of the JSON body.

    Returns a JSON payload of the form {"data": "<prediction>"}.
    """
    payload = request.json['logicals']
    # NOTE(review): a new model is trained on every request -- presumably
    # cheap; confirm against Model.train() before caching at module level.
    predictor = Model()
    predictor.train()
    outcome = predictor.predict(payload)
    return jsonify({'data': str(outcome)})