def run_training() -> None:
    """Train and persist the car-depreciation LightGBM model.

    Loads the raw datasets, runs them through the preprocessing
    pipeline, fits a LightGBM regressor, prints the hold-out MSE,
    and saves both the label-encoding dictionary and the trained
    model under ``config.TRAINED_MODEL_DIR``.
    """
    # ----- load raw datasets -----
    idv = load_dataset(file_name=config.IDV_MASTER)
    carwale = load_dataset(file_name=config.CARWALE)
    popularity = load_dataset(file_name=config.POPULARITY)
    valuation = load_dataset(file_name=config.IDV_VALUATION)
    use_final_grid = load_dataset(file_name=config.MARGIN_DIVISION)

    # ----- preprocessing / feature engineering -----
    carwale = pipeline.carwale_pp.transform(carwale)
    idv = pipeline.idv_pp.transform(idv)
    carwale_idv_m = pipeline.carwale_idv_merger.transform(carwale, idv)
    data = pipeline.color_city_transform.transform(carwale_idv_m)
    final = pipeline.data_for_pop.transform(data)
    pop = pipeline.popularity_cleaner.transform(popularity)
    final_ = pipeline.data_pop_merger.transform(final, pop)
    final_ = pipeline.ex_showroom_price.transform(final_, valuation, idv)
    final_ = pipeline.outlier_dep.transform(final_, use_final_grid)
    data = pipeline.training_prep.transform(final_)

    # Persist the label-encoding dictionary built by training_prep so
    # serving code can apply the exact same encodings at predict time.
    save_path = config.TRAINED_MODEL_DIR / 'label_en_dic.pkl'
    joblib.dump(pipeline.training_prep.dic, save_path)

    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES_CARWALE],
        data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    params = {
        "objective": "regression",
        "metric": "mae",
        "num_leaves": 500,
        "learning_rate": 0.005,
        "bagging_fraction": 0.6,
        "feature_fraction": 0.6,
        # BUG FIX: the LightGBM parameter is "bagging_freq" (alias
        # "subsample_freq"). The previous misspelling
        # "bagging_frequency" was silently ignored, so
        # bagging_fraction had no effect at all.
        "bagging_freq": 6,
        "bagging_seed": 42,
        "verbosity": -1,
        "seed": 42,
    }

    lgb_train_data = lgb.Dataset(X_train, label=y_train)
    model = lgb.train(params,
                      lgb_train_data,
                      num_boost_round=10000,
                      verbose_eval=500)

    # Hold-out evaluation — printed only, not persisted.
    y_pred_lgbm = model.predict(X_test, num_iteration=model.best_iteration)
    score = mean_squared_error(y_test, y_pred_lgbm)
    print(score)

    save_file_name = 'car_dep_model.pkl'
    save_path = config.TRAINED_MODEL_DIR / save_file_name
    joblib.dump(model, save_path)
    print('saved pipeline')
def home():
    """Flask view for the index page.

    On POST: reads the car attributes from the submitted form,
    label-encodes the categorical fields using the persisted
    encoding dictionary, looks up the popularity index, runs the
    trained model and renders the page with the predicted price.
    On GET: simply renders the empty form.
    """
    if request.method == "POST":
        # Raw form fields — every value arrives as a string.
        make = request.form['make']
        model = request.form['model']
        variant = request.form['variant']
        color = request.form['color']
        city = request.form['city']
        age = request.form['age']
        owners = request.form['owners']
        fuel_type = request.form['fuel_type']
        kms_run = request.form['kms_run']
        transmission = request.form['transmission']
        ex_showroom_price = request.form['ex_showroom_price']

        # Popularity index for the exact make/model/variant combination.
        data = load_dataset(file_name=config.CLEANED_POPULARITY)
        popularity = int(
            data.loc[(data['make'] == str(make))
                     & (data['model'] == str(model))
                     & (data['variant'] == str(variant)),
                     'Popularity Index'].iloc[0])

        # Label-encode categoricals with the training-time dictionary.
        # BUG FIX: the fuel_type lookup was performed twice; the
        # duplicate assignment has been removed.
        _price_pipe = load_pipeline(file_name=config.LABEL_ENCO_DIC)
        make1 = _price_pipe['make'][make]
        model1 = _price_pipe['model'][model]
        # NOTE(review): variant1 is encoded but never fed to the model
        # below — kept for behavior parity; confirm whether the model
        # was meant to receive it.
        variant1 = _price_pipe['variant'][variant]
        fuel_type1 = _price_pipe['fuel_type'][fuel_type]
        color1 = _price_pipe['color'][color]
        city1 = _price_pipe['city'][city]
        transmission1 = _price_pipe['transmission'][transmission]

        # Single-row frame in the feature layout the model expects.
        df = pd.DataFrame({'make': make1,
                           'model': model1,
                           'city': city1,
                           'owners': int(owners),
                           'kms_run': int(kms_run),
                           'age': int(age),
                           'Popularity_Index': popularity,
                           'ex_showroom_price': int(ex_showroom_price),
                           'fuel_type': fuel_type1,
                           'transmission': int(transmission1),
                           'color': color1},
                          index=[0])

        _price_pipe = load_pipeline(file_name=config.TRAINED_MODEL)
        result = _price_pipe.predict(df)[0]
        return render_template('index.html', result=result)
    else:
        return render_template('index.html')
def run_training() -> None:
    """Fit the price pipeline on the training data and persist it."""
    # Read the full training dataset from disk.
    training_data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Hold out 10% of the rows for testing.
    features = training_data[config.FEATURES]
    target = training_data[config.TARGET]
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.1,
        random_state=0,
    )

    # The target contains outliers, so the pipeline is fitted
    # against its logarithm rather than the raw values.
    log_target = np.log(y_train)
    pipeline.price_pipe.fit(X_train[config.FEATURES], log_target)

    # Persist the fitted pipeline.
    _logger.info("saving model version")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
    _logger.info("------model successfully saved----")
# ------ general setup ---------- device = cfg_rob.device save_path = os.path.join(config.RESULTS_PATH, "attacks") save_results = os.path.join(save_path, "table_adv.pkl") do_plot = True save_plot = True save_table = True # ----- data prep ----- X_test, C_test, Y_test = [ tmp.unsqueeze(-2).to(device) for tmp in load_dataset(config.set_params["path"], subset="test") ] # ----- attack setup ----- # select samples samples = tuple(range(50)) it_init = 100 keep_init = 50 # select range relative noise noise_rel = torch.tensor([0.00, 0.005, 0.01, 0.03, 0.05, 0.075, 0.10]) # select measure for reconstruction error err_measure = err_measure_l2
# NOTE(review): this chunk starts mid-way through a parameter-dict
# literal whose opening brace lies outside this view; the fragment
# below is its tail.
    torch.optim.lr_scheduler.StepLR,
    "scheduler_params": {
        "step_size": 1,
        "gamma": 1.0,  # gamma of 1.0 keeps the learning rate constant
    },
    "acc_steps": [1, 200],
    "train_transform": None,
    "val_transform": None,
}

# -----data prep -----
# NOTE(review): load_dataset appears to return a 3-tuple of tensors;
# each gets a singleton measurement axis via unsqueeze(-2) and is
# moved to the device — confirm against load_dataset's docs.
X_train, C_train, Y_train = [
    tmp.unsqueeze(-2).to(device)
    for tmp in load_dataset(config.set_params["path"], subset="train")
]
X_val, C_val, Y_val = [
    tmp.unsqueeze(-2).to(device)
    for tmp in load_dataset(config.set_params["path"], subset="val")
]

# ------ save hyperparameters -------
# Dump all subnet / iterative-net hyperparameters to a plain-text
# file alongside the saved model for later reference.
os.makedirs(train_params["save_path"][-1], exist_ok=True)
with open(
    os.path.join(train_params["save_path"][-1], "hyperparameters.txt"), "w"
) as file:
    for key, value in subnet_params.items():
        file.write(key + ": " + str(value) + "\n")
    for key, value in it_net_params.items():
        file.write(key + ": " + str(value) + "\n")
# Download the registered model into the working directory.
model.download(target_dir=os.getcwd(), exist_ok=True)

# Verify the downloaded model file exists (os.stat raises otherwise).
# BUG FIX: the path was built with an embedded backslash
# ("trained_models\model_..."), where "\m" is an invalid escape
# sequence (SyntaxWarning on modern Python) and the literal
# backslash breaks the path on POSIX systems. Let os.path.join
# pick the correct separator instead.
file_path = os.path.join(
    os.getcwd(), "trained_models", "model_n_estimators_7.pkl"
)
os.stat(file_path)

# Testing score.py
# Load a small slice of the training data and run it through the
# scoring entry points as a smoke test.
from data_management import load_dataset
import configuracion
import score

data = load_dataset(file_name=configuracion.TRAINING_DATA_FILE)
data = data.iloc[:8, :]          # first 8 rows are enough for a smoke test
data = data.to_json()            # score.run expects a JSON payload

score.init()
pred = score.run(data)
print(pred)

# Create environment file
'''
- Add package requeriments in this file -> myenv.yml
'''
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()