def run_training() -> None:
    """Train and persist the car-depreciation LightGBM model.

    Loads the raw datasets, runs them through the preprocessing
    pipeline, fits a LightGBM regressor, prints the hold-out MSE,
    and saves both the label-encoding dictionary and the trained
    model under ``config.TRAINED_MODEL_DIR``.
    """
    # ----- load raw datasets -----
    idv = load_dataset(file_name=config.IDV_MASTER)
    carwale = load_dataset(file_name=config.CARWALE)
    popularity = load_dataset(file_name=config.POPULARITY)
    valuation = load_dataset(file_name=config.IDV_VALUATION)
    use_final_grid = load_dataset(file_name=config.MARGIN_DIVISION)

    # ----- preprocessing / feature engineering -----
    carwale = pipeline.carwale_pp.transform(carwale)
    idv = pipeline.idv_pp.transform(idv)
    carwale_idv_m = pipeline.carwale_idv_merger.transform(carwale, idv)
    data = pipeline.color_city_transform.transform(carwale_idv_m)
    final = pipeline.data_for_pop.transform(data)
    pop = pipeline.popularity_cleaner.transform(popularity)
    final_ = pipeline.data_pop_merger.transform(final, pop)
    final_ = pipeline.ex_showroom_price.transform(final_, valuation, idv)
    final_ = pipeline.outlier_dep.transform(final_, use_final_grid)
    data = pipeline.training_prep.transform(final_)

    # Persist the label-encoding dictionary built by training_prep so
    # serving code can apply the exact same encodings at predict time.
    save_path = config.TRAINED_MODEL_DIR / 'label_en_dic.pkl'
    joblib.dump(pipeline.training_prep.dic, save_path)

    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES_CARWALE],
        data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    params = {
        "objective": "regression",
        "metric": "mae",
        "num_leaves": 500,
        "learning_rate": 0.005,
        "bagging_fraction": 0.6,
        "feature_fraction": 0.6,
        # BUG FIX: the LightGBM parameter is "bagging_freq" (alias
        # "subsample_freq"). The previous misspelling
        # "bagging_frequency" was silently ignored, so
        # bagging_fraction had no effect at all.
        "bagging_freq": 6,
        "bagging_seed": 42,
        "verbosity": -1,
        "seed": 42,
    }

    lgb_train_data = lgb.Dataset(X_train, label=y_train)
    model = lgb.train(params,
                      lgb_train_data,
                      num_boost_round=10000,
                      verbose_eval=500)

    # Hold-out evaluation — printed only, not persisted.
    y_pred_lgbm = model.predict(X_test, num_iteration=model.best_iteration)
    score = mean_squared_error(y_test, y_pred_lgbm)
    print(score)

    save_file_name = 'car_dep_model.pkl'
    save_path = config.TRAINED_MODEL_DIR / save_file_name
    joblib.dump(model, save_path)
    print('saved pipeline')
def home():
    """Flask view for the index page.

    On POST: reads the car attributes from the submitted form,
    label-encodes the categorical fields using the persisted
    encoding dictionary, looks up the popularity index, runs the
    trained model and renders the page with the predicted price.
    On GET: simply renders the empty form.
    """
    if request.method == "POST":
        # Raw form fields — every value arrives as a string.
        make = request.form['make']
        model = request.form['model']
        variant = request.form['variant']
        color = request.form['color']
        city = request.form['city']
        age = request.form['age']
        owners = request.form['owners']
        fuel_type = request.form['fuel_type']
        kms_run = request.form['kms_run']
        transmission = request.form['transmission']
        ex_showroom_price = request.form['ex_showroom_price']

        # Popularity index for the exact make/model/variant combination.
        data = load_dataset(file_name=config.CLEANED_POPULARITY)
        popularity = int(
            data.loc[(data['make'] == str(make))
                     & (data['model'] == str(model))
                     & (data['variant'] == str(variant)),
                     'Popularity Index'].iloc[0])

        # Label-encode categoricals with the training-time dictionary.
        # BUG FIX: the fuel_type lookup was performed twice; the
        # duplicate assignment has been removed.
        _price_pipe = load_pipeline(file_name=config.LABEL_ENCO_DIC)
        make1 = _price_pipe['make'][make]
        model1 = _price_pipe['model'][model]
        # NOTE(review): variant1 is encoded but never fed to the model
        # below — kept for behavior parity; confirm whether the model
        # was meant to receive it.
        variant1 = _price_pipe['variant'][variant]
        fuel_type1 = _price_pipe['fuel_type'][fuel_type]
        color1 = _price_pipe['color'][color]
        city1 = _price_pipe['city'][city]
        transmission1 = _price_pipe['transmission'][transmission]

        # Single-row frame in the feature layout the model expects.
        df = pd.DataFrame({'make': make1,
                           'model': model1,
                           'city': city1,
                           'owners': int(owners),
                           'kms_run': int(kms_run),
                           'age': int(age),
                           'Popularity_Index': popularity,
                           'ex_showroom_price': int(ex_showroom_price),
                           'fuel_type': fuel_type1,
                           'transmission': int(transmission1),
                           'color': color1},
                          index=[0])

        _price_pipe = load_pipeline(file_name=config.TRAINED_MODEL)
        result = _price_pipe.predict(df)[0]
        return render_template('index.html', result=result)
    else:
        return render_template('index.html')
def run_training() -> None:
    """Fit the price pipeline on the training data and persist it."""
    # Read the full training dataset from disk.
    training_data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Hold out 10% of the rows for testing.
    features = training_data[config.FEATURES]
    target = training_data[config.TARGET]
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.1,
        random_state=0,
    )

    # The target contains outliers, so the pipeline is fitted
    # against its logarithm rather than the raw values.
    log_target = np.log(y_train)
    pipeline.price_pipe.fit(X_train[config.FEATURES], log_target)

    # Persist the fitted pipeline.
    _logger.info("saving model version")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
    _logger.info("------model successfully saved----")
# ------ general setup ---------- device = cfg_rob.device save_path = os.path.join(config.RESULTS_PATH, "attacks") save_results = os.path.join(save_path, "table_adv.pkl") do_plot = True save_plot = True save_table = True # ----- data prep ----- X_test, C_test, Y_test = [ tmp.unsqueeze(-2).to(device) for tmp in load_dataset(config.set_params["path"], subset="test") ] # ----- attack setup ----- # select samples samples = tuple(range(50)) it_init = 100 keep_init = 50 # select range relative noise noise_rel = torch.tensor([0.00, 0.005, 0.01, 0.03, 0.05, 0.075, 0.10]) # select measure for reconstruction error err_measure = err_measure_l2
# NOTE(review): this chunk starts mid-way through a parameter-dict
# literal whose opening brace lies outside this view; the fragment
# below is its tail.
    torch.optim.lr_scheduler.StepLR,
    "scheduler_params": {
        "step_size": 1,
        "gamma": 1.0,  # gamma of 1.0 keeps the learning rate constant
    },
    "acc_steps": [1, 200],
    "train_transform": None,
    "val_transform": None,
}

# -----data prep -----
# NOTE(review): load_dataset appears to return a 3-tuple of tensors;
# each gets a singleton measurement axis via unsqueeze(-2) and is
# moved to the device — confirm against load_dataset's docs.
X_train, C_train, Y_train = [
    tmp.unsqueeze(-2).to(device)
    for tmp in load_dataset(config.set_params["path"], subset="train")
]
X_val, C_val, Y_val = [
    tmp.unsqueeze(-2).to(device)
    for tmp in load_dataset(config.set_params["path"], subset="val")
]

# ------ save hyperparameters -------
# Dump all subnet / iterative-net hyperparameters to a plain-text
# file alongside the saved model for later reference.
os.makedirs(train_params["save_path"][-1], exist_ok=True)
with open(
    os.path.join(train_params["save_path"][-1], "hyperparameters.txt"), "w"
) as file:
    for key, value in subnet_params.items():
        file.write(key + ": " + str(value) + "\n")
    for key, value in it_net_params.items():
        file.write(key + ": " + str(value) + "\n")
# Download the registered model into the working directory.
model.download(target_dir=os.getcwd(), exist_ok=True)

# Verify the downloaded model file exists (os.stat raises otherwise).
# BUG FIX: the path was built with an embedded backslash
# ("trained_models\model_..."), where "\m" is an invalid escape
# sequence (SyntaxWarning on modern Python) and the literal
# backslash breaks the path on POSIX systems. Let os.path.join
# pick the correct separator instead.
file_path = os.path.join(
    os.getcwd(), "trained_models", "model_n_estimators_7.pkl"
)
os.stat(file_path)

# Testing score.py
# Load a small slice of the training data and run it through the
# scoring entry points as a smoke test.
from data_management import load_dataset
import configuracion
import score

data = load_dataset(file_name=configuracion.TRAINING_DATA_FILE)
data = data.iloc[:8, :]          # first 8 rows are enough for a smoke test
data = data.to_json()            # score.run expects a JSON payload

score.init()
pred = score.run(data)
print(pred)

# Create environment file
'''
- Add package requeriments in this file -> myenv.yml
'''
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()