Exemplo n.º 1
0
****************************************

Script: Data Prune
Purpose: Splices original datasets into multiple subsets using settings & transformations found in config &
         common/dataselect

"""

import pandas as pd
import numpy as np

from config.config import export_dir, select_y
from common.dsm import Dataset_Manager

#Instantiate Dataset Manager & load the "impute" dataset object
dsm = Dataset_Manager(export_dir)
ds = dsm.load_dataset("impute")
ds.load_df()

#create prune set as a copy of the loaded dataset, registered as "prune"
prune_ds = dsm.copy_dataset(ds, "prune")

#prune y subset (select_y comes from config)
prune_ds.prune_y(select_y)

#prune away garbage features
garbage_ftrs = prune_ds.get_garbage_ftrs(select_y)
X = prune_ds.get_X()
#Keep the surviving features in the order get_X() yields them.
#A plain set difference returns them in arbitrary order, which can change
#from one interpreter run to the next (string hash randomisation) and makes
#the pruned dataset's column order non-reproducible.
#dict.fromkeys drops duplicate names while preserving first-seen order.
garbage_lookup = set(garbage_ftrs)
include = [ftr for ftr in dict.fromkeys(X) if ftr not in garbage_lookup]
prune_ds.prune_features(include, True)
Exemplo n.º 2
0
****************************************

Script: Initial Analysis & Setup
Purpose: Runs setup for dataset; initializes objects from config settings

"""

from config.config import train_data, col_dtypes_dict, encoding, export_dir, custom_transforms, auto_transform_vars, num_tiles, y
from common.dsm import Dataset_Manager
import pandas as pd

#Read in the raw data
#(file path, per-column dtypes and text encoding are all taken from config)
raw_df = pd.read_csv(train_data, dtype=col_dtypes_dict, encoding=encoding)

#Instantiate Dataset Manager & create the Dataset object
#The new dataset is registered under the name "raw"
dsm = Dataset_Manager(export_dir)
ds = dsm.create_dataset("raw", raw_df)

#set the target variable (y is the target setting imported from config)
ds.set_target(y)

#Apply any transforms you specified in config
#The Dataset object understands when you apply transformations to the target variable; saving them as alt target variables for further analysis
print("Applying custom transformations...")
ds.apply_transform_metadata(custom_transforms)

#Apply auto-transforms to any variables you specify
#(auto_transform_vars is the list of variables configured in config)
print("Applying auto transformations...")
ds.auto_transform(auto_transform_vars)

#Create a tiling for the target variable
Exemplo n.º 3
0
from common.utils import ensure_folder, save_obj, load_obj
from common.graphing import (bargraph_from_db, feature_importance_bargraphs,
                             figures_to_pdf, bargraph_fsa, grab_new_ax_array)
from common.featureanalysis import grab_all_features

import pandas as pd
from time import time
from numpy import array
import os
import matplotlib.gridspec as gridspec
import seaborn as sns
import matplotlib.pyplot as plt
from copy import deepcopy

#Instantiate Dataset Manager & load dataset object
#NOTE(review): Dataset_Manager, Model_Manager, export_dir and dataset_chosen
#are not imported in the visible part of this script - presumably they are
#imported earlier in the file; confirm.
dsm = Dataset_Manager(export_dir)
ds = dsm.load_dataset(dataset_chosen)
ds.load_df()

#Instantiate Model Manager
mm = Model_Manager(export_dir)

#start timing (t1_main holds the time() reading taken before the main work)
t1_main = time()

#load test/train in dataset object
ds.load_test_train_dfs()

#all features in dataframe
#X comes from ds.get_X() and y from ds.get_y(); presumably the feature set
#and the target respectively - confirm against the Dataset class
X = ds.get_X()
y = ds.get_y()
Exemplo n.º 4
0
    Shivam Sharma
    sharma0611
****************************************

Script: Impute Data
Purpose: Splices original datasets into multiple subsets using settings & transformations found in config &
         common/dataselect

"""

from common.dsm import Dataset_Manager
import pandas as pd
from config.config import category_cols, impute_to_mean, impute_to_value, remove_na_rows, random_variables, export_dir, conditions, blacklist, carry_along

#Instantiate Dataset Manager & load dataset object
#The "raw" dataset is the input to this step
dsm = Dataset_Manager(export_dir)
ds = dsm.load_dataset("raw")
ds.load_df()

#apply imputations specified in config
#The imputations are applied to a copy registered as "impute"; they are not
#called on the loaded "raw" dataset object
impute_ds = dsm.copy_dataset(ds, "impute")
impute_ds.apply_imputations(category_cols, impute_to_mean, impute_to_value,
                            remove_na_rows, blacklist, random_variables,
                            conditions)

#provide analysis on dataframe
impute_ds.analyse_dataframe()

#handle carry along
#(carry_along is a config setting; see Dataset.carry_along_split for what
#is done with it - not visible from this script)
impute_ds.carry_along_split(carry_along)
Exemplo n.º 5
0
"""
from common.dsm import Dataset_Manager
from common.mm import Model_Manager
from config.config import export_dir, random_variables, hyperparam_overwrite, hyperparam_step, final_features
from common.logger import start_printer, end_printer, join_pdfs, join_pdfs_list
from common.utils import ensure_folder, save_obj, load_obj
from common.graphing import bargraph_from_db, feature_importance_bargraphs, figures_to_pdf

import pandas as pd
from time import time
from numpy import array
import os

#Instantiate Dataset Manager & load dataset object
#This step works on the "prune" dataset
dsm = Dataset_Manager(export_dir)
ds = dsm.load_dataset("prune")
ds.load_df()

#Instantiate Model Manager
mm = Model_Manager(export_dir)

#start timing (t1_main holds the time() reading taken before the main work)
t1_main = time()

#start logging
#The directory for this step is obtained from the dataset object using the
#hyperparam_step / hyperparam_overwrite settings from config, and is then
#passed to start_printer together with the log name "hyperparams"
fname = "hyperparams"
hyperparam_dir = ds.make_custom_dir(hyperparam_step, hyperparam_overwrite)
start_printer(hyperparam_dir, fname)

#load test/train in dataset object
Exemplo n.º 6
0
"""

from common.dsm import Dataset_Manager
from common.mm import Model_Manager
from config.config import export_dir, random_variables, prelim_step, prelim_overwrite
from common.logger import start_printer, end_printer, join_pdfs
from common.utils import ensure_folder, save_obj, load_obj
from common.graphing import bargraph_from_db, feature_importance_bargraphs, figures_to_pdf

import pandas as pd
from time import time
from numpy import array
import os

#Instantiate Dataset Manager & load dataset object
#This step works on the "impute" dataset
dsm = Dataset_Manager(export_dir)
ds = dsm.load_dataset("impute")
ds.load_df()

#Instantiate Model Manager
mm = Model_Manager(export_dir)

#start timing (t1_main holds the time() reading taken before the main work)
t1_main = time()

#start logging
#The directory for this step is obtained from the dataset object using the
#prelim_step / prelim_overwrite settings from config, and is then passed to
#start_printer together with the log name "prelim_log"
fname = "prelim_log"
prelim_dir = ds.make_custom_dir(prelim_step, prelim_overwrite)
start_printer(prelim_dir, fname)

#load test/train in dataset object