def run():
    # prepare data
    util_file.prepare_file(configuration.data_file)
    # read prepared data
    dataset = pd.read_csv(configuration.data_file_cleaned)
    # create random forests
    create_random_forests(dataset)
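create_random_forests is not included in this excerpt. A minimal sketch of what it might look like, assuming scikit-learn, that configuration.dimensions lists the feature columns, that configuration.dependent lists numeric target columns, and that the task is regression (all assumptions, not confirmed by the source):

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

def create_random_forests(dataset):
    # one forest per dependent variable (assumed layout of the configuration)
    for dep in configuration.dependent:
        X = dataset[configuration.dimensions]
        y = dataset[dep]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)
        forest = RandomForestRegressor(n_estimators=100, random_state=42)
        forest.fit(X_train, y_train)
        print(dep, "R^2:", r2_score(y_test, forest.predict(X_test)))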
def run():
    # prepare data
    util_file.prepare_file(configuration.data_file)
    # read prepared data
    dataset = pd.read_csv(configuration.data_file_cleaned)
    # check correlations between the configured dimensions
    correlations = util.check_correlation(dataset, configuration.dimensions)
    print("correlations:", correlations)
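util.check_correlation is not shown here; one plausible implementation, assuming it simply returns the pairwise Pearson correlations of the configured columns (an assumption based on the name):

def check_correlation(dataset, dimensions):
    # pairwise Pearson correlations between the given columns
    return dataset[dimensions].corr()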
def run():
    # prepare data
    util_file.prepare_file(configuration.data_file)
    # read prepared data
    dataset = pd.read_csv(configuration.data_file_cleaned)
    # check correlation (kept here for reference)
    #correlations = util.check_correlation(dataset, configuration.dimensions_pol)
    #print("correlations:", correlations)
    # generate linear models and calculate their metrics
    generate_models(configuration.dimensions_logregpca, dataset)
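generate_models is also not part of this excerpt. A sketch matching the "linear models and calculate their metrics" comment above, assuming scikit-learn, numeric targets listed in configuration.dependent, and that the first argument is the list of feature columns (all assumptions):

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

def generate_models(dimensions, dataset):
    # fit one linear model per dependent variable and report simple metrics
    for dep in configuration.dependent:
        X = dataset[dimensions]
        y = dataset[dep]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)
        model = LinearRegression().fit(X_train, y_train)
        pred = model.predict(X_test)
        print(dep, "R^2:", r2_score(y_test, pred),
              "MAE:", mean_absolute_error(y_test, pred))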
def run(): print("running contentbased algorithm") # prepare data util_file.prepare_file(configuration.data_file) # read prepared data dataset = pd.read_csv(configuration.data_file_cleaned) for threshold in range(1,100): calculateAllProfiles(dataset, threshold) print(results) results.to_csv("content_based_results.csv")
def runParTuning():
    # prepare data
    util_file.prepare_file(configuration.data_file)
    # read prepared data
    dataset = pd.read_csv(configuration.data_file_cleaned)
    # parameter tuning: max_depth for every dependent variable
    for dep in configuration.dependent:
        print("running parameter tuning max_depth for:", dep)
        partun_max_depth(dataset, dep)
    # parameter tuning: n_estimators for every dependent variable
    for dep in configuration.dependent:
        print("running parameter tuning n_estimators for:", dep)
        partun_n_estimators(dataset, dep)
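partun_max_depth and partun_n_estimators are not shown. A sketch of how partun_max_depth could be implemented with scikit-learn's GridSearchCV, assuming a random forest regressor and that configuration.dimensions holds the feature columns (assumptions); partun_n_estimators would follow the same pattern with a grid over n_estimators:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

def partun_max_depth(dataset, dep):
    X = dataset[configuration.dimensions]
    y = dataset[dep]
    # cross-validated search over a small set of candidate depths
    search = GridSearchCV(RandomForestRegressor(random_state=42),
                          param_grid={"max_depth": [2, 5, 10, 20, None]},
                          cv=5)
    search.fit(X, y)
    print(dep, "best params:", search.best_params_,
          "best CV score:", search.best_score_)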