def experiment_for_submitting():
    """Evaluate each preprocessing option for the submission model.

    For every option in ``preprocessing_options``: preprocess train+test
    together, pick a ridge-regression lambda by cross-validation, predict
    the test labels, and record the predicted class distribution.
    Results are written to "Submitting experiment.csv".
    """
    y_train, tX_train, ids = load_csv_data(DATA_TRAIN_PATH)
    _, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)
    np.random.seed(2019)  # reproducible CV splits
    results = pd.DataFrame(
        columns=["Preprocessing", "Class -1 count", "Class +1 count"])
    for preprocessing_param in preprocessing_options:
        # Stack train and test so engineered features (log/root shifts)
        # are computed consistently for both sets.
        tX_stacked = np.vstack((tX_train, tX_test))
        # BUG FIX: previously a hard-coded parameter dict was applied on
        # every iteration and the loop variable was ignored, so all rows
        # of the results table described the same preprocessing. Use the
        # option actually under test.
        tX_stacked_prep, _, desc_prep = preprocess_data(
            tX_stacked, None, preprocessing_param)
        tX_train_prep, tX_test_prep = np.split(tX_stacked_prep,
                                               [len(tX_train)])
        lambda_ = lambda_cv(tX_train_prep, y_train)
        print(f"Best lambda: {lambda_}")
        w, _ = ridge_regression(y_train, tX_train_prep, lambda_)
        y_pred = predict_labels(w, tX_test_prep)
        uniq, count = np.unique(y_pred, return_counts=True)
        print(preprocessing_param,
              f"Class -1: {count[0]}, Class +1: {count[1]}")
        results.loc[len(results)] = (desc_prep, count[0], count[1])
    results.to_csv("Submitting experiment.csv", sep=";")
def main():
    """Train one ridge-regression model per PRI_jet_num subset and write
    the combined test predictions as a submission file.
    """
    y_train, tX_train, ids = load_csv_data(DATA_TRAIN_PATH)
    _, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)
    np.random.seed(2019)
    # Preprocess data together to have the same shifts while creating
    # log or root features.
    prep_param = {
        "bias": True,
        "fill": True,
        "standardize": False,
        "degree": 8,
        "log": True,
        "root": True,
    }
    tX_stacked = np.vstack((tX_train, tX_test))
    tX_stacked_prep, *_ = preprocess_data(tX_stacked, None, prep_param)
    tX_train_prep, tX_test_prep = np.split(tX_stacked_prep, [len(tX_train)])
    # Split data according to PRI_jet_num value.
    tX_tr_splitted, indices_tr = divide_data(tX_train_prep)
    tX_te_splitted, indices_te = divide_data(tX_test_prep)
    y_tr_splitted = [y_train[idx] for idx in indices_tr]
    # Train: one cross-validated lambda and weight vector per subset.
    weights = []
    for i, (tX_sub, y_sub) in enumerate(zip(tX_tr_splitted, y_tr_splitted)):
        lambda_ = lambda_cv(tX_sub, y_sub)
        print(f"Class {i}, lambda: {lambda_}")
        w, _ = ridge_regression(y_sub, tX_sub, lambda_)
        weights.append(w)
    # Predict: scatter each subset's labels back to its original rows.
    y_pr_tr = np.zeros(tX_train.shape[0])
    y_pr_te = np.zeros(tX_test.shape[0])
    for w, idx_tr, sub_tr, idx_te, sub_te in zip(
            weights, indices_tr, tX_tr_splitted, indices_te, tX_te_splitted):
        y_pr_tr[idx_tr] = predict_labels(w, sub_tr)
        y_pr_te[idx_te] = predict_labels(w, sub_te)
    acc_tr = compute_accuracy(y_train, y_pr_tr)
    print(f"Total accuracy train: {acc_tr}")
    _, counts = np.unique(y_pr_te, return_counts=True)
    print(
        f"Distribution on test data class -1: {counts[0]}, class +1: {counts[1]}"
    )
    create_csv_submission(ids_test, y_pr_te, OUTPUT_PATH)
def recipe_drop_down():
    """Render the recipe drop-down page.

    On GET no recipe is selected yet (empty image); on POST the chosen
    image name is read from the submitted form. Both cases render the
    same template with the same headline and image dictionary, so the
    previously duplicated render calls are merged into one.
    """
    info = load_csv_data()
    headline_py = "Pick a Recipe"
    if request.method == 'GET':
        image_file = ''
    else:  # POST
        image_file = request.form.get("recipe_image_list_drop_down")
    return render_template("recipe_dropdown.html",
                           headline=headline_py,
                           recipe_image=image_file,
                           image_dict=info)
def load_user_neg_regex(self):
    """Load user-supplied negative title regex values from CSV files.

    Reads every user file registered under FILEKEY_NEG_TITLE_REGEX and
    merges the loaded words into ``self.negative_regex`` (each word maps
    to itself, so the dict doubles as a lookup set).
    """
    if self.userfiles and FILEKEY_NEG_TITLE_REGEX in self.userfiles:
        for userfile in self.userfiles[FILEKEY_NEG_TITLE_REGEX].values():
            negwords = load_csv_data(userfile, [FILEKEY_NEG_TITLE_REGEX_FIELD])
            # Map each word to itself so update() folds them into the dict.
            negdict = dict(zip(negwords, negwords))
            self.negative_regex.update(negdict)
    # NOTE(review): original indentation was lost; the log call is placed
    # after the loop so the total is reported once — confirm original intent.
    self.log(f'Loaded {len(self.negative_regex.keys())} negative title regex values for user.')
def run_experiments():
    """Entry point for offline experiments on the training data.

    Exactly one experiment is active at a time; the alternatives are
    kept commented out so they can be re-enabled quickly.
    """
    labels, features, ids = load_csv_data(DATA_TRAIN_PATH)
    # run_preprocessing_experiment(features, labels)
    # run_balancing_experiment(features, labels)
    # run_filling_experiment(features, labels)
    # experiment_for_submitting()
    # feature_correlation_checking(features, labels)
    train_3models(features, labels)
    Returns
    -------
    Tuple (ndarray, ndarray)
        Sorted labels and predictions
    """
    idx = ids.argsort()
    return ids[idx], y_pred[idx]


# Locations of the train/test data and the submission files
train_fname = "data/train.csv"
test_fname = "data/test.csv"
# NOTE(review): "sumbission" typo kept — renaming a module-level name could
# break references outside this chunk.
sumbission_fname = "data/submission.csv"

# Load the train/test data
y_train, X_train, ids_train = load_csv_data(train_fname)
y_test, X_test, ids_test = load_csv_data(test_fname)

# Print out the shapes for convinience
print("Shapes")
print(X_train.shape, y_train.shape, ids_train.shape)
print(X_test.shape, y_test.shape, ids_test.shape)

# Split the datasets into 8 subsets (keyed by the PRI_jet_num feature)
combine_vals = False
train_subsets = PRI_jet_num_split(y_train, X_train, ids_train, combine_vals)
test_subsets = PRI_jet_num_split(y_test, X_test, ids_test, combine_vals)

# Print the number of subsets and assert that their sizes are the same
# If not, there is something wrong with the split functionality
print(f"Number of train subsets: { len(train_subsets) }")
def main():
    """Ad-hoc test pen: exercises the nutrition helpers, then prototypes
    regex parsing of recipe text files (name, portions, ingredients,
    yield) against both a file on disk and an inline sample.

    NOTE(review): this chunk's original line breaks/indentation were lost;
    the layout below is a best-effort reconstruction — statement order and
    tokens are unchanged.
    """
    print(f"RUNNING TEST PEN")
    info = helpers.get_nutridata()
    #pprint(info)
    for k,v in info.items():
        print(f"{ k } = { v }")
    # Dump the module dunders to see how the script was invoked/loaded.
    print(f"__name__ is: {__name__}")
    print(f"__file__ is: {__file__}")
    print(f"__loader__ is: {__loader__}")
    print(f"__package__ is: {__package__}")
    # kcal -> kJ conversion factor 4.184; values stored back as strings.
    info['n_EnkJ'] = str( round( float( info['n_En'] ) * 4.184 ) )
    info['serving_size'] = str( round( float( info['serving_size'] ) ) )
    print(f"n_EnkJ: {info['n_EnkJ']}")
    print(f"serving_size: {info['serving_size']}")
    print(f"{1}")
    helpers.get_nutrients_per_serving()
    helpers.load_csv_data()
    #def get_ingredients_from_recipe(name):
    print( helpers.get_ingredients_from_recipe('') )
    requested_recipe = 'mushroom rissotto'
    print( glob.glob('./static/recipe/*.txt') )
    # Build a name -> path map from timestamped recipe filenames, e.g.
    # "20190301_145910_mushroom rissotto.txt" -> "mushroom rissotto".
    recipe_ref = {}
    for recipe_file in glob.glob('./static/recipe/*.txt'):
        print(recipe_file)
        #print(re.search(r'\d{8}_\d{6}_(.*).txt', './static/recipe/20190301_145910_mushroom rissotto.txt') )
        print(re.search(r'\d{8}_\d{6}_(.*).txt', recipe_file).group(0) )
        print(re.search(r'\d{8}_\d{6}_(.*).txt', recipe_file).group(1) )
        recipe_name = re.search(r'\d{8}_\d{6}_(.*).txt', recipe_file).group(1)
        recipe_ref[recipe_name] = recipe_file
    print(f"Looking for: {requested_recipe} <")
    print(f"Found: {recipe_ref[requested_recipe]} <")
    recipe_file ="./static/recipe/20190301_145910_mushroom rissotto.txt"
    # with open(recipe_file) as f:
    #     content = [line.rstrip() for line in f]
    # print( content.__class__.__name__ )  # list
    #
    # for l in content:
    #     print( l )
    # match
    # ^-+- for the (.*) \((\d+)\)
    # 1.name (2.portions)
    # (.*)
    # 3. ingredients
    # ^\s+Total \((.*?)\)
    # 4. yield
    # need DOTALL so multiline works
    # https://www.thegeekstuff.com/2014/07/advanced-python-regex/
    # all together
    # ^-+- for the (.*) \((\d+)\)(.*)^\s+Total \((.*?)\)
    #
    # Read the whole recipe file as one string for the regex experiments.
    with open(recipe_file) as f:
        content = "".join(f.readlines())
    print( content.__class__.__name__ )
    print( ' - - - recipe text' )
    print( content )
    #match = re.search( r'^-+- for the (.*) \((\d+)\)(.*)^\s+Total \((.*?)\)', content, re.DOTALL )
    #match = re.search( r'^-+- for the (.*) \((\d+)\)(.*)^\s+Total \((.*?)\)', content, re.MULTILINE )
    match = re.search( r'^-+- for the (.*) \((\d+)\)', content, re.DOTALL )
    r_name = match.group(1)
    r_portions = match.group(2)
    match = re.search( r'\)(.*)^\s+T', content, re.MULTILINE )
    # Ingredients/yield parsing not working yet — placeholders used below.
    #r_ingredients = match.group(1)
    r_ingredients = 'ingredients'
    #r_yield = match.group(4)
    r_yield = '1kg'
    print(f" - - - recipe: {r_name} <\n{r_ingredients}\nmakes {r_yield} which is {r_portions} portions" )
    # Inline sample recipe used to iterate on the regexes without file I/O.
    # NOTE(review): internal line breaks of this literal were lost in the
    # source; reconstructed one item per line to match the ^...$ regexes.
    this_is_multiline = \
'''
------------------ for the mushroom rissotto (3)
50g fennel
46g butter
70g leek
20g green pepper
16g garlic
88g white mushrooms
80g sauteed mushrooms
100g white wine
10g chicken stock cube
490g water
40g peas
80g cream cheese
100g arborio rice
 Total (900g)
'''
    print(f" - - - - - - this_is_multiline \n{this_is_multiline}\n - - - - - ")
    #match = re.search( r'^-+- for the (.*) \((\d+)\)(.*)^\s+Total \((.*?)\)', this_is_multiline, re.DOTALL )
    #match = re.search( r'^-+- for the (.*) \((\d+)\)(.*)^\s+Total \((.*?)\)', this_is_multiline, re.MULTILINE )
    r_name = 'No Match'
    r_portions = 'No Match'
    r_ingredients = []
    r_yield = 'No Match'
    # name and portions
    match = re.search( r'^-+- for the (.*) \((\d+)\)', this_is_multiline, re.MULTILINE )
    #match = re.search( r'^-+- for the (.*) \((\d+)\)', this_is_multiline, re.DOTALL )
    if match:
        r_name = match.group(1)
        r_portions = match.group(2)
    else:
        print("name and portions NO MATCH")
    # ingredients
    #match = re.search( r'^(\d+)g\s+([a-zA-Z ]+)$', this_is_multiline, re.MULTILINE )
    match = re.findall( r'^(\d+)g\s+([a-zA-Z ]+)$',
        this_is_multiline, re.MULTILINE )
    #match = re.search( r'\)(.*)^\s+T', this_is_multiline, re.DOTALL )
    if match:
        #pprint(match)
        print(f"ingredients: {match.__class__.__name__}")  # - {match.size}")
        # findall returns (amount, name) tuples; format as "NNg<TAB>name".
        for i in match:
            r_ingredients.append( f"{i[0]}g\t{i[1]}" )
            print( f"{i[0]}g\t{i[1]}" )
    else:
        print("ingredients NO MATCH")
    # yield
    match = re.search( r'^\s+Total \((.*?)\)', this_is_multiline, re.MULTILINE )
    #match = re.search( r'^\s+Total \((.*?)\)', this_is_multiline, re.DOTALL )
    if match:
        r_yield = match.group(1)
    else:
        print("yield NO MATCH")
    print(f" - - - recipe ML: {r_name} <\n{r_ingredients}\nmakes {r_yield} which is {r_portions} portions" )
    # Combined pattern: MULTILINE for the ^ anchors, DOTALL so (.*) spans lines.
    match = re.search( r'^-+- for the (.*) \((\d+)\)(.*)^\s+Total \((.*?)\)', this_is_multiline, re.MULTILINE | re.DOTALL )
    #match = re.search( r'^-+- for the (.*) \((\d+)\)', this_is_multiline, re.MULTILINE )
    #match = re.search( r'\).*?^(.*)^\s+Total \((.*?)\)', this_is_multiline, re.MULTILINE )
    #match = re.search( r'^\s+Total \((.*?)\)', this_is_multiline, re.MULTILINE )
    r_ingredients = match.group(3).strip()
    print(f"{r_ingredients.__class__.__name__}")
    print(f"{r_ingredients}")
    print(f"{match.group(3)}")
    get_ingredients_from_recipe('mushroom risotto')
# Plot-control arguments (parser is created earlier in the file).
parser.add_argument("--dyhigh",
                    help="The max y limit on the d43 plots",
                    type=float,
                    required=True)
parser.add_argument("--tlim",
                    help="The max time on the m0 and d43 plots",
                    type=float,
                    required=True)
parser.add_argument("--dlegendloc",
                    help="The location of the legend on the d43 plot",
                    type=str,
                    required=True)
args = parser.parse_args()

# NOTE(review): x and time_end are not referenced in this visible chunk —
# presumably used by plotting code that follows it.
x = np.linspace(1, 10, 300)
case_num = args.case
time_end = args.tlim
node_num = args.nodes

print("Building Comparison Plots for Case {}.".format(case_num))

# Read analytic data from file.
# The loaded mapping's keys/values supply the x/y series respectively.
analytic_data = helpers.load_csv_data("Vanni2000_Case{}_N_RIG.csv".format(case_num))
x_analytic = list(analytic_data.keys())
y_analytic = list(analytic_data.values())

# Produce Figures a-d
plt.figure(case_num, figsize=(12,8), dpi=80)

# Produce Figure a
plt.subplot(221)
# Empty plots register the legend entries/styles without drawing data.
plt.plot([], color="#007F00", linestyle="-.", label="LnEQMOM N={}, $N_\\alpha$ = 20".format(node_num))
plt.plot([], color="#956363", linestyle="--", label="EQMOM each nodes")
plt.plot([], color="#0A246A", linestyle="-", label="Rigorous solution")
plt.plot(x_analytic, y_analytic, color="#0A246A", linestyle="-")