def main(args):
    """Merge tuning configs, fix every RNG seed, and launch separate tuning runs."""
    general_params = load_yaml(args.general)
    data_params = load_yaml(args.data)
    default_params = load_yaml(args.default)
    tune_params = load_yaml(args.tune)

    general_params['verbose'] = args.verbose
    general_params['cuda'] = torch.cuda.is_available()
    general_params['train_val'] = args.train_val
    if args.trick:
        # Enable the requested trick flag in the (module-level) default set.
        default_trick[args.trick] = True
        general_params['trick'] = default_trick

    final_default_params = SimpleNamespace(**general_params, **data_params, **default_params)
    time_start = time.time()
    print(final_default_params)
    print()

    # Reproducibility: seed python, numpy and torch (plus CUDA when available).
    seed = final_default_params.seed
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if final_default_params.cuda:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    multiple_run_tune_separate(final_default_params, tune_params, args.save_path)
def main(args):
    """Run the critiquing experiment with tuned hyperparameters and save results."""
    progress = WorkSplitter()

    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Pick the best tuned row (by NDCG) for the requested model.
    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name, 'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    # NOTE(review): yelp's item-keyphrase matrix is presumably stored
    # transposed relative to the other datasets — confirm with data prep.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    start_time = time.time()
    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         keyphrase_selection_method=args.keyphrase_selection_method,
                         topk=args.topk,
                         lamb=args.lamb)
    print("Final Time Elapsed: {}".format(inhour(time.time() - start_time)))

    table_path = load_yaml('config/global.yml', key='path')['tables']
    save_dataframe_csv(results, table_path, args.save_path)
def main(args):
    """Critiquing run that uses a fixed hyperparameter row instead of a tuned one."""
    progress = WorkSplitter()

    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))
    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))
    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape))
    R_train_item_keyphrase = load_numpy(path=args.data_dir,
                                        name=args.train_item_keyphrase_set).toarray()

    table_path = load_yaml('config/global.yml', key='path')['tables']
    # Hyperparameter lookup is deliberately bypassed in favor of a fixed row:
    # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG')
    # parameters_row = parameters.loc[parameters['model'] == args.model]
    parameters_row = {
        'iter': 10,
        'lambda': 200,
        'rank': 200
    }

    keyphrases_names = load_dataframe_csv(path=args.data_dir,
                                          name="Keyphrases.csv")['Phrases'].tolist()

    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase.T,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         lamb=args.lambdas,
                         keyphrases_names=keyphrases_names,
                         keyphrase_selection_method=args.keyphrase_selection_method)

    table_path = load_yaml('config/global.yml', key='path')['tables']
    save_dataframe_csv(results, table_path, args.save_path)
def run(options):
    '''
    Defines and executes apache beam pipeline

    :param options: `apache_beam.options.pipeline_options import PipelineOptions`
        object containing arguments that should be used for running the Beam job.
    '''
    with beam.Pipeline(options=options) as pipeline:
        # Load run configuration (dates, output headers/paths).
        metadata = io.load_yaml(filename='configs/metadata.yml')

        # Build the BigQuery query over the configured date window.
        hist_trans_query = io.load_sql_query('sql/pull_hist_trans.sql').format(
            start_date=metadata['input_start_date'],
            end_date=metadata['input_end_date'])

        # Pull the query results and dump them to CSV.
        _ = (pipeline
             | 'Pull data for historical transactions' >> beam.io.Read(
                 beam.io.BigQuerySource(query=hist_trans_query,
                                        use_standard_sql=True))
             | 'Write data to CSV' >> io.DumpToCSV(
                 header=metadata['output_headers'],
                 file_path_prefix=metadata['output_file'],
                 file_name_suffix='',
                 compression_type=CompressionTypes.AUTO,
                 shard_name_template=''))
def main(args):
    """Evaluate each model's best tuned configuration on the test split."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.tuning_result_path, 'NDCG')

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    # Final evaluation trains on train+valid combined.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]
    frame = []
    for _, best_row in df.iterrows():
        start = timeit.default_timer()
        settings = best_row.to_dict()
        settings['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        settings['topK'] = topK
        result = general(R_train,
                         R_test,
                         settings,
                         models[settings['model']],
                         measure=settings['similarity'],
                         gpu_on=args.gpu,
                         model_folder=args.model_folder)
        stop = timeit.default_timer()
        print('Time: ', stop - start)
        frame.append(result)

    save_dataframe_csv(pd.concat(frame), table_path, args.save_path)
def show_uncertainty(df, x, y, hue='model', folder='unknown',
                     name='uncertainty_analysis', save=True):
    """Plot a lowess-smoothed uncertainty curve per model, saving pdf+png if asked."""
    fig, ax = plt.subplots(figsize=(6, 4))
    for current_model in df[hue].unique():
        subset = df[df[hue] == current_model]
        sns.regplot(x=x, y=y, data=subset, lowess=True, scatter=False,
                    ax=ax, label=current_model)

    plt.ylabel("Model Uncertainty \n (Standard Derivation)")
    plt.xlabel("Number of Ratings")
    plt.legend(loc='center right')
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        fig.savefig('{0}/analysis/{1}/{2}.pdf'.format(fig_path, folder, name),
                    bbox_inches="tight", pad_inches=0, format='pdf')
        fig.savefig('{0}/analysis/{1}/{2}.png'.format(fig_path, folder, name),
                    bbox_inches="tight", pad_inches=0, format='png')
    else:
        plt.show()
def load_config(cls, config_file_uri, environment):
    """
    Loads config from file and returns.

    Fetches the file behind `config_file_uri`, parses it as YAML, and returns
    only the section keyed by `environment`.
    """
    local_path = io_utils.fetch_file(config_file_uri)
    parsed = io_utils.load_yaml(local_path)
    return parsed[environment]
def main(args):
    """Run `execute` for every best (by NDCG) configuration and persist results."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)
    R_train = R_train + R_valid  # final models train on train+valid

    topK = [5, 10, 15, 20, 50]
    frame = []
    for _, best_row in df.iterrows():
        start = timeit.default_timer()
        settings = best_row.to_dict()
        settings['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        settings['topK'] = topK
        frame.append(execute(R_train, R_test, settings,
                             models[settings['model']], gpu_on=args.gpu))
        stop = timeit.default_timer()
        print('Time: ', stop - start)

    results = pd.concat(frame)
    save_dataframe_csv(results, table_path, args.name)
def main(args):
    """Load train/test splits plus keyphrase names, then run the explanation model."""
    params = load_yaml(args.parameters)

    # User/item cardinalities come from the per-column id files.
    num_users = pd.read_csv(args.data_dir + args.user_col + '.csv')[args.user_col].nunique()
    num_items = pd.read_csv(args.data_dir + args.item_col + '.csv')[args.item_col].nunique()

    df_train = pd.read_csv(args.data_dir + args.train_set)
    # Keep positive interactions only.
    df_train = df_train[df_train[args.rating_col] == 1]
    # Keyphrase vectors are stored as stringified lists; parse them back.
    df_train[args.keyphrase_vector_col] = df_train[args.keyphrase_vector_col].apply(ast.literal_eval)

    df_test = pd.read_csv(args.data_dir + args.test_set)
    keyphrase_names = pd.read_csv(args.data_dir + args.keyphrase_set)[args.keyphrase_col].values

    explain(num_users, num_items, args.user_col, args.item_col, args.rating_col,
            args.keyphrase_vector_col, df_train, df_test, keyphrase_names,
            params, load_path=args.load_path, save_path=args.save_path)
def main(args):
    """Evaluate best (by MAP@10) configurations of every model on the test split."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.tuning_result_path, 'MAP@10')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)
    R_train = R_train + R_valid  # evaluate models trained on train+valid

    topK = [5, 10, 15, 20, 50]
    frame = []
    for _, best_row in df.iterrows():
        start = timeit.default_timer()
        settings = best_row.to_dict()
        settings['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        settings['topK'] = topK
        frame.append(execute(R_train, R_test, settings, models[settings['model']]))
        stop = timeit.default_timer()
        print('Time: ', stop - start)

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
def pandas_ridge_plot(df, model, pop, k, folder='figures', name='personalization',
                      save=True):
    """Draw an overlapping (ridge-style) KDE of item popularity per model.

    df: long-form frame; `model` names the row/hue column, `pop` the value column.
    k: top-k cutoff, used only in the x-axis label.
    Saves pdf+png under the configured figures path when `save` is True,
    otherwise shows the figure interactively. Always closes the figure.

    Fix: removed a stray dead `pass` statement at the top of the `if save:`
    branch (leftover no-op).
    """
    sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
    num_models = len(df.model.unique())

    # One ridge row per model, colored from a cubehelix palette.
    pal = sns.cubehelix_palette(num_models, rot=-.25, light=.7)
    g = sns.FacetGrid(df, row=model, hue=model, aspect=10, height=1, palette=pal)

    # Filled density, a white outline on top, and a baseline for each row.
    g.map(sns.kdeplot, pop, clip_on=False, shade=True, alpha=1, lw=1.5, bw=50)
    g.map(sns.kdeplot, pop, clip_on=False, color="w", lw=1.5, bw=50)
    g.map(plt.axhline, y=0, lw=2, clip_on=False)

    def label(x, color, label):
        # Write the model name inside its own row, in the row's color.
        ax = plt.gca()
        ax.text(-0.1, .1, label, fontweight="bold", color=color,
                ha="left", va="center", transform=ax.transAxes)

    g.map(label, pop)

    # Negative hspace makes adjacent rows overlap (the "ridge" look).
    g.fig.subplots_adjust(hspace=-0.8)

    g.set_xlabels(
        "Popularity Distribution of The Top-{0} Recommended Items".format(k))
    g.set_titles("")
    g.set(yticks=[])
    g.despine(bottom=True, left=True)

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{2}/{0}/{1}.pdf".format(folder, name, fig_path), format="pdf")
        plt.savefig("{2}/{0}/{1}.png".format(folder, name, fig_path), format="png")
    else:
        plt.show()
    plt.close()
def main(args):
    """Hyperparameter-tuning entry point for the biased/uniform-data setting."""
    progress = WorkSplitter()
    progress.section("Tune Parameters")

    params = load_yaml(args.grid)
    # Resolve the single configured model name to its constructor.
    params['models'] = {params['models']: models[params['models']]}

    train = load_numpy(path=args.path, name=args.dataset + args.train)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)

    hyper_parameter_tuning(train,
                           valid,
                           params,
                           unif_train=unif_train,
                           save_path=args.dataset + args.name,
                           gpu_on=args.gpu,
                           seed=args.seed,
                           way=args.way,
                           dataset=args.dataset)
def main(args):
    """Tune one model family on the train/validation split."""
    params = load_yaml(args.grid)
    # Resolve the model name in the grid to its constructor.
    params['models'] = {params['models']: models[params['models']]}

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)

    hyper_parameter_tuning(R_train, R_valid, params, save_path=args.name,
                           measure=params['similarity'], gpu_on=args.gpu)
def read(conf, path, func, write=True, update=False):
    """Return the cached YAML value for `path`, computing it via `func` otherwise.

    A cached file is used unless `update` forces recomputation. Fresh values
    are persisted through `_write` when `write` is True.
    """
    full_path = os.path.join(conf.data_folder, f"{path}.yaml")

    cache_usable = not update and os.path.exists(full_path)
    if cache_usable:
        logger.debug(f"Load value of '{path}'", prefix=conf.logger_prefix)
        return io.load_yaml(full_path)

    if write:
        return _write(conf, path, func)
    return func()
def main(args):
    """Train each best configuration for 500 epochs and plot NDCG convergence."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.param, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)

    results = converge(R_train, R_valid, df, table_path, args.name,
                       epochs=500, gpu_on=args.gpu)
    show_training_progress(results, hue='model', metric='NDCG',
                           name="epoch_vs_ndcg")
def pandas_bar_plot(df, x, y, hue, x_name, y_name, folder='figures',
                    name='unknown', save=True):
    """Grouped bar plot with per-hue hatching; also exports the legend alone."""
    fig, ax = plt.subplots(figsize=(6, 3))
    sns.barplot(ax=ax, x=x, y=y, hue=hue, data=df, errwidth=1,
                edgecolor='black', facecolor=(1, 1, 1, 0))

    # Bars arrive grouped per hue level; advance the hatch at group boundaries.
    num_category = len(df[x].unique())
    hatch = None
    hatches = itertools.cycle([
        '//', '**', '////', '----', 'xxxx', '\\\\\\\\', ' ', '\\', '...',
        'OOO', "++++++++"
    ])
    for i, bar in enumerate(ax.patches):
        if i % num_category == 0:
            hatch = next(hatches)
        bar.set_hatch(hatch)

    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.legend(loc='upper left', ncol=5)
    # if 'Precision' not in y: ax.legend_.remove()
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{2}/{0}/{1}_bar.pdf".format(folder, name, fig_path),
                    format="pdf")
        plt.savefig("{2}/{0}/{1}_bar.png".format(folder, name, fig_path),
                    format="png")
        # Export the legend as its own strip so figures can share one legend.
        fig_leg = plt.figure(figsize=(12, 0.7))
        ax_leg = fig_leg.add_subplot(111)
        ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=10)
        ax_leg.axis('off')
        fig_leg.savefig('figs/bar_legend.pdf', format='pdf')
    else:
        plt.show()
    plt.close()
def main(args):
    """Merge config files, seed all RNGs, and launch the agent runs."""
    general_params = load_yaml(args.general)
    data_params = load_yaml(args.data)
    agent_params = load_yaml(args.agent)

    general_params['verbose'] = args.verbose
    general_params['cuda'] = torch.cuda.is_available()

    final_params = SimpleNamespace(**general_params, **data_params, **agent_params)
    time_start = time.time()
    print(final_params)

    # Reproducibility: seed python, numpy and torch (plus CUDA when available).
    seed = final_params.seed
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if final_params.cuda:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    multiple_run(final_params)
def hyper_parameter_tuning(train, validation, params, save_path):
    """Grid-search over `params['k']` for each configured model.

    Results are appended to the CSV at `tables/<save_path>` after every
    evaluation, and (model, k) pairs already present there are skipped, so an
    interrupted run can resume where it left off.

    Fixes: `DataFrame.append` (removed in pandas 2.0) replaced with
    `pd.concat`; bare `except:` narrowed to `except Exception`; local name
    shadowing the `format` builtin inlined; unused `num_user` local dropped.
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # no previous results yet -> start an empty table
        df = pd.DataFrame(columns=['model', 'k', 'topK'])

    for algorithm in params['models']:
        for k in params['k']:
            # Resume support: skip combinations already evaluated.
            if ((df['model'] == algorithm) & (df['k'] == k)).any():
                continue

            progress.section("model: {}, k: {}".format(algorithm, k))

            progress.subsection("Training")
            model = params['models'][algorithm]()
            model.train(train)

            progress.subsection("Prediction")
            prediction_score = model.predict(train, k=k)
            prediction = predict(prediction_score=prediction_score,
                                 topK=params['topK'][-1],
                                 matrix_Train=train)

            progress.subsection("Evaluation")
            result = evaluate(prediction, validation, params['metric'],
                              params['topK'])

            # Each metric is stored as a rounded [value, confidence] pair.
            result_dict = {'model': algorithm, 'k': k}
            for name in result.keys():
                result_dict[name] = [round(result[name][0], 4),
                                     round(result[name][1], 4)]

            # DataFrame.append was removed in pandas 2.0; concat a 1-row frame.
            df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)

            save_dataframe_csv(df, table_path, save_path)
def show_training_progress(df, hue='model', metric='NDCG',
                           name="epoch_vs_ndcg", save=True):
    """Plot metric-over-epoch per model; save a PNG under figs/train/progress."""
    fig, ax = plt.subplots(figsize=(8, 4))
    ax = sns.lineplot(x='epoch', y=metric, hue=hue, style=hue, data=df)
    plt.tight_layout()

    if not save:
        plt.show()
        return

    fig_path = load_yaml('config/global.yml', key='path')['figs']
    fig.savefig('{0}/train/progress/{1}.png'.format(fig_path, name),
                bbox_inches="tight", pad_inches=0, format='png')
def main(args):
    """Tune either explanation settings or model hyperparameters on validation data."""
    params = load_yaml(args.parameters)
    params['models'] = {params['models']: models[params['models']]}

    # User/item cardinalities come from the per-column id files.
    num_users = pd.read_csv(args.data_dir + args.user_col + '.csv')[args.user_col].nunique()
    num_items = pd.read_csv(args.data_dir + args.item_col + '.csv')[args.item_col].nunique()

    df_train = pd.read_csv(args.data_dir + args.train_set)
    df_train = df_train[df_train[args.rating_col] == 1]  # positives only
    # Keyphrase vectors are stored as stringified lists; parse them back.
    df_train[args.keyphrase_vector_col] = df_train[
        args.keyphrase_vector_col].apply(ast.literal_eval)

    df_valid = pd.read_csv(args.data_dir + args.valid_set)
    keyphrase_names = pd.read_csv(args.data_dir + args.keyphrase_set)[
        args.keyphrase_col].values

    # Both tuners share the same positional signature; pick one and call it.
    tuner = explanation_parameter_tuning if args.explanation else hyper_parameter_tuning
    tuner(num_users, num_items, args.user_col, args.item_col, args.rating_col,
          args.keyphrase_vector_col, df_train, df_valid, keyphrase_names,
          params, save_path=args.save_path)
def precision_recall_curve(df, k, folder='figures', name='precison_recall',
                           save=True, reloaded=False):
    """Plot one precision-recall curve per model row of `df` over cutoffs `k`.

    reloaded: True when `df` was re-read from CSV, in which case each cell is a
    stringified list that must be parsed first.
    """
    fig, ax = plt.subplots(figsize=(4, 3))
    precisions = ["Precision@" + str(x) for x in k]
    recalls = ["Recall@" + str(x) for x in k]
    markers = ['o', '^', 's', '*', 'P', 'h', 'd', '3', 'X', 'v', '+']

    for i in range(len(df)):
        precision_cells = df[precisions].iloc[i].tolist()
        recall_cells = df[recalls].iloc[i].tolist()
        if reloaded:
            precision = [literal_eval(cell)[0] for cell in precision_cells]
            recall = [literal_eval(cell)[0] for cell in recall_cells]
        else:
            precision = [cell[0] for cell in precision_cells]
            recall = [cell[0] for cell in recall_cells]
        ax.plot(recall, precision, label=df['model'].iloc[i], marker=markers[i])

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{2}/{0}/{1}_curve.pdf".format(folder, name, fig_path),
                    format="pdf")
        plt.savefig("{2}/{0}/{1}_curve.png".format(folder, name, fig_path),
                    format="png")
        # Export the legend as its own strip figure.
        fig_leg = plt.figure(figsize=(12, 0.7))
        ax_leg = fig_leg.add_subplot(111)
        ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=10)
        ax_leg.axis('off')
        fig_leg.savefig('figs/legend.pdf', format='pdf')
    else:
        plt.show()
    plt.close()
def test_inject_token_auth(self):
    """The auth token should surface as config.auth_token and be injected
    into the provider endpoint URI."""
    auth_token = "abc123456"
    endpoint = "https://test.com/?token={0}".format(auth_token)
    target_env = "dev"

    # Stub load_config with the fixture's section for the target environment.
    config_file = fetch_file(resource_uri("test_config_with_auth_token.yaml"))
    config_yaml = load_yaml(config_file)
    dummy_utils = ConfigUtilsDummy({'load_config': config_yaml[target_env]})

    config = ConfigFactory.create_from_file(
        environment=target_env,
        config_file_uri="some dummy uri",
        auth_token=auth_token,
        validate_contract_settings=False,
        config_utils=dummy_utils,
    )

    self.assertEqual(config.auth_token, auth_token)
    self.assertEqual(config.eth_provider_args['endpoint_uri'], endpoint)
def show_samples(images, row, col, image_shape, name="Unknown", save=True,
                 shift=False):
    """Tile `row*col` images into a grid figure and save or display it.

    shift: when True, maps pixel values from [-1, 1] to [0, 1] before plotting.

    Fix: replaced Python 2-only `xrange` (NameError on Python 3) with `range`.
    """
    num_images = row * col
    if shift:
        images = (images + 1.) / 2.

    fig = plt.figure(figsize=(col, row))
    grid = ImageGrid(fig, 111, nrows_ncols=(row, col), axes_pad=0.)
    for i in range(num_images):
        im = images[i].reshape(image_shape)
        axis = grid[i]
        axis.axis('off')
        axis.imshow(im)

    plt.axis('off')
    plt.tight_layout()
    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        fig.savefig('{0}/train/grid/{1}.png'.format(fig_path, name),
                    bbox_inches="tight", pad_inches=0, format='png')
    else:
        plt.show()
def multi_modes_count(df, y='modes', save=True):
    """Count users per number-of-modes bucket and draw a hatched bar chart."""
    bucket_names = {
        1.0: "One Mode",
        2.0: "Two Modes",
        3.0: "Three Modes",
        4.0: "Four Modes",
    }

    def udf(row):
        # Anything outside 1-4 collapses into the last bucket.
        return bucket_names.get(row['y'], "Five Modes")

    df = df.sort_values(by=['y'])
    df['modes'] = df.apply(udf, axis=1)

    fig, ax = plt.subplots(figsize=(3, 2.5))
    sns.countplot(x=y, data=df)

    hatches = itertools.cycle(['//', 'xx', '\\\\\\'])
    for bar in ax.patches:
        bar.set_hatch(next(hatches))

    plt.xlabel("Number of User Preferences")
    plt.ylabel("Number of Users")
    plt.xticks(rotation=15)
    plt.legend()
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{0}/modes_count.pdf".format(fig_path), format="pdf")
        plt.savefig("{0}/modes_count.png".format(fig_path), format="png")
    else:
        plt.show()
    plt.close()
def main(args):
    """Tune rating/keyphrase models on binarized keyphrase matrices."""
    params = load_yaml(args.parameters)
    params['models'] = {params['models']: models[params['models']]}

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set)
    R_valid_keyphrase = load_numpy(path=args.data_dir, name=args.valid_keyphrase_set)

    # Binarize keyphrase frequencies: any nonzero count becomes 1.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1

    hyper_parameter_tuning(R_train,
                           R_valid,
                           R_train_keyphrase.todense(),
                           R_valid_keyphrase,
                           params,
                           save_path=args.save_path,
                           tune_explanation=args.tune_explanation)
def show_training_progress(df, hue='model', metric='NDCG',
                           name="epoch_vs_ndcg", save=True):
    """Line-plot metric vs. epoch per model (68% CI band) and save as a PDF."""
    fig, ax = plt.subplots(figsize=(6, 3))
    df = df.sort_values(by=['model'])
    ax = sns.lineplot(x='epoch', y=metric, hue=hue, style=hue, data=df, ci=68)
    ax.set_xlabel("Epoch")

    # Drop seaborn's legend title entry (the first handle/label pair).
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles=handles[1:], labels=labels[1:])
    plt.tight_layout()

    if not save:
        plt.show()
        return

    fig_path = load_yaml('config/global.yml', key='path')['figs']
    fig.savefig('{0}/{1}.pdf'.format(fig_path, name),
                bbox_inches="tight", pad_inches=0, format='pdf')
def main(args):
    """Evaluate the best models per user-activity category."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)
    R_train = R_train + R_valid  # evaluate on train+valid

    topK = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    metric = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP']
    usercategory(R_train, R_test, df, topK, metric, args.problem,
                 args.model_folder, gpu_on=args.gpu)
def main(args):
    """Run the personalization analysis for each model's best configuration."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)
    R_train = R_train + R_valid  # evaluate on train+valid

    topK = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    personalization(R_train, R_test, df, topK, args.problem,
                    args.model_folder, gpu_on=args.gpu)
def multi_modes_histogram(df, x='x', y='y', save=True):
    """Overlay interaction-count histograms for users with one/two/three modes."""
    fig, ax = plt.subplots(figsize=(4, 3))
    for mode_count, mode_label in ((1.0, "One Mode"),
                                   (2.0, "Two Modes"),
                                   (3.0, "Three Modes")):
        sns.distplot(df[df[y] == mode_count][x], kde=False, label=mode_label)

    plt.xlabel("Number of Observed Interactions")
    plt.ylabel("Frequency")
    plt.legend()
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{0}/multi_mode.pdf".format(fig_path), format="pdf")
        plt.savefig("{0}/multi_mode.png".format(fig_path), format="png")
    else:
        plt.show()
    plt.close()
def latent_distribution_ellipse(means, stds, keep_rate, lim=6, name="Unknown",
                                save=True):
    """Draw one ellipse per latent Gaussian (center `means[i]`, axes `stds[i]`),
    labeled by successive powers of `keep_rate`."""
    fig, ax = plt.subplots(figsize=(4, 4))
    patches = []
    m, _ = means.shape
    colors = sns.color_palette("Blues", 10).as_hex()
    plt.axis('equal')

    handles = []
    for i in range(m):
        ellipse = mpatches.Ellipse(means[i], stds[i][0], stds[i][1],
                                   edgecolor=colors[i], lw=3, facecolor='none',
                                   label="{:10.4f}".format(keep_rate ** (i + 1)))
        handles.append(ellipse)
        ax.add_artist(ellipse)

    ax.set(xlim=[-lim, lim], ylim=[-lim, lim])
    plt.legend(handles=handles)
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        fig.savefig('{0}/train/grid/{1}.png'.format(fig_path, name),
                    bbox_inches="tight", pad_inches=0, format='png')
    else:
        plt.show()