def pseudo_cluster_tt(self, cluster, type, name):
    """Build pseudo-user train/test rating files from a movie clustering.

    Each real user is assigned to the cluster whose movies they rate highest
    on average; each cluster then becomes a "pseudo user" whose rating for a
    movie is the mean rating of its member users. The combined real+pseudo
    training set, a dummy test set, the cluster labels, the cluster mapping
    and the pseudo ratings are written to files derived from the train/test
    file names.

    Parameters
    ----------
    cluster : DataFrame with at least 'movieId' and 'cluster' columns.
    type : 'bias' to use the bias-adjusted dense ratings, 'original' for the
        raw dense ratings. (Shadows the builtin, but the name is kept for
        interface compatibility with existing callers.)
    name : label used when deriving the output file names.

    Raises
    ------
    ValueError
        If `type` is neither 'bias' nor 'original'.
    """
    if type == 'bias':
        rating_cluster = pd.merge(self.rating_dense_bias, cluster, on='movieId')
    elif type == 'original':
        rating_cluster = pd.merge(self.rating_dense, cluster, on='movieId')
    else:
        # fixed typo ("Unrecoginzed") to match the wording used elsewhere
        raise ValueError('Unrecognized type')
    # Mean affinity of each user for each cluster.
    rating_cluster = (rating_cluster.groupby(['userId', 'cluster'])['value']
                      .agg('mean').reset_index())
    # idxmax returns the index *label*, which .loc requires; Series.argmax is
    # positional in modern pandas and would select the wrong rows here.
    user_fav_cluster = (rating_cluster.groupby('userId')
                        .apply(lambda x: x.loc[x['value'].idxmax()])
                        .reset_index(drop=True))
    tmp = pd.merge(self.rating_dense, user_fav_cluster, on='userId')
    pseudo_rating = (tmp.groupby(['cluster', 'movieId'])['rating']
                     .agg('mean').reset_index())
    pseudo_rating['userId'] = utils.fake_uid(pseudo_rating.cluster)
    train_ratings = pd.concat(
        [self.ratings_for_train[['userId', 'movieId', 'rating']],
         pseudo_rating[['userId', 'movieId', 'rating']]],
        ignore_index=True)
    train_ratings.to_csv(utils.get_output_name(self.train_file, type + '_' + name),
                         header=False, index=False)
    # Dummy test rows (movieId=1, rating=2) so downstream tooling accepts
    # the split even though only the pseudo users matter.
    test_ratings = pd.DataFrame({'userId': np.unique(pseudo_rating.userId),
                                 'movieId': 1, 'rating': 2})
    test_ratings[['userId', 'movieId', 'rating']] \
        .to_csv(utils.get_output_name(self.test_file, type + '_' + name),
                header=False, index=False)
    labels = self.label_clusters(cluster)
    labels.to_csv(utils.get_output_name(self.train_file, type + '_' + name + '_label'),
                  index=False)
    cluster.to_csv(utils.get_output_name(self.train_file, type + '_' + name + '_cluster'),
                   index=False)
    pseudo_rating[['userId', 'movieId', 'rating']].to_csv(
        utils.get_output_name(self.train_file, type + '_' + name + '_user'),
        index=False)
def write_train_test_movielens(self, train_fn, test_fn, cost_fn, movie_fn, train_this_fold, train_other_folds, test, n):
    """Write the MovieLens train/test files for one cross-validation fold.

    Movies are ranked on the other folds; each user keeps only their top-n
    ranked ratings from this fold, which are then appended to the other
    folds to form the final training set. The ranked movie list, the
    selected rows, the final training set and the test set are written to
    file names derived from `train_fn`/`test_fn` and `n`.
    """
    ranked_movies = self.rank_movie(train_other_folds)
    rank_table = pd.DataFrame({'item': ranked_movies,
                               'rank': range(1, len(ranked_movies) + 1)})
    this_fold_ranked = pd.merge(train_this_fold, rank_table, on='item')
    selected = (this_fold_ranked.groupby('user')
                .apply(self.top_n, n, 'rank')
                .reset_index(drop=True))
    combined = pd.concat([train_other_folds,
                          selected[['user', 'item', 'rating', 'time']]])
    # output to files
    pd.DataFrame({'movie': ranked_movies}).to_csv(
        utils.get_output_name(train_fn, str(n) + '_' + movie_fn))
    selected.to_csv(utils.get_output_name(train_fn, str(n) + '_' + cost_fn),
                    index=False)
    combined.to_csv(utils.get_output_name(train_fn, str(n)),
                    header=False, index=False)
    test.to_csv(utils.get_output_name(test_fn, str(n)),
                header=False, index=False)
def write_train_test(self, train_fn, test_fn, select_fn, movie_fn, train_this_fold, train_other_folds, test, n):
    """Write the train/test files for one cross-validation fold.

    A movie list is generated from the other folds; only this fold's
    ratings of those movies are kept and appended to the other folds to
    form the final training set. The movie list, the flagged fold, the
    final training set and the test set are written to file names derived
    from `train_fn`/`test_fn` and `n`.
    """
    # generate movie list and select only these to include in the training part
    # for this fold and combine with the other folds to make final training file
    movie_list = self.gen_movie_list(train_other_folds, n)
    # NOTE: intentionally mutates the caller's train_this_fold by adding the
    # 'rated' flag — the flagged frame is written out below.
    train_this_fold['rated'] = train_this_fold.item.isin(movie_list)
    # drop(columns=...) replaces the positional-axis form drop('rated', 1),
    # which was removed in pandas 2.0.
    train_selected = train_this_fold[train_this_fold.rated].drop(columns='rated')
    train_final = pd.concat([train_other_folds, train_selected])
    # output to files
    pd.DataFrame({'movie': movie_list}).to_csv(
        utils.get_output_name(train_fn, str(n) + '_' + movie_fn))
    train_this_fold.to_csv(utils.get_output_name(train_fn, str(n) + '_' + select_fn),
                           index=False)
    train_final.to_csv(utils.get_output_name(train_fn, str(n)),
                       header=False, index=False)
    test.to_csv(utils.get_output_name(test_fn, str(n)),
                header=False, index=False)
def html_to_pdf(file_name, output_name=None):
    """Convert an HTML file to a PDF via pdfkit/wkhtmltopdf.

    Parameters
    ----------
    file_name : path of the HTML file to convert.
    output_name : destination path; when falsy, derived from `file_name`
        with a 'pdf' extension via get_output_name.

    Returns
    -------
    The path of the written PDF file.
    """
    output_name = output_name if output_name else get_output_name(file_name, 'pdf')
    wk_path = Config.get('WKH2P_PATH', '')
    # Only build an explicit configuration when a wkhtmltopdf path is set;
    # otherwise let pdfkit discover the binary itself.
    configuration = pdfkit.configuration(wkhtmltopdf=wk_path) if wk_path else None
    # Config.get avoids a KeyError when no conversion options are configured
    # (consistent with how WKH2P_PATH is read above).
    pdfkit.from_file(file_name, output_name,
                     configuration=configuration,
                     options=Config.get('WKH2P_OPTION'))
    return output_name
def md_to_html(file_name, output_name=None):
    """Render a Markdown file into a complete HTML page on disk.

    Parameters
    ----------
    file_name : path of the Markdown file to convert.
    output_name : destination path; when falsy, derived from `file_name`
        with an 'html' extension via get_output_name.

    Returns
    -------
    The path of the written HTML file.
    """
    body = convert_md_html(file_name)
    page = fill_html(body)
    if not output_name:
        output_name = get_output_name(file_name, 'html')
    with open(output_name, 'w', encoding='utf-8') as out:
        out.write(page)
    return output_name
def main():
    """CLI entry point: train a cluster-backed recommender and score items.

    Raises
    ------
    ValueError
        If `score_type` is neither 'optimal' nor 'simulation'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('train_file', type=str,
                        help='Location of training files')
    parser.add_argument('test_file', type=str,
                        help='Location of testing files')
    parser.add_argument('model_file', type=str,
                        help='Location of the model recommendation file')
    parser.add_argument('cluster_name', type=str,
                        help='Name of the clustering algorithm backing the recommendation')
    parser.add_argument('prediction', type=str,
                        help='The list of prediction algorithm names')
    parser.add_argument('partition', type=int,
                        help='The number of the file partition')
    parser.add_argument('score_type', type=str,
                        help='Type of scoring process')
    args = parser.parse_args()
    # Fail fast: validate the cheap argument before the expensive training
    # step, so a typo does not waste a full training run.
    if args.score_type not in ('optimal', 'simulation'):
        raise ValueError('Unrecognized input for score_type')
    recommender = ClusterRecommender()
    cluster_file = utils.get_output_name(args.train_file,
                                         args.cluster_name + '_cluster')
    recommender.train(args.cluster_name, args.prediction, args.model_file,
                      cluster_file, args.partition)
    recommender.score_item(args.train_file, args.test_file, args.score_type)
type = lambda arg: is_valid_file(parser, arg), help = "The path to e2p2 program") parser.add_argument("-o", "--out", dest = "output", type = str, default = "[Input fasta filename without its extension].pf", help = "The output file name") args = parser.parse_args() if args.file is None or args.path is None: parser.print_help() exit(0) # # Retrieve fasta file directory path fasta_path = os.path.dirname(os.path.abspath(args.file)) test_fasta = os.path.join(fasta_path, ".e2p2_test_file.fasta") # Restructure the fasta file to be good for E2P2 program restructure_fasta(args.file, test_fasta) # Running the E2P2 program default_output = "[Input fasta filename without its extension].pf" if args.output == default_output: args.output = get_output_name(args.file) + ".pf" command = ["python", args.path, "-i", test_fasta, "-o", args.output] subprocess.call(command, stdout = open('/dev/null', 'w'), stderr = subprocess.STDOUT) # Deleting the temporary and intermediate files e2p2_path = os.path.dirname(os.path.abspath(args.path)) if os.path.exists(test_fasta): os.remove(test_fasta) if os.path.exists(os.path.join(e2p2_path, "run")): shutil.rmtree(os.path.join(e2p2_path, "run"))
help="The path to interproscan program") parser.add_argument( "-o", "--out", dest="output", type=str, default="[Input fasta filename without its extension].tsv", help="The output file name") args = parser.parse_args() if args.file is None or args.path is None: parser.print_help() exit(0) # # Running the interproscan program default_output = "[Input fasta filename without its extension].tsv" if args.output == default_output: args.output = get_output_name(args.file) + ".tsv" #if os.path.exists(os.path.abspath("runIprScan.sh")): os.remove(os.path.abspath("runIprScan.sh")) # Record time. time_stamp = str(time.time()) script = open("runIprScan." + time_stamp + ".sh", "w") command = args.path + " -i " + args.file + " -f TSV -pa -o " + args.output command += " -iprlookup -goterms -dp -crid UNIDAUPIF -cpu 30\n" script.write(command) script.close() print commands.getoutput("chmod +x runIprScan." + time_stamp + ".sh") print commands.getoutput( "qsub -q all.q@@bigmem -shell yes -S /bin/bash -cwd runIprScan." + time_stamp + ".sh")
"""Fast version of the main script that compute the R2 maps for a given subject and model, without nested cross-validation.""" ) parser.add_argument( "--yaml_file", type=str, default= "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/template.yml", help= "Path to the yaml containing the parameters of the script execution.") args = parser.parse_args() parameters = read_yaml(args.yaml_file) input_path = parameters['input'] output_path_ = parameters['output'] subject = get_subject_name(parameters['subject']) output_path = get_output_name(output_path_, parameters['language'], subject, parameters['model_name']) logs = Logger( get_output_name(output_path_, parameters['language'], subject, parameters['model_name'], 'logs.txt')) save_yaml(parameters, output_path + 'config.yml') logs.info("Fetching maskers...", end='\n') kwargs = { 'detrend': parameters['detrend'], 'standardize': parameters['standardize'], 'high_pass': parameters['high_pass'], 'low_pass': parameters['low_pass'], 'mask_strategy': parameters['mask_strategy'], #'dtype': parameters['dtype'], 'memory_level': parameters['memory_level'], 'smoothing_fwhm': parameters['smoothing_fwhm'],
description= """Main script that compute the R2 maps for a given component by SRM and model.""" ) parser.add_argument( "--yaml_file", type=str, default= "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/template.yml", help= "Path to the yaml containing the parameters of the script execution.") args = parser.parse_args() parameters = read_yaml(args.yaml_file) input_path = parameters['input'] output_path_ = parameters['output'] output_path = get_output_name(output_path_, parameters['language'], 'srm', parameters['model_name']) logs = Logger( get_output_name(output_path_, parameters['language'], 'srm', parameters['model_name'], 'logs.txt')) output_folder = os.path.join(output_path_, parameters['language'], 'srm', parameters['model_name']) check_folder(output_folder) save_yaml(parameters, output_path + 'config.yml') logs.info("Retrieve arguments for each model...") kwargs_splitter = get_splitter_information(parameters) kwargs_compression = get_compression_information(parameters) kwargs_transformation = get_data_transformation_information(parameters) kwargs_estimator_model = get_estimator_model_information(parameters) logs.validate()