def main():
    """Merge one or more motif files into a single MEME-style output file.

    Command line: ``inputfile1 [inputfile2 ...] outputfile``.

    Every positional argument except the last is treated as an input motif
    file; the last one is the output path.  Each input is written to the
    output under the name ``<genename>,<input basename without extension>,D``
    where ``genename`` comes from ``-n/--genename`` (empty by default).

    If the output file already exists the user is prompted before it is
    overwritten.  Exits with status 1 when fewer than two positional
    arguments are given.
    """
    usage = "usage: %prog inputfile1 inputfile2 ... inputfileN outputfile"
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--genename", dest="genename",
                      help="Name of gene, prefix added to motifname.",
                      default="")
    (options, args) = parser.parse_args()

    # At least one input file plus the output file are required, i.e. two
    # positional arguments (the previous "< 3" check rejected the valid
    # single-input invocation that the message below describes).
    if len(args) < 2:
        print("Two args must be specified in commandline: \n"
              "One or more input files\n"
              "Output file\n")
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)

    input_file_list = args[:-1]
    output_file = args[-1]
    genename = options.genename

    # Warn before clobbering an existing output file.  raw_input() blocks
    # until the user hits Enter (Ctrl-C aborts).
    if os.path.isfile(output_file):
        print("Warning: output file already exists.\n"
              "Press any key to overwrite file: %s" % output_file)
        raw_input()
        print("Overwriting file: %s" % output_file)

    fcount = 0
    with open(output_file, "wb") as outfile:
        # NOTE(review): presumably emits the MEME file header -- confirm in
        # writing_utils.
        writing_utils.write_meme_headers(outfile)

        # Append the motifs of every input file to the output file.
        for f in input_file_list:
            # Strip only the final extension (e.g. ".txt").  splitext keeps
            # dots inside the stem ("M001_0.6.txt" -> "M001_0.6") and leaves
            # extension-less names intact instead of producing "".
            motifname = os.path.splitext(os.path.basename(f))[0]
            motifname = ",".join([genename, motifname, "D"])
            motifs = reading_utils.read_motifs_from_file(f)
            writing_utils.write_motif_to_file(outfile, motifs, motifname,
                                              end_line="\n\n")
            fcount += 1

    print("%s motif files crafted into: %s" % (fcount, output_file))
def main():
    """Build a custom MEME database file for a list of RBPs.

    Command line: ``rbp_list.txtfile rbp_db_directory output_file``.

    Reads RBP identifiers from column 0 of the list file, indexes the RBP
    database file (``-f/--dbfile``, located inside ``rbp_db_directory``),
    and for every motif id associated with each RBP reads
    ``<rbp_db_directory>/<pwm_dir>/<motifid>.txt`` (``-d/--pwmdir``) and
    writes the motif to the output file.  Motifs that come back empty are
    skipped and not counted.

    Exits with status 1 when fewer than three positional arguments are
    given.  If the output file already exists nothing is written and the
    program exits without overwriting it.
    """
    usage = ('usage: %prog rbp_list.txtfile rbp_db_directory '
             'meme_db_output.outputfile\n'
             'Three args must be specified in commandline: \n'
             '1) List of RBPs (ENSEMBL gene id).\n'
             '2) RBP directory containing RBP_Information_all_motifs.txt'
             ' and pwms_all_motifs directory.\n'
             '3) Output file.\n'
             '-h to display this help message.\n')
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--dbfile', dest='rbp_filename',
                      help='name of RBP DB file, '
                           'default is RBP_Information_all_motifs.txt',
                      default='RBP_Information_all_motifs.txt')
    parser.add_option('-d', '--pwmdir', dest='pwm_dir',
                      help='Name of directory containing motif files.'
                           ' Default is "pwms_all_motifs".',
                      default='pwms_all_motifs')
    (options, args) = parser.parse_args()

    if len(args) < 3:
        print('Incorrect number of parameters specified.')
        # Let optparse render the usage so "%prog" is replaced by the real
        # program name instead of being printed literally.
        parser.print_usage()
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)

    input_rbps_path = args[0]
    rbp_db_dir = args[1]
    output_path = args[2]
    rbp_filename = options.rbp_filename
    pwm_dir = options.pwm_dir

    # Never overwrite an existing output file: report it and stop.  This is
    # treated as a deliberate skip, so the exit status is left unchanged.
    if os.path.isfile(output_path):
        print('%s already exists. Aborting custom MEME db creation.'
              % output_path)
        sys.exit()

    rbp_db_path = os.path.join(rbp_db_dir, rbp_filename)

    # RBP identifiers are taken from the first column of the list file.
    rbps_list = reading_utils.extract_column_from_textfile(input_rbps_path,
                                                           col_to_extract=0)

    # Index RBP DB: {ensemblID: {RBP_Name:[], Motif_ID:[], RBP_Status:[]}}
    rbp_db_dic = index_rbpdb_motifids(rbp_db_path)

    with open(output_path, 'wb') as writefile:
        # NOTE(review): presumably emits the MEME file header -- confirm in
        # writing_utils.
        writing_utils.write_meme_headers(writefile)

        # Match each RBP to the indexed database and write its motifs.
        motifcount = 0
        for rbp in rbps_list:
            motifid_list, motifname_list = get_motifids_from_rbp(rbp,
                                                                 rbp_db_dic)
            for motifid, motifname in zip(motifid_list, motifname_list):
                # Motif files are stored as "<motifid>.txt" in the PWM dir.
                motifid_fname = ''.join([motifid, '.txt'])
                motifid_path = os.path.join(rbp_db_dir, pwm_dir,
                                            motifid_fname)
                extracted_motif = reading_utils.read_motifs_from_file(
                    motifid_path, skipheader=True, rownames=True)

                # Skip motifs whose file yielded nothing.  len() is used on
                # purpose: the container type returned by reading_utils is
                # not visible here, so truthiness is not assumed.
                if len(extracted_motif) == 0:
                    continue

                writing_utils.write_motif_to_file(writefile,
                                                  extracted_motif,
                                                  motifname,
                                                  alength=4, nsites=20)
                motifcount += 1

    print('%s motifs written to file: %s' % (motifcount, output_path))