def main(g_params): #{{{
    """Parse the command line and run Build_seqid2pfamid on every input file.

    Recognised options:
      -h, --help          print help and return 1
      -outpath DIR        output directory, created when it does not exist
      -e FLOAT            stored in g_params['evalue_threshold']
      -l FILE             list file; its entries (read with
                          myfunc.ReadIDList) are added to the input files
      -q                  sets g_params['isQuiet'] to True
      -overwrite, -force  sets g_params['isOverwrite'] to True
    Every other argument is an input file.  "--" forces the single argument
    that follows it to be taken as an input file.

    Returns 1 after help, on an unknown option or when no input file is
    given; otherwise falls off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-e", "--e"]:
                (g_params['evalue_threshold'], i) = myfunc.my_getopt_float(argv, i)
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif arg in ["-overwrite", "--overwrite", "-force", "--force"]:
                g_params['isOverwrite'] = True
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        msg = "%s: no input file is set. exit" % (sys.argv[0])
        sys.stderr.write(msg + "\n")
        return 1

    # Fix: os.path.exists() used to be called without its path argument,
    # which raised TypeError as soon as -outpath was supplied.
    if outpath != "" and not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_output(cmd)

    for infile in fileList:
        Build_seqid2pfamid(infile, outpath)
def main(g_params): #{{{
    """Command-line driver that builds an iTOL tree for each Pfam ID.

    Options: -h/--help, -datapath DIR, -method INT (stored in
    g_params['method']), -l FILE (ID list read with myfunc.ReadIDList) and
    -outpath DIR.  Remaining arguments are Pfam IDs; "--" makes the single
    argument after it an ID regardless of its first character.

    For every ID, Itol_Tree_m0 is called when g_params['method'] is 0 and
    Itol_Tree_m1 when it is 1; any other value does nothing for that ID.

    Returns 1 after help or on an unknown option, otherwise None.
    """
    argv = sys.argv
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return 1

    idList = []
    idListFile = ''
    datapath = "."
    outpath = './'

    pos = 1
    nextIsID = False
    while pos < nArg:
        token = argv[pos]
        if nextIsID:
            nextIsID = False
            idList.append(token)
            pos += 1
        elif token == "--":
            nextIsID = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-datapath", "--datapath"):
                datapath = argv[pos + 1]
                pos += 2
            elif token in ("-method", "--method"):
                g_params['method'], pos = myfunc.my_getopt_int(argv, pos)
            elif token in ("-l", "--l"):
                idListFile = argv[pos + 1]
                pos += 2
            elif token in ("-outpath", "--outpath"):
                outpath = argv[pos + 1]
                pos += 2
            else:
                sys.stderr.write(("Error! Wrong argument:%s" % token) + "\n")
                return 1
        else:
            idList.append(token)
            pos += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) > 0:
        # The output directory is only created when there is work to do.
        os.system("mkdir -p %s" % outpath)

    for cnt, pfamid in enumerate(idList):
        print("%s %s %s %s" % ("================== ", cnt, pfamid,
                               " ===================="))
        if g_params['method'] == 0:
            Itol_Tree_m0(pfamid, datapath, outpath)
        elif g_params['method'] == 1:
            Itol_Tree_m1(pfamid, datapath, outpath)
def main(g_params): #{{{
    """Parse the command line and convert BLAST m9 files to a pair list.

    Recognised options:
      -h, --help         print help and return 1
      -l FILE            list of input files (read with myfunc.ReadIDList,
                         newline delimited)
      -o, -outfile FILE  output file; stdout is passed to myfunc.myopen as
                         the default handle
      -evalue FLOAT      stored in g_params['evalue_th']
      -seqidt FLOAT      stored in g_params['seqidt_th']
      -round INT         stored in g_params['iteration']
    Every other argument is an input file.  "--" forces the single argument
    that follows it to be taken as an input file.

    Each input file is passed to BlastM9toPairlist with the shared output
    handle.  Returns 1 after help, on an unknown option or when there is no
    input; otherwise returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileList = []
    fileListFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-l", "--l"]:
                fileListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-o", "--o", "-outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-evalue", "--evalue"]:
                g_params['evalue_th'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ["-seqidt", "--seqidt"]:
                g_params['seqidt_th'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ["-round", "--round"]:
                # Fix: the (value, next_index) pair must be unpacked like the
                # other options.  Previously the whole tuple was stored in
                # g_params['iteration'] and i never advanced, so the loop
                # never terminated.
                g_params['iteration'], i = myfunc.my_getopt_int(argv, i)
            else:
                sys.stderr.write(("Error! Wrong argument: '%s'" % arg) + "\n")
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile, delim="\n")

    if len(fileList) < 1:
        sys.stderr.write("No input set. exit" + "\n")
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for infile in fileList:
        BlastM9toPairlist(infile, fpout)
    myfunc.myclose(fpout)
def main(g_params): #{{{
    """Map the GI identifiers given on the command line to UniProt IDs.

    IDs come from positional arguments and, with -l/--l/-list, from a list
    file read by myfunc.ReadIDList.  They are joined with single spaces into
    one query (from 'P_GI' to 'ID', format 'tab') that is passed to
    GIID2UniprotID together with the output handle selected by
    -o/--o/-outfile (stdout is the default handle given to myfunc.myopen).

    Returns 1 after help, on an unknown option or when no ID was supplied;
    otherwise returns None.
    """
    argv = sys.argv
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return 1

    idList = []
    idListFile = ""
    outfile = ""

    pos = 1
    nextIsID = False
    while pos < nArg:
        token = argv[pos]
        if nextIsID:
            # The single argument right after "--" is always an ID.
            idList.append(token)
            pos += 1
            nextIsID = False
        elif token == "--":
            nextIsID = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-l", "--l", "-list"):
                idListFile, pos = myfunc.my_getopt_str(argv, pos)
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", token))
                return 1
        else:
            idList.append(token)
            pos += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) < 1:
        sys.stderr.write("No ID set. exit" + "\n")
        return 1

    params = {
        'from': 'P_GI',
        'to': 'ID',  # to uniprot id
        'format': 'tab',
        'query': " ".join(idList),
    }

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    GIID2UniprotID(params, fpout)
    myfunc.myclose(fpout)
def main(g_params):#{{{
    """Run CountUniquePairInvertedInfo on every input file.

    Options: -h/--help, -o/--o/-outfile FILE, -outpath DIR (parsed but not
    used further in this function), -l FILE (input list read with
    myfunc.ReadIDList) and -q (sets g_params['isQuiet']).  Other arguments
    are input files; "--" makes the single argument after it an input file.

    The Pfam definition file is the fixed path under DATADIR3 and is read
    with ReadPfamDefFile before the output handle is opened.

    Returns 1 after help or on an unknown option, otherwise None.
    """
    argv = sys.argv
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return 1

    fileList = []
    fileListFile = ""
    outfile = ""
    outpath = "./"
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv"%(DATADIR3)

    pos = 1
    nextIsFile = False
    while pos < nArg:
        token = argv[pos]
        if nextIsFile:
            fileList.append(token)
            nextIsFile = False
            pos += 1
        elif token == "--":
            nextIsFile = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-l", "--l"):
                fileListFile, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", token))
                return 1
        else:
            fileList.append(token)
            pos += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    # Only the Pfam-ID part of the definition is needed below.
    (pfamidDefDict, _clanidDefDict) = ReadPfamDefFile(pfamDefFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for infile in fileList:
        CountUniquePairInvertedInfo(infile, pfamidDefDict, fpout)
    myfunc.myclose(fpout)
def main(g_params):
    """Print a "gi | refseq | pfam | description" table for a fixed ID list.

    Both input paths are hard-coded; g_params is not used.  The sequence
    information is loaded with ReadSeqDefInfo and looked up for every ID
    returned by myfunc.ReadIDList; an ID missing from that mapping raises
    the mapping's lookup error.  Returns 0.
    """
    seqinfofile = "/data3/wk/MPTopo/pfamAna/pfam2-giid-refseqid-pfamid-description.txt"
    seqidlistfile = "/data3/wk/MPTopo/pfamAna/pairwise/all/pfamfullseq.selTM_uniq.seqidlist"

    seqInfoDict = ReadSeqDefInfo(seqinfofile)
    idList = myfunc.ReadIDList(seqidlistfile)

    print("#gi_id | refseq_id | pfamid | sequence_description")
    for idd in idList:
        row = "%s | %s | %s | %s" % (
            idd,
            seqInfoDict[idd]['refseqid'],
            seqInfoDict[idd]['pfamid'],
            seqInfoDict[idd]['seqdef'])
        print(row)
    return 0
def main(g_params): #{{{
    """Run ExcludeConsensus on every input file.

    Options: -h/--help, -o/--o/-outfile FILE (parsed but not used further
    in this function), -outpath DIR, -l FILE (input list read with
    myfunc.ReadIDList) and -q (sets g_params['isQuiet']).  Other arguments
    are input files; "--" makes the single argument after it an input file.

    When -outpath is given and does not exist it is created with
    "mkdir -p".  Returns 1 after help or on an unknown option, otherwise
    None.
    """
    argv = sys.argv
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return 1

    fileList = []
    fileListFile = ""
    outfile = ""
    outpath = ""

    pos = 1
    nextIsFile = False
    while pos < nArg:
        token = argv[pos]
        if nextIsFile:
            fileList.append(token)
            nextIsFile = False
            pos += 1
        elif token == "--":
            nextIsFile = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-l", "--l"):
                fileListFile, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", token))
                return 1
        else:
            fileList.append(token)
            pos += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    needsCreating = outpath != "" and not os.path.exists(outpath)
    if needsCreating:
        os.system("mkdir -p %s" % (outpath))

    for infile in fileList:
        ExcludeConsensus(infile, outpath)
def main(g_params): #{{{
    """Call GetTreeListOrder for every tree file named on the command line.

    Options: -h/--help (returns 0) and -outpath DIR.  Other arguments are
    tree files; "--" makes the single argument after it a tree file.

    Return values: an empty tuple when the script is started without
    arguments, 0 after help, 1 on an unknown option or when no tree file is
    given, None after processing.
    """
    argv = sys.argv
    # datapath and ext are initialised here but never read in this function.
    datapath = ''
    ext = ""
    outpath = ''
    treeFiles = []
    # No option sets this, so the list-file branch below is never taken.
    fileListFile = ''

    pos = 1
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return ()

    nextIsFile = False
    while pos < nArg:
        token = argv[pos]
        if nextIsFile:
            nextIsFile = False
            treeFiles.append(token)
            pos += 1
        elif token == "--":
            nextIsFile = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 0
            elif token in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(argv, pos)
            else:
                print(("Error! Wrong argument:%s" % token), file=sys.stderr)
                return 1
        else:
            treeFiles.append(token)
            pos += 1

    if fileListFile != "":
        treeFiles += myfunc.ReadIDList(fileListFile)

    if len(treeFiles) <= 0:
        print("No input set. Exit", file=sys.stderr)
        return 1

    for cnt, treefile in enumerate(treeFiles):
        print("================== ", cnt, treefile, " ====================")
        GetTreeListOrder(treefile, outpath)
def SplitIDList(infile, nsplit, method, datapath, ext, outpath):#{{{
    """Split the ID list in *infile* into several "<rootname>.split_<k>" files.

    infile   -- ID list file, read with myfunc.ReadIDList
    nsplit   -- requested number of parts
    method   -- 0: equal number of IDs per part;
                1: balance parts by the size of <datapath>/<id><ext>;
                2: balance parts by the square of that file size
    datapath -- directory holding the per-ID files (methods 1 and 2)
    ext      -- extension appended to each ID to form the file name
    outpath  -- directory receiving the split files

    For methods 1 and 2, IDs whose file is missing or empty are dropped, the
    rest are sorted by decreasing weight, and each part takes IDs until its
    accumulated weight exceeds ceil(total / nsplit).

    Returns 1, both when the list is empty and after processing.
    NOTE(review): the final "return 1" is reproduced from the original; its
    nesting level could not be confirmed from the available text.
    """
    idList = myfunc.ReadIDList(infile)
    rootname = os.path.basename(os.path.splitext(infile)[0])
    numID = len(idList)
    if numID <= 0:
        msg = "no ID in the idlist file %s"%(infile)
        sys.stderr.write(msg + "\n")
        return 1

    if method == 0:
        chunkSize = int(ceil(numID / float(nsplit)))
        start = 0
        cntfile = 0
        while start < numID:
            outfile = outpath + os.sep + rootname + ".split_%d"%(cntfile)
            fpout = myfunc.myopen(outfile, None, "w", True)
            written = 0
            # The last chunk may be shorter than chunkSize.
            for j in range(start, min(start + chunkSize, numID)):
                fpout.write("%s\n"%(idList[j]))
                written += 1
            myfunc.myclose(fpout)
            print("split to %s \t %4d IDs"%(outfile, written))
            cntfile += 1
            start += chunkSize
    elif method in [1, 2]:
        totalWeight = 0.0
        weighted = []
        for idd in idList:
            fname = datapath + os.sep + idd + ext
            if not os.path.exists(fname):
                continue
            fsize = os.path.getsize(fname)
            if fsize <= 0:
                continue
            if method == 1:
                weight = float(fsize)
            else:
                weight = float(fsize)*float(fsize)
            totalWeight += weight
            weighted.append((idd, weight))

        weightPerSplit = ceil(totalWeight / float(nsplit))
        weighted.sort(key=lambda rec: rec[1], reverse=True)
        print("sumfilesize_per_split = %g"% (weightPerSplit))

        numWeighted = len(weighted)
        start = 0
        cntfile = 0
        while start < numWeighted:
            outfile = outpath + os.sep + rootname + ".split_%d"%(cntfile)
            fpout = myfunc.myopen(outfile, None, "w", True)
            taken = 0
            accumulated = 0.0
            # Keep adding IDs until the part's weight exceeds its share or
            # the list is exhausted.
            while accumulated <= weightPerSplit and start + taken < numWeighted:
                (idd, weight) = weighted[start + taken]
                accumulated += weight
                fpout.write("%s\n"%(idd))
                taken += 1
            myfunc.myclose(fpout)
            print("split to %s \t %4d IDs sumsize = %g"%(outfile, taken, accumulated))
            cntfile += 1
            start += taken
    return 1
def main(g_params): #{{{
    """Select records from a FASTA file by ID and/or by e-value range.

    Recognised options (each value option reads the following argument):
      -h, --help         print help and return 1
      -l, -list FILE     ID list file (read with myfunc.ReadIDList; ignored
                         when the path does not exist)
      -f, -fasta FILE    FASTA file to filter (required)
      -o, -outfile FILE  output file; stdout is the default handle
      -mine FLOAT        g_params['min_evalue'], sets g_params['isEvalueSet']
      -maxe FLOAT        g_params['max_evalue'], sets g_params['isEvalueSet']
    Every other argument is an ID.  "--" forces the single argument that
    follows it to be taken as an ID.

    The FASTA file is read in blocks of g_params['BLOCK_SIZE'] characters
    and parsed with myfunc.ReadFastaFromBuffer.  A record is written when
    its ID is selected (or no ID was given) and either no e-value filter is
    active, its annotation contains no "evalue", its e-value cannot be
    extracted, or the e-value lies within [min_evalue, max_evalue].

    Returns 1 on help or invalid arguments, -1 when the FASTA file cannot be
    opened, otherwise None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    idList = []
    idListFile = ""
    fastaFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-l", "--l", "-list", "--list"]:
                idListFile = argv[i + 1]
                i += 2
            elif arg in ["-f", "--f", "-fasta", "--fasta"]:
                fastaFile = argv[i + 1]
                i += 2
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outFile = argv[i + 1]
                i += 2
            elif arg in ["-mine", "--mine"]:
                g_params['min_evalue'] = float(argv[i + 1])
                g_params['isEvalueSet'] = True
                i += 2
            elif arg in ["-maxe", "--maxe"]:
                g_params['max_evalue'] = float(argv[i + 1])
                g_params['isEvalueSet'] = True
                i += 2
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            idList.append(arg)
            i += 1

    if fastaFile == "":
        print("Fatal! fasta file not set. Exit.", file=sys.stderr)
        return 1
    elif not os.path.exists(fastaFile):
        print("Fatal! fasta file %s does not exist. Exit." % (fastaFile),
              file=sys.stderr)
        return 1

    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)

    isIDSet = len(idList) > 0

    if not g_params['isEvalueSet'] and not isIDSet:
        # Fix: the message used to end in the typo "Eixt".
        print("Error! no ID nor evalue threshold is set. Exit",
              file=sys.stderr)
        return 1

    idListSet = set(idList)  # O(1) membership tests in the record loop
    BLOCK_SIZE = g_params['BLOCK_SIZE']
    isEvalueSet = g_params['isEvalueSet']
    min_evalue = g_params['min_evalue']
    max_evalue = g_params['max_evalue']

    fpout = myfunc.myopen(filename=outFile, default_fp=sys.stdout, mode="w",
                          isRaise=False)
    try:
        # Fix: open() raises instead of returning a false value, so the old
        # "if not fpin" check could never report the failure.
        try:
            fpin = open(fastaFile, "r")
        except OSError:
            print("Failed to open fastafile %s" % (fastaFile),
                  file=sys.stderr)
            return -1

        with fpin:
            unprocessedBuffer = ""
            isEOFreached = False
            while True:
                buff = fpin.read(BLOCK_SIZE)
                if len(buff) < BLOCK_SIZE:
                    isEOFreached = True
                # Prepend whatever the previous block left unparsed.
                buff = unprocessedBuffer + buff
                recordList = []
                unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                    buff, recordList, isEOFreached)
                for r in recordList:
                    if isIDSet and r[0] not in idListSet:
                        continue
                    if not isEvalueSet or r[1].lower().find('evalue') < 0:
                        isSelected = True
                    else:
                        evalue = myfunc.GetEvalueFromAnnotation(r[1])
                        isSelected = (evalue is None or
                                      (evalue >= min_evalue and
                                       evalue <= max_evalue))
                    if isSelected:
                        fpout.write(">%s\n" % r[1])
                        fpout.write("%s\n" % r[2])
                if isEOFreached:
                    break
    finally:
        # Close the output on every path, including errors and early return.
        myfunc.myclose(fpout)
def main(g_params):#{{{
    """Invert seqid-to-pfamid tables into one "pfamid count seqid..." line per family.

    Recognised options:
      -h, --help         print help and return 1
      -o, -outfile FILE  output file; stdout is the default handle
      -nmax INT          g_params['nmax'], the maximum number of families
                         collected per pass over the input
      -l FILE            list of input files (read with myfunc.ReadIDList)
      -q                 sets g_params['isQuiet'] to True
    Every other argument is an input file.  "--" forces the single argument
    that follows it to be taken as an input file.

    Input lines are whitespace separated; the first field is the sequence
    ID and fields from the third onward are Pfam IDs.  To bound memory, the
    input is scanned repeatedly: each round gathers sequence IDs for at most
    nmax families that have not been written yet, writes them, and the
    rounds stop when every family seen in the first round has been written.

    Returns 1 on help, an unknown option or missing input, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-nmax", "--nmax"]:
                (g_params['nmax'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        msg = "%s: no input file is set. exit"%(sys.argv[0])
        sys.stderr.write(msg + "\n")
        # Fix: the message announced an exit but execution continued.
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    pfamidset_all = set([])
    pfamidset_output = set([])
    nmax = g_params['nmax']
    cnt_round = 0
    while 1:
        cnt_round += 1
        famid2seqidDict = {}
        for infile in fileList:
            hdl = myfunc.ReadLineByBlock(infile)
            if hdl.failure:
                continue
            lines = hdl.readlines()
            while lines is not None:
                for line in lines:
                    line = line.strip()
                    if not line or line[0] == "#":
                        continue
                    strs = line.split()
                    if len(strs) > 2:
                        seqid = strs[0]
                        for pfamid in strs[2:]:
                            if cnt_round == 1:
                                # The first round also records every family
                                # present, which defines the stop condition.
                                pfamidset_all.add(pfamid)
                            if pfamid in pfamidset_output:
                                continue
                            if pfamid not in famid2seqidDict:
                                # Admit new families only while below nmax.
                                if len(famid2seqidDict) < nmax:
                                    famid2seqidDict[pfamid] = []
                            if pfamid in famid2seqidDict:
                                famid2seqidDict[pfamid].append(seqid)
                    else:
                        msg = "broken item in file %s: line \"%s\""
                        sys.stderr.write(msg%(infile, line) + "\n")
                lines = hdl.readlines()
            hdl.close()

        for pfamid in famid2seqidDict:
            pfamidset_output.add(pfamid)
            seqidlist = myfunc.uniquelist(famid2seqidDict[pfamid])
            fpout.write("%s %d"%(pfamid, len(seqidlist)))
            for seqid in seqidlist:
                fpout.write(" %s"%(seqid))
            fpout.write("\n")

        if len(pfamidset_output) == len(pfamidset_all):
            break
        elif not famid2seqidDict:
            # Fix: a round that collected nothing (e.g. nmax < 1, or an input
            # file that became unreadable) can never make progress; stop
            # instead of looping forever.
            msg = "%s: no family collected in round %d, output is incomplete"%(
                sys.argv[0], cnt_round)
            sys.stderr.write(msg + "\n")
            break
        else:
            print(" %d / %d "%(len(pfamidset_output), len(pfamidset_all)))

    myfunc.myclose(fpout)
    if outfile != "":
        print("result output to %s"%(outfile))
    return 0
def main(g_params): #{{{
    """Distribute files of a working directory into sub-folders.

    Recognised options:
      -h, --help      print help and return 1
      -idlist FILE    ID list (read with myfunc.ReadIDList); requires -ext
      -filelist FILE  file with one file name per line
      -workdir DIR    working directory, validated with myfunc.checkfile
      -max INT        maximum number of files per folder (default 2000)
      -method INT     accepted, but its value is not used in this function
      -ext STR        file extension; may be repeated
      -q              sets g_params['isQuiet'] to True
    Positional arguments are not accepted, not even after "--".

    With an ID list, SplitToFolder_idlist is called; with a file list,
    SplitToFolder_filelist is called.  Both may be given.

    Returns 1 on help, on a bad argument, when the working directory check
    fails or when neither list is given.  Problems with the individual list
    files are only reported on stderr, and the function then returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    workdir = ""
    fileListFile = ""
    idListFile = ""
    extList = []
    maxfile_per_folder = 2000
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # Positional arguments are rejected (the statements that used to
            # follow this return were unreachable and have been dropped).
            sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-idlist", "--idlist"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-filelist", "--filelist"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-workdir", "--workdir"]:
                (workdir, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-max", "--max"]:
                (maxfile_per_folder, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-method", "--method"]:
                (method, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-ext", "--ext"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                extList.append(tmpstr)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
            return 1

    if myfunc.checkfile(workdir) != 0:
        return 1

    if idListFile == "" and fileListFile == "":
        sys.stderr.write(
            "At least one of idListFile and fileListFile need to be set" + "\n")
        return 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList = myfunc.ReadIDList(idListFile)
            if len(idList) <= 0:
                msg = "No ID in idListFile %s" % (idListFile)
                sys.stderr.write(msg + "\n")
            elif len(extList) <= 0:
                sys.stderr.write("No extension set when idList is used." + "\n")
            else:
                SplitToFolder_idlist(idList, workdir, extList,
                                     maxfile_per_folder)
        else:
            msg = "idListFile %s does not exist" % (idListFile)
            sys.stderr.write(msg + "\n")

    if fileListFile != "":
        if os.path.exists(fileListFile):
            # Fix: close the list file deterministically, and build a real
            # list instead of relying on filter() returning one.
            with open(fileListFile, "r") as fpin:
                fileList = [name for name in fpin.read().split("\n") if name]
            if len(fileList) <= 0:
                msg = "No file in fileListFile %s" % (fileListFile)
                sys.stderr.write(msg + "\n")
            else:
                SplitToFolder_filelist(fileList, workdir, maxfile_per_folder)
        else:
            msg = "fileListFile %s does not exist" % (fileListFile)
            sys.stderr.write(msg + "\n")
def main(g_params): #{{{
    """Label every sequence ID as "euk", "gram+", "gram-" or "NA".

    Options: -h/--help, -o/--o/-outfile FILE, -euk FILE, -gram+ FILE,
    -gram- FILE and -q (sets g_params['isQuiet']).  The positional argument
    (the last one wins) is the ID list file.  All four files are validated
    with myfunc.checkfile and read with myfunc.ReadIDList.

    One tab-separated line "<seqid> <taxid> <class>" is written per ID; the
    taxonomy column is always empty here.  The membership tests are tried in
    the order euk, gram+, gram-.

    Returns 1 after help, on an unknown option or when a file check fails,
    otherwise None.
    """
    argv = sys.argv
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    euk = ""
    gram_pos = ""
    gram_neg = ""

    pos = 1
    nextIsListFile = False
    while pos < nArg:
        token = argv[pos]
        if nextIsListFile:
            idListFile = token
            nextIsListFile = False
            pos += 1
        elif token == "--":
            nextIsListFile = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-euk", "--euk"):
                euk, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-gram+", "--gram+"):
                gram_pos, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-gram-", "--gram-"):
                gram_neg, pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", token))
                return 1
        else:
            idListFile = token
            pos += 1

    # Validate the inputs in a fixed order; stop at the first failure.
    requiredFiles = [
        (idListFile, "idListFile"),
        (euk, "euk"),
        (gram_pos, "gram_pos"),
        (gram_neg, "gram_neg"),
    ]
    for (path, label) in requiredFiles:
        if myfunc.checkfile(path, label) != 0:
            return 1

    idList = myfunc.ReadIDList(idListFile)
    set_euk_idlist = set(myfunc.ReadIDList(euk))
    set_gram_pos_idlist = set(myfunc.ReadIDList(gram_pos))
    set_gram_neg_idlist = set(myfunc.ReadIDList(gram_neg))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    NCBI_TaxID = ""
    for seqid in idList:
        if seqid in set_euk_idlist:
            cls = "euk"
        elif seqid in set_gram_pos_idlist:
            cls = "gram+"
        elif seqid in set_gram_neg_idlist:
            cls = "gram-"
        else:
            cls = "NA"
        record = "%s\t%s\t%s" % (seqid, NCBI_TaxID, cls)
        fpout.write(record + "\n")
    myfunc.myclose(fpout)
def main(g_params): #{{{
    """Split an ID list into one file per class.

    Recognised options:
      -h, --help    print help and return 1
      -outpath DIR  output directory; defaults to the directory of the
                    input file, or "." when that is empty
      -class FILE   class definition file, read with ReadClassDict
      -q            sets g_params['isQuiet'] to True
    The positional argument (the last one wins) is the ID list file.

    For every class returned by ReadClassDict a file
    "<outpath>/<rootname>.<class><ext>" is created, where rootname and ext
    come from the input file name.  Each ID is written to the file of its
    class; IDs without a class are reported on stderr and skipped.

    Returns 1 after help, on an unknown option or when a file check fails,
    otherwise None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    classfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-class", "--class"]:
                (classfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(classfile, "Class File") != 0:
        return 1

    if outpath == "":
        outpath = os.path.dirname(infile)
        if outpath == "":
            outpath = "."

    (id2ClassDict, classList) = ReadClassDict(classfile)
    idList = myfunc.ReadIDList(infile)

    (stem, ext) = os.path.splitext(infile)
    rootname = os.path.basename(stem)

    fpoutList = {}
    try:
        for cls in classList:
            outfile = outpath + os.sep + rootname + ".%s" % cls + ext
            fpoutList[cls] = open(outfile, "w")

        for idd in idList:
            # Fix: catch only the missing-key case.  The former bare
            # "except:" also hid unrelated errors and KeyboardInterrupt.
            try:
                cls = id2ClassDict[idd]
            except KeyError:
                msg = "id %s not in classDict" % idd
                sys.stderr.write(msg + "\n")
                continue
            fpoutList[cls].write("%s\n" % idd)
    finally:
        # Fix: close every opened output file even when an error occurs.
        for fp in fpoutList.values():
            fp.close()
def main(g_params):#{{{
    """Command-line driver for the iTOL tree drawing routines.

    Options: -h/--help, -datapath DIR, -m/-method STR (stored in
    g_params['method']), -treefile FILE, -fastafile FILE, -l FILE (ID list
    read with myfunc.ReadIDList) and -outpath DIR.  Other arguments are
    Pfam IDs; "--" makes the single argument after it an ID.

    For every Pfam ID the routine selected by g_params['method'] ("0", "1",
    "sd1", "sd2" or "sd3") is called; other values do nothing for that ID.
    If a tree file is given and the method is "linear", Itol_Tree_linear is
    called with the tree and FASTA files.

    Returns 1 after help or on an unknown option, otherwise None.
    """
    argv = sys.argv
    nArg = len(argv)
    if nArg < 2:
        PrintHelp()
        return 1

    idList = []
    idListFile = ''
    datapath = "."
    outpath = './'
    treefile = ""
    fastafile = ""

    pos = 1
    nextIsID = False
    while pos < nArg:
        token = argv[pos]
        if nextIsID:
            nextIsID = False
            idList.append(token)
            pos += 1
        elif token == "--":
            nextIsID = True
            pos += 1
        elif token[0] == "-":
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-datapath", "--datapath"):
                datapath = argv[pos + 1]
                pos += 2
            elif token in ("-m", "--m", "-method", "--method"):
                g_params['method'], pos = myfunc.my_getopt_str(argv, pos)
            elif token in ("-treefile", "--treefile"):
                treefile = argv[pos + 1]
                pos += 2
            elif token in ("-fastafile", "--fastafile"):
                fastafile = argv[pos + 1]
                pos += 2
            elif token in ("-l", "--l"):
                idListFile = argv[pos + 1]
                pos += 2
            elif token in ("-outpath", "--outpath"):
                outpath = argv[pos + 1]
                pos += 2
            else:
                print(("Error! Wrong argument:%s" % token), file=sys.stderr)
                return 1
        else:
            idList.append(token)
            pos += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) > 0:
        # The output directory is only created when there are IDs to process.
        os.system("mkdir -p %s"%outpath)

    for cnt, pfamid in enumerate(idList):
        print("================== ", cnt , pfamid, " ====================")
        method = g_params['method']
        if method == "0":
            Itol_Tree_m0(pfamid, datapath, outpath)
        elif method == "1":
            Itol_Tree_m1(pfamid, datapath, outpath)
        elif method == "sd1":
            Itol_Tree_m_sd1(pfamid, datapath, outpath)
        elif method == "sd2":
            Itol_Tree_m_sd2(pfamid, datapath, outpath)
        elif method == "sd3":
            Itol_Tree_m_sd3(pfamid, datapath, outpath)

    if treefile != "" and g_params['method'] == "linear":
        Itol_Tree_linear(treefile, fastafile, outpath)
def main(g_params): #{{{
    """Write the GO information stored in a UniProt database for each ID.

    Recognised options:
      -h, --help         print help and return 1
      -o, -outfile FILE  output file; stdout is the default handle
      -outpath DIR       parsed, but not used further in this function
      -l FILE            ID list file (read with myfunc.ReadIDList)
      -uniprotdb NAME    database name (required); the file "<NAME>0.db"
                         must pass myfunc.checkfile
      -q                 sets g_params['isQuiet'] to True
    Every other argument is a sequence ID.  "--" forces the single argument
    that follows it to be taken as an ID.

    IDs without a record in the database are skipped silently.

    Returns 1 on help, an unknown option, a missing database or a database
    that fails to open; otherwise returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    idListFile = ""
    uniprotDBname = ""
    idList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-uniprotdb", "--uniprotdb"]:
                (uniprotDBname, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if uniprotDBname == "":
        sys.stderr.write("uniprotdb not set" + "\n")
        return 1

    uniprotdbfile = "%s0.db" % uniprotDBname
    if myfunc.checkfile(uniprotdbfile, "uniprotdbfile") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        hdl = myfunc.MyDB(uniprotDBname)
        if hdl.failure:
            # Fix: the output handle used to stay open on this early return;
            # the enclosing "finally" now closes it.
            return 1
        try:
            for seqid in idList:
                data = hdl.GetRecord(seqid)
                if data is not None:
                    goinfo = GetGOInfoFromUniprotData(data)
                    WriteGOInfo(seqid, goinfo, fpout)
        finally:
            hdl.close()
    finally:
        myfunc.myclose(fpout)
def main():#{{{
    """Generate random pairs from a list of IDs and write them out.

    Recognised options:
      -h, --help          print help and exit via sys.exit()
      -o, -outfile FILE   output file; stdout is the default handle
      -m, -method INT     0: myfunc.GenerateRandomPair (default),
                          1: myfunc.GenerateRandomPair_no_repeat_use
      -l, -listfile FILE  ID list file (read with myfunc.ReadIDList)
      -maxpair INT        maximum number of pairs (default 10,000,000)
      -seed INT           random seed passed to the generator
      -q                  accepted; has no effect in this function
    Every other argument is an ID.  "--" forces the single argument that
    follows it to be taken as an ID.

    Each pair is written as "<id1> <id2>" on its own line.

    Returns 1 on an unknown option, an unsupported method or fewer than two
    IDs; returns 0 on success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        sys.exit()

    max_numpair = 10*1000*1000
    isQuiet = False
    rand_seed = None
    idList = []
    idListFile = ""
    outfile = ""
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                sys.exit()
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-l", "--l", "-listfile", "--listfile"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-maxpair", "--maxpair"]:
                max_numpair, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg == "-q":
                isQuiet = True  # kept for command-line compatibility
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    numseqid = len(idList)
    if numseqid <= 0:
        sys.stderr.write("List file is empty." + "\n")
        return 1
    elif numseqid < 2:
        sys.stderr.write("Too few items. At least 2 are required." + "\n")
        return 1

    # Fix: any method other than 0 or 1 used to leave "pairlist" unbound and
    # crash with UnboundLocalError after the output file had been created.
    if method not in (0, 1):
        msg = "Error! Unsupported method %s. It must be 0 or 1." % (method)
        sys.stderr.write(msg + "\n")
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    if method == 0:
        pairlist = myfunc.GenerateRandomPair(numseqid, max_numpair, rand_seed)
    else:
        pairlist = myfunc.GenerateRandomPair_no_repeat_use(
            numseqid, max_numpair, rand_seed)

    for pair in pairlist:
        record = "%s %s" %(idList[pair[0]], idList[pair[1]])
        fpout.write(record + "\n")
    myfunc.myclose(fpout)
    return 0
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 outfile = "" fileListFile = "" fileList = [] pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3) threshold_Fraction_Group_2 = 0.05 threshold_NumSeq_Group_2 = 2 tableinfoFile = "" pdbtospFile = "" sprotACListFile = "" threshold_g12_seqidt = 20.0 topoalnFile = "" aapath = "" i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: fileList.append(argv[i]) isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-o", "--o", "-outfile"]: (outfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-l", "--l"]: (fileListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqidttype", "--seqidttype"]: (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-tableinfo", "--tableinfo"]: (tableinfoFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-topoaln", "--topoaln"]: (topoalnFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-aapath", "--aapath"]: (aapath, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-thncls2", "--thncls2"]: (threshold_NumSeq_Group_2, i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-thfrac2", "--thfrac2"]: (threshold_Fraction_Group_2, i) = myfunc.my_getopt_float(argv, i) elif argv[i] in ["-pfamdef", "--pfamdef"]: (pfamDefFile, i) = myfunc.my_getopt_str(argv, i) elif (argv[i] in ["-pdbtosp", "--pdbtosp"]): pdbtospFile, i = myfunc.my_getopt_str(argv, i) elif (argv[i] in ["-sprot", "--sprot"]): sprotACListFile, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: fileList.append(argv[i]) i += 1 if fileListFile != "": fileList += myfunc.ReadIDList(fileListFile) if len(fileList) < 1: print >> sys.stderr, "No input set. 
exit" return 1 if myfunc.checkfile(topoalnFile, "topoalnFile") != 0: return 1 if myfunc.checkfile(aapath, "aapath") != 0: return 1 if outfile == "": print >> sys.stderr, "outfile not set. Exit" return 1 outpath = myfunc.my_dirname(outfile) if not os.path.exists(outpath): cmd = ["mkdir", "-p", outpath] try: subprocess.check_output(cmd) except subprocess.CalledProcessError, e: print e return 1
cmd = ["mkdir", "-p", outpath] try: subprocess.check_output(cmd) except subprocess.CalledProcessError, e: print e return 1 (pfamidDefDict, clanidDefDict) = lcmp.ReadPfamDefFile(pfamDefFile) # Read in pdbtosp map if pdbtospFile != "": (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile) else: (pdb2uniprotMap, uniprot2pdbMap) = ({}, {}) # Read in swissprot ac list if sprotACListFile != "": swissprotAcSet = set(myfunc.ReadIDList(sprotACListFile)) else: swissprotAcSet = set([]) fpout = myfunc.myopen(outfile, sys.stdout, "w", False) if tableinfoFile != "": pairalnStat = lcmp.ReadPairAlnTableInfo(tableinfoFile) if pairalnStat != {}: outfile_pair_g12 = outfile + ".pair_group1_2.txt" fpout_pair = myfunc.myopen(outfile_pair_g12, sys.stdout, "w", False) else: fpout_pair = None selectedPairList = [] # select pairs for draw pairwise topology alignment # for each family, select the pair with highest sequence identity between
def main(g_params): #{{{
    """Report for every input file whether IsUniqueSeq finds it unique.

    Recognised options:
      -h, --help         print help and return 1
      -o, -outfile FILE  output file; stdout is the default handle
      -m STR             g_params['method'], must be "id" or "seq"
      -md5 yes|no        sets g_params['isUseMD5'] (case-insensitive)
      -l FILE            list of input files (read with myfunc.ReadIDList)
      -q                 sets g_params['isQuiet'] to True
    Every other argument is an input file.  "--" forces the single argument
    that follows it to be taken as an input file.

    For each file one line "<file> TAB yes|no" is written: "yes" when
    IsUniqueSeq returns 1, "no" for any other non-negative status.  A
    negative status is reported on stderr as a read failure.

    Returns 1 on help, invalid arguments, missing input or a bad method;
    otherwise returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m"]:
                (g_params['method'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-md5", "--md5"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                if tmpstr.lower() == "yes":
                    g_params['isUseMD5'] = True
                elif tmpstr.lower() == "no":
                    g_params['isUseMD5'] = False
                else:
                    sys.stderr.write(
                        "Bad syntax. option -md5 must be followed by yes or no"
                        + "\n")
                    return 1
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("%s %s\n" % ("Error! Wrong argument:", arg))
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        msg = "%s: no input file is set. exit" % (sys.argv[0])
        sys.stderr.write(msg + "\n")
        # Fix: the message announced an exit but execution continued.
        return 1

    if g_params['method'] not in ["id", "seq"]:
        msg = "%s: bad method \"%s\"" % (sys.argv[0], g_params['method'])
        sys.stderr.write(msg + "\n")
        # Fix: stop here instead of calling IsUniqueSeq with a bad method.
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for infile in fileList:
        status = IsUniqueSeq(infile, g_params['method'], g_params['isUseMD5'])
        if status >= 0:
            if status == 1:
                yes_or_no = "yes"
            else:
                yes_or_no = "no"
            record = "%s\t%s" % (infile, yes_or_no)
            fpout.write(record + "\n")
        else:
            msg = "%s: Failed to read file %s" % (sys.argv[0], infile)
            sys.stderr.write(msg + "\n")
    myfunc.myclose(fpout)
def main(g_params): #{{{ argv = sys.argv numArgv = len(sys.argv) if numArgv < 2: PrintHelp() return 1 infile = "" outpath = "./" isQuiet = False tableinfoFile = "" cmpclassList = [] restrictIDListFile = "" signalpFile = "" dupFile = "" outfile = "" i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: infile = sys.argv[i] isNonOptionArg = False i += 1 elif sys.argv[i] == "--": isNonOptionArg = True i += 1 elif sys.argv[i][0] == "-": if sys.argv[i] in ["-h", "--help"]: PrintHelp() sys.exit() elif argv[i] in ["-o", "--o"]: (outfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-cmpclass", "--cmpclass"]: (tmpstr, i) = myfunc.my_getopt_str(argv, i) cmpclassList.append(tmpstr) elif argv[i] in ["-signalp", "--signalp"]: (signalpFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-restrictidlist", "--restrictidlist"]: (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i) g_params['isRestrictIDListSet'] = True elif argv[i] in ["-dup", "--dup", "-dupfile", "--dupfile"]: (dupFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-rmsp", "--rmsp"]: g_params['isRemoveSignalP'] = True i += 1 elif argv[i] in ["-rmdup", "--rmdup"]: g_params['isRemoveDup'] = True i += 1 elif argv[i] in ["-seq2fammap", "--seq2fammap"]: (seq2famMapfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqidttype", "--seqidttype"]: g_params['seqidttype'], i = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-tableinfo", "--tableinfo"]: tableinfoFile, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-min-seqidt", "--min-seqidt"]: g_params['minSeqIDT'], i = myfunc.my_getopt_float(argv, i) elif argv[i] in ["-max-seqidt", "--max-seqidt"]: g_params['maxSeqIDT'], i = myfunc.my_getopt_float(argv, i) elif argv[i] in ["-evodist", "--evodist"]: g_params['isEvodist'] = True i += 1 elif argv[i] in ["-alignrange", "--alignrange"]: g_params['alignrange'], i = myfunc.my_getopt_str(argv, i) if not g_params['alignrange'] in ['all', 'full', 'part']: print >> sys.stderr, 
"alignrange must be one of [all, full, part]" return 1 else: if g_params['alignrange'] == 'full': g_params['alignrange'] = 'FULL_ALIGNED' elif g_params['alignrange'] == 'part': g_params['alignrange'] = 'PART_ALIGNED' elif argv[i] in ["-debug", "--debug"]: if argv[i + 1][0].lower() == 'y': g_params['isDEBUG'] = True else: g_params['isDEBUG'] = False i += 2 elif argv[i] in [ "-debug-unmapped-position", "--debug-unmapped-position" ]: DEBUG_UNMAPPED_TM_POSITION = 1 i += 2 elif sys.argv[i] == "-q": isQuiet = True i += 1 else: print >> sys.stderr, "Error! Wrong argument:", sys.argv[i] return -1 else: infile = sys.argv[i] i += 1 if infile == "": print >> sys.stderr, "infile not set. Exit." return -1 elif not os.path.exists(infile): print >> sys.stderr, "infile %s does not exists. Exit." % infile try: fpin = open(infile, "rb") except IOError: print >> sys.stderr, "Failed to open input file %s" % (infile) return -1 pairalnStat = {} if g_params['seqidttype'] != 0: if tableinfoFile == "" or not os.path.exists(tableinfoFile): print >> sys.stderr, "tableinfoFile must be set when seqidttype is set to 1 or 2" print >> sys.stderr, "but seqidttype = %d is set. Exit." 
% g_params[ 'seqidttype'] return -1 pairalnStat = lcmp.ReadPairAlnTableInfo(tableinfoFile) rootname = os.path.basename(os.path.splitext(infile)[0]) binpath = os.path.dirname(sys.argv[0]) signalpDict = {} if signalpFile != "": signalpDict = lcmp.ReadSignalPDict(signalpFile) if signalpDict != {}: g_params['isSignalPSet'] = True dupPairList = [] if dupFile != "": dupPairList = lcmp.ReadDupPairList(dupFile) if len(dupPairList) > 0: g_params['isDupSet'] = True dupPairSet = set(dupPairList) restrictIDSet = set([]) if restrictIDListFile != "": restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile)) rltyDict = {} fpout = myfunc.myopen(outfile, sys.stdout, "w", False) unprocessedBuffer = "" cntTotalReadInRecord = 0 cntTotalOutputRecord = 0 isEOFreached = False while 1: buff = fpin.read(BLOCK_SIZE) if buff == "": isEOFreached = True buff = unprocessedBuffer + buff pairCmpRecordList = [] unprocessedBuffer = lcmp.ReadPairCmpResultFromBuffer( buff, pairCmpRecordList) AddTableInfo(pairCmpRecordList, pairalnStat) AddSignalPInfo(pairCmpRecordList, signalpDict) AddDupInfo(pairCmpRecordList, dupPairSet) cntTotalReadInRecord += len(pairCmpRecordList) pairCmpRecordList = FilterPairCmpResult(pairCmpRecordList, cmpclassList, rltyDict, restrictIDSet) if len(pairCmpRecordList) > 0: lcmp.WritePairCmpRecord(pairCmpRecordList, cntTotalOutputRecord, fpout) cntTotalOutputRecord += len(pairCmpRecordList) if isEOFreached == True: break fpin.close() print "cntTotalReadInRecord =", cntTotalReadInRecord print "cntTotalOutputRecord =", cntTotalOutputRecord myfunc.myclose(fpout) return 0
def main(g_params):  #{{{
    """Write the UniProt table info for a list of IDs.

    Command line options (read from sys.argv):
      -o, --o, -outfile, --outfile FILE     output file (stdout when not given)
      -datafile, --datafile FILE            table info file to read
      -l, --l, -listfile, --listfile FILE   file with a list of IDs
      -q                                    sets g_params['isQuiet']
      -h, --help                            print help
    Any other argument, and the single argument after "--", is taken as an
    ID.

    The data file is loaded with ReadUniprotInfo and the IDs are handed to
    UnirefSeqid2UniprotTableInfo together with the opened output.

    Returns 1 on usage errors (no ID, missing data file, option without its
    value), 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    datafile = "/data3/data/uniprot/uniprot_trembl.tableinfo"
    idList = []
    idListFile = ""

    outfileOpts = ["-o", "--o", "-outfile", "--outfile"]
    datafileOpts = ["-datafile", "--datafile"]
    listfileOpts = ["-l", "--l", "-listfile", "--listfile"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in outfileOpts + datafileOpts + listfileOpts:
                # These options need a value; report a trailing option as a
                # usage error instead of failing with an IndexError.
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! Option %s must be followed by a value" % (argv[i])
                    return 1
                if argv[i] in outfileOpts:
                    outfile = argv[i + 1]
                elif argv[i] in datafileOpts:
                    datafile = argv[i + 1]
                else:
                    idListFile = argv[i + 1]
                i += 2
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    # A list file that does not exist is ignored, as before.
    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) < 1:
        print >> sys.stderr, "id not set. Exit"
        return 1
    if not os.path.exists(datafile):
        print >> sys.stderr, "datafile %s not set or not exists. Exit" % (
            datafile)
        return 1

    uniprotInfoDict = ReadUniprotInfo(datafile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        UnirefSeqid2UniprotTableInfo(idList, uniprotInfoDict, fpout)
    finally:
        myfunc.myclose(fpout)
    return 0
print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: fileList.append(argv[i]) i += 1 if outpath != "" and not os.path.exists(outpath): cmd = ["mkdir", "-p", outpath] try: subprocess.check_call(cmd) except subprocess.CalledProcessError, e: print e raise if fileListFile != "": fileList += myfunc.ReadIDList(fileListFile) if len(fileList) < 1: print >> sys.stderr, "%s: no input file is set. exit" % (sys.argv[0]) if not g_params['method'] in ["id", "seq"]: print >> sys.stderr, "%s: bad method \"%s\"" % (sys.argv[0], g_params['method']) for i in xrange(len(fileList)): RemoveDupSeq(fileList[i], outpath, g_params['method'], g_params['isUseMD5']) #}}}
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6), (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10), (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13), (11, 13), (12, 14)] outfile = "" infile = "" pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3) signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % ( DATADIR3) #seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2clanid"%(DATADIR3) #seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2pfamid"%(DATADIR3) seqid2clanidMapFile = "" seqid2pfamidMapFile = "" tm_pfamidListFile = "" tm_clanidListFile = "" pfamid2seqidMapFile = "" clanid2seqidMapFile = "" dbname_predTM = "" pairlistwithpfamidFile = "" pfamtype = "" pairListFile = "" #classList_TableNumTMHeatMap = ["ALL", "RMSP"] classList_TableNumTMHeatMap = ["ALL"] i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: infile = argv[i] isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-o", "--o", "-outfile"]: (outfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-outpath", "--outpath"]: (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-l", "--l"]: (fileListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamdef", "--pfamdef"]: (pfamDefFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-signalp", "--signalp"]: (signalpFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-mp", "--mp"]: g_params[ 'pairwise_comparison_method'], i = myfunc.my_getopt_int( argv, i) elif argv[i] in ["-mindiffpair", 
"--mindiffpair"]: g_params['mindiffpair'], i = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-pfamtype", "--pfamtype"]: pfamtype, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-clanidlist", "--clanidlist"]: (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamidlist", "--pfamidlist"]: (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqid2clanid", "--seqid2clanid"]: (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqid2pfamid", "--seqid2pfamid"]: (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamid2seqid", "--pfamid2seqid"]: (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-clanid2seqid", "--clanid2seqid"]: (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pairlistwithpfamid", "--pairlistwithpfamid"]: (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-predTMdbname", "--predTMdbname"]: (dbname_predTM, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pairlist", "--pairlist"]: (pairListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-winsize", "--winsize"]: (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-outname", "--outname"]: (g_params['outname'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 elif argv[i] in ["-prokar", "--prokar"]: g_params['isOnlyAnaProkar'] = True i += 1 elif argv[i] in ["-eukar", "--eukar"]: g_params['isOnlyAnaEukar'] = True i += 1 else: print >> sys.stderr, "Error! 
Wrong argument:", argv[i] return 1 else: infile = argv[i] i += 1 if myfunc.checkfile( infile, "%s (line %d): infile" % (__file__, inspect.currentframe().f_lineno)) != 0: return 1 dirpath = myfunc.my_dirname(infile) # try to obtain Pfam family tag tag = "" if pfamtype != "": if pfamtype.upper().find("FAM") != -1: tag = ".Family" elif pfamtype.upper().find("DOM") != -1: tag = ".Domain" elif pfamtype.upper().find("REP") != -1: tag = ".Repeat" elif pfamtype.upper().find("MOT") != -1: tag = ".Motif" else: tag = "" else: if infile.find(".Family.") != -1: tag = ".Family" elif infile.find(".Domain.") != -1: tag = ".Domain" elif infile.find(".Repeat.") != -1: tag = ".Repeat" elif infile.find(".Motif.") != -1: tag = ".Motif" else: tag = "" if seqid2clanidMapFile == "": seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % ( DATADIR3) if myfunc.checkfile( seqid2clanidMapFile, "%s (line %d): seqid2clanidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if seqid2pfamidMapFile == "": seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % ( DATADIR3, tag) if myfunc.checkfile( seqid2pfamidMapFile, "%s (line %d): seqid2pfamidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if pfamid2seqidMapFile == "": pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % ( DATADIR3) if myfunc.checkfile( pfamid2seqidMapFile, "%s (line %d): pfamid2seqidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if clanid2seqidMapFile == "": clanid2seqidMapFile = 
"%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % ( DATADIR3, tag) if myfunc.checkfile( clanid2seqidMapFile, "%s (line %d): clanid2seqidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if tm_pfamidListFile == "": tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % ( DATADIR3, tag) if myfunc.checkfile( tm_pfamidListFile, "%s (line %d): tm_pfamidListFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if tm_clanidListFile == "": tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % ( DATADIR3) if myfunc.checkfile( tm_clanidListFile, "%s (line %d): tm_clanidListFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if dbname_predTM == "": dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % ( DATADIR3) if myfunc.checkfile( "%s0.db" % (dbname_predTM), "%s (line %d): dbname_predTM" % (__file__, inspect.currentframe().f_lineno)): return 1 if g_params['isOnlyAnaProkar']: prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % ( DATADIR3) g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile)) if len(g_params['prokarSeqIDSet']) < 1: return 1 if g_params['isOnlyAnaEukar']: eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % ( DATADIR3) g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile)) if len(g_params['eukarSeqIDSet']) < 1: return 1 if pairlistwithpfamidFile == "": pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % ( dirpath) if myfunc.checkfile( pairlistwithpfamidFile, "%s 
(line %d): pairlistwithpfamidFile" % (__file__, inspect.currentframe().f_lineno)): return 1 pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile) usedPfamIDSet = set( pfamid_2_seqidpair_Dict.keys()) # pfamids used in pair selection if pairListFile != "": li = myfunc.ReadPairList(pairListFile) SPE_PAIR_LIST = [] for tup in li: SPE_PAIR_LIST.append((int(tup[0]), int(tup[1]))) (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile) signalpDict = lcmp.ReadSignalPDict(signalpFile) seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile) seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile) clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile) pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile) tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile) tm_clanidList = myfunc.ReadIDList(tm_clanidListFile) tm_pfamidSet = set(tm_pfamidList) tm_clanidSet = set(tm_clanidList) hdl_predTM = myfunc.MyDB(dbname_predTM) if not hdl_predTM.failure: idSet_TMpro = set(hdl_predTM.indexedIDList) else: idSet_TMpro = set([]) #classList_TableNumTMHeatMap = ["ALL", "RMSP", "RMDUP"] #alignrangeList = ['FULL_ALIGNED', 'all', 'PART_ALIGNED'] alignrangeList = ['FULL_ALIGNED'] if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']): cmd = ["mkdir", "-p", g_params['outpath']] try: subprocess.check_call(cmd) except subprocess.CalledProcessError, e: print e return 1
def main(g_params):  #{{{
    """Collect pairs from the command line and pass them to WriteHTML.

    Command line options (read from sys.argv):
      -outpath, --outpath DIR     output directory (required; created with
                                  "mkdir -p" when missing)
      -datapath, --datapath DIR   data directory (required, must exist)
      -l, --l FILE                file with one pair per line
      -q, --q                     sets g_params['isQuiet']
      -h, --help                  print help
    Any other argument, and the single argument after "--", is one pair; it
    is split on whitespace into a list.

    Returns 1 on usage errors and 0 after WriteHTML has been called.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    datapath = ""
    pairListFile = ""
    pairList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            pairList.append(argv[i].split())
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-datapath", "--datapath"]:
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            pairList.append(argv[i].split())
            i += 1

    if pairListFile != "":
        pairList += [
            x.split() for x in myfunc.ReadIDList(pairListFile, delim="\n")
        ]

    # Count the collected pairs, not the characters of the list file name.
    numpair = len(pairList)
    if numpair < 1:
        print >> sys.stderr, "no pair set. exit"
        return 1

    if datapath == "":
        print >> sys.stderr, "datapath not set"
        return 1
    elif not os.path.exists(datapath):
        print >> sys.stderr, "datapath %s does not exist" % (datapath)
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set"
        return 1
    elif not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_call(cmd)

    WriteHTML(pairList, datapath, outpath)
    return 0
def main(g_params):  #{{{
    """Benchmark predicted topologies against a file of real topologies.

    Command line options (read from sys.argv):
      -o, --o, -outfile FILE             output file (stdout when not given)
      -owrong, --owrong FILE             second output passed to Benchmark
      -realtopo, --realtopo FILE         fasta file with the real topologies
      -seqfile, --seqfile FILE           optional fasta file with sequences
      -mode, --mode STR                  "tp" or "tps"; derived from
                                         path_predtopo when empty
      -path_predtopo, --path_predtopo P  stored in g_params['path_predtopo']
      -basename, --basename STR          stored in g_params['basename']
      -restrictidlist FILE               IDs stored in g_params['restrictIDset']
      -q/--q, -rmsp/--rmsp, -debug/--debug   boolean switches in g_params
      -h, --help                         print help
    Positional arguments are not accepted.

    Returns 1 on usage or input errors, 0 after Benchmark has run.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    real_topofile = ""
    seqfile = ""
    restrictIDListFile = ""
    outfile_wrong_predtopo = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # This program takes no positional argument, not even after "--".
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-owrong", "--owrong"]:
                (outfile_wrong_predtopo, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-realtopo", "--realtopo"]:
                (real_topofile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqfile", "--seqfile"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mode", "--mode"]:
                (g_params['mode'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-path_predtopo", "--path_predtopo"]:
                (g_params['path_predtopo'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-basename", "--basename"]:
                (g_params['basename'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-restrictidlist", "--restrictidlist"]:
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-rmsp", "--rmsp"]:
                g_params['isRMSP'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(g_params['path_predtopo'], "path_predtopo") != 0:
        return 1
    if g_params['basename'] == "":
        print >> sys.stderr, "%s: basename not set. exit" % (argv[0])
        return 1
    if myfunc.checkfile(real_topofile, "real_topofile") != 0:
        return 1

    if restrictIDListFile != "":
        g_params['restrictIDset'] = set(myfunc.ReadIDList(restrictIDListFile))
        g_params['isRestrictIDList'] = True

    if g_params['mode'] == "":
        # "topcons_single" must be tested first, it contains "topcons".
        if g_params['path_predtopo'].find("topcons_single") >= 0:
            g_params['mode'] = "tps"
        elif g_params['path_predtopo'].find("topcons") >= 0:
            g_params['mode'] = "tp"
        else:
            print >> sys.stderr, "mode not set, and can not be recognized from path_predtopo=%s" % (
                g_params['path_predtopo'])
            return 1

    if g_params['mode'] not in ["tp", "tps"]:
        print >> sys.stderr, "Unrecognized mode = %s" % (g_params['mode'])
        return 1

    (real_idlist, real_annolist,
     real_topolist) = myfunc.ReadFasta(real_topofile)
    seqDict = {}
    if seqfile != "" and os.path.exists(seqfile):
        (seq_idlist, seq_annolist, seqlist) = myfunc.ReadFasta(seqfile)
        seqDict = dict(zip(seq_idlist, seqlist))

    if len(real_idlist) <= 0:
        print >> sys.stderr, "Failed to read real_topofile %s" % (
            real_topofile)
        return 1

    real_topodict = dict(zip(real_idlist, real_topolist))

    # Split the IDs by number of TM helices: exactly one, or more than one.
    idSet_single = set([])
    idSet_multi = set([])
    for seqid in real_topodict:
        numTM = myfunc.CountTM(real_topodict[seqid])
        if numTM == 1:
            idSet_single.add(seqid)
        elif numTM > 1:
            idSet_multi.add(seqid)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_wrong = myfunc.myopen(outfile_wrong_predtopo, None, "w", False)
    try:
        #for TM_type in ["All_Alpha", "Single", "Multi"]:
        for TM_type in ["All_Alpha"]:
            if TM_type == "All_Alpha":
                sub_real_topodict = real_topodict
            else:
                sub_real_topodict = {}
                for seqid in real_topodict:
                    topo = real_topodict[seqid]
                    numTM = myfunc.CountTM(topo)
                    if TM_type == "Single" and numTM == 1:
                        sub_real_topodict[seqid] = topo
                    elif TM_type == "Multi" and numTM > 1:
                        sub_real_topodict[seqid] = topo
            Benchmark(sub_real_topodict, idSet_single, idSet_multi, TM_type,
                      fpout, fpout_wrong, seqDict)
    finally:
        # Both outputs are closed; the second one used to be left open.
        myfunc.myclose(fpout)
        myfunc.myclose(fpout_wrong)
    return 0
def main(g_params):  #{{{
    """Print the file name that ID2File resolves for every given ID.

    Options read from sys.argv: -o/--o/-outfile (output file), -ext/--ext
    (file extension, required), -datapath/--datapath (required, must exist),
    -l/--l (file with a list of IDs), -q/--q (sets g_params['isQuiet']) and
    -h/--help. Any other argument, and the single argument after "--", is
    an ID.

    The ID to path map is read from "id2pathmap.txt" below datapath. Only
    non-empty results of ID2File are printed, one per line.

    Returns 1 on usage errors; nothing is returned explicitly otherwise.
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    datapath = ""
    idListFile = ""
    idList = []
    ext = ""

    pos = 1
    nextIsID = False
    while pos < argc:
        arg = args[pos]

        # An ID: either forced by a preceding "--" or not starting with "-".
        if nextIsID == True or (arg != "--" and arg[0] != "-"):
            idList.append(arg)
            nextIsID = False
            pos += 1
            continue

        if arg == "--":
            nextIsID = True
            pos += 1
            continue

        if arg in ["-h", "--help"]:
            PrintHelp()
            return 1

        if arg in ["-q", "--q"]:
            g_params['isQuiet'] = True
            pos += 1
        elif arg in ["-o", "--o", "-outfile"]:
            outfile, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ["-ext", "--ext"]:
            ext, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ["-datapath", "--datapath"]:
            datapath, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ["-l", "--l"]:
            idListFile, pos = myfunc.my_getopt_str(args, pos)
        else:
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    if ext == "":
        print >> sys.stderr, "file extension not set. exit"
        return 1

    if datapath == "":
        print >> sys.stderr, "datapath not set. exit"
        return 1
    if not os.path.exists(datapath):
        print >> sys.stderr, "datapath %s does not exist. exit" % (datapath)
        return 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    # stdout is used directly unless an output file was requested.
    writeToFile = (outfile != "")
    if writeToFile:
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    else:
        fpout = sys.stdout

    id2pathmapfile = "%s%s%s" % (datapath, os.sep, "id2pathmap.txt")
    id2pathMapDict = myfunc.ReadIDPathMapDict(id2pathmapfile)

    for seqid in idList:
        resolved = ID2File(seqid, datapath, id2pathMapDict, ext)
        if resolved == "":
            continue
        print >> fpout, resolved

    if writeToFile:
        myfunc.myclose(fpout)
def main(g_params):  #{{{
    """Filter a seqid-to-family map file by key IDs and/or content IDs.

    Command line options (read from sys.argv):
      -o, --o FILE              output file (stdout when not given)
      -idlist1, --idlist1 FILE  ID list passed to the filter as key IDs
      -idlist2, --idlist2 FILE  ID list passed to the filter as content IDs
      -q                        sets g_params['isQuiet']
      -h, --help                print help
    Any other argument, and the single argument after "--", is the input
    file; the last one given is used.

    When at least one ID list option is given the work is done by
    Filter_seqid2fam_map. When none is given the input file is copied
    unchanged to the output.

    Returns 1 on usage errors, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    outfile = ""
    keyIDListFile = ""
    contentIDListFile = ""
    isKeyIDSet = False
    isContentIDSet = False

    valueOpts = ["-o", "--o", "-idlist1", "--idlist1", "-idlist2", "--idlist2"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in valueOpts:
                # Report a trailing option as a usage error instead of
                # failing with an IndexError.
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! Option %s must be followed by a value" % (argv[i])
                    return 1
                if argv[i] in ["-o", "--o"]:
                    outfile = argv[i + 1]
                elif argv[i] in ["-idlist1", "--idlist1"]:
                    keyIDListFile = argv[i + 1]
                    isKeyIDSet = True
                else:
                    contentIDListFile = argv[i + 1]
                    isContentIDSet = True
                i += 2
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if infile == "":
        print >> sys.stderr, "infile not set. Exit"
        return 1
    elif not os.path.exists(infile):
        print >> sys.stderr, "infile %s does not exist. Exit" % (infile)
        return 1

    # Real (empty) sets, so both names have one type whether or not a list
    # file was given.
    keyIDSet = set([])
    contentIDSet = set([])
    if keyIDListFile != "":
        keyIDSet = set(myfunc.ReadIDList(keyIDListFile))
    if contentIDListFile != "":
        contentIDSet = set(myfunc.ReadIDList(contentIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        if isKeyIDSet or isContentIDSet:
            # An ID list option that yields an empty set is still passed on
            # with its flag set.
            Filter_seqid2fam_map(infile, keyIDSet, contentIDSet, isKeyIDSet,
                                 isContentIDSet, fpout)
        else:
            # No ID list at all: copy the input to the selected output. This
            # replaces a shell "cat", which always wrote to stdout and built
            # a shell command from the file name.
            with open(infile, "r") as fpin:
                for line in fpin:
                    fpout.write(line)
    finally:
        myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Generate sequence pairs within families for a list of family IDs.

    Options read from sys.argv select the outputs (-o, -outwithfamid,
    -outfam2seqmap, -outwithpdb), the inputs (-mapfile, -pdbtosp, -l,
    -tmprolist/-restrictlist), the limits (-maxseq, -maxpair), the random
    seed (-seed), the method (-m/-method: 0, 1 or 2) and the switches -q
    and -onlypdb. Any other argument, and the single argument after "--",
    is an ID.

    Depending on the method, GeneratePairWithinFam_m_0, _m_1 or _m_2 is
    called; any other method value calls none of them.

    Returns 1 on usage or input errors and 0 otherwise.
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    # Defaults
    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
    )
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""

    pos = 1
    nextIsID = False
    while pos < argc:
        arg = args[pos]

        # An ID: either forced by a preceding "--" or not starting with "-".
        if nextIsID == True or (arg != "--" and arg[0] != "-"):
            idList.append(arg)
            nextIsID = False
            pos += 1
            continue

        if arg == "--":
            nextIsID = True
            pos += 1
            continue

        if arg in ["-h", "--help"]:
            PrintHelp()
            return 1

        # Switches without a value
        if arg in ["-q"]:
            g_params['isQuiet'] = True
            pos += 1
        elif arg in ["-onlypdb", "--onlypdb"]:
            g_params['isOnlyPDB'] = True
            pos += 1
        # Options with a string value
        elif arg in ["-o", "--o", "-outfile", "--outfile"]:
            (outfile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ["-outwithfamid", "--outwithfamid"]:
            (outfile_with_famid, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ["-outfam2seqmap", "--outfam2seqmap"]:
            (outfile_fam2seqmap, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ["-outwithpdb", "--outwithpdb"]:
            (outfile_with_pdb, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in [
                "-tmprolist", "--tmprolist", "-restrictlist",
                "--restrictlist"
        ]:
            (restrictIDListFile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ["-mapfile", "--mapfile"]:
            (mapfile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ["-pdbtosp", "--pdbtosp"]:
            (pdbtospFile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ["-l", "--l"]:
            (idListFile, pos) = myfunc.my_getopt_str(args, pos)
        # Options with an integer value
        elif arg in ["-seed", "--seed"]:
            (rand_seed, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ["-maxseq", "--maxseq"]:
            (maxseq_for_fam, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ["-maxpair", "--maxpair"]:
            (maxpair_for_fam, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ["-m", "--m", "-method", "--method"]:
            (method, pos) = myfunc.my_getopt_int(args, pos)
        else:
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) < 1:
        print >> sys.stderr, "no ID set. exit"
        return 1

    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1
    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        pdbMaps = myfunc.ReadPDBTOSP(pdbtospFile)
        (pdb2uniprotMap, uniprot2pdbMap) = pdbMaps
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    if g_params['isOnlyPDB'] == True:
        if pdbtospFile == "":
            print >> sys.stderr, "onlypdb is true but pdbtospFile is not set. exit."
            return 1
        if g_params['uniprotidlist_with_pdb'] == set([]):
            print >> sys.stderr, "onlypdb is true but uniprotidlist_with_pdb is empty. exit."
            return 1

    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))
    else:
        restrictIDSet = set([])

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)

    if method == 0:
        GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                  maxseq_for_fam, rand_seed, fpout,
                                  fpout_withfamid)
    elif method == 1:
        GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                  maxpair_for_fam, rand_seed, fpout,
                                  fpout_withfamid, fpout_fam2seqmap)
    elif method == 2:  #all to all
        GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                  fpout_withfamid, fpout_withpdb)

    for handle in (fpout, fpout_withfamid, fpout_withpdb, fpout_fam2seqmap):
        myfunc.myclose(handle)

    return 0
def main(g_params):#{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 infile = "" progList = [] progListFile = "" outpath = "" i = 1 isNonOptionArg=False while i < numArgv: if isNonOptionArg == True: infile = argv[i] isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-prog", "--prog"]: tmpstr, i = myfunc.my_getopt_str(argv, i) progList.append(tmpstr) elif argv[i] in ["-gzip", "--gzip"]: tmpstr, i = myfunc.my_getopt_str(argv, i) if tmpstr.upper()[0] == "-": print >> sys.stderr, "Bad argument, -gzip should be"\ " followed by yes or no" return 1 elif tmpstr.upper()[0] == "Y": g_params['isGzip'] = True else: g_params['isGzip'] = False elif argv[i] in ["-num", "--num"]: g_params['num_per_split'], i = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-proglist", "--proglist"]: progListFile, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-outpath", "--outpath"]: outpath, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-q"]: g_params['isQuiet'] = True; i += 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: infile = argv[i] i += 1 if myfunc.checkfile(infile,"infile") != 0: return 1 inputList = ReadInputList(infile) # [(filename, numseq)] inputList = sorted(inputList, key=lambda x:x[1], reverse=False) rtname_infile = os.path.basename(os.path.splitext(infile)[0]) # get progList if len(progList) == 0 and progListFile == "": progList = default_progList else: if progListFile != "": tmp_list = myfunc.ReadIDList(progListFile) if len(tmp_list) == 0: print >> sys.stderr, "progListFile %s does not exist or empty"%( progListFile) return 1 else: progList += tmp_list if len(progList) == 0: print >> sys.stderr, "progList is empty. exit" return 1 if outpath != "" and not os.path.exists(outpath): try: subprocess.check_output(["mkdir", "-p", outpath]) except subprocess.CalledProcessError, e: print e return 1