def buildLabelFile(labelFile, pathDir):
    """Build the ``label`` and ``labelsAtop`` files from a two-column label file.

    Every non-blank line of *labelFile* must contain exactly two
    tab-separated fields: a strain name followed by a host name. Records are
    numbered from 1 in input order and written to two files:

    * ``pathDir + "label"``      -- ``<index><TAB><strain>`` per record
    * ``pathDir + "labelsAtop"`` -- ``<index> <host>`` per record, with the
      spaces of the host name replaced by underscores

    :param labelFile: path of the input label file (resolved with
        relativeToAbsolutePath before being opened).
    :param pathDir: prefix prepended verbatim to the two output file names;
        it is concatenated as-is, so it has to end with a path separator to
        designate a directory.
    :return: tuple ``(label path, labelsAtop path, strain names in order)``.
    :raises ValueError: if a non-blank line does not hold exactly two
        tab-separated fields.
    """
    labelFile = relativeToAbsolutePath(labelFile)
    labelPath = pathDir + "label"
    labelAtopPath = pathDir + "labelsAtop"
    orderlist = []
    with open(labelFile, "r") as labelFileOpen, \
            open(labelAtopPath, "w") as labelAtopOut, \
            open(labelPath, "w") as labelOut:
        index = 0
        for lineNumber, line in enumerate(labelFileOpen, start=1):
            record = line.rstrip()
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no record: skip them instead of failing on unpack.
            if not record:
                continue
            fields = record.split("\t")
            if len(fields) != 2:
                raise ValueError(
                    "%s, line %i: expected 2 tab-separated fields, found %i"
                    % (labelFile, lineNumber, len(fields)))
            souche, hote = fields
            # Only real records consume an index, so numbering stays
            # contiguous even when blank lines were skipped.
            index += 1
            hote = hote.replace(" ", "_")
            orderlist.append(souche)
            labelAtopOut.write("%i %s\n" % (index, hote))
            labelOut.write("%i\t%s\n" % (index, souche))
    return labelPath, labelAtopPath, orderlist
required=False, dest='tableFileOut', help='Name of table file out (default tablein_extractedIDs.tab)') # Check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in extractColfromList (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') #get arguments tableFile = relativeToAbsolutePath(args.tableFile) tableFileOut = args.tableFileOut IDlist = args.IDlist if tableFileOut == "": tableFileOut = tableFile.split(".")[0] + "_extractedIDs.tab" #loading IDs to be kept in a list listNameKeep = loadInList(IDlist) if ".gz" in tableFile: fichier = gzip.open(tableFile, "rb") else: fichier = open(tableFile, "rb") #loading column IDs in a list
dest='paramfile', help='tab file with SNP') # Check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in getdistrgenotypeTAB (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère le fichier de conf passer en argument basename = args.paramfile.split(".")[0] tabFile = relativeToAbsolutePath(args.paramfile) workingDir = "/".join(tabFile.split("/")[:-1]) print("\t - Working directory is: %s" % workingDir) nblignetotal = float( os.popen("wc -l " + tabFile).read().rstrip().split(" ")[0]) print("Parse tab file with %i lines" % nblignetotal) # ajoute à la variable current_dir le chemin ou est executer le script current_dir = os.path.dirname(os.path.abspath(__file__)) # Utilisation du VCF # lecture et ecriture du header dans le fichier de output ctr = 0
#Welcome message print("#################################################################") print("# Welcome in make_structure_dir (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère le fichier de conf passer en argument nbRepiParam = int(args.nbRepiParam) nbRepmParam = int(args.nbRepmParam) nbpopiParam = int(args.nbpopiParam) nbpopmParam = int(args.nbpopmParam) nbIndivParam = args.nbIndivParam nbMarkerParam = args.nbMarkerParam inputFile = relativeToAbsolutePath(args.inputFile) outputFile = args.outputFile if outputFile == None: outputFile = inputFile.split("/")[-1].split(".")[0] workingDir = "/".join(inputFile.split("/")[:-1]) + "/" + outputFile + "/" outputSHDir = workingDir + "sh/" outputTrashDir = workingDir + "trash/" SGENameFile = outputSHDir + "submitQsubstructure.sge" print(" - Intput Info:") print("\t - Input matrice is: %s" % inputFile) print("\t - Output prefix name is: %s" % outputFile) print("\t - You want %s < K < %s and %s < Repetition < %s" %
required=True, dest='pathOut', help='Name of output file directory') # Check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in buildSNPtoFasta (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère les arguments gffFile = relativeToAbsolutePath(args.gffFile) listKeepFile = relativeToAbsolutePath(args.listKeepFile) tabFile = relativeToAbsolutePath(args.tabFile) fastaPath = args.fastaPath pathFileOut = args.pathOut #fastaFile = relativeToAbsolutePath(args.fastaFile) print("\t - Input GFF is: %s" % gffFile) print("\t - Input listKeppFile is: %s" % listKeepFile) print("\t - Input tabFile is: %s" % tabFile) print("\t - Input fasta files is: %s" % fastaPath.pathDirectory) print("\t - Output fasta files is: %s" % pathFileOut.pathDirectory) listKeepID = [ ID.replace("Mycfi_gene", "gene_") for ID in loadInList(listKeepFile)
default="L", choices=["L", "C"], dest='flag', help='L for CO (default), C pour gene conversion') # check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in make_ldhatfiles (Version " + version + ") #") print("#################################################################") # get arguments workingObjDir = args.workdir tabFile = relativeToAbsolutePath(args.tabFile) sizeTab = relativeToAbsolutePath(args.sizeTab) dataType = args.datatype intervalLDhatPATH = args.methode flag = args.flag print("\t - Workink Directory: %s" % workingObjDir.pathDirectory) print("\t - Input Path matrice is: %s" % tabFile) print("\t - Input Path size is: %s" % sizeTab) print("\t - dataType is : %s" % dataType) print("\t - Working with : %s" % intervalLDhatPATH) print("\t - flag is: %s\n\n" % flag) #exit() ## # code
# Command-line interface of make_hapmapfile: version flag, one mandatory
# input table and three optional settings.
parser.add_argument(
    '-v', '--version',
    action='version',
    version='You are using %(prog)s version: ' + version,
    help='display make_hapmapfile.py version number and exit')

filesreq = parser.add_argument_group('Input mandatory infos for running')
filesreq.add_argument(
    '-f', '--fileIn',
    metavar="<filename>",
    type=existant_file,
    required=True,
    dest='fileIn',
    help='Table of SNPs without FU')

files = parser.add_argument_group('Input infos for running with default values')
files.add_argument(
    '-o', '--out',
    metavar="<filename>",
    default="outfile.hapmap",
    required=False,
    dest='fileOut',
    help='Name of hapmap file (default outfile.hapmap)')
files.add_argument(
    '-w', '--window',
    metavar="<int>",
    type=int,
    default=1,
    required=False,
    dest='window',
    help='Minimal window by which SNPs have to be separated, in bp (default = 1 - keep everything)')
files.add_argument(
    '-c', '--chrom',
    action='store_true',
    dest='chrom',
    help='If used, hapmap files will be produced by chromosomes')

# Check parameters
args = parser.parse_args()

# Get arguments: the output is written next to the input file.
fileIn = relativeToAbsolutePath(args.fileIn)
workdir = "/".join(fileIn.split("/")[:-1]) + "/"
fileOut = workdir + args.fileOut
window = args.window
chrom = args.chrom

if window > 1:
    # Tag the output name with the window size. The extension is stripped
    # from the file-name component only: cutting the whole path at its first
    # dot would truncate it as soon as a parent directory contains a dot.
    outDir, _, outName = fileOut.rpartition("/")
    fileOut = outDir + "/" + outName.split(".")[0] + "_window" + str(window) + ".hapmap"
if window == 0:
    # A window of 0 is treated like the default of 1 (keep everything).
    window = 1

print("#### STARTING")
print("File will be written in same directory as original file: %s" % workdir)

print("\n#### CREATING FILES")
headerHapmap = "rs\talleles\tchrom\tpos\tstrand\tassembly\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\t"
# Parse the command line.
args = parser.parse_args()

# Welcome banner
print("#################################################################")
print("# Welcome in run_multiblast (Version " + version + ") #")
print("#################################################################")
print('Start time: ', start_time, '\n')

# Objects describing the fasta input directory and the output directory.
pathFastaFile = args.fastaFileDir
pathFileOut = args.pathOut

# Options that come with default values.
typeBlast = args.typeBlast
nbThreads = args.nbThreads
dbPath = relativeToAbsolutePath(args.dbPath)
blastOptionValue = " ".join(args.blastOptionValue)

# An outfmt longer than one character is wrapped in single quotes
# (presumably so it is passed to blast as one argument -- confirm).
outfmtValue = args.outfmtValue
if not len(outfmtValue) <= 1:
    outfmtValue = "'%s'" % outfmtValue

# Every generated file lives below the output directory.
outputDirPath = pathFileOut.pathDirectory
outputBlastResDir = outputDirPath + "blastRes/"
outputSHDir = outputDirPath + "sh/"
outputTrashDir = outputDirPath + "trash/"
SGENameFile = outputSHDir + "submitQsubBLAST.sge"

# Summarise the settings for the user.
print(" - Intput Info:")
print("\t - Working in directory: %s" % outputDirPath)
# Check parameters
args = parser.parse_args()

# Welcome message
print("#################################################################")
print("# Welcome in struc2runClumpak (Version " + version + ") #")
print("#################################################################")
print('Start time: ', start_time, '\n')

# Retrieve the values passed as arguments: the two directory objects and
# the label file.
workingObjDir = args.dirPath
clumpakObjDir = args.dirPathClumpak
labelFileParam = relativeToAbsolutePath(args.labelFileParam)

# Use the drawparams file given by the user; when none is given, create a
# default one in the working directory from the `drawparams` template.
if args.drawparamsParam is not None:
    drawparamsFile = relativeToAbsolutePath(args.drawparamsParam)
else:
    drawparamsFile = relativeToAbsolutePath(workingObjDir.pathDirectory + "drawparams")
    with open(drawparamsFile, "w") as drawparamsFileWrite:
        drawparamsFileWrite.write(drawparams)

# Use the colour file given by the user; when none is given, fall back to
# "colorsfile" in the working directory.
if args.colorParam is not None:
    colorParamFile = relativeToAbsolutePath(args.colorParam)
else:
    colorParamFile = relativeToAbsolutePath(workingObjDir.pathDirectory + "colorsfile")
# Parse the command line.
args = parser.parse_args()

# Welcome banner
print("#################################################################")
print("# Welcome in extractFromProteineOrtho (Version " + version + ") #")
print("#################################################################")
print('Start time: ', start_time, '\n')

# Reference strain name given by the user.
ref = args.refName

# The working directory is the directory holding the proteinOrtho file
# (absolute path minus its last component, with a trailing slash).
proteineOrthoPathParts = relativeToAbsolutePath(args.proteineOrthoFile).split("/")
workingDir = "/".join(proteineOrthoPathParts[:-1]) + "/"
correspondingCDSDir = "%scorrespondingCDS-contig%s/" % (workingDir, args.suffixParam)

# Summarise the settings for the user.
for infoLine in (
        "\t - Suffix is: %s" % args.suffixParam,
        "\t - Ref strain is : %s" % ref,
        "\t - Working directory is: %s" % workingDir,
        "\t - Corresonding CDS ref/strain directory is: %s\n\n" % correspondingCDSDir):
    print(infoLine)

# List of all the strains found in the proteinOrtho file
listSouches = []
# Dictionary of orthologous proteins
dico_ortho = {}
exist = 0
# Create the directory holding the correspondences between ref and strains
help= 'choice keep sequences size greater than -l (g/greater) or keep lower (l/lower)' ) # Check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in extractSeqFastaFromLen (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère le fichier de conf passer en argument fastaFile = relativeToAbsolutePath(args.fastaFile) outputfilename = relativeToAbsolutePath(args.paramoutfile) lenSize = args.lenSize keepValue = args.keepValue output_handle = open(relativeToAbsolutePath(outputfilename), "w") dicoSize = lenSeq2dict(fastaFile) dicoFasta = fasta2dict(fastaFile) nbKeep = 0 nbTotal = len(dicoFasta.keys()) for ID in sorted(dicoSize.keys(), key=sort_human): lenSeq = dicoSize[ID] if keepValue in ["g", "greater"]: if lenSeq >= lenSize:
metavar="<filename>", required=True, dest='outputFileParam', help='Name of output figure file') # Check parameters args = parser.parse_args() #Welcome message print "#################################################################" print "# Welcome in plot_barplot_DAPC (Version " + version + ") #" print "#################################################################" print 'Start time: ', start_time, '\n' # Récupère le fichier de conf passer en argument dirPath = relativeToAbsolutePath(args.dirPath) labelFileParam = relativeToAbsolutePath(args.labelFileParam) outputFileParam = args.outputFileParam fileListCSV = lsExtInDirToList(dirPath, "csv") #read membership data and store in dictionary with keys=line numbers, which corresponds to the 'count' key in the tags dictionary memberships = {} listK = [] for file in fileListCSV: K = int((file.replace(dirPath + "/", '').replace('K', '')).replace('.csv', '')) if K not in listK: listK.append(K) IN = open(file, 'r') IN.readline() # remove header
required=True, dest='pathOut', help='Name of output file directory') # Check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in splitMultiFasta (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère le fichier de conf passer en argument fastaFile = relativeToAbsolutePath(args.fastaFile) pathFileOut = args.pathOut print("\t - Input Path is: %s" % pathFileOut.pathDirectory) print("\t - fasta file is : %s" % fastaFile) dicoFasta = fasta2dict(fastaFile) for name, sequence in dicoFasta.items(): with open(pathFileOut.pathDirectory + name + ".fasta", "w") as output_handle: SeqIO.write(sequence, output_handle, "fasta") #print("\n\nExecution summary:") #print(" - Outputting \n\
choices=["yes", "y", "no", "n"], help='choise keep (y/yes) or not keep (n/no) sequences in list file') # Check parameters args = parser.parse_args() checkParameters(args) #Welcome message print("#################################################################") print("# Welcome in extractSeqFasta (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère le fichier de conf passer en argument fastaFile = relativeToAbsolutePath(args.fastaFile) outputfilename = relativeToAbsolutePath(args.paramoutfile) listFile = relativeToAbsolutePath(args.listFile) keepValue = args.keepValue output_handle = open(outputfilename, "w") if keepValue in ["no", "n"]: dico_keep, nbTotal = extractInverseListFromFasta(fastaFile, listFile) elif keepValue in ["yes", "y"]: dico_keep, nbTotal = extractListFromFasta(fastaFile, listFile) nbKeep = len(dico_keep.keys()) for geneId, sequence in dico_keep.items(): SeqIO.write(sequence, output_handle, "fasta")
# Parse the command line.
args = parser.parse_args()

# Welcome banner
print("#################################################################")
print("# Welcome in extractSeqFastaCorresponding (Version " + version + ") #")
print("#################################################################")
print('Start time: ', start_time, '\n')

# Values passed as arguments: two directory objects, the output path and
# the file listing the MGG identifiers to keep.
mggFileKeep = relativeToAbsolutePath(args.mggFileKeep)
outputfilePath = args.paramoutfile
listFile = args.listFile
fastaFile = args.fastaFile

# Summarise the settings for the user.
for infoLine in (
        "\t - Path with fasta is: %s" % fastaFile.pathDirectory,
        "\t - Path with corresponding Orthologues is : %s" % listFile.pathDirectory,
        "\t - MGG list keep are in file: %s\n" % mggFileKeep,
        "\t - Output Orthologues fasta is: %s\n\n" % outputfilePath):
    print(infoLine)

# Collect the fasta files of the input directory and display them.
fastaExtensions = ["fasta", "fas", "fa"]
listCDSfiles = fastaFile.lsExtInDirToList(fastaExtensions)
print("\n".join(listCDSfiles))

# Load the list of MGG identifiers to keep.
mggKeepall = loadInList(mggFileKeep)
print("# Welcome in %s (Version %s) #" % (__file__, version)) print("#################################################################") print('Start time: ', start_time, '\n') # Récupère les infos passer en argument tabFileParam = args.tabFileParam outFastaParam = args.outFastaParam IDParam = args.IDParam compress = args.compress basename = tabFileParam.split("/")[-1].split(".")[0] print(basename) if outFastaParam == None: outFastaParam = relativeToAbsolutePath(basename + ".fasta") # resume value to user print(" - Intput Info:") print("\t - TAB files is : %s" % tabFileParam) if IDParam != None: print("\t - Change Individual ID with custom ID provied table : %s" % IDParam) dicoCustomID = loadInDictCol(IDParam, 0, 1) print(" - Output Info:") if compress: print("\t - Output fasta will be gzip") print("\t - Output fasta is: %s\n\n" % outFastaParam) if ".gz" in tabFileParam:
help='File with Strain to keep (one per row), default keep all strains' ) # Check parameters args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in grepMotifFromAlignment (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time, '\n') # Récupère le fichier de conf passer en argument pathDirectory = args.pathDirectory outputfilename = relativeToAbsolutePath(args.paramoutfile) print("\t - Input pathDirectory is: %s" % pathDirectory) print("\t - Output file name is: %s" % outputfilename) if args.listKeepFile not in ["ALL"]: listKeepSouche = loadInList(existant_file(args.listKeepFile)) print("\t - You want to keep strains:\n%s" % "\n".join(listKeepSouche)) basename = paramlistKeep.split(".")[0] else: listKeepSouche = [] print("\t - You want to keep all strains \n") basename = "All" dicoOutputTxt = {} dicoSeqSNP = {}
metavar="<filename>", type=existant_file, required=False, dest='colorParam', help='File with colors (default 15 color max)') # Check parameters args = parser.parse_args() print(args) #Welcome message print("#################################################################") print("# Welcome in %s (Version %s) #" % (__file__, version)) print("#################################################################") print('Start time: ', start_time, '\n') de = relativeToAbsolutePath(args.deFile) rapmsu = relativeToAbsolutePath(args.rapmsuFile) irgsp = relativeToAbsolutePath(args.irgspFile) msu = relativeToAbsolutePath(args.msuFile) outputName = relativeToAbsolutePath(args.outputFile) #parcours fichier DE fileDE = open(de, "r") with open(outputName, 'w', newline='') as csvfile: outwriter = csv.writer(csvfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL) for line in fileDE: #DBG::print ("---------------------------------------------------------------------------") cleanedline = cleaningLine(line, ",")