def main():
    """Run the protein-tree processing pipeline from the command line.

    Parses the command line, configures logging, loads the species tree and
    the forest of protein trees, processes the forest and then either
    extracts the gene families (``+fam``) or prints every resulting tree on
    standard output.
    """
    logging.basicConfig()
    logger.setLevel(logging.INFO)

    mandatory = [("phylTree.conf", myTools.File),
                 ("ensemblTree", myTools.File)]
    options = [("flatten", bool, False),
               ("rebuild", bool, False),
               ("fam", bool, False),
               ("cutoff", str, "-1"),
               ("defaultFamName", str, "FAM%08d"),
               ("scoreMethod", int, [1, 2, 3]),
               ("newNodeID", float, 1e8),
               ("recurs", bool, False),
               ("indicator", bool, False),
               ("debug", bool, False)]
    arguments = myTools.checkArgs(mandatory, options, __doc__)

    if arguments['debug']:
        logger.setLevel(logging.DEBUG)

    # First identifier handed out to newly created nodes (for the rebuild
    # step).  The option is parsed as a float, hence the conversion.
    myProteinTree.nextNodeID = int(arguments["newNodeID"])

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
    hasLowScore = setupScoring(phylTree,
                               arguments["scoreMethod"],
                               arguments["cutoff"])

    prottrees = process(myProteinTree.loadTree(arguments["ensemblTree"]),
                        phylTree,
                        hasLowScore,
                        arguments["defaultFamName"],
                        arguments["flatten"],
                        arguments["rebuild"],
                        arguments["recurs"],
                        arguments["indicator"])

    if not arguments["fam"]:
        # Default mode: dump the processed forest.
        for tree in prottrees:
            tree.printTree(sys.stdout)
        return

    # Will not work on previous versions of ToolsDyogen.
    from treeTools.ALL.extractGeneFamilies import extractGeneFamilies
    count, dupCount, geneFamilies = extractGeneFamilies(phylTree, prottrees)
def main():
    """Print the name and gene count of each sufficiently large chromosome.

    One line is written to standard output per chromosome of the genome
    whose number of genes is at least ``minChrSize``.
    """
    arguments = myTools.checkArgs([("genesFiles", str)],
                                  [("minChrSize", int, 1)],
                                  __doc__)
    genome = myGenomes.Genome(arguments["genesFiles"])

    threshold = arguments["minChrSize"]
    for chrom, chrom_genes in genome.lstGenes.items():
        size = len(chrom_genes)
        if size < threshold:
            # Too small: not reported.
            continue
        print(chrom, size, file=sys.stdout)
def main():
    """Print every pair of species together with the ancestor separating them.

    For each ancestor of the species tree and each pair of its child
    branches, every combination of one species from the first branch and
    one species from the second branch is written to standard output as
    ``species1<TAB>species2<TAB>ancestor``.
    """
    arguments = myTools.checkArgs([("phylTree.conf", myTools.File)], [], __doc__)
    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    for anc in phylTree.listAncestr:
        # Only the child names matter here; the second item of each entry
        # is ignored.
        children = [child for (child, _) in phylTree.items[anc]]
        for (child1, child2) in itertools.combinations(children, 2):
            species1 = list(phylTree.species[child1])
            species2 = list(phylTree.species[child2])
            for (sp1, sp2) in itertools.product(species1, species2):
                print("%s\t%s\t%s" % (sp1, sp2, anc), file=sys.stdout)
def main():
    """Extract the gene families from a forest of Ensembl protein trees.

    Loads the species tree, raises the interpreter recursion limit, sets the
    first identifier for newly created nodes and hands the processed trees
    to ``extractGeneFamilies``.
    """
    arguments = myTools.checkArgs(
        [("phylTree.conf", myTools.File), ("ensemblTree", myTools.File)],
        [("newNodeID", int, int(1e9)), ("reuseNames", bool, False)],
        __doc__)

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    # !important -- presumably needed because the tree code recurses deeply;
    # confirm against processTrees / extractGeneFamilies.
    setrecursionlimit(20000)

    myProteinTree.nextNodeID = arguments["newNodeID"]

    trees = processTrees(arguments["ensemblTree"], phylTree)
    count, dupCount, geneFamilies = extractGeneFamilies(
        phylTree, trees, arguments["reuseNames"])
def main():
    """Print the lineage of a gene inside the protein tree that contains it.

    The forest is scanned tree by tree.  In the first tree that contains a
    leaf whose ``gene_name`` equals the requested name, the leaf is printed
    first, followed by each of its ancestors up to the root, one
    tab-separated line per node.
    """
    arguments = myTools.checkArgs(
        [("proteinTree", myTools.File), ("gene_name", str)], [], __doc__)

    def printAncNode(node):
        """Print one line describing the internal node ``node``.

        The annotations are popped from ``tree.info[node]`` while they are
        read, so the node's info dictionary is modified.
        """
        info = tree.info[node]
        dup = info.pop('Duplication', None)
        if info.pop("dubious_duplication", None):
            event = "DUBIOUS_DUPLICATION"
        elif (dup == 1) and ("duplication_confidence_score" in info):
            event = "ROOT_DUPLICATION"
        elif dup == 2:
            event = "DUPLICATION"
        elif dup == 3:
            event = "EDITED_DUPLICATION"
        else:
            event = "SPECIATION"

        fields = [node, event]
        for key in ("taxon_name", "family_name", "Bootstrap",
                    "duplication_confidence_score"):
            fields.append(info.pop(key, None))
        print(myFile.myTSV.printLine(fields))

    def printGeneNode(node):
        """Print one line describing the leaf (gene) ``node``."""
        info = tree.info[node]
        fields = [node, "GENE"]
        fields.append(info.pop("taxon_name", None))
        fields.append(info.pop("gene_name", None))
        print(myFile.myTSV.printLine(fields))

    def do(node):
        """Return True if the requested gene is found under ``node``.

        Printing happens on the way back up the recursion, which is why the
        gene comes first and the root last.
        """
        if node not in tree.data:
            # Leaf: is it the gene we are looking for?
            if tree.info[node]["gene_name"] == arguments["gene_name"]:
                printGeneNode(node)
                return True
            return False

        for (child, _) in tree.data[node]:
            if do(child):
                printAncNode(node)
                return True
        return False

    # Stop at the first tree that contains the gene.
    for tree in myProteinTree.loadTree(arguments["proteinTree"]):
        if do(tree.root):
            break
def main():
    """Convert the mRNA features of a GFF3 file into tab-separated gene lines.

    For every line of the GFF3 file that is a ``feature`` of type ``mRNA``,
    one line is written to standard output with the columns: sequence id,
    start, end, strand and the ``ID`` attribute.  The strand is written as
    ``1`` when the GFF strand is ``+`` and as ``-1`` for any other value.
    """
    # `myTools.File` is used instead of the Python 2 builtin `file`, which no
    # longer exists in Python 3 and made this call fail with a NameError.
    arguments = myTools.checkArgs([("gffFile", myTools.File)], [], __doc__)

    gff = Gff3(arguments["gffFile"])

    for line in gff.lines:
        if line['line_type'] != 'feature' or line['type'] != 'mRNA':
            continue
        # Computed locally so that the parsed GFF record is left untouched.
        strand = '1' if line['strand'] == "+" else '-1'
        print(myFile.myTSV.printLine([
            line['seqid'],
            line['start'],
            line['end'],
            strand,
            line['attributes']['ID'],
        ]), file=sys.stdout)
def main():
    """Print the species tree as an indented list or as a Newick string.

    With ``fromNewick`` set (the default) the tree is printed in the
    indented phylTree format, one node per line.  Otherwise a single Newick
    string is printed, followed by ``;``.
    """
    arguments = myTools.checkArgs([("phylTree.conf", myTools.File)],
                                  [("fromNewick", bool, True)],
                                  __doc__)
    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    def do(node, indent):
        """Print ``node`` with ``indent`` leading tabs, then its children.

        The node name (stripped of ``*``) is followed by its other common
        names, separated by ``|``.
        """
        node = node.replace("*", "")
        aliases = [
            x for x in phylTree.commonNames.get(node, "")
            if isinstance(x, str) and (x != node)
        ]
        names = myFile.myTSV.printLine([node] + aliases, delim="|")
        print(("\t" * indent) + "%s" % names)
        if node in phylTree.items:
            for (child, _) in phylTree.items[node]:
                do(child, indent + 1)

    def convertToFlatFile(anc):
        """Return the Newick representation of the subtree rooted at ``anc``."""
        label = phylTree.fileName[anc]
        if anc in phylTree.listSpecies:
            return label
        subtrees = [
            convertToFlatFile(child) + ":" + str(length)
            for (child, length) in phylTree.items[anc]
        ]
        # Internal nodes are labelled "name|age".
        return "(" + ",".join(subtrees) + ")%s|%d" % (label, phylTree.ages[anc])

    if arguments["fromNewick"]:
        # Returns the phyltree format (with indentation)
        do(phylTree.root, 0)
    else:
        # Returns the newick tree
        print(convertToFlatFile(phylTree.root), ";")
def main():
    """Extract the gene families and optionally write one file per ancestor.

    The families are computed by ``extractGeneFamilies``.  When the
    ``out:ancGenesFiles`` template is not empty, the families of each
    ancestor are written to the file obtained by substituting the ancestor's
    file name into the template, one family per line with its members
    separated by spaces.  Progress is reported on standard error.
    """
    arguments = myTools.checkArgs(
        [("phylTree.conf", myTools.File), ("proteinTree", myTools.File)],
        [("out:ancGenesFiles", str, ""), ("reuseNames", bool, False)],
        __doc__)

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
    proteinTrees = myProteinTree.loadTree(arguments["proteinTree"])

    count, dupCount, geneFamilies = extractGeneFamilies(
        phylTree, proteinTrees, arguments["reuseNames"])

    outTemplate = arguments["out:ancGenesFiles"]
    if not outTemplate:
        # No output template given: nothing is written.
        return

    for (ancestor, families) in geneFamilies.items():
        print("Ecriture des familles de %s ..." % ancestor, end=' ', file=sys.stderr)
        out = myFile.openFile(outTemplate % phylTree.fileName[ancestor], "w")
        for family in families:
            print(" ".join(family), file=out)
        out.close()
        print(len(families), "OK", file=sys.stderr)
def main():
    """Extract, from each protein tree, the subtrees under a given species.

    Each tree of the input forest is searched from its root for the topmost
    nodes whose taxon is accepted by ``phylTree.isChildOf`` for the requested
    root species.  Each such node is printed as a tree of its own on
    standard output.  The total number of extracted trees is reported on
    standard error.
    """
    arguments = myTools.checkArgs(
        [("phylTree.conf", myTools.File), ("iniTree", myTools.File),
         ("rootSpecies", str)],
        [], __doc__)

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    def search(node):
        """Return the list of nodes under the new root species."""
        taxon = tree.info[node]['taxon_name']
        if phylTree.isChildOf(taxon, arguments["rootSpecies"]):
            # The search stops at the first matching node of each branch.
            return [node]
        found = []
        if node in tree.data:
            for (child, _) in tree.data[node]:
                found.extend(search(child))
        return found

    nb = 0
    for tree in myProteinTree.loadTree(arguments["iniTree"]):
        roots = search(tree.root)
        nb += len(roots)

        if len(roots) == 1:
            # A single subtree keeps the name of the original tree.
            only = roots[0]
            tree.info[only]["tree_name"] = tree.info[tree.root]["tree_name"]
            myProteinTree.printTree(sys.stdout, tree.data, tree.info, only)
            continue

        # Several subtrees: each one gets the original name plus a suffix.
        for (rank, subroot) in enumerate(roots, start=1):
            suffix = myProteinTree.getDupSuffix(rank, True)
            tree.info[subroot]["tree_name"] = tree.info[tree.root]["tree_name"] + suffix
            myProteinTree.printTree(sys.stdout, tree.data, tree.info, subroot)

    print(nb, "extracted trees", file=sys.stderr)
#!/usr/bin/env python3 """ Renvoie un arbre phylogenetique des especes avec les valeurs medianes issues des arbres de proteines """ import collections import sys from LibsDyogen import myTools, myPhylTree, myProteinTree arguments = myTools.checkArgs([("phylTree.conf", file), ("proteinTree", file)], [], __doc__) phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) lengths = collections.defaultdict(list) # Parcours recursif de la famille de genes def do(node): print("NEW TREE", file=sys.stderr) if node in tree.data: t1 = tree.info[node]['taxon_name'] for (g, d) in tree.data[node]: # Une distance ne peut etre prise qu'entre deux noeuds de speciation if (tree.info[node]['Duplication'] == 0) and (tree.info[g]['Duplication'] == 0): t2 = tree.info[g]['taxon_name'] # Les deux noeuds doivent etre strictement consecutifs if (phylTree.parent[t2].name == t1) and (d != 0): lengths[(t1, t2)].append(d)
#!/usr/bin/env python3
"""
Renvoie les listes des devenirs de chaque gene le long des branches de l'arbre phylogenetique
"""
# (Module docstring, in English: "Returns the lists of the fates of each gene
# along the branches of the phylogenetic tree".  It is left untouched because
# it is passed to checkArgs as `__doc__`.)

import sys

from LibsDyogen import myFile, myMaths, myTools, myGenomes, myPhylTree

# Arguments
# `myTools.File` is used instead of the Python 2 builtin `file`, which no
# longer exists in Python 3 and made this call fail with a NameError.
arguments = myTools.checkArgs(
    [("phylTree.conf", myTools.File), ("genesFile", str), ("ancGenesFile", str)],
    [], __doc__)

# Load all the files: one genome per extant species and one per ancestor.
# Both file arguments are templates filled in with the species file name.
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
genes = {}
for e in phylTree.listSpecies:
    genes[e] = myGenomes.Genome(arguments["genesFile"] % phylTree.fileName[e])
for a in phylTree.listAncestr:
    genes[a] = myGenomes.Genome(arguments["ancGenesFile"] % phylTree.fileName[a])


def transformName(esp, xxx_todo_changeme):
    """Return the printable name of the gene at position ``(c, i)`` in ``esp``.

    ``xxx_todo_changeme`` is a ``(c, i)`` pair.  When ``esp`` is a key of
    ``phylTree.items`` only ``i`` is returned; otherwise the result is the
    string ``"c|i"``.
    """
    (c, i) = xxx_todo_changeme
    if esp in phylTree.items:
        return i
    else:
        return str(c) + "|" + str(i)
def main():
    """Iterate over every protein tree of the input file and discard it.

    Nothing is printed or returned; the loop only drives the tree loader
    through the whole input (presumably as a parsing check -- confirm
    against the callers).
    """
    arguments = myTools.checkArgs([("iniTree", myTools.File)], [], __doc__)

    for tree in myProteinTree.loadTree(arguments["iniTree"]):
        # The original body was the bare expression `next`, which merely
        # evaluates the builtin and throws it away.  `pass` states the
        # intent (do nothing per tree) explicitly.
        pass
#!/usr/bin/env python3 """ Renvoie un tableau de statistiques de rearrangement le long des branches et un arbre de especes tenant compte de ces rearrangements. """ import sys from LibsDyogen import myFile, myMaths, myTools, myGenomes, myPhylTree # Argument: arguments = myTools.checkArgs([("phylTree.conf", myTools.File)], [("onlyOrthos", bool, False), ("in:genesFiles", str, ""), ("in:ancGenesFiles", str, ""), ("in:diagsFiles", str, ""), ("out:treeFile", str, "out.nwk"), ("out:statFile", str, "out.txt"), ("colNames", bool, True)], __doc__) # Chargement des tous les fichiers ################################### phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) genes = {} diags = {} dicDiags = {} for e in phylTree.listSpecies: # Les genes des especes modernes genes[e] = myGenomes.Genome(arguments["in:genesFiles"] % phylTree.fileName[e])
#!/usr/bin/env python3 """ Renvoie les listes des devenirs de chaque gene le long des branches de l'arbre phylogenetique """ import sys import collections from LibsDyogen import myDiags, myMaths, myTools, myGenomes, myPhylTree # Argument: arguments = myTools.checkArgs( \ [("phylTree.conf", file)], \ [("IN.genesFile", str, ""), ("IN.ancGenesFile", str, ""), ("IN.diagsFile", str, "")], \ __doc__ \ ) # Chargement des tous les fichiers ################################### phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) genes = {} diags = {} dicDiags = {} for e in phylTree.listSpecies: # Les genes des especes modernes genes[e] = myGenomes.Genome(arguments["IN.genesFile"] % phylTree.fileName[e]) diags[e] = [[g] for g in range(len(list(genes[e])))]
#! /usr/bin/env python """ From a species tree, print the number of extant species and ancetors. Optional: print the list of species Usage: getInfoOnSpeciesTree.py PhylTree.conf getInfoOnSpeciesTree.py PhylTree.conf +speciesList +ancList """ import sys from LibsDyogen import myFile, myTools, myPhylTree arguments = myTools.checkArgs([("phylTree.conf", myTools.File)], [("speciesList", bool, False), ("ancList", bool, False)], __doc__) phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) if arguments["speciesList"]: print("Extant Species:", ",".join(x for x in phylTree.listSpecies), file=sys.stdout) if arguments["ancList"]: print(file=sys.stdout) print("Ancestral Species:", ",".join(x for x in phylTree.listAncestr), file=sys.stdout) print(file=sys.stdout) print("Extant Species:", len(phylTree.listSpecies), file=sys.stdout) print("Ancetral Species:", len(phylTree.listAncestr), file=sys.stdout)
#!/usr/bin/env python3 import sys import collections from LibsDyogen import myFile, myTools, myGenomes arguments = myTools.checkArgs([ ("genesFile", file), ("transcriptsCoords", file) ], [ ("useShortestTranscript", bool, True), ("sortOn5", bool, True), ("authorizedBiotypes", str, "protein_coding") ], "Cree une liste ordonnee des genes en tenant compte du plus petit transcrit" ) genome = myGenomes.Genome(arguments["genesFile"]) biotypes = set(arguments["authorizedBiotypes"].split(",")) # Chargement de la liste des transcrits lstTrans = collections.defaultdict(list) f = myFile.myTSV.reader(arguments["transcriptsCoords"]) for l in f.csvobject: if l[-1] in biotypes: lstTrans[l[0]].append((int(l[2]), int(l[3]), l[1])) f.file.close() for chrom in genome.lstGenes: # Creation de la liste a trier tmp = [] for gene in genome.lstGenes[chrom]:
Run the XMLfile BIOMART Query Usage: ./ENSEMBL.biomartQuery.py XMLfiles/BIOMART.HumanProteinCodingGene.xml -> will generate ouput.txt ./ENSEMBL.biomartQuery.py XMLfiles/BIOMART.HumanProteinCodingGene.xml -outputFileName=HumanProteinCodingGene.txt """ from __future__ import print_function import sys import urllib.request, urllib.parse, urllib.error from LibsDyogen import myFile, myTools # Arguments arguments = myTools.checkArgs( [("xmlRequest", myTools.File)], [("biomartServer", str, "http://www.ensembl.org/biomart/martservice"), ("outputFileName", str, "output.txt")], __doc__) # La requete with myFile.openFile(arguments["xmlRequest"], "r") as f: request = f.read() print("Downloading XML Query", end=' ', file=sys.stderr) urllib.request.urlretrieve(arguments["biomartServer"], filename=arguments["outputFileName"], data=urllib.parse.urlencode({ "query": request }).encode()) print("OK", file=sys.stderr)
#!/usr/bin/env python3
"""
Read file of numbers and print statistics:
Min [Q25/Q50/Q75] [N75/N50/N25] Max [Mean/Stddev-Length]

Usage:
    ./printStats.py filename
    ./printStats.py filename +long +colNames
"""

from LibsDyogen import myFile, myMaths, myTools

# `myTools.File` is used instead of the Python 2 builtin `file`, which no
# longer exists in Python 3 and made this call fail with a NameError.
arguments = myTools.checkArgs([("file", myTools.File)],
                              [("long", bool, False), ("colNames", bool, False)],
                              __doc__)

# Collect every whitespace-separated number of the input file.
lst = []
f = myFile.openFile(arguments["file"], 'r')
for l in f:
    c = l.split()
    for x in c:
        # Keep integers as integers; anything else must parse as a float
        # (a non-numeric token raises ValueError and aborts the script).
        try:
            x = int(x)
        except ValueError:
            x = float(x)
        lst.append(x)
f.close()

# returns results
#! /usr/bin/env python3
"""
Extract Newick or NHX trees from Phyltree protein trees

usage:
    ./ALL.extractNewickTrees.py GeneTrees.bz2 +withDist +withNHXTags +withAncSpeciesNames +withAncGenesNames
"""

# Libraries
import sys

import LibsDyogen.myTools as myTools
import LibsDyogen.myProteinTree as myProteinTree

# Arguments
arguments = myTools.checkArgs(
    [("proteinTree", myTools.File)],
    [
        ("withDist", bool, False),
        ("withNHXTags", bool, False),
        ("withAncSpeciesNames", bool, False),
        ("withAncGenesNames", bool, False),
    ],
    __doc__
)

# Formatting switches forwarded unchanged to every printNewick call.
newick_options = {
    "withDist": arguments["withDist"],
    "withTags": arguments["withNHXTags"],
    "withAncSpeciesNames": arguments["withAncSpeciesNames"],
    "withAncGenesNames": arguments["withAncGenesNames"],
}

print("Mise en forme des arbres ...", end=' ', file=sys.stderr)

# `nb` ends up holding the number of trees written (0 for an empty forest).
nb = 0
for (nb, tree) in enumerate(myProteinTree.loadTree(arguments["proteinTree"]), start=1):
    tree.printNewick(sys.stdout, **newick_options)

print("%d arbres OK" % nb, file=sys.stderr)
Pere Fils Valeur(nbDup) """ # Librairies import sys import math from LibsDyogen import myFile, myMaths, myTools, myPhylTree, myPsOutput # Arguments arguments = myTools.checkArgs([("phylTree.conf", file)], [("landscape", bool, False), ("printSpecies", bool, True), ("printAncestors", bool, True), ("printAges", bool, False), ("lengthFile", str, ""), ("colorFile", str, ""), ("funcLength", str, ""), ("funcColor", str, ""), ("root", str, ""), ("min", float, None), ("max", float, None)], __doc__) phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) (largeur, hauteur) = myPsOutput.printPsHeader(landscape=arguments["landscape"]) root = arguments["root"] if arguments[ "root"] in phylTree.items else phylTree.root funcLength = (lambda x, a: x) if arguments["funcLength"] == "" else eval( arguments["funcLength"]) #funcColor = (lambda x, a: x) if arguments["funcColor"] == "" else eval(arguments["funcColor"])
# This is free software, you may copy, modify and/or distribute this work under the terms of the GNU General Public License, version 3 (GPL v3) or later and the CeCiLL v2 license in France
"""
extract the ancGenes from the forest of gene trees
"""

import sys
import collections

from LibsDyogen import myFile
from LibsDyogen import myTools
from LibsDyogen import myPhylTree
from LibsDyogen import myProteinTree

# arguments
arguments = myTools.checkArgs(
    [("speciesTree", myTools.File), ("geneTreeForest", myTools.File)],
    [("out:ancGenes", str, ""), ("reuseNames", bool, False)],
    __doc__)

speciesTree = myPhylTree.PhylogeneticTree(arguments["speciesTree"])

# duplication counter: number of duplications seen so far for each name
dupCount = collections.defaultdict(int)


def futureName(name, dup):
    """Return the name to give to a descendant of the gene called ``name``.

    When ``dup`` is below 2 the name is returned unchanged.  Otherwise the
    duplication counter of ``name`` is incremented and the matching
    duplication suffix is appended to the name.
    """
    if dup < 2:
        return name
    # if there is a duplication we need to add a suffix
    dupCount[name] += 1
    suffix = myProteinTree.getDupSuffix(dupCount[name], False)
    return name + suffix
#!/usr/bin/env python3 """ Blocs de syntenie entre deux especes """ import sys from LibsDyogen import myPhylTree, myGenomes, myFile, myTools, myMaths, myDiags # Arguments modesOrthos = list(myDiags.OrthosFilterType._keys) arguments = myTools.checkArgs( \ [("genome1",file), ("genome2",file), ("ancGenes",file)], \ [("fusionThreshold",int,-1), ("sameStrand",bool,True), ("orthosFilter",str,modesOrthos), ("minimalLength",int,2)], \ __doc__ \ ) genome1 = myGenomes.Genome(arguments["genome1"]) genome2 = myGenomes.Genome(arguments["genome2"]) ancGenes = myGenomes.Genome(arguments["ancGenes"]) orthosFilter = myDiags.OrthosFilterType[modesOrthos.index( arguments["orthosFilter"])] statsDiags = [] for ((c1,d1),(c2,d2),daa) in myDiags.calcDiags(genome1, genome2, ancGenes, \ fusionThreshold=arguments["fusionThreshold"], sameStrand=arguments["sameStrand"], orthosFilter=orthosFilter, minChromLength=arguments["minimalLength"]): l = len(daa) if l < arguments["minimalLength"]: continue statsDiags.append(l)
#!/usr/bin/env python3 """ Extrait (des genomes reels) la liste des evenements de duplications/pertes/gains sur chaque branche de l'arbre """ from LibsDyogen import myMaths, myTools, myGenomes, myPhylTree arguments = myTools.checkArgs([("phylTree.conf", file)], [("rootSpecies", str, ""), ("genesFile", str, ""), ("ancGenesFile", str, "")], __doc__) phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) @myTools.memoize def getGenome(e): if e in phylTree.listSpecies: return myGenomes.Genome(arguments["genesFile"] % phylTree.fileName[e]) else: return myGenomes.Genome(arguments["ancGenesFile"] % phylTree.fileName[e]) def transformName(esp, xxx_todo_changeme): (c, i) = xxx_todo_changeme return getGenome(esp).lstGenes[c][i].names[0] def do(node):
#!/usr/bin/env python3
"""
Convertit un genome (suite de diagonales) en genome (suite de genes)
"""
# (Module docstring, in English: "Converts a genome (sequence of diagonals)
# into a genome (sequence of genes)".  It is left untouched because it is
# passed to checkArgs as `__doc__`.)

import sys
import itertools

from LibsDyogen import myTools, myGenomes

# `myTools.File` is used instead of the Python 2 builtin `file`, which no
# longer exists in Python 3 and made this call fail with a NameError.
arguments = myTools.checkArgs(
    [("contigsFile", myTools.File), ("ancGenesFile", myTools.File)],
    [], __doc__)

# The ancestral genes are loaded first because the contigs genome is built
# with them (`ancGenes=` argument).
ancGenes = myGenomes.Genome(arguments["ancGenesFile"])
genome = myGenomes.Genome(arguments["contigsFile"], ancGenes=ancGenes)

genome.printEnsembl(sys.stdout)
#!/usr/bin/env python3
"""
Renvoie pour chaque gene ancestral le decompte des evenements qu'il subit sur chaque branche
"""
# (Module docstring, in English: "Returns, for each ancestral gene, the count
# of the events it undergoes on each branch".  It is left untouched because
# it is passed to checkArgs as `__doc__`.)

import sys

from LibsDyogen import myTools, myGenomes, myPhylTree

# `myTools.File` is used instead of the Python 2 builtin `file`, which no
# longer exists in Python 3 and made this call fail with a NameError.
arguments = myTools.checkArgs(
    [("phylTree.conf", myTools.File), ("rootSpecies", str)],
    [("genesFiles", str, ""), ("ancGenesFiles", str, ""),
     ("countDup", bool, True), ("countLoss", bool, True)],
    __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

# Load all the files: one genome per extant species and one per ancestor.
# Both file arguments are templates filled in with the species file name.
genes = {}
todo = {}
for e in phylTree.listSpecies:
    genes[e] = myGenomes.Genome(arguments["genesFiles"] % phylTree.fileName[e])
for a in phylTree.listAncestr:
    genes[a] = myGenomes.Genome(arguments["ancGenesFiles"] % phylTree.fileName[a])
    # First name of every gene of this ancestor.
    todo[a] = set(g.names[0] for g in genes[a])

# Union of the gene names over all ancestors; its size is reported on stderr.
allnames = set()
for a in phylTree.listAncestr:
    allnames.update(todo[a])
print(len(allnames), file=sys.stderr)
#!/usr/bin/env python3 """ Convertit un genome (scaffolds = suite de contigs) en genome (uniquement des contigs) """ import sys from LibsDyogen import myDiags, myFile, myTools, myGenomes arguments = myTools.checkArgs([("scaffoldsFile", file), ("contigsFile", file)], [], __doc__) (diags, singletons) = myDiags.loadIntegr(arguments["scaffoldsFile"]) ref = {} f = myFile.openFile(arguments["contigsFile"], "r") for (i, l) in enumerate(f): ref[i + 1] = l f.close() for (chrom, weights) in diags: li = [] ls = [] lw = [] n = 0 for (i, (c, s)) in enumerate(chrom): t = ref.pop(c)[:-1].split("\t") if i >= 1: lw.append(weights[i - 1]) n += len(t[2].split()) if s > 0:
#!/usr/bin/env python3 """ Parcourt un fichier de genome et enleve les genes inclus dans un autre """ import sys import collections import itertools import operator from LibsDyogen import myFile, myTools, myGenomes # Arguments arguments = myTools.checkArgs([("genome", file)], [], __doc__) genome = myGenomes.Genome(arguments["genome"]) for c in genome.lstGenes: lref = list(genome.lstGenes[c]) lref.sort(key=operator.attrgetter("beginning")) lnew = list(genome.lstGenes[c]) lnew.sort(key=operator.attrgetter("end")) comb = myTools.myCombinator() for (g1, g2) in zip(lref, lnew): if g1 != g2: comb.addLink([g1, g2]) removed = set()
Transform an ancestral genome in tabular format with descendant species genes (one column by species), with modern position or not usage: ./formatTabularAncGenome.py PhylTree.conf genome.Boreoeutheria.list.bz2 Boreoeutheria -in:genesFiles=genes/genesST.%s.list.bz2 +withPos > genome.Boreoeutheria.WithDescendant.list """ import sys import collections from LibsDyogen import myFile, myPhylTree, myGenomes, myTools arguments = myTools.checkArgs( [("phylTree.conf", file), ("ancGenome", file), ("target", str)], [("in:genesFiles", str, ""), ("withPos", bool, False)], __doc__ ) # loading species tree phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) # Extant species list to load listSpecies = phylTree.getTargetsSpec(arguments["target"]) newlistSpecies = sorted(listSpecies) print(myFile.myTSV.printLine( ["Anc_chr", "Begin", "End", "Strand", "AncGene", '\t'.join(x for x in newlistSpecies)]), file=sys.stdout) ancGenome = myGenomes.Genome(arguments["ancGenome"]) genome = {}
elif unit == 'Mb': factor = 0.000001 else: assert unit is None factor = 1 res = '%1.0f%s' % (x * factor, unit if unit is not None else '') return res return f # arguments arguments = myTools.checkArgs( [ ("genome", myTools.File), ], [ ("removeUnofficialChrNames", bool, False), ("orderChromosomesBy", bool, 'names'), ('mode', bool, 'distribOnChr') ], __doc__) assert arguments['orderChromosomesBy'] in {'decreasingNbOfGenes', 'names'} assert arguments['mode'] in {'geneLengths', 'distribOnChr', 'distribOnChrs', 'overlap', 'correlationChromNbGenes', 'longestIntergene', 'minGeneLength'} # longestIntergene computes the longer intergene, i.e. space between two genes. # This gives the length of the longer rearrangement possibly unseen, except telomeres. def readerDependingOnFileWithDebAndEnd(fileName): flb = myFile.firstLineBuffer(myFile.openFile(fileName, 'r')) c = flb.firstLine.split("\t") if len(c) == 6: print("(c, beg, end, s, gName, transcriptName) -> (c, s, gName)", file=sys.stderr)
#!/usr/bin/python3 """ Find Strong ancGenes Families 1-1 (no duplication, no loss in descendants. """ import sys from LibsDyogen import myFile, myMaths, myTools, myPhylTree arguments = myTools.checkArgs([("phylTree.conf", file), ("target", str), ("IN.ancGenesFiles", str), ("OUT.ancGenesFiles", str)], [("except2XSpecies", bool, True)], __doc__) phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"]) target = phylTree.officialName[arguments["target"]] if arguments["except2XSpecies"] == "True": lstAncGenomes = [ x for x in phylTree.listAncestr if phylTree.dicParents[x][target] == target and x not in phylTree.lstEsp2X ] lstModernGenomes = [ x for x in phylTree.listSpecies if phylTree.dicParents[x][target] == target and x not in phylTree.lstEsp2X ] else: lstAncGenomes = [ x for x in phylTree.listAncestr