def generateRandomProblem(nbGenes, nbSpecies, orthologProb = 0.5, paralogProb = 0.4):
    """
    Generates a random set of genes, orthologs, paralogs and species tree,
    ready to be input in ConstraintGraph class.

    Returned value has the form
    { "genes" : geneset, "orthologs" : orthologs, "paralogs" : paralogs,
      "speciesTree" : speciesTree }
    geneset items have the form [GENENAME]:[SPECIESNAME], where GENENAME is
    "g" followed by the gene index and SPECIESNAME is the species index.
    orthologs and paralogs are sets of (gene, gene) tuples; within a tuple the
    gene with the lower index always comes first.

    Gene species are attributed randomly, though each species has at least
    one gene. As a consequence, when nbGenes < nbSpecies exactly nbSpecies
    genes are generated (one per species).
    If two genes have the same species, they end up in paralogs, always.
    Otherwise a single uniform draw decides: ortholog with probability
    orthologProb, paralog with probability paralogProb, no relation for the
    remainder. If orthologProb + paralogProb exceeds 1, the paralog share is
    effectively truncated to 1 - orthologProb.

    :argument nbGenes: Number of genes to generate
    :argument nbSpecies: Number of species (leaves requested for the species tree)
    :argument orthologProb: chances for 2 genes to be a pair in orthologs
    :argument paralogProb: chances for 2 genes to be a pair in paralogs
    """
    # Materialised as a list so the same sequence type is handed to
    # populate() whether range() returns a list or a lazy range object.
    speciesnames = list(range(nbSpecies))

    speciesTree = TreeNode()
    speciesTree.populate(nbSpecies, speciesnames)

    # Species names were supplied as integers; convert the names of the nodes
    # yielded by iterating the tree to strings so they match the species part
    # of the gene labels.
    for node in speciesTree:
        if node.name is not None:
            node.name = str(node.name)

    # first add one gene per species
    geneSpecies = list(range(nbSpecies))

    # then fill in the rest with random species genes
    for _ in range(nbGenes - nbSpecies):
        geneSpecies.append(random.randint(0, nbSpecies - 1))

    genes = ["g" + str(index) + ":" + str(species)
             for index, species in enumerate(geneSpecies)]

    # and here we decide of random relationships
    # A draw p in [0, orthologProb) means ortholog, a draw in
    # [orthologProb, paralogThreshold) means paralog, anything above means
    # no relation. Kept in its own local instead of overwriting the parameter.
    paralogThreshold = orthologProb + paralogProb

    orthologs = set()
    paralogs = set()

    # Iterate over the genes actually generated (not nbGenes), so that every
    # gene in the returned gene set is considered even when nbGenes < nbSpecies.
    geneCount = len(genes)
    for i in range(geneCount):
        g1 = genes[i]
        g1species = geneSpecies[i]
        for j in range(i + 1, geneCount):
            g2 = genes[j]

            if g1species == geneSpecies[j]:
                # Same species: always paralogs, no random draw consumed.
                paralogs.add((g1, g2))
                continue

            p = random.random()
            if p < orthologProb:
                orthologs.add((g1, g2))
            elif p < paralogThreshold:
                paralogs.add((g1, g2))

    return {
        "genes" : set(genes),
        "orthologs" : orthologs,
        "paralogs" : paralogs,
        "speciesTree" : speciesTree
    }