def run(self):
    """
    Merge the pickled per-genome classifier and attribute results into databases.

    Builds three database paths under self.outDir (classify.db, details.db and
    attributes.db), initializes each via self.initializeDb, then for every
    (class, genome) pair loads the matching pickle found at
    self.tmpDir/<genome>/<prefix><ClassName><genome> and hands the loaded dict
    to self.simpleUpdateWrapper (classify and attribute results) or
    self.simpleBedUpdateWrapper (details results).
    """
    def loadPickle(prefix, cls, genome):
        # Use a context manager so the handle is closed right after loading;
        # the previous pickle.load(open(...)) form left one open file per
        # (class, genome) pair until garbage collection.
        # NOTE(review): pickle can execute arbitrary code on load; these files
        # are presumably written by earlier pipeline steps - confirm tmpDir is trusted.
        path = os.path.join(self.tmpDir, genome, prefix + cls.__name__ + genome)
        with open(path, "rb") as handle:
            return pickle.load(handle)

    classifiers = classes_in_module(src.classifiers)
    attributes = classes_in_module(src.attributes)

    classifyDb = os.path.join(self.outDir, "classify.db")
    detailsDb = os.path.join(self.outDir, "details.db")
    attributesDb = os.path.join(self.outDir, "attributes.db")

    self.initializeDb(classifyDb, classifiers, dataType="INTEGER")
    self.initializeDb(detailsDb, classifiers, dataType="TEXT")

    for classifier, genome in product(classifiers, self.genomes):
        classifyDict = loadPickle("Classify", classifier, genome)
        self.simpleUpdateWrapper(classifyDict, classifyDb, genome, classifier.__name__)
        detailsDict = loadPickle("Details", classifier, genome)
        self.simpleBedUpdateWrapper(detailsDict, detailsDb, genome, classifier.__name__)

    # The attributes database is initialized without an explicit dataType.
    self.initializeDb(attributesDb, attributes)

    for attribute, genome in product(attributes, self.genomes):
        attributeDict = loadPickle("Attribute", attribute, genome)
        self.simpleUpdateWrapper(attributeDict, attributesDb, genome, attribute.__name__)
def run_aug_classifiers(args, target, tmp_dir):
    """
    Queue the Augustus-mode classifier jobs as children of target.

    One child target is added for every class found in src.augustus_classifiers,
    followed by one child target for every class found in src.classifiers.
    """
    def make_aug_target(aug_cls):
        # constructor arguments shared by every Augustus classifier
        return aug_cls(args.refFasta, args.annotationGp, args.refGenome, tmp_dir,
                       args.genome, args.psl, args.refPsl, args.fasta,
                       args.targetGp, args.augustusGp)

    def make_ref_target(ref_cls):
        return ref_cls(args.fasta, args.augustusGp, args.genome, tmp_dir)

    for aug_cls in classes_in_module(src.augustus_classifiers):
        target.addChildTarget(make_aug_target(aug_cls))

    # in Augustus mode we run the alignment-free classifiers on augustus transcripts
    for ref_cls in classes_in_module(src.classifiers):
        target.addChildTarget(make_ref_target(ref_cls))
def build_analyses(target, ref_genome, genome, annotation_gp, psl, gp, fasta, ref_fasta, sizes, gencode_attributes,
                   out_dir):
    """
    Schedule every classifier and attribute analysis, then the database follow-on.

    Each class found in src.classifiers and src.attributes is instantiated with
    the same argument list and added as a child of target. Once the children are
    queued, the database function is registered as the follow-on target.
    """
    out_file_tree = TempFileTree(target.getGlobalTempDir())
    # every analysis class is constructed from this same positional argument list
    analysis_args = (genome, psl, fasta, ref_fasta, annotation_gp, gencode_attributes, gp, ref_genome,
                     out_file_tree)

    # find all user-defined classes in the categories of analyses
    classifier_classes = classes_in_module(src.classifiers)
    attribute_classes = classes_in_module(src.attributes)
    for analysis_classes in (classifier_classes, attribute_classes):
        for analysis in analysis_classes:
            target.addChildTarget(analysis(*analysis_args))

    # merge the resulting pickled files into sqlite databases and construct BED tracks
    follow_on_args = (out_dir, genome, psl, sizes, gp, annotation_gp, out_file_tree)
    target.setFollowOnTargetFn(database, memory=8 * (1024 ** 3), args=follow_on_args)
def run(self):
    """
    Merge the pickled per-genome Augustus classifier results into databases.

    Removes any existing augustusClassify.db / augustusDetails.db under
    self.outDir, initializes both via self.initializeDb, then for every
    (classifier, genome) pair loads the "Classify" and "Details" pickles found
    at self.tmpDir/<genome>/<prefix><ClassName><genome> and passes them to
    self.simpleUpdateWrapper and self.simpleBedUpdateWrapper respectively.
    """
    def loadPickle(prefix, cls, genome):
        # Close the handle as soon as the pickle is read; the previous
        # pickle.load(open(...)) form leaked one open file per call.
        # NOTE(review): pickle can execute arbitrary code on load; these files
        # are presumably written by earlier pipeline steps - confirm tmpDir is trusted.
        path = os.path.join(self.tmpDir, genome, prefix + cls.__name__ + genome)
        with open(path, "rb") as handle:
            return pickle.load(handle)

    augustusClassifiers = classes_in_module(src.augustusClassifiers)

    # Start from fresh database files: delete leftovers before initializing.
    classifyDb = os.path.join(self.outDir, "augustusClassify.db")
    if os.path.exists(classifyDb):
        os.remove(classifyDb)
    detailsDb = os.path.join(self.outDir, "augustusDetails.db")
    if os.path.exists(detailsDb):
        os.remove(detailsDb)

    self.initializeDb(classifyDb, augustusClassifiers, dataType="INTEGER")
    self.initializeDb(detailsDb, augustusClassifiers, dataType="TEXT")

    for classifier, genome in product(augustusClassifiers, self.genomes):
        classifyDict = loadPickle("Classify", classifier, genome)
        self.simpleUpdateWrapper(classifyDict, classifyDb, genome, classifier.__name__)
        detailsDict = loadPickle("Details", classifier, genome)
        self.simpleBedUpdateWrapper(detailsDict, detailsDb, genome, classifier.__name__)
def augustusNotOk():
    """
    Build the query pieces that select entries flagged by any Augustus classifier.

    Returns a 4-tuple (detailsFields, classifyFields, classifyValues,
    classifyOperations): the class names found in src.augustusClassifiers (the
    same list object is returned in both field slots), a value of 1 for every
    field, and an "OR" for every gap between consecutive fields.
    """
    fields = []
    for augustusClassifier in classes_in_module(src.augustusClassifiers):
        fields.append(augustusClassifier.__name__)
    numFields = len(fields)
    # one operator fewer than there are fields, since operators sit between fields
    operations = ["OR" for _ in range(numFields - 1)]
    values = [1 for _ in range(numFields)]
    return fields, fields, values, operations
def run(self):
    """
    Merge the pickled per-genome classifier and attribute results into databases.

    Three database paths are built under self.outDir: classify.db, details.db
    and attributes.db. Each is initialized through self.initializeDb. For every
    (class, genome) combination the pickle at
    self.tmpDir/<genome>/<prefix><ClassName><genome> is loaded and forwarded to
    self.simpleUpdateWrapper (prefixes "Classify" and "Attribute") or to
    self.simpleBedUpdateWrapper (prefix "Details").
    """
    def readPickle(prefix, cls, genome):
        # A with-block guarantees the file is closed after loading; the former
        # pickle.load(open(...)) calls never closed their handles explicitly.
        # NOTE(review): pickle can execute arbitrary code on load; these files
        # are presumably written by earlier pipeline steps - confirm tmpDir is trusted.
        picklePath = os.path.join(self.tmpDir, genome, prefix + cls.__name__ + genome)
        with open(picklePath, "rb") as inFile:
            return pickle.load(inFile)

    classifiers = classes_in_module(src.classifiers)
    attributes = classes_in_module(src.attributes)

    classifyDb = os.path.join(self.outDir, "classify.db")
    detailsDb = os.path.join(self.outDir, "details.db")
    attributesDb = os.path.join(self.outDir, "attributes.db")

    self.initializeDb(classifyDb, classifiers, dataType="INTEGER")
    self.initializeDb(detailsDb, classifiers, dataType="TEXT")

    for classifier, genome in product(classifiers, self.genomes):
        classifyDict = readPickle("Classify", classifier, genome)
        self.simpleUpdateWrapper(classifyDict, classifyDb, genome, classifier.__name__)
        detailsDict = readPickle("Details", classifier, genome)
        self.simpleBedUpdateWrapper(detailsDict, detailsDb, genome, classifier.__name__)

    # The attributes database is initialized without an explicit dataType.
    self.initializeDb(attributesDb, attributes)

    for attribute, genome in product(attributes, self.genomes):
        attributeDict = readPickle("Attribute", attribute, genome)
        self.simpleUpdateWrapper(attributeDict, attributesDb, genome, attribute.__name__)
def run_tm_classifiers(args, target, tmp_dir):
    """
    Queue the transMap-mode classifier and attribute jobs as children of target.

    Child targets are added, in order, for every class found in
    src.alignment_classifiers, then src.attributes, then src.classifiers.
    """
    def make_alignment_target(tm_cls):
        return tm_cls(args.refFasta, args.annotationGp, args.refGenome, tmp_dir,
                      args.genome, args.psl, args.refPsl, args.fasta, args.targetGp)

    def make_attribute_target(attr_cls):
        # same arguments as the alignment classifiers plus the gencode attributes
        return attr_cls(args.refFasta, args.annotationGp, args.refGenome, tmp_dir,
                        args.genome, args.psl, args.refPsl, args.fasta, args.targetGp,
                        args.gencodeAttributes)

    def make_ref_target(ref_cls):
        return ref_cls(args.fasta, args.targetGp, args.genome, tmp_dir)

    for tm_cls in classes_in_module(src.alignment_classifiers):
        target.addChildTarget(make_alignment_target(tm_cls))

    for attr_cls in classes_in_module(src.attributes):
        target.addChildTarget(make_attribute_target(attr_cls))

    # in transMap mode we run the alignment-free classifiers on the target genome
    for ref_cls in classes_in_module(src.classifiers):
        target.addChildTarget(make_ref_target(ref_cls))
def augustusNotOk():
    """
    Assemble the fields, values and operators matching any failed Augustus classifier.

    The names of the classes in src.augustusClassifiers serve as both the
    details fields and the classify fields (one shared list). Every field is
    paired with the value 1 and adjacent fields are joined with "OR".
    """
    names = [augustusClassifier.__name__ for augustusClassifier in classes_in_module(src.augustusClassifiers)]
    count = len(names)
    # operators sit between fields, hence one fewer than the number of fields
    joiners = ["OR"] * (count - 1)
    ones = [1] * count
    return names, names, ones, joiners
def run(self):
    """
    Merge the pickled per-genome Augustus classifier results into databases.

    Any existing augustusClassify.db / augustusDetails.db under self.outDir is
    deleted, both are initialized through self.initializeDb, and then for each
    (classifier, genome) combination the "Classify" and "Details" pickles at
    self.tmpDir/<genome>/<prefix><ClassName><genome> are loaded and passed to
    self.simpleUpdateWrapper and self.simpleBedUpdateWrapper respectively.
    """
    def readPickle(prefix, cls, genome):
        # A with-block guarantees the file is closed after loading; the former
        # pickle.load(open(...)) calls never closed their handles explicitly.
        # NOTE(review): pickle can execute arbitrary code on load; these files
        # are presumably written by earlier pipeline steps - confirm tmpDir is trusted.
        picklePath = os.path.join(self.tmpDir, genome, prefix + cls.__name__ + genome)
        with open(picklePath, "rb") as inFile:
            return pickle.load(inFile)

    augustusClassifiers = classes_in_module(src.augustusClassifiers)

    # Delete leftovers from a previous run before the databases are initialized.
    classifyDb = os.path.join(self.outDir, "augustusClassify.db")
    if os.path.exists(classifyDb):
        os.remove(classifyDb)
    detailsDb = os.path.join(self.outDir, "augustusDetails.db")
    if os.path.exists(detailsDb):
        os.remove(detailsDb)

    self.initializeDb(classifyDb, augustusClassifiers, dataType="INTEGER")
    self.initializeDb(detailsDb, augustusClassifiers, dataType="TEXT")

    for classifier, genome in product(augustusClassifiers, self.genomes):
        classifyDict = readPickle("Classify", classifier, genome)
        self.simpleUpdateWrapper(classifyDict, classifyDb, genome, classifier.__name__)
        detailsDict = readPickle("Details", classifier, genome)
        self.simpleBedUpdateWrapper(detailsDict, detailsDb, genome, classifier.__name__)
def allProblems():
    """
    Assemble the fields, values and operators matching any flagged classifier.

    Returns a 4-tuple (detailsFields, classifyFields, classifyValues,
    classifyOperations): the class names found in src.classifiers (one shared
    list fills both field slots), a 1 for every field, and an "OR" between
    each pair of consecutive fields.
    """
    fieldNames = []
    for classifier in classes_in_module(src.classifiers):
        fieldNames.append(classifier.__name__)
    numFields = len(fieldNames)
    # one operator fewer than there are fields
    operations = ["OR" for _ in range(numFields - 1)]
    values = [1 for _ in range(numFields)]
    return fieldNames, fieldNames, values, operations
def run_ref_classifiers(args, target, tmp_dir):
    """
    Queue one child target per class in src.classifiers for the reference genome.

    Each classifier is constructed from args.refFasta, args.annotationGp,
    args.refGenome and tmp_dir, then added to target via addChildTarget.
    """
    for ref_cls in classes_in_module(src.classifiers):
        child = ref_cls(args.refFasta, args.annotationGp, args.refGenome, tmp_dir)
        target.addChildTarget(child)