def functionsGoNOW(sampleNames, path, runTrimMetadata, commands):
    """Run quality checks (quast) on the assemblies and report the results.

    :param sampleNames: list of sample names to process
    :param path: root working directory for the pipeline run
    :param runTrimMetadata: accumulated metadata dictionary from earlier stages
    :param commands: pipeline command metadata passed through to the helpers
    :return: metadata dictionary augmented with quast results
    """
    # print(...) form used for consistency with the other stage drivers in
    # this file; behaves identically under Python 2 for a single string arg.
    print("\nPerforming quality checks on assemblies.")
    quastList = quastProcesses(sampleNames, path, runTrimMetadata, commands)
    # Merge the per-sample quast results back into the running metadata
    quastMeta = metadataFiller.filler(runTrimMetadata, quastList)
    runTrimAssemblyMetadata = quastMetadata(sampleNames, path, quastMeta)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(sampleNames, path, runTrimAssemblyMetadata, "Collection")
    return runTrimAssemblyMetadata
def functionsGoNOW(correctedFiles, path, metadata, fLength, commands):
    """Drive the assembly stage: run SPAdes, reformat contigs, and report.

    :param correctedFiles: list of error-corrected sample names
    :param path: root working directory for the pipeline run
    :param metadata: accumulated metadata dictionary from earlier stages
    :param fLength: forward read length, forwarded to the SPAdes prep helper
    :param commands: pipeline command metadata passed through to the helpers
    :return: tuple of (updated metadata dictionary, list of assembled samples)
    """
    print("\nAssembling reads.")
    # Launch the (multiprocessed) SPAdes assemblies and collect their metadata
    prepResults = spadesPrepProcesses(correctedFiles, path, fLength, metadata, commands)
    prepMetadata = metadataFiller.filler(metadata, prepResults)
    # Rename/reformat the contig files and note the changes in the metadata
    formattedMetadata = contigFileFormatter(correctedFiles, path, prepMetadata)
    # Determine which samples actually produced assemblies
    finishedAssemblies = completionist(correctedFiles, path)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(correctedFiles, path, formattedMetadata, "Collection")
    return formattedMetadata, finishedAssemblies
def functionsGoNow(files, path, metadata, fLength, commands):
    """Assemble the supplied samples and record the results in the metadata.

    :param files: list of sample names to assemble
    :param path: root working directory for the pipeline run
    :param metadata: accumulated metadata dictionary from earlier stages
    :param fLength: forward read length, forwarded to the SPAdes prep helper
    :param commands: pipeline command metadata passed through to the helpers
    :return: tuple of (updated metadata dictionary, list of assembled samples)
    """
    # Run the SPAdes preparation/assembly helpers and merge their metadata
    spadesResults = spadesPrepProcesses(files, path, fLength, metadata, commands)
    mergedMetadata = metadataFiller.filler(metadata, spadesResults)
    # Reformat the contig files, then find which samples completed assembly
    updatedMetadata = contigFileFormatter(files, path, mergedMetadata)
    assembledFiles = completionist(files, path)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(files, path, updatedMetadata, "Collection")
    return updatedMetadata, assembledFiles
def functionsGoNOW(assembledFiles, path, assemblyMetadata, refFilePath, commands):
    """Run the GeneSeekr analyses (uniVec, gene seeking, V-typing, MLST).

    :param assembledFiles: list of assembled sample names to analyse
    :param path: root working directory for the pipeline run
    :param assemblyMetadata: accumulated metadata dictionary from earlier stages
    :param refFilePath: path to the reference target files
    :param commands: pipeline command metadata passed through to the helpers
    :return: metadata dictionary augmented with GeneSeekr results
    """
    # print(...) form used for consistency with the other stage drivers in
    # this file; behaves identically under Python 2 for a single string arg.
    print("\nPerforming GeneSeekr analysis")
    # Clear out any summary reports from a previous iteration of the pipeline
    reportRemover(path)
    # Do everything - uniVec screening, geneSeeking, V-typing, and MLST analysis
    geneSeekrMetadataList = geneSeekrPrepProcesses(assembledFiles, path, assemblyMetadata, refFilePath, commands)
    geneSeekrMetadata = metadataFiller.filler(assemblyMetadata, geneSeekrMetadataList)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(assembledFiles, path, geneSeekrMetadata, "Collection")
    return geneSeekrMetadata
def functionsGoNOW(sampleNames, path, date, metadata, refFilesPath):
    """Run the rMLST analyses: allele finding, reference-genome selection,
    subtyping, and genome-size estimation.

    :param sampleNames: list of sample names to process
    :param path: root working directory for the pipeline run
    :param date: sequencing/run date passed through to the BLAST helper
    :param metadata: accumulated metadata dictionary from earlier stages
    :param refFilesPath: path to the reference files (rMLST alleles etc.)
    :return: metadata dictionary augmented with rMLST results
    """
    # Original notes: commenting is subpar in this script as it reuses code
    # written by Mike, which was then heavily modified to fit the pipeline.
    # print(...) form used for consistency with the other stage drivers in
    # this file; behaves identically under Python 2 for a single string arg.
    print("\nPerforming rMLST analyses.")
    rMLSTgenes = refFilesPath + "/rMLST/alleles/"
    make_path("%s/tmp" % path)
    print("\nFinding rMLST alleles.")
    dictionaryPreparer(sampleNames)
    # NOTE(review): `path` is passed twice to blaster - presumably query and
    # output directories coincide here; confirm against blaster's signature.
    plusdict = blaster(rMLSTgenes, sampleNames, path, path, date, refFilesPath)
    # Use the allele matches to pick a reference genome, then subtype and
    # estimate genome size from the rMLST data
    additionalMetadata = determineReferenceGenome(plusdict, path, metadata, refFilesPath)
    moreMetadata = determineSubtype(plusdict, path, additionalMetadata, refFilesPath)
    allMetadata = rMLSTsizer(moreMetadata, sampleNames)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(sampleNames, path, allMetadata, "Collection")
    return allMetadata
def functionsGoNOW(sampleNames, path, runTrimMetadata, commands):
    """Calls all the functions in a way that they can be multi-processed.

    Runs the insert-size estimation stage: sample the fastq files, index the
    reference targets, map, extract, and graph, then format the output.

    :param sampleNames: list of sample names to process
    :param path: root working directory for the pipeline run
    :param runTrimMetadata: accumulated metadata dictionary from earlier stages
    :param commands: pipeline command metadata passed through to the helpers
    :return: metadata dictionary augmented with insert-size results
    """
    inputData = referenceFiletoAssembly(path, sampleNames)
    # print(...) form used for consistency with the other stage drivers in
    # this file; behaves identically under Python 2 for a single string arg.
    print("\nSampling fastq files.")
    sampleMeta = sampleFastq(path, sampleNames, runTrimMetadata, commands)
    # Index the reference targets before mapping
    indexList = indexTargetsProcesses(path, inputData, sampleMeta, commands)
    indexMeta = metadataFiller.filler(runTrimMetadata, indexList)
    # Start the mapping operations
    mappingList = mappingProcesses(path, inputData, indexMeta, commands)
    mappingMeta = metadataFiller.filler(runTrimMetadata, mappingList)
    # Extract the mapped reads, then graph the insert-size distributions
    extractingList = extractingProcesses(path, inputData, mappingMeta, commands)
    extractingMeta = metadataFiller.filler(runTrimMetadata, extractingList)
    graphingList = graphingProcesses(path, inputData, extractingMeta, commands)
    graphingMeta = metadataFiller.filler(runTrimMetadata, graphingList)
    os.chdir(path)
    runTrimInsertMetadata = formatOutput(path, sampleNames, graphingMeta)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(sampleNames, path, runTrimInsertMetadata, "Collection")
    return runTrimInsertMetadata
def functionsGoNOW(sampleNames, path, runMetadata, fLength, commands):
    """Run the error-correction (quake) stage on the fastq files.

    :param sampleNames: list of sample names to process
    :param path: root working directory for the pipeline run
    :param runMetadata: accumulated metadata dictionary from earlier stages
    :param fLength: forward read length, forwarded to the quake helpers
    :param commands: pipeline command metadata passed through to the helpers
    :return: tuple of (list of corrected sample names, updated metadata)
    """
    print('\nPerforming error correction on fastq files.')
    # Removed the multiprocessing aspect of this function - it seemed to be unreliable.
    # Sometimes, fastq files with more data would not be corrected.
    os.chdir(path)
    # All prints unified to print(...) form; the original mixed Python 2
    # print statements with print() calls within this one function.
    print("Preparing fastq files for processing")
    prepList = quakePrepProcesses(sampleNames, path, fLength, runMetadata, commands)
    prepMetadata = metadataFiller.filler(runMetadata, prepList)
    print("Determining cut-off values for error correction")
    cutoffList = quakeCutOffProcesses(sampleNames, path, prepMetadata, commands)
    cutoffMetadata = metadataFiller.filler(prepMetadata, cutoffList)
    print("Correcting errors")
    correctList = quakeCorrectProcesses(sampleNames, path, fLength, cutoffMetadata, commands)
    # NOTE(review): this fills from runMetadata rather than cutoffMetadata -
    # possibly intentional, but verify that the cut-off metadata is not lost.
    correctMetadata = metadataFiller.filler(runMetadata, correctList)
    os.chdir(path)
    # Run completionist to determine unprocessable files, and acquire metadata
    runTrimMetadata, correctedList = completionist(sampleNames, path, correctMetadata, fLength)
    # Clean up tmp files
    tmpFileRemover(path, correctedList)
    # Dump the cumulative metadata to the per-sample "Collection" JSON reports
    jsonReportR.jsonR(correctedList, path, runTrimMetadata, "Collection")
    return correctedList, runTrimMetadata
def reportWriter(sampleNames, metadata, path):
    """Write the combined metadata spreadsheet and JSON reports, clean up
    intermediate files, and copy assemblies/reports to the master repository.

    As outputting a JSON file is so straightforward, helper functions were
    unnecessary.

    :param sampleNames: list of sample names to report on
    :param metadata: nested metadata dict keyed [sample][category][heading]
    :param path: root working directory for the pipeline run
    """
    print("\nCreating reports.")
    reportPath = "%s/reports" % path
    # Grab the name of the analysis from the path variable
    folderName = path.split("/")[-1]
    # Get the path of the parental folder. This is where subfolders containing copies of
    # all the assemblies and reports will be stored
    repositoryPath = os.path.dirname(path)
    # Create the appropriate folders (if necessary) for storing the appropriate files
    make_path("%s/AssemblyData/Assemblies" % repositoryPath)
    make_path("%s/AssemblyData/JsonReports" % repositoryPath)
    make_path("%s/AssemblyData/SummaryReports" % repositoryPath)
    make_path(reportPath)
    combinedReport = open("%s/%s_CombinedMetadataReport.tsv" % (reportPath, folderName), "wb")
    # The headings will be used to search the json file and return only these particular data for
    # inclusion in the metadata spreadsheet
    headings = ["SampleName", "fileName", "N50", "NumContigs", "TotalLength", "MeanInsertSize",
                "averageDepthofCov", "referenceGenome", "NumIdenticalAlleles", "rMLSTSequenceType",
                "rMLSTIdenticalAlleles", "SequencingDate", "NumPredictedGenes", "NumPredictedGenes>500bp",
                "NumPredictedGenes>1000bp", "NumPredictedGenes>3000bp", "geneSeekrProfile",
                "verotoxinProfile", "MLST_sequenceType", "percentGC", "Investigator",
                "NumberOfClustersPF", "PercentOfClusters", "TotalClustersinRun", "LengthofFirstRead",
                "LengthofSecondRead", "Project", "PipelineVersion"]
    # General headings within the metadata json file
    reportHeadings = ["1.General", "2.Assembly", "3.Run", "4.Correction", "5.rMLST",
                      "6.rMLSTmatchestoRef", "7.PipelineCommands", "8.PipelineVersions"]
    combinedReport.write("\t".join(headings))
    combinedReport.write("\n")
    # Use jsonReportR to fill out the final json metadata report
    jsonReportR.jsonR(sampleNames, path, metadata, "Report")
    # Fill the combined metadata report spreadsheet with the appropriate data
    for name in sampleNames:
        newPath = path + "/" + name
        for heading in headings:
            value = "NA"
            for rHeading in reportHeadings:
                # Had some issues with the values being the wrong "type".
                # Only accept strings, floats, and integers. The lookup is
                # hoisted into a local and the chained "type(x) is A or ..."
                # checks collapsed to a single, equivalent membership test.
                datum = metadata[name][rHeading][heading]
                if type(datum) in (StringType, FloatType, IntType):
                    value = str(datum)
            # Write the value to the spreadsheet
            combinedReport.write(value)
            combinedReport.write("\t")
        combinedReport.write("\n")
        # file clean-up
        dbm = "%s/%s_fastqFiles.dbm" % (newPath, name)
        if os.path.isfile(dbm):
            os.remove(dbm)
        fastq = glob.glob("%s/*_001.fastq" % newPath)
        for files in fastq:
            if os.path.isfile(files):
                os.remove(files)
        counts = "%s/%s_counts.txt" % (newPath, name)
        if os.path.isfile(counts):
            os.remove(counts)
        qcts = "%s/%s_fastqFiles.txt.qcts" % (newPath, name)
        if os.path.isfile(qcts):
            os.remove(qcts)
        # NOTE(review): this removal targets the run-level reports folder but
        # runs once per sample; redundant after the first pass, kept as-is.
        jsonCollection = glob.glob("%s/reports/*Collection.json" % path)
        for jsonFile in jsonCollection:
            if os.path.isfile(jsonFile):
                os.remove(jsonFile)
        # Move assemblies and reports to appropriate Master repositories
        shutil.copy("%s/%s/%s_filteredAssembled.fasta" % (path, name, name),
                    "%s/AssemblyData/Assemblies/%s_filteredAssembled.fasta" % (repositoryPath, name))
        shutil.copy("%s/%s/%s_metadataReport.json" % (path, name, name),
                    "%s/AssemblyData/JsonReports/%s_metadataReport.json" % (repositoryPath, name))
    # Move the metadata spreadsheet
    combinedReport.close()
    shutil.copy("%s/%s_CombinedMetadataReport.tsv" % (reportPath, folderName),
                "%s/AssemblyData/SummaryReports/%s_CombinedMetadataReport.tsv" % (repositoryPath, folderName))