def RunGlobalAlphaTest(beginSeqNum, endSeqNum, firstTime, significantRepeatList, organism, prmPrefix, prmDirectory = 'prm_files/'):
    """
    Runs all Global Alpha test calculations.

    When firstTime is truthy, the repeats of every chromosome in the range
    are unpickled, filtered through printAndParseFiles.SigTEs, combined into
    the C family matrices and converted to the P family matrices; both are
    written to file. Otherwise the P family matrices are parsed back from
    the previously written PFamilyMats.txt. In both cases the R, mt and Q
    results are then computed from P and written to file.

    beginSeqNum: Beginning chromosome range (inclusive).
    endSeqNum: Ending chromosome range (inclusive).
    firstTime: Boolean if first time for all calculations.
    significantRepeatList: List of significant transposable element family instances.
    organism: Organism's name; output goes under "Calculations_<organism>".
    prmPrefix: Prefix of the .prm names; the chromosome number is appended
        to it before it is handed to unpickleRepeats.
    prmDirectory: Directory where all of the .prm files are located.

    Returns nothing; results are written to the files named below.
    """
    # Every output path is "Calculations_<organism>\<file name>". The
    # backslash is written as an explicit escape so the literal is valid on
    # current Python versions while the resulting path stays unchanged.
    # NOTE(review): a backslash is only a directory separator on Windows --
    # confirm whether other platforms need to be supported.
    calcPrefix = "Calculations_" + organism + "\\"

    print("GLOBAL ALPHA")

    # to find global alpha values for C and P
    if firstTime:
        # One unpickleRepeats call per chromosome, flattened into one list.
        AllRepObjs = list(itertools.chain.from_iterable(
            unpickleRepeats(prmPrefix + "%d" % chrNum, prmDirectory)
            for chrNum in range(beginSeqNum, endSeqNum + 1)
        ))
        print(len(AllRepObjs))

        sigReps = printAndParseFiles.SigTEs(AllRepObjs, significantRepeatList)
        print(len(sigReps))

        CfamilyMats = FindCandP.combineMatrices(sigReps)
        print("done with C")
        printAndParseFiles.printListToFile(CfamilyMats, calcPrefix + "CFamilyMats.txt")

        PfamilyMats = FindCandP.getPSymm(CfamilyMats)
        printAndParseFiles.printListToFile(PfamilyMats, calcPrefix + "PFamilyMats.txt")
        print("done with P")
    else:
        # Reuse the P matrices written by an earlier first-time run.
        PfamilyMats = printAndParseFiles.parsePfamilyFile(calcPrefix + "PFamilyMats.txt")

    # to find global alpha values for R*t
    print("R")
    RmatDict = FindR.calculateR(PfamilyMats)
    printAndParseFiles.printMatrixToFile(RmatDict, calcPrefix + "RFamilyMats.txt")

    # to find global alpha values for m*t
    # NOTE(review): matSize is presumably the dimension of the family
    # matrices (4 x 4) -- confirm against FindMT / FindQ.
    matSize = 4
    print("mt")
    mtDict = FindMT.calculateMt(RmatDict, matSize)
    printAndParseFiles.printValuesToFile(mtDict, calcPrefix + "MTFamilyVals.txt")

    # to find global alpha values for Q
    print("Q")
    QmatDict = FindQ.calculateQvalues(mtDict, RmatDict, matSize)
    printAndParseFiles.printMatrixToFile(QmatDict, calcPrefix + "QFamilyMats.txt")