Пример #1
0


#  Instantiate the fasta rec dicts with BioPython SeqIO, using the geneID field
#  (first whitespace-delimited token of each record's descriptor) as key to the
#  seq record objects.
def _loadFastaDictByGeneID(fastaPath):
    """Return a dict of the fasta records in fastaPath, keyed by geneID.

    The key for each record is the first whitespace-delimited token of its
    description line.  The file is opened inside a ``with`` block so the
    handle is closed explicitly once SeqIO.to_dict has consumed every record,
    instead of being left open for the garbage collector to reclaim.
    """
    #  NOTE(review): "rU" is the Python 2 universal-newline mode; it is rejected
    #  by Python 3.11+, so switch to "r" if this script is ever ported.
    with open(fastaPath, "rU") as fastaHandle:
        return SeqIO.to_dict(SeqIO.parse(fastaHandle, 'fasta'),
                             key_function = lambda rec : rec.description.split()[0])

genomeOneFastasDict = _loadFastaDictByGeneID(genomeFileOne)

genomeTwoFastasDict = _loadFastaDictByGeneID(genomeFileTwo)


#  Initiate resultList
resultList = []

#  Explode orthologList into list of lists
#  NOTE(review): the return value is discarded, so explodeDelimitedList presumably
#  splits each tab-delimited record of orthologList in place -- confirm in JamesDefs.
JamesDefs.explodeDelimitedList(orthologList, '\t')

#  Populate a list of GeneIDs in each genome's dict of boundary seqs
genomeOneGeneIDs = genomeOneFastasDict.keys()
genomeTwoGeneIDs = genomeTwoFastasDict.keys()

#  Loop through orthologList and call each fasta in orthoPair, format
#  the new comboFasta and append it to resultList
for orthoPair in orthologList:
    
    #  Test for orthoPair[0] in genomeOneFastasDict and same for orthoPair[1] in genomeTwoFastasDict
    orthoPair_0_warn = None
    orthoPair_1_warn = None
    if orthoPair[0] not in genomeOneGeneIDs:
        orthoPair_0_warn = 'Yes'
    if orthoPair[1] not in genomeTwoGeneIDs:
Пример #2
0
        else:
            print "WARNING: boundaryRegion variable should only be 'up' or 'down'.\nScript exiting."
            sys.exit()


#--------------------------------------------------       


# Strip leading/trailing whitespace (including the trailing newline) from each record.
# A list comprehension with the str method replaces map(string.strip, ...): the
# string-module function form is deprecated and does not exist in Python 3, and the
# result has to be a real list because len() is taken of it below.
codingBoundsList = [rawRecord.strip() for rawRecord in codingBoundsList]
resolvedConflictsList = [rawRecord.strip() for rawRecord in resolvedConflictsList]
                            

# Convert these into lists of lists so that field vals can be interrogated and copied 
# Explode tab delimited strings of each record into list of values
# NOTE(review): the return values are discarded, so explodeDelimitedList presumably
# rewrites each list in place -- confirm in JamesDefs.
JamesDefs.explodeDelimitedList(codingBoundsList, '\t')
JamesDefs.explodeDelimitedList(resolvedConflictsList, '\t')


# Record counts, captured once after the explode step
len_codingBoundsList = len(codingBoundsList)
len_resolvedConflictsList = len(resolvedConflictsList)



# Populate unUsableList
unUsableGeneNames = []
i = 0
while i < len_resolvedConflictsList:
    
    if int(resolvedConflictsList[i][5]) < shortestUsableBdryReg:
        unUseableGene = resolvedConflictsList.pop(i)