示例#1
0
    def __init__(self, badStrs=None, trueStrs=None, falseStrs=None):
        """Build the lowercased sets of bad, true and false strings.

        Inputs:
        - badStrs: value(s) stored in self.badStrs; None means no bad strings.
        - trueStrs: value(s) stored in self.trueStrs; None means use every
            member of _TrueValues that has a "lower" method, minus the
            bad strings.
        - falseStrs: value(s) stored in self.falseStrs; None means use every
            member of _FalseValues that has a "lower" method, minus the
            bad strings.

        Explicit arguments are passed through SeqUtil.asCollection before
        being lowercased (presumably so a single string is accepted as well
        as a collection of strings -- confirm against SeqUtil).

        Raise ValueError if explicitly supplied true or false strings overlap
        the bad strings, or if the true and false strings overlap each other.
        """
        if badStrs is None:
            self.badStrs = set()
        else:
            self.badStrs = set(
                s.lower() for s in SeqUtil.asCollection(badStrs))

        if trueStrs is None:
            # Defaults quietly give way to the bad strings instead of raising.
            self.trueStrs = set(
                v.lower() for v in _TrueValues if hasattr(v, "lower"))
            self.trueStrs -= self.badStrs
        else:
            self.trueStrs = set(
                s.lower() for s in SeqUtil.asCollection(trueStrs))
            if self.trueStrs & self.badStrs:
                raise ValueError(
                    "One or more bad values and true values overlap")

        if falseStrs is None:
            # Defaults quietly give way to the bad strings instead of raising.
            self.falseStrs = set(
                v.lower() for v in _FalseValues if hasattr(v, "lower"))
            self.falseStrs -= self.badStrs
        else:
            self.falseStrs = set(
                s.lower() for s in SeqUtil.asCollection(falseStrs))
            if self.falseStrs & self.badStrs:
                # The message names the false values (it used to say "true").
                raise ValueError(
                    "One or more bad values and false values overlap")

        # Checked last so it also covers a default set against an explicit one.
        if self.trueStrs & self.falseStrs:
            raise ValueError("One or more true and false values overlap")
示例#2
0
 def test_findlonglen_returns_longest_length(self):
     """Check that findlonglen returns the length of the longest key.

     'length100' is the longest key of the sample dict; its value is the
     smallest, so the values cannot be what is being measured.
     """
     lengths_by_name = {'length1': 15, 'length10': 150, 'length100': 1}
     expected = len('length100')
     actual = SeqUtil.findlonglen(lengths_by_name)
     self.assertEqual(expected, actual)
示例#3
0
文件: CnvUtil.py 项目: r-owen/RO
    def __init__(self, badStrs=None, trueStrs=None, falseStrs=None):
        """Build the lowercased sets of bad, true and false strings.

        Inputs:
        - badStrs: value(s) stored in self.badStrs; None means no bad strings.
        - trueStrs: value(s) stored in self.trueStrs; None means use every
            member of _TrueValues that has a "lower" method, minus the
            bad strings.
        - falseStrs: value(s) stored in self.falseStrs; None means use every
            member of _FalseValues that has a "lower" method, minus the
            bad strings.

        Explicit arguments are passed through SeqUtil.asCollection before
        being lowercased (presumably so a single string is accepted as well
        as a collection of strings -- confirm against SeqUtil).

        Raise ValueError if explicitly supplied true or false strings overlap
        the bad strings, or if the true and false strings overlap each other.
        """
        if badStrs is None:
            self.badStrs = set()
        else:
            self.badStrs = set(s.lower() for s in SeqUtil.asCollection(badStrs))

        if trueStrs is None:
            # Defaults quietly give way to the bad strings instead of raising.
            self.trueStrs = set(v.lower() for v in _TrueValues if hasattr(v, "lower"))
            self.trueStrs -= self.badStrs
        else:
            self.trueStrs = set(s.lower() for s in SeqUtil.asCollection(trueStrs))
            if self.trueStrs & self.badStrs:
                raise ValueError("One or more bad values and true values overlap")

        if falseStrs is None:
            # Defaults quietly give way to the bad strings instead of raising.
            self.falseStrs = set(v.lower() for v in _FalseValues if hasattr(v, "lower"))
            self.falseStrs -= self.badStrs
        else:
            self.falseStrs = set(s.lower() for s in SeqUtil.asCollection(falseStrs))
            if self.falseStrs & self.badStrs:
                # The message names the false values (it used to say "true").
                raise ValueError("One or more bad values and false values overlap")

        # Checked last so it also covers a default set against an explicit one.
        if self.trueStrs & self.falseStrs:
            raise ValueError("One or more true and false values overlap")
inputFile = './data/5.leaderboard_data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_102_4 (1).txt'
outputFile = './results/5.leaderboard.txt'
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# Work from curDir so the relative paths above (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: line 1 holds the integer N, line 2 the spectrum as integers.
with open(inputFile) as f:
    N = int(f.readline().strip())
    spectrumLine = f.readline().strip()
    # split() with no argument tolerates tabs and repeated spaces.
    spectrum = [int(n) for n in spectrumLine.split()]

# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Load the amino acid mass table; only its values are passed on below.
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# Generate the sequence from the spectrum.
seq = SeqUtil.leaderboardCyclopeptideSequencing(spectrum, N,
                                                AAMassMap.values())

# Write the result as one dash-separated string. write() is the call meant
# for a single string; writelines() would iterate it character by character.
with open(outputFile, "w") as f:
    f.write("-".join(str(s) for s in seq))
inputFile = './data/3.theoretical-spectrum-data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_98_3.txt'
outputFile = './results/3.theoretical-spectrum.txt'
aminoAcidMassMapFile = './data/integer_mass_table.txt'


# Work from curDir so the relative paths above (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: the peptide is the first line of the file.
with open(inputFile) as f:
    peptide = f.readline().strip()

# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Load the amino acid mass table.
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# Generate the spectrum of the peptide.
spectrum = SeqUtil.cyclospectrum(peptide, AAMassMap)

# Write the spectrum as one space-separated string. write() is the call meant
# for a single string; writelines() would iterate it character by character.
with open(outputFile, "w") as f:
    f.write(" ".join(str(s) for s in spectrum))


示例#6
0
文件: CnvUtil.py 项目: r-owen/RO
 def __init__(self, badStrs=("NaN", "?")):
     """Store the lowercased bad strings in self.badStrs as a set.

     badStrs is treated as a collection of strings when
     SeqUtil.isCollection says so, and as a single string otherwise.
     """
     if SeqUtil.isCollection(badStrs):
         lowered = [item.lower() for item in badStrs]
     else:
         lowered = [badStrs.lower()]
     self.badStrs = set(lowered)
示例#7
0
import os, sys
import SeqUtil, Report

# Create the output directories for the alignment and Bayes files if missing.
if not os.path.exists('aligns'):
  os.mkdir('aligns')
if not os.path.exists('Bayes'):
  os.mkdir('Bayes')
#if not os.path.exists('ML'):
#  os.mkdir('ML')

# Command line: <out> <query>. `out` is the dataset suffix used in every file
# name below; `query` is only forwarded to the report.
# NOTE(review): `out` is concatenated unquoted into the shell commands below,
# so spaces or shell metacharacters in it will change what gets executed.
out= sys.argv[1]
query = sys.argv[2]
SeqUtil.rename('Data/bac-'+out+'.fas')
# Run the external `prank` command (presumably the PRANK aligner -- confirm)
# only when its nexus output does not exist yet; bayesinNex is applied only to
# a freshly produced alignment.
if not os.path.exists('aligns/bac-'+out+'.best.nex'):
  os.system('prank -d=Data/bac-'+out+' -o=aligns/bac-'+out+' -f=nexus -quiet')
  SeqUtil.bayesinNex('aligns/bac-'+out+'.best.nex')
#SeqUtil.splicealign('aligns/bac-'+out+'.best.nex','Bayes/bac-'+out+'-mod.nxs')
#models=SeqUtil.bestmod('Bayes/bac-'+out+'-mod.nxs')
# Model selection runs on every invocation, even when the alignment is reused.
models_ori=SeqUtil.bestmod('aligns/bac-'+out+'.best.nex')
# Write the Bayes input file only if it is not already there.
if not os.path.exists('Bayes/bac-'+out+'-bayes.nxs'):
  SeqUtil.bayesfile('aligns/bac-'+out+'.best.nex',models_ori,'Bayes/bac-'+out+'-bayes.nxs')
#SeqUtil.bayesfile('Bayes/bac-'+out+'-mod.nxs',models,'Bayes/bac-'+out+'-bayes.nxs')
# Run the external `mb` command on that file (presumably MrBayes -- confirm).
# The exit status of os.system is ignored, so a failed run is not detected.
os.system('mb Bayes/bac-'+out+'-bayes.nxs')
# Disabled maximum-likelihood branch, kept for reference.
#SeqUtil.pamlseqnex('Bayes/bac-'+out+'-mod.nxs','ML/bac-'+out)
#for mod in models.keys():
#    SeqUtil.pamlinput('ML/bac-'+out,'ML/bac-'+out+'.out','ML/bac-'+out+'.ctl',{models.keys()[mod].split('+')[0]:models[models.keys()[mod]][1]})
#    os.system('codeml ML/bac-'+out+'.ctl')
#    SeqUtil.extractMLtree('ML/bac-'+out+'.out')
# Produce the final report for the 'bac' dataset.
Report.generateReport(out,query,models_ori,'bac')
示例#8
0
 def __init__(self, badStrs=("NaN", "?")):
     """Store the lowercased bad strings in self.badStrs as a set.

     badStrs is treated as a collection of strings when
     SeqUtil.isCollection says so, and as a single string otherwise.
     """
     if SeqUtil.isCollection(badStrs):
         lowered = [item.lower() for item in badStrs]
     else:
         lowered = [badStrs.lower()]
     self.badStrs = set(lowered)
# settings
curDir = 'E:/Copy/Coursera/Bioinformatics Algorithms (part-I)/MyPrograms/week2'
#curDir = 'D:/Copy/Coursera/Bioinformatics Algorithms (part-I)/MyPrograms/week2'
inputFile = './data/3.theoretical-spectrum-data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_98_3.txt'
outputFile = './results/3.theoretical-spectrum.txt'
aminoAcidMassMapFile = './data/integer_mass_table.txt'

# Work from curDir so the relative paths above (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: the peptide is the first line of the file.
with open(inputFile) as f:
    peptide = f.readline().strip()

# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Load the amino acid mass table.
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# Generate the spectrum of the peptide.
spectrum = SeqUtil.cyclospectrum(peptide, AAMassMap)

# Write the spectrum as one space-separated string. write() is the call meant
# for a single string; writelines() would iterate it character by character.
with open(outputFile, "w") as f:
    f.write(" ".join(str(s) for s in spectrum))
示例#10
0
aminoAcidMassMapFile = './data/integer_mass_table.txt'


# Work from curDir so the relative paths (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: the spectrum is the first line, as integers.
with open(inputFile) as f:
    spectrumLine = f.readline().strip()
    # split() with no argument tolerates tabs and repeated spaces.
    spectrum = [int(n) for n in spectrumLine.split()]


# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Load the amino acid mass table (not used by the steps visible here; kept
# because the load is a module-level effect other code may rely on).
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# Compute the convolution of the spectrum and put it in ascending order.
conv = sorted(SeqUtil.spectralConvolution(spectrum))

# Write the values as one space-separated string. write() is the call meant
# for a single string; writelines() would iterate it character by character.
with open(outputFile, "w") as f:
    f.write(" ".join(str(m) for m in conv))


aminoAcidMassMapFile = './data/integer_mass_table.txt'


# Work from curDir so the relative paths (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: line 1 holds the integer N, line 2 the spectrum as integers.
with open(inputFile) as f:
    N = int(f.readline().strip())
    spectrumLine = f.readline().strip()
    # split() with no argument tolerates tabs and repeated spaces.
    spectrum = [int(n) for n in spectrumLine.split()]


# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Load the amino acid mass table; only its values are passed on below.
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# Generate the sequence from the spectrum.
seq = SeqUtil.leaderboardCyclopeptideSequencing(spectrum, N, AAMassMap.values())

# Write the result as one dash-separated string. write() is the call meant
# for a single string; writelines() would iterate it character by character.
with open(outputFile, "w") as f:
    f.write("-".join(str(s) for s in seq))


# Work from curDir so the relative paths (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: the spectrum is the first line, as integers.
with open(inputFile) as f:
    spectrumLine = f.readline().strip()

# split() with no argument tolerates tabs and repeated spaces.
spectrum = [int(n) for n in spectrumLine.split()]

# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Load the amino acid mass table; only its values are passed on below.
AAMassMap = SeqUtil.loadAminoAcidMass(aminoAcidMassMapFile)

# Generate every candidate sequence from the spectrum.
sequences = SeqUtil.cyclopeptideSequencing(spectrum, AAMassMap.values())

# Each sequence becomes a dash-separated string of its masses; the strings are
# written space-separated on one line. write() is the call meant for a single
# string; writelines() would iterate it character by character.
seqStrings = ["-".join(str(mass) for mass in seq) for seq in sequences]
with open(outputFile, "w") as f:
    f.write(" ".join(seqStrings))


示例#13
0
inputFile = './data/7.convolutional_seq_data-1.txt'
#inputFile = 'C:/Users/Ashis/Downloads/dataset_104_7.txt'
outputFile = './results/7.convolutional_seq.txt'


# Work from curDir so the relative paths above (and 'code' below) resolve.
os.chdir(curDir)

# Input layout: line 1 holds the integer M, line 2 the integer N, line 3 the
# spectrum as integers.
with open(inputFile) as f:
    M = int(f.readline().strip())
    N = int(f.readline().strip())
    spectrumLine = f.readline().strip()
    # split() with no argument tolerates tabs and repeated spaces.
    spectrum = [int(n) for n in spectrumLine.split()]


# SeqUtil lives in the 'code' directory, so extend the search path before
# importing it; reload re-executes the module even if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)

### original work ####
# Generate the sequence from the spectrum.
seq = SeqUtil.convolutionCyclopeptideSequencing(spectrum, M, N)

# Write the result as one dash-separated string. write() is the call meant
# for a single string; writelines() would iterate it character by character.
with open(outputFile, "w") as f:
    f.write("-".join(str(s) for s in seq))


# Load SeqUtil from the 'code' directory; reload re-executes the module even
# if it was imported before.
sys.path.append('code')
import SeqUtil
importlib.reload(SeqUtil)


# `target` and `dna` are defined outside this fragment.
lenTargetPeptide = len(target)
lenTargetDna = lenTargetPeptide*3      # three DNA positions per peptide position

foundDnas = []  # DNA substrings whose translation matched the target

## search amino acids in forward direction
for start in range(0,3):                # one pass per reading-frame offset (0, 1, 2)
    curDna = dna[start:]
    curRna = SeqUtil.dna2rna(curDna)
    # rna2peptide yields several peptides for one RNA string
    # (presumably split at stop codons -- confirm against SeqUtil).
    peptides = SeqUtil.rna2peptide(curRna, mapFile='./data/RNA_codon_table_1.txt')

    # Offset into curDna at which the current peptide starts.
    pepStartIndex = 0
    for pep in peptides:
        # every position in this peptide where the target begins
        targetPositions = [pos for pos in range(0, len(pep)) if pep[pos:(pos+lenTargetPeptide)]==target]

        # map each peptide position back to the DNA slice that encodes it
        targetDnas = [curDna[(pepStartIndex+pos*3):(pepStartIndex+pos*3+lenTargetDna)] for pos in targetPositions]
        foundDnas.extend(targetDnas)

        # Advance past this peptide plus one extra codon (presumably the stop
        # codon that ended it -- confirm against rna2peptide's output).
        pepStartIndex = pepStartIndex + (len(pep)+1)*3

示例#15
0
# Create the output directories for the alignment and Bayes files if missing.
if not os.path.exists('aligns'):
  os.mkdir('aligns')
if not os.path.exists('Bayes'):
  os.mkdir('Bayes')
#if not os.path.exists('ML'):
#  os.mkdir('ML')

# Command line: <out> <query> [-y]. `out` is the dataset suffix used in every
# file name below; `query` is not used in the part of the script visible here.
# NOTE(review): `out` is concatenated unquoted into the shell command below,
# so spaces or shell metacharacters in it will change what gets executed.
out=sys.argv[1]
query=sys.argv[2]
# Optional third argument: paml ends up True only when it is exactly '-y',
# and False when the argument is absent.
try:
  paml=sys.argv[3]
  paml= paml=='-y'
except IndexError:
  paml=False
print "Beginning alignment"
SeqUtil.rename('Data/all-'+out+'.fas')
# Run the external `prank` command (presumably the PRANK aligner -- confirm).
# Unlike a cached pipeline, this runs unconditionally on every invocation, and
# the exit status of os.system is ignored.
os.system('prank -d=Data/all-'+out+' -o=aligns/all-'+out+' -f=nexus -quiet')
SeqUtil.bayesinNex('aligns/all-'+out+'.best.nex')
#SeqUtil.splicealign('aligns/all-'+out+'.best.nex','Bayes/all-'+out+'-mod.nxs')
print "Alignment complete.\nCalculating best model for tree finding"
models_ori=SeqUtil.bestmod('aligns/all-'+out+'.best.nex')
# NOTE(review): the `if paml:` branch that follows iterates `models`, but the
# only assignment to `models` is the commented-out line below -- this looks
# like a NameError whenever '-y' is given; confirm and restore if so.
#models=SeqUtil.bestmod('Bayes/all-'+out+'-mod.nxs')
#print models_ori, models
if paml:
  for mod in models.keys():
     SeqUtil.pamlseqnex('Bayes/all-'+out+'-mod.nxs','ML/all-'+out+mod.split('+')[0])
     if models[mod][0]=='0' and models[mod][1]=='0':
         os.system('phyml -i ML/all-'+out+mod.split('+')[0]+' -d aa -b 100 -m '+mod.split('+')[0]+
                   ' -f e -s BEST -u aligns/all-'+out+'.ed.2.dnd -o tl')
     elif models[mod][0]=='0':
         os.system('phyml -i '+'ML/all-'+out+mod.split('+')[0]+' -d aa -b 100 -m '+mod.split('+')[0]+