def Linnaeus2YASDBF(params):
    """Run the Linnaeus ``compile2YASDBF`` command for the configured database.

    params -- mapping that must contain a 'DB Name' entry; its value is handed
              to Linnaeus as the last command-line argument.

    Terminates the interpreter through sys.exit() when the Linnaeus script is
    not present at its hard-coded location.
    """
    # The lookup comes first so that a missing key fails before the
    # filesystem is consulted.
    target_db = params['DB Name']
    script = '/home/d3y034/chembio_svn/Proteomics/Linnaeus/Linnaeus.py'

    if not os.path.exists(script):
        sys.exit('Linnaeus is not in the specifed location.')

    command = "%s %s %s" % (script, 'compile2YASDBF', target_db)
    yasdbf_main.runCmd(command)
def insertSpectraSTData(con, pepXML):
    """Run SpectraST on a pepXML file and copy its output into ``spectra``.

    con    -- database connection used for alter_table(), the UPDATE and the
              commit.
    pepXML -- path of the pepXML file given to SpectraST.  The ``.sptxt``
              file that is parsed afterwards is read from the same directory
              and named after the part of the basename before the first '.'.

    Returns None.  When the SpectraST binary is not found at its hard-coded
    location the function does nothing at all (no error is reported).

    Raises sqlite3.OperationalError if alter_table() fails for any reason
    other than "duplicate column name".
    """
    # NOTE: consider avoiding hardcoding this
    spectraST = "/usr/bin/spectrast"
    if not os.path.exists(spectraST):
        # Best-effort step: without the binary there is nothing to import.
        return

    # Split off the directory so the .sptxt path stays relative to wherever
    # the pepXML file lives.
    path, base = os.path.split(pepXML)
    base = base.split('.')[0]

    yasdbf_main.runCmd("%s -c -cP0 %s" % (spectraST, pepXML))

    sptxt = os.path.join(path, base + '.sptxt')
    specST_dict = parseSPTXT(sptxt)

    # NOTE: try to alter table
    # TODO: remove!
    try:
        alter_table(con)
    except sqlite3.OperationalError as ex:
        # A "duplicate column name" error is ignored (presumably the columns
        # were added by an earlier run -- confirm against alter_table).
        # Anything else is a real failure.
        if "duplicate column name" not in str(ex):
            raise

    # One parameter tuple per spectrum: the first three entries of each value
    # are joined with ';', the next three are stored as-is, and the dict key
    # selects the row by name.
    data = [
        (';'.join(v[0]), ';'.join(v[1]), ';'.join(v[2]), v[3], v[4], v[5], k)
        for k, v in specST_dict.items()
    ]

    con.executemany("UPDATE spectra SET x=?, y=?, labels=?, precur_int=?, total_int=?, matched_int=? WHERE name=?", data)
    con.commit()
def runMSGF(msgfPre, dirname, enzyme):
    """Run the MSGF jar on a prefile and return the path of its output file.

    msgfPre -- path of the prefile passed to MSGF with ``-i``.
    dirname -- directory passed with ``-d`` and used as the location of the
               output file; an empty value is replaced by "." for ``-d``.
    enzyme  -- value passed with ``-e``.

    Returns the output path (``dirname`` joined with ``msgfPre`` minus its
    last three characters), or None when ``msgfPre`` does not exist.
    """
    if not os.path.exists(msgfPre):
        return None

    msgfPath = "/home/yasdbf/apps/msgf.jar"

    # Dropping the last three characters turns a name ending in '.msgfPre'
    # into one ending in '.msgf'.  The output path is built from the
    # original dirname, before the "." substitution below.
    msgf = os.path.join(dirname, msgfPre[:-3])

    # An empty dirname would leave "-d" without an argument, so fall back to
    # the current directory.  (Truthiness test instead of the former
    # `len(dirname) is 0`, which compared an int by identity.)
    if not dirname:
        dirname = "."

    params = (msgfPath, msgfPre, dirname, msgf, enzyme)
    # "-fixMod 0" sets default mods to none
    cmdStr = "java -Xmx2000M -jar %s -i %s -d %s -o %s -fixMod 0 -e %s" % params

    # execute command
    yasdbf_main.runCmd(cmdStr)
    return msgf
def Tandem2XML(tandemXML):
    """Convert a Tandem XML file to pepXML with the external Tandem2XML tool.

    tandemXML -- path of the Tandem result file.

    The converter is run on a temporary copy named ``<base>.xml`` (``<base>``
    being the part of the basename before the first '.') and writes
    ``<base>.pep.xml`` into the same directory.

    Returns the path of the pepXML file.  Terminates the interpreter through
    sys.exit() when the converter binary is not at its hard-coded location.
    """
    # Named `converter` rather than `Tandem2XML` so the local does not shadow
    # the function itself.
    converter = "/home/d3y034/chembio_svn/Proteomics/Kraftwerk/apps/Tandem2XML"
    if not os.path.exists(converter):
        sys.exit("Tandem2XML does not appear to be installed")

    d = os.path.dirname(tandemXML)
    base = os.path.basename(tandemXML).split('.')[0]

    # NOTE: need to copy tandem file to new name for it to relate to mzXML
    newTandemXML = os.path.join(d, base + '.xml')

    # If the input is already called <base>.xml the "copy" is the input
    # itself: copying is pointless and the clean-up below would delete the
    # caller's file.  Only copy/remove when the two paths really differ.
    is_copy = os.path.abspath(newTandemXML) != os.path.abspath(tandemXML)
    if is_copy:
        yasdbf_main.runCmd("cp %s %s" % (tandemXML, newTandemXML))

    # NOTE: the pepXML is named after the same base as the renamed copy
    pepXML = os.path.join(d, base + '.pep.xml')
    yasdbf_main.runCmd("%s %s %s" % (converter, newTandemXML, pepXML))

    # NOTE: remove copy
    if is_copy:
        yasdbf_main.runCmd("rm %s" % newTandemXML)

    return pepXML
if "duplicate column name" in str(ex): #NOTE: maybe log some kind of error here pass else: raise enzyme = params['Enzyme'] filename = filename.split('.')[0] + '.mzXML' filename = os.path.basename(filename) # make sure you have basename (important for structured mode) file_id, dirname = con.execute("SELECT id, dirname FROM files WHERE basename=?", (filename,) ).fetchone() con.row_factory = sqlite3.Row msgfPre = writePrefile(con, file_id, filename) msgf = runMSGF(msgfPre, dirname, enzyme) yasdbf_main.runCmd("rm %s" % msgfPre) # clean-up importMSGF(con, msgf) def writePrefile(con, file_id, basename): # use basename to find name for msgfPre file basename = basename.split('.')[0] + '.mzXML' filename = basename.split('.')[0] + '.msgfPre' # write msgfPre file with open(filename, 'w') as f: # write headers f.write("#SpectrumFile\tScanName\tScan#\tAnnotation\tCharge\n") select = """SELECT name, n_terminal, peptide, peptide_modded, c_terminal, charge FROM spectra WHERE file_id = ? ORDER BY name""" for record in con.execute(select, (file_id,)):