def run_waters_pipeline(raw_folder,
                        fasta_file,
                        out_folder="C:/Symphony/Temp/test",
                        parameters_file="C:/Symphony/Search/251.xml",
                        apex_kwds=None,
                        pep3d_kwds=None,
                        iadbs_kwds=None):
    """Run Waters pipeline.

    A convenience wrapper around apex3d, peptide3d, and iaDBs. The three
    steps are chained: the '.bin' file next to the Apex3D output feeds
    peptide3d, and the '.xml' file next to the Peptide3D output feeds iaDBs.

    Args:
        raw_folder (str): a path to the input folder with raw Waters data.
        fasta_file (str): Path to the fasta file used in iaDBs peptide search.
        out_folder (str): Path to where to place the output.
        parameters_file (str): Path to the search parameters used in iaDBs peptide search.
        apex_kwds (dict): Other named arguments to apex3d (None means no extras).
        pep3d_kwds (dict): Other named arguments to peptide3d (None means no extras).
        iadbs_kwds (dict): Other named arguments to iaDBs (None means no extras).

    Returns:
        tuple: Paths to Apex3D ('.bin'), Peptide3D ('.xml') and iaDBs output files.
    """
    # None sentinels replace the former `{}` defaults, so no dict object is
    # shared between calls.
    apex_kwds = {} if apex_kwds is None else apex_kwds
    pep3d_kwds = {} if pep3d_kwds is None else pep3d_kwds
    iadbs_kwds = {} if iadbs_kwds is None else iadbs_kwds

    raw_folder = Path(raw_folder)
    out_folder = Path(out_folder)

    # Each wrapper returns (output path, process handle); only the path is used.
    apexOutPath, _ = apex3d(raw_folder,
                            out_folder,
                            write_binary=True,
                            capture_output=True,
                            **apex_kwds)
    apexOutBIN = apexOutPath.with_suffix('.bin')

    pep3dOutPath, _ = peptide3d(apexOutBIN,
                                out_folder,
                                write_binary=True,
                                min_LEMHPlus=350.0,
                                capture_output=True,
                                **pep3d_kwds)
    pep3dOutXML = pep3dOutPath.with_suffix('.xml')

    iadbsOutXML, _ = iadbs(pep3dOutXML,
                           out_folder,
                           fasta_file=Path(fasta_file),
                           parameters_file=Path(parameters_file),
                           capture_output=True,
                           **iadbs_kwds)
    return apexOutBIN, pep3dOutXML, iadbsOutXML
if debug: print(pep3dOut, pep_proc) except Exception as e: print(e) OK = False exceptions[rawdatapath].append(e) except subprocess.TimeoutExpired: print("pep3d reached a timeout of {} hour(s).".format(timeout / 3600)) OK = False if OK: try: t_iadbs = time() iadbsOut, iadbs_proc = iadbs(pep3dOut.with_suffix('.xml'), out_folder, fasta_file=fastapath, parameters_file=parameters_file, capture_output=capture_output, debug=debug) timings[raw_folder]['iadbs'] = t_iadbs = time() - t_iadbs if debug: print(iadbsOut, iadbs_proc) except Exception as e: print(e) exceptions[rawdatapath].append(e) OK = False except subprocess.TimeoutExpired: print("iadbs reached a timeout of {} hour(s).".format(timeout / 3600)) OK = False # iadbsOut = Path(r"D:/projects/proteome_tools/RES/T1707/T170722_03/T170722_03_IA_workflow") # out_folder= Path(r"D:/projects/proteome_tools/RES/T1707/T170722_03")
from vodkas import peptide3d, iadbs
from vodkas.fastas import get_fastas
# FIXME: the original had a dangling `from vodkas import` here with no names,
# which is a SyntaxError. Restore it once the intended name is known.
from pathlib import Path
from vodkas.xml_parser import create_params_file

# Re-run peptide3d and iaDBs for one acquisition, starting from an existing
# Apex3D binary, then build the params file from the three outputs.
p = Path(r'C:\SYMPHONY_VODKAS\temp\2019-095\O190920_21')
a = p/'O190920_21_Apex3D.bin'

o, _ = peptide3d(a, p)
fastas = get_fastas('human')
# iaDBs consumes the '.xml' file that sits next to the peptide3d output path.
i, _ = iadbs(o.with_suffix('.xml'), p, fastas)
create_params_file(a, o, i)

# Stray path left at the end of the original script (kept as a note; as a bare
# statement it was a SyntaxError):
# U:\Matteo\20191211_2019-015_reprocessing_with_Matteos_pipeline
# write_binary=True, # min_LEMHPlus=350.0, # capture_output=True, # debug=True) # pep3dOutXML = pep3dOut.with_suffix('.xml') pep3dOutXML = temp_folder / (raw_folder + "_Pep3D_Spectrum.xml") fasta_file = Path(settings[raw_folder]) if debug: print(pep3dOutXML, fasta_file, str(fasta_file)) for parameters_file in (proj_folder / "params").iterdir(): if debug: print(parameters_file) print(temp_folder / parameters_file.stem) iadbsOut, iadbs_proc = iadbs(pep3dOutXML, temp_folder / parameters_file.stem, fasta_file=fasta_file, parameters_file=parameters_file, capture_output=True, debug=True) if debug: print(iadbsOut, iadbs_proc) report, wx2csv_proc = wx2csv(iadbsOut.with_suffix('.xml'), temp_folder / parameters_file.stem / "report.csv", debug=debug) # cp(apexOutBIN, final_folder) # cp(pep3dOutXML, final_folder) # cp(iadbsOutXML, final_folder) # cp(temp_folder/'apex3d.log', final_folder) # cp(temp_folder/'peptide3d.log', final_folder) # cp(temp_folder/'iadbs.log', final_folder) #TODO: add file removal!!!!
log.error(f"missing: {raw_folder}") continue log.info(f"analyzing: {raw_folder}") sender.update_group(raw_folder) # wtf??? change name .... acquired_name = raw_folder.stem header_txt = parse_header_txt(raw_folder / '_HEADER.TXT') sample_set = header_txt['Sample Description'][:8] # C:/SYMPHONY_PIPELINE/2019-008/O191017-04 local_folder = local_output_folder / sample_set / acquired_name a = apex3d(raw_folder, local_folder, **apex3d_kwds) if peptide3d_kwds['timeout'] >= 0: p = peptide3d(a.with_suffix('.bin'), local_folder, **peptide3d_kwds) if iadbs_kwds['timeout'] >= 0: i = iadbs(p, local_folder, fasta_file, parameters_file, **iadbs_kwds) if i is not None: params = create_params_file(a, p, i) # for projectizer2.0 with open(a.parent / "params.json", 'w') as f: json.dump(params, f) search_stats = get_search_stats(i) rows2csv(i.parent / 'stats.csv', [list(search_stats), list(search_stats.values())]) if net_folder: # Y:/RES/2019-008 net_set_folder = Path(net_folder) / sample_set net_set_folder.mkdir(parents=True, exist_ok=True) # if reanalysing, the old folder is preserved, # and a version number appended to the new one # e.g. Y:/RES/2019-008/O191017-04
from pathlib import Path
from vodkas import apex3d, peptide3d, iadbs
from vodkas.fs import cp

# Manual end-to-end run of apex3d -> peptide3d -> iadbs on one raw folder.
# if __name__ == "__main__":
# Alternative (big) input:
# raw = Path("//MSSERVER/restoredData/proteome_tools/net/idefix/WIRD_GESICHERT/T1707/T170722_03.raw")
raw = Path("C:/ms_soft/MasterOfPipelines/RAW/O1903/O190302_01.raw")  # small input
temp = Path("C:/Symphony/Temp/test")

# --- Apex3D ---------------------------------------------------------------
# TODO: decide what should happen if the output file already exists.
apexOutPath, apex_proc = apex3d(
    raw,
    temp,
    capture_output=True,
    write_binary=True,
)
# apexOutPath = temp/(raw.stem + "_Apex3D")
apexOutBIN = apexOutPath.with_suffix('.bin')

# --- Peptide3D ------------------------------------------------------------
pep3dOutPath, pep_proc = peptide3d(
    apexOutBIN,
    temp,
    capture_output=True,
    min_LEMHPlus=350.0,
    write_binary=True,
)
# pep3dOutPath = temp/(raw.stem + "_Pep3D_Spectrum")
pep3dOutXML = pep3dOutPath.with_suffix('.xml')

# --- iaDBs ----------------------------------------------------------------
iadbsOutPath, iadbs_proc = iadbs(
    pep3dOutXML,
    temp,
    capture_output=True,
    parameters_file="C:/Symphony/Search/251.xml",
    fasta_file="C:/Symphony/Search/wheat.fasta",
)
# For every (raw, fasta) pair: copy the fasta next to the results, write a
# reversed version of it, and re-run iaDBs against the reversed database.
# raw, fasta = pool1fix[0]
for raw, fasta in pool1fix:
    # copy
    raw = Path(raw)
    fasta = Path(fasta)
    res_folder = res_path / raw.parent.stem / raw.stem
    shutil.copy(str(fasta), str(res_folder / fasta.name))

    # reversal
    FF = Fastas(reformulate_fasta(f) for f in fastas(fasta))
    FF.reverse()
    rev_fasta_path = res_folder / f"{fasta.stem}_reversed.fasta"
    FF.write(rev_fasta_path)

    # rerun iadbs
    try:
        outfile, _ = iadbs(res_folder / f"{raw.stem}_Pep3D_Spectrum.xml",
                           res_folder / 'reversed_search',
                           rev_fasta_path)
    except Exception as e:
        # Record which raw folder failed. The original used `f` here, but `f`
        # only exists inside the generator expression above and is not
        # defined at this point.
        problems.append((str(raw), repr(e)))
        logger.warning(repr(e))

# troubles = []
# # raw,_ = pool1fix[0]
# for raw,_ in pool1fix:
#     raw = Path(raw)
#     res_folder = res_path/raw.parent.stem/raw.stem
#     try:
#         df,_ = wx2csv(res_folder/'reversed_search'/f"{raw.stem}_IA_workflow.xml",
#                       res_folder/'reversed_search'/f"{raw.stem}_report.csv")
#     except Exception as e:
#         print(e)
#         troubles.append(e)
fold = Path(r"Y:\RES\2019-095")


def zeropad(x, k=2):
    """Return `x` as a string, left-padded with zeros to at least `k` characters.

    For the non-negative integers used below and the default `k=2` this
    matches the previous behaviour ("3" -> "03", "12" -> "12"); `k` used to
    be ignored and is now honoured.
    """
    return str(x).zfill(k)


folders = [
    fold / f"O190920_{zeropad(i)}"
    for i in range(2, 9)
    if i not in (8, 15, 22, 29)
]
# folders = [fold/f"O190920_{zeropad(i)}"
#            for i in range(2,29) if i not in (8,15,22,29)]

fastas = get_fastas('human')
problems = []  # (folder, repr(exception)) for every failed iaDBs run

# f = folders[0]
# The first folder is skipped on purpose here (see the commented line above).
for f in folders[1:]:
    try:
        # Renamed from `input` so the builtin is not shadowed.
        pep3d_xml = f / f"{f.stem}_Pep3D_Spectrum.xml"
        outfile, _ = iadbs(pep3d_xml, f, fastas)
    except Exception as e:
        problems.append((str(f), repr(e)))

if problems:
    # Open for writing: the default read mode made json.dump fail (and
    # raised FileNotFoundError when the file did not exist yet).
    with open(Path('C:/SYMPHONY_VODKAS/problems'), 'w') as h:
        json.dump(problems, h, indent=4)