# Parse the Pep3D spectrum file and overwrite the 'ADCResponse' entry of both
# the LE and HE tables, then save the result next to the original file.
P3D = Pep3Dparser(pep3d)
P3D.get_all_tag_counts()

le = P3D.LE
le['ADCResponse'] = 10000
P3D.LE = le

he = P3D.HE
he['ADCResponse'] = 10000
P3D.HE = he

P3D.write(pep3d.parent / (pep3d.stem + "_ADCResponse10000.xml"))

# The parser is released only after its last use; deleting it before reading
# P3D.LE (as the script previously did) raises NameError.
del P3D

# Compare the outputs, first by check sums.
from syncFiles.syncFiles import check_sum
from waters.parsers import iaDBsXMLparser

ia_workflows = list(data_f.glob('*_IA_Workflow*.xml'))
for iw in ia_workflows:
    print(check_sum(iw))

# The check sums do differ: what about the data?
orig = ia_workflows[0]
mod = ia_workflows[1]
parsed = [iaDBsXMLparser(i) for i in ia_workflows]
prots = [i.proteins() for i in parsed]
prods = [i.products() for i in parsed]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('dark_background')

# Data root; each input file is looked up one directory level below it.
data_f = Path('~/Projects/WatersData').expanduser()

# All Apex3D outputs, plus the first match of every file kind.
# next() raises StopIteration when a pattern matches nothing.
apexPaths = list(data_f.glob('*/*_Apex3D.xml'))
apex3d = next(data_f.glob('*/*_Apex3D.xml'))
pep3d = next(data_f.glob('*/*_Pep3D_Spectrum.xml'))
iaDBs = next(data_f.glob('*/*_IA_workflow.xml'))

# --- Apex3D ---------------------------------------------------------------
A = Apex3Dparser(apex3d)
# Bare attribute accesses, kept as in the interactive session.
A.LE
A.HE
A.to_hdf()
A.data_path
# Store the LE table under key 'LE' with maximal compression.
A.LE.to_hdf(apex3d.with_suffix('.hd5'), 'LE', complevel=9)

# --- Pep3D ----------------------------------------------------------------
P = Pep3Dparser(pep3d)
P.LE
P.HE

# --- iaDBs ----------------------------------------------------------------
IA = iaDBsXMLparser(iaDBs)
IA.parameters()
IA.count_proteins_per_hit()
IA.info()
) args = p.parse_args() try: print('Parsing iaDBs outputs to csvs.') from fs_ops.paths import find_suffixed_files from waters.parsers import iaDBsXMLparser xmls = list( find_suffixed_files(args.paths, ['**/*_IA_workflow.xml'], ['.xml'])) print('Supplied paths:') pprint(xmls) for xml in xmls: XML = iaDBsXMLparser(xml) info = XML.info() pprint(info) print('dumping to csv') XML.query_masses().to_csv(xml.parent / 'query_masses.csv') XML.proteins().to_csv(xml.parent / 'proteins.csv') XML.products().to_csv(xml.parent / 'products.csv') except Exception as e: print(e) print() print('Avoid tricks of Loki.') print('And have a nice day..') input('press ENTER')
%load_ext autoreload %autoreload 2 from collections import Counter from pathlib import Path import pandas as pd pd.set_option('display.max_columns', 100) pd.set_option('display.max_rows', 5) import numpy as np from pprint import pprint from waters.parsers import iaDBsXMLparser data_f = Path(r"/home/matteo/Projects/WatersData/ADCResppnse") files = list(data_f.glob('*IA*.xml')) I0 = iaDBsXMLparser(files[0]) I1 = iaDBsXMLparser(files[1]) I2 = iaDBsXMLparser(files[2]) diff = lambda d0, d1: (d1 != d0).any(1) I0.get_all_tag_counts() I0.parameters() PR0 = I0.products() PR1 = I1.products() sum(diff(PR0, PR1)) PR0[diff(PR0, PR1)] sum(diff(PR0.iloc[:,:14], PR1.iloc[:,:14])) Counter(PR0.LOSS_TYPE) Counter(PR1.LOSS_TYPE) # dass siehst gut aus.
%load_ext autoreload %autoreload 2 from pathlib import Path from pprint import pprint from collections import Counter import pandas as pd import csv import json import numpy as np import xml.etree.cElementTree as ET from plotnine import * import sklearn from sklearn.linear_model import LinearRegression from statsmodels.api import OLS import matplotlib.pyplot as plt import statsmodels.api as sm import statsmodels.formula.api as smf from fs_ops.csv import rows2csv from waters.parsers import XMLparser, iaDBsXMLparser, Pep3Dparser, Apex3Dparser data_f = Path('~/Projects/WatersData/O190303_78').expanduser() apex3d = next(data_f.glob('*_Apex3D.xml')) pep3d = next(data_f.glob('*_Pep3D_Spectrum.xml')) iadbs = next(data_f.glob('*_IA_workflow.xml')) I = iaDBsXMLparser(iadbs) A = Apex3Dparser(apex3d) A.LE() A.HE()
from pprint import pprint
from collections import Counter
import csv
import json
import xml.etree.cElementTree as ET

import pandas as pd
import numpy as np

from fs_ops.csv import rows2csv
from waters.parsers import XMLparser, iaDBsXMLparser

# Alternative input kept for reference:
# data_path = Path("~/Projects/waters/data/T181207_07/T181207_07_IA_workflow.xml").expanduser()
data_path = Path("~/Projects/WatersData/O190303_78/O190303_78_IA_workflow.xml").expanduser()
# Stop right away when the workflow file is not where it is expected.
assert data_path.exists()

iaDBsXML = iaDBsXMLparser(data_path)

# Walk through the parser's accessors one by one.
prots = iaDBsXML.prot_ids()
iaDBsXML.get_tag_counts()
iaDBsXML.proteins()
iaDBsXML.products()
iaDBsXML.get_tag_counts()
iaDBsXML.parameters()
iaDBsXML.query_masses()
iaDBsXML.count_proteins_per_hit()

# Write the info mapping as a two-row csv: its keys first, then its values.
info = iaDBsXML.info()
info_csv = Path("~/Projects/waters/data/info.csv").expanduser()
info_keys = list(info)
info_values = list(info.values())
rows2csv(info_csv, [info_keys, info_values])