def match_decon2ls_isos(spectrum_db, hypothesis, adducts=None, ms1_match_tolerance=1e-5, output_path=None):
    """Match every composition in *hypothesis* against *spectrum_db*.

    For each composition and each adduct, the spectrum database is queried
    at ``composition.intact_mass + adduct.mass``. Every hit becomes a
    ``PrecursorMatch``; the matches are grouped with ``match_key_getter``,
    each group is merged with ``combine_results``, and the summary is
    attached to the composition before it is stored.

    Parameters
    ----------
    spectrum_db
        Object providing ``ppm_match_tolerance_search(mass, tolerance)``.
    hypothesis
        Iterable of compositions; must expose ``record_type``.
    adducts
        Mass shifts to apply. Defaults to ``[NoShift]`` when ``None``.
    ms1_match_tolerance
        Tolerance forwarded to ``ppm_match_tolerance_search``
        (presumably a fractional error, i.e. 1e-5 == 10 ppm -- TODO confirm).
    output_path
        Location handed to ``RecordDatabase`` for the results store.

    Returns
    -------
    RecordDatabase
        The committed and indexed database holding one record per
        composition, including compositions without any match.
    """
    if adducts is None:
        adducts = [NoShift]

    results = RecordDatabase(output_path, record_type=hypothesis.record_type)
    results.apply_schema()

    for composition in hypothesis:
        found = []
        seen_scans = set()
        seen_charges = set()

        for adduct in adducts:
            # The shifted mass is both the search target and the reference
            # value for the ppm error of each hit.
            target_mass = composition.intact_mass + adduct.mass
            hits = spectrum_db.ppm_match_tolerance_search(target_mass, ms1_match_tolerance)
            for spectrum in hits:
                error = ppm_error(target_mass, spectrum.neutral_mass)
                found.append(PrecursorMatch(
                    str(composition.id) + ":" + adduct.name,
                    spectrum.neutral_mass,
                    error,
                    spectrum.get("abundance"),
                    spectrum.charge,
                    spectrum.other_data,
                    spectrum.scan_ids[0]))
                seen_scans.update(spectrum.scan_ids)
                seen_charges.add(spectrum.charge)

        # ``groupby`` here returns a mapping (``.items()`` is used), not an
        # itertools-style iterator.
        grouped = groupby(found, match_key_getter)
        merged = {}
        for key, members in grouped.items():
            merged[key] = combine_results(members)

        composition.precursor_matches = merged
        composition.precursor_scans_searched = seen_scans
        composition.precursor_scan_density = density(seen_scans)
        composition.precursor_charge_states = seen_charges

        # Defer the commit; a single commit is issued after the loop.
        results.load_data([composition], set_id=False, cast=True, commit=False)

    results.commit()
    results.apply_indices()
    return results
def download_all_structures(db_path, record_type=GlycanRecordWithTaxon):  # pragma: no cover
    """Download the GlycomeDB structure dump and load it into a database.

    The gzip-compressed XML dump is fetched over HTTP, every ``structure``
    element is parsed from GlycoCT, round-tripped through ``str`` /
    ``glycoct.loads`` as a sanity check, and stored as a *record_type*
    instance. Structures that fail at any step are skipped and recorded.

    Parameters
    ----------
    db_path
        Location handed to ``RecordDatabase``.
    record_type
        Callable building a record from ``(glycan, taxa=..., id=...)``.

    Returns
    -------
    RecordDatabase
        The committed database. Its ``"misses"`` metadata entry holds a
        list of ``(glycomedb_id, exception)`` pairs for skipped structures;
        the id is the raw attribute value (or ``None``) when it could not
        be converted to ``int``.

    Raises
    ------
    requests.HTTPError
        If the download returns an error status.
    """
    # Local import so this block does not depend on how the module aliases
    # ``StringIO``; the HTTP body is bytes and needs a binary buffer.
    from io import BytesIO

    response = requests.get(u'http://www.glycome-db.org/http-services/getStructureDump.action?user=eurocarbdb')
    response.raise_for_status()
    handle = gzip.GzipFile(fileobj=BytesIO(response.content))
    xml = etree.parse(handle)
    db = RecordDatabase(db_path, record_type=record_type)
    misses = []
    i = 0
    for structure in xml.iterfind(".//structure"):
        # Bind the id before entering the ``try`` so the handler never sees
        # an unbound name or the previous iteration's id.
        glycomedb_id = structure.attrib.get('id')
        try:
            glycomedb_id = int(glycomedb_id)
            i += 1
            glycoct_str = structure.find("sequence").text
            taxa = [Taxon(t.attrib['ncbi'], None, None) for t in structure.iterfind(".//taxon")]
            glycan = glycoct.loads(glycoct_str)
            # Compare the magnitude of the difference: a reparse that comes
            # out lighter is just as wrong as one that comes out heavier.
            if abs(glycoct.loads(str(glycan)).mass() - glycan.mass()) > 0.00001:
                raise Exception("Mass did not match on reparse")
            record = record_type(glycan, taxa=taxa, id=glycomedb_id)
            db.load_data(record, commit=False, set_id=False)
            if i % 1000 == 0:
                print(i, "Records parsed.")
        except Exception as e:
            # Deliberately best-effort: one bad structure must not abort the
            # whole import, so record it and continue.
            misses.append((glycomedb_id, e))
            print(glycomedb_id, e)
    db.set_metadata("misses", misses)
    db.commit()
    return db