def test_new_ds_saves_to_db(test_db, metadata, ds_config):
    """Run a colocalization job for a freshly annotated dataset and verify
    that coloc_job / coloc_annotation / ion rows are persisted to the DB.

    Seeds a finished annotation job with six fake annotations (three formulas
    x two adducts), mocks ion-image retrieval, then runs the coloc job.
    """
    db = DB()
    moldb = create_test_molecular_db()
    # Mutates the shared fixture dict so later readers see the moldb id too
    ds_config['database_ids'] = [moldb.id]
    ds = create_test_ds(config={**ds_config, 'database_ids': [moldb.id]})

    # Each row gets a distinct fake ion image id ('0'..'5')
    ion_metrics_df = pd.DataFrame({
        'formula': ['H2O', 'H2O', 'CO2', 'CO2', 'H2SO4', 'H2SO4'],
        'adduct': ['+H', '[M]+', '+H', '[M]+', '+H', '[M]+'],
        'fdr': [0.05, 0.1, 0.05, 0.1, 0.05, 0.1],
        'image_id': list(map(str, range(6))),
    })

    (job_id,) = db.insert_return(
        "INSERT INTO job (moldb_id, ds_id, status) VALUES (%s, %s, 'FINISHED') RETURNING id",
        rows=[(moldb.id, ds.id)],
    )
    db.insert(
        'INSERT INTO annotation('
        ' job_id, formula, chem_mod, neutral_loss, adduct, msm, fdr, stats, iso_image_ids'
        ') '
        "VALUES (%s, %s, '', '', %s, 1, %s, '{}', %s)",
        # itertuples() instead of iterrows(): faster and avoids the unused index var
        [
            (job_id, r.formula, r.adduct, r.fdr, [r.image_id])
            for r in ion_metrics_df.itertuples()
        ],
    )

    with patch(
        'sm.engine.postprocessing.colocalization.ImageStorage.get_ion_images_for_analysis'
    ) as get_ion_images_for_analysis_mock:
        get_ion_images_for_analysis_mock.side_effect = mock_get_ion_images_for_analysis
        Colocalization(db).run_coloc_job(ds)

    jobs = db.select('SELECT id, error, sample_ion_ids FROM graphql.coloc_job')
    annotations = db.select(
        'SELECT coloc_ion_ids, coloc_coeffs FROM graphql.coloc_annotation')
    ions = db.select('SELECT id FROM graphql.ion')

    assert len(jobs) > 0
    assert not any(job[1] for job in jobs)  # no coloc job recorded an error
    assert jobs[0][2]  # sample_ion_ids were populated
    assert len(annotations) > 10
    # every annotation's coloc_ion_ids and coloc_coeffs lists are parallel
    assert all(len(ann[0]) == len(ann[1]) for ann in annotations)
    assert len(ions) == len(ion_metrics_df)
class SearchJob(object):
    """ Main class responsible for molecule search. Uses other modules of the engine.

    Orchestrates one full dataset run: copies/converts input data, runs an
    annotation job per molecular database on Spark, stores results, and
    cleans up Spark/DB/work-dir resources.

    Args
    ----
    no_clean : bool
        Don't delete interim data files
    """
    def __init__(self, img_store=None, no_clean=False):
        self.no_clean = no_clean
        # Ion image storage client; its storage_type is assigned in run()
        self._img_store = img_store
        # Collaborators below are created lazily in run()/_run_annotation_job()
        self._job_id = None
        self._sc = None           # SparkContext
        self._db = None           # DB connection
        self._ds = None           # Dataset being processed
        self._ds_reader = None
        self._status_queue = None
        self._fdr = None
        self._wd_manager = None
        self._es = None
        self._sm_config = SMConfig.get_conf()
        logger.debug('Using SM config:\n%s', pformat(self._sm_config))

    def _configure_spark(self):
        """Build a SparkConf from the 'spark' config section and start a context.

        Only keys prefixed 'spark.' are forwarded to Spark; when an 'aws'
        section is present, s3a credentials/endpoint are configured so Spark
        can read input data from S3.
        """
        logger.info('Configuring Spark')
        sconf = SparkConf()
        for prop, value in self._sm_config['spark'].items():
            if prop.startswith('spark.'):
                sconf.set(prop, value)
        if 'aws' in self._sm_config:
            sconf.set("spark.hadoop.fs.s3a.access.key",
                      self._sm_config['aws']['aws_access_key_id'])
            sconf.set("spark.hadoop.fs.s3a.secret.key",
                      self._sm_config['aws']['aws_secret_access_key'])
            sconf.set("spark.hadoop.fs.s3a.impl",
                      "org.apache.hadoop.fs.s3a.S3AFileSystem")
            sconf.set(
                "spark.hadoop.fs.s3a.endpoint",
                "s3.{}.amazonaws.com".format(
                    self._sm_config['aws']['aws_region']))
        self._sc = SparkContext(master=self._sm_config['spark']['master'],
                                conf=sconf, appName='SM engine')

    def _init_db(self):
        """Open the engine's database connection."""
        logger.info('Connecting to the DB')
        self._db = DB(self._sm_config['db'])

    def store_job_meta(self, mol_db_id):
        """ Store search job metadata in the database

        Inserts a RUNNING job row for (mol_db_id, dataset) and remembers the
        generated job id in self._job_id.
        """
        logger.info('Storing job metadata')
        rows = [(mol_db_id, self._ds.id, JobStatus.RUNNING,
                 datetime.now().strftime('%Y-%m-%d %H:%M:%S'))]
        self._job_id = self._db.insert_return(JOB_INS, rows=rows)[0]

    def _run_annotation_job(self, mol_db):
        """Run the full annotation pipeline for one molecular database.

        Generates centroids (targets + decoy adducts), runs MSM search and
        FDR, and stores results. The job row is marked FINISHED on success
        and FAILED on any exception (re-raised as JobFailedError).
        """
        try:
            self.store_job_meta(mol_db.id)
            mol_db.set_job_id(self._job_id)
            logger.info(
                "Running new job ds_id: %s, ds_name: %s, db_name: %s, db_version: %s",
                self._ds.id, self._ds.name, mol_db.name, mol_db.version)
            target_adducts = self._ds.config['isotope_generation']['adducts']
            self._fdr = FDR(job_id=self._job_id, decoy_sample_size=20,
                            target_adducts=target_adducts, db=self._db)
            isocalc = IsocalcWrapper(self._ds.config['isotope_generation'])
            centroids_gen = IonCentroidsGenerator(sc=self._sc,
                                                  moldb_name=mol_db.name,
                                                  isocalc=isocalc)
            polarity = self._ds.config['isotope_generation']['charge'][
                'polarity']
            # Centroids are generated for target AND decoy adducts so the FDR
            # decoy sampling has images to draw from
            all_adducts = list(
                set(self._sm_config['defaults']['adducts'][polarity])
                | set(DECOY_ADDUCTS))
            centroids_gen.generate_if_not_exist(isocalc=isocalc,
                                                sfs=mol_db.sfs,
                                                adducts=all_adducts)
            target_ions = centroids_gen.ions(target_adducts)
            self._fdr.decoy_adducts_selection(target_ions)
            search_alg = MSMBasicSearch(sc=self._sc, ds=self._ds,
                                        ds_reader=self._ds_reader,
                                        mol_db=mol_db,
                                        centr_gen=centroids_gen,
                                        fdr=self._fdr,
                                        ds_config=self._ds.config)
            ion_metrics_df, ion_iso_images = search_alg.search()
            search_results = SearchResults(mol_db.id, self._job_id,
                                           search_alg.metrics.keys())
            mask = self._ds_reader.get_2d_sample_area_mask()
            img_store_type = self._ds.get_ion_img_storage_type(self._db)
            search_results.store(ion_metrics_df, ion_iso_images, mask,
                                 self._db, self._img_store, img_store_type)
        except Exception as e:
            self._db.alter(
                JOB_UPD_STATUS_FINISH,
                params=(JobStatus.FAILED,
                        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                        self._job_id))
            msg = 'Job failed(ds_id={}, mol_db={}): {}'.format(
                self._ds.id, mol_db, str(e))
            raise JobFailedError(msg) from e
        else:
            self._db.alter(
                JOB_UPD_STATUS_FINISH,
                params=(JobStatus.FINISHED,
                        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                        self._job_id))

    def _remove_annotation_job(self, mol_db):
        """Delete a stale job's DB rows and its ES documents for this dataset."""
        logger.info(
            "Removing job results ds_id: %s, ds_name: %s, db_name: %s, db_version: %s",
            self._ds.id, self._ds.name, mol_db.name, mol_db.version)
        # NOTE(review): this DELETE filters on job.db_id while other code in
        # this repo inserts into job(moldb_id, ...) — confirm the column name
        # matches the schema this version runs against
        self._db.alter('DELETE FROM job WHERE ds_id = %s and db_id = %s',
                       params=(self._ds.id, mol_db.id))
        self._es.delete_ds(self._ds.id, mol_db)

    def _moldb_ids(self):
        """Return (ids of moldbs already processed for this ds, ids requested by config)."""
        completed_moldb_ids = {
            db_id
            for (_, db_id) in self._db.select(JOB_ID_MOLDB_ID_SEL,
                                              params=(self._ds.id, ))
        }
        new_moldb_ids = {
            MolecularDB(name=moldb_name).id
            for moldb_name in self._ds.config['databases']
        }
        return completed_moldb_ids, new_moldb_ids

    def _save_data_from_raw_ms_file(self):
        """Derive acquisition geometry and image storage type from the raw
        MS file and persist both on the dataset."""
        ms_file_type_config = SMConfig.get_ms_file_handler(
            self._wd_manager.local_dir.ms_file_path)
        acq_geometry_factory_module = ms_file_type_config[
            'acq_geometry_factory']
        # Factory class is resolved dynamically from the file-type config
        acq_geometry_factory = getattr(
            import_module(acq_geometry_factory_module['path']),
            acq_geometry_factory_module['name'])
        acq_geometry = acq_geometry_factory(
            self._wd_manager.local_dir.ms_file_path).create()
        self._ds.save_acq_geometry(self._db, acq_geometry)
        self._ds.save_ion_img_storage_type(
            self._db, ms_file_type_config['img_storage_type'])

    def run(self, ds):
        """ Entry point of the engine. Molecule search is completed in several steps:
            * Copying input data to the engine work dir
            * Conversion input mass spec files to plain text format. One line - one spectrum data
            * Generation and saving to the database theoretical peaks for all formulas from the molecule database
            * Molecules search. The most compute intensive part. Spark is used to run it in distributed manner.
            * Saving results (isotope images and their metrics of quality for each putative molecule) to the database

        Args
        ----
        ds : sm.engine.dataset_manager.Dataset
        """
        try:
            logger.info('*' * 150)
            start = time.time()
            self._init_db()
            self._es = ESExporter(self._db)
            self._ds = ds
            if self._sm_config['rabbitmq']:
                self._status_queue = QueuePublisher(
                    config=self._sm_config['rabbitmq'],
                    qdesc=SM_DS_STATUS,
                    logger=logger)
            else:
                self._status_queue = None
            self._wd_manager = WorkDirManager(ds.id)
            self._configure_spark()
            if not self.no_clean:
                self._wd_manager.clean()
            self._ds_reader = DatasetReader(self._ds.input_path, self._sc,
                                            self._wd_manager)
            self._ds_reader.copy_convert_input_data()
            self._save_data_from_raw_ms_file()
            self._img_store.storage_type = self._ds.get_ion_img_storage_type(
                self._db)
            logger.info('Dataset config:\n%s', pformat(self._ds.config))
            completed_moldb_ids, new_moldb_ids = self._moldb_ids()
            # Ids in both sets are already done and still wanted — skip them;
            # remove jobs no longer in config, run jobs newly added to config
            for moldb_id in completed_moldb_ids.symmetric_difference(
                    new_moldb_ids):  # ignore ids present in both sets
                mol_db = MolecularDB(
                    id=moldb_id,
                    db=self._db,
                    iso_gen_config=self._ds.config['isotope_generation'])
                if moldb_id not in new_moldb_ids:
                    self._remove_annotation_job(mol_db)
                elif moldb_id not in completed_moldb_ids:
                    self._run_annotation_job(mol_db)
            logger.info("All done!")
            time_spent = time.time() - start
            logger.info('Time spent: %d mins %d secs',
                        *divmod(int(round(time_spent)), 60))
        finally:
            # Best-effort cleanup regardless of success or failure
            if self._sc:
                self._sc.stop()
            if self._db:
                self._db.close()
            if self._wd_manager and not self.no_clean:
                self._wd_manager.clean()
            logger.info('*' * 150)
class SearchJob(object):
    """ Main class responsible for molecule search. Uses other modules of the engine.

    This variant publishes dataset status updates (STARTED/FINISHED/FAILED)
    and exports results to Elasticsearch as part of each annotation job.

    Args
    ----
    no_clean : bool
        Don't delete interim data files
    """
    def __init__(self, img_store=None, no_clean=False):
        self.no_clean = no_clean
        # Ion image storage client; its storage_type is assigned in run()
        self._img_store = img_store
        # Collaborators below are created lazily in run()/_run_annotation_job()
        self._job_id = None
        self._sc = None           # SparkContext
        self._db = None           # DB connection
        self._ds = None           # Dataset being processed
        self._ds_reader = None
        self._status_queue = None
        self._fdr = None
        self._wd_manager = None
        self._es = None
        self._sm_config = SMConfig.get_conf()
        logger.debug('Using SM config:\n%s', pformat(self._sm_config))

    def _configure_spark(self):
        """Build a SparkConf from the 'spark' config section and start a context.

        Only keys prefixed 'spark.' are forwarded to Spark; when an 'aws'
        section is present, s3a credentials/endpoint are configured so Spark
        can read input data from S3.
        """
        logger.info('Configuring Spark')
        sconf = SparkConf()
        for prop, value in self._sm_config['spark'].items():
            if prop.startswith('spark.'):
                sconf.set(prop, value)
        if 'aws' in self._sm_config:
            sconf.set("spark.hadoop.fs.s3a.access.key",
                      self._sm_config['aws']['aws_access_key_id'])
            sconf.set("spark.hadoop.fs.s3a.secret.key",
                      self._sm_config['aws']['aws_secret_access_key'])
            sconf.set("spark.hadoop.fs.s3a.impl",
                      "org.apache.hadoop.fs.s3a.S3AFileSystem")
            sconf.set("spark.hadoop.fs.s3a.endpoint",
                      "s3.{}.amazonaws.com".format(self._sm_config['aws']['aws_region']))
        self._sc = SparkContext(master=self._sm_config['spark']['master'],
                                conf=sconf, appName='SM engine')

    def _init_db(self):
        """Open the engine's database connection."""
        logger.info('Connecting to the DB')
        self._db = DB(self._sm_config['db'])

    def store_job_meta(self, mol_db_id):
        """ Store search job metadata in the database

        Inserts a 'STARTED' job row for (mol_db_id, dataset) and remembers
        the generated job id in self._job_id.
        """
        logger.info('Storing job metadata')
        # NOTE(review): status is a bare string here ('STARTED'/'FAILED'/'FINISHED'
        # below) rather than a JobStatus constant — confirm this matches the
        # values the rest of this code version expects
        rows = [(mol_db_id, self._ds.id, 'STARTED',
                 datetime.now().strftime('%Y-%m-%d %H:%M:%S'))]
        self._job_id = self._db.insert_return(JOB_INS, rows=rows)[0]

    def _run_annotation_job(self, mol_db):
        """Run the full annotation pipeline for one molecular database.

        Generates centroids (targets + decoy adducts), runs MSM search and
        FDR, stores results, then exports them to ES. The job row is marked
        FAILED on any exception (re-raised as JobFailedError); the FINISHED
        update happens only after a successful ES export.
        """
        try:
            self.store_job_meta(mol_db.id)
            mol_db.set_job_id(self._job_id)
            logger.info("Running new job ds_id: %s, ds_name: %s, db_name: %s, db_version: %s",
                        self._ds.id, self._ds.name, mol_db.name, mol_db.version)
            target_adducts = self._ds.config['isotope_generation']['adducts']
            self._fdr = FDR(job_id=self._job_id, decoy_sample_size=20,
                            target_adducts=target_adducts, db=self._db)
            isocalc = IsocalcWrapper(self._ds.config['isotope_generation'])
            centroids_gen = IonCentroidsGenerator(sc=self._sc,
                                                  moldb_name=mol_db.name,
                                                  isocalc=isocalc)
            polarity = self._ds.config['isotope_generation']['charge']['polarity']
            # Centroids are generated for target AND decoy adducts so the FDR
            # decoy sampling has images to draw from
            all_adducts = list(set(self._sm_config['defaults']['adducts'][polarity])
                               | set(DECOY_ADDUCTS))
            centroids_gen.generate_if_not_exist(isocalc=isocalc,
                                                sfs=mol_db.sfs,
                                                adducts=all_adducts)
            target_ions = centroids_gen.ions(target_adducts)
            self._fdr.decoy_adducts_selection(target_ions)
            search_alg = MSMBasicSearch(sc=self._sc, ds=self._ds,
                                        ds_reader=self._ds_reader,
                                        mol_db=mol_db,
                                        centr_gen=centroids_gen,
                                        fdr=self._fdr,
                                        ds_config=self._ds.config)
            ion_metrics_df, ion_iso_images = search_alg.search()
            search_results = SearchResults(mol_db.id, self._job_id,
                                           search_alg.metrics.keys())
            mask = self._ds_reader.get_2d_sample_area_mask()
            img_store_type = self._ds.get_ion_img_storage_type(self._db)
            search_results.store(ion_metrics_df, ion_iso_images, mask,
                                 self._db, self._img_store, img_store_type)
        except Exception as e:
            self._db.alter(JOB_UPD, params=('FAILED',
                                            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                                            self._job_id))
            msg = 'Job failed(ds_id={}, mol_db={}): {}'.format(self._ds.id,
                                                               mol_db, str(e))
            raise JobFailedError(msg) from e
        else:
            self._export_search_results_to_es(mol_db, isocalc)

    def _export_search_results_to_es(self, mol_db, isocalc):
        """Index the job's annotations into Elasticsearch.

        Marks the job FINISHED only on a successful export; an export failure
        marks it FAILED and re-raises as ESExportFailedError.
        """
        try:
            self._es.index_ds(self._ds.id, mol_db, isocalc)
        except Exception as e:
            self._db.alter(JOB_UPD, params=('FAILED',
                                            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                                            self._job_id))
            msg = 'Export to ES failed(ds_id={}, mol_db={}): {}'.format(self._ds.id,
                                                                        mol_db, str(e))
            raise ESExportFailedError(msg) from e
        else:
            self._db.alter(JOB_UPD, params=('FINISHED',
                                            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                                            self._job_id))

    def _remove_annotation_job(self, mol_db):
        """Delete a stale job's DB rows and its ES documents for this dataset."""
        logger.info("Removing job results ds_id: %s, ds_name: %s, db_name: %s, db_version: %s",
                    self._ds.id, self._ds.name,
                    mol_db.name, mol_db.version)
        # NOTE(review): this DELETE filters on job.db_id while other code in
        # this repo inserts into job(moldb_id, ...) — confirm the column name
        # matches the schema this version runs against
        self._db.alter('DELETE FROM job WHERE ds_id = %s and db_id = %s',
                       params=(self._ds.id, mol_db.id))
        self._es.delete_ds(self._ds.id, mol_db)

    def _moldb_ids(self):
        """Return (ids of moldbs already processed for this ds, ids requested by config).

        Both sets are resolved through the external mol_db service so ids are
        comparable even when jobs were created against older db versions.
        """
        moldb_service = MolDBServiceWrapper(self._sm_config['services']['mol_db'])
        completed_moldb_ids = {moldb_service.find_db_by_id(db_id)['id']
                               for (_, db_id) in self._db.select(JOB_ID_MOLDB_ID_SEL,
                                                                 params=(self._ds.id,))}
        new_moldb_ids = {moldb_service.find_db_by_name_version(moldb_name)[0]['id']
                         for moldb_name in self._ds.config['databases']}
        return completed_moldb_ids, new_moldb_ids

    def _save_data_from_raw_ms_file(self):
        """Derive acquisition geometry and image storage type from the raw
        MS file and persist both on the dataset."""
        ms_file_type_config = SMConfig.get_ms_file_handler(self._wd_manager.local_dir.ms_file_path)
        acq_geometry_factory_module = ms_file_type_config['acq_geometry_factory']
        # Factory class is resolved dynamically from the file-type config
        acq_geometry_factory = getattr(import_module(acq_geometry_factory_module['path']),
                                       acq_geometry_factory_module['name'])
        acq_geometry = acq_geometry_factory(self._wd_manager.local_dir.ms_file_path).create()
        self._ds.save_acq_geometry(self._db, acq_geometry)
        self._ds.save_ion_img_storage_type(self._db, ms_file_type_config['img_storage_type'])

    def run(self, ds):
        """ Entry point of the engine. Molecule search is completed in several steps:
            * Copying input data to the engine work dir
            * Conversion input mass spec files to plain text format. One line - one spectrum data
            * Generation and saving to the database theoretical peaks for all formulas from the molecule database
            * Molecules search. The most compute intensive part. Spark is used to run it in distributed manner.
            * Saving results (isotope images and their metrics of quality for each putative molecule) to the database

        Args
        ----
        ds : sm.engine.dataset_manager.Dataset
        """
        try:
            start = time.time()
            self._init_db()
            self._es = ESExporter(self._db)
            self._ds = ds
            if self._sm_config['rabbitmq']:
                self._status_queue = QueuePublisher(config=self._sm_config['rabbitmq'],
                                                    qdesc=SM_DS_STATUS,
                                                    logger=logger)
            else:
                self._status_queue = None
            ds.set_status(self._db, self._es, self._status_queue,
                          DatasetStatus.STARTED)
            self._wd_manager = WorkDirManager(ds.id)
            self._configure_spark()
            if not self.no_clean:
                self._wd_manager.clean()
            self._ds_reader = DatasetReader(self._ds.input_path, self._sc,
                                            self._wd_manager)
            self._ds_reader.copy_convert_input_data()
            self._save_data_from_raw_ms_file()
            self._img_store.storage_type = self._ds.get_ion_img_storage_type(self._db)
            # NOTE(review): STARTED status is published a second time here —
            # possibly to refresh status after the slow copy/convert step,
            # possibly redundant; confirm intent
            ds.set_status(self._db, self._es, self._status_queue,
                          DatasetStatus.STARTED)
            logger.info('Dataset config:\n%s', pformat(self._ds.config))
            completed_moldb_ids, new_moldb_ids = self._moldb_ids()
            # Ids in both sets are already done and still wanted — skip them;
            # remove jobs no longer in config, run jobs newly added to config
            for moldb_id in completed_moldb_ids.symmetric_difference(new_moldb_ids):  # ignore ids present in both sets
                mol_db = MolecularDB(id=moldb_id, db=self._db,
                                     iso_gen_config=self._ds.config['isotope_generation'])
                if moldb_id not in new_moldb_ids:
                    self._remove_annotation_job(mol_db)
                elif moldb_id not in completed_moldb_ids:
                    self._run_annotation_job(mol_db)
            ds.set_status(self._db, self._es, self._status_queue,
                          DatasetStatus.FINISHED)
            logger.info("All done!")
            time_spent = time.time() - start
            logger.info('Time spent: %d mins %d secs',
                        *divmod(int(round(time_spent)), 60))
        except Exception as e:
            # Only publish FAILED if we got far enough to bind the dataset
            if self._ds:
                ds.set_status(self._db, self._es, self._status_queue,
                              DatasetStatus.FAILED)
            logger.error(e, exc_info=True)
            raise
        finally:
            # Best-effort cleanup regardless of success or failure
            if self._sc:
                self._sc.stop()
            if self._db:
                self._db.close()
            if self._wd_manager and not self.no_clean:
                self._wd_manager.clean()
            logger.info('*' * 150)
def test_index_ds_works(sm_config, test_db, es, es_dsl_search, sm_index,
                        ds_config, metadata, annotation_stats):
    """End-to-end test of ESExporter.index_ds: seeds a dataset, a job, two
    ions and two annotations in the DB, indexes them into Elasticsearch, and
    asserts the exact dataset document and both annotation documents."""
    ds_id = '2000-01-01_00h00m'
    upload_dt = datetime.now().isoformat()
    last_finished = '2017-01-01 00:00:00'
    iso_image_ids = ['iso_img_id_1', 'iso_img_id_2']
    stats = json.dumps(annotation_stats)
    db = DB()

    # --- DB fixtures: dataset, job, user/group ownership, ions, annotations ---
    db.insert(
        "INSERT INTO dataset(id, name, input_path, config, metadata, upload_dt, status, "
        "status_update_dt, is_public, acq_geometry, ion_thumbnail) "
        "VALUES (%s, 'ds_name', 'ds_input_path', %s, %s, %s, 'ds_status', %s, true, '{}', %s)",
        [[
            ds_id,
            json.dumps(ds_config),
            json.dumps(metadata), upload_dt, upload_dt, 'thumb-id'
        ]],
    )
    moldb = create_test_molecular_db()
    (job_id, ) = db.insert_return(
        "INSERT INTO job(ds_id, moldb_id, status, start, finish) "
        "VALUES (%s, %s, 'job_status', %s, %s) RETURNING id",
        rows=[(ds_id, moldb.id, last_finished, last_finished)],
    )
    (user_id, ) = db.insert_return(
        "INSERT INTO graphql.user (email, name, role) "
        "VALUES ('email', 'user_name', 'user') RETURNING id",
        [[]],
    )
    (group_id, ) = db.insert_return(
        "INSERT INTO graphql.group (name, short_name) VALUES ('group name', 'grp') RETURNING id",
        [[]],
    )
    db.insert(
        "INSERT INTO graphql.dataset(id, user_id, group_id) VALUES (%s, %s, %s)",
        [[ds_id, user_id, group_id]],
    )
    # Ion 1 has a chem mod and neutral loss; ion 2 is a plain protonated ion
    ion_id1, ion_id2 = db.insert_return(
        "INSERT INTO graphql.ion(ion, formula, chem_mod, neutral_loss, adduct, charge, ion_formula) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id",
        [
            ['H2O-H+O-H+H', 'H2O', '-H+O', '-H', '+H', 1, 'HO2'],
            ['Au+H', 'Au', '', '', '+H', 1, 'HAu'],
        ],
    )
    db.insert(
        "INSERT INTO annotation(job_id, formula, chem_mod, neutral_loss, adduct, "
        "msm, fdr, stats, iso_image_ids, ion_id) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
        [
            [
                job_id, 'H2O', '-H+O', '-H', '+H', 1, 0.1, stats,
                iso_image_ids, ion_id1
            ],
            [
                job_id, 'Au', '', '', '+H', 1, 0.05, stats, iso_image_ids,
                ion_id2
            ],
        ],
    )

    # --- Isotope pattern calculator mock: fixed centroid m/z lists per ion ---
    isocalc_mock = MagicMock(IsocalcWrapper)
    isocalc_mock.centroids = lambda formula: {
        'H2O+H': ([100.0, 200.0], None),
        'H2O-H+O-H+H': ([100.0, 200.0, 300.0], None),
        'Au+H': ([10.0, 20.0], None),
    }[formula]
    # Identity bounds: no mass-accuracy window around the centroids
    isocalc_mock.mass_accuracy_bounds = lambda mzs: (mzs, mzs)

    # Molecule lookup is patched so the exporter doesn't need a real moldb
    with patch(
            'sm.engine.es_export.molecular_db.fetch_molecules',
            return_value=pd.DataFrame(
                [('H2O', 'mol_id', 'mol_name'), ('Au', 'mol_id', 'mol_name')],
                columns=['formula', 'mol_id', 'mol_name'],
            ),
    ):
        es_exp = ESExporter(db, sm_config)
        es_exp.delete_ds(ds_id)
        es_exp.index_ds(
            ds_id=ds_id,
            moldb=moldb,
            isocalc=isocalc_mock,
        )

    wait_for_es(es, sm_config['elasticsearch']['index'])

    # --- Dataset document assertions ---
    ds_d = (es_dsl_search.filter(
        'term', _type='dataset').execute().to_dict()['hits']['hits'][0]['_source'])
    # Fields shared by the dataset doc and every annotation doc
    expected_ds_fields = {
        'ds_last_finished': last_finished,
        'ds_config': ds_config,
        'ds_adducts': ds_config['isotope_generation']['adducts'],
        'ds_moldb_ids': ds_config['database_ids'],
        'ds_chem_mods': [],
        'ds_neutral_losses': [],
        'ds_project_ids': [],
        'ds_project_names': [],
        'ds_meta': metadata,
        'ds_status': 'ds_status',
        'ds_status_update_dt': upload_dt,
        'ds_name': 'ds_name',
        'ds_input_path': 'ds_input_path',
        'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'ds_is_public': True,
        'ds_submitter_email': 'email',
        'ds_submitter_id': user_id,
        'ds_submitter_name': 'user_name',
        'ds_group_approved': False,
        'ds_group_id': group_id,
        'ds_group_name': 'group name',
        'ds_group_short_name': 'grp',
    }
    # Annotation counts: cumulative per FDR level (5/10/20/50 %)
    assert ds_d == {
        **expected_ds_fields,
        'ds_acq_geometry': {},
        'annotation_counts': [{
            'db': {
                'id': moldb.id,
                'name': moldb.name
            },
            'counts': [
                {
                    'level': 5,
                    'n': 1
                },
                {
                    'level': 10,
                    'n': 2
                },
                {
                    'level': 20,
                    'n': 2
                },
                {
                    'level': 50,
                    'n': 2
                },
            ],
        }],
    }

    # --- Annotation document assertions ---
    ann_1_d = (es_dsl_search.filter(
        'term', formula='H2O').execute().to_dict()['hits']['hits'][0]['_source'])
    # Some stats are promoted to renamed top-level fields; non-metric stats
    # keep their names
    top_level_stats = {
        'pattern_match': annotation_stats['spectral'],
        'image_corr': annotation_stats['spatial'],
        'chaos': annotation_stats['chaos'],
        **{
            key: value
            for key, value in annotation_stats.items()
            if key in NON_METRIC_STATS
        },
    }
    # The remaining stats are nested under 'metrics'
    metrics = {
        key: value
        for key, value in annotation_stats.items()
        if key not in NON_METRIC_STATS
    }
    assert ann_1_d == {
        **expected_ds_fields,
        **top_level_stats,
        'metrics': metrics,
        'fdr': 0.1,
        'formula': 'H2O',
        'msm': 1.0,
        'ion': 'H2O-H+O-H+H+',
        'ion_formula': 'HO2',
        'centroid_mzs': [100.0, 200.0, 300.0],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
        'iso_image_urls': [
            f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_1',
            f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_2',
        ],
        'isobars': [],
        'isomer_ions': [],
        'polarity': '+',
        'job_id': 1,
        'adduct': '+H',
        'neutral_loss': '-H',
        'chem_mod': '-H+O',
        'annotation_counts': [],
        'comp_names': ['mol_name'],
        'comps_count_with_isomers': 1,
        'db_id': moldb.id,
        'db_name': moldb.name,
        'db_version': moldb.version,
        'mz': 100.0,
        'comp_ids': ['mol_id'],
        'annotation_id': 1,
        'off_sample_label': None,
        'off_sample_prob': None,
    }

    ann_2_d = (es_dsl_search.filter(
        'term', formula='Au').execute().to_dict()['hits']['hits'][0]['_source'])
    assert ann_2_d == {
        **expected_ds_fields,
        **top_level_stats,
        'metrics': metrics,
        'fdr': 0.05,
        'formula': 'Au',
        'msm': 1.0,
        'ion': 'Au+H+',
        'ion_formula': 'HAu',
        'centroid_mzs': [10.0, 20.0],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
        'iso_image_urls': [
            f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_1',
            f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_2',
        ],
        'isobars': [],
        'isomer_ions': [],
        'polarity': '+',
        'job_id': 1,
        'adduct': '+H',
        'neutral_loss': '',
        'chem_mod': '',
        'annotation_counts': [],
        'comp_names': ['mol_name'],
        'comps_count_with_isomers': 1,
        'db_id': moldb.id,
        'db_name': moldb.name,
        'db_version': moldb.version,
        'mz': 10.0,
        'comp_ids': ['mol_id'],
        'annotation_id': 2,
        'off_sample_label': None,
        'off_sample_prob': None,
    }