def test_save_ds_meta_ds_doesnt_exist(spark_context, create_test_db, drop_test_db, sm_config, ds_config):
    """Saving metadata for a dataset absent from the DB inserts dataset and coordinate rows."""
    work_dir_mock = MagicMock(WorkDirManager)
    work_dir_mock.ds_coord_path = '/ds_path'
    work_dir_mock.txt_path = '/txt_path'
    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as text_file_mock:
        coord_lines = ['0,1,1\n', '1,100,200\n']
        text_file_mock.return_value = spark_context.parallelize(coord_lines)

        Dataset(spark_context, 'ds_name', '', ds_config, work_dir_mock,
                DB(sm_config['db'])).save_ds_meta()

        db = DB(sm_config['db'])
        expected_bounds = {u'x': {u'min': 1, u'max': 100},
                           u'y': {u'min': 1, u'max': 200}}
        ds_row = db.select_one('SELECT name, file_path, img_bounds, config from dataset')
        assert ds_row == ('ds_name', '/txt_path', expected_bounds, ds_config)
        assert db.select_one('SELECT xs, ys from coordinates') == ([1, 100], [1, 200])
        db.close()
def reprocess_dataset_local(sm_src, src_ds_id, dst_ds_id, update_metadata_func,
                            skip_existing=True, use_cache=False):
    """Re-run annotation for a source dataset under a new id, locally via Lithops.

    Args:
        sm_src: SMInstance-like client used to fetch the source dataset
        src_ds_id: id of the dataset to copy settings/metadata from
        dst_ds_id: id to store the reprocessed results under
        update_metadata_func: callable (metadata, config) -> (metadata, config)
        skip_existing: skip datasets that already have diagnostics stored
        use_cache: passed through to ServerAnnotationJob

    Returns:
        (dst_ds_id, None) tuple on both paths.
    """
    existing = get_dataset_diagnostics(dst_ds_id)
    if skip_existing and existing:
        # Fixed: the old `print(f'...\n', end=None)` emitted a spurious blank line
        # because end=None means the default newline.
        print(f'Skipping {dst_ds_id}')
        return dst_ds_id, None

    smds = sm_src.dataset(id=src_ds_id)
    db = DB()

    ds_metadata, ds_config = update_metadata_func(smds.metadata, smds.config)
    ds = Dataset(
        id=dst_ds_id,
        name=smds.name,
        input_path=smds.s3dir,
        upload_dt=datetime.now(),
        metadata=ds_metadata,
        config=ds_config,
        status=DatasetStatus.QUEUED,
        status_update_dt=None,
        is_public=False,
    )
    ds.save(db, None, True)

    with perf_profile(db, 'annotate_lithops', dst_ds_id) as perf:
        executor = Executor(SMConfig.get_conf()['lithops'], perf=perf)
        job = ServerAnnotationJob(executor, ds, perf, use_cache=use_cache)
        job.pipe.use_db_cache = False
        job.run()
    # BUG FIX: previously returned a bare id here but a (id, None) tuple in the
    # skip branch, so callers unpacking the result failed on this path.
    return dst_ds_id, None
def test_save_ds_meta_ds_doesnt_exist(spark_context, create_test_db, drop_test_db, sm_config, ds_config):
    """save_ds_meta() on a fresh dataset populates the dataset and coordinates tables."""
    wd_manager = MagicMock(WorkDirManager)
    wd_manager.ds_coord_path = '/ds_path'
    wd_manager.txt_path = '/txt_path'
    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as text_file_mock:
        text_file_mock.return_value = spark_context.parallelize(['0,1,1\n', '1,100,200\n'])

        ds = Dataset(spark_context, 'ds_name', '', 'input_path', ds_config,
                     wd_manager, DB(sm_config['db']))
        ds.save_ds_meta()

        db = DB(sm_config['db'])
        bounds = {u'x': {u'min': 1, u'max': 100}, u'y': {u'min': 1, u'max': 200}}
        ds_row = db.select_one('SELECT name, file_path, img_bounds, config from dataset')
        assert ds_row == ('ds_name', 'input_path', bounds, ds_config)
        assert db.select_one('SELECT xs, ys from coordinates') == ([1, 100], [1, 200])
        db.close()
def test_dataset_update_status_works(fill_db, sm_config, ds_config):
    """set_status() persists the new status and publishes it to the status queue."""
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    queue_mock = MagicMock(spec=QueuePublisher)

    ds_id = '2000-01-01'
    ds = Dataset(ds_id, 'ds_name', 'input_path', datetime.now(), {}, ds_config,
                 DatasetStatus.INDEXING, mol_dbs=['HMDB'], adducts=['+H'])

    ds.set_status(db, es_mock, queue_mock, DatasetStatus.FINISHED)

    assert Dataset.load(db, ds_id).status == DatasetStatus.FINISHED
    queue_mock.publish.assert_called_once_with(
        {'ds_id': ds_id, 'status': DatasetStatus.FINISHED})
def test_search_job_imzml_example_es_export_fails(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                                  post_images_to_annot_service_mock,
                                                  MolDBServiceWrapperMock, MolDBServiceWrapperMock2,
                                                  sm_config, create_fill_sm_database, es_dsl_search,
                                                  clean_isotope_storage):
    """Search job must mark the dataset FAILED and raise ESExportFailedError when ES export throws."""
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock2)

    # Stub metrics so every candidate formula passes scoring.
    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x
    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])

    def throw_exception_function(*args):
        raise Exception('Test')

    try:
        ds_id = '2000-01-01_00h00m'
        upload_dt = datetime.now()
        ds_config_str = open(ds_config_path).read()
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        with patch('sm.engine.search_job.ESExporter.index_ds') as index_ds_mock:
            # Force the ES export step to fail mid-job.
            index_ds_mock.side_effect = throw_exception_function

            img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
            job = SearchJob(img_store=img_store)
            ds = Dataset.load(db, ds_id)
            job.run(ds)
    except ESExportFailedError as e:
        assert e
        # dataset table asserts: the failure must be recorded in the DB
        row = db.select_one('SELECT status from dataset')
        assert row[0] == 'FAILED'
    else:
        raise AssertionError('ESExportFailedError should be raised')
    finally:
        db.close()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))
def create_ds_from_files(ds_id, ds_name, ds_input_path, config_path=None, meta_path=None):
    """Build a Dataset from an input directory containing config.json and meta.json.

    Args:
        ds_id: id to assign to the new dataset
        ds_name: human-readable dataset name
        ds_input_path: directory with the input data (and the default config/meta files)
        config_path: optional explicit path to config.json
        meta_path: optional explicit path to meta.json

    Returns:
        Dataset (not yet saved to the DB)

    Raises:
        Exception: if meta.json does not exist
    """
    config_path = Path(config_path or Path(ds_input_path) / 'config.json')
    # read_text + json.loads closes the file; the old json.load(open(...)) leaked the handle
    ds_config = json.loads(config_path.read_text())
    if 'database_ids' not in ds_config:
        ds_config['database_ids'] = [
            molecular_db.find_by_name(db, True).id for db in ds_config['databases']
        ]

    meta_path = Path(meta_path or Path(ds_input_path) / 'meta.json')
    if not meta_path.exists():
        raise Exception('meta.json not found')
    metadata = json.loads(meta_path.read_text())

    return Dataset(
        id=ds_id,
        name=ds_name,
        input_path=str(ds_input_path),
        upload_dt=datetime.now(),
        metadata=metadata,
        is_public=True,
        config=ds_config,
    )
def add_ds():
    """HTTP handler: create (or re-submit) a dataset from the JSON request body.

    Returns the new dataset id on success, a busy status if the dataset is
    currently processing, or a generic error status otherwise.
    NOTE(review): `req`/`resp` appear to be framework-level request/response
    objects defined outside this view - confirm against the module imports.
    """
    ds_id = None
    try:
        params = _json_params(req)
        logger.info('Received ADD request: %s', params)
        now = datetime.now()
        # Generate a timestamp-based id when the client did not supply one.
        ds_id = params.get('id', None) or now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
        ds = Dataset(ds_id,
                     params.get('name', None),
                     params.get('input_path'),
                     params.get('upload_dt', now.isoformat()),
                     params.get('metadata', None),
                     params.get('config'),
                     is_public=params.get('is_public'),
                     mol_dbs=params.get('mol_dbs'),
                     adducts=params.get('adducts'))
        # NOTE(review): priority is read but never passed on - possibly dead code.
        priority = params.get('priority', DatasetActionPriority.DEFAULT)
        db = _create_db_conn()
        ds_man = _create_dataset_manager(db)
        ds_man.add(ds,
                   del_first=params.get('del_first', False),
                   force=params.get('force', False),
                   email=params.get('email', None))
        db.close()
        return {'status': OK['status'], 'ds_id': ds_id}
    except DSIsBusy as e:
        logger.warning(e.message)
        resp.status = ERR_DS_BUSY['status_code']
        return {'status': ERR_DS_BUSY['status'], 'ds_id': e.ds_id}
    except Exception as e:
        logger.error(e, exc_info=True)
        resp.status = ERROR['status_code']
        return {'status': ERROR['status'], 'ds_id': ds_id}
def _on_success(self, msg):
    """Mark the dataset referenced by the queue message as FINISHED and notify Slack.

    Args:
        msg: queue message dict; must contain 'ds_id'.
    """
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.FINISHED)
    # Plain literal: the old f-string had no placeholders.
    self.logger.info(" SM update daemon: success")
    self._post_to_slack(msg)
def test_get_sample_area_mask_correctness(sm_config, ds_config, spark_context):
    """Pixels present in the coordinate file map to True in the sample area mask."""
    wd_manager = MagicMock(WorkDirManager)
    wd_manager.ds_coord_path = '/ds_path'
    wd_manager.txt_path = '/txt_path'
    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as text_file_mock:
        text_file_mock.return_value = spark_context.parallelize(['0,0,0\n', '2,1,1\n'])
        ds = Dataset(spark_context, 'ds_name', '', 'input_path', ds_config, wd_manager, None)
        mask = tuple(ds.get_sample_area_mask())

    assert mask == (True, False, False, True)
def run(sm_config, ds_id_str, sql_where, algorithm, use_lithops):
    """Generate ion thumbnails for the datasets selected by id list or SQL filter."""
    db = DB()

    if sql_where:
        ds_ids = [
            row_id
            for (row_id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        ds_ids = ds_id_str.split(',')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    # The Lithops executor is only needed (and only constructed) for the lithops path.
    executor = Executor(sm_config['lithops']) if use_lithops else None

    total = len(ds_ids)
    for idx, ds_id in enumerate(ds_ids, 1):
        try:
            logger.info(f'[{idx} / {total}] Generating ion thumbnail for {ds_id}')
            ds = Dataset.load(db, ds_id)
            if use_lithops:
                generate_ion_thumbnail_lithops(executor, db, ds, algorithm=algorithm)
            else:
                generate_ion_thumbnail(db, ds, algorithm=algorithm)
        except Exception:
            logger.error(f'Failed on {ds_id}', exc_info=True)
def test_classify_ion_images_preds_saved(call_api_mock, image_storage_mock, fill_db):
    """Off-sample predictions returned by the API are stored per annotation, in order."""
    preds = [{'prob': 0.1, 'label': 'on'}, {'prob': 0.9, 'label': 'off'}]
    call_api_mock.return_value = {'predictions': preds}

    buf = io.BytesIO()
    Image.new('RGBA', (10, 10)).save(buf, format='PNG')
    buf.seek(0)
    image_storage_mock.get_image.return_value = buf.read()

    db = DB()
    ds_id = '2000-01-01'
    ds = Dataset.load(db, ds_id)

    classify_dataset_ion_images(db, ds, defaultdict(str))

    annotations = db.select_with_fields(
        (
            'select off_sample '
            'from dataset d '
            'join job j on j.ds_id = d.id '
            'join annotation m on m.job_id = j.id '
            'where d.id = %s '
            'order by m.id '
        ),
        params=(ds_id,),
    )
    assert annotations == [{'off_sample': pred} for pred in preds]
def _on_failure(self, msg):
    """Mark the dataset referenced by the queue message as FAILED and notify Slack.

    Args:
        msg: queue message dict; must contain 'ds_id'.
    """
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.FAILED)
    # Plain literal: the old f-string had no placeholders. exc_info=True attaches
    # a traceback only when called while an exception is being handled.
    self.logger.error(" SM update daemon: failure", exc_info=True)
    self._post_to_slack(msg)
def run_off_sample(sm_config, ds_ids_str, sql_where, fix_missing, overwrite_existing):
    """Run off-sample classification for datasets chosen by id list, SQL filter, or missing-job scan."""
    db = DB()

    if ds_ids_str:
        ds_ids = ds_ids_str.split(',')
    elif sql_where:
        rows = db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ds_ids = [row_id for (row_id,) in rows]
    elif fix_missing:
        logger.info('Checking for missing off-sample jobs...')
        ds_ids = [ds_id for (ds_id,) in db.select(MISSING_OFF_SAMPLE_SEL)]
        logger.info(f'Found {len(ds_ids)} missing off-sample sets')
    else:
        ds_ids = None

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    es_exp = ESExporter(db, sm_config)
    total = len(ds_ids)
    for idx, ds_id in enumerate(ds_ids, 1):
        try:
            logger.info(f'Running off-sample on {idx} out of {total}')
            ds = Dataset.load(db, ds_id)
            classify_dataset_ion_images(db, ds, sm_config['services'], overwrite_existing)
            es_exp.reindex_ds(ds_id)
        except Exception:
            logger.error(f'Failed to run off-sample on {ds_id}', exc_info=True)
def create_ds(
    ds_id='2000-01-01',
    ds_name='ds_name',
    input_path='input_path',
    upload_dt=None,
    metadata=None,
    status=DatasetStatus.QUEUED,
    moldbs_ids=None,
    adducts=None,
):
    """Build a Dataset with test defaults and a config generated from the metadata."""
    upload_dt = upload_dt or datetime.now()
    moldbs_ids = moldbs_ids or [0]
    adducts = adducts or ['+H', '+Na', '+K', '[M]+']
    metadata = metadata or {
        'MS_Analysis': {
            'Polarity': 'Positive',
            'Analyzer': 'FTICR',
            'Detector_Resolving_Power': {'mz': 200, 'Resolving_Power': 140000},
        }
    }
    config = generate_ds_config(metadata, moldb_ids=moldbs_ids, adducts=adducts)
    return Dataset(
        id=ds_id,
        name=ds_name,
        input_path=input_path,
        upload_dt=upload_dt,
        metadata=metadata,
        config=config,
        status=status,
    )
def delete(self, ds_id, **kwargs):
    """Send a delete message for the dataset to the update queue."""
    ds = Dataset.load(self._db, ds_id)
    force = kwargs.get('force', False)
    self._set_ds_busy(ds, force)
    self._post_sm_msg(ds=ds, queue=self._update_queue, action=DaemonAction.DELETE, **kwargs)
def test_sm_daemons_annot_fails(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                post_images_to_annot_service_mock,
                                MolDBServiceWrapperMock,
                                sm_config, test_db, es_dsl_search, clean_isotope_storage):
    """When metric computation throws, both the dataset and the job must end up FAILED."""
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)

    def throw_exception_function(*args):
        raise Exception('Test')

    # Make the annotation stage fail as soon as metrics are computed.
    get_compute_img_metrics_mock.return_value = throw_exception_function
    filter_sf_metrics_mock.side_effect = lambda x: x
    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])
    es = ESExporter(db)
    # NOTE(review): annotate_daemon is never reassigned, so the cleanup below is
    # dead code - possibly left over from an older test setup.
    annotate_daemon = None

    try:
        ds_id = '2000-01-01_00h00m'
        upload_dt = datetime.now()
        ds_config_str = open(ds_config_path).read()
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        ds = Dataset.load(db, ds_id)
        queue_pub.publish({'ds_id': ds.id, 'ds_name': ds.name, 'action': 'annotate'})

        run_daemons(db, es)

        # dataset and job tables asserts
        row = db.select_one('SELECT status from dataset')
        assert row[0] == 'FAILED'
        row = db.select_one('SELECT status from job')
        assert row[0] == 'FAILED'
    finally:
        db.close()
        if annotate_daemon:
            annotate_daemon.stop()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))
def create_ds(ds_id='2000-01-01', ds_name='ds_name', input_path='input_path', upload_dt=None,
              metadata=None, ds_config=None, status=DatasetStatus.NEW, mol_dbs=None, adducts=None):
    """Build a test Dataset, filling in default molecular DBs and adducts when omitted."""
    upload_dt = upload_dt or datetime.now()
    mol_dbs = mol_dbs or ['HMDB-v4']
    adducts = adducts or ['+H', '+Na', '+K']
    return Dataset(ds_id, ds_name, input_path, upload_dt,
                   metadata or {}, ds_config or {},
                   status=status, mol_dbs=mol_dbs, adducts=adducts,
                   img_storage_type='fs')
def test_dataset_update_status_works(fill_db, metadata, ds_config):
    """set_status() persists the new status to the database."""
    db = DB()
    es_mock = MagicMock(spec=ESExporter)

    ds = create_test_ds(status=DatasetStatus.ANNOTATING)
    ds.set_status(db, es_mock, DatasetStatus.FINISHED)

    reloaded = Dataset.load(db, ds.id)
    assert reloaded.status == DatasetStatus.FINISHED
def test_dataset_save_overwrite_ds_works(fill_db, metadata, ds_config):
    """save() overwrites the stored dataset and syncs it to Elasticsearch."""
    db = DB()
    es_mock = MagicMock(spec=ESExporter)
    ds = create_test_ds()

    ds.save(db, es_mock)

    assert Dataset.load(db, ds.id) == ds
    es_mock.sync_dataset.assert_called_once_with(ds.id)
def test_dataset_to_queue_message_works(metadata, ds_config):
    """to_queue_message() exposes the dataset id, name and input path."""
    ds_id = '2000-01-01'
    ds = Dataset(
        id=ds_id,
        name='ds_name',
        input_path='input_path',
        upload_dt=datetime.now(),
        metadata=metadata,
        config=ds_config,
        status=DatasetStatus.QUEUED,
    )

    expected = {'ds_id': ds_id, 'ds_name': 'ds_name', 'input_path': 'input_path'}
    assert ds.to_queue_message() == expected
def annotate_lithops(self, ds: Dataset, del_first=False):
    """Annotate a dataset with the Lithops-based pipeline.

    Args:
        ds: dataset to annotate
        del_first: delete any existing results first (and force coloc reprocessing)
    """
    if del_first:
        self.logger.warning(f'Deleting all results for dataset: {ds.id}')
        del_jobs(ds)
    ds.save(self._db, self._es)
    with perf_profile(self._db, 'annotate_lithops', ds.id) as perf:
        executor = Executor(self._sm_config['lithops'], perf=perf)
        ServerAnnotationJob(executor, ds, perf).run()

        # Optional post-annotation services; enabled by default when absent from config.
        if self._sm_config['services'].get('colocalization', True):
            Colocalization(self._db).run_coloc_job_lithops(executor, ds, reprocess=del_first)

        if self._sm_config['services'].get('ion_thumbnail', True):
            generate_ion_thumbnail_lithops(
                executor=executor,
                db=self._db,
                ds=ds,
                only_if_needed=not del_first,
            )
def add(self, doc, use_lithops, **kwargs):
    """Save dataset and send ANNOTATE message to the queue.

    Args:
        doc: dataset fields; 'id' is generated from the current time if absent
        use_lithops: route the message to the Lithops queue instead of the annotate queue

    Returns:
        the dataset id
    """
    now = datetime.now()
    if 'id' not in doc:
        doc['id'] = now.strftime('%Y-%m-%d_%Hh%Mm%Ss')

    # Keep only the flat config keys the ds-config builders understand.
    ds_config_kwargs = dict((k, v) for k, v in doc.items() if k in FLAT_DS_CONFIG_KEYS)

    try:
        # EAFP: update the config of an existing dataset...
        ds = Dataset.load(self._db, doc['id'])
        self._set_ds_busy(ds, kwargs.get('force', False))
        config = update_ds_config(ds.config, doc['metadata'], **ds_config_kwargs)
    except UnknownDSID:
        # ...or generate a fresh config for a brand-new one.
        config = generate_ds_config(doc.get('metadata'), **ds_config_kwargs)

    ds = Dataset(
        id=doc['id'],
        name=doc.get('name'),
        input_path=doc.get('input_path'),
        upload_dt=doc.get('upload_dt', now.isoformat()),
        metadata=doc.get('metadata'),
        config=config,
        is_public=doc.get('is_public'),
        status=DatasetStatus.QUEUED,
    )
    ds.save(self._db, self._es, allow_insert=True)
    self._status_queue.publish({
        'ds_id': ds.id,
        'action': DaemonAction.ANNOTATE,
        'stage': DaemonActionStage.QUEUED
    })
    queue = self._lit_queue if use_lithops else self._annot_queue
    self._post_sm_msg(ds=ds, queue=queue, action=DaemonAction.ANNOTATE, **kwargs)
    return doc['id']
def index(self, ds: Dataset):
    """Re-index all search results for the dataset.

    Jobs whose molecular DB is no longer in the dataset config are deleted;
    the remaining jobs are re-exported to Elasticsearch.

    Args:
        ds: dataset to index
    """
    self._es.delete_ds(ds.id, delete_dataset=False)

    job_docs = self._db.select_with_fields(
        'SELECT id, moldb_id FROM job WHERE ds_id = %s', params=(ds.id,)
    )
    moldb_ids = ds.config['database_ids']
    # Hoisted out of the loop: the wrapper is identical for every job.
    isocalc = IsocalcWrapper(ds.config)
    for job_doc in job_docs:
        if job_doc['moldb_id'] not in moldb_ids:
            # Stale job - its moldb was removed from the dataset config.
            self._db.alter('DELETE FROM job WHERE id = %s', params=(job_doc['id'],))
        else:
            # Look up the moldb only when actually re-indexing (previously
            # fetched unconditionally, wasting a lookup for deleted jobs).
            moldb = molecular_db.find_by_id(job_doc['moldb_id'])
            self._es.index_ds(ds_id=ds.id, moldb=moldb, isocalc=isocalc)

    ds.set_status(self._db, self._es, DatasetStatus.FINISHED)
def test_dataset_to_queue_message_works():
    """to_queue_message() includes the submitter email when present in the metadata."""
    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    meta = {'Submitted_By': {'Submitter': {'Email': '*****@*****.**'}}}
    # NOTE(review): ds_config is neither a parameter nor a local here - presumably
    # a module-level constant/fixture; confirm it is defined at import time.
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, meta, ds_config,
                 mol_dbs=['HDMB'], adducts=['+H'])
    msg = ds.to_queue_message()
    assert {
        'ds_id': ds_id,
        'ds_name': 'ds_name',
        'input_path': 'input_path',
        'user_email': '*****@*****.**'
    } == msg
def test_dataset_save_overwrite_ds_works(fill_db, sm_config, ds_config):
    """save() overwrites the dataset row, syncs ES and publishes the status."""
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    queue_mock = MagicMock(spec=QueuePublisher)

    ds_id = '2000-01-01'
    ds = Dataset(ds_id, 'ds_name', 'input_path', datetime.now(), {}, ds_config,
                 mol_dbs=['HMDB'], adducts=['+H'])

    ds.save(db, es_mock, queue_mock)

    assert Dataset.load(db, ds_id) == ds
    es_mock.sync_dataset.assert_called_once_with(ds_id)
    queue_mock.publish.assert_called_with({'ds_id': ds_id, 'status': DatasetStatus.NEW})
def create_test_ds(
    id='2000-01-01',
    name='ds_name',
    input_path='input_path',
    upload_dt=None,
    metadata=None,
    config=None,
    status=DatasetStatus.FINISHED,
    es=None,
):
    """Create and save a Dataset with test defaults.

    Args:
        id/name/input_path: basic dataset identity fields
        upload_dt: defaults to now
        metadata/config: default to deep copies of the module-level test fixtures
        status: defaults to FINISHED (falls back to QUEUED only if explicitly falsy)
        es: optional ESExporter passed through to Dataset.save

    Returns:
        the saved Dataset
    """
    upload_dt = upload_dt or datetime.now()
    ds = Dataset(
        id=id,
        name=name,
        input_path=input_path,
        # was `upload_dt or datetime.now()` again - redundant after the default above
        upload_dt=upload_dt,
        metadata=metadata or deepcopy(TEST_METADATA),
        config=config or deepcopy(TEST_DS_CONFIG),
        status=status or DatasetStatus.QUEUED,
    )
    ds.save(DB(), es=es, allow_insert=True)
    return ds
def _callback(self, msg):
    """Handle an update-queue message: set INDEXING, then index or delete the dataset.

    Raises:
        Exception: if the message action is neither 'update' nor 'delete'
    """
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.INDEXING)

    self.logger.info(f' SM update daemon received a message: {msg}')
    self._manager.post_to_slack(
        'new', f" [v] New {msg['action']} message: {json.dumps(msg)}")

    # Dispatch on the requested action.
    if msg['action'] == 'update':
        self._manager.index(ds=ds)
    elif msg['action'] == 'delete':
        self._manager.delete(ds=ds)
    else:
        raise Exception(f"Wrong action: {msg['action']}")
def add_optical_image(db, ds_id, url, transform, zoom_levels=(1, 2, 4, 8)):
    """Add optical image to dataset.

    Generates scaled and transformed versions of the provided optical image +
    creates the thumbnail.

    Args:
        db: database connection
        ds_id: id of the dataset to attach the image to
        url: location of the raw optical image; its last path segment is used as the image id
        transform: transform mapping the optical image onto the ion image grid
        zoom_levels: zoom factors to pre-generate

    Raises:
        requests.HTTPError: if the image cannot be downloaded
    """
    ds = Dataset.load(db, ds_id)
    logger.info(f'Adding optical image {url} to "{ds.id}" dataset')

    dims = _annotation_image_shape(db, ds)
    # Timeout prevents an unresponsive host from hanging the request forever;
    # raise_for_status fails fast instead of letting PIL choke on an error page.
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    optical_img = Image.open(io.BytesIO(resp.content))

    raw_optical_img_id = url.split('/')[-1]
    _add_raw_optical_image(db, ds, raw_optical_img_id, transform)
    _add_zoom_optical_images(db, ds, dims, optical_img, transform, zoom_levels)
    _add_thumbnail_optical_image(db, ds, dims, optical_img, transform)
def update(self, ds_id, doc, async_es_update, **kwargs):
    """Save updated dataset fields and post an UPDATE message to the queue."""
    ds = Dataset.load(self._db, ds_id)

    # Copy over only the fields actually present in the update document.
    for field in ('name', 'input_path', 'metadata', 'upload_dt', 'is_public'):
        if field in doc:
            setattr(ds, field, doc[field])

    es = None if async_es_update else self._es
    ds.save(self._db, es)

    self._post_sm_msg(
        ds=ds,
        queue=self._update_queue,
        action=DaemonAction.UPDATE,
        fields=list(doc.keys()),
        **kwargs,
    )
def test_dataset_load_existing_ds_works(fill_db, sm_config, ds_config):
    """Dataset.load() restores every persisted field of an existing dataset."""
    db = DB(sm_config['db'])
    ds_id = '2000-01-01'
    upload_dt = datetime.strptime('2000-01-01 00:00:00', "%Y-%m-%d %H:%M:%S")

    ds = Dataset.load(db, ds_id)

    expected = dict(
        id=ds_id,
        name='ds_name',
        input_path='input_path',
        upload_dt=upload_dt,
        metadata={"meta": "data"},
        config=ds_config,
        status=DatasetStatus.FINISHED,
        is_public=True,
        mol_dbs=['HMDB-v4'],
        adducts=['+H', '+Na', '+K'],
        ion_img_storage_type='fs',
    )
    assert ds.__dict__ == expected
def _callback(self, msg):
    """Handle an annotate-queue message: run annotation, then queue an update message."""
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.ANNOTATING)
    self.logger.info(f" SM annotate daemon received a message: {msg}")
    self._manager.post_to_slack(
        'new', " [v] New annotation message: {}".format(json.dumps(msg)))

    self._manager.annotate(ds=ds,
                           search_job_factory=SearchJob,
                           del_first=msg.get('del_first', False))

    # Hand the dataset over to the update daemon for indexing.
    upd_msg = {
        'ds_id': msg['ds_id'],
        'ds_name': msg['ds_name'],
        'action': 'update'
    }
    self._upd_queue_pub.publish(msg=upd_msg, priority=2)
def del_optical_image(db, ds_id):
    """Delete raw and zoomed optical images from DB and FS."""
    ds = Dataset.load(db, ds_id)
    logger.info(f'Deleting optical image of "{ds.id}" dataset')

    # Raw image: the id column may be NULL, so guard before deleting.
    (raw_img_id,) = db.select_one(SEL_DATASET_RAW_OPTICAL_IMAGE, params=(ds.id,))
    if raw_img_id:
        image_storage.delete_image(image_storage.OPTICAL, ds_id, raw_img_id)

    # Zoomed images: one row per zoom level.
    for zoom_img_id in db.select_onecol(SEL_OPTICAL_IMAGE, params=(ds.id,)):
        image_storage.delete_image(image_storage.OPTICAL, ds_id, zoom_img_id)

    # Thumbnail: also nullable.
    (thumb_img_id,) = db.select_one(SEL_OPTICAL_IMAGE_THUMBNAIL, params=(ds.id,))
    if thumb_img_id:
        image_storage.delete_image(image_storage.OPTICAL, ds_id, thumb_img_id)

    # Finally clear the DB references.
    db.alter(DEL_DATASET_RAW_OPTICAL_IMAGE, params=(ds.id,))
    db.alter(DEL_OPTICAL_IMAGE, params=(ds.id,))
    db.alter(UPD_DATASET_THUMB_OPTICAL_IMAGE, params=(None, None, ds.id))
def test_search_job_imzml_example(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                  post_images_to_annot_service_mock,
                                  MolDBServiceWrapperMock, MolDBServiceWrapperMock2,
                                  sm_config, create_fill_sm_database, es_dsl_search,
                                  clean_isotope_storage):
    """End-to-end SearchJob run on the imzML example dataset.

    Verifies the dataset row, acquisition geometry, job row, image metric rows
    and the Elasticsearch documents produced by a successful run.
    """
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock2)

    # Constant, always-passing metrics for every candidate formula.
    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x
    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])

    try:
        ds_config_str = open(ds_config_path).read()
        upload_dt = datetime.now()
        ds_id = '2000-01-01_00h00m'
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
        job = SearchJob(img_store=img_store)
        job._sm_config['rabbitmq'] = {}  # avoid talking to RabbitMQ during the test
        ds = Dataset.load(db, ds_id)
        job.run(ds)

        # dataset table asserts
        rows = db.select('SELECT id, name, input_path, upload_dt, status from dataset')
        input_path = join(dirname(__file__), 'data', test_ds_name)
        assert len(rows) == 1
        assert rows[0] == (ds_id, test_ds_name, input_path, upload_dt, DatasetStatus.FINISHED)

        # ms acquisition geometry asserts
        rows = db.select('SELECT acq_geometry from dataset')
        assert len(rows) == 1
        assert rows[0][0] == ds.get_acq_geometry(db)
        assert rows[0][0] == {
            ACQ_GEOMETRY_KEYS.LENGTH_UNIT: 'nm',
            ACQ_GEOMETRY_KEYS.AcqGridSection.section_name: {
                ACQ_GEOMETRY_KEYS.AcqGridSection.REGULAR_GRID: True,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_X : 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_Y : 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_X : 100,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_Y : 100
            },
            ACQ_GEOMETRY_KEYS.PixelSizeSection.section_name: {
                ACQ_GEOMETRY_KEYS.PixelSizeSection.REGULAR_SIZE: True,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_X : 100,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_Y : 100
            }
        }

        # job table asserts
        rows = db.select('SELECT db_id, ds_id, status, start, finish from job')
        assert len(rows) == 1
        db_id, ds_id, status, start, finish = rows[0]
        assert (db_id, ds_id, status) == (0, '2000-01-01_00h00m', 'FINISHED')
        assert start < finish

        # image metrics asserts
        rows = db.select(('SELECT db_id, sf, adduct, stats, iso_image_ids '
                          'FROM iso_image_metrics '
                          'ORDER BY sf, adduct'))
        assert rows[0] == (0, 'C12H24O', '+K',
                           {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                            'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])
        assert rows[1] == (0, 'C12H24O', '+Na',
                           {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                            'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])

        time.sleep(1)  # Waiting for ES

        # ES asserts
        ds_docs = es_dsl_search.query('term', _type='dataset').execute().to_dict()['hits']['hits']
        assert 1 == len(ds_docs)
        ann_docs = es_dsl_search.query('term', _type='annotation').execute().to_dict()['hits']['hits']
        assert len(ann_docs) == len(rows)
        for doc in ann_docs:
            assert doc['_id'].startswith(ds_id)
    finally:
        db.close()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))