def test_search_job_imzml_example_es_export_fails(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                                  post_images_to_annot_service_mock,
                                                  MolDBServiceWrapperMock, MolDBServiceWrapperMock2,
                                                  sm_config, create_fill_sm_database, es_dsl_search,
                                                  clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock2)

    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x

    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])

    def throw_exception_function(*args):
        raise Exception('Test')

    try:
        ds_id = '2000-01-01_00h00m'
        upload_dt = datetime.now()
        ds_config_str = open(ds_config_path).read()
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        with patch('sm.engine.search_job.ESExporter.index_ds') as index_ds_mock:
            index_ds_mock.side_effect = throw_exception_function

            img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
            job = SearchJob(img_store=img_store)
            ds = Dataset.load(db, ds_id)
            job.run(ds)
    except ESExportFailedError as e:
        assert e
        # dataset table asserts
        row = db.select_one('SELECT status from dataset')
        assert row[0] == 'FAILED'
    else:
        raise AssertionError('ESExportFailedError should be raised')
    finally:
        db.close()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))
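The try/except/else bookkeeping above can be written more directly with pytest.raises if pytest is the test runner (the fixture-style arguments suggest it is). A minimal sketch, assuming ESExportFailedError is importable from the engine package and that db, job and ds are prepared exactly as in the setup above:

import pytest

def test_es_export_failure_marks_dataset_failed(db, job, ds):
    # Hypothetical condensed form of the test above; db, job and ds are assumed
    # fixtures that reproduce the original setup and mocks.
    with pytest.raises(ESExportFailedError):
        job.run(ds)

    # Even though indexing failed, the dataset row must end up FAILED.
    (status,) = db.select_one('SELECT status from dataset')
    assert status == 'FAILED'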
Example #2
def test_classify_ion_images_preds_saved(call_api_mock, image_storage_mock, fill_db):
    call_api_mock.return_value = {
        'predictions': [{'prob': 0.1, 'label': 'on'}, {'prob': 0.9, 'label': 'off'}]
    }

    fp = io.BytesIO()
    Image.new('RGBA', (10, 10)).save(fp, format='PNG')
    fp.seek(0)
    img_bytes = fp.read()
    image_storage_mock.get_image.return_value = img_bytes

    db = DB()
    ds_id = '2000-01-01'
    ds = Dataset.load(db, ds_id)

    services_config = defaultdict(str)
    classify_dataset_ion_images(db, ds, services_config)

    annotations = db.select_with_fields(
        (
            'select off_sample '
            'from dataset d '
            'join job j on j.ds_id = d.id '
            'join annotation m on m.job_id = j.id '
            'where d.id = %s '
            'order by m.id '
        ),
        params=(ds_id,),
    )
    exp_annotations = [
        {'off_sample': {'prob': 0.1, 'label': 'on'}},
        {'off_sample': {'prob': 0.9, 'label': 'off'}},
    ]
    assert annotations == exp_annotations
Example #3
def run_off_sample(sm_config, ds_ids_str, sql_where, fix_missing, overwrite_existing):
    db = DB()

    ds_ids = None
    if ds_ids_str:
        ds_ids = ds_ids_str.split(',')
    elif sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    elif fix_missing:
        logger.info('Checking for missing off-sample jobs...')
        results = db.select(MISSING_OFF_SAMPLE_SEL)
        ds_ids = [ds_id for ds_id, in results]
        logger.info(f'Found {len(ds_ids)} missing off-sample sets')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    es_exp = ESExporter(db, sm_config)
    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'Running off-sample on {i+1} out of {len(ds_ids)}')
            ds = Dataset.load(db, ds_id)
            classify_dataset_ion_images(db, ds, sm_config['services'], overwrite_existing)
            es_exp.reindex_ds(ds_id)
        except Exception:
            logger.error(f'Failed to run off-sample on {ds_id}', exc_info=True)
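run_off_sample selects datasets from exactly one of three sources: an explicit comma-separated ID list, an SQL WHERE fragment, or the fix-missing query. A minimal sketch of a CLI wrapper around it, assuming the engine's SMConfig loader is available; the flag names are illustrative, not the project's actual script:

import argparse

def main():
    # Hypothetical entry point wrapping run_off_sample() defined above.
    parser = argparse.ArgumentParser(description='Run off-sample classification')
    parser.add_argument('--ds-ids', help='Comma-separated dataset IDs')
    parser.add_argument('--sql-where', help='SQL WHERE clause body')
    parser.add_argument('--fix-missing', action='store_true', help='Only datasets missing off-sample results')
    parser.add_argument('--overwrite-existing', action='store_true')
    args = parser.parse_args()

    sm_config = SMConfig.get_conf()  # assumes the engine config has been loaded elsewhere
    run_off_sample(sm_config, args.ds_ids, args.sql_where, args.fix_missing, args.overwrite_existing)

if __name__ == '__main__':
    main()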
Example #4
def run(sm_config, ds_id_str, sql_where, algorithm, use_lithops):
    db = DB()

    if sql_where:
        ds_ids = [
            id for (id, ) in db.select(
                f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        ds_ids = ds_id_str.split(',')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    if use_lithops:
        executor = Executor(sm_config['lithops'])

    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(
                f'[{i+1} / {len(ds_ids)}] Generating ion thumbnail for {ds_id}'
            )
            ds = Dataset.load(db, ds_id)
            if use_lithops:
                # noinspection PyUnboundLocalVariable
                generate_ion_thumbnail_lithops(executor,
                                               db,
                                               ds,
                                               algorithm=algorithm)
            else:
                generate_ion_thumbnail(db, ds, algorithm=algorithm)
        except Exception:
            logger.error(f'Failed on {ds_id}', exc_info=True)
Example #5
    def _on_success(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.FINISHED)

        self.logger.info(" SM update daemon: success")
        self._post_to_slack(msg)
Example #6
    def _on_failure(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.FAILED)

        self.logger.error(" SM update daemon: failure", exc_info=True)
        self._post_to_slack(msg)
Example #7
def test_dataset_update_status_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    status_queue_mock = MagicMock(spec=QueuePublisher)

    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    ds = Dataset(ds_id,
                 'ds_name',
                 'input_path',
                 upload_dt, {},
                 ds_config,
                 DatasetStatus.INDEXING,
                 mol_dbs=['HMDB'],
                 adducts=['+H'])

    ds.set_status(db, es_mock, status_queue_mock, DatasetStatus.FINISHED)

    assert DatasetStatus.FINISHED == Dataset.load(db, ds_id).status
    status_queue_mock.publish.assert_called_once_with({
        'ds_id': ds_id,
        'status': DatasetStatus.FINISHED
    })
Example #8
    def delete(self, ds_id, **kwargs):
        """ Send delete message to the queue """
        ds = Dataset.load(self._db, ds_id)
        self._set_ds_busy(ds, kwargs.get('force', False))
        self._post_sm_msg(ds=ds,
                          queue=self._update_queue,
                          action=DaemonAction.DELETE,
                          **kwargs)
Example #9
def test_sm_daemons_annot_fails(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                post_images_to_annot_service_mock,
                                MolDBServiceWrapperMock,
                                sm_config, test_db, es_dsl_search,
                                clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)

    def throw_exception_function(*args):
        raise Exception('Test')
    get_compute_img_metrics_mock.return_value = throw_exception_function
    filter_sf_metrics_mock.side_effect = lambda x: x

    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])
    es = ESExporter(db)
    annotate_daemon = None

    try:
        ds_id = '2000-01-01_00h00m'
        upload_dt = datetime.now()
        ds_config_str = open(ds_config_path).read()
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        ds = Dataset.load(db, ds_id)
        queue_pub.publish({'ds_id': ds.id, 'ds_name': ds.name, 'action': 'annotate'})

        run_daemons(db, es)

        # dataset and job tables asserts
        row = db.select_one('SELECT status from dataset')
        assert row[0] == 'FAILED'
        row = db.select_one('SELECT status from job')
        assert row[0] == 'FAILED'
    finally:
        db.close()
        if annotate_daemon:
            annotate_daemon.stop()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))
Example #10
def test_dataset_save_overwrite_ds_works(fill_db, metadata, ds_config):
    db = DB()
    es_mock = MagicMock(spec=ESExporter)
    ds = create_test_ds()

    ds.save(db, es_mock)

    assert ds == Dataset.load(db, ds.id)
    es_mock.sync_dataset.assert_called_once_with(ds.id)
Example #11
def test_dataset_update_status_works(fill_db, metadata, ds_config):
    db = DB()
    es_mock = MagicMock(spec=ESExporter)

    ds = create_test_ds(status=DatasetStatus.ANNOTATING)

    ds.set_status(db, es_mock, DatasetStatus.FINISHED)

    assert DatasetStatus.FINISHED == Dataset.load(db, ds.id).status
Example #12
    def _callback(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.INDEXING)

        self.logger.info(f' SM update daemon received a message: {msg}')
        self._manager.post_to_slack(
            'new', f" [v] New {msg['action']} message: {json.dumps(msg)}")

        if msg['action'] == 'update':
            self._manager.index(ds=ds)
        elif msg['action'] == 'delete':
            self._manager.delete(ds=ds)
        else:
            raise Exception(f"Wrong action: {msg['action']}")
Example #13
def add_optical_image(db, ds_id, url, transform, zoom_levels=(1, 2, 4, 8)):
    """Add optical image to dataset.

    Generates scaled and transformed versions of the provided optical image and creates a thumbnail.
    """
    ds = Dataset.load(db, ds_id)
    logger.info(f'Adding optical image {url} to "{ds.id}" dataset')

    dims = _annotation_image_shape(db, ds)
    resp = requests.get(url)
    optical_img = Image.open(io.BytesIO(resp.content))

    raw_optical_img_id = url.split('/')[-1]
    _add_raw_optical_image(db, ds, raw_optical_img_id, transform)
    _add_zoom_optical_images(db, ds, dims, optical_img, transform, zoom_levels)
    _add_thumbnail_optical_image(db, ds, dims, optical_img, transform)
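add_optical_image pulls the raw image over HTTP and stores scaled copies for each zoom level, so the caller only supplies the dataset ID, the image URL and a transform. A minimal usage sketch, assuming the transform is a 3x3 projective matrix mapping optical-image pixels onto the annotation image frame (the identity matrix and URL below are illustrative):

# Hypothetical call; db is an open DB connection as in the other snippets.
identity_transform = [
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
]
add_optical_image(
    db,
    ds_id='2000-01-01',
    url='https://example.com/optical/raw_image_id',
    transform=identity_transform,
)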
Example #14
    def update(self, ds_id, doc, async_es_update, **kwargs):
        """ Save dataset and send update message to the queue """
        ds = Dataset.load(self._db, ds_id)
        ds.name = doc.get('name', ds.name)
        ds.input_path = doc.get('input_path', ds.input_path)
        if 'metadata' in doc:
            ds.metadata = doc['metadata']
        ds.upload_dt = doc.get('upload_dt', ds.upload_dt)
        ds.is_public = doc.get('is_public', ds.is_public)
        ds.save(self._db, None if async_es_update else self._es)

        self._post_sm_msg(
            ds=ds,
            queue=self._update_queue,
            action=DaemonAction.UPDATE,
            fields=list(doc.keys()),
            **kwargs,
        )
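Because update only copies the keys that are present in doc, a partial document is enough to change a couple of fields. A sketch of a call that renames a dataset and makes it private, deferring the Elasticsearch sync to the update daemon (ds_man stands in for whatever manager object exposes this method):

# Hypothetical call; the field names mirror those handled by update() above.
ds_man.update(
    ds_id='2000-01-01',
    doc={'name': 'renamed dataset', 'is_public': False},
    async_es_update=True,
)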
Example #15
def test_dataset_load_existing_ds_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    upload_dt = datetime.strptime('2000-01-01 00:00:00', "%Y-%m-%d %H:%M:%S")
    ds_id = '2000-01-01'
    metadata = {"meta": "data"}

    ds = Dataset.load(db, ds_id)

    assert ds.__dict__ == dict(id=ds_id,
                               name='ds_name',
                               input_path='input_path',
                               upload_dt=upload_dt,
                               metadata=metadata,
                               config=ds_config,
                               status=DatasetStatus.FINISHED,
                               is_public=True,
                               mol_dbs=['HMDB-v4'],
                               adducts=['+H', '+Na', '+K'],
                               ion_img_storage_type='fs')
Example #16
    def _callback(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.ANNOTATING)

        self.logger.info(f" SM annotate daemon received a message: {msg}")
        self._manager.post_to_slack(
            'new', " [v] New annotation message: {}".format(json.dumps(msg)))

        self._manager.annotate(ds=ds,
                               search_job_factory=SearchJob,
                               del_first=msg.get('del_first', False))

        upd_msg = {
            'ds_id': msg['ds_id'],
            'ds_name': msg['ds_name'],
            'action': 'update'
        }
        self._upd_queue_pub.publish(msg=upd_msg, priority=2)
Example #17
def del_optical_image(db, ds_id):
    """Delete raw and zoomed optical images from DB and FS."""

    ds = Dataset.load(db, ds_id)
    logger.info(f'Deleting optical image of "{ds.id}" dataset')
    (raw_img_id, ) = db.select_one(SEL_DATASET_RAW_OPTICAL_IMAGE,
                                   params=(ds.id, ))
    if raw_img_id:
        image_storage.delete_image(image_storage.OPTICAL, ds_id, raw_img_id)
    for img_id in db.select_onecol(SEL_OPTICAL_IMAGE, params=(ds.id, )):
        image_storage.delete_image(image_storage.OPTICAL, ds_id, img_id)
    (thumbnail_img_id, ) = db.select_one(SEL_OPTICAL_IMAGE_THUMBNAIL,
                                         params=(ds.id, ))
    if thumbnail_img_id:
        image_storage.delete_image(image_storage.OPTICAL, ds_id,
                                   thumbnail_img_id)
    db.alter(DEL_DATASET_RAW_OPTICAL_IMAGE, params=(ds.id, ))
    db.alter(DEL_OPTICAL_IMAGE, params=(ds.id, ))
    db.alter(UPD_DATASET_THUMB_OPTICAL_IMAGE, params=(None, None, ds.id))
Example #18
def test_dataset_load_existing_ds_works(fill_db, metadata, ds_config):
    db = DB()
    upload_dt = datetime.strptime('2000-01-01 00:00:00', '%Y-%m-%d %H:%M:%S')
    ds_id = '2000-01-01'

    ds = Dataset.load(db, ds_id)

    assert ds.metadata == metadata
    ds_fields = {k: v for k, v in ds.__dict__.items() if not k.startswith('_')}
    assert ds_fields == dict(
        id=ds_id,
        name='ds_name',
        input_path='input_path',
        upload_dt=upload_dt,
        metadata=metadata,
        config=ds_config,
        status=DatasetStatus.FINISHED,
        status_update_dt=upload_dt,
        is_public=True,
    )
Example #19
def run(ds_id, sql_where):

    conf = SMConfig.get_conf()

    db = DB(conf['db'])
    img_store = ImageStoreServiceWrapper(conf['services']['img_service_url'])

    if sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        ds_ids = ds_id.split(',')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'[{i+1} / {len(ds_ids)}] Updating acq geometry for {ds_id}')
            ds = Dataset.load(db, ds_id)
            (sample_img_id,) = db.select_one(
                "SELECT iim.iso_image_ids[1] from job j "
                "JOIN iso_image_metrics iim on j.id = iim.job_id "
                "WHERE j.ds_id = %s LIMIT 1",
                [ds_id],
            )
            logger.debug(f'sample_img_id: {sample_img_id}')
            if sample_img_id:
                w, h = img_store.get_image_by_id('fs', 'iso_image', sample_img_id).size
                dims = (h, w)  # (n_rows, n_cols)
            else:
                dims = (None, None)

            acq_geometry = make_acq_geometry('ims', None, ds.metadata, dims)

            ds.save_acq_geometry(db, acq_geometry)

        except Exception:
            logger.error(f'Failed on {ds_id}', exc_info=True)
Example #20
    def add(self, doc, use_lithops, **kwargs):
        """Save dataset and send ANNOTATE message to the queue."""
        now = datetime.now()
        if 'id' not in doc:
            doc['id'] = now.strftime('%Y-%m-%d_%Hh%Mm%Ss')

        ds_config_kwargs = dict(
            (k, v) for k, v in doc.items() if k in FLAT_DS_CONFIG_KEYS)

        try:
            ds = Dataset.load(self._db, doc['id'])
            self._set_ds_busy(ds, kwargs.get('force', False))
            config = update_ds_config(ds.config, doc['metadata'],
                                      **ds_config_kwargs)
        except UnknownDSID:
            config = generate_ds_config(doc.get('metadata'),
                                        **ds_config_kwargs)

        ds = Dataset(
            id=doc['id'],
            name=doc.get('name'),
            input_path=doc.get('input_path'),
            upload_dt=doc.get('upload_dt', now.isoformat()),
            metadata=doc.get('metadata'),
            config=config,
            is_public=doc.get('is_public'),
            status=DatasetStatus.QUEUED,
        )
        ds.save(self._db, self._es, allow_insert=True)
        self._status_queue.publish({
            'ds_id': ds.id,
            'action': DaemonAction.ANNOTATE,
            'stage': DaemonActionStage.QUEUED
        })

        queue = self._lit_queue if use_lithops else self._annot_queue
        self._post_sm_msg(ds=ds,
                          queue=queue,
                          action=DaemonAction.ANNOTATE,
                          **kwargs)
        return doc['id']
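add builds the dataset ID from the upload time when none is given, merges any flat config keys in the document into the new or existing dataset config, saves the dataset as QUEUED, and publishes an ANNOTATE message. A sketch of a minimal input document; the values are illustrative and the metadata shape follows the project's metadata schema, which is not shown here:

# Hypothetical input for add(); 'id' is optional and will be generated if absent.
doc = {
    'name': 'mouse brain section',
    'input_path': 's3a://uploads/2000-01-01_00h00m',
    'metadata': {'Data_Type': 'Imaging MS'},
    'is_public': True,
}
ds_id = ds_man.add(doc, use_lithops=False)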
Example #21
    def _on_success(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.FINISHED)

        self.logger.info(" SM annotate daemon: success")

        ds_name, _ = self._manager.fetch_ds_metadata(msg['ds_id'])
        msg['web_app_link'] = self._manager.create_web_app_link(msg)
        self._manager.post_to_slack(
            'dart', ' [v] Annotation succeeded: {}'.format(json.dumps(msg)))

        if msg.get('email'):
            email_body = (
                'Dear METASPACE user,\n\n'
                'Thank you for uploading the "{}" dataset to the METASPACE annotation service. '
                'We are pleased to inform you that the dataset has been processed and is available at {}.\n\n'
                'Best regards,\n'
                'METASPACE Team').format(ds_name, msg['web_app_link'])
            self._send_email(msg['email'],
                             'METASPACE service notification (SUCCESS)',
                             email_body)
Example #22
    def _on_failure(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.FAILED)

        self.logger.error(" SM annotate daemon: failure", exc_info=True)

        ds_name, _ = self._manager.fetch_ds_metadata(msg['ds_id'])
        msg['web_app_link'] = self._manager.create_web_app_link(msg)
        self._manager.post_to_slack(
            'hankey', ' [x] Annotation failed: {}'.format(json.dumps(msg)))

        if msg.get('email'):
            email_body = (
                'Dear METASPACE user,\n\n'
                'We are sorry to inform you that there was a problem during processing of the "{}" dataset '
                'and it could not be annotated. '
                'If this is unexpected, please do not hesitate to contact us for support at [email protected]\n\n'
                'Best regards,\n'
                'METASPACE Team').format(ds_name)
            self._send_email(msg['email'],
                             'METASPACE service notification (FAILED)',
                             email_body)
Example #23
def test_dataset_save_overwrite_ds_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    status_queue_mock = MagicMock(spec=QueuePublisher)

    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    ds = Dataset(ds_id,
                 'ds_name',
                 'input_path',
                 upload_dt, {},
                 ds_config,
                 mol_dbs=['HMDB'],
                 adducts=['+H'])

    ds.save(db, es_mock, status_queue_mock)

    assert ds == Dataset.load(db, ds_id)
    es_mock.sync_dataset.assert_called_once_with(ds_id)
    status_queue_mock.publish.assert_called_with({
        'ds_id': ds_id,
        'status': DatasetStatus.NEW
    })
Example #24
        def _func(ds_id):
            try:
                params = _json_params(req)
                logger.info('Received %s request: %s', request_name, params)
                db = _create_db_conn()
                ds = Dataset.load(db=db, ds_id=ds_id)
                ds_man = _create_dataset_manager(db)
                handler(ds_man, ds, params)

                db.close()
                return {'status': OK['status'], 'ds_id': ds_id}
            except UnknownDSID as e:
                logger.warning(e.message)
                resp.status = ERR_DS_NOT_EXIST['status_code']
                return {'status': ERR_DS_NOT_EXIST['status'], 'ds_id': e.ds_id}
            except DSIsBusy as e:
                logger.warning(e.message)
                resp.status = ERR_DS_BUSY['status_code']
                return {'status': ERR_DS_BUSY['status'], 'ds_id': e.ds_id}

            except Exception as e:
                logger.error(e, exc_info=True)
                resp.status = ERROR['status_code']
                return {'status': ERROR['status'], 'ds_id': ds_id}
Example #25
def test_search_job_imzml_example(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                  post_images_to_annot_service_mock, MolDBServiceWrapperMock, MolDBServiceWrapperMock2,
                                  sm_config, create_fill_sm_database, es_dsl_search, clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock2)

    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x

    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])

    try:
        ds_config_str = open(ds_config_path).read()
        upload_dt = datetime.now()
        ds_id = '2000-01-01_00h00m'
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
        job = SearchJob(img_store=img_store)
        job._sm_config['rabbitmq'] = {}  # avoid talking to RabbitMQ during the test
        ds = Dataset.load(db, ds_id)
        job.run(ds)

        # dataset table asserts
        rows = db.select('SELECT id, name, input_path, upload_dt, status from dataset')
        input_path = join(dirname(__file__), 'data', test_ds_name)
        assert len(rows) == 1
        assert rows[0] == (ds_id, test_ds_name, input_path, upload_dt, DatasetStatus.FINISHED)

        # ms acquisition geometry asserts
        rows = db.select('SELECT acq_geometry from dataset')
        assert len(rows) == 1
        assert rows[0][0] == ds.get_acq_geometry(db)
        assert rows[0][0] == {
            ACQ_GEOMETRY_KEYS.LENGTH_UNIT: 'nm',
            ACQ_GEOMETRY_KEYS.AcqGridSection.section_name: {
                ACQ_GEOMETRY_KEYS.AcqGridSection.REGULAR_GRID: True,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_X: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_Y: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_X: 100,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_Y: 100
            },
            ACQ_GEOMETRY_KEYS.PixelSizeSection.section_name: {
                ACQ_GEOMETRY_KEYS.PixelSizeSection.REGULAR_SIZE: True,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_X: 100,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_Y: 100
            }
        }

        # job table asserts
        rows = db.select('SELECT db_id, ds_id, status, start, finish from job')
        assert len(rows) == 1
        db_id, ds_id, status, start, finish = rows[0]
        assert (db_id, ds_id, status) == (0, '2000-01-01_00h00m', 'FINISHED')
        assert start < finish

        # image metrics asserts
        rows = db.select(('SELECT db_id, sf, adduct, stats, iso_image_ids '
                          'FROM iso_image_metrics '
                          'ORDER BY sf, adduct'))

        assert rows[0] == (0, 'C12H24O', '+K', {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                                                'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])
        assert rows[1] == (0, 'C12H24O', '+Na', {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                                                 'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])

        time.sleep(1)  # Waiting for ES
        # ES asserts
        ds_docs = es_dsl_search.query('term', _type='dataset').execute().to_dict()['hits']['hits']
        assert 1 == len(ds_docs)
        ann_docs = es_dsl_search.query('term', _type='annotation').execute().to_dict()['hits']['hits']
        assert len(ann_docs) == len(rows)
        for doc in ann_docs:
            assert doc['_id'].startswith(ds_id)

    finally:
        db.close()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))
Example #26
def run_coloc_jobs(
    sm_config, ds_id_str, sql_where, fix_missing, fix_corrupt, skip_existing, use_lithops
):
    assert (
        len(
            [
                data_source
                for data_source in [ds_id_str, sql_where, fix_missing, fix_corrupt]
                if data_source
            ]
        )
        == 1
    ), "Exactly one data source (ds_id, sql_where, fix_missing, fix_corrupt) must be specified"
    assert not (ds_id_str and sql_where)

    db = DB()

    if ds_id_str:
        ds_ids = ds_id_str.split(',')
    elif sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        mol_dbs = [
            (doc['id'], doc['name'])
            for doc in db.select_with_fields('SELECT id, name FROM molecular_db m')
        ]
        mol_db_ids, mol_db_names = map(list, zip(*mol_dbs))
        fdrs = [0.05, 0.1, 0.2, 0.5]
        algorithms = ['median_thresholded_cosine', 'cosine']

        if fix_missing:
            logger.info('Checking for missing colocalization jobs...')
            results = db.select(
                MISSING_COLOC_JOBS_SEL, [list(mol_db_ids), list(mol_db_names), fdrs, algorithms]
            )
            ds_ids = [ds_id for ds_id, in results]
            logger.info(f'Found {len(ds_ids)} missing colocalization sets')
        else:
            logger.info(
                'Checking all colocalization jobs. '
                'This is super slow: ~5 minutes per 1000 datasets...'
            )
            results = db.select(
                CORRUPT_COLOC_JOBS_SEL, [list(mol_db_ids), list(mol_db_names), fdrs, algorithms]
            )
            ds_ids = [ds_id for ds_id, in results]
            logger.info(f'Found {len(ds_ids)} corrupt colocalization sets')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    if use_lithops:
        executor = Executor(sm_config['lithops'])

    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'Running colocalization on {i+1} out of {len(ds_ids)}')
            ds = Dataset.load(db, ds_id)
            coloc = Colocalization(db)
            if use_lithops:
                # noinspection PyUnboundLocalVariable
                coloc.run_coloc_job_lithops(executor, ds, reprocess=not skip_existing)
            else:
                coloc.run_coloc_job(ds, reprocess=not skip_existing)
        except Exception:
            logger.error(f'Failed to run colocalization on {ds_id}', exc_info=True)
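run_coloc_jobs insists on exactly one selection mechanism (ds_id_str, sql_where, fix_missing or fix_corrupt), so reprocessing a handful of datasets looks like the call below; the IDs are illustrative and sm_config is assumed to be an already-loaded engine config:

# Hypothetical invocation: recompute colocalization for two datasets,
# skipping (dataset, moldb, fdr, algorithm) combinations that already exist.
run_coloc_jobs(
    sm_config,
    ds_id_str='2000-01-01_00h00m,2000-01-02_00h00m',
    sql_where=None,
    fix_missing=False,
    fix_corrupt=False,
    skip_existing=True,
    use_lithops=False,
)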
Example #27
    def load_ds(self, ds_id):
        return Dataset.load(self._db, ds_id)
Example #28
def test_sm_daemons(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                    post_images_to_annot_service_mock,
                    MolDBServiceWrapperMock,
                    sm_config, test_db, es_dsl_search, clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)

    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x

    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])
    es = ESExporter(db)
    annotate_daemon = None
    update_daemon = None

    try:
        ds_config_str = open(ds_config_path).read()
        upload_dt = datetime.now()
        ds_id = '2000-01-01_00h00m'
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{"Data_Type": "Imaging MS"}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        ds = Dataset.load(db, ds_id)
        queue_pub.publish({'ds_id': ds.id, 'ds_name': ds.name, 'action': 'annotate'})

        run_daemons(db, es)

        # dataset table asserts
        rows = db.select('SELECT id, name, input_path, upload_dt, status from dataset')
        input_path = join(dirname(__file__), 'data', test_ds_name)
        assert len(rows) == 1
        assert rows[0] == (ds_id, test_ds_name, input_path, upload_dt, DatasetStatus.FINISHED)

        # ms acquisition geometry asserts
        rows = db.select('SELECT acq_geometry from dataset')
        assert len(rows) == 1
        assert rows[0][0] == ds.get_acq_geometry(db)
        assert rows[0][0] == {
            ACQ_GEOMETRY_KEYS.LENGTH_UNIT: 'nm',
            ACQ_GEOMETRY_KEYS.AcqGridSection.section_name: {
                ACQ_GEOMETRY_KEYS.AcqGridSection.REGULAR_GRID: True,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_X: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_Y: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_X: 100,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_Y: 100
            },
            ACQ_GEOMETRY_KEYS.PixelSizeSection.section_name: {
                ACQ_GEOMETRY_KEYS.PixelSizeSection.REGULAR_SIZE: True,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_X: 100,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_Y: 100
            }
        }

        # job table asserts
        rows = db.select('SELECT db_id, ds_id, status, start, finish from job')
        assert len(rows) == 1
        db_id, ds_id, status, start, finish = rows[0]
        assert (db_id, ds_id, status) == (0, '2000-01-01_00h00m', JobStatus.FINISHED)
        assert start < finish

        # image metrics asserts
        rows = db.select(('SELECT db_id, sf, adduct, stats, iso_image_ids '
                          'FROM iso_image_metrics '
                          'ORDER BY sf, adduct'))

        assert rows[0] == (0, 'C12H24O', '+K', {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                                                'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])
        assert rows[1] == (0, 'C12H24O', '+Na', {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                                                 'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])

        time.sleep(1)  # Waiting for ES
        # ES asserts
        ds_docs = es_dsl_search.query('term', _type='dataset').execute().to_dict()['hits']['hits']
        assert 1 == len(ds_docs)
        ann_docs = es_dsl_search.query('term', _type='annotation').execute().to_dict()['hits']['hits']
        assert len(ann_docs) == len(rows)
        for doc in ann_docs:
            assert doc['_id'].startswith(ds_id)

    finally:
        db.close()
        if annotate_daemon:
            annotate_daemon.stop()
        if update_daemon:
            update_daemon.stop()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))