Пример #1
0
class ped_writer(unittest.TestCase):
    """Exercise PedWriter against the knowledge base on localhost.

    The test looks up study ``TEST01`` and the markers set labelled
    ``FakeTaqSet01`` through the 'omero' driver, then writes the map and
    family files under ``./foo``.
    """

    def setUp(self):
        self.kb = KB(driver='omero')('localhost', 'root', 'romeo')

    def _extract_data_sample(self, group, mset, dsample_name):
        """Return a dict mapping individual id -> its data sample on mset.

        Fails the test unless every individual in group has exactly one
        data sample of kind dsample_name defined on mset.
        """
        by_individual = {}
        for i in self.kb.get_individuals(group):
            # Build a real list: filter() is lazy on Python 3, which
            # would break both len() and indexing.
            gds = [x for x in self.kb.get_data_samples(i, dsample_name)
                   if x.snpMarkersSet == mset]
            # A unittest assertion is reported as a test failure and is
            # not stripped when running with -O.
            self.assertEqual(len(gds), 1)
            by_individual[i.id] = gds[0]
        return by_individual

    def test_base(self):
        study = self.kb.get_study('TEST01')
        family = self.kb.get_individuals(study)
        mset = self.kb.get_snp_markers_set(label='FakeTaqSet01')
        gds_by_individual = self._extract_data_sample(study, mset,
                                                      'GenotypeDataSample')

        pw = PedWriter(mset, base_path="./foo")
        try:
            pw.write_map()
            pw.write_family(study.id, family, gds_by_individual)
        finally:
            # Always release the writer, even when a write step raises.
            pw.close()
Пример #2
0
class ped_writer(unittest.TestCase):
    """Exercise PedWriter against the knowledge base on localhost.

    The test looks up study ``TEST01`` and the markers set labelled
    ``FakeTaqSet01`` through the "omero" driver, then writes the map and
    family files under ``./foo``.
    """

    def setUp(self):
        self.kb = KB(driver="omero")("localhost", "root", "romeo")

    def _extract_data_sample(self, group, mset, dsample_name):
        """Return a dict mapping individual id -> its data sample on mset.

        Fails the test unless every individual in group has exactly one
        data sample of kind dsample_name defined on mset.
        """
        by_individual = {}
        for i in self.kb.get_individuals(group):
            # Build a real list: filter() is lazy on Python 3, which
            # would break both len() and indexing.
            gds = [x for x in self.kb.get_data_samples(i, dsample_name) if x.snpMarkersSet == mset]
            # A unittest assertion is reported as a test failure and is
            # not stripped when running with -O.
            self.assertEqual(len(gds), 1)
            by_individual[i.id] = gds[0]
        return by_individual

    def test_base(self):
        study = self.kb.get_study("TEST01")
        family = self.kb.get_individuals(study)
        mset = self.kb.get_snp_markers_set(label="FakeTaqSet01")
        gds_by_individual = self._extract_data_sample(study, mset, "GenotypeDataSample")

        pw = PedWriter(mset, base_path="./foo")
        try:
            pw.write_map()
            pw.write_family(study.id, family, gds_by_individual)
        finally:
            # Always release the writer, even when a write step raises.
            pw.close()
Пример #3
0
class VidMapper(object):
    """Replace a record's enrollment code with the id of a plate well.

    The constructor fetches every TiterPlate and every enrollment of the
    given study and builds two lookups:

    * ``plate_map``: plate omero_id -> plate barcode
    * ``enroll_map``: enrollment studyCode -> list of "PlateWell" vessels
      of the enrolled individual
    """

    def __init__(self, host, user, passwd, logger, study):
        self.logger = logger
        self.study = study
        self.kb = KB(driver="omero")(host, user, passwd)
        plates = self.kb.get_objects(self.kb.TiterPlate)
        self.logger.info("fetched %d plates" % len(plates))
        self.plate_map = {}
        for p in plates:
            self.plate_map[p.omero_id] = p.barcode
        enrolls = self.kb.get_enrolled(self.kb.get_study(self.study))
        self.logger.info("fetched %d enrollments" % len(enrolls))
        self.enroll_map = {}
        for e in enrolls:
            self.logger.debug('Retrieving wells for %s' % e.studyCode)
            self.enroll_map[e.studyCode] = list(
                self.kb.get_vessels_by_individual(e.individual, "PlateWell"))

    def map_vid(self, r):
        """Rewrite ``r["source"]`` in place with the matching well id.

        ``r["source"]`` is read as ``<something>|<plate barcode>``; the
        whole string is the key into ``enroll_map`` and the second
        ``|``-separated field selects the plate.

        Raises:
          ValueError: if the code has no ``|``-separated barcode, is not
            enrolled in the study, or does not resolve to exactly one
            well on the plate.
        """
        en_code = r["source"]
        fields = en_code.split("|")
        if len(fields) < 2:
            # Report malformed codes like every other mapping failure
            # instead of leaking an IndexError.
            msg = "%s does not contain a plate barcode" % en_code
            self.logger.error(msg)
            raise ValueError(msg)
        pl_barcode = fields[1]
        try:
            wells = self.enroll_map[en_code]
        except KeyError:
            msg = "%s is not enrolled in %s" % (en_code, self.study)
            self.logger.error(msg)
            raise ValueError(msg)
        self.logger.info("found %d wells for %s" % (len(wells), en_code))
        imm_wells = [
            w for w in wells
            if self.plate_map[w.container.omero_id] == pl_barcode
        ]
        if len(imm_wells) > 1:
            msg = ("more than 1 (%d) immuno wells for plate %s" %
                   (len(imm_wells), pl_barcode))
            self.logger.error(msg)
            raise ValueError(msg)
        if not imm_wells:
            msg = "no immuno well for plate %s" % pl_barcode
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(msg)
            raise ValueError(msg)
        r["source"] = imm_wells[0].id
Пример #4
0
class VidMapper(object):
    """Replace a record's enrollment code with the id of a plate well.

    The constructor fetches every TiterPlate and every enrollment of the
    given study and builds two lookups:

    * ``plate_map``: plate omero_id -> plate barcode
    * ``enroll_map``: enrollment studyCode -> list of "PlateWell" vessels
      of the enrolled individual
    """

    def __init__(self, host, user, passwd, logger, study):
        self.logger = logger
        self.study = study
        self.kb = KB(driver="omero")(host, user, passwd)
        plates = self.kb.get_objects(self.kb.TiterPlate)
        self.logger.info("fetched %d plates" % len(plates))
        self.plate_map = {}
        for p in plates:
            self.plate_map[p.omero_id] = p.barcode
        enrolls = self.kb.get_enrolled(self.kb.get_study(self.study))
        self.logger.info("fetched %d enrollments" % len(enrolls))
        self.enroll_map = {}
        for e in enrolls:
            self.logger.debug("Retrieving wells for %s" % e.studyCode)
            self.enroll_map[e.studyCode] = list(self.kb.get_vessels_by_individual(e.individual, "PlateWell"))

    def map_vid(self, r):
        """Rewrite ``r["source"]`` in place with the matching well id.

        ``r["source"]`` is read as ``<something>|<plate barcode>``; the
        whole string is the key into ``enroll_map`` and the second
        ``|``-separated field selects the plate.

        Raises:
          ValueError: if the code has no ``|``-separated barcode, is not
            enrolled in the study, or does not resolve to exactly one
            well on the plate.
        """
        en_code = r["source"]
        fields = en_code.split("|")
        if len(fields) < 2:
            # Report malformed codes like every other mapping failure
            # instead of leaking an IndexError.
            msg = "%s does not contain a plate barcode" % en_code
            self.logger.error(msg)
            raise ValueError(msg)
        pl_barcode = fields[1]
        try:
            wells = self.enroll_map[en_code]
        except KeyError:
            msg = "%s is not enrolled in %s" % (en_code, self.study)
            self.logger.error(msg)
            raise ValueError(msg)
        self.logger.info("found %d wells for %s" % (len(wells), en_code))
        imm_wells = [w for w in wells if self.plate_map[w.container.omero_id] == pl_barcode]
        if len(imm_wells) > 1:
            msg = "more than 1 (%d) immuno wells for plate %s" % (len(imm_wells), pl_barcode)
            self.logger.error(msg)
            raise ValueError(msg)
        if not imm_wells:
            msg = "no immuno well for plate %s" % pl_barcode
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(msg)
            raise ValueError(msg)
        r["source"] = imm_wells[0].id
The first element of a marker-defining tuple is its label, the second
is its dbSNP label (if available), and the third is the marker mask.

.. todo::

  add a link to the reference documentation

Now we will load the markers set definition into Omero.biobank.

**Note:** We are considering an ideal case where none of the markers
  is already in the db.

"""

# The import is recorded in the context of study TEST01.
study = kb.get_study('TEST01')

# Action handed to create_markers() below; reloaded right after creation.
action = kb.create_an_action(study, doc='importing markers')
action.reload()

# Placeholder provenance values passed along with the marker definitions.
source = 'foobar'
context = 'fooctx'
release = 'foorel'
ref_rs_genome = 'foo-rs-genome'
dbsnp_build = 13200

lvs = kb.create_markers(
    source, context, release, ref_rs_genome, dbsnp_build,
    taq_man_markers, action)

""" ..

where lvs is a list of (label, vid) tuples.

We can assume that the markers above have been aligned against a
Пример #6
0
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError, ve:
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    logger.debug('Reading codes from source list')
    with open(args.source_list) as f:
        codes = [row.strip() for row in f.readlines()]
    logger.debug('Found %d codes to discard' % len(codes))

    logger.debug('Retrieving enrollments for study %s' % args.source_study)
    source_enrolls = kb.get_enrolled(kb.get_study(args.source_study))
    logger.debug('Retrieved %d enrollments' % len(source_enrolls))
    src_st_lookup = {}
    for sen in source_enrolls:
        src_st_lookup[sen.studyCode] = sen

    to_be_discarded = []
    discard_st = kb.get_study(args.discard_study)
    if discard_st is None:
        logger.critical('Study with label %s not found!' % args.discard_study)
        sys.exit(2)

    for c in codes:
        try:
            src_st_lookup[c].study = discard_st
            to_be_discarded.append(src_st_lookup[c])
Пример #7
0
class TestKB(KBObjectCreator):
    """Create, save, look up and delete knowledge base objects.

    Objects saved by a test are queued in ``kill_list`` and deleted in
    tearDown.
    """

    def __init__(self, name):
        super(TestKB, self).__init__(name)
        self.kill_list = []

    def setUp(self):
        self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASS)

    def tearDown(self):
        # Delete most recently saved objects first.
        for x in reversed(self.kill_list):
            self.kb.delete(x)
        self.kill_list = []

    def check_object(self, o, conf, otype):
        """Assert that o is an otype whose attributes match conf.

        The assertions used to be wrapped in a bare ``except: pass``,
        which swallowed every failure and made the check a no-op; they
        now propagate to the test runner.
        """
        self.assertTrue(isinstance(o, otype))
        for k, v in conf.items():
            actual = getattr(o, k)
            # FIXME this is omero specific...
            if hasattr(v, 'ome_obj'):
                self.assertEqual(actual.id, v.id)
                self.assertEqual(type(actual), type(v))
            elif hasattr(v, '_id'):
                self.assertEqual(actual._id, v._id)
            else:
                self.assertEqual(actual, v)

    def _save_and_check(self, created, otype):
        """Save a freshly created (conf, object) pair and verify it."""
        conf, obj = created
        self.kill_list.append(obj.save())
        self.check_object(obj, conf, otype)

    def test_study(self):
        self._save_and_check(self.create_study(), self.kb.Study)

    def test_study_ops(self):
        conf, s = self.create_study()
        s.save()
        xs = self.kb.get_study(conf['label'])
        self.assertTrue(xs is not None)
        self.assertEqual(xs.id, s.id)
        self.assertEqual(xs.label, s.label)
        self.kb.delete(s)
        self.assertEqual(self.kb.get_study(conf['label']), None)

    def test_device(self):
        self._save_and_check(self.create_device(), self.kb.Device)

    def test_hardware_device(self):
        self._save_and_check(self.create_hardware_device(),
                             self.kb.HardwareDevice)

    def test_device_ops(self):
        conf, d = self.create_device()
        d.save()
        xs = self.kb.get_device(conf['label'])
        self.assertTrue(xs is not None)
        self.check_object(xs, conf, self.kb.Device)
        self.kb.delete(d)
        self.assertEqual(self.kb.get_device(conf['label']), None)

    def test_action_setup(self):
        self._save_and_check(self.create_action_setup(), self.kb.ActionSetup)

    def test_action(self):
        self._save_and_check(self.create_action(), self.kb.Action)

    def test_action_on_vessel(self):
        self._save_and_check(self.create_action_on_vessel(),
                             self.kb.ActionOnVessel)

    def test_action_on_data_sample(self):
        self._save_and_check(self.create_action_on_data_sample(),
                             self.kb.ActionOnDataSample)

    def test_action_on_data_collection_item(self):
        # NOTE(review): the expected type is ActionOnDataSample, the same
        # as in the previous test -- possibly a copy/paste leftover;
        # confirm against the knowledge base classes.
        self._save_and_check(self.create_action_on_data_collection_item(),
                             self.kb.ActionOnDataSample)
Пример #8
0
supported on mset0, linked to an individual contained in a given
group. Just to keep things simple, we will select, for each
individual, the first of the list of known GenotypeDataSample for that
mset, if there is at least one, otherwise we will skip the individual.

"""
def extract_data_sample(group, mset, dsample_name):
  """Map individual ids to one data sample defined on mset.

  For every individual in group, the data samples of kind dsample_name
  are restricted to those whose snpMarkersSet equals mset. The first
  match is kept; individuals without any match are skipped, as described
  in the text above.

  Returns a dict {individual id: data sample}.
  """
  by_individual = {}
  for i in kb.get_individuals(group):
    # A list comprehension instead of filter(): it can be tested for
    # emptiness and indexed on both Python 2 and Python 3.
    gds = [x for x in kb.get_data_samples(i, dsample_name)
           if x.snpMarkersSet == mset]
    if not gds:
      continue
    by_individual[i.id] = gds[0]
  return by_individual

# The study TEST01 is used as the group of individuals to scan.
group = kb.get_study(label='TEST01')
# individual id -> GenotypeDataSample defined on mset0
gds0_by_individual = extract_data_sample(group, mset0, 'GenotypeDataSample')

""" ..

Note that what we have now is a dictionary that maps individual ids to
GenotypeDataSample objects. The latter are only handles to the actual
genotyping data, not the data itself.

We can, now, do a global check on data quality.

"""
def do_check(s):
  counts = algo.count_homozygotes(s)
  mafs = algo.maf(None, counts)
  hwe  = algo.hwe(None, counts)
Пример #9
0
class App(object):
    """Load genotype data read through PedReader into the knowledge base.

    The constructor resolves the SNP markers set and the study, creates a
    fresh ActionSetup and makes sure the 'CRS4-load_genotypes-0.1' device
    exists.
    """

    def __init__(self, host, user, passwd, study_label, maker, model, release):
        """Connect and prepare the objects shared by all loaded samples.

        Raises:
          ValueError: if the markers set or the study is not defined.
        """
        self.kb = KB(driver='omero')(host, user, passwd)
        self.mset = self.kb.get_snp_markers_set(maker, model, release)
        self.logger = logging.getLogger()
        if not self.mset:
            raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.' %
                             (maker, model, release))
        # Resolve the study before saving anything, so that an unknown
        # label fails here instead of later when actions are created
        # with an empty context.
        self.study = self.kb.get_study(study_label)
        if not self.study:
            raise ValueError('No known study with label %s' % study_label)
        #--
        alabel = 'load_genotypes-setup-%s' % time.time()
        self.asetup = self.kb.factory.create(self.kb.ActionSetup, {
            'label': alabel,
            'conf': ''
        }).save()
        #--
        dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
        dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
        device = self.kb.get_device(dlabel)
        if not device:
            device = self.kb.factory.create(
                self.kb.Device, {
                    'label': dlabel,
                    'maker': dmaker,
                    'model': dmodel,
                    'release': drelease
                }).save()
        self.device = device

    def check_snp_markers_set(self, marker_types, marker_names):
        """Check that every 'M' marker is part of the markers set.

        marker_types and marker_names are walked in lockstep; only names
        whose type is 'M' are checked against the set's rs_label column.

        Raises:
          ValueError: on the first 'M' marker missing from the set.
        """
        self.logger.info('start checking snp_markers_set')
        mdefs, _ = self.kb.get_snp_markers_set_content(self.mset)
        # Build the lookup once: membership tests on a set do not rescan
        # all labels for every marker.
        rs_labels = set(mdefs['rs_label'])
        # Built-in zip stops at the shorter input like izip did, and it
        # exists on both Python 2 and Python 3.
        for t, n in zip(marker_types, marker_names):
            if t == 'M' and n not in rs_labels:
                msg = 'marker %s is not in the specified SNPMarkersSet' % n
                self.logger.critical(msg)
                raise ValueError(msg)
        self.logger.info('done checking snp_markers_set')

    def create_action(self, target):
        """Save and return a MEASUREMENT ActionOnVessel targeting target."""
        conf = {
            'setup': self.asetup,
            'device': self.device,
            'actionCategory': self.kb.ActionCategory.MEASUREMENT,
            'operator': 'Alfred E. Neumann',
            'context': self.study,
            'target': target,
        }
        return self.kb.factory.create(self.kb.ActionOnVessel, conf).save()

    def create_data_sample(self, action, label):
        """Save and return a USABLE GenotypeDataSample on the markers set."""
        conf = {
            'snpMarkersSet': self.mset,
            'label': label,
            'status': self.kb.DataSampleStatus.USABLE,
            'action': action
        }
        return self.kb.factory.create(self.kb.GenotypeDataSample, conf).save()

    def load(self, pedfile, datfile, conf_value=1.0):
        """Load every record PedReader yields for pedfile/datfile.

        Records whose 'sample_label' does not resolve to a vessel are
        logged as errors and skipped.
        """
        pr = PedReader(pedfile, datfile, conf_value)
        self.check_snp_markers_set(pr.marker_types, pr.marker_names)
        #--
        self.logger.info('start loading from pedfile %s' % pedfile.name)
        for x in pr:
            sample = self.kb.get_vessel(x['sample_label'])
            if not sample:
                self.logger.error('No sample with label %s in VL' %
                                  x['sample_label'])
                continue
            action = self.create_action(sample)
            # Keep the id: it is still needed after the action is unloaded.
            avid = action.id
            action.unload()
            data_sample = self.create_data_sample(action, x['label'])
            self.kb.add_gdo_data_object(avid, data_sample,
                                        x['probs'], x['confs'])
            self.logger.info('-- loaded %s' % x['label'])
        self.logger.info('done loading from pedfile %s' % pedfile.name)
Пример #10
0
class App(object):
  """Load genotype data read through PedReader into the knowledge base.

  The constructor resolves the SNP markers set and the study, creates a
  fresh ActionSetup and makes sure the 'CRS4-load_genotypes-0.1' device
  exists.
  """

  def __init__(self, host, user, passwd,
               study_label,
               maker, model, release):
    """Connect and prepare the objects shared by all loaded samples.

    Raises:
      ValueError: if the markers set or the study is not defined.
    """
    self.kb = KB(driver='omero')(host, user, passwd)
    self.mset = self.kb.get_snp_markers_set(maker, model, release)
    self.logger = logging.getLogger()
    if not self.mset:
      raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                       % (maker, model, release))
    # Resolve the study before saving anything, so that an unknown label
    # fails here instead of later when actions are created with an empty
    # context.
    self.study = self.kb.get_study(study_label)
    if not self.study:
      raise ValueError('No known study with label %s' % study_label)
    #--
    alabel = 'load_genotypes-setup-%s' % time.time()
    self.asetup = self.kb.factory.create(self.kb.ActionSetup,
                                         {'label' : alabel,
                                          'conf'  : ''}).save()
    #--
    dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
    dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
    device = self.kb.get_device(dlabel)
    if not device:
      device = self.kb.factory.create(self.kb.Device,
                                      {'label' : dlabel,
                                       'maker' : dmaker,
                                       'model' : dmodel,
                                       'release' : drelease}).save()
    self.device = device

  def check_snp_markers_set(self, marker_types, marker_names):
    """Check that every 'M' marker is part of the markers set.

    marker_types and marker_names are walked in lockstep; only names
    whose type is 'M' are checked against the set's rs_label column.

    Raises:
      ValueError: on the first 'M' marker missing from the set.
    """
    self.logger.info('start checking snp_markers_set')
    mdefs, _ = self.kb.get_snp_markers_set_content(self.mset)
    # Build the lookup once: membership tests on a set do not rescan all
    # labels for every marker.
    rs_labels = set(mdefs['rs_label'])
    # Built-in zip stops at the shorter input like izip did, and it
    # exists on both Python 2 and Python 3.
    for t, n in zip(marker_types, marker_names):
      if t == 'M' and n not in rs_labels:
        msg = 'marker %s is not in the specified SNPMarkersSet' % n
        self.logger.critical(msg)
        raise ValueError(msg)
    self.logger.info('done checking snp_markers_set')

  def create_action(self, target):
    """Save and return a MEASUREMENT ActionOnVessel targeting target."""
    conf = {'setup' : self.asetup,
            'device' : self.device,
            'actionCategory' : self.kb.ActionCategory.MEASUREMENT,
            'operator' : 'Alfred E. Neumann',
            'context'  : self.study,
            'target'   : target,
            }
    return self.kb.factory.create(self.kb.ActionOnVessel, conf).save()

  def create_data_sample(self, action, label):
    """Save and return a USABLE GenotypeDataSample on the markers set."""
    conf = {'snpMarkersSet' : self.mset,
            'label' : label,
            'status' : self.kb.DataSampleStatus.USABLE,
            'action' : action}
    return self.kb.factory.create(self.kb.GenotypeDataSample, conf).save()

  def load(self, pedfile, datfile, conf_value=1.0):
    """Load every record PedReader yields for pedfile/datfile.

    Records whose 'sample_label' does not resolve to a vessel are logged
    as errors and skipped.
    """
    pr = PedReader(pedfile, datfile, conf_value)
    self.check_snp_markers_set(pr.marker_types, pr.marker_names)
    #--
    self.logger.info('start loading from pedfile %s' % pedfile.name)
    for x in pr:
      sample = self.kb.get_vessel(x['sample_label'])
      if not sample:
        self.logger.error('No sample with label %s in VL' % x['sample_label'])
        continue
      action = self.create_action(sample)
      # Keep the id: it is still needed after the action is unloaded.
      avid = action.id
      action.unload()
      data_sample = self.create_data_sample(action, x['label'])
      self.kb.add_gdo_data_object(avid, data_sample,
                                  x['probs'], x['confs'])
      self.logger.info('-- loaded %s' % x['label'])
    self.logger.info('done loading from pedfile %s' % pedfile.name)
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError, ve:
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    logger.debug('Reading codes from source list')
    with open(args.source_list) as f:
        codes = [row.strip() for row in f.readlines()]
    logger.debug('Found %d codes to discard' % len(codes))

    logger.debug('Retrieving enrollments for study %s' % args.source_study)
    source_enrolls = kb.get_enrolled(kb.get_study(args.source_study))
    logger.debug('Retrieved %d enrollments' % len(source_enrolls))
    src_st_lookup = {}
    for sen in source_enrolls:
        src_st_lookup[sen.studyCode] = sen

    to_be_discarded = []
    discard_st = kb.get_study(args.discard_study)
    if discard_st is None:
        logger.critical('Study with label %s not found!' % args.discard_study)
        sys.exit(2)

    for c in codes:
        try:
            src_st_lookup[c].study = discard_st
            to_be_discarded.append(src_st_lookup[c])
Пример #12
0
class Core(object):
  """Helpers shared by importers: connection, lookups and preloading."""

  def __init__(self, host=None, user=None, passwd=None, group=None,
               keep_tokens=1, study_label=None, logger=None):
    """Connect to the knowledge base and optionally pin a default study.

    Raises:
      ValueError: if study_label is given but no such study is known.
    """
    self.kb = KB(driver='omero')(host, user, passwd, group, keep_tokens)
    self.logger = logger if logger else logging.getLogger()
    self.record_counter = 0
    self.default_study = None
    if study_label:
      s = self.kb.get_study(study_label)
      if not s:
        raise ValueError('No known study with label %s' % study_label)
      self.logger.info('Selecting %s[%d,%s] as default study' %
                       (s.label, s.omero_id, s.id))
      self.default_study = s

  @classmethod
  def find_action_setup_conf(klass, args):
    """Collect the public attributes of args into a dict.

    Attributes whose name starts with '_' or 'func' are left out. The
    'ifile', 'ofile' and 'report_file' entries must be present and are
    replaced by their ``.name``.
    """
    action_setup_conf = {}
    for x in dir(args):
      if x.startswith(('_', 'func')):
        continue
      action_setup_conf[x] = getattr(args, x)
    # HACKS: presumably these hold open files; only their names are kept.
    for key in ('ifile', 'ofile', 'report_file'):
      action_setup_conf[key] = action_setup_conf[key].name
    return action_setup_conf

  @classmethod
  def get_action_setup_options(klass, record, action_setup_conf = None,
                               object_history = None):
    """Return a JSON string with the options attached to record.

    record['options'], when present and non-empty, is parsed as
    comma-separated ``key=value`` pairs. action_setup_conf and
    object_history, when truthy, are added under 'importer_setup' and
    'object_history'.
    """
    options = {}
    if 'options' in record and record['options']:
      for kv in record['options'].split(','):
        # Split on the first '=' only, so a value may itself contain
        # '=' (e.g. a URL with a query string).
        k, v = kv.split('=', 1)
        options[k] = v
    if action_setup_conf:
      options['importer_setup'] = action_setup_conf
    if object_history:
      options['object_history'] = object_history
    return json.dumps(options)

  def get_device(self, label, maker, model, release):
    """Return the device called label, creating it if it is unknown."""
    device = self.kb.get_device(label)
    if not device:
      self.logger.debug('creating a device')
      device = self.kb.create_device(label, maker, model, release)
    return device

  def get_action_setup(self, label, conf):
    """
    Return the ActionSetup corresponding to label if there is one,
    else create a new one using conf.
    """
    asetup = self.kb.get_action_setup(label)
    if not asetup:
      kb_conf = {
        'label': label,
        'conf': json.dumps(conf),
        }
      asetup = self.kb.factory.create(self.kb.ActionSetup, kb_conf).save()
    return asetup

  def get_study(self, label):
    """Return the default study if set, else the study called label.

    An unknown label results in a new study being created and saved.
    """
    if self.default_study:
      return self.default_study
    study = self.kb.get_study(label)
    if not study:
      study = self.kb.factory.create(self.kb.Study, {'label': label}).save()
    return study

  def _shared_value(self, col_name, records, error_message):
    """Return the col_name value common to all records, else raise."""
    expected = records[0][col_name]
    for r in records:
      if r[col_name] != expected:
        self.logger.critical(error_message)
        raise ValueError(error_message)
    return expected

  def find_study(self, records):
    """Return the study shared by all records.

    Raises:
      ValueError: if the records carry different 'study' labels.
    """
    study_label = self._shared_value(
      'study', records, 'all records should have the same study label')
    return self.get_study(study_label)

  def find_klass(self, col_name, records):
    """Return the knowledge base attribute named by records' col_name.

    Raises:
      ValueError: if the records carry different col_name values.
    """
    o_type = self._shared_value(
      col_name, records, 'all records should have the same %s' % col_name)
    return getattr(self.kb, o_type)

  def __preload_items__(self, key_field, klass, preloaded):
    """Store every klass object in preloaded, keyed by its key_field."""
    for o in self.kb.get_objects(klass):
      key = getattr(o, key_field)
      # Keys are expected to be unique across the fetched objects.
      assert key not in preloaded
      preloaded[key] = o

  def preload_by_type(self, name, klass, preloaded):
    """Preload all klass objects keyed by id; name is used for logging."""
    self.logger.info('start preloading %s' % name)
    self.__preload_items__('id', klass, preloaded)
    self.logger.info('done preloading %s' % name)

  def preload_studies(self, preloaded):
    """Preload all studies keyed by label."""
    self.logger.info('start preloading studies')
    self.__preload_items__('label', self.kb.Study, preloaded)
    self.logger.info('done preloading studies')

  def missing_fields(self, fields, r):
    """Return the first of fields missing from r, or False if none is."""
    for f in fields:
      if f not in r:
        return f
    return False
Пример #13
0
def main(argv):
    """Write one tab-separated datasheet per plate listed in a file.

    For every barcode read from args.plates_list a file named
    '<barcode>_datasheet.csv' is created in args.out_dir, with one row
    per well ordered by slot. Slots without a well are filled with
    placeholder rows whose sample fields are 'X'.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()

    kb = KB(driver='omero')(args.host, args.user, args.passwd)

    # Load enrollments and individual (needed to build sample label and for gender field)
    enrolls = []
    for sl in STUDY_LABELS:
        logger.debug('Loading enrollments for study %s' % sl)
        enrolls.extend(kb.get_enrolled(kb.get_study(sl)))
        logger.debug('Fetched %d individuals' % len(enrolls))

    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)

    # Group the valid EHR records by individual id.
    logger.debug('Loading EHR records')
    ehr_records_map = {}
    for r in kb.get_ehr_records('(valid == True)'):
        ehr_records_map.setdefault(r['i_id'], []).append(r)

    # Read plate barcodes
    with open(args.plates_list) as pl_list:
        barcodes = [row.strip() for row in pl_list]

    # Load plate
    for plate_barcode in barcodes:
        logger.info('Creating datasheet for plate %s' % plate_barcode)
        pl = load_plate(plate_barcode, kb)
        if not pl:
            logger.error('No plate with barcode %s exists, skipping it.' %
                         (plate_barcode))
            continue

        # Load wells for selected plate
        pl_wells = get_wells_by_plate(plate_barcode, kb)

        out_path = os.path.join(args.out_dir,
                                '%s_datasheet.csv' % plate_barcode)
        with open(out_path, 'w') as of:
            writer = csv.DictWriter(of, CSV_FIELDS, delimiter='\t')
            writer.writeheader()
            last_slot = 0
            # items() sorts the same as iteritems() and also exists on
            # Python 3.
            for slot, well in sorted(pl_wells.items()):
                # Look the enrollments up once per well instead of once
                # per output column.
                well_enrolls = wells_lookup[well]
                individual = well_enrolls[0].individual
                t1d, ms = get_affections(ehr_records_map[individual.id])
                # Fill empty slots. '<' rather than '!=' so the loop
                # cannot run forever if a slot is not ahead of last_slot
                # (e.g. a slot numbered 0).
                while last_slot < slot - 1:
                    last_slot += 1
                    writer.writerow({
                        'Sample_ID': 'X',
                        'PLATE_barcode': pl.barcode,
                        'PLATE_name': pl.label,
                        'WELL_label': get_well_label(last_slot),
                        'INDIVIDUAL_gender': 'X',
                        'INDIVIDUAL_vid': 'X',
                        'T1D_affected': 'X',
                        'MS_affected': 'X'
                    })

                writer.writerow({
                    'Sample_ID': get_ichip_sample_code(well_enrolls,
                                                       pl.barcode),
                    'PLATE_barcode': pl.barcode,
                    'PLATE_name': pl.label,
                    'WELL_label': well.label,
                    'INDIVIDUAL_gender': map_gender(individual),
                    'INDIVIDUAL_vid': individual.id,
                    'T1D_affected': t1d,
                    'MS_affected': ms
                })
                last_slot = slot
Пример #14
0
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError, ve:
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    with open(args.couples_list) as f:
        reader = csv.reader(f, delimiter='\t')
        couples = []
        for row in reader:
            couples.append((row[0], row[1]))
    logger.info('%d couples are going to be swapped' % len(couples))

    logger.debug('Retrieving enrollments for study %s' % args.study)
    enrolls = kb.get_enrolled(kb.get_study(args.study))
    logger.debug('Retrieved %d enrollments' % len(enrolls))

    en_lookup = {}
    for en in enrolls:
        en_lookup[en.studyCode] = en

    for en_code1, en_code2 in couples:
        logger.info('Swapping couple %s - %s' % (en_code1, en_code2))
        try:
            en1 = en_lookup[en_code1]
            en2 = en_lookup[en_code2]
        except KeyError, ke:
            logger.error('Code %s not found in study %s' % (ke, args.study))
            sys.exit(2)
            
Пример #15
0
class Core(object):
    """Helpers shared by importers: connection, lookups and preloading."""

    def __init__(self,
                 host=None,
                 user=None,
                 passwd=None,
                 group=None,
                 keep_tokens=1,
                 study_label=None,
                 logger=None):
        """Connect to the knowledge base and optionally pin a default study.

        Raises:
          ValueError: if study_label is given but no such study is known.
        """
        self.kb = KB(driver='omero')(host, user, passwd, group, keep_tokens)
        self.logger = logger if logger else logging.getLogger()
        self.record_counter = 0
        self.default_study = None
        if study_label:
            s = self.kb.get_study(study_label)
            if not s:
                raise ValueError('No known study with label %s' % study_label)
            self.logger.info('Selecting %s[%d,%s] as default study' %
                             (s.label, s.omero_id, s.id))
            self.default_study = s

    @classmethod
    def find_action_setup_conf(klass, args):
        """Collect the public attributes of args into a dict.

        Attributes whose name starts with '_' or 'func' are left out. The
        'ifile', 'ofile' and 'report_file' entries must be present and
        are replaced by their ``.name``.
        """
        action_setup_conf = {}
        for x in dir(args):
            if x.startswith(('_', 'func')):
                continue
            action_setup_conf[x] = getattr(args, x)
        # HACKS: presumably these hold open files; only their names are
        # kept.
        for key in ('ifile', 'ofile', 'report_file'):
            action_setup_conf[key] = action_setup_conf[key].name
        return action_setup_conf

    @classmethod
    def get_action_setup_options(klass,
                                 record,
                                 action_setup_conf=None,
                                 object_history=None):
        """Return a JSON string with the options attached to record.

        record['options'], when present and non-empty, is parsed as
        comma-separated ``key=value`` pairs. action_setup_conf and
        object_history, when truthy, are added under 'importer_setup' and
        'object_history'.
        """
        options = {}
        if 'options' in record and record['options']:
            for kv in record['options'].split(','):
                # Split on the first '=' only, so a value may itself
                # contain '=' (e.g. a URL with a query string).
                k, v = kv.split('=', 1)
                options[k] = v
        if action_setup_conf:
            options['importer_setup'] = action_setup_conf
        if object_history:
            options['object_history'] = object_history
        return json.dumps(options)

    def get_device(self, label, maker, model, release):
        """Return the device called label, creating it if it is unknown."""
        device = self.kb.get_device(label)
        if not device:
            self.logger.debug('creating a device')
            device = self.kb.create_device(label, maker, model, release)
        return device

    def get_action_setup(self, label, conf):
        """
        Return the ActionSetup corresponding to label if there is one,
        else create a new one using conf.
        """
        asetup = self.kb.get_action_setup(label)
        if not asetup:
            kb_conf = {
                'label': label,
                'conf': json.dumps(conf),
            }
            asetup = self.kb.factory.create(self.kb.ActionSetup,
                                            kb_conf).save()
        return asetup

    def get_study(self, label):
        """Return the default study if set, else the study called label.

        An unknown label results in a new study being created and saved.
        """
        if self.default_study:
            return self.default_study
        study = self.kb.get_study(label)
        if not study:
            study = self.kb.factory.create(self.kb.Study, {
                'label': label
            }).save()
        return study

    def _shared_value(self, col_name, records, error_message):
        """Return the col_name value common to all records, else raise."""
        expected = records[0][col_name]
        for r in records:
            if r[col_name] != expected:
                self.logger.critical(error_message)
                raise ValueError(error_message)
        return expected

    def find_study(self, records):
        """Return the study shared by all records.

        Raises:
          ValueError: if the records carry different 'study' labels.
        """
        study_label = self._shared_value(
            'study', records, 'all records should have the same study label')
        return self.get_study(study_label)

    def find_klass(self, col_name, records):
        """Return the knowledge base attribute named by records' col_name.

        Raises:
          ValueError: if the records carry different col_name values.
        """
        o_type = self._shared_value(
            col_name, records,
            'all records should have the same %s' % col_name)
        return getattr(self.kb, o_type)

    def __preload_items__(self, key_field, klass, preloaded):
        """Store every klass object in preloaded, keyed by its key_field."""
        for o in self.kb.get_objects(klass):
            key = getattr(o, key_field)
            # Keys are expected to be unique across the fetched objects.
            assert key not in preloaded
            preloaded[key] = o

    def preload_by_type(self, name, klass, preloaded):
        """Preload all klass objects keyed by id; name is used for logging."""
        self.logger.info('start preloading %s' % name)
        self.__preload_items__('id', klass, preloaded)
        self.logger.info('done preloading %s' % name)

    def preload_studies(self, preloaded):
        """Preload all studies keyed by label."""
        self.logger.info('start preloading studies')
        self.__preload_items__('label', self.kb.Study, preloaded)
        self.logger.info('done preloading studies')

    def missing_fields(self, fields, r):
        """Return the first of fields missing from r, or False if none is."""
        for f in fields:
            if f not in r:
                return f
        return False
Пример #16
0
class TestKB(KBObjectCreator):
  """Creation and lookup tests for basic KB objects (study, devices, actions).

  Objects are built with the ``create_*`` helpers inherited from
  ``KBObjectCreator``. Saved objects appended to ``kill_list`` are deleted
  again in ``tearDown``.
  """

  def __init__(self, name):
    super(TestKB, self).__init__(name)
    # Saved objects that tearDown has to delete.
    self.kill_list = []

  def setUp(self):
    self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASS)

  def tearDown(self):
    # Delete in reverse order of creation (presumably so that dependent
    # objects go before the ones they reference -- TODO confirm).
    self.kill_list.reverse()
    for x in self.kill_list:
      self.kb.delete(x)
    self.kill_list = []

  def check_object(self, o, conf, otype):
    """Assert that ``o`` is an ``otype`` whose attributes match ``conf``.

    For every key of ``conf`` the attribute of the same name on ``o`` is
    compared with the configured value: by ``id`` and type when the value
    has an ``ome_obj`` attribute, by ``_id`` when it has one, and by plain
    equality otherwise.

    Assertion failures propagate to the test runner. They used to be
    swallowed by a bare ``except: pass``, which made every test relying on
    this helper pass unconditionally.
    """
    self.assertTrue(isinstance(o, otype))
    for k in conf.keys():
      v = conf[k]
      # FIXME this is omero specific...
      if hasattr(v, 'ome_obj'):
        self.assertEqual(getattr(o, k).id, v.id)
        self.assertEqual(type(getattr(o, k)), type(v))
      elif hasattr(v, '_id'):
        self.assertEqual(getattr(o, k)._id, v._id)
      else:
        self.assertEqual(getattr(o, k), v)

  def test_study(self):
    conf, s = self.create_study()
    self.kill_list.append(s.save())
    self.check_object(s, conf, self.kb.Study)

  def test_study_ops(self):
    """A saved study can be fetched by label and is gone after delete."""
    conf, s = self.create_study()
    s.save()
    xs = self.kb.get_study(conf['label'])
    self.assertTrue(xs is not None)
    self.assertEqual(xs.id, s.id)
    self.assertEqual(xs.label, s.label)
    self.kb.delete(s)
    self.assertEqual(self.kb.get_study(conf['label']), None)

  def test_device(self):
    conf, d = self.create_device()
    self.kill_list.append(d.save())
    self.check_object(d, conf, self.kb.Device)

  def test_hardware_device(self):
    conf, d = self.create_hardware_device()
    self.kill_list.append(d.save())
    self.check_object(d, conf, self.kb.HardwareDevice)

  def test_device_ops(self):
    """A saved device can be fetched by label and is gone after delete."""
    conf, d = self.create_device()
    d.save()
    xs = self.kb.get_device(conf['label'])
    self.assertTrue(xs is not None)
    self.check_object(xs, conf, self.kb.Device)
    self.kb.delete(d)
    self.assertEqual(self.kb.get_device(conf['label']), None)

  def test_action_setup(self):
    conf, a = self.create_action_setup()
    self.kill_list.append(a.save())
    self.check_object(a, conf, self.kb.ActionSetup)

  def test_action(self):
    conf, action = self.create_action()
    self.kill_list.append(action.save())
    self.check_object(action, conf, self.kb.Action)

  def test_action_on_vessel(self):
    conf, action = self.create_action_on_vessel()
    self.kill_list.append(action.save())
    self.check_object(action, conf, self.kb.ActionOnVessel)

  def test_action_on_data_sample(self):
    conf, action = self.create_action_on_data_sample()
    self.kill_list.append(action.save())
    self.check_object(action, conf, self.kb.ActionOnDataSample)

  def test_action_on_data_collection_item(self):
    conf, action = self.create_action_on_data_collection_item()
    self.kill_list.append(action.save())
    # NOTE(review): this checks against ActionOnDataSample, the same class
    # as the test above; it looks like a copy-paste slip -- confirm which
    # KB class create_action_on_data_collection_item is meant to produce.
    self.check_object(action, conf, self.kb.ActionOnDataSample)
def main(argv):
    """Write one tab-separated datasheet per plate listed in ``args.plates_list``.

    Steps, in order:

    1. parse ``argv`` and configure the root logger;
    2. connect to the knowledge base and load the enrollments of every study
       in ``STUDY_LABELS``, plus the valid EHR records grouped by ``i_id``;
    3. read the plate barcodes, one per line;
    4. for each barcode that resolves to a plate, write
       ``<barcode>_datasheet.csv`` into ``args.out_dir`` with one row per
       well, in slot order. Slots missing between two wells are written as
       placeholder rows whose sample fields are ``"X"``.

    Barcodes that match no plate are logged as errors and skipped.
    """
    args = make_parser().parse_args(argv)

    level = getattr(logging, args.loglevel)
    log_conf = {"format": LOG_FORMAT, "datefmt": LOG_DATEFMT, "level": level}
    if args.logfile:
        log_conf["filename"] = args.logfile
    logging.basicConfig(**log_conf)
    logger = logging.getLogger()

    kb = KB(driver="omero")(args.host, args.user, args.passwd)

    # Enrollments and individuals are needed to build the sample label and
    # to fill the gender field.
    enrolls = []
    for study_label in STUDY_LABELS:
        logger.debug("Loading enrollments for study %s" % study_label)
        study = kb.get_study(study_label)
        enrolls.extend(kb.get_enrolled(study))
        # The count is cumulative over the studies loaded so far.
        logger.debug("Fetched %d individuals" % len(enrolls))

    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)

    logger.debug("Loading EHR records")
    ehr_records_map = {}
    for record in kb.get_ehr_records("(valid == True)"):
        individual_id = record["i_id"]
        if individual_id not in ehr_records_map:
            ehr_records_map[individual_id] = []
        ehr_records_map[individual_id].append(record)

    # One plate barcode per line.
    barcodes = []
    with open(args.plates_list) as plates_file:
        for line in plates_file:
            barcodes.append(line.strip())

    for plate_barcode in barcodes:
        logger.info("Creating datasheet for plate %s" % plate_barcode)
        plate = load_plate(plate_barcode, kb)
        if not plate:
            logger.error("No plate with barcode %s exists, skipping it." % (plate_barcode))
            continue

        plate_wells = get_wells_by_plate(plate_barcode, kb)
        out_path = os.path.join(args.out_dir, "%s_datasheet.csv" % plate_barcode)

        with open(out_path, "w") as out_file:
            writer = csv.DictWriter(out_file, CSV_FIELDS, delimiter="\t")
            writer.writeheader()
            previous_slot = 0
            for slot, well in sorted(plate_wells.iteritems()):
                well_enrolls = wells_lookup[well]
                individual = well_enrolls[0].individual
                t1d, ms = get_affections(ehr_records_map[individual.id])

                # Emit a placeholder row for every slot skipped since the
                # previous well.
                while previous_slot != slot - 1:
                    previous_slot += 1
                    filler_row = {
                        "Sample_ID": "X",
                        "PLATE_barcode": plate.barcode,
                        "PLATE_name": plate.label,
                        "WELL_label": get_well_label(previous_slot),
                        "INDIVIDUAL_gender": "X",
                        "INDIVIDUAL_vid": "X",
                        "T1D_affected": "X",
                        "MS_affected": "X",
                    }
                    writer.writerow(filler_row)

                well_row = {
                    "Sample_ID": get_ichip_sample_code(well_enrolls, plate.barcode),
                    "PLATE_barcode": plate.barcode,
                    "PLATE_name": plate.label,
                    "WELL_label": well.label,
                    "INDIVIDUAL_gender": map_gender(individual),
                    "INDIVIDUAL_vid": individual.id,
                    "T1D_affected": t1d,
                    "MS_affected": ms,
                }
                writer.writerow(well_row)
                previous_slot = slot
Пример #18
0
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError, ve:
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    with open(args.couples_list) as f:
        reader = csv.reader(f, delimiter='\t')
        couples = []
        for row in reader:
            couples.append((row[0], row[1]))
    logger.info('%d couples are going to be swapped' % len(couples))

    logger.debug('Retrieving enrollments for study %s' % args.study)
    enrolls = kb.get_enrolled(kb.get_study(args.study))
    logger.debug('Retrieved %d enrollments' % len(enrolls))

    en_lookup = {}
    for en in enrolls:
        en_lookup[en.studyCode] = en

    for en_code1, en_code2 in couples:
        logger.info('Swapping couple %s - %s' % (en_code1, en_code2))
        try:
            en1 = en_lookup[en_code1]
            en2 = en_lookup[en_code2]
        except KeyError, ke:
            logger.error('Code %s not found in study %s' % (ke, args.study))
            sys.exit(2)
Пример #19
0
mset, if there is at least one, otherwise we will skip the individual.

"""


def extract_data_sample(group, mset, dsample_name):
    """Map each individual id in ``group`` to its data sample for ``mset``.

    For every individual returned by ``kb.get_individuals(group)``, the
    data samples of kind ``dsample_name`` are narrowed down to those whose
    ``snpMarkersSet`` equals ``mset``. Exactly one sample must remain; it is
    stored under the individual's ``id``.

    Returns a dict ``{individual id: data sample}``. Uses the module-level
    ``kb`` connection.

    NOTE(review): the tutorial text just above this function talks about
    skipping individuals that have no matching sample, whereas the code
    asserts that there is exactly one -- confirm which is intended.
    """
    by_individual = {}
    for individual in kb.get_individuals(group):
        # Build a real list: len() and indexing below need one, and
        # filter() only returns a list on Python 2.
        matching = [ds for ds in kb.get_data_samples(individual, dsample_name)
                    if ds.snpMarkersSet == mset]
        assert len(matching) == 1, (
            'expected exactly one %s for individual %s, found %d'
            % (dsample_name, individual.id, len(matching)))
        by_individual[individual.id] = matching[0]
    return by_individual


# Use study TEST01 as the group of individuals, then map each individual id
# to its GenotypeDataSample for mset0 (mset0 is defined outside this excerpt).
group = kb.get_study(label='TEST01')
gds0_by_individual = extract_data_sample(group, mset0, 'GenotypeDataSample')
""" ..

Note that what we now have is a dictionary that maps individual ids to
GenotypeDataSample objects. The latter are only handles to the actual
genotyping data, not the data itself.

We can now do a global check on data quality.

"""


def do_check(s):
    """Compute data-quality figures for ``s``.

    NOTE(review): only the first two statements of this function are
    visible here and nothing is returned yet -- confirm against the full
    source.
    """
    # Homozygote counts for s, as computed by algo.count_homozygotes.
    counts = algo.count_homozygotes(s)
    # Feed the counts to algo.maf (presumably minor allele frequencies --
    # TODO confirm); its first argument is passed as None.
    mafs = algo.maf(None, counts)
Пример #20
0
The first element of a marker-defining tuple is its label, the second
is the dbSNP db label, if available, and the third is the marker mask.

.. todo::

  add a link to the reference documentation

Now we will load the markers set definition into Omero.biobank.

**Note:** We are considering an ideal case where none of the markers
  is already in the db.

"""

# Study that the marker-import action is created for.
study = kb.get_study('TEST01')

# Create an action describing the import, then reload it
# (NOTE(review): presumably to refresh it from the server -- confirm).
action = kb.create_an_action(study, doc='importing markers')
action.reload()

# Provenance values for the markers; these look like placeholders chosen
# for the example.
source, context, release = 'foobar', 'fooctx', 'foorel'
ref_rs_genome, dbsnp_build = 'foo-rs-genome', 13200

# Create the markers listed in taq_man_markers (defined outside this
# excerpt), passing the provenance values and the action above.
lvs = kb.create_markers(source, context, release, ref_rs_genome, dbsnp_build,
                        taq_man_markers, action)
""" ..

where lvs is a list of (label, vid) tuples.

We can assume that the markers above have been aligned against a
reference genome, say fake19, and save in omero.biobank the alignment