Example No. 1
 def record(self, records, otsv, rtsv, blocking_validation):
   def records_by_chunk(batch_size, records):
     offset = 0
     while len(records[offset:]) > 0:
       yield records[offset:offset+batch_size]
       offset += batch_size
   if len(records) == 0:
     msg = 'No records are going to be imported'
     self.logger.critical(msg)
     raise core.ImporterValidationError(msg)
   study = self.find_study(records)
   self.source_klass = self.find_source_klass(records)
   self.device_klass = self.find_device_klass(records)
   self.preload_scanners()
   self.preload_devices()
   self.preload_sources()
   self.preload_markers_sets()
   self.preload_data_samples()
   records, bad_records = self.do_consistency_checks(records)
   for br in bad_records:
     rtsv.writerow(br)
   if blocking_validation and len(bad_records) >= 1:
     raise core.ImporterValidationError('%d invalid records' % len(bad_records))
   for i, c in enumerate(records_by_chunk(self.batch_size, records)):
     self.logger.info('start processing chunk %d' % i)
     self.process_chunk(otsv, c, study)
     self.logger.info('done processing chunk %d' % i)
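Most of the record variants in these examples reuse the same records_by_chunk generator to slice the record list into fixed-size batches. A minimal standalone sketch of that behaviour (the record dicts and the batch size of 3 are made up for illustration):

def records_by_chunk(batch_size, records):
    offset = 0
    while len(records[offset:]) > 0:
        yield records[offset:offset + batch_size]
        offset += batch_size

records = [{'label': 'sample-%d' % i} for i in range(7)]  # hypothetical records
for i, chunk in enumerate(records_by_chunk(3, records)):
    # yields chunks of 3, 3 and 1 records; the last chunk is simply shorter
    print('chunk %d holds %d records' % (i, len(chunk)))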
Example No. 2
    def record(self, records, otsv, rtsv, blocking_validation):
        def records_by_chunk(batch_size, records):
            offset = 0
            while len(records[offset:]) > 0:
                yield records[offset:offset + batch_size]
                offset += batch_size

        if not records:
            msg = 'No records are going to be imported'
            self.logger.critical(msg)
            raise core.ImporterValidationError(msg)
        self.container_klass = self.find_container_klass(records)
        self.preload_containers()
        if self.container_klass == self.kb.Lane:
            self.preload_flowcells()
        records, bad_records = self.do_consistency_checks(records)
        for br in bad_records:
            rtsv.writerow(br)
        if blocking_validation and len(bad_records) >= 1:
            raise core.ImporterValidationError('%d invalid records' %
                                               len(bad_records))
        study = self.find_study(records)
        device = self.get_device(label='importer-%s.titer_plate' % version,
                                 maker='CRS4',
                                 model='importer',
                                 release=version)
        act_setups = set(
            Recorder.get_action_setup_options(r, self.action_setup_conf)
            for r in records)
        self.logger.debug('Action setups:\n%r' % act_setups)
        actions = {}
        for acts in act_setups:
            acts_label = 'import-prog-%f' % time.time()
            act_setup_conf = {'label': acts_label, 'conf': acts}
            act_setup = self.kb.save(
                self.kb.factory.create(self.kb.ActionSetup, act_setup_conf))
            acat = self.kb.ActionCategory.IMPORT
            act_conf = {
                'setup': act_setup,
                'device': device,
                'actionCategory': acat,
                'operator': self.operator,
                'context': study,
            }
            act = self.kb.save(self.kb.factory.create(self.kb.Action,
                                                      act_conf))
            act.unload()
            actions[acts] = act
        for i, c in enumerate(records_by_chunk(self.batch_size, records)):
            self.logger.info('start processing chunk %d' % i)
            self.process_chunk(otsv, c, study, actions)
            self.logger.info('done processing chunk %d' % i)
Example No. 3
def implementation(logger, host, user, passwd, args):
    action_setup_conf = Recorder.find_action_setup_conf(args)
    f = csv.DictReader(args.ifile, delimiter='\t')
    records = [r for r in f]
    if len(records) == 0:
        msg = 'No records are going to be imported'
        logger.critical(msg)
        raise core.ImporterValidationError(msg)
    canonizer = core.RecordCanonizer(['study'], args)
    canonizer.canonize_list(records)
    study_label = records[0]['study']
    o = csv.DictWriter(args.ofile,
                       fieldnames=['study', 'label', 'type', 'vid'],
                       delimiter='\t',
                       lineterminator=os.linesep)
    recorder = Recorder(o,
                        study_label,
                        host,
                        user,
                        passwd,
                        args.keep_tokens,
                        args.batch_size,
                        operator=args.operator,
                        action_setup_conf=action_setup_conf,
                        logger=logger)
    report_fnames = copy.deepcopy(f.fieldnames)
    report_fnames.append('error')
    report = csv.DictWriter(args.report_file,
                            report_fnames,
                            delimiter='\t',
                            lineterminator=os.linesep,
                            extrasaction='ignore')
    report.writeheader()
    records, bad_records = recorder.do_consistency_checks(records)
    for br in bad_records:
        report.writerow(br)
    if args.blocking_validator and len(bad_records) >= 1:
        args.ofile.close()
        args.ifile.close()
        args.report_file.close()
        msg = '%d invalid records' % len(bad_records)
        recorder.logger.critical(msg)
        raise core.ImporterValidationError(msg)
    by_label = make_ind_by_label(records)
    import_pedigree(recorder, by_label.itervalues())
    recorder.clean_up()
    args.ofile.close()
    args.ifile.close()
    args.report_file.close()
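The report writer above reuses the input file's field names plus an extra 'error' column, and extrasaction='ignore' lets it accept bad-record dicts that still carry keys outside that list. A small self-contained sketch of the same csv.DictWriter pattern (the field names and the sample row are invented):

import csv
import os
import sys

fieldnames = ['study', 'label', 'error']
report = csv.DictWriter(sys.stdout, fieldnames, delimiter='\t',
                        lineterminator=os.linesep, extrasaction='ignore')
report.writeheader()
# 'father' is not in fieldnames; without extrasaction='ignore' this would raise ValueError
report.writerow({'study': 'S1', 'label': 'ind-01', 'father': 'ind-00',
                 'error': 'missing father'})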
Example No. 4
    def record(self, records, rtsv):
        def records_by_chunk(batch_size, records):
            offset = 0
            while len(records[offset:]) > 0:
                yield records[offset:offset + batch_size]
                offset += batch_size

        if not records:
            msg = 'No records are going to be imported'
            self.logger.critical(msg)
            raise core.ImporterValidationError(msg)
        self.preload_individuals()
        self.preload_birth_data_records()
        self.preload_locations()
        records, bad_records = self.do_consistency_checks(records)
        for br in bad_records:
            rtsv.writerow(br)
        study = self.find_study(records)
        device_label = 'importer.birth_data-%s' % (version)
        device = self.get_device(label=device_label,
                                 maker='CRS4',
                                 model='importer',
                                 release=version)
        asetup = self.get_action_setup('importer.birth_data-%f' % time.time(),
                                       json.dumps(self.action_setup_conf))
        for i, c in enumerate(records_by_chunk(self.batch_size, records)):
            self.logger.info('start processing chunk %d' % i)
            self.process_chunk(c, study, asetup, device)
            self.logger.info('done processing chunk %d' % i)
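The action setup above gets a label made unique with time.time() formatted through %f, and its configuration is serialised with json.dumps before being handed to get_action_setup. A tiny sketch of those two standard-library building blocks (the configuration dict is invented):

import json
import time

action_setup_conf = {'ifile': 'birth_data.tsv', 'study': 'TEST01'}  # hypothetical conf
label = 'importer.birth_data-%f' % time.time()  # e.g. importer.birth_data-1700000000.123456
print('%s -> %s' % (label, json.dumps(action_setup_conf)))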
Example No. 5
 def record(self, records, otsv):
   def records_by_chunk(batch_size, records):
     offset = 0
     while len(records[offset:]) > 0:
       yield records[offset:offset+batch_size]
       offset += batch_size
   if len(records) == 0:
     msg = 'No records are going to be imported'
     self.logger.critical(msg)
     raise core.ImporterValidationError(msg)
   self.preload_groups()
   self.preload_individuals()
   def keyfunc(r):
     return r['group']
   sub_records = []
   records = sorted(records, key=keyfunc)
   for k, g in it.groupby(records, keyfunc):
     sub_records.append(self.do_consistency_checks(k, list(g)))
   records = sum(sub_records, [])
   records = sorted(records, key=keyfunc)
   for k, g in it.groupby(records, keyfunc):
     group_conf = {'label': k}
     group = self.kb.factory.create(self.kb.Study, group_conf).save()
     for i, c in enumerate(records_by_chunk(self.batch_size, list(g))):
       self.logger.info('start processing chunk %s-%d' % (k, i))
       self.process_chunk(otsv, group, c)
       self.logger.info('done processing chunk %s-%d' % (k, i))
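The sort before it.groupby above is essential: groupby only merges consecutive equal keys, so unsorted records would split the same group across several runs. A minimal sketch with invented group labels:

import itertools as it

def keyfunc(r):
    return r['group']

records = [{'group': 'G1'}, {'group': 'G2'}, {'group': 'G1'}]  # 'G1' appears twice, out of order
records = sorted(records, key=keyfunc)
for k, g in it.groupby(records, keyfunc):
    print('%s -> %d records' % (k, len(list(g))))  # G1 -> 2, G2 -> 1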
Example No. 6
    def record(self, records, otsv, rtsv, blocking_validation):
        def records_by_chunk(batch_size, records):
            offset = 0
            while len(records[offset:]) > 0:
                yield records[offset:offset + batch_size]
                offset += batch_size

        if not records:
            msg = 'No records are going to be imported'
            self.logger.critical(msg)
            raise core.ImporterValidationError(msg)
        study = self.find_study(records)
        self.source_klass = self.find_source_klass(records)
        self.vessel_klass = self.find_vessel_klass(records)
        self.preload_sources()
        if self.vessel_klass == self.kb.PlateWell:
            self.preload_plates()
        records, bad_records = self.do_consistency_checks(records)
        for br in bad_records:
            rtsv.writerow(br)
        if blocking_validation and len(bad_records) >= 1:
            raise core.ImporterValidationError('%d invalid records' %
                                               len(bad_records))
        device = self.get_device('importer-%s.biosample' % version, 'CRS4',
                                 'IMPORT', version)
        act_setups = set(
            Recorder.get_action_setup_options(r, self.action_setup_conf)
            for r in records)
        asetup = {}
        for acts in act_setups:
            # asetup = self.get_action_setup('import-prog-%f' % time.time(),
            #                                json.dumps(self.action_setup_conf))
            setup_conf = {
                'label': 'import-prog-%f' % time.time(),
                'conf': acts
            }
            setup = self.kb.factory.create(self.kb.ActionSetup, setup_conf)
            asetup[acts] = self.kb.save(setup)
        for i, c in enumerate(records_by_chunk(self.batch_size, records)):
            self.logger.info('start processing chunk %d' % i)
            self.process_chunk(otsv, c, study, asetup, device)
            self.logger.info('done processing chunk %d' % i)
Example No. 7
    def record(self, records, blocking_validation):
        def records_by_chunk(batch_size, records):
            offset = 0
            while len(records[offset:]) > 0:
                yield records[offset:offset + batch_size]
                offset += batch_size

        if not records:
            msg = 'No records are going to be imported'
            self.logger.critical(msg)
            raise core.ImporterValidationError(msg)
        self.preload_studies()
        records, bad_records = self.do_consistency_checks(records)
        for br in bad_records:
            self.report_stream.writerow(br)
        if blocking_validation and len(bad_records) >= 1:
            raise core.ImporterValidationError('%d invalid records' %
                                               len(bad_records))
        for i, c in enumerate(records_by_chunk(self.batch_size, records)):
            self.logger.info('start processing chunk %d' % i)
            self.process_chunk(c)
            self.logger.info('done processing chunk %d' % i)
Example No. 8
    def record(self, records, otsv, rtsv, blocking_validation):
        def records_by_chunk(batch_size, records):
            offset = 0
            while len(records[offset:]) > 0:
                yield records[offset:offset + batch_size]
                offset += batch_size

        def get_data_collection(label, action):
            if label in self.preloaded_data_collections:
                return self.preloaded_data_collections[label]
            else:
                dc_conf = {'label': label, 'action': action}
                return self.kb.factory.create(self.kb.DataCollection, dc_conf)

        if len(records) == 0:
            msg = 'No records are going to be imported'
            self.logger.critical(msg)
            raise core.ImporterValidationError(msg)
        study = self.find_study(records)
        self.data_sample_klass = self.find_data_sample_klass(records)
        self.preload_data_samples()
        self.preload_data_collections()
        asetup = self.get_action_setup(
            'importer.data_collection-%f' % time.time(),
            json.dumps(self.action_setup_conf))
        device = self.get_device('importer-%s.data_collection' % version,
                                 'CRS4', 'IMPORT', version)
        conf = {
            'setup': asetup,
            'device': device,
            'actionCategory': self.kb.ActionCategory.PROCESSING,
            'operator': self.operator,
            'context': study,
        }
        action = self.kb.factory.create(self.kb.Action, conf).save()

        def keyfunc(r):
            return r['label']

        sub_records = []
        data_collections = {}
        records = sorted(records, key=keyfunc)
        for k, g in it.groupby(records, keyfunc):
            data_collections[k] = get_data_collection(k, action)
            good_records, bad_records = self.do_consistency_checks(
                data_collections[k], list(g))
            sub_records.append(good_records)
            for br in bad_records:
                rtsv.writerow(br)
            if blocking_validation and len(bad_records) >= 1:
                self.kb.delete(action)
                raise core.ImporterValidationError('%d invalid records' %
                                                   len(bad_records))
        records = sum(sub_records, [])
        if len(records) == 0:
            self.kb.delete(action)
            msg = 'No records are going to be imported'
            self.logger.warning(msg)
            sys.exit(0)
        records = sorted(records, key=keyfunc)
        for k, g in it.groupby(records, keyfunc):
            dc = data_collections[k]
            if not dc.is_mapped():
                dc.save()
            for i, c in enumerate(records_by_chunk(self.batch_size, list(g))):
                self.logger.info('start processing chunk %s-%d' % (k, i))
                self.process_chunk(otsv, study, dc, c)
                self.logger.info('done processing chunk %s-%d' % (k, i))
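Both this example and Example No. 5 flatten the per-group lists of good records with sum(sub_records, []); itertools.chain.from_iterable gives the same result without the repeated list concatenation. A small sketch (the sub-lists are invented):

import itertools as it

sub_records = [[{'label': 'a'}], [], [{'label': 'b'}, {'label': 'c'}]]
flat_sum = sum(sub_records, [])                          # the pattern used above
flat_chain = list(it.chain.from_iterable(sub_records))   # equivalent, linear-time alternative
assert flat_sum == flat_chain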
Example No. 9
 def record(self, records, otsv, rtsv, blocking_validation):
     def records_by_chunk(batch_size, records):
         offset = 0
         while len(records[offset:]) > 0:
             yield records[offset:offset + batch_size]
             offset += batch_size
     if len(records) == 0:
         msg = 'No records are going to be imported'
         self.logger.critical(msg)
         raise core.ImporterValidationError(msg)
     study = self.find_study(records)
     self.source_klass = self.find_source_klass(records)
     self.seq_sample_klass = self.find_seq_sample_klass(records)
     self.preload_sources()
     self.preload_devices()
     if self.seq_sample_klass == self.kb.RawSeqDataSample:
         self.preload_lanes()
     if self.seq_sample_klass == self.kb.SeqDataSample:
         self.preload_tubes()
     records, bad_records = self.do_consistency_checks(records)
     for br in bad_records:
         rtsv.writerow(br)
     if blocking_validation and len(bad_records) >= 1:
         raise core.ImporterValidationError('%d invalid records' % len(bad_records))
     act_setups = set((r['source'], r.get('device', None),
                       Recorder.get_action_setup_options(r, self.action_setup_conf,
                                                         self.history))
                      for r in records)
     self.logger.debug('Action setups:\n%r' % act_setups)
     actions = {}
     for acts in act_setups:
         # TODO: if a history has been passed, add this to the options
         act_label = 'importer.seq_data_sample.%f' % time.time()
         act_setup_conf = {'label' : act_label,
                           'conf' : acts[2]}
         act_setup = self.kb.save(self.kb.factory.create(self.kb.ActionSetup, 
                                                         act_setup_conf))
         if issubclass(self.source_klass, self.kb.FlowCell):
             act_klass = self.kb.ActionOnCollection
             act_category = self.kb.ActionCategory.MEASUREMENT
         elif issubclass(self.source_klass, self.kb.DataSample):
             act_klass = self.kb.ActionOnDataSample
             act_category = self.kb.ActionCategory.PROCESSING
         else:
             self.logger.error('Unmanaged source type %r' % self.source_klass)
             sys.exit('Unmanaged source type %r' % self.source_klass)
         act_conf = {'setup' : act_setup,
                     'actionCategory' : act_category,
                     'operator' : self.operator,
                     'context' : study,
                     'target' : self.preloaded_sources[acts[0]]}
         if acts[1]:
             act_conf['device'] = self.preloaded_devices[acts[1]]
         action = self.kb.factory.create(act_klass, act_conf)
         action = self.kb.save(action)
         # Unload the action object or it will cause a bug when
         # saving objects that references to ActionOnDataSample
         # records, too many inheritance steps
         action.unload()
         actions[acts] = action
     self.logger.debug('Actions are:\n%r' % actions)
     for i, c in enumerate(records_by_chunk(self.batch_size, records)):
         self.logger.info('start processing chunk %d' % i)
         self.process_chunk(otsv, c, actions, study)
         self.logger.info('done processing chunk %d' % i)
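The act_setups set above deduplicates (source, device, action-setup options) combinations, so a single Action is created per combination and presumably looked up again per record inside process_chunk. A minimal sketch of keying a dict by such tuples (records and labels are invented; a missing device is carried as None):

records = [
    {'source': 'V123', 'device': 'hiseq-01'},
    {'source': 'V123', 'device': 'hiseq-01'},  # same combination, no extra action
    {'source': 'V456'},                        # no device: the key holds None
]
act_setups = set((r['source'], r.get('device', None)) for r in records)
actions = dict((key, 'action-for-%s-%s' % key) for key in act_setups)
for r in records:
    print(actions[(r['source'], r.get('device', None))])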