def action(args):
    if args.tubes is None:
        raise ValueError('Tube types must be selected using option --tubes <>')

    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list [in numeric order]
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)
    case_tube_list.sort()

    # Get features
    HDF_obj = HDF5_IO(filepath=args.hdf5_fp, clobber=False)
    features_df = HDF_obj.make_single_tube_analysis(case_tube_list)
    log.debug(features_df.head())

    # Get annotations [ordered by case_tube_idx]
    annotation_df = db.query(exporttype='df', getCaseAnnotations=True,
                             **vars(args)).results
    log.debug(annotation_df.head())
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    i = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            a = FCS(filepath=filepath, import_dataframe=True)
            a.comp_scale_FCS_data(compensation_file=comp_file,
                                  gate_coords=coords,
                                  strict=False, auto_comp=False)
            outfile = 'output/' + '_'.join([case, str(case_tube_idx),
                                            a.case_tube.replace(' ', '_'),
                                            a.date.strftime("%Y%m%d")]) + '.png'
            a.comp_visualize_FCS(outfile=outfile)

            i += 1
            if args.n_files is not None and i >= args.n_files:
                done = True
                break
        if done:
            break
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # Initialize empty list to collect case_tube_idx's that failed feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))

    # This nested for loop iterates over all case_tube_idx's
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
                       import_dataframe=True)
            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False, auto_comp=False)
                fFCS.feature_extraction(extraction_type=args.feature_extraction_method,
                                        bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS, db=db)
            except ValueError as e:
                print("Skipping feature extraction for case: {} because of 'ValueError {}'"
                      .format(case, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            except KeyError as e:
                print("Skipping FCS %s because of KeyError: %s" % (filepath, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            except IntegrityError as e:
                print("Skipping Case: {}, Tube: {}, filepath: {} because of "
                      "IntegrityError: {}".format(case, case_tube_idx,
                                                  filepath, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            i += 1
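# A possible follow-up to the loop above: persist the accumulated failures next
# to the features. This is a sketch, not part of the original action; it assumes
# Feature_IO.push_failed_cti_list and the ['casenum', 'cti', 'errormessage']
# schema exercised by test_push_pull further down in this section.
def push_failed_ctis(HDF_obj, feature_failed_CTIx):
    """Push [case, case_tube_idx, error message] rows to the feature HDF5 file."""
    import pandas as pd  # assumed available, as elsewhere in this section
    if feature_failed_CTIx:
        failed_df = pd.DataFrame(feature_failed_CTIx,
                                 columns=['casenum', 'cti', 'errormessage'])
        HDF_obj.push_failed_cti_list(failed_df)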
def action(args):
    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    q_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            q_list.append((path.join(args.dir, relpath), case_tube_idx))
    log.info("Length of q_list is {}".format(len(q_list)))

    # Split q_list into sublists of length args.load
    n = args.load
    sublists = [q_list[i:i + n] for i in range(0, len(q_list), n)]
    log.info("Number of sublists to process: {}".format(len(sublists)))

    # Setup args to pass through to the workers
    vargs = {key: value for key, value in vars(args).items()
             if key in ['nosinglet', 'noviability']}

    i = 0
    for sublist in sublists:
        p = Pool(args.workers)
        results = [p.apply_async(worker, args=(case_info, ), kwds=vargs)
                   for case_info in sublist]
        p.close()
        for f in results:
            i += 1
            fFCS = f.get()
            fFCS.histostats_to_db(db=out_db)
            del fFCS
            print "Case_tubes: {} of {} have been processed\r".format(i, len(q_list)),
        p.join()  # wait for the pool to shut down before starting the next sublist
        del results

        if args.testing:
            break  # run loop once then break if testing
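# `worker` is not defined in this section. A minimal sketch of what it plausibly
# does, modeled on the single-process stats-extraction action elsewhere in this
# file: load the FCS file, compensate/scale, extract histogram stats, and return
# the object so the parent process can push results to out_db. The module-level
# names (FCS, comp_file, gate_coords, sys) and how nosinglet/noviability are
# consumed inside comp_scale_FCS_data are assumptions here.
def worker(case_info, **kwargs):
    filepath, case_tube_idx = case_info
    fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
               import_dataframe=True)
    try:
        fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                 gate_coords=gate_coords,
                                 strict=False, auto_comp=False, **kwargs)
        fFCS.extract_FCS_histostats()
    except Exception:
        # Flag the file rather than crashing the worker
        fFCS.flag = 'stats_extraction_fail'
        fFCS.error_message = str(sys.exc_info()[0])
    return fFCS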
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db)

    if args.export:
        print "Export tube types to %s" % args.file
        db.exportTubeTypes(**vars(args))
    elif args.load:
        print "Import tube types from %s" % args.file
        db.importTubeTypes(**vars(args))
    else:
        print "Nothing to do"
def action(args):
    # Identify query option
    if args.getfiles is False and args.getTubeInfo is False:
        raise Exception("ERROR: Must select either --getfiles or --getTubeInfo")

    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(**vars(args))

    if args.out_file is None:
        pprint.pprint(q.results)
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            a = FCS(filepath=filepath, case_tube_idx=case_tube_idx)

            # Do something
            print a.case_tube
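# The loop above is a template; "Do something" is its extension point. A purely
# illustrative replacement, using only attributes this section already reads
# elsewhere (case_tube and date):
#
#     print "%s | %s | %s" % (a.case_tube, case_tube_idx,
#                             a.date.strftime("%Y-%m-%d"))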
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)

    # Get features
    HDF_obj = HDF5_IO(filepath=args.hdf, clobber=False)
    features_df = HDF_obj.make_single_tube_analysis(case_tube_list)
    print features_df
def test_query_getTubeInfo(self):
    """ Testing querying FCS_database for information """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filename = "12-00031_Myeloid 1.fcs"
    filepath = path.abspath(data(filename))
    a = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)
    a.meta_to_db(db=db, dir=root_dir)

    # Test specific positive request
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getTubeInfo': True}
    self.assertEqual(
        db.query(**q_dict).results,
        {u'12-00031':
         {u'Myeloid 1':
          {datetime.datetime(2012, 1, 3, 12, 0, 15):
           u'testfiles/12-00031_Myeloid 1.fcs'}}})

    # Test specific negative request daterange
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-02'],
              'getTubeInfo': True}
    self.assertEqual(db.query(**q_dict).results, {})

    # Test specific negative request tubes
    q_dict = {'tubes': ['Myeloid 2'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getTubeInfo': True}
    self.assertEqual(db.query(**q_dict).results, {})
def action(args):
    # Connect to database
    dbcon = FCSdatabase(db=args.db, rebuild=False)
    print "Processing database %s" % args.db

    # Get QC data
    if args.testing:
        testdbcon = FCSdatabase(db='db/test.db', rebuild=True)
        args.table_format = 'tall'
        qc = FlowQC(dbcon=dbcon, **vars(args))
        qc.pushQC(db=testdbcon)
    else:
        qc = FlowQC(dbcon=dbcon, **vars(args))

    log.debug(qc.histos)
    log.debug(qc.PmtStats)
    log.debug(qc.TubeStats)
def action(args):
    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    n = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
                       import_dataframe=True)
            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         strict=False, auto_comp=False,
                                         **vars(args))
                fFCS.extract_FCS_histostats()
            except Exception:
                # Flag the file rather than aborting the whole run
                fFCS.flag = 'stats_extraction_fail'
                fFCS.error_message = str(sys.exc_info()[0])
            fFCS.histostats_to_db(db=out_db)

            n += 1
            if args.n is not None and n >= args.n:
                done = True
                break
        if done:
            break
def action(args):
    if args.tubes is None:
        raise ValueError('Tube types must be selected using option --tubes <>')

    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list [in numeric order]
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)
    case_tube_list.sort()

    # Get features
    Feature_obj = Feature_IO(filepath=args.feature_fp, clobber=False)
    features_df, not_in_cti, merge_fail_cti = \
        Feature_obj.make_single_tube_analysis(case_tube_list)
    log.debug("Feature DF: {} \n Case_tube_indices that failed: {}".format(
        features_df.head(), merge_fail_cti))

    # Get annotations [ordered by case_tube_idx]
    annotation_df = db.query(exporttype='df', getCaseAnnotations=True,
                             **vars(args)).results
    log.debug(annotation_df.head())

    # Dummy for now; TODO: convert merge_fail_cti to merge_fail_case_nums and
    # settle on the error codes to report
    not_found_df = pd.DataFrame([['12-12345', 'Not found']],
                                columns=["case_num", "error_code"])

    # Open/Create MergedData [only partially tested; requires an annotation_df]
    MLinput_obj = MergedFeatures_IO(filepath=args.MLinput_fp, clobber=True)
    MLinput_obj.push_features(features_df)
    MLinput_obj.push_annotations(annotation_df)
    MLinput_obj.push_not_found(not_found_df)
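# One way to realize the "convert merge_fail_cti to merge_fail_case_nums" idea
# sketched above: invert the query results (which map case -> {case_tube_idx:
# relpath} throughout this section) and look each failed index up. A sketch
# only; the 'Merge failure' error code is invented for illustration.
def merge_fail_cti_to_case_nums(query_results, merge_fail_cti):
    """Map failed case_tube_idx values back to their case numbers."""
    cti_to_case = {}
    for case, case_info in query_results.items():
        for case_tube_idx in case_info:
            cti_to_case[case_tube_idx] = case
    return pd.DataFrame([[cti_to_case.get(cti), 'Merge failure']
                         for cti in merge_fail_cti],
                        columns=['case_num', 'error_code'])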
def action(args):
    raise Exception("DEPRECATED METHOD")

    # Copy database
    shutil.copyfile(args.db, args.outdb)
    outdb = FCSdatabase(db=args.outdb, rebuild=False)

    # Add text table and whittle cases not in db (unless args says otherwise)
    outdb.addCustomCaseData(file=args.file, whittle=not args.no_whittle)

    if args.no_whittle is False:
        # Delete cases in db not in Custom table [do not add to exclusions table]
        outdb.query(delCasesByCustom=True)

    outdb.close()
def test_meta_to_db(self):
    """ Make sure that the push of meta data to db 'runs'

    NOTE: not explicitly checking what is in the db
    """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)
    a.meta_to_db(db=db, dir=root_dir)
def test_query_getfiles(self):
    """ Testing querying database for getfiles dict_dict """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filename = "12-00031_Myeloid 1.fcs"
    filepath = path.abspath(data(filename))
    a = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)
    a.meta_to_db(db=db, dir=root_dir)

    # Test specific positive request
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getfiles': True}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

    # Test empty query
    q_dict = {'getfiles': True}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

    # Test case query POS
    q_dict = {'getfiles': True, 'cases': ['12-00031']}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

    # Test case query NEG
    q_dict = {'getfiles': True, 'cases': ['12-00032']}
    self.assertEqual(db.query(**q_dict).results, {})

    # Test specific negative request daterange
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-02'],
              'getfiles': True}
    self.assertEqual(db.query(**q_dict).results, {})

    # Test specific negative request tubes
    q_dict = {'tubes': ['Myeloid 2'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getfiles': True}
    self.assertEqual(db.query(**q_dict).results, {})
def test_ML_push_pull(self):
    """ Tests MergedFeatures_IO.push_features and push_annotations """
    # Initialize filepaths
    FCS_fp = data(test_fcs_fn)
    DB_fp = path.join(self.mkoutdir(), 'test.db')
    FT_HDF_fp = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
    ML_HDF_fp = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

    # FCS initialization
    FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False, auto_comp=False)
    FCS_obj.feature_extraction(extraction_type='Full', bins=10)
    log.debug(FCS_obj.FCS_features.histogram)

    # DB initialization
    DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
    FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
    log.debug(FCS_obj.case_tube_idx)

    # Feature hdf initialization
    FT_HDF_obj = Feature_IO(filepath=FT_HDF_fp)

    # Push fcs_features
    FT_HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                                 FCS=FCS_obj, db=DB_obj)

    feature_DF, not_in_data, merge_fail = \
        FT_HDF_obj.make_single_tube_analysis([FCS_obj.case_tube_idx])

    ML_HDF_obj = MergedFeatures_IO(filepath=ML_HDF_fp, clobber=True)
    ML_HDF_obj.push_features(feature_DF)
    ML_HDF_obj.push_annotations(pd.DataFrame([[test_fcs_fn, 0]],
                                             columns=['case_num', 'annotation']))
def test_push_pull(self):
    """ Tests Feature_IO.push_fcs_features """
    # Initialize filepaths
    FCS_fp = data(test_fcs_fn)
    DB_fp = path.join(self.mkoutdir(), 'test.db')
    HDF_fp = path.join(self.mkoutdir(), 'test_Feature_HDF.hdf5')

    # FCS initialization
    FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False, auto_comp=False)
    FCS_obj.feature_extraction(extraction_type='Full', bins=10)
    log.debug(FCS_obj.FCS_features.histogram)

    # DB initialization
    DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
    FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
    log.debug(FCS_obj.case_tube_idx)

    # HDF initialization
    HDF_obj = Feature_IO(filepath=HDF_fp)

    # Push fcs_features
    HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                              FCS=FCS_obj, db=DB_obj)

    # Pull fcs_features and test single case retrieval
    output = HDF_obj.get_fcs_features(FCS_obj.case_tube_idx)
    log.debug(output)
    np.testing.assert_allclose(output.data,
                               FCS_obj.FCS_features.histogram.data)

    # Push failed_cti list to "meta data"
    cti_list = pd.DataFrame(data=np.array([['13-12345', '1', "Dummy Error"]]),
                            index=[1],
                            columns=['casenum', 'cti', 'errormessage'])
    HDF_obj.push_failed_cti_list(cti_list)

    # Pull meta data from HDF5 file
    meta_data = HDF_obj.get_meta_data()
    log.debug("File meta data is {}".format(meta_data))
def action(args):
    # Collect files/dirs
    Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list)

    # Connect to database (and rebuild)
    db = FCSdatabase(db=args.db_filepath, rebuild=True)
    print "Building database %s" % db.db_file

    # Process files/dirs
    case_tube_idx = 0
    for f in Finder.filenames:
        try:
            fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx)
            fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True)
        except Exception:
            print "Skipping FCS %s because of unknown error related to: %s" % \
                (f, sys.exc_info()[0])

        print("{:6d} Cases uploaded\r".format(case_tube_idx)),
        case_tube_idx += 1

        if args.n is not None and case_tube_idx >= args.n:
            break
def action(args):
    argd = vars(args)

    # Collect options
    if argd['cases'] is not None or argd['case_tube_idxs'] is not None:
        raise ValueError('Should I be able to pass cases or cti\'s here??')

    # Connect to database
    db = FCSdatabase(db=argd['db'], rebuild=False)

    # Get features_HDF case_tube_idx's
    HDF_feature_obj = Feature_IO(filepath=argd['feature_hdf5_fp'], clobber=False)
    feature_cti = HDF_feature_obj.get_case_tube_idxs()

    # Load annotations (row-index is case_number)
    ann = CustomData(args.case_annot).dat
    ann_cases = Set(ann.index.tolist())
    log.debug("Annotation cases: {}".format(ann_cases))

    # Get/pick case, case_tube_idx list
    feature_cases = Set(db.query(getCases=True, aslist=True,
                                 case_tube_idxs=feature_cti).results)
    log.debug("Feature cases: {}".format(feature_cases))

    # Identify annotation cases not represented in HDF5
    exclusions_dic = {}
    exclusions_dic['no_features'] = list(ann_cases - feature_cases)

    # Cases to consider (intersection of annotations and HDF5 features)
    cases_to_consider = ann_cases & feature_cases
    argd['cases'] = list(cases_to_consider)
    argd['case_tube_idxs'] = feature_cti
    q = db.query(pick_cti=True, **argd)
    case_tube_index_list = q.results.case_tube_idx.tolist()
    case_list = Set(q.results.case_number.tolist())
    log.debug('Selected case/cti: {}'.format(
        [[list(case_list)[i], case_tube_index_list[i]]
         for i in range(len(case_tube_index_list))]))

    # Keep track of cases that were excluded at the query step
    exclusions_dic['excluded_by_DB_query'] = list(cases_to_consider - case_list)
    log.debug(exclusions_dic)

    # Get features [assuming that features are returned in order!]
    features_df, not_in_data, merge_fail = \
        HDF_feature_obj.make_single_tube_analysis(case_tube_index_list)
    features_df.set_index('bin_num', drop=True, inplace=True)
    features_df.columns = case_list
    features_df = features_df.T
    log.debug(features_df.head())

    # Get annotations [ordered by case_tube_idx]
    annotation_df = ann.loc[list(case_list), :]
    log.debug(annotation_df.head())

    # Send features_df, annotation_df, and exclusions to ML_input_HDF5 (args.ml_hdf5_fp)
    Merged_ML_feat_obj = MergedFeatures_IO(filepath=args.ml_hdf5_fp, clobber=True)
    Merged_ML_feat_obj.push_features(features_df)
    Merged_ML_feat_obj.push_annotations(annotation_df)
    Merged_ML_feat_obj.push_not_found(exclusions_dic)  # exclusions is a dictionary