def addCustomCaseData(self, file, whittle=True):
    """ Method to load file (tab-text) into database table CustomCaseData

    The file must include <case_number> and <category> columns (a leading
    'CASE*' column is renamed to 'case_number'; if only one column matches
    the table schema, the file's second column is renamed to the table's
    second column, i.e. <category>).

    NOTE: this overwrites existing data ('replace' in to_sql below).

    :param file: path to a tab-delimited text file readable by Lab_pred_table
    :param whittle: accepted for interface compatibility; not used in this body
    :raises ValueError: if no file column matches the CustomCaseData schema
    """
    # Load the file into a dataframe wrapper (a.dat is the DataFrame)
    a = Lab_pred_table(db=self, file=file)

    # ### Handle column names ###
    # Normalize to lowercase so matching against the db schema is case-insensitive
    a.dat.columns = [c.lower() for c in a.dat.columns.values]
    a_cols = a.dat.columns.tolist()

    # Convert 'CASE*' => 'case_number' (first column whose name starts with 'case')
    case_index = next((index for index, value in enumerate(a_cols) if value[:4] == 'case'), None)
    if case_index is not None:
        a_cols[case_index] = 'case_number'

    # Columns the CustomCaseData table actually declares (via its SQLAlchemy mapper)
    db_cols = CustomCaseData.__mapper__.columns.keys()
    cols = [c for c in a_cols if c.lower() in db_cols]

    # Add second column if only captured 1 column and rename to <category>
    # NOTE(review): assumes the file has at least two columns here — a one-column
    # file would make a_cols[1] raise IndexError; confirm against expected inputs
    if (len(db_cols) > 1) and (len(cols) == 1):
        cols.append(db_cols[1])
        a_cols[1] = cols[1]

    a.dat.columns = a_cols

    # ### If one of columns matches use it ###
    if (len(cols) > 0):
        log.info('Adding file %s to db %s' % (file, self.db_file))
        # Keep only the schema-matching columns, in schema order
        a.dat = a.dat[cols]

        # Identify cases in custom data but not in meta db
        # NOTE(review): zip(...)[0] requires Python 2 (zip returns a list there);
        # under Python 3 a zip object is not subscriptable
        db_case_list = zip(*queryDB(self, getCases=True, not_flagged=False).results)[0]
        cases_missing_in_db = a.dat.case_number.loc[~a.dat.case_number.isin(db_case_list)].\
            tolist()

        # Write custom data (replaces any existing CustomCaseData table contents)
        a.dat.to_sql('CustomCaseData', con=self.engine, if_exists='replace', index=False)

        # Add empty FCS objects to db for each in cases_to_exclude, so the meta db
        # has a (flagged) record for every case named in the custom list
        for c in cases_missing_in_db:
            log.info('Making empty FCS for {}'.format(c))
            fFCS = FCS(case_number=c, flag='CustomData_ONLY',
                       error_message='Added to db because in custom list but not in metadb')
            fFCS.meta_to_db(db=self, add_lists=True)
    else:
        raise ValueError("File %s does not have columns 'case_number' and 'category'" % (file))
def test_meta_to_db(self):
    """ Make sure that the push of meta data to db 'runs'

    NOTE: not explicitly checking what is in the db
    """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)

    # Smoke test: succeeds if no exception is raised
    a.meta_to_db(db=db, dir=root_dir)
    # BUGFIX: removed a stray trailing `"""` that followed the final statement
    # and opened an unterminated triple-quoted string (a syntax error)
def test_query_getfiles(self):
    """ Exercise FCSdatabase.query(getfiles=True) for positive and negative requests """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filename = "12-00031_Myeloid 1.fcs"
    filepath = path.abspath(data(filename))
    fcs = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)
    fcs.meta_to_db(db=db, dir=root_dir)

    # The single loaded case, as every positive query should report it
    expected = {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}}

    # Specific positive request (matching tube + date range)
    self.assertEqual(
        db.query(tubes=['Myeloid 1'],
                 daterange=['2012-01-01', '2012-01-04'],
                 getfiles=True).results,
        expected)

    # Empty query returns everything
    self.assertEqual(db.query(getfiles=True).results, expected)

    # Case query, present case
    self.assertEqual(db.query(getfiles=True, cases=['12-00031']).results,
                     expected)

    # Case query, absent case
    self.assertEqual(db.query(getfiles=True, cases=['12-00032']).results, {})

    # Negative request: date range excludes the acquisition date
    self.assertEqual(
        db.query(tubes=['Myeloid 1'],
                 daterange=['2012-01-01', '2012-01-02'],
                 getfiles=True).results,
        {})

    # Negative request: tube name does not match
    self.assertEqual(
        db.query(tubes=['Myeloid 2'],
                 daterange=['2012-01-01', '2012-01-04'],
                 getfiles=True).results,
        {})
def test_query_getTubeInfo(self):
    """ Exercise FCSdatabase.query(getTubeInfo=True) for positive and negative requests """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filename = "12-00031_Myeloid 1.fcs"
    filepath = path.abspath(data(filename))
    fcs = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)
    fcs.meta_to_db(db=db, dir=root_dir)

    # Positive request: tube and date range both match the loaded case
    expected = {
        u'12-00031': {
            u'Myeloid 1': {
                datetime.datetime(2012, 1, 3, 12, 0, 15):
                    u'testfiles/12-00031_Myeloid 1.fcs'
            }
        }
    }
    self.assertEqual(
        db.query(tubes=['Myeloid 1'],
                 daterange=['2012-01-01', '2012-01-04'],
                 getTubeInfo=True).results,
        expected)

    # Negative request: date range excludes the acquisition date
    self.assertEqual(
        db.query(tubes=['Myeloid 1'],
                 daterange=['2012-01-01', '2012-01-02'],
                 getTubeInfo=True).results,
        {})

    # Negative request: tube name does not match
    self.assertEqual(
        db.query(tubes=['Myeloid 2'],
                 daterange=['2012-01-01', '2012-01-04'],
                 getTubeInfo=True).results,
        {})
def test_push_pull(self):
    """ Round-trip Feature_IO.push_fcs_features / get_fcs_features and the failed-cti list """
    # File locations for this test run
    fcs_path = data(test_fcs_fn)
    db_path = path.join(self.mkoutdir(), 'test.db')
    hdf_path = path.join(self.mkoutdir(), 'test_Feature_HDF.hdf5')

    # Build an FCS object and extract features
    fcs = FCS(filepath=fcs_path, import_dataframe=True)
    fcs.comp_scale_FCS_data(compensation_file=comp_file,
                            gate_coords=gate_coords,
                            rescale_lim=(-0.5, 1),
                            strict=False, auto_comp=False)
    fcs.feature_extraction(extraction_type='Full', bins=10)
    log.debug(fcs.FCS_features.histogram)

    # Fresh database, record the FCS meta data
    fcs_db = FCSdatabase(db=db_path, rebuild=True)
    fcs.meta_to_db(db=fcs_db, dir=path.abspath('.'))
    log.debug(fcs.case_tube_idx)

    # HDF store for features
    feature_io = Feature_IO(filepath=hdf_path)
    feature_io.push_fcs_features(case_tube_idx=fcs.case_tube_idx,
                                 FCS=fcs, db=fcs_db)

    # Pull the features back out and compare to what was pushed
    pulled = feature_io.get_fcs_features(fcs.case_tube_idx)
    log.debug(pulled)
    np.testing.assert_allclose(pulled.data, fcs.FCS_features.histogram.data)

    # Push a failed-cti list into the file's "meta data" and read it back
    failed = pd.DataFrame(data=np.array([['13-12345', '1', "Dummy Error"]]),
                          index=[1],
                          columns=['casenum', 'cti', 'errormessage'])
    feature_io.push_failed_cti_list(failed)

    meta_data = feature_io.get_meta_data()
    log.debug("File meta data is {}".format(meta_data))
def test_ML_push_pull(self):
    """ Push extracted features through Feature_IO into MergedFeatures_IO

    NOTE(review): a second method with this same name exists in this file;
    at class creation the later definition shadows the earlier one.
    """
    # File locations for this test run
    fcs_path = data(test_fcs_fn)
    db_path = path.join(self.mkoutdir(), 'test.db')
    ft_hdf_path = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
    ml_hdf_path = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

    # Build an FCS object and extract features
    fcs = FCS(filepath=fcs_path, import_dataframe=True)
    fcs.comp_scale_FCS_data(compensation_file=comp_file,
                            gate_coords=gate_coords,
                            rescale_lim=(-0.5, 1),
                            strict=False, auto_comp=False)
    fcs.feature_extraction(extraction_type='Full', bins=10)
    log.debug(fcs.FCS_features.histogram)

    # Fresh database, record the FCS meta data
    fcs_db = FCSdatabase(db=db_path, rebuild=True)
    fcs.meta_to_db(db=fcs_db, dir=path.abspath('.'))
    log.debug(fcs.case_tube_idx)

    # Feature HDF store; push this case's features
    feature_io = Feature_IO(filepath=ft_hdf_path)
    feature_io.push_fcs_features(case_tube_idx=fcs.case_tube_idx,
                                 FCS=fcs, db=fcs_db)

    feature_DF, not_in_data, merge_fail = feature_io.make_single_tube_analysis(
        [fcs.case_tube_idx])

    # Merged-feature store: push features plus a one-row annotation frame
    merged_io = MergedFeatures_IO(filepath=ml_hdf_path, clobber=True)
    merged_io.push_features(feature_DF)
    merged_io.push_annotations(
        pd.DataFrame([[test_fcs_fn, 0]],
                     columns=['case_num', 'annotation']))
def test_ML_push_pull(self):
    """ Push extracted features through Feature_IO into MergedFeatures_IO

    NOTE(review): this duplicates the name of an earlier method in this file;
    this later definition is the one bound at class creation.
    """
    # Paths for the fixture FCS file, database, and the two HDF5 stores
    src = data(test_fcs_fn)
    sqlite_path = path.join(self.mkoutdir(), 'test.db')
    features_h5 = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
    merged_h5 = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

    # Load, compensate/scale, and extract features from the FCS file
    sample = FCS(filepath=src, import_dataframe=True)
    sample.comp_scale_FCS_data(compensation_file=comp_file,
                               gate_coords=gate_coords,
                               rescale_lim=(-0.5, 1),
                               strict=False, auto_comp=False)
    sample.feature_extraction(extraction_type='Full', bins=10)
    log.debug(sample.FCS_features.histogram)

    # Record meta data into a freshly rebuilt database
    database = FCSdatabase(db=sqlite_path, rebuild=True)
    sample.meta_to_db(db=database, dir=path.abspath('.'))
    log.debug(sample.case_tube_idx)

    # Store the features, then build the single-tube analysis frame
    store = Feature_IO(filepath=features_h5)
    store.push_fcs_features(case_tube_idx=sample.case_tube_idx,
                            FCS=sample, db=database)
    feature_DF, not_in_data, merge_fail = store.make_single_tube_analysis(
        [sample.case_tube_idx])

    # Write features and a single annotation row into the merged store
    merged = MergedFeatures_IO(filepath=merged_h5, clobber=True)
    merged.push_features(feature_DF)
    annotations = pd.DataFrame([[test_fcs_fn, 0]],
                               columns=['case_num', 'annotation'])
    merged.push_annotations(annotations)
def action(args): # Collect files/dirs Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list) # Connect to database (and rebuild) db = FCSdatabase(db=args.db_filepath, rebuild=True) print "Building database %s" % db.db_file # Process files/dirs case_tube_idx = 0 for f in Finder.filenames: try: fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx) fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True) except: print "Skipping FCS %s because of unknown error related to: %s" % \ (f, sys.exc_info()[0]) print("{:6d} Cases uploaded\r".format(case_tube_idx)), case_tube_idx += 1 if args.n is not None and case_tube_idx >= args.n: break
def test_query_getTubeInfo(self):
    """ Query the database for tube info; check matching and non-matching filters """
    base_dir = path.abspath('.')
    db_file = path.join(self.mkoutdir(), 'test.db')
    fcs_name = "12-00031_Myeloid 1.fcs"
    fcs_path = path.abspath(data(fcs_name))
    sample = FCS(filepath=fcs_path)

    database = FCSdatabase(db=db_file, rebuild=True)
    sample.meta_to_db(db=database, dir=base_dir)

    # Matching tube + date range returns the case keyed by tube and datetime
    hit = {
        u'12-00031': {
            u'Myeloid 1': {
                datetime.datetime(2012, 1, 3, 12, 0, 15):
                    u'testfiles/12-00031_Myeloid 1.fcs'
            }
        }
    }
    result = database.query(tubes=['Myeloid 1'],
                            daterange=['2012-01-01', '2012-01-04'],
                            getTubeInfo=True).results
    self.assertEqual(result, hit)

    # Date range that ends before the acquisition date yields nothing
    result = database.query(tubes=['Myeloid 1'],
                            daterange=['2012-01-01', '2012-01-02'],
                            getTubeInfo=True).results
    self.assertEqual(result, {})

    # Non-matching tube name yields nothing
    result = database.query(tubes=['Myeloid 2'],
                            daterange=['2012-01-01', '2012-01-04'],
                            getTubeInfo=True).results
    self.assertEqual(result, {})
def test_query_getfiles(self):
    """ Query the database with getfiles=True; check matching and non-matching filters """
    base_dir = path.abspath('.')
    db_file = path.join(self.mkoutdir(), 'test.db')
    fcs_name = "12-00031_Myeloid 1.fcs"
    fcs_path = path.abspath(data(fcs_name))
    sample = FCS(filepath=fcs_path)

    database = FCSdatabase(db=db_file, rebuild=True)
    sample.meta_to_db(db=database, dir=base_dir)

    # Every positive query should return the single loaded case
    hit = {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}}

    # Matching tube + date range
    result = database.query(tubes=['Myeloid 1'],
                            daterange=['2012-01-01', '2012-01-04'],
                            getfiles=True).results
    self.assertEqual(result, hit)

    # No filters at all: everything comes back
    self.assertEqual(database.query(getfiles=True).results, hit)

    # Filtering on a case that is present
    result = database.query(getfiles=True, cases=['12-00031']).results
    self.assertEqual(result, hit)

    # Filtering on a case that is absent
    result = database.query(getfiles=True, cases=['12-00032']).results
    self.assertEqual(result, {})

    # Date range that ends before the acquisition date yields nothing
    result = database.query(tubes=['Myeloid 1'],
                            daterange=['2012-01-01', '2012-01-02'],
                            getfiles=True).results
    self.assertEqual(result, {})

    # Non-matching tube name yields nothing
    result = database.query(tubes=['Myeloid 2'],
                            daterange=['2012-01-01', '2012-01-04'],
                            getfiles=True).results
    self.assertEqual(result, {})