Пример #1
0
    def addCustomCaseData(self, file, whittle=True):
        """ Method to load file (tab-text) into database table CustomCaseData

        This must include <case_number> and <category> columns
        Note: this will overwrite existing data

        Column names are lower-cased, the first column whose name starts
        with 'case' is renamed to 'case_number', and only columns that are
        also columns of CustomCaseData are written.  Every case found in
        the file but not returned by the database case query gets an empty
        FCS entry flagged 'CustomData_ONLY'.

        :param file: file handed to Lab_pred_table for loading
        :param whittle: not used by this method
        :raises ValueError: if none of the file's columns matches a column
            of CustomCaseData
        """

        a = Lab_pred_table(db=self, file=file)

        # ### Handle column names ###
        a.dat.columns = [c.lower() for c in a.dat.columns.values]
        a_cols = a.dat.columns.tolist()

        # Convert 'CASE*' => 'case_number' (first matching column only)
        case_index = next((index for index, value in enumerate(a_cols)
                           if value[:4] == 'case'), None)
        if case_index is not None:
            a_cols[case_index] = 'case_number'

        # list() so the positional lookup below works whatever keys() returns
        db_cols = list(CustomCaseData.__mapper__.columns.keys())
        cols = [c for c in a_cols if c in db_cols]

        # Add second column if only captured 1 column and rename to <category>
        # NOTE(review): this always renames the file's second column, so it
        # presumably expects the matched column to be the first one and the
        # file to have at least two columns -- confirm against callers
        if (len(db_cols) > 1) and (len(cols) == 1):
            cols.append(db_cols[1])
            a_cols[1] = cols[1]

        a.dat.columns = a_cols

        # ### Nothing to load unless one of the columns matches ###
        if not cols:
            raise ValueError("File %s does not have columns 'case_number' and 'category'" % (file))

        log.info('Adding file %s to db %s', file, self.db_file)

        a.dat = a.dat[cols]

        # Identify cases in custom data but not in meta db
        # (take the first field of every row; unlike zip(*rows)[0] this also
        # works when the query returns no rows)
        case_rows = queryDB(self, getCases=True, not_flagged=False).results
        db_case_list = [row[0] for row in case_rows]
        in_db = a.dat.case_number.isin(db_case_list)
        cases_missing_in_db = a.dat.case_number.loc[~in_db].tolist()

        # Write custom data (replaces any existing table)
        a.dat.to_sql('CustomCaseData', con=self.engine,
                     if_exists='replace', index=False)

        # Add empty FCS objects to db for each case in cases_missing_in_db
        for c in cases_missing_in_db:
            log.info('Making empty FCS for {}'.format(c))
            fFCS = FCS(case_number=c,
                       flag='CustomData_ONLY',
                       error_message='Added to db because in custom list but not in metadb')
            fFCS.meta_to_db(db=self, add_lists=True)
Пример #2
0
    def test_meta_to_db(self):
        """ Smoke test: pushing FCS meta data into a rebuilt database runs

        NOTE: the contents of the database are not inspected
        """

        cwd = path.abspath('.')
        db_path = path.join(self.mkoutdir(), 'test.db')

        fcs = FCS(filepath=data(test_fcs_fn))
        database = FCSdatabase(db=db_path, rebuild=True)
        fcs.meta_to_db(db=database, dir=cwd)
        """
Пример #3
0
    def test_query_getfiles(self):
        """ Check the getfiles dict-of-dicts returned by database queries """

        cwd = path.abspath('.')
        db_path = path.join(self.mkoutdir(), 'test.db')
        fcs_path = path.abspath(data("12-00031_Myeloid 1.fcs"))

        fcs = FCS(filepath=fcs_path)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=db_path, rebuild=True)

        fcs.meta_to_db(db=db, dir=cwd)

        # Result expected whenever the single loaded file is selected
        found = {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}}

        # (query keyword arguments, expected results), checked in order
        scenarios = [
            # specific positive request
            ({'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getfiles': True}, found),
            # empty query
            ({'getfiles': True}, found),
            # case query, positive
            ({'getfiles': True, 'cases': ['12-00031']}, found),
            # case query, negative (case not in the database)
            ({'getfiles': True, 'cases': ['12-00032']}, {}),
            # specific negative request: daterange
            ({'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-02'],
              'getfiles': True}, {}),
            # specific negative request: tubes
            ({'tubes': ['Myeloid 2'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getfiles': True}, {}),
        ]
        for q_dict, expected in scenarios:
            self.assertEqual(db.query(**q_dict).results, expected)
Пример #4
0
    def test_meta_to_db(self):
        """ Verify that sending meta data to a rebuilt database completes

        NOTE: what ends up in the database is not checked here
        """

        start_dir = path.abspath('.')
        db_file = path.join(self.mkoutdir(), 'test.db')

        fcs_file = data(test_fcs_fn)
        fcs_obj = FCS(filepath=fcs_file)
        fcs_db = FCSdatabase(db=db_file, rebuild=True)
        fcs_obj.meta_to_db(db=fcs_db, dir=start_dir)

        """
Пример #5
0
    def test_query_getTubeInfo(self):
        """ Check the getTubeInfo results returned by FCS_database queries """

        cwd = path.abspath('.')
        db_path = path.join(self.mkoutdir(), 'test.db')
        fcs_path = path.abspath(data("12-00031_Myeloid 1.fcs"))

        fcs = FCS(filepath=fcs_path)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=db_path, rebuild=True)

        fcs.meta_to_db(db=db, dir=cwd)

        def tube_info(tubes, daterange):
            """ Run one getTubeInfo query and hand back its results """
            return db.query(tubes=tubes,
                            daterange=daterange,
                            getTubeInfo=True).results

        # Specific positive request
        acquired = datetime.datetime(2012, 1, 3, 12, 0, 15)
        expected = {
            u'12-00031': {
                u'Myeloid 1': {acquired: u'testfiles/12-00031_Myeloid 1.fcs'}
            }
        }
        self.assertEqual(
            tube_info(['Myeloid 1'], ['2012-01-01', '2012-01-04']), expected)

        # Specific negative request: daterange
        self.assertEqual(
            tube_info(['Myeloid 1'], ['2012-01-01', '2012-01-02']), {})

        # Specific negative request: tubes
        self.assertEqual(
            tube_info(['Myeloid 2'], ['2012-01-01', '2012-01-04']), {})
Пример #6
0
    def test_push_pull(self):
        """
        Round-trips FCS features through Feature_IO (push, then pull)
        """
        # file locations
        fcs_path = data(test_fcs_fn)
        db_path = path.join(self.mkoutdir(), 'test.db')
        hdf_path = path.join(self.mkoutdir(), 'test_Feature_HDF.hdf5')

        # load the FCS file, run comp_scale_FCS_data and extract features
        fcs = FCS(filepath=fcs_path, import_dataframe=True)
        fcs.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False,
                                auto_comp=False)
        fcs.feature_extraction(extraction_type='Full', bins=10)
        log.debug(fcs.FCS_features.histogram)

        # rebuilt database receiving the meta data
        db = FCSdatabase(db=db_path, rebuild=True)
        fcs.meta_to_db(db=db, dir=path.abspath('.'))
        log.debug(fcs.case_tube_idx)

        # HDF5 feature store: push the features ...
        store = Feature_IO(filepath=hdf_path)
        store.push_fcs_features(case_tube_idx=fcs.case_tube_idx,
                                FCS=fcs,
                                db=db)

        # ... and pull them back (single case retrieval)
        pulled = store.get_fcs_features(fcs.case_tube_idx)
        log.debug(pulled)
        np.testing.assert_allclose(pulled.data,
                                   fcs.FCS_features.histogram.data)

        # push a one-row failed_cti list to the "meta data"
        failed_rows = np.array([['13-12345', '1', "Dummy Error"]])
        cti_list = pd.DataFrame(data=failed_rows,
                                index=[1],
                                columns=['casenum', 'cti', 'errormessage'])
        store.push_failed_cti_list(cti_list)

        # read the meta data back from the HDF5 file
        meta_data = store.get_meta_data()
        log.debug("File meta data is {}".format(meta_data))
Пример #7
0
    def test_ML_push_pull(self):
        """
        Pushes features and annotations into a MergedFeatures_IO file
        """
        # file locations
        fcs_path = data(test_fcs_fn)
        db_path = path.join(self.mkoutdir(), 'test.db')
        feature_hdf_path = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
        merged_hdf_path = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

        # load the FCS file, run comp_scale_FCS_data and extract features
        fcs = FCS(filepath=fcs_path, import_dataframe=True)
        fcs.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False,
                                auto_comp=False)
        fcs.feature_extraction(extraction_type='Full', bins=10)
        log.debug(fcs.FCS_features.histogram)

        # rebuilt database receiving the meta data
        db = FCSdatabase(db=db_path, rebuild=True)
        fcs.meta_to_db(db=db, dir=path.abspath('.'))
        log.debug(fcs.case_tube_idx)

        # per-tube feature store
        feature_store = Feature_IO(filepath=feature_hdf_path)
        feature_store.push_fcs_features(case_tube_idx=fcs.case_tube_idx,
                                        FCS=fcs,
                                        db=db)

        # single tube analysis over the one pushed case_tube_idx
        analysis = feature_store.make_single_tube_analysis([fcs.case_tube_idx])
        feature_DF, not_in_data, merge_fail = analysis

        # merged feature store: features first, then annotations
        merged_store = MergedFeatures_IO(filepath=merged_hdf_path, clobber=True)
        merged_store.push_features(feature_DF)

        annotations = pd.DataFrame([[test_fcs_fn, 0]],
                                   columns=['case_num', 'annotation'])
        merged_store.push_annotations(annotations)
Пример #8
0
    def test_ML_push_pull(self):
        """
        Exercises MergedFeatures_IO.push_features and push_annotations
        """
        # paths used by the test
        fcs_file = data(test_fcs_fn)
        db_file = path.join(self.mkoutdir(), 'test.db')
        ft_hdf_file = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
        ml_hdf_file = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

        # FCS setup: import data, run comp_scale_FCS_data, extract features
        comp_scale_options = dict(compensation_file=comp_file,
                                  gate_coords=gate_coords,
                                  rescale_lim=(-0.5, 1),
                                  strict=False,
                                  auto_comp=False)
        fcs_obj = FCS(filepath=fcs_file, import_dataframe=True)
        fcs_obj.comp_scale_FCS_data(**comp_scale_options)
        fcs_obj.feature_extraction(extraction_type='Full', bins=10)
        log.debug(fcs_obj.FCS_features.histogram)

        # database setup
        db_obj = FCSdatabase(db=db_file, rebuild=True)
        fcs_obj.meta_to_db(db=db_obj, dir=path.abspath('.'))
        log.debug(fcs_obj.case_tube_idx)

        # feature HDF5 file: push the extracted features
        ft_hdf = Feature_IO(filepath=ft_hdf_file)
        ft_hdf.push_fcs_features(case_tube_idx=fcs_obj.case_tube_idx,
                                 FCS=fcs_obj,
                                 db=db_obj)

        # build the feature table for the single pushed case_tube_idx
        case_tube_list = [fcs_obj.case_tube_idx]
        feature_DF, not_in_data, merge_fail = \
            ft_hdf.make_single_tube_analysis(case_tube_list)

        # merged HDF5 file: features, then one annotation row
        ml_hdf = MergedFeatures_IO(filepath=ml_hdf_file, clobber=True)
        ml_hdf.push_features(feature_DF)
        ml_hdf.push_annotations(
            pd.DataFrame([[test_fcs_fn, 0]],
                         columns=['case_num', 'annotation']))
Пример #9
0
def action(args):
    # Collect files/dirs
    Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list)

    # Connect to database (and rebuild)
    db = FCSdatabase(db=args.db_filepath, rebuild=True)
    print "Building database %s" % db.db_file

    # Process files/dirs
    case_tube_idx = 0
    for f in Finder.filenames:
        try:
            fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx)
            fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True)
        except:
            print "Skipping FCS %s because of unknown error related to: %s" % \
                (f, sys.exc_info()[0])

        print("{:6d} Cases uploaded\r".format(case_tube_idx)),
        case_tube_idx += 1

        if args.n is not None and case_tube_idx >= args.n:
            break
Пример #10
0
def action(args):
    # Collect files/dirs
    Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list)

    # Connect to database (and rebuild)
    db = FCSdatabase(db=args.db_filepath, rebuild=True)
    print "Building database %s" % db.db_file

    # Process files/dirs
    case_tube_idx = 0
    for f in Finder.filenames:
        try:
            fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx)
            fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True)
        except:
            print "Skipping FCS %s because of unknown error related to: %s" % \
                (f, sys.exc_info()[0])

        print("{:6d} Cases uploaded\r".format(case_tube_idx)),
        case_tube_idx += 1

        if args.n is not None and case_tube_idx >= args.n:
            break
Пример #11
0
    def test_query_getTubeInfo(self):
        """ Query FCS_database for tube information and check the results """

        start_dir = path.abspath('.')
        db_file = path.join(self.mkoutdir(), 'test.db')
        fcs_file = path.abspath(data("12-00031_Myeloid 1.fcs"))

        fcs_obj = FCS(filepath=fcs_file)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=db_file, rebuild=True)

        fcs_obj.meta_to_db(db=db, dir=start_dir)

        # Result expected when the loaded tube is selected
        tube_hit = {
            u'12-00031': {
                u'Myeloid 1': {
                    datetime.datetime(2012, 1, 3, 12, 0, 15):
                    u'testfiles/12-00031_Myeloid 1.fcs',
                },
            },
        }

        # (tubes, daterange, expected results), checked in order
        scenarios = [
            # specific positive request
            (['Myeloid 1'], ['2012-01-01', '2012-01-04'], tube_hit),
            # specific negative request: daterange
            (['Myeloid 1'], ['2012-01-01', '2012-01-02'], {}),
            # specific negative request: tubes
            (['Myeloid 2'], ['2012-01-01', '2012-01-04'], {}),
        ]
        for tubes, daterange, expected in scenarios:
            q_dict = {'tubes': tubes,
                      'daterange': daterange,
                      'getTubeInfo': True}
            self.assertEqual(db.query(**q_dict).results, expected)
Пример #12
0
    def test_query_getfiles(self):
        """ Query the database with getfiles and check the dict of dicts """

        start_dir = path.abspath('.')
        db_file = path.join(self.mkoutdir(), 'test.db')
        fcs_file = path.abspath(data("12-00031_Myeloid 1.fcs"))

        fcs_obj = FCS(filepath=fcs_file)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=db_file, rebuild=True)

        fcs_obj.meta_to_db(db=db, dir=start_dir)

        def files_for(**criteria):
            """ Run one getfiles query with the given extra criteria """
            return db.query(getfiles=True, **criteria).results

        # Result expected whenever the single loaded file is selected
        file_hit = {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}}

        # Specific positive request
        self.assertEqual(
            files_for(tubes=['Myeloid 1'],
                      daterange=['2012-01-01', '2012-01-04']),
            file_hit)

        # Empty query
        self.assertEqual(files_for(), file_hit)

        # Case query, positive
        self.assertEqual(files_for(cases=['12-00031']), file_hit)

        # Case query, negative (case not in the database)
        self.assertEqual(files_for(cases=['12-00032']), {})

        # Specific negative request: daterange
        self.assertEqual(
            files_for(tubes=['Myeloid 1'],
                      daterange=['2012-01-01', '2012-01-02']),
            {})

        # Specific negative request: tubes
        self.assertEqual(
            files_for(tubes=['Myeloid 2'],
                      daterange=['2012-01-01', '2012-01-04']),
            {})