def __init__(self,
             number_of_appropriate_bios_records: int = 2000,
             number_of_training_records: int = 2115,
             number_of_test_records: int = 0) -> None:
    """Load the labeled bios and size the training/test split.

    Args:
        number_of_appropriate_bios_records: How many rows to take from the
            top of the file at ``self.bios_path`` as appropriate bios.
        number_of_training_records: Stored as the training-set size.
        number_of_test_records: Upper bound on the number of test records.
            A falsy value (the default ``0``) means "use every record
            left over after the training records are accounted for".

    The resulting ``self.number_of_test_records`` is never negative, even
    when ``number_of_training_records`` exceeds the total number of
    records or a negative ``number_of_test_records`` is passed.
    """
    self.number_of_appropriate_bios_records = number_of_appropriate_bios_records

    # The inappropriate bios come from their own file; every row is used.
    self.inappropriate_bios = PandasUtils.select_series(
        FileUtils.read_excel_file(self.inappropriate_bios_path),
        self.ColNames.BIO.value)
    self.number_of_inappropriate_bios_records = len(
        self.inappropriate_bios.index)

    self.number_of_all_bios_records = (
        self.number_of_appropriate_bios_records
        + self.number_of_inappropriate_bios_records)
    self.number_of_training_records = number_of_training_records

    # Records not claimed by training. This goes negative when the
    # requested training size exceeds the total, hence the clamp below.
    remaining_records = (
        self.number_of_all_bios_records - self.number_of_training_records)
    if number_of_test_records:
        test_records = min(remaining_records, number_of_test_records)
    else:
        # 0 (or any falsy value) means "test on everything that is left".
        test_records = remaining_records
    self.number_of_test_records = max(0, test_records)

    self.bios = FileUtils.read_excel_file(self.bios_path)
    # NOTE(review): head() may return fewer rows than requested if the file
    # is shorter; in that case number_of_appropriate_bios_records would be
    # overstated -- confirm the input file always has enough rows.
    self.appropriate_bios = PandasUtils.select_series(
        self.bios.head(self.number_of_appropriate_bios_records),
        self.ColNames.BIO.value)

    # Defined elsewhere in the class; presumably builds the training and
    # test series from the attributes set above -- verify against its body.
    self.__generate_training_and_test_series()

    self.model = None
    self.predictions = []

    Logger.info(
        "Number of appropriate labeled bios records is : {}".format(
            self.number_of_appropriate_bios_records))
    Logger.info(
        "Number of inappropriate labeled bios records is : {}".format(
            self.number_of_inappropriate_bios_records))
    Logger.info("Number of training_records is : {}".format(
        self.number_of_training_records))
    Logger.info("Number of test records is : {}".format(
        self.number_of_test_records))