def test_trainer(self):
    """
    Verify that a training run leaves a parameters file and a figure behind.

    A Trainer is built from the sample training/validation datasets, trained
    for 50 iterations, and asked to export its best parameters into the
    working directory. The test passes when both 'params.npz' and '07.eps'
    exist in that directory afterwards.
    """
    self.individual_debug = True
    self.init_test('test_trainer')
    self.init_trainer_instance()

    # Both sample datasets live under the same directory.
    sample_dir = cbv_const.CBV_SAMPLE_DATASET_DIR
    train_set = DataSet(os.path.join(sample_dir, 'training_dataset'))
    valid_set = DataSet(os.path.join(sample_dir, 'validation_dataset'))

    # Files expected in the working directory once training is done.
    params_file = os.path.join(self.working_dir, 'params.npz')
    figure_file = os.path.join(self.working_dir, '07.eps')

    trainer = Trainer(train_set,
                      valid_set,
                      seed=20,
                      n_hidden_nodes=7,
                      figure_dir=self.working_dir)
    trainer.train(iterations=50)
    trainer.export_best_parameters(params_file=params_file)

    for expected_path in (params_file, figure_file):
        self.assertTrue(os.path.exists(expected_path),
                        msg='Trainer does not functional properly')
def train_combivep_using_cbv_data(training_data_file,
                                  params_out_file=cbv_const.USER_PARAMS_FILE,
                                  random_seed=cbv_const.DFLT_SEED,
                                  n_hidden_nodes=cbv_const.DFLT_HIDDEN_NODES,
                                  figure_dir=cbv_const.DFLT_FIGURE_DIR,
                                  iterations=cbv_const.DFLT_ITERATIONS,
                                  cfg_file=cbv_const.CBV_CFG_FILE,
                                  ):
    """
    Train CombiVEP from a CBV file and export the best parameters.

    CBV (CombiVEP format) is a parsed format intended to be used by CombiVEP.
    CBV has 5 fields:
    - CHROM
    - POS
    - REF
    - ALT
    - EFFECT(1=deleterious, 0=neutral)
    All are tab separated.

    Required arguments
    - training_data_file : training data, loaded with
                           file_type=cbv_const.FILE_TYPE_CBV

    Optional arguments
    - params_out_file : destination passed to
                        Trainer.export_best_parameters (assumed to be a
                        filesystem path -- TODO confirm)
    - random_seed     : seed used for shuffling the data, also passed to
                        Trainer
    - n_hidden_nodes  : passed to Trainer
    - figure_dir      : passed to Trainer
    - iterations      : passed to Trainer.train
    - cfg_file        : passed to DataSetManager
    """
    # pre-processing dataset
    # sys.stderr.write works on both Python 2 and Python 3, unlike the
    # "print >> sys.stderr" statement.
    sys.stderr.write('pre-processing dataset, this may take a while '
                     '(around 750 SNPs/mins). . .\n')
    dm = DataSetManager(cfg_file=cfg_file)
    dm.load_data(training_data_file, file_type=cbv_const.FILE_TYPE_CBV)
    dm.validate_data()
    dm.calculate_scores()
    dm.set_shuffle_seed(random_seed)
    dm.shuffle_data()
    dm.partition_data()

    # partition data
    training_data = dm.get_training_data()
    validation_data = dm.get_validation_data()

    # train !!!
    sys.stderr.write('Training CombiVEP, please wait '
                     '(around 500 SNPs/mins) . . .\n')
    trainer = Trainer(training_data,
                      validation_data,
                      random_seed,
                      n_hidden_nodes,
                      figure_dir)
    trainer.train(iterations)

    # The default user parameters directory is still created, as before.
    # The directory that actually receives params_out_file is created too,
    # so a caller-supplied path in a not-yet-existing directory does not
    # make the export fail after the whole training run.
    output_dir = os.path.dirname(os.path.abspath(params_out_file))
    for directory in (cbv_const.USER_PARAMS_DIR, output_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    trainer.export_best_parameters(params_out_file)
def fast_training(training_data_file,
                  params_out_file=cbv_const.USER_PARAMS_FILE,
                  random_seed=cbv_const.DFLT_SEED,
                  n_hidden_nodes=cbv_const.DFLT_HIDDEN_NODES,
                  figure_dir=cbv_const.DFLT_FIGURE_DIR,
                  iterations=cbv_const.DFLT_ITERATIONS,
                  cfg_file=cbv_const.CBV_CFG_FILE,
                  ):
    """
    Train CombiVEP from an already-prepared data file and export the best
    parameters.

    The file is loaded through FastDataSetManager with
    file_type=dev_const.FILE_TYPE_SCORES; no validation or score calculation
    step is run here before shuffling and partitioning.

    NOTE(review): the previous docstring described the input as CBV
    (CombiVEP format: CHROM, POS, REF, ALT, EFFECT (1=deleterious,
    0=neutr), tab separated), but the file is loaded as FILE_TYPE_SCORES --
    confirm the expected layout.

    Required arguments
    - training_data_file : training data, loaded with
                           file_type=dev_const.FILE_TYPE_SCORES

    Optional arguments
    - params_out_file : destination passed to
                        Trainer.export_best_parameters (assumed to be a
                        filesystem path -- TODO confirm)
    - random_seed     : seed used for shuffling the data, also passed to
                        Trainer
    - n_hidden_nodes  : passed to Trainer
    - figure_dir      : passed to Trainer
    - iterations      : passed to Trainer.train
    - cfg_file        : passed to FastDataSetManager
    """
    # pre-processing dataset
    info('pre-processing dataset, this may take a while (around 750 SNPs/mins). . .')
    dm = FastDataSetManager(cfg_file=cfg_file)
    dm.load_data(training_data_file, file_type=dev_const.FILE_TYPE_SCORES)
    dm.set_shuffle_seed(random_seed)
    dm.shuffle_data()
    dm.partition_data()

    # partition data
    training_dataset = dm.get_training_data()
    validation_dataset = dm.get_validation_data()

    # train !!!
    info('Training CombiVEP, please wait (around 500 SNPs/mins) . . .')
    trainer = Trainer(training_dataset,
                      validation_dataset,
                      random_seed,
                      n_hidden_nodes,
                      figure_dir)
    trainer.train(iterations)

    # The default user parameters directory is still created, as before.
    # The directory that actually receives params_out_file is created too,
    # so a caller-supplied path in a not-yet-existing directory does not
    # make the export fail after the whole training run.
    output_dir = os.path.dirname(os.path.abspath(params_out_file))
    for directory in (cbv_const.USER_PARAMS_DIR, output_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    trainer.export_best_parameters(params_out_file)
def test_trainer(self):
    """
    Train on the central test datasets and check the produced artefacts.

    After a 50-iteration run and an export of the best parameters, both the
    parameters file ("params.npz") and the figure file ("07.eps") must exist
    in the working directory.
    """
    self.individual_debug = True
    self.init_test("test_trainer")
    self.init_trainer_instance()

    # Locate and load the two datasets used for the run.
    training_path = os.path.join(
        combivep_settings.COMBIVEP_CENTRAL_TEST_DATASET_DIR,
        "training_dataset",
    )
    training_dataset = DataSet(training_path)
    validation_path = os.path.join(
        combivep_settings.COMBIVEP_CENTRAL_TEST_DATASET_DIR,
        "validation_dataset",
    )
    validation_dataset = DataSet(validation_path)

    trainer = Trainer(
        training_dataset,
        validation_dataset,
        seed=20,
        n_hidden_nodes=7,
        figure_dir=self.working_dir,
    )
    trainer.train(iterations=50)

    failure_msg = "Trainer does not functional properly"

    # The exported parameters must be on disk.
    params_file = os.path.join(self.working_dir, "params.npz")
    trainer.export_best_parameters(params_file=params_file)
    params_written = os.path.exists(params_file)
    self.assertTrue(params_written, msg=failure_msg)

    # So must the figure in the working directory.
    figure_file = os.path.join(self.working_dir, "07.eps")
    figure_written = os.path.exists(figure_file)
    self.assertTrue(figure_written, msg=failure_msg)