Exemplo n.º 1
0
    def test_trainer(self):
        """

        Check that a training run leaves its output files behind.

        A Trainer is built from the sample training/validation datasets,
        trained for 50 iterations and asked to export its best parameters.
        The test then expects both 'params.npz' and '07.eps' to exist in
        the working directory.

        """
        self.individual_debug = True
        self.init_test('test_trainer')
        self.init_trainer_instance()

        # Resolve the locations used throughout the test once
        sample_dir = cbv_const.CBV_SAMPLE_DATASET_DIR
        out_dir = self.working_dir

        training_path = os.path.join(sample_dir, 'training_dataset')
        training_data = DataSet(training_path)
        validation_path = os.path.join(sample_dir, 'validation_dataset')
        validation_data = DataSet(validation_path)

        trainer_kwargs = {'seed': 20,
                          'n_hidden_nodes': 7,
                          'figure_dir': out_dir}
        trainer = Trainer(training_data, validation_data, **trainer_kwargs)
        trainer.train(iterations=50)

        # The parameters file is written by the explicit export call
        params_file = os.path.join(out_dir, 'params.npz')
        trainer.export_best_parameters(params_file=params_file)
        params_exist = os.path.exists(params_file)
        self.assertTrue(params_exist,
                        msg='Trainer does not functional properly')

        # The figure is expected in the directory given as figure_dir
        figure_file = os.path.join(out_dir, '07.eps')
        figure_exist = os.path.exists(figure_file)
        self.assertTrue(figure_exist,
                        msg='Trainer does not functional properly')
Exemplo n.º 2
0
def train_combivep_using_cbv_data(training_data_file,
                                  params_out_file=cbv_const.USER_PARAMS_FILE,
                                  random_seed=cbv_const.DFLT_SEED,
                                  n_hidden_nodes=cbv_const.DFLT_HIDDEN_NODES,
                                  figure_dir=cbv_const.DFLT_FIGURE_DIR,
                                  iterations=cbv_const.DFLT_ITERATIONS,
                                  cfg_file=cbv_const.CBV_CFG_FILE,
                                  ):
    """

    Train CombiVEP from a CBV file and export the best parameters found.

    CBV (CombiVEP format) is a parsed format intended to be used by CombiVEP.
    CBV has 5 fields:
    - CHROM
    - POS
    - REF
    - ALT
    - EFFECT(1=deleterious, 0=neutral)
    All are tab separated.

    Required arguments
    - training_data_file : file with the SNPs to train on, CBV format

    Optional arguments
    - params_out_file : file the best parameters are exported to
    - random_seed : used as the shuffle seed and also passed to the Trainer
    - n_hidden_nodes : passed to the Trainer
    - figure_dir : passed to the Trainer
    - iterations : passed to Trainer.train
    - cfg_file : configuration file given to the DataSetManager

    Progress messages are written to stderr.

    """
    #pre-processing dataset
    # sys.stderr.write emits the same bytes as the old print statement and
    # is valid on both Python 2 and Python 3
    sys.stderr.write('pre-processing dataset, this may take a while (around 750 SNPs/mins). . .\n')
    dm = DataSetManager(cfg_file=cfg_file)
    dm.load_data(training_data_file, file_type=cbv_const.FILE_TYPE_CBV)
    dm.validate_data()
    dm.calculate_scores()
    dm.set_shuffle_seed(random_seed)
    dm.shuffle_data()
    dm.partition_data()

    #partition data
    training_data   = dm.get_training_data()
    validation_data = dm.get_validation_data()

    #train !!!
    sys.stderr.write('Training CombiVEP, please wait (around 500 SNPs/mins) . . .\n')
    trainer = Trainer(training_data,
                      validation_data,
                      random_seed,
                      n_hidden_nodes,
                      figure_dir)
    trainer.train(iterations)

    # The default parameters directory is still created as before; on top of
    # that, make sure the directory that actually receives params_out_file
    # exists, so a caller-supplied output path does not fail on a missing
    # directory.
    output_dir = os.path.dirname(os.path.abspath(params_out_file))
    for required_dir in (cbv_const.USER_PARAMS_DIR, output_dir):
        if not os.path.exists(required_dir):
            os.makedirs(required_dir)
    trainer.export_best_parameters(params_out_file)
Exemplo n.º 3
0
def fast_training(training_data_file,
                  params_out_file=cbv_const.USER_PARAMS_FILE,
                  random_seed=cbv_const.DFLT_SEED,
                  n_hidden_nodes=cbv_const.DFLT_HIDDEN_NODES,
                  figure_dir=cbv_const.DFLT_FIGURE_DIR,
                  iterations=cbv_const.DFLT_ITERATIONS,
                  cfg_file=cbv_const.CBV_CFG_FILE,
                  ):
    """

    Train CombiVEP from a file loaded with dev_const.FILE_TYPE_SCORES and
    export the best parameters found.

    The loaded data is shuffled and partitioned directly; no validation or
    score-calculation step is run here.

    NOTE(review): earlier documentation of this function described the input
    as CBV (CombiVEP format: 5 tab separated fields CHROM, POS, REF, ALT,
    EFFECT with 1=deleterious, 0=neutr), but the file is loaded as
    FILE_TYPE_SCORES - confirm the expected layout.

    Required arguments
    - training_data_file : file with the data to train on

    Optional arguments
    - params_out_file : file the best parameters are exported to
    - random_seed : used as the shuffle seed and also passed to the Trainer
    - n_hidden_nodes : passed to the Trainer
    - figure_dir : passed to the Trainer
    - iterations : passed to Trainer.train
    - cfg_file : configuration file given to the FastDataSetManager

    """
    #pre-processing dataset
    info('pre-processing dataset, this may take a while (around 750 SNPs/mins). . .')
    dm = FastDataSetManager(cfg_file=cfg_file)
    dm.load_data(training_data_file, file_type=dev_const.FILE_TYPE_SCORES)
    dm.set_shuffle_seed(random_seed)
    dm.shuffle_data()
    dm.partition_data()

    #partition data
    training_dataset   = dm.get_training_data()
    validation_dataset = dm.get_validation_data()

    #train !!!
    info('Training CombiVEP, please wait (around 500 SNPs/mins) . . .')
    trainer = Trainer(training_dataset,
                      validation_dataset,
                      random_seed,
                      n_hidden_nodes,
                      figure_dir)
    trainer.train(iterations)

    # The default parameters directory is still created as before; on top of
    # that, make sure the directory that actually receives params_out_file
    # exists, so a caller-supplied output path does not fail on a missing
    # directory.
    output_dir = os.path.dirname(os.path.abspath(params_out_file))
    for required_dir in (cbv_const.USER_PARAMS_DIR, output_dir):
        if not os.path.exists(required_dir):
            os.makedirs(required_dir)
    trainer.export_best_parameters(params_out_file)
    def test_trainer(self):
        """

        Verify that training leaves a parameters file and a figure behind.

        Runs 50 iterations on the central test datasets, exports the best
        parameters to "params.npz" and then expects both that file and
        "07.eps" to exist under the working directory.

        """
        self.individual_debug = True
        self.init_test("test_trainer")
        self.init_trainer_instance()

        dataset_root = combivep_settings.COMBIVEP_CENTRAL_TEST_DATASET_DIR
        output_dir = self.working_dir

        training_path = os.path.join(dataset_root, "training_dataset")
        training_dataset = DataSet(training_path)
        validation_path = os.path.join(dataset_root, "validation_dataset")
        validation_dataset = DataSet(validation_path)

        trainer = Trainer(
            training_dataset,
            validation_dataset,
            seed=20,
            n_hidden_nodes=7,
            figure_dir=output_dir,
        )
        trainer.train(iterations=50)

        params_file = os.path.join(output_dir, "params.npz")
        trainer.export_best_parameters(params_file=params_file)

        # Both artefacts are checked the same way and share one failure message
        failure_msg = "Trainer does not functional properly"
        expected_outputs = (params_file, os.path.join(output_dir, "07.eps"))
        for produced in expected_outputs:
            self.assertTrue(os.path.exists(produced), msg=failure_msg)