def test_multiprocessing_speedup(self, verbose=0):
        """Check that parallel cross_val_score beats a serial run.

        Times ``pipecaster.cross_validation.cross_val_score`` on a synthetic
        two-class dataset, once with ``n_processes=1`` and once with
        ``n_processes=n_cpus``.  A slower-or-equal parallel run produces a
        warning rather than a test failure.  Nothing is done when ``n_cpus``
        is 1 or less.

        Side effect: ``self.X_cls`` and ``self.y_cls`` are rebound to the
        synthetic dataset generated here.

        Parameters
        ----------
        verbose : int, default=0
            When greater than 0, print the mean duration of a single serial
            call and of a single parallel call.
        """
        if n_cpus <= 1:
            return

        n_repeats = 5
        setup_code = 'import pipecaster.cross_validation'
        stmt_template = ('pipecaster.cross_validation.cross_val_score('
                         'mclf, [X], y, cv = 5, n_processes = {})')

        # Shut off warnings because ray and redis generate massive numbers.
        # catch_warnings() restores the previous filters on exit, even when
        # something below raises, instead of wiping every filter globally.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            parallel.start_if_needed(n_cpus=n_cpus)
            X, y = self.X_cls, self.y_cls = make_classification(
                n_classes=2, n_samples=500, n_features=40,
                n_informative=20, random_state=test_seed)
            mclf = MultichannelPipeline(n_channels=1)
            # NOTE(review): the futile_cycles_* arguments presumably make
            # fitting slow enough to time -- confirm against DummyClassifier.
            mclf.add_layer(DummyClassifier(futile_cycles_fit=2000000,
                                           futile_cycles_pred=10))

            # Explicit namespace: exactly the names the timed statement uses.
            namespace = {'mclf': mclf, 'X': X, 'y': y}
            t_serial = timeit.timeit(setup=setup_code,
                                     stmt=stmt_template.format(1),
                                     globals=namespace,
                                     number=n_repeats)
            t_parallel = timeit.timeit(setup=setup_code,
                                       stmt=stmt_template.format(n_cpus),
                                       globals=namespace,
                                       number=n_repeats)

        if verbose > 0:
            # timeit returns the total over all repeats; divide for the mean.
            print('serial run mean time = {} s'.format(t_serial / n_repeats))
            print('parallel run mean time = {} s'.format(
                t_parallel / n_repeats))

        if t_serial <= t_parallel:
            warnings.warn('multiple cpus detected, but parallel cross_val_score not faster than serial, possible problem with multiprocessing')
 def test_multi_input_regression_parallel(self):
     """Compare parallel cross_val_score on a regressor with reference scores.

     Wraps ``self.rgr`` in a single-channel MultichannelPipeline, scores it
     with ``explained_variance_score`` using ``n_cpus`` processes, and
     requires the result to equal ``self.rgr_scores`` exactly (the failure
     message describes these as the sklearn control).  Nothing is done when
     ``n_cpus`` is 1 or less.
     """
     if n_cpus <= 1:
         return

     # catch_warnings() restores the previous filters on exit, even if the
     # scoring call raises, instead of resetting every filter globally.
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore")
         parallel.start_if_needed(n_cpus=n_cpus)
         mrgr = MultichannelPipeline(n_channels=1)
         mrgr.add_layer(self.rgr)
         pc_scores = pc_cross_validation.cross_val_score(
             mrgr, [self.X_rgr], self.y_rgr,
             scorer=explained_variance_score,
             cv=self.cv, n_processes=n_cpus)

     self.assertTrue(np.array_equal(self.rgr_scores, pc_scores), 'regressor scores from pipecaster.cross_validation.cross_val_score did not match sklearn control (multi input predictor)')
 def test_multi_input_classification_parallel(self):
     """Compare parallel cross_val_score on a classifier with reference scores.

     Wraps ``self.clf`` in a single-channel MultichannelPipeline, scores its
     ``predict_proba`` output with ``roc_auc_score`` using ``n_cpus``
     processes, and requires the result to equal ``self.cls_scores`` exactly
     (the failure message describes these as the sklearn control).  Nothing
     is done when ``n_cpus`` is 1 or less.
     """
     # NOTE(review): another method with this exact name is defined later in
     # this file.  If both live in the same class, the later definition
     # replaces this one and this test never runs -- confirm they belong to
     # different classes.
     if n_cpus <= 1:
         return

     # catch_warnings() restores the previous filters on exit, even if the
     # scoring call raises, instead of resetting every filter globally.
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore")
         # Pass n_cpus explicitly, as the sibling parallel tests do.
         parallel.start_if_needed(n_cpus=n_cpus)
         mclf = MultichannelPipeline(n_channels=1)
         mclf.add_layer(self.clf)
         pc_scores = pc_cross_validation.cross_val_score(
             mclf, [self.X_cls], self.y_cls, score_method='predict_proba',
             scorer=roc_auc_score, cv=self.cv, n_processes=n_cpus)

     self.assertTrue(np.array_equal(self.cls_scores, pc_scores), 'classifier scores from pipecaster.cross_validation.cross_val_score did not match sklearn control (multi input predictor)')
 def test_multi_input_classification_parallel(self):
     """Compare parallel cross_val_predict on a classifier with a reference.

     Wraps ``self.clf`` in a single-channel MultichannelPipeline, runs
     ``cross_val_predict`` with ``n_cpus`` processes, and requires
     ``['predict']['y_pred']`` of the result to equal
     ``self.cls_predictions`` exactly.  Nothing is done when ``n_cpus`` is 1
     or less.
     """
     # NOTE(review): another method with this exact name is defined earlier
     # in this file.  If both live in the same class, this definition
     # replaces the earlier one, which then never runs -- confirm they belong
     # to different classes.
     if n_cpus <= 1:
         return

     # catch_warnings() restores the previous filters on exit, even if the
     # prediction call raises, instead of resetting every filter globally.
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore")
         parallel.start_if_needed(n_cpus=n_cpus)
         mclf = MultichannelPipeline(n_channels=1)
         mclf.add_layer(self.clf)
         pc_predictions = pc_cross_validation.cross_val_predict(
             mclf, [self.X_cls], self.y_cls, cv=self.cv, n_processes=n_cpus)

     self.assertTrue(
         np.array_equal(self.cls_predictions,
                        pc_predictions['predict']['y_pred']),
         'pipecaster predictions did not match sklearn control')
    def test_throttled_multiprocessing_speedup(self, verbose=0):
        """Check that parallel cross_val_predict beats a serial run.

        Times ``pipecaster.cross_validation.cross_val_predict`` on a synthetic
        two-class dataset with ``cv = n_cpus - 1``, once with
        ``n_processes=1`` and once with ``n_processes = n_cpus - 1``.  A
        slower-or-equal parallel run produces a warning rather than a test
        failure.

        Nothing is done unless more than two CPUs are available: with exactly
        two, ``n_cpus - 1`` is 1, so the "parallel" run would use a single
        process and a single fold, making the comparison meaningless.

        Side effect: ``self.X_cls`` and ``self.y_cls`` are rebound to the
        synthetic dataset generated here.

        Parameters
        ----------
        verbose : int, default=0
            When greater than 0, print the CPU/job counts and the measured
            durations.
        """
        if n_cpus <= 2:
            return

        n_jobs = n_cpus - 1  # also used as the number of cv folds
        setup_code = 'import pipecaster.cross_validation'
        stmt_template = ('pipecaster.cross_validation.cross_val_predict('
                         'mclf, [X], y, cv = {}, n_processes = {})')

        # catch_warnings() restores the previous filters on exit, even when
        # something below raises, instead of wiping every filter globally.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            parallel.start_if_needed(n_cpus=n_cpus)
            X, y = self.X_cls, self.y_cls = make_classification(
                n_classes=2,
                n_samples=500,
                n_features=40,
                n_informative=20,
                random_state=test_seed)
            mclf = MultichannelPipeline(n_channels=1)
            # NOTE(review): the futile_cycles_* arguments presumably make
            # fitting slow enough to time -- confirm against DummyClassifier.
            mclf.add_layer(
                DummyClassifier(futile_cycles_fit=2000000,
                                futile_cycles_pred=10))

            # Explicit namespace: exactly the names the timed statement uses.
            namespace = {'mclf': mclf, 'X': X, 'y': y}
            t_serial = timeit.timeit(setup=setup_code,
                                     stmt=stmt_template.format(n_jobs, 1),
                                     globals=namespace,
                                     number=5)
            t_parallel = timeit.timeit(setup=setup_code,
                                       stmt=stmt_template.format(n_jobs,
                                                                 n_jobs),
                                       globals=namespace,
                                       number=5)

        if verbose > 0:
            print('number of CPUs detected: {}; parallel jobs requested: {}'.
                  format(n_cpus, n_jobs))
            print('duration of serial cross cross_val_predict task: {} s'.
                  format(t_serial))
            print(
                'duration of parallel cross cross_val_predict task (ray pool.starmap): {} s'
                .format(t_parallel))

        if t_serial <= t_parallel:
            warnings.warn(
                'multiple cpus detected, but parallel cross_val_predict not faster than serial using ray.multiprocessing.starmap(), possible problem with multiprocessing'
            )
 def test_multi_input_regression_parallel_starmap(self):
     """Compare throttled parallel cross_val_predict with a reference.

     Wraps ``self.rgr`` in a single-channel MultichannelPipeline, runs
     ``cross_val_predict`` with ``n_cpus - 1`` processes, and requires
     ``['predict']['y_pred']`` of the result to equal
     ``self.rgr_predictions`` exactly.  Nothing is done unless more than two
     CPUs are available.
     """
     if n_cpus <= 2:
         return

     # catch_warnings() restores the previous filters on exit, even if the
     # prediction call raises, instead of resetting every filter globally.
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore")
         parallel.start_if_needed(n_cpus=n_cpus)
         mrgr = MultichannelPipeline(n_channels=1)
         mrgr.add_layer(self.rgr)
         pc_predictions = pc_cross_validation.cross_val_predict(
             mrgr, [self.X_rgr],
             self.y_rgr,
             cv=self.cv,
             n_processes=n_cpus - 1)

     self.assertTrue(
         np.array_equal(self.rgr_predictions,
                        pc_predictions['predict']['y_pred']),
         'pipecaster predictions did not match sklearn '
         'control')