def test_main_featurize_function():
    """Run featurize with a custom feature script and check what it writes.

    Copies ``testfeature1.py`` into the custom-script folder, featurizes the
    ASAS training subset for featureset id ``"test"``, and then verifies that
    the features CSV and classes array exist, that the requested feature
    columns are present, and that every stored class label is one of the
    known ASAS classes. Output files are deleted once they have been read.
    """
    test_setup()

    script_name = "testfeature1.py"
    shutil.copy(pjoin(DATA_PATH, script_name),
                cfg.CUSTOM_FEATURE_SCRIPT_FOLDER)

    header_path = pjoin(cfg.UPLOAD_FOLDER,
                        "asas_training_subset_classes_with_metadata.dat")
    archive_path = pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz")
    script_path = pjoin(cfg.CUSTOM_FEATURE_SCRIPT_FOLDER, script_name)

    # The returned message is captured but not inspected by this test.
    status = featurize.featurize(
        headerfile_path=header_path,
        zipfile_path=archive_path,
        features_to_use=["std_err", "f"],
        featureset_id="test",
        is_test=True,
        custom_script_path=script_path,
    )

    features_csv = pjoin(cfg.FEATURES_FOLDER, "test_features.csv")
    classes_npy = pjoin(cfg.FEATURES_FOLDER, "test_classes.npy")
    assert os.path.exists(features_csv)
    assert os.path.exists(classes_npy)

    # Load each output and delete it straight away; the content checks
    # further down only use the in-memory copies.
    labels = list(np.load(classes_npy))
    os.remove(classes_npy)

    frame = pd.io.parsers.read_csv(features_csv)
    column_names = frame.columns
    data = frame.values
    os.remove(features_csv)
    # NOTE(review): assumes featurize leaves the with-classes CSV in
    # cfg.FEATURES_FOLDER for this configuration -- confirm.
    os.remove(pjoin(cfg.FEATURES_FOLDER, "test_features_with_classes.csv"))

    assert "std_err" in column_names
    assert "f" in column_names

    known_classes = ('Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                     'Classical_Cepheid', 'W_Ursae_Maj', 'Delta_Scuti')
    for label in labels:
        assert label in known_classes
def test_feature_generation():
    """Featurize the ASAS subset and compare against stored expected values.

    Removes any ``*.csv`` files already present in ``cfg.FEATURES_FOLDER``,
    runs ``featurize.featurize`` (without Disco) over
    ``cfg.features_list_science`` for featureset id ``"testfeatset"``, and
    checks that the generated feature names and values match
    ``data/expected_features.csv`` located next to this test file. The
    featurize call must also finish in under 60 seconds.
    """
    # The glob pattern must be a *relative* component: os.path.join discards
    # everything before an absolute component, so '/*.csv' would make this
    # loop target CSV files in the filesystem root instead of the features
    # folder.
    for stale_csv in glob.glob(os.path.join(cfg.FEATURES_FOLDER, '*.csv')):
        os.remove(stale_csv)

    tic = time.time()
    this_dir = os.path.dirname(__file__)

    featurize.featurize(
        os.path.join(cfg.UPLOAD_FOLDER, "asas_training_subset_classes.dat"),
        os.path.join(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz"),
        featureset_id="testfeatset",
        is_test=True,
        USE_DISCO=False,
        features_to_use=cfg.features_list_science
    )

    delta = time.time() - tic

    def features_from_csv(filename):
        """Return (header names, numeric values) read from a feature CSV."""
        with open(filename) as f:
            # First line is the comma-separated header; the remainder of the
            # file is parsed as a numeric table.
            feature_names = f.readline().strip().split(",")
            feature_values = np.loadtxt(f, delimiter=',')

        return feature_names, feature_values

    generated_csv = os.path.join(cfg.FEATURES_FOLDER,
                                 "testfeatset_features.csv")
    features_extracted, values_computed = features_from_csv(generated_csv)
    features_expected, values_expected = features_from_csv(
        os.path.join(this_dir, "data/expected_features.csv"))

    # Clean up the generated outputs before running the comparisons.
    os.remove(generated_csv)
    os.remove(os.path.join(cfg.FEATURES_FOLDER, "testfeatset_classes.npy"))

    npt.assert_equal(len(features_extracted), 81)
    npt.assert_equal(features_extracted, features_expected)
    npt.assert_array_almost_equal(values_computed, values_expected)

    # Ensure this test takes less than a minute to run
    assert delta < 60
def test_featurize():
    """Check which output files featurize produces for a small run.

    Featurizes the ASAS training subset (Disco disabled) for featureset id
    ``"TESTRUN"`` and asserts that the features CSV and classes array are in
    ``cfg.FEATURES_FOLDER``, that the with-classes CSV is *not* there but is
    under ``Flask/static/data`` in the package path, and that the features
    CSV has a ``std_err`` column.
    """
    header_path = pjoin(cfg.UPLOAD_FOLDER,
                        "asas_training_subset_classes_with_metadata.dat")
    archive_path = pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz")
    script_path = pjoin(cfg.CUSTOM_FEATURE_SCRIPT_FOLDER, "testfeature1.py")

    # The returned message is captured but not inspected by this test.
    status = featurize.featurize(
        headerfile_path=header_path,
        zipfile_path=archive_path,
        features_to_use=["std_err"],  # #TEMP# TCP still broken under py3
        featureset_id="TESTRUN",
        is_test=True,
        custom_script_path=script_path,
        USE_DISCO=False,
    )

    features_csv = pjoin(cfg.FEATURES_FOLDER, "TESTRUN_features.csv")
    with_classes_name = "TESTRUN_features_with_classes.csv"

    assert os.path.exists(features_csv)
    # The with-classes CSV is expected in the Flask static data directory
    # rather than alongside the other feature files.
    assert not os.path.exists(pjoin(cfg.FEATURES_FOLDER, with_classes_name))
    assert os.path.exists(
        pjoin(cfg.MLTSP_PACKAGE_PATH, "Flask/static/data", with_classes_name))
    assert os.path.exists(pjoin(cfg.FEATURES_FOLDER, "TESTRUN_classes.npy"))

    frame = pd.io.parsers.read_csv(features_csv)
    column_names = frame.columns
    data = frame.values
    assert "std_err" in column_names
def test_main_featurize_function_disco():
    """Run featurize with ``USE_DISCO=True`` and check what it writes.

    Featurizes the ASAS training subset for featureset id ``"test"`` with no
    custom script path, then verifies that the features CSV and classes
    array exist, that the requested feature columns are present, and that
    every stored class label is one of the known ASAS classes. Output files
    are deleted once they have been read.
    """
    test_setup()

    # The custom script is still copied into place even though it is not
    # passed to featurize below.
    shutil.copy(pjoin(DATA_PATH, "testfeature1.py"),
                cfg.CUSTOM_FEATURE_SCRIPT_FOLDER)

    header_path = pjoin(cfg.UPLOAD_FOLDER,
                        "asas_training_subset_classes_with_metadata.dat")
    archive_path = pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz")

    # The returned message is captured but not inspected by this test.
    status = featurize.featurize(
        headerfile_path=header_path,
        zipfile_path=archive_path,
        features_to_use=["std_err", "freq1_harmonics_freq_0"],
        featureset_id="test",
        is_test=True,
        custom_script_path=None,  # TODO: Doesn't work when using Disco!!!
        USE_DISCO=True,
    )

    features_csv = pjoin(cfg.FEATURES_FOLDER, "test_features.csv")
    classes_npy = pjoin(cfg.FEATURES_FOLDER, "test_classes.npy")
    assert os.path.exists(features_csv)
    assert os.path.exists(classes_npy)

    # Load each output and delete it straight away; the content checks
    # further down only use the in-memory copies.
    labels = list(np.load(classes_npy))
    os.remove(classes_npy)

    frame = pd.io.parsers.read_csv(features_csv)
    column_names = frame.columns
    data = frame.values
    os.remove(features_csv)

    static_data_dir = pjoin(cfg.MLTSP_PACKAGE_PATH, "Flask/static/data")
    os.remove(pjoin(static_data_dir, "test_features_with_classes.csv"))

    assert "std_err" in column_names
    assert "freq1_harmonics_freq_0" in column_names

    known_classes = ('Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                     'Classical_Cepheid', 'W_Ursae_Maj', 'Delta_Scuti')
    for label in labels:
        assert label in known_classes