def test_function_main(self, mock_dataset, mock_process, mock_write, mock_update):
    """main should call the right functions"""
    # Simulate the command-line invocation main() parses.
    sys.argv = [
        '',
        "dir_1",
        '-o', 'dir_2',
        '-n', 'nersc_',
        '-w', '700',
        '-s', '700',
        '-r', '50',
    ]
    with mock.patch.object(Archive, "get_unprocessed_files") as unprocessed_mock:
        unprocessed_mock.side_effect = __class__.side_effect_function(Archive)
        main()
        # Every collaborator must be invoked exactly once per run.
        unprocessed_mock.assert_called_once()
        mock_process.assert_called_once_with('foo', 'fake_file')
        mock_write.assert_called_once()
        mock_update.assert_called_once()
def generate(self):
    """Assemble the full A matrix and b vector by driving build_dataset.main.

    For each configured CSV file, parse a synthetic argv with the columns
    to extract and collect the returned columns into A; then read the
    target column (229) from the same CSV as y.

    :return: tuple (A, y) — feature matrix and target vector.
    """
    parser = self.parser

    # Input CSVs, the (stringified) column indices to extract from each,
    # and how many returned columns to keep when appending to an existing A.
    files_to_read = ['data/2017CHR_CSV_Analytic_Data.csv']
    cols_per_file_to_read = [
        '[ 6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, '
        '87, 93, 99, 104, 109, 114, 119, 124, 129, 134, 139, 144, 149, '
        '154, 159, 164, 169, 174, 179, 184, 189, 194, 199, 204, 209, 214, '
        '219, 224, 234, 239, 244, 249, 254, 259, 265, 270, 275, 280, 285, '
        '290, 295, 302, 307, 311, 316, 321, 326, 331, 336, 341, 346, 351]'
    ]
    num_cols_per_file = [2]

    A = []
    for file_num, (fname, cols, num_cols) in enumerate(
            zip(files_to_read, cols_per_file_to_read, num_cols_per_file)):
        args = parser.parse_args([
            '--filename', fname,
            '--columns_to_read', cols,
            '--is_b_vec', 'False'
        ])
        A_new = build_dataset.main(args)
        if file_num == 0:
            # First file: adopt every returned column as the initial A.
            A = A_new
        else:
            # Later files: keep only the first num_cols columns.
            for i in range(num_cols):
                A.append(A_new[i])

    # Target vector. NOTE(review): '--is_b_vec' is passed as 'False' even
    # though this call is meant to produce the b vector — confirm against
    # build_dataset.main; original behavior preserved here.
    args = parser.parse_args([
        '--filename', 'data/2017CHR_CSV_Analytic_Data.csv',
        '--columns_to_read', '[229]',
        '--is_b_vec', 'False'
    ])
    y = build_dataset.main(args)
    return A, y
def main():
    """Build and balance the dataset, then run each classifier in turn."""
    train, test = build_dataset.main()
    train, test = build_dataset.normalize_datasets(train, test)
    train = over_under_sampling(train)
    # Run every configured model against the same train/test split.
    classifiers = (
        ('Logistic Regression', logistic_model),
        ('Random Forest', random_forest_model),
        ('Gaussian Naive Bayes', naive_bayes_model),
    )
    for label, model in classifiers:
        logger.info('Applying %s', label)
        model(train, test)
def main():
    """Build the DNA methylation dataset and run the Random Forest classifier.

    Only the Random Forest model is currently enabled; normalization and the
    other classifiers had been commented out and are removed here as dead
    code (see version control history to restore them).

    :return: None — results are reported via the model call, not returned.
    """
    train, test = build_dataset.main()
    logger.info('Applying Random Forest')
    random_forest_model(train, test)
def predict():
    """Render a cancer-type label on the HTML GUI.

    NOTE(review): the displayed output is random.choice over the label
    list, not the classifier's actual prediction — confirm this is the
    intended placeholder behavior.
    """
    # Renamed from `list` to stop shadowing the builtin.
    cancer_types = [
        "BRAC: Breast Invasive Carcinoma",
        "LUAD: Lung Adenocarcinoma",
        "BLAC: Urothelial Bladder Carcinoma",  # added missing ':' separator
        "PRAD: Prostate Adenocarcinoma",
        "LUSC: Lung Squamous Cell Carcinoma",
        "THCA: Thyroid Cancer",
        "HNSC: Head-Neck Squamous Cell Carcinoma"
    ]
    # Train/evaluate for side effects; the model's return value was never
    # used for the displayed output (previously bound to an unused `k`).
    train, test = build_dataset.main()
    random_forest_model(train, test)
    output = random.choice(cancer_types)
    return render_template(
        'index.html', prediction_text='OUTPUT = {}'.format(output))
import os
from glob import glob

import numpy as np
import pypianoroll  # was used below but never imported: NameError at runtime

from build_dataset import main, reconstruct_full_sequence, min_max_from_folder

# Source MIDI sessions and output directory for the trimmed sequences.
midi_pattern = "d:/data/thesis_model2/MIDI_tests/session*.mid"
files = glob(midi_pattern)
out_dir = "d:/data/thesis_model2/MIDI_tests/trims/"
os.makedirs(out_dir, exist_ok=True)

# Trimming configuration (was confusingly reusing the `dataset` name that
# previously held the glob pattern).
config = {
    "NR_BARS": 4,
    "BAR_LEN": 96,
}
main(files, out_dir, config)

# Sanity check: the first saved sequence must reconstruct to the same
# pianoroll as the one loaded directly from the original MIDI file.
min_pitch, max_pitch = min_max_from_folder(out_dir)
first_sequence = np.load(glob(os.path.join(out_dir, '*.npy'))[0])
reconstructed_seq = reconstruct_full_sequence(first_sequence, min_pitch, max_pitch)

orig_first_sequence = pypianoroll.Multitrack(files[0]).tracks[0]
orig_first_sequence.binarize()
orig_first_sequence = orig_first_sequence.pianoroll[:first_sequence.shape[0]]

# Equivalent to `False not in np.unique(a == b)` for same-shape arrays,
# but clearer and also returns False (instead of raising) on shape mismatch.
assert np.array_equal(orig_first_sequence, reconstructed_seq)
print('ALL GOOD')