Example no. 1
0
 def test_function_main(self, mock_dataset, mock_process, mock_write,
                        mock_update):
     """main() should parse the CLI args and call the expected helpers.

     NOTE(review): the mock_* parameters are presumably injected by
     @mock.patch decorators stacked on this method/class (not visible in
     this chunk) — confirm their order matches the decorator stacking.
     """
     # Simulate a command line: positional input dir, '-o' output dir,
     # '-n' filename prefix, '-w'/'-s' 700, '-r' 50. argv[0] (the program
     # name) is irrelevant to the parser, hence the empty string.
     sys.argv = [
         '', "dir_1", '-o', 'dir_2', '-n', 'nersc_', '-w', '700', '-s',
         '700', '-r', '50'
     ]
     # Patch Archive.get_unprocessed_files so main() receives controlled
     # data instead of scanning a real directory.
     with mock.patch.object(
             Archive, "get_unprocessed_files") as mock_get_unprocessed:
         mock_get_unprocessed.side_effect = __class__.side_effect_function(
             Archive)
         main()
     # main() must have consulted the archive exactly once and forwarded
     # the fake file through the process/write/update pipeline.
     mock_get_unprocessed.assert_called_once()
     mock_process.assert_called_once_with('foo', 'fake_file')
     mock_write.assert_called_once()
     mock_update.assert_called_once()
Example no. 2
0
    def generate(self):
        """Build the regression inputs by delegating to build_dataset.

        Parses per-file argument lists with ``self.parser`` and calls
        ``build_dataset.main`` once per input file for the feature matrix,
        then once more for the target column.

        :return: tuple ``(A, y)`` — the feature matrix assembled from the
            configured CSV columns, and the target vector (column [229]).
        """
        parser = self.parser

        # Feature columns to read from each input file. Each entry is a
        # single string (build_dataset parses it); the backslash
        # continuations are part of the literal.
        files_to_read = ['data/2017CHR_CSV_Analytic_Data.csv']
        cols_per_file_to_read = [
            '[  6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71,  76, 81,\
                                   87,  93,  99, 104, 109, 114, 119, 124, 129, 134, 139, 144, 149, \
                                   154, 159, 164, 169, 174, 179, 184, 189, 194, 199, 204, 209, 214, \
                                   219, 224, 234, 239, 244, 249, 254, 259, 265, 270, 275, 280, 285, \
                                   290, 295, 302, 307, 311, 316, 321, 326, 331, 336, 341, 346, 351]'
        ]
        # Number of columns each *additional* file contributes (unused for
        # the first file, which seeds the whole matrix).
        num_cols_per_file = [2]
        A = []
        # Iterate the three parallel lists in lockstep instead of indexing
        # them via range(len(...)).
        for file_num, (fname, cols, num_cols) in enumerate(
                zip(files_to_read, cols_per_file_to_read, num_cols_per_file)):
            args = parser.parse_args([
                '--filename', fname, '--columns_to_read',
                cols, '--is_b_vec', 'False'
            ])
            A_new = build_dataset.main(args)
            if file_num == 0:
                # First file seeds the matrix wholesale.
                A = A_new
            else:
                # Subsequent files append only their first num_cols columns.
                for i in range(num_cols):
                    A.append(A_new[i])

        # Target vector: column [229] of the same CSV.
        # NOTE(review): '--is_b_vec' is passed as the string 'False' here
        # too — confirm build_dataset interprets this as intended for the
        # target vector (argparse hands it over as a non-empty string).
        args = parser.parse_args([
            '--filename', 'data/2017CHR_CSV_Analytic_Data.csv',
            '--columns_to_read', '[229]', '--is_b_vec', 'False'
        ])  #see cols_per_file_to_read
        y = build_dataset.main(args)

        return A, y
Example no. 3
0
def main():
    """Prepare the dataset, then train and evaluate three classifiers.

    Loads and normalizes the train/test split, rebalances the training
    set, and runs logistic regression, random forest, and Gaussian naive
    Bayes in turn, logging each step.
    """
    train_set, test_set = build_dataset.main()
    train_set, test_set = build_dataset.normalize_datasets(train_set, test_set)
    train_set = over_under_sampling(train_set)

    # Run each classifier in sequence, announcing it first.
    for message, model_fn in (
        ('Applying Logistic Regression', logistic_model),
        ('Applying Random Forest', random_forest_model),
        ('Applying Gaussian Naive Bayes', naive_bayes_model),
    ):
        logger.info(message)
        model_fn(train_set, test_set)
Example no. 4
0
def main():
    """
    Execute generic classification methods on DNA methylation data

    :return: metrics of each classifier
    """
    train_data, test_data = build_dataset.main()
    # NOTE: dataset normalization and the logistic-regression step are
    # deliberately disabled in this variant — only Random Forest runs.

    logger.info('Applying Random Forest')
    random_forest_model(train_data, test_data)
Example no. 5
0
def predict():
    '''
    For rendering results on HTML GUI.

    Trains/evaluates the random-forest model, then renders a cancer-type
    label into the page template.
    '''
    # Candidate output labels. Renamed from `list`, which shadowed the
    # builtin of the same name.
    cancer_types = [
        "BRAC: Breast Invasive Carcinoma", "LUAD: Lung Adenocarcinoma",
        "BLAC Urothelial Bladder Carcinoma", "PRAD: Prostate Adenocarcinoma",
        "LUSC: Lung Squamous Cell Carcinoma", "THCA: Thyroid Cancer",
        "HNSC: Head-Neck Squamous Cell Carcinoma"
    ]
    train, test = build_dataset.main()
    # Called for its side effects; the return value was previously bound
    # to an unused variable (`k`).
    random_forest_model(train, test)
    # NOTE(review): the rendered prediction is a *random* choice, not the
    # model's output — presumably placeholder behavior; confirm.
    output = random.choice(cancer_types)

    return render_template('index.html',
                           prediction_text='OUTPUT =  {}'.format(output))
import os
from glob import glob

import numpy as np
import pypianoroll

from build_dataset import main, reconstruct_full_sequence, min_max_from_folder

# Trim MIDI session files into fixed-size sequences, then verify that the
# first written sequence reconstructs back to the original piano roll.

# Glob pattern for the input MIDI files. (Was previously stored in a
# variable named `dataset` that was later reused for the config dict.)
midi_pattern = "d:/data/thesis_model2/MIDI_tests/session*.mid"
files = glob(midi_pattern)
out_dir = "d:/data/thesis_model2/MIDI_tests/trims/"
# makedirs(exist_ok=True) avoids the exists()/mkdir race and also creates
# missing parent directories.
os.makedirs(out_dir, exist_ok=True)

# Trimming configuration consumed by build_dataset.main.
config = {
    "NR_BARS": 4,
    "BAR_LEN": 96
}

main(files, out_dir, config)

# Check first seq. is the same as the one from loading the file.
min_pitch, max_pitch = min_max_from_folder(out_dir)

# NOTE(review): glob() order is not guaranteed — this assumes the first
# .npy returned corresponds to files[0]; confirm or sort deterministically.
first_sequence = np.load(glob(os.path.join(out_dir, '*.npy'))[0])
reconstructed_seq = reconstruct_full_sequence(first_sequence, min_pitch, max_pitch)

orig_first_sequence = pypianoroll.Multitrack(files[0]).tracks[0]
orig_first_sequence.binarize()
orig_first_sequence = orig_first_sequence.pianoroll[:first_sequence.shape[0]]

# np.array_equal is the direct way to assert elementwise equality
# (replaces scanning np.unique(a == b) for False).
assert np.array_equal(orig_first_sequence, reconstructed_seq)
print('ALL GOOD')