def crossValidate(data_set_parent):
    """Run one ``CNNClass`` pass per ``part_*`` entry under *data_set_parent*.

    Each path matching ``part_*`` is left out in turn: the remaining matches
    are passed to ``CNNClass`` as its first argument (``path_training``) and
    the basename of the left-out path is embedded in the model directory
    name.

    NOTE(review): the testing path handed to ``CNNClass`` is a fixed ERBB2
    directory, not the left-out part -- presumably intentional; confirm.

    Args:
        data_set_parent: Directory that is searched for ``part_*`` entries
            and under which the per-fold model directory name is built.
    """
    from pkg.CNN_for_CNV_simpler_net_cross_validation import CNNClass

    # Settings that are identical for every fold.
    keep_probability = 0.5
    useTwoLayers = False
    kernel1size = 5
    kernel2size = 5
    layer_tag = 'two' if useTwoLayers else 'one'
    path_testing = "D:/eclipse-workspace/CNN_CNV/data_ERBB2_stride40_split_to_training_test_gccontent/testing"

    parts = glob.glob(os.path.join(data_set_parent, 'part_*'))
    for held_out in parts:
        # Training paths: every part except the one currently left out.
        path_training = list(parts)
        path_training.remove(held_out)

        model_name = ''.join([
            layer_tag,
            '_kernel_',
            str(keep_probability),
            '_oversamp1_4fold_normalizebymean_fillby0_stride25_',
            os.path.basename(held_out),
            '_as_test_',
        ])
        model_dir = os.path.join(data_set_parent, model_name)

        # Log the model directory followed by the two kernel sizes.
        print('{}{}_{}'.format(model_dir, kernel1size, kernel2size))

        CNNClass(
            path_training, path_testing,
            27, 24,
            kernel1size, kernel2size,
            1e-5, 0,
            useTwoLayers, mean, zero,
            keep_probability, model_dir,
            '1', 1,
            divide_by_gcconent=True,
        )
示例#2
0
def crossValidate(data_set_parent):
    """Run ``CNNClass`` for the ``part_*`` entries whose path contains ``part_0``.

    All other ``part_*`` matches under *data_set_parent* are skipped.  For a
    selected entry, the remaining matches are passed to ``CNNClass`` as its
    first argument (``path_training``); the testing path and the model
    directory name are fixed strings.

    Args:
        data_set_parent: Directory that is searched for ``part_*`` entries
            and that is joined with the fixed model directory name.
    """
    from pkg.CNN_for_CNV_simpler_net_cross_validation import CNNClass

    # Fixed settings for this run.
    keep_probability = 0.5
    useTwoLayers = False
    kernel1size = 5
    kernel2size = 5
    path_testing = r'D:\eclipse-workspace\CNN_CNV\data_bigger_training_added_strong_positive_stride_40\mixed\\'

    parts = glob.glob(os.path.join(data_set_parent, 'part_*'))
    model_dir = os.path.join(
        data_set_parent,
        'Strong_positive_simpler_over_sampling_0.5_normalize_by_file_size_fill_by_0_stride_40_part_0_as_test_'
    )

    for held_out in parts:
        # Substring test on the whole path, not only on the basename.
        if 'part_0' not in held_out:
            continue

        # Training paths: every part except the selected one.
        path_training = list(parts)
        path_training.remove(held_out)

        # Log the model directory followed by the two kernel sizes.
        print('{}{}_{}'.format(model_dir, kernel1size, kernel2size))

        CNNClass(
            path_training, path_testing,
            21, 62,
            kernel1size, kernel2size,
            1e-5, 0,
            useTwoLayers, mean, zero,
            keep_probability, model_dir,
            None,
        )
示例#3
0
def _first_sample_file(path_training):
    """Return the path of one sample file taken from ``path_training[0]``.

    If the first training path is itself a file it is returned unchanged;
    otherwise the directory tree below it is walked in sorted order and the
    first file encountered is returned.

    Raises:
        IndexError: If *path_training* is empty.
        IOError: If no file exists under the first training path.
    """
    if not path_training:
        raise IndexError(
            'no training partition left; at least two part_* entries are needed')
    first_part = path_training[0]
    if os.path.isfile(first_part):
        return first_part
    for root, dirs, files in os.walk(first_part):
        dirs.sort()  # make the traversal order deterministic
        if files:
            return os.path.join(root, min(files))
    raise IOError('no sample file found under ' + first_part)


def _load_sample_shape(sample_path):
    """Unpickle *sample_path* and return the ``shape`` of the loaded object."""
    # NOTE: unpickling can execute arbitrary code -- only use on trusted
    # data files.
    with open(sample_path, 'rb') as handle:
        try:
            mt = pickle.load(handle, encoding='iso-8859-1')
        except TypeError:
            # ``pickle.load`` rejected the ``encoding`` keyword; retry
            # from the start of the file without it.
            handle.seek(0)
            mt = pickle.load(handle)
    return mt.shape


def crossValidate(data_set_parent, learning_rate, iter_num, keep_probability, kernel_size, over_sampling_class=None, over_sampling_fold=1):
    """Run ``CNNClass`` once per ``part_*`` entry, using that entry as test path.

    For every path matching ``part_*`` under *data_set_parent*, that path is
    passed to ``CNNClass`` as the testing path and all other matches as the
    training paths.  The two dimensions ``m, n`` passed to ``CNNClass`` are
    read from the ``shape`` of one pickled sample located in the first
    training path.

    NOTE(review): this assumes the training partitions contain pickled
    objects with a two-element ``shape`` -- confirm against the data layout.
    NOTE(review): the first kernel size is fixed at 5; only the second one
    follows *kernel_size* -- confirm this is intended.

    Args:
        data_set_parent: Directory searched for ``part_*`` entries; the model
            directory name is built under it.
        learning_rate: Forwarded unchanged to ``CNNClass``.
        iter_num: Forwarded unchanged to ``CNNClass``.
        keep_probability: Forwarded to ``CNNClass`` and embedded in the model
            directory name.
        kernel_size: Forwarded to ``CNNClass`` as the second kernel size.
        over_sampling_class: Forwarded unchanged to ``CNNClass``.
        over_sampling_fold: Forwarded unchanged to ``CNNClass``.

    Raises:
        IndexError: If only one ``part_*`` entry exists, so that no training
            path is left.
        IOError: If no sample file can be found in the first training path.
    """
    parts = glob.glob(os.path.join(data_set_parent, 'part_*'))

    # Settings that are identical for every fold.
    useTwoLayers = False
    kernel1size = 5
    kernel2size = kernel_size
    prefix_useTwoLayers = 'two' if useTwoLayers else 'one'

    for i in parts:
        iname = os.path.basename(i)
        path_testing = i
        path_training = parts.copy()
        path_training.remove(i)

        # Previously ``path_training[0][0]`` was used here, which is only the
        # first character of the path string and not a readable sample file.
        sample_path_to_get_shape = _first_sample_file(path_training)
        m, n = _load_sample_shape(sample_path_to_get_shape)

        model_dir = os.path.join(
            data_set_parent,
            prefix_useTwoLayers + '_kernel_' + str(keep_probability) +
            '_oversamp1_4fold_normalizebymean_fillby0_stride25_' + iname +
            '_as_test_')

        # Log the model directory followed by the two kernel sizes.
        print(model_dir + str(kernel1size) + '_' + str(kernel2size))

        CNNClass(path_training, path_testing, m, n, kernel1size, kernel2size,
                 learning_rate, iter_num, useTwoLayers, mean, zero,
                 keep_probability, model_dir, over_sampling_class,
                 over_sampling_fold)
示例#4
0
    input_dir = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_gccontent/mixed'
    #for i in range(9, 10):
    generate_fake = False

    path_testing = u"D:\eclipse-workspace\CNN_CNV\data_producing_soft_links_mats_stride_40\correct_label"
    model_dir = u"D:\eclipse-workspace\CNN_CNV\data_bigger_training_added_strong_positive_stride_40_gccontent\5xCV_data_rand_0\one_kernel_0.5_nooversamp_normalizebymean_fillby0_stride40_part_0_as_test_5_5"
    useTwoLayers = False
    keep_probability = 0.5
    CNNClass('',
             path_testing,
             21,
             62,
             5,
             5,
             1e-5,
             0,
             useTwoLayers,
             mean,
             zero,
             keep_probability,
             model_dir,
             '1',
             1,
             divide_by_gcconent=True)
    exit()
    for i in range(2, 10):
        fold = 5
        output_dir = os.path.join(os.path.dirname(input_dir),
                                  str(fold) + 'xCV_data_rand_' + str(i))
        if not os.path.exists(output_dir):
            splitDataSet(input_dir, output_dir, fold)
            paths_mats = glob.glob(
示例#5
0
                 1e-6, 0, useTwoLayers, mean, zero, keep_probability,
                 model_dir, None)


#     pool.close()
#     pool.join()
#
if __name__ == '__main__':
    # Script entry point: runs CNNClass once on a fixed set of training
    # partitions, a fixed testing directory and a fixed model directory
    # prefix.  Only the assignments that actually take effect are kept.
    from pkg.CNN_for_CNV_simpler_net_cross_validation import CNNClass

    input_dir = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40'
    path_testing = 'D:/eclipse-workspace/CNN_CNV/data_producing_soft_links_mats_stride_40/correct_label_dense'
    path_training = [
        'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_0',
        'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_1',
        'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_2',
        'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_3'
    ]
    model_dir_prefix = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_dense/5xCV_data_rand_6/one_kernel_0.8_normalizebymean_fillby0_stride40_part_1_as_test_'

    # NOTE(review): 12 and 13 are presumably the sample matrix dimensions and
    # the 0 the iteration count -- confirm against the CNNClass signature.
    CNNClass(path_training, path_testing, 12, 13, 5, 5, 1e-5, 0, False, mean,
             zero, 0.8, model_dir_prefix, None)