def crossValidate(data_set_parent):
    """Run ``CNNClass`` once per ``part_*`` entry found in *data_set_parent*.

    Each entry matching ``part_*`` is held out in turn; the remaining entries
    are passed to ``CNNClass`` as the training paths.  The held-out entry only
    contributes its base name to the model directory name: the test path that
    is actually handed to ``CNNClass`` is a fixed, hard-coded directory.

    ``glob``, ``os``, ``mean`` and ``zero`` are resolved from module scope;
    ``mean`` and ``zero`` are forwarded to ``CNNClass`` unchanged.

    Args:
        data_set_parent: Directory that is searched for ``part_*`` entries and
            that also serves as the parent of the model directory.
    """
    # Function-scope import: the network module is loaded on the first call.
    from pkg.CNN_for_CNV_simpler_net_cross_validation import CNNClass

    # Settings shared by every round.
    fixed_test_dir = "D:/eclipse-workspace/CNN_CNV/data_ERBB2_stride40_split_to_training_test_gccontent/testing"
    keep_probability = 0.5
    useTwoLayers = False
    kernel1size = kernel2size = 5
    layer_tag = 'two' if useTwoLayers else 'one'
    kernel_suffix = str(kernel1size) + '_' + str(kernel2size)

    all_parts = glob.glob(os.path.join(data_set_parent, 'part_*'))
    for held_out in all_parts:
        # Training set = every part except the one currently held out.
        training_parts = list(all_parts)
        training_parts.remove(held_out)

        model_name = ''.join([
            layer_tag,
            '_kernel_',
            str(keep_probability),
            '_oversamp1_4fold_normalizebymean_fillby0_stride25_',
            os.path.basename(held_out),
            '_as_test_',
        ])
        model_dir = os.path.join(data_set_parent, model_name)
        print(model_dir + kernel_suffix)

        # NOTE(review): 27 and 24 are presumably the input matrix dimensions
        # and 1e-5 / 0 the learning rate / iteration count -- confirm against
        # the CNNClass signature.
        CNNClass(training_parts, fixed_test_dir, 27, 24,
                 kernel1size, kernel2size, 1e-5, 0,
                 useTwoLayers, mean, zero, keep_probability,
                 model_dir, '1', 1, divide_by_gcconent=True)
def crossValidate(data_set_parent):
    """Run ``CNNClass`` for the ``part_*`` entries whose path contains ``part_0``.

    All ``part_*`` entries under *data_set_parent* are collected; entries whose
    path does not contain the substring ``'part_0'`` are skipped.  For each
    remaining entry the other entries become the training paths, while the test
    path and the model directory name are fixed, hard-coded values.

    ``glob``, ``os``, ``mean`` and ``zero`` are resolved from module scope;
    ``mean`` and ``zero`` are forwarded to ``CNNClass`` unchanged.

    Args:
        data_set_parent: Directory that is searched for ``part_*`` entries and
            that also serves as the parent of the model directory.
    """
    # Function-scope import: the network module is loaded on the first call.
    from pkg.CNN_for_CNV_simpler_net_cross_validation import CNNClass

    # Settings shared by every round.
    keep_probability = 0.5
    useTwoLayers = False
    kernel1size = kernel2size = 5
    kernel_suffix = str(kernel1size) + '_' + str(kernel2size)
    mixed_test_dir = r'D:\eclipse-workspace\CNN_CNV\data_bigger_training_added_strong_positive_stride_40\mixed\\'

    discovered = glob.glob(os.path.join(data_set_parent, 'part_*'))
    for candidate in discovered:
        # Substring test on the whole path, not only on the base name.
        if 'part_0' not in candidate:
            continue

        # Training set = every part except the selected one.
        others = list(discovered)
        others.remove(candidate)

        model_dir = os.path.join(
            data_set_parent,
            'Strong_positive_simpler_over_sampling_0.5_normalize_by_file_size_fill_by_0_stride_40_part_0_as_test_'
        )
        print(model_dir + kernel_suffix)

        # NOTE(review): 21 and 62 are presumably the input matrix dimensions
        # and 1e-5 / 0 the learning rate / iteration count -- confirm against
        # the CNNClass signature.
        CNNClass(others, mixed_test_dir, 21, 62,
                 kernel1size, kernel2size, 1e-5, 0,
                 useTwoLayers, mean, zero, keep_probability,
                 model_dir, None)
def _sample_matrix_shape(part_path):
    """Return the ``shape`` of one pickled matrix located at or under *part_path*.

    If *part_path* is a regular file it is loaded directly; otherwise the
    directory tree is walked in sorted order and the first file found is
    loaded.

    Raises:
        ValueError: if no file exists at or under *part_path*.
    """
    sample_file = None
    if os.path.isfile(part_path):
        sample_file = part_path
    else:
        for root, dirs, files in os.walk(part_path):
            dirs.sort()  # deterministic traversal order
            if files:
                sample_file = os.path.join(root, sorted(files)[0])
                break
    if sample_file is None:
        raise ValueError('no sample matrix file found under %r' % (part_path,))

    # NOTE(review): pickle executes arbitrary code on load; only point this at
    # data sets produced by trusted tooling.
    with open(sample_file, 'rb') as handle:
        try:
            matrix = pickle.load(handle, encoding='iso-8859-1')
        except TypeError:
            # Presumably a fallback for interpreters whose pickle.load() has
            # no ``encoding`` keyword -- retry from the start of the file.
            handle.seek(0)
            matrix = pickle.load(handle)
    return matrix.shape


def crossValidate(data_set_parent, learning_rate, iter_num, keep_probability, kernel_size,
                  over_sampling_class=None, over_sampling_fold=1):
    """Run ``CNNClass`` once per ``part_*`` entry, holding each one out in turn.

    Every entry matching ``part_*`` under *data_set_parent* is used once as the
    test path while the remaining entries are the training paths.  The input
    dimensions passed to ``CNNClass`` are read from one pickled sample matrix
    of the first training part.

    The previous implementation used ``path_training[0][0]`` as the sample
    path, which is the first *character* of the first training path rather
    than a file inside it; the sample is now looked up inside that part.

    ``CNNClass``, ``mean`` and ``zero`` are resolved from module scope.

    Args:
        data_set_parent: Directory searched for ``part_*`` entries; also the
            parent of the generated model directory names.
        learning_rate: Forwarded to ``CNNClass``.
        iter_num: Forwarded to ``CNNClass``.
        keep_probability: Forwarded to ``CNNClass`` and embedded in the model
            directory name.
        kernel_size: Used as the second kernel size (the first is fixed at 5).
        over_sampling_class: Forwarded to ``CNNClass``.
        over_sampling_fold: Forwarded to ``CNNClass``.

    Raises:
        IndexError: if exactly one ``part_*`` entry exists, because there is
            then no training part to sample from.
        ValueError: if the first training part contains no file.
    """
    useTwoLayers = False
    kernel1size = 5
    kernel2size = kernel_size
    prefix_useTwoLayers = 'two' if useTwoLayers else 'one'

    # glob returns entries in arbitrary order; sort so folds are reproducible.
    parts = sorted(glob.glob(os.path.join(data_set_parent, 'part_*')))
    for path_testing in parts:
        iname = os.path.basename(path_testing)
        path_training = list(parts)
        path_training.remove(path_testing)

        # Two-value unpacking: the sample must be a 2-D matrix.
        m, n = _sample_matrix_shape(path_training[0])

        model_dir = os.path.join(
            data_set_parent,
            prefix_useTwoLayers + '_kernel_' + str(keep_probability)
            + '_oversamp1_4fold_normalizebymean_fillby0_stride25_' + iname + '_as_test_')
        print(model_dir + str(kernel1size) + '_' + str(kernel2size))

        CNNClass(path_training, path_testing, m, n, kernel1size, kernel2size,
                 learning_rate, iter_num, useTwoLayers, mean, zero,
                 keep_probability, model_dir, over_sampling_class, over_sampling_fold)
# Script section (collapsed onto one line in this view; it is cut off at the
# trailing `glob.glob(`, so the end of the loop body is not visible here).
# What the visible part does:
#   * sets `input_dir`, `path_testing`, `model_dir` and two flags, then calls
#     CNNClass once with an empty string as the training-path argument;
#   * calls exit() right after that CNNClass call, so the following
#     `for i in range(2, 10)` loop (which calls splitDataSet when the
#     `<fold>xCV_data_rand_<i>` output directory does not exist yet) is not
#     reached while that exit() is in place.
# NOTE(review): `path_testing` and `model_dir` are non-raw string literals
# containing backslashes. In `model_dir`, `\5` is an octal escape and becomes
# the single character chr(5), so the resulting path is not the one spelled
# out in the literal. The other sequences (`\e`, `\C`, `\d`, `\c`, `\o`) are
# unrecognised escapes that Python keeps literally, with a warning on recent
# versions. Consider raw strings or forward slashes.
input_dir = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_gccontent/mixed' #for i in range(9, 10): generate_fake = False path_testing = u"D:\eclipse-workspace\CNN_CNV\data_producing_soft_links_mats_stride_40\correct_label" model_dir = u"D:\eclipse-workspace\CNN_CNV\data_bigger_training_added_strong_positive_stride_40_gccontent\5xCV_data_rand_0\one_kernel_0.5_nooversamp_normalizebymean_fillby0_stride40_part_0_as_test_5_5" useTwoLayers = False keep_probability = 0.5 CNNClass('', path_testing, 21, 62, 5, 5, 1e-5, 0, useTwoLayers, mean, zero, keep_probability, model_dir, '1', 1, divide_by_gcconent=True) exit() for i in range(2, 10): fold = 5 output_dir = os.path.join(os.path.dirname(input_dir), str(fold) + 'xCV_data_rand_' + str(i)) if not os.path.exists(output_dir): splitDataSet(input_dir, output_dir, fold) paths_mats = glob.glob(
# NOTE(review): the text below starts with the tail of a call whose opening
# is outside the visible chunk; only what follows that closing parenthesis is
# described here.
# Script section (collapsed in this view):
#   * `input_dir` is assigned three times in a row; only the last value
#     survives. The first literal contains `\m` in a non-raw string, an
#     unrecognised escape that Python keeps literally.
#   * `path_training` lists part_0..part_3 of `5xCV_data_rand_5`.
#   * `model_dir_prefix` is reassigned several times; the value in effect at
#     the final CNNClass call is the last one (`..._stride_40_dense/
#     5xCV_data_rand_6/..._part_1_as_test_`).
#   * NOTE(review): that final prefix refers to `5xCV_data_rand_6` under the
#     `_dense` directory while `path_training` points at `5xCV_data_rand_5`
#     -- confirm the combination is intended.
1e-6, 0, useTwoLayers, mean, zero, keep_probability, model_dir, None) # pool.close() # pool.join() # if __name__ == '__main__': #input_dir = 'D:/eclipse-workspace/CNN_CNV/data_stride_40/all' from pkg.CNN_for_CNV_simpler_net_cross_validation import CNNClass input_dir = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_dense_fake_data\mixed' input_dir = 'D:/eclipse-workspace/CNN_CNV/data_bigger_40plus40/mixed' input_dir = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40' path_testing = 'D:/eclipse-workspace/CNN_CNV/data_producing_soft_links_mats_stride_40/correct_label_dense' #path_testing = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_4' path_training = [ 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_0', 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_1', 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_2', 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/part_3' ] model_dir_prefix = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_5/Strong_positive_simpler_over_sampling_0.5_normalize_by_file_size_fill_by_0_stride_40_part_4_as_test_' #model_dir_prefix = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_6/Strong_positive_simpler_over_sampling_0.5_normalize_by_file_size_fill_by_0_stride_40_part_0_as_test_' #model_dir_prefix = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40/5xCV_data_rand_9/Strong_positive_simpler_over_sampling_0.5_normalize_by_file_size_fill_by_0_stride_40_part_3_as_test_' model_dir_prefix = 
'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_dense/5xCV_data_rand_5/one_kernel_0.8_normalizebymean_fillby0_stride40_part_2_as_test_' model_dir_prefix = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_dense/5xCV_data_rand_6/one_kernel_0.8_normalizebymean_fillby0_stride40_part_0_as_test_' model_dir_prefix = 'D:/eclipse-workspace/CNN_CNV/data_bigger_training_added_strong_positive_stride_40_dense/5xCV_data_rand_6/one_kernel_0.8_normalizebymean_fillby0_stride40_part_1_as_test_' CNNClass(path_training, path_testing, 12, 13, 5, 5, 1e-5, 0, False, mean, zero, 0.8, model_dir_prefix, None)