def _args():
    """Parse the training command-line options.

    Returns:
        A pair ``(args, args_to_save)``.

        ``args`` is ``named_tuple`` applied to the parsed options after
        ``fasta_file`` and ``resampling_target`` have been added and
        ``filter`` has been replaced by the attribute of
        ``load_preprocess_data`` that it names.

        ``args_to_save`` is a plain dict holding the options as they were
        given on the command line, plus ``fasta_file`` and
        ``resampling_target`` (either the loaded JSON content or the string
        ``"None"``).
    """
    import argparse

    # (flag, converter) pairs; a converter of None keeps the raw string.
    option_table = (
        ('--trained_model_directory', None),
        ('--depth_file_name', None),
        ('--chromosome_number', int),
        ('--start', int),
        ('--end', int),
        ('--fold_reduction_of_sample_size', float),
        ('--window_half_width', int),
        ('--resampling_target_file_name', None),
        ('--filter', None),
        ('--batch_size', int),
        ('--learning_rate', float),
    )
    parser = argparse.ArgumentParser()
    for flag, converter in option_table:
        parser.add_argument(flag, type=converter)

    parsed = parser.parse_args()
    parsed.fasta_file = '../data/sequences/human_g1k_v37.fasta'

    # Snapshot taken before any option is swapped for an object looked up on
    # load_preprocess_data, so it still holds the names as plain strings.
    args_to_save = dict(vars(parsed))

    target_file_name = parsed.resampling_target_file_name
    if target_file_name != "None":
        with open(target_file_name, 'r') as handle:
            loaded_target = json.load(handle)
        args_to_save['resampling_target'] = loaded_target

        # Work on a copy so the saved description keeps the function *name*
        # while the runtime version carries the function itself.
        resolved_target = loaded_target.copy()
        resolved_target['function'] = getattr(
            load_preprocess_data, resolved_target['function'])
        parsed.resampling_target = named_tuple(resolved_target)
    else:
        # The literal string "None" is how the calling bash script signals
        # that no JSON file describes a resampling scheme.
        args_to_save['resampling_target'] = "None"
        parsed.resampling_target = None

    parsed.filter = getattr(load_preprocess_data, parsed.filter)
    return named_tuple(vars(parsed)), args_to_save
def _compute_observed_depth_mean(chromosome_number, depth_file_name):
    """Return the observed depth mean for one chromosome of a depth file.

    The depths are obtained from ``load_preprocess_data.read_depths`` and
    handed, together with the chromosome number, to
    ``load_preprocess_data.compute_observed_depth_mean``.

    Args:
        chromosome_number: Chromosome to read; forwarded to both helpers.
        depth_file_name: Name of the depth file passed to ``read_depths``.

    Returns:
        Whatever ``compute_observed_depth_mean`` returns for those depths.
    """
    from utility import named_tuple
    from load_preprocess_data import compute_observed_depth_mean, read_depths

    # read_depths takes a single argument object rather than keyword
    # arguments, so wrap the two values it needs.
    read_request = named_tuple({
        'chromosome_number': chromosome_number,
        'depth_file_name': depth_file_name,
    })
    chromosome_depths = read_depths(read_request)
    return compute_observed_depth_mean(chromosome_depths, chromosome_number)
def _args():
    """Parse the test command-line options and return them as a named tuple.

    Besides the parsed options, the result carries ``window_half_width``
    (taken from the training arguments stored for
    ``--trained_model_directory``), ``fasta_file``, and ``filter`` replaced
    by the attribute of ``load_preprocess_data`` that it names.  The options
    are passed to ``_dump_json`` before ``filter`` is resolved.

    Returns:
        ``named_tuple`` applied to the final option dictionary.
    """
    import argparse

    string_flags = (
        '--depth_file_name',
        '--trained_model_directory',
        '--test_directory',
        '--filter',
    )
    integer_flags = (
        '--chromosome_number',
        '--content_start',
        '--content_end',
        '--number_test_examples',
    )

    parser = argparse.ArgumentParser()
    for flag in string_flags:
        parser.add_argument(flag)
    for flag in integer_flags:
        parser.add_argument(flag, type=int)
    parser.add_argument('--padding', type=float)
    options = parser.parse_args()

    # Example region from the original notes: a high-quality deletion on
    # chromosome 1 spanning 189704000-189783300, widened by 100000 on each
    # side.
    # presumably derives the region bounds from the content/padding options;
    # the helper is defined elsewhere -- TODO confirm
    _compute_start_end(options)

    from utility import get_train_args, named_tuple
    train_args = get_train_args(options.trained_model_directory)
    options.window_half_width = train_args['window_half_width']
    options.fasta_file = '../data/sequences/human_g1k_v37.fasta'

    # Dump while `filter` is still the name given on the command line.
    _dump_json(options)

    import load_preprocess_data
    options.filter = getattr(load_preprocess_data, options.filter)
    return named_tuple(vars(options))
def get_resampling_target(trained_model_directory):
    """Load the resampling target a model was trained with.

    The file name is read from the model's training arguments under the key
    ``'resampling_target_file_name'``.  The JSON file it names is loaded, its
    ``'function'`` entry is replaced by the attribute of
    ``load_preprocess_data`` with that name, and the result is wrapped with
    ``named_tuple``.

    Args:
        trained_model_directory: Directory passed to ``get_train_args`` to
            look up the training arguments.

    Returns:
        The resampling target as a named tuple, or ``None`` when the model
        was trained without a resampling scheme (file name recorded as the
        string ``"None"`` or as ``None``).

    Raises:
        OSError: If the recorded file cannot be opened.
        KeyError: If the training arguments or the JSON content lack the
            expected key.
        AttributeError: If ``load_preprocess_data`` has no attribute with the
            recorded function name.
    """
    file_name = get_train_args(
        trained_model_directory)['resampling_target_file_name']

    # The training argument parser accepts the literal string "None" to mean
    # "no resampling scheme" and uses None as the target in that case.
    # Mirror that here instead of trying to open a file called "None".
    if file_name is None or file_name == "None":
        return None

    with open(file_name, 'r') as f:
        dictionary = json.load(f)

    # The JSON stores the function by name; swap in the callable itself.
    dictionary['function'] = getattr(load_preprocess_data,
                                     dictionary['function'])
    print(dictionary['function'])
    return named_tuple(dictionary)
def plot_corrected_depths_test_all(trained_models,
                                   normalized_depths=True,
                                   corrected_depths=True,
                                   chromosome_number=1,
                                   min_y=0,
                                   max_y=2,
                                   show_title=True,
                                   grid=True):
    """Draw one corrected-depth plot per trained model.

    Each entry of ``trained_models`` is a mapping read with these keys:

    * ``'path'`` and ``'test_file_name'`` -- joined to locate the pickled
      test data.
    * ``'depth_file_name'`` (optional) -- when present, the observed depth
      mean is computed from that depth file; otherwise it is computed from
      the training data unpickled from ``'path'``.
    * ``'annotation'`` -- used as the plot title when ``show_title`` is true.
    * ``'figure_file_name'`` (optional) -- forwarded to the plotting helper;
      ``None`` is forwarded when the key is absent.

    Args:
        trained_models: Iterable of model descriptions as outlined above.
        normalized_depths: Forwarded to ``_plot_corrected_depths``.
        corrected_depths: Forwarded to ``_plot_corrected_depths``.
        chromosome_number: Chromosome used when reading depths and plotting.
        min_y: Forwarded to ``_plot_corrected_depths``.
        max_y: Forwarded to ``_plot_corrected_depths``.
        show_title: Whether to title each plot with the model's annotation.
        grid: Forwarded to ``_plot_corrected_depths``.
    """
    for model in trained_models:
        if 'depth_file_name' not in model:
            # NOTE(review): the helper is called with the training data only;
            # confirm this matches its current signature.
            train_sampled_data, _, _ = utility_train.unpickle(model['path'])
            observed_depth_mean = _compute_observed_depth_mean(
                train_sampled_data)
        else:
            from utility import named_tuple
            from load_preprocess_data import (
                compute_observed_depth_mean,
                read_depths,
            )
            read_request = named_tuple({
                'chromosome_number': chromosome_number,
                'depth_file_name': model['depth_file_name'],
            })
            depths = read_depths(read_request)
            observed_depth_mean = compute_observed_depth_mean(
                depths, chromosome_number)

        test_data_path = os.path.join(model['path'], model['test_file_name'])
        test_data = pd.read_pickle(test_data_path)

        if show_title:
            title = model['annotation']
        else:
            title = ''

        if 'figure_file_name' in model:
            figure_file_name = model['figure_file_name']
        else:
            figure_file_name = None

        _plot_corrected_depths(
            test_data,
            observed_depth_mean,
            chromosome_number,
            title=title,
            min_y=min_y,
            max_y=max_y,
            normalized_depths=normalized_depths,
            corrected_depths=corrected_depths,
            figure_file_name=figure_file_name,
            grid=grid,
        )