예제 #1
0
def _args():
    """Parse the training command line and resolve names into live objects.

    Returns:
        A pair ``(runtime_args, args_to_save)``.

        ``runtime_args`` is ``named_tuple`` applied to the parsed namespace,
        in which ``filter`` (and the resampling ``function``, when a
        resampling file is given) have been replaced by the attributes of
        ``load_preprocess_data`` that they name.

        ``args_to_save`` is a plain dict snapshot taken before any name is
        resolved, extended with the raw ``resampling_target`` description.
    """
    import argparse

    # Options in registration order; a converter of None keeps the raw string.
    option_table = (
        ('--trained_model_directory', None),
        ('--depth_file_name', None),
        ('--chromosome_number', int),
        ('--start', int),
        ('--end', int),
        ('--fold_reduction_of_sample_size', float),
        ('--window_half_width', int),
        ('--resampling_target_file_name', None),
        ('--filter', None),
        ('--batch_size', int),
        ('--learning_rate', float),
    )
    parser = argparse.ArgumentParser()
    for flag, converter in option_table:
        if converter is None:
            parser.add_argument(flag)
        else:
            parser.add_argument(flag, type=converter)
    args = parser.parse_args()

    args.fasta_file = '../data/sequences/human_g1k_v37.fasta'

    # Snapshot the namespace while every value is still in its raw form.
    args_to_save = dict(vars(args))

    # The bash driver passes the literal string "None" to signal that no
    # json file describing a resampling scheme exists.
    if args.resampling_target_file_name == "None":
        raw_target = "None"
        resolved_target = None
    else:
        with open(args.resampling_target_file_name, 'r') as fp:
            raw_target = json.load(fp)
        # Work on a copy so the saved description keeps the function *name*.
        target_spec = raw_target.copy()
        target_spec['function'] = getattr(
            load_preprocess_data, target_spec['function'])
        resolved_target = named_tuple(target_spec)

    args_to_save['resampling_target'] = raw_target
    args.resampling_target = resolved_target

    # Swap the filter name for the callable it refers to.
    args.filter = getattr(load_preprocess_data, args.filter)

    return named_tuple(vars(args)), args_to_save
예제 #2
0
def _compute_observed_depth_mean(chromosome_number, depth_file_name):
    """Read the depths for one chromosome and return their observed mean.

    Args:
        chromosome_number: chromosome identifier forwarded to both
            ``read_depths`` and ``compute_observed_depth_mean``.
        depth_file_name: name of the depth file handed to ``read_depths``.

    Returns:
        Whatever ``load_preprocess_data.compute_observed_depth_mean`` returns
        for the depths that were read.
    """
    from utility import named_tuple
    from load_preprocess_data import compute_observed_depth_mean, read_depths

    # read_depths takes an args-like object, so wrap the two values it needs.
    depth_query = named_tuple({
        'chromosome_number': chromosome_number,
        'depth_file_name': depth_file_name,
    })
    observed_depths = read_depths(depth_query)
    return compute_observed_depth_mean(observed_depths, chromosome_number)
예제 #3
0
def _args():
    """Parse the test command line and return it as a ``named_tuple``.

    After parsing, the namespace is augmented in this order:

    1. ``_compute_start_end`` is applied to it (presumably deriving the
       region bounds from the content bounds and padding -- confirm against
       that helper).
    2. ``window_half_width`` is copied from the training arguments stored
       with the trained model.
    3. ``fasta_file`` is set to the reference sequence path.
    4. The namespace is written out via ``_dump_json`` while ``filter`` is
       still a plain string.
    5. ``filter`` is replaced by the attribute of ``load_preprocess_data``
       that it names.

    Returns:
        ``named_tuple`` applied to the final namespace dictionary.
    """
    import argparse

    # Options in registration order; a converter of None keeps the raw string.
    option_table = (
        ('--depth_file_name', None),
        ('--trained_model_directory', None),
        ('--test_directory', None),
        ('--filter', None),
        ('--chromosome_number', int),
        ('--content_start', int),
        ('--content_end', int),
        ('--number_test_examples', int),
        ('--padding', float),
    )
    parser = argparse.ArgumentParser()
    for flag, converter in option_table:
        if converter is None:
            parser.add_argument(flag)
        else:
            parser.add_argument(flag, type=converter)
    args = parser.parse_args()

    # Example region -- high-quality deletion on chromosome 1:
    #   region_start = (189704000 - 100000)
    #   region_end = (189783300 + 100000)

    _compute_start_end(args)

    # Reuse the window size the model was trained with.
    from utility import get_train_args
    train_args = get_train_args(args.trained_model_directory)
    args.window_half_width = train_args['window_half_width']

    args.fasta_file = '../data/sequences/human_g1k_v37.fasta'

    # Dump before the filter name is turned into a function object.
    _dump_json(args)

    import load_preprocess_data
    filter_function = getattr(load_preprocess_data, args.filter)
    args.filter = filter_function

    from utility import named_tuple
    return named_tuple(vars(args))
def get_resampling_target(trained_model_directory):
    """Load the resampling scheme a trained model was trained with.

    The file name is read from the ``resampling_target_file_name`` entry of
    the training arguments returned by ``get_train_args``.

    Args:
        trained_model_directory: directory passed to ``get_train_args`` to
            look up the saved training arguments.

    Returns:
        ``named_tuple`` of the json description, with its ``function`` entry
        replaced by the attribute of ``load_preprocess_data`` that it names;
        or ``None`` when the model was trained without a resampling scheme.

    Raises:
        KeyError: if the training arguments or the json description lack the
            expected key.
        OSError: if the resampling file cannot be opened.
        AttributeError: if ``load_preprocess_data`` has no attribute with the
            requested function name.
    """
    file_name = get_train_args(
        trained_model_directory)['resampling_target_file_name']

    # The training argument parser uses the literal string "None" to mean
    # "no json file describing a resampling scheme". Mirror that here instead
    # of trying to open a file called "None".
    if file_name is None or file_name == "None":
        return None

    with open(file_name, 'r') as f:
        dictionary = json.load(f)

    # Swap the function name for the callable it refers to.
    dictionary['function'] = getattr(load_preprocess_data,
                                     dictionary['function'])
    # Echo the resolved function, as the original implementation did.
    print(dictionary['function'])
    return named_tuple(dictionary)
예제 #5
0
def plot_corrected_depths_test_all(trained_models,
                                   normalized_depths=True, corrected_depths=True,
                                   chromosome_number=1, min_y=0, max_y=2,
                                   show_title=True, grid=True):
    """Plot corrected depths on test data for each trained model in turn.

    Args:
        trained_models: iterable of mappings. Each must provide ``'path'``,
            ``'test_file_name'`` and (when ``show_title`` is true)
            ``'annotation'``; ``'depth_file_name'`` and
            ``'figure_file_name'`` are optional.
        normalized_depths: forwarded to ``_plot_corrected_depths``.
        corrected_depths: forwarded to ``_plot_corrected_depths``.
        chromosome_number: chromosome used when reading depths and plotting.
        min_y: forwarded to ``_plot_corrected_depths``.
        max_y: forwarded to ``_plot_corrected_depths``.
        show_title: when true the model's ``'annotation'`` is used as the
            title, otherwise the title is the empty string.
        grid: forwarded to ``_plot_corrected_depths``.

    Returns:
        None. One call to ``_plot_corrected_depths`` is made per model.
    """
    for model in trained_models:
        if 'depth_file_name' not in model:
            # No depth file recorded: fall back to the pickled training sample.
            train_sampled_data, _, _ = utility_train.unpickle(model['path'])
            observed_depth_mean = _compute_observed_depth_mean(train_sampled_data)
        else:
            # A depth file is recorded: compute the mean from the raw depths.
            from utility import named_tuple
            from load_preprocess_data import read_depths
            depth_query = named_tuple({
                'chromosome_number': chromosome_number,
                'depth_file_name': model['depth_file_name'],
            })
            depths = read_depths(depth_query)
            from load_preprocess_data import compute_observed_depth_mean
            observed_depth_mean = compute_observed_depth_mean(depths, chromosome_number)

        test_file_path = os.path.join(model['path'], model['test_file_name'])
        test_data = pd.read_pickle(test_file_path)

        if show_title:
            title = model['annotation']
        else:
            title = ''

        if 'figure_file_name' in model:
            figure_file_name = model['figure_file_name']
        else:
            figure_file_name = None

        _plot_corrected_depths(test_data, observed_depth_mean, chromosome_number,
                               title=title, min_y=min_y, max_y=max_y,
                               normalized_depths=normalized_depths,
                               corrected_depths=corrected_depths,
                               figure_file_name=figure_file_name,
                               grid=grid)