def _run_analysis(config_file, seed):
    '''
    Run the PyClone analysis selected by the ``density`` entry of the config.

    When ``seed`` is not None the global ``random`` generator is seeded first.
    The concentration value, the optional concentration prior (None when the
    config has no ``prior`` entry) and the iteration count are read from the
    config and handed to the matching runner.

    Raises an Exception when the configured density is not one of
    ``pyclone_beta_binomial`` or ``pyclone_binomial``.
    '''
    if seed is not None:
        random.seed(seed)

    settings = paths.load_config(config_file)

    concentration = settings['concentration']
    alpha = concentration['value']
    # The prior is optional; its absence is signalled to the runner as None.
    alpha_priors = concentration['prior'] if 'prior' in concentration else None

    num_iters = settings['num_iters']
    density = settings['density']

    # Pick the runner first, then make a single call with the shared arguments.
    if density == 'pyclone_beta_binomial':
        runner = run_pyclone_beta_binomial_analysis
    elif density == 'pyclone_binomial':
        runner = run_pyclone_binomial_analysis
    else:
        raise Exception(
            '{0} is not a valid density for PyClone.'.format(density))

    runner(config_file, num_iters, alpha, alpha_priors)
def _load_sample_data(file_name, error_rate, tumour_content):
    '''
    Read the mutations listed in a PyClone formatted input file.

    Returns an OrderedDict keyed by mutation id, in the order the mutations
    appear under the ``mutations`` entry of the file. Each value is the result
    of ``_get_pyclone_data`` for that mutation with the given ``error_rate``
    and ``tumour_content``.
    '''
    sample_config = paths.load_config(file_name)

    result = OrderedDict()

    for entry in sample_config['mutations']:
        mutation = load_mutation_from_dict(entry)

        result[mutation.id] = _get_pyclone_data(
            mutation, error_rate, tumour_content)

    return result
def load_table(config_file, burnin=0, min_size=0, max_clusters=None, mesh_size=101, thin=1):
    '''
    Build a table with one posterior row per (cluster, sample) pair.

    Parameters:
        config_file: Path of the analysis config; also used to locate the
            precision trace, the input data and the clustering trace.
        burnin: Number of leading trace rows to discard.
        min_size: Rows for clusters with fewer mutations than this are dropped
            from the returned table.
        max_clusters: Forwarded to ``cluster_pyclone_trace``.
        mesh_size: Forwarded to ``_compute_posterior``.
        thin: Step used when sub-sampling the trace after burnin.

    Returns:
        A pandas DataFrame whose leading columns are ``sample_id``,
        ``cluster_id`` and ``size``, followed by the fields produced by
        ``_compute_posterior``.

    Raises:
        Exception: If the configured density is neither ``pyclone_binomial``
            nor ``pyclone_beta_binomial``.
    '''
    config = paths.load_config(config_file)

    if config['density'] == 'pyclone_beta_binomial':
        precision_file = paths.get_precision_trace_file(config_file)

        # ``read_csv(squeeze=True)`` was deprecated in pandas 1.4 and removed
        # in 2.0. Squeezing the parsed frame along the columns axis is the
        # documented equivalent and also works on older pandas releases.
        precision = pd.read_csv(
            precision_file,
            header=None,
            compression='bz2',
            sep='\t',
        ).squeeze('columns')

        # Summarise the post-burnin, thinned trace by its mean.
        precision = precision.iloc[burnin::thin].mean()

        density = PyCloneBetaBinomialDensity(GammaData(precision))

    elif config['density'] == 'pyclone_binomial':
        density = PyCloneBinomialDensity()

    else:
        raise Exception(
            'Only pyclone_binomial and pyclone_beta_binomial density are supported.'
        )

    data, sample_ids = load_data(config_file)

    labels = cluster_pyclone_trace(config_file, burnin, thin, max_clusters=max_clusters)
    labels = labels.set_index('mutation_id')

    posteriors = []

    for cluster_id, cluster_df in labels.groupby('cluster_id'):
        mutation_ids = list(cluster_df.index)
        cluster_data = [data[x] for x in mutation_ids]

        for sample_id in sample_ids:
            cluster_sample_data = [x[sample_id] for x in cluster_data]

            cluster_sample_posterior = _compute_posterior(
                cluster_sample_data, density, mesh_size)

            # Tag the posterior with the identifiers needed to tell rows apart.
            cluster_sample_posterior['sample_id'] = sample_id
            cluster_sample_posterior['cluster_id'] = cluster_id
            cluster_sample_posterior['size'] = len(mutation_ids)

            posteriors.append(cluster_sample_posterior)

    df = pd.DataFrame(posteriors)

    # The set_index / reset_index round trip moves the three identifier
    # columns to the front of the frame.
    df = df.set_index(['sample_id', 'cluster_id', 'size'])
    df = df.reset_index()

    df = df[df['size'] >= min_size]

    return df
def load_precision_params(config_file):
    '''
    Return the ``beta_binomial_precision_params`` entry of the config file.
    '''
    return paths.load_config(config_file)['beta_binomial_precision_params']
def load_init_method(config_file):
    '''
    Return the ``init_method`` entry of the config file.

    Falls back to 'disconnected' when the config has no such entry.
    '''
    settings = paths.load_config(config_file)
    init_method = settings.get('init_method', 'disconnected')
    return init_method
def load_base_measure_params(config_file):
    '''
    Return the ``base_measure_params`` entry of the config file.
    '''
    return paths.load_config(config_file)['base_measure_params']