def assertPriorIsPrecise(self, msbayes_workers, places=2):
     """Assert that pooled prior samples agree with the configured priors.

     The checks run in this order:

     1. every worker has finished (``assertWorkersFinished``);
     2. each parameter summary holds as many samples as the summed
        ``sample_size`` of all workers;
     3. the header of the first worker has exactly one psi column, one
        model column only when that worker has a ``model_index``, and
        ``npairs`` tau, ``npairs`` ancestral-theta and ``2 * npairs``
        descendant-theta columns when ``report_parameters`` is set
        (zero of each otherwise);
     4. every matched column passes ``assertSampleIsFromDistribution``
        against the corresponding attribute of the config.

     ``places`` is forwarded unchanged to each
     ``assertSampleIsFromDistribution`` call.
     """
     workers = list(msbayes_workers)
     self.assertWorkersFinished(workers)
     summaries = self.get_parameter_summaries_from_msbayes_workers(workers)
     expected_n = sum(w.sample_size for w in workers)
     for summary in summaries.itervalues():
         self.assertEqual(summary.n, expected_n)
     cfg = self.get_config_from_msbayes_workers(workers)

     # All header-shape checks are made against the first worker only.
     first = workers[0]
     header = first.header
     psi_indices = get_indices_of_patterns(header, PSI_PATTERNS)
     self.assertEqual(len(psi_indices), 1)
     model_indices = get_indices_of_patterns(header, MODEL_PATTERNS)
     expected_models = 0 if first.model_index is None else 1
     self.assertEqual(len(model_indices), expected_models)
     tau_indices = get_indices_of_patterns(header, TAU_PATTERNS)
     a_theta_indices = get_indices_of_patterns(header, A_THETA_PATTERNS)
     d_theta_indices = get_indices_of_patterns(header, D_THETA_PATTERNS)
     per_pair = cfg.npairs if first.report_parameters else 0
     self.assertEqual(len(tau_indices), per_pair)
     self.assertEqual(len(a_theta_indices), per_pair)
     self.assertEqual(len(d_theta_indices), 2 * per_pair)

     summary_lines = [str(summaries[k]) for k in sorted(summaries)]
     _LOG.debug('\n{0}\n'.format('\n'.join(summary_lines)))

     # The prior attribute is looked up lazily so it is only touched when
     # at least one matching column exists.
     simple_checks = ((psi_indices, 'psi'),
                      (tau_indices, 'tau'),
                      (a_theta_indices, 'a_theta'))
     for indices, prior_name in simple_checks:
         for i in indices:
             self.assertSampleIsFromDistribution(summaries[i],
                                                 getattr(cfg, prior_name),
                                                 places=places)
     for i in d_theta_indices:
         self.assertSampleIsFromDistribution(
             summaries[i],
             cfg.d_theta,
             mean_adj=cfg.theta.mean,
             max_adj=cfg.theta.maximum,
             compare_variance=False,
             places=places)
Exemplo n.º 2
0
 def assertPriorIsPrecise(self, msbayes_workers, places=2):
     """Check that the workers' pooled prior samples match the config.

     In order, this asserts that all workers finished, that every
     parameter summary contains one sample per draw across all workers
     (the sum of their ``sample_size``), that the first worker's header
     has the expected number of psi, model, tau and theta columns, and
     finally that each matched column passes
     ``assertSampleIsFromDistribution`` against the matching config
     attribute.  ``places`` is passed straight through to those calls.
     """
     workers = list(msbayes_workers)
     self.assertWorkersFinished(workers)
     summaries = self.get_parameter_summaries_from_msbayes_workers(workers)
     total_samples = sum(w.sample_size for w in workers)
     for summary in summaries.itervalues():
         self.assertEqual(summary.n, total_samples)
     cfg = self.get_config_from_msbayes_workers(workers)

     # Header layout is verified on the first worker only.
     lead = workers[0]
     header = lead.header
     psi_indices = get_indices_of_patterns(header, PSI_PATTERNS)
     self.assertEqual(len(psi_indices), 1)
     model_indices = get_indices_of_patterns(header, MODEL_PATTERNS)
     n_model_cols = 0 if lead.model_index is None else 1
     self.assertEqual(len(model_indices), n_model_cols)
     tau_indices = get_indices_of_patterns(header, TAU_PATTERNS)
     a_theta_indices = get_indices_of_patterns(header, A_THETA_PATTERNS)
     d_theta_indices = get_indices_of_patterns(header, D_THETA_PATTERNS)
     # Per-pair columns are only present when parameters are reported.
     n_per_pair = cfg.npairs if lead.report_parameters else 0
     self.assertEqual(len(tau_indices), n_per_pair)
     self.assertEqual(len(a_theta_indices), n_per_pair)
     self.assertEqual(len(d_theta_indices), 2*n_per_pair)

     report = '\n'.join([str(summaries[k]) for k in sorted(summaries)])
     _LOG.debug('\n{0}\n'.format(report))

     for column_indices, prior_name in [(psi_indices, 'psi'),
             (tau_indices, 'tau'),
             (a_theta_indices, 'a_theta')]:
         for i in column_indices:
             # getattr keeps the config lookup inside the loop, so the
             # attribute is only read when a column actually matched.
             self.assertSampleIsFromDistribution(summaries[i],
                     getattr(cfg, prior_name),
                     places=places)
     for i in d_theta_indices:
         self.assertSampleIsFromDistribution(summaries[i], cfg.d_theta,
                 mean_adj=cfg.theta.mean,
                 max_adj=cfg.theta.maximum,
                 compare_variance=False,
                 places=places)
Exemplo n.º 3
0
def parameter_density_iter(parameter_density_file,
        parameter_patterns = DIV_MODEL_PATTERNS + MODEL_PATTERNS + \
                PSI_PATTERNS + MEAN_TAU_PATTERNS + OMEGA_PATTERNS + \
                CV_PATTERNS):
    """Iterate over the rows of a parameter density file.

    For every non-blank row, yields a dict keyed by the header entries
    that match ``parameter_patterns``.  Each value is a 2-tuple of floats:
    the entry in the matched column and the entry in the column directly
    to its right (presumably the density estimate for that value --
    confirm against the file format).

    The *same* dict object is updated in place and yielded for each row,
    so callers that need to keep a row must copy it.

    Raises ``errors.ParameterParsingError`` when two matched columns share
    a header entry.  The stream is closed on exit only when
    ``process_file_arg`` reports that it should be.
    """
    stream, should_close = process_file_arg(parameter_density_file)
    try:
        header = parse_header(stream, seek = False)
        parameter_indices = functions.get_indices_of_patterns(header,
                parameter_patterns)
        indices_to_heads = dict((idx, header[idx])
                for idx in parameter_indices)
        heads_to_dens_tups = dict.fromkeys(
                header[idx] for idx in parameter_indices)
        unique_heads = set(indices_to_heads.itervalues())
        if len(unique_heads) != len(parameter_indices):
            raise errors.ParameterParsingError(
                    'some parameters were found in multiple columns in '
                    'density file {0!r}'.format(stream.name))
        for line in stream:
            fields = line.strip().split()
            if not fields:
                continue
            for idx in parameter_indices:
                pair = (float(fields[idx]), float(fields[idx + 1]))
                heads_to_dens_tups[indices_to_heads[idx]] = pair
            yield heads_to_dens_tups
    finally:
        if should_close:
            stream.close()
Exemplo n.º 4
0
 def result_path_iter(self, observed_index, prior_index):
     """Yield ``(true_params, paths)`` for each line of an observed file.

     ``true_params`` maps every header entry matched by
     ``parsing.PARAMETER_PATTERNS`` to the raw string found in that column
     of the line, plus ``'PRI.model'`` set to the string form of the prior
     index recorded for ``observed_index``.

     ``paths`` maps the keys ``'summary'``, ``'psi'``, ``'omega'``,
     ``'cv'``, ``'div-model'`` and ``'model'`` to result file paths.  They
     are built from ``get_result_path_prefix(observed_index, prior_index,
     i + 1)``, where ``i`` counts the lines read after ``parse_header``
     returns, followed by ``self.final_result_index``.

     This is a generator, so nothing runs until the first item is
     requested.  At that point ``Exception`` is raised if the expected
     result directory does not exist.

     The observed stream is closed when iteration finishes, when an error
     propagates, and when the generator is closed or discarded before
     being exhausted.
     """
     true_model = self.observed_index_to_prior_index[observed_index]
     out_dir = self.get_result_dir(observed_index, prior_index)
     if not os.path.isdir(out_dir):
         raise Exception('expected result direcory {0!r} does not '
                 'exist'.format(out_dir))
     observed_stream, _ = process_file_arg(
             self.observed_index_to_path[observed_index])
     try:
         header = parsing.parse_header(observed_stream, sep = '\t',
                 strict = True, seek = False)
         parameter_indices = functions.get_indices_of_patterns(header,
                 parsing.PARAMETER_PATTERNS)
         # The matched header names are the same for every line.
         parameter_heads = [header[x] for x in parameter_indices]
         for i, line in enumerate(observed_stream):
             l = line.strip().split()
             true_params = dict(zip(parameter_heads,
                     [l[x] for x in parameter_indices]))
             true_params['PRI.model'] = str(true_model)
             result_prefix = '{0}{1}-'.format(self.get_result_path_prefix(
                     observed_index, prior_index, i + 1),
                     self.final_result_index)
             paths = {'summary': result_prefix + 'posterior-summary.txt',
                      'psi': result_prefix + 'psi-results.txt',
                      'omega': result_prefix + 'omega-results.txt',
                      'cv': result_prefix + 'cv-results.txt',
                      'div-model': result_prefix + 'div-model-results.txt',
                      'model': result_prefix + 'model-results.txt'}
             yield true_params, paths
     finally:
         # Closed regardless of the ownership flag, exactly as on normal
         # completion; the finally also covers errors and early exits.
         observed_stream.close()
Exemplo n.º 5
0
def parameter_density_iter(parameter_density_file,
        parameter_patterns = DIV_MODEL_PATTERNS + MODEL_PATTERNS + \
                PSI_PATTERNS + MEAN_TAU_PATTERNS + OMEGA_PATTERNS + \
                CV_PATTERNS):
    """Yield a dict of parsed values for each non-blank density-file row.

    The dict is keyed by the header entries matching
    ``parameter_patterns``.  Each value is a pair of floats read from the
    matched column and from the column immediately after it (the second
    is presumably the density for the first -- verify against the
    producer of the file).

    One dict object is reused: it is overwritten and re-yielded on every
    row.  ``errors.ParameterParsingError`` is raised if the matched
    columns do not all have distinct header entries.  The underlying
    stream is closed afterwards only if ``process_file_arg`` says so.
    """
    dens_stream, owns_stream = process_file_arg(parameter_density_file)
    try:
        header = parse_header(dens_stream, seek=False)
        parameter_indices = functions.get_indices_of_patterns(
            header, parameter_patterns)
        indices_to_heads = dict(
            (col, header[col]) for col in parameter_indices)
        heads_to_dens_tups = dict.fromkeys(
            header[col] for col in parameter_indices)
        n_distinct_heads = len(set(indices_to_heads.itervalues()))
        if n_distinct_heads != len(parameter_indices):
            raise errors.ParameterParsingError(
                'some parameters were found in multiple columns '
                'in density file {0!r}'.format(dens_stream.name))
        for raw_line in dens_stream:
            fields = raw_line.strip().split()
            if not fields:
                continue
            for col in parameter_indices:
                head = indices_to_heads[col]
                heads_to_dens_tups[head] = (float(fields[col]),
                                            float(fields[col + 1]))
            yield heads_to_dens_tups
    finally:
        if owns_stream:
            dens_stream.close()
 def get_parameter_summaries_from_msbayes_workers(self,
                                                  msbayes_workers,
                                                  shuffle_taus=True):
     """Summarise the parameter columns of the workers' prior files.

     Returns a dict mapping each parameter index of the first worker to a
     ``SampleSummarizer`` tagged with the matching header entry.  Every
     non-header row of every worker's ``prior_path`` file contributes one
     sample per parameter column.

     Asserts that all workers share the first worker's header and
     parameter indices, and that every row has as many columns as the
     first row read.

     When ``shuffle_taus`` is true, the tau values of each row are
     shuffled among the tau columns before being recorded, because taus
     are sorted in prior files.  In that mode each row's psi value is also
     asserted to equal the number of distinct taus in the row.
     """
     msbayes_workers = list(msbayes_workers)
     header = msbayes_workers[0].header
     pi = msbayes_workers[0].parameter_indices
     s = dict((i, SampleSummarizer(tag=header[i])) for i in pi)
     if shuffle_taus:
         # These depend only on the header, which is asserted below to be
         # identical for every worker, so compute them once.
         psi_matches = get_indices_of_patterns(header, PSI_PATTERNS)
         tau_indices = get_indices_of_patterns(header, TAU_PATTERNS)
         non_tau_indices = sorted(set(pi) - set(tau_indices))
     ncols = None
     for w in msbayes_workers:
         self.assertEqual(w.header, header)
         self.assertEqual(w.parameter_indices, pi)
         # 'with' guarantees the file is closed even if an assertion
         # below fails part-way through the file.
         with open(w.prior_path, 'rU') as f:
             for row in f:
                 stripped = row.strip()
                 r = stripped.split()
                 if not ncols:
                     ncols = len(r)
                 if HEADER_PATTERN.match(stripped):
                     continue
                 self.assertEqual(len(r), ncols)
                 if shuffle_taus:
                     psi = int(r[psi_matches[0]])
                     taus = [float(r[i]) for i in tau_indices]
                     self.assertEqual(psi, len(set(taus)))
                     random.shuffle(taus)
                     for i, tau in zip(tau_indices, taus):
                         s[i].add_sample(tau)
                     for i in non_tau_indices:
                         s[i].add_sample(float(r[i]))
                 else:
                     for i in w.parameter_indices:
                         s[i].add_sample(float(r[i]))
     return s
Exemplo n.º 7
0
 def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
         shuffle_taus=True):
     """Build one ``SampleSummarizer`` per parameter column of the priors.

     The returned dict is keyed by the parameter indices of the first
     worker.  Each summarizer is tagged with the header entry at that
     index and receives one sample from every non-header row of every
     worker's ``prior_path`` file.

     All workers are asserted to have the same header and parameter
     indices as the first, and every row is asserted to have the same
     number of columns as the first row read.

     With ``shuffle_taus`` true, each row's tau values are shuffled across
     the tau columns before being added, because taus are sorted in prior
     files.  The row's psi value must then equal the number of distinct
     taus in that row.
     """
     msbayes_workers = list(msbayes_workers)
     header = msbayes_workers[0].header
     pi = msbayes_workers[0].parameter_indices
     s = dict((i, SampleSummarizer(tag=header[i])) for i in pi)
     if shuffle_taus:
         # Header-derived lookups are loop-invariant (every worker's
         # header is asserted equal to this one), so do them up front.
         psi_matches = get_indices_of_patterns(header, PSI_PATTERNS)
         tau_indices = get_indices_of_patterns(header, TAU_PATTERNS)
         non_tau_indices = sorted(set(pi) - set(tau_indices))
     ncols = None
     for w in msbayes_workers:
         self.assertEqual(w.header, header)
         self.assertEqual(w.parameter_indices, pi)
         # Context manager: the handle must not leak when an assertion
         # inside the loop fails.
         with open(w.prior_path, 'rU') as f:
             for row in f:
                 stripped = row.strip()
                 r = stripped.split()
                 if not ncols:
                     ncols = len(r)
                 if HEADER_PATTERN.match(stripped):
                     continue
                 self.assertEqual(len(r), ncols)
                 if shuffle_taus:
                     psi = int(r[psi_matches[0]])
                     taus = [float(r[i]) for i in tau_indices]
                     self.assertEqual(psi, len(set(taus)))
                     random.shuffle(taus)
                     for i, tau in zip(tau_indices, taus):
                         s[i].add_sample(tau)
                     for i in non_tau_indices:
                         s[i].add_sample(float(r[i]))
                 else:
                     for i in w.parameter_indices:
                         s[i].add_sample(float(r[i]))
     return s
Exemplo n.º 8
0
 def result_path_iter(self, observed_index, prior_index):
     """Generate ``(true_params, paths)`` pairs, one per observed line.

     ``true_params`` holds the raw column strings for every header entry
     matched by ``parsing.PARAMETER_PATTERNS``, keyed by header entry,
     with ``'PRI.model'`` added as the string form of the prior index
     stored for ``observed_index``.

     ``paths`` holds the result file paths under the keys ``'summary'``,
     ``'psi'``, ``'omega'``, ``'cv'``, ``'div-model'`` and ``'model'``.
     Their common prefix comes from ``get_result_path_prefix`` (called
     with ``i + 1``, where ``i`` counts lines read after the header was
     parsed) followed by ``self.final_result_index``.

     Because this is a generator, the missing-directory ``Exception`` is
     raised on the first ``next()`` rather than at call time.

     The observed stream is closed on every exit path: exhaustion, an
     error, or early termination of the generator.
     """
     true_model = self.observed_index_to_prior_index[observed_index]
     out_dir = self.get_result_dir(observed_index, prior_index)
     if not os.path.isdir(out_dir):
         raise Exception('expected result direcory {0!r} does not '
                         'exist'.format(out_dir))
     observed_stream, _ = process_file_arg(
         self.observed_index_to_path[observed_index])
     try:
         header = parsing.parse_header(observed_stream,
                                       sep='\t',
                                       strict=True,
                                       seek=False)
         parameter_indices = functions.get_indices_of_patterns(
             header, parsing.PARAMETER_PATTERNS)
         # Header names of the matched columns do not change per line.
         parameter_heads = [header[x] for x in parameter_indices]
         for i, line in enumerate(observed_stream):
             l = line.strip().split()
             true_params = dict(
                 zip(parameter_heads, [l[x] for x in parameter_indices]))
             true_params['PRI.model'] = str(true_model)
             result_prefix = '{0}{1}-'.format(
                 self.get_result_path_prefix(observed_index, prior_index,
                                             i + 1), self.final_result_index)
             paths = {
                 'summary': result_prefix + 'posterior-summary.txt',
                 'psi': result_prefix + 'psi-results.txt',
                 'omega': result_prefix + 'omega-results.txt',
                 'cv': result_prefix + 'cv-results.txt',
                 'div-model': result_prefix + 'div-model-results.txt',
                 'model': result_prefix + 'model-results.txt'
             }
             yield true_params, paths
     finally:
         # Unconditional, matching what happens on normal completion; the
         # finally extends that to failures and abandoned iteration.
         observed_stream.close()
Exemplo n.º 9
0
def parameter_iter(file_obj, include_line = False, include_thetas = False):
    """Iterate over the parameter samples in a posterior file.

    For each non-blank line after the header, yields a dict keyed by the
    parameter names whose columns were found in the header:

    * ``'mean_tau'``, ``'omega'``, ``'cv'``: a list of floats;
    * ``'psi'``, ``'model'``, ``'div_model'``: a list of ints;
    * ``'taus'`` and, only when ``include_thetas`` is true,
      ``'a_thetas'``, ``'d1_thetas'``, ``'d2_thetas'``: a list holding a
      single list of floats.

    When ``include_line`` is true, each item is a ``(samples, fields)``
    tuple, where ``fields`` is the whitespace-split line.  The same
    ``samples`` dict object is reused for every line; copy it to keep a
    row.

    Raises ``errors.ParameterParsingError`` if the header has more than
    one mean tau, omega, cv, psi, model or div model column, or if a line
    has a different number of columns than the header.

    The file is closed on every exit path (exhaustion, error, or early
    termination of the generator), but only when ``process_file_arg``
    reports that it should be closed here.
    """
    # (samples key, header patterns, label for the error message).  A
    # label of None marks a parameter that may span several columns.
    column_specs = [
        ('mean_tau', MEAN_TAU_PATTERNS, 'mean tau'),
        ('omega', OMEGA_PATTERNS, 'omega'),
        ('cv', CV_PATTERNS, 'cv'),
        ('taus', TAU_PATTERNS, None),
    ]
    if include_thetas:
        column_specs.extend([
            ('a_thetas', A_THETA_PATTERNS, None),
            ('d1_thetas', D1_THETA_PATTERNS, None),
            ('d2_thetas', D2_THETA_PATTERNS, None),
        ])
    column_specs.extend([
        ('psi', PSI_PATTERNS, 'psi'),
        ('model', MODEL_PATTERNS, 'model'),
        ('div_model', DIV_MODEL_PATTERNS, 'div model'),
    ])
    integer_keys = ('psi', 'model', 'div_model')
    nested_keys = ('taus', 'a_thetas', 'd1_thetas', 'd2_thetas')

    post_file, close = process_file_arg(file_obj)
    try:
        header = parse_header(post_file, seek = False)
        indices = {}
        for key, patterns, label in column_specs:
            found = functions.get_indices_of_patterns(header, patterns)
            if label is not None and len(found) > 1:
                raise errors.ParameterParsingError(
                        'posterior file {0} has {1} {2} columns'.format(
                                post_file.name, len(found), label))
            if found:
                indices[key] = found
        samples = dict.fromkeys(indices)
        for line_idx, line in enumerate(post_file):
            fields = line.strip().split()
            if not fields:
                continue
            if len(fields) != len(header):
                # +2: line_idx is zero-based and the header is line 1.
                raise errors.ParameterParsingError('posterior file {0} has '
                        '{1} columns at line {2}; expecting {3}'.format(
                                post_file.name, len(fields), line_idx + 2,
                                len(header)))
            for key, idx_list in indices.items():
                convert = int if key in integer_keys else float
                # 'col' is distinct from the line counter: Python 2 leaks
                # list-comprehension variables into the enclosing scope.
                values = [convert(fields[col]) for col in idx_list]
                samples[key] = [values] if key in nested_keys else values
            if include_line:
                yield samples, fields
            else:
                yield samples
    finally:
        if close:
            post_file.close()
Exemplo n.º 10
0
def get_dummy_indices(header_list, dummy_patterns=DUMMY_PATTERNS):
    """Return ``functions.get_indices_of_patterns`` applied to
    ``header_list`` with ``dummy_patterns`` (``DUMMY_PATTERNS`` by default).
    """
    dummy_indices = functions.get_indices_of_patterns(header_list,
                                                      dummy_patterns)
    return dummy_indices
Exemplo n.º 11
0
def get_stat_indices(header_list, stat_patterns=DEFAULT_STAT_PATTERNS):
    """Return ``functions.get_indices_of_patterns`` applied to
    ``header_list`` with ``stat_patterns`` (``DEFAULT_STAT_PATTERNS`` by
    default).
    """
    stat_indices = functions.get_indices_of_patterns(header_list,
                                                     stat_patterns)
    return stat_indices
Exemplo n.º 12
0
def get_parameter_indices(header_list, parameter_patterns=PARAMETER_PATTERNS):
    """Return ``functions.get_indices_of_patterns`` applied to
    ``header_list`` with ``parameter_patterns`` (``PARAMETER_PATTERNS`` by
    default).
    """
    parameter_indices = functions.get_indices_of_patterns(
        header_list, parameter_patterns)
    return parameter_indices
Exemplo n.º 13
0
def parameter_iter(file_obj, include_line=False, include_thetas=False):
    """Yield the parameter samples found on each line of a posterior file.

    Each non-blank line after the header produces a dict whose keys are
    the parameters that have at least one matching header column:

    * ``'mean_tau'``, ``'omega'``, ``'cv'`` map to a list of floats;
    * ``'psi'``, ``'model'``, ``'div_model'`` map to a list of ints;
    * ``'taus'`` (and ``'a_thetas'``, ``'d1_thetas'``, ``'d2_thetas'``
      when ``include_thetas`` is true) map to a list containing one list
      of floats.

    With ``include_line`` true, the whitespace-split line is yielded
    alongside the dict as ``(samples, fields)``.  The ``samples`` dict is
    one object updated in place between yields.

    Raises ``errors.ParameterParsingError`` when the header contains more
    than one mean tau, omega, cv, psi, model or div model column, or when
    a line's column count differs from the header's.

    The file is closed on all exit paths, including errors and a generator
    that is abandoned early, provided ``process_file_arg`` indicated it
    should be closed here.
    """
    # Each entry: (samples key, header patterns, error-message label).
    # label=None means any number of matching columns is acceptable.
    column_specs = [
        ('mean_tau', MEAN_TAU_PATTERNS, 'mean tau'),
        ('omega', OMEGA_PATTERNS, 'omega'),
        ('cv', CV_PATTERNS, 'cv'),
        ('taus', TAU_PATTERNS, None),
    ]
    if include_thetas:
        column_specs.extend([
            ('a_thetas', A_THETA_PATTERNS, None),
            ('d1_thetas', D1_THETA_PATTERNS, None),
            ('d2_thetas', D2_THETA_PATTERNS, None),
        ])
    column_specs.extend([
        ('psi', PSI_PATTERNS, 'psi'),
        ('model', MODEL_PATTERNS, 'model'),
        ('div_model', DIV_MODEL_PATTERNS, 'div model'),
    ])
    integer_keys = ('psi', 'model', 'div_model')
    nested_keys = ('taus', 'a_thetas', 'd1_thetas', 'd2_thetas')

    post_file, close = process_file_arg(file_obj)
    try:
        header = parse_header(post_file, seek=False)
        indices = {}
        for key, patterns, label in column_specs:
            found = functions.get_indices_of_patterns(header, patterns)
            if label is not None and len(found) > 1:
                raise errors.ParameterParsingError(
                    'posterior file {0} has {1} {2} columns'.format(
                        post_file.name, len(found), label))
            if found:
                indices[key] = found
        samples = dict.fromkeys(indices)
        for line_idx, line in enumerate(post_file):
            fields = line.strip().split()
            if not fields:
                continue
            if len(fields) != len(header):
                # line_idx is zero-based and the header occupies line 1,
                # hence the +2 for a human-readable line number.
                raise errors.ParameterParsingError(
                    'posterior file {0} has '
                    '{1} columns at line {2}; expecting {3}'.format(
                        post_file.name, len(fields), line_idx + 2,
                        len(header)))
            for key, idx_list in indices.items():
                convert = int if key in integer_keys else float
                # Use a name other than the line counter: in Python 2 a
                # list-comprehension variable overwrites same-named locals.
                values = [convert(fields[col]) for col in idx_list]
                samples[key] = [values] if key in nested_keys else values
            if include_line:
                yield samples, fields
            else:
                yield samples
    finally:
        if close:
            post_file.close()
Exemplo n.º 14
0
def get_dummy_indices(header_list, dummy_patterns=DUMMY_PATTERNS):
    """Look up ``dummy_patterns`` in ``header_list`` by delegating to
    ``functions.get_indices_of_patterns`` and return its result unchanged.
    """
    result = functions.get_indices_of_patterns(header_list, dummy_patterns)
    return result
Exemplo n.º 15
0
def get_stat_indices(header_list, stat_patterns=DEFAULT_STAT_PATTERNS):
    """Look up ``stat_patterns`` in ``header_list`` by delegating to
    ``functions.get_indices_of_patterns`` and return its result unchanged.
    """
    result = functions.get_indices_of_patterns(header_list, stat_patterns)
    return result
Exemplo n.º 16
0
def get_parameter_indices(header_list, parameter_patterns=PARAMETER_PATTERNS):
    """Look up ``parameter_patterns`` in ``header_list`` by delegating to
    ``functions.get_indices_of_patterns`` and return its result unchanged.
    """
    result = functions.get_indices_of_patterns(header_list,
                                               parameter_patterns)
    return result