Exemplo n.º 1
0
def get_distribution(distribution_string, state_name, valid_states):
    """
    Parse a multi-line weight specification into a normalized distribution.
    Each line produced by smallutil.stripped_lines is handed to
    get_weight_pair, which returns a (state, weight) pair.
    A state may appear at most once, and the number of distinct states
    must not be smaller than len(valid_states).
    The collected weights are divided by their sum so that they add up to one.
    @param distribution_string: a string of lines of text that each look something like 'A : 0.245'
    @param state_name: label used in the error messages, something like 'amino acid'
    @param valid_states: a set of valid states; forwarded to get_weight_pair
    @return: a dictionary mapping each state to a probability
    @raise HandlingError: if the string is empty, a state is repeated,
    fewer states than len(valid_states) were given, or the weights sum to zero
    """
    # Guard clause: an empty specification cannot describe a distribution.
    if not distribution_string:
        raise HandlingError('no %s distribution was specified' % state_name)
    weights = {}
    for text_line in smallutil.stripped_lines(StringIO(distribution_string)):
        # NOTE(review): get_weight_pair presumably rejects states outside
        # valid_states; that check is not visible here -- confirm.
        key, value = get_weight_pair(text_line, state_name, valid_states)
        if key in weights:
            raise HandlingError('duplicate %s: %s' % (state_name, key))
        weights[key] = value
    # Only the counts are compared here, not the actual state names.
    if len(valid_states) > len(weights):
        raise HandlingError('one or more %s was not assigned a weight' %
                            state_name)
    normalizer = float(sum(weights.values()))
    # A zero total would make the normalization below divide by zero.
    if not normalizer:
        raise HandlingError('each %s weight is zero' % state_name)
    return {key: value / normalizer for key, value in weights.items()}
Exemplo n.º 2
0
def multiline_state_to_ndarray(multiline_state):
    """
    Convert a multi-line string of 0/1 characters into a numpy array.

    The text is split with str.splitlines and passed through stripped_lines;
    every character of each resulting line is converted with int().
    NOTE(review): stripped_lines presumably strips whitespace and skips blank
    lines -- confirm against its definition.

    @param multiline_state: a string with one row of allele characters per line
    @return: the result of np.array applied to the list of integer rows
    @raise ValueError: if a character is not an integer literal, or if it
    converts to something other than 0 or 1 ('invalid allele')
    """
    def _to_allele(symbol):
        # Convert and validate one character at a time so the first
        # offending character determines which error is raised.
        allele = int(symbol)
        if allele != 0 and allele != 1:
            raise ValueError('invalid allele')
        return allele

    rows = [
        [_to_allele(symbol) for symbol in text_line]
        for text_line in stripped_lines(multiline_state.splitlines())
    ]
    return np.array(rows)
Exemplo n.º 3
0
def multiline_state_to_ndarray(multiline_state):
    """
    Build a numpy array of alleles from a block of text.

    The input is split into lines with str.splitlines, passed through
    stripped_lines, and each character of every resulting line is parsed
    with int() and required to be 0 or 1.
    NOTE(review): the exact filtering done by stripped_lines is not visible
    here -- confirm before relying on blank-line handling.

    @param multiline_state: a string holding one row of 0/1 characters per line
    @return: np.array of the parsed rows (a list of lists of ints)
    @raise ValueError: for a character int() cannot parse, or for a parsed
    value other than 0 or 1 ('invalid allele')
    """
    def _parse_symbol(symbol):
        # Characters are checked in order, so parsing stops at the first
        # bad one.
        number = int(symbol)
        if number in (0, 1):
            return number
        raise ValueError('invalid allele')

    matrix = []
    for text_line in stripped_lines(multiline_state.splitlines()):
        matrix.append(list(map(_parse_symbol, text_line)))
    return np.array(matrix)