Пример #1
0
def seq_to_seq_map(dict_or_file_or_dataframe):
    """Return a dict mapping segment names (PID) to their sequences.

    Accepted inputs:

    * a dict -- handed back as-is (the very same object, neither copied
      nor modified);
    * a string -- passed to ``pandas.read_excel`` to load a table;
    * anything else -- used directly as the table (expected to be a
      dataframe).

    The table must provide the columns ``PID`` and ``Sequence``. Rows with
    a missing value in any column are discarded, and surrounding whitespace
    is stripped from both the names and the sequences.
    """
    import pandas as pd

    source = dict_or_file_or_dataframe

    # A ready-made mapping needs no processing.
    if isinstance(source, dict):
        return source

    # Strings are loaded with read_excel; everything else is taken as the table.
    table = pd.read_excel(source) if u.is_str(source) else source

    # Discard incomplete rows before building the mapping.
    table = table.dropna()

    # Strip whitespace from the names column-wise, from the sequences one by one.
    names = table.PID.str.strip()
    return {name: seq.strip() for name, seq in zip(names, table.Sequence)}
Пример #2
0
def get_annotated_sequence(segments,
                           seg_to_seq,
                           linkers="GSGPG",
                           N_tag="",
                           C_tag=""):
    """Return an annotated amino acid sequence of the polyhedron.

    Every segment is written on its own line as the segment's sequence,
    followed by a tab, a '|' and the segment name. One linker line is placed
    between each pair of consecutive segments.

    segments     - ordered segment names; each is looked up in the sequence map.
    seg_to_seq   - anything accepted by seq_to_seq_map.
    linkers      - a single string (reused between all segments) or a sequence
                   holding exactly len(segments) - 1 linker strings.
    N_tag, C_tag - emitted as the first and the last line of the result.

    Raises AssertionError if the number of linkers is not len(segments) - 1.
    """

    seg_to_seq = seq_to_seq_map(seg_to_seq)
    N = len(segments)
    expected_linkers = N - 1
    if u.is_str(linkers):
        linkers = [linkers] * expected_linkers

    # The message reports the required count (N - 1), not the segment count.
    assert len(linkers) == expected_linkers, (
        "Length of linkers must be one less than the number of segments. "
        "Is {NL}, but should be {N}.".format(NL=len(linkers),
                                             N=expected_linkers))

    max_seg_len = max(len(seg_to_seq[s]) for s in segments)

    # NOTE(review): padding is applied before '-' characters are removed, so
    # sequences that contain '-' end up shorter than max_seg_len and their
    # annotations are not column-aligned with the rest. Kept as-is because the
    # intended layout cannot be confirmed from here.
    aa_segments = [
        seg_to_seq[s].ljust(max_seg_len).replace('-', '') + "\t|" + s
        for s in segments
    ]

    # Interleave segments and linkers, then frame them with the two tags.
    lines = [N_tag] + list(u.roundrobin(aa_segments, linkers)) + [C_tag]
    return "\n".join(lines)
Пример #3
0
def load_pairs(yaml_str):
    """Load a YAML string and return its list of pair entries.

    Each entry must have the keys 'pair' and 'chains'. Either value may be a
    colon-separated string or a list of strings; both forms are normalized,
    in place, to a list of whitespace-stripped strings.

    An empty YAML document yields an empty list.
    """
    # safe_load rather than yaml.load: calling yaml.load without an explicit
    # Loader is rejected by PyYAML >= 6 and, on older versions, may construct
    # arbitrary Python objects from tagged input. Only plain data is used here.
    pairs = yaml.safe_load(yaml_str)
    if pairs is None:
        return []

    for entry in pairs:
        for key in ('pair', 'chains'):
            value = entry[key]
            # Split on the colon if the value was given as a single string.
            if u.is_str(value):
                value = value.split(':')
            # Strip whitespace around every element.
            entry[key] = [item.strip() for item in value]
    return pairs
Пример #4
0
def do_assignment_replacements(topology, assignments):
    """Replace topology segments according to the replacement rules.

    topology    - iterable of segment names.
    assignments - a mapping from segment name to a list of replacements, or
                  a string that segment_assignments_to_dict converts to one.

    Segments are looked up by their upper-cased name. The first occurrence of
    a name is replaced with the first element of its list, the second
    occurrence with the second element, and so on. Segments without a rule
    are passed through unchanged. The caller's mapping and its lists are not
    modified.

    Raises IndexError if a name occurs more often than it has replacements.
    """
    if u.is_str(assignments):
        pending = segment_assignments_to_dict(assignments)
    else:
        # Copy every replacement list, not only the mapping: a shallow copy
        # would let pop(0) below consume the caller's own lists.
        pending = {
            name: list(assignments[name])
            for name in assignments.keys()
        }

    ret = []
    for s in topology:
        key = s.upper()
        if key in pending:
            ret.append(pending[key].pop(0))
        else:
            ret.append(s)
    return ret
Пример #5
0
def test_is_str():
    """Check that u.is_str accepts string literals and rejects other types."""
    # Plain and u-prefixed literals must both be recognized as strings.
    for text in ("A string", u"A string"):
        assert u.is_str(text)

    # Containers and numbers must be rejected.
    for non_text in ({}, [], 1):
        assert not u.is_str(non_text)