示例#1
0
def load_elution(fname, getname=True):
    """
    Load a tab-delimited elution profile file into a Struct.

    Expected file structure:
      - first row: header; its first cell is ignored, the remaining cells
        label the columns
      - first col: gene id
      - second col: treated as an annotation column (not data) if its header
        is 'Total', 'TotalCount' or 'Description' (case-insensitive)
      - remaining cols: elution profile data

    Returns a Struct with attributes:
      mat         -- np.matrix (float32) built from the data columns
      prots       -- first-column value of every data row
      fractions   -- header labels of the data columns
      filename, filename_original -- both set to fname
      column2vals -- second-column values; only set when that column was
                     recognised as an annotation column
      name        -- basename of fname up to its first '.'; only set when
                     getname is true
    """
    lines = list(ut.load_tab_file(fname))
    # final row: total count in msblender output; don't skip in cuihong's data
    # startswith() instead of [0][0]: an empty first cell must not raise
    # IndexError, it simply is not a '#' row.
    skip_final_row = lines[-1][0].startswith('#')
    rows = lines[1:-1] if skip_final_row else lines[1:]
    fractions = list(lines[0][1:])
    if fractions[0].lower() in ('total', 'totalcount', 'description'):
        # Second column is annotation, not data: drop its header label.
        start_data_col = 2
        del fractions[0]
    else:
        start_data_col = 1
    mat = np.matrix([row[start_data_col:] for row in rows], dtype='float32')
    prots = [row[0] for row in rows]
    elut = Struct(mat=mat, prots=prots, fractions=fractions, filename=fname,
                  filename_original=fname)
    if start_data_col == 2:
        elut.column2vals = [row[1] for row in rows]
    if getname:
        elut.name = os.path.basename(fname).split('.')[0]
    return elut
def _store_go_term(dterms, term, filter_namespace):
    """Record a finished term in dterms if it has is_a parents and passes the namespace filter."""
    if hasattr(term, 'set_is_a'):
        if not filter_namespace or filter_namespace == term.namespace:
            dterms[term.acc] = (term.name, term.set_is_a)


def load_go_ont(fname, filter_namespace='cellular_component'):
    """
    Parse '[Term]' stanzas from an ontology file (the layout looks like the
    OBO flat-file format) into a dict.

    fname: path of the ontology file.
    filter_namespace: if truthy, only terms whose 'namespace' tag equals this
        value are kept; pass None or '' to keep every namespace.

    Returns a dict mapping term id -> (term name, set of is_a parent ids).
    Terms without any 'is_a' tag are not included.
    """
    dterms = {}
    term = None
    # open() inside a with-block: works on Python 2 and 3 (the file() builtin
    # is Python-2-only) and guarantees the handle is closed.
    with open(fname, 'r') as handle:
        for r in handle:
            line = r.strip()
            if line.startswith('[') and line.endswith(']'):
                # Any stanza header ends the term collected so far. Only a
                # [Term] header starts a new one; tags of other stanzas
                # (e.g. [Typedef]) must not overwrite the previous term.
                _store_go_term(dterms, term, filter_namespace)
                term = Struct() if line == '[Term]' else None
            elif line and term:
                # Split on the first ': ' only; the value may contain ': '.
                att, _, val = r.partition(': ')
                val = val.strip()
                if att == 'id':
                    term.acc = val
                elif att == 'name':
                    term.name = val
                elif att == 'namespace':
                    term.namespace = val
                elif att == 'is_a':
                    # Keep only the parent id, dropping the ' ! name' suffix.
                    val = val.split(' ! ')[0]
                    if hasattr(term, 'set_is_a'):
                        term.set_is_a.add(val)
                    else:
                        term.set_is_a = set([val])
    # The last term has no following header to trigger its storage.
    _store_go_term(dterms, term, filter_namespace)
    return dterms
示例#3
0
def load_elution(fname, getname=True):
    """
    Load a tab-delimited elution profile file into a Struct.

    Expected file structure:
      - first row: header; its first cell is ignored, the remaining cells
        label the columns
      - first col: gene id
      - second col: treated as an annotation column (not data) if its header
        is 'Total', 'TotalCount' or 'Description' (case-insensitive)
      - remaining cols: elution profile data

    Returns a Struct with attributes:
      mat         -- np.matrix (float32) built from the data columns
      prots       -- first-column value of every data row
      fractions   -- header labels of the data columns
      filename, filename_original -- both set to fname
      column2vals -- second-column values; only set when that column was
                     recognised as an annotation column
      name        -- basename of fname up to its first '.'; only set when
                     getname is true
    """
    lines = list(ut.load_tab_file(fname))
    # final row: total count in msblender output; don't skip in cuihong's data
    # startswith() instead of [0][0]: an empty first cell must not raise
    # IndexError, it simply is not a "#" row.
    skip_final_row = lines[-1][0].startswith("#")
    rows = lines[1:-1] if skip_final_row else lines[1:]
    fractions = list(lines[0][1:])
    if fractions[0].lower() in ("total", "totalcount", "description"):
        # Second column is annotation, not data: drop its header label.
        start_data_col = 2
        del fractions[0]
    else:
        start_data_col = 1
    mat = np.matrix([row[start_data_col:] for row in rows], dtype="float32")
    prots = [row[0] for row in rows]
    elut = Struct(mat=mat, prots=prots, fractions=fractions, filename=fname, filename_original=fname)
    if start_data_col == 2:
        elut.column2vals = [row[1] for row in rows]
    if getname:
        elut.name = os.path.basename(fname).split(".")[0]
    return elut
示例#4
0
def downsample_elution(elution, downsample, seed=0):
    """
    Return a new elution with every downsample-th fraction.

    elution: object carrying `mat`, `fractions` and `name` attributes.
    downsample: step between the fraction columns that are kept.
    seed: index of the first fraction column that is kept.

    The returned Struct starts as a shallow copy of elution's attributes;
    `mat`, `fractions` and `name` are then replaced, so the input object's
    own attributes are not reassigned.
    """
    down_elut = Struct()
    down_elut.__dict__ = elution.__dict__.copy()
    # Use the requested step (it was hard-coded to 2) and apply the same
    # seed offset to the labels so they stay aligned with the matrix columns.
    down_elut.mat = elution.mat[:, seed::downsample]
    down_elut.fractions = elution.fractions[seed::downsample]
    down_elut.name = elution.name + '_down%i' % downsample
    return down_elut
示例#5
0
def downsample_elution(elution, downsample, seed=0):
    """
    Return a new elution with every downsample-th fraction.

    elution: object carrying `mat`, `fractions` and `name` attributes.
    downsample: step between the fraction columns that are kept.
    seed: index of the first fraction column that is kept.

    The returned Struct starts as a shallow copy of elution's attributes;
    `mat`, `fractions` and `name` are then replaced, so the input object's
    own attributes are not reassigned.
    """
    down_elut = Struct()
    down_elut.__dict__ = elution.__dict__.copy()
    # Use the requested step (it was hard-coded to 2) and apply the same
    # seed offset to the labels so they stay aligned with the matrix columns.
    down_elut.mat = elution.mat[:, seed::downsample]
    down_elut.fractions = elution.fractions[seed::downsample]
    down_elut.name = elution.name + "_down%i" % downsample
    return down_elut