def load_elution(fname, getname=True):
    """
    Load a tab-delimited elution profile file into a Struct.

    Expected file structure:
      - header row: one label per column
      - first col: gene id
      - second col: kept aside as ``column2vals`` (and not counted as a
        fraction) when its header is 'Total', 'TotalCount' or
        'Description', compared case-insensitively
      - remaining cols: elution profile data, read as float32

    Args:
        fname: path handed to ``ut.load_tab_file``.
        getname: when true, set ``name`` on the result to the file's
            basename up to its first '.'.

    Returns:
        Struct with ``mat`` (np.matrix, one row per data row), ``prots``
        (first cell of each data row), ``fractions`` (header labels of the
        data columns), ``filename`` and ``filename_original`` (both
        ``fname``), plus ``column2vals`` / ``name`` as described above.
    """
    table = list(ut.load_tab_file(fname))

    # msblender output ends with a '#'-prefixed total-count row that must be
    # dropped; inputs without that marker (e.g. cuihong's data) keep all rows.
    if table[-1][0][0] == '#':
        data_rows = table[1:-1]
    else:
        data_rows = table[1:]

    fractions = list(table[0][1:])
    has_extra_col = fractions[0].lower() in ('total', 'totalcount',
                                             'description')
    if has_extra_col:
        # The second column is not a fraction, so drop its label.
        del fractions[0]
    first_data_col = 2 if has_extra_col else 1

    values = [r[first_data_col:] for r in data_rows]
    elut = Struct(
        mat=np.matrix(values, dtype='float32'),
        prots=[r[0] for r in data_rows],
        fractions=fractions,
        filename=fname,
        filename_original=fname,
    )
    if has_extra_col:
        elut.column2vals = [r[1] for r in data_rows]
    if getname:
        base = os.path.basename(fname)
        elut.name = base.split('.')[0]
    return elut
def load_go_ont(fname, filter_namespace='cellular_component'):
    """
    Parse the [Term] stanzas of an OBO-style ontology file.

    Only terms with at least one ``is_a`` line are returned; terms without
    a parent are left out. A term is complete when the next stanza header
    or the end of the file is reached, so the last term in the file is
    included. Lines belonging to other stanzas (e.g. ``[Typedef]``) are
    ignored rather than being folded into the preceding term.

    Args:
        fname: path of the ontology file to read.
        filter_namespace: keep only terms whose ``namespace`` value equals
            this string; a falsy value (None, '') keeps every namespace.

    Returns:
        dict mapping term id -> (term name, set of parent ids). Parent ids
        are the text of each ``is_a`` value before the first ' ! '. The
        name is None if the term had no ``name`` line. Terms with no
        ``id`` line are skipped.
    """
    dterms = {}

    def store(term):
        # Record a finished term if it has parents and passes the filter.
        if term is None or not term['parents'] or 'acc' not in term:
            return
        if not filter_namespace or filter_namespace == term.get('namespace'):
            dterms[term['acc']] = (term.get('name'), term['parents'])

    term = None
    # open() in a with-block works on Python 2 and 3 and closes the handle.
    with open(fname, 'r') as handle:
        for r in handle:
            line = r.strip()
            if line == '[Term]':
                store(term)
                term = {'parents': set()}
            elif line.startswith('[') and line.endswith(']'):
                # Any other stanza header ends the current term; its lines
                # must not overwrite the term's id or name.
                store(term)
                term = None
            elif line and term is not None:
                # Split on the first ': ' only, so values that themselves
                # contain ': ' stay intact.
                att, _, rest = r.partition(': ')
                val = rest.strip()
                if att == 'id':
                    term['acc'] = val
                elif att == 'name':
                    term['name'] = val
                elif att == 'namespace':
                    term['namespace'] = val
                elif att == 'is_a':
                    term['parents'].add(val.split(' ! ')[0])
    # Flush the term that was still open when the file ended.
    store(term)
    return dterms
def load_elution(fname, getname=True):
    """
    Read an elution profile from a tab-delimited file.

    File layout, as consumed below:
      - row 0 is a header; its cells after the first label the columns
      - column 0 holds the gene id
      - column 1 is treated as a per-row annotation (stored in
        ``column2vals``) if its header, lower-cased, is "total",
        "totalcount" or "description"; otherwise it is the first fraction
      - every remaining column is elution profile data (float32)

    Args:
        fname: file path passed to ``ut.load_tab_file``.
        getname: if true, attach ``name`` = basename of ``fname`` cut at
            its first ".".

    Returns:
        Struct carrying ``mat``, ``prots``, ``fractions``, ``filename`` and
        ``filename_original``, and optionally ``column2vals`` and ``name``.
    """
    all_rows = list(ut.load_tab_file(fname))

    # A last row starting with "#" is the total-count line that msblender
    # appends; it is excluded. Other sources (cuihong's data) do not have
    # it, so their last row is kept.
    has_total_row = all_rows[-1][0][0] == "#"
    stop = -1 if has_total_row else None
    rows = all_rows[1:stop]

    header = all_rows[0]
    second_is_annotation = header[1].lower() in (
        "total",
        "totalcount",
        "description",
    )
    start_data_col = 2 if second_is_annotation else 1
    fractions = list(header[start_data_col:])

    profile = np.matrix([row[start_data_col:] for row in rows], dtype="float32")
    gene_ids = [row[0] for row in rows]
    elut = Struct(
        mat=profile,
        prots=gene_ids,
        fractions=fractions,
        filename=fname,
        filename_original=fname,
    )
    if second_is_annotation:
        elut.column2vals = [row[1] for row in rows]
    if getname:
        elut.name = os.path.basename(fname).partition(".")[0]
    return elut
def downsample_elution(elution, downsample, seed=0):
    """
    Return a new elution with every downsample-th fraction.

    Args:
        elution: object with ``mat`` (sliced by column here), ``fractions``
            (one label per column of ``mat``) and ``name`` (str).
        downsample: keep one fraction out of every ``downsample``; must be
            a positive integer.
        seed: index of the first fraction to keep.

    Returns:
        A new Struct holding a shallow copy of the attributes of
        ``elution``, with ``mat`` and ``fractions`` reduced to the kept
        columns and ``name`` suffixed with '_down<downsample>'.

    Raises:
        ValueError: if ``downsample`` is smaller than 1.
    """
    if downsample < 1:
        raise ValueError('downsample must be >= 1, got %r' % (downsample,))
    down_elut = Struct()
    # Shallow copy: attributes not reassigned below are shared with the input.
    down_elut.__dict__ = elution.__dict__.copy()
    # Use the requested step rather than a fixed 2, and apply the same
    # start/step to the labels so they stay aligned with the columns.
    down_elut.mat = elution.mat[:, seed::downsample]
    down_elut.fractions = elution.fractions[seed::downsample]
    down_elut.name = elution.name + '_down%i' % downsample
    return down_elut
def downsample_elution(elution, downsample, seed=0):
    """
    Return a new elution with every downsample-th fraction.

    Args:
        elution: object with ``mat`` (sliced by column here), ``fractions``
            (one label per column of ``mat``) and ``name`` (str).
        downsample: keep one fraction out of every ``downsample``; must be
            a positive integer.
        seed: index of the first fraction to keep.

    Returns:
        A new Struct holding a shallow copy of the attributes of
        ``elution``, with ``mat`` and ``fractions`` reduced to the kept
        columns and ``name`` suffixed with "_down<downsample>".

    Raises:
        ValueError: if ``downsample`` is smaller than 1.
    """
    if downsample < 1:
        raise ValueError("downsample must be >= 1, got %r" % (downsample,))
    down_elut = Struct()
    # Shallow copy: attributes not reassigned below are shared with the input.
    down_elut.__dict__ = elution.__dict__.copy()
    # Use the requested step rather than a fixed 2, and apply the same
    # start/step to the labels so they stay aligned with the columns.
    down_elut.mat = elution.mat[:, seed::downsample]
    down_elut.fractions = elution.fractions[seed::downsample]
    down_elut.name = elution.name + "_down%i" % downsample
    return down_elut