Exemplo n.º 1
0
def filter_multi_orths(arr_in, basesp, cutoff):
    """
    Zero out other-species evidence for rows lacking base-species evidence
    whenever an id in the row has an orthogroup size greater than 1.

    Works on a copy made with ut.arr_copy; columns from index 3 onward are
    treated as evidence columns, and a column's species is taken from the
    first two characters of its name.

    For each row whose maximum over the base-species columns (as returned by
    arr_collist_maxes) is below `cutoff`, and for each other species: if
    either row['id1'] or row['id2'] has a size > 1 in that species' table
    from orth.all_ogroup_sizes, every column of that species is set to 0 in
    that row.

    Parameters:
        arr_in: structured array with 'id1' and 'id2' fields; evidence
            columns are presumably the fields after the first three --
            TODO confirm against the code that builds the array.
        basesp: two-character species prefix identifying the base species.
        cutoff: rows whose base-species maximum is strictly below this value
            are candidates for clearing.

    Returns:
        The filtered copy of the array.

    Raises:
        AssertionError: if no column name starts with `basesp`.
    """
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Filtering: require rows w/o %s > %s to have single orths" % (basesp,
            cutoff))
    arr = ut.arr_copy(arr_in)
    feature_names = arr.dtype.names[3:]
    basesp_cols = [n for n in feature_names if n[:2]==basesp]
    assert len(basesp_cols)>0, 'No base species data.'
    maxes = arr_collist_maxes(arr, [basesp_cols])
    othersps = species_list(feature_names)
    othersps.remove(basesp)
    ogs_all = orth.all_ogroup_sizes(basesp, othersps)
    # Resolve each species' columns and orthogroup-size table once, rather
    # than repeating the lookup for every qualifying row.
    sp_sections = [([n for n in feature_names if n[:2]==sp], ogs_all[sp])
            for sp in othersps]
    cleared = 0
    for i in range(len(arr)):
        # Written as "not <" so that a NaN maximum is skipped, as before.
        if not maxes[i] < cutoff:
            continue
        row = arr[i]
        id1,id2 = row['id1'],row['id2']
        for cols, ogsize_sp in sp_sections:
            if (id1 in ogsize_sp and ogsize_sp[id1]>1) or (id2 in ogsize_sp
                    and ogsize_sp[id2]>1):
                for col in cols:
                    # Index the field first so the write goes to the array
                    # itself rather than through a temporary row scalar.
                    arr[col][i] = 0
                # Counted once per (row, species) section, not per column.
                cleared += 1
    print("%s species-sections of rows cleared" % cleared)
    return arr
Exemplo n.º 2
0
def arrfeats_set_gold(arrfeats, pdgold):
    """
    Return a copy of arrfeats whose third field marks gold-standard pairs.

    For every row of the copy (made with ut.arr_copy), field index 2 is set
    to 1 when pdgold.contains((row[0], row[1])) is truthy and to 0 otherwise.
    """
    labeled = ut.arr_copy(arrfeats)
    for rec in labeled:
        pair = (rec[0], rec[1])
        rec[2] = 1 if pdgold.contains(pair) else 0
    return labeled
Exemplo n.º 3
0
def norm_columns(arr):
    """
    Return a copy of arr in which every named field is replaced by its
    z-score.

    Each field is first passed through np.nan_to_num (so NaN/inf entries
    become finite numbers) and then through scipy.stats.zscore. The copy is
    made with ut.arr_copy.
    """
    normed = ut.arr_copy(arr)
    for field in normed.dtype.names:
        cleaned = np.nan_to_num(normed[field])
        normed[field] = scipy.stats.zscore(cleaned)
    return normed
Exemplo n.º 4
0
def rank_columns(arr):
    """
    Return a copy of arr in which every named field is replaced by the ranks
    of its values.

    Each field is first passed through np.nan_to_num and then ranked with
    scipy.stats.rankdata (default settings). The copy is made with
    ut.arr_copy.
    """
    ranked = ut.arr_copy(arr)
    for field in ranked.dtype.names:
        cleaned = np.nan_to_num(ranked[field])
        ranked[field] = scipy.stats.rankdata(cleaned)
    return ranked
Exemplo n.º 5
0
def rescale_columns(arr, scale_factors):
    """
    Return a copy of arr with each named field multiplied by its scale
    factor.

    scale_factors is indexed by field position: the field at position i in
    arr.dtype.names is multiplied by scale_factors[i]. The copy is made with
    ut.arr_copy.
    """
    scaled = ut.arr_copy(arr)
    for pos, field in enumerate(scaled.dtype.names):
        column = scaled[field]
        factor = scale_factors[pos]
        scaled[field] = column * factor
    return scaled