Exemplos de Table em Python, exemplos de libms.DataStructures.Table.Table em Python

Exemplo n.º 1

0

Exibir arquivo

def testDynamicColumnAttributes():
    """Check that table columns are exposed as attributes and track renames/drops.

    Builds an empty three-column table, accesses every column as an
    attribute, then renames and finally drops a column.  After each change
    the old attribute name must no longer be accessible.  The renamed column
    is additionally sent through a pickle round trip.
    """
    t = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], [])
    # plain attribute access must not raise for existing columns
    t.a
    t.b
    t.c
    assert len(t.a.values) == 0
    assert len(t.b.values) == 0
    assert len(t.c.values) == 0

    t.renameColumns(dict(a="aa"))
    assert "a" not in t.getColNames()
    assert "aa" in t.getColNames()
    t.aa
    # The failure is raised in the ``else`` branch: raising it inside the
    # ``try`` body (as before) let the handler swallow it, so the check could
    # never fail.
    try:
        t.a
    except Exception:
        pass
    else:
        raise Exception("t.a should be deteted")

    # a column object must survive pickling
    col = pickle.loads(pickle.dumps(t.aa))
    assert len(col.values) == 0

    t.dropColumns("aa")
    assert "aa" not in t.getColNames()
    try:
        t.aa
    except Exception:
        pass
    else:
        raise Exception("t.aa should be deteted")

Exemplo n.º 2

0

Exibir arquivo

def testToOpenMSFeatureMap():
    """Convert an empty and a one-row mz/rt table with toOpenMSFeatureMap.

    The resulting feature map must have as many entries as the table has
    rows, and the single feature must report the mz and rt values of the row.
    """
    colNames = ["mz", "rt"]
    table = Table(colNames, [float, float], ["%.6f", "%.6f"])
    emptyMap = toOpenMSFeatureMap(table)
    assert emptyMap.size() == 0

    table.addRow([1.0, 2.0])
    filledMap = toOpenMSFeatureMap(table)
    assert filledMap.size() == 1

    feature = filledMap[0]
    # exact float comparison is fine here: no digits after the decimal point
    assert feature.getMZ() == 1.0
    assert feature.getRT() == 2.0  # ditto

Exemplo n.º 3

0

Exibir arquivo

def setupTable():
    names = "int long float str object array".split()
    types = [
        int,
        long,
        float,
        str,
        object,
        np.ndarray,
    ]
    formats = ["%3d", "%d", "%.3f", "%s", "%r", "'array(%r)' % o.shape"]

    row1 = [1, 12323L, 1.0, "hi", {1: 1}, np.array((1, 2, 3))]
    row2 = [2, 22323L, 2.0, "hi2", [
        2,
        3,
    ], np.array(((2, 3, 4), (1, 2, 3)))]
    row3 = [
        3, 32323L, 3.0, "hi3", (3, ),
        np.array(((3, 3, 4, 5), (1, 2, 3, 4)))
    ]

    rows = [row1, row2, row3]
    t = Table(names, types, formats, rows, "testtabelle", meta=dict(why=42))
    t = t.extractColumns("int", "float", "str")
    t.addEnumeration()
    t._name = "t"
    t._print()
    return t

Exemplo n.º 4

0

Exibir arquivo

Arquivo: testTable.py Projeto: burlab/emzed

def testDynamicColumnAttributes():
    """Check that table columns are exposed as attributes and track renames/drops.

    Builds an empty three-column table, accesses every column as an
    attribute, then renames and finally drops a column.  After each change
    the old attribute name must no longer be accessible.  The renamed column
    is additionally sent through a pickle round trip.
    """
    t = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], [])
    # plain attribute access must not raise for existing columns
    t.a
    t.b
    t.c
    assert len(t.a.values) == 0
    assert len(t.b.values) == 0
    assert len(t.c.values) == 0

    t.renameColumns(dict(a="aa"))
    assert "a" not in t.getColNames()
    assert "aa" in t.getColNames()
    t.aa
    # The failure is raised in the ``else`` branch: raising it inside the
    # ``try`` body (as before) let the handler swallow it, so the check could
    # never fail.
    try:
        t.a
    except Exception:
        pass
    else:
        raise Exception("t.a should be deteted")

    # a column object must survive pickling
    col = pickle.loads(pickle.dumps(t.aa))
    assert len(col.values) == 0

    t.dropColumns("aa")
    assert "aa" not in t.getColNames()
    try:
        t.aa
    except Exception:
        pass
    else:
        raise Exception("t.aa should be deteted")

Exemplo n.º 5

0

Exibir arquivo

def testDoubleColumnames():
    """Creating a table with duplicated column names must raise an exception.

    The message of the raised exception is captured in ``ex``; the test
    fails if no exception was raised at all.
    """
    ex = None
    try:
        colnames = ["col0", "col0", "col1", "col1", "col2"]
        # NOTE(review): ``[] * 5`` evaluates to ``[]``, so types and formats
        # are empty lists here -- confirm this is intended.
        Table(colnames, [] * 5, [] * 5)
    except Exception as e:
        ex = e.message
    # without this check the test passed even if Table accepted the names
    assert ex is not None, "expected an exception for duplicated column names"

Exemplo n.º 6

0

Exibir arquivo

def testSupportedPostfixes():
    """Check Table.supportedPostfixes for one, two and three column prefixes.

    The table has no rows; only its column names matter.  The expected
    results are the postfixes returned for the given lists of name stems.
    """
    colNames = [
        "mz", "mzmin", "mzmax",
        "mz0", "mzmin0", "mzmax0",
        "mz1", "mzmax1",
        "mzmin__0", "mzmax__0", "mz__0",
        "mzmax3",
        "mz4", "mzmin4",
    ]
    colTypes = [float] * len(colNames)

    table = Table._create(colNames, colTypes, [])

    onlyMz = table.supportedPostfixes(["mz"])
    assert len(onlyMz) == len(colNames)

    mzAndMin = table.supportedPostfixes(["mz", "mzmin"])
    assert mzAndMin == ["", "0", "4", "__0"]

    mzMinAndMax = table.supportedPostfixes(["mz", "mzmin", "mzmax"])
    assert mzMinAndMax == ["", "0", "__0"]

Exemplo n.º 7

0

Exibir arquivo

def testRunnerTable():

    with ExceptionTester():
        Table(["a"], [np.float32], ["%f"], [[32.0]])

    #build table
    names = "int long float str object array".split()
    types = [
        int,
        long,
        float,
        str,
        object,
        np.ndarray,
    ]
    formats = ["%3d", "%d", "%.3f", "%s", "%r", "'array(%r)' % (o.shape,)"]

    row1 = [1, 12323L, 1.0, "hi", {1: 1}, np.array((1, 2, 3))]
    row2 = [2, 22323L, 2.0, "hi2", [
        2,
        3,
    ], np.array(((2, 3, 4), (1, 2, 3)))]
    row3 = [
        3, 32323L, 3.0, "hi3", (3, ),
        np.array(((3, 3, 4, 5), (1, 2, 3, 4)))
    ]

    rows = [row1, row2, row3]
    t = Table(names, types, formats, rows, "testtabelle", meta=dict(why=42))

    run(t, names, [row1, row2, row3])
    # test pickle
    dat = pickle.dumps(t)
    t = pickle.loads(dat)
    run(t, names, [row1, row2, row3])
    ms.storeTable(t, u"temp_output/test.table")
    try:
        ms.storeTable(t, "temp_output/test.table")
        assert False, "no exception thrown althoug file should exist!"
    except:
        pass
    ms.storeTable(t, "temp_output/test.table", True)
    t = ms.loadTable("temp_output/test.table")
    run(t, names, [row1, row2, row3])

Exemplo n.º 8

0

Exibir arquivo

Arquivo: isotope_calculator.py Projeto: burlab/emzed

def isotopeDistributionTable(formula, R=None, fullC13=False, minp=0.01, **kw):
    """
    Generates a Table with the most common isotopes of the molecule given by
    the mass formula *formula*.

    If the resolution *R* is given, the measurement device is simulated, and
    overlapping peaks may merge.

    *fullC13=True* assumes that only C13 carbon is present in formula.

    Further you can give a threshold *minp* for considering only isotope
    peaks with an abundance above the value. Standard is *minp=0.01*.

    If you have special elementary isotope abundances which differ from
    the natural abundances, you can tell that like
    ``ms.isotopeDistributionTable("S4C4", C=dict(C13=0.5, C12=0.5))``

    The returned table has the columns "mf", "mass" and "abundance"; every
    row carries *formula* in its "mf" column.

    Examples:

    .. pycon::

       import ms !onlyoutput
       # natural abundances:
       tab = ms.isotopeDistributionTable("C3H7NO2")
       tab.abundance /= tab.abundance.sum()
       tab.print_()

       # artificial abundances:
       tab = ms.isotopeDistributionTable("C3H7NO2", C=dict(C13=0.5, C12=0.5))
       tab.abundance /= tab.abundance.sum()
       tab.print_()

    """
    from libms.DataStructures.Table import Table
    generator = _setupIsotopeDistributionGenerator(formula, R, fullC13, minp,
                                                   **kw)
    colNames = ["mf", "mass", "abundance"]
    colTypes = [str, float, float]
    colFormats = ["%s", "%.6f", "%.3f"]
    table = Table(colNames, colTypes, colFormats, [])
    # presumably the ``False`` flag postpones internal bookkeeping, which is
    # then done once by resetInternals() below -- confirm against Table.addRow
    for mass, abundance in generator.getCentroids():
        table.addRow([formula, mass, abundance], False)
    table.resetInternals()
    return table

Exemplo n.º 9

0

Exibir arquivo

def testSomePredicates():
    #build table
    names = "int long float str object array".split()
    types = [
        int,
        long,
        float,
        str,
        object,
        np.ndarray,
    ]
    formats = ["%3d", "%d", "%.3f", "%s", "%r", "'array%r' % (o.shape,)"]

    row1 = [1, 12323L, 1.0, "hi", {1: 1}, np.array((1, 2, 3))]
    row2 = [2, 22323L, 2.0, "hi2", [
        2,
        3,
    ], np.array(((2, 3, 4), (1, 2, 3)))]
    row3 = [
        3, 32323L, 3.0, "hi3", (3, ),
        np.array(((3, 3, 4, 5), (1, 2, 3, 4)))
    ]

    rows = [row1, row2, row3]
    t = Table(names, types, formats, rows, "testtabelle", meta=dict(why=42))

    tn = t.filter((t.int + t.float).inRange(-1, 2))
    assert len(tn) == 1
    assert tn.getValue(tn.rows[0], "int") == 1
    tn = t.filter((t.float + t.int).inRange(-1, 2))
    assert len(tn) == 1
    assert tn.getValue(tn.rows[0], "int") == 1

    tn = t.filter(t.float.approxEqual(1.0, t.int / 10))
    tn._print()
    assert len(tn) == 1, len(tn)
    assert tn.getValue(tn.rows[0], "int") == 1

Exemplo n.º 10

0

Exibir arquivo

Arquivo: testTable.py Projeto: burlab/emzed

def testSomePredicates():
    #build table
    names="int long float str object array".split()
    types = [int, long, float, str, object, np.ndarray,]
    formats = [ "%3d", "%d", "%.3f", "%s", "%r", "'array%r' % (o.shape,)" ]

    row1 = [ 1, 12323L, 1.0, "hi", { 1: 1 },  np.array((1,2,3)) ]
    row2 = [ 2, 22323L, 2.0, "hi2", [2,3,], np.array(((2,3,4),(1,2,3))) ]
    row3 = [ 3, 32323L, 3.0, "hi3", (3,) , np.array(((3,3,4,5),(1,2,3,4))) ]

    rows = [row1, row2, row3]
    t=Table(names, types, formats, rows, "testtabelle", meta=dict(why=42))

    tn = t.filter((t.int+t.float).inRange(-1, 2))
    assert len(tn) == 1
    assert tn.getValue(tn.rows[0], "int") == 1
    tn = t.filter((t.float+t.int).inRange(-1, 2))
    assert len(tn) == 1
    assert tn.getValue(tn.rows[0], "int") == 1

    tn = t.filter(t.float.approxEqual(1.0, t.int/10))
    tn._print()
    assert len(tn) == 1, len(tn)
    assert tn.getValue(tn.rows[0], "int") == 1

Exemplo n.º 11

0

Exibir arquivo

def test_removePostfixes():
    """Check Table.removePostfixes with and without explicit postfixes.

    Without arguments the "__0" postfix is stripped from both column names;
    with explicit postfixes "bb" and "cb" the names shrink to "a" and "b".
    A further call with postfixes "a" and "b" is expected to raise.
    """
    t = Table._create(["abb__0", "bcb__0"], [str] * 2, ["%s"] * 2)
    assert t.getColNames() == ["abb__0", "bcb__0"]
    t.removePostfixes()
    assert t.getColNames() == ["abb", "bcb"]
    t.removePostfixes("bb", "cb")
    assert t.getColNames() == ["a", "b"]
    # Printing stays outside the ``try`` so that a failure in print_() is not
    # mistaken for the exception expected from removePostfixes.
    t.print_()
    try:
        t.removePostfixes("a", "b")
    except Exception:
        pass
    else:
        # show the unexpected result before failing
        t.print_()
        assert False, "expected exception"

Exemplo n.º 12

0

Exibir arquivo

Arquivo: regTestTable.py Projeto: burlab/emzed

def setupTable():
    names="int long float str object array".split()
    types = [int, long, float, str, object, np.ndarray,]
    formats = [ "%3d", "%d", "%.3f", "%s", "%r", "'array(%r)' % o.shape" ]

    row1 = [ 1, 12323L, 1.0, "hi", { 1: 1 },  np.array((1,2,3)) ]
    row2 = [ 2, 22323L, 2.0, "hi2", [2,3,], np.array(((2,3,4),(1,2,3))) ]
    row3 = [ 3, 32323L, 3.0, "hi3", (3,) , np.array(((3,3,4,5),(1,2,3,4))) ]

    rows = [row1, row2, row3]
    t=Table(names, types, formats, rows, "testtabelle", meta=dict(why=42))
    t = t.extractColumns("int", "float", "str")
    t.addEnumeration()
    t._name = "t"
    t._print()
    return t

Exemplo n.º 13

0

Exibir arquivo

def loadCSV(path=None, sep=";", keepNone=False, **specialFormats):
    """Read a delimiter-separated text file into a Table.

    If *path* is None, ``ms.askForSingleFile`` is used to obtain one; None
    is returned if that call yields None.  The first record of the file
    provides the column names, where runs of spaces are replaced by a single
    underscore.  All other cells are stripped and converted with
    ``bestConvert``; unless *keepNone* is true, the cell text "None" becomes
    the value None.  Column types come from ``common_type_for`` and formats
    from ``guessFormatFor``; *specialFormats* overrides formats by column
    name.  The table title is the base name of the file and its meta data
    records the absolute path under the key "loaded_from".
    """
    # local import in order to keep namespaces clean
    import ms
    import csv
    import os.path
    import sys
    import re
    from libms.DataStructures.Table import (Table, common_type_for,
                                            bestConvert, guessFormatFor)
    if path is None:
        path = ms.askForSingleFile(extensions=["csv"])
        if path is None:
            return None
    elif isinstance(path, unicode):
        path = path.encode(sys.getfilesystemencoding())

    def convert(cell):
        # the literal text "None" is mapped to None unless keepNone is set
        if not keepNone and cell == "None":
            return None
        return bestConvert(cell)

    with open(path, "r") as fp:
        reader = csv.reader(fp, delimiter=sep)
        header = next(reader)
        # strip names and reduce multiple spaces to a single underscore
        colNames = [re.sub(" +", "_", name.strip()) for name in header]

        rows = []
        for record in reader:
            rows.append([convert(cell.strip()) for cell in record])

    # transpose the rows in order to determine one type per column
    columns = []
    for idx in range(len(colNames)):
        columns.append([row[idx] for row in rows])
    types = [common_type_for(column) for column in columns]

    formatByName = dict((name, guessFormatFor(name, type_))
                        for name, type_ in zip(colNames, types))
    formatByName.update(specialFormats)
    formats = [formatByName[name] for name in colNames]

    title = os.path.basename(path)
    meta = dict(loaded_from=os.path.abspath(path))
    return Table._create(colNames, types, formats, rows, title, meta)

Exemplo n.º 14

0

Exibir arquivo

def loadTable(path=None):
    """ load pickled table

        If *path* is missing, a dialog for file selection is opened
        instead.  None is returned if that dialog yields None.  A unicode
        *path* is encoded with the file system encoding before loading.
    """

    # local import in order to keep namespaces clean
    import ms
    import sys
    from libms.DataStructures.Table import Table, compressPeakMaps

    if path is None:
        path = ms.askForSingleFile(extensions=["table"])
        if path is None:
            return None
    elif isinstance(path, unicode):
        path = path.encode(sys.getfilesystemencoding())

    table = Table.load(path)
    compressPeakMaps(table)
    return table

Exemplo n.º 15

0

Exibir arquivo

Arquivo: tools.py Projeto: swagatam11/emzed

def _merge_column_attribute(tables, getter_name, label, force_merge):
    """Collect one per-column attribute over all *tables* into a dict.

    *getter_name* names the table method which is called with a column name
    ("getType" or "getFormat"); *label* is used in messages.  If two tables
    disagree about a column, an Exception is raised unless *force_merge* is
    true; in that case the conflict is printed and the value of the later
    table wins.
    """
    merged = dict()
    for table in tables:
        getter = getattr(table, getter_name)
        for name in table._colNames:
            value = getter(name)
            known = merged.get(name, value)
            if known != value:
                if not force_merge:
                    raise Exception("%s conflict for column %s"
                                    % (label, name))
                print("%s conflict: %s %s %s" % (label, name, known, value))
            merged[name] = value
    return merged


def _build_starttable(tables, force_merge):
    """Create an empty prototype table covering the columns of all *tables*.

    The column names of all tables are combined into one order with
    ``_topo_sort_with_in_order``; an Exception is raised if that yields
    None.  Types and formats are merged per column name, all types first and
    then all formats.  Conflicting types or formats raise an Exception
    unless *force_merge* is true.

    Returns a tuple ``(prototype, column_names)`` where *prototype* is
    built by ``Table._create`` from the merged names, types and formats.
    """
    colname_orders = [table._colNames for table in tables]

    colum_names = _topo_sort_with_in_order(colname_orders)
    if colum_names is None:
        raise Exception("could not combine all column names to a "
                        "consistent order. you have to provide a reference "
                        "table")

    types = _merge_column_attribute(tables, "getType", "type", force_merge)
    formats = _merge_column_attribute(tables, "getFormat", "format",
                                      force_merge)

    final_types = [types.get(n) for n in colum_names]
    final_formats = [formats.get(n) for n in colum_names]

    prototype = Table._create(colum_names, final_types, final_formats)
    return prototype, colum_names

Exemplo n.º 16

0

Exibir arquivo

def isotopeDistributionTable(formula, R=None, fullC13=False, minp=0.01, **kw):
    """
    Generates a Table with the most common isotopes of the molecule given by
    the mass formula *formula*.

    If the resolution *R* is given, the measurement device is simulated, and
    overlapping peaks may merge.

    *fullC13=True* assumes that only C13 carbon is present in formula.

    Further you can give a threshold *minp* for considering only isotope
    peaks with an abundance above the value. Standard is *minp=0.01*.

    If you have special elementary isotope abundances which differ from
    the natural abundances, you can tell that like
    ``ms.isotopeDistributionTable("S4C4", C=dict(C13=0.5, C12=0.5))``

    The returned table has the columns "mf", "mass" and "abundance"; every
    row carries *formula* in its "mf" column.

    Examples:

    .. pycon::

       import ms !onlyoutput
       # natural abundances:
       tab = ms.isotopeDistributionTable("C3H7NO2")
       tab.abundance /= tab.abundance.sum()
       tab.print_()

       # artificial abundances:
       tab = ms.isotopeDistributionTable("C3H7NO2", C=dict(C13=0.5, C12=0.5))
       tab.abundance /= tab.abundance.sum()
       tab.print_()

    """
    from libms.DataStructures.Table import Table
    generator = _setupIsotopeDistributionGenerator(formula, R, fullC13, minp,
                                                   **kw)
    colNames = ["mf", "mass", "abundance"]
    colTypes = [str, float, float]
    colFormats = ["%s", "%.6f", "%.3f"]
    table = Table(colNames, colTypes, colFormats, [])
    # presumably the ``False`` flag postpones internal bookkeeping, which is
    # then done once by resetInternals() below -- confirm against Table.addRow
    for mass, abundance in generator.getCentroids():
        table.addRow([formula, mass, abundance], False)
    table.resetInternals()
    return table

Exemplo n.º 17

0

Exibir arquivo

Arquivo: testTable.py Projeto: burlab/emzed

def testIfThenElse():
    """Add columns computed with ``thenElse`` from column and constant values.

    Column "x" picks from columns b or c depending on whether a is not
    None; column "y" does the same with two constant strings.
    """
    table = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], [])
    # rows are appended directly to the row list of the table
    for row in (["0", 1, 2], [None, 2, 1]):
        table.rows.append(row)
    table._print()

    hasA = table.a.isNotNone()
    table.addColumn("x", hasA.thenElse(table.b, table.c))
    assert table.getColNames() == ["a", "b", "c", "x"]
    print("")
    table._print()

    hasA = table.a.isNotNone()
    table.addColumn("y", hasA.thenElse("ok", "not ok"))
    table._print()
    assert table.y.values == ["ok", "not ok"]

Exemplo n.º 18

0

Exibir arquivo

Arquivo: tools.py Projeto: swagatam11/emzed

def toTable(colName, iterable, fmt="", type_=None, title="", meta=None):
    """Forward all arguments unchanged to ``Table.toTable`` and return its result."""
    arguments = (colName, iterable, fmt, type_, title, meta)
    return Table.toTable(*arguments)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: Tools.py Projeto: swagatam11/emzed

def formulaTable(min_mass,
                 max_mass,
                 C=(0, None),
                 H=(0, None),
                 N=(0, None),
                 O=(0, None),
                 P=(0, None),
                 S=(0, None),
                 prune=True):
    """
    This is a reduced Python version of HR2 formula generator,
    see http://fiehnlab.ucdavis.edu/projects/Seven_Golden_Rules/Software/

    This function generates a table containing molecular formulas consisting of
    elements C, H, N, O, P and S having a mass in range
    [**min_mass**, **max_mass**].
    For each element one can provide a fixed count or an inclusive range of
    atom counts considered in this process.  An upper limit of *None* is
    replaced by the rounded up quotient of **max_mass** and the mass of the
    element.

    If **prune** is *True*, mass ratio rules and valence bond checks are used
    to avoid unrealistic compounds in the table, else all formulas explaining
    the given mass range are generated.

    Putting some restrictions on atomcounts, eg **C=(0, 100)**, can speed up
    the process tremendously.

    The result is a table with the columns "mf" (formula) and "m0" (mass).
    """
    import mass
    import math
    import collections

    from libms.DataStructures.Table import Table

    def count_limits(spec):
        # a sequence is unpacked as (min, max), anything else is a fixed count
        if isinstance(spec, collections.Sequence):
            lowest, highest = spec
            return lowest, highest
        return spec, spec

    def default_limit(highest, symbol):
        # an open upper limit becomes ceil(max_mass / mass of the element)
        if highest is None:
            return math.ceil(max_mass / getattr(mass, symbol))
        return highest

    def int_range(a, b):
        return xrange(int(a), int(b))

    cmin, cmax = count_limits(C)
    hmin, hmax = count_limits(H)
    nmin, nmax = count_limits(N)
    omin, omax = count_limits(O)
    pmin, pmax = count_limits(P)
    smin, smax = count_limits(S)

    cmax = default_limit(cmax, "C")
    hmax = default_limit(hmax, "H")
    nmax = default_limit(nmax, "N")
    omax = default_limit(omax, "O")
    pmax = default_limit(pmax, "P")
    smax = default_limit(smax, "S")

    # upper bounds for x/C ratios; the trailing numbers were noted next to
    # these values in the original code, their meaning is not shown here
    hcmax = 6  # 3
    ncmax = 4  # 2
    ocmax = 3  # 1.2
    pcmax = 6  # 0.32
    scmax = 2  # 0.65

    # valence values for bound checks:
    valh = -1
    valc = +2
    valn = 1
    valo = 0
    valp = 3
    vals = 4

    symbols = "CHNOPS"
    mass_window = max_mass - min_mass

    rows = []

    # Every loop level fixes the count of one element; rest_* is the mass
    # still available below max_mass after the counts chosen so far.
    for c in int_range(cmin, cmax + 1):
        rest_c = max_mass - c * mass.C
        s_top = min(smax, math.floor(rest_c / mass.S))
        if prune:
            s_top = min(s_top, scmax * c)

        for s in int_range(smin, s_top + 1):
            rest_s = rest_c - s * mass.S
            p_top = min(pmax, math.floor(rest_s / mass.P))
            if prune:
                p_top = min(p_top, pcmax * c)

            for p in int_range(pmin, p_top + 1):
                rest_p = rest_s - p * mass.P
                o_top = min(omax, math.floor(rest_p / mass.O))
                if prune:
                    o_top = min(o_top, ocmax * c)

                for o in int_range(omin, o_top + 1):
                    rest_o = rest_p - o * mass.O
                    n_top = min(nmax, math.floor(rest_o / mass.N))
                    if prune:
                        n_top = min(n_top, ncmax * c)

                    for n in int_range(nmin, n_top + 1):
                        rest_n = rest_o - n * mass.N
                        h_top = min(hmax, math.floor(rest_n / mass.H))
                        if prune:
                            h_top = min(h_top, hcmax * c)

                        for h in int_range(hmin, h_top + 1):
                            rest_h = rest_n - h * mass.H
                            # total mass must lie in [min_mass, max_mass]
                            if not 0 <= rest_h <= mass_window:
                                continue
                            bond = (2.0 + c * valc + n * valn + o * valo
                                    + p * valp + s * vals + h * valh) / 2.0
                            # when pruning, skip negative and half-integer
                            # bond values
                            if prune and (bond < 0 or bond % 1 == 0.5):
                                continue

                            # "." terminates every count, so that counts of
                            # 0 and 1 can be rewritten unambiguously
                            mf = "C%d.H%d.N%d.O%d.P%d.S%d." \
                                % (c, h, n, o, p, s)
                            for symbol in symbols:
                                mf = mf.replace(symbol + "0.", ".")
                            for symbol in symbols:
                                mf = mf.replace(symbol + "1.", symbol + ".")
                            mf = mf.replace(".", "")

                            rows.append([mf, max_mass - rest_h])
    return Table(["mf", "m0"], [str, float], ["%s", "%.5f"], rows)

Exemplo n.º 20

0

Exibir arquivo

def testIllegalRows():
    """A row given as tuple instead of list must be rejected by Table.

    The raised exception has to mention "not all rows are lists"; the test
    fails as well if no exception is raised at all.
    """
    try:
        Table(["a", "b"], [float, float], ["%f", "%f"], [(1, 2)])
    except Exception as e:
        assert "not all rows are lists" in str(e), str(e)
    else:
        # previously a silently accepted tuple row let the test pass
        raise AssertionError("expected exception for row which is not a list")

Exemplo n.º 21

0

Exibir arquivo

def testDetectionOfUnallowdColumnNames():
    """Creating a table with the column name "__init__" must raise an exception.

    The message of the raised exception is captured in ``ex``; the test
    fails if no exception was raised at all.
    """
    ex = None
    try:
        Table(["__init__"], [int], ["%d"])
    except Exception as e:
        ex = e.message
    # without this check the test passed even if Table accepted the name
    assert ex is not None, "expected an exception for column name __init__"

Exemplo n.º 22

0

Exibir arquivo

def testIfThenElse():
    """Add columns computed with ``thenElse`` from column and constant values.

    Column "x" picks from columns b or c depending on whether a is not
    None; column "y" does the same with two constant strings.
    """
    table = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], [])
    # rows are appended directly to the row list of the table
    for row in (["0", 1, 2], [None, 2, 1]):
        table.rows.append(row)
    table._print()

    hasA = table.a.isNotNone()
    table.addColumn("x", hasA.thenElse(table.b, table.c))
    assert table.getColNames() == ["a", "b", "c", "x"]
    print("")
    table._print()

    hasA = table.a.isNotNone()
    table.addColumn("y", hasA.thenElse("ok", "not ok"))
    table._print()
    assert table.y.values == ["ok", "not ok"]