示例#1
0
def xls_tidy(xls,qvalue):
    """Build PSM-level and peptide-level summary tables from a TSV file.

    The file is read with ``etl.fromtsv`` and filtered to the rows whose
    ``PepQValue`` (converted with ``float``) is less than or equal to
    ``float(qvalue)``.

    :param xls: source passed straight to ``etl.fromtsv``.
    :param qvalue: q-value cutoff; anything accepted by ``float``.
    :returns: a tuple ``(psmsummary, pepsummary)`` where ``psmsummary`` is
        the filtered table and ``pepsummary`` is keyed on the cleaned
        ``Peptide`` value, carrying the ';'-joined ``Protein`` and
        ``PepQValue`` values plus a ``SpecCount`` column.
    """
    def within_cutoff(row):
        # The cutoff is converted per row, inside the predicate, so any
        # conversion error surfaces only when the table is iterated.
        return float(row.PepQValue) <= float(qvalue)

    psm_table = etl.select(etl.fromtsv(xls), within_cutoff)

    # Work on just the three columns needed for the peptide summary.
    peptide_rows = etl.cut(psm_table, 'Peptide', 'Protein', 'PepQValue')

    # Clean the Peptide column in two passes: first drop the single
    # flanking character and dot on each side, then strip every run of
    # digits, dots and plus signs (the modification info).
    cleanup_passes = (
        (r'^[\w-]\.(.+)\.[\w-]$', r'\1'),
        (r'[\d\.\+]+', r''),
    )
    for pattern, replacement in cleanup_passes:
        peptide_rows = etl.transform.regex.sub(
            peptide_rows, 'Peptide', pattern, replacement)

    # SpecCount: ``len`` aggregated over the rows of each cleaned peptide.
    count_spec = OrderedDict([('SpecCount', len)])
    spec_counts = etl.aggregate(peptide_rows, 'Peptide', count_spec)

    # Keep one row per (Peptide, Protein, PepQValue) combination, then
    # collapse to one row per peptide with ';'-joined values.
    unique_rows = etl.groupselectfirst(
        peptide_rows, key=('Peptide', 'Protein', "PepQValue"))
    join_spec = OrderedDict([
        ('Protein', ('Protein', etl.strjoin(';'))),
        ('PepQValue', ('PepQValue', etl.strjoin(';'))),
    ])
    joined_values = etl.aggregate(unique_rows, 'Peptide', join_spec)

    peptide_table = etl.join(joined_values, spec_counts, key='Peptide')

    return (psm_table, peptide_table)
示例#2
0
文件: examples.py 项目: datamade/petl
# Excerpt of petl ``aggregate`` usage examples.
# NOTE(review): ``table1``, ``aggregate`` and ``look`` are not defined in this
# excerpt; presumably they are set up earlier in the original script.

# aggregate a single field: ``sum`` over 'bar', keyed on 'foo'
table3 = aggregate(table1, 'foo', sum, 'bar')
look(table3)
# alternative signature for single field aggregation using keyword args
# (here with a compound key and a compound value)
table4 = aggregate(table1, key=('foo', 'bar'), aggregation=list, value=('bar', 'baz'))
look(table4)
# aggregate multiple fields: each entry maps an output field name to an
# aggregation specification
from collections import OrderedDict
from petl import strjoin
aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
aggregation['listbar'] = 'bar' # default aggregation function is list
aggregation['listbarbaz'] = ('bar', 'baz'), list
aggregation['bars'] = 'bar', strjoin(', ')
table5 = aggregate(table1, 'foo', aggregation)
look(table5)
# can also use list or tuple to specify multiple field aggregation;
# the entries mirror the OrderedDict version above, with the output field
# name as the first element of each tuple
aggregation = [('count', len),
               ('minbar', 'bar', min),
               ('maxbar', 'bar', max),
               ('sumbar', 'bar', sum),
               ('listbar', 'bar'), # default aggregation function is list
               ('listbarbaz', ('bar', 'baz'), list),
               ('bars', 'bar', strjoin(', '))]
table6 = aggregate(table1, 'foo', aggregation)
look(table6)
# can also use suffix notation: create the aggregate view with only a key,
# then add output fields by item assignment
table7 = aggregate(table1, 'foo')
table7['count'] = len
示例#3
0
# Excerpt of petl ``rangeaggregate`` usage examples.
# NOTE(review): the ``table1`` used by the rangeaggregate calls is not defined
# in this excerpt; presumably it is set up earlier in the original script.
from petl import rangeaggregate, look, strjoin
look(table1)
# aggregate whole rows (key 'bar', width 2, aggregation ``len``)
table2 = rangeaggregate(table1, 'bar', 2, len)
look(table2)
# aggregate single field ('foo' values collected with ``list``)
table3 = rangeaggregate(table1, 'bar', 2, list, 'foo')
look(table3)
# aggregate single field - alternative signature using keyword args
# (same arguments as the positional call above, spelled out by name)
table4 = rangeaggregate(table1, key='bar', width=2, aggregation=list, value='foo')
look(table4)
# aggregate multiple fields: each entry maps an output field name to an
# aggregation specification
from collections import OrderedDict
aggregation = OrderedDict()
aggregation['foocount'] = len 
aggregation['foojoin'] = 'foo', strjoin('')
aggregation['foolist'] = 'foo' # default is list
table5 = rangeaggregate(table1, 'bar', 2, aggregation)
look(table5)


# rowmap

# Input table for the rowmap example; the first row is the header.
# Note that this rebinds ``table1``, and that one row uses '-' for 'sex'.
table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]

from petl import rowmap, look
示例#4
0
文件: examples.py 项目: greeness/petl
# Excerpt of petl ``aggregate`` usage examples.
# NOTE(review): ``table1``, ``aggregate`` and ``look`` are not defined in this
# excerpt; presumably they are set up earlier in the original script.

# single aggregation function with a compound key and a compound value,
# passed as keyword arguments
table4 = aggregate(table1,
                   key=('foo', 'bar'),
                   aggregation=list,
                   value=('bar', 'baz'))
look(table4)
# aggregate multiple fields: each entry maps an output field name to an
# aggregation specification
from collections import OrderedDict
from petl import strjoin

aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
aggregation['listbar'] = 'bar'  # default aggregation function is list
aggregation['bars'] = 'bar', strjoin(', ')
table5 = aggregate(table1, 'foo', aggregation)
look(table5)
# can also use list or tuple to specify multiple field aggregation;
# the entries mirror the OrderedDict version above, with the output field
# name as the first element of each tuple
aggregation = [
    ('count', len),
    ('minbar', 'bar', min),
    ('maxbar', 'bar', max),
    ('sumbar', 'bar', sum),
    ('listbar', 'bar'),  # default aggregation function is list
    ('bars', 'bar', strjoin(', '))
]
table6 = aggregate(table1, 'foo', aggregation)
look(table6)
# can also use suffix notation: create the aggregate view with only a key
# (the item assignments that would follow are cut off in this excerpt)
table7 = aggregate(table1, 'foo')
示例#5
0
文件: examples.py 项目: deytao/petl
# Excerpt of petl ``aggregate`` usage examples.
# NOTE(review): ``table1``, ``aggregate`` and ``look`` are not defined in this
# excerpt; presumably they are set up earlier in the original script.

# aggregate single field: ``sum`` over 'bar', keyed on 'foo'
table3 = aggregate(table1, 'foo', sum, 'bar')
look(table3)
# alternative signature for single field aggregation
# (keyword arguments, here with a compound key and a compound value)
table4 = aggregate(table1, key=('foo', 'bar'), aggregation=list, value=('bar', 'baz'))
look(table4)
# aggregate multiple fields: each entry maps an output field name to an
# aggregation specification
from collections import OrderedDict
from petl import strjoin
aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
aggregation['listbar'] = 'bar' # default aggregation function is list
aggregation['bars'] = 'bar', strjoin(', ')
table5 = aggregate(table1, 'foo', aggregation)
look(table5)
# can also use list or tuple to specify multiple field aggregation;
# the entries mirror the OrderedDict version above, with the output field
# name as the first element of each tuple
aggregation = [('count', len),
               ('minbar', 'bar', min),
               ('maxbar', 'bar', max),
               ('sumbar', 'bar', sum),
               ('listbar', 'bar'), # default aggregation function is list
               ('bars', 'bar', strjoin(', '))]
table6 = aggregate(table1, 'foo', aggregation)
look(table6)
# can also use suffix notation: create the aggregate view with only a key,
# then add output fields by item assignment
table7 = aggregate(table1, 'foo')
table7['count'] = len
table7['minbar'] = 'bar', min
示例#6
0
                       aggregation=list,
                       value=('bar', 'baz'))
# Bare expression: evaluates ``table4``.
# NOTE(review): the statement that binds ``table4`` is cut off before this
# excerpt, and the ``table1`` passed to ``etl.aggregate`` below is also defined
# outside it.
table4
# aggregate multiple fields: each entry maps an output field name to an
# aggregation specification
from collections import OrderedDict
import petl as etl

aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
# default aggregation function is list
aggregation['listbar'] = 'bar'
aggregation['listbarbaz'] = ('bar', 'baz'), list
aggregation['bars'] = 'bar', etl.strjoin(', ')
table5 = etl.aggregate(table1, 'foo', aggregation)
table5

# mergeduplicates()
###################

import petl as etl

# Input table for the mergeduplicates example; the first row is the header.
# Several 'foo' values repeat and some cells are None. This rebinds ``table1``.
table1 = [['foo', 'bar', 'baz'], ['A', 1, 2.7], ['B', 2, None], ['D', 3, 9.4],
          ['B', None, 7.8], ['E', None, 42.], ['D', 3, 12.3], ['A', 2, None]]
# 'foo' is passed as the key field.
table2 = etl.mergeduplicates(table1, 'foo')
table2

# merge()
#########
示例#7
0
# Excerpt of petl ``aggregate`` usage examples.
# NOTE(review): ``table1`` is not defined before this point in the excerpt, and
# ``etl`` is used on the first statement ahead of the ``import petl as etl``
# below; presumably both are set up earlier in the original script.

# single aggregation function with a compound key and a compound value,
# passed as keyword arguments
table4 = etl.aggregate(table1, key=('foo', 'bar'),
                       aggregation=list, value=('bar', 'baz'))
table4
# aggregate multiple fields: each entry maps an output field name to an
# aggregation specification
from collections import OrderedDict
import petl as etl

aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
# default aggregation function is list
aggregation['listbar'] = 'bar'
aggregation['listbarbaz'] = ('bar', 'baz'), list
aggregation['bars'] = 'bar', etl.strjoin(', ')
table5 = etl.aggregate(table1, 'foo', aggregation)
table5


# mergeduplicates()
###################

import petl as etl
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2.7],
          ['B', 2, None],
          ['D', 3, 9.4],
          ['B', None, 7.8],
          ['E', None, 42.],
          ['D', 3, 12.3],