示例#1
0
For instance, gene ids can be selected and their FPKM/iBAQ values combined.
Also, features can be grouped into longer sequences
with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged
into LNGERPEPTIDE).
"""

import argparse, sys, re, csv, copy, decimal
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# Command-line interface for this script, built on the project's CustomParser.
parser = CustomParser(description=description)
# Delimited-file options; the column option is exposed as --group-on.
parser.add_delimited_file(cols=['--group-on'])
parser.add_out()
# Boolean switch: off unless --substring is given.
parser.add_argument(
    '--substring',
    action='store_true',
    help='If set, merge features by partial matches (such as collapsing peptides into larger peptides)',
)
# Column/function option registered under --summary-col.
parser.add_column_function(
    '--summary-col',
    col_help="The function to apply to grouped entries in modification columns.",
)
parser.add_argument(
    '--summary-col-delimiter',
    help="If the summary column has a delimiter, such as a ; for multiple proteins.",
)
示例#2
0
    import re
from multiprocessing import Pool, Value
from collections import Counter
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# Command-line interface for this script, built on the project's CustomParser.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to match peptides against.")
parser.add_out(
    help="The name of the file you wish to create with results appended.",
)

# Optional side outputs; when the option is omitted the default is os.devnull.
parser.add_argument(
    '--peptide-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write digested products to.",
)
parser.add_argument(
    '--protein-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write grouped products to.",
)
parser.add_argument(
    '--strict',
    action='store_true',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
)

# Delimited input; the peptide column option is --peptide-col ('Peptide' by default).
parser.add_delimited_file(cols=['--peptide-col'], col_default='Peptide')
parser.add_argument(
    '-r',
    '--regex',
    type=str,
    help="A perl regular expression determining which parts of the header to capture.",
)
parser.add_argument(
    '--inferred-name',
    type=str,
    default='Proteins',
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
)
parser.add_argument(
    '--no-inference',
    action='store_true',
    help="Do not append proteins inferred from sequences.",
)
parser.add_argument(
    '--no-equality',
    action='store_true',
    help="Do not consider Leucine and Isoleucine equal for peptide mapping.",
)

# iBAQ options live in their own argument group.
ibaq_group = parser.add_argument_group('iBAQ related options')
ibaq_group.add_argument(
    '--ibaq',
    action='store_true',
    help="Provide to append iBAQ values as well (requires protein inference).",
)
ibaq_group.add_argument(
    '--precursors',
    type=str,
    help="The column with precursor area (defaults to header lines containing 'Precursor').",
)
parser.add_column_function(
    '',
    col_argument='--ibaq-function',
    group=ibaq_group,
    col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).",
    parent=False,
)
ibaq_group.add_argument(
    '--non-redundant',
    action='store_true',
    help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.",
)
# The enzyme option is registered on the main parser, between the iBAQ
# options, exactly as in the original ordering.
parser.add_enzyme(help="The enzyme used to digest the sample.")
ibaq_group.add_argument(
    '--normalize',
    action='store_true',
    help="Normalize iBAQ to total intensity of column (useful for comparing multiple samples).",
)

# Protein grouping switches, all boolean flags.
protein_group = parser.add_argument_group('Protein Grouping Options')
protein_group.add_argument(
    '--unique-only',
    action='store_true',
    help="Only group proteins with unique peptides",
)
protein_group.add_argument(
    '--position',
    action='store_true',
    help="Write the position of the peptide matches.",
)
protein_group.add_argument(
    '--case-sensitive',
    action='store_true',
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
)
示例#3
0
__author__ = 'chris'

description = """
This script will lookup features from one delimited file in another delimited file, and
perform various operations on the found entries in the alternative file
"""

import sys, csv
from pythomics.templates import CustomParser
from pythomics.utils import ColumnFunctions

# Command-line interface for the lookup script.
parser = CustomParser(description=description)
# File "a": options are exposed as -a / --adelim / --acol / --aheader.
parser.add_delimited_file(
    files=['-a'],
    delimiter=['--adelim'],
    cols=['--acol'],
    header=['--aheader'],
    help="This is the file to lookup values from.",
)
# File "b": options are exposed as -b / --bdelim / --bcol / --bheader.
parser.add_delimited_file(
    files=['-b'],
    delimiter=['--bdelim'],
    cols=['--bcol'],
    header=['--bheader'],
    help="This is the file to lookup values in.",
)
# NOTE(review): the default is the int 1 while type=str; argparse only applies
# ``type`` to string defaults, so the default stays an int -- confirm the
# consumer of args.blookup accepts both.
parser.add_argument(
    '--blookup',
    type=str,
    default=1,
    help='The column to take entries from in file b.',
)
parser.add_argument(
    '--strict',
    help=
    'For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
示例#4
0
For instance, gene ids can be selected and their FPKM/iBAQ values combined.
Also, features can be grouped into longer sequences
with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged
into LNGERPEPTIDE).
"""

import argparse, sys, re, csv, copy, decimal
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# Command-line interface for this script, built on the project's CustomParser.
parser = CustomParser(description=description)
# Delimited-file options; the column option is exposed as --group-on.
parser.add_delimited_file(cols=['--group-on'])
parser.add_out()
parser.add_argument(
    '--substring',
    action='store_true',
    help='If set, merge features by partial matches (such as collapsing peptides into larger peptides)',
)
# Column/function option registered under --summary-col.
parser.add_column_function(
    '--summary-col',
    col_help="The function to apply to grouped entries in modification columns.",
)
parser.add_argument(
    '--summary-col-delimiter',
    help="If the summary column has a delimiter, such as a ; for multiple proteins.",
)
parser.add_argument(
    '--strict',
    action='store_true',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
)
parser.add_argument(
    '--merge',
    action='store_true',
    help='Merge together identical entries.',
)
# Disabled options, left in place as in the original:
# parser.add_argument('--merge-columns', help="If set, columns of merged peptides will be combined.", action='store_true')
# parser.add_argument('--merge-delimiter', help='The delimiter for column merges.', type=str, default=';')
parser.add_argument(
    '--case-sensitive',
    action='store_true',
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
)

def main():
    args = parser.parse_args()
    peptide_colname = False
    try:
        peptide_column = int(args.group_on)
示例#5
0
# Optional side outputs; when the option is omitted the default is os.devnull.
parser.add_argument(
    '--peptide-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write digested products to.",
)
parser.add_argument(
    '--protein-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write grouped products to.",
)
parser.add_argument(
    '--strict',
    action='store_true',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
)
# Delimited input; the peptide column option is --peptide-col ('Peptide' by default).
parser.add_delimited_file(cols=['--peptide-col'], col_default='Peptide')
parser.add_argument(
    '-r',
    '--regex',
    type=str,
    help="A perl regular expression determining which parts of the header to capture.",
)
parser.add_argument(
    '--inferred-name',
    type=str,
    default='Proteins',
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
)
parser.add_argument(
    '--no-inference',
    action='store_true',
    help="Do not append proteins inferred from sequences.",
)
示例#6
0
description = """
This script will take a delimited file and collapse features together, such
as scan numbers. It can also be used to group peptides into longer sequences
with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged
into LNGERPEPTIDE).
"""

import argparse, sys, re, csv, copy, decimal
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta

# Command-line interface for the collapse script.
parser = CustomParser(description=description)
parser.add_delimited_file()
parser.add_out()
# Boolean switches; each is explicitly defaulted to False.
parser.add_argument(
    '--substring',
    action='store_true',
    default=False,
    help='If set, merge features by partial matches (such as collapsing peptides into larger peptides)',
)
parser.add_argument(
    '--merge-columns',
    action='store_true',
    default=False,
    help="If set, columns of merged peptides will be combined.",
)
# Separator used for merged column values; ';' unless overridden.
parser.add_argument(
    '--merge-delimiter',
    type=str,
    default=';',
    help='The delimiter for column merges.',
)
parser.add_argument(
    '--case-sensitive',
    action='store_true',
    default=False,
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
)

def main():
    args = parser.parse_args()
    peptide_column = args.col-1
    tsv_file = args.tsv
    header_lines = args.header
    delimiter = args.delimiter
    peptide_join = args.substring
    col_delimiter = args.merge_delimiter
    merge_columns = args.merge_columns
示例#7
0
#!/usr/bin/env python

__author__ = 'chris'

description = """
This script will lookup features from one delimited file in another delimited file, and
perform various operations on the found entries in the alternative file
"""

import sys, csv
from pythomics.templates import CustomParser
from pythomics.utils import ColumnFunctions

# Command-line interface for the lookup script.
parser = CustomParser(description=description)
# File "a": options are exposed as -a / --adelim / --acol / --aheader.
parser.add_delimited_file(
    files=['-a'],
    delimiter=['--adelim'],
    cols=['--acol'],
    header=['--aheader'],
    help="This is the file to lookup values from.",
)
# File "b": options are exposed as -b / --bdelim / --bcol / --bheader.
parser.add_delimited_file(
    files=['-b'],
    delimiter=['--bdelim'],
    cols=['--bcol'],
    header=['--bheader'],
    help="This is the file to lookup values in.",
)
# NOTE(review): the default is the int 1 while type=str; argparse only applies
# ``type`` to string defaults, so the default stays an int -- confirm the
# consumer of args.blookup accepts both.
parser.add_argument(
    '--blookup',
    type=str,
    default=1,
    help='The column to take entries from in file b.',
)
parser.add_argument(
    '--strict',
    action='store_true',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
)
parser.add_out()
# Aggregation applied to the matched entries; restricted to the listed choices.
parser.add_argument(
    '--function',
    type=str,
    choices=['concat', 'mean', 'sum', 'median', 'var', 'std', 'count'],
    default='concat',
    help='The function to apply to found entries.',
)
parser.add_argument(
    '--colname',
    type=str,
    default='',
    help='The column name to give the new appended value. Defaults to function chosen',
)
# Optional regex patterns, empty by default.
parser.add_argument(
    '--aregex',
    type=str,
    default='',
    help='An optional regex pattern for matching columns in file a.',
)
parser.add_argument(
    '--bregex',
    type=str,
    default='',
    help='An optional regex pattern for matching columns in file b.',
)

def main():
    args = parser.parse_args()
    a_colname, b_colname, bl_colname = False, False, False
    try:
        a_column = int(args.acol)
        a_column = a_column-1 if a_column > 0 else a_column
    except ValueError: