示例#1
0
import sys
import argparse
import operator
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

# Summary text displayed at the top of the --help output.
description = """
This script will incorporate a given GFF file into a specified
fasta file. It can also incorporate variants given in a VCF file
while generating this fasta file.
"""

# Command-line interface: the fasta input and the output file come first,
# followed by one argument group per category of option.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to reference.")
parser.add_out(help="The file to write resulting fasta file to.")

# How to read the GFF annotation file.
gff_group = parser.add_argument_group('GFF file related options')
gff_group.add_argument(
    '--gff',
    help="The GFF file to use.",
    type=argparse.FileType('r'),
    required=True,
)
gff_group.add_argument(
    '--group-on',
    help="The key to group entries together by (such as transcript_id)",
    type=str,
    default='ID',
)
gff_group.add_argument(
    '--feature',
    help="The feature to use for fetching coordinates (such as CDS, does not apply with cufflinks flag).",
    type=str,
    default='',
)
gff_group.add_argument(
    '--cufflinks',
    help="If the gff file is in the standard cufflinks output",
    action='store_true',
    default=False,
)

# Variant (VCF) handling.
# NOTE(review): add_vcf() is invoked on the argument group rather than on the
# parser -- confirm that groups created by CustomParser provide this method.
vcf_group = parser.add_argument_group('VCF file related options')
vcf_group.add_vcf()
vcf_group.add_argument(
    '--variants-only',
    help="Only output transcripts with variants.",
    action='store_true',
    default=False,
)

# Behaviour for variants that span an exon-exon junction.
splice_group = parser.add_argument_group('Splice Junction Options (if a variant falls over an exon-exon junction. Default is to ignore.)')
splice_group.add_argument(
    '--splice-partial',
    help="Partially splice variants (only include exonic portions of variant)",
    action='store_true',
    default=False,
)


def main():
    args = parser.parse_args()
    snps = not args.no_snps
    dels = args.dels
示例#2
0
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# Command-line interface for matching peptides against a fasta file.
parser = CustomParser(description = description)
parser.add_fasta(help="The fasta file to match peptides against.")
parser.add_out(help="The name of the file you wish to create with results appended.")
# Optional side outputs; both default to os.devnull.
parser.add_argument('--peptide-out', nargs='?', help="The file to write digested products to.", type=argparse.FileType('w'), default=os.devnull)
parser.add_argument('--protein-out', nargs='?', help="The file to write grouped products to.", type=argparse.FileType('w'), default=os.devnull)
parser.add_argument('--strict', help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).', action='store_true')
parser.add_delimited_file(cols=['--peptide-col'], col_default='Peptide')
parser.add_argument('-r', '--regex', help="A perl regular expression determining which parts of the header to capture.", type=str)
parser.add_argument('--inferred-name', help="The name you want to assign for protein inference (in case you are regexing for gene names or something).", type=str, default='Proteins')
parser.add_argument('--no-inference', help="Do not append proteins inferred from sequences.", action='store_true')
parser.add_argument('--no-equality', help="Do not consider Leucine and Isoleucine equal for peptide mapping.", action='store_true')

# iBAQ quantification options.
ibaq_group = parser.add_argument_group('iBAQ related options')
ibaq_group.add_argument('--ibaq', help="Provide to append iBAQ values as well (requires protein inference).", action='store_true')
ibaq_group.add_argument('--precursors', help="The column with precursor area (defaults to header lines containing 'Precursor').", type=str)
parser.add_column_function('', col_argument='--ibaq-function', group=ibaq_group, col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).", parent=False)
ibaq_group.add_argument('--non-redundant', help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.", action='store_true')
parser.add_enzyme(help="The enzyme used to digest the sample.")
ibaq_group.add_argument('--normalize', help="Normalize iBAQ to total intensity of column (useful for comparing multiple samples).", action='store_true')

# Protein grouping options.
protein_group = parser.add_argument_group('Protein Grouping Options')
protein_group.add_argument('--unique-only', help="Only group proteins with unique peptides", action='store_true')
protein_group.add_argument('--position', help="Write the position of the peptide matches.", action='store_true')
protein_group.add_argument('--case-sensitive', help="Treat peptides as case-sensitive (ie separate modified peptides)", action='store_true')

# Peptide modification options.
mod_group = parser.add_argument_group('Peptide Modification Options')
mod_group.add_argument('--mod-out', nargs='?', help="The file to write a modification-centric summary to.", type=argparse.FileType('w'), default=None)
mod_group.add_argument('--modification-site', help="Write the position in the parent protein of the modification (requires case-sensitive and modifications being lower-cased).", action='store_true')
parser.add_column_function('--mod-col', help="The column containing modification information.", group=mod_group)

# Motif options get their own group created on the parser. argparse never
# supported nesting a group inside another group: options added to a nested
# group are omitted from --help, and the call is deprecated since Python 3.11.
motif_group = parser.add_argument_group('Motif Options')
示例#3
0
__author__ = 'chris'

# Description text handed to the argument parser below; currently left blank.
description = """

"""

import sys
import os
import operator
import argparse

from pythomics.templates import CustomParser

# Command-line interface; the description text is defined earlier in the file.
parser = CustomParser(description=description)

# Options locating the protein inference input and naming its columns.
group = parser.add_argument_group('Protein Inference File')
group.add_argument(
    '--inference',
    nargs='+',
    required=True,
    type=argparse.FileType('r'),
    help="The protein inference file (your peptide file with gene/protein annotations). For multiple files, separate by spaces (must be in same order as mods).",
)
group.add_argument(
    '--gene',
    default='Gene',
    type=str,
    help="The Gene column name",
)
group.add_argument(
    '--protein',
    default='Protein',
    type=str,
    help="The Protein column name",
)
示例#4
0
__author__ = 'chris'

# Description text handed to the argument parser below; currently left blank.
description = """

"""

import sys
import os
import operator
import argparse

from pythomics.templates import CustomParser

# Command-line interface; the description text is defined earlier in the file.
parser = CustomParser(description=description)

# Protein inference input file(s) and the names of the columns to read.
group = parser.add_argument_group('Protein Inference File')
group.add_argument('--inference', help="The protein inference file (your peptide file with gene/protein annotations). For multiple files, separate by spaces (must be in same order as mods).", nargs='+', type=argparse.FileType('r'), required=True)
group.add_argument('--gene', help="The Gene column name", type=str, default='Gene')
group.add_argument('--protein', help="The Protein column name", type=str, default='Protein')
group.add_argument('--peptide', help="The Peptide column name", type=str, default='Peptide')
group.add_argument('--quant', help="The name of quantification columns (such as Heavy/Light). Separate multiple columns by spaces", nargs='+', default=['Heavy/Light'])

# Modification input file(s); expected in the same order as --inference.
mods = parser.add_argument_group('Modification File')
mods.add_argument('--mods', help="The modifications file (the file with sites, peptides). For multiple files, separate by spaces (must be in same order as inference).", nargs='+', type=argparse.FileType('r'), required=True)
mods.add_argument('--site-protein', help="The mod file protein column name", type=str, default='Protein')

# Normalization switches.
parser.add_argument('--no-log2', help='Do not log2 normalize quantification values.', action='store_true')
parser.add_argument('--no-median', help='Do not normalize quantification values by the median of the experiment.', action='store_true')
parser.add_argument('--wp', help="The whole proteome inference file, if it exists. For multiple replicates, separate by spaces.", nargs='+', type=argparse.FileType('r'))
parser.add_argument('--non-mod-norm', help='Normalize the data by the non-modified peptides.', action='store_true')

# Site-level summary output.
# NOTE(review): the default is the text stream sys.stdout while explicit paths
# are opened in binary mode ('wb') -- confirm the writer copes with both.
parser.add_argument('--site-file', help='The output path for the file with summaries at the site level.', default=sys.stdout, type=argparse.FileType('wb'))
示例#5
0
    "A perl regular expression determining which parts of the header to capture.",
    type=str)
# Name of the column that receives the inferred proteins.
parser.add_argument('--inferred-name', type=str, default='Proteins',
                    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).")

# Switches that turn off protein inference and I/L equivalence.
parser.add_argument('--no-inference', action='store_true',
                    help="Do not append proteins inferred from sequences.")
parser.add_argument('--no-equality', action='store_true',
                    help="Do not consider Leucine and Isoleucine equal for peptide mapping.")

# iBAQ quantification options.
ibaq_group = parser.add_argument_group('iBAQ related options')
ibaq_group.add_argument('--ibaq', action='store_true',
                        help="Provide to append iBAQ values as well (requires protein inference).")
ibaq_group.add_argument('--precursors', type=str,
                        help="The column with precursor area (defaults to header lines containing 'Precursor').")
parser.add_column_function(
    '',
    col_argument='--ibaq-function',
    group=ibaq_group,
    col_help=
    "The function to apply to groups of iBAQ values (for multiple peptide matches).",
示例#6
0
"""

import argparse, sys, re, csv, copy, decimal
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta

# Command-line interface for matching peptides against a fasta file.
parser = CustomParser(description = description)
parser.add_fasta(help="The fasta file to match peptides against.")
# Both report streams default to standard output.
parser.add_argument('--peptide_out', nargs='?', help="The file to write digested products to.", type=argparse.FileType('w'), default=sys.stdout)
parser.add_argument('--protein_out', nargs='?', help="The file to write grouped products to.", type=argparse.FileType('w'), default=sys.stdout)
parser.add_delimited_file()
parser.add_argument('-r', '--regex', help="A perl regular expression determining which parts of the header to capture.", type=str)
# store_true so that passing the flag actually sets args.no_inference; with
# store_false and a False default the option could never change the value.
# The default (False) is unchanged.
# NOTE(review): confirm the consumer reads this as "inference disabled".
parser.add_argument('--no-inference', help="Do not append proteins inferred from sequences.", action='store_true', default=False)

# iBAQ quantification options.
group = parser.add_argument_group('iBAQ related options')
group.add_argument('--ibaq', help="Provide to append iBAQ values as well (requires protein inference).", action='store_true', default=False)
group.add_argument('--precursors', help="The column with precursor area (defaults to header lines containing 'Precursor').", type=int, default=None)
parser.add_enzyme()
# store_false with default=True: args.no_normalize stays True unless the flag
# is given, at which point it becomes False.
group.add_argument('--no-normalize', help="Don't normalize iBAQ to total intensity", action='store_false', default=True)
group.add_argument('--case-sensitive', help="Treat peptides as case-sensitive (ie separate modified peptides)", action='store_true', default=False)

# Protein grouping options.
protein_group = parser.add_argument_group('Protein Grouping Options')
protein_group.add_argument('--unique-only', help="Only group proteins with unique peptides", action='store_true', default=False)
protein_group.add_argument('--position', help="Write the position of the peptide matches.", action='store_true', default=False)

def main():
    args = parser.parse_args()
    fasta_file = fasta.FastaIterator(args.fasta)
    peptide_column = args.col-1
    tsv_file = args.tsv
    header_lines = args.header