Exemplo n.º 1
0
def parse_arguments(args=None):
    parser = \
        argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""

Given typically two or more bigWig files, ``multiBigwigSummary`` computes the average scores for each of the files in every genomic region.
This analysis is performed for the entire genome by running the program in ``bins`` mode, or for certain user selected regions in ``BED-file``
mode. Most commonly, the default output of ``multiBigwigSummary`` (a compressed numpy array, .npz) is used by other tools such as ``plotCorrelation`` or ``plotPCA`` for visualization and diagnostic purposes.

Note that using a single bigWig file is only recommended if you want to produce a bedGraph file (i.e., with the ``--outRawCounts`` option; the default output file cannot be used by ANY deepTools program if only a single file was supplied!).

A detailed sub-commands help is available by typing:

  multiBigwigSummary bins -h

  multiBigwigSummary BED-file -h


""",
            epilog='example usage:\n multiBigwigSummary bins '
                   '-b file1.bw file2.bw -o results.npz\n\n'
                   'multiBigwigSummary BED-file -b file1.bw file2.bw -o results.npz\n'
                   '--BED selection.bed'
                   ' \n\n',
            conflict_handler='resolve')

    parser.add_argument('--version',
                        action='version',
                        version='multiBigwigSummary {}'.format(__version__))
    subparsers = parser.add_subparsers(title="commands",
                                       dest='command',
                                       metavar='')

    parent_parser = parserCommon.getParentArgParse(binSize=False)
    dbParser = parserCommon.deepBlueOptionalArgs()

    # bins mode options
    subparsers.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[
            multiBigwigSummaryArgs(case='bins'), parent_parser,
            parserCommon.gtf_options(suppress=True), dbParser
        ],
        help="The average score is based on equally sized bins "
        "(10 kilobases by default), which consecutively cover the "
        "entire genome. The only exception is the last bin of a chromosome, which "
        "is often smaller. The output of this mode is commonly used to assess the "
        "overall similarity of different bigWig files.",
        add_help=False,
        usage='multiBigwigSummary bins '
        '-b file1.bw file2.bw '
        '-o results.npz\n')

    # BED file arguments
    subparsers.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[
            multiBigwigSummaryArgs(case='BED-file'), parent_parser,
            parserCommon.gtf_options(), dbParser
        ],
        help="The user provides a BED file that contains all regions "
        "that should be considered for the analysis. A "
        "common use is to compare scores (e.g. ChIP-seq scores) between "
        "different samples over a set of pre-defined peak regions.",
        usage='multiBigwigSummary BED-file '
        '-b file1.bw file2.bw '
        '-o results.npz --BED selection.bed\n',
        add_help=False)

    return parser
Exemplo n.º 2
0
def parse_arguments(args=None):
    parentParser = parserCommon.getParentArgParse()
    outputParser = parserCommon.output()
    dbParser = parserCommon.deepBlueOptionalArgs()
    parser = argparse.ArgumentParser(
        parents=[parentParser, outputParser, dbParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='This tool compares two bigWig files based on the number '
        'of mapped reads. To compare the bigWig files, the genome is '
        'partitioned into bins of equal size, then the number of reads found '
        'in each BAM file are counted per bin and finally a summary '
        'value is reported. This value can be the ratio of the number of reads'
        'per bin, the log2 of the ratio, the sum or the difference.')

    # define the arguments
    parser.add_argument('--bigwig1', '-b1',
                        metavar='Bigwig file',
                        help='Bigwig file 1. Usually the file for the '
                        'treatment.',
                        required=True)

    parser.add_argument('--bigwig2', '-b2',
                        metavar='Bigwig file',
                        help='Bigwig file 2. Usually the file for the '
                        'control.',
                        required=True)

    parser.add_argument('--scaleFactors',
                        help='Set this parameter to multipy the bigwig values '
                        'by a constant. The format is '
                        'scaleFactor1:scaleFactor2. '
                        'For example 0.7:1 to scale the first bigwig file '
                        'by 0.7 while not scaling the second bigwig file',
                        default=None,
                        required=False)

    parser.add_argument('--pseudocount',
                        help='small number to avoid x/0. Only useful '
                        'when ratio = log2 or ratio',
                        default=1,
                        type=float,
                        required=False)

    parser.add_argument('--ratio',
                        help='The default is to output the log2ratio of the '
                        'two samples. The reciprocal ratio returns the '
                        'the negative of the inverse of the ratio '
                        'if the ratio is less than 0. The resulting '
                        'values are interpreted as negative fold changes. '
                        '*NOTE*: Only with --ratio subtract can --normalizeTo1x or '
                        '--normalizeUsingRPKM be used. Instead of performing a '
                        'computation using both files, the scaled signal can '
                        'alternatively be output for the first or second file using '
                        'the \'--ratio first\' or \'--ratio second\'',
                        default='log2',
                        choices=['log2', 'ratio', 'subtract', 'add',
                                 'reciprocal_ratio', 'first', 'second'],
                        required=False)

    parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
                        help='This parameter determines if non-covered regions (regions without a score) '
                        'in the bigWig files should be skipped. The default is to treat those '
                        'regions as having a value of zero. '
                        'The decision to skip non-covered regions '
                        'depends on the interpretation of the data. Non-covered regions '
                        'in a bigWig file may represent repetitive regions that should '
                        'be skipped. Alternatively, the interpretation of non-covered regions as '
                        'zeros may be wrong and this option should be used ',
                        action='store_true')

    return parser
Exemplo n.º 3
0
def parse_arguments(args=None):
    parentParser = parserCommon.getParentArgParse()
    outputParser = parserCommon.output()
    dbParser = parserCommon.deepBlueOptionalArgs()
    parser = argparse.ArgumentParser(
        parents=[parentParser, outputParser, dbParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='This tool compares two bigWig files based on the number '
        'of mapped reads. To compare the bigWig files, the genome is '
        'partitioned into bins of equal size, then the number of reads found '
        'in each BAM file are counted per bin and finally a summary '
        'value is reported. This value can be the ratio of the number of reads'
        'per bin, the log2 of the ratio, the sum or the difference.')

    # define the arguments
    parser.add_argument('--bigwig1', '-b1',
                        metavar='Bigwig file',
                        help='Bigwig file 1. Usually the file for the '
                        'treatment.',
                        required=True)

    parser.add_argument('--bigwig2', '-b2',
                        metavar='Bigwig file',
                        help='Bigwig file 2. Usually the file for the '
                        'control.',
                        required=True)

    parser.add_argument('--scaleFactors',
                        help='Set this parameter to multipy the bigwig values '
                        'by a constant. The format is '
                        'scaleFactor1:scaleFactor2. '
                        'For example 0.7:1 to scale the first bigwig file '
                        'by 0.7 while not scaling the second bigwig file',
                        default=None,
                        required=False)

    parser.add_argument('--pseudocount',
                        help='small number to avoid x/0. Only useful '
                        'when ratio = log2 or ratio',
                        default=1,
                        type=float,
                        required=False)

    parser.add_argument('--ratio',
                        help='The default is to output the log2ratio of the '
                        'two samples. The reciprocal ratio returns the '
                        'the negative of the inverse of the ratio '
                        'if the ratio is less than 0. The resulting '
                        'values are interpreted as negative fold changes. '
                        '*NOTE*: Only with --ratio subtract can --normalizeTo1x or '
                        '--normalizeUsingRPKM be used. Instead of performing a '
                        'computation using both files, the scaled signal can '
                        'alternatively be output for the first or second file using '
                        'the \'--ratio first\' or \'--ratio second\'',
                        default='log2',
                        choices=['log2', 'ratio', 'subtract', 'add', 'mean',
                                 'reciprocal_ratio', 'first', 'second'],
                        required=False)

    parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
                        help='This parameter determines if non-covered regions (regions without a score) '
                        'in the bigWig files should be skipped. The default is to treat those '
                        'regions as having a value of zero. '
                        'The decision to skip non-covered regions '
                        'depends on the interpretation of the data. Non-covered regions '
                        'in a bigWig file may represent repetitive regions that should '
                        'be skipped. Alternatively, the interpretation of non-covered regions as '
                        'zeros may be wrong and this option should be used ',
                        action='store_true')

    return parser
def parse_arguments(args=None):
    parser = \
        argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""

This tool calculates scores per genome regions and prepares an intermediate file that can be used with ``plotHeatmap`` and ``plotProfiles``.
Typically, the genome regions are genes, but any other regions defined in a BED file can be used.
computeMatrix accepts multiple score files (bigWig format) and multiple regions files (BED format).
This tool can also be used to filter and sort regions according
to their score.

To learn more about the specific parameters, type:

$ computeMatrix reference-point --help or

$ computeMatrix scale-regions --help

""",
            epilog='An example usage is:\n  computeMatrix reference-point -S '
            '<bigwig file(s)> -R <bed file(s)> -b 1000\n \n')

    parser.add_argument('--version', action='version',
                        version='%(prog)s {}'.format(__version__))

    subparsers = parser.add_subparsers(
        title='Commands',
        dest='command',
        metavar='')

    dbParser = parserCommon.deepBlueOptionalArgs()

    # scale-regions mode options
    subparsers.add_parser(
        'scale-regions',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[computeMatrixRequiredArgs(),
                 computeMatrixOutputArgs(),
                 computeMatrixOptArgs(case='scale-regions'),
                 parserCommon.gtf_options(),
                 dbParser],
        help="In the scale-regions mode, all regions in the BED file are "
        "stretched or shrunken to the length (in bases) indicated by the user.",
        usage='An example usage is:\n  computeMatrix scale-regions -S '
        '<biwig file(s)> -R <bed file> -b 1000\n\n')

    # reference point arguments
    subparsers.add_parser(
        'reference-point',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[computeMatrixRequiredArgs(),
                 computeMatrixOutputArgs(),
                 computeMatrixOptArgs(case='reference-point'),
                 parserCommon.gtf_options(),
                 dbParser],
        help="Reference-point refers to a position within a BED region "
        "(e.g., the starting point). In this mode, only those genomic"
        "positions before (upstream) and/or after (downstream) of the "
        "reference point will be plotted.",
        usage='An example usage is:\n  computeMatrix reference-point -S '
        '<biwig file(s)> -R <bed file> -a 3000 -b 3000\n\n')

    return parser
def parse_arguments(args=None):
    parser = \
        argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""

Given typically two or more bigWig files, ``multiBigwigSummary`` computes the average scores for each of the files in every genomic region.
This analysis is performed for the entire genome by running the program in ``bins`` mode, or for certain user selected regions in ``BED-file``
mode. Most commonly, the default output of ``multiBigwigSummary`` (a compressed numpy array, .npz) is used by other tools such as ``plotCorrelation`` or ``plotPCA`` for visualization and diagnostic purposes.

Note that using a single bigWig file is only recommended if you want to produce a bedGraph file (i.e., with the ``--outRawCounts`` option; the default output file cannot be used by ANY deepTools program if only a single file was supplied!).

A detailed sub-commands help is available by typing:

  multiBigwigSummary bins -h

  multiBigwigSummary BED-file -h


""",
            epilog='example usage:\n multiBigwigSummary bins '
                   '-b file1.bw file2.bw -out results.npz\n\n'
                   'multiBigwigSummary BED-file -b file1.bw file2.bw -out results.npz\n'
                   '--BED selection.bed'
                   ' \n\n',
            conflict_handler='resolve')

    parser.add_argument('--version', action='version',
                        version='multiBigwigSummary {}'.format(__version__))
    subparsers = parser.add_subparsers(
        title="commands",
        dest='command',
        metavar='')

    parent_parser = parserCommon.getParentArgParse(binSize=False)
    dbParser = parserCommon.deepBlueOptionalArgs()

    # bins mode options
    subparsers.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[multiBigwigSummaryArgs(case='bins'),
                 parent_parser,
                 parserCommon.gtf_options(suppress=True),
                 dbParser
                 ],
        help="The average score is based on equally sized bins "
             "(10 kilobases by default), which consecutively cover the "
             "entire genome. The only exception is the last bin of a chromosome, which "
             "is often smaller. The output of this mode is commonly used to assess the "
             "overall similarity of different bigWig files.",
        add_help=False,
        usage='multiBigwigSummary '
              '-b file1.bw file2.bw '
              '-out results.npz\n')

    # BED file arguments
    subparsers.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[multiBigwigSummaryArgs(case='BED-file'),
                 parent_parser,
                 parserCommon.gtf_options(),
                 dbParser
                 ],
        help="The user provides a BED file that contains all regions "
             "that should be considered for the analysis. A "
             "common use is to compare scores (e.g. ChIP-seq scores) between "
             "different samples over a set of pre-defined peak regions.",
        usage='multiBigwigSummary '
              '-b file1.bw file2.bw '
              '-out results.npz --BED selection.bed\n',
        add_help=False)

    return parser