示例#1
0
def get_parser():
    """Build the command-line parser for the personalized PageRank matrix script.

    Returns:
        The ``hnap.HotNetArgParser`` with all options registered. It is
        created with ``fromfile_prefix_chars='@'``.
    """
    arg_parser = hnap.HotNetArgParser(
        description=('Create the personalized PageRank matrix for the given '
                     'network and restart probability beta.'),
        fromfile_prefix_chars='@')

    # (option strings, add_argument keyword options), registered in order.
    option_specs = (
        (('-e', '--edgelist_file'),
         dict(required=True, help='Location of edgelist file.')),
        (('-i', '--gene_index_file'),
         dict(required=True, help='Location of gene-index file.')),
        (('-o', '--output_dir'),
         dict(required=True, help="Output dir.")),
        (('-p', '--prefix'),
         dict(required=True, help="Output prefix.")),
        (('-s', '--start_index'),
         dict(default=1, type=int, help="Index to output edge list, etc..")),
        (('-b', '--beta'),
         dict(required=True, type=float, help="Restart probability beta.")),
        (('-f', '--format'),
         dict(default='hdf5', type=str, choices=['hdf5', 'npy', 'matlab'],
              help="Output file format.")),
    )
    for option_strings, options in option_specs:
        arg_parser.add_argument(*option_strings, **options)

    return arg_parser
示例#2
0
def get_parser():
    """Build the argument parser for creating the PPR matrix and permuted PPR matrices.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = ('Create the personalized pagerank matrix and 100 permuted PPR matrices '
                   'for the given network and restart probability beta.')
    parser = hnap.HotNetArgParser(description=description, fromfile_prefix_chars='@')

    # Network inputs and output naming.
    parser.add_argument('-e', '--edgelist_file', required=True,
                        help='Path to TSV file listing edges of the interaction network, '
                             'where each row contains the indices of two genes that are '
                             'connected in the network.')
    parser.add_argument('-i', '--gene_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first '
                             'column and the name of the gene represented at that index in '
                             'the second column of each line.')
    parser.add_argument('-p', '--prefix', required=True,
                        help='Output prefix.')
    parser.add_argument('-is', '--index_file_start_index', default=1, type=int,
                        help='Minimum index in the index file.')
    parser.add_argument('-a', '--alpha', required=True, type=float,
                        help='Page Rank dampening factor, equal to 1-beta (where beta is the '
                             'restart probability for insulated heat diffusion process).')

    # Network permutation options.
    parser.add_argument('-q', '--Q', default=115, type=float,
                        help='Edge swap constant. The script will attempt Q*|E| edge swaps')
    parser.add_argument('-ps', '--permutation_start_index', default=1, type=int,
                        help='Index at which to start permutation file names.')
    parser.add_argument('-n', '--num_permutations', default=100, type=int,
                        help='Number of permuted networks to create.')

    # Output location and backend selection.
    parser.add_argument('-o', '--output_dir', required=True,
                        help='Output directory.')
    parser.add_argument("--matlab", default=False, action="store_true",
                        help="Create the PPR matrix using an external call "
                             "to a MATLAB script instead of SciPy.")

    return parser
示例#3
0
def get_parser():
    """Build the argument parser for the heat diffusion influence matrix script.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = 'Creates a heat diffusion influence matrix from an input graph.'
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Network inputs.
    parser.add_argument('-e', '--edgelist_file', required=True,
                        help='Path to TSV file listing edges of the interaction network, '
                             'where each row contains the indices of two genes that are '
                             'connected in the network.')
    parser.add_argument('-i', '--gene_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first '
                             'column and the name of the gene represented at that index in '
                             'the second column of each line.')

    # Output location and naming.
    parser.add_argument('-o', '--output_dir', required=True,
                        help='Path to output directory.')
    parser.add_argument('-p', '--prefix', required=True, help='Output prefix.')

    # Indexing and diffusion parameters.
    parser.add_argument('-s', '--start_index', default=1, type=int,
                        help='Minimum index in the index file.')
    parser.add_argument('-t', '--time', required=True, type=float,
                        help='Diffusion time.')
    return parser
示例#4
0
def get_parser():
    """Build the argument parser for the HotNet2 subnetwork website generator.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = 'Creates a website showing the subnetworks output by HotNet2.'
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Inputs: one or more results files plus the network edge list.
    parser.add_argument('-r', '--results_files', nargs='+', required=True,
                        help='Paths to results.json files output by HotNet2')
    parser.add_argument('-ef', '--edge_file', required=True,
                        help='Path to TSV file listing edges of the interaction network, '
                             'where each row contains the indices of two genes that are '
                             'connected in the network.')

    # Optional display settings.
    parser.add_argument('-dsf', '--display_score_file',
                        help='Path to a tab-separated file containing a gene name in the '
                             'first column and the display score for that gene in the '
                             'second column of each line.')
    parser.add_argument('-nn', '--network_name', default='Network',
                        help='Display name for the interaction network.')

    parser.add_argument('-o', '--output_directory', required=True,
                        help='Output directory in which the website should be generated.')
    return parser
示例#5
0
def get_parser():
    """Build the command-line parser for the personalized pagerank matrix script.

    Returns:
        The ``hnap.HotNetArgParser`` with all options registered. It is
        created with ``fromfile_prefix_chars='@'``.
    """
    ppr_parser = hnap.HotNetArgParser(
        description=('Create the personalized pagerank matrix for the given '
                     'network and restart probability beta.'),
        fromfile_prefix_chars='@')

    # (option strings, add_argument keyword options), registered in order.
    option_specs = (
        (('-e', '--edgelist_file'),
         dict(required=True, help='Location of edgelist file.')),
        (('-i', '--gene_index_file'),
         dict(required=True, help='Location of gene-index file.')),
        (('-o', '--output_dir'),
         dict(required=True, help="Output dir.")),
        (('-p', '--prefix'),
         dict(required=True, help="Output prefix.")),
        (('-s', '--start_index'),
         dict(default=1, type=int, help="Index to output edge list, etc..")),
        (('-a', '--alpha'),
         dict(required=True, type=float, help="Page Rank dampening factor.")),
        (("--matlab",),
         dict(default=False, action="store_true",
              help="Create the PPR matrix using an external call "
                   "to a MATLAB script instead of Scipy.")),
    )
    for option_strings, options in option_specs:
        ppr_parser.add_argument(*option_strings, **options)

    return ppr_parser
示例#6
0
def get_parser():
    """Build the argument parser for the simple-run HotNet2 helper script.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = ("Helper script for simple runs of generalized HotNet2, including "
                   "automated parameter selection.")
    parser = hnap.HotNetArgParser(description=description, fromfile_prefix_chars='@')

    # Run identification and required inputs.
    parser.add_argument('-r', '--runname', help='Name of run / disease.')
    parser.add_argument('-mf', '--infmat_file', required=True,
                        help='Path to HDF5 (.h5) file containing influence matrix. '
                             'NumPy (.npy) and MATLAB (.mat) files also supported.')
    parser.add_argument('-if', '--infmat_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first '
                             'column and the name of the gene represented at that index in '
                             'the second column of each line.')
    parser.add_argument('-hf', '--heat_file', required=True,
                        help='Path to heat file containing gene names and scores. This can '
                             'either be a JSON file created by generateHeat.py, in which '
                             'case the file name must end in .json, or a tab-separated file '
                             'containing a gene name in the first column and the heat score '
                             'for that gene in the second column of each line.')

    # Component size, permuted networks and delta selection.
    parser.add_argument('-ccs', '--min_cc_size', type=int, default=2,
                        help='Minimum size connected components that should be returned.')
    parser.add_argument('-pnp', '--permuted_networks_path', required=False, default='',
                        help='Path to influence matrices for permuted networks. Include '
                             + ITERATION_REPLACEMENT_TOKEN +
                             ' in the path to be replaced with the iteration number')
    parser.add_argument('-d', '--deltas', nargs='*', type=float, default=[],
                        help='Delta value(s).')
    parser.add_argument('-dp', '--delta_permutations', type=int, default=100,
                        help='Number of permutations to be used for delta parameter '
                             'selection.')
    parser.add_argument('-sp', '--significance_permutations', type=int, default=100,
                        help='Number of permutations to be used for statistical '
                             'significance testing.')

    # Output location and parallelism.
    parser.add_argument('-o', '--output_directory', default='hotnet_output',
                        help='Output directory. Files results.json, components.txt, and '
                             'significance.txt will be generated in subdirectories for '
                             'each delta.')
    parser.add_argument('-c', '--num_cores', type=int, default=1,
                        help='Number of cores to use for running permutation tests in '
                             'parallel. If -1, all available cores will be used.')

    # Optional inputs for subnetwork visualizations.
    parser.add_argument('-ef', '--edge_file',
                        help='Path to TSV file listing edges of the interaction network, '
                             'where each row contains the indices of two genes that are '
                             'connected in the network. This is used to create subnetwork '
                             'visualizations; if not provided, visualizations will not be '
                             'made.')
    parser.add_argument('-dsf', '--display_score_file',
                        help='Path to a tab-separated file containing a gene name in the '
                             'first column and the display score for that gene in the '
                             'second column of each line.')
    parser.add_argument('-dnf', '--display_name_file',
                        help='Path to a tab-separated file containing a gene name in the '
                             'first column and the display name for that gene in the '
                             'second column of each line.')
    parser.add_argument('-nn', '--network_name', default='Network',
                        help='Display name for the interaction network. (Used for '
                             'subnetwork visualizations)')
    parser.add_argument('--output_hierarchy', default=False, required=False,
                        action='store_true',
                        help='Output the hierarchical decomposition of the HotNet2 '
                             'similarity matrix.')

    return parser
示例#7
0
def get_parser():
    """Build the command-line parser for the personalized pagerank matrix script.

    Returns:
        The ``hnap.HotNetArgParser`` with the edge list, gene index, output
        directory, output prefix and alpha options registered.
    """
    ppr_parser = hnap.HotNetArgParser(
        description=('Create the personalized pagerank matrix for the given '
                     'network and restart probability beta.'),
        fromfile_prefix_chars='@')

    # Input files.
    ppr_parser.add_argument(
        '-e', '--edgelist_file', required=True, help='Location of edgelist file.')
    ppr_parser.add_argument(
        '-i', '--gene_index_file', required=True, help='Location of gene-index file.')

    # Output location and naming.
    ppr_parser.add_argument(
        '-o', '--output_dir', required=True, help="Output dir.")
    ppr_parser.add_argument(
        '-p', '--prefix', required=True, help="Output prefix.")

    # Algorithm parameter.
    ppr_parser.add_argument(
        '-a', '--alpha', required=True, type=float, help="Page Rank dampening factor.")
    return ppr_parser
示例#8
0
def parse_args(raw_args):
    """Parse the arguments for the runHotNet2 comparison script.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Args:
        raw_args: Argument list handed to ``parser.parse_args``.

    Returns:
        Whatever ``hnap.HotNetArgParser.parse_args`` returns for ``raw_args``
        (presumably an argparse-style namespace exposing ``config_files`` and
        ``results_files`` -- confirm against ``hnap``).
    """
    description = ("Runs the runHotNet2 script with the given config files and compares "
                   "the resulting components to those in the given results files.")
    parser = hnap.HotNetArgParser(description=description, fromfile_prefix_chars='@')

    parser.add_argument('-c', '--config_files', nargs='+',
                        help='Paths to config files to pass to runHotNet2. Note that '
                             'output_directory parameter will be ignored. In addition, '
                             'these config files must specify exactly one value for delta.')
    parser.add_argument('-r', '--results_files', nargs='+',
                        help='Paths to results.json files whose components the run output '
                             'should be compared to. Note that the components section is '
                             'the only one that is considered.')

    return parser.parse_args(raw_args)
示例#9
0
def get_parser():
    """Build the argument parser for the hierarchical decomposition script.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = 'Create a hierarchical decomposition of the HotNet2 similarity matrix.'
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    parser.add_argument('-r', '--run_name', required=False, default='Hotnet2',
                        help='Name of run to appear in output files.')

    # Required inputs: influence matrix, its gene index and the heat scores.
    parser.add_argument('-mf', '--infmat_file', required=True,
                        help='Path to .mat file containing influence matrix')
    parser.add_argument('-if', '--infmat_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first '
                             'column and the name of the gene represented at that index in '
                             'the second column of each line.')
    parser.add_argument('-hf', '--heat_file', required=True,
                        help='Path to heat file containing gene names and scores. This can '
                             'either be a JSON file created by generateHeat.py, in which '
                             'case the file name must end in .json, or a tab-separated file '
                             'containing a gene name in the first column and the heat score '
                             'for that gene in the second column of each line.')

    # Optional settings.
    parser.add_argument('-in', '--infmat_name', required=False, default='PPR',
                        help='Name of matrix in MATLAB file.')
    parser.add_argument('-v', '--verbose', required=False, default=False,
                        action='store_true',
                        help='Flag verbose output.')

    parser.add_argument('-o', '--output_directory', required=True,
                        help='Output directory in which the hierarchy should be created.')
    return parser
示例#10
0
def get_parser():
    """Build the argument parser for the simple-run HotNet helper script.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = ("Helper script for simple runs of generalized HotNet, including "
                   "automated parameter selection.")
    parser = hnap.HotNetArgParser(description=description, fromfile_prefix_chars='@')

    # Run identification and required inputs.
    parser.add_argument('-r', '--runname', help='Name of run / disease.')
    parser.add_argument('-mf', '--infmat_file', required=True,
                        help='Path to .mat file containing influence matrix')
    parser.add_argument('-if', '--infmat_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first '
                             'column and the name of the gene represented at that index in '
                             'the second column of each line.')
    parser.add_argument('-hf', '--heat_file', required=True,
                        help='Path to heat file containing gene names and scores. This can '
                             'either be a JSON file created by generateHeat.py, in which '
                             'case the file name must end in .json, or a tab-separated file '
                             'containing a gene name in the first column and the heat score '
                             'for that gene in the second column of each line.')

    # Component size and permutation counts.
    parser.add_argument('-ccs', '--min_cc_size', type=int, default=2,
                        help='Minimum size connected components that should be returned.')
    parser.add_argument('-dp', '--delta_permutations', type=int, default=100,
                        help='Number of permutations to be used for delta parameter '
                             'selection.')
    parser.add_argument('-sp', '--significance_permutations', type=int, default=100,
                        help='Number of permutations to be used for statistical '
                             'significance testing.')

    # Output location and parallelism.
    parser.add_argument('-o', '--output_directory', default='hotnet_output',
                        help='Output directory. Files results.json, components.txt, and '
                             'significance.txt will be generated in subdirectories for '
                             'each delta.')
    parser.add_argument('-c', '--num_cores', type=int, default=1,
                        help='Number of cores to use for running permutation tests in '
                             'parallel. If -1, all available cores will be used.')

    # Optional inputs for subnetwork visualizations.
    parser.add_argument('-ef', '--edge_file',
                        help='Path to TSV file listing edges of the interaction network, '
                             'where each row contains the indices of two genes that are '
                             'connected in the network. This is used to create subnetwork '
                             'visualizations; if not provided, visualizations will not be '
                             'made.')
    parser.add_argument('-dsf', '--display_score_file',
                        help='Path to a tab-separated file containing a gene name in the '
                             'first column and the display score for that gene in the '
                             'second column of each line.')
    parser.add_argument('-nn', '--network_name', default='Network',
                        help='Display name for the interaction network. (Used for '
                             'subnetwork visualizations)')

    return parser
示例#11
0
def get_parser():
    """Build the command-line parser for the permuted-network generation script.

    ``hnap`` is imported from ``hotnet2`` inside the function.

    Returns:
        The ``hnap.HotNetArgParser`` with all options registered. It is
        created with ``fromfile_prefix_chars='@'``.
    """
    from hotnet2 import hnap

    permute_parser = hnap.HotNetArgParser(
        description=('Creates permuted versions of the given network, where each '
                     'node retains its degree.'),
        fromfile_prefix_chars='@')

    # (option strings, add_argument keyword options), registered in order.
    option_specs = (
        # Q defaults to 115 rather than the standard 100 because some
        # proportion of swaps fail when doing *connected* edge swaps.
        (('-q', '--Q'),
         dict(default=115, type=float, help='Edge swap constant.')),
        (('-s', '--start_index'),
         dict(default=1, type=int,
              help='Index to start output of permutations at.')),
        (('-e', '--edgelist_file'),
         dict(required=True, help='Edgelist file path.')),
        (('-p', '--output_prefix'),
         dict(required=True, help='Output prefix.')),
        (('-o', '--output_dir'),
         dict(required=True, help='Output directory.')),
        (('-n', '--num_permutations'),
         dict(default=100, type=int,
              help='Number of permuted networks to create.')),
        (('-c', '--cores'),
         dict(default=1, type=int,
              help='Use given number of cores. Pass -1 to use all available.')),
    )
    for option_strings, options in option_specs:
        permute_parser.add_argument(*option_strings, **options)

    return permute_parser
示例#12
0
def get_parser():
    """Build the argument parser for the gene-index and edge file creation script.

    ``hnap`` is imported from ``hotnet2`` inside the function.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    from hotnet2 import hnap
    description = 'Create gene-index and edge files.'
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Each help fragment ends with a space: adjacent literals are joined with
    # no separator, so a missing space fuses words (e.g. "whereeach").
    parser.add_argument('-n', '--network_file', required=True,
                        help='Path to tab-separated file containing network interactions, '
                             'where each line is of the form gene1\tgene2, indicating a '
                             'network interaction between gene1 and gene2.')
    parser.add_argument('-s', '--separator', required=False, default='\t',
                        help='Separator in network file; tabs by default')
    parser.add_argument('-e', '--edgelist_file', required=True,
                        help='Path to tab-separated file listing edges of the interaction '
                             'network, where each row contains the indices of two genes '
                             'that are connected in the network.')
    parser.add_argument('-i', '--gene_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first '
                             'column and the name of the gene represented at that index in '
                             'the second column of each line.')
    return parser
示例#13
0
def get_parser():
    """Build the argument parser for generating a JSON heat file for runHotNet2.

    The parser has one sub-command per heat score type (``scores``,
    ``mutation``, ``oncodrive``, ``mutsig``, ``music``). Every sub-command
    inherits ``-o/--output_file`` and ``-n/--name`` from a shared parent parser
    and sets a ``heat_fn`` default naming the loader function for that type.

    Long help strings are assembled from adjacent string literals instead of
    backslash continuations inside a single literal, so the source indentation
    is not embedded in the help text.

    Returns:
        The configured top-level ``hnap.HotNetArgParser``.
    """
    description = "Generates a JSON heat file for input to runHotNet2."
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Options shared by every sub-command; add_help=False so the parent does
    # not register its own -h/--help alongside the sub-parser's.
    parent_parser = hnap.HotNetArgParser(add_help=False,
                                         fromfile_prefix_chars='@')
    parent_parser.add_argument('-o', '--output_file',
                               help='Output file. If none given, output will be written '
                                    'to stdout.')
    parent_parser.add_argument('-n', '--name',
                               help='Name/Label describing the heat scores.')

    # Help text shared verbatim by the oncodrive, mutsig and music sub-commands.
    gene_filter_help = ('File listing genes whose heat scores should be preserved. '
                        'If present, all other heat scores will be discarded.')

    subparsers = parser.add_subparsers(title='Heat score type')

    # --- scores: pre-computed heat scores ---------------------------------
    heat_parser = subparsers.add_parser('scores',
                                        help='Pre-computed heat scores',
                                        parents=[parent_parser])
    heat_parser.add_argument('-hf', '--heat_file', required=True,
                             help='Path to a tab-separated file containing a gene name in '
                                  'the first column and the heat score for that gene in '
                                  'the second column of each line.')
    heat_parser.add_argument('-ms', '--min_heat_score', type=float, default=0,
                             help='Minimum heat score for genes to have their original '
                                  'heat score in the resulting output file. Genes with '
                                  'score below this value will be assigned score 0.')
    heat_parser.add_argument('-gff', '--gene_filter_file', default=None,
                             help='Path to file listing genes whose heat scores should be '
                                  'preserved, one per line. If present, all other heat '
                                  'scores will be discarded.')
    heat_parser.set_defaults(heat_fn=load_direct_heat)

    # --- mutation: SNV / CNA mutation data --------------------------------
    mutation_parser = subparsers.add_parser('mutation',
                                            help='Mutation data',
                                            parents=[parent_parser])
    mutation_parser.add_argument('--snv_file', required=True,
                                 help='Path to a tab-separated file containing SNVs where '
                                      'the first column of each line is a sample ID and '
                                      'subsequent columns contain the names of genes with '
                                      'SNVs in that sample. Lines starting with "#" will '
                                      'be ignored.')
    mutation_parser.add_argument('--cna_file',
                                 help='Path to a tab-separated file containing CNAs where '
                                      'the first column of each line is a sample ID and '
                                      'subsequent columns contain gene names followed by '
                                      '"(A)" or "(D)" indicating an amplification or '
                                      'deletion in that gene for the sample. Lines '
                                      'starting with "#" will be ignored.')
    mutation_parser.add_argument('--sample_file', default=None,
                                 help='File listing samples. Any SNVs or CNAs in samples '
                                      'not listed in this file will be ignored. If HotNet '
                                      'is run with mutation permutation testing, all '
                                      'samples in this file will be eligible for random '
                                      'mutations even if the sample did not have any '
                                      'mutations in the real data. If not provided, the '
                                      'set of samples is assumed to be all samples that '
                                      'are provided in the SNV or CNA data.')
    mutation_parser.add_argument('--sample_type_file', default=None,
                                 help='File listing type (e.g. cancer, datasets, etc.) of '
                                      'samples (see --sample_file). Each line is a '
                                      'space-separated row listing one sample and its '
                                      'type. The sample types are used for creating the '
                                      'HotNet(2) web output.')
    mutation_parser.add_argument('--gene_file', default=None,
                                 help='File listing tested genes. SNVs or CNAs in genes '
                                      'not listed in this file will be ignored. If HotNet '
                                      'is run with mutation permutation testing, every '
                                      'gene in this file will be eligible for random '
                                      'mutations even if the gene did not have mutations '
                                      'in any samples in the original data. If not '
                                      'provided, the set of tested genes is assumed to be '
                                      'all genes that have mutations in either the SNV or '
                                      'CNA data.')
    mutation_parser.add_argument('--min_freq', type=int, default=1,
                                 help='The minimum number of samples in which a gene must '
                                      'have an SNV to be considered mutated in the heat '
                                      'score calculation.')
    mutation_parser.add_argument('--cna_filter_threshold',
                                 type=valid_cna_filter_thresh, default=None,
                                 help='Proportion of CNAs in a gene across samples that '
                                      'must share the same CNA type in order for the CNAs '
                                      'to be included. This must either be > .5, or the '
                                      'default, None, in which case all CNAs will be '
                                      'included.')
    mutation_parser.set_defaults(heat_fn=load_mutation_heat)

    # --- oncodrive: Oncodrive-FM / Oncodrive-CIS scores -------------------
    oncodrive_parser = subparsers.add_parser('oncodrive',
                                             help='Oncodrive scores',
                                             parents=[parent_parser])
    oncodrive_parser.add_argument('--fm_scores', required=True,
                                  help='Oncodrive-FM scores (gene to q-value).')
    oncodrive_parser.add_argument('--cis_amp_scores', required=True,
                                  help='Oncodrive-CIS scores (gene to q-value); '
                                       'amplifications only.')
    oncodrive_parser.add_argument('--cis_del_scores', required=True,
                                  help='Oncodrive-CIS scores (gene to q-value); '
                                       'deletions only.')
    oncodrive_parser.add_argument('--fm_threshold', type=float, default=0.2,
                                  help='Maximum Oncodrive-FM q-value threshold')
    oncodrive_parser.add_argument('--cis_threshold', type=float, default=0.2,
                                  help='Maximum Oncodrive-CIS q-value threshold')
    oncodrive_parser.add_argument('--cis', default=False, action='store_true',
                                  help='Flag whether to include Oncodrive-CIS scores when '
                                       'generating the Oncodrive heat file.')
    oncodrive_parser.add_argument('--gene_filter_file', default=None,
                                  help=gene_filter_help)
    oncodrive_parser.set_defaults(heat_fn=load_oncodrive_heat)

    # --- mutsig: MutSig scores --------------------------------------------
    mutsig_parser = subparsers.add_parser('mutsig',
                                          help='MutSig scores',
                                          parents=[parent_parser])
    mutsig_parser.add_argument('--mutsig_score_file', required=True,
                               help='MutSig score file (gene to q-value).')
    mutsig_parser.add_argument('--threshold', type=float, default=1.0,
                               help='Maximum q-value threshold.')
    mutsig_parser.add_argument('--gene_filter_file', default=None,
                               help=gene_filter_help)
    mutsig_parser.set_defaults(heat_fn=load_mutsig_heat)

    # --- music: MuSiC scores ----------------------------------------------
    music_parser = subparsers.add_parser('music',
                                         help='MuSiC scores',
                                         parents=[parent_parser])
    music_parser.add_argument('--music_score_file', required=True,
                              help='MuSiC score file (gene to q-value).')
    music_parser.add_argument('--threshold', type=float, default=1.0,
                              help='Maximum q-value threshold.')
    music_parser.add_argument('--max_heat', type=float, default=15,
                              help='Max heat')
    music_parser.add_argument('--gene_filter_file', default=None,
                              help=gene_filter_help)
    music_parser.set_defaults(heat_fn=load_music_heat)

    return parser
示例#14
0
def get_parser():
    """Assemble the command-line parser for a generalized HotNet2 run.

    The top-level parser carries the run options (influence matrix, heat
    file, deltas, output directory, ...).  A sub-command, stored in the
    ``permutation_type`` attribute, selects the permutation scheme: ``none``,
    ``heat``, ``mutations``, ``network`` or ``precomputed``.  Every
    sub-command except ``none`` inherits the permutation-test options from a
    shared parent parser.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    usage_summary = "Runs generalized HotNet2.\
                   Note that some or all parameters can be specified via a configuration file by\
                   passing '@<ConfigFileName>' as a command-line parameter, e.g.\
                   'python runHotnet2.py @testConf.txt --runname TestRun'."
    top = hnap.HotNetArgParser(fromfile_prefix_chars='@',
                               description=usage_summary)

    # --- options of the run itself -------------------------------------
    top.add_argument('-r', '--runname', help='Name of run / disease.')
    top.add_argument('-mf', '--infmat_file', required=True,
                     help='Path to .mat file containing influence matrix')
    top.add_argument('-mn', '--infmat_name', default='PPR',
                     help='Variable name of the influence matrix in the .mat file')
    index_help = 'Path to tab-separated file containing an index in the first column\
                              and the name of the gene represented at that index in the second\
                              column of each line.'
    top.add_argument('-if', '--infmat_index_file', required=True,
                     help=index_help)
    top.add_argument('-hf', '--heat_file', required=True,
                     help='JSON heat score file generated via generateHeat.py')
    top.add_argument('-d', '--deltas', nargs='+', type=float, required=True,
                     help='Weight threshold for edge removal')
    top.add_argument('-ccs', '--min_cc_size', type=int, default=2,
                     help='Minimum size connected components that should be returned.')
    top.add_argument('-c', '--classic', default=False, action='store_true',
                     help='Run classic HotNet (rather than HotNet2).')
    output_help = 'Output directory. Files results.json, components.txt, and\
                              significance.txt will be generated in subdirectories for each delta.'
    top.add_argument('-o', '--output_directory', required=True,
                     help=output_help)

    # --- options shared by every permutation type -----------------------
    # add_help=False so the inheriting sub-commands do not receive a second
    # -h/--help option from this parent.
    shared_opts = hnap.HotNetArgParser(add_help=False,
                                       fromfile_prefix_chars='@')
    shared_opts.add_argument('-n', '--num_permutations', type=int,
                             required=True,
                             help='Number of permutation tests to run')
    shared_opts.add_argument(
        '-s', '--cc_start_size', type=int, default=2,
        help='Smallest connected component size to count in permutation tests')
    shared_opts.add_argument(
        '-l', '--cc_stop_size', type=int, default=10,
        help='Largest connected component size to count in permutation tests')
    cores_help = 'Number of cores to use for running permutation tests in\
                               parallel. If -1, all available cores will be used.'
    shared_opts.add_argument('-c', '--num_cores', type=int, default=1,
                             help=cores_help)

    perm_types = top.add_subparsers(title='Permutation type',
                                    dest='permutation_type')

    def add_permutation_type(name, summary):
        # Register a sub-command that inherits the shared permutation options.
        return perm_types.add_parser(name, help=summary,
                                     parents=[shared_opts])

    # 'none' takes no permutation options, so it does not use the parent.
    perm_types.add_parser(
        'none',
        help='Do not perform statistical significance permutation tests')

    # --- heat permutation ------------------------------------------------
    heat_cmd = add_permutation_type('heat', 'Permute heat scores')
    extra_genes_help = 'Path to file containing a list of additional genes that can have\
                                   permuted heat values assigned to them in permutation tests'
    heat_cmd.add_argument('-pgf', '--permutation_genes_file',
                          help=extra_genes_help)

    # --- mutation-data permutation ----------------------------------------
    mutations_cmd = add_permutation_type('mutations', 'Permute mutation data')
    gene_length_help = 'Path to tab-separated file containing gene names in the\
                                       first column and the length of the gene in base pairs in\
                                       the second column'
    mutations_cmd.add_argument('-glf', '--gene_length_file', required=True,
                               help=gene_length_help)
    gene_order_help = 'Path to file containing tab-separated lists of genes on\
                                 each chromosome, in order of their position on the chromosome,\
                                 one chromosome per line'
    mutations_cmd.add_argument('-gof', '--gene_order_file', required=True,
                               help=gene_order_help)
    mutations_cmd.add_argument('-b', '--bmr', type=float, required=True,
                               help='Default background mutation rate')
    bmr_file_help = 'File listing gene-specific BMRs. If none, the default BMR\
                                       will be used for all genes.'
    mutations_cmd.add_argument('-bf', '--bmr_file', help=bmr_file_help)

    # --- network permutation ----------------------------------------------
    network_cmd = add_permutation_type('network', 'Permute networks')
    permuted_networks_help = 'Path to influence matrices for permuted networks.\
                                      Include ' + ITERATION_REPLACEMENT_TOKEN + ' in the\
                                      path to be replaced with the iteration number'
    network_cmd.add_argument('-pnp', '--permuted_networks_path',
                             required=True, help=permuted_networks_help)

    # --- precomputed datasets -----------------------------------------------
    precomputed_cmd = add_permutation_type('precomputed',
                                           'Use precomputed datasets')
    datasets_help = 'Path to datasets to use for significance testing. Include ' \
        + ITERATION_REPLACEMENT_TOKEN + ' in the path to be replaced\
                                      with the iteration number.'
    precomputed_cmd.add_argument('-dp', '--datasets_path', required=True,
                                 help=datasets_help)

    return top
示例#15
0
def get_parser():
    """Build the argument parser for the simple-run HotNet2 helper script.

    The parser accepts one or more network files, matching permuted-network
    paths and heat files, plus options controlling delta selection, the
    number of permutations, output location, parallelism, display files and
    verbosity.  Arguments may also be read from a file given as
    ``@<filename>`` (``fromfile_prefix_chars='@'``).

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    # NOTE: adjacent string literals are joined with no separator, so every
    # fragment below that is continued on the next line ends in an explicit
    # space.  Without it the help text runs words together
    # (e.g. "automatedparameter", "If-1").
    description = ("Helper script for simple runs of generalized HotNet2, "
                   "including automated parameter selection.")
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    parser.add_argument(
        '-nf',
        '--network_files',
        required=True,
        nargs='*',
        help=
        'Path to HDF5 (.h5) file containing influence matrix and edge list.')
    parser.add_argument(
        '-pnp',
        '--permuted_network_paths',
        required=True,
        default='',
        nargs='*',
        help='Path to influence matrices for permuted networks, one path '
             'per network file. Include ' + ITERATION_REPLACEMENT_TOKEN + ' '
             'in the path to be replaced with the iteration number')
    parser.add_argument(
        '-hf',
        '--heat_files',
        required=True,
        nargs='*',
        help='Path to heat file containing gene names and scores. This can '
             'either be a JSON file created by generateHeat.py, in which '
             'case the file name must end in .json, or a tab-separated file '
             'containing a gene name in the first column and the heat score '
             'for that gene in the second column of each line.')
    parser.add_argument(
        '-ccs',
        '--min_cc_size',
        type=int,
        default=2,
        help='Minimum size connected components that should be returned.')
    parser.add_argument('-d',
                        '--deltas',
                        nargs='*',
                        type=float,
                        default=[],
                        help='Delta value(s).')
    parser.add_argument(
        '-np',
        '--network_permutations',
        type=int,
        default=100,
        help='Number of permutations to be used for delta parameter selection.'
    )
    parser.add_argument(
        '-cp',
        '--consensus_permutations',
        type=int,
        default=0,
        help=
        'Number of permutations to be used for consensus statistical significance testing.'
    )
    parser.add_argument(
        '-hp',
        '--heat_permutations',
        type=int,
        default=100,
        help=
        'Number of permutations to be used for statistical significance testing.'
    )
    parser.add_argument(
        '-o',
        '--output_directory',
        required=True,
        default=None,
        help='Output directory. Files results.json, components.txt, and '
             'significance.txt will be generated in subdirectories for each '
             'delta.')
    parser.add_argument(
        '-c',
        '--num_cores',
        type=int,
        default=1,
        help='Number of cores to use for running permutation tests in '
             'parallel. If -1, all available cores will be used.')
    parser.add_argument(
        '-dsf',
        '--display_score_file',
        help='Path to a tab-separated file containing a gene name in the '
             'first column and the display score for that gene in the second '
             'column of each line.')
    parser.add_argument(
        '-dnf',
        '--display_name_file',
        help='Path to a tab-separated file containing a gene name in the '
             'first column and the display name for that gene in the second '
             'column of each line.')
    parser.add_argument(
        '--output_hierarchy',
        default=False,
        required=False,
        action='store_true',
        help=
        'Output the hierarchical decomposition of the HotNet2 similarity matrix.'
    )
    # choices=range(5) accepts 0..4, so the documented maximum is 4; the set
    # of accepted values is deliberately left unchanged.
    parser.add_argument(
        '--verbose',
        default=1,
        choices=range(5),
        type=int,
        required=False,
        help='Set verbosity of output (minimum: 0, maximum: 4).')

    return parser
示例#16
0
def get_parser():
    """Build the command-line parser for the HotNet threshold-finding script.

    The top-level parser only dispatches to a sub-command, stored in the
    ``perm_type`` attribute: ``network``, ``heat`` or ``mutations``.  All
    three inherit the common options (run name, influence-matrix name and
    index file, heat file, number of permutations, test statistic, sizes,
    cores, classic mode, output file) from a shared parent parser.  The
    ``heat`` and ``mutations`` sub-commands additionally require
    ``--infmat_file``.

    Returns:
        The configured ``hnap.HotNetArgParser``.
    """
    description = "Runs HotNet threshold-finding procedure.\
                   Note that some or all parameters can be specified via a configuration file by\
                   passing '@<ConfigFileName>' as a command-line parameter, e.g.\
                   'python findThreshold.py @testConf.txt --runname TestRun'."

    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Parent parser for arguments common to all permutation types.
    # add_help=False keeps the sub-commands from getting a duplicate -h.
    parent_parser = hnap.HotNetArgParser(add_help=False,
                                         fromfile_prefix_chars='@')
    parent_parser.add_argument('-r',
                               '--runname',
                               help='Name of run / disease.')
    parent_parser.add_argument(
        '-mn',
        '--infmat_name',
        default='PPR',
        help='Variable name of the influence matrices in the .mat files')
    parent_parser.add_argument(
        '-if',
        '--infmat_index_file',
        required=True,
        default=None,
        help='Path to tab-separated file containing an index in the first\
                                     column and the name of the gene represented at that index in\
                                     the second column of each line.')
    parent_parser.add_argument(
        '-hf',
        '--heat_file',
        required=True,
        help='JSON heat score file generated via generateHeat.py')
    parent_parser.add_argument('-n',
                               '--num_permutations',
                               type=int,
                               required=True,
                               help='Number of permuted data sets to generate')
    # The NUM_CCS clause previously had no separator after the constant, so
    # its value ran straight into "select"; it now mirrors the MAX_CC_SIZE
    # clause with ", select".
    parent_parser.add_argument(
        '-s',
        '--test_statistic',
        choices=[MAX_CC_SIZE, NUM_CCS],
        default=MAX_CC_SIZE,
        help='If ' + MAX_CC_SIZE + ', select smallest delta for each permuted '
             'dataset such that the size of the largest CC is <= l. If ' +
             NUM_CCS + ', select for each permuted dataset the delta that '
             'maximizes the number of CCs of size >= l.')
    parent_parser.add_argument('-l',
                               '--sizes',
                               nargs='+',
                               type=int,
                               help='See test_statistic')
    parent_parser.add_argument(
        '-c',
        '--num_cores',
        type=int,
        default=1,
        help='Number of cores to use for running permutation tests in\
                               parallel. If -1, all available cores will be used.'
    )
    parent_parser.add_argument(
        '--classic',
        default=False,
        action='store_true',
        help='Run classic (instead of directed) HotNet.')
    parent_parser.add_argument(
        '-o',
        '--output_file',
        help='Output file.  If none given, output will be written to stdout.')

    subparsers = parser.add_subparsers(title='Permutation techniques',
                                       dest="perm_type")

    # Sub-command: permute networks.
    network_parser = subparsers.add_parser('network',
                                           help='Permute networks',
                                           parents=[parent_parser])
    network_parser.add_argument(
        '-pnp',
        '--permuted_networks_path',
        required=True,
        help='Path to influence matrices for permuted networks.\
                                      Include ' + ITERATION_REPLACEMENT_TOKEN +
        ' in the\
                                      path to be replaced with the iteration number'
    )

    # Sub-command: permute heat scores.
    heat_parser = subparsers.add_parser('heat',
                                        help='Permute heat scores',
                                        parents=[parent_parser])
    heat_parser.add_argument(
        '-mf',
        '--infmat_file',
        required=True,
        help='Path to .mat file containing influence matrix')
    heat_parser.add_argument(
        '-pgf',
        '--permutation_genes_file',
        default=None,
        help='Path to file containing a list of additional genes that can have\
                                   permuted heat values assigned to them in permutation tests'
    )

    # Sub-command: permute mutation data.
    mutation_parser = subparsers.add_parser('mutations',
                                            help='Permute mutation data',
                                            parents=[parent_parser])
    mutation_parser.add_argument(
        '-mf',
        '--infmat_file',
        required=True,
        help='Path to .mat file containing influence matrix')
    mutation_parser.add_argument(
        '-glf',
        '--gene_length_file',
        required=True,
        help='Path to tab-separated file containing gene names in the\
                                       first column and the length of the gene in base pairs in\
                                       the second column')
    mutation_parser.add_argument(
        '-gof',
        '--gene_order_file',
        required=True,
        help='Path to file containing tab-separated lists of genes on '
             'each chromosome, in order of their position on the chromosome, '
             'one chromosome per line')
    mutation_parser.add_argument('-b',
                                 '--bmr',
                                 type=float,
                                 required=True,
                                 help='Default background mutation rate')
    mutation_parser.add_argument(
        '-bf',
        '--bmr_file',
        help='File listing gene-specific BMRs. If none, the default BMR\
                                       will be used for all genes.')
    return parser