def get_settings():
    """Parse the command line for the ``--build`` option.

    Registers a single ``--build`` option on a ``VerboseParser``, parses
    the arguments, and checks that a supplied build name begins with
    ``hg``.

    Returns:
        The ``(opts, args)`` tuple produced by ``parse_args()``.

    Exits:
        Writes an error to stderr and calls ``sys.exit(1)`` when a build
        was given that does not begin with ``hg``.
    """
    cli = VerboseParser()
    cli.add_option(
        "--build",
        dest="build",
        help=(
            "Genome build to download tables from. This uses UCSC's "
            "naming convention, i.e. hg17, hg18, etc. "
            "Defaults to the latest human genome build."
        )
    )
    options, positional = cli.parse_args()

    # Check build convention.
    build = options.build
    if build is not None and not build.startswith('hg'):
        sys.stderr.write("Error: build must look like 'hg19', 'hg18', etc.\n")
        sys.exit(1)

    return (options, positional)
def getCmdLine(self):
    """Parse the command line and copy the results onto this instance.

    Builds a ``VerboseParser``, registers every supported option (using
    the current values of ``self.distance``, ``self.num_genes``,
    ``self.scandb_pval``, ``self.pnum``, ``self.outdir`` and
    ``self.console`` as defaults / help text), parses the arguments and
    then updates the instance.

    Attributes written (some only when the matching option was given):
        console, build, genes, snpset, regions, terms, distance,
        num_genes, omim, pubmed, pnum, per_term, gene_rif, scandb,
        scandb_pval, mimi, outdir.

    Side effects:
        * Sets ``__builtin__._SNIPPER_DEBUG`` to reflect ``--debug``.
        * Creates the output directory with ``mkpath`` when arguments
          were supplied and console mode is off.

    Exits:
        * ``sys.exit(1)`` when positional arguments are present.
        * ``sys.exit(<message>)`` when the output directory already
          exists.
    """
    parser = VerboseParser()

    # Remember: default action is store, default type is string.
    snp_help = (
        "Lookup information for a list of SNPs - these must be separated "
        "by commas, surrounded by quotes (whitespace ignored.)\n\n"
        "Examples:\n"
        '-s "rs1002227, rs35712349"\n'
        '-s " rs1002227, rs35712349"\n'
        '-s " rs1002227,rs35712349 "\n'
        '-s "rs1002227,rs35712349"'
    )
    parser.add_option("-s", "--snp", dest="snp", help=snp_help)

    gene_help = (
        "Lookup information for a list of gene symbols - these must be "
        "separated by commas, surrounded by quotes (whitespace ignored)"
    )
    parser.add_option("-g", "--gene", dest="gene", help=gene_help)

    snpfile_help = (
        "Provide a list of SNPs to lookup from a file. "
        "The file may have *ANY* format. "
        "The program will pattern match rs### identifiers from your file."
    )
    parser.add_option("--snpfile", dest="snpfile", help=snpfile_help)

    genefile_help = (
        "Provide a list of gene symbols in a file. This option "
        "unfortunately is not as lenient as --snpfile: you must put "
        "each gene symbol on a separate line in the file."
    )
    parser.add_option("--genefile", dest="genefile", help=genefile_help)

    regions_help = (
        "Provide a list of chromosomal regions. Genes and other "
        "elements inside these regions will be returned.\n"
        "Example:\n"
        ' --regions "chr4:19141-939393,chrX:9191-939393"'
    )
    parser.add_option("-r", "--regions", help=regions_help)

    build_help = (
        "Human genome build to use for SNP positions and genes. "
        "Snipper ships with hg19 by default. You can download "
        "pre-built databases from our website, or build them "
        "yourself using the bin/setup_snipper.py script."
    )
    parser.add_option("-b", "--build", dest="build", help=build_help)

    dist_help = (
        "Distance away from SNP to search, default is "
        + str(self.distance) + ". "
        "If a distance is specified, the program will return *ALL* genes "
        "within the distance you specify, not just the default of 1. "
        "To specify a new distance, but still only return 1 gene "
        "(or arbitrary number of genes), use -n <number>. "
        "Distances can be specified using a kb or mb suffix, or as a raw "
        "distance. Examples: 500kb, 0.5MB, 1.4MB, 834141."
    )
    parser.add_option("-d", dest="distance", help=dist_help, type="string")
    parser.add_option(
        "-n",
        dest="num_genes",
        help="Number of genes to return per SNP, default is "
             + str(self.num_genes),
        type="int"
    )

    terms_help = (
        "Comma-delimited string of terms, enclosed in quotes, to use in "
        "searching the literature. "
        "This will execute a search, per gene, for any of the search "
        "terms. For example:\n"
        "\n"
        "Genes: RB1, TCF7L2\n"
        'Search terms: "glucose,retinoblastoma"\n'
        "What happens: \n"
        "-- Search literature for RB1 AND (glucose OR retinoblastoma)\n"
        "-- Search literature for TCF7L2 AND (glucose OR retinoblastoma)"
    )
    parser.add_option("--terms", dest="terms", help=terms_help)

    # NOTE: the --no-* switches store True into a dest named after the
    # feature; the values are inverted further down when copied to self.
    parser.add_option("--no-generif", dest="gene_rif", action="store_true",
                      default=False, help="Disable GeneRIFs.")
    parser.add_option(
        "--no-scandb", dest="no_scandb", action="store_true", default=False,
        help="Disable use of ScanDB for eQTL information. "
             "Enabled by default."
    )
    parser.add_option("--scandb-pval", dest="scandb_pval",
                      default=self.scandb_pval,
                      help="P-value threshold for ScanDB eQTLs.")
    parser.add_option(
        "--no-mimi", dest="mimi", action="store_true", default=False,
        help="Disable querying of MiMI database for interactions between "
             "genes near SNPs."
    )
    parser.add_option("--no-omim", dest="omim", action="store_true",
                      default=False,
                      help="Disable display and search of OMIM text.")
    parser.add_option("--no-pubmed", dest="pubmed", action="store_true",
                      default=False, help="Disable searching PubMed.")
    parser.add_option(
        "--papernum", dest="pnum", type="int", default=self.pnum,
        help="Number of papers to display, default is " + str(self.pnum)
    )

    each_term_help = (
        "When specified, the program will search each gene x searchterm "
        "pair, instead of lumping together search terms. "
        "For example:\n"
        "\n"
        "Genes: RB1, TCF7L2\n"
        'Search terms: "glucose,retinoblastoma"\n'
        "What happens:\n"
        "-- Search literature for RB1 AND glucose\n"
        "-- Search literature for RB1 AND retinoblastoma\n"
        "-- Search literature for TCF7L2 AND glucose\n"
        "-- Search literature for TCF7L2 AND retinoblastoma\n"
        "\n"
        "This is a much more in-depth search, at the cost of running time "
        "- NCIBI limits to 1 query / 3 seconds. "
        "If you have a very large set of genes and search terms, this can "
        "take a VERY long time to run!"
    )
    parser.add_option("--each-term", dest="per_term", default=False,
                      action="store_true", help=each_term_help)
    parser.add_option("--all", dest="all", action="store_true",
                      default=True, help=SUPPRESS_HELP)
    parser.add_option(
        "-o", "--out", dest="outdir", default=self.outdir,
        help="Directory to use for storing output. This should be a "
             "directory that does not exist yet."
    )
    parser.add_option(
        "--console", dest="console", action="store_true",
        default=self.console,
        help="Write results to console, instead of creating directory "
             "with HTML/text results."
    )
    parser.add_option("--debug", dest="debug", action="store_true",
                      default=False, help=SUPPRESS_HELP)

    # Parse args.
    (options, args) = parser.parse_args()

    # Was debug enabled?
    __builtin__._SNIPPER_DEBUG = bool(options.debug)

    # If there are positional arguments, there was an error on the
    # command line. Let them know the potential problem.
    if args:
        sys.stderr.write(
            "Error: positional arguments detected: " + str(args) + "\n"
        )
        sys.stderr.write(
            "Most likely, you simply forgot to surround the entire "
            "argument in quotes.\n"
        )
        sys.stderr.write(
            'Example: -g "RB1, TCF7L2" is correct, '
            'whereas -g "RB1" "TCF7L2" is wrong.\n'
        )
        sys.exit(1)

    # Console mode?
    self.console = options.console

    # Human genome build.
    if options.build:
        self.build = options.build

    # Get genes from command line (and file, if specified.)
    # NOTE(review): the file branches call .update() on self.genes /
    # self.snpset, so those are presumably sets already - confirm against
    # the class initializer.
    if options.gene is not None:
        self.genes = set(i.upper() for i in parseTerms(options.gene))
    if options.genefile is not None:
        self.genes.update(parseGeneFile(options.genefile))

    # Get the set of SNPs.
    if options.snp is not None:
        self.snpset = set(parseTerms(options.snp))
    if options.snpfile is not None:
        self.snpset.update(parseSNPFile(options.snpfile))

    # Get regions.
    if options.regions is not None:
        self.regions = parseRegions(options.regions)

    # Get list of search terms.
    if options.terms is not None:
        self.terms = parseTerms(options.terms)

    # HACK: If distance is specified, but num_genes is not,
    # then we want to search for all genes in that distance space.
    # Set num_genes to a really really big number. :)
    if options.distance is not None:
        self.distance = convertFlank(options.distance)
        if options.num_genes is not None:
            self.num_genes = options.num_genes
        else:
            self.num_genes = 9999
    elif options.num_genes is not None:
        self.num_genes = options.num_genes

    # OMIM?
    self.omim = not options.omim

    # Pubmed?
    self.pubmed = not options.pubmed
    self.pnum = options.pnum
    self.per_term = options.per_term

    # GeneRIF?
    self.gene_rif = not options.gene_rif

    # ScanDB? The p-value threshold is only read when ScanDB stays on.
    if options.no_scandb:
        self.scandb = False
    else:
        self.scandb_pval = float(options.scandb_pval)

    # MIMI?
    self.mimi = not options.mimi

    # Check the output directory specified by the user.
    # It should not already exist.
    if len(sys.argv) > 1 and not self.console:
        self.outdir = options.outdir
        if os.path.exists(self.outdir):
            # The option is registered as -o/--out, so name that flag.
            msg = ("Error: output directory already exists: %s\n"
                   "Please rename or move this directory, or use "
                   "--out or -o to change the directory name."
                   % self.outdir)
            sys.exit(msg)
        else:
            mkpath(self.outdir)
def getCmdLine(self):
    """Parse the command line and copy the results onto this instance.

    Builds a ``VerboseParser``, registers every supported option (using
    the current values of ``self.distance``, ``self.num_genes``,
    ``self.scandb_pval``, ``self.pnum``, ``self.outdir`` and
    ``self.console`` as defaults / help text), parses the arguments and
    then updates the instance.

    Attributes written (some only when the matching option was given):
        console, build, genes, snpset, regions, terms, distance,
        num_genes, omim, pubmed, pnum, per_term, gene_rif, scandb,
        scandb_pval, mimi, outdir.

    Side effects:
        * Sets ``__builtin__._SNIPPER_DEBUG`` to reflect ``--debug``.
        * Creates the output directory with ``mkpath`` when arguments
          were supplied and console mode is off.

    Exits:
        * ``sys.exit(1)`` when positional arguments are present.
        * ``sys.exit(<message>)`` when the output directory already
          exists.
    """
    parser = VerboseParser()

    # Remember: default action is store, default type is string.
    snp_help = (
        "Lookup information for a list of SNPs - these must be separated "
        "by commas, surrounded by quotes (whitespace ignored.)\n\n"
        "Examples:\n"
        '-s "rs1002227, rs35712349"\n'
        '-s " rs1002227, rs35712349"\n'
        '-s " rs1002227,rs35712349 "\n'
        '-s "rs1002227,rs35712349"'
    )
    parser.add_option("-s", "--snp", dest="snp", help=snp_help)

    gene_help = (
        "Lookup information for a list of gene symbols - these must be "
        "separated by commas, surrounded by quotes (whitespace ignored)"
    )
    parser.add_option("-g", "--gene", dest="gene", help=gene_help)

    snpfile_help = (
        "Provide a list of SNPs to lookup from a file. "
        "The file may have *ANY* format. "
        "The program will pattern match rs### identifiers from your file."
    )
    parser.add_option("--snpfile", dest="snpfile", help=snpfile_help)

    genefile_help = (
        "Provide a list of gene symbols in a file. This option "
        "unfortunately is not as lenient as --snpfile: you must put "
        "each gene symbol on a separate line in the file."
    )
    parser.add_option("--genefile", dest="genefile", help=genefile_help)

    regions_help = (
        "Provide a list of chromosomal regions. Genes and other "
        "elements inside these regions will be returned.\n"
        "Example:\n"
        ' --regions "chr4:19141-939393,chrX:9191-939393"'
    )
    parser.add_option("-r", "--regions", help=regions_help)

    build_help = (
        "Human genome build to use for SNP positions and genes. "
        "Snipper ships with hg19 by default. You can download "
        "pre-built databases from our website, or build them "
        "yourself using the bin/setup_snipper.py script."
    )
    parser.add_option("-b", "--build", dest="build", help=build_help)

    dist_help = (
        "Distance away from SNP to search, default is "
        + str(self.distance) + ". "
        "If a distance is specified, the program will return *ALL* genes "
        "within the distance you specify, not just the default of 1. "
        "To specify a new distance, but still only return 1 gene "
        "(or arbitrary number of genes), use -n <number>. "
        "Distances can be specified using a kb or mb suffix, or as a raw "
        "distance. Examples: 500kb, 0.5MB, 1.4MB, 834141."
    )
    parser.add_option("-d", dest="distance", help=dist_help, type="string")
    parser.add_option(
        "-n",
        dest="num_genes",
        help="Number of genes to return per SNP, default is "
             + str(self.num_genes),
        type="int"
    )

    terms_help = (
        "Comma-delimited string of terms, enclosed in quotes, to use in "
        "searching the literature. "
        "This will execute a search, per gene, for any of the search "
        "terms. For example:\n"
        "\n"
        "Genes: RB1, TCF7L2\n"
        'Search terms: "glucose,retinoblastoma"\n'
        "What happens: \n"
        "-- Search literature for RB1 AND (glucose OR retinoblastoma)\n"
        "-- Search literature for TCF7L2 AND (glucose OR retinoblastoma)"
    )
    parser.add_option("--terms", dest="terms", help=terms_help)

    # NOTE: the --no-* switches store True into a dest named after the
    # feature; the values are inverted further down when copied to self.
    parser.add_option("--no-generif", dest="gene_rif", action="store_true",
                      default=False, help="Disable GeneRIFs.")
    parser.add_option(
        "--no-scandb", dest="no_scandb", action="store_true", default=False,
        help="Disable use of ScanDB for eQTL information. "
             "Enabled by default."
    )
    parser.add_option("--scandb-pval", dest="scandb_pval",
                      default=self.scandb_pval,
                      help="P-value threshold for ScanDB eQTLs.")
    parser.add_option(
        "--no-mimi", dest="mimi", action="store_true", default=False,
        help="Disable querying of MiMI database for interactions between "
             "genes near SNPs."
    )
    parser.add_option("--no-omim", dest="omim", action="store_true",
                      default=False,
                      help="Disable display and search of OMIM text.")
    parser.add_option("--no-pubmed", dest="pubmed", action="store_true",
                      default=False, help="Disable searching PubMed.")
    parser.add_option(
        "--papernum", dest="pnum", type="int", default=self.pnum,
        help="Number of papers to display, default is " + str(self.pnum)
    )

    each_term_help = (
        "When specified, the program will search each gene x searchterm "
        "pair, instead of lumping together search terms. "
        "For example:\n"
        "\n"
        "Genes: RB1, TCF7L2\n"
        'Search terms: "glucose,retinoblastoma"\n'
        "What happens:\n"
        "-- Search literature for RB1 AND glucose\n"
        "-- Search literature for RB1 AND retinoblastoma\n"
        "-- Search literature for TCF7L2 AND glucose\n"
        "-- Search literature for TCF7L2 AND retinoblastoma\n"
        "\n"
        "This is a much more in-depth search, at the cost of running time "
        "- NCIBI limits to 1 query / 3 seconds. "
        "If you have a very large set of genes and search terms, this can "
        "take a VERY long time to run!"
    )
    parser.add_option("--each-term", dest="per_term", default=False,
                      action="store_true", help=each_term_help)
    parser.add_option("--all", dest="all", action="store_true",
                      default=True, help=SUPPRESS_HELP)
    parser.add_option(
        "-o", "--out", dest="outdir", default=self.outdir,
        help="Directory to use for storing output. This should be a "
             "directory that does not exist yet."
    )
    parser.add_option(
        "--console", dest="console", action="store_true",
        default=self.console,
        help="Write results to console, instead of creating directory "
             "with HTML/text results."
    )
    parser.add_option("--debug", dest="debug", action="store_true",
                      default=False, help=SUPPRESS_HELP)

    # Parse args.
    (options, args) = parser.parse_args()

    # Was debug enabled?
    __builtin__._SNIPPER_DEBUG = bool(options.debug)

    # If there are positional arguments, there was an error on the
    # command line. Let them know the potential problem.
    if args:
        sys.stderr.write(
            "Error: positional arguments detected: " + str(args) + "\n"
        )
        sys.stderr.write(
            "Most likely, you simply forgot to surround the entire "
            "argument in quotes.\n"
        )
        sys.stderr.write(
            'Example: -g "RB1, TCF7L2" is correct, '
            'whereas -g "RB1" "TCF7L2" is wrong.\n'
        )
        sys.exit(1)

    # Console mode?
    self.console = options.console

    # Human genome build.
    if options.build:
        self.build = options.build

    # Get genes from command line (and file, if specified.)
    # NOTE(review): the file branches call .update() on self.genes /
    # self.snpset, so those are presumably sets already - confirm against
    # the class initializer.
    if options.gene is not None:
        self.genes = set(i.upper() for i in parseTerms(options.gene))
    if options.genefile is not None:
        self.genes.update(parseGeneFile(options.genefile))

    # Get the set of SNPs.
    if options.snp is not None:
        self.snpset = set(parseTerms(options.snp))
    if options.snpfile is not None:
        self.snpset.update(parseSNPFile(options.snpfile))

    # Get regions.
    if options.regions is not None:
        self.regions = parseRegions(options.regions)

    # Get list of search terms.
    if options.terms is not None:
        self.terms = parseTerms(options.terms)

    # HACK: If distance is specified, but num_genes is not,
    # then we want to search for all genes in that distance space.
    # Set num_genes to a really really big number. :)
    if options.distance is not None:
        self.distance = convertFlank(options.distance)
        if options.num_genes is not None:
            self.num_genes = options.num_genes
        else:
            self.num_genes = 9999
    elif options.num_genes is not None:
        self.num_genes = options.num_genes

    # OMIM?
    self.omim = not options.omim

    # Pubmed?
    self.pubmed = not options.pubmed
    self.pnum = options.pnum
    self.per_term = options.per_term

    # GeneRIF?
    self.gene_rif = not options.gene_rif

    # ScanDB? The p-value threshold is only read when ScanDB stays on.
    if options.no_scandb:
        self.scandb = False
    else:
        self.scandb_pval = float(options.scandb_pval)

    # MIMI?
    self.mimi = not options.mimi

    # Check the output directory specified by the user.
    # It should not already exist.
    if len(sys.argv) > 1 and not self.console:
        self.outdir = options.outdir
        if os.path.exists(self.outdir):
            # The option is registered as -o/--out, so name that flag.
            msg = ("Error: output directory already exists: %s\n"
                   "Please rename or move this directory, or use "
                   "--out or -o to change the directory name."
                   % self.outdir)
            sys.exit(msg)
        else:
            mkpath(self.outdir)