예제 #1
0
파일: build_db.py 프로젝트: welchr/snipper
def get_settings():
  """Parse the command line of the database build script.

  Registers a single ``--build`` option on a VerboseParser and parses the
  command line with it.

  Returns:
    The ``(opts, args)`` pair produced by ``parser.parse_args()``.

  Exits:
    Writes an error to stderr and terminates with status 1 when a build
    was supplied that does not start with ``hg``.
  """
  parser = VerboseParser()

  build_help = (
    "Genome build to download tables from. This uses UCSC's "
    "naming convention, i.e. hg17, hg18, etc. Defaults to the latest human genome build."
  )
  parser.add_option("--build", dest="build", help=build_help)
  opts, args = parser.parse_args()

  # Check build convention. Only the "hg" prefix is enforced; an omitted
  # --build (None) is passed through untouched.
  if opts.build is not None and not opts.build.startswith("hg"):
    # sys.stderr.write works identically on Python 2 and 3, whereas the
    # "print >> stream" chevron form only works on Python 2.
    sys.stderr.write("Error: build must look like 'hg19', 'hg18', etc.\n")
    sys.exit(1)

  return (opts, args)
예제 #2
0
파일: settings.py 프로젝트: welchr/Snipper
  def getCmdLine(self):
    parser = VerboseParser();

    # Remember: default action is store, default type is string. 
    snp_help = "Lookup information for a list of SNPs - these must be separated by commas, surrounded by quotes (whitespace ignored.)\n\n" +\
      "Examples:\n" +\
      "-s \"rs1002227, rs35712349\"\n" +\
      "-s \"  rs1002227,    rs35712349\"\n" +\
      "-s \" rs1002227,rs35712349 \"\n" +\
      "-s \"rs1002227,rs35712349\"";
    parser.add_option("-s","--snp",dest="snp",help=snp_help);
    parser.add_option("-g","--gene",dest="gene",help="Lookup information for a list of gene symbols - these must be separated by commas, surrounded by quotes (whitespace ignored)");

    snpfile_help = "Provide a list of SNPs to lookup from a file. The file may have *ANY* format. \
  The program will pattern match rs### identifiers from your file.";
    parser.add_option("--snpfile",dest="snpfile",help=snpfile_help);

    genefile_help = "Provide a list of gene symbols in a file. This option unfortunately is not as lenient as --snpfile: you must put \
  each gene symbol on a separate line in the file.";
    parser.add_option("--genefile",dest="genefile",help=genefile_help);
    
    regions_help = "Provide a list of chromosomal regions. Genes and other "\
                   "elements inside these regions will be returned.\n"\
                   "Example:\n"\
                   " --regions \"chr4:19141-939393,chrX:9191-939393\"";
    parser.add_option("-r","--regions",help=regions_help);
    
    build_help = ("Human genome build to use for SNP positions and genes. "
                  "Snipper ships with hg19 by default. You can download "
                  "pre-built databases from our website, or build them "
                  "yourself using the bin/setup_snipper.py script.");
    parser.add_option("-b","--build",dest="build",help=build_help);

    dist_help = "Distance away from SNP to search, default is " + str(self.distance) + ". " +\
      "If a distance is specified, the program will return *ALL* genes within the distance you specify, not just the default of 1. " +\
      "To specify a new distance, but still only return 1 gene (or arbitrary number of genes), use -n <number>. " +\
      "Distances can be specified using a kb or mb suffix, or as a raw distance. Examples: 500kb, 0.5MB, 1.4MB, 834141.";
    parser.add_option("-d",dest="distance",help=dist_help,type="string");
    parser.add_option("-n",dest="num_genes",help="Number of genes to return per SNP, default is " + str(self.num_genes),type="int");
    
    terms_help = "Comma-delimited string of terms, enclosed in quotes, to use in searching the literature. \
  This will execute a search, per gene, for any of the search terms. For example:\n\
  \n\
  Genes: RB1, TCF7L2\n\
  Search terms: \"glucose,retinoblastoma\"\n\
  What happens: \n\
  -- Search literature for RB1 AND (glucose OR retinoblastoma)\n\
  -- Search literature for TCF7L2 AND (glucose OR retinoblastoma)";

    parser.add_option("--terms",dest="terms",help=terms_help); 
    parser.add_option("--no-generif",dest="gene_rif",action="store_true",default=False,help="Disable GeneRIFs.");
    parser.add_option("--no-scandb",dest="no_scandb",action="store_true",default=False,help="Disable use of ScanDB for eQTL information. Enabled by default.");
    parser.add_option("--scandb-pval",dest="scandb_pval",default=self.scandb_pval,help="P-value threshold for ScanDB eQTLs.");
    parser.add_option("--no-mimi",dest="mimi",action="store_true",default=False,help="Disable querying of MiMI database for interactions between genes near SNPs.");
    parser.add_option("--no-omim",dest="omim",action="store_true",default=False,help="Disable display and search of OMIM text.");
    parser.add_option("--no-pubmed",dest="pubmed",action="store_true",default=False,help="Disable searching PubMed.");
    parser.add_option("--papernum",dest="pnum",type="int",default=self.pnum,help="Number of papers to display, default is " + str(self.pnum));
    
    each_term_help = "When specified, the program will search each gene x searchterm pair, instead of lumping together search terms. \
  For example:\n\
  \n\
  Genes: RB1, TCF7L2\n\
  Search terms: \"glucose,retinoblastoma\"\n\
  What happens:\n\
  -- Search literature for RB1 AND glucose\n\
  -- Search literature for RB1 AND retinoblastoma\n\
  -- Search literature for TCF7L2 AND glucose\n\
  -- Search literature for TCF7L2 AND retinoblastoma\n\
  \n\
This is a much more in-depth search, at the cost of running time - NCIBI limits to 1 query / 3 seconds. \
If you have a very large set of genes and search terms, this can take a VERY long time to run!";
    
    parser.add_option("--each-term",dest="per_term",default=False,action="store_true",help=each_term_help);
    parser.add_option("--all",dest="all",action="store_true",default=True,help=SUPPRESS_HELP);
    parser.add_option("-o","--out",dest="outdir",default=self.outdir,help="Directory to use for storing output. This should be a directory that does not exist yet.");
    parser.add_option("--console",dest="console",action="store_true",default=self.console,help="Write results to console, instead of creating directory with HTML/text results.");
    parser.add_option("--debug",dest="debug",action="store_true",default=False,help=SUPPRESS_HELP);

    # Parse args. 
    (options,args) = parser.parse_args();

    # Was debug enabled?
    if options.debug:
      __builtin__._SNIPPER_DEBUG = True;
    else:
      __builtin__._SNIPPER_DEBUG = False;

    # If there are positional arguments, there was an error on the command line. 
    # Let them know the potential problem. 
    if len(args) > 0:
      print >> sys.stderr, "Error: positional arguments detected: " + str(args);
      print >> sys.stderr, "Most likely, you simply forgot to surround the entire argument in quotes."
      print >> sys.stderr, "Example: -g \"RB1, TCF7L2\" is correct, whereas -g \"RB1\" \"TCF7L2\" is wrong."
      sys.exit(1);

    # Console mode? 
    self.console = options.console;

    # Human genome build. 
    if options.build:
      self.build = options.build;

    # Get genes from command line (and file, if specified.) 
    if options.gene != None:
      self.genes = set([i.upper() for i in parseTerms(options.gene)]);
    if options.genefile != None:
      self.genes.update(parseGeneFile(options.genefile));

    # Get the set of SNPs. 
    if options.snp != None:
      self.snpset = set(parseTerms(options.snp));
    if options.snpfile != None:
      self.snpset.update(parseSNPFile(options.snpfile));

    # Get regions. 
    if options.regions != None:
      self.regions = parseRegions(options.regions);

    # Get list of search terms. 
    if options.terms != None:
      self.terms = parseTerms(options.terms);

    # HACK: If distance is specified, but num_genes is not,
    # then we want to search for all genes in that distance space. 
    # Set num_genes to a really really big number. :) 
    if options.distance != None:
      options.distance = convertFlank(options.distance);
      if options.num_genes != None:
        self.distance = options.distance;
        self.num_genes = options.num_genes;
      else:
        self.distance = options.distance;
        self.num_genes = 9999;
    else:
      if options.num_genes != None:
        self.num_genes = options.num_genes;

    # OMIM? 
    self.omim = not options.omim;

    # Pubmed?
    self.pubmed = not options.pubmed;
    self.pnum = options.pnum; 
    self.per_term = options.per_term;

    # GeneRIF? 
    self.gene_rif = not options.gene_rif;
    
    # ScanDB?
    if options.no_scandb:
      self.scandb = False;
    else:
      self.scandb_pval = float(options.scandb_pval);

    # MIMI?
    self.mimi = not options.mimi;
    
    # Check the output directory specified by the user.
    # It should not already exist. 
    if len(sys.argv) > 1 and not self.console:
      self.outdir = options.outdir;
      if os.path.exists(self.outdir):
        msg = "Error: output directory already exists: %s\n"\
              "Please rename or move this directory, or use "\
              "--output or -o to change the directory name." % self.outdir;
        sys.exit(msg);
      else:
        mkpath(self.outdir);
예제 #3
0
    def getCmdLine(self):
        parser = VerboseParser()

        # Remember: default action is store, default type is string.
        snp_help = "Lookup information for a list of SNPs - these must be separated by commas, surrounded by quotes (whitespace ignored.)\n\n" +\
          "Examples:\n" +\
          "-s \"rs1002227, rs35712349\"\n" +\
          "-s \"  rs1002227,    rs35712349\"\n" +\
          "-s \" rs1002227,rs35712349 \"\n" +\
          "-s \"rs1002227,rs35712349\""
        parser.add_option("-s", "--snp", dest="snp", help=snp_help)
        parser.add_option(
            "-g",
            "--gene",
            dest="gene",
            help=
            "Lookup information for a list of gene symbols - these must be separated by commas, surrounded by quotes (whitespace ignored)"
        )

        snpfile_help = "Provide a list of SNPs to lookup from a file. The file may have *ANY* format. \
  The program will pattern match rs### identifiers from your file."

        parser.add_option("--snpfile", dest="snpfile", help=snpfile_help)

        genefile_help = "Provide a list of gene symbols in a file. This option unfortunately is not as lenient as --snpfile: you must put \
  each gene symbol on a separate line in the file."

        parser.add_option("--genefile", dest="genefile", help=genefile_help)

        regions_help = "Provide a list of chromosomal regions. Genes and other "\
                       "elements inside these regions will be returned.\n"\
                       "Example:\n"\
                       " --regions \"chr4:19141-939393,chrX:9191-939393\""
        parser.add_option("-r", "--regions", help=regions_help)

        build_help = ("Human genome build to use for SNP positions and genes. "
                      "Snipper ships with hg19 by default. You can download "
                      "pre-built databases from our website, or build them "
                      "yourself using the bin/setup_snipper.py script.")
        parser.add_option("-b", "--build", dest="build", help=build_help)

        dist_help = "Distance away from SNP to search, default is " + str(self.distance) + ". " +\
          "If a distance is specified, the program will return *ALL* genes within the distance you specify, not just the default of 1. " +\
          "To specify a new distance, but still only return 1 gene (or arbitrary number of genes), use -n <number>. " +\
          "Distances can be specified using a kb or mb suffix, or as a raw distance. Examples: 500kb, 0.5MB, 1.4MB, 834141."
        parser.add_option("-d", dest="distance", help=dist_help, type="string")
        parser.add_option(
            "-n",
            dest="num_genes",
            help="Number of genes to return per SNP, default is " +
            str(self.num_genes),
            type="int")

        terms_help = "Comma-delimited string of terms, enclosed in quotes, to use in searching the literature. \
  This will execute a search, per gene, for any of the search terms. For example:\n\
  \n\
  Genes: RB1, TCF7L2\n\
  Search terms: \"glucose,retinoblastoma\"\n\
  What happens: \n\
  -- Search literature for RB1 AND (glucose OR retinoblastoma)\n\
  -- Search literature for TCF7L2 AND (glucose OR retinoblastoma)"

        parser.add_option("--terms", dest="terms", help=terms_help)
        parser.add_option("--no-generif",
                          dest="gene_rif",
                          action="store_true",
                          default=False,
                          help="Disable GeneRIFs.")
        parser.add_option(
            "--no-scandb",
            dest="no_scandb",
            action="store_true",
            default=False,
            help=
            "Disable use of ScanDB for eQTL information. Enabled by default.")
        parser.add_option("--scandb-pval",
                          dest="scandb_pval",
                          default=self.scandb_pval,
                          help="P-value threshold for ScanDB eQTLs.")
        parser.add_option(
            "--no-mimi",
            dest="mimi",
            action="store_true",
            default=False,
            help=
            "Disable querying of MiMI database for interactions between genes near SNPs."
        )
        parser.add_option("--no-omim",
                          dest="omim",
                          action="store_true",
                          default=False,
                          help="Disable display and search of OMIM text.")
        parser.add_option("--no-pubmed",
                          dest="pubmed",
                          action="store_true",
                          default=False,
                          help="Disable searching PubMed.")
        parser.add_option("--papernum",
                          dest="pnum",
                          type="int",
                          default=self.pnum,
                          help="Number of papers to display, default is " +
                          str(self.pnum))

        each_term_help = "When specified, the program will search each gene x searchterm pair, instead of lumping together search terms. \
  For example:\n\
  \n\
  Genes: RB1, TCF7L2\n\
  Search terms: \"glucose,retinoblastoma\"\n\
  What happens:\n\
  -- Search literature for RB1 AND glucose\n\
  -- Search literature for RB1 AND retinoblastoma\n\
  -- Search literature for TCF7L2 AND glucose\n\
  -- Search literature for TCF7L2 AND retinoblastoma\n\
  \n\
This is a much more in-depth search, at the cost of running time - NCIBI limits to 1 query / 3 seconds. \
If you have a very large set of genes and search terms, this can take a VERY long time to run!"

        parser.add_option("--each-term",
                          dest="per_term",
                          default=False,
                          action="store_true",
                          help=each_term_help)
        parser.add_option("--all",
                          dest="all",
                          action="store_true",
                          default=True,
                          help=SUPPRESS_HELP)
        parser.add_option(
            "-o",
            "--out",
            dest="outdir",
            default=self.outdir,
            help=
            "Directory to use for storing output. This should be a directory that does not exist yet."
        )
        parser.add_option(
            "--console",
            dest="console",
            action="store_true",
            default=self.console,
            help=
            "Write results to console, instead of creating directory with HTML/text results."
        )
        parser.add_option("--debug",
                          dest="debug",
                          action="store_true",
                          default=False,
                          help=SUPPRESS_HELP)

        # Parse args.
        (options, args) = parser.parse_args()

        # Was debug enabled?
        if options.debug:
            __builtin__._SNIPPER_DEBUG = True
        else:
            __builtin__._SNIPPER_DEBUG = False

        # If there are positional arguments, there was an error on the command line.
        # Let them know the potential problem.
        if len(args) > 0:
            print >> sys.stderr, "Error: positional arguments detected: " + str(
                args)
            print >> sys.stderr, "Most likely, you simply forgot to surround the entire argument in quotes."
            print >> sys.stderr, "Example: -g \"RB1, TCF7L2\" is correct, whereas -g \"RB1\" \"TCF7L2\" is wrong."
            sys.exit(1)

        # Console mode?
        self.console = options.console

        # Human genome build.
        if options.build:
            self.build = options.build

        # Get genes from command line (and file, if specified.)
        if options.gene != None:
            self.genes = set([i.upper() for i in parseTerms(options.gene)])
        if options.genefile != None:
            self.genes.update(parseGeneFile(options.genefile))

        # Get the set of SNPs.
        if options.snp != None:
            self.snpset = set(parseTerms(options.snp))
        if options.snpfile != None:
            self.snpset.update(parseSNPFile(options.snpfile))

        # Get regions.
        if options.regions != None:
            self.regions = parseRegions(options.regions)

        # Get list of search terms.
        if options.terms != None:
            self.terms = parseTerms(options.terms)

        # HACK: If distance is specified, but num_genes is not,
        # then we want to search for all genes in that distance space.
        # Set num_genes to a really really big number. :)
        if options.distance != None:
            options.distance = convertFlank(options.distance)
            if options.num_genes != None:
                self.distance = options.distance
                self.num_genes = options.num_genes
            else:
                self.distance = options.distance
                self.num_genes = 9999
        else:
            if options.num_genes != None:
                self.num_genes = options.num_genes

        # OMIM?
        self.omim = not options.omim

        # Pubmed?
        self.pubmed = not options.pubmed
        self.pnum = options.pnum
        self.per_term = options.per_term

        # GeneRIF?
        self.gene_rif = not options.gene_rif

        # ScanDB?
        if options.no_scandb:
            self.scandb = False
        else:
            self.scandb_pval = float(options.scandb_pval)

        # MIMI?
        self.mimi = not options.mimi

        # Check the output directory specified by the user.
        # It should not already exist.
        if len(sys.argv) > 1 and not self.console:
            self.outdir = options.outdir
            if os.path.exists(self.outdir):
                msg = "Error: output directory already exists: %s\n"\
                      "Please rename or move this directory, or use "\
                      "--output or -o to change the directory name." % self.outdir
                sys.exit(msg)
            else:
                mkpath(self.outdir)