def main():
    """Derive an ERROR-only file from each matching file in the current directory.

    Command-line options:
      -o EXTENSION  text a file name must contain to be selected
      -n EXTENSION  extension given to the derived file
      --extract     run ``grep ERROR`` from the old file into the new file;
                    without it the planned conversions are only printed

    The new file name is the part of the old name before its first dot,
    followed by a dot and the -n value.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="EXTENSION", help="val EXTENSION",
                      action="store", type="string", dest="old")
    parser.add_option("-n", metavar="EXTENSION", help="err EXTENSION",
                      action="store", type="string", dest="new")
    parser.add_option("--extract", help="Extract ERRORs only",
                      action="store_true", dest="extract")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both values are used below: a missing -o would make the substring test
    # raise TypeError, and a missing -n would produce names ending in ".None".
    if options.old is None or options.new is None:
        parser.error("options -o and -n are both required")

    for name in os.listdir('.'):
        if options.old not in name:
            continue
        oldfile = name
        newfile = "%s.%s" % (oldfile.split(".")[0], options.new)
        print("Convert file %s into %s" % (oldfile, newfile))
        if not options.extract:
            continue
        cmd = "grep ERROR %s > %s" % (oldfile, newfile)
        try:
            util.runProcess(cmd)
        except Exception as e:
            # Report the failure and carry on with the remaining files.
            print("Error to extract %s" % oldfile)
            print(e)
def main():
    """Call ``doSubmit`` for every organism listed in the -l FILE.

    Data lines have the form ``common_name||sequence_file``.  Lines that
    start with '!' and lines without a '||' separator are skipped.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names and its associated sequence file",
                      action="store", type="string", dest="list")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Read organism common name and related fasta sequence file
    list_file = options.list
    util.checkFile(list_file)
    # The context manager closes the list file even if doSubmit raises.
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') < 1:
                continue
            # ! common_name||sequence_file
            values = line.strip().split('||')
            common_name = values[0]
            input_file = values[1]
            #util.checkFile(input_file)
            doSubmit(common_name, input_file)
def main():
    """Convert matching GenBank files in the current directory to EMBL.

    Command-line options:
      -o EXTENSION  text a file name must contain to be selected
      -n EXTENSION  extension given to the converted file
      --convert     run ``seqret`` on each selected file; without it the
                    planned conversions are only printed

    The new file name is the part of the old name before its first dot,
    followed by a dot and the -n value.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="EXTENSION", help="Old EXTENSION",
                      action="store", type="string", dest="old")
    parser.add_option("-n", metavar="EXTENSION", help="New EXTENSION",
                      action="store", type="string", dest="new")
    parser.add_option("--convert", help="Do convert genbank file into embl",
                      action="store_true", dest="convert")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both values are used below: a missing -o would make the substring test
    # raise TypeError, and a missing -n would produce names ending in ".None".
    if options.old is None or options.new is None:
        parser.error("options -o and -n are both required")

    for name in os.listdir('.'):
        if options.old not in name:
            continue
        oldfile = name
        newfile = "%s.%s" % (oldfile.split(".")[0], options.new)
        print("Convert file %s into %s" % (oldfile, newfile))
        if not options.convert:
            continue
        cmd = "seqret -sequence gb::%s -feature Yes -outseq embl::%s" % (oldfile, newfile)
        try:
            util.runProcess(cmd)
        except Exception as e:
            # Report the failure and carry on with the remaining files.
            print("Error to convert %s" % oldfile)
            print(e)
def main():
    """Call ``union`` for every organism listed in the -l FILE.

    Data lines have the form
    ``common_name||organim_name||strain||locus_tag||fasta_file``.  Lines that
    start with '!' and lines without a '||' separator are skipped.  Nothing
    is done when -l is not given.

    Raises:
        IndexError: a data line has fewer than five '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names and its associated sequence file",
                      action="store", type="string", dest="list")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if not options.list:
        return

    # Read organism common name and related fasta sequence file
    list_file = options.list
    util.checkFile(list_file)
    # The context manager closes the list file even if union raises.
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') < 1:
                continue
            # ! common_name||organim_name||strain||locus_tag||fasta_file
            values = line.strip().split('||')
            print("Processing %s" % values[0])
            union(file=values[4], common_name=values[0], locus_tag=values[3],
                  organism_name=values[1], strain=values[2])
def main():
    """Run ``chado_load embl`` for every organism listed in the -l/--list FILE.

    Data lines have the form ``common_name||taxon_id||filename``.  Lines that
    start with '!' and lines without a '||' separator are skipped.  The
    database host, name, port and user are obtained through
    ``ropy.util.getDArg`` with ``raiseOnEmpty = True``.

    Raises:
        IndexError: a data line has fewer than three '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--list", metavar="FILE",
                      help="FILE containing the list of all organism common names and its associated file to load",
                      action="store", type="string", dest="list")
    parser.add_option("-D", action="store", dest="dbhost")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Print command line (each argument is followed by a single space)
    cmdline = "$ python " + "".join(arg + " " for arg in sys.argv)
    logger.info(cmdline)

    # Print logger file info
    logger.info(logsetup.logpath)

    # Setup database connection
    host = ropy.util.getDArg("dbhost", raiseOnEmpty = True)
    database = ropy.util.getDArg("dbname", raiseOnEmpty = True)
    port = ropy.util.getDArg("dbport", raiseOnEmpty = True)
    user = ropy.util.getDArg("dbuser", raiseOnEmpty = True)
    #password = ropy.util.getDArg("dbpassword", raiseOnEmpty = True)

    # Check if chado_load is installed
    util.isSoftInstalled("chado_load")

    # Read organism common name and load related embl file into the database
    data_path = options.list
    # The context manager closes the list file even if a load fails.
    with open(data_path, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') < 1:
                continue
            # ! common_name||taxon_id||filename
            fields = line.strip().split('||')
            common_name = fields[0]
            filename = fields[2]
            util.checkFile(filename)
            # Loader command
            cmd = "chado_load embl -o %s -t contig -D %s:%s/%s -U %s %s" % (
                common_name, host, port, database, user, filename)
            # Run command
            util.runProcess(cmd)
def main():
    """Call ``doSubmit`` for every organism listed in the -l FILE.

    Data lines have the form
    ``organism_name||strain||locus_tag||seq_size||seq_depth||dna_source||description``.
    Lines that start with '!' and lines without a '||' separator are skipped.
    The ``dna_source`` field is a short code (GHP, INRA, DSMZ or NCTC) that
    is expanded to a full description; a line with any other code is
    reported and skipped.  The --submit flag is forwarded to ``doSubmit``.

    Raises:
        IndexError: a data line has fewer than seven '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names, its associated information",
                      action="store", type="string", dest="list")
    parser.add_option("--submit", help="To submit data, not only checking locus_tag",
                      action="store_true", dest="submit")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Short DNA source codes and the full text passed to doSubmit.
    dna_sources = {
        'GHP': 'Gut Health Programme, Rowett Institute of Nutrition and Health, University of Aberdeen. http://www.rowett.ac.uk/divisions/ghp/',
        'INRA': 'INRA Clermont-Ferrand-Theix. http://www.clermont.inra.fr/',
        'DSMZ': 'Deutsche Sammlung von Mikroorganismen und Zellkulturen. GmbH http://www.dsmz.de/',
        'NCTC': "Health Protection Agency's National Collection of Type Cultures. http://www.hpacultures.org.uk/",
    }

    # Get and check input arguments
    # Read organism common name and related fasta sequence file
    list_file = options.list
    util.checkFile(list_file)
    # The context manager closes the list file even if doSubmit raises.
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') < 1:
                continue
            # ! organism_name||strain||locus_tag||seq_size||seq_depth||dna_source||description
            values = line.strip().split('||')
            organism_name = values[0]
            strain = values[1]
            locus_tag = values[2]
            seq_size = values[3]
            seq_depth = values[4]
            dna_source = dna_sources.get(values[5])
            if dna_source is None:
                print("DNA source %s not found! Please provide details..." % values[5])
                continue
            description = values[6]
            doSubmit(organism_name=organism_name, strain=strain,
                     locus_tag=locus_tag, seq_size=seq_size,
                     seq_depth=seq_depth, dna_source=dna_source,
                     description=description, submit=options.submit)
def main():
    """Submit sequences or fetch results, for one organism or a list of them.

    With -l FILE every data line is processed; otherwise the single organism
    described by -o/-i/-j is processed.  In both modes --fetch selects
    ``doFetch(common_name, job_id)``; without it the input file is passed
    through ``checkValidInput`` and then to ``doSubmit``.

    List lines start with ``common_name||sequence_file``; in --fetch mode the
    job id is read from a third field.  Lines that start with '!' and lines
    without a '||' separator are skipped.

    Raises:
        IndexError: --fetch is used with a list line that has fewer than
            three '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name",
                      action="store", type="string", dest="name")
    parser.add_option("-i", metavar="FILE",
                      help="input organism sequence file in FASTA format",
                      action="store", type="string", dest="input")
    parser.add_option("-j", metavar="ID", help="input job ID to fetch results",
                      action="store", type="string", dest="jobid")
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names and its associated sequence file",
                      action="store", type="string", dest="list")
    parser.add_option("--fetch", help="To fetch results, job id must be provided",
                      action="store_true", dest="fetch")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if options.list:
        # Read organism common name and related fasta sequence file
        list_file = options.list
        util.checkFile(list_file)
        # The context manager closes the list file even if a job call raises.
        with open(list_file, "r") as handle:
            for line in handle:
                if line.startswith('!'):
                    continue
                if line.count('||') < 1:
                    continue
                # ! common_name||sequence_file (plus a job id field for --fetch)
                values = line.strip().split('||')
                common_name = values[0]
                if options.fetch:
                    job_id = values[2]
                    doFetch(common_name, job_id)
                else:
                    input_file = checkValidInput(values[1], common_name)
                    doSubmit(common_name, input_file)
    else:
        common_name = options.name
        if options.fetch:
            job_id = options.jobid
            doFetch(common_name, job_id)
        else:
            input_file = checkValidInput(options.input, common_name)
            doSubmit(common_name, input_file)
def main():
    """Convert the EMBL file of every listed organism into a .tbl file.

    Only lines of the -l FILE with exactly six '||' separators are used
    (``common_name||organim_name||strain||locus_tag||genome_project_id||coverage||source``);
    lines that start with '!' are skipped.  For each organism the input is
    ``../IMG/<common_name>.4dep.embl`` and the output is
    ``<common_name>.tbl``.  ``doConvert`` is only called when --convert is
    given; a failed conversion is logged and the loop moves on.  Nothing is
    done when -l is not given.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names and its associated information (common_name||organim_name||strain||locus_tag||genome_project_id||coverage)",
                      action="store", type="string", dest="list")
    parser.add_option("--convert", help="Do convert embl file into tbl",
                      action="store_true", dest="convert")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if not options.list:
        return

    # Read organism common name and related locus tag
    list_file = options.list
    util.checkFile(list_file)
    # The context manager closes the list file even if a check raises.
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') != 6:
                continue
            # ! common_name||organim_name||strain||locus_tag||genome_project_id||coverage||source
            values = line.strip().split('||')
            common_name = values[0]
            locus_tag = values[3]
            embl_file = "../IMG/%s.4dep.embl" % common_name
            util.checkFile(embl_file)
            tbl_file = "%s.tbl" % common_name
            log.info("Convert file %s into %s" % (embl_file, tbl_file))
            if not options.convert:
                continue
            try:
                doConvert(embl_file, tbl_file, locus_tag)
            except Exception as e:
                log.error("Converting %s" % embl_file)
                # format_exc() reports the traceback of the exception being
                # handled; extract_stack() only described this call site.
                log.error(traceback.format_exc())
                log.error(e)
def main():
    """Convert the ``.img.embl`` file of every listed organism into a .tbl file.

    Data lines of the -l FILE have the form
    ``common_name||organim_name||strain||locus_tag||fasta_file``.  Lines that
    start with '!' and lines without a '||' separator are skipped.  For each
    organism the input is ``<common_name>.img.embl`` and the output is
    ``<common_name>.tbl``.  ``doConvert`` is only called when --convert is
    given; a failed conversion is printed and the loop moves on.  Nothing is
    done when -l is not given.

    Raises:
        IndexError: a data line has fewer than four '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names and its associated locus tag",
                      action="store", type="string", dest="list")
    # NOTE(review): this help text mentions genbank/embl while the code below
    # produces a .tbl file from an .img.embl file - confirm the wording.
    parser.add_option("--convert", help="Do convert genbank file into embl",
                      action="store_true", dest="convert")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if not options.list:
        return

    # Read organism common name and related locus tag
    list_file = options.list
    util.checkFile(list_file)
    # The context manager closes the list file even if a check raises.
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') < 1:
                continue
            # ! common_name||organim_name||strain||locus_tag||fasta_file
            values = line.strip().split('||')
            common_name = values[0]
            locus_tag = values[3]
            gbk_file = "%s.img.embl" % common_name
            util.checkFile(gbk_file)
            tbl_file = "%s.tbl" % common_name
            print("Convert file %s into %s" % (gbk_file, tbl_file))
            if not options.convert:
                continue
            try:
                doConvert(gbk_file, tbl_file, locus_tag)
            except Exception as e:
                # Report the failure and carry on with the remaining organisms.
                print("ERROR to convert %s" % gbk_file)
                print(e)
def main():
    """Call ``doSubmit`` for one organism or for every organism in a list.

    With -l FILE every data line is processed; its first three
    '||'-separated fields are used as common name, sequence file and IMG
    project ID.  Lines that start with '!' and lines without a '||'
    separator are skipped.  Without -l the values of -o, -i and -p are used.
    The sequence file is passed to ``util.checkFile`` before submission.

    Raises:
        IndexError: a list line has fewer than three '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name",
                      action="store", type="string", dest="name")
    parser.add_option("-i", metavar="FILE",
                      help="input organism sequence file in FASTA format",
                      action="store", type="string", dest="input")
    parser.add_option("-p", metavar="ID", help="IMG project ID (GOLD Stamp ID)",
                      action="store", type="string", dest="id")
    parser.add_option("-l", metavar="FILE",
                      help="FILE containing the list of all organism common names, its associated sequence file and IMG project ID",
                      action="store", type="string", dest="list")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if options.list:
        # Read organism common name and related fasta sequence file
        list_file = options.list
        util.checkFile(list_file)
        # The context manager closes the list file even if doSubmit raises.
        with open(list_file, "r") as handle:
            for line in handle:
                if line.startswith('!'):
                    continue
                if line.count('||') < 1:
                    continue
                # ! common_name||sequence_file||project_id
                values = line.strip().split('||')
                common_name = values[0]
                input_file = values[1]
                project_id = values[2]
                util.checkFile(input_file)
                doSubmit(common_name, input_file, project_id)
    else:
        common_name = options.name
        input_file = options.input
        project_id = options.id
        util.checkFile(input_file)
        doSubmit(common_name, input_file, project_id)
def main():
    """Rename matching files in the current directory to a new extension.

    Command-line options:
      -o EXTENSION  text a file name must contain to be selected
      -n EXTENSION  extension given to the renamed file
      --rename      run ``mv`` for each selected file; without it the
                    planned renames are only printed, followed by a hint

    The new file name is the part of the old name before its first dot,
    followed by a dot and the -n value.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="EXTENSION", help="Old EXTENSION",
                      action="store", type="string", dest="old")
    parser.add_option("-n", metavar="EXTENSION", help="New EXTENSION",
                      action="store", type="string", dest="new")
    parser.add_option("--rename", help="Do rename",
                      action="store_true", dest="rename")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both values are used below: a missing -o would make the substring test
    # raise TypeError, and a missing -n would produce names ending in ".None".
    if options.old is None or options.new is None:
        parser.error("options -o and -n are both required")

    for name in os.listdir('.'):
        if options.old not in name:
            continue
        oldfile = name
        newfile = "%s.%s" % (oldfile.split(".")[0], options.new)
        print("Rename old file %s into %s" % (oldfile, newfile))
        if options.rename:
            cmd = "mv %s %s" % (oldfile, newfile)
            util.runProcess(cmd)

    # Dry run: remind the user how to apply the renames listed above.
    if not options.rename:
        print("To perform the action, please use --rename")
def main():
    """Load every organism listed in the -l/--list FILE into the database.

    Data lines have the form ``Genus||species||strain||taxonId``.  Lines that
    start with '!' and lines without a '||' separator are skipped.  For each
    organism this calls ``db.makeOrganism`` and then ``db.makeOrganismProp``
    for the translation table (fixed at 11), the taxon id and the two HTML
    name fields, logging each result.  Connection settings are obtained
    through ``ropy.util.getDArg``.

    Raises:
        IndexError: a data line has fewer than four '||'-separated fields.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--list", metavar="FILE",
                      help="FILE containing the list of all organisms",
                      action="store", type="string", dest="list")
    parser.add_option("-D", action="store", dest="dbhost")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Print command line (each argument is followed by a single space)
    cmdline = "$ python " + "".join(arg + " " for arg in sys.argv)
    logger.info(cmdline)

    # Print logger file info
    logger.info(logsetup.logpath)

    # Setup database connection
    host = ropy.util.getDArg("dbhost", raiseOnEmpty = True)
    database = ropy.util.getDArg("dbname", raiseOnEmpty = True)
    port = ropy.util.getDArg("dbport", raiseOnEmpty = True)
    user = ropy.util.getDArg("dbuser", raiseOnEmpty = True)
    password = ropy.util.getDArg("dbpassword")
    connectionFactory = ropy.query.ConnectionFactory(host, database, user, password, port)

    # Read organism list file and load it into the database
    data_path = options.list
    # The context manager closes the list file even if a database call raises.
    with open(data_path, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            if line.count('||') < 1:
                continue
            # ! Genus||species||strain||taxonId
            fields = line.strip().split('||')
            genus = fields[0]
            species = fields[1].replace('sp.', 'unknown')
            strain = fields[2]
            taxonid = fields[3]

            # Load organism
            chado_species = "%s (%s)" % (species, strain)
            common_name = getCommonName(genus, species, strain)
            abbreviation = common_name
            comment = None
            logger.info(common_name)
            logger.info(db.makeOrganism(connectionFactory, genus, chado_species,
                                        abbreviation, common_name, comment))

            # Load translation table
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species,
                                            "genedb_misc", "translationTable", 11))

            # Load taxonomy id
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species,
                                            "genedb_misc", "taxonId", taxonid))

            # Load HTML name fields for GeneDB web
            htmlFullName = getHtmlFullName(genus, species, strain)
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species,
                                            "genedb_misc", "htmlFullName", htmlFullName))
            htmlShortName = getHtmlShortName(genus, species, strain)
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species,
                                            "genedb_misc", "htmlShortName", htmlShortName))