def main(args):
    """Print alpha diversity values for one or more metagenomes.

    Queries the communities API ``/compute/alphadiversity/<id>`` endpoint for
    each ID given via --ids and emits one "<id>\t<value>" line per metagenome.
    Returns 0 on success, 1 when no IDs were supplied.
    """
    # OptionParser re-wraps description/epilog by default; override so the
    # pre-built help text is printed verbatim.
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # typo fix: "seperated" -> "separated"; the empty "" short-option strings
    # the original passed are filtered out by optparse anyway, so drop them.
    parser.add_option("--ids", dest="ids", default=None, help="comma separated list of KBase Metagenome IDs")
    parser.add_option("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("--user", dest="user", default=None, help="OAuth username")
    parser.add_option("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("--token", dest="token", default=None, help="OAuth token")
    parser.add_option("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")

    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url+'/compute/alphadiversity/'+i+'?'+urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" %(i, data['data']))
    return 0
def main(args):
    """Search MG-RAST metagenomes and print matching records.

    Builds a ``/search`` query from the CLI options plus any SEARCH_FIELDS
    filters, prints a tab-separated header and one row per hit, and follows
    the API's 'next' pagination links until opts.limit results are shown.
    Returns 0 always (including the no-results case).
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    # typo fix: "pubulic" -> "public"
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and public data if using authenticated search, default is private only. Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    # one free-text filter option per searchable metadata field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # build call url; the API caps page size at 50, larger limits paginate
    total = 0
    maxLimit = 50
    params = [('limit', opts.limit if opts.limit < maxLimit else maxLimit),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url+'/search?'+urlencode(params, True)

    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    # output header
    safe_print("\t".join(fields)+"\n")
    # output rows, following pagination until the requested limit is reached
    display_search(result['data'], fields)
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    return 0
def main(args):
    """Print alpha diversity values for one or more metagenomes (argparse variant).

    Queries the communities API ``/compute/alphadiversity/<id>`` endpoint for
    each ID given via --ids and emits one "<id>\t<value>" line per metagenome.
    Returns 0 on success, 1 when no IDs were supplied.
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    # typo fix: "seperated" -> "separated"
    parser.add_argument("--ids", dest="ids", default=None, help="comma separated list of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_argument("--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url + '/compute/alphadiversity/' + i + '?' + urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" % (i, data['data']))
    return 0
def main(args):
    """Fetch and print metadata for a single metagenome.

    With --verbosity 'mixs' the top-level MIxS fields are printed as
    tab-separated key/value lines; with 'full' all GSC metadata is printed
    grouped by category. Returns 0 on success, 1 on bad input.
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--verbosity", dest="verbosity", default='mixs', help="amount of metadata to display. use keyword 'mixs' for GSC MIxS metadata, use keyword 'full' for all GSC metadata, default is mixs")

    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # 'mixs' maps directly to an API verbosity level; anything else is
    # requested as 'metadata' and validated after retrieval (as the
    # original did: the fetch happens before the verbosity check)
    verb = 'mixs' if opts.verbosity == 'mixs' else 'metadata'
    result = obj_from_url(opts.url+'/metagenome/'+opts.id+'?verbosity='+verb, auth=token)

    if opts.verbosity == 'mixs':
        for key in sorted(result.keys()):
            if key not in ['project', 'library', 'sample']:
                safe_print("%s\t%s\n" %(key, result[key]))
    elif opts.verbosity == 'full':
        md = result['metadata']
        safe_print("category\tlabel\tvalue\n")
        if ('project' in md) and md['project']['data']:
            proj = md['project']['data']
            for label in sorted(proj.keys()):
                safe_print("project\t%s\t%s\n" %(label, proj[label]))
        if ('sample' in md) and md['sample']['data']:
            samp = md['sample']['data']
            for label in sorted(samp.keys()):
                safe_print("sample\t%s\t%s\n" %(label, samp[label]))
        if ('library' in md) and ('type' in md['library']) and md['library']['data']:
            lib = md['library']['data']
            for label in sorted(lib.keys()):
                safe_print("library: %s\t%s\t%s\n" %(md['library']['type'], label, lib[label]))
        if ('env_package' in md) and ('type' in md['env_package']) and md['env_package']['data']:
            env = md['env_package']['data']
            for label in sorted(env.keys()):
                safe_print("env package: %s\t%s\t%s\n" %(md['env_package']['type'], label, env[label]))
    else:
        sys.stderr.write("ERROR: invalid verbosity type\n")
        return 1
    return 0
def main(args):
    """Fetch and print metadata for a single metagenome (optparse variant).

    --verbosity 'mixs' prints the top-level MIxS fields as tab-separated
    key/value lines; 'full' prints all GSC metadata grouped by category.
    Returns 0 on success, 1 on bad input.
    """
    # emit description/epilog verbatim; optparse re-wraps them otherwise
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--verbosity", dest="verbosity", default='mixs', help="amount of metadata to display. use keyword 'mixs' for GSC MIxS metadata, use keyword 'full' for all GSC metadata, default is mixs")

    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # anything other than 'mixs' is fetched at 'metadata' verbosity; the
    # request is issued before validating the verbosity keyword, matching
    # the original control flow
    verb = opts.verbosity if opts.verbosity == 'mixs' else 'metadata'
    url = opts.url+'/metagenome/'+opts.id+'?verbosity='+verb

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    if opts.verbosity == 'mixs':
        skip = ('project', 'library', 'sample')
        for r in sorted(result.keys()):
            if r in skip:
                continue
            safe_print("%s\t%s\n" %(r, result[r]))
    elif opts.verbosity == 'full':
        md = result['metadata']
        safe_print("category\tlabel\tvalue\n")
        if ('project' in md) and md['project']['data']:
            for p in sorted(md['project']['data']):
                safe_print("project\t%s\t%s\n" %(p, md['project']['data'][p]))
        if ('sample' in md) and md['sample']['data']:
            for s in sorted(md['sample']['data']):
                safe_print("sample\t%s\t%s\n" %(s, md['sample']['data'][s]))
        if ('library' in md) and ('type' in md['library']) and md['library']['data']:
            for l in sorted(md['library']['data']):
                safe_print("library: %s\t%s\t%s\n" %(md['library']['type'], l, md['library']['data'][l]))
        if ('env_package' in md) and ('type' in md['env_package']) and md['env_package']['data']:
            for e in sorted(md['env_package']['data']):
                safe_print("env package: %s\t%s\t%s\n" %(md['env_package']['type'], e, md['env_package']['data'][e]))
    else:
        sys.stderr.write("ERROR: invalid verbosity type\n")
        return 1
    return 0
def main(args):
    """Stream annotated sequences for a metagenome to stdout.

    Builds an ``/annotation/sequence/<id>`` query from the filter options and
    pipes the API response straight to stdout. Returns 0 on success, 1 when
    no metagenome ID was given.
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, type=str, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, type=str, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, type=str, help="OAuth token")
    parser.add_argument("--name", dest="name", default=None, type=str, help="function name to filter by")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum percent identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")

    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # ontology-style annotation only applies to hierarchical sources queried
    # above the plain 'function' level; everything else is a function query
    hierarchical = opts.source in ('Subsystems', 'KO', 'NOG', 'COG')
    anno_type = 'ontology' if hierarchical and (opts.level != 'function') else 'function'

    # assemble query parameters (order preserved: cutoffs, type, filters)
    query = [('source', opts.source),
             ('evalue', opts.evalue),
             ('identity', opts.identity),
             ('length', opts.length),
             ('type', anno_type)]
    if opts.name:
        query.append(('filter', opts.name))
    if opts.level:
        query.append(('filter_level', opts.level))
    url = opts.url+'/annotation/sequence/'+opts.id+'?'+urlencode(query, True)

    # output data
    stdout_from_url(url, auth=token)
    return 0
def main(args):
    """Stream organism similarity annotations for a metagenome to stdout.

    Builds an ``/annotation/similarity/<id>`` query from the filter options
    and pipes the API response straight to stdout. Returns 0 on success, 1
    when no metagenome ID was given.
    """
    # emit description/epilog verbatim; optparse re-wraps them otherwise
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--name", dest="name", default=None, help="taxon name to filter by")
    parser.add_option("", "--level", dest="level", default=None, help="taxon level to filter by")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")

    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # assemble query parameters; the two filters are appended only when set,
    # in the same order the original used (filter, then filter_level)
    query = [('source', opts.source),
             ('evalue', opts.evalue),
             ('identity', opts.identity),
             ('length', opts.length),
             ('type', 'organism')]
    for key, value in (('filter', opts.name), ('filter_level', opts.level)):
        if value:
            query.append((key, value))

    # output data
    stdout_from_url(opts.url+'/annotation/similarity/'+opts.id+'?'+urlencode(query, True), auth=token)
    return 0
def main(args):
    """Command-line dispatcher for MG-RAST inbox/submission actions.

    Parses global options plus a positional action (login, view, upload,
    upload-archive, rename, validate, compute, delete, submit), validates
    the action-specific arguments, authenticates, and dispatches to the
    matching helper. Returns 0 on success, 1 on any validation/auth failure.
    """
    global mgrast_auth, API_URL, SHOCK_URL
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("-u", "--mgrast_url", dest="mgrast_url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-s", "--shock_url", dest="shock_url", default=SHOCK_URL, help="Shock API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    parser.add_argument("-p", "--project", dest="project", default=None, help="project ID")
    parser.add_argument("-m", "--metadata", dest="metadata", default=None, help="metadata file ID")
    parser.add_argument("-j", "--joinfile", dest="joinfile", default=None, help="name of resulting pair-merge file (without extension), default is <pair 1 filename>_<pair 2 filename>")
    parser.add_argument("--retain", dest="retain", action="store_true", default=False, help="retain non-overlapping sequences in pair-merge")
    parser.add_argument("--rc_index", dest="rc_index", action="store_true", default=False, help="barcodes in index file are reverse compliment of mapping file")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("action", nargs="+", help="Action")
    global DEBUG

    # get inputs
    opts = parser.parse_args()
    args = opts.action
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action, please check usage with %s -h\n" % (sys.argv[0]))
        return 1
    if opts.verbose:
        print("DEBUG")
        DEBUG = 1
    else:
        DEBUG = 0
    action = args[0]
    API_URL = opts.mgrast_url
    SHOCK_URL = opts.shock_url

    # validate inputs before doing any network work
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n" % ", ".join(valid_actions))
        return 1
    elif (action == "view") and ((len(args) < 2) or (args[1] not in view_options)):
        # typo fix: message previously read "invalid view ion."
        sys.stderr.write("ERROR: invalid view option. use one of: %s\n" % ", ".join(view_options))
        return 1
    elif (action in ["upload", "upload-archive", "delete", "submit"]) and (len(args) < 2):
        sys.stderr.write("ERROR: %s missing file\n" % action)
        return 1
    elif action == "upload":
        for f in args[1:]:
            if not os.path.isfile(f):
                sys.stderr.write("ERROR: upload file '%s' does not exist\n" % f)
                return 1
    elif action == "upload-archive":
        if len(args[1:]) > 1:
            sys.stderr.write("ERROR: upload-archive only supports one file\n")
            return 1
        if not os.path.isfile(args[1]):
            sys.stderr.write("ERROR: upload-archive file '%s' does not exist\n" % args[1])
            return 1
    elif (action == "rename") and (len(args) != 3):
        sys.stderr.write("ERROR: %s missing file or name\n" % action)
        return 1
    elif action == "validate":
        if (len(args) < 2) or (args[1] not in validate_options):
            # typo fix: message previously read "invalid validate ion."
            sys.stderr.write("ERROR: invalid validate option. use one of: %s\n" % ", ".join(validate_options))
            return 1
        if len(args) < 3:
            sys.stderr.write("ERROR: validate missing file\n")
            return 1
    elif action == "compute":
        if (len(args) < 2) or (args[1] not in compute_actions):
            sys.stderr.write("ERROR: invalid compute action. use one of: %s\n" % ", ".join(compute_actions))
            return 1
        # each compute sub-action has a fixed argument count
        if (((args[1] == "sff2fastq") and (len(args) != 3)) or
                ((args[1] == "demultiplex") and (len(args) < 4)) or
                ((args[1] == "pairjoin") and (len(args) != 4)) or
                ((args[1] == "pairjoin_demultiplex") and (len(args) != 6))):
            sys.stderr.write("ERROR: compute %s missing file(s)\n" % args[1])
            return 1
    elif (action == "submit") and (not opts.project) and (not opts.metadata):
        sys.stderr.write("ERROR: invalid submit, must have one of project or metadata\n")
        return 1

    # explict login
    token = get_auth_token(opts)
    if action == "login":
        if not token:
            token = input('Enter your MG-RAST auth token: ')
        login(token)
        return 0

    # get auth object, get from token if no login
    mgrast_auth = get_auth(token)
    if not mgrast_auth:
        return 1

    # dispatch to the matching helper
    if action == "view":
        view(args[1])
    elif action == "upload":
        upload(args[1:])
    elif action == "upload-archive":
        upload_archive(args[1])
    elif action == "rename":
        check_ids([args[1]])
        rename(args[1], args[2])
    elif action == "validate":
        check_ids(args[2:])
        validate(args[1], args[2:])
    elif action == "compute":
        check_ids(args[2:])
        compute(args[1], args[2:], opts.retain, opts.joinfile, opts.rc_index)
    elif action == "delete":
        check_ids(args[1:])
        delete(args[1:])
    elif action == "submit":
        check_ids(args[1:])
        submit(args[1:], opts.project, opts.metadata)
    return 0
def test_async_matrix3():
    """Smoke-test the asynchronous organism-matrix API endpoint.

    Submits a small three-metagenome RefSeq family-level query through the
    async polling helper and prints the response.
    """
    # Removed a dead assignment: a larger 7-metagenome RDP query was built
    # first and immediately overwritten (original comment: "takes too long??").
    URI = API_URL + '/matrix/organism?id=mgm4447943.3&id=mgm4447192.3&id=mgm4447102.3&group_level=family&source=RefSeq&evalue=15'
    token = get_auth_token(None)
    response = async_rest_api(URI, auth=token)
    print(response)
def main(args):
    """Render a heatmap/dendrogram image from abundance data via R.

    Reads a BIOM or tab-delimited abundance table (file or stdin), converts
    it to a temporary tab file, and invokes the R ``plot_mg_heatdend``
    routine to produce the plot image. Returns 0 on success, 1 on any
    input/validation error.
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    # NOTE(review): --cluster and --distance are accepted but never forwarded
    # to the R call below — confirm whether plot_mg_heatdend should take them.
    parser.add_argument("--cluster", dest="cluster", default='ward', help="cluster function, one of: ward, single, complete, mcquitty, median, centroid, default is ward")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance function, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # help text fixed: it previously claimed defaults of 5 and 4; the actual
    # defaults are 10 for both height and width
    parser.add_argument("--height", dest="height", type=float, default=10, help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10, help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--order", dest="order", type=int, default=0, help="order columns, default is off: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0, help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # BUG FIX: the list previously started with 'reference', an option that
    # is never defined, so getattr raised AttributeError on every run
    for o in ['order', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n" % o)
            return 1

    # get auth
    token = get_auth_token(opts)

    # parse input into the tab-delimited temp file R expects
    tmp_in = 'tmp_' + random_str() + '.txt'
    with open(tmp_in, 'w') as tmp_hdl:
        try:
            if opts.input == '-':
                indata = sys.stdin.read()
            else:
                with open(opts.input, 'r') as in_hdl:
                    indata = in_hdl.read()
            if opts.format == 'biom':
                try:
                    biom = json.loads(indata)
                    biom_to_tab(biom, tmp_hdl, col_name=(opts.name == 1))
                except Exception:
                    sys.stderr.write("ERROR: input BIOM data not correct format\n")
                    return 1
            else:
                tmp_hdl.write(indata)
        except Exception:
            sys.stderr.write("ERROR: unable to load input data\n")
            return 1

    # build R cmd
    order = 'TRUE' if opts.order == 1 else 'FALSE'
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_heatdend.r")
suppressMessages( plot_mg_heatdend(
    table_in="%s",
    image_out="%s",
    order_columns=%s,
    label_rows=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))""" % (opts.rlib, tmp_in, opts.plot, order, label, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)

    # cleanup
    os.remove(tmp_in)
    return 0
parser = OptionParser(usage) # parser.add_option("-i", "--input", dest="input", default=None, help="Input sequence file.") parser.add_option("-s", "--source", dest="source", default="RefSeq", help="Annotation source: RefSeq, GenBank, IMG, SEED, TrEMBL, SwissProt, PATRIC, KEG, RDP, Greengenes, LSU, SSU") parser.add_option("-g", "--grouplevel", dest="grouplevel", default="domain", help="Grouping level: strain, species, genus, family, order, class, phylum, domain / function, level1, level2, level3") parser.add_option("-l", "--list", dest="targetlist", default="", help="Target list (filename).") # parser.add_option("-o", "--output", dest="output", default=None, help="Output file.") parser.add_option("-i", "--hittype", dest="hittype", default="single", help="Hit type: all, single, lca") parser.add_option("-c", "--call", dest="call", default="organism", help="organism or function") parser.add_option("-e", "--evalue", dest="evalue", default="1", help="organism or function") parser.add_option("-t", "--type", dest="resulttype", default="abundnace", help="Result type: abundnace, evalue, identity, or length") # parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=True, help="Verbose [default off]") parser.add_option("-k", "--token", dest="token", type="str", help="Auth token") parser.add_option("-m", "--metagenomes", dest="metagenomes", default="", type="str", help="Metagenome list") (opts, args) = parser.parse_args() key = get_auth_token(opts) # assign parameters if not opts.targetlist == "": metagenomes = get_ids(opts.targetlist) elif not opts.metagenomes == "": metagenomes = opts.metagenomes.split(",") else: metagenomes = ["mgm4447943.3", "mgm4447102.3"] group_level = opts.grouplevel result_type = "abundance" result_call = opts.call evalue = opts.evalue source = opts.source hittype = opts.hittype # construct API call
def main(args):
    """Search MG-RAST metagenomes and print matching records.

    Builds a ``/search`` query from the CLI options plus any SEARCH_FIELDS
    filters, prints a tab-separated header and one row per hit, and follows
    the API's 'next' pagination links until opts.limit results are shown.
    Returns 0 always (including the no-results case).
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % (VERSION, search_opts), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    # typo fix: "pubulic" -> "public"
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and public data if using authenticated search, default is private only. Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    # one free-text filter option per searchable metadata field
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--" + sfield, dest=sfield, default=None, help="search parameter: query string for " + sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # build call url; the API caps page size at 50, larger limits paginate
    total = 0
    maxLimit = 50
    params = [('limit', opts.limit if opts.limit < maxLimit else maxLimit),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url + '/search?' + urlencode(params, True)

    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    # output header
    safe_print("\t".join(fields) + "\n")
    # output rows, following pagination until the requested limit is reached
    display_search(result['data'], fields)
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    return 0
def main(args):
    """Export a metagenome's research-object bundle to a local directory.

    Fetches the research-object manifest for --metagenome, optionally just
    lists its contents (--list), otherwise downloads every aggregate file
    (CWL files come from a local clone of the MG-RAST pipeline repo, the
    rest via the API), and writes BagIt-style manifest-sha1.txt /
    tagmanifest-sha1.txt checksum files. Returns 0 on success, 1 on bad
    input.
    """
    # print description/epilog verbatim instead of argparse's re-wrapping
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    url = opts.url+'/metagenome/'+opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url+'/researchobject/manifest/'+opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files from a temp clone of the pipeline repo
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)

    # download manifest; sha1s collects [hexdigest, relative path] pairs
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    with open(os.path.join(manifest_dir, data["manifest"]), 'w') as man_hdl:
        man_hdl.write(data_str)
    # BUG FIX: hashlib.sha1 requires bytes — hashing the str raised
    # TypeError on Python 3; encode before hashing
    sha1s.append([hashlib.sha1(data_str.encode('utf-8')).hexdigest(), os.path.join(base, data["manifest"])])

    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... "%(info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            # CWL files are taken from the local pipeline clone, with the
            # relative ../ prefixes flattened out
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            with open(src, 'r') as src_hdl:
                text = src_hdl.read().replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            with open(dst, 'w') as dst_hdl:
                dst_hdl.write(text)
            # BUG FIX: encode before hashing (see above)
            sha1s.append([hashlib.sha1(text.encode('utf-8')).hexdigest(), os.path.join(folder, info["bundledAs"]["filename"])])
        else:
            with open(os.path.join(folder_dir, info["bundledAs"]["filename"]), 'w') as fh:
                s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            sha1s.append([s1, os.path.join(folder, info["bundledAs"]["filename"])])
        sys.stdout.write("Done\n")

    # output sha1 checksum files: data/ payload goes to manifest-sha1.txt,
    # everything else to tagmanifest-sha1.txt
    sha1s.sort(key=lambda x: x[1])
    with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1, \
         open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
        for s1 in sha1s:
            if s1[1].startswith('data'):
                mansha1.write("%s\t%s\n"%(s1[0], s1[1]))
            else:
                tagsha1.write("%s\t%s\n"%(s1[0], s1[1]))

    # cleanup
    shutil.rmtree(pipeline_dir)
    return 0
def main(args):
    """Command-line driver for MG-RAST submissions: login, list, status, delete, submit.

    The positional arguments form the action word plus its operands, e.g.
    ``submit simple file1 file2`` or ``status <submission-id>``.  When a
    --json_in file is given the action defaults to ``submit``.

    Returns 0 on success, 1 on invalid input or failed authentication.
    """
    global mgrast_auth, API_URL, SHOCK_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # access options
    parser.add_argument("-u", "--mgrast_url", dest="mgrast_url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-s", "--shock_url", dest="shock_url", default=SHOCK_URL, help="Shock API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    # required options
    parser.add_argument("-m", "--metadata", dest="metadata", default=None, help="metadata .xlsx file")
    parser.add_argument("--project_id", dest="project_id", default=None, help="project ID")
    parser.add_argument("--project_name", dest="project_name", default=None, help="project name")
    # pairjoin / demultiplex options
    parser.add_argument("--mg_name", dest="mgname", default=None, help="name of pair-merge metagenome if not in metadata, default is UUID")
    parser.add_argument("--barcode", dest="barcode", default=None, help="barcode file: metagenome_name \\t barcode_sequence")
    parser.add_argument("--retain", dest="retain", action="store_true", default=False, help="retain non-overlapping sequences in pair-merge")
    parser.add_argument("--rc_index", dest="rc_index", action="store_true", default=False, help="barcodes in index file are reverse compliment of mapping file")
    # pipeline flags
    parser.add_argument("--assembled", dest="assembled", action="store_true", default=False, help="if true sequences are assembeled, default is false")
    parser.add_argument("--no_filter_ln", dest="no_filter_ln", action="store_true", default=False, help="if true skip sequence length filtering, default is on")
    parser.add_argument("--no_filter_ambig", dest="no_filter_ambig", action="store_true", default=False, help="if true skip sequence ambiguous bp filtering, default is on")
    parser.add_argument("--no_dynamic_trim", dest="no_dynamic_trim", action="store_true", default=False, help="if true skip qual score dynamic trimmer, default is on")
    parser.add_argument("--no_dereplicate", dest="no_dereplicate", action="store_true", default=False, help="if true skip dereplication, default is on")
    parser.add_argument("--no_bowtie", dest="no_bowtie", action="store_true", default=False, help="if true skip bowtie screening, default is on")
    # pipeline options
    # NOTE(review): the help strings / types / defaults of the next four
    # options were rotated by one position in the original; realigned so each
    # option documents itself (filter_ln_mult is a length multiplier, min_qual
    # a quality threshold) — confirm against the MG-RAST pipeline defaults
    parser.add_argument("--filter_ln_mult", dest="filter_ln_mult", type=float, default=2.0, help="sequence length filtering multiplier, default is 2.0")
    parser.add_argument("--max_ambig", dest="max_ambig", type=int, default=5, help="maximum ambiguous bps to allow through per sequence, default is 5")
    parser.add_argument("--max_lqb", dest="max_lqb", type=int, default=15, help="maximum number of low-quality bases per read, default is 15")
    parser.add_argument("--min_qual", dest="min_qual", type=int, default=15, help="quality threshold for low-quality bases, default is 15")
    parser.add_argument("--screen_indexes", dest="screen_indexes", default=None, help="host organism to filter sequences by")
    parser.add_argument("--priority", dest="priority", default=None, help="indicate when making data public, influences analysis run time")
    # extra modes
    parser.add_argument("--synch", dest="synch", action="store_true", default=False, help="Run submit action in synchronious mode")
    parser.add_argument("--json_out", dest="json_out", default=None, help="Output final metagenome product as json object to this file, synch mode only")
    parser.add_argument("--json_in", dest="json_in", default=None, help="Input sequence file(s) encoded as shock handle in json file, simple or pairjoin types only")
    parser.add_argument("--tmp_dir", dest="tmp_dir", default="", help="Temp dir to download too if using json_in option, default is current working dir")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Submit in debug mode")
    # nargs="*" collects the action word and its operands (submit type, files,
    # submission ids) into a list; the original single-string positional made
    # the args[1]/args[2:] indexing below operate on individual characters
    parser.add_argument("action", type=str, nargs="*", default=[], help="Action")
    # get inputs
    opts = parser.parse_args()
    # special case: a json_in file implies a submit action
    json_submit = True if opts.json_in and os.path.isfile(opts.json_in) else False
    args = opts.action
    if json_submit:
        action = "submit"
    else:
        if len(args) < 1:
            sys.stderr.write("ERROR: missing action\n")
            return 1
        action = args[0]
    API_URL = opts.mgrast_url
    SHOCK_URL = opts.shock_url
    if opts.verbose and opts.debug:
        print("##### Running in Debug Mode #####")
    # validate inputs
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n"%", ".join(valid_actions))
        return 1
    elif (action in ["status", "delete"]) and (len(args) < 2):
        sys.stderr.write("ERROR: %s missing submission ID\n"%action)
        return 1
    elif (action == "submit") and (not json_submit):
        if not (opts.project_id or opts.project_name or opts.metadata):
            sys.stderr.write("ERROR: invalid submit, must have one of project_id, project_name, or metadata\n")
            return 1
        if (len(args) < 2) or (args[1] not in submit_types):
            sys.stderr.write("ERROR: invalid submit option. use one of: %s\n"%", ".join(submit_types))
            return 1
        # each submit type takes a fixed (or minimum) number of file operands
        if ((args[1] == "simple") and (len(args) < 3) or
            ((args[1] == "batch") and (len(args) != 3)) or
            ((args[1] == "demultiplex") and (len(args) < 3)) or
            ((args[1] == "pairjoin") and (len(args) != 4)) or
            ((args[1] == "pairjoin_demultiplex") and (len(args) != 5))):
            sys.stderr.write("ERROR: submit %s missing file(s)\n"%args[1])
            return 1
        if ((args[1] == "demultiplex") or (args[1] == "pairjoin_demultiplex")) and (not (opts.metadata or opts.barcode)):
            sys.stderr.write("ERROR: submit %s requires either metadata or barcode file\n"%args[1])
            return 1
    # explict login
    token = get_auth_token(opts)
    if action == "login":
        if not token:
            token = input('Enter your MG-RAST auth token: ')
        login(token)
        return 0
    # get auth object, get from token if no login
    mgrast_auth = get_auth(token)
    if not mgrast_auth:
        return 1
    # actions
    if action == "list":
        if opts.verbose:
            print("Listing all submissions for "+mgrast_auth['login'])
        listall()
    elif action == "status":
        if opts.verbose:
            print("Status for submission"+args[1])
        status(args[1])
    elif action == "delete":
        if opts.verbose:
            print("Deleting submission"+args[1])
        delete(args[1])
    elif action == "submit":
        # process input json if exists
        if json_submit:
            stype, infiles = seqs_from_json(opts.json_in, opts.tmp_dir)
        else:
            stype, infiles = args[1], args[2:]
        # get name from output json if used
        if opts.json_out and (stype == "pairjoin") and (not opts.mgname):
            opts.mgname = os.path.splitext(opts.json_out)[0]
        # submit it
        if opts.verbose:
            print("Starting submission %s for %d files"%(stype, len(infiles)))
        submit(stype, infiles, opts)
    return 0
def main(args):
    """Generate a PCoA plot (rendered by R) from BIOM or tab-delimited abundance data.

    Reads the input matrix, optionally resolves per-metagenome group labels
    (from BIOM metadata, a JSON group description, or a tabbed table), writes
    temp files for R, and invokes plot_mg_pcoa.r.  Returns 0 on success, 1 on
    invalid input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom',
                        help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_argument("--distance", dest="distance", default='bray-curtis',
                        help="distance metric, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--metadata", dest="metadata", default=None,
                        help="metadata field to color by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None,
                        help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1,
                        help="position of group to use, default is 1 (first)")
    parser.add_argument("--color_auto", dest="color_auto", type=int, default=0,
                        help="auto-create colors based on like group names, default is use group name as color: 1=true, 0=false")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # help text corrected: the actual defaults are 10, not 6
    parser.add_argument("--height", dest="height", type=float, default=10,
                        help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10,
                        help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--three", dest="three", type=int, default=0,
                        help="create 3-D PCoA, default is 2-D: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0,
                        help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0,
                        help="label image rows, default is off: 1=true, 0=false")
    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.metadata:
        opts.color_auto = 1
    # 'reference' was removed from this list: no such option is defined, so
    # getattr raised AttributeError on every invocation
    for o in ['color_auto', 'three', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n" % o)
            return 1
    # get auth
    token = get_auth_token(opts)
    # parse inputs
    tmp_in = 'tmp_' + random_str() + '.txt'
    tmp_hdl = open(tmp_in, 'w')
    mg_list = []
    groups = []
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                indata = json.loads(indata)
                # materialize the id list: it is reused (len, enumerate) below,
                # so a py3 map iterator would be exhausted after first use
                mg_list = [x['id'] for x in indata['columns']]
                col_name = True if opts.name == 1 else False
                biom_to_tab(indata, tmp_hdl, col_name=col_name)
                if opts.metadata:
                    groups = metadata_from_biom(indata, opts.metadata)
            except Exception:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            tmp_hdl.write(indata)
            mg_list = indata.split('\n')[0].strip().split('\t')
    except Exception:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    tmp_hdl.close()
    # get groups if not in BIOM metadata and option used
    if (len(groups) == 0) and opts.groups:
        # is it json ?
        ## example of 2 group sets in json format
        ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
        ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
        try:
            gdata = json.load(open(opts.groups, 'r')) if os.path.isfile(opts.groups) else json.loads(opts.groups)
            if opts.group_pos > len(gdata):
                sys.stderr.write("ERROR: position (%d) of group is out of bounds\n" % opts.group_pos)
                return 1
            for m in mg_list:
                found_g = None
                for g, mgs in gdata[opts.group_pos - 1].items():
                    if m in mgs:
                        found_g = g
                        break
                if found_g:
                    groups.append(found_g)
                else:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n" % m)
                    return 1
        # no - its tabbed
        except Exception:
            gtext = open(opts.groups, 'r').read() if os.path.isfile(opts.groups) else opts.groups
            grows, gcols, gdata = tab_to_matrix(gtext)
            if opts.group_pos > len(gdata[0]):
                sys.stderr.write("ERROR: position (%d) of group is out of bounds\n" % opts.group_pos)
                # was missing: fall through used an invalid index; now matches
                # the json branch's error handling
                return 1
            for m in mg_list:
                try:
                    midx = gcols.index(m)
                    groups.append(gdata[midx][opts.group_pos - 1])
                except (ValueError, IndexError):
                    sys.stderr.write("ERROR: metagenome %s not in a group\n" % m)
                    return 1
    # print groups to file for R input
    tmp_group = None
    if len(groups) == len(mg_list):
        tmp_group = 'tmp_' + random_str() + '.txt'
        hdl_group = open(tmp_group, 'w')
        hdl_group.write("\tgroup\n")
        for i, m in enumerate(mg_list):
            # replace non-ASCII characters so the R input stays plain ASCII
            hdl_group.write("%s\t%s\n" % (m, ''.join([x if ord(x) < 128 else '?' for x in groups[i]])))
        hdl_group.close()
    elif len(groups) > 0:
        sys.stderr.write("Warning: Not all metagenomes in a group\n")
    # build R cmd
    three = 'c(1,2,3)' if opts.three == 1 else 'c(1,2)'
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    table = '"%s"' % tmp_group if tmp_group else 'NA'
    color = 'TRUE' if opts.color_auto == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_pcoa.r")
suppressMessages( plot_mg_pcoa(
    table_in="%s",
    image_out="%s",
    plot_pcs=%s,
    dist_metric="%s",
    label_points=%s,
    color_table=%s,
    color_column=1,
    auto_colors=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))""" % (opts.rlib, tmp_in, opts.plot, three, opts.distance, label, table, color, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)
    # cleanup
    os.remove(tmp_in)
    if tmp_group:
        os.remove(tmp_group)
    return 0
def main(args):
    """Print the top N functional annotations (name and abundance) for one metagenome.

    Builds a /matrix/function API query with the given cutoffs, optionally
    restricts results to the sub-annotations of --filter_name/--filter_level
    (resolved via the /m5nr/ontology API), and writes the top --top entries as
    tab-separated "name<TAB>abundance" lines.  Returns 0 on success, 1 on
    invalid input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)  # already int via argparse type=int; kept for safety
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url; asynchronous=1 means the request is polled via async_rest_api
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    # get sub annotations: the set of names under the requested filter branch
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version),
                   ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        # the ontology records key the deepest level as 'level4' when the
        # requested level is 'function'
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))
    # sort data: collect at most --top annotations by descending abundance
    if biom["matrix_type"] == "sparse":
        # sparse entries are (row, col, value) triples; sort by the value
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id'] # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        # sort row indices by their data rows (single-column lists compare by
        # first element), descending
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id'] # if opts.source != 'Subsystems' else biom['rows'][n]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]
    # output data, highest abundance first
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    return 0
def main(args):
    """Display statistics for a metagenome.

    --stat selects the statistic: 'sequence' (key/value table), 'bp_profile',
    'drisee', 'kmer', 'rarefaction', or a taxa level name present in the
    metagenome's taxonomy statistics.  With --plot the numeric output is
    rendered as ASCII art (via aplotter / plot_histo) instead of a table.

    Returns 0 on success, 1 on invalid input or missing statistics.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--plot", dest="plot", action="store_true", default=False,
                        help="display plot in ASCII art instead of table of numbers for: bp_profile, drisee, kmer, rarefaction, or taxa level")
    parser.add_argument("--stat", dest="stat", default='sequence',
                        help="type of stat to display, use keyword: 'sequence', 'bp_profile', 'drisee', 'kmer', 'rarefaction', or taxa level name, default is sequence")
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build call url
    url = opts.url + '/metagenome/' + opts.id + '?verbosity=stats&public=1'
    # retrieve / output data
    result = obj_from_url(url, auth=token)
    stats = result['statistics']
    if opts.stat == 'sequence':
        for s in sorted(stats['sequence_stats'].keys()):
            safe_print("%s\t%s\n" % (s, stats['sequence_stats'][s]))
    elif opts.stat == 'bp_profile':
        if not stats['qc']['bp_profile']['percents']['data']:
            sys.stderr.write("ERROR: %s has no bp_profile statistics\n" % opts.id)
            return 1
        if opts.plot:
            # columns 1-4 are the A/C/G/T percent columns
            cols = stats['qc']['bp_profile']['percents']['columns'][1:5]
            # materialized list: a py3 map iterator would break indexing in plot_histo
            data = [x[1:5] for x in stats['qc']['bp_profile']['percents']['data']]
            plot_histo(cols, data, 20, 80)
        else:
            safe_print("\t".join(stats['qc']['bp_profile']['percents']['columns']) + "\n")
            for d in stats['qc']['bp_profile']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'drisee':
        if not stats['qc']['drisee']['percents']['data']:
            sys.stderr.write("ERROR: %s has no drisee statistics\n" % opts.id)
            return 1
        if opts.plot:
            # x = position (col 0), y = total error percent (col 7)
            x, y = [], []
            for d in stats['qc']['drisee']['percents']['data']:
                x.append(d[0])
                y.append(d[7])
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['drisee']['percents']['columns']) + "\n")
            for d in stats['qc']['drisee']['percents']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'kmer':
        if not stats['qc']['kmer']['15_mer']['data']:
            sys.stderr.write("ERROR: %s has no kmer statistics\n" % opts.id)
            return 1
        if opts.plot:
            # log-log plot of kmer coverage (col 3) vs count (col 0)
            x, y = [], []
            for d in stats['qc']['kmer']['15_mer']['data']:
                x.append(math.log(d[3], 10))
                y.append(math.log(d[0], 10))
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['kmer']['15_mer']['columns']) + "\n")
            for d in stats['qc']['kmer']['15_mer']['data']:
                safe_print("\t".join(map(str, d)) + "\n")
    elif opts.stat == 'rarefaction':
        if not stats['rarefaction']:
            sys.stderr.write("ERROR: %s has no rarefaction statistics\n" % opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for r in stats['rarefaction']:
                x.append(int(r[0]))
                y.append(float(r[1]))
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("x\ty\n")
            for r in stats['rarefaction']:
                safe_print("%s\t%s\n" % (str(r[0]), str(r[1])))
    elif opts.stat in stats['taxonomy']:
        # sort by descending abundance, then name for ties
        ranked = sorted(stats['taxonomy'][opts.stat], key=lambda x: (-int(x[1]), x[0]))
        if opts.plot:
            # fixed: the original sliced the map object itself (map(...)[: 50]),
            # which raises TypeError on Python 3
            top = [int(x[1]) for x in ranked[:50]]
            aplotter.plot(top, output=sys.stdout, draw_axes=True, plot_slope=False, min_x=0, min_y=0)
        else:
            for t in ranked:
                safe_print("%s\t%s\n" % (t[0], str(t[1])))
    else:
        sys.stderr.write("ERROR: invalid stat type\n")
        return 1
    return 0
def main(args):
    """Project-level admin actions against the MG-RAST API.

    Positional args are the action word and a project/submission ID.  Actions:
    get-info, get-metadata, update-metadata (uploads --file), make-public,
    submit-ebi (optionally with --taxa), status-ebi.  All responses are
    pretty-printed as JSON.  Returns 0 on success, 1 on invalid input.
    """
    global API_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # access options
    parser.add_argument("-u", "--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    # other options
    parser.add_argument("-f", "--file", dest="mdfile", default=None, help="metadata .xlsx file")
    parser.add_argument("--taxa", dest="taxa", default=None, help="metagenome_taxonomy for project: http://www.ebi.ac.uk/ena/data/view/Taxon:408169")
    parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Run in debug mode")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("args", type=str, nargs="+", help="Action (" + ",".join(valid_actions)+")" )
    # get inputs
    opts = parser.parse_args()
    args = opts.args
    API_URL = opts.url
    # validate inputs
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action\n")
        return 1
    action = args[0]
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n"%", ".join(valid_actions))
        return 1
    if len(args) < 2:
        sys.stderr.write("ERROR: missing Project ID\n")
        return 1
    pid = args[1]
    # bool addition: 0 (neither flag), 1, or 2 (both); passed through to
    # post_file's debug parameter as an int verbosity level
    DEBUG = opts.verbose + opts.debug
    # get token
    token = get_auth_token(opts)
    if not token:
        token = input('Enter your MG-RAST auth token: ')
    # actions
    if action == "get-info":
        data = obj_from_url(opts.url+'/project/'+pid+'?verbosity=verbose&nocache=1', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "get-metadata":
        data = obj_from_url(opts.url+'/metadata/export/'+pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "update-metadata":
        result = post_file(opts.url+'/metadata/update', 'upload', opts.mdfile, auth=token, data=json.dumps({'project': pid}, separators=(',',':')), debug=DEBUG)
        # fixed: the original printed the undefined name 'data' here, raising
        # NameError instead of showing the update response
        print(json.dumps(result, sort_keys=True, indent=4))
    elif action == "make-public":
        data = obj_from_url(opts.url+'/project/'+pid+'/makepublic', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "submit-ebi":
        debug = 1 if opts.debug else 0
        info = { 'project_id': pid, 'debug': debug }
        if opts.taxa:
            info['project_taxonomy'] = opts.taxa
        data = obj_from_url(opts.url+'/submission/ebi', auth=token, data=json.dumps(info, separators=(',',':')))
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "status-ebi":
        data = obj_from_url(opts.url+'/submission/'+pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    return 0
def main(args):
    """Print the top N organism annotations (name and abundance) for one metagenome.

    Builds a /matrix/organism API query with the given cutoffs, optionally
    restricts results to the sub-taxa of --filter_name/--filter_level (via the
    /m5nr/taxonomy API), and writes the top --top entries as tab-separated
    "name<TAB>abundance" lines.  Returns 0 on success, 1 on invalid input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='species',
                        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument("--source", dest="source", default='SEED',
                        help="datasource to filter results by, default is SEED")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5,
                        help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60,
                        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15,
                        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1,
                        help="M5NR annotation version to use, default is 1")
    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url; asynchronous=1 means the request is polled via async_rest_api
    params = [('id', opts.id), ('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length),
              ('version', opts.version), ('result_type', 'abundance'),
              ('asynchronous', '1'), ('hide_metadata', '1')]
    url = opts.url + '/matrix/organism?' + urlencode(params, True)
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    # get sub annotations: names under the requested filter branch
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [('filter', opts.filter_name), ('filter_level', opts.filter_level),
                  ('min_level', opts.level), ('version', opts.version)]
        url = opts.url + '/m5nr/taxonomy?' + urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set(map(lambda x: x[opts.level], data['data']))
    # normalize to a dense matrix so every row's first column is its abundance
    # NOTE(review): key 'cols' — BIOM usually names this 'columns'; confirm
    # against the actual API payload
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    # fixed: read abundances from the densified matrix; the original indexed
    # biom['data'] directly, which for sparse input holds (row, col, value)
    # triples, so element 0 was a row index rather than an abundance
    datalist = [data[i][0] for i in range(len(biom['rows']))]
    data2 = zip(rows, datalist)
    # sort data: collect at most --top names by descending abundance
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]
    # output data, highest abundance first
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (k, v))
    return 0
def main(args):
    """Print the top N organism annotations (name and abundance) for one metagenome.

    optparse twin of the argparse version above.  Builds a /matrix/organism
    API query with the given cutoffs, optionally restricts results to the
    sub-taxa of --filter_name/--filter_level (via the /m5nr/taxonomy API), and
    writes the top --top entries as tab-separated "name<TAB>abundance" lines.
    Returns 0 on success, 1 on invalid input.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_option("", "--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_option("", "--top", dest="top", type="int", default=10, help="display only the top N taxa, default is 10")
    parser.add_option("", "--evalue", dest="evalue", type="int", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", type="int", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", type="int", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--version", type="int", dest="version", default=1, help="M5NR annotation version to use, default is 1")
    # get inputs
    (opts, args) = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # build url; asynchronous=1 means the request is polled via async_rest_api
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/organism?'+urlencode(params, True)
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    # get sub annotations: names under the requested filter branch
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set( map(lambda x: x[opts.level], data['data']) )
    # normalize to a dense matrix so every row's first column is its abundance
    # NOTE(review): key 'cols' — BIOM usually names this 'columns'; confirm
    # against the actual API payload
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    # fixed: read abundances from the densified matrix; the original indexed
    # biom['data'] directly, which for sparse input holds (row, col, value)
    # triples, so element 0 was a row index rather than an abundance
    datalist = [data[i][0] for i in range(len(biom['rows']))]
    data2 = zip(rows, datalist)
    # sort data: collect at most --top names by descending abundance
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]
    # output data, highest abundance first
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    return 0
# NOTE(review): this fragment appears truncated — the first two statements
# reference names (item, public, mg_name, project_id, project_name) that are
# not defined in this chunk; presumably they belong to the body of a loop over
# /search results whose beginning was lost.  TODO confirm against the original
# script.
sys.stderr.write(repr(item))
sys.stdout.write((
    "\t".join([
        item["metagenome_id"],
        # str(len(item.keys())),
        repr(public),
        item["created_on"],
        mg_name,
        project_id,
        project_name
    ]) + "\n"))

# module-level setup for a paginated /search query
CALL = "/search"
key = get_auth_token()
# assign parameters
limit = 1000
# initial call
# construct API call; public metagenomes ordered by creation date, ascending
parameters = { "limit": limit, "order": "created_on", "direction": "asc", "public": "1" }
API_URL = "https://api.mg-rast.org/"
base_url = API_URL + CALL + "?" + urlencode(parameters)
def main(args):
    """Search metagenomes and stream their annotated sequences.

    Builds a metagenome search query from any SEARCH_FIELDS options the
    user supplied, pages through all matching results, then prints the
    annotation sequences for every matching metagenome to stdout.
    Returns 0 on success, 1 on usage error.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    # NOTE(fix): optparse rejects "" as an option string ("invalid option
    # string: must be at least two characters long"), so only the long
    # option names are registered.
    parser.add_option("--url", dest="url", default=API_URL, help="API url")
    parser.add_option("--user", dest="user", default=None, help="OAuth username")
    parser.add_option("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("--token", dest="token", default=None, help="OAuth token")
    parser.add_option("--level", dest="level", default='function', help="function level to filter by")
    parser.add_option("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_option("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("--identity", dest="identity", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    for sfield in SEARCH_FIELDS:
        parser.add_option("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)

    # get inputs
    (opts, args) = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # build url for metagenome query: fixed paging options plus any
    # search fields the user set
    params = [ ('limit', '100'), ('verbosity', 'minimal'), ('match', 'all'), ('status', opts.status) ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append( (sfield, getattr(opts, sfield)) )
    url = opts.url+'/metagenome?'+urlencode(params, True)

    # retrieve query results
    result = obj_from_url(url, auth=token)
    if len(result['data']) == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    mgids = set( map(lambda x: x['id'], result['data']) )
    # follow 'next' links until the result set is exhausted
    while result['next']:
        url = result['next']
        result = obj_from_url(url, auth=token)
        if len(result['data']) == 0:
            break
        for d in result['data']:
            mgids.add(d['id'])

    # get sequences for mgids
    for mg in mgids:
        params = [ ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length) ]
        # ontology datasources queried at a non-function level need an
        # ontology-type annotation lookup
        if (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function'):
            params.append(('type', 'ontology'))
        else:
            params.append(('type', 'function'))
        if opts.function:
            params.append(('filter', opts.function))
        if opts.level:
            params.append(('filter_level', opts.level))
        url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(params, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(url, auth=token)
    return 0
DEBUG = 0 if __name__ == '__main__': usage = "usage: %prog [options] URI" parser = ArgumentParser(usage) parser.add_argument("-v", "--verbose", dest="verbose", action="store_true") parser.add_argument("-k", "--token", dest="token", type=str, help="Auth token") parser.add_argument("URI", type=str, help="URI to query") opts = parser.parse_args() key = get_auth_token(opts) if opts.verbose: print("KEY = {}".format(key), file=sys.stderr) # assign parameters URI = opts.URI # construct API call print(URI, file=sys.stderr) # retrieve the data by sending at HTTP GET request to the MG-RAST API jsonstructure = async_rest_api(URI, auth=key) # unpack and display the data table if type(jsonstructure) == dict: # If we have data, not json structure print(json.dumps(jsonstructure), file=sys.stdout) else:
def main(args):
    """Download project or metagenome files from the communities API.

    With --list, print a table describing every downloadable file for the
    given id instead of downloading.  Otherwise fetch all files (or only
    the one matching --file by id or name) into per-metagenome
    sub-directories of --dir.  Returns 0 on success, 1 on usage error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--file", dest="file", default=None, help="file ID for given project or metagenome")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to do downloads")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files and their info for given ID")

    opts = parser.parse_args()
    if not (opts.project or opts.metagenome):
        sys.stderr.write("ERROR: a project or metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1
    downdir = opts.dir

    # authenticate
    token = get_auth_token(opts)

    # resolve which metagenome ids to operate on
    if opts.project:
        info = obj_from_url(opts.url+'/project/'+opts.project+'?verbosity=full', auth=token)
        mgs = [entry["metagenome_id"] for entry in info['metagenomes']]
    else:
        mgs = [opts.metagenome]

    # fetch the downloadable-file listing for each metagenome
    all_files = {}
    for mg in mgs:
        listing = obj_from_url(opts.url+'/download/'+mg, auth=token)
        all_files[mg] = listing['data']

    # listing mode: print a table and stop
    if opts.list:
        table = PrettyTable(["Metagenome", "File Name", "File ID", "Checksum", "Byte Size"])
        for mg, files in all_files.items():
            for rec in files:
                size = rec['file_size'] if rec['file_size'] else 0
                table.add_row([mg, rec['file_name'], rec['file_id'], rec['file_md5'], size])
        table.align = "l"
        table.align['Byte Size'] = "r"
        print(table)
        return 0

    # download into <dir>[/<project>]/<metagenome>/
    if opts.project:
        downdir = os.path.join(downdir, opts.project)
        if not os.path.isdir(downdir):
            os.mkdir(downdir)
    for mg, files in all_files.items():
        mgdir = os.path.join(downdir, mg)
        if not os.path.isdir(mgdir):
            os.mkdir(mgdir)
        for rec in files:
            # with --file, only the record matching by id or name is fetched
            if (not opts.file) or (rec['file_id'] == opts.file) or (rec['file_name'] == opts.file):
                file_download(token, rec, dirpath=mgdir)
    return 0
def test_async():
    """Smoke-test the asynchronous matrix endpoint against a public metagenome."""
    uri = API_URL + '/matrix/organism?hit_type=single&group_level=strain&evalue=15&source=RefSeq&result_type=abundance&id=mgm4653783.3&asynchronous=1'
    auth = get_auth_token(None)
    print("MG-RAST token: ", auth)
    result = async_rest_api(uri, auth=auth)
    print(repr(result))
def main(args):
    """Print the top N most abundant functional annotations for a metagenome.

    Retrieves an abundance matrix from the API (via the asynchronous
    endpoint), optionally restricts it to annotations under
    --filter_name/--filter_level, and prints the --top highest counts as
    "name<TAB>count" lines.  Returns 0 on success, 1 on usage error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")

    # get inputs (argparse already coerces --top via type=int; the old
    # manual re-cast was redundant and has been removed)
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # --filter_name and --filter_level are only meaningful as a pair
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build matrix request url
    params = [ ('id', opts.id), ('group_level', opts.level), ('source', opts.source),
               ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length),
               ('version', opts.version), ('result_type', 'abundance'),
               ('asynchronous', '1'), ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations to restrict the output, if a filter was requested
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name), ('filter_level', opts.filter_level),
                   ('min_level', opts.level), ('version', opts.version), ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        # the m5nr ontology names the function level 'level4'
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))

    # the async endpoint returns a wrapper object; the BIOM matrix itself
    # is under the 'data' key
    biom = biom["data"]
    assert "matrix_type" in biom.keys(), repr(biom)
    if biom["matrix_type"] == "sparse":
        # sparse entries are [row_idx, col_idx, value]; sort by value
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        # single-metagenome matrix: sorting row indices by the row lists
        # orders by the abundance value in column 0
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]

    # output data, highest abundance first
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    return 0
def main(args):
    """Display statistics for a metagenome, as text tables or ASCII-art plots.

    --stat selects the statistic: 'sequence', 'bp_profile', 'drisee',
    'kmer', 'rarefaction', or a taxa level name.  With --plot the data is
    rendered via aplotter/plot_histo instead of printed as text.
    Returns 0 on success, 1 on error.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    # NOTE(fix): optparse rejects "" as an option string, so only the long
    # option names are registered.
    parser.add_option("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("--user", dest="user", default=None, help="OAuth username")
    parser.add_option("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("--token", dest="token", default=None, help="OAuth token")
    parser.add_option("--plot", dest="plot", action="store_true", default=False, help="display plot in ASCII art instead of table of numbers for: bp_profile, drisee, kmer, rarefaction, or taxa level")
    parser.add_option("--stat", dest="stat", default='sequence', help="type of stat to display, use keyword: 'sequence', 'bp_profile', 'drisee', 'kmer', 'rarefaction', or taxa level name, default is sequence")

    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build call url
    url = opts.url+'/metagenome/'+opts.id+'?verbosity=stats'

    # retrieve / output data
    result = obj_from_url(url, auth=token)
    stats = result['statistics']
    if opts.stat == 'sequence':
        for s in sorted(stats['sequence_stats'].keys()):
            safe_print("%s\t%s\n" %(s, stats['sequence_stats'][s]))
    elif opts.stat == 'bp_profile':
        if not stats['qc']['bp_profile']['percents']['data']:
            sys.stderr.write("ERROR: %s has no bp_profile statistics\n"%opts.id)
            return 1
        if opts.plot:
            cols = stats['qc']['bp_profile']['percents']['columns'][1:5]
            # materialize as a list: a Python 3 map object is exhausted
            # after a single pass
            data = [x[1:5] for x in stats['qc']['bp_profile']['percents']['data']]
            plot_histo(cols, data, 20, 80)
        else:
            safe_print("\t".join(stats['qc']['bp_profile']['percents']['columns'])+"\n")
            for d in stats['qc']['bp_profile']['percents']['data']:
                safe_print("\t".join(map(str, d))+"\n")
    elif opts.stat == 'drisee':
        if not stats['qc']['drisee']['percents']['data']:
            sys.stderr.write("ERROR: %s has no drisee statistics\n"%opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for d in stats['qc']['drisee']['percents']['data']:
                x.append(d[0])
                y.append(d[7])
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['drisee']['percents']['columns'])+"\n")
            for d in stats['qc']['drisee']['percents']['data']:
                safe_print("\t".join(map(str, d))+"\n")
    elif opts.stat == 'kmer':
        if not stats['qc']['kmer']['15_mer']['data']:
            sys.stderr.write("ERROR: %s has no kmer statistics\n"%opts.id)
            return 1
        if opts.plot:
            # log-log plot of the 15-mer profile
            x, y = [], []
            for d in stats['qc']['kmer']['15_mer']['data']:
                x.append( math.log(d[3], 10) )
                y.append( math.log(d[0], 10) )
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("\t".join(stats['qc']['kmer']['15_mer']['columns'])+"\n")
            for d in stats['qc']['kmer']['15_mer']['data']:
                safe_print("\t".join(map(str, d))+"\n")
    elif opts.stat == 'rarefaction':
        if not stats['rarefaction']:
            sys.stderr.write("ERROR: %s has no rarefaction statistics\n"%opts.id)
            return 1
        if opts.plot:
            x, y = [], []
            for r in stats['rarefaction']:
                x.append(int(r[0]))
                y.append(float(r[1]))
            aplotter.plot(x, y, output=sys.stdout, draw_axes=True, plot_slope=True, min_x=0, min_y=0)
        else:
            safe_print("x\ty\n")
            for r in stats['rarefaction']:
                safe_print("%s\t%s\n" %(str(r[0]), str(r[1])))
    elif opts.stat in stats['taxonomy']:
        # sort by abundance (descending), then by name to stabilize ties
        ranked = sorted(stats['taxonomy'][opts.stat], key=lambda x: (-int(x[1]), x[0]))
        if opts.plot:
            # BUGFIX: map objects are not subscriptable in Python 3 —
            # map(...)[:50] raised TypeError; slice first, then convert
            top = [int(x[1]) for x in ranked[:50]]
            aplotter.plot(top, output=sys.stdout, draw_axes=True, plot_slope=False, min_x=0, min_y=0)
        else:
            for t in ranked:
                safe_print("%s\t%s\n" %(t[0], str(t[1])))
    else:
        sys.stderr.write("ERROR: invalid stat type\n")
        return 1
    return 0
def main(args):
    """Export a metagenome's research-object bundle to a directory.

    Downloads the research-object manifest and all aggregate files,
    pulling CWL workflow files from a fresh clone of the MG-RAST pipeline
    repository, and writes BagIt-style sha1 manifest files.  With --list,
    only print the manifest contents.  Returns 0 on success, 1 on usage
    error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info (used later to fill in workflow job inputs)
    url = opts.url + '/metagenome/' + opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url + '/researchobject/manifest/' + opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([ info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"] ])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files
    # NOTE(review): os.system with a fixed repository URL; consider
    # subprocess.run with an argument list if this is ever parameterized
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    git_clone = "git clone https://github.com/MG-RAST/pipeline.git " + pipeline_dir
    os.system(git_clone)

    # download manifest; collect [sha1, relative-path] pairs for the
    # BagIt manifest files written at the end
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    with open(os.path.join(manifest_dir, data["manifest"]), 'w') as mfh:
        mfh.write(data_str)
    # BUGFIX: hashlib.sha1 requires bytes on Python 3; hashing the str
    # raised TypeError
    sha1s.append([ hashlib.sha1(data_str.encode('utf-8')).hexdigest(), os.path.join(base, data["manifest"]) ])

    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... " % (info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            # CWL files come from the pipeline clone; relative include
            # paths are flattened into the bundle folder
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            with open(src, 'r') as sfh:
                text = sfh.read().replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            with open(dst, 'w') as dfh:
                dfh.write(text)
            sha1s.append([ hashlib.sha1(text.encode('utf-8')).hexdigest(), os.path.join(folder, info["bundledAs"]["filename"]) ])
        else:
            with open(os.path.join(folder_dir, info["bundledAs"]["filename"]), 'w') as fh:
                s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            sha1s.append([s1, os.path.join(folder, info["bundledAs"]["filename"])])
        sys.stdout.write("Done\n")

    # output sha1: payload ('data/...') entries go to manifest-sha1.txt,
    # everything else to tagmanifest-sha1.txt
    sha1s.sort(key=lambda x: x[1])
    with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1, \
         open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
        for s1 in sha1s:
            if s1[1].startswith('data'):
                mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
            else:
                tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))

    # cleanup
    shutil.rmtree(pipeline_dir)
    return 0
def main(args):
    """Export a project's metadata from the MG-RAST API to an .xlsx workbook.

    Fetches the full metadata bundle for --project and writes README,
    project, sample, library, and environmental-package worksheets in the
    MG-RAST metadata template layout.  Returns 1 on usage error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    # token option added for consistency with the sibling tools;
    # get_auth_token reads it from opts
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")

    # get inputs
    opts = parser.parse_args()
    # project ids look like "mgpNNNN"
    if not opts.project or opts.project[0:3] != "mgp":
        sys.stderr.write("ERROR: a project id is required\n")
        return 1

    # get auth
    PROJECT = opts.project
    TOKEN = get_auth_token(opts)

    # export metadata
    outfile = PROJECT + "-export.xlsx"
    # BUGFIX: this fetch was commented out, leaving 'k' undefined and the
    # script failing with NameError; the API call is restored
    k = obj_from_url("http://api.mg-rast.org/metadata/export/{project}?verbosity=full".format(project=PROJECT), auth=TOKEN)
    metadata = k
    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)
    worksheet = {}

    # README sheet: placeholder row numbering (template layout)
    worksheet["README"] = workbook.add_worksheet("README")
    row = 0
    for i in range(10):
        worksheet["README"].write_number(row, 0, i)
        row += 1

    # project sheet: one column per project key (label / definition / value)
    worksheet["project"] = workbook.add_worksheet("project")
    project_keys = get_project_keys(metadata)
    col = 0
    for l in project_keys:
        value = metadata["data"][l]["value"]
        definition = metadata["data"][l]["definition"]
        worksheet["project"].write_string(0, col, l)
        worksheet["project"].write_string(1, col, definition)
        worksheet["project"].write_string(2, col, value)
        col += 1

    # sample sheet: one row per sample, one column per sample key
    worksheet["sample"] = workbook.add_worksheet("sample")
    samplekeys = get_sample_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in samplekeys:
            if l in sample["data"].keys():
                value = sample["data"][l]["value"]
                definition = sample["data"][l]["definition"]
                fmt = sample["data"][l]["type"]
                worksheet["sample"].write_string(0, col, l)
                worksheet["sample"].write_string(1, col, definition)
                write_worksheet_value(worksheet["sample"], row, col, value, fmt)
            # advance the column for every key so columns stay aligned
            # across samples
            col += 1
        col = 0
        row += 1

    try:
        librarytype = metadata["samples"][0]["libraries"][0]["data"]["investigation_type"]["value"]
    except IndexError:
        sys.exit("This metadata bundle does not have any libraries")

    # library sheet: named after the first library's investigation type
    worksheet["library"] = workbook.add_worksheet("library "+librarytype)
    libkeys = get_library_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in libkeys:
            if l in sample["libraries"][0]["data"].keys():
                value = sample["libraries"][0]["data"][l]["value"]
                definition = sample["libraries"][0]["data"][l]["definition"]
                fmt = sample["libraries"][0]["data"][l]["type"]
                worksheet["library"].write_string(0, col, l)
                worksheet["library"].write_string(1, col, definition)
                write_worksheet_value(worksheet["library"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1

    # one worksheet per environmental package type
    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    epcol = {}
    eprow = {}
    for ep in eps:
        worksheet[ep] = workbook.add_worksheet("ep " + ep)
        epcol[ep] = 0
        eprow[ep] = 2
    epkeys = get_ep_keys(metadata, eps)
    for sample in metadata["samples"]:
        ep = sample["envPackage"]["type"]
        for l in epkeys[ep]:
            try:
                value = sample["envPackage"]["data"][l]["value"]
                definition = sample["envPackage"]["data"][l]["definition"]
                fmt = sample["envPackage"]["data"][l]["type"]
            except KeyError:
                # key absent for this sample: write an empty cell
                value = ""
                definition = ""
                fmt = "string"
            worksheet[ep].write_string(0, epcol[ep], l)
            worksheet[ep].write_string(1, epcol[ep], definition)
            write_worksheet_value(worksheet[ep], eprow[ep], epcol[ep], value, fmt)
            epcol[ep] += 1
        epcol[ep] = 0
        eprow[ep] += 1
    workbook.close()
def main(args):
    """Generate a PCoA plot (via R) from BIOM or tab-delimited abundance data.

    Reads abundance data from --input (file or stdin), optionally assigns
    metagenomes to groups (from BIOM metadata, a JSON group file/string,
    or a tabbed group table), then renders the plot with plot_mg_pcoa.r.
    Returns 0 on success, 1 on error.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance metric, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--metadata", dest="metadata", default=None, help="metadata field to color by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None, help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1, help="position of group to use, default is 1 (first)")
    parser.add_argument("--color_auto", dest="color_auto", type=int, default=0, help="auto-create colors based on like group names, default is use group name as color: 1=true, 0=false")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # help text corrected: the actual default is 10, not 6
    parser.add_argument("--height", dest="height", type=float, default=10, help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10, help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--three", dest="three", type=int, default=0, help="create 3-D PCoA, default is 2-D: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0, help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.metadata:
        opts.color_auto = 1
    # BUGFIX: 'reference' was in this list but no --reference option exists,
    # so getattr raised AttributeError on every run
    for o in ['color_auto', 'three', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n"%o)
            return 1

    # get auth
    token = get_auth_token(opts)

    # parse inputs into a temp tab-delimited table for R
    tmp_in = 'tmp_'+random_str()+'.txt'
    tmp_hdl = open(tmp_in, 'w')
    mg_list = []
    groups = []
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                indata = json.loads(indata)
                # BUGFIX: materialize as a list; a Python 3 map object has
                # no len() and is exhausted after one iteration, which broke
                # the group matching and the len() comparison below
                mg_list = [c['id'] for c in indata['columns']]
                col_name = True if opts.name == 1 else False
                biom_to_tab(indata, tmp_hdl, col_name=col_name)
                if opts.metadata:
                    groups = metadata_from_biom(indata, opts.metadata)
            except Exception:
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            tmp_hdl.write(indata)
            mg_list = indata.split('\n')[0].strip().split('\t')
    except Exception:
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    tmp_hdl.close()

    # get groups if not in BIOM metadata and option used
    if (len(groups) == 0) and opts.groups:
        # is it json ?
        ## example of 2 group sets in json format:
        ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
        ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
        try:
            gdata = json.load(open(opts.groups, 'r')) if os.path.isfile(opts.groups) else json.loads(opts.groups)
            if opts.group_pos > len(gdata):
                sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                return 1
            for m in mg_list:
                found_g = None
                for g, mgs in gdata[opts.group_pos-1].items():
                    if m in mgs:
                        found_g = g
                        break
                if found_g:
                    groups.append(found_g)
                else:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                    return 1
        # no - its tabbed
        except Exception:
            gtext = open(opts.groups, 'r').read() if os.path.isfile(opts.groups) else opts.groups
            grows, gcols, gdata = tab_to_matrix(gtext)
            if opts.group_pos > len(gdata[0]):
                sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                # BUGFIX: previously fell through after printing the error
                return 1
            for m in mg_list:
                try:
                    midx = gcols.index(m)
                    groups.append(gdata[midx][opts.group_pos-1])
                except Exception:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                    return 1

    # print groups to file for R input
    tmp_group = None
    if len(groups) == len(mg_list):
        tmp_group = 'tmp_'+random_str()+'.txt'
        hdl_group = open(tmp_group, 'w')
        hdl_group.write("\tgroup\n")
        for i, m in enumerate(mg_list):
            # replace non-ASCII characters so R reads the table cleanly
            hdl_group.write("%s\t%s\n"%(m, ''.join([x if ord(x) < 128 else '?' for x in groups[i]])))
        hdl_group.close()
    elif len(groups) > 0:
        sys.stderr.write("Warning: Not all metagenomes in a group\n")

    # build R cmd
    three = 'c(1,2,3)' if opts.three == 1 else 'c(1,2)'
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    table = '"%s"'%tmp_group if tmp_group else 'NA'
    color = 'TRUE' if opts.color_auto == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_pcoa.r")
suppressMessages( plot_mg_pcoa(
    table_in="%s",
    image_out="%s",
    plot_pcs=%s,
    dist_metric="%s",
    label_points=%s,
    color_table=%s,
    color_column=1,
    auto_colors=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))"""%(opts.rlib, tmp_in, opts.plot, three, opts.distance, label, table, color, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)

    # cleanup
    os.remove(tmp_in)
    if tmp_group:
        os.remove(tmp_group)
    return 0
def main(args):
    """Administer an MG-RAST project: info, metadata, publishing, EBI submission.

    Positional args are an action (one of valid_actions) followed by a
    project ID.  API responses are printed as pretty-printed JSON.
    Returns 0 on success, 1 on usage error.
    """
    global API_URL
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    # access options
    parser.add_argument("-u", "--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("-t", "--token", dest="token", default=None, help="MG-RAST token")
    # other options
    parser.add_argument("-f", "--file", dest="mdfile", default=None, help="metadata .xlsx file")
    parser.add_argument("--taxa", dest="taxa", default=None, help="metagenome_taxonomy for project: http://www.ebi.ac.uk/ena/data/view/Taxon:408169")
    parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Run in debug mode")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Verbose STDOUT")
    parser.add_argument("args", type=str, nargs="+", help="Action (" + ",".join(valid_actions) + ")")

    # get inputs
    opts = parser.parse_args()
    args = opts.args
    API_URL = opts.url

    # validate inputs (argparse's nargs="+" guarantees one positional, but
    # the explicit check keeps the original error message if that changes)
    if len(args) < 1:
        sys.stderr.write("ERROR: missing action\n")
        return 1
    action = args[0]
    if action not in valid_actions:
        sys.stderr.write("ERROR: invalid action. use one of: %s\n" % ", ".join(valid_actions))
        return 1
    if len(args) < 2:
        sys.stderr.write("ERROR: missing Project ID\n")
        return 1
    pid = args[1]
    DEBUG = opts.verbose + opts.debug

    # get token, prompting interactively when none was supplied
    token = get_auth_token(opts)
    if not token:
        token = input('Enter your MG-RAST auth token: ')

    # actions
    if action == "get-info":
        data = obj_from_url(opts.url + '/project/' + pid + '?verbosity=verbose&nocache=1', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "get-metadata":
        data = obj_from_url(opts.url + '/metadata/export/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "update-metadata":
        result = post_file(opts.url + '/metadata/update', 'upload', opts.mdfile, auth=token, data=json.dumps({'project': pid}, separators=(',', ':')), debug=DEBUG)
        # BUGFIX: this branch printed an undefined name 'data' (NameError);
        # print the post result instead
        print(json.dumps(result, sort_keys=True, indent=4))
    elif action == "make-public":
        data = obj_from_url(opts.url + '/project/' + pid + '/makepublic', auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "submit-ebi":
        debug = 1 if opts.debug else 0
        info = {'project_id': pid, 'debug': debug}
        if opts.taxa:
            info['project_taxonomy'] = opts.taxa
        data = obj_from_url(opts.url + '/submission/ebi', auth=token, data=json.dumps(info, separators=(',', ':')))
        print(json.dumps(data, sort_keys=True, indent=4))
    elif action == "status-ebi":
        data = obj_from_url(opts.url + '/submission/' + pid, auth=token)
        print(json.dumps(data, sort_keys=True, indent=4))
    return 0
def test_async0():
    """Smoke-test the asynchronous matrix endpoint with a multi-metagenome query."""
    query = API_URL + '/matrix/organism?id=mgm4440275.3&id=mgm4440276.3&id=mgm4440281.3&group_level=phylum&source=RDP&hit_type=single&result_type=abundance&evalue=1&identity=60&length=15&taxid=0&asynchronous=1'
    auth = get_auth_token(None)
    print(auth)
    print(repr(async_rest_api(query, auth=auth)))
else: public = "False" try: mg_name= item["name"] project_id = item["project_id"] project_name = item["project_name"] except KeyError: sys.stderr.write(repr(item)) sys.stdout.write(("\t".join([item["metagenome_id"], # str(len(item.keys())), repr(public), item["created_on"], mg_name, project_id, project_name]) + "\n")) CALL = "/search" key = get_auth_token() # assign parameters limit = 1000 # initial call # construct API call parameters = {"limit": limit, "order":"created_on", "direction": "asc", "public": "1"} API_URL= "https://api.mg-rast.org/" base_url = API_URL + CALL + "?" + urlencode(parameters) # convert the data from a JSON structure to a python data type, a dict of dicts. jsonstructure = obj_from_url(base_url, auth=key) # unpack and display the data table
def main(args):
    """Search MG-RAST metagenomes and stream matching annotation sequences.

    Builds a metagenome search query from the per-field options declared in
    SEARCH_FIELDS (module level), pages through all matching metagenome ids,
    then for each id streams annotated sequences — filtered by source,
    e-value, identity and alignment length — to stdout.

    Returns:
        0 on success (including the "no results" case).
    """
    # argparse normally reflows description/epilog text; these overrides
    # force the prehelp/posthelp templates to print verbatim.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    # One free-text search option is generated per entry in SEARCH_FIELDS.
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    # get inputs
    opts = parser.parse_args()
    # get auth
    token = get_auth_token(opts)
    # build url for metagenome query
    params = [ ('limit', '100'), ('verbosity', 'minimal'), ('match', 'all'), ('status', opts.status) ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    # urlencode(..., True) sets doseq: repeated keys become repeated query params
    url = opts.url+'/metagenome?'+urlencode(params, True)
    # retrieve query results
    result = obj_from_url(url, auth=token)
    if len(result['data']) == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    mgids = set(map(lambda x: x['id'], result['data']))
    # Page through the remaining results; 'next' carries the follow-up URL.
    # NOTE(review): assumes 'next' is always present (falsy when exhausted) —
    # a missing key would raise KeyError; verify against the API contract.
    while result['next']:
        url = result['next']
        result = obj_from_url(url, auth=token)
        if len(result['data']) == 0:
            break
        for d in result['data']:
            mgids.add(d['id'])
    # get sequences for mgids
    for mg in mgids:
        params = [ ('source', opts.source), ('evalue', opts.evalue), ('identity', opts.identity), ('length', opts.length) ]
        # Ontology-style sources request 'ontology' typing unless the caller
        # asked for plain function-level annotations.
        if (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function'):
            params.append(('type', 'ontology'))
        else:
            params.append(('type', 'function'))
        # NOTE(review): opts.function relies on "function" being one of
        # SEARCH_FIELDS; confirm, else this raises AttributeError.
        if opts.function:
            params.append(('filter', opts.function))
            if opts.level:
                params.append(('filter_level', opts.level))
        url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(params, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(url, auth=token)
    return 0
def main(args):
    """Build an organism abundance matrix (BIOM) for a set of metagenomes.

    Queries the MG-RAST matrix/organism endpoint asynchronously, in chunks
    of at most 50 ids, optionally intersecting with a function-annotation
    filter, then writes the merged result as BIOM JSON or tab-separated text.

    Returns:
        0 on success, 1 on invalid arguments.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='genus', help="taxon level to retrieve abundances for, default is genus")
    parser.add_argument("--source", dest="source", default='SEED', help="taxon datasource to filter results by, default is SEED")
    parser.add_argument("--hit_type", dest="hit_type", default='lca', help="Set of organisms to search results by, one of: all, single, lca")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='Subsystems', help="function datasource for insersection, default is Subsystems")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="function level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="function name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # Build the metagenome id list: --ids is either a file (JSON collection
    # object or newline-separated ids) or a comma-separated string.
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as ids_hdl:  # BUGFIX: file handle was never closed
            id_str = ids_hdl.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                # BUGFIX: materialize keys() so the result supports len() and slicing (py3)
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                # BUGFIX: map() is a lazy iterator in py3; build a real list
                id_list = [m['ID'] for m in id_obj['members']]
        except ValueError:  # BUGFIX: narrow the bare except to JSON decode failures
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [ ('group_level', opts.level),
               ('source', opts.source),
               ('hit_type', opts.hit_type),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1') ]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))
    # Retrieve data, merging chunked queries of at most `size` ids each.
    biom = None
    size = 50
    if len(id_list) > size:
        for start in range(0, len(id_list), size):  # BUGFIX: xrange does not exist in py3
            sub_ids = id_list[start:start+size]
            cur_params = copy.deepcopy(params)
            for mg_id in sub_ids:  # renamed: the original shadowed the chunk index
                cur_params.append(('id', mg_id))
            cur_url = opts.url+'/matrix/organism?'+urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged matrix after every chunk
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        for mg_id in id_list:
            params.append(('id', mg_id))
        url = opts.url+'/matrix/organism?'+urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)
    # Resolve --filter_name/--filter_level to the set of row annotations to keep.
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from the m5nr taxonomy hierarchy
        params = [ ('version', opts.version), ('min_level', opts.level) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        for ann in data['data']:
            if (opts.filter_level in ann) and (opts.level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[opts.level])
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom)+"\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    if out_hdl is not sys.stdout:  # BUGFIX: the original closed sys.stdout
        out_hdl.close()
    return 0
def main(args):
    """Export a project's MG-RAST metadata bundle to an Excel workbook.

    Fetches the full metadata export for --project and writes one worksheet
    each for README, project, sample, library and every environmental
    package ("ep") type found in the samples.

    Returns:
        1 on invalid arguments; exits via sys.exit() if no libraries exist.
        Falls off the end (implicit None) on success.
    """
    # argparse normally reflows description/epilog; force verbatim output
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    # get inputs
    opts = parser.parse_args()
    if not opts.project or opts.project[0:3] != "mgp":
        sys.stderr.write("ERROR: a project id is required\n")
        return 1
    # get auth
    PROJECT = opts.project
    TOKEN = get_auth_token(opts)
    # export metadata
    outfile = PROJECT + "-export.xlsx"
    # NOTE: URL is hard-coded (plain http) rather than using a --url option.
    k = obj_from_url(
        "http://api.mg-rast.org/metadata/export/{project}?verbosity=full".
        format(project=PROJECT), auth=TOKEN)
    metadata = k  # previously: json.loads(open(infile).read())
    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)
    worksheet = {}
    # README sheet: placeholder content (the numbers 0..9 in column A)
    worksheet["README"] = workbook.add_worksheet("README")
    row = 0
    for i in range(10):
        worksheet["README"].write_number(row, 0, i)
        row += 1
    # Project sheet: one column per project key; rows are key/definition/value.
    worksheet["project"] = workbook.add_worksheet("project")
    project_keys = get_project_keys(metadata)
    col = 0
    for l in project_keys:
        value = metadata["data"][l]["value"]
        definition = metadata["data"][l]["definition"]
        worksheet["project"].write_string(0, col, l)
        worksheet["project"].write_string(1, col, definition)
        worksheet["project"].write_string(2, col, value)
        col += 1
    # Sample sheet: header rows 0-1 (key, definition), one data row per sample.
    worksheet["sample"] = workbook.add_worksheet("sample")
    samplekeys = get_sample_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in samplekeys:
            # NOTE(review): headers are rewritten for every sample; column
            # position is kept aligned by advancing col even for missing keys.
            if l in sample["data"].keys():
                value = sample["data"][l]["value"]
                definition = sample["data"][l]["definition"]
                fmt = sample["data"][l]["type"]
                worksheet["sample"].write_string(0, col, l)
                worksheet["sample"].write_string(1, col, definition)
                write_worksheet_value(worksheet["sample"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1
    # Library worksheet is named after the first library's investigation_type.
    # NOTE(review): only libraries[0] of each sample is exported — confirm
    # that multi-library samples are not expected here.
    try:
        librarytype = metadata["samples"][0]["libraries"][0]["data"][
            "investigation_type"]["value"]
    except IndexError:
        sys.exit("This metadata bundle does not have any libraries")
    worksheet["library"] = workbook.add_worksheet("library " + librarytype)
    libkeys = get_library_keys(metadata)
    col = 0
    row = 2
    for sample in metadata["samples"]:
        for l in libkeys:
            if l in sample["libraries"][0]["data"].keys():
                value = sample["libraries"][0]["data"][l]["value"]
                definition = sample["libraries"][0]["data"][l]["definition"]
                fmt = sample["libraries"][0]["data"][l]["type"]
                worksheet["library"].write_string(0, col, l)
                worksheet["library"].write_string(1, col, definition)
                write_worksheet_value(worksheet["library"], row, col, value, fmt)
            col += 1
        col = 0
        row += 1
    # One worksheet per environmental-package type, each with its own
    # column/row cursor so samples of different ep types don't collide.
    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    epcol = {}
    eprow = {}
    for ep in eps:
        worksheet[ep] = workbook.add_worksheet("ep " + ep)
        epcol[ep] = 0
        eprow[ep] = 2
    epkeys = get_ep_keys(metadata, eps)
    for sample in metadata["samples"]:
        ep = sample["envPackage"]["type"]
        for l in epkeys[ep]:
            # Missing keys are written as empty strings rather than skipped.
            try:
                value = sample["envPackage"]["data"][l]["value"]
                definition = sample["envPackage"]["data"][l]["definition"]
                fmt = sample["envPackage"]["data"][l]["type"]
            except KeyError:
                value = ""
                definition = ""
                fmt = "string"
            worksheet[ep].write_string(0, epcol[ep], l)
            worksheet[ep].write_string(1, epcol[ep], definition)
            write_worksheet_value(worksheet[ep], eprow[ep], epcol[ep], value, fmt)
            epcol[ep] += 1
        epcol[ep] = 0
        eprow[ep] += 1
    workbook.close()
def main(args):
    """List or download the files of a project or a single metagenome.

    With --list, prints a table of available files (name, id, md5, size).
    Otherwise downloads files into per-metagenome subdirectories of --dir,
    optionally restricted to one file via --file (matched by id or name).

    Returns:
        0 on success, 1 on invalid arguments.
    """
    # argparse normally reflows description/epilog; force verbatim output
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--project", dest="project", default=None, help="project ID")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--file", dest="file", default=None, help="file ID for given project or metagenome")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to do downloads")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files and their info for given ID")
    # get inputs
    opts = parser.parse_args()
    if not (opts.project or opts.metagenome):
        sys.stderr.write("ERROR: a project or metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1
    downdir = opts.dir
    # get auth
    token = get_auth_token(opts)
    # Build the metagenome list: all members of --project, or the single
    # --metagenome (--project wins when both are given).
    mgs = []
    if opts.project:
        url = opts.url + '/project/' + opts.project + '?verbosity=full'
        data = obj_from_url(url, auth=token)
        for mg in data['metagenomes']:
            mgs.append(mg["metagenome_id"])
    elif opts.metagenome:
        mgs.append(opts.metagenome)
    # Fetch the downloadable-file manifest for each metagenome.
    all_files = {}
    for mg in mgs:
        url = opts.url + '/download/' + mg
        data = obj_from_url(url, auth=token)
        all_files[mg] = data['data']
    # just list
    if opts.list:
        pt = PrettyTable(
            ["Metagenome", "File Name", "File ID", "Checksum", "Byte Size"])
        for mg, files in all_files.items():
            for f in files:
                # missing/falsy size is reported as 0
                fsize = f['file_size'] if f['file_size'] else 0
                pt.add_row(
                    [mg, f['file_name'], f['file_id'], f['file_md5'], fsize])
        pt.align = "l"
        pt.align['Byte Size'] = "r"
        print(pt)
        return 0
    # download all in dirs by ID
    if opts.project:
        downdir = os.path.join(downdir, opts.project)
        if not os.path.isdir(downdir):
            os.mkdir(downdir)
    for mg, files in all_files.items():
        mgdir = os.path.join(downdir, mg)
        if not os.path.isdir(mgdir):
            os.mkdir(mgdir)
        for f in files:
            # With --file, download only the entry matching by id or name;
            # without it, download everything.
            # NOTE(review): reconstructed indentation — the else is taken to
            # bind to `if opts.file` (download-all when no filter), which is
            # the only reading that makes --file act as a filter; confirm.
            if opts.file:
                if f['file_id'] == opts.file:
                    file_download(token, f, dirpath=mgdir)
                elif f['file_name'] == opts.file:
                    file_download(token, f, dirpath=mgdir)
            else:
                file_download(token, f, dirpath=mgdir)
    return 0
def main(args):
    """Build a function abundance matrix (BIOM) for a set of metagenomes.

    Queries the MG-RAST matrix/function endpoint asynchronously, in chunks
    of at most 50 ids, optionally intersecting with a taxon-annotation
    filter, then writes the merged result as BIOM JSON or tab-separated text.

    Returns:
        0 on success, 1 on invalid arguments.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument(
        "--ids", dest="ids", default=None,
        help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument(
        "--level", dest="level", default='level3',
        help="functional level to retrieve abundances for, default is level3")
    parser.add_argument(
        "--source", dest="source", default='Subsystems',
        help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level", dest="filter_level", default=None,
                        help="function level to filter by")
    parser.add_argument(
        "--filter_name", dest="filter_name", default=None,
        help="function name to filter by, file or comma seperated list")
    parser.add_argument(
        "--intersect_source", dest="intersect_source", default='SEED',
        help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None,
                        help="taxon level for insersection")
    parser.add_argument(
        "--intersect_name", dest="intersect_name", default=None,
        help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument(
        "--output", dest="output", default='-',
        help="output: filename or stdout (-), default is stdout")
    parser.add_argument(
        "--format", dest="format", default='biom',
        help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument(
        "--evalue", type=int, dest="evalue", default=15,
        help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument(
        "--identity", type=int, dest="identity", default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length", type=int, dest="length", default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1,
                        help="M5NR annotation version to use, default is 1")
    parser.add_argument(
        "--temp", dest="temp", default=None,
        help="filename to temporarly save biom output at each iteration")
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write(
            "ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    # get auth
    token = get_auth_token(opts)
    # Build the metagenome id list: --ids is either a file (JSON collection
    # object or newline-separated ids) or a comma-separated string.
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as ids_hdl:  # BUGFIX: file handle was never closed
            id_str = ids_hdl.read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                # BUGFIX: materialize keys() so the result supports len() and slicing (py3)
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                # BUGFIX: map() is a lazy iterator in py3; build a real list
                id_list = [m['ID'] for m in id_obj['members']]
        except ValueError:  # BUGFIX: narrow the bare except to JSON decode failures
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity),
              ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))
    # Retrieve data, merging chunked queries of at most `size` ids each.
    biom = None
    size = 50
    if len(id_list) > size:
        for start in range(0, len(id_list), size):  # BUGFIX: xrange does not exist in py3
            sub_ids = id_list[start:start + size]
            cur_params = copy.deepcopy(params)
            for mg_id in sub_ids:  # renamed: the original shadowed the chunk index
                cur_params.append(('id', mg_id))
            cur_url = opts.url + '/matrix/function?' + urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # checkpoint the merged matrix after every chunk
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        for mg_id in id_list:
            params.append(('id', mg_id))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)
    # Resolve --filter_name/--filter_level to the set of row annotations to keep.
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from the m5nr ontology hierarchy
        params = [('version', opts.version), ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        # m5nr stores the leaf function level under the 'level4' key
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level in ann) and (level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[level])
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom) + "\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    if out_hdl is not sys.stdout:  # BUGFIX: the original closed sys.stdout
        out_hdl.close()
    return 0
def main(args):
    """Render a boxplot image from abundance profile data via R.

    Reads BIOM or tab-separated input (file or stdin), converts BIOM to a
    tab table, then invokes plot_mg_boxplot.r to produce the --plot image.

    Returns:
        0 on success, 1 on invalid arguments or unreadable input.
    """
    # optparse normally reflows description/epilog; force verbatim output
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_option("", "--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_option("", "--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_option("", "--rlib", dest="rlib", default=None, help="R lib path")
    # BUGFIX: help text now matches the actual defaults (was "default is 4"/"default is 5")
    parser.add_option("", "--height", dest="height", type="float", default=8.5, help="image height in inches, default is 8.5")
    parser.add_option("", "--width", dest="width", type="float", default=11, help="image width in inches, default is 11")
    parser.add_option("", "--dpi", dest="dpi", type="int", default=300, help="image DPI, default is 300")
    parser.add_option("", "--name", dest="name", type="int", default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_option("", "--label", dest="label", type="int", default=0, help="label image rows, default is off: 1=true, 0=false")
    # get inputs
    (opts, args) = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # BUGFIX: validate only options that exist; 'reference' was never defined
    # as an option, so getattr(opts, 'reference') raised AttributeError.
    for o in ['name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n"%o)
            return 1
    # BUGFIX: removed a stray 'return 0' here that made everything below unreachable.
    # get auth (may prompt; kept for parity with the other subcommands even
    # though the R pipeline itself does not use the token)
    token = get_auth_token(opts)
    # parse input into a tab table that R can read
    tmp_in = 'tmp_'+random_str()+'.txt'
    tmp_hdl = open(tmp_in, 'w')
    try:
        indata = sys.stdin.read() if opts.input == '-' else open(opts.input, 'r').read()
        if opts.format == 'biom':
            try:
                indata = json.loads(indata)
                col_name = True if opts.name == 1 else False
                biom_to_tab(indata, tmp_hdl, col_name=col_name)
            except Exception:  # BUGFIX: no bare except
                sys.stderr.write("ERROR: input BIOM data not correct format\n")
                return 1
        else:
            tmp_hdl.write(indata)
    except Exception:  # BUGFIX: no bare except
        sys.stderr.write("ERROR: unable to load input data\n")
        return 1
    finally:
        # BUGFIX: close the temp handle on all paths (early returns leaked it)
        tmp_hdl.close()
    # build R cmd
    label = 'TRUE' if opts.label == 1 else 'FALSE'
    r_cmd = """source("%s/plot_mg_boxplot.r")
suppressMessages( plot_mg_boxplot(
    table_in="%s",
    image_out="%s",
    label_rows=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))"""%(opts.rlib, tmp_in, opts.plot, label, opts.height, opts.width, opts.dpi)
    execute_r(r_cmd)
    # cleanup
    os.remove(tmp_in)
    return 0