def test_500():
    """A call to a nonexistent API endpoint must terminate with SystemExit."""
    url = API_URL + '/nonexistentapicall'
    try:
        async_rest_api(url, auth="")
    except SystemExit:
        pass
    else:
        # Reaching this point means the request did not abort as expected.
        assert False
def test_private():
    """Requesting a private metagenome without credentials must raise SystemExit."""
    # mgm4454266.3 is private
    url = API_URL + '/matrix/organism?id=mgm4454266.3'
    try:
        async_rest_api(url, auth="")
    except SystemExit:
        pass
    else:
        # The unauthenticated request was expected to abort.
        assert False
def test_nonexist():
    """Query a deleted metagenome; both a normal return and SystemExit are accepted."""
    # mgm4454394.3 is deleted
    url = API_URL + '/matrix/organism?id=mgm4454394.3'
    try:
        async_rest_api(url, auth="")
    except SystemExit:
        # Aborting is tolerated here, as is returning normally.
        pass
def test_badkey():
    """Requesting a private metagenome with a bogus key must raise SystemExit."""
    # mgm4454266.3 is private
    url = API_URL + '/matrix/organism?id=mgm4454266.3'
    try:
        async_rest_api(url, auth="ABCDEFGThisIsOneNoGoodKey")
    except SystemExit:
        pass
    else:
        # The request with an invalid key was expected to abort.
        assert False
def test_async0():
    """Run an asynchronous phylum-level organism matrix query and print the result."""
    url = API_URL + (
        '/matrix/organism'
        '?id=mgm4440275.3&id=mgm4440276.3&id=mgm4440281.3'
        '&group_level=phylum&source=RDP&hit_type=single&result_type=abundance'
        '&evalue=1&identity=60&length=15&taxid=0&asynchronous=1'
    )
    auth_token = get_auth_token(None)
    print(auth_token)
    result = async_rest_api(url, auth=auth_token)
    print(repr(result))
def test_async_matrix3():
    """Run a family-level RefSeq organism matrix query for three metagenomes.

    The response returned by ``async_rest_api`` is printed; nothing is asserted.
    """
    # NOTE: a larger seven-metagenome, phylum-level RDP query used to be
    # assigned to URI first and was then immediately overwritten (it was
    # marked "takes too long??").  That dead assignment has been dropped.
    URI = API_URL + (
        '/matrix/organism'
        '?id=mgm4447943.3&id=mgm4447192.3&id=mgm4447102.3'
        '&group_level=family&source=RefSeq&evalue=15'
    )
    token = get_auth_token(None)
    response = async_rest_api(URI, auth=token)
    print(response)
def main(args):
    """Print the most abundant functional annotations of a single metagenome.

    Command-line options are read by ``parser.parse_args()``; the ``args``
    parameter itself is not consulted.  Each output line is
    ``<annotation>\\t<abundance>``, sorted by decreasing abundance.

    Returns 0 on success and 1 when option validation fails.
    """
    # Make argparse return description and epilog as-is, ignoring the formatter.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)

    # Options are registered in this order, which is also the help order.
    option_specs = [
        ("--id", dict(dest="id", default=None, help="KBase Metagenome ID")),
        ("--url", dict(dest="url", default=API_URL, help="communities API url")),
        ("--user", dict(dest="user", default=None, help="OAuth username")),
        ("--passwd", dict(dest="passwd", default=None, help="OAuth password")),
        ("--token", dict(dest="token", default=None, help="OAuth token")),
        ("--level", dict(dest="level", default='function',
                         help="functional level to retrieve abundances for, default is function")),
        ("--source", dict(dest="source", default='Subsystems',
                          help="datasource to filter results by, default is Subsystems")),
        ("--filter_name", dict(dest="filter_name", default=None, help="function name to filter by")),
        ("--filter_level", dict(dest="filter_level", default=None, help="function level to filter by")),
        ("--top", dict(dest="top", type=int, default=10,
                       help="display only the top N taxa, default is 10")),
        ("--evalue", dict(dest="evalue", type=int, default=5,
                          help="negative exponent value for maximum e-value cutoff, default is 5")),
        ("--identity", dict(dest="identity", type=int, default=60,
                            help="percent value for minimum %% identity cutoff, default is 60")),
        ("--length", dict(dest="length", type=int, default=15,
                          help="value for minimum alignment length cutoff, default is 15")),
        ("--version", dict(type=int, dest="version", default=1,
                           help="M5NR annotation version to use, default is 1")),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    # Exactly one of the two filter options being set is an error.
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    matrix_query = [
        ('id', opts.id),
        ('group_level', opts.level),
        ('source', opts.source),
        ('evalue', opts.evalue),
        ('identity', opts.identity),
        ('length', opts.length),
        ('version', opts.version),
        ('result_type', 'abundance'),
        ('asynchronous', '1'),
        ('hide_metadata', '1'),
    ]
    matrix_url = opts.url + '/matrix/function?' + urlencode(matrix_query, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(matrix_url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        ontology_query = [
            ('filter', opts.filter_name),
            ('filter_level', opts.filter_level),
            ('min_level', opts.level),
            ('version', opts.version),
            ('source', opts.source),
        ]
        ontology_url = opts.url + '/m5nr/ontology?' + urlencode(ontology_query, True)
        ontology = obj_from_url(ontology_url)
        if opts.level == 'function':
            level = 'level4'
        else:
            level = opts.level
        sub_ann = {entry[level] for entry in ontology['data']}

    # sort data
    matrix_type = biom["matrix_type"]
    if matrix_type == "sparse":
        # Sparse entries are indexed as d[0] = row index, d[2] = value.
        for entry in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][entry[0]]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = entry[2]
    elif matrix_type == "dense":
        values = biom['data']
        ranked_rows = sorted(range(len(values)), key=lambda idx: values[idx], reverse=True)
        for row_idx in ranked_rows:
            name = biom['rows'][row_idx]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = values[row_idx][0]

    # output data
    for name, abundance in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (name, abundance))

    return 0
def test_async():
    """Run an asynchronous strain-level RefSeq query for one metagenome and print it."""
    url = API_URL + (
        '/matrix/organism'
        '?hit_type=single&group_level=strain&evalue=15&source=RefSeq'
        '&result_type=abundance&id=mgm4653783.3&asynchronous=1'
    )
    auth_token = get_auth_token(None)
    print("MG-RAST token: ", auth_token)
    result = async_rest_api(url, auth=auth_token)
    print(repr(result))
if __name__ == '__main__':
    # Command-line entry point: fetch a single URI through the asynchronous
    # REST helper and write the response to stdout.
    #
    # argparse's first positional constructor parameter is ``prog``, not
    # ``usage``, and argparse interpolates ``%(prog)s`` (``%prog`` is optparse
    # syntax).  Pass the usage string by keyword in argparse's format; argparse
    # adds the "usage: " prefix itself.
    usage = "%(prog)s [options] URI"
    parser = ArgumentParser(usage=usage)
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    parser.add_argument("-k", "--token", dest="token", type=str, help="Auth token")
    parser.add_argument("URI", type=str, help="URI to query")
    opts = parser.parse_args()

    key = get_auth_token(opts)
    if opts.verbose:
        print("KEY = {}".format(key), file=sys.stderr)

    # assign parameters
    URI = opts.URI

    # construct API call
    print(URI, file=sys.stderr)

    # retrieve the data by sending an HTTP GET request to the MG-RAST API
    jsonstructure = async_rest_api(URI, auth=key)

    # unpack and display the data table
    if isinstance(jsonstructure, str):
        # Raw text payload: write it through unchanged.
        sys.stdout.write(jsonstructure)
    elif isinstance(jsonstructure, bytes):
        # Raw byte payload: decode before writing to the text stream.
        sys.stdout.write(jsonstructure.decode("utf-8"))
    else:
        # Parsed JSON structure (dict, list, ...): serialise it again.
        print(json.dumps(jsonstructure), file=sys.stdout)
def main(args):
    """Retrieve an organism abundance matrix for one or more metagenomes.

    Command-line options are read by ``parser.parse_args()``; the ``args``
    parameter itself is not consulted.  Metagenome IDs come from ``--ids``,
    either a comma-separated list or a file.  More than 50 IDs are requested
    in batches of 50 and combined with ``merge_biom``.  The result is written
    as BIOM JSON or, with ``--format text``, through ``biom_to_tab``.

    Returns 0 on success and 1 when option validation fails.
    """
    # Make argparse return description and epilog as-is, ignoring the formatter.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='genus', help="taxon level to retrieve abundances for, default is genus")
    parser.add_argument("--source", dest="source", default='SEED', help="taxon datasource to filter results by, default is SEED")
    parser.add_argument("--hit_type", dest="hit_type", default='lca', help="Set of organisms to search results by, one of: all, single, lca")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='Subsystems', help="function datasource for insersection, default is Subsystems")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="function level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="function name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if bool(opts.intersect_name) != bool(opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as id_file:
            id_str = id_file.read()
        try:
            id_obj = json.loads(id_str)
            # Materialise as lists: the batching below needs len() and
            # slicing, which dict views and map objects do not support on
            # Python 3.
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                id_list = [member['ID'] for member in id_obj['members']]
        except (ValueError, TypeError, KeyError, AttributeError):
            # Not JSON, or not the expected JSON layout: treat the file as
            # one ID per line.
            id_list = [line.strip() for line in id_str.splitlines() if line.strip()]
    else:
        id_list = opts.ids.strip().split(',')

    params = [ ('group_level', opts.level),
               ('source', opts.source),
               ('hit_type', opts.hit_type),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1') ]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        # Request the matrix in batches of `size` IDs and merge as we go.
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start + size]
            cur_params = params + [('id', mg_id) for mg_id in sub_ids]
            cur_url = opts.url+'/matrix/organism?'+urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # Checkpoint the merged result after every batch.
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        params.extend(('id', mg_id) for mg_id in id_list)
        url = opts.url+'/matrix/organism?'+urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                filter_names = set(f.strip() for f in file_)
        else:
            filter_names = set(opts.filter_name.strip().split(','))
        # annotation mapping from m5nr
        params = [ ('version', opts.version),
                   ('min_level', opts.level) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        for ann in data['data']:
            if (opts.filter_level in ann) and (opts.level in ann) and (ann[opts.filter_level] in filter_names):
                sub_ann.add(ann[opts.level])

    # output data
    to_stdout = (not opts.output) or (opts.output == '-')
    out_hdl = sys.stdout if to_stdout else open(opts.output, 'w')
    try:
        if opts.format == 'biom':
            out_hdl.write(json.dumps(biom)+"\n")
        else:
            biom_to_tab(biom, out_hdl, rows=sub_ann)
    finally:
        # Only close handles opened here; leave sys.stdout open.
        if not to_stdout:
            out_hdl.close()

    return 0
def main(args):
    """Print the most abundant functional annotations of a single metagenome.

    Command-line options are read by ``parser.parse_args()``; the ``args``
    parameter itself is not consulted.  The BIOM matrix is taken from the
    ``"data"`` member of the object returned by ``async_rest_api``.  Each
    output line is ``<annotation>\\t<abundance>``, sorted by decreasing
    abundance.

    Returns 0 on success and 1 when option validation fails or the response
    does not carry a ``matrix_type``.
    """
    # Make argparse return description and epilog as-is, ignoring the formatter.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")

    # get inputs (--top is already converted by argparse via type=int)
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)

    # retrieve data
    top_ann = {}
    response = async_rest_api(url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version),
                   ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(entry[level] for entry in data['data'])

    # The matrix itself is nested under the response's "data" member.
    biom = response["data"]

    # Validate explicitly instead of with `assert`, which is stripped when
    # Python runs with -O.
    if "matrix_type" not in biom:
        sys.stderr.write("ERROR: unexpected API response: %r\n" % (biom,))
        return 1

    # sort data
    if biom["matrix_type"] == "sparse":
        # Sparse entries are indexed as d[0] = row index, d[2] = value.
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    elif biom["matrix_type"] == "dense":
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id']
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))

    return 0
from mglib import async_rest_api, get_auth_token

DEBUG = 0

if __name__ == '__main__':
    # Command-line entry point: fetch a single URI through the asynchronous
    # REST helper and write the response to stdout.
    #
    # argparse's first positional constructor parameter is ``prog``, not
    # ``usage``, and argparse interpolates ``%(prog)s`` (``%prog`` is optparse
    # syntax).  Pass the usage string by keyword in argparse's format; argparse
    # adds the "usage: " prefix itself.
    usage = "%(prog)s [options] URI"
    parser = ArgumentParser(usage=usage)
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    parser.add_argument("-k", "--token", dest="token", type=str, help="Auth token")
    parser.add_argument("URI", type=str, help="URI to query")
    opts = parser.parse_args()

    key = get_auth_token(opts)
    if opts.verbose:
        print("KEY = {}".format(key), file=sys.stderr)

    # assign parameters
    URI = opts.URI

    # construct API call
    print(URI, file=sys.stderr)

    # retrieve the data by sending an HTTP GET request to the MG-RAST API
    jsonstructure = async_rest_api(URI, auth=key)

    # unpack and display the data table
    if isinstance(jsonstructure, str):
        # Raw text payload: write it through unchanged.
        sys.stdout.write(jsonstructure)
    elif isinstance(jsonstructure, bytes):
        # Raw byte payload: json.dumps cannot serialise bytes, so decode it.
        sys.stdout.write(jsonstructure.decode("utf-8"))
    else:
        # Parsed JSON structure (dict, list, ...): serialise it again.
        print(json.dumps(jsonstructure), file=sys.stdout)
def main(args):
    """Print the most abundant taxa of a single metagenome.

    Command-line options are read by ``parser.parse_args()``; the ``args``
    parameter itself is not consulted.  Each output line is
    ``<taxon>\\t<abundance>``, sorted by decreasing abundance.

    Returns 0 on success and 1 when option validation fails.
    """
    # Make argparse return description and epilog as-is, ignoring the formatter.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument(
        "--level", dest="level", default='species',
        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument(
        "--source", dest="source", default='SEED',
        help="datasource to filter results by, default is SEED")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10,
                        help="display only the top N taxa, default is 10")
    parser.add_argument(
        "--evalue", dest="evalue", type=int, default=5,
        help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument(
        "--identity", dest="identity", type=int, default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length", dest="length", type=int, default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1,
                        help="M5NR annotation version to use, default is 1")

    # get inputs (--top is already converted by argparse via type=int)
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    params = [('id', opts.id),
              ('group_level', opts.level),
              ('source', opts.source),
              ('evalue', opts.evalue),
              ('identity', opts.identity),
              ('length', opts.length),
              ('version', opts.version),
              ('result_type', 'abundance'),
              ('asynchronous', '1'),
              ('hide_metadata', '1')]
    url = opts.url + '/matrix/organism?' + urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [('filter', opts.filter_name),
                  ('filter_level', opts.filter_level),
                  ('min_level', opts.level),
                  ('version', opts.version)]
        url = opts.url + '/m5nr/taxonomy?' + urlencode(params, True)
        taxonomy = obj_from_url(url)
        sub_ann = set(entry[opts.level] for entry in taxonomy['data'])

    # Normalise to a dense matrix so every row can be read the same way.
    if biom['matrix_type'] == "dense":
        matrix = biom['data']
    else:
        matrix = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))

    # Pair each row id with its first-column value.  Read from the dense
    # `matrix`, not from biom['data']: for sparse responses biom['data'] is a
    # list of entries whose first element is a row index, not an abundance.
    abundances = [(row['id'], matrix[i][0]) for i, row in enumerate(biom['rows'])]

    # sort data
    for name, value in sorted(abundances, key=itemgetter(1), reverse=True):
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = value

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (k, v))

    return 0
def main(args):
    """Retrieve a functional abundance matrix for one or more metagenomes.

    Command-line options are read by ``parser.parse_args()``; the ``args``
    parameter itself is not consulted.  Metagenome IDs come from ``--ids``,
    either a comma-separated list or a file.  More than 50 IDs are requested
    in batches of 50 and combined with ``merge_biom``.  The result is written
    as BIOM JSON or, with ``--format text``, through ``biom_to_tab``.

    Returns 0 on success and 1 when option validation fails.
    """
    # Make argparse return description and epilog as-is, ignoring the formatter.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument(
        "--ids", dest="ids", default=None,
        help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument(
        "--level", dest="level", default='level3',
        help="functional level to retrieve abundances for, default is level3")
    parser.add_argument(
        "--source", dest="source", default='Subsystems',
        help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level", dest="filter_level", default=None,
                        help="function level to filter by")
    parser.add_argument(
        "--filter_name", dest="filter_name", default=None,
        help="function name to filter by, file or comma seperated list")
    parser.add_argument(
        "--intersect_source", dest="intersect_source", default='SEED',
        help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None,
                        help="taxon level for insersection")
    parser.add_argument(
        "--intersect_name", dest="intersect_name", default=None,
        help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument(
        "--output", dest="output", default='-',
        help="output: filename or stdout (-), default is stdout")
    parser.add_argument(
        "--format", dest="format", default='biom',
        help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument(
        "--evalue", type=int, dest="evalue", default=15,
        help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument(
        "--identity", type=int, dest="identity", default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length", type=int, dest="length", default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1,
                        help="M5NR annotation version to use, default is 1")
    parser.add_argument(
        "--temp", dest="temp", default=None,
        help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if bool(opts.intersect_name) != bool(opts.intersect_level):
        sys.stderr.write(
            "ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    id_list = []
    if os.path.isfile(opts.ids):
        with open(opts.ids, 'r') as id_file:
            id_str = id_file.read()
        try:
            id_obj = json.loads(id_str)
            # Materialise as lists: the batching below needs len() and
            # slicing, which dict views and map objects do not support on
            # Python 3.
            if 'elements' in id_obj:
                id_list = list(id_obj['elements'].keys())
            elif 'members' in id_obj:
                id_list = [member['ID'] for member in id_obj['members']]
        except (ValueError, TypeError, KeyError, AttributeError):
            # Not JSON, or not the expected JSON layout: treat the file as
            # one ID per line.
            id_list = [line.strip() for line in id_str.splitlines() if line.strip()]
    else:
        id_list = opts.ids.strip().split(',')

    params = [('group_level', opts.level),
              ('source', opts.source),
              ('evalue', opts.evalue),
              ('identity', opts.identity),
              ('length', opts.length),
              ('version', opts.version),
              ('result_type', 'abundance'),
              ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        # Request the matrix in batches of `size` IDs and merge as we go.
        for start in range(0, len(id_list), size):
            sub_ids = id_list[start:start + size]
            cur_params = params + [('id', mg_id) for mg_id in sub_ids]
            cur_url = opts.url + '/matrix/function?' + urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                # Checkpoint the merged result after every batch.
                with open(opts.temp, 'w') as temp_hdl:
                    json.dump(biom, temp_hdl)
    else:
        params.extend(('id', mg_id) for mg_id in id_list)
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            with open(opts.temp, 'w') as temp_hdl:
                json.dump(biom, temp_hdl)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                filter_names = set(f.strip() for f in file_)
        else:
            filter_names = set(opts.filter_name.strip().split(','))
        # annotation mapping from m5nr
        params = [('version', opts.version),
                  ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        # The ontology records are looked up under 'level4' when the
        # requested level is 'function'.
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level in ann) and (level in ann) and (ann[opts.filter_level] in filter_names):
                sub_ann.add(ann[level])

    # output data
    to_stdout = (not opts.output) or (opts.output == '-')
    out_hdl = sys.stdout if to_stdout else open(opts.output, 'w')
    try:
        if opts.format == 'biom':
            out_hdl.write(json.dumps(biom) + "\n")
        else:
            biom_to_tab(biom, out_hdl, rows=sub_ann)
    finally:
        # Only close handles opened here; leave sys.stdout open.
        if not to_stdout:
            out_hdl.close()

    return 0
def main(args):
    """Print the most abundant taxa of a single metagenome (optparse version).

    Command-line options are read by ``parser.parse_args()``; the incoming
    ``args`` value is not consulted and the name is rebound to the leftover
    positional arguments.  Each output line is ``<taxon>\\t<abundance>``,
    sorted by decreasing abundance.

    Returns 0 on success and 1 when option validation fails.
    """
    # Make optparse return description and epilog as-is, ignoring the formatter.
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_option("", "--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_option("", "--top", dest="top", type="int", default=10, help="display only the top N taxa, default is 10")
    parser.add_option("", "--evalue", dest="evalue", type="int", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", type="int", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", type="int", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--version", type="int", dest="version", default=1, help="M5NR annotation version to use, default is 1")

    # get inputs (--top is already converted by optparse via type="int")
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if bool(opts.filter_name) != bool(opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/organism?'+urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        taxonomy = obj_from_url(url)
        sub_ann = set(entry[opts.level] for entry in taxonomy['data'])

    # Normalise to a dense matrix so every row can be read the same way.
    if biom['matrix_type'] == "dense":
        matrix = biom['data']
    else:
        matrix = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))

    # Pair each row id with its first-column value.  Read from the dense
    # `matrix`, not from biom['data']: for sparse responses biom['data'] is a
    # list of entries whose first element is a row index, not an abundance.
    abundances = [(row['id'], matrix[i][0]) for i, row in enumerate(biom['rows'])]

    # sort data
    for name, value in sorted(abundances, key=itemgetter(1), reverse=True):
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = value

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))

    return 0