def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL, help="API url")
    parser.add_option("", "--md5", dest="md5", default=None, help="sequence md5")
    parser.add_option("", "--id", dest="id", default=None, help="accession ID")
    parser.add_option("", "--source", dest="source", default='SwissProt', help="datasource to get record from, one of: SwissProt, TreMBL, InterPro")
    parser.add_option("", "--version", dest="version", default='1', help="M5NR version to use, one of 1 or 9")
    
    # get inputs
    (opts, args) = parser.parse_args()

    # build url for m5nr query
    params = [ ('limit', '1'),
               ('version', opts.version),
               ('source', opts.source) ]
    if opts.md5:
        url = opts.url+'/m5nr/md5/'+opts.md5+'?'+urlencode(params, True)
    elif opts.id:
        url = opts.url+'/m5nr/accession/'+opts.id+'?'+urlencode(params, True)
    else:
        sys.stderr.write("ERROR: no md5 checksum or accession given\n")
        return 1
        
    # retrieve data
    result = obj_from_url(url)
    if len(result['data']) == 0:
        sys.stderr.write("ERROR: no match in M5NR version %s\n"%opts.version)
        return 1
    
    # output data
    stdout_from_url(UNIPROT_URL+result['data'][0]['accession']+'.txt')
    
    return 0
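# ---------------------------------------------------------------------------
# The snippets in this collection all lean on a handful of shared imports and
# helpers from the MG-RAST support library (obj_from_url, stdout_from_url,
# safe_print, get_auth_token). The bodies below are only a minimal sketch of
# assumed behavior, reconstructed from how the helpers are called here; the
# authentication header name and the environment variable are assumptions,
# not the library's documented API.
import json
import os
import sys

try:
    from urllib.parse import urlencode           # Python 3
    from urllib.request import Request, urlopen
except ImportError:                               # Python 2
    from urllib import urlencode
    from urllib2 import Request, urlopen

def obj_from_url(url, auth=None):
    """GET a URL and return the decoded JSON body as a dict (assumed behavior)."""
    req = Request(url)
    if auth:
        req.add_header('auth', auth)              # header name is an assumption
    return json.loads(urlopen(req).read().decode('utf-8'))

def stdout_from_url(url, auth=None):
    """Stream the raw response body straight to stdout (assumed behavior)."""
    req = Request(url)
    if auth:
        req.add_header('auth', auth)
    sys.stdout.write(urlopen(req).read().decode('utf-8'))

def safe_print(text):
    """Write text to stdout; the real helper also guards against encoding errors."""
    sys.stdout.write(text)

def get_auth_token(opts=None):
    """Return the OAuth token from --token if given, else from the environment
    (the real helper can also log in with --user/--passwd)."""
    if opts is not None and getattr(opts, 'token', None):
        return opts.token
    return os.environ.get('MGRKEY')               # variable name is an assumption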
def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--ids", dest="ids", default=None, help="comma seperated list of KBase Metagenome IDs")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params  = [ ('level', opts.level), ('source', opts.source) ]
    for i in id_list:
        url  = opts.url+'/compute/alphadiversity/'+i+'?'+urlencode(params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" %(i, data['data']))
    
    return 0
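# A quick, self-contained check of how the query strings above are composed:
# urlencode with a list of pairs and doseq=True repeats a key once per value,
# which is how later snippets pass several 'filter' parameters at once
# (the parameter values below are illustrative only).
try:
    from urllib.parse import urlencode   # Python 3
except ImportError:
    from urllib import urlencode         # Python 2

params = [('level', 'genus'), ('source', 'SEED')]
print(urlencode(params, True))           # level=genus&source=SEED

params += [('filter', 'Bacteroides'), ('filter', 'Prevotella')]
print(urlencode(params, True))           # level=genus&source=SEED&filter=Bacteroides&filter=Prevotella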
Example #3
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--md5", dest="md5", default=None, help="sequence md5")
    parser.add_argument("--id", dest="id", default=None, help="accession ID")
    parser.add_argument(
        "--source",
        dest="source",
        default='SwissProt',
        help=
        "datasource to get record from, one of: SwissProt, TreMBL, InterPro")
    parser.add_argument("--version",
                        dest="version",
                        default='1',
                        help="M5NR version to use, one of 1 or 9")

    # get inputs
    opts = parser.parse_args()

    # build url for m5nr query
    params = [('limit', '1'), ('version', opts.version),
              ('source', opts.source)]
    if opts.md5:
        url = opts.url + '/m5nr/md5/' + opts.md5 + '?' + urlencode(
            params, True)
    elif opts.id:
        url = opts.url + '/m5nr/accession/' + opts.id + '?' + urlencode(
            params, True)
    else:
        sys.stderr.write("ERROR: no md5 checksum or accession given\n")
        return 1

    # retrieve data
    result = obj_from_url(url)
    if len(result['data']) == 0:
        sys.stderr.write("ERROR: no match in M5NR version %s\n" % opts.version)
        return 1

    # output data
    stdout_from_url(UNIPROT_URL + result['data'][0]['accession'] + '.txt')

    return 0
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--limit", dest="limit", type=int, default=15, help="Number of results to show, if > 50 will use paginated queries to get all, default 15")
    parser.add_argument("--order", dest="order", default=None, help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument("--direction", dest="direction", default="asc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc")
    parser.add_argument("--public", dest="public", action="store_true", default=False, help="return both private and pubulic data if using authenticated search, default is private only.  Non-authenticated search only returns public.")
    parser.add_argument("--match", dest="match", default="all", help="search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    parser.add_argument("--verbosity", dest="verbosity", default='minimal', help="amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal")
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    
    # get inputs
    opts = parser.parse_args()
    
    # get auth
    token = get_auth_token(opts)
    
    # build call url
    total = 0
    maxLimit = 50
    params = [ ('limit', opts.limit if opts.limit < maxLimit else maxLimit),
               ('public', 'yes' if opts.public or (not token) else 'no') ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url+'/search?'+urlencode(params, True)
    
    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found
    
    # output header
    safe_print("\t".join(fields)+"\n")
    # output rows
    display_search(result['data'], fields)
    
    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)
    
    return 0
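# display_search is called above but never defined in this excerpt; a minimal
# sketch of its assumed behavior, based purely on how it is invoked: one
# tab-delimited row per record, one column per requested field, with missing
# fields rendered as empty strings.
def display_search(data, fields):
    for item in data:
        safe_print("\t".join([str(item.get(f, '')) for f in fields]) + "\n")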
Example #5
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--ids",
                        dest="ids",
                        default=None,
                        help="comma seperated list of KBase Metagenome IDs")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='species',
        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument(
        "--source",
        dest="source",
        default='SEED',
        help="datasource to filter results by, default is SEED")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url / retrieve data / output data
    id_list = opts.ids.split(',')
    params = [('level', opts.level), ('source', opts.source)]
    for i in id_list:
        url = opts.url + '/compute/alphadiversity/' + i + '?' + urlencode(
            params, True)
        data = obj_from_url(url, auth=token)
        safe_print("%s\t%s\n" % (i, data['data']))

    return 0
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, type=str, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, type=str, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, type=str, help="OAuth token")
    parser.add_argument("--name", dest="name", default=None, type=str, help="function name to filter by")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum percent identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    
    # get inputs
    opts = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url
    params = [ ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length) ]
    if (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function'):
        params.append(('type', 'ontology'))
    else:
        params.append(('type', 'function'))
    if opts.name:
        params.append(('filter', opts.name))
        if opts.level:
            params.append(('filter_level', opts.level))
    url = opts.url+'/annotation/sequence/'+opts.id+'?'+urlencode(params, True)
    
    # output data
    stdout_from_url(url, auth=token)
    
    return 0
def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--name", dest="name", default=None, help="taxon name to filter by")
    parser.add_option("", "--level", dest="level", default=None, help="taxon level to filter by")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    
    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url
    params = [ ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('type', 'organism') ]
    if opts.name:
        params.append(('filter', opts.name))
        if opts.level:
            params.append(('filter_level', opts.level))
    url = opts.url+'/annotation/similarity/'+opts.id+'?'+urlencode(params, True)
    
    # output data
    stdout_from_url(url, auth=token)
    
    return 0
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % (VERSION, search_opts),
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--limit",
        dest="limit",
        type=int,
        default=15,
        help=
        "Number of results to show, if > 50 will use paginated queries to get all, default 15"
    )
    parser.add_argument(
        "--order",
        dest="order",
        default=None,
        help="field metagenomes are ordered by, default is no ordering")
    parser.add_argument(
        "--direction",
        dest="direction",
        default="asc",
        help=
        "direction of order. 'asc' for ascending order, 'desc' for descending order, default is asc"
    )
    parser.add_argument(
        "--public",
        dest="public",
        action="store_true",
        default=False,
        help=
        "return both private and pubulic data if using authenticated search, default is private only.  Non-authenticated search only returns public."
    )
    parser.add_argument(
        "--match",
        dest="match",
        default="all",
        help=
        "search logic. 'all' for metagenomes that match all search parameters, 'any' for metagenomes that match any search parameters, default is all"
    )
    parser.add_argument(
        "--status",
        dest="status",
        default="public",
        help=
        "types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public"
    )
    parser.add_argument(
        "--verbosity",
        dest="verbosity",
        default='minimal',
        help=
        "amount of information to display. use keyword 'minimal' for id and name, use keyword 'full' for MIxS GSC metadata, default is minimal"
    )
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--" + sfield,
                            dest=sfield,
                            default=None,
                            help="search parameter: query string for " +
                            sfield)

    # get inputs
    opts = parser.parse_args()

    # get auth
    token = get_auth_token(opts)

    # build call url
    total = 0
    maxLimit = 50
    params = [('limit', opts.limit if opts.limit < maxLimit else maxLimit),
              ('public', 'yes' if opts.public or (not token) else 'no')]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    if opts.order:
        params.append(('order', opts.order))
        params.append(('direction', opts.direction))
    url = opts.url + '/search?' + urlencode(params, True)

    # retrieve data
    fields = ['metagenome_id', 'public'] + SEARCH_FIELDS
    result = obj_from_url(url, auth=token)
    found = len(result['data'])
    if found == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    total += found

    # output header
    safe_print("\t".join(fields) + "\n")
    # output rows
    display_search(result['data'], fields)

    while ('next' in result) and result['next'] and (total < opts.limit):
        url = result['next']
        result = obj_from_url(url, auth=token)
        total += len(result['data'])
        display_search(result['data'], fields)

    return 0
Example #9
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='genus', help="taxon level to retrieve abundances for, default is genus")
    parser.add_argument("--source", dest="source", default='SEED', help="taxon datasource to filter results by, default is SEED")
    parser.add_argument("--hit_type", dest="hit_type", default='lca', help="Set of organisms to search results by, one of: all, single, lca")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="taxon name to filter by, file or comma seperated list")
    parser.add_argument("--intersect_source", dest="intersect_source", default='Subsystems', help="function datasource for insersection, default is Subsystems")
    parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="function level for insersection")
    parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="function name(s) for insersection, file or comma seperated list")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
    parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")
    
    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
        sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url
    id_list = []
    if os.path.isfile(opts.ids):
        id_str = open(opts.ids,'r').read()
        try:
            id_obj  = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = id_obj['elements'].keys()
            elif 'members' in id_obj:
                id_list = map(lambda x: x['ID'], id_obj['members'])
        except:
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [ ('group_level', opts.level), 
               ('source', opts.source),
               ('hit_type', opts.hit_type),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1') ]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        for i in xrange(0, len(id_list), size):
            sub_ids = id_list[i:i+size]
            cur_params = copy.deepcopy(params)
            for i in sub_ids:
                cur_params.append(('id', i))
            cur_url  = opts.url+'/matrix/organism?'+urlencode(cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                json.dump(biom, open(opts.temp, 'w'))
    else:
        for i in id_list:
            params.append(('id', i))
        url = opts.url+'/matrix/organism?'+urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            json.dump(biom, open(opts.temp, 'w'))
    
    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [ ('version', opts.version),
                   ('min_level', opts.level) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        for ann in data['data']:
            if (opts.filter_level in ann) and (opts.level in ann) and (ann[opts.filter_level] in filter_list):
                sub_ann.add(ann[opts.level])
    
    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')
    
    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom)+"\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)
    
    out_hdl.close()
    return 0
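# biom_to_tab comes from the same support library; the sketch below is an
# assumed implementation consistent with how it is called here: one row per
# annotation, one column per metagenome, optionally restricted to the row ids
# collected in sub_ann. The snippets in this file address the column list as
# both 'columns' (BIOM 1.0) and 'cols', so the sketch accepts either key.
def biom_to_tab(biom, out_hdl, rows=None):
    columns = biom.get('columns', biom.get('cols', []))
    dense = biom['data']
    if biom['matrix_type'] == 'sparse':
        # expand [row, col, value] triples into a dense matrix
        dense = [[0] * len(columns) for _ in biom['rows']]
        for r, c, v in biom['data']:
            dense[r][c] = v
    out_hdl.write("\t".join([''] + [c['id'] for c in columns]) + "\n")
    for i, row in enumerate(biom['rows']):
        if rows and (row['id'] not in rows):
            continue
        out_hdl.write("\t".join([row['id']] + [str(v) for v in dense[i]]) + "\n")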
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    
    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url
    params = [ ('id', opts.id),
               ('group_level', opts.level), 
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)
    
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    
    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version),
                   ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))
    biomorig = biom
    biom = biomorig["data"] 
    # sort data
    assert "matrix_type" in biom.keys(), repr(biom)
    if biom["matrix_type"] == "sparse":
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id']  # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id'] # if opts.source != 'Subsystems' else biom['rows'][n]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    
    return 0
Example #11
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="functional level to retrieve abundances for, default is function")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by")
    parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N taxa, default is 10")
    parser.add_argument("--evalue", dest="evalue", type=int, default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", type=int, default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", type=int, default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
    
    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url
    params = [ ('id', opts.id),
               ('group_level', opts.level), 
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/function?'+urlencode(params, True)
    
    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    
    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version),
                   ('source', opts.source) ]
        url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
        data = obj_from_url(url)
        level = 'level4' if opts.level == 'function' else opts.level
        sub_ann = set(map(lambda x: x[level], data['data']))
    
    # sort data
    if biom["matrix_type"] == "sparse":
        for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
            name = biom['rows'][d[0]]['id']  # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = d[2]
    if biom["matrix_type"] == "dense":
        sortindex = sorted(range(len(biom['data'])), key=biom['data'].__getitem__, reverse=True)
        for n in sortindex:
            name = biom['rows'][n]['id'] # if opts.source != 'Subsystems' else biom['rows'][n]['metadata']['ontology'][-1]
            if len(top_ann) >= opts.top:
                break
            if sub_ann and (name not in sub_ann):
                continue
            top_ann[name] = biom['data'][n][0]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    
    return 0
Example #12
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument("--id",
                        dest="id",
                        default=None,
                        help="KBase Metagenome ID")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='species',
        help="taxon level to retrieve abundances for, default is species")
    parser.add_argument(
        "--source",
        dest="source",
        default='SEED',
        help="datasource to filter results by, default is SEED")
    parser.add_argument("--filter_name",
                        dest="filter_name",
                        default=None,
                        help="taxon name to filter by")
    parser.add_argument("--filter_level",
                        dest="filter_level",
                        default=None,
                        help="taxon level to filter by")
    parser.add_argument("--top",
                        dest="top",
                        type=int,
                        default=10,
                        help="display only the top N taxa, default is 10")
    parser.add_argument(
        "--evalue",
        dest="evalue",
        type=int,
        default=5,
        help="negative exponent value for maximum e-value cutoff, default is 5"
    )
    parser.add_argument(
        "--identity",
        dest="identity",
        type=int,
        default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length",
        dest="length",
        type=int,
        default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version",
                        type=int,
                        dest="version",
                        default=1,
                        help="M5NR annotation version to use, default is 1")

    # get inputs
    opts = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    params = [('id', opts.id), ('group_level', opts.level),
              ('source', opts.source), ('evalue', opts.evalue),
              ('identity', opts.identity), ('length', opts.length),
              ('version', opts.version), ('result_type', 'abundance'),
              ('asynchronous', '1'), ('hide_metadata', '1')]
    url = opts.url + '/matrix/organism?' + urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [('filter', opts.filter_name),
                  ('filter_level', opts.filter_level),
                  ('min_level', opts.level), ('version', opts.version)]
        url = opts.url + '/m5nr/taxonomy?' + urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set(map(lambda x: x[opts.level], data['data']))
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']),
                               len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    datalist = [data[i][0] for i in range(len(biom['rows']))]  # read from the densified matrix, not the raw (possibly sparse) biom['data']
    data2 = zip(rows, datalist)
    # sort data
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" % (k, v))

    return 0
Example #13
def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--id", dest="id", default=None, help="KBase Metagenome ID")
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='species', help="taxon level to retrieve abundances for, default is species")
    parser.add_option("", "--source", dest="source", default='SEED', help="datasource to filter results by, default is SEED")
    parser.add_option("", "--filter_name", dest="filter_name", default=None, help="taxon name to filter by")
    parser.add_option("", "--filter_level", dest="filter_level", default=None, help="taxon level to filter by")
    parser.add_option("", "--top", dest="top", type="int", default=10, help="display only the top N taxa, default is 10")
    parser.add_option("", "--evalue", dest="evalue", type="int", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", type="int", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", type="int", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--version", type="int", dest="version", default=1, help="M5NR annotation version to use, default is 1")
    
    # get inputs
    (opts, args) = parser.parse_args()
    opts.top = int(opts.top)
    if not opts.id:
        sys.stderr.write("ERROR: id required\n")
        return 1
    if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
        sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
        return 1
    
    # get auth
    token = get_auth_token(opts)
    
    # build url
    params = [ ('id', opts.id),
               ('group_level', opts.level),
               ('source', opts.source),
               ('evalue', opts.evalue),
               ('identity', opts.identity),
               ('length', opts.length),
               ('version', opts.version),
               ('result_type', 'abundance'),
               ('asynchronous', '1'),
               ('hide_metadata', '1') ]
    url = opts.url+'/matrix/organism?'+urlencode(params, True)

    # retrieve data
    top_ann = {}
    biom = async_rest_api(url, auth=token)
    
    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        params = [ ('filter', opts.filter_name),
                   ('filter_level', opts.filter_level),
                   ('min_level', opts.level),
                   ('version', opts.version) ]
        url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
        data = obj_from_url(url)
        sub_ann = set( map(lambda x: x[opts.level], data['data']) )
    if biom['matrix_type'] == "dense":
        data = biom['data']
    else:
        data = sparse_to_dense(biom['data'], len(biom['rows']), len(biom['cols']))
    rows = [biom['rows'][i]['id'] for i in range(len(biom['rows']))]
    datalist = [data[i][0] for i in range(len(biom['rows']))]  # read from the densified matrix, not the raw (possibly sparse) biom['data']
    data2 = zip( rows, datalist)
    # sort data
    for d in sorted(data2, key=itemgetter(1), reverse=True):
        name = d[0]
        if len(top_ann) >= opts.top:
            break
        if sub_ann and (name not in sub_ann):
            continue
        top_ann[name] = d[1]

    # output data
    for k, v in sorted(top_ann.items(), key=itemgetter(1), reverse=True):
        safe_print("%s\t%d\n" %(k, v))
    
    return 0
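# sparse_to_dense is another library helper used above; its assumed behavior,
# matching the call site, is to expand BIOM sparse triples [row, col, value]
# into a dense nrows x ncols matrix of counts.
def sparse_to_dense(sparse, nrows, ncols):
    dense = [[0] * ncols for _ in range(nrows)]
    for r, c, v in sparse:
        dense[r][c] = v
    return dense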
Example #14
CALL = "/search"  # not defined in this excerpt; value mirrors the otherwise identical snippet later in this collection
key = get_auth_token()

# assign parameters
limit = 1000  # initial call

# construct API call

parameters = {
    "limit": limit,
    "order": "created_on",
    "direction": "asc",
    "public": "1"
}
API_URL = "https://api.mg-rast.org/"

base_url = API_URL + CALL + "?" + urlencode(parameters)

# convert the data from a JSON structure to a python data type, a dict of dicts.
jsonstructure = obj_from_url(base_url, auth=key)

# unpack and display the data table
total_count = int(jsonstructure["total_count"])
sys.stderr.write("Total number of records: {:d}\n".format(total_count))

for i in range(0, int(total_count / limit) + 1):
    sys.stderr.write("Page {:d}\t".format(i))
    jsonstructure = obj_from_url(base_url, auth=key)
    printlist(jsonstructure)
    try:
        next_url = jsonstructure["next"]
        base_url = next_url
    except KeyError:
        # no "next" link on the last page; stop paging (assumed handling,
        # the original excerpt is truncated at this point)
        break
def main(args):
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL, help="API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--level", dest="level", default='function', help="function level to filter by")
    parser.add_option("", "--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_option("", "--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_option("", "--identity", dest="identity", default=60, help="percent value for minimum % identity cutoff, default is 60")
    parser.add_option("", "--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_option("", "--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    for sfield in SEARCH_FIELDS:
        parser.add_option("", "--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    
    # get inputs
    (opts, args) = parser.parse_args()
    
    # get auth
    token = get_auth_token(opts)

    # build url for metagenome query
    params = [ ('limit', '100'),
               ('verbosity', 'minimal'),
               ('match', 'all'),
               ('status', opts.status) ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append( (sfield, getattr(opts, sfield)) )
    url = opts.url+'/metagenome?'+urlencode(params, True)

    # retrieve query results
    result = obj_from_url(url, auth=token)
    if len(result['data']) == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    mgids = set( map(lambda x: x['id'], result['data']) )
    while result['next']:
        url = result['next']
        result = obj_from_url(url, auth=token)
        if len(result['data']) == 0:
            break
        for d in result['data']:
            mgids.add(d['id'])

    # get sequences for mgids
    for mg in mgids:
        params = [ ('source', opts.source),
                   ('evalue', opts.evalue),
                   ('identity', opts.identity),
                   ('length', opts.length) ]
        if (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function'):
            params.append(('type', 'ontology'))
        else:
            params.append(('type', 'function'))
        if opts.function:
            params.append(('filter', opts.function))
            if opts.level:
                params.append(('filter_level', opts.level))
        url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(params, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(url, auth=token)
    
    return 0
Example #16
                         repr(public), item["created_on"],
                         mg_name, project_id, project_name]) + "\n"))

CALL = "/search"

key = get_auth_token()

# assign parameters
limit = 1000 # initial call

# construct API call

parameters = {"limit": limit, "order":"created_on", "direction": "asc", "public": "1"}
API_URL= "https://api.mg-rast.org/"

base_url = API_URL + CALL + "?" + urlencode(parameters)

# convert the data from a JSON structure to a python data type, a dict of dicts.
jsonstructure = obj_from_url(base_url, auth=key)

# unpack and display the data table
total_count = int(jsonstructure["total_count"])
sys.stderr.write("Total number of records: {:d}\n".format(total_count))

for i in range(0, int(total_count / limit) +1):
    sys.stderr.write("Page {:d}\t".format(i))
    jsonstructure = obj_from_url(base_url, auth=key)
    printlist(jsonstructure)
    try:
        next_url = jsonstructure["next"]
        base_url = next_url
    except KeyError:
        # no "next" link on the last page; stop paging (assumed handling,
        # the original excerpt is truncated at this point)
        break
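# printlist is not defined in either paging excerpt; judging from the
# truncated helper fragment noted above, it walks one page of results and
# prints a tab-delimited summary line per metagenome. The field list below is
# an illustrative assumption, not the original implementation.
def printlist(jsonstructure):
    for item in jsonstructure.get('data', []):
        safe_print("\t".join([str(item.get(k, '')) for k in
                              ('id', 'name', 'public', 'created_on')]) + "\n")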
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='',
                            description=prehelp % VERSION,
                            epilog=posthelp % AUTH_LIST)
    parser.add_argument(
        "--ids",
        dest="ids",
        default=None,
        help="comma seperated list or file of KBase Metagenome IDs")
    parser.add_argument("--url",
                        dest="url",
                        default=API_URL,
                        help="communities API url")
    parser.add_argument("--user",
                        dest="user",
                        default=None,
                        help="OAuth username")
    parser.add_argument("--passwd",
                        dest="passwd",
                        default=None,
                        help="OAuth password")
    parser.add_argument("--token",
                        dest="token",
                        default=None,
                        help="OAuth token")
    parser.add_argument(
        "--level",
        dest="level",
        default='level3',
        help="functional level to retrieve abundances for, default is level3")
    parser.add_argument(
        "--source",
        dest="source",
        default='Subsystems',
        help="function datasource to filter results by, default is Subsystems")
    parser.add_argument("--filter_level",
                        dest="filter_level",
                        default=None,
                        help="function level to filter by")
    parser.add_argument(
        "--filter_name",
        dest="filter_name",
        default=None,
        help="function name to filter by, file or comma seperated list")
    parser.add_argument(
        "--intersect_source",
        dest="intersect_source",
        default='SEED',
        help="taxon datasource for insersection, default is SEED")
    parser.add_argument("--intersect_level",
                        dest="intersect_level",
                        default=None,
                        help="taxon level for insersection")
    parser.add_argument(
        "--intersect_name",
        dest="intersect_name",
        default=None,
        help="taxon name(s) for insersection, file or comma seperated list")
    parser.add_argument(
        "--output",
        dest="output",
        default='-',
        help="output: filename or stdout (-), default is stdout")
    parser.add_argument(
        "--format",
        dest="format",
        default='biom',
        help=
        "output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom"
    )
    parser.add_argument(
        "--evalue",
        type=int,
        dest="evalue",
        default=15,
        help="negative exponent value for maximum e-value cutoff, default is 15"
    )
    parser.add_argument(
        "--identity",
        type=int,
        dest="identity",
        default=60,
        help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument(
        "--length",
        type=int,
        dest="length",
        default=15,
        help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--version",
                        type=int,
                        dest="version",
                        default=1,
                        help="M5NR annotation version to use, default is 1")
    parser.add_argument(
        "--temp",
        dest="temp",
        default=None,
        help="filename to temporarly save biom output at each iteration")

    # get inputs
    opts = parser.parse_args()
    if not opts.ids:
        sys.stderr.write("ERROR: one or more ids required\n")
        return 1
    if (opts.filter_name and
        (not opts.filter_level)) or ((not opts.filter_name)
                                     and opts.filter_level):
        sys.stderr.write(
            "ERROR: both --filter_level and --filter_name need to be used together\n"
        )
        return 1
    if (opts.intersect_name and
        (not opts.intersect_level)) or ((not opts.intersect_name)
                                        and opts.intersect_level):
        sys.stderr.write(
            "ERROR: both --intersect_level and --intersect_name need to be used together\n"
        )
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1

    # get auth
    token = get_auth_token(opts)

    # build url
    id_list = []
    if os.path.isfile(opts.ids):
        id_str = open(opts.ids, 'r').read()
        try:
            id_obj = json.loads(id_str)
            if 'elements' in id_obj:
                id_list = id_obj['elements'].keys()
            elif 'members' in id_obj:
                id_list = map(lambda x: x['ID'], id_obj['members'])
        except:
            id_list = id_str.strip().split('\n')
    else:
        id_list = opts.ids.strip().split(',')
    params = [('group_level', opts.level), ('source', opts.source),
              ('evalue', opts.evalue), ('identity', opts.identity),
              ('length', opts.length), ('version', opts.version),
              ('result_type', 'abundance'), ('asynchronous', '1')]
    if opts.intersect_level and opts.intersect_name:
        params.append(('filter_source', opts.intersect_source))
        params.append(('filter_level', opts.intersect_level))
        if os.path.isfile(opts.intersect_name):
            with open(opts.intersect_name) as file_:
                for f in file_:
                    params.append(('filter', f.strip()))
        else:
            for f in opts.intersect_name.strip().split(','):
                params.append(('filter', f))

    # retrieve data
    biom = None
    size = 50
    if len(id_list) > size:
        for i in xrange(0, len(id_list), size):
            sub_ids = id_list[i:i + size]
            cur_params = copy.deepcopy(params)
            for i in sub_ids:
                cur_params.append(('id', i))
            cur_url = opts.url + '/matrix/function?' + urlencode(
                cur_params, True)
            cur_biom = async_rest_api(cur_url, auth=token)
            biom = merge_biom(biom, cur_biom)
            if opts.temp:
                json.dump(biom, open(opts.temp, 'w'))
    else:
        for i in id_list:
            params.append(('id', i))
        url = opts.url + '/matrix/function?' + urlencode(params, True)
        biom = async_rest_api(url, auth=token)
        if opts.temp:
            json.dump(biom, open(opts.temp, 'w'))

    # get sub annotations
    sub_ann = set()
    if opts.filter_name and opts.filter_level:
        # get input filter list
        filter_list = []
        if os.path.isfile(opts.filter_name):
            with open(opts.filter_name) as file_:
                for f in file_:
                    filter_list.append(f.strip())
        else:
            for f in opts.filter_name.strip().split(','):
                filter_list.append(f)
        # annotation mapping from m5nr
        params = [('version', opts.version), ('min_level', opts.level),
                  ('source', opts.source)]
        url = opts.url + '/m5nr/ontology?' + urlencode(params, True)
        data = obj_from_url(url)
        level = 'level4' if opts.level == 'function' else opts.level
        for ann in data['data']:
            if (opts.filter_level
                    in ann) and (level in ann) and (ann[opts.filter_level]
                                                    in filter_list):
                sub_ann.add(ann[level])

    # output data
    if (not opts.output) or (opts.output == '-'):
        out_hdl = sys.stdout
    else:
        out_hdl = open(opts.output, 'w')

    if opts.format == 'biom':
        out_hdl.write(json.dumps(biom) + "\n")
    else:
        biom_to_tab(biom, out_hdl, rows=sub_ann)

    out_hdl.close()
    return 0
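# The batching above keeps each matrix call to at most 50 metagenome ids.
# The same slicing pattern in isolation (the ids below are made up):
id_list = ['mgm1', 'mgm2', 'mgm3', 'mgm4', 'mgm5']
size = 2
chunks = [id_list[i:i + size] for i in range(0, len(id_list), size)]
print(chunks)   # [['mgm1', 'mgm2'], ['mgm3', 'mgm4'], ['mgm5']]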
Example #18
def main(args):
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%(VERSION, search_opts), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--level", dest="level", default='function', help="function level to filter by")
    parser.add_argument("--source", dest="source", default='Subsystems', help="datasource to filter results by, default is Subsystems")
    parser.add_argument("--evalue", dest="evalue", default=5, help="negative exponent value for maximum e-value cutoff, default is 5")
    parser.add_argument("--identity", dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
    parser.add_argument("--length", dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
    parser.add_argument("--status", dest="status", default="public", help="types of metagenomes to return. 'both' for all data (public and private), 'public' for public data, 'private' for users private data, default is public")
    for sfield in SEARCH_FIELDS:
        parser.add_argument("--"+sfield, dest=sfield, default=None, help="search parameter: query string for "+sfield)
    
    # get inputs
    opts = parser.parse_args()
    
    # get auth
    token = get_auth_token(opts)

    # build url for metagenome query
    params = [ ('limit', '100'),
               ('verbosity', 'minimal'),
               ('match', 'all'),
               ('status', opts.status) ]
    for sfield in SEARCH_FIELDS:
        if hasattr(opts, sfield) and getattr(opts, sfield):
            params.append((sfield, getattr(opts, sfield)))
    url = opts.url+'/metagenome?'+urlencode(params, True)

    # retrieve query results
    result = obj_from_url(url, auth=token)
    if len(result['data']) == 0:
        sys.stdout.write("No results found for the given search parameters\n")
        return 0
    mgids = set(map(lambda x: x['id'], result['data']))
    while result['next']:
        url = result['next']
        result = obj_from_url(url, auth=token)
        if len(result['data']) == 0:
            break
        for d in result['data']:
            mgids.add(d['id'])

    # get sequences for mgids
    for mg in mgids:
        params = [ ('source', opts.source),
                   ('evalue', opts.evalue),
                   ('identity', opts.identity),
                   ('length', opts.length) ]
        if (opts.source in ['Subsystems', 'KO', 'NOG', 'COG']) and (opts.level != 'function'):
            params.append(('type', 'ontology'))
        else:
            params.append(('type', 'function'))
        if opts.function:
            params.append(('filter', opts.function))
            if opts.level:
                params.append(('filter_level', opts.level))
        url = opts.url+'/annotation/sequence/'+mg+'?'+urlencode(params, True)
        # output data
        safe_print('Results from '+mg+":\n")
        stdout_from_url(url, auth=token)
    
    return 0