Example #1
0
def main():
    """This function launches the EcoData Retriever."""
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI

        check_for_updates(graphical=False if 'darwin' in platform.platform().lower() else True)
        lists = get_lists()
        
        from retriever.app.main import launch_app
        launch_app(lists)

    else:
        # otherwise, parse them

        script_list = SCRIPT_LIST()
        
        args = parser.parse_args()
        if args.quiet:
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])
        
        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)
        
        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                citation_path = os.path.join(os.path.split(__file__)[0], '../CITATION')
                print citation_path
                with open(citation_path) as citation_file:
                    print citation_file.read()
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print dataset.description

            return
            
        elif args.command == 'gui':
            lists = get_lists()

            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            
            return
        
        if args.command == 'ls' or args.dataset is None:
            import lscolumns

            #If scripts have never been downloaded there is nothing to list
            if not script_list:
                print "No scripts are currently available. Updating scripts now..."
                check_for_updates(graphical=False)
                print "\n\nScripts downloaded.\n"
                script_list = SCRIPT_LIST()

            all_scripts = set([script.shortname for script in script_list])
            all_tags = set(["ALL"] + 
                            [tag.strip().upper() for script in script_list for tagset in script.tags for tag in tagset.split('>')])

            print "Available datasets (%s):" % len(all_scripts)
            lscolumns.printls(sorted(list(all_scripts), key=lambda s: s.lower()))
            print "Groups:"
            lscolumns.printls(sorted(list(all_tags)))
            return
        
        engine = choose_engine(args.__dict__)
        
        if hasattr(args, 'debug') and args.debug: debug = True
        else: debug = False
        
        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                print "=> Installing", dataset.name
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    pass
                except Exception as e:
                    print e
                    if debug: raise
            print "Done!"
        else:
            print "The dataset %s isn't currently available in the Retriever" % (args.dataset)
            print "Run 'retriever -ls to see a list of currently available datasets"
Example #2
0
def main():
    import argparse

    bp_formats = ' | '.join(bp._io.supported_formats)
    input_formats = bp_formats
    output_formats = '%s | ascii' % bp_formats

    parser = argparse.ArgumentParser()
    parser.add_argument('--version', action='version', version=__version__)
    parser.add_argument('-v', '--verbose', action='store_true', help='write out SPARQL queries before executing')
    parser.add_argument('-s', '--store', help='name of Redland store (default=virtuoso)')
    parser.add_argument('-d', '--dsn', help='ODBC DSN (default=Virtuoso)')
    parser.add_argument('-u', '--user', help='ODBC user (default=dba)')
    parser.add_argument('-p', '--password', help='ODBC password (default=dba)')

    subparsers = parser.add_subparsers(help='sub-command help', dest='command')
    
    # treestore add: add trees to the database
    add_parser = subparsers.add_parser('add', help='add trees to treestore')
    add_parser.add_argument('file', help='tree file')
    add_parser.add_argument('uri', help='tree uri (default=file name)', nargs='?', default=None)
    add_parser.add_argument('-f', '--format', help='file format (%s)' % input_formats,
                            nargs='?', default='newick')
    add_parser.add_argument('--rooted', help='this is a rooted tree', action='store_true')
    add_parser.add_argument('--taxonomy', help="the URI of a taxonomy graph to label higher-order taxa",
                            nargs='?', default=None)
    add_parser.add_argument('--tax-root', help="the name of the top-most taxonomic group in the tree, used to subset the taxonomy and avoid homonymy issues",
                            nargs='?', default=None)
    
    # treestore get: download an entire tree
    get_parser = subparsers.add_parser('get', help='retrieve trees from treestore')
    get_parser.add_argument('uri', help='tree uri')
    get_parser.add_argument('-f', '--format', help='serialization format (%s) (default=newick)' % output_formats, 
                            nargs='?', default='newick')
    
    # treestore rm: delete trees from the database
    rm_parser = subparsers.add_parser('rm', help='remove trees from treestore')
    rm_parser.add_argument('uri', help='tree uri')

    # treestore ls: list trees
    ls_parser = subparsers.add_parser('ls', help='list all trees in treestore')
    ls_parser.add_argument('contains', help='comma-delimited list of desired taxa',
                           nargs='?', default='')
    ls_parser.add_argument('--counts', help="display the number of matched taxa next to each tree URI",
                           action='store_true')
    ls_parser.add_argument('-l', help="list one per line; don't try to pretty-print",
                           action='store_true')
    ls_parser.add_argument('-f', help="show full URIs instead of just IDs",
                           action='store_true')
    ls_parser.add_argument('--taxonomy', help="the URI of a taxonomy graph to enable synonymy lookup",
                           nargs='?', default=None)
    ls_parser.add_argument('--filter', help="SPARQL graph pattern that returned trees must match",
                           nargs='?', default=None)
    
    # treestore names: get list of taxa contained in a tree
    names_parser = subparsers.add_parser('names', 
                                         help='return a comma-separated list of all taxa names')
    names_parser.add_argument('uri', help='tree uri (default=all trees)', 
                              nargs='?', default=None)
    names_parser.add_argument('-f', '--format', help='file format (json, csv, xml) (default=csv)', 
                              default='csv')
    
    # treestore count: count the number of labeled nodes
    count_parser = subparsers.add_parser('count', 
                                         help='returns the number of labeled nodes in a tree')
    count_parser.add_argument('uri', help='tree uri (default=all trees)', 
                              nargs='?', default=None)
    
    # treestore query: create a subtree from a list of taxa
    query_parser = subparsers.add_parser('query', 
                                         help='retrieve the best subtree containing a given set of taxa')
    query_parser.add_argument('contains', help='comma-delimited list of desired taxa',
                              nargs='?')
    query_parser.add_argument('uri', help='tree uri (default=select automatically)', 
                              nargs='?', default=None)
    query_parser.add_argument('-f', '--format', help='serialization format (%s) (default=newick)' % output_formats, 
                              nargs='?', default='newick')
    query_parser.add_argument('--complete', help="return complete subtree from MRCA; don't prune other taxa from the resulting tree",
                              action='store_true')
    query_parser.add_argument('--taxonomy', help="the URI of a taxonomy graph to enable synonymy lookup",
                              nargs='?', default=None)
    query_parser.add_argument('--filter', help="SPARQL graph pattern that returned trees must match",
                              nargs='?', default=None)
    
    # treestore annotate: add metadata annotations to tree
    ann_parser = subparsers.add_parser('annotate', help='annotate tree with triples from RDF file')
    ann_parser.add_argument('uri', help='tree uri', default=None)
    ann_parser.add_argument('--file', help='annotation file')
    ann_parser.add_argument('--text', help='annotation, in turtle format', default=None)
    ann_parser.add_argument('--doi', help='tree source DOI', default=None)

    args = parser.parse_args()

    if args.dsn: kwargs['dsn'] = args.dsn
    if args.user: kwargs['user'] = args.user
    if args.password: kwargs['password'] = args.password
    elif not 'password' in kwargs: password = getpass()
    kwargs['verbose'] = args.verbose
    treestore = Treestore(**kwargs)

    if args.command == 'add':
        # parse a tree and add it to the treestore
        treestore.add_trees(args.file, args.format, args.uri, rooted=args.rooted,
                            taxonomy=args.taxonomy, tax_root=args.tax_root)
        
    elif args.command == 'get':
        # get a tree, serialize in specified format, and output to stdout
        treestore.serialize_trees(args.uri, args.format, handle=sys.stdout)
        
    elif args.command == 'rm':
        # remove a certain tree from the treestore
        treestore.remove_trees(args.uri)
        
    elif args.command == 'ls':
        # list all trees in the treestore or trees containing a list of taxa
        contains = args.contains
        if contains: 
            contains = set([s.strip() for s in contains.split(',')])
            trees = list(treestore.list_trees_containing_taxa(
                            contains=contains, taxonomy=args.taxonomy, 
                            show_counts=args.counts, filter=args.filter))
            if args.counts: trees = ['%s (%s)' % tree for tree in trees]
            else: trees = [str(x) for x in trees]
        else:
            trees = list(treestore.list_trees(filter=args.filter))
        
        if not trees: exit()

        if not args.f:
            trees = [treestore.id_from_uri(x) for x in trees]
        
        if args.l:
            print '\n'.join(trees)
        else:
            import lscolumns
            lscolumns.printls(trees)


    elif args.command == 'names':
        print treestore.get_names(tree_uri=args.uri, format=args.format)

    elif args.command == 'count':
        print len([r for r in treestore.get_names(tree_uri=args.uri, format=None)])

    elif args.command == 'query':
        contains = set([s.strip() for s in args.contains.split(',')])
        treestore.get_subtree(contains=contains, tree_uri=args.uri,
                              format=args.format, 
                              prune=not args.complete,
                              taxonomy=treestore.uri_from_id(args.taxonomy) if args.taxonomy else None,
                              filter=args.filter,
                              handle=sys.stdout,
                              )

    elif args.command == 'annotate':
        treestore.annotate(args.uri, annotations=args.text, annotation_file=args.file, doi=args.doi)
def main():
    """Command-line interface for a phylogenetic tree store.

    Builds the argparse CLI (add, get, rm, ls, names, count, query,
    annotate), constructs a Treestore from the connection options, and
    dispatches on the chosen sub-command.
    """
    import argparse

    # Supported serialization formats come from Biopython tree I/O; output
    # additionally supports a plain-text 'ascii' rendering.
    bp_formats = ' | '.join(bp._io.supported_formats)
    input_formats = bp_formats
    output_formats = '%s | ascii' % bp_formats

    # Global connection options shared by all sub-commands.
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('-s', '--store', help='name of Redland store (default=virtuoso)')
    parser.add_argument('-d', '--dsn', help='ODBC DSN (default=Virtuoso)')
    parser.add_argument('-u', '--user', help='ODBC user (default=dba)')
    parser.add_argument('-p', '--password', help='ODBC password (default=dba)')

    subparsers = parser.add_subparsers(help='sub-command help', dest='command')

    # treestore add: parse a tree file and store it
    add_parser = subparsers.add_parser('add', help='add trees to treestore')
    add_parser.add_argument('file', help='tree file')
    add_parser.add_argument('format', help='file format (%s)' % input_formats)
    add_parser.add_argument('uri', help='tree uri (default=file name)', nargs='?', default=None)
    add_parser.add_argument('--bulk', help='use the virtuoso bulk loader', action='store_true')
    add_parser.add_argument('--puid', help='create a pseudo-unique ID for the tree', action='store_true')
    add_parser.add_argument('--rooted', help='this is a rooted tree', action='store_true')

    # treestore get: serialize a stored tree to stdout
    get_parser = subparsers.add_parser('get', help='retrieve trees from treestore')
    get_parser.add_argument('uri', help='tree uri')
    get_parser.add_argument('format', help='serialization format (%s) (default=newick)' % output_formats, 
                            nargs='?', default='newick')

    # treestore rm: delete a tree from the store
    rm_parser = subparsers.add_parser('rm', help='remove trees from treestore')
    rm_parser.add_argument('uri', help='tree uri')

    # treestore ls: list trees, optionally filtered by contained taxa
    ls_parser = subparsers.add_parser('ls', help='list all trees in treestore')
    ls_parser.add_argument('contains', 
        help='comma-delimited list of species that must be contained in each returned tree (default=none)',
        nargs='?', default='')
    ls_parser.add_argument('--all', help='only return trees that contain all given species', 
                           action='store_true')

    # treestore names: list taxa names contained in a tree (or all trees)
    names_parser = subparsers.add_parser('names', 
                                         help='return a comma-separated list of all taxa names')
    names_parser.add_argument('uri', help='tree uri (default=all trees)', 
                              nargs='?', default=None)
    names_parser.add_argument('-f', '--format', help='file format (json, csv, xml) (default=csv)', 
                              default='csv')

    # treestore count: count labelled nodes
    count_parser = subparsers.add_parser('count', 
                                         help='returns the number of labelled nodes in a tree')
    count_parser.add_argument('uri', help='tree uri (default=all trees)', 
                              nargs='?', default=None)

    # treestore query: extract a subtree containing the given taxa
    query_parser = subparsers.add_parser('query', 
                                         help='retrieve the best subtree containing a given set of taxa')
    query_parser.add_argument('contains', 
        help='comma-delimited list of species that must be contained in each returned tree',
        nargs='?')
    query_parser.add_argument('format', help='serialization format (%s) (default=newick)' % output_formats, 
                              nargs='?', default='newick')
    query_parser.add_argument('uri', help='tree uri (default=select automatically)', 
                              nargs='?', default=None)
    query_parser.add_argument('--all', help='only return trees that contain all given species', 
                              action='store_true')
    query_parser.add_argument('--complete', help="return complete subtree from MRCA; don't prune other taxa from the resulting tree",
                              action='store_true')

    # treestore annotate: attach RDF metadata to a stored tree
    ann_parser = subparsers.add_parser('annotate', help='annotate tree with triples from RDF file')
    ann_parser.add_argument('file', help='annotation file')
    ann_parser.add_argument('format', help='annotation file format (default=ntriples)')
    ann_parser.add_argument('uri', help='tree uri', default=None)

    args = parser.parse_args()

    # Build Treestore constructor kwargs from whichever connection options
    # were supplied; unset options fall through to Treestore's defaults.
    kwargs = {}
    if args.store: kwargs['storage_name'] = args.store
    if args.dsn: kwargs['dsn'] = args.dsn
    if args.user: kwargs['user'] = args.user
    if args.password: kwargs['password'] = args.password
    treestore = Treestore(**kwargs)

    if args.command == 'add':
        # parse a tree and add it to the treestore
        treestore.add_trees(args.file, args.format, args.uri, bulk_loader=args.bulk, puid=args.puid,
                            rooted=args.rooted)
        
    elif args.command == 'get':
        # get a tree, serialize in specified format, and output to stdout
        print treestore.serialize_trees(args.uri, args.format),
        
    elif args.command == 'rm':
        # remove a certain tree from the treestore
        treestore.remove_trees(args.uri)
        
    elif args.command == 'ls':
        # list all trees in the treestore or trees containing a list of taxa
        contains = args.contains
        if contains: 
            contains = set([s.strip() for s in contains.split(',')])
            trees = [r for r in treestore.list_trees_containing_taxa(
                        contains=contains, match_all=args.all)]
        else:
            trees = treestore.list_trees()

        # nothing to print; exit quietly
        if not trees: exit()
        
        import lscolumns
        lscolumns.printls(trees)


    elif args.command == 'names':
        print treestore.get_names(tree_uri=args.uri, format=args.format)

    elif args.command == 'count':
        print len([r for r in treestore.get_names(tree_uri=args.uri, format=None)])

    elif args.command == 'query':
        contains = set([s.strip() for s in args.contains.split(',')])
        # trailing comma suppresses the extra newline from print
        print treestore.get_subtree(contains=contains, tree_uri=args.uri,
                                    match_all=args.all, format=args.format, prune=not args.complete),

    elif args.command == 'annotate':
        # NOTE(review): this calls a module-level annotate() helper, unlike
        # the treestore.annotate() method used elsewhere — confirm intended
        annotate(args.uri, args.file, treestore, format=args.format)
Example #4
0
def main():
    """This function launches the EcoData Retriever."""
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI

        check_for_updates(graphical=False if current_platform == 'darwin' else True)
        lists = get_lists()

        from retriever.app.main import launch_app
        launch_app(lists)

    else:
        # otherwise, parse them

        script_list = SCRIPT_LIST()

        args = parser.parse_args()
        if args.quiet:
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                citation_path = os.path.join(os.path.split(__file__)[0], '../CITATION')
                print "\nCitation for retriever:\n"
                with open(citation_path) as citation_file:
                    print citation_file.read()
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:

                    print ("\nCitation:   {}".format(dataset.citation))
                    print ("Description:   {}\n".format(dataset.description))

            return

        elif args.command == 'gui':
            lists = get_lists()

            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()

            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        if args.command == 'ls' or args.dataset is None:

            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print "No scripts are currently available. Updating scripts now..."
                check_for_updates(graphical=False)
                print "\n\nScripts downloaded.\n"
                script_list = SCRIPT_LIST()

            all_scripts = []

            for script in script_list:
                if script.name:
                    if args.l!=None:
                        script_name = script.name + "\nShortname: " + script.shortname+"\n"
                        if script.tags:
                            script_name += "Tags: "+str([tag for tag in script.tags])+"\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        script_name = script.shortname
                        all_scripts.append(script_name)

            all_scripts = sorted(all_scripts, key=lambda s: s.lower())

            print "Available datasets : {}\n".format(len(all_scripts))

            if args.l==None:
                import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print ("%d. %s"%(count, script))
                    count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False

        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                print "=> Installing", dataset.name
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    pass
                except Exception as e:
                    print e
                    if debug: raise
            print "Done!"
        else:
            print "The dataset {} isn't currently available in the Retriever".format(args.dataset)
            print "Run 'retriever ls to see a list of currently available datasets"