def get_lists():
    """Build the ``Category`` tree of every dataset, grouped by tag head.

    Scripts come from ``retriever.SCRIPT_LIST()`` and are sorted by ``name``.
    One ``Category`` is created per distinct first element of
    ``tag_tree(tag)``.  If a ``scripts.config`` file exists in the current
    working directory, each of its lines is evaluated as an extra dataset;
    those datasets are collected in a "Custom" category.

    Returns:
        The root ``Category`` named "All Datasets", holding all scripts and
        the per-head (plus optional "Custom") categories as children.
    """
    from retriever import SCRIPT_LIST

    all_scripts = SCRIPT_LIST()
    all_scripts.sort(key=attrgetter('name'))

    # get a list of category tags from all scripts
    full_tags = set()
    tag_heads = set()
    for script in all_scripts:
        for tag in script.tags:
            full_tags.add(tag)
            tree = tag_tree(tag)
            if len(tree) > 0:
                tag_heads.add(tree[0])
    full_tags = sorted(full_tags)
    tag_heads = sorted(tag_heads)

    lists = []
    for head in tag_heads:
        # Scripts carrying at least one tag whose tree starts with this head.
        valid_scripts = [
            script for script in all_scripts
            if any(len(tag_tree(tag)) > 0 and tag_tree(tag)[0] == head
                   for tag in script.tags)
        ]
        # Remainders of every tag under this head.  Tags with an empty tree
        # are skipped instead of raising IndexError on ``[0]``.
        subtrees = []
        for tag in full_tags:
            tree = tag_tree(tag)
            if len(tree) > 0 and tree[0] == head:
                subtrees.append(tree[1:])
        lists.append(
            Category(head, valid_scripts,
                     children=children(head, valid_scripts, subtrees, 1)))

    # Get list of additional datasets from scripts.config file
    if os.path.isfile("scripts.config"):
        other_scripts = []
        # ``with`` guarantees the handle is closed even if a line fails.
        with open("scripts.config", 'rb') as config:
            for line in config:
                if not line.strip():
                    continue
                try:
                    # SECURITY: eval() runs arbitrary code taken from
                    # scripts.config; this is only safe for a trusted file.
                    new_dataset = eval(line)
                except Exception:
                    # Best effort: lines that cannot be evaluated are ignored.
                    continue
                other_scripts.append(new_dataset)
        other_scripts.sort(key=attrgetter('name'))
        if len(other_scripts) > 0:
            lists.append(Category("Custom", other_scripts))
            # Custom datasets are also added to the first category in
            # ``lists``.  Iterate over a snapshot so the loop terminates even
            # if that category's ``scripts`` is the very same list object.
            for script in list(other_scripts):
                lists[0].scripts.append(script)

    choice_tree = Category("All Datasets", all_scripts, children=lists)
    return choice_tree
def get_lists():
    """Build the ``Category`` tree of every dataset, grouped by tag head.

    Scripts come from ``retriever.SCRIPT_LIST()`` and are sorted by ``name``.
    One ``Category`` is created per distinct first element of
    ``tag_tree(tag)``.  If a ``scripts.config`` file exists in the current
    working directory, each of its lines is evaluated as an extra dataset;
    those datasets are collected in a "Custom" category.

    Returns:
        The root ``Category`` named "All Datasets", holding all scripts and
        the per-head (plus optional "Custom") categories as children.
    """
    from retriever import SCRIPT_LIST

    all_scripts = SCRIPT_LIST()
    all_scripts.sort(key=attrgetter('name'))

    # get a list of category tags from all scripts
    full_tags = set()
    tag_heads = set()
    for script in all_scripts:
        for tag in script.tags:
            full_tags.add(tag)
            tree = tag_tree(tag)
            if len(tree) > 0:
                tag_heads.add(tree[0])
    full_tags = sorted(full_tags)
    tag_heads = sorted(tag_heads)

    lists = []
    for head in tag_heads:
        # Scripts carrying at least one tag whose tree starts with this head.
        valid_scripts = [
            script for script in all_scripts
            if any(len(tag_tree(tag)) > 0 and tag_tree(tag)[0] == head
                   for tag in script.tags)
        ]
        # Remainders of every tag under this head.  Tags with an empty tree
        # are skipped instead of raising IndexError on ``[0]``.
        subtrees = []
        for tag in full_tags:
            tree = tag_tree(tag)
            if len(tree) > 0 and tree[0] == head:
                subtrees.append(tree[1:])
        lists.append(
            Category(head, valid_scripts,
                     children=children(head, valid_scripts, subtrees, 1)))

    # Get list of additional datasets from scripts.config file
    if os.path.isfile("scripts.config"):
        other_scripts = []
        # ``with`` guarantees the handle is closed even if a line fails.
        with open("scripts.config", 'rb') as config:
            for line in config:
                if not line.strip():
                    continue
                try:
                    # SECURITY: eval() runs arbitrary code taken from
                    # scripts.config; this is only safe for a trusted file.
                    new_dataset = eval(line)
                except Exception:
                    # Best effort: lines that cannot be evaluated are ignored.
                    continue
                other_scripts.append(new_dataset)
        other_scripts.sort(key=attrgetter('name'))
        if len(other_scripts) > 0:
            lists.append(Category("Custom", other_scripts))
            # Custom datasets are also added to the first category in
            # ``lists``.  Iterate over a snapshot so the loop terminates even
            # if that category's ``scripts`` is the very same list object.
            for script in list(other_scripts):
                lists[0].scripts.append(script)

    choice_tree = Category("All Datasets", all_scripts, children=lists)
    return choice_tree
def download_public_data(datasets, data_dir='./data/', debug=False):
    """Download public datasets using the EcoData Retriever.

    Each dataset name is resolved through ``get_opts`` with the arguments
    ``install <dataset> -e s -f downloaded_data.sqlite`` and then downloaded
    with the engine returned by ``choose_engine``.

    Args:
        datasets: iterable of dataset names to install.
        data_dir: accepted for interface compatibility; the body does not
            read it.
        debug: forwarded as ``debug=`` to ``download`` when a name resolves
            to a list of scripts.  (This name was previously undefined in
            the function and raised ``NameError`` on that path.)
    """
    from retriever import SCRIPT_LIST
    from retriever.lib.tools import choose_engine, get_opts

    for dataset in datasets:
        script_list = SCRIPT_LIST()
        opts = get_opts(script_list, args=[
            'install', dataset, '-e', 's', '-f', 'downloaded_data.sqlite'
        ])
        script = opts["script"]
        engine = choose_engine(opts)

        if isinstance(script, list):
            # A distinct loop name keeps the outer ``dataset`` intact.
            for member in script:
                # Single-argument print behaves the same on Python 2 and 3.
                print("=> Installing %s" % member.name)
                member.download(engine, debug=debug)
        else:
            script.download(engine)

    print("Datasets successfully downloaded.")
def Find(self, evt):
    """Prompt for keywords and add a category holding the matching scripts.

    The entered text is split on single spaces into search terms.  Every
    script from ``SCRIPT_LIST()`` whose ``matches_terms(search_terms)`` is
    true is collected; a non-empty result is added to ``self.cat_list`` as a
    "Search results: ..." category, otherwise a message box is shown.

    Args:
        evt: the triggering event; not read by this handler.
    """
    dlg = wx.TextEntryDialog(self, 'Enter the keyword(s) to search for',
                             'Find', '')
    try:
        # Honour Cancel: previously the return code was ignored, so any text
        # typed before cancelling was still searched for.
        if dlg.ShowModal() != wx.ID_OK:
            return

        result = dlg.GetValue().strip()
        if not result:
            return

        search_terms = [
            term.strip() for term in result.split(' ') if term.strip()
        ]
        scripts = [
            script for script in SCRIPT_LIST()
            if script.matches_terms(search_terms)
        ]
        if len(scripts) > 0:
            results = Category(
                "Search results: " + ', '.join(search_terms), scripts)
            self.cat_list.AddChild(results, select=True)
        else:
            wx.MessageBox("Your search returned no results.", "No results")
    finally:
        # Always release the dialog, even if the search raises.
        dlg.Destroy()
"json": { 'engine': 'json', 'table_name': 'output_file_{table}.json' }, "csv": { 'engine': 'csv', 'table_name': 'output_file_{table}.csv' }, "sqlite": { 'engine': 'sqlite', 'file': dbfile, 'table_name': '{db}_{table}' } } SCRIPT_LIST = SCRIPT_LIST() TEST_ENGINES = {} IGNORE = [ "forest-inventory-analysis", "bioclim", "prism-climate", "vertnet", "NPN", "mammal-super-tree" ] IGNORE = [dataset.lower() for dataset in IGNORE] for engine in ENGINE_LIST: if engine.abbreviation in engine_test: try: opts = engine_test[engine.abbreviation] TEST_ENGINES[engine.abbreviation] = choose_engine(opts) except: TEST_ENGINES[engine.abbreviation] = None pass
def main():
    """This function launches the EcoData Retriever.

    With no arguments, or with ``gui`` as the first argument, the graphical
    application is started.  Otherwise the command line is parsed and the
    chosen command is run: ``help``, ``update``, ``citation``, ``gui``,
    ``new``, ``ls`` (also used when no dataset is given), or a dataset
    installation.

    All ``print`` calls use the single-argument parenthesised form, which
    produces the same output as a Python 2 print statement.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        check_for_updates(
            graphical='darwin' not in platform.platform().lower())
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    # otherwise, parse them
    script_list = SCRIPT_LIST()
    args = parser.parse_args()

    if args.quiet:
        # Silence all further output for the rest of the process.
        sys.stdout = open(os.devnull, 'w')

    if args.command == 'help':
        parser.parse_args(['-h'])

    if hasattr(args, 'compile') and args.compile:
        script_list = SCRIPT_LIST(force_compile=True)

    if args.command == 'update':
        check_for_updates(graphical=False)
        # Result unused; the call is kept for whatever SCRIPT_LIST() does
        # when invoked after an update.
        script_list = SCRIPT_LIST()
        return

    elif args.command == 'citation':
        if args.dataset is None:
            citation_path = os.path.join(os.path.split(__file__)[0],
                                         '../CITATION')
            print(citation_path)
            with open(citation_path) as citation_file:
                print(citation_file.read())
        else:
            scripts = name_matches(script_list, args.dataset)
            for dataset in scripts:
                print(dataset.description)
        return

    elif args.command == 'gui':
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    elif args.command == 'new':
        # ``with`` closes the file even if the write fails.
        with open(args.filename, 'w') as f:
            f.write(sample_script)
        return

    if args.command == 'ls' or args.dataset is None:
        import lscolumns

        # If scripts have never been downloaded there is nothing to list
        if not script_list:
            print("No scripts are currently available. "
                  "Updating scripts now...")
            check_for_updates(graphical=False)
            print("\n\nScripts downloaded.\n")
            script_list = SCRIPT_LIST()

        all_scripts = {script.shortname for script in script_list}
        # Every ">"-separated component of every tag, upper-cased, plus "ALL".
        all_tags = {"ALL"} | {
            tag.strip().upper()
            for script in script_list
            for tagset in script.tags
            for tag in tagset.split('>')
        }

        print("Available datasets (%s):" % len(all_scripts))
        lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
        print("Groups:")
        lscolumns.printls(sorted(all_tags))
        return

    engine = choose_engine(args.__dict__)
    debug = bool(getattr(args, 'debug', False))

    scripts = name_matches(script_list, args.dataset)
    if scripts:
        for dataset in scripts:
            print("=> Installing %s" % dataset.name)
            try:
                dataset.download(engine, debug=debug)
                dataset.engine.final_cleanup()
            except KeyboardInterrupt:
                # Deliberate: Ctrl-C skips to the next dataset.
                pass
            except Exception as e:
                print(e)
                if debug:
                    raise
        print("Done!")
    else:
        print("The dataset %s isn't currently available in the Retriever"
              % (args.dataset))
        print("Run 'retriever -ls to see a list of currently available "
              "datasets")
from builtins import str

from retriever import VERSION, COPYRIGHT
from retriever.lib.repository import check_for_updates
from retriever import SCRIPT_LIST

# Create the .rst file for the available datasets
datasetfile = open("datasets.rst", "w")
# NOTE(review): the line breaks inside this title literal are reconstructed
# from whitespace-collapsed text; confirm the exact blank lines.
datasetfile_title = """
==================
Datasets Available
==================

"""
check_for_updates()
script_list = SCRIPT_LIST()

# write the title of dataset rst file
datasetfile.write(datasetfile_title)

# get info from the scripts
for script_num, script in enumerate(script_list, start=1):
    # list() makes the values indexable on Python 3, where dict.values()
    # returns a view that does not support ``[0]``.
    url_values = list(script.urls.values())
    if script.ref.strip():
        reference_link = script.ref
    elif url_values:
        # Fall back to the first URL with its last path component removed.
        reference_link = url_values[0].rpartition('/')[0]
    else:
        reference_link = ""
    datasetfile.write(
        "| " + str(script_num)
        + ". **{}** \n| shortname: {}\n| reference: {}\n\n".format(
            script.name, script.shortname, reference_link))
from __future__ import print_function from builtins import input import os import json from time import sleep from retriever import SCRIPT_LIST, HOME_DIR short_names = [script.shortname.lower() for script in SCRIPT_LIST()] def is_empty(val): """Check if a variable is an empty string or an empty list""" return val == "" or val == [] def clean_input(prompt="", split_char='', ignore_empty=False, dtype=None): """Clean the user-input from the CLI before adding it""" while True: val = input(prompt).strip() # split to list type if split_char specified if split_char != "": val = [v.strip() for v in val.split(split_char) if v.strip() != ""] # do not ignore empty input if not allowed if not ignore_empty and is_empty(val): print("\tError: empty input. Need one or more values.\n") continue # ensure correct input datatype if specified if not is_empty(val) and dtype is not None: try: if not type(eval(val)) == dtype: print("\tError: input doesn't match required type ", dtype,
def main():
    """This function launches the Data Retriever.

    With no arguments the help text is shown.  Otherwise the command line is
    parsed and the chosen command is run: ``help``, ``defaults``,
    ``update``, ``citation``, ``new``, ``reset``, ``new_json``,
    ``edit_json``, ``delete_json``, ``ls``, or a dataset installation.

    Raises:
        Exception: "File not found" when ``edit_json``/``delete_json`` match
            no JSON script, and "no dataset specified." when an installation
            is requested without a dataset.
    """
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
        return

    # otherwise, parse them
    script_list = SCRIPT_LIST()
    args = parser.parse_args()

    if args.command == "install" and not args.engine:
        parser.parse_args(['install', '-h'])

    if args.quiet:
        # Silence all further output for the rest of the process.
        sys.stdout = open(os.devnull, 'w')

    if args.command == 'help':
        parser.parse_args(['-h'])

    if hasattr(args, 'compile') and args.compile:
        script_list = SCRIPT_LIST(force_compile=True)

    if args.command == 'defaults':
        for engine_item in engine_list:
            print("Default options for engine ", engine_item.name)
            for default_opts in engine_item.required_opts:
                print(default_opts[0], " ", default_opts[2])
            print()
        return

    if args.command == 'update':
        check_for_updates()
        # Result unused; the call is kept for whatever SCRIPT_LIST() does
        # when invoked after an update.
        script_list = SCRIPT_LIST()
        return

    elif args.command == 'citation':
        if args.dataset is None:
            print("\nCitation for retriever:\n")
            print(CITATION)
        else:
            scripts = name_matches(script_list, args.dataset)
            for dataset in scripts:
                print("\nDataset: {}".format(dataset.name))
                print("Citation: {}".format(dataset.citation))
                print("Description: {}\n".format(dataset.description))
        return

    elif args.command == 'new':
        # ``with`` closes the file even if the write fails.
        with open(args.filename, 'w') as f:
            f.write(sample_script)
        return

    elif args.command == 'reset':
        reset_retriever(args.scope)
        return

    elif args.command == 'new_json':
        # create new JSON script
        create_json()
        return

    elif args.command == 'edit_json':
        # edit existing JSON script
        scripts_dir = os.path.join(HOME_DIR, 'scripts')
        for json_file in os.listdir(scripts_dir):
            if not json_file.endswith('.json'):
                continue
            if args.filename.lower() in json_file.lower():
                edit_json(json_file)
                return
        raise Exception("File not found")

    elif args.command == 'delete_json':
        # delete existing JSON script
        scripts_dir = os.path.join(HOME_DIR, 'scripts')
        for json_file in os.listdir(scripts_dir):
            if not json_file.endswith('.json'):
                continue
            if args.dataset.lower() in json_file.lower():
                confirm = input("Really remove " + json_file +
                                " and all its contents? (y/N): ")
                if confirm.lower().strip() in ['y', 'yes']:
                    os.remove(os.path.join(scripts_dir, json_file))
                    try:
                        # Companion ".py" file next to the JSON script.
                        os.remove(os.path.join(
                            scripts_dir, json_file[:-4] + 'py'))
                    except OSError:
                        # Not compiled yet
                        pass
                return
        raise Exception("File not found")

    if args.command == 'ls':
        # If scripts have never been downloaded there is nothing to list
        if not script_list:
            print("No scripts are currently available. "
                  "Updating scripts now...")
            check_for_updates()
            print("\n\nScripts downloaded.\n")
            script_list = SCRIPT_LIST()

        all_scripts = []
        for script in script_list:
            if not script.shortname:
                continue
            if args.l is None:
                all_scripts.append(script.shortname)
                continue
            # Verbose listing: keep the script only when every search term
            # occurs (case-insensitively) in its description text.
            script_name = (script.name + "\nShortname: " +
                           script.shortname + "\n")
            if script.tags:
                script_name += "Tags: " + str(list(script.tags)) + "\n"
            haystack = script_name.lower()
            if all(term.lower() in haystack for term in args.l):
                all_scripts.append(script_name)

        all_scripts = sorted(all_scripts, key=lambda s: s.lower())
        print("Available datasets : {}\n".format(len(all_scripts)))

        if args.l is None:
            from retriever import lscolumns
            lscolumns.printls(all_scripts)
        else:
            for count, script in enumerate(all_scripts, start=1):
                print("%d. %s" % (count, script))
        return

    engine = choose_engine(args.__dict__)

    debug = bool(getattr(args, 'debug', False))
    if not debug:
        # Hide tracebacks unless debugging was requested.
        sys.tracebacklimit = 0

    # Test the attribute that is actually read.  The previous check looked
    # for 'debug' before reading ``args.not_cached``.
    use_cache = not getattr(args, 'not_cached', False)

    if args.dataset is None:
        raise Exception("no dataset specified.")
    scripts = name_matches(script_list, args.dataset)

    if scripts:
        for dataset in scripts:
            print("=> Installing", dataset.name)
            try:
                dataset.download(engine, debug=debug, use_cache=use_cache)
                dataset.engine.final_cleanup()
            except KeyboardInterrupt:
                # Deliberate: Ctrl-C skips to the next dataset.
                pass
            except Exception as e:
                print(e)
                if debug:
                    raise
        print("Done!")
    else:
        print("The dataset {} isn't currently available in the Retriever".format(
            args.dataset))
        print("Run 'retriever ls to see a list of currently available datasets")
def main():
    """This function launches the EcoData Retriever.

    With no arguments, or with ``gui`` as the first argument, the graphical
    application is started.  Otherwise the command line is parsed and the
    chosen command is run: ``help``, ``update``, ``citation``, ``gui``,
    ``new``, ``reset``, ``ls`` (also used when no dataset is given), or a
    dataset installation.

    All ``print`` calls use the single-argument parenthesised form, which
    produces the same output as a Python 2 print statement.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        check_for_updates(graphical=current_platform != 'darwin')
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    # otherwise, parse them
    script_list = SCRIPT_LIST()
    args = parser.parse_args()

    if args.quiet:
        # Silence all further output for the rest of the process.
        sys.stdout = open(os.devnull, 'w')

    if args.command == 'help':
        parser.parse_args(['-h'])

    if hasattr(args, 'compile') and args.compile:
        script_list = SCRIPT_LIST(force_compile=True)

    if args.command == 'update':
        check_for_updates(graphical=False)
        # Result unused; the call is kept for whatever SCRIPT_LIST() does
        # when invoked after an update.
        script_list = SCRIPT_LIST()
        return

    elif args.command == 'citation':
        if args.dataset is None:
            citation_path = os.path.join(os.path.split(__file__)[0],
                                         '../CITATION')
            print("\nCitation for retriever:\n")
            with open(citation_path) as citation_file:
                print(citation_file.read())
        else:
            scripts = name_matches(script_list, args.dataset)
            for dataset in scripts:
                print("\nCitation: {}".format(dataset.citation))
                print("Description: {}\n".format(dataset.description))
        return

    elif args.command == 'gui':
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    elif args.command == 'new':
        # ``with`` closes the file even if the write fails.
        with open(args.filename, 'w') as f:
            f.write(sample_script)
        return

    elif args.command == 'reset':
        reset_retriever(args.scope)
        return

    if args.command == 'ls' or args.dataset is None:
        # If scripts have never been downloaded there is nothing to list
        if not script_list:
            print("No scripts are currently available. "
                  "Updating scripts now...")
            check_for_updates(graphical=False)
            print("\n\nScripts downloaded.\n")
            script_list = SCRIPT_LIST()

        all_scripts = []
        for script in script_list:
            if not script.name:
                continue
            if args.l is None:
                all_scripts.append(script.shortname)
                continue
            # Verbose listing: keep the script only when every search term
            # occurs (case-insensitively) in its description text.
            script_name = (script.name + "\nShortname: " +
                           script.shortname + "\n")
            if script.tags:
                script_name += "Tags: " + str(list(script.tags)) + "\n"
            haystack = script_name.lower()
            if all(term.lower() in haystack for term in args.l):
                all_scripts.append(script_name)

        all_scripts = sorted(all_scripts, key=lambda s: s.lower())
        print("Available datasets : {}\n".format(len(all_scripts)))

        if args.l is None:
            import lscolumns
            lscolumns.printls(all_scripts)
        else:
            for count, script in enumerate(all_scripts, start=1):
                print("%d. %s" % (count, script))
        return

    engine = choose_engine(args.__dict__)
    debug = bool(getattr(args, 'debug', False))

    scripts = name_matches(script_list, args.dataset)
    if scripts:
        for dataset in scripts:
            print("=> Installing %s" % dataset.name)
            try:
                dataset.download(engine, debug=debug)
                dataset.engine.final_cleanup()
            except KeyboardInterrupt:
                # Deliberate: Ctrl-C skips to the next dataset.
                pass
            except Exception as e:
                print(e)
                if debug:
                    raise
        print("Done!")
    else:
        print("The dataset {} isn't currently available in the Retriever".format(args.dataset))
        print("Run 'retriever ls to see a list of currently available datasets")