def _join(args, use_cache, debug, compile):
    """Integrate every dataset script matching ``args["dataset"]``.

    Args:
        args: Mapping of options; passed to ``choose_engine`` and read for
            the ``"dataset"`` key.
        use_cache: Value stored on the chosen engine as ``engine.use_cache``.
        debug: Forwarded to each script's ``integrate`` call; when truthy,
            an exception raised while integrating is re-raised after being
            printed instead of being swallowed.
        compile: Not read by this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        The engine returned by ``choose_engine(args)``.

    Raises:
        ValueError: If no script matches the requested dataset name.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache

    script_list = SCRIPT_LIST()
    # Nothing loaded and nothing on disk: fetch the scripts, then reload.
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        check_for_updates()
        script_list = SCRIPT_LIST()

    scripts = name_matches(script_list, args["dataset"])
    if not scripts:
        message = (
            'The dataset "{}" isn\'t available in the pydataweaver. '
            "Run pydataweaver.datasets() to list the currently available "
            "datasets".format(args["dataset"]))
        raise ValueError(message)

    for dataset_script in scripts:
        try:
            dataset_script.integrate(engine, debug=debug)
            dataset_script.engine.final_cleanup()
        except Exception as e:
            # Best effort: report the failure and move on to the next
            # script unless the caller asked for debugging.
            print(e)
            if debug:
                raise
    return engine
def dataset_licenses():
    """Return a set with all available license names, lower-cased.

    Each script's ``licenses`` attribute is read through ``.values()``.
    NOTE(review): this assumes every value is an iterable of license-name
    strings -- confirm against the script definitions.

    Returns:
        set: The distinct lower-cased license names across all scripts
        returned by ``SCRIPT_LIST()``.
    """
    all_licenses = set()
    for script in SCRIPT_LIST():
        for license_group in script.licenses.values():
            # Call lower() (the method itself was previously collected) and
            # add plain strings so the result is a hashable, flat set.
            all_licenses.update(lc.lower() for lc in license_group)
    return all_licenses
def datasets(keywords=None, licenses=None):
    """Return the available datasets, optionally filtered.

    With neither filter given, every script from ``SCRIPT_LIST()`` is
    returned. Otherwise a script with a truthy ``name`` is selected when one
    of its licenses equals a requested license (case-insensitive) or when a
    requested keyword occurs (case-insensitive substring) in its title, name
    or "-"-joined keywords.

    Args:
        keywords: Optional iterable of keyword strings to search for.
        licenses: Optional iterable of license-name strings to match.

    Returns:
        list: Matching scripts sorted by lower-cased ``name``.
    """
    script_list = SCRIPT_LIST()
    if not keywords and not licenses:
        return sorted(script_list, key=lambda s: s.name.lower())

    wanted_licenses = {lic.lower() for lic in licenses} if licenses else set()
    wanted_keywords = [k.lower() for k in keywords] if keywords else []

    result_scripts = set()
    for script in script_list:
        if not script.name:
            continue

        if wanted_licenses:
            # Lower-case by *calling* lower(); comparing bound methods
            # against strings could never match.
            script_licenses = {
                lc.lower()
                for license_group in script.licenses.values()
                for lc in license_group
            }
            if script_licenses & wanted_licenses:
                result_scripts.add(script)
                continue

        if wanted_keywords:
            searchable = script.title + " " + script.name
            if script.keywords:
                searchable = searchable + " " + "-".join(script.keywords)
            searchable = searchable.lower()
            if any(k in searchable for k in wanted_keywords):
                result_scripts.add(script)

    return sorted(result_scripts, key=lambda s: s.name.lower())
def test_make_sql(key):
    """Compare the SQL generated for the script named ``key`` with the
    stored expected query, ignoring surrounding whitespace and newlines."""

    def _normalise(query):
        # Strip the ends and drop line breaks so layout does not matter.
        return query.strip().replace("\n", "")

    for script in SCRIPT_LIST():
        if script.name != key:
            continue
        assert _normalise(make_sql(script)) == _normalise(expected_query[key])
def get_script_version():
    """Build a sorted list of "<script file name>,<version>" strings.

    Only public scripts are considered. A script is reported as
    ``<name>.json`` when its JSON file exists and it has a truthy version,
    or as ``<name>.py`` when only the Python file exists. The result is
    sorted case-insensitively.
    """
    from pydataweaver.lib.scripts import SCRIPT_LIST

    entries = []
    for module in SCRIPT_LIST():
        if not module.public:
            continue

        # Path of the script file with its last dotted component removed.
        stem = ".".join(module._file.split(".")[:-1])
        has_json = os.path.isfile(stem + ".json")

        if has_json and module.version:
            extension = ".json"
        elif not has_json and os.path.isfile(stem + ".py"):
            extension = ".py"
        else:
            continue

        entries.append(",".join([module._name + extension,
                                 str(module.version)]))

    entries.sort(key=str.lower)
    return entries
from __future__ import print_function import glob import json import os import re from builtins import input from time import sleep from pydataweaver.lib.defaults import HOME_DIR, ENCODING from pydataweaver.lib.scripts import SCRIPT_LIST short_names = [script.name.lower() for script in SCRIPT_LIST()] def is_empty(val): """Check if a variable is an empty string or an empty list.""" return val == "" or val == [] def clean_input(prompt="", split_char="", ignore_empty=False, dtype=None): """Clean the user-input from the CLI before adding it.""" while True: val = input(prompt).strip() # split to list type if split_char specified if split_char != "": val = [v.strip() for v in val.split(split_char) if v.strip() != ""] # do not ignore empty input if not allowed if not ignore_empty and is_empty(val): print("\tError: empty input. Need one or more values.\n") continue
from __future__ import absolute_import from __future__ import print_function from pydataweaver.engines import choose_engine from pydataweaver.lib.scripts import SCRIPT_LIST from pydataweaver.lib.engine_tools import name_matches script_list = SCRIPT_LIST() def download(dataset, path="./", quiet=False, subdir=False, debug=False): """Download scripts for pydataweaver.""" args = { "dataset": dataset, "command": "download", "path": path, "subdir": subdir, "quiet": quiet, } engine = choose_engine(args) scripts = name_matches(script_list, args["dataset"]) if scripts: for dataset in scripts: print("=> Download csv Integrated data", dataset.name) try: dataset.integrate(engine, debug=debug) # Todo csv should fetch the file to path dataset.engine.to_csv() dataset.engine.final_cleanup() except KeyboardInterrupt:
def main(): """This function launches the pydataweaver.""" if len(sys.argv) == 1: # If no command line Args are passed, show the help options parser.parse_args(["-h"]) else: args = parser.parse_args() if (args.command not in ["reset", "update"] and not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not [ f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1]) if os.path.exists(SCRIPT_SEARCH_PATHS[-1]) ]): check_for_updates() reload_scripts() script_list = SCRIPT_LIST() if args.command == "join" and not args.engine: parser.parse_args(["join", "-h"]) if args.quiet: sys.stdout = open(os.devnull, "w") if args.command == "help": parser.parse_args(["-h"]) if args.command == "update": check_for_updates() reload_scripts() return if args.command == "reset": reset_weaver(args.scope) return if args.command == "citation": if args.dataset is None: # get the citation of pydataweaver print(CITATION) return else: scripts = name_matches(script_list, args.dataset) for data_set in scripts: print("\nDataset: {}".format(data_set.name)) print("Description: {}".format(data_set.description)) print("Citations:") for cite in data_set.citation: for key, value in cite.items(): print("{k}: {v}".format(k=key, v=value)) return if args.command == "license": data_set_license = license(args.dataset) if data_set_license: print(data_set_license) else: print("There is no license information for {}".format( args.dataset)) return # list the data sets available if args.command == "ls": if not (args.l or args.k or isinstance(args.v, list)): all_scripts = dataset_names() print("Available datasets : {}\n".format(len(all_scripts))) from pydataweaver import lscolumns lscolumns.printls(all_scripts) # If pydataweaver ls -v has a list of scripts, i.e item1, item2, # print the items' information, else consider all scripts" elif isinstance(args.v, list): if args.v: try: all_scripts = [ get_script(dataset) for dataset in args.v ] except KeyError: all_scripts = [] print("Dataset(s) is not found.") else: all_scripts = datasets() 
print_info(all_scripts) else: param_licenses = args.l if args.l else None keywords = args.k if args.k else None # search searched_scripts = datasets(keywords, param_licenses) if not searched_scripts: print("No available datasets found") else: print("Available datasets : {}\n".format( len(searched_scripts))) print_info(searched_scripts, keywords_license=True) return if args.command == "join": engine = choose_engine(args.__dict__) if hasattr(args, "debug") and args.debug: debug = True else: debug = False sys.tracebacklimit = 0 if args.dataset is not None: scripts = name_matches(script_list, args.dataset) if scripts: for data_set in scripts: print("=> Integrating", data_set.name) try: data_set.integrate(engine, debug=debug) data_set.engine.final_cleanup() except KeyboardInterrupt: pass except Exception as e: print(e) if debug: raise