示例#1
0
def _join(args, use_cache, debug, compile):
    """Integrate every dataset script whose name matches ``args["dataset"]``.

    Args:
        args: Mapping of options. It is passed to ``choose_engine`` and must
            contain the key ``"dataset"``.
        use_cache: Value stored on the chosen engine's ``use_cache`` attribute.
        debug: Forwarded to each script's ``integrate`` call. When true, an
            exception raised while processing a script is re-raised after
            being printed; otherwise it is only printed and the loop moves on
            to the next script.
        compile: Not used by this function; kept so existing callers keep
            working.

    Returns:
        The engine returned by ``choose_engine(args)``.

    Raises:
        ValueError: If no script matches ``args["dataset"]``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache

    script_list = SCRIPT_LIST()
    # Nothing loaded and nothing on disk: fetch the scripts, then reload.
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        check_for_updates()
        script_list = SCRIPT_LIST()

    scripts = name_matches(script_list, args["dataset"])
    if not scripts:
        message = (
            'The dataset "{}" isn\'t available in the pydataweaver. '
            "Run pydataweaver.datasets() to list the currently available "
            "datasets".format(args["dataset"]))
        raise ValueError(message)

    for dataset_script in scripts:
        try:
            dataset_script.integrate(engine, debug=debug)
            dataset_script.engine.final_cleanup()
        except Exception as e:
            # Best effort: report the failure and continue with the next
            # script unless the caller asked for debugging.
            print(e)
            if debug:
                raise
    return engine
示例#2
0
def dataset_licenses():
    """Return the set of all license names declared by the available scripts.

    Each script's ``licenses`` attribute is read as a mapping whose values are
    lists of license names; every name is lower-cased before being added.

    Returns:
        set: Lower-cased license names across all scripts from
        ``SCRIPT_LIST()``.
    """
    # ``lower`` must be called, and the names must be collected flat:
    # a set cannot hold the per-script lists themselves.
    return {
        lc.lower()
        for script in SCRIPT_LIST()
        for lc in sum(script.licenses.values(), [])
    }
示例#3
0
def datasets(keywords=None, licenses=None):
    """Return the available dataset scripts, optionally filtered.

    Args:
        keywords: Optional iterable of strings. A script matches when any
            keyword occurs (case-insensitively) in its title, name, or
            "-"-joined keywords.
        licenses: Optional iterable of license names. A script matches when
            any of its license names equals one of these, ignoring case.

    Returns:
        list: Scripts sorted by lower-cased name. With no filters, every
        script is returned; otherwise only named scripts matching at least
        one of the given filters.
    """
    script_list = SCRIPT_LIST()

    if not keywords and not licenses:
        return sorted(script_list, key=lambda s: s.name.lower())

    # Normalise the filters once instead of once per script.
    wanted_licenses = {lic.lower() for lic in licenses} if licenses else set()
    wanted_keywords = [k.lower() for k in keywords] if keywords else []

    result_scripts = set()
    for script in script_list:
        if not script.name:
            continue

        if wanted_licenses:
            # ``lower`` has to be called; comparing bound methods against
            # strings can never match.
            script_licenses = {
                lc.lower() for lc in sum(script.licenses.values(), [])
            }
            if script_licenses & wanted_licenses:
                result_scripts.add(script)
                continue

        if wanted_keywords:
            searchable = script.title + " " + script.name
            if script.keywords:
                searchable = searchable + " " + "-".join(script.keywords)
            searchable = searchable.lower()
            if any(k in searchable for k in wanted_keywords):
                result_scripts.add(script)

    return sorted(result_scripts, key=lambda s: s.name.lower())
def test_make_sql(key):
    """Check the SQL generated for the script named ``key``.

    The output of ``make_sql`` and the entry ``expected_query[key]`` are both
    stripped of surrounding whitespace and of all newline characters before
    being compared. Scripts with a different name are ignored.
    """

    def _flatten(sql):
        return sql.strip().replace("\n", "")

    for script in SCRIPT_LIST():
        if script.name != key:
            continue
        assert _flatten(make_sql(script)) == _flatten(expected_query[key])
示例#5
0
def get_script_version():
    """Return "<file name>,<version>" strings for the public scripts.

    For each public script, the path in ``_file`` with its last
    dot-separated part removed is used as the base path:

    * if ``<base>.json`` exists and the script has a truthy version, the
      entry is named ``<_name>.json``;
    * if ``<base>.json`` does not exist but ``<base>.py`` does, the entry is
      named ``<_name>.py``.

    Returns:
        list: The entries sorted case-insensitively.
    """
    from pydataweaver.lib.scripts import SCRIPT_LIST

    entries = []
    for module in SCRIPT_LIST():
        if not module.public:
            continue

        base_path = ".".join(module._file.split(".")[:-1])
        if os.path.isfile(base_path + ".json"):
            # A JSON script without a version is not reported at all.
            if not module.version:
                continue
            extension = ".json"
        elif os.path.isfile(base_path + ".py"):
            extension = ".py"
        else:
            continue

        entries.append(",".join([module._name + extension, str(module.version)]))

    entries.sort(key=str.lower)
    return entries
示例#6
0
from __future__ import print_function

import glob
import json
import os
import re
from builtins import input
from time import sleep

from pydataweaver.lib.defaults import HOME_DIR, ENCODING
from pydataweaver.lib.scripts import SCRIPT_LIST

# Lower-cased name of every script returned by SCRIPT_LIST(), built once when
# this module is imported.
short_names = [script.name.lower() for script in SCRIPT_LIST()]


def is_empty(val):
    """Tell whether ``val`` equals the empty string or the empty list."""
    if val == "":
        return True
    return val == []


def clean_input(prompt="", split_char="", ignore_empty=False, dtype=None):
    """Prompt for CLI input and normalise it.

    Args:
        prompt: Text passed to ``input``.
        split_char: When non-empty, the stripped input is split on this
            separator; each piece is stripped and empty pieces are dropped,
            giving a list.
        ignore_empty: When false, an empty string or empty list is rejected
            with an error message and the prompt is shown again.
        dtype: Not referenced in the lines shown here.

    NOTE(review): the body shown here never returns a value and never uses
    ``dtype``; the listing appears to be cut off after the empty-input check.
    Confirm against the full source.
    """
    while True:
        # Surrounding whitespace is removed before any further processing.
        val = input(prompt).strip()
        # split to list type if split_char specified
        if split_char != "":
            val = [v.strip() for v in val.split(split_char) if v.strip() != ""]
        # do not ignore empty input if not allowed
        if not ignore_empty and is_empty(val):
            print("\tError: empty input. Need one or more values.\n")
            continue
示例#7
0
from __future__ import absolute_import
from __future__ import print_function

from pydataweaver.engines import choose_engine
from pydataweaver.lib.scripts import SCRIPT_LIST
from pydataweaver.lib.engine_tools import name_matches

# Scripts are loaded once when this module is imported; download() matches
# dataset names against this list.
script_list = SCRIPT_LIST()


def download(dataset, path="./", quiet=False, subdir=False, debug=False):
    """Download scripts for pydataweaver."""
    args = {
        "dataset": dataset,
        "command": "download",
        "path": path,
        "subdir": subdir,
        "quiet": quiet,
    }
    engine = choose_engine(args)
    scripts = name_matches(script_list, args["dataset"])
    if scripts:
        for dataset in scripts:
            print("=> Download csv Integrated data", dataset.name)
            try:
                dataset.integrate(engine, debug=debug)

                # Todo csv should fetch the file to path
                dataset.engine.to_csv()
                dataset.engine.final_cleanup()
            except KeyboardInterrupt:
示例#8
0
def main():
    """Launch the pydataweaver command-line interface.

    With no command-line arguments the parser is asked for its help output.
    Otherwise the arguments are parsed and the selected command is handled:
    "help", "update", "reset", "citation", "license", "ls" or "join".
    """
    if len(sys.argv) == 1:
        # If no command line Args are passed, show the help options
        parser.parse_args(["-h"])
    else:
        args = parser.parse_args()

        # For every command except "reset" and "update": when the second
        # search path is not a directory and the last search path lists no
        # entries, fetch the scripts and reload them.
        # NOTE(review): os.listdir() runs before the os.path.exists() filter
        # inside the comprehension, so a missing last search path would raise
        # instead of being treated as empty -- confirm this is intended.
        if (args.command not in ["reset", "update"]
                and not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not [
                    f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                    if os.path.exists(SCRIPT_SEARCH_PATHS[-1])
                ]):
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()

        # "join" needs an engine; without one, ask for the join help output.
        if args.command == "join" and not args.engine:
            parser.parse_args(["join", "-h"])

        # Quiet mode: everything written to stdout from here on goes to the
        # null device.
        if args.quiet:
            sys.stdout = open(os.devnull, "w")

        if args.command == "help":
            parser.parse_args(["-h"])

        if args.command == "update":
            check_for_updates()
            reload_scripts()
            return

        if args.command == "reset":
            reset_weaver(args.scope)
            return
        if args.command == "citation":
            if args.dataset is None:
                # get the citation of pydataweaver
                print(CITATION)
                return
            else:
                # Print name, description and every citation entry of each
                # script matching the requested dataset.
                scripts = name_matches(script_list, args.dataset)
                for data_set in scripts:
                    print("\nDataset:  {}".format(data_set.name))
                    print("Description:   {}".format(data_set.description))
                    print("Citations:")
                    for cite in data_set.citation:
                        for key, value in cite.items():
                            print("{k}:    {v}".format(k=key, v=value))
            return
        if args.command == "license":
            data_set_license = license(args.dataset)
            if data_set_license:
                print(data_set_license)
            else:
                print("There is no license information for {}".format(
                    args.dataset))
            return

        # list the data sets available
        if args.command == "ls":
            # Neither -l nor -k given and -v is not a list: print the count
            # and pass all dataset names to lscolumns.printls.
            if not (args.l or args.k or isinstance(args.v, list)):
                all_scripts = dataset_names()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from pydataweaver import lscolumns

                lscolumns.printls(all_scripts)

            # If pydataweaver ls  -v  has a list of scripts, i.e item1, item2,
            # print the items' information, else consider all scripts"
            elif isinstance(args.v, list):
                if args.v:
                    try:
                        all_scripts = [
                            get_script(dataset) for dataset in args.v
                        ]
                    except KeyError:
                        # A single unknown name discards the whole selection.
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    all_scripts = datasets()
                print_info(all_scripts)

            else:
                # -l holds the license filter and -k the keyword filter;
                # empty values are passed on as None.
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None

                # search
                searched_scripts = datasets(keywords, param_licenses)
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print("Available datasets : {}\n".format(
                        len(searched_scripts)))
                    print_info(searched_scripts, keywords_license=True)

            return
        if args.command == "join":
            engine = choose_engine(args.__dict__)

            if hasattr(args, "debug") and args.debug:
                debug = True
            else:
                debug = False
                # Outside debug mode no traceback entries are printed for
                # unhandled exceptions.
                sys.tracebacklimit = 0

            # NOTE(review): ``scripts`` is only bound when args.dataset is not
            # None; otherwise the ``if scripts:`` below raises
            # UnboundLocalError -- confirm the parser makes dataset mandatory
            # for "join".
            if args.dataset is not None:
                scripts = name_matches(script_list, args.dataset)
            if scripts:
                for data_set in scripts:
                    print("=> Integrating", data_set.name)
                    try:
                        data_set.integrate(engine, debug=debug)
                        data_set.engine.final_cleanup()
                    except KeyboardInterrupt:
                        # Ctrl-C skips the current dataset and moves on to the
                        # next one.
                        pass
                    except Exception as e:
                        # Report the failure; only propagate it in debug mode.
                        print(e)
                        if debug:
                            raise