Exemplo n.º 1
0
    def test_get_all_idents(self):
        """Check how ``_Identifier.get_all_idents`` expands set names.

        Three cases are covered: a bare string (instead of a list) raises
        ``ValueError``; a single set name expands to the members of that set;
        and a pair of set names - written either as a comma-separated string
        or as a tuple - expands to every ordered pair of members.
        """
        members = ("A", "B", "C")
        sets = {"a_set": list(members)}

        # Every ordered pair of members, the first element varying slowest.
        all_pairs = [(first, second) for first in members for second in members]

        # The identifiers have to be supplied as a list, not as a bare string.
        with self.assertRaises(ValueError):
            _Identifier.get_all_idents("a_set", sets=sets)

        self.assertEqual(_Identifier.get_all_idents(["a_set"], sets=sets),
                         ["A", "B", "C"])

        # Both spellings of a two-field identifier must give the same result.
        for pair_spec in ("a_set,a_set", ("a_set", "a_set")):
            idents = _Identifier.get_all_idents([pair_spec], sets=sets)
            self.assertEqual(idents, all_pairs)
Exemplo n.º 2
0
    def _load_set(set: "List[str]"):
        """Expand a set definition and check that the result is a list.

        :param set: The set definition; it is passed to
            ``_Identifier.get_all_idents`` for expansion. (The name shadows the
            builtin but is kept so that keyword callers are unaffected.)
        :return: The object returned by ``_Identifier.get_all_idents``.
        :raises TypeError: If the expanded set is not a ``list``. The message
            is logged as an error before the exception is raised.
        """

        idents = _Identifier.get_all_idents(set)

        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with ``-O``, which would silently disable this validation.
        if not isinstance(idents, list):
            msg = "Each set must be provided as a list, not type '%s' for object %s." % (
                type(idents), idents)
            FromCERO._logger.error(msg)
            raise TypeError(msg)

        return idents
Exemplo n.º 3
0
        def _exec_op(self, op: dict):
            """Run a single operation and optionally rename/restrict its result.

            The keys ``'func'``, ``'args'``, ``'rename'`` and ``'arrays'`` are
            removed from ``op``; whatever remains is forwarded to the function
            as keyword arguments. After the call ``op['func']`` is set to the
            name of the function that was executed.

            :param dict op: The operation definition. It is modified in place.
            :return: The value returned by the function or, if ``'rename'`` was
                given, only the rows that were renamed (under their new names).
            :raises AttributeError: If none of the modules in
                ``self["libfuncs"]`` has an attribute named by ``'func'``.
            """

            # Pull the reserved keys out of the operation definition.
            func_name = op.pop('func', "noop")  # No function given: use the no-operation function.
            positional = op.pop('args', [])
            rename = op.pop("rename", None)

            locs = None
            if "arrays" in op:
                locs = op.pop("arrays")
                if isinstance(locs, str):
                    locs = [locs]
                locs = _Identifier.get_all_idents(locs, sets=self["sets"])

            # The function is taken from the first module that provides it.
            not_found = object()
            provider = next((mod for mod in self["libfuncs"] if hasattr(mod, func_name)), not_found)
            if provider is not_found:
                msg = ("Invalid function name provided - '%s'. Function does not exist in any of the modules %s. "
                       "It may be necessary to create a python module with the necessary functions and provide "
                       "this file with the 'libfuncs' option.") % (func_name, self["libfuncs"])
                FromCERO._logger.error(msg)
                raise AttributeError(msg)
            func = getattr(provider, func_name)

            FromCERO._logger.debug("Function call: %s(*arrays, **op)" % func.__name__)

            ret = func(self.inputs, *positional, locs=locs, **op)
            op['func'] = func.__name__  # For cleanliness of presentation

            if rename is None:
                return ret

            if ret is None:
                # The function returned nothing, so use the output of 'noop' for the same arguments.
                # NOTE(review): 'op' contains 'func' again at this point, so it is passed on as a keyword.
                ret = libfuncs.noop(self.inputs, *positional, locs=locs, **op)

            if isinstance(rename, str):
                # A single name renames the first index value by default.
                rename = {ret.index.tolist()[0]: rename}

            if isinstance(rename, list):
                # Build mapping dictionary
                rename = _Identifier.get_mapping_dict(ret.index.tolist(), rename, sets=self.get("sets"))
            elif isinstance(rename, dict):
                rename = _Identifier.get_one_to_one_mapping(rename, sets=self.get("sets"))

            # At this point, rename should be a one-to-one mapping dict.
            selected = ret.loc[list(rename.keys())]
            renamed = CERO.rename_index_values(selected, rename, inplace=False)

            # The result is restricted to only those rows that have been renamed.
            return renamed.loc[list(rename.values())]
Exemplo n.º 4
0
    def read_csv(csv_file):
        """
        Reads CEROs that have been exported to csv file. It is assumed that ';' are used to separate the fields (if more than one) of the identifier.

        The values are converted to 32-bit floats, the index is rebuilt from the identifiers in the first column and the column labels are parsed as years.

        :param str csv_file: Path to the file containing the CERO.
        :return pandas.DataFrame: The imported CERO.
        :raises AssertionError: If ``CERO.is_cero`` returns a false value for the imported data (only when assertions are enabled).
        """

        cero = pd.read_csv(csv_file, header=0, index_col=0)  # First row is the header, first column the identifiers
        # The dtype is given by name: the ``pd.np`` alias was deprecated in pandas 1.0 and removed in pandas 2.0.
        cero = cero.astype("float32")
        cero.index = CERO.create_cero_index(_Identifier.get_all_idents(cero.index.tolist(), sep=";"))
        cero.columns = pd.to_datetime(cero.columns.tolist(), format="%Y")
        assert CERO.is_cero(cero)  # Check that it is a valid CERO object

        return cero
Exemplo n.º 5
0
        def load_config(proc_dict: dict, parent: 'FromCERO' = None):
            """Build the complete configuration of a procedure.

            Options are layered in increasing priority: the built-in defaults,
            then ``parent`` (if given), then ``proc_dict``. Afterwards paths are
            resolved against ``ref_dir``, the ``libfuncs`` modules are loaded,
            the sets are loaded and the identifiers of ``inputs`` (and
            ``outputs``, if present) are determined.

            :param dict proc_dict: The options of the procedure.
            :param parent: Mapping that supplies inherited options (optional).
            :return dict: The completed configuration.
            :raises TypeError: If an entry of ``libfuncs`` is neither a string
                nor a module.
            :raises ValueError: If ``outputs`` has an unsupported value.
            """

            # Add default options here
            config = {"name": "Unnamed_proc",
                      "operations": [],
                      "inputs": [],
                      "ref_dir": None,
                      "sets": {},
                      "map": {},
                      "libfuncs": [],
                      }
            config.update({} if parent is None else parent)
            config.update(proc_dict)

            # Paths are resolved against the reference directory (the current directory by default).
            ref_dir = config.get("ref_dir")
            if ref_dir is None:
                ref_dir = os.getcwd()
            ref_dir = os.path.abspath(ref_dir)
            config["ref_dir"] = ref_dir

            if config.get("file"):
                config["file"] = os.path.join(ref_dir, os.path.relpath(config["file"]))

            if isinstance(config.get("libfuncs"), str):
                config["libfuncs"] = [config["libfuncs"]]

            # Determine the file that belongs to every libfuncs entry.
            lf_files = []
            for entry in config["libfuncs"]:
                if isinstance(entry, str):
                    # Only the paths listed by this procedure itself are joined to ref_dir.
                    if entry in proc_dict.get("libfuncs", []):
                        entry = os.path.join(ref_dir, entry)
                elif isinstance(entry, ModuleType):
                    entry = entry.__file__
                else:
                    raise TypeError("'libfuncs' must be provided as a list of strings and/or modules (not %s)." % type(entry))
                lf_files.append(entry)

            # Ensure system libfuncs is on search path...
            system_libfuncs = concero.conf.find_file("libfuncs.py")
            if system_libfuncs not in lf_files:
                lf_files.append(system_libfuncs)
                config["libfuncs"].append(system_libfuncs)

            # Load all libfuncs modules; entries that are not strings are kept as they are.
            loaded = []
            for path, candidate in zip(lf_files, config["libfuncs"]):
                if isinstance(candidate, str):
                    spec = importlib.util.spec_from_file_location(os.path.basename(path), path)
                    candidate = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(candidate)
                loaded.append(candidate)
            config["libfuncs"] = loaded

            # Load sets - a string value is the path (relative to ref_dir) of a YAML file defining the set.
            sets = config["sets"]
            for key in sets:
                if isinstance(sets[key], str):
                    sets[key] = os.path.join(ref_dir, sets[key])
                    sets[key] = read_yaml(sets[key])

                sets[key] = FromCERO._load_set(sets[key])

            # Determine identifiers for all inputs
            inputs = config["inputs"]
            if isinstance(inputs, str):
                inputs = [inputs]
            inputs = _Identifier.get_all_idents(inputs, sets=sets)

            if "lstrip" in config:
                prefix = config["lstrip"]
                inputs = [_Identifier.lstrip_identifier(prefix, ident) for ident in inputs]
            config["inputs"] = inputs

            if "outputs" in config:
                outputs = config["outputs"]
                if isinstance(outputs, str):
                    outputs = [outputs]

                if isinstance(outputs, list):
                    config["outputs"] = _Identifier.get_all_idents(outputs, sets=sets)
                elif outputs == True:
                    # The key is dropped from the configuration altogether.
                    del config["outputs"]
                elif outputs == False:
                    config["outputs"] = None
                elif outputs is not None:
                    raise ValueError("'outputs' must be provided as a list, True or None.")

            return config