Example #1
    def loader(self, file_name, *args, **kwargs):
        """Loads data into a DataSet object and returns it"""

        new_tests = []

        test_no = 1
        channel_index = 1
        channel_number = 1
        creator = "no name"
        item_ID = 1
        schedule_file_name = "no name"
        start_datetime = "2020.02.24 14:58:00"
        test_ID = 1
        test_name = "no name"

        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        self.logger.debug("in loader")
        self.logger.debug("filename: %s" % file_name)

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        self.logger.debug(txt)

        data = Cell()
        data.cell_no = test_no
        data.loaded_from = file_name
        fid = FileID(file_name)
        data.channel_index = channel_index
        data.channel_number = channel_number
        data.creator = creator
        data.item_ID = item_ID
        data.schedule_file_name = schedule_file_name
        data.start_datetime = start_datetime
        data.test_ID = test_ID
        data.test_name = test_name
        data.raw_data_files.append(fid)

        length_of_test, normal_df = load_nda()

        # this loader does not build summary data; use an empty DataFrame
        empty_df = pd.DataFrame()
        data.summary = empty_df

        data.raw = normal_df
        data.raw_data_files_length.append(length_of_test)

        data = self._post_process(data)
        data = self.identify_last_data_point(data)

        new_tests.append(data)

        return new_tests
Example #2
    def _init_data(self, file_name, global_data_df, test_no):
        data = Cell()
        data.cell_no = test_no
        data.loaded_from = file_name
        fid = FileID(file_name)
        # name of the .res file it is loaded from:
        # data.parent_filename = os.path.basename(file_name)
        data.channel_index = int(
            global_data_df[self.headers_global["channel_index_txt"]][test_no])
        data.channel_number = int(
            global_data_df[self.headers_global["channel_number_txt"]][test_no])
        data.creator = global_data_df[
            self.headers_global["creator_txt"]][test_no]
        data.item_ID = global_data_df[
            self.headers_global["item_id_txt"]][test_no]
        data.schedule_file_name = global_data_df[
            self.headers_global["schedule_file_name_txt"]][test_no]
        data.start_datetime = global_data_df[
            self.headers_global["start_datetime_txt"]][test_no]
        data.test_ID = int(
            global_data_df[self.headers_normal.test_id_txt][test_no])
        data.test_name = global_data_df[
            self.headers_global["test_name_txt"]][test_no]
        data.raw_data_files.append(fid)
        return data
Example #3
    def loader(self, file_name, bad_steps=None, **kwargs):
        """Loads data from biologics .mpr files.

        Args:
            file_name (str): path to .mpr file.
            bad_steps (list of tuples): (c, s) tuples of steps s
             (in cycle c) to skip loading.

        Returns:
            new_tests (list of data objects)
        """
        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        self.logger.debug(txt)

        # creating a temporary copy of the file
        temp_dir = tempfile.gettempdir()
        temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
        shutil.copy2(file_name, temp_dir)

        self.logger.debug("tmp file: %s" % temp_filename)
        self.logger.debug("HERE WE LOAD THE DATA")

        data = Cell()
        fid = FileID(file_name)

        # misc parameters and information (probably load this last)
        test_no = 1
        data.cell_no = test_no
        data.loaded_from = file_name

        # some overall prms
        data.channel_index = None
        data.channel_number = None
        data.creator = None
        data.item_ID = None
        data.schedule_file_name = None
        data.start_datetime = None
        data.test_ID = None
        data.test_name = None
        data.raw_data_files.append(fid)

        # --------- read raw-data (normal-data) -------------------------
        self.logger.debug("reading raw-data")
        self.mpr_data = None
        self.mpr_log = None
        self.mpr_settings = None
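        # the _load_mpr_data call below parses the file and fills these attributes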

        self._load_mpr_data(temp_filename, bad_steps)
        length_of_test = self.mpr_data.shape[0]
        self.logger.debug(f"length of test: {length_of_test}")

        self.logger.debug("renaming columns")
        self._rename_headers()
        # ---------  stats-data (summary-data) -------------------------
        summary_df = self._create_summary_data()

        if summary_df.empty:
            txt = "\nCould not find any summary (stats-file)!"
            txt += " (summary_df.empty = True)"
            txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
            warnings.warn(txt)

        data.summary = summary_df
        data.raw = self.mpr_data

        data.raw_data_files_length.append(length_of_test)
        new_tests.append(data)

        self._clean_up(temp_filename)
        return new_tests
Example #4
    def loader(self, file_name, bad_steps=None, **kwargs):
        """Loads data from arbin .res files.

        Args:
            file_name (str): path to .res file.
            bad_steps (list of tuples): (c, s) tuples of steps s (in cycle c)
                to skip loading.

        Returns:
            new_tests (list of data objects)
        """
        # TODO: @jepe - insert kwargs - current chunk, only normal data, etc

        if DEBUG_MODE:
            time_0 = time.time()

        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        self.logger.debug("in loader")
        self.logger.debug("filename: %s" % file_name)

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        self.logger.debug(txt)
        if (filesize > prms.Instruments.Arbin.max_res_filesize
                and not prms.Reader.load_only_summary):
            error_message = "\nERROR (loader):\n"
            error_message += "%s > %s - File is too big!\n" % (
                hfilesize,
                humanize_bytes(prms.Instruments.Arbin.max_res_filesize),
            )
            error_message += "(edit prms.Instruments.Arbin" "['max_res_filesize'])\n"
            print(error_message)
            return None

        table_name_global = TABLE_NAMES["global"]
        table_name_stats = TABLE_NAMES["statistic"]
        table_name_normal = TABLE_NAMES["normal"]

        # creating temporary file and connection

        temp_dir = tempfile.gettempdir()
        temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
        shutil.copy2(file_name, temp_dir)
        self.logger.debug("tmp file: %s" % temp_filename)

        # choose the access strategy: connect to the .res (Access) file directly
        # on Windows, or export its tables with mdbtools when on POSIX or when
        # subprocess mode is requested
        use_mdbtools = False
        if use_subprocess:
            use_mdbtools = True
        if is_posix:
            use_mdbtools = True

        # Windows with the same Python bitness as the Windows bitness (the ideal case)
        if not use_mdbtools:
            constr = self.__get_res_connector(temp_filename)

            if use_ado:
                conn = dbloader.connect(constr)
            else:
                conn = dbloader.connect(constr, autocommit=True)
            self.logger.debug("constr str: %s" % constr)

            self.logger.debug("reading global data table")
            sql = "select * from %s" % table_name_global
            self.logger.debug("sql statement: %s" % sql)
            global_data_df = pd.read_sql_query(sql, conn)
            # col_names = list(global_data_df.columns.values)

        else:
            import subprocess

            if is_posix:
                if is_macos:
                    self.logger.debug("\nMAC OSX USING MDBTOOLS")
                else:
                    self.logger.debug("\nPOSIX USING MDBTOOLS")
            else:
                self.logger.debug("\nWINDOWS USING MDBTOOLS-WIN")

            # creating tmp-filenames
            temp_csv_filename_global = os.path.join(temp_dir, "global_tmp.csv")
            temp_csv_filename_normal = os.path.join(temp_dir, "normal_tmp.csv")
            temp_csv_filename_stats = os.path.join(temp_dir, "stats_tmp.csv")

            # making the cmds
            mdb_prms = [
                (table_name_global, temp_csv_filename_global),
                (table_name_normal, temp_csv_filename_normal),
                (table_name_stats, temp_csv_filename_stats),
            ]

            # executing cmds
            for table_name, tmp_file in mdb_prms:
                with open(tmp_file, "w") as f:
                    subprocess.call(
                        [sub_process_path, temp_filename, table_name],
                        stdout=f)
                    self.logger.debug(f"ran mdb-export {str(f)} {table_name}")

            # use pandas to load in the data
            global_data_df = pd.read_csv(temp_csv_filename_global)

        tests = global_data_df[self.headers_normal.test_id_txt]
        number_of_sets = len(tests)
        self.logger.debug("number of datasets: %i" % number_of_sets)

        for counter, test_no in enumerate(range(number_of_sets)):
            if counter > 0:
                self.logger.warning("***MULTITEST-FILE (not recommended)")
                if not ALLOW_MULTI_TEST_FILE:
                    break
            data = Cell()
            data.cell_no = test_no
            data.loaded_from = file_name
            fid = FileID(file_name)
            # name of the .res file it is loaded from:
            # data.parent_filename = os.path.basename(file_name)
            data.channel_index = int(global_data_df[
                self.headers_global["channel_index_txt"]][test_no])
            data.channel_number = int(global_data_df[
                self.headers_global["channel_number_txt"]][test_no])
            data.creator = global_data_df[
                self.headers_global["creator_txt"]][test_no]
            data.item_ID = global_data_df[
                self.headers_global["item_id_txt"]][test_no]
            data.schedule_file_name = global_data_df[
                self.headers_global["schedule_file_name_txt"]][test_no]
            data.start_datetime = global_data_df[
                self.headers_global["start_datetime_txt"]][test_no]
            data.test_ID = int(
                global_data_df[self.headers_normal.test_id_txt][test_no])
            data.test_name = global_data_df[
                self.headers_global["test_name_txt"]][test_no]
            data.raw_data_files.append(fid)

            self.logger.debug("reading raw-data")
            if not use_mdbtools:
                # --------- read raw-data (normal-data) ------------------------
                length_of_test, normal_df = self._load_res_normal_table(
                    conn, data.test_ID, bad_steps)
                # --------- read stats-data (summary-data) ---------------------
                sql = "select * from %s where %s=%s order by %s" % (
                    table_name_stats,
                    self.headers_normal.test_id_txt,
                    data.test_ID,
                    self.headers_normal.data_point_txt,
                )
                summary_df = pd.read_sql_query(sql, conn)
                if counter > number_of_sets:
                    self._clean_up_loadres(None, conn, temp_filename)
            else:
                normal_df = pd.read_csv(temp_csv_filename_normal)
                # filter on test ID
                normal_df = normal_df[normal_df[
                    self.headers_normal.test_id_txt] == data.test_ID]
                # sort on data point
                if prms._sort_if_subprocess:
                    normal_df = normal_df.sort_values(
                        self.headers_normal.data_point_txt)
                length_of_test = normal_df.shape[0]
                summary_df = pd.read_csv(temp_csv_filename_stats)
                # clean up
                for f in [
                        temp_filename,
                        temp_csv_filename_stats,
                        temp_csv_filename_normal,
                        temp_csv_filename_global,
                ]:
                    if os.path.isfile(f):
                        try:
                            os.remove(f)
                        except OSError as e:  # WindowsError is Windows-only
                            self.logger.warning(
                                f"could not remove tmp-file\n{f} {e}")

            if summary_df.empty and prms.Reader.use_cellpy_stat_file:
                txt = "\nCould not find any summary (stats-file)!"
                txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
                logging.debug(txt)
            # normal_df = normal_df.set_index("Data_Point")

            data.summary = summary_df
            if DEBUG_MODE:
                mem_usage = normal_df.memory_usage()
                logging.debug(f"memory usage for "
                              f"loaded data: \n{mem_usage}"
                              f"\ntotal: {humanize_bytes(mem_usage.sum())}")
                logging.debug(f"time used: {(time.time() - time_0):2.4f} s")

            data.raw = normal_df
            data.raw_data_files_length.append(length_of_test)

            data = self._post_process(data)

            new_tests.append(data)

        new_tests = self._inspect(new_tests)

        return new_tests
Example #5
    def loader(self, file_name, bad_steps=None, **kwargs):
        """Loads data from PEC files into a list of Cell objects."""
        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        logging.debug(txt)

        data = Cell()
        fid = FileID(file_name)

        # misc parameters and information (probably load this last)
        test_no = 1
        data.cell_no = test_no
        data.loaded_from = file_name

        # some overall prms
        data.channel_index = None
        data.channel_number = None
        data.creator = None
        data.item_ID = None
        data.schedule_file_name = None
        data.test_ID = None
        data.test_name = None
        data.raw_data_files.append(fid)

        # --------- read raw-data (normal-data) -------------------------

        self._load_pec_data(file_name, bad_steps)
        data.start_datetime = self.pec_settings["start_time"]
        length_of_test = self.pec_data.shape[0]
        logging.debug(f"length of test: {length_of_test}")

        logging.debug("renaming columns")
        self._rename_headers()
        self._convert_units()

        data.raw = self.pec_data

        data.raw_data_files_length.append(length_of_test)
        new_tests.append(data)

        return new_tests
Example #6
    def loader(self, file_name, **kwargs):
        """Loads data from a csv-type file into a list of Cell objects."""
        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return

        # find out strategy (based on structure)
        if self.structure["format"] != "csv":
            raise NotImplementedError

        sep = self.structure.get("sep", prms.Reader.sep)
        if sep is None:
            sep = prms.Reader.sep

        locate_vars_by = self.structure.get("locate_vars_by", "key_value_pairs")
        comment_chars = self.structure.get("comment_chars", ["#", "!"])
        header_row = self.structure.get("start_data", None)
        if header_row is None:
            header_row = self._find_data_start(file_name, sep)

        # parse variables
        var_lines = []
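        # collect metadata lines found above the data header row (skipping comments)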
        with open(file_name, "rb") as fp:
            for i, line in enumerate(fp):
                if i < header_row:
                    line = line.strip()
                    try:
                        line = line.decode()
                    except UnicodeDecodeError:
                        logging.debug(
                            f"UnicodeDecodeError: skipping this line: {line}"
                        )
                    else:
                        # str.startswith needs a tuple rather than a list
                        if line.startswith(tuple(comment_chars)):
                            logging.debug(f"Comment: {line}")
                        else:
                            var_lines.append(line)
                else:
                    break

        var_dict = dict()
        if locate_vars_by == "key_value_pairs":
            for line in var_lines:
                parts = line.split(sep)
                try:
                    var_dict[parts[0]] = parts[1]
                except IndexError as e:
                    logging.debug(f"{e}\ncould not split var-value\n{line}")

        else:
            raise NotImplementedError

        data = Cell()
        data.loaded_from = file_name
        fid = self._generate_fid(file_name, var_dict)

        # parsing cellpydata attributes
        for attribute in ATTRS_CELLPYFILE:
            key = self.variables.get(attribute, None)
            # print(f"{attribute} -> {key}")
            if key:
                val = var_dict.pop(key, None)
                if key in ["mass"]:
                    val = float(val)
                # print(f"{attribute}: {val}")
                setattr(data, attribute, val)

        data.raw_data_files.append(fid)

        # setting optional attributes (will be implemented later I hope)
        key = self.variables.get("total_mass", None)
        if key:
            total_mass = var_dict.pop(key, None)
            logging.debug("total_mass is given, but not propagated")

        logging.debug(f"unused vars: {var_dict}")

        raw = self._parse_csv_data(file_name, sep, header_row)
        raw = self._rename_cols(raw)
        raw = self._check_cycleno_stepno(raw)
        data.raw_data_files_length.append(raw.shape[0])
        data.summary = None
        data.raw = raw
        new_tests.append(data)
        return new_tests