def loader(self, file_name, *args, **kwargs):
    """Loads data into a Cell object and returns it in a list."""
    new_tests = []
    test_no = 1
    channel_index = 1
    channel_number = 1
    creator = "no name"
    item_ID = 1
    schedule_file_name = "no name"
    start_datetime = "2020.02.24 14:58:00"
    test_ID = 1
    test_name = "no name"

    if not os.path.isfile(file_name):
        self.logger.info("Missing file:\n %s" % file_name)
        return None

    self.logger.debug("in loader")
    self.logger.debug("filename: %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.debug(txt)

    data = Cell()
    data.cell_no = test_no
    data.loaded_from = file_name
    fid = FileID(file_name)
    data.channel_index = channel_index
    data.channel_number = channel_number
    data.creator = creator
    data.item_ID = item_ID
    data.schedule_file_name = schedule_file_name
    data.start_datetime = start_datetime
    data.test_ID = test_ID
    data.test_name = test_name
    data.raw_data_files.append(fid)

    # load_nda() is expected to be provided elsewhere in this module
    length_of_test, normal_df = load_nda()
    # NOTE: assumed placeholder - this loader does not produce summary data yet
    empty_df = pd.DataFrame()
    data.summary = empty_df
    data.raw = normal_df
    data.raw_data_files_length.append(length_of_test)

    data = self._post_process(data)
    data = self.identify_last_data_point(data)

    new_tests.append(data)
    return new_tests
def loader(self, file_name, bad_steps=None, **kwargs):
    """Loads PEC data into a DataSet object and returns it in a list."""
    new_tests = []
    if not os.path.isfile(file_name):
        self.logger.info("Missing file:\n %s" % file_name)
        return None

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    logging.debug(txt)

    data = DataSet()
    fid = FileID(file_name)

    # div parameters and information (probably load this last)
    test_no = 1
    data.test_no = test_no
    data.loaded_from = file_name

    # some overall prms
    data.channel_index = None
    data.channel_number = None
    data.creator = None
    data.item_ID = None
    data.schedule_file_name = None
    data.test_ID = None
    data.test_name = None
    data.raw_data_files.append(fid)

    # --------- read raw-data (normal-data) -------------------------
    self._load_pec_data(file_name, bad_steps)
    data.start_datetime = self.pec_settings["start_time"]
    length_of_test = self.pec_data.shape[0]
    logging.debug(f"length of test: {length_of_test}")

    logging.debug("renaming columns")
    self._rename_headers()
    self._convert_units()

    data.dfdata = self.pec_data
    data.raw_data_files_length.append(length_of_test)
    new_tests.append(data)
    return new_tests
def loader(self, file_name, bad_steps=None, **kwargs): """Loads data from biologics .mpr files. Args: file_name (str): path to .res file. bad_steps (list of tuples): (c, s) tuples of steps s (in cycle c) to skip loading. Returns: new_tests (list of data objects) """ new_tests = [] if not os.path.isfile(file_name): self.logger.info("Missing file_\n %s" % file_name) return None filesize = os.path.getsize(file_name) hfilesize = humanize_bytes(filesize) txt = "Filesize: %i (%s)" % (filesize, hfilesize) self.logger.debug(txt) # creating temporary file and connection temp_dir = tempfile.gettempdir() temp_filename = os.path.join(temp_dir, os.path.basename(file_name)) shutil.copy2(file_name, temp_dir) self.logger.debug("tmp file: %s" % temp_filename) self.logger.debug("HERE WE LOAD THE DATA") data = DataSet() fid = FileID(file_name) # div parameters and information (probably load this last) test_no = 1 data.test_no = test_no data.loaded_from = file_name # some overall prms data.channel_index = None data.channel_number = None data.creator = None data.item_ID = None data.schedule_file_name = None data.start_datetime = None data.test_ID = None data.test_name = None data.raw_data_files.append(fid) # --------- read raw-data (normal-data) ------------------------- self.logger.debug("reading raw-data") self.mpr_data = None self.mpr_log = None self.mpr_settings = None self._load_mpr_data(temp_filename, bad_steps) length_of_test = self.mpr_data.shape[0] self.logger.debug(f"length of test: {length_of_test}") self.logger.debug("renaming columns") self._rename_headers() # --------- stats-data (summary-data) ------------------------- summary_df = self._create_summary_data() if summary_df.empty: txt = "\nCould not find any summary (stats-file)!" txt += " (summary_df.empty = True)" txt += "\n -> issue make_summary(use_cellpy_stat_file=False)" warnings.warn(txt) data.dfsummary = summary_df data.dfdata = self.mpr_data data.raw_data_files_length.append(length_of_test) new_tests.append(data) self._clean_up(temp_filename) return new_tests
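# A minimal usage sketch for the .mpr loader above. The ``MprLoader`` class
# name and the file path are hypothetical stand-ins (assumptions, not names
# confirmed by this module); ``bad_steps`` takes (cycle, step) tuples as
# described in the docstring.
#
#     loader = MprLoader()
#     tests = loader.loader("cell_01.mpr", bad_steps=[(1, 2), (3, 1)])
#     if tests:
#         data = tests[0]
#         print(data.dfdata.head())     # raw (normal) data
#         print(data.dfsummary.head())  # summary (stats) data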
def loader(self, file_name, bad_steps=None, dataset_number=None,
           data_points=None):
    """Loads data from arbin .res files.

    Args:
        file_name (str): path to .res file.
        bad_steps (list of tuples): (c, s) tuples of steps s
            (in cycle c) to skip loading.
        dataset_number (int): the data set number to select if you are dealing
            with arbin files with more than one data-set.
        data_points (tuple of ints): load only data from data_point[0] to
            data_point[1] (use None for infinite).

    Returns:
        new_tests (list of data objects)
    """
    # TODO: @jepe - insert kwargs - current chunk, only normal data, etc
    if DEBUG_MODE:
        time_0 = time.time()

    new_tests = []
    conn = None
    if not os.path.isfile(file_name):
        self.logger.info("Missing file:\n %s" % file_name)
        return None

    self.logger.debug("in loader")
    self.logger.debug("filename: %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.debug(txt)

    if (filesize > prms.Instruments.Arbin.max_res_filesize
            and not prms.Reader.load_only_summary):
        error_message = "\nERROR (loader):\n"
        error_message += "%s > %s - File is too big!\n" % (
            hfilesize,
            humanize_bytes(prms.Instruments.Arbin.max_res_filesize),
        )
        error_message += "(edit prms.Instruments.Arbin ['max_res_filesize'])\n"
        print(error_message)
        return None

    table_name_global = TABLE_NAMES["global"]
    table_name_stats = TABLE_NAMES["statistic"]
    table_name_normal = TABLE_NAMES["normal"]

    # creating temporary file and connection
    tmp_name_global = None
    tmp_name_raw = None
    tmp_name_stats = None
    temp_dir = tempfile.gettempdir()
    temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
    shutil.copy2(file_name, temp_dir)
    self.logger.debug("tmp file: %s" % temp_filename)

    use_mdbtools = False
    if use_subprocess:
        use_mdbtools = True
    if is_posix:
        use_mdbtools = True
    # windows with same python bit as windows bit (the ideal case)

    # SPLIT FROM HERE
    if not use_mdbtools:
        constr = self._get_res_connector(temp_filename)
        if use_ado:
            conn = dbloader.connect(constr)
        else:
            conn = dbloader.connect(constr, autocommit=True)
        self.logger.debug("constr str: %s" % constr)

        self.logger.debug("reading global data table")
        sql = "select * from %s" % table_name_global
        self.logger.debug("sql statement: %s" % sql)
        global_data_df = pd.read_sql_query(sql, conn)
        # col_names = list(global_data_df.columns.values)
    else:
        if is_posix:
            if is_macos:
                self.logger.debug("\nMAC OSX USING MDBTOOLS")
            else:
                self.logger.debug("\nPOSIX USING MDBTOOLS")
        else:
            self.logger.debug("\nWINDOWS USING MDBTOOLS-WIN")

        tmp_name_global, tmp_name_raw, tmp_name_stats = self._create_tmp_files(
            table_name_global,
            table_name_normal,
            table_name_stats,
            temp_dir,
            temp_filename,
        )

        # use pandas to load in the data
        global_data_df = pd.read_csv(tmp_name_global)

    tests = global_data_df[self.headers_normal.test_id_txt]
    number_of_sets = len(tests)
    self.logger.debug("number of datasets: %i" % number_of_sets)
    self.logger.debug(f"datasets: {tests}")

    if dataset_number is not None:
        self.logger.info(f"Dataset number given: {dataset_number}")
        self.logger.info(f"Available dataset numbers: {tests}")
        test_nos = [dataset_number]
    else:
        test_nos = range(number_of_sets)

    for counter, test_no in enumerate(test_nos):
        if counter > 0:
            self.logger.warning(
                "** WARNING ** MULTI-TEST-FILE (not recommended)")
            if not ALLOW_MULTI_TEST_FILE:
                break
        data = self._init_data(file_name, global_data_df, test_no)

        self.logger.debug("reading raw-data")
        if not use_mdbtools:
            # --------- read raw-data (normal-data) ---------------------
            length_of_test, normal_df = self._load_res_normal_table(
                conn, data.test_ID, bad_steps)
            # --------- read stats-data (summary-data) ------------------
            sql = "select * from %s where %s=%s order by %s" % (
                table_name_stats,
                self.headers_normal.test_id_txt,
                data.test_ID,
                self.headers_normal.data_point_txt,
            )
            summary_df = pd.read_sql_query(sql, conn)
        else:
            length_of_test, normal_df, summary_df = self._load_from_tmp_files(
                data, tmp_name_global, tmp_name_raw, tmp_name_stats,
                temp_filename)

        if summary_df.empty and prms.Reader.use_cellpy_stat_file:
            txt = "\nCould not find any summary (stats-file)!"
            txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
            logging.debug(txt)

        # normal_df = normal_df.set_index("Data_Point")
        data.summary = summary_df
        if DEBUG_MODE:
            mem_usage = normal_df.memory_usage()
            logging.debug(f"memory usage for "
                          f"loaded data: \n{mem_usage}"
                          f"\ntotal: {humanize_bytes(mem_usage.sum())}")
            logging.debug(f"time used: {(time.time() - time_0):2.4f} s")

        data.raw = normal_df
        data.raw_data_files_length.append(length_of_test)
        data = self._post_process(data)
        data = self.identify_last_data_point(data)
        new_tests.append(data)
    # COMBINE FROM HERE
    new_tests = self._inspect(new_tests)
    return new_tests
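# Hedged usage sketch for the .res loader above, assuming an instance of a
# hypothetical ``ArbinLoader`` class (the class name is an assumption).
# ``dataset_number`` picks one data-set out of a multi-test file.
#
#     loader = ArbinLoader()
#     tests = loader.loader(
#         "cell_02.res",
#         bad_steps=[(1, 4)],  # skip step 4 in cycle 1
#         dataset_number=0,    # only the first data-set
#     )
#     if tests:
#         print(tests[0].raw.shape)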
def investigate(self, file_name):
    """Investigate a .res file.

    Args:
        file_name: name of the file

    Returns:
        pandas.DataFrame with one row per (cycle, step): row count and
        start/end data-point.
    """
    step_txt = self.headers_normal.step_index_txt
    point_txt = self.headers_normal.data_point_txt
    cycle_txt = self.headers_normal.cycle_index_txt

    self.logger.debug("investigating file: %s" % file_name)
    if not os.path.isfile(file_name):
        print("Missing file:\n %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.info(txt)

    table_name_global = TABLE_NAMES["global"]
    table_name_stats = TABLE_NAMES["statistic"]
    table_name_normal = TABLE_NAMES["normal"]

    # creating temporary file and connection
    temp_dir = tempfile.gettempdir()
    temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
    shutil.copy2(file_name, temp_dir)
    constr = self._get_res_connector(temp_filename)

    if use_ado:
        conn = dbloader.connect(constr)
    else:
        conn = dbloader.connect(constr, autocommit=True)
    self.logger.debug("tmp file: %s" % temp_filename)
    self.logger.debug("constr str: %s" % constr)

    # --------- read global-data ------------------------------------
    self.logger.debug("reading global data table")
    sql = "select * from %s" % table_name_global
    global_data_df = pd.read_sql_query(sql, conn)
    # col_names = list(global_data_df.columns.values)
    self.logger.debug("sql statement: %s" % sql)

    tests = global_data_df[self.headers_normal.test_id_txt]
    number_of_sets = len(tests)
    self.logger.debug("number of datasets: %i" % number_of_sets)
    self.logger.debug("only selecting first test")
    test_no = 0
    self.logger.debug("setting data for test number %i" % test_no)
    loaded_from = file_name
    # fid = FileID(file_name)
    start_datetime = global_data_df[
        self.headers_global["start_datetime_txt"]][test_no]
    test_ID = int(
        global_data_df[self.headers_normal.test_id_txt][test_no])  # OBS
    test_name = global_data_df[
        self.headers_global["test_name_txt"]][test_no]

    # --------- read raw-data (normal-data) -------------------------
    self.logger.debug("reading raw-data")
    columns = ["Data_Point", "Step_Index", "Cycle_Index"]
    columns_txt = ", ".join(["%s"] * len(columns)) % tuple(columns)
    sql_1 = "select %s " % columns_txt
    sql_2 = "from %s " % table_name_normal
    sql_3 = "where %s=%s " % (self.headers_normal.test_id_txt, test_ID)
    sql_5 = "order by %s" % self.headers_normal.data_point_txt
    import time

    info_list = []
    info_header = ["cycle", "step", "row_count", "start_point", "end_point"]
    self.logger.info(" ".join(info_header))
    self.logger.info("-------------------------------------------------")

    for cycle_number in range(1, 2000):
        t1 = time.time()
        self.logger.debug("picking cycle %i" % cycle_number)
        sql_4 = "AND %s=%i " % (cycle_txt, cycle_number)
        sql = sql_1 + sql_2 + sql_3 + sql_4 + sql_5
        self.logger.debug("sql statement: %s" % sql)
        normal_df = pd.read_sql_query(sql, conn)
        t2 = time.time()
        dt = t2 - t1
        self.logger.debug("time: %f" % dt)
        if normal_df.empty:
            self.logger.debug("reached the end")
            break
        row_count, _ = normal_df.shape
        steps = normal_df[self.headers_normal.step_index_txt].unique()
        txt = "cycle %i: %i [" % (cycle_number, row_count)
        for step in steps:
            self.logger.debug(" step: %i" % step)
            step_df = normal_df.loc[normal_df[step_txt] == step]
            step_row_count, _ = step_df.shape
            start_point = step_df[point_txt].min()
            end_point = step_df[point_txt].max()
            txt += " %i-(%i)" % (step, step_row_count)
            step_list = [cycle_number, step, step_row_count,
                         start_point, end_point]
            info_list.append(step_list)

        txt += "]"
        self.logger.info(txt)

    self._clean_up_loadres(None, conn, temp_filename)
    info_dict = pd.DataFrame(info_list, columns=info_header)
    return info_dict
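# Hedged usage sketch for ``investigate``: the returned DataFrame has one row
# per (cycle, step), which makes it easy to spot truncated cycles or steps
# with unexpectedly few rows. The ``loader`` instance name is an assumption.
#
#     info = loader.investigate("cell_02.res")
#     # total rows per cycle:
#     print(info.groupby("cycle")["row_count"].sum().head())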
def _iterdump(self, file_name, headers=None):
    """Function for dumping values from a file.

    Should only be used by developers.

    Args:
        file_name: name of the file
        headers: list of headers to pick
            default: ["Discharge_Capacity", "Charge_Capacity"]

    Returns:
        pandas.DataFrame
    """
    if headers is None:
        headers = ["Discharge_Capacity", "Charge_Capacity"]

    step_txt = self.headers_normal.step_index_txt
    point_txt = self.headers_normal.data_point_txt
    cycle_txt = self.headers_normal.cycle_index_txt

    self.logger.debug("iterating through file: %s" % file_name)
    if not os.path.isfile(file_name):
        print("Missing file:\n %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.info(txt)

    table_name_global = TABLE_NAMES["global"]
    table_name_stats = TABLE_NAMES["statistic"]
    table_name_normal = TABLE_NAMES["normal"]

    # creating temporary file and connection
    temp_dir = tempfile.gettempdir()
    temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
    shutil.copy2(file_name, temp_dir)
    constr = self._get_res_connector(temp_filename)

    if use_ado:
        conn = dbloader.connect(constr)
    else:
        conn = dbloader.connect(constr, autocommit=True)
    self.logger.debug("tmp file: %s" % temp_filename)
    self.logger.debug("constr str: %s" % constr)

    # --------- read global-data ------------------------------------
    self.logger.debug("reading global data table")
    sql = "select * from %s" % table_name_global
    global_data_df = pd.read_sql_query(sql, conn)
    # col_names = list(global_data_df.columns.values)
    self.logger.debug("sql statement: %s" % sql)

    tests = global_data_df[self.headers_normal.test_id_txt]
    number_of_sets = len(tests)
    self.logger.debug("number of datasets: %i" % number_of_sets)
    self.logger.debug("only selecting first test")
    test_no = 0
    self.logger.debug("setting data for test number %i" % test_no)
    loaded_from = file_name
    # fid = FileID(file_name)
    start_datetime = global_data_df[
        self.headers_global["start_datetime_txt"]][test_no]
    test_ID = int(
        global_data_df[self.headers_normal.test_id_txt][test_no])  # OBS
    test_name = global_data_df[
        self.headers_global["test_name_txt"]][test_no]

    # --------- read raw-data (normal-data) -------------------------
    self.logger.debug("reading raw-data")
    columns = ["Data_Point", "Step_Index", "Cycle_Index"]
    columns.extend(headers)
    columns_txt = ", ".join(["%s"] * len(columns)) % tuple(columns)
    sql_1 = "select %s " % columns_txt
    sql_2 = "from %s " % table_name_normal
    sql_3 = "where %s=%s " % (self.headers_normal.test_id_txt, test_ID)
    sql_5 = "order by %s" % self.headers_normal.data_point_txt
    import time

    info_list = []
    info_header = ["cycle", "row_count", "start_point", "end_point"]
    info_header.extend(headers)
    self.logger.info(" ".join(info_header))
    self.logger.info("-------------------------------------------------")

    for cycle_number in range(1, 2000):
        t1 = time.time()
        self.logger.debug("picking cycle %i" % cycle_number)
        sql_4 = "AND %s=%i " % (cycle_txt, cycle_number)
        sql = sql_1 + sql_2 + sql_3 + sql_4 + sql_5
        self.logger.debug("sql statement: %s" % sql)
        normal_df = pd.read_sql_query(sql, conn)
        t2 = time.time()
        dt = t2 - t1
        self.logger.debug("time: %f" % dt)
        if normal_df.empty:
            self.logger.debug("reached the end")
            break
        row_count, _ = normal_df.shape
        start_point = normal_df[point_txt].min()
        end_point = normal_df[point_txt].max()
        last = normal_df.iloc[-1, :]
        step_list = [cycle_number, row_count, start_point, end_point]
        step_list.extend([last[x] for x in headers])
        info_list.append(step_list)

    self._clean_up_loadres(None, conn, temp_filename)
    info_dict = pd.DataFrame(info_list, columns=info_header)
    return info_dict
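# Hedged usage sketch for the developer helper ``_iterdump``: pass extra
# column names via ``headers`` to get their last value per cycle alongside
# the row counts. The column names below are the defaults from the docstring;
# the ``loader`` instance name is an assumption.
#
#     df = loader._iterdump(
#         "cell_02.res",
#         headers=["Discharge_Capacity", "Charge_Capacity"],
#     )
#     print(df.tail())  # capacities at the end of each cycle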
def loader(
    self,
    file_name,
    *args,
    bad_steps=None,
    dataset_number=None,
    data_points=None,
    **kwargs,
):
    """Loads data from arbin .res files.

    Args:
        file_name (str): path to .res file.
        bad_steps (list of tuples): (c, s) tuples of steps s
            (in cycle c) to skip loading.
        dataset_number (int): the data set number to select if you are dealing
            with arbin files with more than one data-set.
        data_points (tuple of ints): load only data from data_point[0] to
            data_point[1] (use None for infinite).

    Returns:
        new_tests (list of data objects)
    """
    # TODO: @jepe - insert kwargs - current chunk, only normal data, etc
    if not os.path.isfile(file_name):
        self.logger.info("Missing file:\n %s" % file_name)
        return None

    self.logger.debug("in loader")
    self.logger.debug("filename: %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.debug(txt)

    if (filesize > prms.Instruments.Arbin.max_res_filesize
            and not prms.Reader.load_only_summary):
        error_message = "\nERROR (loader):\n"
        error_message += "%s > %s - File is too big!\n" % (
            hfilesize,
            humanize_bytes(prms.Instruments.Arbin.max_res_filesize),
        )
        error_message += "(edit prms.Instruments.Arbin ['max_res_filesize'])\n"
        print(error_message)
        return None

    temp_dir = tempfile.gettempdir()
    temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
    shutil.copy2(file_name, temp_dir)
    self.logger.debug("tmp file: %s" % temp_filename)

    use_mdbtools = False
    if use_subprocess:
        use_mdbtools = True
    if is_posix:
        use_mdbtools = True

    if use_mdbtools:
        new_tests = self._loader_posix(
            file_name,
            temp_filename,
            temp_dir,
            *args,
            bad_steps=bad_steps,
            dataset_number=dataset_number,
            data_points=data_points,
            **kwargs,
        )
    else:
        new_tests = self._loader_win(
            file_name,
            temp_filename,
            *args,
            bad_steps=bad_steps,
            dataset_number=dataset_number,
            data_points=data_points,
            **kwargs,
        )

    new_tests = self._inspect(new_tests)
    return new_tests
def loader(self, file_name, bad_steps=None, **kwargs): """Loads data from arbin .res files. Args: file_name (str): path to .res file. bad_steps (list of tuples): (c, s) tuples of steps s (in cycle c) to skip loading. Returns: new_tests (list of data objects) """ # TODO: @jepe - insert kwargs - current chunk, only normal data, etc if DEBUG_MODE: time_0 = time.time() new_tests = [] if not os.path.isfile(file_name): self.logger.info("Missing file_\n %s" % file_name) return None self.logger.debug("in loader") self.logger.debug("filename: %s" % file_name) filesize = os.path.getsize(file_name) hfilesize = humanize_bytes(filesize) txt = "Filesize: %i (%s)" % (filesize, hfilesize) self.logger.debug(txt) if (filesize > prms.Instruments.Arbin.max_res_filesize and not prms.Reader.load_only_summary): error_message = "\nERROR (loader):\n" error_message += "%s > %s - File is too big!\n" % ( hfilesize, humanize_bytes(prms.Instruments.Arbin.max_res_filesize), ) error_message += "(edit prms.Instruments.Arbin" "['max_res_filesize'])\n" print(error_message) return None table_name_global = TABLE_NAMES["global"] table_name_stats = TABLE_NAMES["statistic"] table_name_normal = TABLE_NAMES["normal"] # creating temporary file and connection temp_dir = tempfile.gettempdir() temp_filename = os.path.join(temp_dir, os.path.basename(file_name)) shutil.copy2(file_name, temp_dir) self.logger.debug("tmp file: %s" % temp_filename) use_mdbtools = False if use_subprocess: use_mdbtools = True if is_posix: use_mdbtools = True # windows with same python bit as windows bit (the ideal case) if not use_mdbtools: constr = self.__get_res_connector(temp_filename) if use_ado: conn = dbloader.connect(constr) else: conn = dbloader.connect(constr, autocommit=True) self.logger.debug("constr str: %s" % constr) self.logger.debug("reading global data table") sql = "select * from %s" % table_name_global self.logger.debug("sql statement: %s" % sql) global_data_df = pd.read_sql_query(sql, conn) # col_names = list(global_data_df.columns.values) else: import subprocess if is_posix: if is_macos: self.logger.debug("\nMAC OSX USING MDBTOOLS") else: self.logger.debug("\nPOSIX USING MDBTOOLS") else: self.logger.debug("\nWINDOWS USING MDBTOOLS-WIN") # creating tmp-filenames temp_csv_filename_global = os.path.join(temp_dir, "global_tmp.csv") temp_csv_filename_normal = os.path.join(temp_dir, "normal_tmp.csv") temp_csv_filename_stats = os.path.join(temp_dir, "stats_tmp.csv") # making the cmds mdb_prms = [ (table_name_global, temp_csv_filename_global), (table_name_normal, temp_csv_filename_normal), (table_name_stats, temp_csv_filename_stats), ] # executing cmds for table_name, tmp_file in mdb_prms: with open(tmp_file, "w") as f: subprocess.call( [sub_process_path, temp_filename, table_name], stdout=f) self.logger.debug(f"ran mdb-export {str(f)} {table_name}") # use pandas to load in the data global_data_df = pd.read_csv(temp_csv_filename_global) tests = global_data_df[self.headers_normal.test_id_txt] number_of_sets = len(tests) self.logger.debug("number of datasets: %i" % number_of_sets) for counter, test_no in enumerate(range(number_of_sets)): if counter > 0: self.logger.warning("***MULTITEST-FILE (not recommended)") if not ALLOW_MULTI_TEST_FILE: break data = Cell() data.cell_no = test_no data.loaded_from = file_name fid = FileID(file_name) # name of the .res file it is loaded from: # data.parent_filename = os.path.basename(file_name) data.channel_index = int(global_data_df[ self.headers_global["channel_index_txt"]][test_no]) data.channel_number = 
int(global_data_df[ self.headers_global["channel_number_txt"]][test_no]) data.creator = global_data_df[ self.headers_global["creator_txt"]][test_no] data.item_ID = global_data_df[ self.headers_global["item_id_txt"]][test_no] data.schedule_file_name = global_data_df[ self.headers_global["schedule_file_name_txt"]][test_no] data.start_datetime = global_data_df[ self.headers_global["start_datetime_txt"]][test_no] data.test_ID = int( global_data_df[self.headers_normal.test_id_txt][test_no]) data.test_name = global_data_df[ self.headers_global["test_name_txt"]][test_no] data.raw_data_files.append(fid) self.logger.debug("reading raw-data") if not use_mdbtools: # --------- read raw-data (normal-data) ------------------------ length_of_test, normal_df = self._load_res_normal_table( conn, data.test_ID, bad_steps) # --------- read stats-data (summary-data) --------------------- sql = "select * from %s where %s=%s order by %s" % ( table_name_stats, self.headers_normal.test_id_txt, data.test_ID, self.headers_normal.data_point_txt, ) summary_df = pd.read_sql_query(sql, conn) if counter > number_of_sets: self._clean_up_loadres(None, conn, temp_filename) else: normal_df = pd.read_csv(temp_csv_filename_normal) # filter on test ID normal_df = normal_df[normal_df[ self.headers_normal.test_id_txt] == data.test_ID] # sort on data point if prms._sort_if_subprocess: normal_df = normal_df.sort_values( self.headers_normal.data_point_txt) length_of_test = normal_df.shape[0] summary_df = pd.read_csv(temp_csv_filename_stats) # clean up for f in [ temp_filename, temp_csv_filename_stats, temp_csv_filename_normal, temp_csv_filename_global, ]: if os.path.isfile(f): try: os.remove(f) except WindowsError as e: self.logger.warning( f"could not remove tmp-file\n{f} {e}") if summary_df.empty and prms.Reader.use_cellpy_stat_file: txt = "\nCould not find any summary (stats-file)!" txt += "\n -> issue make_summary(use_cellpy_stat_file=False)" logging.debug(txt) # normal_df = normal_df.set_index("Data_Point") data.summary = summary_df if DEBUG_MODE: mem_usage = normal_df.memory_usage() logging.debug(f"memory usage for " f"loaded data: \n{mem_usage}" f"\ntotal: {humanize_bytes(mem_usage.sum())}") logging.debug(f"time used: {(time.time() - time_0):2.4f} s") data.raw = normal_df data.raw_data_files_length.append(length_of_test) data = self._post_process(data) new_tests.append(data) new_tests = self._inspect(new_tests) return new_tests
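# For reference, a hedged sketch of what the mdbtools branch above does per
# table: run ``mdb-export`` (from mdbtools, what ``sub_process_path`` is
# assumed to point at) on the Access file, capture stdout as CSV, and read it
# back with pandas. The paths and the table name below are hypothetical.
#
#     import subprocess
#     import pandas as pd
#
#     with open("/tmp/normal_tmp.csv", "w") as f:
#         subprocess.call(
#             ["mdb-export", "/tmp/cell_02.res", "Channel_Normal_Table"],
#             stdout=f,
#         )
#     normal_df = pd.read_csv("/tmp/normal_tmp.csv")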