def parse_update_master(core_file_name, database_name, datatype,
                        decoded_dataframe):
    """Store an uploaded file's data and register it in the master table.

    The decoded dataframe is saved untouched under
    ``core_file_name + 'UnalteredRaw'``. ``calc_dq_dqdv`` is then run on it
    and the result is saved under ``core_file_name + 'Raw'``. Finally, a
    dictionary holding the dataset name and the table-name prefixes used
    for this dataset is handed to ``update_master_table``.

    Args:
        core_file_name: Base name used to build every table name/prefix.
        database_name: Database passed through to the database helpers.
        datatype: Passed through to ``calc_dq_dqdv``.
        decoded_dataframe: The uploaded data, already decoded to a dataframe.

    Returns:
        None.
    """
    original_table = core_file_name + 'UnalteredRaw'
    raw_table = core_file_name + 'Raw'

    # Keep an unmodified copy of the upload before any processing.
    update_database_newtable(decoded_dataframe, original_table, database_name)
    processed = calc_dq_dqdv(decoded_dataframe, datatype)
    update_database_newtable(processed, raw_table, database_name)

    # (master-table column, suffix appended to core_file_name)
    prefix_suffixes = (
        ('Raw_Data_Prefix', 'Raw'),
        ('Cleaned_Data_Prefix', 'CleanSet'),
        ('Cleaned_Cycles_Prefix', '-CleanCycle'),
        ('Descriptors_Prefix', '-descriptors'),
        ('Model_Parameters_Prefix', 'ModParams'),
        ('Model_Points_Prefix', '-ModPoints'),
        ('Raw_Cycle_Prefix', '-Cycle'),
        ('Original_Data_Prefix', 'UnalteredRaw'),
    )
    master_row = {'Dataset_Name': core_file_name}
    for column, suffix in prefix_suffixes:
        master_row[column] = core_file_name + suffix
    update_master_table(master_row, database_name)
def generate_model(df_clean, filename, peak_thresh, database):
    """Fit a model to every cycle and store the results in the database.

    Wrapper around ``get_model_dfs``. For each unique cycle in ``df_clean``
    the model is generated; the model points and the fitted parameters are
    collected and written to the tables ``<name>-ModPoints`` and
    ``<name>ModParams``, where ``<name>`` is ``filename`` up to its first
    ``'.'``. ``param_dicts_to_df`` is then called on the ``ModParams`` table
    to produce the descriptors table.

    Args:
        df_clean: Cleaned data. Must contain a ``'datatype'`` column (its
            first value selects the column names via ``col_variables``).
        filename: File name the data came from; text before the first
            ``'.'`` is used as the table-name prefix.
        peak_thresh: Passed through to ``get_model_dfs``.
        database: Database passed through to the database helpers.

    Returns:
        str: A status message. If ``get_model_dfs`` raised for any cycle,
        the message lists those cycles.
    """
    datatype = df_clean['datatype'].iloc[0]
    # Only the cycle-index column name is needed here.
    cycle_ind_col, *_ = col_variables(datatype)

    cycles = df_clean[cycle_ind_col].unique()
    if len(cycles) > 1:
        # Longest cycle, ignoring cycle 1 (as the original code did).
        lenmax = max(
            len(df_clean[df_clean[cycle_ind_col] == cyc])
            for cyc in cycles if cyc != 1)
    else:
        lenmax = len(df_clean)

    model_point_frames = []
    param_rows = []
    cycles_no_models = []
    for cyc in cycles:
        try:
            (new_df_mody, model_c_vals, model_d_vals,
             peak_heights_c, peak_heights_d) = get_model_dfs(
                df_clean, datatype, cyc, lenmax, peak_thresh)
        except Exception:
            # Best effort: a cycle that cannot be modelled is reported in
            # the return message instead of aborting the whole run.
            cycles_no_models.append(cyc)
            continue
        model_point_frames.append(new_df_mody)
        param_rows.append({
            'Cycle': cyc,
            'Model_Parameters_charge': str(model_c_vals),
            'Model_Parameters_discharge': str(model_d_vals),
            'charge_peak_heights': str(peak_heights_c),
            'discharge_peak_heights': str(peak_heights_d),
        })

    # Build the frames once, after the loop. DataFrame.append was removed in
    # pandas 2.0 and re-copied the whole frame on every iteration.
    if model_point_frames:
        mod_pointsdf = pd.concat(model_point_frames)
    else:
        mod_pointsdf = pd.DataFrame()
    if param_rows:
        param_df = pd.DataFrame(param_rows)
    else:
        param_df = pd.DataFrame(columns=[
            'Cycle',
            'Model_Parameters_charge',
            'Model_Parameters_discharge',
        ])

    table_prefix = filename.split('.')[0]
    update_database_newtable(mod_pointsdf, table_prefix + '-ModPoints',
                             database)
    # This will replace the data table in there if it exists already.
    update_database_newtable(param_df, table_prefix + 'ModParams', database)
    # Evaluate the stored parameter dictionaries and write them out as a
    # formatted descriptors table.
    param_dicts_to_df(table_prefix + 'ModParams', database)

    if cycles_no_models:
        return 'That model has been added to the database.' \
            + 'No model was generated for Cycle(s) ' + str(cycles_no_models)
    return 'That model has been added to the database'
def get_clean_sets(clean_cycle_dict, core_file_name, database_name):
    """Combine all cleaned cycles into one dataframe and store it.

    The values of ``clean_cycle_dict`` are concatenated in dictionary order
    with a fresh 0..n-1 index, and the result is written to the table
    ``core_file_name + 'CleanSet'``.

    Args:
        clean_cycle_dict: Mapping whose values are the per-cycle cleaned
            dataframes (the keys are not used).
        core_file_name: Base name used to build the table name.
        database_name: Database passed through to
            ``update_database_newtable``.

    Returns:
        pandas.DataFrame: The concatenated data (an empty dataframe when
        ``clean_cycle_dict`` is empty).
    """
    frames = list(clean_cycle_dict.values())
    if frames:
        # One concat instead of repeated DataFrame.append (removed in
        # pandas 2.0, and quadratic when used in a loop).
        clean_set_df = pd.concat(frames, ignore_index=True)
    else:
        clean_set_df = pd.DataFrame()

    update_database_newtable(clean_set_df, core_file_name + 'CleanSet',
                             database_name)
    return clean_set_df
def test_update_database_newtable():
    """Uploading a dataframe creates a table with the given name."""
    df = pd.DataFrame({
        'A': [1, 2, 3],
        'B': [10, 20, 30],
        'C': [100, 200, 300]
    })
    upload_filename = 'my_amazing_file'
    database_name = 'amazing_database.db'
    try:
        update_database_newtable(df, upload_filename, database_name)
        assert upload_filename in get_table_names(database_name)
    finally:
        # Always clean up, so a failed assertion does not leave a stale
        # database file behind for later test runs.
        if os.path.exists(database_name):
            os.remove(database_name)
def load_sep_cycles(core_file_name, database_name, datatype):
    """Split an uploaded file's raw data into one dataframe per cycle.

    The table ``core_file_name + 'Raw'`` is read from the database and
    grouped by the cycle-index column given by ``col_variables(datatype)``.
    Each group gets a ``'Battery_Label'`` column set to ``core_file_name``
    and is written to the table ``core_file_name + '-Cycle' + str(cycle)``.

    Args:
        core_file_name: Base name used to build the table names.
        database_name: Database passed through to the database helpers.
        datatype: Passed to ``col_variables`` to obtain the column names.

    Returns:
        dict: Maps each cycle value to its labelled dataframe.
    """
    # Only the cycle-index column name is needed here.
    cycle_ind_col, *_ = col_variables(datatype)
    df_single = get_file_from_database(core_file_name + 'Raw', database_name)

    cycle_dict = {}
    # Group by the column name itself, not a one-element list: with a list,
    # pandas >= 2.0 yields 1-tuple keys such as (1,), which would corrupt
    # both the table names and the dictionary keys.
    for key, cycle_df in df_single.groupby(cycle_ind_col):
        # assign() returns a new frame, so the group slice is never
        # written to in place (no SettingWithCopyWarning).
        labelled = cycle_df.assign(Battery_Label=core_file_name)
        cycle_dict[key] = labelled
        update_database_newtable(
            labelled, core_file_name + '-' + 'Cycle' + str(key),
            database_name)
    return cycle_dict
def test_get_file_from_database():
    """A stored table round-trips; a missing table yields ``None``."""
    df = pd.DataFrame({
        'A': [1, 2, 3],
        'B': [10, 20, 30],
        'C': [100, 200, 300]
    })
    upload_filename = 'my_other_amazing_file'
    database_name = 'another_amazing_database.db'
    try:
        update_database_newtable(df, upload_filename, database_name)
        assert os.path.exists(database_name)

        result = get_file_from_database(upload_filename, database_name)
        assert result.equals(df)

        neg_result = get_file_from_database('something_else', database_name)
        # Identity check: `== None` on a dataframe would compare
        # element-wise and make the assert itself raise.
        assert neg_result is None
    finally:
        # Always clean up, so a failed assertion does not leave a stale
        # database file behind for later test runs.
        if os.path.exists(database_name):
            os.remove(database_name)
def get_clean_cycles(cycle_dict, core_file_name, database_name, datatype,
                     windowlength=9, polyorder=3):
    """Clean every separated cycle and store each one in the database.

    For ``i`` in ``1..len(cycle_dict)``, ``cycle_dict[i]`` is passed to
    ``clean_calc_sep_smooth``. The returned charge and discharge frames are
    concatenated, sorted by the data-point column and re-indexed, then
    written to the table ``core_file_name + '-CleanCycle' + str(i)``.

    Args:
        cycle_dict: Mapping of cycle number to that cycle's dataframe.
            NOTE(review): keys are looked up as 1..len(cycle_dict), so a
            dict with gaps or a different start raises KeyError - confirm
            that callers always provide contiguous cycles starting at 1.
        core_file_name: Base name used to build the table names.
        database_name: Database passed through to
            ``update_database_newtable``.
        datatype: Passed to ``col_variables`` and ``clean_calc_sep_smooth``.
        windowlength: Passed through to ``clean_calc_sep_smooth``.
        polyorder: Passed through to ``clean_calc_sep_smooth``.

    Returns:
        dict: Maps each clean-cycle table name to its cleaned dataframe.
    """
    # Only the data-point column name is needed here.
    _, data_point_col, *_ = col_variables(datatype)

    clean_cycle_dict = {}
    for i in range(1, len(cycle_dict) + 1):
        charge, discharge = clean_calc_sep_smooth(
            cycle_dict[i], datatype, windowlength, polyorder)
        # pd.concat replaces DataFrame.append, which was removed in
        # pandas 2.0.
        clean_data = pd.concat([charge, discharge], ignore_index=True)
        clean_data = clean_data.sort_values([data_point_col], ascending=True)
        clean_data = clean_data.reset_index(drop=True)

        cyclename = core_file_name + '-CleanCycle' + str(i)
        clean_cycle_dict[cyclename] = clean_data
        update_database_newtable(clean_data, cyclename, database_name)
    return clean_cycle_dict
def param_dicts_to_df(mod_params_name, database):
    """Turn the stored model-parameter dictionaries into a descriptors table.

    Reads the table ``mod_params_name`` from the database. Each row holds
    string representations of the charge/discharge parameter dictionaries
    and peak-height lists, which are parsed with ``ast.literal_eval``. For
    every row a flat set of descriptors is built (prefixed ``c_`` for
    charge and ``d_`` for discharge), accumulated into one charge and one
    discharge dataframe, and passed through ``dfsortpeakvals``. The two
    results are concatenated column-wise and written to the table
    ``mod_params_name[:-9] + '-descriptors'``.

    Args:
        mod_params_name: Name of the model-parameters table. The last nine
            characters are stripped to form the descriptors table name
            (presumably the ``'ModParams'`` suffix - verify against caller).
        database: Database passed through to the database helpers.

    Returns:
        None.
    """
    mod_params_df = get_file_from_database(mod_params_name, database)
    charge_descript = pd.DataFrame()
    discharge_descript = pd.DataFrame()
    # Rows are addressed by label 0..n-1, so this relies on the table
    # coming back with a default integer index.
    for i in range(len(mod_params_df)):
        # The cells hold str(...) of Python objects; parse them back.
        param_dict_charge = ast.literal_eval(
            mod_params_df.loc[i, ('Model_Parameters_charge')])
        param_dict_discharge = ast.literal_eval(
            mod_params_df.loc[i, ('Model_Parameters_discharge')])
        charge_peak_heights = ast.literal_eval(
            mod_params_df.loc[i, ('charge_peak_heights')])
        discharge_peak_heights = ast.literal_eval(
            mod_params_df.loc[i, ('discharge_peak_heights')])

        # ---- charge descriptors for this cycle ----
        charge_keys = []
        new_dict_charge = {}
        if param_dict_charge is not None:
            # Peak prefixes: the text before the first '_' of every key
            # that contains '_amplitude' but not 'base_'.
            for key, value in param_dict_charge.items():
                if '_amplitude' in key and 'base_' not in key:
                    charge_keys.append(key.split('_')[0])
            # The 'base_*' parameters are stored under 'c_gauss_*'.
            c_update_dict = {
                'c_gauss_sigma': param_dict_charge['base_sigma'],
                'c_gauss_center': param_dict_charge['base_center'],
                'c_gauss_amplitude': param_dict_charge['base_amplitude'],
                'c_gauss_fwhm': param_dict_charge['base_fwhm'],
                'c_gauss_height': param_dict_charge['base_height'],
            }
            new_dict_charge.update(c_update_dict)
            new_dict_charge.update(
                {'c_cycle_number': float(mod_params_df.loc[i, ('Cycle')])})
        # Peaks are numbered 1, 2, ... in the order their keys were found;
        # charge_keys is empty when there is no charge model.
        peaknum = 0
        for item in charge_keys:
            peaknum = peaknum + 1
            center = param_dict_charge[item + '_center']
            amp = param_dict_charge[item + '_amplitude']
            fract = param_dict_charge[item + '_fraction']
            sigma = param_dict_charge[item + '_sigma']
            height = param_dict_charge[item + '_height']
            fwhm = param_dict_charge[item + '_fwhm']
            raw_peakheight = charge_peak_heights[peaknum - 1]
            # Peak area: integral of my_pseudovoigt over [0.0, 100]; the
            # error estimate returned by quad is not used.
            PeakArea, PeakAreaError = scipy.integrate.quad(
                my_pseudovoigt, 0.0, 100,
                args=(center, amp, fract, sigma))
            new_dict_charge.update({
                'c_area_peak_' + str(peaknum): PeakArea,
                'c_center_peak_' + str(peaknum): center,
                'c_amp_peak_' + str(peaknum): amp,
                'c_fract_peak_' + str(peaknum): fract,
                'c_sigma_peak_' + str(peaknum): sigma,
                'c_height_peak_' + str(peaknum): height,
                'c_fwhm_peak_' + str(peaknum): fwhm,
                'c_rawheight_peak_' + str(peaknum): raw_peakheight
            })
        # One-row frame for this cycle (no rows at all when the dict is
        # empty), appended to the running charge table.
        new_dict_df = pd.DataFrame(columns=new_dict_charge.keys())
        for key1, val1 in new_dict_charge.items():
            new_dict_df.at[0, key1] = new_dict_charge[key1]
        charge_descript = pd.concat([charge_descript, new_dict_df], sort=True)
        charge_descript = charge_descript.reset_index(drop=True)
        # NOTE(review): dfsortpeakvals is re-run on the accumulated table
        # every iteration; whether it modifies its argument is not visible
        # here, so the call order is left untouched.
        charge_descript2 = dfsortpeakvals(charge_descript, 'c')

        # ---- discharge descriptors for this cycle ----
        discharge_keys = []
        if param_dict_discharge is not None:
            for key, value in param_dict_discharge.items():
                if '_amplitude' in key and 'base_' not in key:
                    discharge_keys.append(key.split('_')[0])
            new_dict_discharge = {}
            # The 'base_*' parameters are stored under 'd_gauss_*'.
            update_dict = {
                'd_gauss_sigma': param_dict_discharge['base_sigma'],
                'd_gauss_center': param_dict_discharge['base_center'],
                'd_gauss_amplitude': param_dict_discharge['base_amplitude'],
                'd_gauss_fwhm': param_dict_discharge['base_fwhm'],
                'd_gauss_height': param_dict_discharge['base_height'],
            }
            new_dict_discharge.update(update_dict)
            new_dict_discharge.update(
                {'d_cycle_number': float(mod_params_df.loc[i, ('Cycle')])})
            peaknum = 0
            for item in discharge_keys:
                peaknum = peaknum + 1
                center = param_dict_discharge[item + '_center']
                amp = param_dict_discharge[item + '_amplitude']
                fract = param_dict_discharge[item + '_fraction']
                sigma = param_dict_discharge[item + '_sigma']
                height = param_dict_discharge[item + '_height']
                fwhm = param_dict_discharge[item + '_fwhm']
                raw_peakheight = discharge_peak_heights[peaknum - 1]
                # Same area integral as for the charge peaks.
                PeakArea, PeakAreaError = scipy.integrate.quad(
                    my_pseudovoigt, 0.0, 100,
                    args=(center, amp, fract, sigma))
                new_dict_discharge.update({
                    'd_area_peak_' + str(peaknum): PeakArea,
                    'd_center_peak_' + str(peaknum): center,
                    'd_amp_peak_' + str(peaknum): amp,
                    'd_fract_peak_' + str(peaknum): fract,
                    'd_sigma_peak_' + str(peaknum): sigma,
                    'd_height_peak_' + str(peaknum): height,
                    'd_fwhm_peak_' + str(peaknum): fwhm,
                    'd_rawheight_peak_' + str(peaknum): raw_peakheight
                })
        else:
            new_dict_discharge = None
        if new_dict_discharge is not None:
            new_dict_df_d = pd.DataFrame(columns=new_dict_discharge.keys())
            for key1, val1 in new_dict_discharge.items():
                new_dict_df_d.at[0, key1] = new_dict_discharge[key1]
            discharge_descript = pd.concat(
                [discharge_descript, new_dict_df_d], sort=True)
            discharge_descript = discharge_descript.reset_index(drop=True)
            discharge_descript2 = dfsortpeakvals(discharge_descript, 'd')
        else:
            # NOTE(review): unlike the charge side, a cycle without a
            # discharge model resets discharge_descript2 to None. If that
            # is the last cycle, discharge descriptors from earlier cycles
            # are left out of the final table - confirm this is intended.
            discharge_descript2 = None
    # Join the charge and discharge tables side by side before putting them
    # in the database.
    # NOTE(review): with an empty mod_params_df the loop never runs and
    # charge_descript2 / discharge_descript2 are undefined at this point.
    full_df_descript = pd.concat([charge_descript2, discharge_descript2],
                                 sort=True, axis=1)
    update_database_newtable(full_df_descript,
                             mod_params_name[:-9] + '-descriptors',
                             database)
    return