def reload_data(LOGFILE=None, PICKLE_DATA=True, root_folder='Shared Sepsis Data', csv_filename='Sepsis_JCM.csv'):
    '''Reloads raw_data from folders.

    IN:
        LOGFILE - fileobj - an open text file where logs are written
        PICKLE_DATA - bool - whether to pickle data once loaded
        root_folder - str - relative path to top level folder for all data and csv_file
        csv_filename - str - name of csv file containing the trial labels and locations
    OUT:
        X - pd Series - Series of features. Each row is a trial (index) and a
            number of features + 1 X number of times numpy array (data)
        y - pd DataFrame - labels data frame. Each row is a trial (index) and
            the labels of each class the columns
        used_column_headers - list of str - column names retained after pruning
        df - pd DataFrame - trial data after elimination of extraneous spots, trials
        df_raw - pd DataFrame - all trial data (before pruning)
    '''
    csv_file = os.path.join(root_folder, csv_filename)
    X, y, used_column_headers, df, df_raw = load_data(
        root_folder, csv_file, verbose=False, LOGFILE=LOGFILE)

    # pickle data for later loading efficiency
    if PICKLE_DATA:
        start = time.time()
        ptf('\n>> Pickling data ...\n', LOGFILE)
        # builtin zip (rather than itertools.izip) works identically here
        # under both Python 2 and Python 3
        for z, zname in zip([X, y, used_column_headers], PICKLE_NAMES):
            my_pickle(z, zname)
        end = time.time()
        ptf('Data pickled in %d seconds (%d total trials)' % ((end - start), len(X)), LOGFILE)

    return X, y, used_column_headers, df, df_raw
def reload_data(LOGFILE=None, PICKLE_DATA=True, root_folder='Shared Sepsis Data', csv_filename='Sepsis_JCM.csv'):
    '''Rebuilds the dataset by reading every trial from the data folders.

    IN:
        LOGFILE - fileobj - open text file that receives log output
        PICKLE_DATA - bool - when True, pickle the loaded data for fast reloads
        root_folder - str - relative path to the top-level data folder
        csv_filename - str - csv file (inside root_folder) with trial labels/locations
    OUT:
        X - pd Series - one row per trial; each entry is a
            (number of features + 1) X (number of times) numpy array
        y - pd DataFrame - one row per trial; one column per class label
        used_column_headers - list of str -
        df - pd DataFrame - trial data after extraneous spots/trials are removed
        df_raw - pd DataFrame - trial data before any pruning
    '''
    csv_file = os.path.join(root_folder, csv_filename)
    X, y, used_column_headers, df, df_raw = load_data(
        root_folder, csv_file, verbose=False, LOGFILE=LOGFILE)

    if PICKLE_DATA:
        # cache the loaded objects so future runs can skip the folder walk
        t0 = time.time()
        ptf('\n>> Pickling data ...\n', LOGFILE)
        for obj, pickle_name in izip([X, y, used_column_headers], PICKLE_NAMES):
            my_pickle(obj, pickle_name)
        t1 = time.time()
        ptf('Data pickled in %d seconds (%d total trials)' % ((t1 - t0), len(X)), LOGFILE)

    return X, y, used_column_headers, df, df_raw
def reload_data(LOGFILE=None, PICKLE_DATA=True, root_folder='Shared Sepsis Data', csv_filename='Sepsis_JCM.csv'):
    '''Reloads raw_data from folders.

    IN:
        LOGFILE - fileobj - an open text file where logs are written
        PICKLE_DATA - bool - whether to pickle data once loaded
        root_folder - str - relative path to top level folder for all data and csv_file
        csv_filename - str - name of csv file containing the trial labels and locations
    OUT:
        X - pd Series - Series of features. Each row is a trial (index) and a
            number of features + 1 X number of times numpy array (data)
        y - pd DataFrame - labels data frame. Each row is a trial (index) and
            the labels of each class the columns
        used_column_headers - list of str -
        df - pd DataFrame - DataFrame containing all trial data after
            elimination of extraneous spots, trials
        df_raw - pd DataFrame - DataFrame containing all trial data (before pruning)
    '''
    csv_file = os.path.join(root_folder, csv_filename)
    X, y, used_column_headers, df, df_raw = load_data(
        root_folder, csv_file, verbose=False, LOGFILE=LOGFILE)

    # pickle data for later loading efficiency
    if PICKLE_DATA:
        start = time.time()
        ptf('\n>> Pickling data ...\n', LOGFILE)
        # builtin zip (rather than itertools.izip) works identically here
        # under both Python 2 and Python 3
        for z, zname in zip([X, y, used_column_headers], PICKLE_NAMES):
            my_pickle(z, zname)
        end = time.time()
        ptf('Data pickled in %d seconds (%d total trials)' % ((end - start), len(X)), LOGFILE)

    return X, y, used_column_headers, df, df_raw