def writer(header, data, filename, option):
    """Write (or rewrite) tabular rows to *filename* as CSV.

    Parameters
    ----------
    header : sequence
        Column names, written as the first row.
    data : iterable
        For ``option == "write"`` each row is a sequence of cell values;
        for ``option == "update"`` each row is a mapping keyed by *header*.
    filename : str
        Destination path, opened in text mode and truncated.
    option : str
        ``"write"`` or ``"update"``; anything else only prints a warning.
    """
    import csv  # local import keeps this helper self-contained

    with open(filename, "w", newline="") as csvfile:
        if option == "write":
            # The original called ``xlsxfile.writer(csvfile)`` on the file
            # object and referenced an undefined ``csvfile`` name; the csv
            # module provides the writer.
            rows_writer = csv.writer(csvfile)
            rows_writer.writerow(header)
            for row in data:
                rows_writer.writerow(row)
        elif option == "update":
            # ``xlsx.DictWriter`` does not exist; DictWriter lives in csv.
            # (Renamed so it no longer shadows this function's own name.)
            dict_writer = csv.DictWriter(csvfile, fieldnames=header)
            dict_writer.writeheader()
            dict_writer.writerows(data)
        else:
            print("Option is not known")
            # The original else branch also ran a pasted-in demo that called
            # the nonexistent ``pd.read_xlsx``; that dead code was removed.
示例#2
0
    def trajectory(self):
        """
        Gets the position of the cell over time.

        Reads ``self.path_to_data`` (.xlsx or .csv), keeps the rows whose
        ``TrackID`` equals ``self.id`` and returns a numpy array of shape
        ``(n_timepoints, 4)`` whose rows are ``(x, y, z, t)``.

        Raises
        ------
        ValueError
            If the path is neither an .xlsx nor a .csv file.
        """
        path = str(self.path_to_data)
        # Suffix check (resolves the old TODO): the previous substring
        # ``find`` also matched names like "data.xlsx.bak".
        if path.endswith(".xlsx"):
            # ``pd.read_xlsx`` does not exist; ``read_excel`` is the API.
            data = pd.read_excel(self.path_to_data)
        elif path.endswith(".csv"):
            data = pd.read_csv(self.path_to_data)
        else:
            raise ValueError(
                f"'{self.path_to_data}' is not an .xlsx or .csv file.")
        # Now subset the data for the given id.
        data = data.loc[data['TrackID'] == self.id]
        # Convert to numpy so rows can be indexed positionally; missing
        # values become the string 'NAN', which parses as float nan below.
        data = data.to_numpy(na_value='NAN')
        trajectory = np.zeros((len(data), 4), dtype=float)
        for i, row in enumerate(data):
            # Columns 0-2 are x, y, z and column 4 is the time point
            # (column 3 is skipped) -- assumed input layout, TODO confirm.
            trajectory[i][0:4] = row[0], row[1], row[2], row[4]
        return trajectory
示例#3
0
    def getxls(self):
        """Merge every selected Excel file into one CSV, then build a
        de-duplicated "one-way pair" column from the two station-id columns.

        Reads the module-level ``all_files`` list, writes
        ``<dir>/csv_result.csv`` and ``<dir>/csv_result_oneway.csv``, and
        reports the result path on the status bar.
        """
        global all_files
        # The first two characters of the first path are used as the working
        # directory (presumably a drive root like "C:") -- TODO confirm.
        w_dir = ''.join(list(all_files[0])[0:2])
        output_file = w_dir + '/csv_result.csv'
        # The one-way pair list goes next to the merged CSV; the original
        # referenced ``output_file1`` without ever defining it (NameError).
        output_file1 = w_dir + '/csv_result_oneway.csv'
        all_data_frame = []
        encoding1 = 'utf-8'  # fallback if all_files is empty
        for file in all_files:
            with open(file, 'rb') as f:
                encoding1 = chardet.detect(f.read())['encoding']
            try:
                # ``pd.read_xlsx`` does not exist; ``read_excel`` is the API
                # (and modern read_excel takes no ``encoding`` argument).
                data_frame = pd.read_excel(file,
                                           dtype={
                                               "本端基站ID": str,
                                               "对端基站ID": str
                                           })
            except Exception:
                continue  # skip unreadable files instead of reusing the last frame
            # Collect every file; the original appended only the final
            # ``data_frame`` after the loop had finished.
            all_data_frame.append(data_frame)
        # pandas.concat stacks the frames vertically (axis=0).
        data_frame_concat = pd.concat(all_data_frame,
                                      axis=0,
                                      ignore_index=True)
        data_frame_concat.to_csv(output_file, encoding=encoding1, index=False)
        MyFile = output_file
        with open(MyFile, 'rb') as f:
            MyFileCode = chardet.detect(f.read())['encoding']
        df1 = pd.read_csv(MyFile,
                          encoding=MyFileCode,
                          header=0,  # first row is the header
                          dtype={
                              "本端基站ID": str,
                              "对端基站ID": str
                          })
        df1.fillna(value="-", inplace=True)
        # Both orderings of the (local-station, remote-station) id pair.
        new_df1 = df1["本端基站ID"] + ";" + df1["对端基站ID"]
        new_df2 = df1["对端基站ID"] + ";" + df1["本端基站ID"]
        # Series.append was removed in pandas 2.0; concat is the replacement.
        c = pd.concat([new_df1, new_df2])
        # keep=False drops *every* member of a duplicated group, leaving only
        # pairs that appear in a single direction.
        c.drop_duplicates(keep=False, inplace=True)
        # reset_index returns a new Series; the original discarded the result.
        c = c.reset_index(drop=True)
        c.name = "单向对"
        c.to_csv(output_file1, encoding=encoding1, index=False, header=True)

        self.statusbar.showMessage("结果保存在:" + w_dir + "/csv_result_oneway.csv")
示例#4
0
    def predict(self, filename):
        """Predict on the data file.

        Parameters
        ----------
        filename : str
            Path to a .csv or .xlsx feature file.

        Returns
        -------
        FileLink to ``predictions.csv.gz`` holding per-class probabilities.

        Raises
        ------
        ValueError
            If the filename is neither csv nor xlsx (previously ``data``
            was silently left undefined, crashing later with NameError).
        """
        if 'csv' in filename:
            data = pd.read_csv(filename)
        elif 'xlsx' in filename:
            # ``pd.read_xlsx`` does not exist; ``read_excel`` reads .xlsx.
            data = pd.read_excel(filename)
        else:
            raise ValueError(f"Unsupported file type: {filename}")

        # ``scale``, ``features``, ``ids`` and ``label_class_correspondence``
        # are module-level globals -- presumably aligned with ``data``'s
        # rows; verify against the caller.
        pred, _ = self.model.predict_mlp(scale(data, features).values)

        # Use the file's ``pd`` alias (bare ``pandas`` was likely unbound).
        prediction = pd.DataFrame({'ID': ids})
        for name in ['Ghost', 'Electron', 'Muon', 'Pion', 'Kaon', 'Proton']:
            prediction[name] = pred[:, label_class_correspondence[name]]
        prediction.to_csv('predictions.csv.gz',
                          index=False,
                          float_format='%.5f',
                          compression="gzip")
        return FileLink('predictions.csv.gz')
示例#5
0
def file_read(path, csv, xlsx):
    """Load month/temperature/precipitation columns from a CSV or Excel file.

    Parameters
    ----------
    path : str
        Path to the data file.
    csv, xlsx : bool
        Exactly one must be True to select the file type.

    Returns
    -------
    (month, temp, prec)
        Three 1-D numpy arrays taken from the first three data columns.

    Raises
    ------
    ValueError
        If not exactly one of *csv*/*xlsx* is selected (after showing a
        tkinter error box). Previously execution fell through and crashed
        with NameError on the undefined ``data``.
    """
    if csv and xlsx:
        messagebox.showerror("ERROR", "Bitte waehlen Sie nur 1 Datentyp aus")
        raise ValueError("exactly one of csv/xlsx must be selected")
    elif csv:
        data = pd.read_csv(path, usecols=['month', 'temp', 'prec'])
    elif xlsx:
        # ``pd.read_xlsx`` does not exist; ``read_excel`` is the reader.
        data = pd.read_excel(path)
    else:
        messagebox.showerror("ERROR", "Bitte waehlen Sie einen Datentyp aus")
        raise ValueError("no data type selected")

    data = data.to_numpy()

    month = data[:, 0]
    temp = data[:, 1]
    prec = data[:, 2]

    return month, temp, prec
示例#6
0
def open_file(file_path):
    """Load *file_path* into a DataFrame, dispatching on its extension.

    Supports .parquet, .csv, .xlsx and .pkl/.pickle.

    Raises
    ------
    ValueError
        For any other extension. (ValueError subclasses Exception, so
        callers catching the old bare Exception still work.)
    """
    # Hoist the repeated os.path.splitext calls.
    ext = os.path.splitext(file_path)[-1]
    logger.info(ext)
    if ext == '.parquet':
        df = pd.read_parquet(file_path)
    elif ext == '.csv':
        logger.info('read csv')
        df = pd.read_csv(file_path)
    elif ext == '.xlsx':
        logger.info('read xlsx')
        # ``pd.read_xlsx`` does not exist; ``read_excel`` reads .xlsx files.
        df = pd.read_excel(file_path)
    elif ext in ['.pkl', '.pickle']:
        logger.info('read pickle')
        df = pd.read_pickle(file_path)
    else:
        raise ValueError(
            f'i dont know how to read this file extension: {ext}'
        )

    return df
示例#7
0
    def getxls(self):
        """Concatenate every selected Excel file into one CSV.

        Reads the module-level ``all_files`` list, writes the merged rows
        to ``<dir>/csv_result.csv`` and reports the path on the status bar.
        """
        global all_files
        # The first two characters of the first path are used as the working
        # directory (presumably a drive root like "C:") -- TODO confirm.
        w_dir = ''.join(list(all_files[0])[0:2])
        output_file = w_dir + '/csv_result.csv'
        all_data_frame = []
        encoding1 = 'utf-8'  # fallback if all_files is empty
        for file in all_files:
            with open(file, 'rb') as f:
                encoding1 = chardet.detect(f.read())['encoding']
            try:
                # ``pd.read_xlsx`` does not exist; ``read_excel`` is the API
                # (and modern read_excel takes no ``encoding`` argument).
                data_frame = pd.read_excel(file)
            except Exception:
                continue  # skip unreadable files instead of reusing the last frame
            # Collect every file; the original appended only the final
            # ``data_frame`` after the loop had finished.
            all_data_frame.append(data_frame)
        # pandas.concat stacks the frames vertically (axis=0).
        data_frame_concat = pd.concat(all_data_frame, axis=0, ignore_index=True)

        data_frame_concat.to_csv(output_file, encoding=encoding1, index=False)
        self.statusbar.showMessage("结果保存在:" + w_dir + "/csv_result.csv")
                               password="******",
                               database="mydatabase")
# creating database_cursor to perform SQL operation
# (``db_connection`` is created by the connect() call above)
db_cursor = db_connection.cursor()

# executing cursor with execute method and pass SQL query
db_cursor.execute("CREATE DATABASE comp490_spring_3")

# get list of all databases
db_cursor.execute("SHOW DATABASES")

# print all databases
for db in db_cursor:
    print(db)

# ``pd.read_xlsx`` does not exist; ``read_excel`` reads .xlsx workbooks.
df = pd.read_excel(r'Path where the XLSX file is stored\COMP490_SPRING_3.XLSX')
print(df)

df = pd.read_excel(
    r'C:\Users\Electronick\OneDrive\Desktop\COMP_490\COMP490_SPRING_3.xlsx'
)  # read the Excel file (the 'r' prefix keeps backslashes literal in the path)

print(df)

conn = sqlite3.connect('TestDB.db')
c = conn.cursor()

# Create table - occupation occupational
# (the original DDL ended in ",,)" -- a SQL syntax error)
c.execute('''CREATE TABLE OCCUPATIONAL
             ([generated_id] INTEGER PRIMARY KEY,[Occupational_Name] text, [Title] text, [Date_start] dates, [Occupation] occupation, [Wage] integer, [State] text)'''
          )
import pandas as pd
import numpy as np
import glob

path = r"/Users/matthewnock/Desktop/Coding/AFSP"
# The original pattern ``path + r"*.xlsx"`` was missing the separator and
# matched "AFSP*.xlsx" in the parent directory.
files = glob.glob(path + r"/*.xlsx")
frames = []
for filename in files:
    print(filename)
    # ``pd.read_xlsx`` does not exist; ``read_excel`` reads .xlsx workbooks.
    df = pd.read_excel(filename)
    # Collect every workbook (the original overwrote df and left ``frames``
    # empty and unused).
    frames.append(df)
print(df)

new_columns = ("stroop","afsp","survey","other","valid_data")

# NOTE(review): this creates one literal column named 'new_columns'; the
# tuple above suggests one column per name was intended -- confirm.
df['new_columns'] = np.nan  # np.NaN was removed in NumPy 2.0

# The following lines were unparseable pseudo-code in the original and are
# kept as TODO notes rather than live code:
# TODO: df.<name of column>.replace('', np.nan)
# TODO: df.loc[df.<name of column>.isna(), 'stroop'] = df[df.<name of column>.isna()].Stroop.replace({np.nan: 0})
# TODO: df.to_excel(path + '/output/compiled.xlsx')

# Create a new column named accuracy and place data from the choice_prob column in the new accuracy column
# Look at the Reversal column, in all instances where Reversal = 0 go to the accuracy column and change 25 to 0 and 75 to 1
# Look at the Reversal column, in all instances where Reversal = 1 go to the accuracy column and change 25 to 1 and 75 to 0

# frame['accuracy']=frame.choice_prob
# frame.loc[frame.Reversal==0,'accuracy']=frame[frame.Reversal==0].accuracy.replace({25:0,75:1})
# frame.loc[frame.Reversal==1,'accuracy']=frame[frame.Reversal==1].accuracy.replace({25:1,75:0})

# Look at the participants, and for each participant take the mean of their accuracy column
示例#10
0
# axis=1 axis='columns'

import pandas as banana
df_banana = banana.DataFrame({"a": [11, 21, 31], "b": [21, 22, 23]})
df_banana.head()

import pandas as pd
import numpy as np

# Reading files csv
csv_path = 'file.csv'
df_read_csv = pd.read_csv(csv_path)

# Reading files excel
xlsx_path = 'file.xlsx'
# ``pd.read_xlsx`` does not exist; ``read_excel`` reads Excel workbooks.
df_read_xlsx = pd.read_excel(xlsx_path)

# Create df from a dictionary
songs = {
    "Album" : ["Thriller", "Back in Black", "The Dark Side of the Moon",\
               "The Bodyguard", "Bast Out of Hell", \
               'Their Greatest Hits (1971-1975)', 'Saturday Night Fever', 'Rumours'],
        "Released" : [1982,1980,1973,1992,1977,1976,1977,1977],
        "Length" : ["00:42:19", "00:42:22", "00:42:49", "00:57:44", "00:46:33", "", "", ""]
}

df_songs = pd.DataFrame(songs)

# Inspect the column dtypes and the DataFrame summary
df_songs.dtypes
df_songs.info()  # the original accessed the bound method without calling it
示例#11
0
import pandas as pd
import geopandas as gpd  # Import geopandas
import fiona  #공간데이터를 딕셔너리 형태 등으로 접근할 수 있는 라이브러리
# import warnings
# warnings.filterwarnings(action='ignore') #경고 메시지 무시
# from IPython.display import display #print가 아닌 display()로 연속 출력
# from IPython.display import HTML #출력 결과를 HTML로 생성
# Load the CCTV spreadsheet.
# ``pd.read_xlsx`` does not exist; ``read_excel`` is the API, and it takes
# no ``encoding`` argument (the workbook's encoding is handled internally).
df_cctv = pd.read_excel('./1.xlsx')
df_cctv.head()


def RSU(longitude, latitude, v1, v2):
    """Collect per-vehicle VDI records at a roadside unit (RSU).

    Intended design (not yet implemented): append every incoming vehicle
    record, cluster the GPS points in 2-D, and assign a global trust value
    per cluster, aggregating several vehicles' VTI (geometric/weighted
    mean). Currently only walks the ``v1`` structure; returns ``None``.
    """

    # latitude  = []  # latitude (second coordinate)
    # longitude = []  # longitude (first coordinate)

    collected_v1 = []
    collected_v2 = []
    for vehicle_id, record in v1.items():
        for _record_field in record:
            vdi_record = record['VDI']
            for _vdi_field in vdi_record:
                # NOTE: rebinds rather than appends, matching the original.
                collected_v1 = vdi_record['createdData']
示例#12
0
 import os, sys, getopt
 t = 0
 t1, t2 = (False, False)
 warnings.filterwarnings("ignore")
 TK = tk.Tk()
 files = list(tkf.askopenfilenames())
 savepath = "%s/split/" % list(os.path.split(files[0]))[0]
 os.system("mkdir %s" % savepath)
 # if input('where to save? Somewhere else? {y/[n]}').lower().find('y')!= -1:
 #     savepath = tkf.askdirectory()
 TK.destroy()
 for file in tqdm(files):
     if "dta" in file: data = pd.read_stata(file)
     elif 'csv' in file: data = pd.read_csv(file)
     elif 'txt' in file: data = pd.read_csv(file)
     elif 'xls' in file: data = pd.read_xlsx(file)
     data = data.rename(columns={'agency_name': 'hp_name'})
     try:
         data[data.eval('hp_type+hc+level').isna()]['level'] = data[
             data.eval('hp_type+hc+level').isna()]['hp_name'].apply(level)
     except:
         data['level'] = data.hp_name.apply(level)
     if all(['hp_type' in data.columns, 'hc' in data.columns]):
         try:
             try:
                 data.hp_type = data.hp_type.astype('float')
             except:
                 data.hp_type = data.hp_name.astype('str').apply(
                     get_hp_type)
             data.hc = data.hc.astype('float')
             data.level = data.level.astype('float')
#### Dummy-variable creation and dataset construction ####

import requests
import pandas as pd
import numpy as np
import os
import time
import datetime
import json
from bs4 import BeautifulSoup

# ``pd.read_xlsx`` does not exist; ``read_excel`` loads the movie workbook.
data_set = pd.read_excel('C:/py_saving/movie_data/total_movie.xlsx')
# Keep only the month digits (characters 4-6, e.g. "07" of "20190705")
# of openDt -- assumed date layout, TODO confirm.
data_set["openDt"] = data_set["openDt"].apply(lambda x: str(x)[4:6])

def month_change(i):
    if i == "01":
        return 'jan'
    elif i == '02':
        return 'feb'
    elif i == '03':
        return 'mar'
    elif i == '04':
        return 'apr'
    elif i == '05':
        return 'may'
    elif i == '06':
        return 'jun'
    elif i == '07':
        return 'jul'
    elif i == '08':
    def __init__(self, **kwargs):
        """Configure the notepad-style "Excel" Tk window: icon, geometry,
        menus, text area and scrollbar.

        Accepts optional ``width``/``height`` keyword arguments. The
        widgets it configures (``self.__root``, ``self.__thisTextArea``,
        the menu objects, ``self.__thisScrollBar``) are assumed to be
        created elsewhere -- TODO confirm, this method only wires them up.
        The large trailing section (bokeh/geopandas map demo, ticker
        string, nested ``writer``) appears to be pasted-in unrelated code.
        """

        # Set icon (best effort -- the .ico file may be missing)
        try:
            self.__root.wm_iconbitmap("Excel.ico")
        except:
            pass

        # Set window size (the default is 300x300)

        try:
            self.__thisWidth = kwargs['width']
        except KeyError:
            pass

        try:
            self.__thisHeight = kwargs['height']
        except KeyError:
            pass

        # Set the window text
        self.__root.title("Untitled - Excel")

        # Center the window
        screenWidth = self.__root.winfo_screenwidth()
        screenHeight = self.__root.winfo_screenheight()

        # Horizontal offset that centers the window
        left = (screenWidth / 2) - (self.__thisWidth / 2)

        # Vertical offset that centers the window
        top = (screenHeight / 2) - (self.__thisHeight / 2)

        # Apply "<width>x<height>+<left>+<top>" geometry
        self.__root.geometry('%dx%d+%d+%d' % (self.__thisWidth,
                                              self.__thisHeight,
                                              left, top))

        # To make the textarea auto resizable
        self.__root.grid_rowconfigure(0, weight=1)
        self.__root.grid_columnconfigure(0, weight=1)

        # Add controls (widget)
        self.__thisTextArea.grid(sticky=N + E + S + W)

        # To open new file
        self.__thisFileMenu.add_command(label="New",
                                        command=self.__newFile)

        # To open a already existing file
        self.__thisFileMenu.add_command(label="Open",
                                        command=self.__openFile)

        # To save current file
        self.__thisFileMenu.add_command(label="Save",
                                        command=self.__saveFile)

        # To create a line in the dialog
        self.__thisFileMenu.add_separator()
        self.__thisFileMenu.add_command(label="Exit",
                                        command=self.__quitApplication)
        self.__thisMenuBar.add_cascade(label="File",
                                       menu=self.__thisFileMenu)

        # To give a feature of cut
        self.__thisEditMenu.add_command(label="Cut",
                                        command=self.__cut)

        # to give a feature of copy
        self.__thisEditMenu.add_command(label="Copy",
                                        command=self.__copy)

        # To give a feature of paste
        self.__thisEditMenu.add_command(label="Paste",
                                        command=self.__paste)

        # To give a feature of editing
        self.__thisMenuBar.add_cascade(label="Edit",
                                       menu=self.__thisEditMenu)

        # To create a feature of description of the notepad
        self.__thisHelpMenu.add_command(label="About Excel",
                                        command=self.__showAbout)
        self.__thisMenuBar.add_cascade(label="Help",
                                       menu=self.__thisHelpMenu)

        self.__root.config(menu=self.__thisMenuBar)

        self.__thisScrollBar.pack(side=RIGHT, fill=Y)

        # Scrollbar will adjust automatically according to the content
        self.__thisScrollBar.config(command=self.__thisTextArea.yview)
        self.__thisTextArea.config(yscrollcommand=self.__thisScrollBar.set)

        # ////////////////////////////////////////////////////////////////////////////////////////////////
        # When your program first starts up, with the python GUI, allow the user to choose to either
        # update the data run the data visualization
        # https://towardsdatascience.com/how-to-create-an-interactive-geographic-map-using-python
        # -and-bokeh-12981ca0b567
        from bokeh.io import output_notebook, show, output_file
        from bokeh.plotting import figure
        from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
        from bokeh.palettes import brewer

        from bokeh.io.doc import curdoc
        from bokeh.models import Slider, HoverTool, Select
        from bokeh.layouts import widgetbox, row, column

        # Read the geojson map file for Realtor Neighborhoods into a GeoDataframe object
        sf = geopandas.read_file('https://raw.githubusercontent.com/JimKing100/SF_Real_Estate_Live/master'
                                 '/data/Realtor%20Neighborhoods.geojson')

        # Set the Coordinate Referance System (crs) for projections
        # ESPG code 4326 is also referred to as WGS84 lat-long projection
        sf.crs = {'init': 'epsg:4326'}
        # Rename columns in geojson map file
        sf = sf.rename(columns={'geometry': 'geometry', 'nbrhood': 'neighborhood_name',
                                'nid': 'subdist_no'}).set_geometry('geometry')

        # Change neighborhood id (subdist_no) for correct code for Mount Davidson Manor and for parks
        sf.loc[sf['neighborhood_name'] == 'Mount Davidson Manor', 'subdist_no'] = '4n'
        sf.loc[sf['neighborhood_name'] == 'Golden Gate Park', 'subdist_no'] = '12a'
        sf.loc[sf['neighborhood_name'] == 'Presidio', 'subdist_no'] = '12b'
        sf.loc[sf['neighborhood_name'] == 'Lincoln Park', 'subdist_no'] = '12c'

        # NOTE(review): sort_values returns a new frame; this result is discarded
        sf.sort_values(by=['subdist_no'])

        # Determine where the visualization will be rendered
        output_file('filename.html')
        output_notebook()  # Render inline in a Jupyter Notebook

        # Set up the figure(s)
        fig = figure()
        show(fig)

        # NOTE(review): pd.read_xlsx does not exist -- pd.read_excel is the API
        df = pd.read_xlsx(r'C:\Users\Electronick\OneDrive\Desktop\COMP_490\COMP490_SPRING_3.xlsx')
        # Create an empty string called ticker_string
        ticker_string = ''
        # Loop through every element of `tickers` and add them and a comma to ticker_string
        # NOTE(review): `tickers` is not defined anywhere in view -- confirm its origin
        for ticker in tickers:
            ticker_string += ticker
            ticker_string += ','
        # Drop the last comma from `ticker_string`
        ticker_string = ticker_string[:-1]

        # Create the endpoint and years strings
        endpoints = 'chart'
        years = '5'


        #   When updating the data: let the user choose the file name for the excel file
        def writer(header, data, filename, option):
            # NOTE(review): this nested helper is broken as written --
            # `csvfile` is undefined (the open() target is `xlsxfile`),
            # file objects have no .writer method (csv.writer was meant),
            # and `xlsx.DictWriter` does not exist (csv.DictWriter).
            with open(filename, "w", newline="") as xlsxfile:
                if option == "write":

                    movies = xlsxfile.writer(csvfile)
                    movies.writerow(header)
                    for x in data:
                        movies.writerow(x)
                elif option == "update":
                    writer = xlsx.DictWriter(csvfile, fieldnames=header)
                    writer.writeheader()
                    writer.writerows(data)
                else:
                    print("Option is not known")

                    # The data visualization: provide the user the ability to do two forms of data analysis,The first
                    # analysis.should display the data in a color coded text format as a list in ascending or descending
                    # order (let the user choose) and the second should render a map to visualize the data.

                    # NOTE(review): pd.read_xlsx does not exist -- pd.read_excel is the API
                    iris = pd.read_xlsx('iris.xlsx',
                                        names=[ "school.city_id INT PRIMARY KEY, school.city VARCHAR() NOT NULL, school.city "
                                            "VARCHAR() NOT NULL, school.city_phone VARCHAR(6), school.city INT"])
                    print(iris.head())
def import_data():
    """Interactively load a .csv/.xlsx dataset and gather the time-series
    model settings (time variable, resample frequency, optional grouping,
    optional CPI normalization) from the user via ``input()`` prompts.

    Populates the module-level globals declared below and returns the
    prepared DataFrame.

    Raises
    ------
    ExceededAttempts
        After ``parameters.attempts_max`` invalid answers to a prompt.
    InvalidYear / InvalidMonth
        For an out-of-range CPI base year/month.
    ValueError
        If the chosen file is neither .csv nor .xlsx.
    """
    global startdata
    filepath_attempts = parameters.zero
    while filepath_attempts < parameters.attempts_max:
        filepath = input(wrapper.fill('Please place the complete file path here with the file name for your dataset in .csv or .xlsx format.'))
        if os.path.exists(filepath):
            break
        else:
            filepath_attempts += 1
            print(wrapper.fill(f'The file path you have tried to import is not valid, please check to make sure you have the correct file path with the file name and try again. You have {3-filepath_attempts} attempts remaining.'))
            if filepath_attempts == parameters.attempts_max:
                # The exit() that followed this raise was unreachable.
                raise ExceededAttempts
    # The original compared filepath[-3:] (three chars) against 'xlsx'
    # (four chars), which could never match; use suffix checks instead.
    if filepath.endswith('.csv'):
        startdata = pd.read_csv(filepath)
    elif filepath.endswith('.xlsx'):
        # pd.read_xlsx does not exist; read_excel is the Excel reader.
        startdata = pd.read_excel(filepath)
    else:
        print(wrapper.fill('Please make sure your file is in .csv or .xlsx format for the import data function and try again.'))
        # Previously execution fell through to an unconditional read_csv
        # that clobbered whichever branch ran above; fail explicitly.
        raise ValueError('unsupported file format')
    startdata.columns = startdata.columns.str.lower()
    global y
    global timevariable
    global group
    global testdate
    global resamplefreq
    global splitdf
    global aggregate
    timevar_attempts = parameters.zero
    while timevar_attempts < parameters.attempts_max:
        timevariablebox = input('What is the time variable you wish to use for your model?')
        timevariable = timevariablebox.lower()
        if timevariable in startdata.columns:
            break
        else:
            timevar_attempts += 1
            print(f'Time variable cannot be found in data file you imported.Please try again. You have {3-timevar_attempts} attempts remaining before the script will close.')
            if timevar_attempts == parameters.attempts_max:
                raise ExceededAttempts
    resample_attempt = parameters.zero
    while resample_attempt < parameters.attempts_max:
        resamplefreq = input('\n'+ wrapper.fill('What time frequency do you want for your output? Type MS for Monthly and W for weekly results. If your data is already aggregated to a weekly or monthly level please choose that option.'))
        if resamplefreq in ['MS','W']:
            break
        else:
            resample_attempt += 1
            print(f'You did not select a valid frequency, please use MS for monthly time series results and W for weekly results.')
            # The original referenced bare `attempts_max` here (NameError).
            if resample_attempt == parameters.attempts_max:
                raise ExceededAttempts
    aggregate = input('\n'+ wrapper.fill('By default your data will be averaged for use in this model, if you need to sum your input data please type sum here otherwise press enter to continue.'))
    forecaststeps = input('\n'+ wrapper.fill('How many periods into the future would you like to generate your forecast for, by default the package has set this value to 12? Keep in mind the granularity of your data. Here 12 would be a year for monthly data while 52 would be a year for weekly data. Please use integers'))
    forecaststeps = int(forecaststeps)
    splitdf = input('\n'+ wrapper.fill('Will you need to split your dataset into a testing and training dataset for validating your model? y/n'))
    if splitdf.lower() == 'y':
        inputdate = input('\n'+ wrapper.fill('What date do you wish to split your dataset into a training and testing dataset? This is not the date the forecast will begin.'))
        if resamplefreq == 'MS':
            testdate = first_day_of_month(inputdate)
        else:
            inputdate = datetime.datetime.strptime(inputdate, '%m-%d-%Y').date()
            testdate = last_sunday(inputdate, 6)
    groupsindata = input('\n'+ 'Do you have groups in your dataset? With groups you will be able to run a seperate timeseries model for each group. Please type (Y/N)')
    if groupsindata.lower() == 'y':
        group_attempt = parameters.zero
        while group_attempt < parameters.attempts_max:
            groupbox = input('\n'+ 'What is the column name for the groups in your dataset?')
            group = groupbox.lower()
            if group in startdata.columns:
                startdata[group] = startdata[group].astype(str)
                break
            else:
                group_attempt += 1
                print(f'Time variable cannot be found in data file you imported.Please try again. You have {3-group_attempt} attempts remaining before the script will close.')
                if group_attempt == parameters.attempts_max:
                    raise ExceededAttempts
    startdata[timevariable] = pd.to_datetime(startdata[timevariable])
    # Month name (e.g. "January") and calendar year, used for the CPI merge.
    startdata['period'] = (startdata[timevariable].dt.strftime('%B'))
    startdata['year'] = (startdata[timevariable].dt.year)
    normalizedata = input('\n'+ 'Do you want to normalize any monetary data? Yes/No')
    if normalizedata.upper() == 'YES':
        cpi_frame = pd.DataFrame()
        headers = {'Content-type': 'application/json'}
        endyear_cpi = datetime.date.today().year
        beginyear_cpi = endyear_cpi - 10
        jsondata = json.dumps({"seriesid": ['CUUR0000SA0'],"startyear":beginyear_cpi, "endyear": endyear_cpi})
        # NOTE(review): HTTPBasicAuth must come from requests.auth -- confirm
        # it is imported at module level.
        p = requests.get('https://api.bls.gov/publicAPI/v1/timeseries/data/', data=jsondata, headers=headers, auth = HTTPBasicAuth('apikey', 'e5f82668f98943a6becb6c6dfb08841f'))
        json_data = json.loads(p.text)
        print('\n' + wrapper.fill('You are about to run a function to generate the Consumer Price Index (CPI). The CPI can be used to account for inflation in monetary data.'))
        chooseyear = input('What year do you want to index data to?')
        # Checking to make sure the year is valid otherwise raising a custom error
        if int(chooseyear) < beginyear_cpi or int(chooseyear) > endyear_cpi:
            raise InvalidYear(beginyear_cpi, endyear_cpi)
        choosemonth = input(f'What month of {chooseyear} do you want to index data to?')
        # Checking to make sure the month is not in the future, if it is raising a custom error
        # (the original used the bare name `date`, which is not imported here)
        if int(chooseyear) == endyear_cpi and int(datetime.datetime.strptime(choosemonth.capitalize(),"%B").strftime("%m")) > (datetime.date.today().month - 1):
            raise InvalidMonth()
        # If the user inputs an integer instead of spelling out the month this finds the proper month text to avoid errors.
        if choosemonth.isnumeric() == True:
            datetime_object = datetime.datetime.strptime(choosemonth, "%m")
            choosemonth = datetime_object.strftime("%B")
        for series in json_data['Results']['series']:
            cs = ["series id","year","period","value"]
            for item in series['data']:
                data_ses = np.array([series['seriesID'],item['year'], item['periodName'], item['value']])
                row_seperator = item['year'] + '_' + item['periodName']
                cpi_f = pd.DataFrame([data_ses],[row_seperator],columns = cs)
                # DataFrame.append was removed in pandas 2.0; concat replaces it.
                cpi_frame = pd.concat([cpi_frame, cpi_f])
        x = cpi_frame.loc[(cpi_frame['year'] == chooseyear)&(cpi_frame['period'] == choosemonth.capitalize()), 'value'].values
        cpi_frame['CPI'] = x.astype(float)/cpi_frame['value'].astype(float)
        cpi_frame['year'] = cpi_frame['year'].astype(int)
    if normalizedata.upper() == 'YES':
        startdata = pd.merge(startdata, cpi_frame, on = ["period", "year"], how = 'left')
        # Here make it to where user can input data to normalize
        startdata['NormalizedValue'] = startdata['Cost'] * startdata['CPI']
    return startdata