def Randomforestregressor(self, n_estimators):
    """Fit a random forest, aggregate its predictions and plot them.

    Trains ``RandomForestRegressor(n_estimators)`` on ``self.x_train`` /
    ``self.y_train``, predicts for ``self.x_test``, sums the predictions
    per ``week`` (or per ``date`` when there is no ``week`` column),
    exports the totals through ``ideaLib.py2idea`` as ``ts_tot_pred_rf``
    and draws them in red against ``self.ts_tot_orders`` in blue on a
    canvas packed into ``self.ml``.

    Args:
        n_estimators: Passed as the first argument to
            ``RandomForestRegressor``.
    """
    rf = RandomForestRegressor(n_estimators)
    rf.fit(self.x_train, self.y_train)
    pred = pd.DataFrame(rf.predict(self.x_test))

    # Attach predictions to the test rows by position.  An index-label merge
    # silently drops rows whenever x_test does not carry a 0..n-1 index.
    x_test = self.x_test.reset_index()
    predictions = pd.concat([x_test, pred], axis=1)
    predictions['num_orders'] = predictions[0]
    predictions = predictions.drop([0], axis=1)

    group_key = 'week' if 'week' in predictions else 'date'
    ts_tot_pred = pd.DataFrame(
        predictions.groupby([group_key])['num_orders'].sum())

    # Draw both series on ONE axes: every add_subplot(111) call may create
    # a new axes, which would stack two independently scaled plots.
    fig = Figure(figsize=(5, 5), dpi=100)
    ax = fig.add_subplot(111)
    ax.plot(self.ts_tot_orders, color='Blue')
    ax.plot(ts_tot_pred, color='Red')

    ideaLib.py2idea(dataframe=ts_tot_pred,
                    databaseName='ts_tot_pred_rf',
                    client=client)

    # Hide the previous plot, if any, before showing the new one.
    if self.is_canvas_ml == 1:
        self.canvas.get_tk_widget().pack_forget()
    self.canvas = FigureCanvasTkAgg(fig, master=self.ml)  # A tk.DrawingArea.
    self.canvas.get_tk_widget().pack(side=RIGHT)
    self.canvas.draw()
    self.is_canvas_ml = 1
def load_test(self):
    """Ask the user for a test dataset and load it.

    Supports ``.csv`` files (read with pandas) and ``.IMD`` IDEA databases
    (read through ``ideaLib.idea2py``).  On success the column names are
    lower-cased, the frame is stored in ``self.test`` and a copy of it in
    ``self.x_test``.  Any other extension shows an "Invalid Data Type"
    error.  Cancelling the dialog, or a failed IDEA import, returns without
    touching ``self.x_test``.
    """
    self.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select a File",
        filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                   ("Excel files", ".csv*")))
    if not self.filename:
        # Dialog was cancelled: there is nothing to load.
        return

    self.datatype = self.filename.split('.')
    extension = self.datatype[-1]

    if extension == 'csv':
        self.test = pd.read_csv(self.filename)
    elif extension == 'IMD':
        # Dataset must be in IDEA working directory, so only the bare file
        # name is handed to idea2py.
        self.filename = self.filename.split('/')[-1]
        self.test = ideaLib.idea2py(database=self.filename)
        if self.test is None:
            messagebox.showinfo(
                "Info",
                "There was something wrong with the import process of "
                "IDEA database to Pandas dataframe")
            return
        if self.test.empty:
            messagebox.showinfo("Info",
                                "You selected an empty IDEA database")
    else:
        # The original test `x == 'IMD' or 'csv'` was always true, which
        # made this branch unreachable.
        messagebox.showerror('Error', 'Invalid Data Type')
        return

    self.test.columns = [name.lower() for name in self.test.columns]
    self.x_test = self.test.copy()
def load(self):
    """Ask the user for a dataset, load it and list its columns.

    The ``self.lbdata`` listbox is cleared first.  ``.csv`` files are read
    with pandas; ``.IMD`` IDEA databases are read through
    ``ideaLib.idea2py`` and their path is shown in ``self.datalocation``.
    For IDEA data the ``CENTER_TYPE``, ``CATEGORY`` and ``CUISINE`` columns
    are cast to ``str`` when all three are present.  Column names are then
    lower-cased and inserted into the listbox.  Any other extension shows
    an "Invalid Data Type" error.  Cancelling the dialog, or a failed IDEA
    import, returns without loading anything.
    """
    self.lbdata.delete(0, 'end')
    self.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select a File",
        filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                   ("Excel files", ".csv*")))
    if not self.filename:
        # Dialog was cancelled: there is nothing to load.
        return

    self.datatype = self.filename.split('.')
    extension = self.datatype[-1]

    if extension == 'csv':
        self.df = pd.read_csv(self.filename)
    elif extension == 'IMD':
        self.datalocation['text'] = self.filename
        # Dataset must be in IDEA working directory, so only the bare file
        # name is handed to idea2py.
        self.filename = self.filename.split('/')[-1]
        self.df = ideaLib.idea2py(database=self.filename)
        if self.df is None:
            messagebox.showinfo(
                "Info",
                "There was something wrong with the import process of "
                "IDEA database to Pandas dataframe")
            # Nothing was imported; continuing would fail on the None value.
            return
        if self.df.empty:
            messagebox.showinfo("Info",
                                "You selected an empty IDEA database")
        text_columns = ("CENTER_TYPE", "CATEGORY", "CUISINE")
        if all(name in self.df for name in text_columns):
            self.df = self.df.astype({name: str for name in text_columns})
    else:
        messagebox.showerror('Error', 'Invalid Data Type')
        return

    self.df.columns = [name.lower() for name in self.df.columns]
    for position, column in enumerate(self.df.columns):
        self.lbdata.insert(position, column)
def load_train(self):
    """Ask the user for a training dataset and derive the training inputs.

    Supports ``.csv`` files (read with pandas) and ``.IMD`` IDEA databases
    (read through ``ideaLib.idea2py``).  After lower-casing the column
    names it sets:

    * ``self.ts_tot_orders`` - ``num_orders`` summed per ``week`` when a
      ``week`` column exists, otherwise ``sales`` summed per ``date`` with
      a datetime index;
    * ``self.y_train`` - the last column of the loaded frame;
    * ``self.x_train`` - all other columns, without ``id``, and indexed by
      ``date`` (parsed as datetime) when that column is present.

    Any other extension shows an "Invalid Data Type" error.  Cancelling the
    dialog, or a failed IDEA import, returns without deriving anything.
    """
    self.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select a File",
        filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                   ("Excel files", ".csv*")))
    if not self.filename:
        # Dialog was cancelled: there is nothing to load.
        return

    self.datatype = self.filename.split('.')
    extension = self.datatype[-1]

    if extension == 'csv':
        self.train = pd.read_csv(self.filename)
    elif extension == 'IMD':
        # Dataset must be in IDEA working directory, so only the bare file
        # name is handed to idea2py.
        self.filename = self.filename.split('/')[-1]
        self.train = ideaLib.idea2py(database=self.filename)
        if self.train is None:
            messagebox.showinfo(
                "Info",
                "There was something wrong with the import process of "
                "IDEA database to Pandas dataframe")
            return
        if self.train.empty:
            messagebox.showinfo("Info",
                                "You selected an empty IDEA database")
    else:
        # The original test `x == 'csv' or 'IMD'` was always true, which
        # made this branch unreachable.
        messagebox.showerror('Error', 'Invalid Data Type')
        return

    self.train.columns = [name.lower() for name in self.train.columns]

    if "week" in self.train:
        self.ts_tot_orders = self.train.groupby(['week'])['num_orders'].sum()
    else:
        totals = self.train.groupby(['date'])['sales'].sum().reset_index()
        totals['date'] = pd.to_datetime(totals['date'])
        totals = totals.set_index(['date'])
        # Keep the single value column as a Series, like the weekly branch.
        self.ts_tot_orders = totals.iloc[:, -1]

    self.y_train = self.train.iloc[:, -1]
    # Copy so the date conversion below does not assign into a slice of
    # self.train.
    self.x_train = self.train.iloc[:, 0:-1].copy()
    if 'id' in self.x_train:
        self.x_train = self.x_train.drop(['id'], axis=1)
    if 'date' in self.x_train:
        self.x_train['date'] = pd.to_datetime(self.x_train['date'])
        self.x_train = self.x_train.set_index(['date'])
def Linearregression(self):
    """Fit a linear regression, aggregate its predictions and plot them.

    Trains ``LinearRegression`` on ``self.x_train`` / ``self.y_train``,
    predicts for ``self.x_test``, sums the predictions per ``week`` (or per
    ``date`` when there is no ``week`` column), exports the totals through
    ``ideaLib.py2idea`` as ``ts_tot_pred_linear`` and draws them in red
    against ``self.ts_tot_orders`` in blue on a canvas packed into
    ``self.ml``.
    """
    lr = LinearRegression()
    lr.fit(self.x_train, self.y_train)
    pred = pd.DataFrame(lr.predict(self.x_test))

    # Attach predictions to the test rows by position, not by index label.
    x_test = self.x_test.reset_index()
    predictions = pd.concat([x_test, pred], axis=1)
    predictions['num_orders'] = predictions[0]
    predictions = predictions.drop([0], axis=1)
    print("predictions\n", predictions)

    # Both branches now yield a DataFrame.  The date branch used to pass a
    # Series to py2idea's `dataframe` argument after a dead assignment.
    # NOTE(review): assumes py2idea expects a DataFrame - confirm.
    group_key = 'week' if 'week' in predictions else 'date'
    ts_tot_pred = pd.DataFrame(
        predictions.groupby([group_key])['num_orders'].sum())
    print("ts_tot_pred\n", ts_tot_pred)

    # Draw both series on ONE axes: every add_subplot(111) call may create
    # a new axes, which would stack two independently scaled plots.
    fig = Figure(figsize=(5, 5), dpi=100)
    ax = fig.add_subplot(111)
    ax.plot(self.ts_tot_orders, color='Blue')
    ax.plot(ts_tot_pred, color='Red')

    ideaLib.py2idea(dataframe=ts_tot_pred,
                    databaseName='ts_tot_pred_linear',
                    client=client)

    # Hide the previous plot, if any, before showing the new one.
    if self.is_canvas_ml == 1:
        self.canvas.get_tk_widget().pack_forget()
    self.canvas = FigureCanvasTkAgg(fig, master=self.ml)  # A tk.DrawingArea.
    self.canvas.get_tk_widget().pack(side=RIGHT)
    self.canvas.draw()
    self.is_canvas_ml = 1
def Decisiontreeregressor(self):
    """Fit a decision tree, aggregate its predictions and plot them.

    Trains ``DecisionTreeRegressor`` on ``self.x_train`` / ``self.y_train``,
    predicts for ``self.x_test``, sums the predictions per ``week`` (or per
    ``date`` when there is no ``week`` column), exports the totals through
    ``ideaLib.py2idea`` as ``ts_tot_pred_dt`` and draws them in red against
    ``self.ts_tot_orders`` in blue on a canvas packed into ``self.ml``.
    """
    # Named `tree` rather than `dt` so it cannot be confused with a
    # datetime alias.
    tree = DecisionTreeRegressor()
    tree.fit(self.x_train, self.y_train)
    pred = pd.DataFrame(tree.predict(self.x_test))

    # Attach predictions to the test rows by position, not by index label.
    x_test = self.x_test.reset_index()
    predictions = pd.concat([x_test, pred], axis=1)
    predictions['num_orders'] = predictions[0]
    predictions = predictions.drop([0], axis=1)
    print("predictions\n", predictions)

    # Both branches now yield a DataFrame.  The date branch used to pass a
    # Series to py2idea's `dataframe` argument after a dead assignment.
    # NOTE(review): assumes py2idea expects a DataFrame - confirm.
    group_key = 'week' if 'week' in predictions else 'date'
    ts_tot_pred = pd.DataFrame(
        predictions.groupby([group_key])['num_orders'].sum())
    print("ts_tot_pred\n", ts_tot_pred)

    # Hide the previous plot, if any, before showing the new one.
    if self.is_canvas_ml == 1:
        self.canvas.get_tk_widget().pack_forget()

    # Draw both series on ONE axes: every add_subplot(111) call may create
    # a new axes, which would stack two independently scaled plots.
    fig = Figure(figsize=(5, 5), dpi=100)
    ax = fig.add_subplot(111)
    ax.plot(self.ts_tot_orders, color='Blue')
    ax.plot(ts_tot_pred, color='Red')

    ideaLib.py2idea(dataframe=ts_tot_pred,
                    databaseName='ts_tot_pred_dt',
                    client=client)

    self.canvas = FigureCanvasTkAgg(fig, master=self.ml)  # A tk.DrawingArea.
    self.canvas.get_tk_widget().pack(side=RIGHT)
    self.canvas.draw()
    self.is_canvas_ml = 1
def load(self):
    """Ask the user for an IDEA database and load it into ``self.df``.

    ``.IMD`` databases are read through ``ideaLib.idea2py`` and their path
    is shown in ``self.datalocation``.  The ``CENTER_TYPE``, ``CATEGORY``
    and ``CUISINE`` columns are cast to ``str`` when all three are present,
    then the column names are lower-cased.  ``.csv`` files are not loaded
    here: the user only gets a "Please try again later" notice.  Any other
    extension shows an "Invalid Data Type" error.  Cancelling the dialog,
    or a failed IDEA import, returns without changing ``self.df``'s
    columns.
    """
    self.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select a File",
        filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                   ("Excel files", ".csv*")))
    if not self.filename:
        # Dialog was cancelled: there is nothing to load.
        return

    self.datatype = self.filename.split('.')
    extension = self.datatype[-1]

    if extension == 'csv':
        # No frame is read for csv, so stop here instead of relabelling a
        # missing or stale self.df.
        messagebox.showinfo('Info', 'Please try again later')
        return
    if extension != 'IMD':
        messagebox.showerror('Error', 'Invalid Data Type')
        return

    self.datalocation['text'] = self.filename
    # Dataset must be in IDEA working directory, so only the bare file name
    # is handed to idea2py.
    self.filename = self.filename.split('/')[-1]
    self.df = ideaLib.idea2py(database=self.filename)
    if self.df is None:
        messagebox.showinfo(
            "Info",
            "There was something wrong with the import process of "
            "IDEA database to Pandas dataframe")
        # Nothing was imported; continuing would fail on the None value.
        return
    if self.df.empty:
        messagebox.showinfo("Info", "You selected an empty IDEA database")

    # astype raises KeyError for a missing column, so only cast when all
    # three are present.
    text_columns = ("CENTER_TYPE", "CATEGORY", "CUISINE")
    if all(name in self.df for name in text_columns):
        self.df = self.df.astype({name: str for name in text_columns})
    self.df.columns = [name.lower() for name in self.df.columns]