Exemplo n.º 1
0
 def Randomforestregressor(self, n_estimators):
     """Fit a random forest, then plot and export the predicted weekly totals.

     The model is trained on ``self.x_train`` / ``self.y_train`` and used to
     predict ``self.x_test``. Predictions are summed per ``week`` and drawn
     in red over ``self.ts_tot_orders`` (blue). The aggregated predictions
     are also handed to ``ideaLib.py2idea`` under the database name
     ``ts_tot_pred_rf``.

     Args:
         n_estimators: Forwarded to ``RandomForestRegressor`` as its
             ``n_estimators`` argument.
     """
     rf = RandomForestRegressor(n_estimators=n_estimators)
     rf.fit(self.x_train, self.y_train)
     pred = pd.DataFrame(rf.predict(self.x_test))

     # Attach each prediction to its test row by index label; the single
     # prediction column (labelled 0) is then renamed to 'num_orders'.
     predictions = pd.merge(self.x_test, pred, left_index=True, right_index=True, how='inner')
     predictions['num_orders'] = predictions[0]
     predictions = predictions.drop([0], axis=1)
     ts_tot_pred = pd.DataFrame(predictions.groupby(['week'])['num_orders'].sum())

     fig = Figure(figsize=(5, 5), dpi=100)
     # Create the axes once and draw both series on it. Calling
     # add_subplot(111) a second time may create another axes stacked on top
     # of the first instead of returning the existing one.
     ax = fig.add_subplot(111)
     ax.plot(self.ts_tot_orders, color='Blue')
     ax.plot(ts_tot_pred, color='Red')
     ideaLib.py2idea(dataframe=ts_tot_pred,
                     databaseName='ts_tot_pred_rf',
                     client=client)

     # Hide the previously shown canvas (if any) before packing the new one.
     if self.is_canvas_ml == 1:
         self.canvas.get_tk_widget().pack_forget()

     self.canvas = FigureCanvasTkAgg(fig, master=self.ml)  # A tk.DrawingArea.
     self.canvas.get_tk_widget().pack(side=RIGHT)
     self.canvas.draw()
     self.is_canvas_ml = 1
Exemplo n.º 2
0
    def load_test(self):
        """Ask the user for a test dataset and load it into ``self.test``.

        ``.csv`` files are read with ``pd.read_csv``; ``.IMD`` files are
        imported through ``ideaLib.idea2py`` using only the file name. On
        success the column names are lower-cased and a copy of the frame is
        stored in ``self.x_test``.

        Nothing is loaded when the dialog is cancelled, when the extension is
        neither ``csv`` nor ``IMD`` (an error box is shown), or when the
        import returns ``None`` (an info box is shown).
        """
        self.filename = filedialog.askopenfilename(initialdir="/",
                        title="Select a File",
                        filetypes=(("Excel files", ".IMD*"), ("all files",
                                                               "*.*"),
                                   ("Excel files", ".csv*")))
        if not self.filename:
            # Dialog dismissed without a selection: keep the current state.
            return

        self.datatype = self.filename.split('.')
        extension = self.datatype[-1]

        if extension == 'csv':
            self.test = pd.read_csv(self.filename)
        elif extension == 'IMD':
            # Read from .IMD file.
            # Dataset must be in IDEA working directory, so only the bare
            # file name is passed on.
            self.filename = self.filename.split('/')[-1]
            self.test = ideaLib.idea2py(database=self.filename)
        else:
            # The original condition `== 'IMD' or 'csv'` was always true, so
            # this error could never be shown.
            messagebox.showerror('Error', 'Invalid Data Type')
            return

        if self.test is None:
            messagebox.showinfo("Info",
                                "There was something wrong with the import process of IDEA database to Pandas dataframe")
            # Without a frame there are no columns to normalise.
            return
        if self.test.empty:
            messagebox.showinfo("Info","You selected an empty IDEA database")

        self.test.columns = [str.lower(name) for name in self.test.columns]
        self.x_test = self.test.copy()
    def load(self):
        """Ask the user for a dataset, load it into ``self.df`` and list its columns.

        The column listbox ``self.lbdata`` is cleared first. ``.csv`` files
        are read with ``pd.read_csv``; ``.IMD`` files are imported through
        ``ideaLib.idea2py`` using only the file name, and the full path is
        shown in ``self.datalocation``. On success the column names are
        lower-cased and inserted into ``self.lbdata``.

        Nothing is loaded when the dialog is cancelled, when the extension is
        neither ``csv`` nor ``IMD`` (an error box is shown), or when the
        import returns ``None`` (an info box is shown).
        """
        self.lbdata.delete(0, 'end')

        self.filename = filedialog.askopenfilename(
            initialdir="/",
            title="Select a File",
            filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                       ("Excel files", ".csv*")))
        if not self.filename:
            # Dialog dismissed without a selection.
            return

        self.datatype = self.filename.split('.')
        extension = self.datatype[-1]

        if extension == 'csv':
            self.df = pd.read_csv(self.filename)
        elif extension == 'IMD':
            self.datalocation['text'] = self.filename

            # Read from .IMD file.
            # Dataset must be in IDEA working directory, so only the bare
            # file name is passed on.
            self.filename = self.filename.split('/')[-1]
            self.df = ideaLib.idea2py(database=self.filename)
        else:
            # Bail out before touching self.df: it would be undefined or
            # left over from a previous load.
            messagebox.showerror('Error', 'Invalid Data Type')
            return

        if self.df is None:
            messagebox.showinfo(
                "Info",
                "There was something wrong with the import process of IDEA database to Pandas dataframe"
            )
            return
        if self.df.empty:
            messagebox.showinfo("Info", "You selected an empty IDEA database")

        if extension == 'IMD':
            # Cast the three columns to str only when all of them are present.
            text_columns = ('CENTER_TYPE', 'CATEGORY', 'CUISINE')
            if all(name in self.df for name in text_columns):
                self.df = self.df.astype({name: str for name in text_columns})

        self.df.columns = [str.lower(name) for name in self.df.columns]
        for position, column in enumerate(self.df.columns):
            self.lbdata.insert(position, column)
    def load_train(self):
        """Ask the user for a training dataset and derive the training inputs.

        ``.csv`` files are read with ``pd.read_csv``; ``.IMD`` files are
        imported through ``ideaLib.idea2py`` using only the file name. On
        success the column names are lower-cased and these attributes are set:

        * ``self.train``: the loaded frame.
        * ``self.ts_tot_orders``: ``num_orders`` summed per ``week`` when a
          ``week`` column exists, otherwise ``sales`` summed per ``date``
          with a datetime index.
        * ``self.y_train``: the last column of ``self.train``.
        * ``self.x_train``: all other columns, with ``id`` dropped and
          ``date`` (if present) converted to datetime and used as the index.

        Nothing is loaded when the dialog is cancelled, when the extension is
        neither ``csv`` nor ``IMD`` (an error box is shown), or when the
        import returns ``None`` (an info box is shown).
        """
        self.filename = filedialog.askopenfilename(
            initialdir="/",
            title="Select a File",
            filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                       ("Excel files", ".csv*")))
        if not self.filename:
            # Dialog dismissed without a selection.
            return

        self.datatype = self.filename.split('.')
        extension = self.datatype[-1]

        if extension == 'csv':
            self.train = pd.read_csv(self.filename)
        elif extension == 'IMD':
            # Read from .IMD file.
            # Dataset must be in IDEA working directory, so only the bare
            # file name is passed on.
            self.filename = self.filename.split('/')[-1]
            self.train = ideaLib.idea2py(database=self.filename)
        else:
            # The original condition `== 'csv' or 'IMD'` was always true, so
            # this error could never be shown.
            messagebox.showerror('Error', 'Invalid Data Type')
            return

        if self.train is None:
            # The previous literal used a backslash continuation, which put
            # the next line's indentation into the displayed message.
            messagebox.showinfo(
                "Info",
                "There was something wrong with the import process of IDEA database to Pandas dataframe"
            )
            return
        if self.train.empty:
            messagebox.showinfo("Info", "You selected an empty IDEA database")

        self.train.columns = [str.lower(name) for name in self.train.columns]

        if "week" in self.train:
            self.ts_tot_orders = self.train.groupby(
                ['week'])['num_orders'].sum()
        else:
            daily_sales = self.train.groupby(
                ['date'])['sales'].sum().reset_index()
            daily_sales['date'] = pd.to_datetime(daily_sales['date'])
            # After set_index the only remaining column is 'sales'; keep it
            # as a Series.
            self.ts_tot_orders = daily_sales.set_index(['date']).iloc[:, -1]

        # The last column is the target; every other column is a feature.
        self.y_train = self.train.iloc[:, -1]
        self.x_train = self.train.iloc[:, 0:-1]

        if 'id' in self.x_train:
            self.x_train = self.x_train.drop(['id'], axis=1)
        if 'date' in self.x_train:
            # Build a new frame rather than assigning into what may still be
            # a slice of self.train.
            self.x_train = self.x_train.assign(
                date=pd.to_datetime(self.x_train['date'])).set_index(['date'])
    def Linearregression(self):
        """Fit a linear regression, then plot and export the predicted totals.

        The model is trained on ``self.x_train`` / ``self.y_train`` and used
        to predict ``self.x_test``. Predictions are summed per ``week`` when
        that column exists, otherwise per ``date``, and drawn in red over
        ``self.ts_tot_orders`` (blue). The aggregated predictions are also
        handed to ``ideaLib.py2idea`` under the database name
        ``ts_tot_pred_linear``.
        """
        lr = LinearRegression()
        lr.fit(self.x_train, self.y_train)
        pred = pd.DataFrame(lr.predict(self.x_test))

        # Move the test index into a column so rows line up positionally
        # with the freshly numbered prediction frame.
        x_test = self.x_test.reset_index()
        predictions = pd.concat([x_test, pred], axis=1)
        predictions['num_orders'] = predictions[0]
        predictions = predictions.drop([0], axis=1)
        print("predictions\n", predictions)

        # Both branches now yield a DataFrame; the 'date' branch used to
        # discard its DataFrame and pass a Series to py2idea(dataframe=...).
        group_key = 'week' if 'week' in predictions else 'date'
        ts_tot_pred = pd.DataFrame(
            predictions.groupby([group_key])['num_orders'].sum())
        print("ts_tot_pred\n", ts_tot_pred)

        fig = Figure(figsize=(5, 5), dpi=100)
        # Create the axes once and draw both series on it. Calling
        # add_subplot(111) a second time may create another axes stacked on
        # top of the first instead of returning the existing one.
        ax = fig.add_subplot(111)
        ax.plot(self.ts_tot_orders, color='Blue')
        ax.plot(ts_tot_pred, color='Red')

        ideaLib.py2idea(dataframe=ts_tot_pred,
                        databaseName='ts_tot_pred_linear',
                        client=client)

        # Hide the previously shown canvas (if any) before packing the new one.
        if self.is_canvas_ml == 1:
            self.canvas.get_tk_widget().pack_forget()

        self.canvas = FigureCanvasTkAgg(fig,
                                        master=self.ml)  # A tk.DrawingArea.
        self.canvas.get_tk_widget().pack(side=RIGHT)
        self.canvas.draw()
        self.is_canvas_ml = 1
    def Decisiontreeregressor(self):
        """Fit a decision tree, then plot and export the predicted totals.

        The model is trained on ``self.x_train`` / ``self.y_train`` and used
        to predict ``self.x_test``. Predictions are summed per ``week`` when
        that column exists, otherwise per ``date``, and drawn in red over
        ``self.ts_tot_orders`` (blue). The aggregated predictions are also
        handed to ``ideaLib.py2idea`` under the database name
        ``ts_tot_pred_dt``.
        """
        dt = DecisionTreeRegressor()
        dt.fit(self.x_train, self.y_train)
        pred = pd.DataFrame(dt.predict(self.x_test))

        # Move the test index into a column so rows line up positionally
        # with the freshly numbered prediction frame.
        x_test = self.x_test.reset_index()
        predictions = pd.concat([x_test, pred], axis=1)
        predictions['num_orders'] = predictions[0]
        predictions = predictions.drop([0], axis=1)
        print("predictions\n", predictions)

        # Both branches now yield a DataFrame; the 'date' branch used to
        # discard its DataFrame and pass a Series to py2idea(dataframe=...).
        group_key = 'week' if 'week' in predictions else 'date'
        ts_tot_pred = pd.DataFrame(
            predictions.groupby([group_key])['num_orders'].sum())
        print("ts_tot_pred\n", ts_tot_pred)

        # Hide the previously shown canvas (if any) before building the new one.
        if self.is_canvas_ml == 1:
            self.canvas.get_tk_widget().pack_forget()

        fig = Figure(figsize=(5, 5), dpi=100)
        # Create the axes once and draw both series on it. Calling
        # add_subplot(111) a second time may create another axes stacked on
        # top of the first instead of returning the existing one.
        ax = fig.add_subplot(111)
        ax.plot(self.ts_tot_orders, color='Blue')
        ax.plot(ts_tot_pred, color='Red')
        ideaLib.py2idea(dataframe=ts_tot_pred,
                        databaseName='ts_tot_pred_dt',
                        client=client)

        self.canvas = FigureCanvasTkAgg(fig,
                                        master=self.ml)  # A tk.DrawingArea.
        self.canvas.get_tk_widget().pack(side=RIGHT)
        self.canvas.draw()
        self.is_canvas_ml = 1
Exemplo n.º 7
0
    def load(self):
        """Ask the user for an IDEA database and load it into ``self.df``.

        Only ``.IMD`` files are imported (through ``ideaLib.idea2py``, using
        just the file name); the full path is shown in ``self.datalocation``.
        On success the ``CENTER_TYPE`` / ``CATEGORY`` / ``CUISINE`` columns
        that are present are cast to ``str`` and all column names are
        lower-cased.

        Selecting a ``.csv`` file only shows a "try again later" notice.
        Nothing is loaded when the dialog is cancelled, when the extension is
        unsupported (an error box is shown), or when the import returns
        ``None`` (an info box is shown).
        """
        self.filename = filedialog.askopenfilename(
            initialdir="/",
            title="Select a File",
            filetypes=(("Excel files", ".IMD*"), ("all files", "*.*"),
                       ("Excel files", ".csv*")))
        if not self.filename:
            # Dialog dismissed without a selection.
            return

        self.datatype = self.filename.split('.')
        extension = self.datatype[-1]

        if extension == 'csv':
            # CSV import is disabled here; stop so that a frame from an
            # earlier load (or a missing self.df) is not touched.
            messagebox.showinfo('Info', 'Please try again later')
            return
        if extension != 'IMD':
            messagebox.showerror('Error', 'Invalid Data Type')
            return

        self.datalocation['text'] = self.filename

        # Read from .IMD file.
        # Dataset must be in IDEA working directory, so only the bare file
        # name is passed on.
        self.filename = self.filename.split('/')[-1]
        self.df = ideaLib.idea2py(database=self.filename)

        if self.df is None:
            messagebox.showinfo(
                "Info",
                "There was something wrong with the import process of IDEA database to Pandas dataframe"
            )
            return
        if self.df.empty:
            messagebox.showinfo("Info", "You selected an empty IDEA database")

        # Restrict the cast to columns that actually exist so a database
        # lacking one of them can still be loaded.
        string_casts = {
            name: str
            for name in ("CENTER_TYPE", "CATEGORY", "CUISINE")
            if name in self.df
        }
        if string_casts:
            self.df = self.df.astype(string_casts)
        self.df.columns = [str.lower(name) for name in self.df.columns]