Exemplo n.º 1
    def get_data_fromfile(
        self, data_type, filename
    ):  # TODO: add a "dates" list parameter (dates=None) for cases with several measurement sets (e.g. GC)
        # Read the table(s) for `data_type` from the Excel file `filename`.
        #
        # data_type (str): branches exist for "SCADA", "GC1", "INFERNO" and "SPA";
        #     no branch handles any other value.
        # filename: passed straight to pd.read_excel.
        #
        # "SCADA" / "GC1" / "INFERNO": builds two row masks on the column named by
        #     Experiment.name_timecolumn[data_type] (>= self.date_ini, <= self.date_end)
        #     and returns the rows selected by both masks.
        # "SPA": returns a dict with the same keys as spa_win.sh_dates; each value is
        #     a list of [v[0], v[1], table read from sheet v[2]] entries.
        #
        # NOTE(review): several statements below are visibly truncated (unbalanced
        # parentheses), so this method does not parse as shown. The missing lines
        # must be restored from the original file before use.

        if data_type == "SCADA":
            ExcelTable = pd.read_excel(filename, sheet_name="_DATA")
            name_timecolumn = Experiment.name_timecolumn[data_type]
            # NOTE(review): the pd.to_datetime( call below is never closed; its argument
            # line looks missing (presumably the time column itself - confirm).
            ExcelTable[name_timecolumn] = pd.to_datetime(
            d_t0 = ExcelTable[
                name_timecolumn] >= self.date_ini  # alternative when t0 is in HH:MM format: d_t0=ExcelTable["Acquisition Date & Time"].dt.strftime("%H:%M")>t0
            d_t1 = ExcelTable[name_timecolumn] <= self.date_end
            Table_timeinterval = ExcelTable[d_t0 & d_t1]
            return Table_timeinterval

        elif data_type == "GC1" or data_type == "INFERNO":  # TODO: check this, several measurement sets could exist within the experiment time
            ExcelTable = pd.read_excel(filename)
            name_timecolumn = Experiment.name_timecolumn[data_type]
            # NOTE(review): same truncated pd.to_datetime( call as in the SCADA branch.
            ExcelTable[name_timecolumn] = pd.to_datetime(
            d_t0 = ExcelTable[
                name_timecolumn] >= self.date_ini  # (would be dates[0]) the whole file must be read, which is why the experiment dates need to be wide enough
            d_t1 = ExcelTable[name_timecolumn] <= self.date_end  # (would be dates[1])
            Table_timeinterval = ExcelTable[d_t0 & d_t1]
            return Table_timeinterval

        elif data_type == "SPA":  # TODO: check this, several measurement sets could exist within the experiment time
            spa_win = guiSPA.Ui_MainWindow()
            # NOTE(review): the ")" below has no matching "(" in the visible code; the
            # call it closed looks missing.
            )  # displays a window where the different sheets are assigned to the different times when the sample was collected
            # Loops until spa_win.finish_window is truthy. NOTE(review): as shown, every
            # pass raises the error popup; guard lines may be missing here - confirm.
            while spa_win.finish_window == False:
                sheets_dates = spa_win.sh_dates
                guiSPA.Message_popup("Error", "Error reading SPA table",
                                     "The sheets were not read from SPA file")
                # sheets_dates is a dictionary keyed by time => dict["YYYY-MM-DD HH:MM:SS"]; the keys must be sorted by time
                # each value is a list of entries: entry[0]=GPX, entry[1]=FR/CR, entry[2]=sheet_name of R (R is the repetition of the GC, X is the SPA sample number, P is the point)
            else:  # while/else: runs once the loop condition is false (no break); reads the Excel tables from the respective sheets
                print("done with the SPA file!")
                Table_timeinterval = {}
                # One entry per time key; each sheet named in v[2] is read from `filename`.
                for tm, v_list in sheets_dates.items():
                    Table_timeinterval[tm] = [[
                        v[0], v[1],
                        pd.read_excel(filename, sheet_name=v[2])
                    ] for v in v_list]
                print("SPA directory created")
                #     #sys.exit(app.exec_())
                return Table_timeinterval
Exemplo n.º 2
    def set_point_data(self, collect_data, time_type, date_ini, date_end,
                       db_experiment):  #,name_timecolumn): #delete data_type
        # Define the attributes of the point being created and add its data to the
        # point's time database (self.time_db_pnt).
        #
        # collect_data (str) = type of data to be introduced (SCADA, GC1, Inferno, SPA);
        #     the visible code only handles the value "AUTOMATIC"
        # time_type (str) = "SCADA" or "GC"
        # date_ini (str) = initial date of the point, parsed with "%Y-%m-%d %H:%M:%S"
        # date_end (str) = end date of the point, parsed with "%Y-%m-%d %H:%M:%S"
        # delay (str) = delay in minutes with respect to the SCADA time (SCADA -> delay=0 minutes)
        #     NOTE(review): `delay` is not in the visible parameter list - confirm the signature
        # db_experiment = dictionary with all the data added to the experiment, given by
        #     the attribute data_experiment of the Experiment class
        #
        # NOTE(review): many statements below are visibly truncated (unclosed calls,
        # orphaned arguments), so this method does not parse as shown. The missing
        # lines must be restored from the original file before use.

        date_ini = datetime.strptime(date_ini, "%Y-%m-%d %H:%M:%S")
        date_end = datetime.strptime(date_end, "%Y-%m-%d %H:%M:%S")
        # NOTE(review): `delay` is read here without being a visible parameter or a
        # previously assigned local.
        delay = float(delay)

        self.delay = delay
        self.date_ini = date_ini
        self.date_end = date_end
        #self.point_route=point_route#it is a string with the indexes of Project/Season/experiment/point

        # Per-database offset in minutes, applied when looking up each database.
        delay_db = {k: 0 for k in ["SCADA", "GC1", "INFERNO", "SPA"]}
        if time_type == "GC":  # the given time is the GC file time, so SCADA must be read at time-delay from the SCADA file (the point date is always the SCADA time)
            # NOTE(review): the start of the next statement is missing (presumably
            # `delay_db[` - confirm).
                "SCADA"] = -delay  # negative because the SCADA is the real time
            # NOTE(review): self.date_ini/self.date_end are datetime objects and delay is
            # a float; datetime - float raises TypeError. Presumably
            # timedelta(minutes=delay) was intended - confirm.
            date_i = self.date_ini - delay  # the point date will always be the SCADA time
            date_e = self.date_end - delay  # the point date will always be the SCADA time
        elif time_type == "SCADA":  # the given time is the SCADA file time, so GC, INFERNO and SPA must be read at time+delay
            delay_db["GC1"] = delay_db["INFERNO"] = delay_db["SPA"] = delay
            date_i = self.date_ini
            date_e = self.date_end
        # NOTE(review): for any other time_type, date_i/date_e are never assigned and
        # the while loop below would fail with a NameError.
        self.time_db_pnt = {
            fd: []
            for fd in Point.time_db_fields
        }  # initializes the time_db dictionary for this point with one empty list per field (to be added later to the global time_db)
        # time_db_pnt will be transformed into a pandas dataframe

        # key="DATE" => creates a list of rounded times (floor minute) from date_ini to date_end
        # NOTE(review): t_rounded is computed but never stored in the visible code; the
        # line adding it to self.time_db_pnt["DATE"] looks missing - confirm.
        while date_i <= date_e:
            t_rounded = datetime(
                date_i.year, date_i.month, date_i.day, date_i.hour,
                date_i.minute, 00
            )  # time floor-rounded to the minute (used to create the point's time slot list)
            date_i += timedelta(minutes=1)

        if collect_data == "AUTOMATIC":  # the time taken by the point is the SCADA time
            # collect data from all databases
            Nentries = {k: 0 for k in Experiment.db_names}
            for k in db_experiment.keys():
                #if k in self.data_point.keys(): #the data from the database k has already added to the point
                #    tk.messagebox.showwarning("Database already added", f"the database {k} has been already added to this point")
                #    continue
                if len(db_experiment[k]) == 0:
                    # pop up a message saying that the database k is missing
                    # NOTE(review): the call that takes the arguments below is missing
                    # (opening line and closing parenthesis).
                        "Error", "Missed Database",
                        f"the database {k} is missing, please add it to the experiment"
                if k in [
                        "SCADA", "GC1", "INFERNO"
                ]:  # TODO: take this list from a function (to generalize for the case when data_type!=automatic)
                    d_t0, d_t1 = None, None
                    Nentries[k] = 0
                    # search which of the elements added to db_experiment[k] contains the time interval being looked for
                    for db in db_experiment[
                            k]:  # db_experiment[k] holds one entry per file added (more than one SCADA or GC may have been added to the experiment)
                        # NOTE(review): both timedelta( calls below are unclosed; their
                        # argument lines are missing.
                        d_t0 = db[Experiment.name_timecolumn[
                            k]] >= self.date_ini + timedelta(
                        d_t1 = db[Experiment.name_timecolumn[
                            k]] <= self.date_end + timedelta(
                        if all(d_t0 == False) and all(
                                d_t1 == False
                        ):  # the defined timeslot is not in this data entry of the experiment (an experiment can have several GCs or SCADAs)
                            # NOTE(review): the call that takes the arguments below is
                            # missing, as is whatever followed it.
                                "Warning", "time error",
                                f"the times defined are not within the database {k}, please add the data within the timeframe or check the time intervals defined"

                        # extracts the dataframe rows from the different databases to create self.time_db_pnt[database]
                        for t_i in self.time_db_pnt["DATE"]:
                            t0 = db[Experiment.name_timecolumn[
                                k]] >= t_i + timedelta(minutes=delay_db[k])
                            # NOTE(review): the second timedelta( below is unclosed and the
                            # call that receives db[t0 & t1] is missing (the next visible
                            # check reads self.time_db_pnt[k][-1], so presumably an append
                            # to that list - confirm).
                            t1 = db[Experiment.
                                    name_timecolumn[k]] <= t_i + timedelta(
                                        minutes=delay_db[k]) + timedelta(
                                db[t0 & t1]
                            )  # this way each list entry corresponds to one time (row) of the pandas dataframe
                            # count this time slot only if rows were found for it
                            if len(self.time_db_pnt[k][-1]) > 0:
                                Nentries[k] += 1

                elif k == "SPA":
                    Nentries[k] = 0  # number of entries of the database k
                    for t_i in self.time_db_pnt[
                            "DATE"]:  # goes through all the dates stored in time_db_pnt (each date will be one row of the pandas dataframe)
                        t0 = t_i + timedelta(minutes=delay_db[k])
                        # NOTE(review): the second timedelta( below is unclosed; its
                        # argument line is missing.
                        t1 = t_i + timedelta(minutes=delay_db[k]) + timedelta(
                        fv = 0  # number of entries found
                        G_PX0 = "G_None"
                        SPA_samples = []
                        for db in db_experiment[
                                k]:  # several SPA files may have been added to the experiment

                            # When two SPAs are in series both share the same time, so two dataframes must be in the db_experiment["SPA"] dictionary for a specific key (since the key is the time)
                            for t_spa, v_list in db.items():
                                time_SPA = datetime.strptime(
                                    t_spa, "%Y-%m-%d %H:%M:%S"
                                )  # gets the time from the keys of the SPA data (see method get_data_fromfile)
                                if t0 <= time_SPA <= t1:
                                    # NOTE(review): the call that receives the list below
                                    # is missing (presumably an append/extend on
                                    # SPA_samples - confirm).
                                        [[t_spa, v[0], v[1], v[2]]
                                         for v in v_list]
                                    )  # collects the different F/C matrices for the evaluated time
                                    # each SPA file has its data grouped into certain times, so once the time is found it must jump to the next SPA file added

                                    break  # maybe not <- the loop must continue to allow the case when 2 SPA syringes are used (both are marked at the same hour)

                        # at the end, all the list fields should be joined together into just one list (not a list of lists as it is right now, one list for each SPA file)
                        #guiSPA.Message_popup("Info","SPA Added",f"It has been added {len(SPA_samples)} samples at time t_i={t_i} of the SPA taken at t_spa={t_spa}")

                        # count this time slot only if at least one SPA sample was collected
                        if len(SPA_samples) > 0:
                            Nentries[k] += 1
                        # as the times t_spa are sorted within db_experiment["SPA"], the time t_spa within the interval [t0,t1] must be found
                        #print(f"there is no data within the interval {str(t0)} and {str(t1)}\n in any of the {len(db_experiment[k])} databases added in the experiment.\n Please check the SPA dates")
                        # guiSPA.Message_popup("Error","time error",
                        #                      f"there is no data within the interval {str(t0)} and {str(t1)}\n in any of the {len(db_experiment[k])} databases added in the experiment.\n Please check the SPA dates")

                        #    tk.message.showinfo("No SPA Added","No data of SPA was added at time t_i=. Please check the SPA dates")

                # Add the log of the database added, to show it in the table of databases in the Add_point window
                # data_added[k]=[database date_ini,database date_end,delay,Nentries]

                # NOTE(review): the list literal below is never closed (missing "]").
                self.data_added[k] = [
                    self.date_ini + timedelta(minutes=delay_db[k]),
                    self.date_end + timedelta(minutes=delay_db[k]),
                    delay_db[k] + delay * (delay_db["SCADA"] != 0), Nentries[k]

        # Fill every field that received no data with one "No Data" placeholder per DATE entry.
        # NOTE(review): the list comprehension below is never closed (missing "]").
        for k, v in self.time_db_pnt.items():
            if len(v) == 0:
                self.time_db_pnt[k] = [
                    "No Data" for i in self.time_db_pnt["DATE"]

Exemplo n.º 3
    def add_data(self, data_type, filename, delay, comments=""):
        # Load a data file into the experiment and, if it contains data, register it
        # in self.data_experiment_info.
        #
        # data_type (str) = type of data to be introduced: SCADA, GC1, INFERNO or SPA
        # filename (str) = path of the file the information is extracted from (user defined through an explorer window)
        # delay (str) = delay time with respect to the SCADA time (format: HH:MM:SS)
        # comments (str) = free text stored with the registered entry (defaults to "")

        # The data can be read from a predefined excel sheet format
        # the different data should be stored in a dictionary (with the data_type string as the key)
        # each experiment can have several time intervals (to check)
        #
        # NOTE(review): many statements below are visibly truncated (orphaned call
        # arguments, unclosed lists, missing else lines), so this method does not
        # parse as shown. The missing lines must be restored from the original file.
        data_addedtolist = False
        # NOTE(review): in each branch below the call that receives the result of
        # get_data_fromfile is missing; the later code reads
        # self.data_experiment[data_type][-1], so presumably an append to that list
        # - confirm.
        if data_type == "SCADA":
                self.get_data_fromfile(data_type, filename)
            )  # TODO: it may need to search all the files which are within the dates given (maybe not)
        elif data_type == "GC1" or data_type == "INFERNO":  # the time interval of the GC corresponds to the whole time registered in the file
                self.get_data_fromfile(data_type, filename))  #maybe this one
        elif data_type == "SPA":
                self.get_data_fromfile(data_type, filename))

        if data_type in ["SCADA", "GC1", "INFERNO"]:
            if len(self.data_experiment[data_type][-1].index
                   ) > 0:  # check if the last dataframe added has rows
                data_addedtolist = True
        elif data_type in ["SPA"]:
            if len(self.data_experiment[data_type][-1].items()
                   ) > 0:  # check if the last SPA dictionary added has entries
                data_addedtolist = True

        if data_addedtolist:  # the last dataset added has data in it
            d_min, d_max = self.get_dates_db(
                data_type, -1
            )  # -1 because the last db loaded for the given data_type must always be taken

            if len(self.data_experiment_info[data_type]) == 0:
                # NOTE(review): the call that receives the tuple below is missing
                # (presumably an append to self.data_experiment_info[data_type]), and
                # an `else:` appears to be missing before d_min_list - confirm.
                    (data_type + "_" +
                     str(len(self.data_experiment[data_type]) - 1), d_min,
                     d_max, delay, comments))
                # Start/end times of the databases already registered for this data_type.
                # NOTE(review): both list comprehensions below are never closed (missing "]").
                d_min_list = [
                    datetime.strptime(d_info[1], "%Y-%m-%d %H:%M:%S")
                    for d_info in self.data_experiment_info[data_type]
                d_max_list = [
                    datetime.strptime(d_info[2], "%Y-%m-%d %H:%M:%S")
                    for d_info in self.data_experiment_info[data_type]
                # Flags the new data when it starts at/after the earliest registered start
                # OR ends at/before the latest registered end.
                # NOTE(review): the two conditions are joined with `or`; confirm this is
                # the intended overlap test.
                if datetime.strptime(d_min, "%Y-%m-%d %H:%M:%S") >= min(
                        d_min_list) or datetime.strptime(
                            d_max, "%Y-%m-%d %H:%M:%S") <= max(d_max_list):
                    # NOTE(review): the Message_popup( call below is never closed.
                    yesorno = guiSPA.Message_popup(
                        "YesorNo", "Time intervals overlapped",
                        "The time interval of the new data is overlapping with one of the databases already uploaded. keep it anyway?"
                    if yesorno.ret == "Yes":
                        # NOTE(review): the call that receives the tuple below is missing,
                        # as are an `else:` before the `del` and the popup call that takes
                        # the "Warning" arguments - confirm.
                            (data_type + "_" +
                             str(len(self.data_experiment[data_type]) - 1),
                             d_min, d_max, delay, comments))
                        del self.data_experiment[data_type][-1]
                            "Warning", "Data overlapped",
                            "Data times overlapped. Check the file and upload it again"

            # NOTE(review): an `else:` (paired with `if data_addedtolist`) appears to be
            # missing before the next statement, as does the popup call that takes the
            # "Error" arguments - confirm.
            del self.data_experiment[data_type][
                -1]  # deletes the last element because the times don't intersect
                "Error", "Time interval error",
                "The time interval of the experiment does not intersect with the time interval of the data in the file selected. Please check the times"