Example #1
0
    def fix_data_issues(self):
        """
        TODO: Remove this method!
        Temporary patch for this example project's data gaps so it can be
        used for testing: adds turbine ids, fabricates reanalysis products,
        and derives the missing energy column.
        """
        project = self

        # The example project lacks turbine ids, which the analysis needs.
        project.scada.df["id"] = "T0"
        project.scada.df.index = pd.to_datetime(project.scada.df.index)

        # The project also ships without reanalysis data; synthesize some.
        def _synthetic_reanalysis(index):
            frame = pd.DataFrame(index=index)
            n_rows = frame.shape[0]
            frame["u_ms"] = np.random.random(n_rows) * 15
            frame["v_ms"] = np.random.random(n_rows) * 15
            frame["windspeed_ms"] = np.sqrt(frame["u_ms"] ** 2 + frame["v_ms"] ** 2)
            frame["winddirection_deg"] = compute_wind_direction(
                frame["u_ms"], frame["v_ms"])
            frame["rho_kgm-3"] = np.random.random(n_rows) * 20
            return frame

        reanal_index = pd.date_range(start='1990-01-01', end='2017-12-31')
        for product in ('merra2', 'erai', 'ncep2'):
            # Each product gets its own independent random draw.
            project._reanalysis._product[product].df = _synthetic_reanalysis(reanal_index)

        # Project is missing the energy column; derive it from average power.
        project.scada.df['energy_kwh'] = convert_power_to_energy(
            project.scada.df['wtur_W_avg'], sample_rate_min='10T')
Example #2
0
    def test_convert_power_to_energy(self):
        # Fixed seed so the synthetic power series is reproducible.
        np.random.seed(42)
        power = np.random.random(100) * 100
        df = pd.DataFrame(data={'power_kw': power})

        # Check the conversion at several sampling intervals (minutes).
        for minutes in [10, 30, 60]:
            expected = power * minutes / 60
            actual = unit_conversion.convert_power_to_energy(df['power_kw'], minutes)
            nptest.assert_almost_equal(expected, actual, err_msg="Convert power to energy is broken")
Example #3
0
    def test_convert_power_to_energy(self):
        # Fixed seed so the synthetic power series is reproducible.
        np.random.seed(42)
        power = np.random.random(100) * 100
        df = pd.DataFrame(data={'power_kw': power})
        # Map pandas offset aliases to their duration in minutes.
        intervals = {'10T': 10, '30T': 30., '1H': 60}

        for alias, minutes in intervals.items():
            expected = power * minutes / 60
            actual = unit_conversion.convert_power_to_energy(
                df['power_kw'], alias)
            nptest.assert_almost_equal(
                expected,
                actual,
                err_msg="Convert power to energy is broken")
Example #4
0
    def prepare(self):
        """
        Do all loading and preparation of the data for this plant.

        Loads SCADA, meter, curtailment, and reanalysis (merra2, era5)
        data from CSV; applies timestamp QC, range filtering, and
        unresponsive-sensor flagging to SCADA; computes per-interval
        energy; and renames SCADA fields to IEC 61400-25 style names.
        """

        # Extract data if necessary
        self.extract_data()

        # Set time frequencies of data in minutes ('10T' = 10-minute)
        self._meter_freq = '10T'  # 10-min meter data
        self._curtail_freq = '10T'  # 10-min curtailment data
        self._scada_freq = '10T'  # 10-min

        # Load meta data
        self._lat_lon = (48.452, 5.588)
        self._plant_capacity = 8.2  # MW
        self._num_turbines = 4
        self._turbine_capacity = 2.05  # MW

        ###################
        # SCADA DATA #
        ###################
        logger.info("Loading SCADA data")
        self._scada.load(self._path, "la-haute-borne-data-2014-2015",
                         "csv")  # Load Scada data
        logger.info("SCADA data loaded")

        logger.info("Timestamp QC and conversion to UTC")
        # Get 'time' field in datetime format. Local time zone information is
        # encoded, so convert to UTC, then strip the tz marker so the result
        # is naive UTC.

        self._scada.df['time'] = pd.to_datetime(self._scada.df['Date_time'],
                                                utc=True).dt.tz_localize(None)

        # Remove duplicated (timestamp, turbine id) rows, keeping the first
        self._scada.df = self._scada.df.drop_duplicates(
            subset=['time', 'Wind_turbine_name'], keep='first')

        # Set time as index (drop=False keeps it as a column too)
        self._scada.df.set_index('time', inplace=True, drop=False)

        logger.info("Correcting for out of range of temperature variables")
        # Handle extrema values for temperature: rows outside [-15, 45] are
        # dropped. All other variables appear to be reasonable.
        self._scada.df = self._scada.df[(self._scada.df["Ot_avg"] >= -15.0)
                                        & (self._scada.df["Ot_avg"] <= 45.0)]

        logger.info("Flagging unresponsive sensors")
        # Due to data discretization, there appear to be a lot of repeating
        # values. But these filters seem to catch the obvious unresponsive
        # sensors.
        # NOTE(review): 'id' shadows the builtin of the same name; harmless
        # here, but worth renaming.
        for id in self._scada.df.Wind_turbine_name.unique():
            # Vane-direction stuck for 3 intervals -> NaN out all dependent
            # signals on those rows for this turbine.
            temp_flag = filters.unresponsive_flag(
                self._scada.df.loc[self._scada.df.Wind_turbine_name == id,
                                   'Va_avg'], 3)
            self._scada.df.loc[(self._scada.df.Wind_turbine_name == id) \
                & (temp_flag),['Ba_avg','P_avg','Ws_avg','Va_avg','Ot_avg', \
                'Ya_avg','Wa_avg']] = np.nan
            # Temperature changes slowly, so require a longer run (20) before
            # declaring the sensor frozen; only Ot_avg is blanked.
            temp_flag = filters.unresponsive_flag(
                self._scada.df.loc[self._scada.df.Wind_turbine_name == id,
                                   'Ot_avg'], 20)
            self._scada.df.loc[(self._scada.df.Wind_turbine_name == id) \
                & (temp_flag),'Ot_avg'] = np.nan

        # Put power in watts (the x1000 implies P_avg is in kW — confirm
        # against the source data dictionary)
        self._scada.df["Power_W"] = self._scada.df["P_avg"] * 1000

        # Convert pitch to range -180 to 180: wrap into [0, 360), then shift
        # anything above 180 down by a full turn.
        self._scada.df["Ba_avg"] = self._scada.df["Ba_avg"] % 360
        self._scada.df.loc[self._scada.df["Ba_avg"] > 180.0,"Ba_avg"] \
            = self._scada.df.loc[self._scada.df["Ba_avg"] > 180.0,"Ba_avg"] - 360.0

        # Calculate energy: W over a 10-min interval -> Wh, then /1000 -> kWh
        self._scada.df['energy_kwh'] = un.convert_power_to_energy(
            self._scada.df["Power_W"], self._scada_freq) / 1000

        logger.info("Converting field names to IEC 61400-25 standard")
        # Map to -25 standards

        # Note: there is no vane direction variable defined in -25, so
        # making one up
        scada_map = {
            "time": "time",
            "Wind_turbine_name": "id",
            "Power_W": "wtur_W_avg",
            "Ws_avg": "wmet_wdspd_avg",
            "Wa_avg": "wmet_HorWdDir_avg",
            "Va_avg": "wmet_VaneDir_avg",
            "Ya_avg": "wyaw_YwAng_avg",
            "Ot_avg": "wmet_EnvTmp_avg",
            "Ba_avg": "wrot_BlPthAngVal1_avg",
        }

        self._scada.df.rename(scada_map, axis="columns", inplace=True)

        # Remove the fields we are not yet interested in
        self._scada.df.drop(['Date_time', 'time', 'P_avg'],
                            axis=1,
                            inplace=True)

        ##############
        # METER DATA #
        ##############
        self._meter.load(self._path, "plant_data", "csv")  # Load Meter data

        # Create naive-datetime field from the UTC timestamp column
        self._meter.df['time'] = pd.to_datetime(
            self._meter.df.time_utc).dt.tz_localize(None)
        self._meter.df.set_index('time', inplace=True, drop=False)

        # Drop the fields we don't need
        self._meter.df.drop(
            ['time_utc', 'availability_kwh', 'curtailment_kwh'],
            axis=1,
            inplace=True)

        # Standardize the energy column name
        self._meter.df.rename(columns={'net_energy_kwh': 'energy_kwh'},
                              inplace=True)

        #####################################
        # Availability and Curtailment Data #
        #####################################
        self._curtail.load(self._path, "plant_data", "csv")  # Load Meter data

        # Create naive-datetime field from the UTC timestamp column
        self._curtail.df['time'] = pd.to_datetime(
            self._curtail.df.time_utc).dt.tz_localize(None)
        self._curtail.df.set_index('time', inplace=True, drop=False)

        # Already have availability and curtailment in kwh, so not much to do.

        # Drop the fields we don't need
        self._curtail.df.drop(['time_utc', 'net_energy_kwh'],
                              axis=1,
                              inplace=True)

        ###################
        # REANALYSIS DATA #
        ###################
        # merra2
        self._reanalysis._product['merra2'].load(self._path,
                                                 "merra2_la_haute_borne",
                                                 "csv")

        # calculate wind direction from the 50 m u, v components
        self._reanalysis._product['merra2'].df["winddirection_deg"] \
            = met.compute_wind_direction(self._reanalysis._product['merra2'].df["u_50"], \
            self._reanalysis._product['merra2'].df["v_50"])

        # Rename to standard names (mapping is standard name -> source column)
        self._reanalysis._product['merra2'].rename_columns({
            "time":
            "datetime",
            "windspeed_ms":
            "ws_50m",
            "u_ms":
            "u_50",
            "v_ms":
            "v_50",
            "temperature_K":
            "temp_2m",
            "rho_kgm-3":
            "dens_50m"
        })
        self._reanalysis._product['merra2'].normalize_time_to_datetime(
            "%Y-%m-%d %H:%M:%S")
        self._reanalysis._product['merra2'].df.set_index('time',
                                                         inplace=True,
                                                         drop=False)

        # Drop the leftover CSV index column and the raw datetime string
        self._reanalysis._product['merra2'].df.drop(['Unnamed: 0', 'datetime'],
                                                    axis=1,
                                                    inplace=True)

        # era5
        self._reanalysis._product['era5'].load(self._path,
                                               "era5_wind_la_haute_borne",
                                               "csv")

        # calculate wind direction from the 100 m u, v components
        self._reanalysis._product['era5'].df["winddirection_deg"] \
            = met.compute_wind_direction(self._reanalysis._product['era5'].df["u_100"], \
            self._reanalysis._product['era5'].df["v_100"])

        # NOTE(review): merra2 sources temperature from "temp_2m" while era5
        # uses "t_2m" — presumably each matches its product's CSV headers;
        # confirm against the source files.
        self._reanalysis._product['era5'].rename_columns({
            "time":
            "datetime",
            "windspeed_ms":
            "ws_100m",
            "u_ms":
            "u_100",
            "v_ms":
            "v_100",
            "temperature_K":
            "t_2m",
            "rho_kgm-3":
            "dens_100m"
        })
        self._reanalysis._product['era5'].normalize_time_to_datetime(
            "%Y-%m-%d %H:%M:%S")
        self._reanalysis._product['era5'].df.set_index('time',
                                                       inplace=True,
                                                       drop=False)

        # Drop the leftover CSV index column and the raw datetime string
        self._reanalysis._product['era5'].df.drop(['Unnamed: 0', 'datetime'],
                                                  axis=1,
                                                  inplace=True)
Example #5
0
    def prepare(self):
        """
        Do all loading and preparation of the data for this plant.

        Loads SCADA data from CSV, applies timestamp QC, range filtering,
        and unresponsive-sensor flagging, computes per-interval energy,
        and renames fields to IEC 61400-25 style names.
        """
        # Set time frequencies of data in minutes ('10T' = 10-minute)
        self._scada_freq = '10T'  # 10-min

        # Load meta data
        self._lat_lon = (48.4461, 5.5925)
        self._plant_capacity = 8.2  # MW
        self._num_turbines = 4
        self._turbine_capacity = 2.05  # MW

        ###################
        # SCADA DATA #
        ###################
        logger.info("Loading SCADA data")
        self._scada.load(self._path, "engie_scada", "csv")  # Load Scada data
        logger.info("SCADA data loaded")

        logger.info("Timestamp QC and conversion to UTC")
        # Get 'time' field in datetime format
        self._scada.df['time'] = pd.to_datetime(self._scada.df['time'])

        # Convert local to UTC time, simple shift forward since no DST present in data
        # NOTE(review): Timedelta(hours=0) is a no-op — presumably the data
        # is already UTC-aligned; confirm the intended offset.
        self._scada.df['time_utc'] = self._scada.df['time'] + pd.Timedelta(
            hours=0)

        # Remove duplicated (timestamp, turbine id) rows
        self._scada.df = self._scada.df[self._scada.df.duplicated(
            subset=['time', 'ID']) == False]

        # Set time as index (drop=False keeps it as a column too)
        self._scada.df['time'] = self._scada.df['time_utc']
        self._scada.df.set_index('time', inplace=True,
                                 drop=False)  # Set datetime as index

        logger.info(
            "Correcting for out of range of power, wind speed, and wind direction variables"
        )
        # Handle extrema values: keep wind speed in [0, 40], power in
        # [-1000, 2200] (kW per the note below), direction in [0, 360]
        self._scada.df = self._scada.df[
            (self._scada.df["wmet_wdspd_avg"] >= 0.0)
            & (self._scada.df["wmet_wdspd_avg"] <= 40.0)]
        self._scada.df = self._scada.df[
            (self._scada.df["wtur_W_avg"] >= -1000.0)
            & (self._scada.df["wtur_W_avg"] <= 2200.0)]
        self._scada.df = self._scada.df[
            (self._scada.df["wmet_wDir_avg"] >= 0.0)
            & (self._scada.df["wmet_wDir_avg"] <= 360.0)]

        logger.info("Flagging unresponsive sensors")
        # Flag values repeated for 3+ intervals (frozen sensors) and blank them
        temp_flag = filters.unresponsive_flag(self._scada.df["wmet_wdspd_avg"],
                                              3)
        self._scada.df.loc[temp_flag, 'wmet_wdspd_avg'] = np.nan
        temp_flag = filters.unresponsive_flag(self._scada.df["wmet_wDir_avg"],
                                              3)
        self._scada.df.loc[temp_flag, 'wmet_wDir_avg'] = np.nan

        # Put power in watts; note although the field name suggests 'watts', it was really reporting in kw
        self._scada.df["Power_W"] = self._scada.df["wtur_W_avg"] * 1000

        # Calculate energy (input is kW per the note above, so result is kWh)
        self._scada.df['energy_kwh'] = un.convert_power_to_energy(
            self._scada.df["wtur_W_avg"], self._scada_freq)

        logger.info("Converting field names to IEC 61400-25 standard")
        # Map to -25 standards
        # NOTE(review): renaming "Power_W" -> "wtur_W_avg" while the original
        # "wtur_W_avg" (kW) column still exists may yield duplicate column
        # labels — verify the old column is dropped or overwritten upstream.

        scada_map = {
            "time": "time",
            "ID": "id",
            "Power_W": "wtur_W_avg",
            "wmet_wdspd_avg": "wmet_wdspd_avg",
            "wmet_wDir_avg": "wmet_HorWd_Dir"
        }

        self._scada.df.rename(scada_map, axis="columns", inplace=True)

        # Remove the fields we are not yet interested in
        self._scada.df.drop(['time_utc'], axis=1, inplace=True)