def temporal_validation(
        result_paths,
        ed_fueltype_yh,
        elec_factored_yh,
        plot_criteria
    ):
    """Validate national hourly electricity data against the summed
    modelled hourly demand of all regions.

    Because the total annual modelled and real demands do not match
    (different data sources; Northern Ireland is not included in the
    validation data), the correction-factor scaled demand
    ``elec_factored_yh`` is used for the comparison.

    Arguments
    ---------
    result_paths : dict
        Paths; must contain the key 'data_results_validation'
    ed_fueltype_yh : array
        Fuel type specific yh energy demand
    elec_factored_yh : array
        Correction-factor scaled national hourly electricity demand
    plot_criteria : bool
        Criteria to show plots or not
    """
    # ----------------
    # Plot a full year
    # ----------------
    days_to_plot = list(range(0, 365))

    elec_national_data.compare_results(
        'validation_temporal_electricity_8760h.pdf',
        result_paths['data_results_validation'],
        elec_factored_yh,
        ed_fueltype_yh,
        'all_submodels',
        days_to_plot,
        plot_crit=plot_criteria)

    # Plot four weeks (one of each season)
    winter_week = list(range(
        date_prop.date_to_yearday(2015, 1, 12),
        date_prop.date_to_yearday(2015, 1, 19)))  # Jan
    spring_week = list(range(
        date_prop.date_to_yearday(2015, 5, 11),
        date_prop.date_to_yearday(2015, 5, 18)))  # May
    summer_week = list(range(
        date_prop.date_to_yearday(2015, 7, 13),
        date_prop.date_to_yearday(2015, 7, 20)))  # Jul
    autumn_week = list(range(
        date_prop.date_to_yearday(2015, 10, 12),
        date_prop.date_to_yearday(2015, 10, 19)))  # Oct

    days_to_plot = winter_week + spring_week + summer_week + autumn_week

    elec_national_data.compare_results(
        'validation_temporal_electricity_weeks_selection.pdf',
        result_paths['data_results_validation'],
        elec_factored_yh,
        ed_fueltype_yh,
        'all_submodels',
        days_to_plot,
        plot_crit=plot_criteria)
def test_date_to_yearday():
    """date_to_yearday returns the zero-indexed day of the year."""
    # 13 June 2015 is the 164th calendar day -> index 163
    assert date_prop.date_to_yearday(2015, 6, 13) == 164 - 1
def assign_hes_data_to_year(nr_of_appliances, hes_data, base_yr):
    """Fill every day of the base year with the matching HES data.

    Arguments
    ----------
    nr_of_appliances : dict
        Defines how many appliance types are stored
        (max 10 provided in original hes file)
    hes_data : array
        HES raw data for every month and daytype and appliance
    base_yr : float
        Base year to generate shapes

    Returns
    -------
    year_raw_values : array
        Energy data for every day in the base year for every appliance
        (shape: yeardays x hours x appliances)
    """
    year_raw_values = np.zeros((365, 24, nr_of_appliances), dtype=float)

    # All dates of the base year
    all_dates = date_prop.fullyear_dates(
        start=date(base_yr, 1, 1),
        end=date(base_yr, 12, 31))

    # Map every date onto its position in the year array
    for day_date in all_dates:
        struct = day_date.timetuple()

        month_idx = date_prop.get_month_from_yeraday(
            struct.tm_year, struct.tm_yday)
        day_idx = date_prop.date_to_yearday(
            struct.tm_year, struct.tm_mon, struct.tm_mday)
        day_type = date_prop.get_weekday_type(day_date)

        # Copy the matching daytype/month profile from the HES raw data
        year_raw_values[day_idx] = hes_data[day_type][month_idx]

    return year_raw_values
def run(
        path_files,
        path_out_files,
        crit_missing_values=100,
        crit_nr_of_zeros=500,
        nr_daily_zeros=10
    ):
    """Iterate weather data from MIDAS and generate annual hourly
    temperature data files for every weather station by interpolating
    missing values.

    http://data.ceda.ac.uk/badc/ukmo-midas/data/WH/yearly_files/
    The data are obtained from the Centre for Environmental Data Analysis

    [Download link]http://data.ceda.ac.uk/badc/ukmo-midas/data/WH/yearly_files/ ()
    http://badc.nerc.ac.uk/artefacts/badc_datadocs/ukmo-midas/WH_Table.html (metadata)

    Weather Stations information: http://badc.nerc.ac.uk/search/midas_stations/
    http://badc.nerc.ac.uk/cgi-bin/midas_stations/search_by_name.cgi.py?name=&minyear=&maxyear=

    The values are then written to a created folder as follows:

        year__stationname.txt

    Arguments
    ---------
    path_files : str
        Path to folder with original weather data files
    path_out_files : str
        Path to folder where the cleaned data are stored
    crit_missing_values : int
        Maximum number of missing values per year before a
        weather station is discarded
    crit_nr_of_zeros : int
        Maximum number of zeros per year before a weather
        station is discarded
    nr_daily_zeros : int
        Maximum length of a sequence of zero values before the
        station is discarded

    Note
    -----
    - In case of a leap year the 29 of February is ignored
    """
    print("... starting to clean original weather files")

    # Stations which are outside of the UK and are ignored
    stations_outside_UK = [
        1605,  # St. Helena
        1585,  # Gibraltar
        1609]  # Falkland Islands

    # Create out folder to store station specific csv
    if os.path.isdir(path_out_files):
        basic_functions.delete_folder(path_out_files)
        os.mkdir(path_out_files)
    else:
        os.mkdir(path_out_files)

    # Placeholder value for missing entry
    placeholder_value = np.nan

    # Read all files, sorted according to year
    all_annual_raw_files = os.listdir(path_files)
    all_annual_raw_files.sort()

    # Annual temperature file
    for file_name in all_annual_raw_files:
        print(" ")
        print("... reading csv file: " + str(file_name), flush=True)
        print(" ")
        path_to_csv = os.path.join(path_files, file_name)

        temp_stations = {}

        with open(path_to_csv, 'r') as csvfile:
            read_lines = csv.reader(csvfile, delimiter=',')

            for row in read_lines:
                date_measurement = row[0].split(" ")
                year = int(date_measurement[0].split("-")[0])
                month = int(date_measurement[0].split("-")[1])
                day = int(date_measurement[0].split("-")[2])
                hour = int(date_measurement[1][:2])

                # 0-indexed day of year
                yearday = date_prop.date_to_yearday(year, month, day)

                # BUGFIX: the 29 Feb (index 59 in a leap year) is ignored,
                # so only days *after* it must shift down by one. The old
                # code subtracted 1 unconditionally, which misplaced all
                # Jan/Feb readings and sent 1 Jan to index -1 (wrapping
                # into the end of the 8760 array).
                if date_prop.is_leap_year(year) and yearday > 59:
                    yearday = yearday - 1

                year_hour = (yearday * 24) + hour

                # Weather station id
                station_id = int(row[5])

                # If station is outside uk or leap year day
                if (station_id in stations_outside_UK) or (month == 2 and day == 29):
                    pass
                else:
                    # Air temperature in Degrees Celsius
                    if row[35] == ' ' or row[35] == '':  # If no data point
                        air_temp = placeholder_value
                    else:
                        air_temp = float(row[35])

                    # Add weather station if not already added to dict
                    if station_id not in temp_stations:
                        temp_stations[station_id] = np.zeros((8760), dtype="float")

                    temp_stations[station_id][year_hour] = air_temp

        # ------------------------------------------
        # Interpolate missing values (np.nan)
        # ------------------------------------------
        temp_stations_cleaned_reshaped = {}

        for station in list(temp_stations.keys()):

            # Number of empty values
            nans, x = nan_helper(temp_stations[station])
            nr_of_nans = list(nans).count(True)

            # Number of zeros.
            # BUGFIX: a Counter lookup cannot raise, so the old
            # `try/except KeyboardInterrupt` was both dead code and
            # swallowed Ctrl-C if it ever did fire; removed.
            nr_of_zeros = collections.Counter(temp_stations[station])[0]

            # Longest sequence of zeros
            max_cnt_zeros = count_sequence_of_zeros(temp_stations[station])

            if nr_of_nans > crit_missing_values or nr_of_zeros > crit_nr_of_zeros or max_cnt_zeros > nr_daily_zeros:
                print("Zeros in sequence: {} nr_of_nans: {} nr_of_zeros: {} Ignored station: {} {}".format(
                    max_cnt_zeros, nr_of_nans, nr_of_zeros, station, year), flush=True)
            else:
                # Interpolate missing np.nan values
                temp_stations[station][nans] = np.interp(
                    x(nans), x(~nans), temp_stations[station][~nans])

                # Test if still np.nan value
                list_with_all_nan_args = list(np.argwhere(np.isnan(temp_stations[station])))
                if list_with_all_nan_args:
                    raise Exception("Still has np.nan entries")

                # Reshape to (day, hour) array
                temp_stations_cleaned_reshaped[station] = temp_stations[station].reshape(365, 24)

        # Write temperature data out to csv file
        for station_name, temp_values in temp_stations_cleaned_reshaped.items():
            file_name = "{}__{}.{}".format(year, station_name, "txt")
            path_out = os.path.join(path_out_files, file_name)
            np.savetxt(
                path_out,
                temp_values,
                delimiter=",")

        print("--Number of stations '{}'".format(len(list(temp_stations_cleaned_reshaped.keys()))))

    print("... finished cleaning weather data")
def read_raw_elec_2015(path_to_csv, year=2015):
    """Read national electricity values provided in MW and convert to GWh.

    Arguments
    ---------
    path_to_csv : str
        Path to csv file
    year : int
        Year of data

    Returns
    -------
    elec_data_indo : array
        Hourly INDO electricity in GWh (INDO - National Demand)
    elec_data_itsdo : array
        Hourly ITSDO electricity in GWh (Transmission System Demand)

    Note
    -----
    Half hourly measurements are aggregated to hourly values.

    Necessary data preparation: On 29 March and 25 Octobre there are 46
    and 48 values because of the changing of the clocks. The 25 Octobre
    value is omitted, the 29 March hour interpolated in the csv file.

    Source
    ------
    http://www2.nationalgrid.com/uk/Industry-information/electricity-transmission-operational-data/
    For more information on INDO and ISTDO see DemandData Field
    Descriptions file:
    http://www2.nationalgrid.com/WorkArea/DownloadAsset.aspx?id=8589934632

    National Demand is calculated as a sum of generation based on
    National Grid operational generation metering.
    """
    elec_data_indo = np.zeros((365, 24), dtype="float")
    elec_data_itsdo = np.zeros((365, 24), dtype="float")

    with open(path_to_csv, 'r') as csvfile:
        rows = csv.reader(csvfile, delimiter=',')
        next(rows)  # skip headings

        hour = 0
        second_half_hour = False  # toggles between the two half-hour rows

        for row in rows:
            date_fields = row[0].split("-")
            month = basic_functions.get_month_from_string(date_fields[1])
            yearday = date_prop.date_to_yearday(year, month, int(date_fields[0]))

            if second_half_hour:
                second_half_hour = False

                # Sum first and second half-hour measurement
                full_hour_indo = first_half_indo + float(row[2])
                full_hour_itsdo = first_half_itsdo + float(row[4])

                # Convert MW to GWh (input is MW aggregated over two
                # half-hourly measurements, therefore divide by 0.5)
                elec_data_indo[yearday][hour] = conversions.mw_to_gwh(full_hour_indo, 0.5)
                elec_data_itsdo[yearday][hour] = conversions.mw_to_gwh(full_hour_itsdo, 0.5)

                hour += 1
            else:
                second_half_hour = True
                first_half_indo = float(row[2])   # INDO - National Demand
                first_half_itsdo = float(row[4])  # Transmission System Demand

            # Wrap around to the next day
            if hour == 24:
                hour = 0

    return elec_data_indo, elec_data_itsdo
def run(path_files, path_out_files, path_weather_stations, crit_min_max=False):
    """Iterate weather data from MIDAS and write out, per year, the daily
    minimum/maximum temperature per weather station together with the
    station coordinates.

    http://data.ceda.ac.uk/badc/ukmo-midas/data/WH/yearly_files/
    The data are obtained from the Centre for Environmental Data Analysis

    http://badc.nerc.ac.uk/artefacts/badc_datadocs/ukmo-midas/WH_Table.html (metadata)
    Weather Stations information: http://badc.nerc.ac.uk/search/midas_stations/

    Outputs written per annual input file:
        {year}_stations.csv   station_id / latitude / longitude table
        {year}_t_min.npy      (stations x 365) daily minimum temperatures
        {year}_t_max.npy      (stations x 365) daily maximum temperatures

    Arguments
    ---------
    path_files : str
        Path to folder with original weather data files
    path_out_files : str
        Path to folder where the cleaned data are stored
    path_weather_stations : str
        Path to the raw weather station coordinate file
    crit_min_max : bool
        If True, daily t_min/t_max arrays are created and updated

    Note
    -----
    - In case of a leap year the 29 of February is ignored
    - NOTE(review): when crit_min_max is False, the t_min/t_max arrays
      for a station are never initialised, yet the first-reading branch
      below still writes to them — this looks like it would raise
      KeyError; confirm the function is only ever called with
      crit_min_max=True.
    """
    logging.info("... starting to clean original weather files")

    # Load coordinates of weather stations
    weather_stations = data_loader.read_weather_stations_raw(
        path_weather_stations)

    # Stations which are outside of the UK and are ignored
    stations_outside_UK = [
        1605,  # St. Helena
        1585,  # Gibraltar
        1609]  # Falkland Islands

    # Create out folder to store station specific csv
    if os.path.isdir(path_out_files):
        basic_functions.delete_folder(path_out_files)
        os.mkdir(path_out_files)
    else:
        os.mkdir(path_out_files)

    # Placeholder value for missing entry
    placeholder_value = np.nan

    # Read all files, sorted according to year
    all_annual_raw_files = os.listdir(path_files)
    all_annual_raw_files.sort()

    # Annual temperature file
    for file_name in all_annual_raw_files:
        logging.info("... reading in file: %s", file_name)
        path_to_csv = os.path.join(path_files, file_name)

        # station_id -> list of yeardays with at least one reading
        temp_stations = {}
        # station_id -> {'t_min': array(365), 't_max': array(365)}
        temp_stations_min_max = defaultdict(dict)

        with open(path_to_csv, 'r') as csvfile:
            read_lines = csv.reader(csvfile, delimiter=',')

            for row in read_lines:
                date_measurement = row[0].split(" ")
                year = int(date_measurement[0].split("-")[0])
                month = int(date_measurement[0].split("-")[1])
                day = int(date_measurement[0].split("-")[2])
                hour = int(date_measurement[1][:2])

                # 0-indexed day of year (see test_date_to_yearday)
                yearday = date_prop.date_to_yearday(year, month, day)

                # Subtract because 29 Feb is later ignored.
                # NOTE(review): the subtraction is unconditional, so days
                # *before* 29 Feb in a leap year also shift (1 Jan -> -1);
                # confirm whether only days after 29 Feb should shift.
                if date_prop.is_leap_year(year):
                    yearday = yearday - 1
                else:
                    pass

                # NOTE(review): year_hour appears to be unused in this
                # variant of the cleaner (only daily min/max are kept).
                year_hour = (yearday * 24) + hour

                # Weather station id
                station_id = int(row[5])

                # If station is outside uk or leap year day
                if (station_id in stations_outside_UK) or (month == 2 and day == 29):
                    pass
                else:
                    # Air temperature in Degrees Celsius
                    if row[35] == ' ' or row[35] == '':  # If no data point
                        air_temp = placeholder_value
                    else:
                        air_temp = float(row[35])

                    # Add weather station if not already added to dict
                    if station_id not in temp_stations:
                        if crit_min_max:
                            temp_stations_min_max[station_id][
                                't_min'] = np.zeros((365), dtype="float")
                            temp_stations_min_max[station_id][
                                't_max'] = np.zeros((365), dtype="float")
                        temp_stations[station_id] = []
                    else:
                        pass

                    # `is not` identity check against the np.nan object
                    # assigned above (works because placeholder_value is
                    # assigned by reference, never recomputed)
                    if air_temp is not placeholder_value:
                        # First reading of this day: seed both min and max
                        if yearday not in temp_stations[station_id]:
                            temp_stations_min_max[station_id]['t_min'][
                                yearday] = air_temp
                            temp_stations_min_max[station_id]['t_max'][
                                yearday] = air_temp
                            temp_stations[station_id].append(yearday)

                        if crit_min_max:
                            # Update min and max daily temperature
                            if air_temp < temp_stations_min_max[station_id][
                                    't_min'][yearday]:
                                temp_stations_min_max[station_id]['t_min'][
                                    yearday] = air_temp
                            if air_temp > temp_stations_min_max[station_id][
                                    't_max'][yearday]:
                                temp_stations_min_max[station_id]['t_max'][
                                    yearday] = air_temp

        # ------------------------
        # Delete weather stations with missing daily data inputs
        # NOTE(review): the arrays are zero-initialised and only ever
        # written with parsed floats, so it is unclear how np.nan entries
        # would appear here — confirm whether this guard ever fires.
        # ------------------------
        stations_to_delete = []
        for station_id in temp_stations_min_max.keys():
            nans, x = nan_helper(temp_stations_min_max[station_id]['t_min'])
            nr_of_nans_t_min = list(nans).count(True)
            nans, x = nan_helper(temp_stations_min_max[station_id]['t_max'])
            nr_of_nans_t_max = list(nans).count(True)

            if nr_of_nans_t_min > 0 or nr_of_nans_t_max > 0:
                print("Station '{}' contains {} {} .nan values and is deleted".
                      format(station_id, nr_of_nans_t_min, nr_of_nans_t_max))
                stations_to_delete.append(station_id)

        print("Number of stations to delete: {}".format(
            len(stations_to_delete)))

        for i in stations_to_delete:
            del temp_stations_min_max[i]

        # --------------------
        # Write out files (one set per annual input file; `year` holds
        # the value parsed from the last csv row of this file)
        # --------------------
        path_out_stations = os.path.join(path_out_files,
                                         '{}_stations.csv'.format(str(year)))
        path_out_t_min = os.path.join(path_out_files,
                                      "{}_t_min.npy".format(str(year)))
        path_out_t_max = os.path.join(path_out_files,
                                      "{}_t_max.npy".format(str(year)))

        # Drop stations with no known coordinates
        stations_to_delete = []
        for name in temp_stations_min_max.keys():
            try:
                _ = weather_stations[name]['latitude']
                _ = weather_stations[name]['longitude']
            except KeyError:
                print(
                    "... no coordinates are available for weather station '{}'"
                    .format(name))
                stations_to_delete.append(name)

        print("... number of stations to delete: {}".format(
            len(stations_to_delete)))

        for name in stations_to_delete:
            del temp_stations_min_max[name]

        stations = list(temp_stations_min_max.keys())

        # Station coordinate table
        out_list = []
        for station_name in stations:
            out_list.append([
                station_name,
                weather_stations[station_name]['latitude'],
                weather_stations[station_name]['longitude']
            ])

        # Write station table as csv
        df = pd.DataFrame(np.array(out_list),
                          columns=['station_id', 'latitude', 'longitude'])
        df.to_csv(path_out_stations, index=False)

        # Stack per-station daily arrays; row order matches `stations`
        stations_t_min = list(i['t_min'] for i in temp_stations_min_max.values())
        stations_t_max = list(i['t_max'] for i in temp_stations_min_max.values())
        stations_t_min = np.array(stations_t_min)
        stations_t_max = np.array(stations_t_max)

        np.save(path_out_t_min, stations_t_min)
        np.save(path_out_t_max, stations_t_max)

    logging.info("... finished cleaning weather data")