Example #1
async def main():
	"""
	Main function of the application.
	:return: Nothing.
	"""
	print_header()
	timer_main = Timer()

	config = default_config()

	logger.info("start predicting new time")

	config["influx"]["drops"] = '["pm1", "pm4.0", "result", "table", "_time"]'
	config["influx"]["limit"] = "10000"
	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		data = client.get_data()
		imputed_data = impute_simple_imputer(data) # Impute
		avg_data = moving_average(imputed_data) # Average input
		logger.info(f"data len {len(avg_data)}")

		sns.set_theme(style="darkgrid")
		g = sns.jointplot(x="pm2.5", y="pm10", data=avg_data, kind="reg", truncate=False, xlim=(0, 40), ylim=(0, 40),
																										   color="m", height=7)
		g2 = sns.jointplot(x="temperature", y="humidity", data=avg_data, kind="reg", truncate=False, color="m", height=7)
		g3 = sns.jointplot(x="humidity", y="pm10", data=avg_data, kind="reg", truncate=False, color="m", height=7)
		g4 = sns.jointplot(x="temperature", y="pm10", data=avg_data, kind="reg", truncate=False, color="m", height=7)
Example #2
def test_lstm_model():
	n = 100000
	start = 0
	stop = 140
	# produce some test data: X contains sin, cos and y contains 1/(1+cos^2). Random noise is also added to both
	data_sin = np.sin(np.linspace(start, stop, n)) + (np.random.random(n) * 0.4 - 0.2)
	data_cos = np.cos(np.linspace(start, stop, n)) + (np.random.random(n) * 0.4 - 0.2)

	data_res = 1.0 / (1.0 + np.cos(np.linspace(start, stop, n)) ** 2) + (np.random.random(n) * 0.2 - 0.1)
	data = DataFrame([data_sin, data_cos, data_res], index=["sin", "cos", "res"]).transpose()

	# For fitting, we need x.shape == [n, v, f] and y.shape == [n, v, f] where:
	# - n is the number of samples
	# - v is the number of feature vectors in each sample
	# - f is the number of features
	# The sequence y[i, :-1, :] should equal x[i, 1:, :], i.e. is offset by one

	conf = default_config()
	conf['lstm']['features_in'] = '["sin", "cos", "res"]'
	conf['lstm']['features_out'] = '["sin", "cos", "res"]'
	model = train_lstm_model_predict(conf['lstm'], data)

	x_test, y_test = prepare_window_off_by_1(data[-50:], 20)
	y_pred = np.array([model.predict_next_n(x_test[i]) for i in range(x_test.shape[0])])  # model.predict(x_test, fh=10)
	y_pred_seq = model.predict_sequence_n(x_test[0], y_test.shape[0])

	plt.plot(y_test[:, -1, 0])  # blue
	plt.plot(y_pred[:, 0])  # green
	plt.plot(y_pred_seq[:, 0])  # orange
	plt.show()
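
The comment block above spells out the off-by-one window shapes that prepare_window_off_by_1 is expected to produce. A minimal sketch of such a helper under that assumption (illustrative only, not the project's actual implementation):

import numpy as np

def make_windows_off_by_1(values, window):
	# values: array of shape [t, f]; returns x, y of shape [n_windows, window, f],
	# where y holds the same windows shifted forward by one step, so y[i, :-1, :] == x[i, 1:, :]
	x = np.stack([values[i:i + window] for i in range(len(values) - window)])
	y = np.stack([values[i + 1:i + 1 + window] for i in range(len(values) - window)])
	return x, y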
Example #3
def __init__(self, config: SectionProxy = None):
    if config is None:
        config = default_config()['normalization']
    self.pm_max = float(config['pm_max'])
    self.humidity_max = float(config['humidity_max'])
    self.pressure_min = float(config['pressure_min'])
    self.pressure_max = float(config['pressure_max'])
    self.temperature_min = float(config['temperature_min'])
    self.temperature_max = float(config['temperature_max'])
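
The stored bounds suggest plain min-max scaling. A hypothetical method showing how they might be applied (illustrative, not part of the excerpt):

def normalize_temperature(self, value: float) -> float:
    # Min-max scaling of a temperature reading to [0, 1] using the configured bounds.
    return (value - self.temperature_min) / (self.temperature_max - self.temperature_min)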
Example #4
def graph_comparison_our_vs_hist():
    """
		Graphs a day of our data and the same day of historical data
	"""
    _from = "2021-05-10 00:00:00"
    _to = "2021-05-15 00:00:00"

    config = default_config()

    adapter_hist = HistDataAdapter(config, "graph_comparison_our_hist_hist")
    adapter_influx = InfluxSensorData(config,
                                      "graph_comparison_our_hist_influx")

    hist_data = adapter_hist.get_data(output=False)
    our_data = adapter_influx.get_data_8610()

    our_data.columns = [
        "Date", "Humidity [%] (Our)", "PM10 [µg/m³] (Our)",
        "Temperature [°C] (Our)"
    ]
    our_data.set_index("Date", inplace=True)

    # select some days
    our_data = our_data[our_data.index.to_series() < _to]
    our_data = our_data[our_data.index.to_series() >= _from]

    # prepare historical data
    hist_data.set_index("date", inplace=True)
    hist_data = hist_data[hist_data.index.to_series() < _to]
    hist_data = hist_data[hist_data.index.to_series() >= _from]

    hist_data.drop(labels=[
        "Zch_Stampfenbachstrasse.Pressure", "Zch_Stampfenbachstrasse.PM2.5"
    ],
                   axis=1,
                   inplace=True)
    hist_data.columns = [
        "PM10 [µg/m³] (Official)", "Humidity [%] (Official)",
        "Temperature [°C] (Official)"
    ]
    our_data.index = our_data.index.tz_localize(None)

    data = our_data.join(hist_data, how="outer")
    data.sort_index(axis=1, inplace=True)

    # plot our data
    palette = sns.color_palette(
        ["#2222ff", "#0000ff", "#777777", "#666666", "#ff2222", "#ff0000"])
    sns.set_theme(style="darkgrid")
    plot = sns.relplot(kind="line",
                       palette=palette,
                       data=data,
                       dashes=[(2, 2), "", (2, 2), "", (2, 2), ""])
    plot.tight_layout()
    plot.ax.set_title("Comparison Our Data vs Official Data")
    plot.fig.autofmt_xdate()
    plot.savefig("comparison_our_hist.png")
Example #5
def upload_influx_db(con: Connection, table: str, start_time: datetime = None):
	from influxdb_client import InfluxDBClient
	from influxdb_client.client.write_api import SYNCHRONOUS
	client = InfluxDBClient.from_config_file(default_config()['influx']['config'])
	write_api = client.write_api(write_options=SYNCHRONOUS)

	cols = set(con.execute(f'SELECT * FROM {table} LIMIT 1').keys())
	places = set(p.split('.', 1)[0] for p in cols)
	places_pm10 = set(p for p in places if f'{p}.PM10' in cols)
	places_pm2_5 = set(p for p in places if f'{p}.PM2.5' in cols)
	places_temperature = set(p for p in places if f'{p}.Temperature' in cols)
	places_rainfall = set(p for p in places if f'{p}.Rainfall' in cols)
	places_humidity = set(p for p in places if f'{p}.Humidity' in cols)
	places_pressure = set(p for p in places if f'{p}.Pressure' in cols)

	places = sorted(set(chain(places_pm10, places_pm2_5, places_temperature, places_rainfall)))

	q = " || '\n' || ".join(
		f"""'pollution,place={p} ' || SUBSTR(""" +
		(f"""IFNULL(',pm10=' || "{p}.PM10", '') || """ if p in places_pm10 else '') +
		(f"""IFNULL(',pm2.5=' || "{p}.PM2.5", '') || """ if p in places_pm2_5 else '') +
		f"""'', 2) || STRFTIME(' %s000000000', date) || '\n' || """ +

		f"""'weather,place={p} ' || SUBSTR(""" +
		(f"""IFNULL(',temperature=' || "{p}.Temperature", '') || """ if p in places_temperature else '') +
		(f"""IFNULL(',rainfall=' || "{p}.Rainfall", '') || """ if p in places_rainfall else '') +
		(f"""IFNULL(',pressure=' || "{p}.Pressure", '') || """ if p in places_pressure else '') +
		(f"""IFNULL(',humidity=' || "{p}.Humidity", '') || """ if p in places_humidity else '') +
		f"""'', 2) || STRFTIME(' %s000000000', date)"""

		for p in places
	)

	query = f"""SELECT {q} AS line_data FROM {table}"""
	if start_time is not None:
		query += f" WHERE date >= '{start_time.isoformat()}'"

	res = con.execute(query)

	sequence = [l for row in res for l in row['line_data'].split('\n') if '  ' not in l]

	print('SQL query:', query)
	# print('\n'.join(sequence))

	write_api.write(INFLUXDB_HIST_DATA_BUCKET, records=sequence)

	print('Done!')
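
The SQL above assembles InfluxDB line-protocol records directly in SQLite: a measurement name with a place tag, a space, comma-separated fields (SUBSTR(..., 2) strips the leading comma of the first field), a space, and a nanosecond timestamp built by appending nine zeros to STRFTIME's Unix seconds. Rows where every field is NULL end up with a double space and are dropped by the '  ' not in l filter. A single record therefore looks roughly like this (values are illustrative):

	pollution,place=Zch_Stampfenbachstrasse pm10=21.4,pm2.5=14.8 1620604800000000000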
Example #6
def main():

    global_config = dict(lr=[0.001, 0.0001],
                         schedule=[np.inf],
                         batch_size=64,
                         dev_every=1,
                         seed=0,
                         model=None,
                         use_nesterov=False,
                         gpu_no=0,
                         cache_size=32768,
                         momentum=0.9,
                         weight_decay=0.00001)
    builder = ConfigBuilder(default_config(), global_config)
    parser = builder.build_argparse()
    # parser.add_argument("--no_cuda", type=str2bool, nargs='?', const=True)

    config = builder.config_from_argparse(parser)
    if config["model_type"] == "EdgeCRNN":
        model = EdgeCRNN(width_mult=config["width_mult"])
        model = torch.nn.DataParallel(model)
    if config["model_type"] == "shuffleNet":
        from nets.ShuffleNetV2 import shufflenetv2
        model_shuffle = shufflenetv2(width_mult=config["width_mult"])
        model = torch.nn.DataParallel(model_shuffle)
    elif config["model_type"] == "mobileNet":
        from nets.MobileNetV2 import MobileNetV2
        model = MobileNetV2(width_mult=config["width_mult"])
    elif config["model_type"] == "mobileNetV3-Small":
        from nets.MobileNetV3 import MobileNetV3_Small
        model = MobileNetV3_Small()
    elif config["model_type"] == "mobileNetV3-Large":
        from utils.MobileNetV3 import MobileNetV3_Large
        model = MobileNetV3_Large()
    elif config["model_type"] == "Tpool2":
        from nets.Tpool2 import CNN
        model = CNN()
    else:
        pass

    config["model"] = model
    set_seed(config)
    if config["type"] == "train":
        train(config)
    elif config["type"] == "eval":
        evaluate(config)
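
One caveat in the branch chain above: if model_type matches none of the listed values, the final else: pass leaves model unassigned, and config["model"] = model then raises a NameError. An illustrative guard for that last branch (not in the original) would be:

    else:
        raise ValueError(f"Unsupported model_type: {config['model_type']}")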
Example #7
async def main():
	"""
	Main function of the application.
	:return: Nothing.
	"""
	print_header()
	timer_main = Timer()

	config = default_config()

	# read and prepare dataset for training
	# df_timeseries_complete = load_dataset("zurich_adapter", config)
	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		df_timeseries_complete = client.get_data().rename(columns={
			"humidity": "Live.Humidity",
			"pm10": "Live.PM10",
			"temperature": "Live.Temperature",
			"_time": "date"
		})

	df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improper filled rows
	imputed_timeseries = impute_simple_imputer(df_timeseries)
	smooth_timeseries = moving_average(imputed_timeseries)
	smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop row
	# Split training/testing data in 80%/20%
	df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

	# Define all models at our disposal
	models = [
		ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
		ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
		ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
		ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
		ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
	]

	# Train the models
	trained_models = await gather(*[to_thread(train_model, model=model, data=df_train_val) for model in models])
	[model.model.store(model.config) for model in trained_models]  # Stores if not existing. Does NOT OVERWRITE!!!

	# Test the generalization performance of our models
	forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]

	all: DataFrame = df_test.copy()
	all["Arima.PM10"] = forecast_test[0].values
	all["AutoArima.PM10"] = forecast_test[1].values
	all["ExpSmoothing.PM10"] = forecast_test[2].values
	all["LSTM.PM10"] = (forecast_test[3]['Live.PM10_Pred'])
	all["LSTMSeq.PM10"] = (forecast_test[4]['Live.PM10_Pred'])
	# all.to_csv(PROJECT_DIR / 'pm10_predictions.csv')

	print(all)
	logger.info(f"Script completed in {timer_main}.")
	logger.info("Terminating gracefully...")
	exit(0)  # NOTE: everything below this call never runs

	logger.info("start predicting new time")

	with InfluxSensorData(config=config, name="influx") as client:
		# Load the data from the server
		data = client.get_data().rename(columns={
			"humidity": "Live.Humidity",
			"pm10": "Live.PM10",
			"temperature": "Live.Temperature"
		})
		imputed_data = impute_simple_imputer(data) # Impute
		avg_data = moving_average(imputed_data) # Average input
		logger.debug("Forecasting")
		forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models] # Make predictions

		logger.info(forecast_list)
		forecast_dict = {
			"arima": forecast_list[0],
			"autoarima": forecast_list[1],
			"expsmoothing": forecast_list[2],
			"lstm": forecast_list[3].iloc[:, forecast_list[3].columns.get_loc("Live.PM10_Pred")],
			"lstm_seq": forecast_list[4].iloc[:, forecast_list[4].columns.get_loc("Live.PM10_Pred")]
		}

		forecast = pd.DataFrame(data=forecast_dict)
		logger.debug(forecast)
		forecast = forecast.mean(axis=1).head(n=50)
		logger.info(f"Forecasting finished with forecast value\n {forecast}")

		sns.set_theme(style="darkgrid")
		sns.lineplot(data=forecast)
Example #8
import numpy as np
import seaborn as sns

from pathlib import Path

from models import autoarima
from models import expsmoothing
from models import arima
from models import lstm

from preprocessing.imputing import impute_simple_imputer
from preprocessing.moving_average import moving_average

from utils.sqlite_utils import get_engine, get_time_series
from utils.config import default_config

# TODO: determine some sane forecasting horizon here
fh = 48
config = default_config()
from_excel = False


def write_model_graph(y_train, y_test, y_pred, name):
	data = np.array([
		np.append(y_train, [np.nan] * y_test.shape[0]),
		np.append([np.nan] * y_train.shape[0], y_test),
		np.append([np.nan] * y_train.shape[0], y_pred)
	]).transpose()
	
	idx = y_train.shape[0] - 1
	data[idx][1] = data[idx][0]
	data[idx][2] = data[idx][0]
	
	palette = sns.color_palette(["#0000ff", "#00bb00", "#ff0000"])
Example #9
            '|> pivot(columnKey: ["_field"], rowKey: ["_time"], valueColumn: "_value") // Instead of having a sequence of fields, get a table of entries\n'
            '|> filter(fn: (r) => r.device == "device3")'
            f'|> limit(n: {self.limit})// debug remove me after \n'
            '|> sort(columns: ["_time"]) // Before exit always sort as we want to have a timeline \n'
            '|> yield()\n').drop(labels=self.drops, axis=1)

    def send_data(self, value):
        point = Point("prediction").field("prediction",
                                          value).time(datetime.now())
        with self.client.write_api(write_options=SYNCHRONOUS) as write_api:
            write_api.write(point)

    def __enter__(self):
        self.client = InfluxDBClient.from_config_file(
            self.config["influx"]["config"],
            debug=self.config[self.name]["debug"].lower()
            in ['true', 't', 'yes'])
        return self

    def __exit__(self, type, value, traceback):
        self.client.close()


if __name__ == '__main__':
    conf = default_config()
    conf['DEFAULT']['bucket'] = 'hist_data'
    conf['DEFAULT']['start'] = '-60d'
    adapt = InfluxSensorData(conf, 'DEFAULT')
    data = adapt.get_data()
    print(data)
Example #10
async def main():
    """
	Main function of the application.
	:return: Nothing.
	"""
    print_header()
    timer_main = Timer()

    config = default_config()

    # read and prepare dataset for training
    df_timeseries_complete = load_dataset("zurich_adapter", config)

    df_timeseries = chop_first_fringe(
        df_timeseries_complete)  # Chop first improper filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(
        inplace=True
    )  # Make sure there really is no empty cell anymore, else drop row
    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries,
                                                      test_size=.20)

    # Define all models at our disposal
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima",
                    trainer=train_or_load_autoARIMA,
                    config=config),
        ModelHolder(name="expsmooting",
                    trainer=train_or_load_expSmoothing,
                    config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models
    trained_models = await gather(*[
        to_thread(train_model, model=model, data=df_train_val)
        for model in models
    ])
    [model.model.store(model.config) for model in trained_models
     ]  # Stores if not existing. Does NOT OVERWRITE!!!

    # Test the generalization performance of our models
    forecast_test = [
        model.model.predict(x=df_test, fh=5) for model in trained_models
    ]

    print(forecast_test)

    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.PM10', 'Zch_Stampfenbachstrasse.PM10_Pred']])
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.Humidity', 'Zch_Stampfenbachstrasse.Temperature']])
    # plt.show()

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")

    logger.info("start predicting new time")

    forecast_dict = {
        "arima": pd.Series(),
        "autoarima": pd.Series(),
        "expsmoothing": pd.Series(),
        "lstm": pd.Series(),
        "lstm_seq": pd.Series()
    }
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(
            columns={
                "humidity": "Live.Humidity",
                "pm10": "Live.PM10",
                "temperature": "Live.Temperature"
            })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input
        logger.debug("Forecasting")
        forecast_list = [
            model.model.predict(x=avg_data, fh=5) for model in trained_models
        ]  # Make predictions

        logger.info(forecast_list)
        forecast_dict = {
            "arima":
            forecast_list[0],
            "autoarima":
            forecast_list[1],
            "expsmoothing":
            forecast_list[2],
            "lstm":
            forecast_list[0].iloc[:, forecast_list[0].columns.
                                  get_loc("Live.PM10_Pred")],  # was item 3
            "lstm_seq":
            forecast_list[1].iloc[:, forecast_list[1].columns.get_loc(
                "Live.PM10_Pred")]  # was item 4
        }

    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)
    forecast = forecast.mean(axis=1).head(n=50)
    forecast.name = "forecast"
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    config["influx"]["limit"] = "150"
    config["influx"][
        "drops"] = '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)
        data.index = range(len(data))
        data = data.iloc[:, 0]
        print(f"data {data}")

    sns.set_theme(style="darkgrid")
    sns.lineplot(data=[forecast, data])
Example #11
def graph_typical_day():
    """
		Graphs the typical day down to a 90th percentile
	"""

    _from = "2020-05-04 00:00:00"
    _to = "2021-05-03 00:00:00"

    config = default_config()

    adapter_hist = HistDataAdapter(config, "graph_typical_day")

    hist_data = adapter_hist.get_data(output=False).drop(
        labels=["Zch_Stampfenbachstrasse.PM2.5"], axis=1)
    hist_data.set_index("date", inplace=True)
    hist_data = hist_data[hist_data.index.to_series() <= _to]
    hist_data = hist_data[hist_data.index.to_series() >= _from]

    data_pm10 = {i: [] for i in range(24)}
    data_humidity = {i: [] for i in range(24)}
    data_temperature = {i: [] for i in range(24)}

    for line in hist_data.to_records():
        hour = pd.Timestamp(line["date"]).hour
        hum = line["Zch_Stampfenbachstrasse.Humidity"]
        temp = line["Zch_Stampfenbachstrasse.Temperature"]
        pm10 = line["Zch_Stampfenbachstrasse.PM10"]

        if not np.isnan(pm10):
            data_pm10[hour].append(pm10)
        if not np.isnan(hum):
            data_humidity[hour].append(hum)
        if not np.isnan(temp):
            data_temperature[hour].append(temp)

    x_pm10 = []
    y_pm10 = []
    x_humidity = []
    y_humidity = []
    x_temperature = []
    y_temperature = []

    for x in range(24):
        for y in data_pm10[x]:
            x_pm10.append(x)
            y_pm10.append(y)
        for y in data_humidity[x]:
            x_humidity.append(x)
            y_humidity.append(y)
        for y in data_temperature[x]:
            x_temperature.append(x)
            y_temperature.append(y)

    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=x_pm10, y=y_pm10, kind="line", ci="sd")
    plot.set(xlabel="Time of Day",
             ylabel="PM10 [µg/m³]",
             title="Typical Day of PM10")
    plot.tight_layout()
    plot.savefig("typical_pm10.png")

    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=x_humidity, y=y_humidity, kind="line", ci="sd")
    plot.set(xlabel="Time of Day",
             ylabel="Humidity [%]",
             title="Typical Day of Humidity")
    plot.tight_layout()
    plot.savefig("typical_humidity.png")

    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=x_temperature, y=y_temperature, kind="line", ci="sd")
    plot.set(xlabel="Time of Day",
             ylabel="Temperature [°C]",
             title="Typical Day of Temperature")
    plot.tight_layout()
    plot.savefig("typical_temperature.png")