def __open_or_create(self):
    """Read the stored configuration file, creating it first if it is missing.

    The configuration folder is created when it does not exist. When the
    configuration file is absent, the sections ``LastPrediction``,
    ``LastParse`` and ``LastFit`` are added, each with a ``date`` option
    holding a timestamp string: the current moment for ``LastFit`` and one
    day earlier for the other two. The result is then saved. When the file
    already exists it is simply read.
    """
    if not path.exists(self.config_folder):
        makedirs(self.config_folder)

    config_path = self.config_folder + self.config_name
    if path.exists(config_path):
        self.config.read(config_path)
        return

    for section in ('LastPrediction', 'LastParse', 'LastFit'):
        self.config.add_section(section)

    day_ago = get_date_now() - timedelta(days=1)
    fit_stamp = str(get_date_now().timestamp())
    day_ago_stamp = str(day_ago.timestamp())

    self.config.set('LastFit', 'date', fit_stamp)
    self.config.set('LastParse', 'date', day_ago_stamp)
    self.config.set('LastPrediction', 'date', day_ago_stamp)
    self.__save_config()
def fit_all_old(self, back_days=7):
    """Additionally train every model that already exists on disk.

    Nothing is done when fewer than ``back_days`` whole days have passed
    since the last recorded fit date. Otherwise each pollution type /
    station pair whose ``<pollution>_mg_m3_<mosecom_id>.h5`` file exists in
    ``self.single_folder`` is passed to ``fit_old``, and the last fit date
    in the configuration is then set to the current moment.

    :param back_days: minimum number of days between two fits; also
        forwarded to ``fit_old`` as the length of the training window
    """
    last_fit_timestamp = float(self.config.get_last_fit_date())
    last_fit_date = date_from_timestamp(last_fit_timestamp)
    if (get_date_now() - last_fit_date).days < back_days:
        return

    stations = get_stations()
    pollutions = [p.type for p in get_all_pollution_types()]

    for pollution in pollutions:
        for station in stations:
            model_name = f'{pollution}_mg_m3_{station.mosecom_id}'
            if not path.exists(self.single_folder + model_name + '.h5'):
                continue
            # Forward back_days so a non-default window is honoured instead
            # of silently falling back to fit_old's own default.
            self.fit_old(model_name, station.mosecom_id, station.id,
                         pollution, back_days=back_days)

    self.config.set_last_fit_date(str(get_date_now().timestamp()))
def run_prediction():
    """Run the forecasting pipeline when data was parsed after the last forecast.

    The last prediction and last parse timestamps are read from the
    configuration. If the last prediction is not older than the last parse,
    nothing happens. Otherwise the single, neighborhood and aggregator
    predictors are run in that order for three hourly steps, the existing
    models are additionally trained, and the last prediction date is updated.
    """
    config = Config()
    predicted_at = date_from_timestamp(
        float(config.get_last_prediction_date()))
    parsed_at = date_from_timestamp(float(config.get_last_parse_date()))

    # No new data since the previous forecast
    if predicted_at >= parsed_at:
        return

    single_predictor.predict(predict_range=3)
    neighborhood_predictor.predict(predict_range=3)
    aggregator_predictor.predict(predict_range=3)

    fit_predictors.fit_all_old(back_days=7)

    now_stamp = str(get_date_now().timestamp())
    config.set_last_prediction_date(now_stamp)
def predict(self, predict_range: int = 1):
    """Forecast pollution for every station ``predict_range`` hours ahead.

    For each station and each pollution type that has a current
    measurement, the matching model is preloaded and applied step by step:
    the forecast for one hour is used as the input value for the next hour.
    Every forecast is stored through ``add_pollution_forecast`` with
    ``predictor='single'``. The method returns nothing.

    Station / pollution pairs whose model cannot be preloaded are skipped
    and the reason is logged.

    :param predict_range: number of hourly forecast steps to produce
    """
    import logging

    logger = logging.getLogger(__name__)
    date = get_date_now()

    # The pollution types do not depend on the station: fetch them once
    pollution_entities = list(get_all_pollution_types())

    for station in get_stations():
        station_name = station.to_dict()['mosecom_id']

        # Current weather and the weather forecast for each hourly step
        weather = get_weather(station)[0].to_dict()
        weather_forecast = [
            get_weather_forecast(station, hour)[0].to_dict()
            for hour in range(1, predict_range + 1)
        ]

        for pollution_type in pollution_entities:
            # Current pollution at the station
            value = get_pollutions(station=station,
                                   pollution_type=pollution_type)
            if not value:
                continue

            # predictions[0] is the measured value, predictions[i] is the
            # forecast for hour i
            predictions = [None] * (predict_range + 1)
            predictions[0] = value[0].to_dict()['mg_m3_value']

            try:
                self.preload_model(pollution_type.to_dict()['type'],
                                   station_name)
            except Exception as exc:
                # Best effort: skip this pair, but leave a trace of why
                logger.warning(
                    'Skipping single forecast of %s for station %s: '
                    'model preload failed (%s)',
                    pollution_type, station_name, exc)
                continue

            for i in range(predict_range):
                raw_prediction = self.__make_predict(weather,
                                                     weather_forecast[i],
                                                     predictions[i])
                predictions[i + 1] = raw_prediction

                # Store the forecast in the database
                add_pollution_forecast(station=station,
                                       date=date,
                                       delta_hour=i + 1,
                                       predictor='single',
                                       pollution_type=pollution_type,
                                       value=raw_prediction)
def fit_old(self, model_name: str, station_name: str, station_id: int,
            pollution_type: str, back_days=7):
    """Additionally train one existing model on recent observations.

    Pollution, weather and weather forecast records of the station from the
    last ``back_days`` days are loaded and joined by hour: every pollution
    sample is paired with the weather and weather forecast of the same hour
    and with the pollution value of the following hour, which serves as the
    training target. Samples with any part missing are dropped. If nothing
    remains, the method returns without touching the model.

    Before training, the current model is saved to ``self.archive_folder``
    under a timestamped name; the trained model then overwrites
    ``<model_name>.h5`` in ``self.single_folder``.

    :param model_name: name of the existing model file, without extension
    :param station_name: station name the model belongs to
    :param station_id: id of the station the model belongs to
    :param pollution_type: pollutant type
    :param back_days: number of past days to take training data from
    """
    def _hour_key(moment):
        # Full calendar hour, so windows longer than a month do not mix
        # equal day numbers of different months
        return moment.year, moment.month, moment.day, moment.hour

    def _first_by_hour(records, field):
        # Keep the first record of every hour, like a first-match search
        index = {}
        for record in records:
            index.setdefault(_hour_key(record[field]), record)
        return index

    back_time = get_date_now() - timedelta(days=back_days)

    back_pollution = Pollution.select(
        lambda p: p.datetime >= back_time
        and p.station_id.id == station_id
        and p.pollution_type.type == pollution_type)
    back_pollution_list = [p.to_dict() for p in back_pollution]

    back_weather = Weather.select(
        lambda w: w.datetime >= back_time
        and w.station_id.id == station_id)
    back_weather_list = [w.to_dict() for w in back_weather]

    back_weather_forecast = WeatherForecast.select(
        lambda wf: wf.obs_datetime >= back_time
        and wf.station_id.id == station_id)
    back_weather_forecast_list = [wf.to_dict()
                                  for wf in back_weather_forecast]

    # Hour-keyed lookups replace a linear scan per pollution sample
    pollution_by_hour = _first_by_hour(back_pollution_list, 'datetime')
    weather_by_hour = _first_by_hour(back_weather_list, 'datetime')
    forecast_by_hour = _first_by_hour(back_weather_forecast_list,
                                      'obs_datetime')

    df = pd.DataFrame([], columns=SINGLE_COLUMNS)
    for pollution in back_pollution_list:
        current_hour = _hour_key(pollution['datetime'])
        # The target is the measurement one hour later; looking it up at
        # the current hour would make the target equal to the input
        following_hour = _hour_key(pollution['datetime']
                                   + timedelta(hours=1))

        weather = weather_by_hour.get(current_hour)
        weather_forecast = forecast_by_hour.get(current_hour)
        next_pollution = pollution_by_hour.get(following_hour)

        if weather and weather_forecast and next_pollution:
            merged = merge_auto_fit_data(weather, weather_forecast,
                                         pollution['mg_m3_value'],
                                         next_pollution['mg_m3_value'])
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # kept because the type returned by merge_auto_fit_data is not
            # visible here - confirm before switching to pd.concat
            df = df.append(merged, ignore_index=True)

    df = predict_mapping_df(df)
    df = df.dropna()
    df = df.astype(float)
    if df.empty:
        return

    self.model_folder = self.single_folder
    self.scaler_folder = self.single_minmaxscaler_folder
    self.preload_model(pollution_type, station_name)

    # The column at position 1 is the target; all others are model inputs
    target_column = df.columns[1]
    y_data = pd.DataFrame(df[target_column])
    y_data = pd.DataFrame(self.predictor.yscaler.transform(y_data))
    x_data = df.drop(target_column, axis=1)
    x_data = pd.DataFrame(self.predictor.xscaler.transform(x_data))

    # Keep a timestamped copy of the model before it is overwritten
    archive_dir = self.archive_folder
    if not path.exists(archive_dir):
        makedirs(archive_dir)
    archive_name = f'{archive_dir}{model_name}_{get_date_now().strftime("%Y-%m-%d_%H-%M-%S")}.h5'
    self.predictor.model.save(archive_name)

    model = self.__additional_training(self.predictor.model, x_data, y_data)
    print(model_name)
    model.save(f'{self.single_folder}{model_name}.h5')
def predict(self, predict_range: int = 1):
    """Forecast pollution using data of neighboring stations.

    For every station listed in ``STATION_NEIGHBORHOOD`` that exists in the
    database, the current weather and weather forecasts of the station and
    of each of its neighbors are collected. For each pollution type with a
    current measurement, the current pollution of every neighbor and its
    stored ``'single'`` forecasts are gathered; if any neighbor lacks a
    complete set (current value plus ``predict_range`` forecasts, none of
    them ``None``) that pollution type is skipped for the station.
    Otherwise the model is applied step by step and each forecast is stored
    through ``add_pollution_forecast`` with ``predictor='neighborhood'``.
    The method returns nothing.

    :param predict_range: number of hourly forecast steps to produce
    """
    import logging

    logger = logging.getLogger(__name__)
    date = get_date_now()

    # The pollution types do not depend on the station: fetch them once
    pollution_entities = list(get_all_pollution_types())
    forecast_hours = range(1, predict_range + 1)

    for station_name, neighbors in STATION_NEIGHBORHOOD.items():
        station_entity = get_station_by_name(station_name)
        if not station_entity:
            continue

        # Current weather and weather forecast of the station itself
        weather = get_weather(station_entity)[0].to_dict()
        weather_forecast = [
            get_weather_forecast(station_entity, hour)[0].to_dict()
            for hour in forecast_hours
        ]

        # Resolve every neighbor once instead of once per pollution type
        neighbor_entities = {
            neighbor_station: get_station_by_name(neighbor_station)
            for neighbor_station in neighbors
        }

        # Structure: neighbor_station -> [weather, [forecast per hour]]
        neighbor_weather_dict = {}
        for neighbor_station, neighbor_entity in neighbor_entities.items():
            if neighbor_entity:
                neighbor_weather_dict[neighbor_station] = [
                    get_weather(neighbor_entity)[0].to_dict(),
                    [
                        # Forecast of the neighbor, not of the station
                        # being predicted
                        get_weather_forecast(neighbor_entity,
                                             hour)[0].to_dict()
                        for hour in forecast_hours
                    ],
                ]

        for pollution_type in pollution_entities:
            # Current pollution at the station
            value = get_pollutions(station=station_entity,
                                   pollution_type=pollution_type)
            if not value:
                continue

            # predictions[0] is the measured value, predictions[i] is the
            # forecast for hour i
            predictions = [None] * (predict_range + 1)
            predictions[0] = value[0].to_dict()['mg_m3_value']

            try:
                self.preload_model(pollution_type.to_dict()['type'],
                                   station_name)
            except Exception as exc:
                # Best effort: skip this pair, but leave a trace of why
                logger.warning(
                    'Skipping neighborhood forecast of %s for station %s: '
                    'model preload failed (%s)',
                    pollution_type, station_name, exc)
                continue

            # Structure: neighbor_station -> [pollution, forecast1, ...]
            neighbor_bad_pollution_flag = False
            neighbor_pollution_dict = {}
            for neighbor_station, neighbor_entity in neighbor_entities.items():
                # Test the resolved entity; the station name is always truthy
                if neighbor_entity:
                    try:
                        neighbor_pollution_dict[neighbor_station] = [
                            get_pollutions(
                                neighbor_entity,
                                pollution_type)[0].to_dict()['mg_m3_value']
                        ] + [
                            f.to_dict()['value']
                            for f in get_pollution_forecast(
                                neighbor_entity, pollution_type, 'single')
                        ]
                    except IndexError:
                        neighbor_bad_pollution_flag = True

                # A neighbor is usable only with a current value and a
                # forecast for every step
                neighbor_values = neighbor_pollution_dict.get(
                    neighbor_station)
                if neighbor_values is None \
                        or None in neighbor_values \
                        or len(neighbor_values) != predict_range + 1:
                    neighbor_bad_pollution_flag = True

            # Not enough neighbor data for this pollution type
            if neighbor_bad_pollution_flag:
                continue

            for i in range(predict_range):
                neighborhood_data = self.__make_neighborhood_data(
                    neighbor_weather_dict, neighbor_pollution_dict, i)
                raw_prediction = self.__make_neighborhood_predict(
                    weather, weather_forecast[i], predictions[i],
                    neighborhood_data)
                predictions[i + 1] = raw_prediction

                # Store the forecast in the database
                add_pollution_forecast(station=station_entity,
                                       date=date,
                                       delta_hour=i + 1,
                                       predictor='neighborhood',
                                       pollution_type=pollution_type,
                                       value=raw_prediction)
def run_parser():
    """Parse new data and record the moment of the parse.

    The database is extended with a forecast range of three hours, after
    which the last parse date in the configuration is set to the current
    timestamp.
    """
    # Start parsing of new data
    database_extender.auto_extend(forecast_hour_range=3)

    settings = Config()
    parsed_at = str(get_date_now().timestamp())
    settings.set_last_parse_date(parsed_at)
def predict(self, predict_range: int = 1):
    """Produce the aggregating component's forecast for every station.

    For every station listed in ``STATION_NEIGHBORHOOD`` that exists in the
    database, the current weather and weather forecasts of the station and
    of each of its neighbors are collected. For each pollution type the
    stored ``'single'`` and ``'neighborhood'`` forecasts and the current
    measurement are read; the pollution type is skipped when any of them is
    missing or when fewer than ``predict_range`` forecasts of either kind
    are available. Otherwise the model is applied step by step and each
    forecast is stored through ``add_pollution_forecast`` with
    ``predictor='aggregator'``. The method returns nothing.

    :param predict_range: number of hourly forecast steps to produce
    """
    import logging

    logger = logging.getLogger(__name__)
    date = get_date_now()

    # The pollution types do not depend on the station: fetch them once
    pollution_entities = list(get_all_pollution_types())
    forecast_hours = range(1, predict_range + 1)

    for station_name, neighbors in STATION_NEIGHBORHOOD.items():
        station_entity = get_station_by_name(station_name)
        if not station_entity:
            continue

        # Current weather and weather forecast of the station itself
        weather = get_weather(station_entity)[0].to_dict()
        weather_forecast = [
            get_weather_forecast(station_entity, hour)[0].to_dict()
            for hour in forecast_hours
        ]

        # Structure: neighbor_station -> [weather, [forecast per hour]]
        neighbor_weather_dict = {}
        for neighbor_station in neighbors:
            neighbor_station_entity = get_station_by_name(neighbor_station)
            if neighbor_station_entity:
                neighbor_weather_dict[neighbor_station] = [
                    get_weather(neighbor_station_entity)[0].to_dict(),
                    [
                        # Forecast of the neighbor, not of the station
                        # being predicted
                        get_weather_forecast(neighbor_station_entity,
                                             hour)[0].to_dict()
                        for hour in forecast_hours
                    ],
                ]

        for pollution_type in pollution_entities:
            # Forecasts of the first and second components
            single_value = [
                p.to_dict()['value']
                for p in get_pollution_forecast(station_entity,
                                                pollution_type, 'single')
            ]
            neighbor_value = [
                p.to_dict()['value']
                for p in get_pollution_forecast(station_entity,
                                                pollution_type,
                                                'neighborhood')
            ]
            # Current pollution at the station
            value = get_pollutions(station=station_entity,
                                   pollution_type=pollution_type)

            if not (single_value and neighbor_value and value):
                continue
            # Every step indexes both forecast lists; a short list would
            # otherwise fail midway after some forecasts were stored
            if len(single_value) < predict_range \
                    or len(neighbor_value) < predict_range:
                continue

            # predictions[0] is the measured value, predictions[i] is the
            # forecast for hour i
            predictions = [None] * (predict_range + 1)
            predictions[0] = value[0].to_dict()['mg_m3_value']

            try:
                self.preload_model(pollution_type.to_dict()['type'],
                                   station_name)
            except Exception as exc:
                # Best effort: skip this pair, but leave a trace of why
                logger.warning(
                    'Skipping aggregator forecast of %s for station %s: '
                    'model preload failed (%s)',
                    pollution_type, station_name, exc)
                continue

            for i in range(predict_range):
                neighborhood_data = self.__make_neighborhood_data(
                    neighbor_weather_dict, i)
                raw_prediction = self.__make_aggregator_predict(
                    predictions[i], weather, weather_forecast[i],
                    [single_value[i], neighbor_value[i]],
                    neighborhood_data)
                predictions[i + 1] = raw_prediction

                # Store the forecast in the database
                add_pollution_forecast(station=station_entity,
                                       date=date,
                                       delta_hour=i + 1,
                                       predictor='aggregator',
                                       pollution_type=pollution_type,
                                       value=raw_prediction)