# Shared imports; StationRepository, get_repository_parameters etc. come from the author's repository.
import logging
import os
import random
import pandas
from matplotlib import pyplot

def get_station_dicts(start_date, end_date):
    # repository_parameter = RepositoryParameter.START
    repository_parameter = RepositoryParameter.ONLY_OUTDOOR_AND_SHADED
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter))
    station_dicts = station_repository.load_all_stations(start_date, end_date)
    return station_dicts
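
A minimal usage sketch for the helper above (the date strings mirror the ISO format used in the other snippets):

station_dicts = get_station_dicts("2016-01-01", "2016-12-31")
print("loaded %i stations" % len(station_dicts))
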
def check_correlation(start_date, end_date):
    station_repository = StationRepository(
        *get_repository_parameters(RepositoryParameter.ONLY_OUTDOOR))
    station_dicts = station_repository.load_all_stations(start_date, end_date)
    reference_temperature_df = load_husconet_temperature_average(
        start_date, end_date)
    reference_radiation_df = load_husconet_radiation_average(
        start_date, end_date)

    r_and_p_values = [
        check_station(station_dict["data_frame"],
                      reference_temperature_df,
                      reference_radiation_df,
                      just_check_correlation=True)
        for station_dict in station_dicts
    ]
    r_values = []
    for r_value, p_value in r_and_p_values:
        if p_value < .05:  # keep only correlations significant at the 5% level
            r_values.append(r_value)
    pyplot.hist(r_values, 10, color="gray")
    pyplot.xlim(-1, 1)
    pyplot.xlabel("correlation coefficient $r$")
    pyplot.ylabel("number of PWS")
    pyplot.show()
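
check_station itself is not shown on this page; with just_check_correlation=True it evidently returns an (r, p) pair per station. A plausible sketch of that core computation using scipy.stats.pearsonr, in which the inner join on timestamps and the "temperature" column names are assumptions:

from scipy.stats import pearsonr

def station_reference_correlation(station_df, reference_temperature_df):
    # Align station and reference series on their shared timestamps,
    # then correlate the overlapping temperature values.
    joined = station_df.join(reference_temperature_df, how="inner",
                             rsuffix="_reference").dropna()
    return pearsonr(joined["temperature"], joined["temperature_reference"])
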
def run():
    start_date = "2016-01-01T00:00"
    end_date = "2016-12-31T23:59"

    eddh_df = load_eddh(start_date, end_date)
    station_repository = StationRepository(*get_repository_parameters(
        RepositoryParameter.ONLY_OUTDOOR_AND_SHADED))
    station_dicts = station_repository.load_all_stations(
        start_date,
        end_date,
        # limit=5,  # for testing purposes
        limit_to_temperature=False)

    # Random 70/30 split into training and evaluation stations
    random.shuffle(station_dicts)
    split_point = int(len(station_dicts) * .7)
    training_dicts = station_dicts[:split_point]
    evaluation_dicts = station_dicts[split_point:]

    logging.info("training stations: %s" %
                 [station["name"] for station in training_dicts])
    logging.info("evaluation stations: %s" %
                 [station["name"] for station in evaluation_dicts])

    training_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                     "training_data_filtered.csv")
    join_to_big_vector(training_csv_file, training_dicts, eddh_df)

    evaluation_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                       "evaluation_data_filtered.csv")
    join_to_big_vector(evaluation_csv_file, evaluation_dicts, eddh_df)
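
random.shuffle is unseeded above, so the 70/30 split changes between runs. If a reproducible split is wanted, seeding the module-level generator before shuffling is enough (the seed value is arbitrary):

random.seed(42)  # any fixed value makes the split repeatable
random.shuffle(station_dicts)
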
def gather_statistics(repository_parameter, start_date, end_date):
    logging.info("repository: %s" % repository_parameter.value)
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter))
    availabilities = []
    station_dicts = station_repository.load_all_stations(start_date=start_date,
                                                         end_date=end_date)
    logging.info("total: %i" % len(station_dicts))
    while station_dicts:
        station_dict = station_dicts.pop()
        position = station_dict["meta_data"]["position"]
        station_dict["data_frame"] = sample_up(station_dict["data_frame"],
                                               start_date, end_date)
        row_result = {
            "station_name": station_dict["name"],
            "lat": position["lat"],
            "lon": position["lon"],
            "available_data": get_available_data(station_dict)
        }
        availabilities.append(row_result)
        logging.debug("{station_name}: {lat} {lon} -- {available_data}".format(
            **row_result))
    df = pandas.DataFrame(availabilities)
    result_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "log",
        "calculate_available_data_%s.csv" % repository_parameter.value)
    df.to_csv(result_file)
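
sample_up and get_available_data are project helpers that do not appear on this page. One plausible reading, sketched under the assumptions of a DatetimeIndex, a fixed sampling frequency, and a "temperature" column: sample_up reindexes the raw data onto a regular time grid, and get_available_data reports the non-missing fraction.

def sample_up_sketch(df, start_date, end_date, freq="30min"):
    # Reindex onto a regular grid; timestamps without readings become NaN.
    full_index = pandas.date_range(start=start_date, end=end_date, freq=freq)
    return df.reindex(full_index)

def get_available_data_sketch(station_dict):
    # Fraction of grid points that carry an actual temperature reading.
    return station_dict["data_frame"]["temperature"].notna().mean()
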
def demo():
    start_date = "2016-01-31"
    end_date = "2016-02-01"
    repository_parameters = get_repository_parameters(
        RepositoryParameter.ONLY_OUTDOOR_AND_SHADED)
    score_algorithm(start_date,
                    end_date,
                    repository_parameters,
                    limit=10,
                    interpolation_name="test")
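
# A second demo variant with a longer date range, the START repository
# parameter, and a higher station limit: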
def demo():
    start_date = "2016-01-31"
    end_date = "2016-02-15"
    repository_parameters = get_repository_parameters(
        RepositoryParameter.START)
    score_algorithm(start_date,
                    end_date,
                    repository_parameters,
                    limit=30,
                    interpolation_name="test")
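
Either demo variant can be executed directly; a minimal entry point (the logging setup is an addition, since the snippets log via the root logger):

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo()
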
def run(testing=False):
    start_date = "2016-01-01T00:00"
    end_date = "2016-12-31T23:59" if not testing else "2016-03-31"

    eddh_df = load_eddh(start_date, end_date)
    station_repository = StationRepository(
        *get_repository_parameters(RepositoryParameter.START_FULL_SENSOR))
    station_dicts = station_repository.load_all_stations(
        start_date,
        end_date,
        limit_to_temperature=False,
        limit=0 if not testing else 15  # for testing purposes
    )

    husconet_dicts = HusconetStationRepository().load_all_stations(
        start_date,
        end_date,
        limit=0 if not testing else 3  # for testing purposes
    )
    random.shuffle(husconet_dicts)
    split_point = int(len(husconet_dicts) * .7)
    training_dicts = husconet_dicts[:split_point]
    evaluation_dicts = husconet_dicts[split_point:]
    logging.info("training stations: %s" %
                 [station["name"] for station in training_dicts])
    logging.info("evaluation stations: %s" %
                 [station["name"] for station in evaluation_dicts])

    logging.debug("prepare evaluation")
    evaluation_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                       "evaluation_data_husconet.csv")
    # Pass a copy of station_dicts so the original list is still intact for
    # the training call below.
    join_to_big_vector(evaluation_csv_file, station_dicts[:], evaluation_dicts,
                       eddh_df)

    logging.debug("prepare training")
    training_csv_file = os.path.join(PROCESSED_DATA_DIR, "neural_networks",
                                     "training_data_husconet.csv")
    join_to_big_vector(training_csv_file, station_dicts, training_dicts,
                       eddh_df)
    logging.debug("done")
def run_clustering(repository_parameter_name, start_date, end_date, limit):
    """

    :param repository_parameter_name: One of the types from ``RepositoryParameter``
    :param start_date: First day
    :param end_date: Last day
    :param limit: Limit the number of examined stations
    :return: Show clustering
    """
    params = get_repository_parameters(repository_parameter_name)
    station_repository = StationRepository(*params)
    station_dicts = station_repository.load_all_stations(start_date,
                                                         end_date,
                                                         limit=limit)
    station_time_series_comparator = StationTimeSeriesComparator(station_dicts)
    stations = [Station(station_dict) for station_dict in station_dicts]

    cluster = HierarchicalClustering(
        stations,
        station_time_series_comparator.compare_time_series,
        num_processes=4)
    cluster.cluster()
    cluster.display(print_function=logging.debug)
    logging.info(cluster._data)  # note: reads the clusterer's private _data attribute
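
A usage sketch matching the docstring above; the dates and limit are arbitrary, and the repository parameter follows the pattern of the other snippets:

run_clustering(RepositoryParameter.ONLY_OUTDOOR_AND_SHADED,
               "2016-01-31", "2016-02-15", limit=20)
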
def plot():
    start = get_repository_parameters(RepositoryParameter.START_FULL_SENSOR)
    start_date = "2016-01-01"
    end_date = "2016-12-31"
    plot_station("Niederschlag 2016", *start, start_date, end_date)
def gather_statistics(repository_parameter, start_date, end_date):
    logging.info("repository: %s" % repository_parameter.value)
    station_repository = StationRepository(
        *get_repository_parameters(repository_parameter))
    available_precipitation = {}
    available_wind = {}
    station_dicts = station_repository.load_all_stations(
        start_date=start_date,
        end_date=end_date,
        limit_to_temperature=False,
        # limit=10  # for testing purposes
    )
    logging.info("total: %i" % len(station_dicts))
    stations_with_precipitation = set()
    stations_with_wind = set()
    while station_dicts:
        station_dict = station_dicts.pop()  # free memory whenever you can
        precipitation = get_available_precipitation(station_dict)
        if len(precipitation):
            available_precipitation[station_dict["name"]] = precipitation
            stations_with_precipitation.add(station_dict["name"])
        wind = get_available_wind(station_dict)
        if len(wind):
            available_wind[station_dict["name"]] = wind
            stations_with_wind.add(station_dict["name"])
    df_precipitation = pandas.DataFrame(available_precipitation)
    df_wind = pandas.DataFrame(available_wind)
    result_file_precipitation = os.path.join(
        PROCESSED_DATA_DIR, "misc",
        "precipitation_per_month_%s.csv" % repository_parameter.value)
    df_precipitation.to_csv(result_file_precipitation)
    result_file_wind = os.path.join(
        PROCESSED_DATA_DIR, "misc",
        "wind_per_month_%s.csv" % repository_parameter.value)
    df_wind.to_csv(result_file_wind)

    station_dicts_wind = os.path.join(PROCESSED_DATA_DIR, "filtered_stations",
                                      "station_dicts_wind.csv")
    df_data = []
    for station_with_wind in stations_with_wind:
        meta_info = station_repository.get_meta_info(station_with_wind)
        df_data.append({
            "station": station_with_wind,
            "lat": meta_info.lat,
            "lon": meta_info.lon
        })
    df = pandas.DataFrame(df_data)
    df.set_index("station", inplace=True)
    df.to_csv(station_dicts_wind)

    station_dicts_precipitation = os.path.join(
        PROCESSED_DATA_DIR, "filtered_stations",
        "station_dicts_precipitation.csv")
    df_data = []
    for station_with_precipitation in stations_with_precipitation:
        meta_info = station_repository.get_meta_info(
            station_with_precipitation)
        df_data.append({
            "station": station_with_precipitation,
            "lat": meta_info.lat,
            "lon": meta_info.lon,
        })
    df = pandas.DataFrame(df_data)
    df.set_index("station", inplace=True)
    df.to_csv(station_dicts_precipitation)
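
get_available_precipitation and get_available_wind are not shown on this page; judging by the *_per_month output files, they return per-month availability counts. A sketch under the assumptions of a DatetimeIndex and a "precipitation" column (the wind helper would be analogous):

def get_available_precipitation_sketch(station_dict):
    df = station_dict["data_frame"]
    if "precipitation" not in df.columns:
        return pandas.Series(dtype=int)
    valid = df["precipitation"].notna()
    # Count valid readings per calendar month.
    return valid.groupby(df.index.to_period("M")).sum()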