Пример #1
0
def predict_check():
    """Smoke-test the train/predict pipeline and print the outcome.

    Reads the data set from the ``data`` directory located next to this
    file, splits and normalizes it, trains a model on the training split,
    asserts that the fitted coefficient array has shape ``(3,)`` and then
    calls ``predict`` on the test split.

    Any exception (a failed assertion included) is caught: a failure banner
    is printed and the traceback is emitted through ``traceback.print_exc``.
    On success the fitted coefficients are printed.
    """
    try:
        local_dir = os.path.dirname(__file__)
        data_path = os.path.join(local_dir, 'data')

        # read in data from files
        data = read_in_data(data_path)
        data_train, data_val, data_test = split_data(data)

        # normalize every split with the same routine
        data_train, data_val, data_test = [
            normalize(data_set)
            for data_set in (data_train, data_val, data_test)
        ]

        model = train(data_train[0], data_train[1])
        coeff = model.coef_

        assert coeff.shape == (3, ), "The shape of coefficient matrix should be (3, )"

        predict(data_test, model)
    except Exception:
        print('Training test failed')
        print('Exception trace back:')
        # print_exc() writes the traceback itself and returns None; wrapping
        # it in print() would add a stray "None" line to the output.
        traceback.print_exc()
    else:
        print('Training test passed.')
        print("Linear regression coefficient: ")
        print(coeff)
def pronostico_normal(data,dirFestivos,dataMet,estacion,contaminant,dirData,dirTrain):
    """Forecast every row of ``data`` and store each prediction.

    The input frame is re-indexed, passed through ``separateDate`` and
    ``totalUnionData``, concatenated column-wise with ``dataMet`` (inner
    join on the index) and reduced with ``filterData`` using the file
    ``dirData + estacion + "_" + contaminant + ".csv"``.  Missing values are
    replaced by ``-1``.  Each remaining row (without its first two values)
    is normalized, the whole batch is sent to ``pre.prediction`` and the
    result is de-normalized with ``pre.desNorm``.  Every value is stored via
    ``guardarPrediccion`` with the row's date shifted back by one day.

    :param data: observations used to build the network input
    :type data: DataFrame
    :param dirFestivos: forwarded to ``totalUnionData``
    :type dirFestivos: String
    :param dataMet: frame concatenated column-wise to ``data``
    :type dataMet: DataFrame
    :param estacion: name of the station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param dirData: directory prefix of the per-station data files
    :type dirData: String
    :param dirTrain: forwarded to ``pre.prediction``
    :type dirTrain: String
    :return: always ``1``
    """
    data = data.reset_index(drop=True)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    arrayPred = []
    for x in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; x is taken from the index
        # itself, so the label-based .loc lookup is the direct replacement.
        valPred = data.loc[x].values[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))

    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')

    for xs in range(len(real)):
        # data['fecha'] is indexed as a multi-valued row here (element [1] is
        # read) -- presumably two 'fecha' columns exist after the concat;
        # TODO confirm against the callers.
        fechas = data['fecha'].iloc[xs].values
        fechaPronostico = datetime.strptime(fechas[1], '%Y-%m-%d %H:%M:%S')
        fechaPronostico = fechaPronostico - timedelta(days=1)
        guardarPrediccion(estacion, fechaPronostico, [real[xs]], contaminant, 2)
    return 1
Пример #3
0
def normalization_check():
    """Check that all three data splits are normalized and have 3 columns.

    Reads the data set from the ``data`` directory located next to this
    file, splits it, normalizes every split and asserts that the first
    element of each split passes ``is_normalized`` and has exactly three
    columns.

    Any exception (a failed assertion included) is caught: a failure banner
    is printed and the traceback is emitted through ``traceback.print_exc``.
    """
    try:
        # construct data dir
        local_dir = os.path.dirname(__file__)
        data_path = os.path.join(local_dir, 'data')

        # read in data from files
        data = read_in_data(data_path)
        data_train, data_val, data_test = split_data(data)

        data_train, data_val, data_test = [
            normalize(data_set)
            for data_set in (data_train, data_val, data_test)
        ]

        assert is_normalized(data_train[0]), "data_train is not normalized"
        assert is_normalized(data_val[0]), "data_val is not normalized"
        assert is_normalized(data_test[0]), "data_test is not normalized"

        # The original repeated the data_train check three times; each split
        # gets its own column-count check.
        assert data_train[0].shape[1] == 3, "the column of data_train should be 3"
        assert data_val[0].shape[1] == 3, "the column of data_val should be 3"
        assert data_test[0].shape[1] == 3, "the column of data_test should be 3"
    except Exception:
        print('Test failed')
        print('Exception trace back:')
        # print_exc() writes the traceback itself and returns None; wrapping
        # it in print() would add a stray "None" line to the output.
        traceback.print_exc()
    else:
        print('Normalization test passed.')
def forecast_month(month, year, dirData, dirTotalCsv, dirTrain,estacion, contaminant):
    """Forecast every record of one calendar month and store the results.

    Builds the first (``-01 00:00:00``) and last (``-<last day> 23:00:00``)
    timestamps of the month, reads the station data for that range with
    ``fd.readData``, enriches it through ``separateDate`` and
    ``unionMeteorologia``, and filters it with ``filterData``.  Each row
    (without its first value) is normalized, the batch is predicted with
    ``pre.prediction`` and de-normalized with ``pre.desNorm``.  One
    prediction per date collected before filtering is stored through
    ``guardarPrediccion``.

    :param month: month number, passed to ``calendar.monthrange``
    :type month: int
    :param year: year, passed to ``calendar.monthrange``
    :type year: int
    :param dirData: directory prefix of the per-station data files
    :type dirData: String
    :param dirTotalCsv: forwarded to ``unionMeteorologia``
    :type dirTotalCsv: String
    :param dirTrain: forwarded to ``pre.prediction``
    :type dirTrain: String
    :param estacion: name of the station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    # monthrange returns (weekday of first day, number of days in month)
    lastDay = calendar.monthrange(year, month)[1]
    prefijo = str(year) + '-' + numString(month) + '-'
    fechaInicio = prefijo + '01 00:00:00'
    fechaFinal = prefijo + numString(lastDay) + ' 23:00:00'

    data = fd.readData(fechaInicio, fechaFinal, [estacion], contaminant)
    data = separateDate(data)
    data = unionMeteorologia(data, dirTotalCsv)
    data = data.fillna(value=-1)
    # Dates are captured BEFORE filterData; the save loop below assumes the
    # prediction list has at least as many entries as there are dates.
    frame_dates = data['fecha'].values
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    arrayPred = []
    for x in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; x is taken from the index
        # itself, so the label-based .loc lookup is the direct replacement.
        valPred = data.loc[x].values[1:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))

    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    nameCont = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, nameCont + '_')

    for xs, fecha in enumerate(frame_dates):
        ts = df.to_datetime(str(fecha))
        fecha_string = ts.strftime('%Y-%m-%d %H:%M:%S')
        guardarPrediccion(estacion, fecha_string, [real[xs]], contaminant, 4)
Пример #5
0
def prediccion(estacion, data, dirData, dirTrain, contaminant):
    """
    function that sends the data to the neural network for the prediction of the pollutant

    Only the first row of ``data`` is used; its first value is dropped
    before normalization.

    :param estacion: name the station
    :type estacion: String
    :param data: information for the prediction
    :type data : DataFrame
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :return: de-normalized prediction, as returned by ``pre.desNorm``
    """
    # DataFrame.ix was removed in pandas 1.0.  The first row is what is
    # wanted here, so positional .iloc[0] is used.
    # NOTE(review): on an integer index .ix[0] looked up the *label* 0;
    # confirm callers always pass a frame whose first row carries label 0.
    primera_fila = data.iloc[0].values[1:]
    dataPred = pre.normalize(primera_fila, estacion, contaminant, dirData)
    dataPred = convert(dataPred)
    # Local renamed so it no longer shadows this function's own name.
    resultado = pre.prediction(estacion, contaminant, [dataPred], dirTrain,
                               dirData)
    print(resultado)
    columnContaminant = findTable2(contaminant)
    return pre.desNorm(resultado, estacion, contaminant, dirData,
                       columnContaminant + '_')
Пример #6
0
def dataCorrelacion(contaminant, estacion, fechaInicio, fechaFin, dataMet,
                    dirData, dirTrain, dirFestivos):
    """
    function to make the forecast using the data of a correlated station

    The correlation table is read from a fixed CSV path, the column for
    ``estacion`` is sorted in descending order and the station at position 1
    is used as the source of data (position 0 is presumably ``estacion``
    itself -- TODO confirm against the table).  If that station has no data
    for the requested range, the forecast falls back to ``useClimatology``.
    Otherwise the columns of the correlated station are renamed to those of
    ``estacion``, the usual preprocessing/prediction pipeline is run and
    every prediction is stored with ``guardarPrediccionRep``.

    :param contaminant: name of the pollutant
    :type contaminant: String
    :param estacion: name of the station to forecast
    :type estacion: String
    :param fechaInicio: start of the range passed to ``fd.readData_corr``
    :param fechaFin: end of the range passed to ``fd.readData_corr``
    :param dataMet: frame concatenated column-wise to the station data
    :type dataMet: DataFrame
    :param dirData: directory prefix of the per-station data files
    :type dirData: String
    :param dirTrain: forwarded to ``pre.prediction``
    :type dirTrain: String
    :param dirFestivos: forwarded to ``totalUnionData``
    :type dirFestivos: String
    """
    print('COrrelacion')
    data_Corr = df.read_csv(
        '/media/storageBK/AirQualityForecast/Scripts/ContaminationForecast/Data/Correlacion_table.csv',
        index_col=0)
    corr_est = data_Corr[estacion].sort_values(ascending=False)
    estacion_corr = corr_est.index[1]
    data = fd.readData_corr(fechaInicio, fechaFin, [estacion_corr],
                            contaminant)
    if data.empty:
        # The original passed the undefined name ``fechaUltima`` here, which
        # raised NameError; the start of the requested range is used instead.
        useClimatology(contaminant, estacion, fechaInicio, fechaFin, dataMet,
                       dirData, dirTrain, dirFestivos)
        return

    data = data.drop_duplicates(keep='first')
    data = data.reset_index(drop=True)
    # Rename every column but the first so it carries the target station's
    # name in place of the correlated station's name.
    origen = estacion_corr.lower()
    destino = estacion.lower()
    data.rename(
        columns={col: col.replace(origen, destino)
                 for col in data.columns.values[1:]},
        inplace=True)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    print(data)
    data = filterData(data,
                      dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    arrayPred = []
    for x in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; x is taken from the index
        # itself, so the label-based .loc lookup is the direct replacement.
        valPred = data.loc[x].values[2:]
        print(valPred)
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))

    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain,
                            dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData,
                       columnContaminant + '_')
    for xs in range(len(real)):
        # data['fecha'] is indexed as a multi-valued row here (element [1] is
        # read) -- presumably two 'fecha' columns exist after the concat;
        # TODO confirm.
        fechas = data['fecha'].iloc[xs].values
        fechaPronostico = datetime.strptime(fechas[1],
                                            '%Y-%m-%d %H:%M:%S')
        guardarPrediccionRep(estacion, fechaPronostico, [real[xs]],
                             contaminant, 5)
Пример #7
0
def useClimatology(contaminant, estacion, fechaInicio, fechaFinal, dataMet,
                   dirData, dirTrain, dirFestivos):
    """
    function to make the forecast using climatologies

    The climatology returned by ``fd.get_climatology`` is expanded with
    ``makeDates``, preprocessed, predicted and de-normalized.  The
    predictions are stored one per hour starting at ``fechaInicio``; each is
    saved with its timestamp shifted back by one day.

    :param contaminant: name of the pollutant
    :type contaminant: String
    :param estacion: name of the weather station
    :type estacion: String
    :param fechaInicio: start of the range with which the values of the query are extracted
    :type fechaInicio: datetime
    :param fechaFinal: end of the range with which the values of the query are extracted
    :type fechaFinal: datetime
    :param dataMet: dataframe with the climatological information
    :type dataMet: DataFrame
    :param dirData: directory prefix of the per-station data files
    :type dirData: String
    :param dirTrain: forwarded to ``pre.prediction``
    :type dirTrain: String
    :param dirFestivos: forwarded to ``totalUnionData``
    :type dirFestivos: String
    """
    data = fd.get_climatology(fechaInicio, fechaFinal, estacion)
    print(data)
    data = makeDates(fechaInicio, fechaFinal, data)
    print(data)
    data = data.reset_index(drop=True)
    print(data)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    data = data.fillna(value=-1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    arrayPred = []
    for x in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; x is taken from the index
        # itself, so the label-based .loc lookup is the direct replacement.
        valPred = data.loc[x].values[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))

    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain,
                            dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData,
                       columnContaminant + '_')

    fechaPronostico = fechaInicio
    for valor in real:
        print(fechaPronostico)
        # stored one day earlier than the running forecast timestamp
        fechaUpdate = fechaPronostico - timedelta(days=1)
        guardarPrediccion(estacion, fechaUpdate, [valor], contaminant, 1)
        fechaPronostico = fechaPronostico + timedelta(hours=1)
    print('Climatologia:' + estacion)
Пример #8
0
def forecastDate2(station, dirData, dirrDataC, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin, dirTotalCsv):
    """
    function to make the forecast for a date range from the pollutant data base

    When ``baseContaminantes`` returns data, every row is preprocessed,
    predicted, de-normalized and stored through ``savePrediccion``.  When it
    returns an empty frame, the backup data from ``back`` is used to compute
    a single prediction from its first row.

    :param station: name the station
    :type station: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirrDataC: not used by this function
    :type dirrDataC: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param columnContaminant: name of the pollutant in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :param fechaFin: end date of the forecast
    :param dirTotalCsv: forwarded to ``unionData``
    :type dirTotalCsv: String
    """
    sta = station
    name = sta + '_' + contaminant
    tempData = baseContaminantes(fechaInicio, fechaFin, station, contaminant)
    if tempData.empty:
        data = back(dirData, contaminant)
        data = data.fillna(value=-1)
        data = filterData(data, dirData + name + ".csv")
        data = data.fillna(value=-1)
        # DataFrame.ix was removed in pandas 1.0; the first row is wanted,
        # so positional .iloc[0] is used.
        # NOTE(review): on an integer index .ix[0] looked up the *label* 0;
        # confirm filterData returns a frame whose first row has label 0.
        temp = data.iloc[0].values[1:]
        dataPred = pre.normalize(temp, sta, contaminant, dirData)
        dataPred = convert(dataPred)
        # NOTE(review): as in the original, this prediction is computed but
        # neither de-normalized nor saved -- confirm whether that is intended.
        prediccion = pre.prediction(sta, contaminant, [dataPred], dirTrain, dirData)
    else:
        data = tempData.dropna(axis=1, how='all')
        data = data.fillna(value=-1)
        data = data.reset_index(drop=True)
        data = separateDate(data)
        data = unionData(data, dirTotalCsv)
        data = data.drop_duplicates(keep='first')
        data = filterData(data, dirData + name + '.csv')
        data = data.fillna(value=-1)
        dataTemp = data['fecha']

        arrayPred = []
        for x in data.index.values:
            # x is taken from the index itself, so label-based .loc is the
            # direct replacement for the removed .ix accessor.
            valPred = data.loc[x].values[1:]
            valNorm = pre.normalize(valPred, sta, contaminant, dirData)
            arrayPred.append(convert(valNorm))

        result = pre.prediction(sta, contaminant, arrayPred, dirTrain, dirData)
        real = desNorm(result, sta, contaminant, dirData, columnContaminant)
        savePrediccion(station, real, contaminant, dataTemp)
Пример #9
0
def forecastDate(station, dirData, dirrDataC, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin):
    """
    function to make the forecast for a date range from a CSV file and save it

    The rows of ``dirrDataC + station + '_' + contaminant + '.csv'`` whose
    ``fecha`` lies between ``fechaInicio`` and ``fechaFin`` (both inclusive)
    are filtered, normalized and predicted; the de-normalized predictions
    are stored with ``savePrediccion``.

    :param station: name the station
    :type station: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirrDataC: address of the CSV files with the data to forecast
    :type dirrDataC: String
    :param dirTrain: address of the training files of the neural network;
        ``contaminant + '/'`` is appended before it is passed on
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param columnContaminant:name of the pollutant in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :type fechaInicio: date
    :param fechaFin: end date of the forecast
    :type fechaFin: date
    """
    sta = station
    name = sta + '_' + contaminant
    temp = df.read_csv(dirrDataC + name + '.csv')  # we load the data in the Variable data
    temp = temp.fillna(value=-1.0)
    en_rango = (temp['fecha'] <= fechaFin) & (temp['fecha'] >= fechaInicio)
    data = temp[en_rango].reset_index(drop=True)
    data = filterData(data, dirData + name + '.csv')
    data = data.fillna(value=-1.0)
    dataTemp = data['fecha'].values
    print(dataTemp)

    arrayPred = []
    for x in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; x is taken from the index
        # itself, so the label-based .loc lookup is the direct replacement.
        valPred = data.loc[x].values[1:]
        valNorm = pre.normalize(valPred, sta, contaminant, dirData)
        arrayPred.append(convert(valNorm))

    result = pre.prediction(sta, contaminant, arrayPred, dirTrain + contaminant + '/', dirData)
    real = desNorm(result, sta, contaminant, dirData, columnContaminant)
    savePrediccion(station, real, contaminant, dataTemp)
Пример #10
0
def trial(station, dirData, dirrDataC, dirGraficas, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin):
    """
    function to make the forecast for a date range and graph it against the observed values

    The input rows come from ``dirrDataC + station + '_' + contaminant +
    '.csv'`` and the observed values from the matching ``..._pred.csv``
    file, both restricted to ``fechaInicio <= fecha <= fechaFin``.  The
    figure is written to ``dirGraficas + station + '_anio.png'`` and then
    shown.

    :param station: name the station
    :type station: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirrDataC: address of the CSV files with the data to forecast
    :type dirrDataC: String
    :param dirGraficas: address where the graphics are saved
    :type dirGraficas: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param columnContaminant:name of the pollutant in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :type fechaInicio: date
    :param fechaFin: end date of the forecast
    :type fechaFin: date
    """
    sta = station
    name = sta + '_' + contaminant
    temp = df.read_csv(dirrDataC + name + '.csv')  # we load the data in the Variable data
    temp = temp.fillna(value=-1.0)
    data = temp[(temp['fecha'] <= fechaFin) & (temp['fecha'] >= fechaInicio)]
    data = data.reset_index(drop=True)
    data = filterData(data, dirData + name + '.csv')
    data = data.fillna(value=-1.0)
    tempBuild = df.read_csv(dirrDataC + name + '_pred.csv')  # we load the data in the Variable build
    tempBuild = tempBuild.fillna(value=-1.0)
    build = tempBuild[(tempBuild['fecha'] <= fechaFin) & (tempBuild['fecha'] >= fechaInicio)]
    build = build.reset_index(drop=True)
    build = build.fillna(value=-1.0)

    # xlabel returns the tick labels at [0] and their positions at [1]
    etiquetas = xlabel(data)
    labels = etiquetas[0]
    location = etiquetas[1]
    print(labels)

    # Both branches of the original conditional chose the same name; only the
    # diagnostic print differed.
    if station not in ('SAG', 'UIZ'):
        print('no mes')
    nombre = 'anio'

    nameColumn = columnContaminant + '_' + sta + '_delta'
    inf = build[nameColumn].values

    arrayPred = []
    for x in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; x is taken from the index
        # itself, so the label-based .loc lookup is the direct replacement.
        valPred = data.loc[x].values[1:]
        valNorm = pre.normalize(valPred, sta, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(sta, contaminant, arrayPred, dirTrain, dirData)
    real = desNorm(result, sta, contaminant, dirData, columnContaminant)

    plt.figure(figsize=(22.2, 11.4))
    plt.plot(inf, color='tomato', linestyle="solid", marker='o', label='Valor observado.')
    plt.plot(real, color='darkgreen', linestyle='solid', marker='o', label='Pronóstico 24h NN.')
    # NOTE(review): the year in the title is hard-coded to 2016.
    plt.title(nombreEst(station) + ' (' + station + ') comparación de ' + contaminant+' observado vs red neuronal' + ' para la primer semana de ' + nombre + ' 2016' ,fontsize=25, y=1.1 )
    plt.xlabel('Fecha', fontsize=18)
    plt.ylabel('Partes por millon (PPM)', fontsize=22)
    plt.legend(loc='best')
    plt.grid(True, axis='both', alpha= 0.3, linestyle="--", which="both")
    plt.xticks(location,labels,fontsize=16,rotation=80)
    # light horizontal bands to ease reading of the value ranges
    plt.axhspan(20, 40, color='lightgray', alpha=0.3)
    plt.axhspan(60, 80, color='lightgray', alpha=0.3)
    plt.axhspan(100, 120, color='lightgray', alpha=0.3)
    ejes = plt.gca()
    ejes.spines['bottom'].set_color('dimgray')
    ejes.spines['left'].set_visible(False)
    ejes.spines['top'].set_visible(False)
    ejes.spines['right'].set_visible(False)
    plt.tight_layout()
    plt.savefig(dirGraficas + station + '_' + nombre + '.png')
    plt.show()
    plt.clf()
    plt.close()