def forecast_month(month, year, dirData, dirTotalCsv, dirTrain, estacion, contaminant):
    """
    Forecast every record of one calendar month for a station and store
    each predicted value through ``guardarPrediccion``.

    :param month: month number to forecast
    :type month: int
    :param year: year the month belongs to
    :type year: int
    :param dirData: directory prefix; used to locate
        ``<estacion>_<contaminant>.csv`` and forwarded to the ``pre`` helpers
    :type dirData: String
    :param dirTotalCsv: value forwarded to ``unionMeteorologia``
    :type dirTotalCsv: String
    :param dirTrain: value forwarded to ``pre.prediction``
    :type dirTrain: String
    :param estacion: name of the station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    lastDay = calendar.monthrange(year, month)[1]
    # Query window: first hour of day 1 up to 23:00 of the month's last day.
    fechaInicio = str(year) + '-' + numString(month) + '-01 00:00:00'
    fechaFinal = str(year) + '-' + numString(month) + '-' + numString(lastDay) + ' 23:00:00'
    data = fd.readData(fechaInicio, fechaFinal, [estacion], contaminant)
    data = separateDate(data)
    data = unionMeteorologia(data, dirTotalCsv)
    data = data.fillna(value=-1)
    # Keep the timestamps now: they are paired by position with the
    # predictions once the frame has gone through filterData.
    frame_dates = data['fecha'].values
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    arrayPred = []
    for x in data.index.values:
        # .loc replaces DataFrame.ix, which was removed in pandas 1.0; the
        # labels come from the index itself, so the lookup is equivalent.
        pred = data.loc[x].values
        # The first column is skipped (presumably the date column -- confirm).
        valPred = pred[1:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    nameCont = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, nameCont + '_')
    for xs, fecha in enumerate(frame_dates):
        ts = df.to_datetime(str(fecha))
        fecha_string = ts.strftime('%Y-%m-%d %H:%M:%S')
        # NOTE(review): the trailing 4 is passed through unchanged; its
        # meaning is defined by guardarPrediccion.
        guardarPrediccion(estacion, fecha_string, [real[xs]], contaminant, 4)
def saveData2(listEstations, startDate, nameContaminant, endDate, dirr,
              dirTotalCsv, contaminant):
    """
    Build and save, for every station, a data CSV and a matching
    ``_pred`` CSV.

    :param listEstations: list with stations
    :type listEstations: String list
    :param startDate: start dates, indexed in parallel with listEstations
    :type startDate: list
    :param nameContaminant: prefix of the ``<prefix><station>_delta`` column
    :type nameContaminant: String
    :param endDate: end date shared by all stations
    :type endDate: date
    :param dirr: directory prefix where both CSV files are written
    :type dirr: String
    :param dirTotalCsv: value forwarded to ``unionData``
    :type dirTotalCsv: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    for pos, station in enumerate(listEstations):
        print(station)
        firstDate = startDate[pos]
        print(firstDate)
        deltaColumn = nameContaminant + station + '_delta'
        filePrefix = station + '_' + contaminant

        rawData = fd.readData(firstDate, endDate, [station], contaminant)
        targets = fd.buildClass2(rawData, [station], contaminant, 24,
                                 firstDate, endDate)

        # Drop all-empty columns, mark remaining gaps with -1 and renumber rows.
        cleaned = (rawData.dropna(axis=1, how='all')
                   .fillna(value=-1)
                   .reset_index()
                   .drop(labels='index', axis=1))
        merged = cleaned.merge(targets, how='left', on='fecha')

        # Split the merged frame: 'build' keeps date + delta column,
        # 'data' keeps everything except the delta column.
        build = df.DataFrame(merged['fecha'], columns=['fecha'])
        build[deltaColumn] = df.DataFrame(merged[deltaColumn],
                                          columns=[deltaColumn])
        data = (merged.drop(labels=deltaColumn, axis=1)
                .reset_index()
                .drop(labels='index', axis=1))
        build = build.reset_index().drop(labels='index', axis=1)

        data = unionData(separateDate(data), dirTotalCsv)
        maxAndMinValues(data, station, contaminant, dirr)
        data = data.drop_duplicates(keep='first')
        build = filterData(data, build.drop_duplicates(keep='first'))

        # Written to "<dirr><station>_<contaminant>.csv"
        data.to_csv(dirr + filePrefix + '.csv', encoding='utf-8', index=False)
        # Written to "<dirr><station>_<contaminant>_pred.csv"
        build.to_csv(dirr + filePrefix + '_pred.csv', encoding='utf-8',
                     index=False)
def baseContaminantes(fecha, estacion, contaminant):
    """
    Read the pollutant records of one station for a single hour.

    :param fecha: moment to query; its year, month, day and hour are used
    :type fecha: date
    :param estacion: name of the station from which the information is extracted
    :type estacion: String
    :param contaminant: pollutant identifier forwarded to ``fd.readData``
    :type contaminant: String
    :return: the result of ``fd.readData`` for a window that starts and ends
        at that hour
    """
    dia = '-'.join([str(fecha.year), numString(fecha.month), numString(fecha.day)])
    momento = dia + ' ' + numString(fecha.hour) + ':00:00'
    # Same timestamp on both ends: the query covers exactly one hour mark.
    return fd.readData(momento, momento, [estacion], contaminant)
示例#4
0
def training(fechaAyer, estacion, dirTrain, dirData, dirCsv, dirFestivos,
             variables, contaminant):
    """
    Train the network of one station with the data read for ``fechaAyer``.

    Nothing is trained (a message is printed instead) when the query
    returns no data.

    :param fechaAyer: date of the previous day
    :type fechaAyer: date
    :param estacion: name the station
    :type estacion: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirCsv: Address of processed meteorology archives
    :type dirCsv : String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param variables: meteorological variables
    :type variables: string list
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    print(estacion)
    anio = str(fechaAyer.year)
    mes = numString(fechaAyer.month)
    dia = numString(fechaAyer.day)
    hora = numString(fechaAyer.hour)
    # The helpers expect different separators: '/' for the data base and
    # class builder, '-' for the meteorology files.
    fechaBuild = '/'.join([anio, mes, dia])
    fechaMet = '-'.join([anio, mes, dia])
    fecha = fechaBuild + ' ' + hora + ':00:00'

    data = fd.readData(fecha, fecha, [estacion], contaminant)
    build = fd.buildClass2(data, [estacion], contaminant, 24, fechaBuild,
                           fechaBuild)
    if data.empty:
        print("No se puede hacer el entrenamiento")
        return

    dataMet = unionMeteorologia(fechaMet, fechaAyer, dirCsv, variables)
    dataMet = dataMet.drop('fecha', axis=1)
    data = unionData(separateDate(data), fechaAyer, dirFestivos)
    data = df.concat([data, dataMet], axis=1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    xy_values = an(data, build, contaminant)  # preprocessing
    tr.training(xy_values[0], xy_values[1], estacion, dirTrain, contaminant,
                dirData)
示例#5
0
def tiempo():
    """
    Time data loading and the two training routines (``nn`` and ``nng``,
    labelled CPU and GPU in the plot) over growing date windows, then plot
    and save the timings to ``tiempo.png``.

    Uses the module-level ``startDate``, ``endDate``, ``est``,
    ``contaminant`` and ``loss_vec``; every loss obtained is appended to
    ``loss_vec``.
    """
    time_base, time_cpu, time_gpu = [], [], []
    start = datetime.strptime(startDate[20], '%Y/%m/%d')
    end = datetime.strptime(endDate, '%Y/%m/%d')
    # 8760 h = 365 days, so the window grows by two such years per step.
    step = timedelta(hours=8760 * 2)
    estation = est[20]
    date = start + step
    while date <= end:
        sDate = date.strftime('%Y/%m/%d')

        # Data loading and preprocessing
        loadStart = time()
        data = FormatData.readData(start, date, [estation], contaminant)
        build = FormatData.buildClass2(data, [estation], contaminant, 24,
                                       startDate[20], sDate)
        xy_values = an(data, build, contaminant)
        loadEnd = time()

        # Training with nn
        cpuStart = time()
        cpu_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000,
                      estation, contaminant)
        loss_vec.append(cpu_loss)
        cpuEnd = time()

        # Training with nng
        gpuStart = time()
        gpu_loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(gpu_loss)
        gpuEnd = time()

        time_base.append(loadEnd - loadStart)
        time_cpu.append(cpuEnd - cpuStart)
        time_gpu.append(gpuEnd - gpuStart)
        date = date + step

    for series, fmt, label in ((time_base, 'g-', 'time Data base'),
                               (time_cpu, 'k-', 'time CPU'),
                               (time_gpu, 'r-', 'time GPU')):
        plt.plot(series, fmt, label=label)
    plt.title('GPU vs CPU')
    plt.xlabel('Years')
    plt.ylabel('Time')
    plt.legend(loc='best')
    plt.savefig('tiempo.png', dpi=600)
    plt.show()
示例#6
0
def estationsGpu():
    """
    Train with ``nng`` while adding one station at a time to the input and
    plot the loss of each run, saving the figure to ``estacionesGpu.png``.

    Uses the module-level ``startDate``, ``endDate``, ``est``,
    ``contaminant`` and ``loss_vec``. The classes are always built for
    the first station, ``est[0]``.
    """
    start = startDate[0]
    selected = []
    for name in est:
        selected.append(name)
        print(selected)
        data = FormatData.readData(start, endDate, selected, contaminant)
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24, start,
                                       endDate)
        xy_values = an(data, build, contaminant)
        run_loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(run_loss)
    print(loss_vec)

    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("estacionesGpu.png", dpi=600)
    plt.show()
示例#7
0
def iterationGpu():
    """
    Train station ``est[10]`` with ``nng`` for 200, 400, ..., 3000
    iterations and plot the loss of each run, saving the figure to
    ``iteraciones.png``.

    Uses the module-level ``startDate``, ``endDate``, ``est``,
    ``contaminant`` and ``loss_vec``.
    """
    start = startDate[0]
    estation = est[10]
    data = FormatData.readData(start, endDate, [estation], contaminant)
    build = FormatData.buildClass2(data, [estation], contaminant, 24, start,
                                   endDate)
    xy_values = an(data, build, contaminant)

    for iterations in range(200, 3001, 200):
        run_loss = nng(xy_values[0], xy_values[1], xy_values[2], iterations)
        loss_vec.append(run_loss)
        # The count printed is the one for the next run.
        print(iterations + 200)
    print(loss_vec)

    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de iteraciones de entrenamiento')
    plt.xlabel('Numero de iteraciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("iteraciones.png", dpi=600)
    plt.show()
示例#8
0
def testData2():
    """
    Compare the time needed to obtain the data of a growing set of
    stations from ``FormatData`` against reading the CSV files under
    ``data/``, and plot both series to ``Graficas/tiempoDataBase2.png``.

    Uses the module-level ``est``, ``startDate``, ``endDate`` and
    ``contaminant``; the first 22 entries of ``est`` are used.
    """
    dataBase_time = []
    file_time = []
    chosen = []
    for pos in range(22):
        chosen.append(est[pos])
        print(chosen)

        # Timing of the FormatData path
        dbStart = time()
        data = FormatData.readData(startDate[pos], endDate, chosen,
                                   contaminant)
        build = FormatData.buildClass2(data, chosen, contaminant, 24,
                                       startDate[pos], endDate)
        dbEnd = time()

        # Timing of the CSV path: two files per selected station
        filesStart = time()
        for station in chosen:
            name = station + '_' + contaminant
            data = df.read_csv('data/' + name + '.csv')
            build = df.read_csv('data/' + name + '_pred.csv')
        filesEnd = time()

        dataBase_time.append(dbEnd - dbStart)
        file_time.append(filesEnd - filesStart)

    plt.figure(figsize=(12.2, 6.4))
    plt.plot(file_time, 'g-', label='time File')
    plt.plot(dataBase_time, 'r-', label='time DataBase')
    plt.title('DataBase vs File')
    plt.xlabel('stations')
    plt.ylabel('Time (second)')
    plt.legend(loc='best')
    # One tick per station name.
    plt.xticks(np.arange(len(est)), est, fontsize=7, rotation='vertical')
    plt.savefig('Graficas/tiempoDataBase2.png', dpi=600)
    plt.show()
示例#9
0
def estations():
    """
    Train with ``nn`` while adding one station at a time to the input and
    plot the loss of each run, saving the figure to ``estaciones.png``.

    Uses the module-level ``startDate``, ``endDate``, ``est``,
    ``contaminant`` and ``loss_vec``. The classes are always built for
    the first station, ``est[0]``, which is also the station passed to
    ``nn``.
    """
    start = startDate[0]
    estation = []
    for x in est:
        estation.append(x)
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24, start,
                                       endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000, est[0],
                       contaminant)
        loss_vec.append(temp_loss)
    print(loss_vec)
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    # numpy has no `range`; `arange` gives the tick positions 0..len(est)-1.
    location = np.arange(len(est))
    plt.xticks(location, est, rotation='vertical')
    plt.savefig("estaciones.png", dpi=600)
    plt.show()
def update4hours(estacion, contaminant, fecha, dirData, dirTrain, dirCsv, dirFestivos, variables, fechaString):
    """
    Bring the stored forecast of a station up to date with ``fecha``.

    The timestamp of the last stored forecast is compared with ``fecha``;
    when it lags behind, the gap is forecast either from measured data
    (``pronostico_normal``) or from climatology (``useClimatology``).

    :param estacion: name of the weather station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param fecha: current day
    :type fecha: datetime
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirCsv: Address of processed meteorology archives
    :type dirCsv : String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param variables: meteorological variables
    :type variables: list(Strings)
    :param fechaString: value forwarded as first argument to
        ``unionTotalMeteorologia``
    :return: 1 when ``useClimatology`` or ``pronostico_normal`` was run,
        0 when nothing was forecast
    :type return: int
    """
    nameC = findT(contaminant)
    dataForecast = ultimate_data(estacion, nameC, 2, 1)
    if dataForecast.empty:
        print('No se ha hecho pronostico para la estacion:'+ estacion)
        return 0

    # Hours subtracted from the last forecast timestamp for specific stations.
    # NOTE(review): the reason for these offsets is not visible here.
    ajusteHoras = {'SFE': 6, 'TAH': 15, 'UAX': 13, 'NEZ': 11}
    fechaUltima = dataForecast['fecha'][0]
    if estacion in ajusteHoras:
        fechaUltima = fechaUltima - timedelta(hours=ajusteHoras[estacion])
    fechaUltima = fechaUltima - timedelta(days=1)

    print('Fecha Actual: ' + str(fecha))
    print('Fecha Ultimo Registro: ' + str(fechaUltima))
    # Equal to or ahead of the current date: nothing to do.
    if not fechaUltima < fecha:
        print('Pronostico actualizado')
        return 0

    print('Pronostico retrasado')
    fechaTemp = fechaUltima + timedelta(hours=1)
    fechaInicio = str(fechaTemp.year) + '-' + numString(fechaTemp.month) + '-' + numString(fechaTemp.day) + ' ' + numString(fechaTemp.hour) + ':00:00'
    fechaFin = str(fecha.year) + '-' + numString(fecha.month) + '-' + numString(fecha.day) + ' ' + numString(fecha.hour) + ':00:00'
    data = fd.readData(fechaInicio, fechaFin, [estacion], contaminant)
    data = data.drop_duplicates(keep='first')
    dataMet = unionTotalMeteorologia(fechaString, dirCsv, variables, fechaInicio, fechaFin)
    retraso = fecha - fechaUltima
    print('Numero de horas retrasado: ' + str(retraso))

    umbral = timedelta(hours=3)
    if data.empty and retraso > umbral:
        # No measurements and more than three hours behind: use climatology
        # for the whole gap.
        print('Pronostico con climatologia')
        useClimatology(contaminant, estacion, fechaTemp, fecha, dataMet, dirData, dirTrain, dirFestivos)
        return 1
    if retraso < umbral:
        print('Climatologia cada 4 horas')
        return 0
    if data.empty:
        # Only reachable when the delay is exactly three hours.
        print('No hay datos para la prediccion')
        return 0

    primer_fecha = data['fecha'][0]
    if primer_fecha > fechaTemp:
        # Measurements start later than needed: climatology covers the hours
        # before the first measurement; the normal forecast is not run here.
        fechaFinClim = primer_fecha - timedelta(hours=1)
        useClimatology(contaminant, estacion, fechaTemp, fechaFinClim, dataMet, dirData, dirTrain, dirFestivos)
        return 1

    pronostico_normal(data, dirFestivos, dataMet, estacion, contaminant, dirData, dirTrain)
    print('Pronostico normal C')
    return 1