Пример #1
0
def consecutiveHours(city, expr):
    """Collect runs of consecutive hours for which ``expr`` holds in ``city``.

    Every hour between the earliest and latest loaded timestamp is visited
    through ``hourrange``.  For hours that have data, ``expr`` is evaluated
    with ``hourdata.eval`` and the city-local time bound to ``time``.  An hour
    extends the current run when the result is neither ``None`` nor ``False``;
    a missing hour or a ``None``/``False`` result ends the run, which is then
    handed to ``addTo``.

    Args:
        city: Key used with ``hourly.load`` and ``stations.city``.
        expr: Expression passed unchanged to ``hourdata.eval``.

    Returns:
        A ``(runs, allRuns, mostRecentRun)`` tuple.  ``runs`` and ``allRuns``
        are populated by ``addTo``; ``mostRecentRun`` is the last non-``None``
        value ``addTo`` returned, or ``None`` if it never returned one.
    """
    data = hourly.load(city)
    tz = stations.city[city].timezone

    runs = {}
    allRuns = []
    curRun = []
    mostRecentRun = None

    # With no data there is no first/last timestamp to span; iterate nothing
    # instead of failing with IndexError on an empty key list.
    hours = hourrange(min(data), max(data)) if data else ()

    for utc in hours:
        hourdata = data.get(utc)
        ltime = utc.astimezone(tz)
        v = None
        if hourdata is not None:
            v = hourdata.eval(expr, {'time': ltime})
        if v is not None and v is not False:
            curRun.append((ltime, v))
            continue
        # The run (possibly empty) ends at this hour.
        r = addTo(runs, allRuns, curRun)
        if r is not None:
            mostRecentRun = r
        curRun = []

    # Flush the run that was still open when the data ended.
    r = addTo(runs, allRuns, curRun)
    if r is not None:
        mostRecentRun = r
    return runs, allRuns, mostRecentRun
Пример #2
0
def loadHourlyData(city, dayField, hourField, day):
    """Pair one day's daily value with the hourly values observed that day.

    Snow totals are delegated to ``metarParse.loadSnowWithHours`` and
    max/min temperature to ``loadHourlyDataSwob``.  For every other field the
    hourly records in the 24 hours starting at 06:00 UTC on ``day`` are
    loaded and reduced with ``hourField``.

    Args:
        city: Key used with ``stations.city``, ``daily.load`` and
            ``hourly.load``.
        dayField: Daily field descriptor; its ``index`` attribute selects the
            value inside the daily record.
        hourField: Callable applied to each hourly record; hours for which it
            returns ``None`` are left out.
        day: Object with ``year``/``month``/``day`` attributes, also used as
            the key into the daily data.

    Returns:
        For the non-delegated fields, ``{day: metarParse.SnowAndHourly(value,
        valByHour)}`` where ``valByHour`` maps city-local datetimes to hourly
        values.  ``value`` is ``None`` when there is no daily record for
        ``day``, matching what ``loadHourlyDataSwob`` does.
    """
    if dayField == daily.TOTAL_SNOW_CM:
        return metarParse.loadSnowWithHours(city)
    elif dayField in (daily.MAX_TEMP, daily.MIN_TEMP):
        return loadHourlyDataSwob(city, dayField, day)
    mytimezone = stations.city[city].timezone
    cityData = daily.load(city)
    utcDayStart = datetime.datetime(day.year,
                                    day.month,
                                    day.day,
                                    6,
                                    tzinfo=datetime.timezone.utc)
    dayStart = utcDayStart.astimezone(mytimezone)
    dayEnd = dayStart + datetime.timedelta(days=1)
    cityHourData = hourly.load(city, dateRange=(dayStart, dayEnd))

    valByHour = {}
    for utcHour, values in cityHourData.items():
        v = hourField(values)
        if v is None:
            continue
        valByHour[utcHour.astimezone(mytimezone)] = v

    # A day without a daily record used to raise KeyError here; report the
    # daily value as None instead, as loadHourlyDataSwob already does.
    dayData = cityData.get(day, None)
    dayValue = None if dayData is None else dayData[dayField.index]
    return {day: metarParse.SnowAndHourly(dayValue, valByHour)}
Пример #3
0
def loadHourlyDataSwob(city, dayField, day):
    """Pair one day's daily max/min temperature with values from SWOB rows.

    Rows returned by ``gatherSwob.parse`` are kept when their ``'time'`` lies
    in the 24 hours starting at 06:00 UTC on ``day``.  The row exactly at the
    start of the window contributes ``'air_temp'``; every other row
    contributes ``'max_air_temp_pst1hr'`` or ``'min_air_temp_pst1hr'``
    depending on ``dayField``.

    Args:
        city: Key used with ``stations.city``, ``daily.load``,
            ``gatherSwob.parse`` and ``hourly.load``.
        dayField: ``daily.MAX_TEMP`` or ``daily.MIN_TEMP``
            (``daily.TOTAL_SNOW_CM`` is delegated to
            ``metarParse.loadSnowWithHours``).
        day: Object with ``year``/``month``/``day`` attributes, also used as
            the key into the daily data.

    Returns:
        ``{day: metarParse.SnowAndHourly(value, valByHour)}`` where
        ``valByHour`` maps city-local datetimes to ``D(...)`` values and
        ``value`` is ``None`` when there is no daily record for ``day``.

    Raises:
        AssertionError: A row needs a per-hour field but ``dayField`` is
            neither ``daily.MAX_TEMP`` nor ``daily.MIN_TEMP``.
    """
    if dayField == daily.TOTAL_SNOW_CM:
        return metarParse.loadSnowWithHours(city)
    mytimezone = stations.city[city].timezone
    cityData = daily.load(city)
    utcDayStart = datetime.datetime(day.year,
                                    day.month,
                                    day.day,
                                    6,
                                    tzinfo=datetime.timezone.utc)
    dayStart = utcDayStart.astimezone(mytimezone)
    dayEnd = dayStart + datetime.timedelta(days=1)
    cityHourData = gatherSwob.parse(city)
    # NOTE(review): the result of this load is discarded.  It is kept in case
    # hourly.load has a side effect callers rely on -- confirm and drop it
    # if it does not.
    hourly.load(city, dateRange=(dayStart, dayEnd))

    valByHour = {}
    for valuesDict in cityHourData:
        ts = valuesDict['time']
        if ts < dayStart or ts >= dayEnd:
            continue
        if ts == utcDayStart:
            v = valuesDict['air_temp']
        elif dayField == daily.MAX_TEMP:
            v = valuesDict['max_air_temp_pst1hr']
        elif dayField == daily.MIN_TEMP:
            v = valuesDict['min_air_temp_pst1hr']
        else:
            # Raised explicitly rather than via `assert False`: asserts are
            # removed under -O, which would let `v` silently keep the value
            # from the previous row.
            raise AssertionError(
                'unsupported dayField: {!r}'.format(dayField))
        if v is None:
            continue
        valByHour[ts.astimezone(mytimezone)] = D(v)

    dayData = cityData.get(day, None)
    dayValue = None if dayData is None else dayData[dayField.index]
    return {day: metarParse.SnowAndHourly(dayValue, valByHour)}
Пример #4
0
def dispatch(cityName,
             firstYear,
             startMonth,
             startDay,
             endMonth,
             endDay,
             expression,
             verbose=True,
             skipIncomplete=False,
             lineChart=False):
    """Run ``count`` for one of the predefined hourly temperature checks.

    The start/end month and day are published through the module globals
    ``START_MONTH``, ``START_DAY``, ``END_MONTH`` and ``END_DAY`` before
    ``count`` is called.

    Args:
        cityName: City passed to ``hourly.load`` and ``count``.
        firstYear: Accepted but not used by this function.
        startMonth, startDay, endMonth, endDay: Stored in the module globals.
        expression: One of ``'>=30'``, ``'<10'`` or ``'<=-20'``; any other
            value raises ``KeyError``.
        verbose: Forwarded positionally to ``count``.
        skipIncomplete: Forwarded to ``count``.
        lineChart: Forwarded to ``count``.

    Returns:
        Whatever ``count`` returns.
    """
    global START_MONTH, START_DAY, END_MONTH, END_DAY

    hourData = hourly.load(cityName)
    START_MONTH, START_DAY = startMonth, startDay
    END_MONTH, END_DAY = endMonth, endDay

    # NOTE(review): '<10' is labelled "hours below 10" but is built with the
    # less-than-or-equal predicate -- confirm which one is intended.
    knownChecks = {
        '>=30': (GreaterThanOrEqualToWithFlag(hourly.TEMP, 30),
                 "hours above or at 30"),
        '<10': (LessThanOrEqualToWithFlag(hourly.TEMP, 10),
                "hours below 10"),
        '<=-20': (LessThanOrEqualToWithFlag(hourly.TEMP, -20),
                  "hours below or at -20"),
    }

    predicate, label = knownChecks[expression]
    return count(cityName,
                 hourData,
                 predicate,
                 label,
                 verbose,
                 skipIncomplete=skipIncomplete,
                 lineChart=lineChart)
Пример #5
0
                    text = ("{} between {}➜{},"
                            " the temp dropped from {:.1f}℃➜{:.1f}℃"
                            " which was #{}'s largest 1-hour drop {}"
                            .format(dayName,
                                    clock12(city, 'H{}'.format(maxDiffD[0].hour)),
                                    clock12(city, 'H{}'.format(maxDiffD[1].hour)),
                                    float(maxDiffValue[0]),
                                    float(maxDiffValue[1]),
                                    stations.city[city].name,
                                    since))
                    (use, tweet) = shouldTweetSuffix(
                        city, text)
                    if use:
                        delayedTweets.addToListForCity(city, tweet, urgent=True)
                        print('ok')
                    else:
                        print('skipping')
                    break
                maxDiff = diff
                maxDiffValue = v, lastV
                maxDiffD = localTime, lastD
        lastV = v
        lastD = localTime
        lastUtcTime = utcTime



# Script entry point: when executed directly, run the 'maxDrop' check for
# Ottawa through checkMaxDrop2 with recentLimit=1.
if __name__ == '__main__':
    # NOTE(review): `data` is not referenced again in the visible code;
    # confirm whether this load is still needed.
    data = hourly.load('ottawa')
    checkMaxDrop2('ottawa', 'maxDrop', recentLimit=1)
Пример #6
0
# -*- coding: utf-8 -*-
import hourly, sys, copy, fnmatch
import argparse, datetime
import stations

# Command-line script: for every date given with -d, walk the city's hourly
# records that fall on that local date and format their windchill.
parser = argparse.ArgumentParser(
    description='Determine the last time a field has been this high/low.')
parser.add_argument('-f', help='Field')
parser.add_argument('--city', default='ottawa')
parser.add_argument('-d', help='Mask', nargs='*')

args = parser.parse_args()

field = args.f

data = hourly.load(args.city)
# argparse leaves args.d as None when -d is not given at all; treat that as
# "no days" instead of failing with TypeError in the loop below.
days = args.d or []

tz = stations.city[args.city].timezone

# The key order does not depend on the requested day, so sort only once.
utcHours = sorted(data.keys())

for d in days:
    dayStart = datetime.datetime.strptime(d, "%Y-%m-%d").date()
    print('---', dayStart)
    for utc in utcHours:
        ltime = utc.astimezone(tz)
        if ltime.date() != dayStart:
            continue
        windchill = data[utc].windchill
        if windchill is not None:
            # NOTE(review): the formatted value is not printed or stored in
            # the visible code -- the script looks truncated here.
            windchill = '{:.1f}'.format(windchill)
Пример #7
0
#!/usr/bin/python
from __future__ import print_function
import hourly

# List every hour in Ottawa's hourly data whose visibility is at most 1 and
# whose weather text mentions fog, in sorted order, then exit.
data = hourly.load("ottawa")

hours = []

# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for hour, hourdata in data.items():
    visibility = hourdata[hourly.VISIBILITY]
    if len(visibility):
        visibility = float(visibility)
        if visibility <= 1 and 'fog' in hourdata[hourly.WEATHER].lower():
            hours.append(hour)

hours.sort()
for hour in hours:
    # print must be called as a function: the file enables print_function,
    # under which the statement form is a SyntaxError.
    print(hour)
exit(0)

# NOTE(review): everything below is unreachable because of exit(0) above.
# It counts matching hours per day, taking the key minus its last element
# as the day.
days = {}

for hour in hours:
    day = hour[:-1]
    if day not in days:
        days[day] = 0

    days[day] += 1
def _flagSetFor(infodict, flagMap, name):
    """Return the set of hourly flag values recorded for ``name`` on one day.

    ``flagMap`` redirects a field to the field whose flags it shares; a day
    with no '<field>Flag' entry yields an empty set.
    """
    flagName = flagMap.get(name, name) + 'Flag'
    try:
        return set(infodict[flagName].values())
    except KeyError:
        return set()


def main(city, year):
    """Summarise one year of hourly data into per-day rows and write them out.

    Hourly records for ``city`` from 1 January of ``year`` up to 1 January of
    the following year are grouped by city-local date.  For each date the
    hourly maximum (Temp, Humidex), minimum (Temp, Windchill) and average
    (Temp, Windchill, Wind, Humidity, Dewpoint) are computed together with a
    '+'-joined flag string, packed into ``HourlyDailyData``, converted with
    ``prepForDb`` and written as one CSV row to
    ``{city}/data/{year}.dailyextra-csv.bz2``.

    Args:
        city: Key used with ``hourly.load`` and ``stations.city``; also the
            leading directory of the output path.
        year: Calendar year to process.

    Returns:
        None.  Rows for dates less than three days before today are also
        printed.
    """
    data = hourly.load(
        city,
        (datetime.datetime(year, 1, 1), datetime.datetime(year + 1, 1, 1)))
    timezone = stations.city[city].timezone
    statsByDay = {}

    # Group every hourly value as statsByDay[date][field][localHour].
    for dateTime in sorted(data.keys()):
        info = data[dateTime]
        localTime = dateTime.astimezone(timezone)
        thisDayStats = statsByDay.setdefault(localTime.date(), {})

        for hourFieldName, val in info._asdict().items():
            fieldName = nameMap[hourFieldName]
            if val is None or (type(val) is str and len(val) == 0):
                # Skip missing values and empty strings.
                continue
            thisDayStats.setdefault(fieldName, {})[localTime.hour] = val

        # humidex and windchill are read as attributes of the record and
        # stored under their own names.
        for derivedName, val in (('Humidex', info.humidex),
                                 ('Windchill', info.windchill)):
            if val is not None:
                thisDayStats.setdefault(derivedName, {})[localTime.hour] = val

    fname = "{city}/data/{year}.dailyextra-csv.bz2".format(city=city,
                                                            year=year)

    # Humidex and Windchill share the flags recorded for Temp.
    flagMap = {'Humidex': 'Temp', 'Windchill': 'Temp'}

    # The context manager guarantees the text wrapper (and the bz2 file under
    # it) is flushed and closed; previously the file was never closed.
    with io.TextIOWrapper(bz2.BZ2File(fname, 'w')) as outFile:
        csvWriter = csv.writer(outFile)

        for date in sorted(statsByDay.keys()):
            infodict = statsByDay[date]
            info = {}
            if date == envCanToday():
                _, _, summaryMaxGust = todayXmlParse.getMinMax(city)
                info['maxGust'] = summaryMaxGust
                info['maxGustFlag'] = 'H'

            for maxName in ('Temp', 'Humidex'):
                maxVal = None
                maxFlag = ''
                lenVals = 0
                if maxName in infodict:
                    valByHour = infodict[maxName]
                    lenVals = len(valByHour)
                    minKey, maxKey = keyOfMinMaxFloatValue(valByHour)
                    if maxKey is not None:
                        maxHourlyHour = maxKey[0]
                        maxVal = valByHour[maxHourlyHour]
                        maxFlag = 'H' + str(maxHourlyHour)
                info['max' + maxName] = maxVal
                flagSet = _flagSetFor(infodict, flagMap, maxName)
                if maxFlag:
                    # MAX_Temp should always be marked as HOURLY to avoid
                    # confusion with official daytime highs
                    flagSet.discard('H')
                    flagSet.add(maxFlag)
                if 'M' in flagSet and len(flagSet) > 1:
                    flagSet.remove('M')
                # 'I' is added when fewer hourly values exist than
                # hourCount() reports for the day.
                if 0 < lenVals < hourCount(city, date):
                    flagSet.add('I')
                info['max' + maxName + 'Flag'] = '+'.join(sorted(flagSet))

            for minName in ('Temp', 'Windchill'):
                minVal = None
                minFlag = ''
                lenVals = 0
                if minName in infodict:
                    valByHour = infodict[minName]
                    lenVals = len(valByHour)
                    minKey, maxKey = keyOfMinMaxFloatValue(valByHour)
                    if minKey is not None:
                        minHourlyHour = minKey[0]
                        minVal = valByHour[minHourlyHour]
                        minFlag = 'H' + str(minHourlyHour)
                flagSet = _flagSetFor(infodict, flagMap, minName)
                if minFlag:
                    # minTemp should always be marked as HOURLY to avoid
                    # confusion with official daytime lows
                    flagSet.discard('H')
                    flagSet.add(minFlag)
                if 'M' in flagSet and len(flagSet) > 1:
                    flagSet.remove('M')
                if 0 < lenVals < hourCount(city, date):
                    flagSet.add('I')
                info['min' + minName] = minVal
                info['min' + minName + 'Flag'] = '+'.join(sorted(flagSet))

            for avgName in ('Temp', 'Windchill', 'Wind', 'Humidity',
                            'Dewpoint'):
                avgVal = None
                lenVals = 0
                if avgName in infodict:
                    floatVals = tuple(v for v in infodict[avgName].values()
                                      if v is not None)
                    if len(floatVals) > 0:
                        avgVal = numpy.average(tuple(map(float, floatVals)))
                        # NOTE(review): `floatVals[0] is int` compares a
                        # value with the type object and is never true, so
                        # the quantize branch always runs.  Left as-is
                        # because switching to isinstance() would change the
                        # written values from rounded to truncated.
                        if floatVals[0] is int:
                            avgVal = int(avgVal)
                        else:
                            avgVal = D(avgVal).quantize(
                                floatVals[0], decimal.ROUND_HALF_UP)
                    lenVals = len(floatVals)
                flagSet = _flagSetFor(infodict, flagMap, avgName)
                if 'M' in flagSet and avgVal is not None:
                    flagSet.remove('M')
                if 0 < lenVals < hourCount(city, date):
                    flagSet.add('I')
                # All averages should always be marked as HOURLY to avoid
                # confusion with official MEAN_Temp
                if avgVal is not None:
                    flagSet.add('H')
                info['avg' + avgName] = avgVal
                info['avg' + avgName + 'Flag'] = '+'.join(sorted(flagSet))

            info = HourlyDailyData(**info)
            intInfo = prepForDb(info)

            csvWriter.writerow((date.year, date.month, date.day) + intInfo)
            if (datetime.date.today() - date).days < 3:
                print(date, intInfo)
Пример #9
0
def main():
    """Command-line entry point: report how often a weather expression holds.

    Parses the command line, loads daily (or, with ``--hour``, hourly) data
    for the chosen city, evaluates the user-supplied Python expression for
    every date/hour accepted by ``matchDate``, prints each window of
    ``--run`` consecutive matches, and finally prints frequency statistics
    (overall, for the past 30 years and for the past year).

    Inside the expression every key of ``fieldDict`` is available as a
    variable (plus ``<name>Flag`` when the field reports a flag), ``time`` is
    the current date/hour, and ``history`` is a sequence whose element 0
    holds the current entry's fields and whose later elements hold the
    entries directly before it; it is reset whenever the step between
    entries is not exactly one day/hour.

    NOTE(review): the expression is executed with ``eval`` and full builtins,
    so it must only ever come from a trusted operator.
    """
    parser = argparse.ArgumentParser(
        description='Determine how often some weather occurs.')
    parser.add_argument('expr', help='Which observation to check')
    parser.add_argument('--city', default='ottawa')
    parser.add_argument('--run', type=int, default=1)
    parser.add_argument('-m', help='Mask', default=['*'], nargs='*')
    parser.add_argument(
        '--between',
        help=
        'Only consider dates between these two. Comma separated like 05-15,07-01'
    )
    parser.add_argument('--holiday', help='The name of a holiday')
    parser.add_argument('--hour',
                        help='Use hourly data instead of daily.',
                        action='store_true',
                        default=False)
    parser.add_argument('--winters',
                        help='Count by winter instead of by year.',
                        action='store_true',
                        default=False)
    parser.add_argument('--group-by-year', action='store_true', default=False)
    args = parser.parse_args()

    run = args.run
    city = args.city

    # NOTE(review): 'avgWindchill' is built from daily.MIN_WINDCHILL, the
    # same field as 'windchill' -- confirm whether that is intended.
    fieldDict = {
        'min': Value(daily.MIN_TEMP),
        'max': Value(daily.MAX_TEMP),
        'meanTemp': Value(daily.MEAN_TEMP),
        'tempSpan': ValueDiff(daily.MAX_TEMP, daily.MIN_TEMP),
        'rain': Value(daily.TOTAL_RAIN_MM),
        'precip': Value(daily.TOTAL_PRECIP_MM),
        'humidex': Value(daily.MAX_HUMIDEX),
        'snow': Value(daily.TOTAL_SNOW_CM),
        'snowpack': ValueEmptyZero(daily.SNOW_ON_GRND_CM),
        'windgust': Value(daily.SPD_OF_MAX_GUST_KPH),
        'wind': Value(daily.AVG_WIND),
        'windchill': Value(daily.MIN_WINDCHILL),
        'avgWindchill': Value(daily.MIN_WINDCHILL),
    }
    if args.hour:
        fieldDict = {
            'temp': Value(hourly.TEMP),
            'dewpoint': Value(hourly.DEW_POINT_TEMP),
            'humidity': Value(hourly.REL_HUM),
            'windDir': Value(hourly.WIND_DIR),
            'wind': Value(hourly.WIND_SPD),
            'visibility': Value(hourly.VISIBILITY),
            'pressure': Value(hourly.STN_PRESS),
            'weather': ValueNoFlag(hourly.WEATHER),
            'windchill': ValueNoFlag(hourly.WINDCHILL),
        }

    between = parseBetween(args.between)
    if args.hour:
        data = hourly.load(city)
        # Overlay on-the-hour conditions from the realtime stats, creating
        # a record when the hour is missing from the hourly data.
        for time, conditions in parseWeatherStatsRealtime.parse(city).items():
            if time.minute == 0:
                values = data.get(time, None)
                if values is None:
                    data[time] = hourly.HourData(WEATHER=conditions)
                else:
                    data[time] = values._replace(WEATHER=conditions)
        # METAR weather only fills hours whose WEATHER is missing or 'NA'.
        for time, metarWeather in metarParse.genHourlyWeather(city):
            if time.minute != 0:
                continue
            values = data.get(time, None)
            if values is None:
                data[time] = hourly.HourData(WEATHER=metarWeather)
            elif (values.WEATHER is None or values.WEATHER == 'NA'):
                data[time] = values._replace(WEATHER=metarWeather)
    else:
        data = daily.load(city)

    curRun = deque()
    curDates = deque()

    firstDate = None
    lastDate = None
    matches = []

    compiledExpr = compile(args.expr, filename='.', mode='eval')
    exprWords = pythonWords(args.expr)
    # Fields the expression mentions; an entry is skipped when any of them
    # has no value.
    referencedValues = {name for name in fieldDict if name in exprWords}

    class refCountedList(deque):
        """Deque that remembers which integer indexes were read from it."""

        def __init__(self):
            super().__init__()
            self.indexSet = set()

        def __getitem__(self, ind):
            if not isinstance(ind, slice):
                self.indexSet.add(ind)
                return deque.__getitem__(self, ind)
            # deque does not support slicing; slice a list copy instead.
            return list(self)[ind]

        def clearIndexSet(self):
            self.indexSet.clear()

    historyDates = deque([])
    history = refCountedList()

    class History():
        pass

    expectedDiff = dt.timedelta(days=1)
    if args.hour:
        expectedDiff = dt.timedelta(hours=1)
    mytimezone = stations.city[args.city].timezone

    for date in sorted(data.keys()):

        if args.hour:
            # NOTE(review): `date` becomes the local-time datetime and is
            # then used as the key into `data` below -- this relies on aware
            # datetimes for the same instant comparing and hashing equal.
            utchour = date
            localhour = utchour.astimezone(mytimezone)
            date = localhour

        # A gap in the sequence invalidates the look-back history.
        if (len(historyDates) > 0 and date - historyDates[0] != expectedDiff):
            historyDates.clear()
            history.clear()

        if matchDate(date, args.m, between, args.holiday):
            vals = {
                'history': history,
                "__builtins__": __builtins__,
                'time': date
            }
            history.appendleft(History())
            historyDates.appendleft(date)
            for fieldName, fieldCall in fieldDict.items():
                vals[fieldName] = fieldCall(data[date], date)
                flagValue = fieldCall.getFlag(data[date])
                if flagValue is not None:
                    vals[fieldName + 'Flag'] = flagValue
                if type(vals[fieldName]) is not SpecialNone:
                    setattr(history[0], fieldName, vals[fieldName])
                    if flagValue is not None:
                        setattr(history[0], fieldName + 'Flag', flagValue)

            skip = False
            usedVals = {}
            for fieldName in referencedValues:
                if type(vals[fieldName]) is SpecialNone:
                    skip = True
                    break
                usedVals[fieldName] = vals[fieldName]
            if skip:
                continue

            if firstDate is None:
                firstDate = date
            lastDate = date

            # Start from a clean slate so indexSet only reflects the history
            # entries this evaluation reads.
            history.clearIndexSet()
            try:
                val = eval(compiledExpr, vals)
            except IndexError:
                # Expression looked further back than the history reaches.
                val = False
            except AttributeError:
                # A history entry lacks the requested field.
                val = False
            except TypeError:
                # Dump the available history to help debug the expression.
                for offset, histDate in enumerate(historyDates):
                    print(histDate)
                    for name in fieldDict:
                        if hasattr(history[offset], name):
                            print(name, getattr(history[offset], name))
                raise

            if val is True or (type(val) is tuple and val[0] is True):
                # Snapshot the index set: reading history[i] below records
                # the index again.
                for i in tuple(history.indexSet):
                    for fieldName in referencedValues:
                        usedVals['history[{}].{}'.format(i, fieldName)] = (
                            getattr(history[i], fieldName))

                # A tuple result carries the value to display as val[1].
                if type(val) is tuple:
                    entry = {'expr': val[1]}
                else:
                    entry = usedVals

                expectedDate = date
                if len(curDates) > 0:
                    expectedDate = curDates[-1] + expectedDiff
                if date != expectedDate:
                    # Not adjacent to the previous match: start a new run.
                    curDates = deque([date])
                    curRun = deque([entry])
                else:
                    curDates.append(date)
                    curRun.append(entry)
                    if len(curRun) > run:
                        curRun.popleft()
                        curDates.popleft()

                if len(curRun) == run:
                    printDate(curDates)

                    if len(curRun[0]) == 1:
                        print(' '.join(
                            [str(first(a.values())) for a in curRun]))
                    else:
                        print(curRun)
                    # Store a snapshot: curDates keeps being mutated while a
                    # streak continues, and an alias would make every match
                    # of the streak report the streak's final window.
                    matches.append(tuple(curDates))

    if args.winters:
        grouping = tuple([winterFromDate(a[0]) for a in matches])
        group = 'winter'
    else:
        grouping = tuple([a[0].year for a in matches])
        group = 'year'
    if args.group_by_year:
        print(tuple(enumerate(Counter(grouping).most_common(30))))
    print('Total count: {}'.format(len(grouping)))
    if not matches:
        # Every statistic below divides by the number of matches (and
        # lastDate is unset when no entry was evaluated at all).
        return

    print('Occurance: ', end='')
    yearSpan = lastDate.year - firstDate.year + 1
    if len(matches) < yearSpan:
        print('once every {:.1f} {}s'.format(yearSpan / len(matches), group))
    else:
        print('{:.1f} times per {}'.format(len(matches) / yearSpan, group))

    print('Yearly Occurance:', end=' ')
    uniqGrouping = sorted(list(set(grouping)))
    if len(uniqGrouping) < yearSpan:
        occurance = yearSpan / len(uniqGrouping)
        print('once every {occurance:.1f} {group}s,'.format(
            occurance=occurance, group=group),
              end=' ')
    else:
        occurance = len(uniqGrouping) / yearSpan
        print('{occurance:.1f} times per {group},'.format(
            occurance=occurance, group=group),
              end=' ')

    recentFilter = lambda t: t >= today().year - 30 and t < today().year
    if args.winters:
        recentFilter = lambda t: t >= thisWinter() - 30 and t < thisWinter()
    recentOccursYears = tuple(filter(recentFilter, grouping))
    recentUniqYears = tuple(sorted(set(recentOccursYears)))
    print('{} out of the past 30 years: {}'.format(len(recentUniqYears),
                                                   str(recentUniqYears)))

    recentYears = range(thisYear() - 30, thisYear())
    if args.winters:
        recentYears = range(thisWinter() - 30, thisWinter())
    recentLen = len(recentUniqYears)
    recentYearCounter = Counter(recentOccursYears)
    # Per-year match counts (zero for years without a match), ascending.
    recentLens = sorted(recentYearCounter[a] for a in recentYears)
    print('{} during the past 30 years'.format(recentLen))
    print('Average is {:.1f}/year during the past 30 years'.format(
        len(recentOccursYears) / 30))
    print('Median is {},{}/year during the past 30 years'.format(
        recentLens[int(len(recentLens) / 2)], recentLens[int(
            (len(recentLens) + 1) / 2)]))
    print(
        '80% CI is {},{}'.format(
            recentLens[len(recentLens) // 10],
            recentLens[len(recentLens) - (len(recentLens) // 10) - 1]),
        recentLens)

    occurDays = '∞'
    if recentLen > 0:
        occurDays = (dt.date.today() - yearsAgo(30)).days / recentLen
    print('Every {} days during the past 30 years'.format(occurDays))
    if recentLen > 0:
        print('Every {} months, {:.1f} days during the past 30 years'.format(
            int(occurDays // 30), occurDays % 30))

    if args.hour:
        recentOccur = tuple(
            filter(lambda t: t[0].date() >= yearsAgo(1), matches))
    else:
        recentOccur = tuple(filter(lambda t: t[0] >= yearsAgo(1), matches))
    print('{} during the past year'.format(len(recentOccur)))
    if args.winters:
        thisWinterCount = grouping.count(thisWinter())
        print('{} so far this winter'.format(thisWinterCount))
    else:
        thisYearCount = grouping.count(thisYear())
        print('{} so far this year'.format(thisYearCount))